chore_: add `klauspost/reedsolomon` module

2024-04-04 17:52:49 +02:00 · 2024-04-04 17:52:49 +02:00 · 4f2adc0ced
parent bd91f5ab49
commit 4f2adc0ced
398 changed files with 216863 additions and 3038 deletions
--- a/go.mod
+++ b/go.mod
@ -84,6 +84,7 @@ require (
 	github.com/ipfs/go-log/v2 v2.5.1
 	github.com/jellydator/ttlcache/v3 v3.2.0
 	github.com/jmoiron/sqlx v1.3.5
 	github.com/klauspost/reedsolomon v1.12.1
 	github.com/ladydascalie/currency v1.6.0
 	github.com/meirf/gopart v0.0.0-20180520194036-37e9492a85a8
 	github.com/mutecomm/go-sqlcipher/v4 v4.4.2
@ -177,7 +178,7 @@ require (
 	github.com/jackpal/go-nat-pmp v1.0.2 // indirect
 	github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect
 	github.com/klauspost/compress v1.16.7 // indirect
-	github.com/klauspost/cpuid/v2 v2.2.5 // indirect
+	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
 	github.com/koron/go-ssdp v0.0.4 // indirect
 	github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
 	github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
@ -279,7 +280,7 @@ require (
 	go.uber.org/fx v1.20.0 // indirect
 	golang.org/x/mod v0.12.0 // indirect
 	golang.org/x/sync v0.3.0 // indirect
-	golang.org/x/sys v0.11.0 // indirect
+	golang.org/x/sys v0.18.0 // indirect
 	golang.org/x/term v0.11.0 // indirect
 	golang.org/x/tools v0.12.1-0.20230818130535-1517d1a3ba60 // indirect
 	golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect
--- a/go.sum
+++ b/go.sum
@ -1295,10 +1295,12 @@ github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs
 github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
 github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
-github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=
+github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
-github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
+github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
 github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg=
 github.com/klauspost/pgzip v1.0.2-0.20170402124221-0bf5dcad4ada/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
 github.com/klauspost/reedsolomon v1.12.1 h1:NhWgum1efX1x58daOBGCFWcxtEhOhXKKl1HAPQUp03Q=
 github.com/klauspost/reedsolomon v1.12.1/go.mod h1:nEi5Kjb6QqtbofI6s+cbG/j1da11c96IBYBSnVGtuBs=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
@ -2637,8 +2639,8 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
+golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
-golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
--- a/vendor/github.com/klauspost/cpuid/v2/README.md
+++ b/vendor/github.com/klauspost/cpuid/v2/README.md
@ -9,10 +9,7 @@ You can access the CPU information by accessing the shared CPU variable of the c
 Package home: https://github.com/klauspost/cpuid
 [![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
-[![Build Status][3]][4]
+[![Go](https://github.com/klauspost/cpuid/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/cpuid/actions/workflows/go.yml)
 [3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
 [4]: https://travis-ci.org/klauspost/cpuid
 ## installing
@ -285,7 +282,12 @@ Exit Code 1
 | AMXINT8            | Tile computational operations on 8-bit integers                                                                                                                                    |
 | AMXFP16            | Tile computational operations on FP16 numbers                                                                                                                                      |
 | AMXTILE            | Tile architecture                                                                                                                                                                  |
 | APX_F              | Intel APX                                                                                                                                                                          |
 | AVX                | AVX functions                                                                                                                                                                      |
 | AVX10              | If set the Intel AVX10 Converged Vector ISA is supported                                                                                                                           |
 | AVX10_128          | If set indicates that AVX10 128-bit vector support is present                                                                                                                      |
 | AVX10_256          | If set indicates that AVX10 256-bit vector support is present                                                                                                                      |
 | AVX10_512          | If set indicates that AVX10 512-bit vector support is present                                                                                                                      |
 | AVX2               | AVX2 functions                                                                                                                                                                     |
 | AVX512BF16         | AVX-512 BFLOAT16 Instructions                                                                                                                                                      |
 | AVX512BITALG       | AVX-512 Bit Algorithms                                                                                                                                                             |
@ -365,6 +367,8 @@ Exit Code 1
 | IDPRED_CTRL        | IPRED_DIS                                                                                                                                                                          |
 | INT_WBINVD         | WBINVD/WBNOINVD are interruptible.                                                                                                                                                 |
 | INVLPGB            | INVLPGB and TLBSYNC instructions supported                                                                                                                                         |
 | KEYLOCKER          | Key locker                                                                                                                                                                         |
 | KEYLOCKERW         | Key locker wide                                                                                                                                                                    |
 | LAHF               | LAHF/SAHF in long mode                                                                                                                                                             |
 | LAM                | If set, CPU supports Linear Address Masking                                                                                                                                        |
 | LBRVIRT            | LBR virtualization                                                                                                                                                                 |
@ -380,7 +384,7 @@ Exit Code 1
 | MOVDIRI            | Move Doubleword as Direct Store                                                                                                                                                    |
 | MOVSB_ZL           | Fast Zero-Length MOVSB                                                                                                                                                             |
 | MPX                | Intel MPX (Memory Protection Extensions)                                                                                                                                           |
-| MOVU               | MOVU SSE instructions are more efficient and should be preferred to SSE	MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD       |
+| MOVU               | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD        |
 | MSRIRC             | Instruction Retired Counter MSR available                                                                                                                                          |
 | MSRLIST            | Read/Write List of Model Specific Registers                                                                                                                                        |
 | MSR_PAGEFLUSH      | Page Flush MSR available                                                                                                                                                           |
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@ -67,188 +67,200 @@ const (
 	// Keep index -1 as unknown
 	UNKNOWN = -1
-	// Add features
+	// x86 features
-	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+	ADX                 FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
-	AESNI                               // Advanced Encryption Standard New Instructions
+	AESNI                                // Advanced Encryption Standard New Instructions
-	AMD3DNOW                            // AMD 3DNOW
+	AMD3DNOW                             // AMD 3DNOW
-	AMD3DNOWEXT                         // AMD 3DNowExt
+	AMD3DNOWEXT                          // AMD 3DNowExt
-	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
+	AMXBF16                              // Tile computational operations on BFLOAT16 numbers
-	AMXFP16                             // Tile computational operations on FP16 numbers
+	AMXFP16                              // Tile computational operations on FP16 numbers
-	AMXINT8                             // Tile computational operations on 8-bit integers
+	AMXINT8                              // Tile computational operations on 8-bit integers
-	AMXTILE                             // Tile architecture
+	AMXTILE                              // Tile architecture
-	AVX                                 // AVX functions
+	APX_F                                // Intel APX
-	AVX2                                // AVX2 functions
+	AVX                                  // AVX functions
-	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
+	AVX10                                // If set the Intel AVX10 Converged Vector ISA is supported
-	AVX512BITALG                        // AVX-512 Bit Algorithms
+	AVX10_128                            // If set indicates that AVX10 128-bit vector support is present
-	AVX512BW                            // AVX-512 Byte and Word Instructions
+	AVX10_256                            // If set indicates that AVX10 256-bit vector support is present
-	AVX512CD                            // AVX-512 Conflict Detection Instructions
+	AVX10_512                            // If set indicates that AVX10 512-bit vector support is present
-	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
+	AVX2                                 // AVX2 functions
-	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
+	AVX512BF16                           // AVX-512 BFLOAT16 Instructions
-	AVX512F                             // AVX-512 Foundation
+	AVX512BITALG                         // AVX-512 Bit Algorithms
-	AVX512FP16                          // AVX-512 FP16 Instructions
+	AVX512BW                             // AVX-512 Byte and Word Instructions
-	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
+	AVX512CD                             // AVX-512 Conflict Detection Instructions
-	AVX512PF                            // AVX-512 Prefetch Instructions
+	AVX512DQ                             // AVX-512 Doubleword and Quadword Instructions
-	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
+	AVX512ER                             // AVX-512 Exponential and Reciprocal Instructions
-	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
+	AVX512F                              // AVX-512 Foundation
-	AVX512VL                            // AVX-512 Vector Length Extensions
+	AVX512FP16                           // AVX-512 FP16 Instructions
-	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
+	AVX512IFMA                           // AVX-512 Integer Fused Multiply-Add Instructions
-	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
+	AVX512PF                             // AVX-512 Prefetch Instructions
-	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
+	AVX512VBMI                           // AVX-512 Vector Bit Manipulation Instructions
-	AVXIFMA                             // AVX-IFMA instructions
+	AVX512VBMI2                          // AVX-512 Vector Bit Manipulation Instructions, Version 2
-	AVXNECONVERT                        // AVX-NE-CONVERT instructions
+	AVX512VL                             // AVX-512 Vector Length Extensions
-	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one
+	AVX512VNNI                           // AVX-512 Vector Neural Network Instructions
-	AVXVNNI                             // AVX (VEX encoded) VNNI neural network instructions
+	AVX512VP2INTERSECT                   // AVX-512 Intersect for D/Q
-	AVXVNNIINT8                         // AVX-VNNI-INT8 instructions
+	AVX512VPOPCNTDQ                      // AVX-512 Vector Population Count Doubleword and Quadword
-	BHI_CTRL                            // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
+	AVXIFMA                              // AVX-IFMA instructions
-	BMI1                                // Bit Manipulation Instruction Set 1
+	AVXNECONVERT                         // AVX-NE-CONVERT instructions
-	BMI2                                // Bit Manipulation Instruction Set 2
+	AVXSLOW                              // Indicates the CPU performs 2 128 bit operations instead of one
-	CETIBT                              // Intel CET Indirect Branch Tracking
+	AVXVNNI                              // AVX (VEX encoded) VNNI neural network instructions
-	CETSS                               // Intel CET Shadow Stack
+	AVXVNNIINT8                          // AVX-VNNI-INT8 instructions
-	CLDEMOTE                            // Cache Line Demote
+	BHI_CTRL                             // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
-	CLMUL                               // Carry-less Multiplication
+	BMI1                                 // Bit Manipulation Instruction Set 1
-	CLZERO                              // CLZERO instruction supported
+	BMI2                                 // Bit Manipulation Instruction Set 2
-	CMOV                                // i686 CMOV
+	CETIBT                               // Intel CET Indirect Branch Tracking
-	CMPCCXADD                           // CMPCCXADD instructions
+	CETSS                                // Intel CET Shadow Stack
-	CMPSB_SCADBS_SHORT                  // Fast short CMPSB and SCASB
+	CLDEMOTE                             // Cache Line Demote
-	CMPXCHG8                            // CMPXCHG8 instruction
+	CLMUL                                // Carry-less Multiplication
-	CPBOOST                             // Core Performance Boost
+	CLZERO                               // CLZERO instruction supported
-	CPPC                                // AMD: Collaborative Processor Performance Control
+	CMOV                                 // i686 CMOV
-	CX16                                // CMPXCHG16B Instruction
+	CMPCCXADD                            // CMPCCXADD instructions
-	EFER_LMSLE_UNS                      // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
+	CMPSB_SCADBS_SHORT                   // Fast short CMPSB and SCASB
-	ENQCMD                              // Enqueue Command
+	CMPXCHG8                             // CMPXCHG8 instruction
-	ERMS                                // Enhanced REP MOVSB/STOSB
+	CPBOOST                              // Core Performance Boost
-	F16C                                // Half-precision floating-point conversion
+	CPPC                                 // AMD: Collaborative Processor Performance Control
-	FLUSH_L1D                           // Flush L1D cache
+	CX16                                 // CMPXCHG16B Instruction
-	FMA3                                // Intel FMA 3. Does not imply AVX.
+	EFER_LMSLE_UNS                       // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
-	FMA4                                // Bulldozer FMA4 functions
+	ENQCMD                               // Enqueue Command
-	FP128                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
+	ERMS                                 // Enhanced REP MOVSB/STOSB
-	FP256                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
+	F16C                                 // Half-precision floating-point conversion
-	FSRM                                // Fast Short Rep Mov
+	FLUSH_L1D                            // Flush L1D cache
-	FXSR                                // FXSAVE, FXRESTOR instructions, CR4 bit 9
+	FMA3                                 // Intel FMA 3. Does not imply AVX.
-	FXSROPT                             // FXSAVE/FXRSTOR optimizations
+	FMA4                                 // Bulldozer FMA4 functions
-	GFNI                                // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
+	FP128                                // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
-	HLE                                 // Hardware Lock Elision
+	FP256                                // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
-	HRESET                              // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
+	FSRM                                 // Fast Short Rep Mov
-	HTT                                 // Hyperthreading (enabled)
+	FXSR                                 // FXSAVE, FXRESTOR instructions, CR4 bit 9
-	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
+	FXSROPT                              // FXSAVE/FXRSTOR optimizations
-	HYBRID_CPU                          // This part has CPUs of more than one type.
+	GFNI                                 // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
-	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
+	HLE                                  // Hardware Lock Elision
-	IA32_ARCH_CAP                       // IA32_ARCH_CAPABILITIES MSR (Intel)
+	HRESET                               // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
-	IA32_CORE_CAP                       // IA32_CORE_CAPABILITIES MSR
+	HTT                                  // Hyperthreading (enabled)
-	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+	HWA                                  // Hardware assert supported. Indicates support for MSRC001_10
-	IBRS                                // AMD: Indirect Branch Restricted Speculation
+	HYBRID_CPU                           // This part has CPUs of more than one type.
-	IBRS_PREFERRED                      // AMD: IBRS is preferred over software solution
+	HYPERVISOR                           // This bit has been reserved by Intel & AMD for use by hypervisors
-	IBRS_PROVIDES_SMP                   // AMD: IBRS provides Same Mode Protection
+	IA32_ARCH_CAP                        // IA32_ARCH_CAPABILITIES MSR (Intel)
-	IBS                                 // Instruction Based Sampling (AMD)
+	IA32_CORE_CAP                        // IA32_CORE_CAPABILITIES MSR
-	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
+	IBPB                                 // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
-	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
+	IBPB_BRTYPE                          // Indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes	all branch type predictions from the CPU branch predictor
-	IBSFFV                              // Instruction Based Sampling Feature (AMD)
+	IBRS                                 // AMD: Indirect Branch Restricted Speculation
-	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
+	IBRS_PREFERRED                       // AMD: IBRS is preferred over software solution
-	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
+	IBRS_PROVIDES_SMP                    // AMD: IBRS provides Same Mode Protection
-	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
+	IBS                                  // Instruction Based Sampling (AMD)
-	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
+	IBSBRNTRGT                           // Instruction Based Sampling Feature (AMD)
-	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
+	IBSFETCHSAM                          // Instruction Based Sampling Feature (AMD)
-	IBS_FETCH_CTLX                      // AMD: IBS fetch control extended MSR supported
+	IBSFFV                               // Instruction Based Sampling Feature (AMD)
-	IBS_OPDATA4                         // AMD: IBS op data 4 MSR supported
+	IBSOPCNT                             // Instruction Based Sampling Feature (AMD)
-	IBS_OPFUSE                          // AMD: Indicates support for IbsOpFuse
+	IBSOPCNTEXT                          // Instruction Based Sampling Feature (AMD)
-	IBS_PREVENTHOST                     // Disallowing IBS use by the host supported
+	IBSOPSAM                             // Instruction Based Sampling Feature (AMD)
-	IBS_ZEN4                            // AMD: Fetch and Op IBS support IBS extensions added with Zen4
+	IBSRDWROPCNT                         // Instruction Based Sampling Feature (AMD)
-	IDPRED_CTRL                         // IPRED_DIS
+	IBSRIPINVALIDCHK                     // Instruction Based Sampling Feature (AMD)
-	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
+	IBS_FETCH_CTLX                       // AMD: IBS fetch control extended MSR supported
-	INVLPGB                             // NVLPGB and TLBSYNC instruction supported
+	IBS_OPDATA4                          // AMD: IBS op data 4 MSR supported
-	LAHF                                // LAHF/SAHF in long mode
+	IBS_OPFUSE                           // AMD: Indicates support for IbsOpFuse
-	LAM                                 // If set, CPU supports Linear Address Masking
+	IBS_PREVENTHOST                      // Disallowing IBS use by the host supported
-	LBRVIRT                             // LBR virtualization
+	IBS_ZEN4                             // AMD: Fetch and Op IBS support IBS extensions added with Zen4
-	LZCNT                               // LZCNT instruction
+	IDPRED_CTRL                          // IPRED_DIS
-	MCAOVERFLOW                         // MCA overflow recovery support.
+	INT_WBINVD                           // WBINVD/WBNOINVD are interruptible.
-	MCDT_NO                             // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
+	INVLPGB                              // NVLPGB and TLBSYNC instruction supported
-	MCOMMIT                             // MCOMMIT instruction supported
+	KEYLOCKER                            // Key locker
-	MD_CLEAR                            // VERW clears CPU buffers
+	KEYLOCKERW                           // Key locker wide
-	MMX                                 // standard MMX
+	LAHF                                 // LAHF/SAHF in long mode
-	MMXEXT                              // SSE integer functions or AMD MMX ext
+	LAM                                  // If set, CPU supports Linear Address Masking
-	MOVBE                               // MOVBE instruction (big-endian)
+	LBRVIRT                              // LBR virtualization
-	MOVDIR64B                           // Move 64 Bytes as Direct Store
+	LZCNT                                // LZCNT instruction
-	MOVDIRI                             // Move Doubleword as Direct Store
+	MCAOVERFLOW                          // MCA overflow recovery support.
-	MOVSB_ZL                            // Fast Zero-Length MOVSB
+	MCDT_NO                              // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
-	MOVU                                // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE	MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
+	MCOMMIT                              // MCOMMIT instruction supported
-	MPX                                 // Intel MPX (Memory Protection Extensions)
+	MD_CLEAR                             // VERW clears CPU buffers
-	MSRIRC                              // Instruction Retired Counter MSR available
+	MMX                                  // standard MMX
-	MSRLIST                             // Read/Write List of Model Specific Registers
+	MMXEXT                               // SSE integer functions or AMD MMX ext
-	MSR_PAGEFLUSH                       // Page Flush MSR available
+	MOVBE                                // MOVBE instruction (big-endian)
-	NRIPS                               // Indicates support for NRIP save on VMEXIT
+	MOVDIR64B                            // Move 64 Bytes as Direct Store
-	NX                                  // NX (No-Execute) bit
+	MOVDIRI                              // Move Doubleword as Direct Store
-	OSXSAVE                             // XSAVE enabled by OS
+	MOVSB_ZL                             // Fast Zero-Length MOVSB
-	PCONFIG                             // PCONFIG for Intel Multi-Key Total Memory Encryption
+	MOVU                                 // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE	MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
-	POPCNT                              // POPCNT instruction
+	MPX                                  // Intel MPX (Memory Protection Extensions)
-	PPIN                                // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
+	MSRIRC                               // Instruction Retired Counter MSR available
-	PREFETCHI                           // PREFETCHIT0/1 instructions
+	MSRLIST                              // Read/Write List of Model Specific Registers
-	PSFD                                // Predictive Store Forward Disable
+	MSR_PAGEFLUSH                        // Page Flush MSR available
-	RDPRU                               // RDPRU instruction supported
+	NRIPS                                // Indicates support for NRIP save on VMEXIT
-	RDRAND                              // RDRAND instruction is available
+	NX                                   // NX (No-Execute) bit
-	RDSEED                              // RDSEED instruction is available
+	OSXSAVE                              // XSAVE enabled by OS
-	RDTSCP                              // RDTSCP Instruction
+	PCONFIG                              // PCONFIG for Intel Multi-Key Total Memory Encryption
-	RRSBA_CTRL                          // Restricted RSB Alternate
+	POPCNT                               // POPCNT instruction
-	RTM                                 // Restricted Transactional Memory
+	PPIN                                 // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
-	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
+	PREFETCHI                            // PREFETCHIT0/1 instructions
-	SERIALIZE                           // Serialize Instruction Execution
+	PSFD                                 // Predictive Store Forward Disable
-	SEV                                 // AMD Secure Encrypted Virtualization supported
+	RDPRU                                // RDPRU instruction supported
-	SEV_64BIT                           // AMD SEV guest execution only allowed from a 64-bit host
+	RDRAND                               // RDRAND instruction is available
-	SEV_ALTERNATIVE                     // AMD SEV Alternate Injection supported
+	RDSEED                               // RDSEED instruction is available
-	SEV_DEBUGSWAP                       // Full debug state swap supported for SEV-ES guests
+	RDTSCP                               // RDTSCP Instruction
-	SEV_ES                              // AMD SEV Encrypted State supported
+	RRSBA_CTRL                           // Restricted RSB Alternate
-	SEV_RESTRICTED                      // AMD SEV Restricted Injection supported
+	RTM                                  // Restricted Transactional Memory
-	SEV_SNP                             // AMD SEV Secure Nested Paging supported
+	RTM_ALWAYS_ABORT                     // Indicates that the loaded microcode is forcing RTM abort.
-	SGX                                 // Software Guard Extensions
+	SBPB                                 // Indicates support for the Selective Branch Predictor Barrier
-	SGXLC                               // Software Guard Extensions Launch Control
+	SERIALIZE                            // Serialize Instruction Execution
-	SHA                                 // Intel SHA Extensions
+	SEV                                  // AMD Secure Encrypted Virtualization supported
-	SME                                 // AMD Secure Memory Encryption supported
+	SEV_64BIT                            // AMD SEV guest execution only allowed from a 64-bit host
-	SME_COHERENT                        // AMD Hardware cache coherency across encryption domains enforced
+	SEV_ALTERNATIVE                      // AMD SEV Alternate Injection supported
-	SPEC_CTRL_SSBD                      // Speculative Store Bypass Disable
+	SEV_DEBUGSWAP                        // Full debug state swap supported for SEV-ES guests
-	SRBDS_CTRL                          // SRBDS mitigation MSR available
+	SEV_ES                               // AMD SEV Encrypted State supported
-	SSE                                 // SSE functions
+	SEV_RESTRICTED                       // AMD SEV Restricted Injection supported
-	SSE2                                // P4 SSE functions
+	SEV_SNP                              // AMD SEV Secure Nested Paging supported
-	SSE3                                // Prescott SSE3 functions
+	SGX                                  // Software Guard Extensions
-	SSE4                                // Penryn SSE4.1 functions
+	SGXLC                                // Software Guard Extensions Launch Control
-	SSE42                               // Nehalem SSE4.2 functions
+	SHA                                  // Intel SHA Extensions
-	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
+	SME                                  // AMD Secure Memory Encryption supported
-	SSSE3                               // Conroe SSSE3 functions
+	SME_COHERENT                         // AMD Hardware cache coherency across encryption domains enforced
-	STIBP                               // Single Thread Indirect Branch Predictors
+	SPEC_CTRL_SSBD                       // Speculative Store Bypass Disable
-	STIBP_ALWAYSON                      // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
+	SRBDS_CTRL                           // SRBDS mitigation MSR available
-	STOSB_SHORT                         // Fast short STOSB
+	SRSO_MSR_FIX                         // Indicates that software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO.
-	SUCCOR                              // Software uncorrectable error containment and recovery capability.
+	SRSO_NO                              // Indicates the CPU is not subject to the SRSO vulnerability
-	SVM                                 // AMD Secure Virtual Machine
+	SRSO_USER_KERNEL_NO                  // Indicates the CPU is not subject to the SRSO vulnerability across user/kernel boundaries
-	SVMDA                               // Indicates support for the SVM decode assists.
+	SSE                                  // SSE functions
-	SVMFBASID                           // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
+	SSE2                                 // P4 SSE functions
-	SVML                                // AMD SVM lock. Indicates support for SVM-Lock.
+	SSE3                                 // Prescott SSE3 functions
-	SVMNP                               // AMD SVM nested paging
+	SSE4                                 // Penryn SSE4.1 functions
-	SVMPF                               // SVM pause intercept filter. Indicates support for the pause intercept filter
+	SSE42                                // Nehalem SSE4.2 functions
-	SVMPFT                              // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
+	SSE4A                                // AMD Barcelona microarchitecture SSE4a instructions
-	SYSCALL                             // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
+	SSSE3                                // Conroe SSSE3 functions
-	SYSEE                               // SYSENTER and SYSEXIT instructions
+	STIBP                                // Single Thread Indirect Branch Predictors
-	TBM                                 // AMD Trailing Bit Manipulation
+	STIBP_ALWAYSON                       // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
-	TDX_GUEST                           // Intel Trust Domain Extensions Guest
+	STOSB_SHORT                          // Fast short STOSB
-	TLB_FLUSH_NESTED                    // AMD: Flushing includes all the nested translations for guest translations
+	SUCCOR                               // Software uncorrectable error containment and recovery capability.
-	TME                                 // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
+	SVM                                  // AMD Secure Virtual Machine
-	TOPEXT                              // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
+	SVMDA                                // Indicates support for the SVM decode assists.
-	TSCRATEMSR                          // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
+	SVMFBASID                            // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
-	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
+	SVML                                 // AMD SVM lock. Indicates support for SVM-Lock.
-	VAES                                // Vector AES. AVX(512) versions requires additional checks.
+	SVMNP                                // AMD SVM nested paging
-	VMCBCLEAN                           // VMCB clean bits. Indicates support for VMCB clean bits.
+	SVMPF                                // SVM pause intercept filter. Indicates support for the pause intercept filter
-	VMPL                                // AMD VM Permission Levels supported
+	SVMPFT                               // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
-	VMSA_REGPROT                        // AMD VMSA Register Protection supported
+	SYSCALL                              // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
-	VMX                                 // Virtual Machine Extensions
+	SYSEE                                // SYSENTER and SYSEXIT instructions
-	VPCLMULQDQ                          // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
+	TBM                                  // AMD Trailing Bit Manipulation
-	VTE                                 // AMD Virtual Transparent Encryption supported
+	TDX_GUEST                            // Intel Trust Domain Extensions Guest
-	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
+	TLB_FLUSH_NESTED                     // AMD: Flushing includes all the nested translations for guest translations
-	WBNOINVD                            // Write Back and Do Not Invalidate Cache
+	TME                                  // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
-	WRMSRNS                             // Non-Serializing Write to Model Specific Register
+	TOPEXT                               // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
-	X87                                 // FPU
+	TSCRATEMSR                           // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
-	XGETBV1                             // Supports XGETBV with ECX = 1
+	TSXLDTRK                             // Intel TSX Suspend Load Address Tracking
-	XOP                                 // Bulldozer XOP functions
+	VAES                                 // Vector AES. AVX(512) versions requires additional checks.
-	XSAVE                               // XSAVE, XRESTOR, XSETBV, XGETBV
+	VMCBCLEAN                            // VMCB clean bits. Indicates support for VMCB clean bits.
-	XSAVEC                              // Supports XSAVEC and the compacted form of XRSTOR.
+	VMPL                                 // AMD VM Permission Levels supported
-	XSAVEOPT                            // XSAVEOPT available
+	VMSA_REGPROT                         // AMD VMSA Register Protection supported
-	XSAVES                              // Supports XSAVES/XRSTORS and IA32_XSS
+	VMX                                  // Virtual Machine Extensions
 	VPCLMULQDQ                           // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
 	VTE                                  // AMD Virtual Transparent Encryption supported
 	WAITPKG                              // TPAUSE, UMONITOR, UMWAIT
 	WBNOINVD                             // Write Back and Do Not Invalidate Cache
 	WRMSRNS                              // Non-Serializing Write to Model Specific Register
 	X87                                  // FPU
 	XGETBV1                              // Supports XGETBV with ECX = 1
 	XOP                                  // Bulldozer XOP functions
 	XSAVE                                // XSAVE, XRESTOR, XSETBV, XGETBV
 	XSAVEC                               // Supports XSAVEC and the compacted form of XRSTOR.
 	XSAVEOPT                             // XSAVEOPT available
 	XSAVES                               // Supports XSAVES/XRSTORS and IA32_XSS
 	// ARM features:
 	AESARM   // AES instructions
@ -302,9 +314,11 @@ type CPUInfo struct {
 		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
 		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
 	}
-	SGX       SGXSupport
+	SGX              SGXSupport
-	maxFunc   uint32
+	AMDMemEncryption AMDMemEncryptionSupport
-	maxExFunc uint32
+	AVX10Level       uint8
 	maxFunc          uint32
 	maxExFunc        uint32
 }
 var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
@ -1071,6 +1085,32 @@ func hasSGX(available, lc bool) (rval SGXSupport) {
 	return
 }
 // AMDMemEncryptionSupport holds the values that hasAMDMemEncryption decodes
 // from CPUID leaf 0x8000001F, sub-leaf 0. When hasAMDMemEncryption is called
 // with available == false it returns early, so every numeric field is zero.
 //
 // NOTE(review): "Possition" and "Entrypted" are misspelled, but they are
 // exported field names; renaming them would break callers.
 type AMDMemEncryptionSupport struct {
 	Available          bool   // the `available` flag passed to hasAMDMemEncryption
 	CBitPossition      uint32 // EBX bits 5:0; presumably the page-table C-bit position - confirm against AMD docs
 	NumVMPL            uint32 // EBX bits 15:12; presumably the number of VM permission levels - confirm
 	PhysAddrReduction  uint32 // EBX bits 11:6; presumably the physical address bit reduction - confirm
 	NumEntryptedGuests uint32 // raw ECX; presumably the number of simultaneously supported encrypted guests - confirm
 	MinSevNoEsAsid     uint32 // raw EDX; presumably the minimum ASID for SEV guests without SEV-ES - confirm
 }
 // hasAMDMemEncryption builds the AMDMemEncryptionSupport description.
 //
 // If available is false the zero value is returned and CPUID is not queried.
 // Otherwise CPUID leaf 0x8000001F, sub-leaf 0, is read: three bit fields are
 // unpacked from EBX, while ECX and EDX are stored unmodified. EAX is ignored.
 func hasAMDMemEncryption(available bool) (rval AMDMemEncryptionSupport) {
 	if !available {
 		// rval is still the zero value, so Available is already false.
 		return rval
 	}
 	_, ebx, ecx, edx := cpuidex(0x8000001f, 0)
 	return AMDMemEncryptionSupport{
 		Available:          true,
 		CBitPossition:      ebx & 0x3f,        // EBX[5:0]
 		NumVMPL:            (ebx >> 12) & 0xf, // EBX[15:12]
 		PhysAddrReduction:  (ebx >> 6) & 0x3f, // EBX[11:6]
 		NumEntryptedGuests: ecx,
 		MinSevNoEsAsid:     edx,
 	}
 }
 func support() flagSet {
 	var fs flagSet
 	mfi := maxFunctionID()
@ -1165,6 +1205,7 @@ func support() flagSet {
 		fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
 		fs.setIf(ecx&(1<<13) != 0, TME)
 		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
 		fs.setIf(ecx&(1<<23) != 0, KEYLOCKER)
 		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
 		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
 		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
@ -1202,6 +1243,8 @@ func support() flagSet {
 		fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
 		fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
 		fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
 		fs.setIf(edx1&(1<<19) != 0, AVX10)
 		fs.setIf(edx1&(1<<21) != 0, APX_F)
 		// Only detect AVX-512 features if XGETBV is supported
 		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
@ -1252,6 +1295,19 @@ func support() flagSet {
 		fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
 		fs.setIf(edx&(1<<5) != 0, MCDT_NO)
 		// Add keylocker features.
 		if fs.inSet(KEYLOCKER) && mfi >= 0x19 {
 			_, ebx, _, _ := cpuidex(0x19, 0)
 			fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4)
 		}
 		// Add AVX10 features.
 		if fs.inSet(AVX10) && mfi >= 0x24 {
 			_, ebx, _, _ := cpuidex(0x24, 0)
 			fs.setIf(ebx&(1<<16) != 0, AVX10_128)
 			fs.setIf(ebx&(1<<17) != 0, AVX10_256)
 			fs.setIf(ebx&(1<<18) != 0, AVX10_512)
 		}
 	}
 	// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
@ -1394,6 +1450,29 @@ func support() flagSet {
 		fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
 	}
 	if maxExtendedFunction() >= 0x80000021 && vend == AMD {
 		a, _, _, _ := cpuid(0x80000021)
 		fs.setIf((a>>31)&1 == 1, SRSO_MSR_FIX)
 		fs.setIf((a>>30)&1 == 1, SRSO_USER_KERNEL_NO)
 		fs.setIf((a>>29)&1 == 1, SRSO_NO)
 		fs.setIf((a>>28)&1 == 1, IBPB_BRTYPE)
 		fs.setIf((a>>27)&1 == 1, SBPB)
 	}
 	if mfi >= 0x20 {
 		// Microsoft has decided to purposefully hide the information
 		// of the guest TEE when VMs are being created using Hyper-V.
 		//
 		// This leads us to check for the Hyper-V cpuid features
 		// (0x4000000C), and then for the `ebx` value set.
 		//
 		// For Intel TDX, `ebx` is set as `0xbe3`, being 3 the part
 		// we're mostly interested about,according to:
 		// https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174
 		_, ebx, _, _ := cpuid(0x4000000C)
 		fs.setIf(ebx == 0xbe3, TDX_GUEST)
 	}
 	if mfi >= 0x21 {
 		// Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21).
 		_, ebx, ecx, edx := cpuid(0x21)
@ -1404,6 +1483,14 @@ func support() flagSet {
 	return fs
 }
 // supportAVX10 returns the low 8 bits of EBX from CPUID leaf 0x24, sub-leaf 0.
 // It returns 0 without querying CPUID when the highest supported standard
 // leaf (c.maxFunc) is below 0x24 or the AVX10 feature flag is not set.
 func (c *CPUInfo) supportAVX10() uint8 {
 	if c.maxFunc < 0x24 || !c.featureSet.inSet(AVX10) {
 		return 0
 	}
 	_, b, _, _ := cpuidex(0x24, 0)
 	// Conversion to uint8 keeps only EBX[7:0].
 	return uint8(b)
 }
 func valAsString(values ...uint32) []byte {
 	r := make([]byte, 4*len(values))
 	for i, v := range values {
--- a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
@ -27,10 +27,12 @@ func addInfo(c *CPUInfo, safe bool) {
 	c.Family, c.Model, c.Stepping = familyModel()
 	c.featureSet = support()
 	c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
 	c.AMDMemEncryption = hasAMDMemEncryption(c.featureSet.inSet(SME) || c.featureSet.inSet(SEV))
 	c.ThreadsPerCore = threadsPerCore()
 	c.LogicalCores = logicalCores()
 	c.PhysicalCores = physicalCores()
 	c.VendorID, c.VendorString = vendorID()
 	c.AVX10Level = c.supportAVX10()
 	c.cacheSize()
 	c.frequencies()
 }
--- a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
+++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
@ -16,210 +16,222 @@ func _() {
 	_ = x[AMXFP16-6]
 	_ = x[AMXINT8-7]
 	_ = x[AMXTILE-8]
-	_ = x[AVX-9]
+	_ = x[APX_F-9]
-	_ = x[AVX2-10]
+	_ = x[AVX-10]
-	_ = x[AVX512BF16-11]
+	_ = x[AVX10-11]
-	_ = x[AVX512BITALG-12]
+	_ = x[AVX10_128-12]
-	_ = x[AVX512BW-13]
+	_ = x[AVX10_256-13]
-	_ = x[AVX512CD-14]
+	_ = x[AVX10_512-14]
-	_ = x[AVX512DQ-15]
+	_ = x[AVX2-15]
-	_ = x[AVX512ER-16]
+	_ = x[AVX512BF16-16]
-	_ = x[AVX512F-17]
+	_ = x[AVX512BITALG-17]
-	_ = x[AVX512FP16-18]
+	_ = x[AVX512BW-18]
-	_ = x[AVX512IFMA-19]
+	_ = x[AVX512CD-19]
-	_ = x[AVX512PF-20]
+	_ = x[AVX512DQ-20]
-	_ = x[AVX512VBMI-21]
+	_ = x[AVX512ER-21]
-	_ = x[AVX512VBMI2-22]
+	_ = x[AVX512F-22]
-	_ = x[AVX512VL-23]
+	_ = x[AVX512FP16-23]
-	_ = x[AVX512VNNI-24]
+	_ = x[AVX512IFMA-24]
-	_ = x[AVX512VP2INTERSECT-25]
+	_ = x[AVX512PF-25]
-	_ = x[AVX512VPOPCNTDQ-26]
+	_ = x[AVX512VBMI-26]
-	_ = x[AVXIFMA-27]
+	_ = x[AVX512VBMI2-27]
-	_ = x[AVXNECONVERT-28]
+	_ = x[AVX512VL-28]
-	_ = x[AVXSLOW-29]
+	_ = x[AVX512VNNI-29]
-	_ = x[AVXVNNI-30]
+	_ = x[AVX512VP2INTERSECT-30]
-	_ = x[AVXVNNIINT8-31]
+	_ = x[AVX512VPOPCNTDQ-31]
-	_ = x[BHI_CTRL-32]
+	_ = x[AVXIFMA-32]
-	_ = x[BMI1-33]
+	_ = x[AVXNECONVERT-33]
-	_ = x[BMI2-34]
+	_ = x[AVXSLOW-34]
-	_ = x[CETIBT-35]
+	_ = x[AVXVNNI-35]
-	_ = x[CETSS-36]
+	_ = x[AVXVNNIINT8-36]
-	_ = x[CLDEMOTE-37]
+	_ = x[BHI_CTRL-37]
-	_ = x[CLMUL-38]
+	_ = x[BMI1-38]
-	_ = x[CLZERO-39]
+	_ = x[BMI2-39]
-	_ = x[CMOV-40]
+	_ = x[CETIBT-40]
-	_ = x[CMPCCXADD-41]
+	_ = x[CETSS-41]
-	_ = x[CMPSB_SCADBS_SHORT-42]
+	_ = x[CLDEMOTE-42]
-	_ = x[CMPXCHG8-43]
+	_ = x[CLMUL-43]
-	_ = x[CPBOOST-44]
+	_ = x[CLZERO-44]
-	_ = x[CPPC-45]
+	_ = x[CMOV-45]
-	_ = x[CX16-46]
+	_ = x[CMPCCXADD-46]
-	_ = x[EFER_LMSLE_UNS-47]
+	_ = x[CMPSB_SCADBS_SHORT-47]
-	_ = x[ENQCMD-48]
+	_ = x[CMPXCHG8-48]
-	_ = x[ERMS-49]
+	_ = x[CPBOOST-49]
-	_ = x[F16C-50]
+	_ = x[CPPC-50]
-	_ = x[FLUSH_L1D-51]
+	_ = x[CX16-51]
-	_ = x[FMA3-52]
+	_ = x[EFER_LMSLE_UNS-52]
-	_ = x[FMA4-53]
+	_ = x[ENQCMD-53]
-	_ = x[FP128-54]
+	_ = x[ERMS-54]
-	_ = x[FP256-55]
+	_ = x[F16C-55]
-	_ = x[FSRM-56]
+	_ = x[FLUSH_L1D-56]
-	_ = x[FXSR-57]
+	_ = x[FMA3-57]
-	_ = x[FXSROPT-58]
+	_ = x[FMA4-58]
-	_ = x[GFNI-59]
+	_ = x[FP128-59]
-	_ = x[HLE-60]
+	_ = x[FP256-60]
-	_ = x[HRESET-61]
+	_ = x[FSRM-61]
-	_ = x[HTT-62]
+	_ = x[FXSR-62]
-	_ = x[HWA-63]
+	_ = x[FXSROPT-63]
-	_ = x[HYBRID_CPU-64]
+	_ = x[GFNI-64]
-	_ = x[HYPERVISOR-65]
+	_ = x[HLE-65]
-	_ = x[IA32_ARCH_CAP-66]
+	_ = x[HRESET-66]
-	_ = x[IA32_CORE_CAP-67]
+	_ = x[HTT-67]
-	_ = x[IBPB-68]
+	_ = x[HWA-68]
-	_ = x[IBRS-69]
+	_ = x[HYBRID_CPU-69]
-	_ = x[IBRS_PREFERRED-70]
+	_ = x[HYPERVISOR-70]
-	_ = x[IBRS_PROVIDES_SMP-71]
+	_ = x[IA32_ARCH_CAP-71]
-	_ = x[IBS-72]
+	_ = x[IA32_CORE_CAP-72]
-	_ = x[IBSBRNTRGT-73]
+	_ = x[IBPB-73]
-	_ = x[IBSFETCHSAM-74]
+	_ = x[IBPB_BRTYPE-74]
-	_ = x[IBSFFV-75]
+	_ = x[IBRS-75]
-	_ = x[IBSOPCNT-76]
+	_ = x[IBRS_PREFERRED-76]
-	_ = x[IBSOPCNTEXT-77]
+	_ = x[IBRS_PROVIDES_SMP-77]
-	_ = x[IBSOPSAM-78]
+	_ = x[IBS-78]
-	_ = x[IBSRDWROPCNT-79]
+	_ = x[IBSBRNTRGT-79]
-	_ = x[IBSRIPINVALIDCHK-80]
+	_ = x[IBSFETCHSAM-80]
-	_ = x[IBS_FETCH_CTLX-81]
+	_ = x[IBSFFV-81]
-	_ = x[IBS_OPDATA4-82]
+	_ = x[IBSOPCNT-82]
-	_ = x[IBS_OPFUSE-83]
+	_ = x[IBSOPCNTEXT-83]
-	_ = x[IBS_PREVENTHOST-84]
+	_ = x[IBSOPSAM-84]
-	_ = x[IBS_ZEN4-85]
+	_ = x[IBSRDWROPCNT-85]
-	_ = x[IDPRED_CTRL-86]
+	_ = x[IBSRIPINVALIDCHK-86]
-	_ = x[INT_WBINVD-87]
+	_ = x[IBS_FETCH_CTLX-87]
-	_ = x[INVLPGB-88]
+	_ = x[IBS_OPDATA4-88]
-	_ = x[LAHF-89]
+	_ = x[IBS_OPFUSE-89]
-	_ = x[LAM-90]
+	_ = x[IBS_PREVENTHOST-90]
-	_ = x[LBRVIRT-91]
+	_ = x[IBS_ZEN4-91]
-	_ = x[LZCNT-92]
+	_ = x[IDPRED_CTRL-92]
-	_ = x[MCAOVERFLOW-93]
+	_ = x[INT_WBINVD-93]
-	_ = x[MCDT_NO-94]
+	_ = x[INVLPGB-94]
-	_ = x[MCOMMIT-95]
+	_ = x[KEYLOCKER-95]
-	_ = x[MD_CLEAR-96]
+	_ = x[KEYLOCKERW-96]
-	_ = x[MMX-97]
+	_ = x[LAHF-97]
-	_ = x[MMXEXT-98]
+	_ = x[LAM-98]
-	_ = x[MOVBE-99]
+	_ = x[LBRVIRT-99]
-	_ = x[MOVDIR64B-100]
+	_ = x[LZCNT-100]
-	_ = x[MOVDIRI-101]
+	_ = x[MCAOVERFLOW-101]
-	_ = x[MOVSB_ZL-102]
+	_ = x[MCDT_NO-102]
-	_ = x[MOVU-103]
+	_ = x[MCOMMIT-103]
-	_ = x[MPX-104]
+	_ = x[MD_CLEAR-104]
-	_ = x[MSRIRC-105]
+	_ = x[MMX-105]
-	_ = x[MSRLIST-106]
+	_ = x[MMXEXT-106]
-	_ = x[MSR_PAGEFLUSH-107]
+	_ = x[MOVBE-107]
-	_ = x[NRIPS-108]
+	_ = x[MOVDIR64B-108]
-	_ = x[NX-109]
+	_ = x[MOVDIRI-109]
-	_ = x[OSXSAVE-110]
+	_ = x[MOVSB_ZL-110]
-	_ = x[PCONFIG-111]
+	_ = x[MOVU-111]
-	_ = x[POPCNT-112]
+	_ = x[MPX-112]
-	_ = x[PPIN-113]
+	_ = x[MSRIRC-113]
-	_ = x[PREFETCHI-114]
+	_ = x[MSRLIST-114]
-	_ = x[PSFD-115]
+	_ = x[MSR_PAGEFLUSH-115]
-	_ = x[RDPRU-116]
+	_ = x[NRIPS-116]
-	_ = x[RDRAND-117]
+	_ = x[NX-117]
-	_ = x[RDSEED-118]
+	_ = x[OSXSAVE-118]
-	_ = x[RDTSCP-119]
+	_ = x[PCONFIG-119]
-	_ = x[RRSBA_CTRL-120]
+	_ = x[POPCNT-120]
-	_ = x[RTM-121]
+	_ = x[PPIN-121]
-	_ = x[RTM_ALWAYS_ABORT-122]
+	_ = x[PREFETCHI-122]
-	_ = x[SERIALIZE-123]
+	_ = x[PSFD-123]
-	_ = x[SEV-124]
+	_ = x[RDPRU-124]
-	_ = x[SEV_64BIT-125]
+	_ = x[RDRAND-125]
-	_ = x[SEV_ALTERNATIVE-126]
+	_ = x[RDSEED-126]
-	_ = x[SEV_DEBUGSWAP-127]
+	_ = x[RDTSCP-127]
-	_ = x[SEV_ES-128]
+	_ = x[RRSBA_CTRL-128]
-	_ = x[SEV_RESTRICTED-129]
+	_ = x[RTM-129]
-	_ = x[SEV_SNP-130]
+	_ = x[RTM_ALWAYS_ABORT-130]
-	_ = x[SGX-131]
+	_ = x[SBPB-131]
-	_ = x[SGXLC-132]
+	_ = x[SERIALIZE-132]
-	_ = x[SHA-133]
+	_ = x[SEV-133]
-	_ = x[SME-134]
+	_ = x[SEV_64BIT-134]
-	_ = x[SME_COHERENT-135]
+	_ = x[SEV_ALTERNATIVE-135]
-	_ = x[SPEC_CTRL_SSBD-136]
+	_ = x[SEV_DEBUGSWAP-136]
-	_ = x[SRBDS_CTRL-137]
+	_ = x[SEV_ES-137]
-	_ = x[SSE-138]
+	_ = x[SEV_RESTRICTED-138]
-	_ = x[SSE2-139]
+	_ = x[SEV_SNP-139]
-	_ = x[SSE3-140]
+	_ = x[SGX-140]
-	_ = x[SSE4-141]
+	_ = x[SGXLC-141]
-	_ = x[SSE42-142]
+	_ = x[SHA-142]
-	_ = x[SSE4A-143]
+	_ = x[SME-143]
-	_ = x[SSSE3-144]
+	_ = x[SME_COHERENT-144]
-	_ = x[STIBP-145]
+	_ = x[SPEC_CTRL_SSBD-145]
-	_ = x[STIBP_ALWAYSON-146]
+	_ = x[SRBDS_CTRL-146]
-	_ = x[STOSB_SHORT-147]
+	_ = x[SRSO_MSR_FIX-147]
-	_ = x[SUCCOR-148]
+	_ = x[SRSO_NO-148]
-	_ = x[SVM-149]
+	_ = x[SRSO_USER_KERNEL_NO-149]
-	_ = x[SVMDA-150]
+	_ = x[SSE-150]
-	_ = x[SVMFBASID-151]
+	_ = x[SSE2-151]
-	_ = x[SVML-152]
+	_ = x[SSE3-152]
-	_ = x[SVMNP-153]
+	_ = x[SSE4-153]
-	_ = x[SVMPF-154]
+	_ = x[SSE42-154]
-	_ = x[SVMPFT-155]
+	_ = x[SSE4A-155]
-	_ = x[SYSCALL-156]
+	_ = x[SSSE3-156]
-	_ = x[SYSEE-157]
+	_ = x[STIBP-157]
-	_ = x[TBM-158]
+	_ = x[STIBP_ALWAYSON-158]
-	_ = x[TDX_GUEST-159]
+	_ = x[STOSB_SHORT-159]
-	_ = x[TLB_FLUSH_NESTED-160]
+	_ = x[SUCCOR-160]
-	_ = x[TME-161]
+	_ = x[SVM-161]
-	_ = x[TOPEXT-162]
+	_ = x[SVMDA-162]
-	_ = x[TSCRATEMSR-163]
+	_ = x[SVMFBASID-163]
-	_ = x[TSXLDTRK-164]
+	_ = x[SVML-164]
-	_ = x[VAES-165]
+	_ = x[SVMNP-165]
-	_ = x[VMCBCLEAN-166]
+	_ = x[SVMPF-166]
-	_ = x[VMPL-167]
+	_ = x[SVMPFT-167]
-	_ = x[VMSA_REGPROT-168]
+	_ = x[SYSCALL-168]
-	_ = x[VMX-169]
+	_ = x[SYSEE-169]
-	_ = x[VPCLMULQDQ-170]
+	_ = x[TBM-170]
-	_ = x[VTE-171]
+	_ = x[TDX_GUEST-171]
-	_ = x[WAITPKG-172]
+	_ = x[TLB_FLUSH_NESTED-172]
-	_ = x[WBNOINVD-173]
+	_ = x[TME-173]
-	_ = x[WRMSRNS-174]
+	_ = x[TOPEXT-174]
-	_ = x[X87-175]
+	_ = x[TSCRATEMSR-175]
-	_ = x[XGETBV1-176]
+	_ = x[TSXLDTRK-176]
-	_ = x[XOP-177]
+	_ = x[VAES-177]
-	_ = x[XSAVE-178]
+	_ = x[VMCBCLEAN-178]
-	_ = x[XSAVEC-179]
+	_ = x[VMPL-179]
-	_ = x[XSAVEOPT-180]
+	_ = x[VMSA_REGPROT-180]
-	_ = x[XSAVES-181]
+	_ = x[VMX-181]
-	_ = x[AESARM-182]
+	_ = x[VPCLMULQDQ-182]
-	_ = x[ARMCPUID-183]
+	_ = x[VTE-183]
-	_ = x[ASIMD-184]
+	_ = x[WAITPKG-184]
-	_ = x[ASIMDDP-185]
+	_ = x[WBNOINVD-185]
-	_ = x[ASIMDHP-186]
+	_ = x[WRMSRNS-186]
-	_ = x[ASIMDRDM-187]
+	_ = x[X87-187]
-	_ = x[ATOMICS-188]
+	_ = x[XGETBV1-188]
-	_ = x[CRC32-189]
+	_ = x[XOP-189]
-	_ = x[DCPOP-190]
+	_ = x[XSAVE-190]
-	_ = x[EVTSTRM-191]
+	_ = x[XSAVEC-191]
-	_ = x[FCMA-192]
+	_ = x[XSAVEOPT-192]
-	_ = x[FP-193]
+	_ = x[XSAVES-193]
-	_ = x[FPHP-194]
+	_ = x[AESARM-194]
-	_ = x[GPA-195]
+	_ = x[ARMCPUID-195]
-	_ = x[JSCVT-196]
+	_ = x[ASIMD-196]
-	_ = x[LRCPC-197]
+	_ = x[ASIMDDP-197]
-	_ = x[PMULL-198]
+	_ = x[ASIMDHP-198]
-	_ = x[SHA1-199]
+	_ = x[ASIMDRDM-199]
-	_ = x[SHA2-200]
+	_ = x[ATOMICS-200]
-	_ = x[SHA3-201]
+	_ = x[CRC32-201]
-	_ = x[SHA512-202]
+	_ = x[DCPOP-202]
-	_ = x[SM3-203]
+	_ = x[EVTSTRM-203]
-	_ = x[SM4-204]
+	_ = x[FCMA-204]
-	_ = x[SVE-205]
+	_ = x[FP-205]
-	_ = x[lastID-206]
+	_ = x[FPHP-206]
 	_ = x[GPA-207]
 	_ = x[JSCVT-208]
 	_ = x[LRCPC-209]
 	_ = x[PMULL-210]
 	_ = x[SHA1-211]
 	_ = x[SHA2-212]
 	_ = x[SHA3-213]
 	_ = x[SHA512-214]
 	_ = x[SM3-215]
 	_ = x[SM4-216]
 	_ = x[SVE-217]
 	_ = x[lastID-218]
 	_ = x[firstID-0]
 }
-const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
+const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
-var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 282, 286, 290, 296, 301, 309, 314, 320, 324, 333, 351, 359, 366, 370, 374, 388, 394, 398, 402, 411, 415, 419, 424, 429, 433, 437, 444, 448, 451, 457, 460, 463, 473, 483, 496, 509, 513, 517, 531, 548, 551, 561, 572, 578, 586, 597, 605, 617, 633, 647, 658, 668, 683, 691, 702, 712, 719, 723, 726, 733, 738, 749, 756, 763, 771, 774, 780, 785, 794, 801, 809, 813, 816, 822, 829, 842, 847, 849, 856, 863, 869, 873, 882, 886, 891, 897, 903, 909, 919, 922, 938, 947, 950, 959, 974, 987, 993, 1007, 1014, 1017, 1022, 1025, 1028, 1040, 1054, 1064, 1067, 1071, 1075, 1079, 1084, 1089, 1094, 1099, 1113, 1124, 1130, 1133, 1138, 1147, 1151, 1156, 1161, 1167, 1174, 1179, 1182, 1191, 1207, 1210, 1216, 1226, 1234, 1238, 1247, 1251, 1263, 1266, 1276, 1279, 1286, 1294, 1301, 1304, 1311, 1314, 1319, 1325, 1333, 1339, 1345, 1353, 1358, 1365, 1372, 1380, 1387, 1392, 1397, 1404, 1408, 1410, 1414, 1417, 1422, 1427, 1432, 1436, 1440, 1444, 1450, 1453, 1456, 1459, 1465}
+var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 319, 323, 327, 333, 338, 346, 351, 357, 361, 370, 388, 396, 403, 407, 411, 425, 431, 435, 439, 448, 452, 456, 461, 466, 470, 474, 481, 485, 488, 494, 497, 500, 510, 520, 533, 546, 550, 561, 565, 579, 596, 599, 609, 620, 626, 634, 645, 653, 665, 681, 695, 706, 716, 731, 739, 750, 760, 767, 776, 786, 790, 793, 800, 805, 816, 823, 830, 838, 841, 847, 852, 861, 868, 876, 880, 883, 889, 896, 909, 914, 916, 923, 930, 936, 940, 949, 953, 958, 964, 970, 976, 986, 989, 1005, 1009, 1018, 1021, 1030, 1045, 1058, 1064, 1078, 1085, 1088, 1093, 1096, 1099, 1111, 1125, 1135, 1147, 1154, 1173, 1176, 1180, 1184, 1188, 1193, 1198, 1203, 1208, 1222, 1233, 1239, 1242, 1247, 1256, 1260, 1265, 1270, 1276, 1283, 1288, 1291, 1300, 1316, 1319, 1325, 1335, 1343, 1347, 1356, 1360, 1372, 1375, 1385, 1388, 1395, 1403, 1410, 1413, 1420, 1423, 1428, 1434, 1442, 1448, 1454, 1462, 1467, 1474, 1481, 1489, 1496, 1501, 1506, 1513, 1517, 1519, 1523, 1526, 1531, 1536, 1541, 1545, 1549, 1553, 1559, 1562, 1565, 1568, 1574}
 func (i FeatureID) String() string {
 	if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
--- a/vendor/github.com/klauspost/reedsolomon/.gitignore
+++ b/vendor/github.com/klauspost/reedsolomon/.gitignore
@ -0,0 +1,26 @@
 # Compiled Object files, Static and Dynamic libs (Shared Objects)
 *.o
 *.a
 *.so
 # Folders
 _obj
 _test
 # Architecture specific extensions/prefixes
 *.[568vq]
 [568vq].out
 *.cgo1.go
 *.cgo2.c
 _cgo_defun.c
 _cgo_gotypes.go
 _cgo_export.*
 _testmain.go
 *.exe
 *.test
 *.prof
 .idea
--- a/vendor/github.com/klauspost/reedsolomon/LICENSE
+++ b/vendor/github.com/klauspost/reedsolomon/LICENSE
@ -0,0 +1,23 @@
 The MIT License (MIT)
 Copyright (c) 2015 Klaus Post
 Copyright (c) 2015 Backblaze
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/vendor/github.com/klauspost/reedsolomon/README.md
+++ b/vendor/github.com/klauspost/reedsolomon/README.md
@ -0,0 +1,566 @@
 # Reed-Solomon
 [![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/reedsolomon.svg)](https://pkg.go.dev/github.com/klauspost/reedsolomon) [![Go](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml)
 Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
 This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by 
 [Backblaze](http://backblaze.com), with some additional optimizations.
 For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
 For encoding high shard counts (>256) a Leopard implementation is used.
 For most platforms this performs close to the original Leopard implementation in terms of speed. 
 Package home: https://github.com/klauspost/reedsolomon
 Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon
 # Installation
 To get the package use the standard:
 ```bash
 go get -u github.com/klauspost/reedsolomon
 ```
 Using Go modules is recommended.
 # Changes
 ## 2022
 * [GFNI](https://github.com/klauspost/reedsolomon/pull/224) support for amd64, for up to 3x faster processing.
 * [Leopard GF8](https://github.com/klauspost/reedsolomon#leopard-gf8) mode added, for faster processing of medium shard counts.
 * [Leopard GF16](https://github.com/klauspost/reedsolomon#leopard-compatible-gf16) mode added, for up to 65536 shards. 
 * [WithJerasureMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithJerasureMatrix) allows constructing a [Jerasure](https://github.com/tsuraan/Jerasure) compatible matrix.
 ## 2021
 * Use `GOAMD64=v4` to enable faster AVX2.
 * Add progressive shard encoding.
 * Wider AVX2 loops
 * Limit concurrency on AVX2, since we are likely memory bound.
 * Allow 0 parity shards.
 * Allow disabling inversion cache.
 * Faster AVX2 encoding.
 <details>
 	<summary>See older changes</summary>
 ## May 2020
 * ARM64 optimizations, up to 2.5x faster.
 * Added [WithFastOneParityMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithFastOneParityMatrix) for faster operation with 1 parity shard.
 * Much better performance when using a limited number of goroutines.
 * AVX512 is now using multiple cores.
 * Stream processing overhaul, big speedups in most cases.
 * AVX512 optimizations
 ## March 6, 2019
 The pure Go implementation is about 30% faster. Minor tweaks to assembler implementations.
 ## February 8, 2019
 AVX512 accelerated version added for Intel Skylake CPUs. This can give up to a 4x speed improvement as compared to AVX2.
 See [here](https://github.com/klauspost/reedsolomon#performance-on-avx512) for more details.
 ## December 18, 2018
 Assembly code for ppc64le has been contributed, this boosts performance by about 10x on this platform.
 ## November 18, 2017
 Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt 
 to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU.
 ## October 1, 2017
 * [Cauchy Matrix](https://godoc.org/github.com/klauspost/reedsolomon#WithCauchyMatrix) is now an option. 
 Thanks to [templexxx](https://github.com/templexxx) for the basis of this.
 * Default maximum number of [goroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithMaxGoroutines) 
 has been increased for better multi-core scaling.
 * After several requests the Reconstruct and ReconstructData now allow slices of zero length but sufficient capacity to 
 be used instead of allocating new memory.
 ## August 26, 2017
 *  The [`Encoder()`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) now contains an `Update` 
 function contributed by [chenzhongtao](https://github.com/chenzhongtao).
 * [Frank Wessels](https://github.com/fwessels) kindly contributed ARM 64 bit assembly, 
 which gives a huge performance boost on this platform.
 ## July 20, 2017
 `ReconstructData` added to [`Encoder`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) interface. 
 This can cause compatibility issues if you implement your own Encoder. A simple workaround can be added:
 ```Go
 func (e *YourEnc) ReconstructData(shards [][]byte) error {
 	return ReconstructData(shards)
 }
 ```
 You can of course also do your own implementation. 
 The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamEncoder) 
 handles this without modifying the interface. 
 This is a good lesson on why returning interfaces is not a good design.
 </details>
 # Usage
 This section assumes you know the basics of Reed-Solomon encoding. 
 A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/).
 This package performs the calculation of the parity sets. The usage is therefore relatively simple.
 First of all, you need to choose your distribution of data and parity shards. 
 A 'good' distribution is very subjective, and will depend a lot on your usage scenario. 
 To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
 ```Go
    enc, err := reedsolomon.New(10, 3)
 ```
 This encoder will work for all parity sets with this distribution of data and parity shards. 
 If you will primarily be using it with one shard size it is recommended to use 
 [`WithAutoGoroutines(shardSize)`](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithAutoGoroutines)
 as an additional parameter. This will attempt to calculate the optimal number of goroutines to use for the best speed.
 It is not required that all shards are this size. 
 Then you send and receive data that is a simple slice of byte slices; `[][]byte`. 
 In the example above, the top slice must have a length of 13.
 ```Go
    data := make([][]byte, 13)
 ```
 You should then fill the 10 first slices with *equally sized* data, 
 and create parity shards that will be populated with parity data. In this case we create the data in memory, 
 but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files.
 ```Go
    // Create all shards, size them at 50000 each
    for i := range data {
      data[i] = make([]byte, 50000)
    }
    // The above allocations can also be done by the encoder:
    // data := enc.(reedsolomon.Extended).AllocAligned(50000)
    // Fill some data into the data shards
    for i, in := range data[:10] {
      for j:= range in {
         in[j] = byte((i+j)&0xff)
      }
    }
 ```
 To populate the parity shards, you simply call `Encode()` with your data.
 ```Go
    err = enc.Encode(data)
 ```
 The only case where you should get an error is if the data shards aren't of equal size. 
 The last 3 shards now contain parity data. You can verify this by calling `Verify()`:
 ```Go
    ok, err = enc.Verify(data)
 ```
 The final (and important) part is to be able to reconstruct missing shards. 
 For this to work, you need to know which parts of your data are missing. 
 The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario, 
 you need to implement a hash check for each shard. 
 If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set.
 To indicate missing data, you set the shard to nil before calling `Reconstruct()`:
 ```Go
    // Delete two data shards
    data[3] = nil
    data[7] = nil
    // Reconstruct the missing shards
    err := enc.Reconstruct(data)
 ```
 The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail.
 If you are only interested in the data shards (for reading purposes) you can call `ReconstructData()`:
 ```Go
    // Delete two data shards
    data[3] = nil
    data[7] = nil
    // Reconstruct just the missing data shards
    err := enc.ReconstructData(data)
 ```
 If you don't need all data shards you can use `ReconstructSome()`:
 ```Go
    // Delete two data shards
    data[3] = nil
    data[7] = nil
    // Reconstruct just the shard 3
    err := enc.ReconstructSome(data, []bool{false, false, false, true, false, false, false, false, false, false})
 ```
 So to sum up reconstruction:
 * The number of data/parity shards must match the numbers used for encoding.
 * The order of shards must be the same as used when encoding.
 * You may only supply data you know is valid.
 * Invalid shards should be set to nil.
 For complete examples of an encoder and decoder see the 
 [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
 # Splitting/Joining Data
 You might have a large slice of data. 
 To help you split this, there are some helper functions that can split and join a single byte slice.
 ```Go
   bigfile, _ := ioutil.ReadFile("myfile.data")
   // Split the file
   split, err := enc.Split(bigfile)
 ```
 This will split the file into the number of data shards set when creating the encoder and create empty parity shards. 
 An important thing to note is that you have to *keep track of the exact input size*. 
 If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard.
 To join a data set, use the `Join()` function, which will join the shards and write it to the `io.Writer` you supply: 
 ```Go
   // Join a data set and write it to io.Discard.
   err = enc.Join(io.Discard, data, len(bigfile))
 ```
 ## Aligned Allocations
 For AMD64 aligned inputs can make a big speed difference.
 This is an example of the speed difference when inputs are unaligned/aligned:
 ```
 BenchmarkEncode100x20x10000-32    	    7058	    172648 ns/op	6950.57 MB/s
 BenchmarkEncode100x20x10000-32    	    8406	    137911 ns/op	8701.24 MB/s
 ```
 This is mostly the case when dealing with odd-sized shards. 
 To facilitate this the package provides an `AllocAligned(shards, each int) [][]byte`. 
 This will allocate a number of shards, each with the size `each`.
 Each shard will then be aligned to a 64 byte boundary.
 Each encoder also has a `AllocAligned(each int) [][]byte` as an extended interface which will return the same, 
 but with the shard count configured in the encoder.   
 It is not possible to re-align already allocated slices, for example when using `Split`.
 When it is not possible to write to aligned shards, you should not copy to them.
 # Progressive encoding
 It is possible to encode individual shards using EncodeIdx:
 ```Go
 	// EncodeIdx will add parity for a single data shard.
 	// Parity shards should start out as 0. The caller must zero them.
 	// Data shards must be delivered exactly once. There is no check for this.
 	// The parity shards will always be updated and the data shards will remain the same.
 	EncodeIdx(dataShard []byte, idx int, parity [][]byte) error
 ```
 This allows progressively encoding the parity by sending individual data shards.
 There is no requirement on shards being delivered in order, 
 but when sent in order it allows encoding shards one at a time,
 effectively allowing the operation to be streaming. 
 The result will be the same as encoding all shards at once.
 There is a minor speed penalty using this method, so send 
 shards at once if they are available.
 ## Example
 ```Go
 func test() {
    // Create an encoder with 7 data and 3 parity slices.
    enc, _ := reedsolomon.New(7, 3)
    // This will be our output parity.
    parity := make([][]byte, 3)
    for i := range parity {
        parity[i] = make([]byte, 10000)
    }
    for i := 0; i < 7; i++ {
        // Send data shards one at the time.
        _ = enc.EncodeIdx(make([]byte, 10000), i, parity)
    }
    // parity now contains parity, as if all data was sent in one call.
 }
 ```
 # Streaming/Merging
 It might seem like a limitation that all data should be in memory, 
 but an important property is that *as long as the number of data/parity shards are the same, 
 you can merge/split data sets*, and they will remain valid as a separate set.
 ```Go
    // Split the data set of 50000 elements into two of 25000
    splitA := make([][]byte, 13)
    splitB := make([][]byte, 13)
    // Merge into a 100000 element set
    merged := make([][]byte, 13)
    for i := range data {
      splitA[i] = data[i][:25000]
      splitB[i] = data[i][25000:]
      // Concatenate it to itself
 	  merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...)
 	  merged[i] = append(merged[i], data[i]...)
    }
    // Each part should still verify as ok.
    ok, err := enc.Verify(splitA)
    if ok && err == nil {
        log.Println("splitA ok")
    }
    ok, err = enc.Verify(splitB)
    if ok && err == nil {
        log.Println("splitB ok")
    }
    ok, err = enc.Verify(merged)
    if ok && err == nil {
        log.Println("merge ok")
    }
 ```
 This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks. 
 For the best throughput, don't use too small blocks.
 This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data. 
 This doesn't give the same flexibility of a higher number of data shards, but it will be much more performant.
 # Streaming API
 There has been added support for a streaming API, to help perform fully streaming operations, 
 which enables you to do the same operations, but on streams. 
 To use the stream API, use [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function 
 to create the encoding/decoding interfaces. 
 You can use [`WithConcurrentStreams`](https://godoc.org/github.com/klauspost/reedsolomon#WithConcurrentStreams) 
 to ready an interface that reads/writes concurrently from the streams.
 You can specify the size of each operation using 
 [`WithStreamBlockSize`](https://godoc.org/github.com/klauspost/reedsolomon#WithStreamBlockSize).
 This will set the size of each read/write operation.
 Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API. 
 Each stream must supply the same amount of data, similar to how each slice must be similar size with the in-memory API. 
 If an error occurs in relation to a stream, 
 a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError) 
 or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError) 
 will help you determine which stream was the offender.
 There is no buffering or timeouts/retry specified. If you want to add that, you need to add it to the Reader/Writer.
 For complete examples of a streaming encoder and decoder see the 
 [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
 GF16 (more than 256 shards) is not supported by the streaming interface. 
 # Advanced Options
 You can modify internal options which affect how jobs are split between and processed by goroutines.
 To create options, use the WithXXX functions. You can supply options to `New`, `NewStream`. 
 If no Options are supplied, default options are used.
 Example of how to supply options:
 ```Go
     enc, err := reedsolomon.New(10, 3, reedsolomon.WithMaxGoroutines(25))
 ```
 # Leopard Compatible GF16
 When you encode more than 256 shards the library will switch to a [Leopard-RS](https://github.com/catid/leopard) implementation.
 This allows encoding up to 65536 shards (data+parity) with the following limitations, similar to leopard:
 * The original and recovery data must not exceed 65536 pieces.
 * The shard size *must*  each be a multiple of 64 bytes.
 * Each buffer should have the same number of bytes.
 * Even the last shard must be rounded up to the block size.
 |                 | Regular | Leopard |
 |-----------------|---------|---------|
 | Encode          | ✓       | ✓       |
 | EncodeIdx       | ✓       | -       |
 | Verify          | ✓       | ✓       |
 | Reconstruct     | ✓       | ✓       |
 | ReconstructData | ✓       | ✓       |
 | ReconstructSome | ✓       | ✓ (+)   |
 | Update          | ✓       | -       |
 | Split           | ✓       | ✓       |
 | Join            | ✓       | ✓       |
 * (+) Same as calling `ReconstructData`.
 The Split/Join functions will help to split an input to the proper sizes.
 Speed can be expected to be `O(N*log(N))`, compared to the `O(N*N)`. 
 Reconstruction matrix calculation is more time-consuming, 
 so be sure to include that as part of any benchmark you run.  
 For now SSSE3, AVX2 and AVX512 assembly are available on AMD64 platforms.
 Leopard mode currently always runs as a single goroutine, since multiple 
 goroutines don't provide any worthwhile speedup.
 ## Leopard GF8
 It is possible to replace the default reed-solomon encoder with a leopard compatible one.
 This will typically be faster when dealing with more than 20-30 shards.
 Note that the limitations listed above also apply to this mode. 
 See table below for speed with different number of shards.
 To enable Leopard GF8 mode use `WithLeopardGF(true)`.
 Benchmark Encoding and Reconstructing *1KB* shards with variable number of shards.
 All implementations use inversion cache when available.
 Speed is total shard size for each operation. Data shard throughput is speed/2.
 AVX2 is used.
 | Encoder      | Shards      | Encode         | Recover All  | Recover One    |
 |--------------|-------------|----------------|--------------|----------------|
 | Cauchy       | 4+4         | 23076.83 MB/s  | 5444.02 MB/s | 10834.67 MB/s  |
 | Cauchy       | 8+8         | 15206.87 MB/s  | 4223.42 MB/s | 16181.62  MB/s |
 | Cauchy       | 16+16       | 7427.47 MB/s   | 3305.84 MB/s | 22480.41  MB/s |
 | Cauchy       | 32+32       | 3785.64 MB/s   | 2300.07 MB/s | 26181.31  MB/s |
 | Cauchy       | 64+64       | 1911.93 MB/s   | 1368.51 MB/s | 27992.93 MB/s  |
 | Cauchy       | 128+128     | 963.83 MB/s    | 1327.56 MB/s | 32866.86 MB/s  |
 | Leopard GF8  | 4+4         | 17061.28 MB/s  | 3099.06 MB/s | 4096.78 MB/s   |
 | Leopard GF8  | 8+8         | 10546.67 MB/s  | 2925.92 MB/s | 3964.00 MB/s   |
 | Leopard GF8  | 16+16       | 10961.37  MB/s | 2328.40 MB/s | 3110.22 MB/s   |
 | Leopard GF8  | 32+32       | 7111.47 MB/s   | 2374.61 MB/s | 3220.75 MB/s   |
 | Leopard GF8  | 64+64       | 7468.57 MB/s   | 2055.41 MB/s | 3061.81 MB/s   |
 | Leopard GF8  | 128+128     | 5479.99 MB/s   | 1953.21 MB/s | 2815.15 MB/s   |
 | Leopard GF16 | 256+256     | 6158.66 MB/s   | 454.14 MB/s  | 506.70 MB/s    |
 | Leopard GF16 | 512+512     | 4418.58 MB/s   | 685.75 MB/s  | 801.63 MB/s    |
 | Leopard GF16 | 1024+1024   | 4778.05 MB/s   | 814.51 MB/s  | 1080.19 MB/s   |
 | Leopard GF16 | 2048+2048   | 3417.05 MB/s   | 911.64 MB/s  | 1179.48 MB/s   |
 | Leopard GF16 | 4096+4096   | 3209.41 MB/s   | 729.13 MB/s  | 1135.06 MB/s   |
 | Leopard GF16 | 8192+8192   | 2034.11 MB/s   | 604.52 MB/s  | 842.13 MB/s    |
 | Leopard GF16 | 16384+16384 | 1525.88 MB/s   | 486.74 MB/s  | 750.01 MB/s    |
 | Leopard GF16 | 32768+32768 | 1138.67 MB/s   | 482.81 MB/s  | 712.73 MB/s    |
 "Traditional" encoding is faster until somewhere between 16 and 32 shards.
 Leopard provides fast encoding in all cases, but shows a significant overhead for reconstruction.
 Calculating the reconstruction matrix takes a significant amount of computation. 
 With bigger shards that will be smaller. Arguably, fewer shards typically also means bigger shards.
 Due to the high shard count caching reconstruction matrices generally isn't feasible for Leopard. 
 # Performance
 Performance depends mainly on the number of parity shards. 
 In rough terms, doubling the number of parity shards will double the encoding time.
 Here are the throughput numbers with some different selections of data and parity shards. 
 For reference each shard is 1MB random data, and 16 CPU cores are used for encoding.
 | Data | Parity | Go MB/s | SSSE3 MB/s | AVX2 MB/s |
 |------|--------|---------|------------|-----------|
 | 5    | 2      | 20,772  | 66,355     | 108,755   |
 | 8    | 8      | 6,815   | 38,338     | 70,516    |
 | 10   | 4      | 9,245   | 48,237     | 93,875    |
 | 50   | 20     | 2,063   | 12,130     | 22,828    |
 The throughput numbers here are the size of the encoded data and parity shards.
 If `runtime.GOMAXPROCS()` is set to a value higher than 1, 
 the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
 Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
 ```
 benchmark                            all MB/s     data MB/s    speedup
 BenchmarkReconstruct10x2x10000-8     2011.67      10530.10     5.23x
 BenchmarkReconstruct50x5x50000-8     4585.41      14301.60     3.12x
 BenchmarkReconstruct10x2x1M-8        8081.15      28216.41     3.49x
 BenchmarkReconstruct5x2x1M-8         5780.07      28015.37     4.85x
 BenchmarkReconstruct10x4x1M-8        4352.56      14367.61     3.30x
 BenchmarkReconstruct50x20x1M-8       1364.35      4189.79      3.07x
 BenchmarkReconstruct10x4x16M-8       1484.35      5779.53      3.89x
 ```
 The package will use [GFNI](https://en.wikipedia.org/wiki/AVX-512#GFNI) instructions combined with AVX512 when these are available.
 This further improves speed by up to 3x over AVX2 code paths.
 ## ARM64 NEON
 By exploiting NEON instructions the performance for ARM has been accelerated. 
 Below are the performance numbers for a single core on an EC2 m6g.16xlarge (Graviton2) instance (Amazon Linux 2):
 ```
 BenchmarkGalois128K-64        119562     10028 ns/op        13070.78 MB/s
 BenchmarkGalois1M-64           14380     83424 ns/op        12569.22 MB/s
 BenchmarkGaloisXor128K-64      96508     12432 ns/op        10543.29 MB/s
 BenchmarkGaloisXor1M-64        10000    100322 ns/op        10452.13 MB/s
 ```
 # Performance on ppc64le
 The performance for ppc64le has been accelerated. 
 This gives roughly a 10x performance improvement on this architecture as can be seen below:
 ```
 benchmark                      old MB/s     new MB/s     speedup
 BenchmarkGalois128K-160        948.87       8878.85      9.36x
 BenchmarkGalois1M-160          968.85       9041.92      9.33x
 BenchmarkGaloisXor128K-160     862.02       7905.00      9.17x
 BenchmarkGaloisXor1M-160       784.60       6296.65      8.03x
 ```
 # Legal
 > None of the section below is legal advice. Seek your own legal counsel.
 > As stated by the [LICENSE](LICENSE) the authors will not be held liable for any use of this library.
 > Users are encouraged to independently verify they comply with all legal requirements. 
 As can be seen in [recent news](https://www.datanami.com/2023/10/16/cloudera-hit-with-240-million-judgement-over-erasure-coding/)
 there have been lawsuits related to possible patents of aspects of erasure coding functionality.
 As a possible mitigation it is possible to use the tag `nopshufb` when compiling any code which includes this package.
 This will remove all inclusion and use of `PSHUFB` and equivalent on other platforms.
 This is done by adding `-tags=nopshufb` to `go build` and similar commands that produce binary output.
 The removed code may not be infringing and even after `-tags=nopshufb` there may still be infringing code left. 
 # Links
 * [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
 * [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
 * [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation.
 * [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
 * [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
 * [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
 * [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
 * [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
 * [Leopard-RS](https://github.com/catid/leopard) C library used as basis for GF16 implementation.
 # License
 This code, as the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) is published under an MIT license. See LICENSE file for more information.
--- a/vendor/github.com/klauspost/reedsolomon/galois.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
@ -0,0 +1,583 @@
 //go:build !noasm && !appengine && !gccgo && !nopshufb
 // Copyright 2015, Klaus Post, see LICENSE for details.
 package reedsolomon
 const pshufb = true
 // galMulSSSE3 is declared without a body here (the implementation lives
 // outside this file). Per the reference pseudo-code in the comment further
 // down, it computes out[n] = low[in[n]&0xf] ^ high[in[n]>>4], working in
 // blocks of 16 bytes.
 //go:noescape
 func galMulSSSE3(low, high, in, out []byte)
 // galMulSSSE3Xor is the accumulating form of galMulSSSE3: per the reference
 // pseudo-code further down, it XORs low[in[n]&0xf] ^ high[in[n]>>4] into
 // out[n] instead of overwriting it, again in blocks of 16 bytes.
 //go:noescape
 func galMulSSSE3Xor(low, high, in, out []byte)
 // galMulAVX2Xor is the AVX2 counterpart of galMulSSSE3Xor.
 // NOTE(review): callers in this file skip whole 32-byte blocks after calling
 // it, so it presumably consumes 32 bytes per iteration - confirm against the
 // assembly.
 //go:noescape
 func galMulAVX2Xor(low, high, in, out []byte)
 // galMulAVX2 is the AVX2 counterpart of galMulSSSE3.
 // NOTE(review): callers in this file skip whole 32-byte blocks after calling
 // it, so it presumably consumes 32 bytes per iteration - confirm against the
 // assembly.
 //go:noescape
 func galMulAVX2(low, high, in, out []byte)
 // galMulAVX2Xor_64 is the wide variant of galMulAVX2Xor, used by callers in
 // this file once the input is at least bigSwitchover bytes long.
 // NOTE(review): callers skip whole 64-byte blocks after calling it, so it
 // presumably consumes 64 bytes per iteration - confirm against the assembly.
 //go:noescape
 func galMulAVX2Xor_64(low, high, in, out []byte)
 // galMulAVX2_64 is the wide variant of galMulAVX2, used by callers in this
 // file once the input is at least bigSwitchover bytes long.
 // NOTE(review): callers skip whole 64-byte blocks after calling it, so it
 // presumably consumes 64 bytes per iteration - confirm against the assembly.
 //go:noescape
 func galMulAVX2_64(low, high, in, out []byte)
 // This is what the assembler routines do in blocks of 16 bytes:
 /*
 func galMulSSSE3(low, high, in, out []byte) {
 	for n, input := range in {
 		l := input & 0xf
 		h := input >> 4
 		out[n] = low[l] ^ high[h]
 	}
 }
 func galMulSSSE3Xor(low, high, in, out []byte) {
 	for n, input := range in {
 		l := input & 0xf
 		h := input >> 4
 		out[n] ^= low[l] ^ high[h]
 	}
 }
 */
 // bigSwitchover is the size where 64 bytes are processed per loop.
 const bigSwitchover = 128
 // galMulSlice stores the product of c and every byte of in into the matching
 // position of out, using the package multiplication tables.
 //
 // c == 1 degenerates to a plain copy. When o enables AVX2 or SSSE3 the bulk
 // of the input is handed to the assembly kernels; whatever they leave over is
 // finished with a byte-wise lookup in mulTable[c]. out is resliced to
 // len(in), so it must be at least that long.
 func galMulSlice(c byte, in, out []byte, o *options) {
 	if c == 1 {
 		copy(out, in)
 		return
 	}
 	switch {
 	case o.useAVX2:
 		if len(in) >= bigSwitchover {
 			galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
 			// Every whole 64-byte block is treated as handled by the kernel.
 			handled := len(in) &^ 63
 			in, out = in[handled:], out[handled:]
 		}
 		// Strictly greater than 32 here (the Xor variant uses >=): a remainder
 		// of exactly 32 bytes is left to the table loop below.
 		if len(in) > 32 {
 			galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
 			handled := len(in) &^ 31
 			in, out = in[handled:], out[handled:]
 		}
 	case o.useSSSE3:
 		galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
 		handled := len(in) &^ 15
 		in, out = in[handled:], out[handled:]
 	}
 	// Scalar tail (or the whole input when no SIMD path is enabled).
 	out = out[:len(in)]
 	table := mulTable[c][:256]
 	for i, v := range in {
 		out[i] = table[v]
 	}
 }
 // galMulSliceXor XORs the product of c and every byte of in into the matching
 // position of out, using the package multiplication tables.
 //
 // c == 1 is delegated to sliceXor. When o enables AVX2 or SSSE3 the bulk of
 // the input is handed to the assembly kernels; any remaining bytes are
 // finished with a byte-wise lookup in mulTable[c]. out is resliced to
 // len(in) for the tail, so it must be at least that long.
 func galMulSliceXor(c byte, in, out []byte, o *options) {
 	if c == 1 {
 		sliceXor(in, out, o)
 		return
 	}
 	switch {
 	case o.useAVX2:
 		if len(in) >= bigSwitchover {
 			galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
 			// Every whole 64-byte block is treated as handled by the kernel.
 			handled := len(in) &^ 63
 			in, out = in[handled:], out[handled:]
 		}
 		if len(in) >= 32 {
 			galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
 			handled := len(in) &^ 31
 			in, out = in[handled:], out[handled:]
 		}
 	case o.useSSSE3:
 		galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
 		handled := len(in) &^ 15
 		in, out = in[handled:], out[handled:]
 	}
 	if len(in) == 0 {
 		// Nothing left for the scalar tail.
 		return
 	}
 	out = out[:len(in)]
 	table := mulTable[c][:256]
 	for i, v := range in {
 		out[i] ^= table[v]
 	}
 }
 // sliceXor XORs in into out byte by byte (out[i] ^= in[i]).
 //
 // Without SSE2 the work is delegated entirely to sliceXorGo. With SSE2,
 // inputs of at least bigSwitchover bytes first go through a 64-byte kernel
 // (AVX2 if enabled, otherwise SSE2), then a 16-byte SSE2 kernel, and the
 // final remainder is XORed in a plain loop. out is resliced to len(in) for
 // that remainder, so it must be at least that long.
 func sliceXor(in, out []byte, o *options) {
 	if !o.useSSE2 {
 		sliceXorGo(in, out, o)
 		return
 	}
 	if len(in) >= bigSwitchover {
 		if o.useAVX2 {
 			avx2XorSlice_64(in, out)
 		} else {
 			sSE2XorSlice_64(in, out)
 		}
 		// Either kernel is treated as having handled every whole 64-byte block.
 		handled := len(in) &^ 63
 		in, out = in[handled:], out[handled:]
 	}
 	if len(in) >= 16 {
 		sSE2XorSlice(in, out)
 		handled := len(in) &^ 15
 		in, out = in[handled:], out[handled:]
 	}
 	out = out[:len(in)]
 	for i, v := range in {
 		out[i] ^= v
 	}
 }
 // ifftDIT4 is the 4-way butterfly dispatcher for the ffe element type.
 //
 // It returns immediately when work[0] is empty. Otherwise it picks one of
 // eight specialised assembly kernels (AVX512 preferred over AVX2) and falls
 // back to ifftDIT4Ref when neither instruction set is enabled in o.
 //
 // The numeric suffix of the chosen kernel is a 3-bit index recording which
 // multipliers equal modulus: bit 0 = log_m01, bit 1 = log_m23,
 // bit 2 = log_m02.
 func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
 	if len(work[0]) == 0 {
 		return
 	}
 	t01 := &multiply256LUT[log_m01]
 	t23 := &multiply256LUT[log_m23]
 	t02 := &multiply256LUT[log_m02]
 	if !o.useAVX512 && !o.useAVX2 {
 		ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
 		return
 	}
 	variant := 0
 	if log_m01 == modulus {
 		variant |= 1
 	}
 	if log_m23 == modulus {
 		variant |= 2
 	}
 	if log_m02 == modulus {
 		variant |= 4
 	}
 	// The kernels take the distance multiplied by 24.
 	// NOTE(review): presumably the byte size of a slice header - confirm
 	// against the assembly.
 	dist24 := dist * 24
 	if o.useAVX512 {
 		switch variant {
 		case 0:
 			ifftDIT4_avx512_0(work, dist24, t01, t23, t02)
 		case 1:
 			ifftDIT4_avx512_1(work, dist24, t01, t23, t02)
 		case 2:
 			ifftDIT4_avx512_2(work, dist24, t01, t23, t02)
 		case 3:
 			ifftDIT4_avx512_3(work, dist24, t01, t23, t02)
 		case 4:
 			ifftDIT4_avx512_4(work, dist24, t01, t23, t02)
 		case 5:
 			ifftDIT4_avx512_5(work, dist24, t01, t23, t02)
 		case 6:
 			ifftDIT4_avx512_6(work, dist24, t01, t23, t02)
 		case 7:
 			ifftDIT4_avx512_7(work, dist24, t01, t23, t02)
 		}
 		return
 	}
 	switch variant {
 	case 0:
 		ifftDIT4_avx2_0(work, dist24, t01, t23, t02)
 	case 1:
 		ifftDIT4_avx2_1(work, dist24, t01, t23, t02)
 	case 2:
 		ifftDIT4_avx2_2(work, dist24, t01, t23, t02)
 	case 3:
 		ifftDIT4_avx2_3(work, dist24, t01, t23, t02)
 	case 4:
 		ifftDIT4_avx2_4(work, dist24, t01, t23, t02)
 	case 5:
 		ifftDIT4_avx2_5(work, dist24, t01, t23, t02)
 	case 6:
 		ifftDIT4_avx2_6(work, dist24, t01, t23, t02)
 	case 7:
 		ifftDIT4_avx2_7(work, dist24, t01, t23, t02)
 	}
 }
 // ifftDIT48 is the 4-way butterfly dispatcher for the ffe8 element type.
 //
 // It returns immediately when work[0] is empty. With AVX2 enabled in o it
 // picks one of eight specialised assembly kernels; otherwise it falls back
 // to ifftDIT4Ref8. The GFNI path is present but switched off by a constant
 // false in its condition.
 //
 // The numeric suffix of the chosen kernel is a 3-bit index recording which
 // multipliers equal modulus8: bit 0 = log_m01, bit 1 = log_m23,
 // bit 2 = log_m02.
 func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
 	if len(work[0]) == 0 {
 		return
 	}
 	variant := 0
 	if log_m01 == modulus8 {
 		variant |= 1
 	}
 	if log_m23 == modulus8 {
 		variant |= 2
 	}
 	if log_m02 == modulus8 {
 		variant |= 4
 	}
 	// The kernels take the distance multiplied by 24.
 	// NOTE(review): presumably the byte size of a slice header - confirm
 	// against the assembly.
 	dist24 := dist * 24
 	// Deliberately disabled: the leading false keeps this branch dead.
 	if false && o.useAvx512GFNI {
 		// Note that these currently require that length is multiple of 64.
 		t01 := gf2p811dMulMatrices[log_m01]
 		t23 := gf2p811dMulMatrices[log_m23]
 		t02 := gf2p811dMulMatrices[log_m02]
 		switch variant {
 		case 0:
 			ifftDIT48_gfni_0(work, dist24, t01, t23, t02)
 		case 1:
 			ifftDIT48_gfni_1(work, dist24, t01, t23, t02)
 		case 2:
 			ifftDIT48_gfni_2(work, dist24, t01, t23, t02)
 		case 3:
 			ifftDIT48_gfni_3(work, dist24, t01, t23, t02)
 		case 4:
 			ifftDIT48_gfni_4(work, dist24, t01, t23, t02)
 		case 5:
 			ifftDIT48_gfni_5(work, dist24, t01, t23, t02)
 		case 6:
 			ifftDIT48_gfni_6(work, dist24, t01, t23, t02)
 		case 7:
 			ifftDIT48_gfni_7(work, dist24, t01, t23, t02)
 		}
 		return
 	}
 	if o.useAVX2 {
 		// Note that these currently require that length is multiple of 64.
 		t01 := &multiply256LUT8[log_m01]
 		t23 := &multiply256LUT8[log_m23]
 		t02 := &multiply256LUT8[log_m02]
 		switch variant {
 		case 0:
 			ifftDIT48_avx2_0(work, dist24, t01, t23, t02)
 		case 1:
 			ifftDIT48_avx2_1(work, dist24, t01, t23, t02)
 		case 2:
 			ifftDIT48_avx2_2(work, dist24, t01, t23, t02)
 		case 3:
 			ifftDIT48_avx2_3(work, dist24, t01, t23, t02)
 		case 4:
 			ifftDIT48_avx2_4(work, dist24, t01, t23, t02)
 		case 5:
 			ifftDIT48_avx2_5(work, dist24, t01, t23, t02)
 		case 6:
 			ifftDIT48_avx2_6(work, dist24, t01, t23, t02)
 		case 7:
 			ifftDIT48_avx2_7(work, dist24, t01, t23, t02)
 		}
 		return
 	}
 	ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT4 runs the 4-way butterfly over work, using an AVX512 or AVX2
 // assembly kernel when the corresponding option is set and falling back to
 // fftDIT4Ref otherwise. It returns immediately when work[0] is empty.
 //
 // Eight kernels exist per instruction set; the kernel suffix is a 3-bit mask
 // of which multipliers equal modulus: bit 0 is log_m02, bit 1 is log_m01 and
 // bit 2 is log_m23.
 func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
 	if len(work[0]) == 0 {
 		return
 	}
 	t01 := &multiply256LUT[log_m01]
 	t23 := &multiply256LUT[log_m23]
 	t02 := &multiply256LUT[log_m02]
 	variant := 0
 	if log_m02 == modulus {
 		variant |= 1
 	}
 	if log_m01 == modulus {
 		variant |= 2
 	}
 	if log_m23 == modulus {
 		variant |= 4
 	}
 	// NOTE(review): dist*24 looks like a byte stride between slice headers
 	// (24 bytes each on 64-bit) — confirm against the assembly.
 	stride := dist * 24
 	switch {
 	case o.useAVX512:
 		switch variant {
 		case 0:
 			fftDIT4_avx512_0(work, stride, t01, t23, t02)
 		case 1:
 			fftDIT4_avx512_1(work, stride, t01, t23, t02)
 		case 2:
 			fftDIT4_avx512_2(work, stride, t01, t23, t02)
 		case 3:
 			fftDIT4_avx512_3(work, stride, t01, t23, t02)
 		case 4:
 			fftDIT4_avx512_4(work, stride, t01, t23, t02)
 		case 5:
 			fftDIT4_avx512_5(work, stride, t01, t23, t02)
 		case 6:
 			fftDIT4_avx512_6(work, stride, t01, t23, t02)
 		case 7:
 			fftDIT4_avx512_7(work, stride, t01, t23, t02)
 		}
 	case o.useAVX2:
 		switch variant {
 		case 0:
 			fftDIT4_avx2_0(work, stride, t01, t23, t02)
 		case 1:
 			fftDIT4_avx2_1(work, stride, t01, t23, t02)
 		case 2:
 			fftDIT4_avx2_2(work, stride, t01, t23, t02)
 		case 3:
 			fftDIT4_avx2_3(work, stride, t01, t23, t02)
 		case 4:
 			fftDIT4_avx2_4(work, stride, t01, t23, t02)
 		case 5:
 			fftDIT4_avx2_5(work, stride, t01, t23, t02)
 		case 6:
 			fftDIT4_avx2_6(work, stride, t01, t23, t02)
 		case 7:
 			fftDIT4_avx2_7(work, stride, t01, t23, t02)
 		}
 	default:
 		fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
 	}
 }
 // fftDIT48 runs the 4-way butterfly over work for ffe8 multipliers. With
 // AVX2 enabled it calls one of eight assembly kernels; otherwise it falls
 // back to fftDIT4Ref8. It returns immediately when work[0] is empty.
 //
 // The kernel suffix is a 3-bit mask of which multipliers equal modulus8:
 // bit 0 is log_m02, bit 1 is log_m01 and bit 2 is log_m23.
 //
 // Note that the assembly kernels currently require that length is multiple
 // of 64.
 func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
 	if len(work[0]) == 0 {
 		return
 	}
 	variant := 0
 	if log_m02 == modulus8 {
 		variant |= 1
 	}
 	if log_m01 == modulus8 {
 		variant |= 2
 	}
 	if log_m23 == modulus8 {
 		variant |= 4
 	}
 	// The GFNI path is compiled in but switched off by the constant false.
 	if false && o.useAvx512GFNI {
 		t01 := gf2p811dMulMatrices[log_m01]
 		t23 := gf2p811dMulMatrices[log_m23]
 		t02 := gf2p811dMulMatrices[log_m02]
 		switch variant {
 		case 0:
 			fftDIT48_gfni_0(work, dist*24, t01, t23, t02)
 		case 1:
 			fftDIT48_gfni_1(work, dist*24, t01, t23, t02)
 		case 2:
 			fftDIT48_gfni_2(work, dist*24, t01, t23, t02)
 		case 3:
 			fftDIT48_gfni_3(work, dist*24, t01, t23, t02)
 		case 4:
 			fftDIT48_gfni_4(work, dist*24, t01, t23, t02)
 		case 5:
 			fftDIT48_gfni_5(work, dist*24, t01, t23, t02)
 		case 6:
 			fftDIT48_gfni_6(work, dist*24, t01, t23, t02)
 		case 7:
 			fftDIT48_gfni_7(work, dist*24, t01, t23, t02)
 		}
 		return
 	}
 	if !o.useAVX2 {
 		fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
 		return
 	}
 	t01 := &multiply256LUT8[log_m01]
 	t23 := &multiply256LUT8[log_m23]
 	t02 := &multiply256LUT8[log_m02]
 	switch variant {
 	case 0:
 		fftDIT48_avx2_0(work, dist*24, t01, t23, t02)
 	case 1:
 		fftDIT48_avx2_1(work, dist*24, t01, t23, t02)
 	case 2:
 		fftDIT48_avx2_2(work, dist*24, t01, t23, t02)
 	case 3:
 		fftDIT48_avx2_3(work, dist*24, t01, t23, t02)
 	case 4:
 		fftDIT48_avx2_4(work, dist*24, t01, t23, t02)
 	case 5:
 		fftDIT48_avx2_5(work, dist*24, t01, t23, t02)
 	case 6:
 		fftDIT48_avx2_6(work, dist*24, t01, t23, t02)
 	case 7:
 		fftDIT48_avx2_7(work, dist*24, t01, t23, t02)
 	}
 }
 // fftDIT2 is the forward 2-way butterfly on x and y with multiplier log_m.
 // It picks the AVX2 kernel, then the SSSE3 kernel, and otherwise runs the
 // reference sequence refMulAdd followed by sliceXor. Empty x is a no-op.
 func fftDIT2(x, y []byte, log_m ffe, o *options) {
 	if len(x) == 0 {
 		return
 	}
 	switch {
 	case o.useAVX2:
 		fftDIT2_avx2(x, y, &multiply256LUT[log_m])
 	case o.useSSSE3:
 		fftDIT2_ssse3(x, y, &multiply256LUT[log_m])
 	default:
 		// Reference version:
 		refMulAdd(x, y, log_m)
 		sliceXor(x, y, o)
 	}
 }
 // fftDIT28 is the forward 2-way butterfly on x and y for an ffe8 multiplier.
 // With AVX2 the assembly kernel runs first; when len(x) is not a multiple of
 // 64 the part beyond the last full 64-byte block of y is then finished with
 // mulAdd8 followed by sliceXor. Without AVX2 the whole input takes that
 // generic path. Empty x is a no-op.
 func fftDIT28(x, y []byte, log_m ffe8, o *options) {
 	if len(x) == 0 {
 		return
 	}
 	if o.useAVX2 {
 		fftDIT28_avx2(x, y, &multiply256LUT8[log_m])
 		if len(x)%64 == 0 {
 			return
 		}
 		// Skip the whole 64-byte blocks and fall through for the tail.
 		tail := len(y) &^ 63
 		y = y[tail:]
 		x = x[tail:]
 	}
 	mulAdd8(x, y, log_m, o)
 	sliceXor(x, y, o)
 }
 // ifftDIT28 is the inverse 2-way butterfly on x and y for an ffe8
 // multiplier. With AVX2 the assembly kernel runs first; when len(x) is not a
 // multiple of 64 the part beyond the last full 64-byte block of y is then
 // finished with sliceXor followed by mulAdd8. Without AVX2 the whole input
 // takes that generic path. Empty x is a no-op.
 func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
 	if len(x) == 0 {
 		return
 	}
 	if o.useAVX2 {
 		ifftDIT28_avx2(x, y, &multiply256LUT8[log_m])
 		if len(x)%64 == 0 {
 			return
 		}
 		// Skip the whole 64-byte blocks and fall through for the tail.
 		tail := len(y) &^ 63
 		y = y[tail:]
 		x = x[tail:]
 	}
 	sliceXor(x, y, o)
 	mulAdd8(x, y, log_m, o)
 }
 // mulAdd8 passes y as the input and x as the output of the XOR-multiply
 // kernels, using the first 32 bytes of multiply256LUT8[log_m] as the low and
 // high tables. AVX2 covers whole 64-byte blocks and SSSE3 whole 16-byte
 // blocks; whatever is left (or everything, without SIMD) goes to refMulAdd8.
 func mulAdd8(x, y []byte, log_m ffe8, o *options) {
 	switch {
 	case o.useAVX2:
 		lut := &multiply256LUT8[log_m]
 		galMulAVX2Xor_64(lut[:16], lut[16:32], y, x)
 		skip := len(y) &^ 63
 		y = y[skip:]
 		x = x[skip:]
 	case o.useSSSE3:
 		lut := &multiply256LUT8[log_m]
 		galMulSSSE3Xor(lut[:16], lut[16:32], y, x)
 		skip := len(y) &^ 15
 		y = y[skip:]
 		x = x[skip:]
 	}
 	refMulAdd8(x, y, log_m)
 }
 // ifftDIT2 is the 2-way butterfly counterpart of fftDIT2: the reference path
 // applies sliceXor first and refMulAdd second. It picks the AVX2 kernel,
 // then the SSSE3 kernel, and otherwise the reference path. Empty x is a
 // no-op.
 func ifftDIT2(x, y []byte, log_m ffe, o *options) {
 	if len(x) == 0 {
 		return
 	}
 	switch {
 	case o.useAVX2:
 		ifftDIT2_avx2(x, y, &multiply256LUT[log_m])
 	case o.useSSSE3:
 		ifftDIT2_ssse3(x, y, &multiply256LUT[log_m])
 	default:
 		// Reference version:
 		sliceXor(x, y, o)
 		refMulAdd(x, y, log_m)
 	}
 }
 // mulgf16 dispatches the multiply of y by log_m into x to the AVX2 kernel,
 // the SSSE3 kernel, or refMul, in that order of preference. Empty x is a
 // no-op.
 func mulgf16(x, y []byte, log_m ffe, o *options) {
 	if len(x) == 0 {
 		return
 	}
 	switch {
 	case o.useAVX2:
 		mulgf16_avx2(x, y, &multiply256LUT[log_m])
 	case o.useSSSE3:
 		mulgf16_ssse3(x, y, &multiply256LUT[log_m])
 	default:
 		refMul(x, y, log_m)
 	}
 }
 // mulgf8 writes the product of every byte of in with the multiplier log_m
 // into out. AVX2 covers whole 64-byte blocks and SSSE3 whole 16-byte blocks
 // using the first 32 bytes of multiply256LUT8[log_m]; the remaining bytes
 // (or all of them, without SIMD) are looked up in mul8LUTs[log_m].Value.
 func mulgf8(out, in []byte, log_m ffe8, o *options) {
 	switch {
 	case o.useAVX2:
 		lut := &multiply256LUT8[log_m]
 		galMulAVX2_64(lut[:16], lut[16:32], in, out)
 		skip := len(in) &^ 63
 		in = in[skip:]
 		out = out[skip:]
 	case o.useSSSE3:
 		lut := &multiply256LUT8[log_m]
 		galMulSSSE3(lut[:16], lut[16:32], in, out)
 		skip := len(in) &^ 15
 		in = in[skip:]
 		out = out[skip:]
 	}
 	// Reslice once so the loop below needs no per-element bounds check on out.
 	out = out[:len(in)]
 	mt := mul8LUTs[log_m].Value[:]
 	for i, v := range in {
 		out[i] = byte(mt[v])
 	}
 }
--- a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
@ -0,0 +1,310 @@
 //+build !noasm
 //+build !appengine
 //+build !gccgo
 //+build !nopshufb
 // Copyright 2015, Klaus Post, see LICENSE for details.
 // Based on http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf
 // and http://jerasure.org/jerasure/gf-complete/tree/master
 // func galMulSSSE3Xor(low, high, in, out []byte)
 //
 // For each whole 16-byte block of in: split every byte into its low and high
 // nibble, look each nibble up in the 16-byte low/high tables with PSHUFB,
 // XOR the two lookups together, XOR that with the existing contents of out
 // and store the result back to out. Bytes beyond the last whole block are
 // not touched. An aligned loop is used when both in and out are 16-byte
 // aligned; otherwise the unaligned loop is used.
 TEXT ·galMulSSSE3Xor(SB), 7, $0
 	MOVQ   low+0(FP), SI     // SI: &low
 	MOVQ   high+24(FP), DX   // DX: &high
 	MOVOU  (SI), X6          // X6: low table
 	MOVOU  (DX), X7          // X7: high table
 	MOVQ   $15, BX           // BX: low mask
 	MOVQ   BX, X8
 	PXOR   X5, X5            // X5: zero, used as shuffle control below
 	MOVQ   in+48(FP), SI     // SI: &in
 	MOVQ   in_len+56(FP), R9 // R9: len(in)
 	MOVQ   out+72(FP), DX    // DX: &out
 	PSHUFB X5, X8            // X8: lomask (unpacked)
 	SHRQ   $4, R9            // len(in) / 16
 	MOVQ   SI, AX
 	MOVQ   DX, BX
 	ANDQ   $15, AX           // AX: &in mod 16
 	ANDQ   $15, BX           // BX: &out mod 16
 	CMPQ   R9, $0
 	JEQ    done_xor          // no whole block: nothing to do
 	ORQ    AX, BX
 	CMPQ   BX, $0
 	JNZ    loopback_xor      // either pointer misaligned: use unaligned loop
 loopback_xor_aligned:
 	MOVOA  (SI), X0             // in[x]
 	MOVOA  (DX), X4             // out[x]
 	MOVOA  X0, X1               // in[x]
 	MOVOA  X6, X2               // low copy
 	MOVOA  X7, X3               // high copy
 	PSRLQ  $4, X1               // X1: high input
 	PAND   X8, X0               // X0: low input
 	PAND   X8, X1               // X1: high input
 	PSHUFB X0, X2               // X2: mul low part
 	PSHUFB X1, X3               // X3: mul high part
 	PXOR   X2, X3               // X3: Result
 	PXOR   X4, X3               // X3: Result xor existing out
 	MOVOA  X3, (DX)             // Store
 	ADDQ   $16, SI              // in+=16
 	ADDQ   $16, DX              // out+=16
 	SUBQ   $1, R9
 	JNZ    loopback_xor_aligned
 	JMP    done_xor
 loopback_xor:
 	MOVOU  (SI), X0     // in[x]
 	MOVOU  (DX), X4     // out[x]
 	MOVOU  X0, X1       // in[x]
 	MOVOU  X6, X2       // low copy
 	MOVOU  X7, X3       // high copy
 	PSRLQ  $4, X1       // X1: high input
 	PAND   X8, X0       // X0: low input
 	PAND   X8, X1       // X1: high input
 	PSHUFB X0, X2       // X2: mul low part
 	PSHUFB X1, X3       // X3: mul high part
 	PXOR   X2, X3       // X3: Result
 	PXOR   X4, X3       // X3: Result xor existing out
 	MOVOU  X3, (DX)     // Store
 	ADDQ   $16, SI      // in+=16
 	ADDQ   $16, DX      // out+=16
 	SUBQ   $1, R9
 	JNZ    loopback_xor
 done_xor:
 	RET
 // func galMulSSSE3(low, high, in, out []byte)
 //
 // For each whole 16-byte block of in: split every byte into its low and high
 // nibble, look each nibble up in the 16-byte low/high tables with PSHUFB,
 // XOR the two lookups together and store the result to out, overwriting it.
 // Bytes beyond the last whole block are not touched. An aligned loop is used
 // when both in and out are 16-byte aligned; otherwise the unaligned loop is
 // used.
 TEXT ·galMulSSSE3(SB), 7, $0
 	MOVQ   low+0(FP), SI     // SI: &low
 	MOVQ   high+24(FP), DX   // DX: &high
 	MOVOU  (SI), X6          // X6: low table
 	MOVOU  (DX), X7          // X7: high table
 	MOVQ   $15, BX           // BX: low mask
 	MOVQ   BX, X8
 	PXOR   X5, X5            // X5: zero, used as shuffle control below
 	MOVQ   in+48(FP), SI     // SI: &in
 	MOVQ   in_len+56(FP), R9 // R9: len(in)
 	MOVQ   out+72(FP), DX    // DX: &out
 	PSHUFB X5, X8            // X8: lomask (unpacked)
 	MOVQ   SI, AX
 	MOVQ   DX, BX
 	SHRQ   $4, R9            // len(in) / 16
 	ANDQ   $15, AX           // AX: &in mod 16
 	ANDQ   $15, BX           // BX: &out mod 16
 	CMPQ   R9, $0
 	JEQ    done              // no whole block: nothing to do
 	ORQ    AX, BX
 	CMPQ   BX, $0
 	JNZ    loopback          // either pointer misaligned: use unaligned loop
 loopback_aligned:
 	MOVOA  (SI), X0         // in[x]
 	MOVOA  X0, X1           // in[x]
 	MOVOA  X6, X2           // low copy
 	MOVOA  X7, X3           // high copy
 	PSRLQ  $4, X1           // X1: high input
 	PAND   X8, X0           // X0: low input
 	PAND   X8, X1           // X1: high input
 	PSHUFB X0, X2           // X2: mul low part
 	PSHUFB X1, X3           // X3: mul high part
 	PXOR   X2, X3           // X3: Result
 	MOVOA  X3, (DX)         // Store
 	ADDQ   $16, SI          // in+=16
 	ADDQ   $16, DX          // out+=16
 	SUBQ   $1, R9
 	JNZ    loopback_aligned
 	JMP    done
 loopback:
 	MOVOU  (SI), X0 // in[x]
 	MOVOU  X0, X1   // in[x]
 	MOVOA  X6, X2   // low copy
 	MOVOA  X7, X3   // high copy
 	PSRLQ  $4, X1   // X1: high input
 	PAND   X8, X0   // X0: low input
 	PAND   X8, X1   // X1: high input
 	PSHUFB X0, X2   // X2: mul low part
 	PSHUFB X1, X3   // X3: mul high part
 	PXOR   X2, X3   // X3: Result
 	MOVOU  X3, (DX) // Store
 	ADDQ   $16, SI  // in+=16
 	ADDQ   $16, DX  // out+=16
 	SUBQ   $1, R9
 	JNZ    loopback
 done:
 	RET
 // func galMulAVX2Xor(low, high, in, out []byte)
 //
 // AVX2 nibble-table multiply with accumulate: for each whole 32-byte block
 // of in, look the low and high nibbles of every byte up in the low/high
 // tables, XOR the two lookups with the existing contents of out and store
 // the result back to out. Bytes beyond the last whole block are not touched.
 TEXT ·galMulAVX2Xor(SB), 7, $0
 	MOVQ  low+0(FP), SI     // SI: &low
 	MOVQ  high+24(FP), DX   // DX: &high
 	MOVQ  $15, BX           // BX: low mask
 	MOVQ  BX, X5
 	MOVOU (SI), X6          // X6: low
 	MOVOU (DX), X7          // X7: high
 	MOVQ  in_len+56(FP), R9 // R9: len(in)
 	VINSERTI128  $1, X6, Y6, Y6 // low, copied into the upper 128-bit lane
 	VINSERTI128  $1, X7, Y7, Y7 // high, copied into the upper 128-bit lane
 	VPBROADCASTB X5, Y8         // Y8: lomask (unpacked)
 	SHRQ  $5, R9         // len(in) / 32
 	MOVQ  out+72(FP), DX // DX: &out
 	MOVQ  in+48(FP), SI  // SI: &in
 	TESTQ R9, R9
 	JZ    done_xor_avx2  // no whole block: nothing to do
 loopback_xor_avx2:
 	VMOVDQU (SI), Y0   // Y0: in[x]
 	VMOVDQU (DX), Y4   // Y4: out[x]
 	VPSRLQ  $4, Y0, Y1 // Y1: high input
 	VPAND   Y8, Y0, Y0 // Y0: low input
 	VPAND   Y8, Y1, Y1 // Y1: high input
 	VPSHUFB Y0, Y6, Y2 // Y2: mul low part
 	VPSHUFB Y1, Y7, Y3 // Y3: mul high part
 	VPXOR   Y3, Y2, Y3 // Y3: Result
 	VPXOR   Y4, Y3, Y4 // Y4: Result xor existing out
 	VMOVDQU Y4, (DX)
 	ADDQ $32, SI           // in+=32
 	ADDQ $32, DX           // out+=32
 	SUBQ $1, R9
 	JNZ  loopback_xor_avx2
 done_xor_avx2:
 	VZEROUPPER
 	RET
 // func galMulAVX2(low, high, in, out []byte)
 //
 // AVX2 nibble-table multiply: for each whole 32-byte block of in, look the
 // low and high nibbles of every byte up in the low/high tables, XOR the two
 // lookups together and store the result to out, overwriting it. Bytes beyond
 // the last whole block are not touched.
 TEXT ·galMulAVX2(SB), 7, $0
 	MOVQ  low+0(FP), SI     // SI: &low
 	MOVQ  high+24(FP), DX   // DX: &high
 	MOVQ  $15, BX           // BX: low mask
 	MOVQ  BX, X5
 	MOVOU (SI), X6          // X6: low
 	MOVOU (DX), X7          // X7: high
 	MOVQ  in_len+56(FP), R9 // R9: len(in)
 	VINSERTI128  $1, X6, Y6, Y6 // low, copied into the upper 128-bit lane
 	VINSERTI128  $1, X7, Y7, Y7 // high, copied into the upper 128-bit lane
 	VPBROADCASTB X5, Y8         // Y8: lomask (unpacked)
 	SHRQ  $5, R9         // len(in) / 32
 	MOVQ  out+72(FP), DX // DX: &out
 	MOVQ  in+48(FP), SI  // SI: &in
 	TESTQ R9, R9
 	JZ    done_avx2      // no whole block: nothing to do
 loopback_avx2:
 	VMOVDQU (SI), Y0   // Y0: in[x]
 	VPSRLQ  $4, Y0, Y1 // Y1: high input
 	VPAND   Y8, Y0, Y0 // Y0: low input
 	VPAND   Y8, Y1, Y1 // Y1: high input
 	VPSHUFB Y0, Y6, Y2 // Y2: mul low part
 	VPSHUFB Y1, Y7, Y3 // Y3: mul high part
 	VPXOR   Y3, Y2, Y4 // Y4: Result
 	VMOVDQU Y4, (DX)
 	ADDQ $32, SI       // in+=32
 	ADDQ $32, DX       // out+=32
 	SUBQ $1, R9
 	JNZ  loopback_avx2
 done_avx2:
 	VZEROUPPER
 	RET
 // func galMulAVX2Xor_64(low, high, in, out []byte)
 //
 // Same operation as galMulAVX2Xor but handling two 32-byte vectors (64
 // bytes) per iteration: out ^= lookup(low, in&15) ^ lookup(high, in>>4) for
 // every byte of each whole 64-byte block. Bytes beyond the last whole block
 // are not touched.
 TEXT ·galMulAVX2Xor_64(SB), 7, $0
 	MOVQ low+0(FP), SI     // SI: &low
 	MOVQ high+24(FP), DX   // DX: &high
 	MOVQ $15, BX           // BX: low mask
 	MOVQ BX, X5
 	MOVQ in_len+56(FP), R9 // R9: len(in)
 	VBROADCASTI128 (SI), Y6 // low table
 	VBROADCASTI128 (DX), Y7 // high table
 	VPBROADCASTB   X5, Y8   // Y8: lomask (unpacked)
 	SHRQ  $6, R9           // len(in) / 64
 	MOVQ  out+72(FP), DX   // DX: &out
 	MOVQ  in+48(FP), SI    // SI: &in
 	TESTQ R9, R9
 	JZ    done_xor_avx2_64 // no whole block: nothing to do
 loopback_xor_avx2_64:
 	VMOVDQU (SI), Y0      // Y0: in[x]
 	VMOVDQU 32(SI), Y10   // Y10: in[x+32]
 	VMOVDQU (DX), Y4      // Y4: out[x]
 	VMOVDQU 32(DX), Y14   // Y14: out[x+32]
 	VPSRLQ  $4, Y0, Y1    // Y1: high input
 	VPSRLQ  $4, Y10, Y11  // Y11: high input 2
 	VPAND   Y8, Y0, Y0    // Y0: low input
 	VPAND   Y8, Y10, Y10  // Y10: low input 2
 	VPAND   Y8, Y1, Y1    // Y1: high input
 	VPAND   Y8, Y11, Y11  // Y11: high input 2
 	VPSHUFB Y0, Y6, Y2    // Y2: mul low part
 	VPSHUFB Y10, Y6, Y12  // Y12: mul low part 2
 	VPSHUFB Y1, Y7, Y3    // Y3: mul high part
 	VPSHUFB Y11, Y7, Y13  // Y13: mul high part 2
 	VPXOR   Y3, Y2, Y3    // Y3: Result
 	VPXOR   Y13, Y12, Y13 // Y13: Result 2
 	VPXOR   Y4, Y3, Y4    // Y4: Result xor existing out
 	VPXOR   Y14, Y13, Y14 // Y14: Result 2 xor existing out
 	VMOVDQU Y4, (DX)
 	VMOVDQU Y14, 32(DX)
 	ADDQ $64, SI              // in+=64
 	ADDQ $64, DX              // out+=64
 	SUBQ $1, R9
 	JNZ  loopback_xor_avx2_64
 done_xor_avx2_64:
 	VZEROUPPER
 	RET
 // func galMulAVX2_64(low, high, in, out []byte)
 //
 // Same operation as galMulAVX2 but handling two 32-byte vectors (64 bytes)
 // per iteration: out = lookup(low, in&15) ^ lookup(high, in>>4) for every
 // byte of each whole 64-byte block. Bytes beyond the last whole block are
 // not touched.
 TEXT ·galMulAVX2_64(SB), 7, $0
 	MOVQ           low+0(FP), SI     // SI: &low
 	MOVQ           high+24(FP), DX   // DX: &high
 	MOVQ           $15, BX           // BX: low mask
 	MOVQ           BX, X5
 	MOVQ           in_len+56(FP), R9 // R9: len(in)
 	VBROADCASTI128 (SI), Y6          // low table
 	VBROADCASTI128 (DX), Y7          // high table
 	VPBROADCASTB   X5, Y8            // Y8: lomask (unpacked)
 	SHRQ  $6, R9         // len(in) / 64
 	MOVQ  out+72(FP), DX // DX: &out
 	MOVQ  in+48(FP), SI  // SI: &in
 	TESTQ R9, R9
 	JZ    done_avx2_64   // no whole block: nothing to do
 loopback_avx2_64:
 	VMOVDQU (SI), Y0      // Y0: in[x]
 	VMOVDQU 32(SI), Y10   // Y10: in[x+32]
 	VPSRLQ  $4, Y0, Y1    // Y1: high input
 	VPSRLQ  $4, Y10, Y11  // Y11: high input 2
 	VPAND   Y8, Y0, Y0    // Y0: low input
 	VPAND   Y8, Y10, Y10  // Y10: low input 2
 	VPAND   Y8, Y1, Y1    // Y1: high input
 	VPAND   Y8, Y11, Y11  // Y11: high input 2
 	VPSHUFB Y0, Y6, Y2    // Y2: mul low part
 	VPSHUFB Y10, Y6, Y12  // Y12: mul low part 2
 	VPSHUFB Y1, Y7, Y3    // Y3: mul high part
 	VPSHUFB Y11, Y7, Y13  // Y13: mul high part 2
 	VPXOR   Y3, Y2, Y4    // Y4: Result
 	VPXOR   Y13, Y12, Y14 // Y14: Result 2
 	VMOVDQU Y4, (DX)
 	VMOVDQU Y14, 32(DX)
 	ADDQ $64, SI          // in+=64
 	ADDQ $64, DX          // out+=64
 	SUBQ $1, R9
 	JNZ  loopback_avx2_64
 done_avx2_64:
 	VZEROUPPER
 	RET
--- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
@ -0,0 +1,130 @@
 //go:build !noasm && !appengine && !gccgo && !nopshufb
 // Copyright 2015, Klaus Post, see LICENSE for details.
 // Copyright 2017, Minio, Inc.
 package reedsolomon
 // pshufb is true in this build, which is compiled only when the nopshufb
 // tag is absent and provides the NEON table-lookup multiply routines.
 const pshufb = true
 // galMulNEON is implemented in assembly. For each whole 32-byte block of in
 // it stores lookup(low, b&15) ^ lookup(high, b>>4) for every byte b into
 // out; bytes past the last whole block are left untouched.
 //
 //go:noescape
 func galMulNEON(low, high, in, out []byte)
 // galMulXorNEON is implemented in assembly. For each whole 32-byte block of
 // in it XORs lookup(low, b&15) ^ lookup(high, b>>4) for every byte b into
 // out; bytes past the last whole block are left untouched.
 //
 //go:noescape
 func galMulXorNEON(low, high, in, out []byte)
 // galMulSlice writes c times every byte of in into out. c == 1 is a plain
 // copy. Otherwise the NEON routine handles the whole 32-byte blocks and the
 // remaining bytes are looked up in mulTable[c]. o is unused here.
 func galMulSlice(c byte, in, out []byte, o *options) {
 	if c == 1 {
 		copy(out, in)
 		return
 	}
 	galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
 	// Index of the first byte the 32-byte-block routine did not cover.
 	tail := len(in) &^ 31
 	if tail == len(in) {
 		return
 	}
 	mt := mulTable[c][:256]
 	for i := tail; i < len(in); i++ {
 		out[i] = mt[in[i]]
 	}
 }
 // galMulSliceXor XORs c times every byte of in into out. c == 1 reduces to
 // sliceXor. Otherwise the NEON routine handles the whole 32-byte blocks and
 // the remaining bytes are looked up in mulTable[c].
 func galMulSliceXor(c byte, in, out []byte, o *options) {
 	if c == 1 {
 		sliceXor(in, out, o)
 		return
 	}
 	galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
 	// Index of the first byte the 32-byte-block routine did not cover.
 	tail := len(in) &^ 31
 	if tail == len(in) {
 		return
 	}
 	mt := mulTable[c][:256]
 	for i := tail; i < len(in); i++ {
 		out[i] ^= mt[in[i]]
 	}
 }
 // ifftDIT4 is the 4-way butterfly entry point for ffe multipliers. This
 // build has no specialised kernel for it and forwards every argument
 // unchanged to ifftDIT4Ref.
 func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
 	ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
 }
 // ifftDIT48 is the 4-way butterfly entry point for ffe8 multipliers. This
 // build has no specialised kernel for it and forwards every argument
 // unchanged to ifftDIT4Ref8.
 func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
 	ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT4 is the 4-way butterfly entry point for ffe multipliers. This
 // build has no specialised kernel for it and forwards every argument
 // unchanged to fftDIT4Ref.
 func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
 	fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT48 is the 4-way butterfly entry point for ffe8 multipliers. This
 // build has no specialised kernel for it and forwards every argument
 // unchanged to fftDIT4Ref8.
 func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
 	fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT2 is the forward 2-way butterfly: refMulAdd(x, y, log_m) followed by
 // an XOR step done with xorSliceNEON(x, y). o is unused here.
 func fftDIT2(x, y []byte, log_m ffe, o *options) {
 	// Reference version:
 	refMulAdd(x, y, log_m)
 	// 64 byte aligned, always full.
 	// NOTE(review): nothing in this function checks that property; it is
 	// presumably guaranteed by the callers — confirm.
 	xorSliceNEON(x, y)
 }
 // fftDIT28 is the forward 2-way butterfly for ffe8 multipliers: mulAdd8
 // followed by sliceXor, both called with (x, y).
 func fftDIT28(x, y []byte, log_m ffe8, o *options) {
 	// Reference version:
 	mulAdd8(x, y, log_m, o)
 	sliceXor(x, y, o)
 }
 // ifftDIT2 is the 2-way butterfly that applies the steps of fftDIT2 in the
 // opposite order: the XOR step via xorSliceNEON(x, y) first, then
 // refMulAdd(x, y, log_m). o is unused here.
 func ifftDIT2(x, y []byte, log_m ffe, o *options) {
 	// 64 byte aligned, always full.
 	// NOTE(review): nothing in this function checks that property; it is
 	// presumably guaranteed by the callers — confirm.
 	xorSliceNEON(x, y)
 	// Reference version:
 	refMulAdd(x, y, log_m)
 }
 // ifftDIT28 is the inverse 2-way butterfly for ffe8 multipliers: sliceXor
 // followed by mulAdd8, both called with (x, y).
 func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
 	// Reference version:
 	sliceXor(x, y, o)
 	mulAdd8(x, y, log_m, o)
 }
 // mulgf16 forwards to refMul(x, y, log_m); this build has no specialised
 // kernel for it. o is unused here.
 func mulgf16(x, y []byte, log_m ffe, o *options) {
 	refMul(x, y, log_m)
 }
 // mulAdd8 XORs the product of every byte of in with the multiplier log_m
 // into out. The NEON routine handles the whole 32-byte blocks using the
 // first 32 bytes of multiply256LUT8[log_m] as low and high nibble tables;
 // anything left over is handed to refMulAdd8. o is unused here.
 func mulAdd8(out, in []byte, log_m ffe8, o *options) {
 	t := &multiply256LUT8[log_m]
 	galMulXorNEON(t[:16], t[16:32], in, out)
 	done := (len(in) >> 5) << 5
 	in = in[done:]
 	if len(in) > 0 {
 		out = out[done:]
 		// refMulAdd8 takes the destination first and the source second, so
 		// out must come first here; passing (in, out) would accumulate into
 		// the input slice instead of the output.
 		// NOTE(review): how refMulAdd8 treats inputs shorter than its own
 		// block size is not visible from this file — confirm.
 		refMulAdd8(out, in, log_m)
 	}
 }
 // mulgf8 writes the product of every byte of in with the multiplier log_m
 // into out, overwriting it. The NEON routine handles the whole 32-byte
 // blocks using the first 32 bytes of multiply256LUT8[log_m] as low and high
 // nibble tables; the remaining bytes are looked up in
 // mul8LUTs[log_m].Value. o is unused here.
 func mulgf8(out, in []byte, log_m ffe8, o *options) {
 	t := &multiply256LUT8[log_m]
 	galMulNEON(t[:16], t[16:32], in, out)
 	done := (len(in) >> 5) << 5
 	if done == len(in) {
 		return
 	}
 	mt := mul8LUTs[log_m].Value[:]
 	for i := done; i < len(in); i++ {
 		// Plain store, matching what galMulNEON does for the bulk: this is a
 		// multiply, not a multiply-accumulate, so the tail must not XOR into
 		// whatever out held before.
 		out[i] = byte(mt[in[i]])
 	}
 }
--- a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
@ -0,0 +1,102 @@
 //+build !noasm
 //+build !appengine
 //+build !gccgo
 //+build !nopshufb
 // Copyright 2015, Klaus Post, see LICENSE for details.
 // Copyright 2017, Minio, Inc.
 // LOAD reads 32 bytes from (R1) into LO1/LO2 and advances R1 by 32. It then
 // puts each byte shifted right by 4 (the high nibble) into HI1/HI2 and masks
 // LO1/LO2 with V8, which the callers fill with 0x0f, leaving the low nibble.
 #define LOAD(LO1, LO2, HI1, HI2) \
 	VLD1.P 32(R1), [LO1.B16, LO2.B16] \
 	                                  \
 	\ // Get low input and high input
 	VUSHR  $4, LO1.B16, HI1.B16       \
 	VUSHR  $4, LO2.B16, HI2.B16       \
 	VAND   V8.B16, LO1.B16, LO1.B16   \
 	VAND   V8.B16, LO2.B16, LO2.B16
 // GALOIS_MUL looks the nibble indices up in the MUL_LO and MUL_HI tables and
 // XORs the low and high lookups together, leaving the two 16-byte results
 // in OUT1 and OUT2. TMP1/TMP2 are scratch. The index registers are
 // hard-coded: V0/V1 hold the low nibbles and V10/V11 the high nibbles, which
 // matches the LOAD(V0, V1, V10, V11) invocations in this file.
 #define GALOIS_MUL(MUL_LO, MUL_HI, OUT1, OUT2, TMP1, TMP2) \
 	\ // Mul low part and mul high part
 	VTBL V0.B16, [MUL_LO.B16], OUT1.B16  \
 	VTBL V10.B16, [MUL_HI.B16], OUT2.B16 \
 	VTBL V1.B16, [MUL_LO.B16], TMP1.B16  \
 	VTBL V11.B16, [MUL_HI.B16], TMP2.B16 \
 	                                     \
 	\ // Combine results
 	VEOR OUT2.B16, OUT1.B16, OUT1.B16    \
 	VEOR TMP2.B16, TMP1.B16, OUT2.B16
 // func galMulNEON(low, high, in, out []byte)
 //
 // Processes in 32 bytes at a time: each byte is split into nibbles, looked
 // up in the low/high tables and the XOR of both lookups is stored to out.
 // Returns without touching out when len(in) < 32; trailing bytes beyond the
 // last whole 32-byte block are not processed.
 TEXT ·galMulNEON(SB), 7, $0
 	MOVD in_base+48(FP), R1  // R1: &in
 	MOVD in_len+56(FP), R2   // length of message
 	MOVD out_base+72(FP), R5 // R5: &out
 	SUBS $32, R2
 	BMI  complete            // fewer than 32 bytes: nothing to do
 	MOVD low+0(FP), R10   // R10: &low
 	MOVD high+24(FP), R11 // R11: &high
 	VLD1 (R10), [V6.B16]  // V6: low table
 	VLD1 (R11), [V7.B16]  // V7: high table
 	//
 	// Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
 	// WORD $0x4e010c68 // dup v8.16b, w3
 	//
 	MOVD $0x0f, R3
 	VMOV R3, V8.B[0]
 	VDUP V8.B[0], V8.B16  // V8: 0x0f in every byte (low-nibble mask)
 loop:
 	// Main loop
 	LOAD(V0, V1, V10, V11)
 	GALOIS_MUL(V6, V7, V4, V5, V14, V15)
 	// Store result
 	VST1.P [V4.D2, V5.D2], 32(R5)
 	SUBS $32, R2
 	BPL  loop             // continue while at least 32 bytes remain
 complete:
 	RET
 // func galMulXorNEON(low, high, in, out []byte)
 //
 // Processes in 32 bytes at a time: each byte is split into nibbles, looked
 // up in the low/high tables, and the XOR of both lookups is XORed with the
 // current contents of out and stored back. Returns without touching out when
 // len(in) < 32; trailing bytes beyond the last whole 32-byte block are not
 // processed.
 TEXT ·galMulXorNEON(SB), 7, $0
 	MOVD in_base+48(FP), R1  // R1: &in
 	MOVD in_len+56(FP), R2   // length of message
 	MOVD out_base+72(FP), R5 // R5: &out
 	SUBS $32, R2
 	BMI  completeXor         // fewer than 32 bytes: nothing to do
 	MOVD low+0(FP), R10   // R10: &low
 	MOVD high+24(FP), R11 // R11: &high
 	VLD1 (R10), [V6.B16]  // V6: low table
 	VLD1 (R11), [V7.B16]  // V7: high table
 	//
 	// Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
 	// WORD $0x4e010c68 // dup v8.16b, w3
 	//
 	MOVD $0x0f, R3
 	VMOV R3, V8.B[0]
 	VDUP V8.B[0], V8.B16  // V8: 0x0f in every byte (low-nibble mask)
 loopXor:
 	// Main loop
 	VLD1 (R5), [V20.B16, V21.B16] // V20/V21: existing out (R5 not advanced)
 	LOAD(V0, V1, V10, V11)
 	GALOIS_MUL(V6, V7, V4, V5, V14, V15)
 	VEOR V20.B16, V4.B16, V4.B16
 	VEOR V21.B16, V5.B16, V5.B16
 	// Store result
 	VST1.P [V4.D2, V5.D2], 32(R5)
 	SUBS $32, R2
 	BPL  loopXor          // continue while at least 32 bytes remain
 completeXor:
 	RET
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
@ -0,0 +1,33 @@
 //go:build !amd64 || noasm || appengine || gccgo || nogen
 package reedsolomon
 // Placeholder limits for builds without generated AVX2 code. avx2CodeGen is
 // false here, and the galMulSlices* functions in this file panic if called.
 const (
 	maxAvx2Inputs  = 1
 	maxAvx2Outputs = 1
 	minAvx2Size    = 1
 	avxSizeMask    = 0
 	avx2CodeGen    = false
 )
 // galMulSlicesAvx2 is a stub for builds without generated assembly; it
 // always panics. avx2CodeGen is false in this build.
 func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
 	panic("codegen not available")
 }
 // galMulSlicesAvx2Xor is a stub for builds without generated assembly; it
 // always panics. avx2CodeGen is false in this build.
 func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
 	panic("codegen not available")
 }
 // galMulSlicesGFNI is a stub for builds without generated assembly; it
 // always panics.
 func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
 	panic("codegen not available")
 }
 // galMulSlicesGFNIXor is a stub for builds without generated assembly; it
 // always panics.
 func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
 	panic("codegen not available")
 }
 // galMulSlicesAvxGFNI is a stub for builds without generated assembly; it
 // always panics.
 func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
 	panic("codegen not available")
 }
 // galMulSlicesAvxGFNIXor is a stub for builds without generated assembly;
 // it always panics.
 func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
 	panic("codegen not available")
 }
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.s
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_amd64.go
--- a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
@ -0,0 +1,91 @@
 //go:build (!amd64 || noasm || appengine || gccgo) && (!arm64 || noasm || appengine || gccgo || nopshufb) && (!ppc64le || noasm || appengine || gccgo || nopshufb)
 // Copyright 2015, Klaus Post, see LICENSE for details.
 package reedsolomon
 // pshufb is false in this pure-Go build, which contains no assembly
 // table-lookup multiply routines.
 const pshufb = false
 // galMulSlice writes c times every byte of in into out[:len(in)] using the
 // mulTable[c] lookup table; c == 1 is a plain copy. It panics if out is
 // shorter than in. o is unused here.
 func galMulSlice(c byte, in, out []byte, o *options) {
 	out = out[:len(in)]
 	if c != 1 {
 		mt := mulTable[c][:256]
 		for i := 0; i < len(in); i++ {
 			out[i] = mt[in[i]]
 		}
 		return
 	}
 	copy(out, in)
 }
 // galMulSliceXor XORs c times every byte of in into out[:len(in)] using the
 // mulTable[c] lookup table; c == 1 reduces to sliceXor. It panics if out is
 // shorter than in.
 func galMulSliceXor(c byte, in, out []byte, o *options) {
 	out = out[:len(in)]
 	if c != 1 {
 		mt := mulTable[c][:256]
 		for i := 0; i < len(in); i++ {
 			out[i] ^= mt[in[i]]
 		}
 		return
 	}
 	sliceXor(in, out, o)
 }
 // init turns off the AVX512 option in the package defaults, since this
 // build carries no assembly.
 func init() {
 	defaultOptions.useAVX512 = false
 }
 // ifftDIT4 is the 4-way butterfly entry point for ffe multipliers; in this
 // pure-Go build it forwards every argument unchanged to ifftDIT4Ref.
 func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
 	ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
 }
 // ifftDIT48 is the 4-way butterfly entry point for ffe8 multipliers; in this
 // pure-Go build it forwards every argument unchanged to ifftDIT4Ref8.
 func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
 	ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT4 is the 4-way butterfly entry point for ffe multipliers; in this
 // pure-Go build it forwards every argument unchanged to fftDIT4Ref.
 func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
 	fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT48 is the 4-way butterfly entry point for ffe8 multipliers; in this
 // pure-Go build it forwards every argument unchanged to fftDIT4Ref8.
 func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
 	fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT2 is the forward 2-way butterfly: refMulAdd(x, y, log_m) followed by
 // sliceXorGo(x, y, o).
 func fftDIT2(x, y []byte, log_m ffe, o *options) {
 	// Reference version:
 	refMulAdd(x, y, log_m)
 	sliceXorGo(x, y, o)
 }
 // fftDIT28 is the forward 2-way butterfly for ffe8 multipliers:
 // refMulAdd8(x, y, log_m) followed by sliceXorGo(x, y, o).
 func fftDIT28(x, y []byte, log_m ffe8, o *options) {
 	// Reference version:
 	refMulAdd8(x, y, log_m)
 	sliceXorGo(x, y, o)
 }
 // ifftDIT2 is the inverse 2-way butterfly: sliceXorGo(x, y, o) followed by
 // refMulAdd(x, y, log_m), i.e. the steps of fftDIT2 in the opposite order.
 func ifftDIT2(x, y []byte, log_m ffe, o *options) {
 	// Reference version:
 	sliceXorGo(x, y, o)
 	refMulAdd(x, y, log_m)
 }
 // ifftDIT28 is the inverse 2-way butterfly for ffe8 multipliers:
 // sliceXorGo(x, y, o) followed by refMulAdd8(x, y, log_m).
 func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
 	// Reference version:
 	sliceXorGo(x, y, o)
 	refMulAdd8(x, y, log_m)
 }
 // mulgf16 forwards to refMul(x, y, log_m). o is unused here.
 func mulgf16(x, y []byte, log_m ffe, o *options) {
 	refMul(x, y, log_m)
 }
 // mulgf8 forwards to refMul8(x, y, log_m). o is unused here.
 func mulgf8(x, y []byte, log_m ffe8, o *options) {
 	refMul8(x, y, log_m)
 }
--- a/vendor/github.com/klauspost/reedsolomon/galois_nopshufb_amd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_nopshufb_amd64.go
@ -0,0 +1,146 @@
 // Copyright 2015, Klaus Post, see LICENSE for details
 //go:build nopshufb && !noasm
 package reedsolomon
 // bigSwitchover is the minimum input length, in bytes, at which sliceXor
 // uses the routines that process 64 bytes per loop.
 const bigSwitchover = 128
 // pshufb is false in this build, which is selected by the nopshufb tag and
 // does its Galois multiplication with plain table lookups in Go.
 const pshufb = false
 // sliceXor XORs in into out. Without SSE2 the work is delegated to
 // sliceXorGo. With SSE2, inputs of at least bigSwitchover bytes first go
 // through a 64-byte-per-loop routine (AVX2 when available, SSE2 otherwise),
 // then any remaining whole 16-byte blocks go through the SSE2 routine, and
 // the last few bytes are XORed one at a time.
 func sliceXor(in, out []byte, o *options) {
 	if !o.useSSE2 {
 		sliceXorGo(in, out, o)
 		return
 	}
 	if len(in) >= bigSwitchover {
 		if o.useAVX2 {
 			avx2XorSlice_64(in, out)
 		} else {
 			sSE2XorSlice_64(in, out)
 		}
 		// Both routines cover the same whole 64-byte blocks.
 		n := len(in) &^ 63
 		in = in[n:]
 		out = out[n:]
 	}
 	if len(in) >= 16 {
 		sSE2XorSlice(in, out)
 		n := len(in) &^ 15
 		in = in[n:]
 		out = out[n:]
 	}
 	out = out[:len(in)]
 	for i, b := range in {
 		out[i] ^= b
 	}
 }
 // galMulSlice writes c times every byte of in into out[:len(in)] using the
 // mulTable[c] lookup table; c == 1 is a plain copy. The main loop handles
 // four bytes per iteration. It panics if out is shorter than in. o is
 // unused here.
 func galMulSlice(c byte, in, out []byte, o *options) {
 	out = out[:len(in)]
 	if c == 1 {
 		copy(out, in)
 		return
 	}
 	mt := mulTable[c][:256]
 	n := len(in)
 	i := 0
 	// Four bytes at a time; the array-pointer conversions let each group be
 	// indexed without further bounds checks.
 	for ; n-i >= 4; i += 4 {
 		src := (*[4]byte)(in[i:])
 		dst := (*[4]byte)(out[i:])
 		dst[0] = mt[src[0]]
 		dst[1] = mt[src[1]]
 		dst[2] = mt[src[2]]
 		dst[3] = mt[src[3]]
 	}
 	for ; i < n; i++ {
 		out[i] = mt[in[i]]
 	}
 }
 // galMulSliceXor XORs c times every byte of in into out[:len(in)] using the
 // mulTable[c] lookup table; c == 1 reduces to sliceXor. The main loop
 // handles four bytes per iteration. It panics if out is shorter than in.
 func galMulSliceXor(c byte, in, out []byte, o *options) {
 	out = out[:len(in)]
 	if c == 1 {
 		sliceXor(in, out, o)
 		return
 	}
 	mt := mulTable[c][:256]
 	n := len(in)
 	i := 0
 	// Four bytes at a time; the array-pointer conversions let each group be
 	// indexed without further bounds checks.
 	for ; n-i >= 4; i += 4 {
 		src := (*[4]byte)(in[i:])
 		dst := (*[4]byte)(out[i:])
 		dst[0] ^= mt[src[0]]
 		dst[1] ^= mt[src[1]]
 		dst[2] ^= mt[src[2]]
 		dst[3] ^= mt[src[3]]
 	}
 	for ; i < n; i++ {
 		out[i] ^= mt[in[i]]
 	}
 }
 // init turns off the AVX512 option in the package defaults for this
 // nopshufb build.
 func init() {
 	defaultOptions.useAVX512 = false
 }
 // ifftDIT4 is the 4-way butterfly entry point for ffe multipliers; in this
 // nopshufb build it forwards every argument unchanged to ifftDIT4Ref.
 func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
 	ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
 }
 // ifftDIT48 is the 4-way butterfly entry point for ffe8 multipliers; in this
 // nopshufb build it forwards every argument unchanged to ifftDIT4Ref8.
 func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
 	ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT4 is the 4-way butterfly entry point for ffe multipliers; in this
 // nopshufb build it forwards every argument unchanged to fftDIT4Ref.
 func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
 	fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT48 is the 4-way butterfly entry point for ffe8 multipliers; in this
 // nopshufb build it forwards every argument unchanged to fftDIT4Ref8.
 func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
 	fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT2 is the forward 2-way butterfly: refMulAdd(x, y, log_m) followed by
 // sliceXor(x, y, o).
 func fftDIT2(x, y []byte, log_m ffe, o *options) {
 	// Reference version:
 	refMulAdd(x, y, log_m)
 	sliceXor(x, y, o)
 }
 // fftDIT28 is the forward 2-way butterfly for ffe8 multipliers:
 // refMulAdd8(x, y, log_m) followed by sliceXor(x, y, o).
 func fftDIT28(x, y []byte, log_m ffe8, o *options) {
 	// Reference version:
 	refMulAdd8(x, y, log_m)
 	sliceXor(x, y, o)
 }
 // ifftDIT2 is the inverse 2-way butterfly: sliceXor(x, y, o) followed by
 // refMulAdd(x, y, log_m), i.e. the steps of fftDIT2 in the opposite order.
 func ifftDIT2(x, y []byte, log_m ffe, o *options) {
 	// Reference version:
 	sliceXor(x, y, o)
 	refMulAdd(x, y, log_m)
 }
 // ifftDIT28 is the inverse 2-way butterfly for ffe8 multipliers:
 // sliceXor(x, y, o) followed by refMulAdd8(x, y, log_m).
 func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
 	// Reference version:
 	sliceXor(x, y, o)
 	refMulAdd8(x, y, log_m)
 }
 // mulgf16 forwards to refMul(x, y, log_m). o is unused here.
 func mulgf16(x, y []byte, log_m ffe, o *options) {
 	refMul(x, y, log_m)
 }
 // mulgf8 forwards to refMul8(x, y, log_m). o is unused here.
 func mulgf8(x, y []byte, log_m ffe8, o *options) {
 	refMul8(x, y, log_m)
 }
--- a/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
@ -0,0 +1,13 @@
 //go:build !amd64 || noasm || appengine || gccgo || pshufb
 // Copyright 2020, Klaus Post, see LICENSE for details.
 package reedsolomon
 // codeSomeShardsAvx512 is a stub for builds without the AVX512 assembly; it
 // always panics and must never be reached.
 func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, byteCount int) {
 	panic("codeSomeShardsAvx512 should not be called if built without asm")
 }
 // codeSomeShardsAvx512P is a stub for builds without the AVX512 assembly; it
 // always panics and must never be reached.
 func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, byteCount int) {
 	panic("codeSomeShardsAvx512P should not be called if built without asm")
 }
--- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
@ -0,0 +1,146 @@
 //go:build !noasm && !appengine && !gccgo && !nopshufb
 // Copyright 2015, Klaus Post, see LICENSE for details.
 // Copyright 2018, Minio, Inc.
 package reedsolomon
 // pshufb is true in this build, which is compiled only when the nopshufb
 // tag is absent and provides the assembly table-lookup multiply routines.
 const pshufb = true
 // galMulPpc is implemented in assembly. It works in 16-byte blocks and its
 // loop body runs before the length is compared, so at least one block is
 // always processed: pass a non-empty in whose length is a multiple of 16.
 //
 //go:noescape
 func galMulPpc(low, high, in, out []byte)
 // galMulPpcXor is implemented in assembly; it is the accumulate (XOR into
 // out) counterpart of galMulPpc and works in 16-byte blocks.
 // NOTE(review): galMulSliceXor only calls it with a non-empty in[:done]
 // whose length is a multiple of 16 — presumably it has the same length
 // requirement as galMulPpc; confirm against the assembly.
 //
 //go:noescape
 func galMulPpcXor(low, high, in, out []byte)
 // This is what the assembler routines do in blocks of 16 bytes:
 /*
 func galMulPpc(low, high, in, out []byte) {
 	for n, input := range in {
 		l := input & 0xf
 		h := input >> 4
 		out[n] = low[l] ^ high[h]
 	}
 }
 func galMulPpcXor(low, high, in, out []byte) {
 	for n, input := range in {
 		l := input & 0xf
 		h := input >> 4
 		out[n] ^= low[l] ^ high[h]
 	}
 }
 */
 // galMulSlice writes c times every byte of in into out. c == 1 is a plain
 // copy. Otherwise the assembly routine handles the whole 16-byte blocks (and
 // is skipped when there are none) and the remaining bytes are looked up in
 // mulTable[c]. o is unused here.
 func galMulSlice(c byte, in, out []byte, o *options) {
 	if c == 1 {
 		copy(out, in)
 		return
 	}
 	blocks := len(in) &^ 15
 	if blocks > 0 {
 		galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:blocks], out)
 	}
 	if blocks == len(in) {
 		return
 	}
 	mt := mulTable[c][:256]
 	for i := blocks; i < len(in); i++ {
 		out[i] = mt[in[i]]
 	}
 }
 // galMulSliceXor XORs c times every byte of in into out. c == 1 reduces to
 // sliceXor. Otherwise the assembly routine handles the whole 16-byte blocks
 // (and is skipped when there are none) and the remaining bytes are looked up
 // in mulTable[c].
 func galMulSliceXor(c byte, in, out []byte, o *options) {
 	if c == 1 {
 		sliceXor(in, out, o)
 		return
 	}
 	blocks := len(in) &^ 15
 	if blocks > 0 {
 		galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:blocks], out)
 	}
 	if blocks == len(in) {
 		return
 	}
 	mt := mulTable[c][:256]
 	for i := blocks; i < len(in); i++ {
 		out[i] ^= mt[in[i]]
 	}
 }
 // ifftDIT4 is the 4-way butterfly entry point for ffe multipliers. This
 // build has no specialised kernel for it and forwards every argument
 // unchanged to ifftDIT4Ref.
 func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
 	ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
 }
 // ifftDIT48 is the 4-way butterfly entry point for ffe8 multipliers. This
 // build has no specialised kernel for it and forwards every argument
 // unchanged to ifftDIT4Ref8.
 func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
 	ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT4 is the 4-way butterfly entry point for ffe multipliers. This
 // build has no specialised kernel for it and forwards every argument
 // unchanged to fftDIT4Ref.
 func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
 	fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT48 is the 4-way butterfly entry point for ffe8 multipliers. This
 // build has no specialised kernel for it and forwards every argument
 // unchanged to fftDIT4Ref8.
 func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
 	fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
 }
 // fftDIT2 is the forward 2-way butterfly: refMulAdd(x, y, log_m) followed by
 // sliceXorGo(x, y, o).
 func fftDIT2(x, y []byte, log_m ffe, o *options) {
 	// Reference version:
 	refMulAdd(x, y, log_m)
 	sliceXorGo(x, y, o)
 }
 // fftDIT28 is the forward 2-way butterfly for ffe8 multipliers:
 // mulAdd8(x, y, log_m, o) followed by sliceXorGo(x, y, o).
 func fftDIT28(x, y []byte, log_m ffe8, o *options) {
 	// Reference version:
 	mulAdd8(x, y, log_m, o)
 	sliceXorGo(x, y, o)
 }
 // ifftDIT2 is the inverse 2-way butterfly: sliceXorGo(x, y, o) followed by
 // refMulAdd(x, y, log_m), i.e. the steps of fftDIT2 in the opposite order.
 func ifftDIT2(x, y []byte, log_m ffe, o *options) {
 	// Reference version:
 	sliceXorGo(x, y, o)
 	refMulAdd(x, y, log_m)
 }
 // ifftDIT28 is the inverse 2-way butterfly for ffe8 multipliers:
 // sliceXorGo(x, y, o) followed by mulAdd8(x, y, log_m, o).
 func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
 	// Reference version:
 	sliceXorGo(x, y, o)
 	mulAdd8(x, y, log_m, o)
 }
 // mulgf16 forwards to refMul(x, y, log_m); this build has no specialised
 // kernel for it. o is unused here.
 func mulgf16(x, y []byte, log_m ffe, o *options) {
 	refMul(x, y, log_m)
 }
 // mulAdd8 XORs the product of every byte of in with the multiplier log_m
 // into out. The assembly routine handles the whole 16-byte blocks using the
 // first 32 bytes of multiply256LUT8[log_m] as low and high nibble tables;
 // anything left over is handed to refMulAdd8. o is unused here.
 func mulAdd8(out, in []byte, log_m ffe8, o *options) {
 	t := &multiply256LUT8[log_m]
 	done := (len(in) >> 4) << 4
 	if done > 0 {
 		// As in galMulSliceXor, the assembly is only given a non-empty
 		// prefix whose length is a multiple of 16, so it never touches
 		// memory past the end of in or out.
 		galMulPpcXor(t[:16], t[16:32], in[:done], out)
 	}
 	if done < len(in) {
 		// refMulAdd8 takes the destination first and the source second.
 		// NOTE(review): how refMulAdd8 treats inputs shorter than its own
 		// block size is not visible from this file — confirm.
 		refMulAdd8(out[done:], in[done:], log_m)
 	}
 }
 // mulgf8 writes the product of every byte of in with the multiplier log_m
 // into out, overwriting it. The assembly routine handles the whole 16-byte
 // blocks using the first 32 bytes of multiply256LUT8[log_m] as low and high
 // nibble tables; the remaining bytes are looked up in
 // mul8LUTs[log_m].Value. o is unused here.
 func mulgf8(out, in []byte, log_m ffe8, o *options) {
 	t := &multiply256LUT8[log_m]
 	done := (len(in) >> 4) << 4
 	if done > 0 {
 		// As in galMulSlice, the assembly is only given a non-empty prefix
 		// whose length is a multiple of 16: its loop body runs before the
 		// length check and would otherwise read and write past the slices.
 		galMulPpc(t[:16], t[16:32], in[:done], out)
 	}
 	if done == len(in) {
 		return
 	}
 	mt := mul8LUTs[log_m].Value[:]
 	for i := done; i < len(in); i++ {
 		// Plain store, matching the bulk path: this is a multiply, not a
 		// multiply-accumulate.
 		out[i] = byte(mt[in[i]])
 	}
 }
--- a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
@ -0,0 +1,127 @@
 //+build !noasm
 //+build !appengine
 //+build !gccgo
 //+build !pshufb
 // Copyright 2015, Klaus Post, see LICENSE for details.
 // Copyright 2018, Minio, Inc.
 #include "textflag.h"
 // General purpose registers: the argument pointers and length, the address of
 // the constants table, and byte offsets used for indexed loads and stores.
 #define LOW       R3
 #define HIGH      R4
 #define IN        R5
 #define LEN       R6
 #define OUT       R7
 #define CONSTANTS R8
 #define OFFSET    R9
 #define OFFSET1   R10
 #define OFFSET2   R11
 // Vector registers. Each one gets two names: NAME is used as the operand of
 // the VSX loads and stores (LXVD2X/STXVD2X) and NAME_ as the operand of the
 // vector instructions (VPERM/VAND/VSRB/VXOR). VS(32+n) and Vn name the same
 // register, so for example X6 and X6_ refer to the same data.
 #define X6        VS34
 #define X6_       V2
 #define X7        VS35
 #define X7_       V3
 #define MSG       VS36
 #define MSG_      V4
 #define MSG_HI    VS37
 #define MSG_HI_   V5
 #define RESULT    VS38
 #define RESULT_   V6
 #define ROTATE    VS39
 #define ROTATE_   V7
 #define MASK      VS40
 #define MASK_     V8
 #define FLIP      VS41
 #define FLIP_     V9
 // func galMulPpc(low, high, in, out []byte)
 //
 // Nibble-table transform over 16-byte blocks: each input byte is split into
 // its low nibble (AND with 0x0f) and high nibble (shift right by 4); the low
 // nibble selects a byte from the `low` table, the high nibble a byte from the
 // `high` table, and the XOR of the two is stored to out. out is overwritten,
 // it is never read. Only the first 16 bytes of low and high are loaded.
 //
 // Arguments are four slice headers (hence the 96-byte argument frame); only
 // the data pointers and in_len are read.
 //
 // The length test comes after the loop body and continues while
 // in_len > OFFSET, so one block is always processed and a trailing partial
 // block is processed as a full 16 bytes.
 // NOTE(review): callers presumably must pass a non-zero multiple of 16 bytes
 // with out at least as long as in — confirm at the call sites.
 TEXT ·galMulPpc(SB), NOFRAME|NOSPLIT, $0-96
 	MOVD low+0(FP), LOW
 	MOVD high+24(FP), HIGH
 	MOVD in+48(FP), IN
 	MOVD in_len+56(FP), LEN
 	MOVD out+72(FP), OUT
 	MOVD $16, OFFSET1
 	MOVD $32, OFFSET2
 	// Load the three 16-byte constants: shift count, nibble mask, byte permute.
 	MOVD   $·constants(SB), CONSTANTS
 	LXVD2X (CONSTANTS)(R0), ROTATE
 	LXVD2X (CONSTANTS)(OFFSET1), MASK
 	LXVD2X (CONSTANTS)(OFFSET2), FLIP
 	// Load both lookup tables and permute them through FLIP.
 	// NOTE(review): presumably this puts the table bytes into the order VPERM
 	// indexes them after an LXVD2X load — confirm. V31 is only the second
 	// permute source; every selector byte used here is below 16.
 	LXVD2X (LOW)(R0), X6
 	LXVD2X (HIGH)(R0), X7
 	VPERM  X6_, V31, FLIP_, X6_
 	VPERM  X7_, V31, FLIP_, X7_
 	MOVD $0, OFFSET
 loop:
 	LXVD2X (IN)(OFFSET), MSG
 	// MSG_HI_ = high nibbles, MSG_ = low nibbles.
 	VSRB  MSG_, ROTATE_, MSG_HI_
 	VAND  MSG_, MASK_, MSG_
 	// Table lookups, then combine the two halves.
 	VPERM X6_, V31, MSG_, MSG_
 	VPERM X7_, V31, MSG_HI_, MSG_HI_
 	VXOR MSG_, MSG_HI_, MSG_
 	STXVD2X MSG, (OUT)(OFFSET)
 	// Advance one block; loop again while LEN > OFFSET.
 	ADD $16, OFFSET, OFFSET
 	CMP LEN, OFFSET
 	BGT loop
 	RET
 // func galMulPpcXor(low, high, in, out []byte)
 //
 // Same nibble-table transform as galMulPpc, except that the result for each
 // 16-byte block is XORed with the existing contents of out before being
 // stored, so out accumulates instead of being overwritten.
 //
 // Arguments are four slice headers (hence the 96-byte argument frame); only
 // the data pointers and in_len are read.
 //
 // The length test comes after the loop body and continues while
 // in_len > OFFSET, so one block is always processed and a trailing partial
 // block is processed as a full 16 bytes.
 // NOTE(review): callers presumably must pass a non-zero multiple of 16 bytes
 // with out at least as long as in — confirm at the call sites.
 TEXT ·galMulPpcXor(SB), NOFRAME|NOSPLIT, $0-96
 	MOVD low+0(FP), LOW
 	MOVD high+24(FP), HIGH
 	MOVD in+48(FP), IN
 	MOVD in_len+56(FP), LEN
 	MOVD out+72(FP), OUT
 	MOVD $16, OFFSET1
 	MOVD $32, OFFSET2
 	// Load the three 16-byte constants: shift count, nibble mask, byte permute.
 	MOVD   $·constants(SB), CONSTANTS
 	LXVD2X (CONSTANTS)(R0), ROTATE
 	LXVD2X (CONSTANTS)(OFFSET1), MASK
 	LXVD2X (CONSTANTS)(OFFSET2), FLIP
 	// Load both lookup tables and permute them through FLIP.
 	// NOTE(review): presumably this puts the table bytes into the order VPERM
 	// indexes them after an LXVD2X load — confirm. V31 is only the second
 	// permute source; every selector byte used here is below 16.
 	LXVD2X (LOW)(R0), X6
 	LXVD2X (HIGH)(R0), X7
 	VPERM  X6_, V31, FLIP_, X6_
 	VPERM  X7_, V31, FLIP_, X7_
 	MOVD $0, OFFSET
 loopXor:
 	LXVD2X (IN)(OFFSET), MSG
 	// Current output block, to be accumulated into.
 	LXVD2X (OUT)(OFFSET), RESULT
 	// MSG_HI_ = high nibbles, MSG_ = low nibbles.
 	VSRB  MSG_, ROTATE_, MSG_HI_
 	VAND  MSG_, MASK_, MSG_
 	// Table lookups, then combine the two halves.
 	VPERM X6_, V31, MSG_, MSG_
 	VPERM X7_, V31, MSG_HI_, MSG_HI_
 	VXOR MSG_, MSG_HI_, MSG_
 	// Fold the new product into the existing output.
 	VXOR MSG_, RESULT_, RESULT_
 	STXVD2X RESULT, (OUT)(OFFSET)
 	// Advance one block; loop again while LEN > OFFSET.
 	ADD $16, OFFSET, OFFSET
 	CMP LEN, OFFSET
 	BGT loopXor
 	RET
 // ·constants is a 48-byte table read as three 16-byte vectors:
 //   +0x00: 0x04 in every byte, loaded as ROTATE (per-byte shift count for VSRB)
 //   +0x10: 0x0f in every byte, loaded as MASK (keeps the low nibble)
 //   +0x20: the byte values 0x00..0x0f, loaded as FLIP (permute selector)
 DATA ·constants+0x0(SB)/8, $0x0404040404040404
 DATA ·constants+0x8(SB)/8, $0x0404040404040404
 DATA ·constants+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f
 DATA ·constants+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f
 DATA ·constants+0x20(SB)/8, $0x0706050403020100
 DATA ·constants+0x28(SB)/8, $0x0f0e0d0c0b0a0908
 // Flag value 8 is RODATA in textflag.h; the symbol is 48 bytes long.
 GLOBL ·constants(SB), 8, $48
--- a/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
+++ b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
@ -0,0 +1,164 @@
 /**
 * A thread-safe tree which caches inverted matrices.
 *
 * Copyright 2016, Peter Collins
 */
 package reedsolomon
 import (
 	"errors"
 	"sync"
 )
 // inversionTree caches inverted matrices, keyed by the list of invalid row
 // indices that each matrix was computed for.
 // The tree uses a Reader-Writer mutex to make it thread-safe
 // when accessing cached matrices and inserting new ones.
 type inversionTree struct {
 	// mutex is read-locked by GetInvertedMatrix and write-locked by
 	// InsertInvertedMatrix.
 	mutex sync.RWMutex
 	// root holds the identity matrix installed by newInversionTree.
 	root  inversionNode
 }
 // inversionNode is a single node of the inversion tree.
 type inversionNode struct {
 	// matrix is the cached matrix for the index path ending at this node.
 	// It stays nil for nodes that were only created as intermediate steps.
 	matrix   matrix
 	// children is indexed by (invalid index - parent index); a nil entry means
 	// nothing has been inserted along that path yet.
 	children []*inversionNode
 }
 // newInversionTree builds an empty cache for inverted matrices.
 // The root node carries the identity matrix of size dataShards (the matrix to
 // use when no shard is invalid) and one child slot for each of the
 // dataShards+parityShards shards.
 func newInversionTree(dataShards, parityShards int) *inversionTree {
 	tree := new(inversionTree)
 	// The error is dropped on purpose, matching the original behaviour: when
 	// identityMatrix fails it returns a nil matrix, which is stored as-is.
 	tree.root.matrix, _ = identityMatrix(dataShards)
 	tree.root.children = make([]*inversionNode, dataShards+parityShards)
 	return tree
 }
 // GetInvertedMatrix looks up the cached matrix for the given invalid row
 // indices. It returns nil when the receiver is nil or when nothing is cached
 // for that key, and the root identity matrix when no indices are given.
 func (t *inversionTree) GetInvertedMatrix(invalidIndices []int) matrix {
 	if t == nil {
 		return nil
 	}
 	// Hold the read lock for the whole lookup.
 	t.mutex.RLock()
 	defer t.mutex.RUnlock()
 	switch len(invalidIndices) {
 	case 0:
 		// No invalid rows: the answer is the identity matrix at the root.
 		return t.root.matrix
 	default:
 		// Walk down from the root; the root's parent index is 0.
 		return t.root.getInvertedMatrix(invalidIndices, 0)
 	}
 }
 // errAlreadySet is returned by InsertInvertedMatrix when it is called with no
 // invalid indices, which would address the root node whose identity matrix is
 // already set.
 var errAlreadySet = errors.New("the root node identity matrix is already set")
 // InsertInvertedMatrix stores an inverted matrix in the tree under the key
 // formed by the indices of the invalid rows. shards is the total number of
 // shards and is used to size the child lists of nodes created on the way.
 //
 // A nil receiver is a no-op that returns nil. An empty index list returns
 // errAlreadySet, and a non-square matrix returns errNotSquare; both checks
 // happen before the tree is locked.
 func (t *inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error {
 	switch {
 	case t == nil:
 		return nil
 	case len(invalidIndices) == 0:
 		// The root already holds the identity matrix.
 		return errAlreadySet
 	case !matrix.IsSquare():
 		return errNotSquare
 	}
 	// Take the write lock for the duration of the insert.
 	t.mutex.Lock()
 	defer t.mutex.Unlock()
 	// Descend from the root (parent index 0), creating nodes as needed.
 	t.root.insertInvertedMatrix(invalidIndices, matrix, shards, 0)
 	return nil
 }
 // getInvertedMatrix follows invalidIndices down the tree starting at n and
 // returns the matrix stored at the final node, or nil if any node on the path
 // is missing. parent is the index offset of n's child list: the child for
 // index i lives at children[i-parent].
 //
 // invalidIndices must be non-empty. The returned matrix may be nil even when
 // the node exists, because intermediate nodes are created without a matrix.
 func (n *inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix {
 	current := n
 	for {
 		// The head of the list selects the child to visit next.
 		head := invalidIndices[0]
 		child := current.children[head-parent]
 		if child == nil {
 			// Nothing cached along this path; the caller has to build it.
 			return nil
 		}
 		if len(invalidIndices) == 1 {
 			// Last index consumed: this is the node we were looking for.
 			return child.matrix
 		}
 		// Drop the head and continue one level down. The child's list starts
 		// at head+1 because the indices are expected in ascending order.
 		current, invalidIndices, parent = child, invalidIndices[1:], head+1
 	}
 }
 // insertInvertedMatrix stores matrix at the node addressed by invalidIndices,
 // creating any missing nodes along the way. parent is the index offset of n's
 // child list (the child for index i lives at children[i-parent]) and shards is
 // the total shard count used to size new child lists.
 //
 // invalidIndices must be non-empty. The caller is expected to hold the write
 // lock of the owning tree.
 func (n *inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) {
 	current := n
 	for {
 		// The head of the list selects the child slot to use next.
 		head := invalidIndices[0]
 		slot := head - parent
 		child := current.children[slot]
 		if child == nil {
 			// Create the missing node. Its child list has shards-head entries,
 			// which is enough for every index that can follow head in an
 			// ascending list.
 			child = &inversionNode{
 				children: make([]*inversionNode, shards-head),
 			}
 			current.children[slot] = child
 		}
 		if len(invalidIndices) == 1 {
 			// Last index consumed: cache the matrix here and stop.
 			child.matrix = matrix
 			return
 		}
 		// Drop the head and continue one level down with offset head+1.
 		current, invalidIndices, parent = child, invalidIndices[1:], head+1
 	}
 }
--- a/vendor/github.com/klauspost/reedsolomon/leopard.go
+++ b/vendor/github.com/klauspost/reedsolomon/leopard.go
--- a/vendor/github.com/klauspost/reedsolomon/leopard8.go
+++ b/vendor/github.com/klauspost/reedsolomon/leopard8.go
--- a/vendor/github.com/klauspost/reedsolomon/matrix.go
+++ b/vendor/github.com/klauspost/reedsolomon/matrix.go
@ -0,0 +1,281 @@
 /**
 * Matrix Algebra over an 8-bit Galois Field
 *
 * Copyright 2015, Klaus Post
 * Copyright 2015, Backblaze, Inc.
 */
 package reedsolomon
 import (
 	"errors"
 	"fmt"
 	"strconv"
 	"strings"
 )
 // matrix is a two-dimensional byte matrix stored as a slice of rows and
 // indexed as m[row][col].
 type matrix [][]byte
 // newMatrix allocates a rows x cols matrix filled with zeros.
 // It returns errInvalidRowSize when rows <= 0 and errInvalidColSize when
 // cols <= 0 (the row check comes first). Every row has its own backing array.
 func newMatrix(rows, cols int) (matrix, error) {
 	switch {
 	case rows <= 0:
 		return nil, errInvalidRowSize
 	case cols <= 0:
 		return nil, errInvalidColSize
 	}
 	out := make(matrix, rows)
 	for r := 0; r < rows; r++ {
 		out[r] = make([]byte, cols)
 	}
 	return out, nil
 }
 // newMatrixData wraps the given row-major data as a matrix after validating
 // it with Check. The data is not copied: the returned matrix shares its rows
 // with the input. On a validation failure nil and the Check error are returned.
 func newMatrixData(data [][]byte) (matrix, error) {
 	if err := matrix(data).Check(); err != nil {
 		return nil, err
 	}
 	return matrix(data), nil
 }
 // identityMatrix returns the size x size identity matrix: ones on the main
 // diagonal and zeros elsewhere. Errors from newMatrix (for example a
 // non-positive size) are passed through with a nil matrix.
 func identityMatrix(size int) (matrix, error) {
 	id, err := newMatrix(size, size)
 	if err != nil {
 		return nil, err
 	}
 	for d := 0; d < len(id); d++ {
 		id[d][d] = 1
 	}
 	return id, nil
 }
 // Errors reported for invalid matrix dimensions.
 var (
 	// errInvalidRowSize is returned when a matrix would have zero or a
 	// negative number of rows.
 	errInvalidRowSize = errors.New("invalid row size")
 	// errInvalidColSize is returned when a matrix would have zero or a
 	// negative number of columns.
 	errInvalidColSize = errors.New("invalid column size")
 	// errColSizeMismatch is returned when the rows of a matrix do not all have
 	// the same number of columns.
 	errColSizeMismatch = errors.New("column size is not the same for all rows")
 )
 // Check validates the shape of the matrix. It returns errInvalidRowSize for a
 // matrix without rows, errInvalidColSize when the first row is empty, and
 // errColSizeMismatch when any row differs in length from the first one.
 func (m matrix) Check() error {
 	if len(m) == 0 {
 		return errInvalidRowSize
 	}
 	width := len(m[0])
 	if width == 0 {
 		return errInvalidColSize
 	}
 	// Row 0 defines the width, so only the remaining rows need comparing.
 	for i := 1; i < len(m); i++ {
 		if len(m[i]) != width {
 			return errColSizeMismatch
 		}
 	}
 	return nil
 }
 // String renders the matrix as nested, comma-separated lists of decimal
 // values, one bracketed list per row.
 //
 // Example: [[1, 2], [3, 4]]
 func (m matrix) String() string {
 	var sb strings.Builder
 	sb.WriteByte('[')
 	for r, row := range m {
 		if r > 0 {
 			sb.WriteString(", ")
 		}
 		sb.WriteByte('[')
 		for c, v := range row {
 			if c > 0 {
 				sb.WriteString(", ")
 			}
 			sb.WriteString(strconv.Itoa(int(v)))
 		}
 		sb.WriteByte(']')
 	}
 	sb.WriteByte(']')
 	return sb.String()
 }
 // Multiply returns the product m x right as a new matrix. Element products
 // are computed with galMultiply and summed with XOR.
 // An error is returned when the column count of m (taken from its first row)
 // differs from the row count of right.
 func (m matrix) Multiply(right matrix) (matrix, error) {
 	inner := len(m[0])
 	if inner != len(right) {
 		return nil, fmt.Errorf("columns on left (%d) is different than rows on right (%d)", inner, len(right))
 	}
 	// The newMatrix error is dropped as before; a nil product simply skips
 	// the loop below.
 	product, _ := newMatrix(len(m), len(right[0]))
 	for r := range product {
 		left := m[r]
 		for c := range product[r] {
 			var acc byte
 			for i := 0; i < inner; i++ {
 				acc ^= galMultiply(left[i], right[i][c])
 			}
 			product[r][c] = acc
 		}
 	}
 	return product, nil
 }
 // Augment returns a new matrix whose rows are the rows of m followed by the
 // corresponding rows of right. errMatrixSize is returned when the two
 // matrices do not have the same number of rows.
 func (m matrix) Augment(right matrix) (matrix, error) {
 	if len(m) != len(right) {
 		return nil, errMatrixSize
 	}
 	leftCols := len(m[0])
 	// The newMatrix error is dropped as before.
 	joined, _ := newMatrix(len(m), leftCols+len(right[0]))
 	for r := range m {
 		// Left part: the row of m.
 		for c, v := range m[r] {
 			joined[r][c] = v
 		}
 		// Right part: as many columns as the first row of right has.
 		for c := range right[0] {
 			joined[r][leftCols+c] = right[r][c]
 		}
 	}
 	return joined, nil
 }
 // errMatrixSize is returned if the dimensions of two matrices do not match.
 var errMatrixSize = errors.New("matrix sizes do not match")
 // SameSize reports whether m and n have identical dimensions. It returns nil
 // when both have the same number of rows and every pair of corresponding rows
 // has the same length, and errMatrixSize otherwise.
 func (m matrix) SameSize(n matrix) error {
 	if len(m) != len(n) {
 		return errMatrixSize
 	}
 	for i, row := range m {
 		if len(row) != len(n[i]) {
 			return errMatrixSize
 		}
 	}
 	return nil
 }
 // SubMatrix copies the rectangle of rows [rmin, rmax) and columns
 // [cmin, cmax) out of m into a new matrix. Errors from newMatrix (an empty or
 // negative extent in either direction) are returned with a nil matrix.
 func (m matrix) SubMatrix(rmin, cmin, rmax, cmax int) (matrix, error) {
 	part, err := newMatrix(rmax-rmin, cmax-cmin)
 	if err != nil {
 		return nil, err
 	}
 	// OPTME: If used heavily, use copy function to copy slice
 	for r, dst := range part {
 		src := m[rmin+r]
 		for c := range dst {
 			dst[c] = src[cmin+c]
 		}
 	}
 	return part, nil
 }
 // SwapRows exchanges rows r1 and r2 in place.
 // It returns errInvalidRowSize when either index is outside the matrix.
 func (m matrix) SwapRows(r1, r2 int) error {
 	rows := len(m)
 	if !(0 <= r1 && r1 < rows && 0 <= r2 && r2 < rows) {
 		return errInvalidRowSize
 	}
 	m[r1], m[r2] = m[r2], m[r1]
 	return nil
 }
 // IsSquare reports whether the matrix has as many rows as its first row has
 // columns. A matrix without rows is reported as not square instead of
 // panicking on the missing first row, so callers such as Invert and
 // InsertInvertedMatrix get their errNotSquare error for empty input.
 func (m matrix) IsSquare() bool {
 	return len(m) > 0 && len(m) == len(m[0])
 }
 // errSingular is returned if the matrix is singular and cannot be inverted.
 var errSingular = errors.New("matrix is singular")
 // errNotSquare is returned if attempting to invert a non-square matrix.
 var errNotSquare = errors.New("only square matrices can be inverted")
 // Invert returns the inverse of m as a new matrix.
 // It returns errNotSquare when m is not square and errSingular (from the
 // elimination step) when m has no inverse.
 //
 // The inverse is obtained by augmenting m with the identity matrix, running
 // Gaussian elimination on the combined matrix and extracting its right half.
 func (m matrix) Invert() (matrix, error) {
 	if !m.IsSquare() {
 		return nil, errNotSquare
 	}
 	n := len(m)
 	// Errors of the two construction steps are dropped as before.
 	ident, _ := identityMatrix(n)
 	augmented, _ := m.Augment(ident)
 	if err := augmented.gaussianElimination(); err != nil {
 		return nil, err
 	}
 	return augmented.SubMatrix(0, n, n, 2*n)
 }
 // gaussianElimination reduces m in place so that its leading square part
 // (rows x rows) becomes the identity matrix; the same row operations are
 // applied to all columns. It returns errSingular when a zero pivot cannot be
 // fixed by swapping in a row from below.
 // Arithmetic uses galMultiply/galOneOver, and XOR for both addition and
 // subtraction.
 func (m matrix) gaussianElimination() error {
 	rows, columns := len(m), len(m[0])
 	// Forward pass: normalise each pivot to 1 and clear everything below it.
 	for r := 0; r < rows; r++ {
 		// A zero pivot is replaced by the first row below that has a
 		// non-zero entry in this column.
 		if m[r][r] == 0 {
 			for below := r + 1; below < rows; below++ {
 				if m[below][r] == 0 {
 					continue
 				}
 				if err := m.SwapRows(r, below); err != nil {
 					return err
 				}
 				break
 			}
 		}
 		pivot := m[r][r]
 		// Still zero: no usable row was found, the matrix is singular.
 		if pivot == 0 {
 			return errSingular
 		}
 		// Scale the pivot row so the pivot becomes 1.
 		if pivot != 1 {
 			inv := galOneOver(pivot)
 			for c := 0; c < columns; c++ {
 				m[r][c] = galMultiply(m[r][c], inv)
 			}
 		}
 		// Subtract a multiple of the pivot row from every row below that has
 		// a non-zero entry in the pivot column.
 		for below := r + 1; below < rows; below++ {
 			factor := m[below][r]
 			if factor == 0 {
 				continue
 			}
 			for c := 0; c < columns; c++ {
 				m[below][c] ^= galMultiply(factor, m[r][c])
 			}
 		}
 	}
 	// Backward pass: clear the entries above the main diagonal.
 	for d := 0; d < rows; d++ {
 		for above := 0; above < d; above++ {
 			factor := m[above][d]
 			if factor == 0 {
 				continue
 			}
 			for c := 0; c < columns; c++ {
 				m[above][c] ^= galMultiply(factor, m[d][c])
 			}
 		}
 	}
 	return nil
 }
 // vandermonde creates a rows x cols Vandermonde matrix in which the element
 // at (r, c) is galExp(byte(r), c). Such a matrix is guaranteed to have the
 // property that any subset of rows that forms a square matrix is invertible.
 // Errors from newMatrix are returned with a nil matrix.
 func vandermonde(rows, cols int) (matrix, error) {
 	vm, err := newMatrix(rows, cols)
 	if err != nil {
 		return nil, err
 	}
 	for r := range vm {
 		base := byte(r)
 		for c := range vm[r] {
 			vm[r][c] = galExp(base, c)
 		}
 	}
 	return vm, nil
 }
--- a/vendor/github.com/klauspost/reedsolomon/options.go
+++ b/vendor/github.com/klauspost/reedsolomon/options.go
@ -0,0 +1,323 @@
 package reedsolomon
 import (
 	"runtime"
 	"strings"
 	"github.com/klauspost/cpuid/v2"
 )
 // Option overrides one or more processing parameters.
 // Each Option is a function that modifies the options value it is given.
 type Option func(*options)
 // options collects every setting that the Option functions in this file can
 // change. Defaults are defined in defaultOptions.
 type options struct {
 	// maxGoroutines is set by WithMaxGoroutines.
 	maxGoroutines int
 	// minSplitSize is set by WithMinSplitSize.
 	minSplitSize  int
 	// shardSize is set by WithAutoGoroutines.
 	shardSize     int
 	perRound      int
 	// CPU feature switches. Defaults come from the cpuid detection in
 	// defaultOptions; WithSSSE3, WithSSE2, WithAVX2, WithAVX512, WithGFNI and
 	// WithAVXGFNI override them. Note that the first field is spelled
 	// useAvxGNFI (not GFNI).
 	useAvxGNFI,
 	useAvx512GFNI,
 	useAVX512,
 	useAVX2,
 	useSSSE3,
 	useSSE2 bool
 	// Matrix selection, set by WithJerasureMatrix, WithPAR1Matrix,
 	// WithCauchyMatrix and WithFastOneParityMatrix.
 	useJerasureMatrix    bool
 	usePAR1Matrix        bool
 	useCauchy            bool
 	fastOneParity        bool
 	// inversionCache is the cache setting; forcedInversionCache records that
 	// it was chosen explicitly through WithInversionCache.
 	inversionCache       bool
 	forcedInversionCache bool
 	// customMatrix is set by WithCustomMatrix.
 	customMatrix         [][]byte
 	// withLeopard is set by WithLeopardGF16 and WithLeopardGF.
 	withLeopard          leopardMode
 	// stream options
 	concReads  bool
 	concWrites bool
 	streamBS   int
 }
 // defaultOptions is the baseline configuration. The init function in this
 // file lowers maxGoroutines to 1 when GOMAXPROCS is 1 or less.
 var defaultOptions = options{
 	maxGoroutines:  384,
 	// -1 lies outside the range WithMinSplitSize accepts (n > 0).
 	// NOTE(review): presumably it means "determine automatically" — confirm
 	// where minSplitSize is read.
 	minSplitSize:   -1,
 	fastOneParity:  false,
 	inversionCache: true,
 	// Detect CPU capabilities.
 	useSSSE3:      cpuid.CPU.Supports(cpuid.SSSE3),
 	useSSE2:       cpuid.CPU.Supports(cpuid.SSE2),
 	useAVX2:       cpuid.CPU.Supports(cpuid.AVX2),
 	useAVX512:     cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512BW, cpuid.AVX512VL),
 	useAvx512GFNI: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.GFNI, cpuid.AVX512DQ),
 	useAvxGNFI:    cpuid.CPU.Supports(cpuid.AVX, cpuid.GFNI),
 }
 // leopardMode controls the use of leopard GF in encoding and decoding.
 // It is stored in options.withLeopard and selected with WithLeopardGF16 and
 // WithLeopardGF.
 type leopardMode int
 const (
 	// leopardAsNeeded only switches to leopard 16-bit when there are more than
 	// 256 shards. It is the zero value and therefore the default mode.
 	leopardAsNeeded leopardMode = iota
 	// leopardGF16 uses leopard in 16-bit mode for all shard counts.
 	leopardGF16
 	// leopardAlways uses 8-bit leopard for shards less than or equal to 256,
 	// 16-bit leopard otherwise.
 	leopardAlways
 )
 // init limits the default goroutine count to 1 when GOMAXPROCS reports a
 // single processor (or less); otherwise the default is left untouched.
 func init() {
 	if runtime.GOMAXPROCS(0) > 1 {
 		return
 	}
 	defaultOptions.maxGoroutines = 1
 }
 // WithMaxGoroutines sets the maximum number of goroutines used for encoding
 // and decoding.
 // Jobs will be split into this many parts, unless each goroutine would have to
 // process less than minSplitSize bytes (set with WithMinSplitSize).
 // For the best speed, keep this well above the GOMAXPROCS number for more fine
 // grained scheduling.
 // If n <= 0, the option does nothing.
 func WithMaxGoroutines(n int) Option {
 	if n <= 0 {
 		// Nothing to apply; hand back an option that leaves o untouched.
 		return func(*options) {}
 	}
 	return func(o *options) {
 		o.maxGoroutines = n
 	}
 }
 // WithAutoGoroutines will adjust the number of goroutines for optimal speed
 // with a specific shard size.
 // Send in the shard size you expect to send. Other shard sizes will work, but
 // may not run at the optimal speed.
 // Overwrites WithMaxGoroutines.
 // If shardSize <= 0, it is ignored: the option then leaves any previously
 // configured shard size untouched, in the same way WithMaxGoroutines and
 // WithMinSplitSize ignore non-positive values.
 func WithAutoGoroutines(shardSize int) Option {
 	return func(o *options) {
 		if shardSize > 0 {
 			o.shardSize = shardSize
 		}
 	}
 }
 // WithMinSplitSize sets the minimum encoding size in bytes per goroutine.
 // By default this parameter is determined by CPU cache characteristics.
 // See WithMaxGoroutines on how jobs are split.
 // If n <= 0, the option does nothing.
 func WithMinSplitSize(n int) Option {
 	if n <= 0 {
 		// Nothing to apply; hand back an option that leaves o untouched.
 		return func(*options) {}
 	}
 	return func(o *options) {
 		o.minSplitSize = n
 	}
 }
 // WithConcurrentStreams enables or disables concurrent reads and concurrent
 // writes on the streams in one step.
 // Default: Disabled, meaning only one stream will be read/written at the time.
 // Ignored if not used on a stream input.
 func WithConcurrentStreams(enabled bool) Option {
 	return func(o *options) {
 		o.concReads = enabled
 		o.concWrites = enabled
 	}
 }
 // WithConcurrentStreamReads will enable concurrent reads from the input streams.
 // Only the read side is changed; see WithConcurrentStreamWrites for the write side.
 // Default: Disabled, meaning only one stream will be read at the time.
 // Ignored if not used on a stream input.
 func WithConcurrentStreamReads(enabled bool) Option {
 	return func(o *options) {
 		o.concReads = enabled
 	}
 }
 // WithConcurrentStreamWrites will enable concurrent writes to the output streams.
 // Only the write side is changed; see WithConcurrentStreamReads for the read side.
 // Default: Disabled, meaning only one stream will be written at the time.
 // Ignored if not used on a stream input.
 func WithConcurrentStreamWrites(enabled bool) Option {
 	return func(o *options) {
 		o.concWrites = enabled
 	}
 }
 // WithInversionCache allows to control the inversion cache.
 // This will cache reconstruction matrices so they can be reused.
 // Enabled by default, or <= 64 shards for Leopard encoding.
 // Using this option also marks the setting as explicitly chosen
 // (forcedInversionCache), whichever value is passed.
 func WithInversionCache(enabled bool) Option {
 	return func(o *options) {
 		o.inversionCache = enabled
 		o.forcedInversionCache = true
 	}
 }
 // WithStreamBlockSize allows to set a custom block size per round of reads/writes.
 // If not set, any shard size set with WithAutoGoroutines will be used.
 // If WithAutoGoroutines is also unset, 4MB will be used.
 // Ignored if not used on stream.
 // The value n is stored as given; no range check is applied here.
 func WithStreamBlockSize(n int) Option {
 	return func(o *options) {
 		o.streamBS = n
 	}
 }
 // WithSSSE3 allows to enable/disable SSSE3 instructions.
 // If not set, SSSE3 will be turned on or off automatically based on CPU ID information.
 // No other CPU feature switch is changed by this option.
 func WithSSSE3(enabled bool) Option {
 	return func(o *options) {
 		o.useSSSE3 = enabled
 	}
 }
 // WithAVX2 allows to enable/disable AVX2 instructions.
 // If not set, AVX2 will be turned on or off automatically based on CPU ID
 // information.
 // Disabling AVX2 also disables AVX GFNI instructions. Enabling AVX2 never
 // turns AVX GFNI on if it was off.
 func WithAVX2(enabled bool) Option {
 	return func(o *options) {
 		o.useAVX2 = enabled
 		// AVX+GFNI can only stay on while AVX2 is enabled.
 		o.useAvxGNFI = o.useAvxGNFI && enabled
 	}
 }
 // WithSSE2 allows to enable/disable SSE2 instructions.
 // If not set, SSE2 will be turned on or off automatically based on CPU ID information.
 // No other CPU feature switch is changed by this option.
 func WithSSE2(enabled bool) Option {
 	return func(o *options) {
 		o.useSSE2 = enabled
 	}
 }
 // WithAVX512 allows to enable/disable AVX512 (and GFNI) instructions.
 // Both the AVX512 switch and the AVX512+GFNI switch are set to the given
 // value; WithGFNI changes the AVX512+GFNI switch on its own.
 func WithAVX512(enabled bool) Option {
 	return func(o *options) {
 		o.useAVX512 = enabled
 		o.useAvx512GFNI = enabled
 	}
 }
 // WithGFNI allows to enable/disable AVX512+GFNI instructions.
 // If not set, GFNI will be turned on or off automatically based on CPU ID information.
 // Only the AVX512+GFNI switch is changed; GFNI with plain AVX is controlled
 // by WithAVXGFNI.
 func WithGFNI(enabled bool) Option {
 	return func(o *options) {
 		o.useAvx512GFNI = enabled
 	}
 }
 // WithAVXGFNI allows to enable/disable GFNI with AVX instructions.
 // If not set, GFNI will be turned on or off automatically based on CPU ID information.
 // Only the AVX+GFNI switch is changed; AVX512+GFNI is controlled by WithGFNI.
 func WithAVXGFNI(enabled bool) Option {
 	return func(o *options) {
 		o.useAvxGNFI = enabled
 	}
 }
 // WithJerasureMatrix causes the encoder to build the Reed-Solomon-Vandermonde
 // matrix in the same way as done by the Jerasure library.
 // The first row and column of the coding matrix only contains 1's in this
 // method so the first parity chunk is always equal to XOR of all data chunks.
 // Selecting this matrix deselects the PAR1 and Cauchy matrices.
 func WithJerasureMatrix() Option {
 	return func(o *options) {
 		o.useJerasureMatrix, o.usePAR1Matrix, o.useCauchy = true, false, false
 	}
 }
 // WithPAR1Matrix causes the encoder to build the matrix how PARv1 does.
 // Note that the method they use is buggy, and may lead to cases where
 // recovery is impossible, even if there are enough parity shards.
 // Selecting this matrix deselects the Jerasure and Cauchy matrices.
 func WithPAR1Matrix() Option {
 	return func(o *options) {
 		o.useJerasureMatrix, o.usePAR1Matrix, o.useCauchy = false, true, false
 	}
 }
 // WithCauchyMatrix will make the encoder build a Cauchy style matrix.
 // The output of this is not compatible with the standard output.
 // A Cauchy matrix is faster to generate. This does not affect data
 // throughput, but will result in slightly faster start-up time.
 // Selecting this matrix deselects the Jerasure and PAR1 matrices.
 func WithCauchyMatrix() Option {
 	return func(o *options) {
 		o.useJerasureMatrix, o.usePAR1Matrix, o.useCauchy = false, false, true
 	}
 }
 // WithFastOneParityMatrix will switch the matrix to a simple xor
 // if there is only one parity shard.
 // The PAR1 matrix already has this property so it has little effect there.
 // The option can only switch the behaviour on; it takes no argument.
 func WithFastOneParityMatrix() Option {
 	return func(o *options) {
 		o.fastOneParity = true
 	}
 }
 // WithCustomMatrix causes the encoder to use the manually specified matrix.
 // customMatrix represents only the parity chunks.
 // customMatrix must have at least ParityShards rows and DataShards columns.
 // It can be used for interoperability with libraries which generate
 // the matrix differently or to implement more complex coding schemes like LRC
 // (locally reconstructible codes).
 // The slice is stored as given, without copying or validation at this point.
 func WithCustomMatrix(customMatrix [][]byte) Option {
 	return func(o *options) {
 		o.customMatrix = customMatrix
 	}
 }
 // WithLeopardGF16 will always use leopard GF16 for encoding,
 // even when there is less than 256 shards.
 // This will likely improve reconstruction time for some setups.
 // This is not compatible with Leopard output for <= 256 shards.
 // Note that Leopard places certain restrictions on use see other documentation.
 // Passing false selects the default "as needed" mode.
 func WithLeopardGF16(enabled bool) Option {
 	mode := leopardAsNeeded
 	if enabled {
 		mode = leopardGF16
 	}
 	return func(o *options) {
 		o.withLeopard = mode
 	}
 }
 // WithLeopardGF will use leopard GF for encoding, even when there are fewer
 // than 256 shards.
 // This will likely improve reconstruction time for some setups.
 // Note that Leopard places certain restrictions on use see other documentation.
 // Passing false selects the default "as needed" mode.
 func WithLeopardGF(enabled bool) Option {
 	mode := leopardAsNeeded
 	if enabled {
 		mode = leopardAlways
 	}
 	return func(o *options) {
 		o.withLeopard = mode
 	}
 }
 // cpuOptions returns a comma-separated list naming the CPU feature switches
 // that are currently enabled, in a fixed order, or "pure Go" when none is.
 func (o *options) cpuOptions() string {
 	features := [...]struct {
 		enabled bool
 		label   string
 	}{
 		{o.useSSE2, "SSE2"},
 		{o.useAVX2, "AVX2"},
 		{o.useSSSE3, "SSSE3"},
 		{o.useAVX512, "AVX512"},
 		{o.useAvx512GFNI, "AVX512+GFNI"},
 		{o.useAvxGNFI, "AVX+GFNI"},
 	}
 	var active []string
 	for _, f := range features {
 		if f.enabled {
 			active = append(active, f.label)
 		}
 	}
 	if len(active) == 0 {
 		return "pure Go"
 	}
 	return strings.Join(active, ",")
 }
--- a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
+++ b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
--- a/vendor/github.com/klauspost/reedsolomon/streaming.go
+++ b/vendor/github.com/klauspost/reedsolomon/streaming.go
@ -0,0 +1,614 @@
 /**
 * Reed-Solomon Coding over 8-bit values.
 *
 * Copyright 2015, Klaus Post
 * Copyright 2015, Backblaze, Inc.
 */
 package reedsolomon
 import (
 	"errors"
 	"fmt"
 	"io"
 	"sync"
 )
 // StreamEncoder is an interface to encode Reed-Solomon parity sets for your data.
 // It provides a fully streaming interface, and processes data in blocks of up to 4MB.
 //
 // For small shard sizes, 10MB and below, it is recommended to use the in-memory interface,
 // since the streaming interface has a start up overhead.
 //
 // For all operations, readers and writers should not assume any order/size of
 // individual reads/writes.
 //
 // For usage examples, see "stream-encoder.go" and "streamdecoder.go" in the examples
 // folder.
 type StreamEncoder interface {
 	// Encode parity shards for a set of data shards.
 	//
 	// Input is 'data' containing readers for the data shards and 'parity'
 	// containing an io.Writer for each parity shard.
 	//
 	// The number of shards must match the number given to NewStream().
 	//
 	// Each reader must supply the same number of bytes.
 	//
 	// The parity shards will be written to the writer.
 	// The number of bytes written will match the input size.
 	//
 	// If a data stream returns an error, a StreamReadError type error
 	// will be returned. If a parity writer returns an error, a
 	// StreamWriteError will be returned.
 	Encode(data []io.Reader, parity []io.Writer) error
 	// Verify returns true if the parity shards contain correct data.
 	//
 	// The number of shards must match the total number of data+parity shards
 	// given to NewStream().
 	//
 	// Each reader must supply the same number of bytes.
 	// If a shard stream returns an error, a StreamReadError type error
 	// will be returned.
 	Verify(shards []io.Reader) (bool, error)
 	// Reconstruct will recreate the missing shards if possible.
 	//
 	// Given a list of valid shards (to read) and invalid shards (to write)
 	//
 	// You indicate that a shard is missing by setting it to nil in the 'valid'
 	// slice and at the same time setting a non-nil writer in "fill".
 	// An index cannot contain both non-nil 'valid' and 'fill' entry.
 	// If both are provided 'ErrReconstructMismatch' is returned.
 	//
 	// If there are too few shards to reconstruct the missing
 	// ones, ErrTooFewShards will be returned.
 	//
 	// The reconstructed shard set is complete, but integrity is not verified.
 	// Use the Verify function to check if data set is ok.
 	Reconstruct(valid []io.Reader, fill []io.Writer) error
 	// Split an input stream into the number of shards given to the encoder.
 	//
 	// The data will be split into equally sized shards.
 	// If the data size isn't divisible by the number of shards,
 	// the last shard will contain extra zeros.
 	//
 	// You must supply the total size of your input.
 	// 'ErrShortData' will be returned if it is unable to retrieve the
 	// number of bytes indicated.
 	Split(data io.Reader, dst []io.Writer, size int64) (err error)
 	// Join the shards and write the data segment to dst.
 	//
 	// Only the data shards are considered.
 	//
 	// You must supply the exact output size you want.
 	// If there are too few shards given, ErrTooFewShards will be returned.
 	// If the total data size is less than outSize, ErrShortData will be returned.
 	Join(dst io.Writer, shards []io.Reader, outSize int64) error
 }
 // StreamReadError reports a read failure on one of the supplied streams.
 // The Stream field identifies which reader failed.
 type StreamReadError struct {
 	Err    error // The error
 	Stream int   // The stream number on which the error occurred
 }
 // Error implements the error interface; the message names the failing
 // stream followed by the underlying error.
 func (s StreamReadError) Error() string {
 	const format = "error reading stream %d: %s"
 	return fmt.Sprintf(format, s.Stream, s.Err)
 }
 // String returns the same text as Error.
 func (s StreamReadError) String() string {
 	msg := s.Error()
 	return msg
 }
 // StreamWriteError reports a write failure on one of the supplied streams.
 // The Stream field identifies which writer failed.
 type StreamWriteError struct {
 	Err    error // The error
 	Stream int   // The stream number on which the error occurred
 }
 // Error implements the error interface; the message names the failing
 // stream followed by the underlying error.
 func (s StreamWriteError) Error() string {
 	const format = "error writing stream %d: %s"
 	return fmt.Sprintf(format, s.Stream, s.Err)
 }
 // String returns the same text as Error.
 func (s StreamWriteError) String() string {
 	msg := s.Error()
 	return msg
 }
 // rsStream contains a matrix for a specific
 // distribution of datashards and parity shards.
 // Construct it using NewStream().
 type rsStream struct {
 	// r is the in-memory encoder that codes each block read from the streams.
 	r *reedSolomon
 	// o holds the options this stream was created with (block size, concurrency flags, ...).
 	o options
 	// Shard reader
 	readShards func(dst [][]byte, in []io.Reader) error
 	// Shard writer
 	writeShards func(out []io.Writer, in [][]byte) error
 	// blockPool recycles the shard buffers handed out by createSlice.
 	blockPool sync.Pool
 }
 // NewStream creates a new streaming encoder for the given number of data
 // shards and parity shards. The returned encoder can be reused.
 //
 // The total number of shards (data + parity) may not exceed 256;
 // ErrMaxShardNum is returned otherwise.
 //
 // When no stream block size is set by the options, the configured shard
 // size is used as block size; if that is unset as well, 4MB is used.
 func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
 	total := dataShards + parityShards
 	if total > 256 {
 		return nil, ErrMaxShardNum
 	}
 	s := rsStream{o: defaultOptions}
 	for i := range o {
 		o[i](&s.o)
 	}
 	switch {
 	case s.o.streamBS == 0 && s.o.shardSize > 0:
 		// Override block size if shard size is set.
 		s.o.streamBS = s.o.shardSize
 	case s.o.streamBS <= 0:
 		s.o.streamBS = 4 << 20
 	}
 	// Only pick the goroutine count automatically when no shard size was set
 	// and the goroutine limit still has its default value.
 	autoTune := s.o.shardSize == 0 && s.o.maxGoroutines == defaultOptions.maxGoroutines
 	if autoTune {
 		o = append(o, WithAutoGoroutines(s.o.streamBS))
 	}
 	enc, err := New(dataShards, parityShards, o...)
 	if err != nil {
 		return nil, err
 	}
 	s.r = enc.(*reedSolomon)
 	s.blockPool.New = func() interface{} {
 		return AllocAligned(total, s.o.streamBS)
 	}
 	// Sequential shard I/O unless concurrent reads/writes were requested.
 	s.readShards, s.writeShards = readShards, writeShards
 	if s.o.concReads {
 		s.readShards = cReadShards
 	}
 	if s.o.concWrites {
 		s.writeShards = cWriteShards
 	}
 	return &s, nil
 }
 // NewStreamC creates a new streaming encoder for the given number of data
 // shards and parity shards.
 //
 // It behaves like 'NewStream', with the two flags appended to the options as
 // WithConcurrentStreamReads and WithConcurrentStreamWrites, which lets you
 // enable CONCURRENT reads and writes.
 func NewStreamC(dataShards, parityShards int, conReads, conWrites bool, o ...Option) (StreamEncoder, error) {
 	opts := append(o, WithConcurrentStreamReads(conReads), WithConcurrentStreamWrites(conWrites))
 	return NewStream(dataShards, parityShards, opts...)
 }
 // createSlice fetches a set of shard buffers from the pool and resets every
 // shard to the full stream block size, since shards may have been shortened
 // before the buffers were returned to the pool.
 func (r *rsStream) createSlice() [][]byte {
 	block := r.blockPool.Get().([][]byte)
 	size := r.o.streamBS
 	for idx, shard := range block {
 		block[idx] = shard[:size]
 	}
 	return block
 }
 // Encode reads the data shards block by block, computes the parity for each
 // block and writes it to the parity writers.
 //
 // 'data' holds one reader per data shard and 'parity' one writer per parity
 // shard; both lengths must match the numbers given to NewStream(),
 // otherwise ErrTooFewShards is returned.
 //
 // Each reader must supply the same number of bytes.
 // The number of bytes written to each parity writer will match the input size.
 // ErrShardNoData is returned when the readers deliver no data at all.
 //
 // If a data stream returns an error, a StreamReadError type error
 // will be returned. If a parity writer returns an error, a
 // StreamWriteError will be returned.
 func (r *rsStream) Encode(data []io.Reader, parity []io.Writer) error {
 	if len(data) != r.r.dataShards || len(parity) != r.r.parityShards {
 		return ErrTooFewShards
 	}
 	block := r.createSlice()
 	defer r.blockPool.Put(block)
 	inputs, outputs := block[:r.r.dataShards], block[r.r.dataShards:]
 	total := 0
 	for {
 		if err := r.readShards(inputs, data); err != nil {
 			if err != io.EOF {
 				return err
 			}
 			// End of input: only an error if nothing was ever read.
 			if total == 0 {
 				return ErrShardNoData
 			}
 			return nil
 		}
 		// The final block may be short; size the parity shards to match.
 		n := shardSize(inputs)
 		outputs = trimShards(outputs, n)
 		total += n
 		if err := r.r.Encode(block); err != nil {
 			return err
 		}
 		if err := r.writeShards(parity, outputs); err != nil {
 			return err
 		}
 	}
 }
 // trimShards reslices every non-empty shard of in to exactly size bytes.
 // Empty shards are left empty. The slice is modified in place and returned.
 func trimShards(in [][]byte, size int) [][]byte {
 	for idx, shard := range in {
 		if len(shard) == 0 {
 			// Nothing to resize; an empty shard stays empty.
 			continue
 		}
 		in[idx] = shard[:size]
 	}
 	return in
 }
 func readShards(dst [][]byte, in []io.Reader) error {
 	if len(in) != len(dst) {
 		panic("internal error: in and dst size do not match")
 	}
 	size := -1
 	for i := range in {
 		if in[i] == nil {
 			dst[i] = dst[i][:0]
 			continue
 		}
 		n, err := io.ReadFull(in[i], dst[i])
 		// The error is EOF only if no bytes were read.
 		// If an EOF happens after reading some but not all the bytes,
 		// ReadFull returns ErrUnexpectedEOF.
 		switch err {
 		case io.ErrUnexpectedEOF, io.EOF:
 			if size < 0 {
 				size = n
 			} else if n != size {
 				// Shard sizes must match.
 				return ErrShardSize
 			}
 			dst[i] = dst[i][0:n]
 		case nil:
 			continue
 		default:
 			return StreamReadError{Err: err, Stream: i}
 		}
 	}
 	if size == 0 {
 		return io.EOF
 	}
 	return nil
 }
 // writeShards writes each shard of in to the matching writer, one after the
 // other. Indexes with a nil writer are skipped. A failed or short write is
 // returned as a StreamWriteError carrying the stream index.
 // It panics if out and in differ in length.
 func writeShards(out []io.Writer, in [][]byte) error {
 	if len(out) != len(in) {
 		panic("internal error: in and out size do not match")
 	}
 	for idx, w := range out {
 		if w == nil {
 			continue
 		}
 		n, err := w.Write(in[idx])
 		switch {
 		case err != nil:
 			return StreamWriteError{Err: err, Stream: idx}
 		case n != len(in[idx]):
 			// The writer accepted fewer bytes without reporting an error.
 			return StreamWriteError{Err: io.ErrShortWrite, Stream: idx}
 		}
 	}
 	return nil
 }
 // readResult carries the outcome of one concurrent shard read in cReadShards.
 type readResult struct {
 	n    int // index of the stream that was read
 	size int // number of bytes read from that stream
 	err  error // error returned by io.ReadFull, nil on a full read
 }
 // cReadShards reads shards concurrently, using one goroutine per non-nil reader.
 //
 // Shards whose reader is nil are truncated to length zero. Once all reads
 // have finished the results are checked: streams that ended early have
 // their shard shortened to the bytes read and must all agree on that
 // length (ErrShardSize otherwise); io.EOF is returned when that length is
 // zero. Any other read error is returned as a StreamReadError.
 // It panics if dst and in differ in length.
 func cReadShards(dst [][]byte, in []io.Reader) error {
 	if len(in) != len(dst) {
 		panic("internal error: in and dst size do not match")
 	}
 	// Buffered for every stream so no reader goroutine blocks on send.
 	results := make(chan readResult, len(in))
 	var pending sync.WaitGroup
 	for idx := range in {
 		if in[idx] == nil {
 			dst[idx] = dst[idx][:0]
 			continue
 		}
 		pending.Add(1)
 		go func(idx int) {
 			defer pending.Done()
 			// ReadFull reports io.EOF only if no bytes were read and
 			// io.ErrUnexpectedEOF if the stream ended part way through.
 			got, err := io.ReadFull(in[idx], dst[idx])
 			results <- readResult{n: idx, size: got, err: err}
 		}(idx)
 	}
 	pending.Wait()
 	close(results)
 	short := -1 // length of the short reads seen so far, -1 if none
 	for res := range results {
 		if res.err == nil {
 			continue
 		}
 		if res.err != io.EOF && res.err != io.ErrUnexpectedEOF {
 			return StreamReadError{Err: res.err, Stream: res.n}
 		}
 		if short < 0 {
 			short = res.size
 		} else if res.size != short {
 			// Shard sizes must match.
 			return ErrShardSize
 		}
 		dst[res.n] = dst[res.n][:res.size]
 	}
 	if short == 0 {
 		return io.EOF
 	}
 	return nil
 }
 // cWriteShards writes shards concurrently, using one goroutine per shard.
 // Indexes with a nil writer are skipped. After all writes have finished,
 // one of the recorded failures (if any) is returned as a StreamWriteError.
 // It panics if out and in differ in length.
 func cWriteShards(out []io.Writer, in [][]byte) error {
 	if len(out) != len(in) {
 		panic("internal error: in and out size do not match")
 	}
 	// Buffered for every stream so no writer goroutine blocks on send.
 	failures := make(chan error, len(out))
 	var pending sync.WaitGroup
 	pending.Add(len(out))
 	for idx := range in {
 		go func(idx int) {
 			defer pending.Done()
 			w := out[idx]
 			if w == nil {
 				return
 			}
 			n, err := w.Write(in[idx])
 			switch {
 			case err != nil:
 				failures <- StreamWriteError{Err: err, Stream: idx}
 			case n != len(in[idx]):
 				failures <- StreamWriteError{Err: io.ErrShortWrite, Stream: idx}
 			}
 		}(idx)
 	}
 	pending.Wait()
 	close(failures)
 	// Only real failures are queued, so the first entry (if any) is the result.
 	if err, ok := <-failures; ok {
 		return err
 	}
 	return nil
 }
 // Verify reads all shards block by block and reports whether the parity
 // shards contain correct data.
 //
 // The number of shards must match the total number of data+parity shards
 // given to NewStream(); otherwise ErrTooFewShards is returned.
 //
 // Each reader must supply the same number of bytes.
 // ErrShardNoData is returned when the readers deliver no data at all.
 // If a shard stream returns an error, a StreamReadError type error
 // will be returned.
 func (r *rsStream) Verify(shards []io.Reader) (bool, error) {
 	if len(shards) != r.r.totalShards {
 		return false, ErrTooFewShards
 	}
 	block := r.createSlice()
 	defer r.blockPool.Put(block)
 	total := 0
 	for {
 		switch err := r.readShards(block, shards); {
 		case err == io.EOF:
 			// End of input: only an error if nothing was ever read.
 			if total == 0 {
 				return false, ErrShardNoData
 			}
 			return true, nil
 		case err != nil:
 			return false, err
 		}
 		total += shardSize(block)
 		// Stop at the first block that fails verification.
 		if ok, err := r.r.Verify(block); err != nil || !ok {
 			return ok, err
 		}
 	}
 }
 // ErrReconstructMismatch is returned by the StreamEncoder, if you supply
 // "valid" and "fill" streams on the same index.
 // In that case it is impossible to tell whether you consider the shard valid
 // or would like to have it reconstructed.
 var ErrReconstructMismatch = errors.New("valid shards and fill shards are mutually exclusive")
 // Reconstruct will recreate the missing shards if possible.
 //
 // 'valid' holds the shards to read and 'fill' the shards to write; both
 // must contain one entry per data+parity shard, otherwise ErrTooFewShards
 // is returned.
 //
 // You indicate that a shard is missing by setting it to nil in the 'valid'
 // slice and at the same time setting a non-nil writer in "fill".
 // An index cannot contain both non-nil 'valid' and 'fill' entry;
 // ErrReconstructMismatch is returned in that case.
 //
 // When no parity shard has a writer in "fill", only the missing data
 // shards are reconstructed.
 //
 // ErrShardNoData is returned when the readers deliver no data at all.
 //
 // The integrity of the reconstructed shards is not verified automatically.
 // Use the Verify function to check the complete data set.
 func (r *rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
 	if len(valid) != r.r.totalShards || len(fill) != r.r.totalShards {
 		return ErrTooFewShards
 	}
 	block := r.createSlice()
 	defer r.blockPool.Put(block)
 	wantParity := false
 	for idx := range valid {
 		if fill[idx] == nil {
 			continue
 		}
 		if valid[idx] != nil {
 			return ErrReconstructMismatch
 		}
 		if idx >= r.r.dataShards {
 			wantParity = true
 		}
 	}
 	total := 0
 	for {
 		switch err := r.readShards(block, valid); {
 		case err == io.EOF:
 			// End of input: only an error if nothing was ever read.
 			if total == 0 {
 				return ErrShardNoData
 			}
 			return nil
 		case err != nil:
 			return err
 		}
 		n := shardSize(block)
 		total += n
 		block = trimShards(block, n)
 		var err error
 		if wantParity {
 			err = r.r.Reconstruct(block) // reconstruct all missing shards
 		} else {
 			err = r.r.ReconstructData(block) // just reconstruct missing data shards
 		}
 		if err != nil {
 			return err
 		}
 		if err := r.writeShards(fill, block); err != nil {
 			return err
 		}
 	}
 }
 // Join concatenates the data shards and writes outSize bytes of them to dst.
 //
 // Only the data shards are considered; any further entries in shards are ignored.
 //
 // You must supply the exact output size you want.
 // If there are too few shards given, ErrTooFewShards will be returned.
 // A nil data shard reader yields a StreamReadError wrapping ErrShardNoData.
 // If the total data size is less than outSize, ErrShortData will be returned.
 func (r *rsStream) Join(dst io.Writer, shards []io.Reader, outSize int64) error {
 	need := r.r.dataShards
 	if len(shards) < need {
 		return ErrTooFewShards
 	}
 	// Drop the parity shards, if any were passed.
 	dataOnly := shards[:need]
 	for idx, rd := range dataOnly {
 		if rd == nil {
 			return StreamReadError{Err: ErrShardNoData, Stream: idx}
 		}
 	}
 	// Read the data shards back to back and copy exactly outSize bytes.
 	copied, err := io.CopyN(dst, io.MultiReader(dataOnly...), outSize)
 	switch {
 	case err == io.EOF:
 		return ErrShortData
 	case err != nil:
 		return err
 	case copied != outSize:
 		return ErrShortData
 	}
 	return nil
 }
 // Split an input stream into the number of shards given to the encoder.
 //
 // The data will be split into equally sized shards.
 // If the data size isn't divisible by the number of shards,
 // the last shard will contain extra zeros.
 //
 // You must supply the total size of your input.
 // 'ErrShortData' will be returned if size is not positive or if it is
 // unable to retrieve the number of bytes indicated.
 // 'ErrInvShardNum' is returned when len(dst) differs from the number of
 // data shards, and a StreamWriteError wrapping ErrShardNoData when one of
 // the destination writers is nil.
 func (r *rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
 	if size <= 0 {
 		return ErrShortData
 	}
 	if len(dst) != r.r.dataShards {
 		return ErrInvShardNum
 	}
 	for i := range dst {
 		if dst[i] == nil {
 			return StreamWriteError{Err: ErrShardNoData, Stream: i}
 		}
 	}
 	// Calculate number of bytes per shard (size divided by data shards, rounded up).
 	dataShards := int64(r.r.dataShards)
 	perShard := (size + dataShards - 1) / dataShards
 	// Pad the input to exactly dataShards*perShard bytes. Only the data shards
 	// are written here, so padding any further would let zeros stand in for
 	// missing input and hide a truncated stream instead of reporting ErrShortData.
 	paddingSize := dataShards*perShard - size
 	data = io.MultiReader(data, io.LimitReader(zeroPaddingReader{}, paddingSize))
 	// Split into equal-length shards and copy.
 	for i := range dst {
 		n, err := io.CopyN(dst[i], data, perShard)
 		if err != io.EOF && err != nil {
 			return err
 		}
 		// Fewer bytes than perShard means the input ended before size bytes were read.
 		if n != perShard {
 			return ErrShortData
 		}
 	}
 	return nil
 }
 // zeroPaddingReader is an io.Reader that yields an endless stream of zero bytes.
 type zeroPaddingReader struct{}
 var _ io.Reader = &zeroPaddingReader{}
 // Read overwrites all of p with zeros and reports len(p) bytes read.
 // It never returns an error.
 func (t zeroPaddingReader) Read(p []byte) (n int, err error) {
 	for i := range p {
 		p[i] = 0
 	}
 	return len(p), nil
 }
--- a/vendor/github.com/klauspost/reedsolomon/unsafe.go
+++ b/vendor/github.com/klauspost/reedsolomon/unsafe.go
@ -0,0 +1,41 @@
 //go:build !noasm && !nounsafe && !gccgo && !appengine
 /**
 * Reed-Solomon Coding over 8-bit values.
 *
 * Copyright 2023, Klaus Post
 */
 package reedsolomon
 import (
 	"unsafe"
 )
 // AllocAligned allocates 'shards' slices, with 'each' bytes.
 // Each slice will start on a 64 byte aligned boundary.
 //
 // All slices share one backing allocation. The capacity of every slice is
 // 'each' rounded up to a multiple of 64, so appending within the capacity
 // never reaches into the next shard.
 func AllocAligned(shards, each int) [][]byte {
 	const (
 		alignEach  = 64
 		alignStart = 64
 	)
 	// Distance between the starts of two consecutive shards.
 	eachAligned := ((each + alignEach - 1) / alignEach) * alignEach
 	// Over-allocate by alignStart-1 bytes so the start can be moved forward
 	// to the next aligned address.
 	total := make([]byte, eachAligned*shards+alignStart-1)
 	align := uint(uintptr(unsafe.Pointer(&total[0]))) & (alignStart - 1)
 	if align > 0 {
 		total = total[alignStart-align:]
 	}
 	res := make([][]byte, shards)
 	for i := range res {
 		res[i] = total[:each:eachAligned]
 		total = total[eachAligned:]
 	}
 	return res
 }
--- a/vendor/github.com/klauspost/reedsolomon/unsafe_disabled.go
+++ b/vendor/github.com/klauspost/reedsolomon/unsafe_disabled.go
@ -0,0 +1,23 @@
 //go:build noasm || nounsafe || gccgo || appengine
 /**
 * Reed-Solomon Coding over 8-bit values.
 *
 * Copyright 2023, Klaus Post
 */
 package reedsolomon
 // AllocAligned allocates 'shards' slices, with 'each' bytes.
 // All slices are carved out of one backing allocation and are spaced a
 // multiple of 64 bytes apart. The start of the allocation cannot be aligned
 // without "unsafe", so it has whatever alignment the allocator provides.
 func AllocAligned(shards, each int) [][]byte {
 	const align = 64
 	// Distance between the starts of two consecutive shards.
 	stride := (each + align - 1) / align * align
 	backing := make([]byte, stride*shards+align-1)
 	out := make([][]byte, shards)
 	for i, off := 0, 0; i < shards; i, off = i+1, off+stride {
 		out[i] = backing[off : off+each : off+stride]
 	}
 	return out
 }
--- a/vendor/github.com/klauspost/reedsolomon/xor_arm64.go
+++ b/vendor/github.com/klauspost/reedsolomon/xor_arm64.go
@ -0,0 +1,19 @@
 //go:build !noasm && !appengine && !gccgo
 package reedsolomon
 // xorSliceNEON is implemented in assembly (xor_arm64.s). It XORs in into
 // out in whole 32-byte blocks, driven by len(in); a trailing partial block
 // is left untouched and must be handled by the caller.
 //
 //go:noescape
 func xorSliceNEON(in, out []byte)
 // sliceXor XORs every byte of in into the byte of out at the same index.
 // Whole 32-byte blocks are handled by the NEON routine, the rest in Go.
 // The options argument is not used by this implementation.
 func sliceXor(in, out []byte, o *options) {
 	xorSliceNEON(in, out)
 	// Finish the bytes after the last whole 32-byte block.
 	for i := len(in) &^ 31; i < len(in); i++ {
 		out[i] ^= in[i]
 	}
 }
--- a/vendor/github.com/klauspost/reedsolomon/xor_arm64.s
+++ b/vendor/github.com/klauspost/reedsolomon/xor_arm64.s
@ -0,0 +1,29 @@
 //+build !noasm
 //+build !appengine
 //+build !gccgo
 // func xorSliceNEON(in, out []byte)
 //
 // XORs in into out, 32 bytes per iteration, for as many whole 32-byte
 // blocks as len(in) contains. The trailing len(in) % 32 bytes are not
 // processed here.
 TEXT ·xorSliceNEON(SB), 7, $0
 	// R1 = in pointer, R2 = remaining length, R5 = out pointer
 	MOVD in_base+0(FP), R1
 	MOVD in_len+8(FP), R2    // length of message
 	MOVD out_base+24(FP), R5
 	// Skip the loop entirely when fewer than 32 bytes are available.
 	SUBS $32, R2
 	BMI  completeXor
 loopXor:
 	// Main loop
 	// Load 32 bytes of in (advancing R1) and 32 bytes of out (R5 unchanged).
 	VLD1.P 32(R1), [V0.B16, V1.B16]
 	VLD1   (R5), [V20.B16, V21.B16]
 	VEOR V20.B16, V0.B16, V4.B16
 	VEOR V21.B16, V1.B16, V5.B16
 	// Store result
 	// The result is written back to out, advancing R5 by 32 bytes.
 	VST1.P [V4.D2, V5.D2], 32(R5)
 	// Continue while at least another 32 bytes remain.
 	SUBS $32, R2
 	BPL  loopXor
 completeXor:
 	RET
--- a/vendor/github.com/klauspost/reedsolomon/xor_noasm.go
+++ b/vendor/github.com/klauspost/reedsolomon/xor_noasm.go
@ -0,0 +1,7 @@
 //go:build noasm || gccgo || appengine || (!amd64 && !arm64)
 package reedsolomon
 // sliceXor is the variant selected by this file's build constraints
 // (noasm, gccgo, appengine, or an architecture other than amd64/arm64).
 // It forwards all arguments unchanged to sliceXorGo.
 func sliceXor(in, out []byte, o *options) {
 	sliceXorGo(in, out, o)
 }
--- a/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s
+++ b/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build gc
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/cpu/cpu.go
+++ b/vendor/golang.org/x/sys/cpu/cpu.go
@ -38,7 +38,7 @@ var X86 struct {
 	HasAVX512F          bool // Advanced vector extension 512 Foundation Instructions
 	HasAVX512CD         bool // Advanced vector extension 512 Conflict Detection Instructions
 	HasAVX512ER         bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
-	HasAVX512PF         bool // Advanced vector extension 512 Prefetch Instructions Instructions
+	HasAVX512PF         bool // Advanced vector extension 512 Prefetch Instructions
 	HasAVX512VL         bool // Advanced vector extension 512 Vector Length Extensions
 	HasAVX512BW         bool // Advanced vector extension 512 Byte and Word Instructions
 	HasAVX512DQ         bool // Advanced vector extension 512 Doubleword and Quadword Instructions
@ -54,6 +54,9 @@ var X86 struct {
 	HasAVX512VBMI2      bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
 	HasAVX512BITALG     bool // Advanced vector extension 512 Bit Algorithms
 	HasAVX512BF16       bool // Advanced vector extension 512 BFloat16 Instructions
 	HasAMXTile          bool // Advanced Matrix Extension Tile instructions
 	HasAMXInt8          bool // Advanced Matrix Extension Int8 instructions
 	HasAMXBF16          bool // Advanced Matrix Extension BFloat16 instructions
 	HasBMI1             bool // Bit manipulation instruction set 1
 	HasBMI2             bool // Bit manipulation instruction set 2
 	HasCX16             bool // Compare and exchange 16 Bytes
--- a/vendor/golang.org/x/sys/cpu/cpu_aix.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_aix.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build aix
 // +build aix
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_arm64.s
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build gc
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build gc
 // +build gc
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build gc
 // +build gc
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build (386 || amd64 || amd64p32) && gc
 // +build 386 amd64 amd64p32
 // +build gc
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build gccgo
 // +build gccgo
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build gccgo
 // +build gccgo
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build (386 || amd64 || amd64p32) && gccgo
 // +build 386 amd64 amd64p32
 // +build gccgo
 #include <cpuid.h>
 #include <stdint.h>
--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build (386 || amd64 || amd64p32) && gccgo
 // +build 386 amd64 amd64p32
 // +build gccgo
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_linux.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build !386 && !amd64 && !amd64p32 && !arm64
 // +build !386,!amd64,!amd64p32,!arm64
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build linux && (mips64 || mips64le)
 // +build linux
 // +build mips64 mips64le
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
 // +build linux,!arm,!arm64,!mips64,!mips64le,!ppc64,!ppc64le,!s390x
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build linux && (ppc64 || ppc64le)
 // +build linux
 // +build ppc64 ppc64le
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_loong64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_loong64.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build loong64
 // +build loong64
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build mips64 || mips64le
 // +build mips64 mips64le
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build mips || mipsle
 // +build mips mipsle
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_other_arm.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_arm.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build !linux && arm
 // +build !linux,arm
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build !linux && !netbsd && !openbsd && arm64
 // +build !linux,!netbsd,!openbsd,arm64
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build !linux && (mips64 || mips64le)
 // +build !linux
 // +build mips64 mips64le
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_other_ppc64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_ppc64x.go
@ -3,9 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build !aix && !linux && (ppc64 || ppc64le)
 // +build !aix
 // +build !linux
 // +build ppc64 ppc64le
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_other_riscv64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_riscv64.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build !linux && riscv64
 // +build !linux,riscv64
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build ppc64 || ppc64le
 // +build ppc64 ppc64le
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
@ -3,10 +3,9 @@
 // license that can be found in the LICENSE file.
 //go:build riscv64
 // +build riscv64
 package cpu
-const cacheLineSize = 32
+const cacheLineSize = 64
 func initOptions() {}
--- a/vendor/golang.org/x/sys/cpu/cpu_s390x.s
+++ b/vendor/golang.org/x/sys/cpu/cpu_s390x.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build gc
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/cpu/cpu_wasm.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_wasm.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build wasm
 // +build wasm
 package cpu
--- a/vendor/golang.org/x/sys/cpu/cpu_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_x86.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build 386 || amd64 || amd64p32
 // +build 386 amd64 amd64p32
 package cpu
@ -37,6 +36,9 @@ func initOptions() {
 		{Name: "avx512vbmi2", Feature: &X86.HasAVX512VBMI2},
 		{Name: "avx512bitalg", Feature: &X86.HasAVX512BITALG},
 		{Name: "avx512bf16", Feature: &X86.HasAVX512BF16},
 		{Name: "amxtile", Feature: &X86.HasAMXTile},
 		{Name: "amxint8", Feature: &X86.HasAMXInt8},
 		{Name: "amxbf16", Feature: &X86.HasAMXBF16},
 		{Name: "bmi1", Feature: &X86.HasBMI1},
 		{Name: "bmi2", Feature: &X86.HasBMI2},
 		{Name: "cx16", Feature: &X86.HasCX16},
@ -138,6 +140,10 @@ func archInit() {
 		eax71, _, _, _ := cpuid(7, 1)
 		X86.HasAVX512BF16 = isSet(5, eax71)
 	}
 	X86.HasAMXTile = isSet(24, edx7)
 	X86.HasAMXInt8 = isSet(25, edx7)
 	X86.HasAMXBF16 = isSet(22, edx7)
 }
 func isSet(bitpos uint, value uint32) bool {
--- a/vendor/golang.org/x/sys/cpu/cpu_x86.s
+++ b/vendor/golang.org/x/sys/cpu/cpu_x86.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build (386 || amd64 || amd64p32) && gc
 // +build 386 amd64 amd64p32
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/cpu/endian_big.go
+++ b/vendor/golang.org/x/sys/cpu/endian_big.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64
 // +build armbe arm64be m68k mips mips64 mips64p32 ppc ppc64 s390 s390x shbe sparc sparc64
 package cpu
--- a/vendor/golang.org/x/sys/cpu/endian_little.go
+++ b/vendor/golang.org/x/sys/cpu/endian_little.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm
 // +build 386 amd64 amd64p32 alpha arm arm64 loong64 mipsle mips64le mips64p32le nios2 ppc64le riscv riscv64 sh wasm
 package cpu
--- a/vendor/golang.org/x/sys/cpu/hwcap_linux.go
+++ b/vendor/golang.org/x/sys/cpu/hwcap_linux.go
@ -5,7 +5,7 @@
 package cpu
 import (
-	"io/ioutil"
+	"os"
 )
 const (
@ -39,7 +39,7 @@ func readHWCAP() error {
 		return nil
 	}
-	buf, err := ioutil.ReadFile(procAuxv)
+	buf, err := os.ReadFile(procAuxv)
 	if err != nil {
 		// e.g. on android /proc/self/auxv is not accessible, so silently
 		// ignore the error and leave Initialized = false. On some
--- a/vendor/golang.org/x/sys/cpu/proc_cpuinfo_linux.go
+++ b/vendor/golang.org/x/sys/cpu/proc_cpuinfo_linux.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build linux && arm64
 // +build linux,arm64
 package cpu
--- a/vendor/golang.org/x/sys/cpu/runtime_auxv_go121.go
+++ b/vendor/golang.org/x/sys/cpu/runtime_auxv_go121.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build go1.21
 // +build go1.21
 package cpu
--- a/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go
+++ b/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go
@ -9,7 +9,6 @@
 // gccgo's libgo and thus must not used a CGo method.
 //go:build aix && gccgo
 // +build aix,gccgo
 package cpu
--- a/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go
+++ b/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go
@ -7,7 +7,6 @@
 // (See golang.org/issue/32102)
 //go:build aix && ppc64 && gc
 // +build aix,ppc64,gc
 package cpu
--- a/vendor/golang.org/x/sys/execabs/execabs_go118.go
+++ b/vendor/golang.org/x/sys/execabs/execabs_go118.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build !go1.19
 // +build !go1.19
 package execabs
--- a/vendor/golang.org/x/sys/execabs/execabs_go119.go
+++ b/vendor/golang.org/x/sys/execabs/execabs_go119.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build go1.19
 // +build go1.19
 package execabs
--- a/vendor/golang.org/x/sys/internal/unsafeheader/unsafeheader.go
+++ b/vendor/golang.org/x/sys/internal/unsafeheader/unsafeheader.go
@ -1,30 +0,0 @@
 // Copyright 2020 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // Package unsafeheader contains header declarations for the Go runtime's
 // slice and string implementations.
 //
 // This package allows x/sys to use types equivalent to
 // reflect.SliceHeader and reflect.StringHeader without introducing
 // a dependency on the (relatively heavy) "reflect" package.
 package unsafeheader
 import (
 	"unsafe"
 )
 // Slice is the runtime representation of a slice.
 // It cannot be used safely or portably and its representation may change in a later release.
 type Slice struct {
 	Data unsafe.Pointer
 	Len  int
 	Cap  int
 }
 // String is the runtime representation of a string.
 // It cannot be used safely or portably and its representation may change in a later release.
 type String struct {
 	Data unsafe.Pointer
 	Len  int
 }
--- a/vendor/golang.org/x/sys/plan9/pwd_go15_plan9.go
+++ b/vendor/golang.org/x/sys/plan9/pwd_go15_plan9.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build go1.5
 // +build go1.5
 package plan9
--- a/vendor/golang.org/x/sys/plan9/pwd_plan9.go
+++ b/vendor/golang.org/x/sys/plan9/pwd_plan9.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build !go1.5
 // +build !go1.5
 package plan9
--- a/vendor/golang.org/x/sys/plan9/race.go
+++ b/vendor/golang.org/x/sys/plan9/race.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build plan9 && race
 // +build plan9,race
 package plan9
--- a/vendor/golang.org/x/sys/plan9/race0.go
+++ b/vendor/golang.org/x/sys/plan9/race0.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build plan9 && !race
 // +build plan9,!race
 package plan9
--- a/vendor/golang.org/x/sys/plan9/str.go
+++ b/vendor/golang.org/x/sys/plan9/str.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build plan9
 // +build plan9
 package plan9
--- a/vendor/golang.org/x/sys/plan9/syscall.go
+++ b/vendor/golang.org/x/sys/plan9/syscall.go
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build plan9
 // +build plan9
 // Package plan9 contains an interface to the low-level operating system
 // primitives. OS details vary depending on the underlying system, and
--- a/vendor/golang.org/x/sys/plan9/zsyscall_plan9_386.go
+++ b/vendor/golang.org/x/sys/plan9/zsyscall_plan9_386.go
@ -2,7 +2,6 @@
 // Code generated by the command above; see README.md. DO NOT EDIT.
 //go:build plan9 && 386
 // +build plan9,386
 package plan9
--- a/vendor/golang.org/x/sys/plan9/zsyscall_plan9_amd64.go
+++ b/vendor/golang.org/x/sys/plan9/zsyscall_plan9_amd64.go
@ -2,7 +2,6 @@
 // Code generated by the command above; see README.md. DO NOT EDIT.
 //go:build plan9 && amd64
 // +build plan9,amd64
 package plan9
--- a/vendor/golang.org/x/sys/plan9/zsyscall_plan9_arm.go
+++ b/vendor/golang.org/x/sys/plan9/zsyscall_plan9_arm.go
@ -2,7 +2,6 @@
 // Code generated by the command above; see README.md. DO NOT EDIT.
 //go:build plan9 && arm
 // +build plan9,arm
 package plan9
--- a/vendor/golang.org/x/sys/unix/aliases.go
+++ b/vendor/golang.org/x/sys/unix/aliases.go
@ -2,9 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
-//go:build (aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos) && go1.9
+//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
 // +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
 // +build go1.9
 package unix
--- a/vendor/golang.org/x/sys/unix/asm_aix_ppc64.s
+++ b/vendor/golang.org/x/sys/unix/asm_aix_ppc64.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build gc
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_bsd_386.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_386.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build (freebsd || netbsd || openbsd) && gc
 // +build freebsd netbsd openbsd
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_bsd_amd64.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_amd64.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build (darwin || dragonfly || freebsd || netbsd || openbsd) && gc
 // +build darwin dragonfly freebsd netbsd openbsd
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_bsd_arm.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_arm.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build (freebsd || netbsd || openbsd) && gc
 // +build freebsd netbsd openbsd
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_bsd_arm64.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_arm64.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build (darwin || freebsd || netbsd || openbsd) && gc
 // +build darwin freebsd netbsd openbsd
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_bsd_ppc64.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_ppc64.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build (darwin || freebsd || netbsd || openbsd) && gc
 // +build darwin freebsd netbsd openbsd
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_bsd_riscv64.s
+++ b/vendor/golang.org/x/sys/unix/asm_bsd_riscv64.s
@ -3,8 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build (darwin || freebsd || netbsd || openbsd) && gc
 // +build darwin freebsd netbsd openbsd
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_linux_386.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_386.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build gc
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_linux_amd64.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_amd64.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build gc
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_linux_arm.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_arm.s
@ -3,7 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build gc
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_linux_arm64.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_arm64.s
@ -3,9 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build linux && arm64 && gc
 // +build linux
 // +build arm64
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_linux_loong64.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_loong64.s
@ -3,9 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build linux && loong64 && gc
 // +build linux
 // +build loong64
 // +build gc
 #include "textflag.h"
--- a/vendor/golang.org/x/sys/unix/asm_linux_mips64x.s
+++ b/vendor/golang.org/x/sys/unix/asm_linux_mips64x.s
@ -3,9 +3,6 @@
 // license that can be found in the LICENSE file.
 //go:build linux && (mips64 || mips64le) && gc
 // +build linux
 // +build mips64 mips64le
 // +build gc
 #include "textflag.h"
--- a/Show More
+++ b/Show More