chore_: add `klauspost/reedsolomon` module
This commit is contained in:
parent
bd91f5ab49
commit
4f2adc0ced
5
go.mod
5
go.mod
|
@ -84,6 +84,7 @@ require (
|
||||||
github.com/ipfs/go-log/v2 v2.5.1
|
github.com/ipfs/go-log/v2 v2.5.1
|
||||||
github.com/jellydator/ttlcache/v3 v3.2.0
|
github.com/jellydator/ttlcache/v3 v3.2.0
|
||||||
github.com/jmoiron/sqlx v1.3.5
|
github.com/jmoiron/sqlx v1.3.5
|
||||||
|
github.com/klauspost/reedsolomon v1.12.1
|
||||||
github.com/ladydascalie/currency v1.6.0
|
github.com/ladydascalie/currency v1.6.0
|
||||||
github.com/meirf/gopart v0.0.0-20180520194036-37e9492a85a8
|
github.com/meirf/gopart v0.0.0-20180520194036-37e9492a85a8
|
||||||
github.com/mutecomm/go-sqlcipher/v4 v4.4.2
|
github.com/mutecomm/go-sqlcipher/v4 v4.4.2
|
||||||
|
@ -177,7 +178,7 @@ require (
|
||||||
github.com/jackpal/go-nat-pmp v1.0.2 // indirect
|
github.com/jackpal/go-nat-pmp v1.0.2 // indirect
|
||||||
github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect
|
github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect
|
||||||
github.com/klauspost/compress v1.16.7 // indirect
|
github.com/klauspost/compress v1.16.7 // indirect
|
||||||
github.com/klauspost/cpuid/v2 v2.2.5 // indirect
|
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
|
||||||
github.com/koron/go-ssdp v0.0.4 // indirect
|
github.com/koron/go-ssdp v0.0.4 // indirect
|
||||||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
||||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
||||||
|
@ -279,7 +280,7 @@ require (
|
||||||
go.uber.org/fx v1.20.0 // indirect
|
go.uber.org/fx v1.20.0 // indirect
|
||||||
golang.org/x/mod v0.12.0 // indirect
|
golang.org/x/mod v0.12.0 // indirect
|
||||||
golang.org/x/sync v0.3.0 // indirect
|
golang.org/x/sync v0.3.0 // indirect
|
||||||
golang.org/x/sys v0.11.0 // indirect
|
golang.org/x/sys v0.18.0 // indirect
|
||||||
golang.org/x/term v0.11.0 // indirect
|
golang.org/x/term v0.11.0 // indirect
|
||||||
golang.org/x/tools v0.12.1-0.20230818130535-1517d1a3ba60 // indirect
|
golang.org/x/tools v0.12.1-0.20230818130535-1517d1a3ba60 // indirect
|
||||||
golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect
|
golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect
|
||||||
|
|
10
go.sum
10
go.sum
|
@ -1295,10 +1295,12 @@ github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs
|
||||||
github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||||
github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||||
github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||||
github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=
|
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
|
||||||
github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
|
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
|
||||||
github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg=
|
github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg=
|
||||||
github.com/klauspost/pgzip v1.0.2-0.20170402124221-0bf5dcad4ada/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
github.com/klauspost/pgzip v1.0.2-0.20170402124221-0bf5dcad4ada/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
||||||
|
github.com/klauspost/reedsolomon v1.12.1 h1:NhWgum1efX1x58daOBGCFWcxtEhOhXKKl1HAPQUp03Q=
|
||||||
|
github.com/klauspost/reedsolomon v1.12.1/go.mod h1:nEi5Kjb6QqtbofI6s+cbG/j1da11c96IBYBSnVGtuBs=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
|
@ -2637,8 +2639,8 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
|
||||||
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
|
golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
|
||||||
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
|
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
|
||||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
|
|
|
@ -9,10 +9,7 @@ You can access the CPU information by accessing the shared CPU variable of the c
|
||||||
Package home: https://github.com/klauspost/cpuid
|
Package home: https://github.com/klauspost/cpuid
|
||||||
|
|
||||||
[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
|
[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
|
||||||
[![Build Status][3]][4]
|
[![Go](https://github.com/klauspost/cpuid/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/cpuid/actions/workflows/go.yml)
|
||||||
|
|
||||||
[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
|
|
||||||
[4]: https://travis-ci.org/klauspost/cpuid
|
|
||||||
|
|
||||||
## installing
|
## installing
|
||||||
|
|
||||||
|
@ -285,7 +282,12 @@ Exit Code 1
|
||||||
| AMXINT8 | Tile computational operations on 8-bit integers |
|
| AMXINT8 | Tile computational operations on 8-bit integers |
|
||||||
| AMXFP16 | Tile computational operations on FP16 numbers |
|
| AMXFP16 | Tile computational operations on FP16 numbers |
|
||||||
| AMXTILE | Tile architecture |
|
| AMXTILE | Tile architecture |
|
||||||
|
| APX_F | Intel APX |
|
||||||
| AVX | AVX functions |
|
| AVX | AVX functions |
|
||||||
|
| AVX10 | If set the Intel AVX10 Converged Vector ISA is supported |
|
||||||
|
| AVX10_128 | If set indicates that AVX10 128-bit vector support is present |
|
||||||
|
| AVX10_256 | If set indicates that AVX10 256-bit vector support is present |
|
||||||
|
| AVX10_512 | If set indicates that AVX10 512-bit vector support is present |
|
||||||
| AVX2 | AVX2 functions |
|
| AVX2 | AVX2 functions |
|
||||||
| AVX512BF16 | AVX-512 BFLOAT16 Instructions |
|
| AVX512BF16 | AVX-512 BFLOAT16 Instructions |
|
||||||
| AVX512BITALG | AVX-512 Bit Algorithms |
|
| AVX512BITALG | AVX-512 Bit Algorithms |
|
||||||
|
@ -365,6 +367,8 @@ Exit Code 1
|
||||||
| IDPRED_CTRL | IPRED_DIS |
|
| IDPRED_CTRL | IPRED_DIS |
|
||||||
| INT_WBINVD | WBINVD/WBNOINVD are interruptible. |
|
| INT_WBINVD | WBINVD/WBNOINVD are interruptible. |
|
||||||
| INVLPGB | INVLPGB and TLBSYNC instructions supported |
|
| INVLPGB | INVLPGB and TLBSYNC instructions supported |
|
||||||
|
| KEYLOCKER | Key locker |
|
||||||
|
| KEYLOCKERW | Key locker wide |
|
||||||
| LAHF | LAHF/SAHF in long mode |
|
| LAHF | LAHF/SAHF in long mode |
|
||||||
| LAM | If set, CPU supports Linear Address Masking |
|
| LAM | If set, CPU supports Linear Address Masking |
|
||||||
| LBRVIRT | LBR virtualization |
|
| LBRVIRT | LBR virtualization |
|
||||||
|
@ -380,7 +384,7 @@ Exit Code 1
|
||||||
| MOVDIRI | Move Doubleword as Direct Store |
|
| MOVDIRI | Move Doubleword as Direct Store |
|
||||||
| MOVSB_ZL | Fast Zero-Length MOVSB |
|
| MOVSB_ZL | Fast Zero-Length MOVSB |
|
||||||
| MPX | Intel MPX (Memory Protection Extensions) |
|
| MPX | Intel MPX (Memory Protection Extensions) |
|
||||||
| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD |
|
| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD |
|
||||||
| MSRIRC | Instruction Retired Counter MSR available |
|
| MSRIRC | Instruction Retired Counter MSR available |
|
||||||
| MSRLIST | Read/Write List of Model Specific Registers |
|
| MSRLIST | Read/Write List of Model Specific Registers |
|
||||||
| MSR_PAGEFLUSH | Page Flush MSR available |
|
| MSR_PAGEFLUSH | Page Flush MSR available |
|
||||||
|
|
|
@ -67,188 +67,200 @@ const (
|
||||||
// Keep index -1 as unknown
|
// Keep index -1 as unknown
|
||||||
UNKNOWN = -1
|
UNKNOWN = -1
|
||||||
|
|
||||||
// Add features
|
// x86 features
|
||||||
ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
|
ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
|
||||||
AESNI // Advanced Encryption Standard New Instructions
|
AESNI // Advanced Encryption Standard New Instructions
|
||||||
AMD3DNOW // AMD 3DNOW
|
AMD3DNOW // AMD 3DNOW
|
||||||
AMD3DNOWEXT // AMD 3DNowExt
|
AMD3DNOWEXT // AMD 3DNowExt
|
||||||
AMXBF16 // Tile computational operations on BFLOAT16 numbers
|
AMXBF16 // Tile computational operations on BFLOAT16 numbers
|
||||||
AMXFP16 // Tile computational operations on FP16 numbers
|
AMXFP16 // Tile computational operations on FP16 numbers
|
||||||
AMXINT8 // Tile computational operations on 8-bit integers
|
AMXINT8 // Tile computational operations on 8-bit integers
|
||||||
AMXTILE // Tile architecture
|
AMXTILE // Tile architecture
|
||||||
AVX // AVX functions
|
APX_F // Intel APX
|
||||||
AVX2 // AVX2 functions
|
AVX // AVX functions
|
||||||
AVX512BF16 // AVX-512 BFLOAT16 Instructions
|
AVX10 // If set the Intel AVX10 Converged Vector ISA is supported
|
||||||
AVX512BITALG // AVX-512 Bit Algorithms
|
AVX10_128 // If set indicates that AVX10 128-bit vector support is present
|
||||||
AVX512BW // AVX-512 Byte and Word Instructions
|
AVX10_256 // If set indicates that AVX10 256-bit vector support is present
|
||||||
AVX512CD // AVX-512 Conflict Detection Instructions
|
AVX10_512 // If set indicates that AVX10 512-bit vector support is present
|
||||||
AVX512DQ // AVX-512 Doubleword and Quadword Instructions
|
AVX2 // AVX2 functions
|
||||||
AVX512ER // AVX-512 Exponential and Reciprocal Instructions
|
AVX512BF16 // AVX-512 BFLOAT16 Instructions
|
||||||
AVX512F // AVX-512 Foundation
|
AVX512BITALG // AVX-512 Bit Algorithms
|
||||||
AVX512FP16 // AVX-512 FP16 Instructions
|
AVX512BW // AVX-512 Byte and Word Instructions
|
||||||
AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
|
AVX512CD // AVX-512 Conflict Detection Instructions
|
||||||
AVX512PF // AVX-512 Prefetch Instructions
|
AVX512DQ // AVX-512 Doubleword and Quadword Instructions
|
||||||
AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
|
AVX512ER // AVX-512 Exponential and Reciprocal Instructions
|
||||||
AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
|
AVX512F // AVX-512 Foundation
|
||||||
AVX512VL // AVX-512 Vector Length Extensions
|
AVX512FP16 // AVX-512 FP16 Instructions
|
||||||
AVX512VNNI // AVX-512 Vector Neural Network Instructions
|
AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
|
||||||
AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
|
AVX512PF // AVX-512 Prefetch Instructions
|
||||||
AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
|
AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
|
||||||
AVXIFMA // AVX-IFMA instructions
|
AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
|
||||||
AVXNECONVERT // AVX-NE-CONVERT instructions
|
AVX512VL // AVX-512 Vector Length Extensions
|
||||||
AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
|
AVX512VNNI // AVX-512 Vector Neural Network Instructions
|
||||||
AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
|
AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
|
||||||
AVXVNNIINT8 // AVX-VNNI-INT8 instructions
|
AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
|
||||||
BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
|
AVXIFMA // AVX-IFMA instructions
|
||||||
BMI1 // Bit Manipulation Instruction Set 1
|
AVXNECONVERT // AVX-NE-CONVERT instructions
|
||||||
BMI2 // Bit Manipulation Instruction Set 2
|
AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
|
||||||
CETIBT // Intel CET Indirect Branch Tracking
|
AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
|
||||||
CETSS // Intel CET Shadow Stack
|
AVXVNNIINT8 // AVX-VNNI-INT8 instructions
|
||||||
CLDEMOTE // Cache Line Demote
|
BHI_CTRL // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
|
||||||
CLMUL // Carry-less Multiplication
|
BMI1 // Bit Manipulation Instruction Set 1
|
||||||
CLZERO // CLZERO instruction supported
|
BMI2 // Bit Manipulation Instruction Set 2
|
||||||
CMOV // i686 CMOV
|
CETIBT // Intel CET Indirect Branch Tracking
|
||||||
CMPCCXADD // CMPCCXADD instructions
|
CETSS // Intel CET Shadow Stack
|
||||||
CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
|
CLDEMOTE // Cache Line Demote
|
||||||
CMPXCHG8 // CMPXCHG8 instruction
|
CLMUL // Carry-less Multiplication
|
||||||
CPBOOST // Core Performance Boost
|
CLZERO // CLZERO instruction supported
|
||||||
CPPC // AMD: Collaborative Processor Performance Control
|
CMOV // i686 CMOV
|
||||||
CX16 // CMPXCHG16B Instruction
|
CMPCCXADD // CMPCCXADD instructions
|
||||||
EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
|
CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
|
||||||
ENQCMD // Enqueue Command
|
CMPXCHG8 // CMPXCHG8 instruction
|
||||||
ERMS // Enhanced REP MOVSB/STOSB
|
CPBOOST // Core Performance Boost
|
||||||
F16C // Half-precision floating-point conversion
|
CPPC // AMD: Collaborative Processor Performance Control
|
||||||
FLUSH_L1D // Flush L1D cache
|
CX16 // CMPXCHG16B Instruction
|
||||||
FMA3 // Intel FMA 3. Does not imply AVX.
|
EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
|
||||||
FMA4 // Bulldozer FMA4 functions
|
ENQCMD // Enqueue Command
|
||||||
FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
|
ERMS // Enhanced REP MOVSB/STOSB
|
||||||
FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
|
F16C // Half-precision floating-point conversion
|
||||||
FSRM // Fast Short Rep Mov
|
FLUSH_L1D // Flush L1D cache
|
||||||
FXSR // FXSAVE, FXRSTOR instructions, CR4 bit 9
|
FMA3 // Intel FMA 3. Does not imply AVX.
|
||||||
FXSROPT // FXSAVE/FXRSTOR optimizations
|
FMA4 // Bulldozer FMA4 functions
|
||||||
GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
|
FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
|
||||||
HLE // Hardware Lock Elision
|
FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
|
||||||
HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
|
FSRM // Fast Short Rep Mov
|
||||||
HTT // Hyperthreading (enabled)
|
FXSR // FXSAVE, FXRSTOR instructions, CR4 bit 9
|
||||||
HWA // Hardware assert supported. Indicates support for MSRC001_10
|
FXSROPT // FXSAVE/FXRSTOR optimizations
|
||||||
HYBRID_CPU // This part has CPUs of more than one type.
|
GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
|
||||||
HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
|
HLE // Hardware Lock Elision
|
||||||
IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
|
HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
|
||||||
IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
|
HTT // Hyperthreading (enabled)
|
||||||
IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
|
HWA // Hardware assert supported. Indicates support for MSRC001_10
|
||||||
IBRS // AMD: Indirect Branch Restricted Speculation
|
HYBRID_CPU // This part has CPUs of more than one type.
|
||||||
IBRS_PREFERRED // AMD: IBRS is preferred over software solution
|
HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
|
||||||
IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
|
IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
|
||||||
IBS // Instruction Based Sampling (AMD)
|
IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
|
||||||
IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
|
IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
|
||||||
IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
|
IBPB_BRTYPE // Indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes all branch type predictions from the CPU branch predictor
|
||||||
IBSFFV // Instruction Based Sampling Feature (AMD)
|
IBRS // AMD: Indirect Branch Restricted Speculation
|
||||||
IBSOPCNT // Instruction Based Sampling Feature (AMD)
|
IBRS_PREFERRED // AMD: IBRS is preferred over software solution
|
||||||
IBSOPCNTEXT // Instruction Based Sampling Feature (AMD)
|
IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
|
||||||
IBSOPSAM // Instruction Based Sampling Feature (AMD)
|
IBS // Instruction Based Sampling (AMD)
|
||||||
IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
|
IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
|
||||||
IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
|
IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
|
||||||
IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported
|
IBSFFV // Instruction Based Sampling Feature (AMD)
|
||||||
IBS_OPDATA4 // AMD: IBS op data 4 MSR supported
|
IBSOPCNT // Instruction Based Sampling Feature (AMD)
|
||||||
IBS_OPFUSE // AMD: Indicates support for IbsOpFuse
|
IBSOPCNTEXT // Instruction Based Sampling Feature (AMD)
|
||||||
IBS_PREVENTHOST // Disallowing IBS use by the host supported
|
IBSOPSAM // Instruction Based Sampling Feature (AMD)
|
||||||
IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4
|
IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
|
||||||
IDPRED_CTRL // IPRED_DIS
|
IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
|
||||||
INT_WBINVD // WBINVD/WBNOINVD are interruptible.
|
IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported
|
||||||
INVLPGB // INVLPGB and TLBSYNC instructions supported
|
IBS_OPDATA4 // AMD: IBS op data 4 MSR supported
|
||||||
LAHF // LAHF/SAHF in long mode
|
IBS_OPFUSE // AMD: Indicates support for IbsOpFuse
|
||||||
LAM // If set, CPU supports Linear Address Masking
|
IBS_PREVENTHOST // Disallowing IBS use by the host supported
|
||||||
LBRVIRT // LBR virtualization
|
IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4
|
||||||
LZCNT // LZCNT instruction
|
IDPRED_CTRL // IPRED_DIS
|
||||||
MCAOVERFLOW // MCA overflow recovery support.
|
INT_WBINVD // WBINVD/WBNOINVD are interruptible.
|
||||||
MCDT_NO // Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.
|
INVLPGB // INVLPGB and TLBSYNC instructions supported
|
||||||
MCOMMIT // MCOMMIT instruction supported
|
KEYLOCKER // Key locker
|
||||||
MD_CLEAR // VERW clears CPU buffers
|
KEYLOCKERW // Key locker wide
|
||||||
MMX // standard MMX
|
LAHF // LAHF/SAHF in long mode
|
||||||
MMXEXT // SSE integer functions or AMD MMX ext
|
LAM // If set, CPU supports Linear Address Masking
|
||||||
MOVBE // MOVBE instruction (big-endian)
|
LBRVIRT // LBR virtualization
|
||||||
MOVDIR64B // Move 64 Bytes as Direct Store
|
LZCNT // LZCNT instruction
|
||||||
MOVDIRI // Move Doubleword as Direct Store
|
MCAOVERFLOW // MCA overflow recovery support.
|
||||||
MOVSB_ZL // Fast Zero-Length MOVSB
|
MCDT_NO // Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.
|
||||||
MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
|
MCOMMIT // MCOMMIT instruction supported
|
||||||
MPX // Intel MPX (Memory Protection Extensions)
|
MD_CLEAR // VERW clears CPU buffers
|
||||||
MSRIRC // Instruction Retired Counter MSR available
|
MMX // standard MMX
|
||||||
MSRLIST // Read/Write List of Model Specific Registers
|
MMXEXT // SSE integer functions or AMD MMX ext
|
||||||
MSR_PAGEFLUSH // Page Flush MSR available
|
MOVBE // MOVBE instruction (big-endian)
|
||||||
NRIPS // Indicates support for NRIP save on VMEXIT
|
MOVDIR64B // Move 64 Bytes as Direct Store
|
||||||
NX // NX (No-Execute) bit
|
MOVDIRI // Move Doubleword as Direct Store
|
||||||
OSXSAVE // XSAVE enabled by OS
|
MOVSB_ZL // Fast Zero-Length MOVSB
|
||||||
PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
|
MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
|
||||||
POPCNT // POPCNT instruction
|
MPX // Intel MPX (Memory Protection Extensions)
|
||||||
PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
|
MSRIRC // Instruction Retired Counter MSR available
|
||||||
PREFETCHI // PREFETCHIT0/1 instructions
|
MSRLIST // Read/Write List of Model Specific Registers
|
||||||
PSFD // Predictive Store Forward Disable
|
MSR_PAGEFLUSH // Page Flush MSR available
|
||||||
RDPRU // RDPRU instruction supported
|
NRIPS // Indicates support for NRIP save on VMEXIT
|
||||||
RDRAND // RDRAND instruction is available
|
NX // NX (No-Execute) bit
|
||||||
RDSEED // RDSEED instruction is available
|
OSXSAVE // XSAVE enabled by OS
|
||||||
RDTSCP // RDTSCP Instruction
|
PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
|
||||||
RRSBA_CTRL // Restricted RSB Alternate
|
POPCNT // POPCNT instruction
|
||||||
RTM // Restricted Transactional Memory
|
PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
|
||||||
RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
|
PREFETCHI // PREFETCHIT0/1 instructions
|
||||||
SERIALIZE // Serialize Instruction Execution
|
PSFD // Predictive Store Forward Disable
|
||||||
SEV // AMD Secure Encrypted Virtualization supported
|
RDPRU // RDPRU instruction supported
|
||||||
SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
|
RDRAND // RDRAND instruction is available
|
||||||
SEV_ALTERNATIVE // AMD SEV Alternate Injection supported
|
RDSEED // RDSEED instruction is available
|
||||||
SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests
|
RDTSCP // RDTSCP Instruction
|
||||||
SEV_ES // AMD SEV Encrypted State supported
|
RRSBA_CTRL // Restricted RSB Alternate
|
||||||
SEV_RESTRICTED // AMD SEV Restricted Injection supported
|
RTM // Restricted Transactional Memory
|
||||||
SEV_SNP // AMD SEV Secure Nested Paging supported
|
RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
|
||||||
SGX // Software Guard Extensions
|
SBPB // Indicates support for the Selective Branch Predictor Barrier
|
||||||
SGXLC // Software Guard Extensions Launch Control
|
SERIALIZE // Serialize Instruction Execution
|
||||||
SHA // Intel SHA Extensions
|
SEV // AMD Secure Encrypted Virtualization supported
|
||||||
SME // AMD Secure Memory Encryption supported
|
SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
|
||||||
SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
|
SEV_ALTERNATIVE // AMD SEV Alternate Injection supported
|
||||||
SPEC_CTRL_SSBD // Speculative Store Bypass Disable
|
SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests
|
||||||
SRBDS_CTRL // SRBDS mitigation MSR available
|
SEV_ES // AMD SEV Encrypted State supported
|
||||||
SSE // SSE functions
|
SEV_RESTRICTED // AMD SEV Restricted Injection supported
|
||||||
SSE2 // P4 SSE functions
|
SEV_SNP // AMD SEV Secure Nested Paging supported
|
||||||
SSE3 // Prescott SSE3 functions
|
SGX // Software Guard Extensions
|
||||||
SSE4 // Penryn SSE4.1 functions
|
SGXLC // Software Guard Extensions Launch Control
|
||||||
SSE42 // Nehalem SSE4.2 functions
|
SHA // Intel SHA Extensions
|
||||||
SSE4A // AMD Barcelona microarchitecture SSE4a instructions
|
SME // AMD Secure Memory Encryption supported
|
||||||
SSSE3 // Conroe SSSE3 functions
|
SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
|
||||||
STIBP // Single Thread Indirect Branch Predictors
|
SPEC_CTRL_SSBD // Speculative Store Bypass Disable
|
||||||
STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
|
SRBDS_CTRL // SRBDS mitigation MSR available
|
||||||
STOSB_SHORT // Fast short STOSB
|
SRSO_MSR_FIX // Indicates that software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO.
|
||||||
SUCCOR // Software uncorrectable error containment and recovery capability.
|
SRSO_NO // Indicates the CPU is not subject to the SRSO vulnerability
|
||||||
SVM // AMD Secure Virtual Machine
|
SRSO_USER_KERNEL_NO // Indicates the CPU is not subject to the SRSO vulnerability across user/kernel boundaries
|
||||||
SVMDA // Indicates support for the SVM decode assists.
|
SSE // SSE functions
|
||||||
SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
|
SSE2 // P4 SSE functions
|
||||||
SVML // AMD SVM lock. Indicates support for SVM-Lock.
|
SSE3 // Prescott SSE3 functions
|
||||||
SVMNP // AMD SVM nested paging
|
SSE4 // Penryn SSE4.1 functions
|
||||||
SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter
|
SSE42 // Nehalem SSE4.2 functions
|
||||||
SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
|
SSE4A // AMD Barcelona microarchitecture SSE4a instructions
|
||||||
SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
|
SSSE3 // Conroe SSSE3 functions
|
||||||
SYSEE // SYSENTER and SYSEXIT instructions
|
STIBP // Single Thread Indirect Branch Predictors
|
||||||
TBM // AMD Trailing Bit Manipulation
|
STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
|
||||||
TDX_GUEST // Intel Trust Domain Extensions Guest
|
STOSB_SHORT // Fast short STOSB
|
||||||
TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
|
SUCCOR // Software uncorrectable error containment and recovery capability.
|
||||||
TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
|
SVM // AMD Secure Virtual Machine
|
||||||
TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
|
SVMDA // Indicates support for the SVM decode assists.
|
||||||
TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
|
SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
|
||||||
TSXLDTRK // Intel TSX Suspend Load Address Tracking
|
SVML // AMD SVM lock. Indicates support for SVM-Lock.
|
||||||
VAES // Vector AES. AVX(512) versions requires additional checks.
|
SVMNP // AMD SVM nested paging
|
||||||
VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits.
|
SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter
|
||||||
VMPL // AMD VM Permission Levels supported
|
SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
|
||||||
VMSA_REGPROT // AMD VMSA Register Protection supported
|
SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
|
||||||
VMX // Virtual Machine Extensions
|
SYSEE // SYSENTER and SYSEXIT instructions
|
||||||
VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
|
TBM // AMD Trailing Bit Manipulation
|
||||||
VTE // AMD Virtual Transparent Encryption supported
|
TDX_GUEST // Intel Trust Domain Extensions Guest
|
||||||
WAITPKG // TPAUSE, UMONITOR, UMWAIT
|
TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
|
||||||
WBNOINVD // Write Back and Do Not Invalidate Cache
|
TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
|
||||||
WRMSRNS // Non-Serializing Write to Model Specific Register
|
TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
|
||||||
X87 // FPU
|
TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
|
||||||
XGETBV1 // Supports XGETBV with ECX = 1
|
TSXLDTRK // Intel TSX Suspend Load Address Tracking
|
||||||
XOP // Bulldozer XOP functions
|
VAES // Vector AES. AVX(512) versions requires additional checks.
|
||||||
XSAVE // XSAVE, XRSTOR, XSETBV, XGETBV
|
VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits.
|
||||||
XSAVEC // Supports XSAVEC and the compacted form of XRSTOR.
|
VMPL // AMD VM Permission Levels supported
|
||||||
XSAVEOPT // XSAVEOPT available
|
VMSA_REGPROT // AMD VMSA Register Protection supported
|
||||||
XSAVES // Supports XSAVES/XRSTORS and IA32_XSS
|
VMX // Virtual Machine Extensions
|
||||||
|
VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
|
||||||
|
VTE // AMD Virtual Transparent Encryption supported
|
||||||
|
WAITPKG // TPAUSE, UMONITOR, UMWAIT
|
||||||
|
WBNOINVD // Write Back and Do Not Invalidate Cache
|
||||||
|
WRMSRNS // Non-Serializing Write to Model Specific Register
|
||||||
|
X87 // FPU
|
||||||
|
XGETBV1 // Supports XGETBV with ECX = 1
|
||||||
|
XOP // Bulldozer XOP functions
|
||||||
|
XSAVE // XSAVE, XRSTOR, XSETBV, XGETBV
|
||||||
|
XSAVEC // Supports XSAVEC and the compacted form of XRSTOR.
|
||||||
|
XSAVEOPT // XSAVEOPT available
|
||||||
|
XSAVES // Supports XSAVES/XRSTORS and IA32_XSS
|
||||||
|
|
||||||
// ARM features:
|
// ARM features:
|
||||||
AESARM // AES instructions
|
AESARM // AES instructions
|
||||||
|
@ -302,9 +314,11 @@ type CPUInfo struct {
|
||||||
L2 int // L2 Cache (per core or shared). Will be -1 if undetected
|
L2 int // L2 Cache (per core or shared). Will be -1 if undetected
|
||||||
L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
|
L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
|
||||||
}
|
}
|
||||||
SGX SGXSupport
|
SGX SGXSupport
|
||||||
maxFunc uint32
|
AMDMemEncryption AMDMemEncryptionSupport
|
||||||
maxExFunc uint32
|
AVX10Level uint8
|
||||||
|
maxFunc uint32
|
||||||
|
maxExFunc uint32
|
||||||
}
|
}
|
||||||
|
|
||||||
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
|
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
|
||||||
|
@ -1071,6 +1085,32 @@ func hasSGX(available, lc bool) (rval SGXSupport) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AMDMemEncryptionSupport holds the AMD memory-encryption parameters that
// hasAMDMemEncryption reads via cpuidex(0x8000001f, 0).
// Apart from Available, the fields are left at zero when the feature is
// reported as unavailable.
type AMDMemEncryptionSupport struct {
|
||||||
|
// Available is the flag passed in by the caller of hasAMDMemEncryption.
Available bool
|
||||||
|
// CBitPossition is bits 5:0 of the second register returned by the query
// (presumably EBX: the page-table C-bit position - confirm against AMD docs).
CBitPossition uint32
|
||||||
|
// NumVMPL is bits 15:12 of that same register.
NumVMPL uint32
|
||||||
|
// PhysAddrReduction is bits 11:6 of that same register.
PhysAddrReduction uint32
|
||||||
|
// NumEntryptedGuests is the third register returned by the query, unmodified.
NumEntryptedGuests uint32
|
||||||
|
// MinSevNoEsAsid is the fourth register returned by the query, unmodified.
MinSevNoEsAsid uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
// hasAMDMemEncryption fills an AMDMemEncryptionSupport from
// cpuidex(0x8000001f, 0).
//
// available is copied to rval.Available; when it is false the function
// returns immediately without issuing the query, leaving the other fields zero.
func hasAMDMemEncryption(available bool) (rval AMDMemEncryptionSupport) {
|
||||||
|
rval.Available = available
|
||||||
|
if !available {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// The first result (presumably EAX) is not needed here and is discarded.
_, b, c, d := cpuidex(0x8000001f, 0)
|
||||||
|
|
||||||
|
// b packs three fields: bits 5:0, bits 11:6 and bits 15:12.
rval.CBitPossition = b & 0x3f
|
||||||
|
rval.PhysAddrReduction = (b >> 6) & 0x3F
|
||||||
|
rval.NumVMPL = (b >> 12) & 0xf
|
||||||
|
// c and d are stored as-is, without masking.
rval.NumEntryptedGuests = c
|
||||||
|
rval.MinSevNoEsAsid = d
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
func support() flagSet {
|
func support() flagSet {
|
||||||
var fs flagSet
|
var fs flagSet
|
||||||
mfi := maxFunctionID()
|
mfi := maxFunctionID()
|
||||||
|
@ -1165,6 +1205,7 @@ func support() flagSet {
|
||||||
fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
|
fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
|
||||||
fs.setIf(ecx&(1<<13) != 0, TME)
|
fs.setIf(ecx&(1<<13) != 0, TME)
|
||||||
fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
|
fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
|
||||||
|
fs.setIf(ecx&(1<<23) != 0, KEYLOCKER)
|
||||||
fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
|
fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
|
||||||
fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
|
fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
|
||||||
fs.setIf(ecx&(1<<29) != 0, ENQCMD)
|
fs.setIf(ecx&(1<<29) != 0, ENQCMD)
|
||||||
|
@ -1202,6 +1243,8 @@ func support() flagSet {
|
||||||
fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
|
fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
|
||||||
fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
|
fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
|
||||||
fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
|
fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
|
||||||
|
fs.setIf(edx1&(1<<19) != 0, AVX10)
|
||||||
|
fs.setIf(edx1&(1<<21) != 0, APX_F)
|
||||||
|
|
||||||
// Only detect AVX-512 features if XGETBV is supported
|
// Only detect AVX-512 features if XGETBV is supported
|
||||||
if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
|
if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
|
||||||
|
@ -1252,6 +1295,19 @@ func support() flagSet {
|
||||||
fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
|
fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
|
||||||
fs.setIf(edx&(1<<5) != 0, MCDT_NO)
|
fs.setIf(edx&(1<<5) != 0, MCDT_NO)
|
||||||
|
|
||||||
|
// Add keylocker features.
|
||||||
|
if fs.inSet(KEYLOCKER) && mfi >= 0x19 {
|
||||||
|
_, ebx, _, _ := cpuidex(0x19, 0)
|
||||||
|
fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add AVX10 features.
|
||||||
|
if fs.inSet(AVX10) && mfi >= 0x24 {
|
||||||
|
_, ebx, _, _ := cpuidex(0x24, 0)
|
||||||
|
fs.setIf(ebx&(1<<16) != 0, AVX10_128)
|
||||||
|
fs.setIf(ebx&(1<<17) != 0, AVX10_256)
|
||||||
|
fs.setIf(ebx&(1<<18) != 0, AVX10_512)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
|
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
|
||||||
|
@ -1394,6 +1450,29 @@ func support() flagSet {
|
||||||
fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
|
fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if maxExtendedFunction() >= 0x80000021 && vend == AMD {
|
||||||
|
a, _, _, _ := cpuid(0x80000021)
|
||||||
|
fs.setIf((a>>31)&1 == 1, SRSO_MSR_FIX)
|
||||||
|
fs.setIf((a>>30)&1 == 1, SRSO_USER_KERNEL_NO)
|
||||||
|
fs.setIf((a>>29)&1 == 1, SRSO_NO)
|
||||||
|
fs.setIf((a>>28)&1 == 1, IBPB_BRTYPE)
|
||||||
|
fs.setIf((a>>27)&1 == 1, SBPB)
|
||||||
|
}
|
||||||
|
|
||||||
|
if mfi >= 0x20 {
|
||||||
|
// Microsoft has decided to purposefully hide the information
|
||||||
|
// of the guest TEE when VMs are being created using Hyper-V.
|
||||||
|
//
|
||||||
|
// This leads us to check for the Hyper-V cpuid features
|
||||||
|
// (0x4000000C), and then for the `ebx` value set.
|
||||||
|
//
|
||||||
|
// For Intel TDX, `ebx` is set to `0xbe3`, with 3 being the part
|
||||||
|
// we're mostly interested in, according to:
|
||||||
|
// https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174
|
||||||
|
_, ebx, _, _ := cpuid(0x4000000C)
|
||||||
|
fs.setIf(ebx == 0xbe3, TDX_GUEST)
|
||||||
|
}
|
||||||
|
|
||||||
if mfi >= 0x21 {
|
if mfi >= 0x21 {
|
||||||
// Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21).
|
// Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21).
|
||||||
_, ebx, ecx, edx := cpuid(0x21)
|
_, ebx, ecx, edx := cpuid(0x21)
|
||||||
|
@ -1404,6 +1483,14 @@ func support() flagSet {
|
||||||
return fs
|
return fs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// supportAVX10 returns the low 8 bits of the second register (ebx) reported by
// cpuidex(0x24, 0), or 0 when that leaf cannot be queried.
//
// The leaf is only read when c.maxFunc is at least 0x24 and the AVX10 feature
// flag is present in c.featureSet.
func (c *CPUInfo) supportAVX10() uint8 {
|
||||||
|
if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) {
|
||||||
|
_, ebx, _, _ := cpuidex(0x24, 0)
|
||||||
|
// The conversion truncates to bits 7:0; higher bits of ebx are dropped.
return uint8(ebx)
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
func valAsString(values ...uint32) []byte {
|
func valAsString(values ...uint32) []byte {
|
||||||
r := make([]byte, 4*len(values))
|
r := make([]byte, 4*len(values))
|
||||||
for i, v := range values {
|
for i, v := range values {
|
||||||
|
|
|
@ -27,10 +27,12 @@ func addInfo(c *CPUInfo, safe bool) {
|
||||||
c.Family, c.Model, c.Stepping = familyModel()
|
c.Family, c.Model, c.Stepping = familyModel()
|
||||||
c.featureSet = support()
|
c.featureSet = support()
|
||||||
c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
|
c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
|
||||||
|
c.AMDMemEncryption = hasAMDMemEncryption(c.featureSet.inSet(SME) || c.featureSet.inSet(SEV))
|
||||||
c.ThreadsPerCore = threadsPerCore()
|
c.ThreadsPerCore = threadsPerCore()
|
||||||
c.LogicalCores = logicalCores()
|
c.LogicalCores = logicalCores()
|
||||||
c.PhysicalCores = physicalCores()
|
c.PhysicalCores = physicalCores()
|
||||||
c.VendorID, c.VendorString = vendorID()
|
c.VendorID, c.VendorString = vendorID()
|
||||||
|
c.AVX10Level = c.supportAVX10()
|
||||||
c.cacheSize()
|
c.cacheSize()
|
||||||
c.frequencies()
|
c.frequencies()
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,210 +16,222 @@ func _() {
|
||||||
_ = x[AMXFP16-6]
|
_ = x[AMXFP16-6]
|
||||||
_ = x[AMXINT8-7]
|
_ = x[AMXINT8-7]
|
||||||
_ = x[AMXTILE-8]
|
_ = x[AMXTILE-8]
|
||||||
_ = x[AVX-9]
|
_ = x[APX_F-9]
|
||||||
_ = x[AVX2-10]
|
_ = x[AVX-10]
|
||||||
_ = x[AVX512BF16-11]
|
_ = x[AVX10-11]
|
||||||
_ = x[AVX512BITALG-12]
|
_ = x[AVX10_128-12]
|
||||||
_ = x[AVX512BW-13]
|
_ = x[AVX10_256-13]
|
||||||
_ = x[AVX512CD-14]
|
_ = x[AVX10_512-14]
|
||||||
_ = x[AVX512DQ-15]
|
_ = x[AVX2-15]
|
||||||
_ = x[AVX512ER-16]
|
_ = x[AVX512BF16-16]
|
||||||
_ = x[AVX512F-17]
|
_ = x[AVX512BITALG-17]
|
||||||
_ = x[AVX512FP16-18]
|
_ = x[AVX512BW-18]
|
||||||
_ = x[AVX512IFMA-19]
|
_ = x[AVX512CD-19]
|
||||||
_ = x[AVX512PF-20]
|
_ = x[AVX512DQ-20]
|
||||||
_ = x[AVX512VBMI-21]
|
_ = x[AVX512ER-21]
|
||||||
_ = x[AVX512VBMI2-22]
|
_ = x[AVX512F-22]
|
||||||
_ = x[AVX512VL-23]
|
_ = x[AVX512FP16-23]
|
||||||
_ = x[AVX512VNNI-24]
|
_ = x[AVX512IFMA-24]
|
||||||
_ = x[AVX512VP2INTERSECT-25]
|
_ = x[AVX512PF-25]
|
||||||
_ = x[AVX512VPOPCNTDQ-26]
|
_ = x[AVX512VBMI-26]
|
||||||
_ = x[AVXIFMA-27]
|
_ = x[AVX512VBMI2-27]
|
||||||
_ = x[AVXNECONVERT-28]
|
_ = x[AVX512VL-28]
|
||||||
_ = x[AVXSLOW-29]
|
_ = x[AVX512VNNI-29]
|
||||||
_ = x[AVXVNNI-30]
|
_ = x[AVX512VP2INTERSECT-30]
|
||||||
_ = x[AVXVNNIINT8-31]
|
_ = x[AVX512VPOPCNTDQ-31]
|
||||||
_ = x[BHI_CTRL-32]
|
_ = x[AVXIFMA-32]
|
||||||
_ = x[BMI1-33]
|
_ = x[AVXNECONVERT-33]
|
||||||
_ = x[BMI2-34]
|
_ = x[AVXSLOW-34]
|
||||||
_ = x[CETIBT-35]
|
_ = x[AVXVNNI-35]
|
||||||
_ = x[CETSS-36]
|
_ = x[AVXVNNIINT8-36]
|
||||||
_ = x[CLDEMOTE-37]
|
_ = x[BHI_CTRL-37]
|
||||||
_ = x[CLMUL-38]
|
_ = x[BMI1-38]
|
||||||
_ = x[CLZERO-39]
|
_ = x[BMI2-39]
|
||||||
_ = x[CMOV-40]
|
_ = x[CETIBT-40]
|
||||||
_ = x[CMPCCXADD-41]
|
_ = x[CETSS-41]
|
||||||
_ = x[CMPSB_SCADBS_SHORT-42]
|
_ = x[CLDEMOTE-42]
|
||||||
_ = x[CMPXCHG8-43]
|
_ = x[CLMUL-43]
|
||||||
_ = x[CPBOOST-44]
|
_ = x[CLZERO-44]
|
||||||
_ = x[CPPC-45]
|
_ = x[CMOV-45]
|
||||||
_ = x[CX16-46]
|
_ = x[CMPCCXADD-46]
|
||||||
_ = x[EFER_LMSLE_UNS-47]
|
_ = x[CMPSB_SCADBS_SHORT-47]
|
||||||
_ = x[ENQCMD-48]
|
_ = x[CMPXCHG8-48]
|
||||||
_ = x[ERMS-49]
|
_ = x[CPBOOST-49]
|
||||||
_ = x[F16C-50]
|
_ = x[CPPC-50]
|
||||||
_ = x[FLUSH_L1D-51]
|
_ = x[CX16-51]
|
||||||
_ = x[FMA3-52]
|
_ = x[EFER_LMSLE_UNS-52]
|
||||||
_ = x[FMA4-53]
|
_ = x[ENQCMD-53]
|
||||||
_ = x[FP128-54]
|
_ = x[ERMS-54]
|
||||||
_ = x[FP256-55]
|
_ = x[F16C-55]
|
||||||
_ = x[FSRM-56]
|
_ = x[FLUSH_L1D-56]
|
||||||
_ = x[FXSR-57]
|
_ = x[FMA3-57]
|
||||||
_ = x[FXSROPT-58]
|
_ = x[FMA4-58]
|
||||||
_ = x[GFNI-59]
|
_ = x[FP128-59]
|
||||||
_ = x[HLE-60]
|
_ = x[FP256-60]
|
||||||
_ = x[HRESET-61]
|
_ = x[FSRM-61]
|
||||||
_ = x[HTT-62]
|
_ = x[FXSR-62]
|
||||||
_ = x[HWA-63]
|
_ = x[FXSROPT-63]
|
||||||
_ = x[HYBRID_CPU-64]
|
_ = x[GFNI-64]
|
||||||
_ = x[HYPERVISOR-65]
|
_ = x[HLE-65]
|
||||||
_ = x[IA32_ARCH_CAP-66]
|
_ = x[HRESET-66]
|
||||||
_ = x[IA32_CORE_CAP-67]
|
_ = x[HTT-67]
|
||||||
_ = x[IBPB-68]
|
_ = x[HWA-68]
|
||||||
_ = x[IBRS-69]
|
_ = x[HYBRID_CPU-69]
|
||||||
_ = x[IBRS_PREFERRED-70]
|
_ = x[HYPERVISOR-70]
|
||||||
_ = x[IBRS_PROVIDES_SMP-71]
|
_ = x[IA32_ARCH_CAP-71]
|
||||||
_ = x[IBS-72]
|
_ = x[IA32_CORE_CAP-72]
|
||||||
_ = x[IBSBRNTRGT-73]
|
_ = x[IBPB-73]
|
||||||
_ = x[IBSFETCHSAM-74]
|
_ = x[IBPB_BRTYPE-74]
|
||||||
_ = x[IBSFFV-75]
|
_ = x[IBRS-75]
|
||||||
_ = x[IBSOPCNT-76]
|
_ = x[IBRS_PREFERRED-76]
|
||||||
_ = x[IBSOPCNTEXT-77]
|
_ = x[IBRS_PROVIDES_SMP-77]
|
||||||
_ = x[IBSOPSAM-78]
|
_ = x[IBS-78]
|
||||||
_ = x[IBSRDWROPCNT-79]
|
_ = x[IBSBRNTRGT-79]
|
||||||
_ = x[IBSRIPINVALIDCHK-80]
|
_ = x[IBSFETCHSAM-80]
|
||||||
_ = x[IBS_FETCH_CTLX-81]
|
_ = x[IBSFFV-81]
|
||||||
_ = x[IBS_OPDATA4-82]
|
_ = x[IBSOPCNT-82]
|
||||||
_ = x[IBS_OPFUSE-83]
|
_ = x[IBSOPCNTEXT-83]
|
||||||
_ = x[IBS_PREVENTHOST-84]
|
_ = x[IBSOPSAM-84]
|
||||||
_ = x[IBS_ZEN4-85]
|
_ = x[IBSRDWROPCNT-85]
|
||||||
_ = x[IDPRED_CTRL-86]
|
_ = x[IBSRIPINVALIDCHK-86]
|
||||||
_ = x[INT_WBINVD-87]
|
_ = x[IBS_FETCH_CTLX-87]
|
||||||
_ = x[INVLPGB-88]
|
_ = x[IBS_OPDATA4-88]
|
||||||
_ = x[LAHF-89]
|
_ = x[IBS_OPFUSE-89]
|
||||||
_ = x[LAM-90]
|
_ = x[IBS_PREVENTHOST-90]
|
||||||
_ = x[LBRVIRT-91]
|
_ = x[IBS_ZEN4-91]
|
||||||
_ = x[LZCNT-92]
|
_ = x[IDPRED_CTRL-92]
|
||||||
_ = x[MCAOVERFLOW-93]
|
_ = x[INT_WBINVD-93]
|
||||||
_ = x[MCDT_NO-94]
|
_ = x[INVLPGB-94]
|
||||||
_ = x[MCOMMIT-95]
|
_ = x[KEYLOCKER-95]
|
||||||
_ = x[MD_CLEAR-96]
|
_ = x[KEYLOCKERW-96]
|
||||||
_ = x[MMX-97]
|
_ = x[LAHF-97]
|
||||||
_ = x[MMXEXT-98]
|
_ = x[LAM-98]
|
||||||
_ = x[MOVBE-99]
|
_ = x[LBRVIRT-99]
|
||||||
_ = x[MOVDIR64B-100]
|
_ = x[LZCNT-100]
|
||||||
_ = x[MOVDIRI-101]
|
_ = x[MCAOVERFLOW-101]
|
||||||
_ = x[MOVSB_ZL-102]
|
_ = x[MCDT_NO-102]
|
||||||
_ = x[MOVU-103]
|
_ = x[MCOMMIT-103]
|
||||||
_ = x[MPX-104]
|
_ = x[MD_CLEAR-104]
|
||||||
_ = x[MSRIRC-105]
|
_ = x[MMX-105]
|
||||||
_ = x[MSRLIST-106]
|
_ = x[MMXEXT-106]
|
||||||
_ = x[MSR_PAGEFLUSH-107]
|
_ = x[MOVBE-107]
|
||||||
_ = x[NRIPS-108]
|
_ = x[MOVDIR64B-108]
|
||||||
_ = x[NX-109]
|
_ = x[MOVDIRI-109]
|
||||||
_ = x[OSXSAVE-110]
|
_ = x[MOVSB_ZL-110]
|
||||||
_ = x[PCONFIG-111]
|
_ = x[MOVU-111]
|
||||||
_ = x[POPCNT-112]
|
_ = x[MPX-112]
|
||||||
_ = x[PPIN-113]
|
_ = x[MSRIRC-113]
|
||||||
_ = x[PREFETCHI-114]
|
_ = x[MSRLIST-114]
|
||||||
_ = x[PSFD-115]
|
_ = x[MSR_PAGEFLUSH-115]
|
||||||
_ = x[RDPRU-116]
|
_ = x[NRIPS-116]
|
||||||
_ = x[RDRAND-117]
|
_ = x[NX-117]
|
||||||
_ = x[RDSEED-118]
|
_ = x[OSXSAVE-118]
|
||||||
_ = x[RDTSCP-119]
|
_ = x[PCONFIG-119]
|
||||||
_ = x[RRSBA_CTRL-120]
|
_ = x[POPCNT-120]
|
||||||
_ = x[RTM-121]
|
_ = x[PPIN-121]
|
||||||
_ = x[RTM_ALWAYS_ABORT-122]
|
_ = x[PREFETCHI-122]
|
||||||
_ = x[SERIALIZE-123]
|
_ = x[PSFD-123]
|
||||||
_ = x[SEV-124]
|
_ = x[RDPRU-124]
|
||||||
_ = x[SEV_64BIT-125]
|
_ = x[RDRAND-125]
|
||||||
_ = x[SEV_ALTERNATIVE-126]
|
_ = x[RDSEED-126]
|
||||||
_ = x[SEV_DEBUGSWAP-127]
|
_ = x[RDTSCP-127]
|
||||||
_ = x[SEV_ES-128]
|
_ = x[RRSBA_CTRL-128]
|
||||||
_ = x[SEV_RESTRICTED-129]
|
_ = x[RTM-129]
|
||||||
_ = x[SEV_SNP-130]
|
_ = x[RTM_ALWAYS_ABORT-130]
|
||||||
_ = x[SGX-131]
|
_ = x[SBPB-131]
|
||||||
_ = x[SGXLC-132]
|
_ = x[SERIALIZE-132]
|
||||||
_ = x[SHA-133]
|
_ = x[SEV-133]
|
||||||
_ = x[SME-134]
|
_ = x[SEV_64BIT-134]
|
||||||
_ = x[SME_COHERENT-135]
|
_ = x[SEV_ALTERNATIVE-135]
|
||||||
_ = x[SPEC_CTRL_SSBD-136]
|
_ = x[SEV_DEBUGSWAP-136]
|
||||||
_ = x[SRBDS_CTRL-137]
|
_ = x[SEV_ES-137]
|
||||||
_ = x[SSE-138]
|
_ = x[SEV_RESTRICTED-138]
|
||||||
_ = x[SSE2-139]
|
_ = x[SEV_SNP-139]
|
||||||
_ = x[SSE3-140]
|
_ = x[SGX-140]
|
||||||
_ = x[SSE4-141]
|
_ = x[SGXLC-141]
|
||||||
_ = x[SSE42-142]
|
_ = x[SHA-142]
|
||||||
_ = x[SSE4A-143]
|
_ = x[SME-143]
|
||||||
_ = x[SSSE3-144]
|
_ = x[SME_COHERENT-144]
|
||||||
_ = x[STIBP-145]
|
_ = x[SPEC_CTRL_SSBD-145]
|
||||||
_ = x[STIBP_ALWAYSON-146]
|
_ = x[SRBDS_CTRL-146]
|
||||||
_ = x[STOSB_SHORT-147]
|
_ = x[SRSO_MSR_FIX-147]
|
||||||
_ = x[SUCCOR-148]
|
_ = x[SRSO_NO-148]
|
||||||
_ = x[SVM-149]
|
_ = x[SRSO_USER_KERNEL_NO-149]
|
||||||
_ = x[SVMDA-150]
|
_ = x[SSE-150]
|
||||||
_ = x[SVMFBASID-151]
|
_ = x[SSE2-151]
|
||||||
_ = x[SVML-152]
|
_ = x[SSE3-152]
|
||||||
_ = x[SVMNP-153]
|
_ = x[SSE4-153]
|
||||||
_ = x[SVMPF-154]
|
_ = x[SSE42-154]
|
||||||
_ = x[SVMPFT-155]
|
_ = x[SSE4A-155]
|
||||||
_ = x[SYSCALL-156]
|
_ = x[SSSE3-156]
|
||||||
_ = x[SYSEE-157]
|
_ = x[STIBP-157]
|
||||||
_ = x[TBM-158]
|
_ = x[STIBP_ALWAYSON-158]
|
||||||
_ = x[TDX_GUEST-159]
|
_ = x[STOSB_SHORT-159]
|
||||||
_ = x[TLB_FLUSH_NESTED-160]
|
_ = x[SUCCOR-160]
|
||||||
_ = x[TME-161]
|
_ = x[SVM-161]
|
||||||
_ = x[TOPEXT-162]
|
_ = x[SVMDA-162]
|
||||||
_ = x[TSCRATEMSR-163]
|
_ = x[SVMFBASID-163]
|
||||||
_ = x[TSXLDTRK-164]
|
_ = x[SVML-164]
|
||||||
_ = x[VAES-165]
|
_ = x[SVMNP-165]
|
||||||
_ = x[VMCBCLEAN-166]
|
_ = x[SVMPF-166]
|
||||||
_ = x[VMPL-167]
|
_ = x[SVMPFT-167]
|
||||||
_ = x[VMSA_REGPROT-168]
|
_ = x[SYSCALL-168]
|
||||||
_ = x[VMX-169]
|
_ = x[SYSEE-169]
|
||||||
_ = x[VPCLMULQDQ-170]
|
_ = x[TBM-170]
|
||||||
_ = x[VTE-171]
|
_ = x[TDX_GUEST-171]
|
||||||
_ = x[WAITPKG-172]
|
_ = x[TLB_FLUSH_NESTED-172]
|
||||||
_ = x[WBNOINVD-173]
|
_ = x[TME-173]
|
||||||
_ = x[WRMSRNS-174]
|
_ = x[TOPEXT-174]
|
||||||
_ = x[X87-175]
|
_ = x[TSCRATEMSR-175]
|
||||||
_ = x[XGETBV1-176]
|
_ = x[TSXLDTRK-176]
|
||||||
_ = x[XOP-177]
|
_ = x[VAES-177]
|
||||||
_ = x[XSAVE-178]
|
_ = x[VMCBCLEAN-178]
|
||||||
_ = x[XSAVEC-179]
|
_ = x[VMPL-179]
|
||||||
_ = x[XSAVEOPT-180]
|
_ = x[VMSA_REGPROT-180]
|
||||||
_ = x[XSAVES-181]
|
_ = x[VMX-181]
|
||||||
_ = x[AESARM-182]
|
_ = x[VPCLMULQDQ-182]
|
||||||
_ = x[ARMCPUID-183]
|
_ = x[VTE-183]
|
||||||
_ = x[ASIMD-184]
|
_ = x[WAITPKG-184]
|
||||||
_ = x[ASIMDDP-185]
|
_ = x[WBNOINVD-185]
|
||||||
_ = x[ASIMDHP-186]
|
_ = x[WRMSRNS-186]
|
||||||
_ = x[ASIMDRDM-187]
|
_ = x[X87-187]
|
||||||
_ = x[ATOMICS-188]
|
_ = x[XGETBV1-188]
|
||||||
_ = x[CRC32-189]
|
_ = x[XOP-189]
|
||||||
_ = x[DCPOP-190]
|
_ = x[XSAVE-190]
|
||||||
_ = x[EVTSTRM-191]
|
_ = x[XSAVEC-191]
|
||||||
_ = x[FCMA-192]
|
_ = x[XSAVEOPT-192]
|
||||||
_ = x[FP-193]
|
_ = x[XSAVES-193]
|
||||||
_ = x[FPHP-194]
|
_ = x[AESARM-194]
|
||||||
_ = x[GPA-195]
|
_ = x[ARMCPUID-195]
|
||||||
_ = x[JSCVT-196]
|
_ = x[ASIMD-196]
|
||||||
_ = x[LRCPC-197]
|
_ = x[ASIMDDP-197]
|
||||||
_ = x[PMULL-198]
|
_ = x[ASIMDHP-198]
|
||||||
_ = x[SHA1-199]
|
_ = x[ASIMDRDM-199]
|
||||||
_ = x[SHA2-200]
|
_ = x[ATOMICS-200]
|
||||||
_ = x[SHA3-201]
|
_ = x[CRC32-201]
|
||||||
_ = x[SHA512-202]
|
_ = x[DCPOP-202]
|
||||||
_ = x[SM3-203]
|
_ = x[EVTSTRM-203]
|
||||||
_ = x[SM4-204]
|
_ = x[FCMA-204]
|
||||||
_ = x[SVE-205]
|
_ = x[FP-205]
|
||||||
_ = x[lastID-206]
|
_ = x[FPHP-206]
|
||||||
|
_ = x[GPA-207]
|
||||||
|
_ = x[JSCVT-208]
|
||||||
|
_ = x[LRCPC-209]
|
||||||
|
_ = x[PMULL-210]
|
||||||
|
_ = x[SHA1-211]
|
||||||
|
_ = x[SHA2-212]
|
||||||
|
_ = x[SHA3-213]
|
||||||
|
_ = x[SHA512-214]
|
||||||
|
_ = x[SM3-215]
|
||||||
|
_ = x[SM4-216]
|
||||||
|
_ = x[SVE-217]
|
||||||
|
_ = x[lastID-218]
|
||||||
_ = x[firstID-0]
|
_ = x[firstID-0]
|
||||||
}
|
}
|
||||||
|
|
||||||
const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
|
const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
|
||||||
|
|
||||||
var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 282, 286, 290, 296, 301, 309, 314, 320, 324, 333, 351, 359, 366, 370, 374, 388, 394, 398, 402, 411, 415, 419, 424, 429, 433, 437, 444, 448, 451, 457, 460, 463, 473, 483, 496, 509, 513, 517, 531, 548, 551, 561, 572, 578, 586, 597, 605, 617, 633, 647, 658, 668, 683, 691, 702, 712, 719, 723, 726, 733, 738, 749, 756, 763, 771, 774, 780, 785, 794, 801, 809, 813, 816, 822, 829, 842, 847, 849, 856, 863, 869, 873, 882, 886, 891, 897, 903, 909, 919, 922, 938, 947, 950, 959, 974, 987, 993, 1007, 1014, 1017, 1022, 1025, 1028, 1040, 1054, 1064, 1067, 1071, 1075, 1079, 1084, 1089, 1094, 1099, 1113, 1124, 1130, 1133, 1138, 1147, 1151, 1156, 1161, 1167, 1174, 1179, 1182, 1191, 1207, 1210, 1216, 1226, 1234, 1238, 1247, 1251, 1263, 1266, 1276, 1279, 1286, 1294, 1301, 1304, 1311, 1314, 1319, 1325, 1333, 1339, 1345, 1353, 1358, 1365, 1372, 1380, 1387, 1392, 1397, 1404, 1408, 1410, 1414, 1417, 1422, 1427, 1432, 1436, 1440, 1444, 1450, 1453, 1456, 1459, 1465}
|
var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 319, 323, 327, 333, 338, 346, 351, 357, 361, 370, 388, 396, 403, 407, 411, 425, 431, 435, 439, 448, 452, 456, 461, 466, 470, 474, 481, 485, 488, 494, 497, 500, 510, 520, 533, 546, 550, 561, 565, 579, 596, 599, 609, 620, 626, 634, 645, 653, 665, 681, 695, 706, 716, 731, 739, 750, 760, 767, 776, 786, 790, 793, 800, 805, 816, 823, 830, 838, 841, 847, 852, 861, 868, 876, 880, 883, 889, 896, 909, 914, 916, 923, 930, 936, 940, 949, 953, 958, 964, 970, 976, 986, 989, 1005, 1009, 1018, 1021, 1030, 1045, 1058, 1064, 1078, 1085, 1088, 1093, 1096, 1099, 1111, 1125, 1135, 1147, 1154, 1173, 1176, 1180, 1184, 1188, 1193, 1198, 1203, 1208, 1222, 1233, 1239, 1242, 1247, 1256, 1260, 1265, 1270, 1276, 1283, 1288, 1291, 1300, 1316, 1319, 1325, 1335, 1343, 1347, 1356, 1360, 1372, 1375, 1385, 1388, 1395, 1403, 1410, 1413, 1420, 1423, 1428, 1434, 1442, 1448, 1454, 1462, 1467, 1474, 1481, 1489, 1496, 1501, 1506, 1513, 1517, 1519, 1523, 1526, 1531, 1536, 1541, 1545, 1549, 1553, 1559, 1562, 1565, 1568, 1574}
|
||||||
|
|
||||||
func (i FeatureID) String() string {
|
func (i FeatureID) String() string {
|
||||||
if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
|
if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||||
|
*.o
|
||||||
|
*.a
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Folders
|
||||||
|
_obj
|
||||||
|
_test
|
||||||
|
|
||||||
|
# Architecture specific extensions/prefixes
|
||||||
|
*.[568vq]
|
||||||
|
[568vq].out
|
||||||
|
|
||||||
|
*.cgo1.go
|
||||||
|
*.cgo2.c
|
||||||
|
_cgo_defun.c
|
||||||
|
_cgo_gotypes.go
|
||||||
|
_cgo_export.*
|
||||||
|
|
||||||
|
_testmain.go
|
||||||
|
|
||||||
|
*.exe
|
||||||
|
*.test
|
||||||
|
*.prof
|
||||||
|
|
||||||
|
.idea
|
|
@ -0,0 +1,23 @@
|
||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2015 Klaus Post
|
||||||
|
Copyright (c) 2015 Backblaze
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
|
|
@ -0,0 +1,566 @@
|
||||||
|
# Reed-Solomon
|
||||||
|
[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/reedsolomon.svg)](https://pkg.go.dev/github.com/klauspost/reedsolomon) [![Go](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml)
|
||||||
|
|
||||||
|
Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
|
||||||
|
|
||||||
|
This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by
|
||||||
|
[Backblaze](http://backblaze.com), with some additional optimizations.
|
||||||
|
|
||||||
|
For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
|
||||||
|
|
||||||
|
For encoding high shard counts (>256) a Leopard implementation is used.
|
||||||
|
For most platforms this performs close to the original Leopard implementation in terms of speed.
|
||||||
|
|
||||||
|
Package home: https://github.com/klauspost/reedsolomon
|
||||||
|
|
||||||
|
Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon
|
||||||
|
|
||||||
|
# Installation
|
||||||
|
To get the package use the standard:
|
||||||
|
```bash
|
||||||
|
go get -u github.com/klauspost/reedsolomon
|
||||||
|
```
|
||||||
|
|
||||||
|
Using Go modules is recommended.
|
||||||
|
|
||||||
|
# Changes
|
||||||
|
|
||||||
|
## 2022
|
||||||
|
|
||||||
|
* [GFNI](https://github.com/klauspost/reedsolomon/pull/224) support for amd64, for up to 3x faster processing.
|
||||||
|
* [Leopard GF8](https://github.com/klauspost/reedsolomon#leopard-gf8) mode added, for faster processing of medium shard counts.
|
||||||
|
* [Leopard GF16](https://github.com/klauspost/reedsolomon#leopard-compatible-gf16) mode added, for up to 65536 shards.
|
||||||
|
* [WithJerasureMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithJerasureMatrix) allows constructing a [Jerasure](https://github.com/tsuraan/Jerasure) compatible matrix.
|
||||||
|
|
||||||
|
## 2021
|
||||||
|
|
||||||
|
* Use `GOAMD64=v4` to enable faster AVX2.
|
||||||
|
* Add progressive shard encoding.
|
||||||
|
* Wider AVX2 loops
|
||||||
|
* Limit concurrency on AVX2, since we are likely memory bound.
|
||||||
|
* Allow 0 parity shards.
|
||||||
|
* Allow disabling inversion cache.
|
||||||
|
* Faster AVX2 encoding.
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See older changes</summary>
|
||||||
|
|
||||||
|
## May 2020
|
||||||
|
|
||||||
|
* ARM64 optimizations, up to 2.5x faster.
|
||||||
|
* Added [WithFastOneParityMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithFastOneParityMatrix) for faster operation with 1 parity shard.
|
||||||
|
* Much better performance when using a limited number of goroutines.
|
||||||
|
* AVX512 is now using multiple cores.
|
||||||
|
* Stream processing overhaul, big speedups in most cases.
|
||||||
|
* AVX512 optimizations
|
||||||
|
|
||||||
|
## March 6, 2019
|
||||||
|
|
||||||
|
The pure Go implementation is about 30% faster. Minor tweaks to assembler implementations.
|
||||||
|
|
||||||
|
## February 8, 2019
|
||||||
|
|
||||||
|
AVX512 accelerated version added for Intel Skylake CPUs. This can give up to a 4x speed improvement as compared to AVX2.
|
||||||
|
See [here](https://github.com/klauspost/reedsolomon#performance-on-avx512) for more details.
|
||||||
|
|
||||||
|
## December 18, 2018
|
||||||
|
|
||||||
|
Assembly code for ppc64le has been contributed, this boosts performance by about 10x on this platform.
|
||||||
|
|
||||||
|
## November 18, 2017
|
||||||
|
|
||||||
|
Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt
|
||||||
|
to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU.
|
||||||
|
|
||||||
|
## October 1, 2017
|
||||||
|
|
||||||
|
* [Cauchy Matrix](https://godoc.org/github.com/klauspost/reedsolomon#WithCauchyMatrix) is now an option.
|
||||||
|
Thanks to [templexxx](https://github.com/templexxx) for the basis of this.
|
||||||
|
|
||||||
|
* Default maximum number of [goroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithMaxGoroutines)
|
||||||
|
has been increased for better multi-core scaling.
|
||||||
|
|
||||||
|
* After several requests, `Reconstruct` and `ReconstructData` now allow slices of zero length but sufficient capacity to
|
||||||
|
be used instead of allocating new memory.
|
||||||
|
|
||||||
|
## August 26, 2017
|
||||||
|
|
||||||
|
* The [`Encoder()`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) now contains an `Update`
|
||||||
|
function contributed by [chenzhongtao](https://github.com/chenzhongtao).
|
||||||
|
|
||||||
|
* [Frank Wessels](https://github.com/fwessels) kindly contributed ARM 64 bit assembly,
|
||||||
|
which gives a huge performance boost on this platform.
|
||||||
|
|
||||||
|
## July 20, 2017
|
||||||
|
|
||||||
|
`ReconstructData` added to [`Encoder`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) interface.
|
||||||
|
This can cause compatibility issues if you implement your own Encoder. A simple workaround can be added:
|
||||||
|
|
||||||
|
```Go
|
||||||
|
func (e *YourEnc) ReconstructData(shards [][]byte) error {
|
||||||
|
return ReconstructData(shards)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
You can of course also do your own implementation.
|
||||||
|
The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamEncoder)
|
||||||
|
handles this without modifying the interface.
|
||||||
|
This is a good lesson on why returning interfaces is not a good design.
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
# Usage
|
||||||
|
|
||||||
|
This section assumes you know the basics of Reed-Solomon encoding.
|
||||||
|
A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/).
|
||||||
|
|
||||||
|
This package performs the calculation of the parity sets. The usage is therefore relatively simple.
|
||||||
|
|
||||||
|
First of all, you need to choose your distribution of data and parity shards.
|
||||||
|
A 'good' distribution is very subjective, and will depend a lot on your usage scenario.
|
||||||
|
|
||||||
|
To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
|
||||||
|
```Go
|
||||||
|
enc, err := reedsolomon.New(10, 3)
|
||||||
|
```
|
||||||
|
This encoder will work for all parity sets with this distribution of data and parity shards.
|
||||||
|
|
||||||
|
If you will primarily be using it with one shard size it is recommended to use
|
||||||
|
[`WithAutoGoroutines(shardSize)`](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithAutoGoroutines)
|
||||||
|
as an additional parameter. This will attempt to calculate the optimal number of goroutines to use for the best speed.
|
||||||
|
It is not required that all shards are this size.
|
||||||
|
|
||||||
|
Then you send and receive data that is a simple slice of byte slices; `[][]byte`.
|
||||||
|
In the example above, the top slice must have a length of 13.
|
||||||
|
|
||||||
|
```Go
|
||||||
|
data := make([][]byte, 13)
|
||||||
|
```
|
||||||
|
You should then fill the 10 first slices with *equally sized* data,
|
||||||
|
and create parity shards that will be populated with parity data. In this case we create the data in memory,
|
||||||
|
but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files.
|
||||||
|
|
||||||
|
```Go
|
||||||
|
// Create all shards, size them at 50000 each
|
||||||
|
for i := range data {
|
||||||
|
data[i] = make([]byte, 50000)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The above allocations can also be done by the encoder:
|
||||||
|
// data := enc.(reedsolomon.Extended).AllocAligned(50000)
|
||||||
|
|
||||||
|
// Fill some data into the data shards
|
||||||
|
for i, in := range data[:10] {
|
||||||
|
for j:= range in {
|
||||||
|
in[j] = byte((i+j)&0xff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
To populate the parity shards, you simply call `Encode()` with your data.
|
||||||
|
```Go
|
||||||
|
err = enc.Encode(data)
|
||||||
|
```
|
||||||
|
The only case where you should get an error is if the data shards aren't of equal size.
|
||||||
|
The last 3 shards now contain parity data. You can verify this by calling `Verify()`:
|
||||||
|
|
||||||
|
```Go
|
||||||
|
ok, err = enc.Verify(data)
|
||||||
|
```
|
||||||
|
|
||||||
|
The final (and important) part is to be able to reconstruct missing shards.
|
||||||
|
For this to work, you need to know which parts of your data are missing.
|
||||||
|
The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario,
|
||||||
|
you need to implement a hash check for each shard.
|
||||||
|
|
||||||
|
If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set.
|
||||||
|
|
||||||
|
To indicate missing data, you set the shard to nil before calling `Reconstruct()`:
|
||||||
|
|
||||||
|
```Go
|
||||||
|
// Delete two data shards
|
||||||
|
data[3] = nil
|
||||||
|
data[7] = nil
|
||||||
|
|
||||||
|
// Reconstruct the missing shards
|
||||||
|
err := enc.Reconstruct(data)
|
||||||
|
```
|
||||||
|
The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail.
|
||||||
|
|
||||||
|
If you are only interested in the data shards (for reading purposes) you can call `ReconstructData()`:
|
||||||
|
|
||||||
|
```Go
|
||||||
|
// Delete two data shards
|
||||||
|
data[3] = nil
|
||||||
|
data[7] = nil
|
||||||
|
|
||||||
|
// Reconstruct just the missing data shards
|
||||||
|
err := enc.ReconstructData(data)
|
||||||
|
```
|
||||||
|
|
||||||
|
If you don't need all data shards you can use `ReconstructSome()`:
|
||||||
|
|
||||||
|
```Go
|
||||||
|
// Delete two data shards
|
||||||
|
data[3] = nil
|
||||||
|
data[7] = nil
|
||||||
|
|
||||||
|
// Reconstruct just the shard 3
|
||||||
|
err := enc.ReconstructSome(data, []bool{false, false, false, true, false, false, false, false, false, false})
|
||||||
|
```
|
||||||
|
|
||||||
|
So to sum up reconstruction:
|
||||||
|
* The number of data/parity shards must match the numbers used for encoding.
|
||||||
|
* The order of shards must be the same as used when encoding.
|
||||||
|
* You may only supply data you know is valid.
|
||||||
|
* Invalid shards should be set to nil.
|
||||||
|
|
||||||
|
For complete examples of an encoder and decoder see the
|
||||||
|
[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
|
||||||
|
|
||||||
|
# Splitting/Joining Data
|
||||||
|
|
||||||
|
You might have a large slice of data.
|
||||||
|
To help you split this, there are some helper functions that can split and join a single byte slice.
|
||||||
|
|
||||||
|
```Go
|
||||||
|
bigfile, _ := ioutil.ReadFile("myfile.data")
|
||||||
|
|
||||||
|
// Split the file
|
||||||
|
split, err := enc.Split(bigfile)
|
||||||
|
```
|
||||||
|
This will split the file into the number of data shards set when creating the encoder and create empty parity shards.
|
||||||
|
|
||||||
|
An important thing to note is that you have to *keep track of the exact input size*.
|
||||||
|
If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard.
|
||||||
|
|
||||||
|
To join a data set, use the `Join()` function, which will join the shards and write it to the `io.Writer` you supply:
|
||||||
|
```Go
|
||||||
|
// Join a data set and write it to io.Discard.
|
||||||
|
err = enc.Join(io.Discard, split, len(bigfile))
|
||||||
|
```
|
||||||
|
|
||||||
|
## Aligned Allocations
|
||||||
|
|
||||||
|
For AMD64 aligned inputs can make a big speed difference.
|
||||||
|
|
||||||
|
This is an example of the speed difference when inputs are unaligned/aligned:
|
||||||
|
|
||||||
|
```
|
||||||
|
BenchmarkEncode100x20x10000-32 7058 172648 ns/op 6950.57 MB/s
|
||||||
|
BenchmarkEncode100x20x10000-32 8406 137911 ns/op 8701.24 MB/s
|
||||||
|
```
|
||||||
|
|
||||||
|
This is mostly the case when dealing with odd-sized shards.
|
||||||
|
|
||||||
|
To facilitate this the package provides an `AllocAligned(shards, each int) [][]byte`.
|
||||||
|
This will allocate a number of shards, each with the size `each`.
|
||||||
|
Each shard will then be aligned to a 64 byte boundary.
|
||||||
|
|
||||||
|
Each encoder also has a `AllocAligned(each int) [][]byte` as an extended interface which will return the same,
|
||||||
|
but with the shard count configured in the encoder.
|
||||||
|
|
||||||
|
It is not possible to re-align already allocated slices, for example when using `Split`.
|
||||||
|
When it is not possible to write to aligned shards, you should not copy to them.
|
||||||
|
|
||||||
|
# Progressive encoding
|
||||||
|
|
||||||
|
It is possible to encode individual shards using EncodeIdx:
|
||||||
|
|
||||||
|
```Go
|
||||||
|
// EncodeIdx will add parity for a single data shard.
|
||||||
|
// Parity shards should start out as 0. The caller must zero them.
|
||||||
|
// Data shards must be delivered exactly once. There is no check for this.
|
||||||
|
// The parity shards will always be updated and the data shards will remain the same.
|
||||||
|
EncodeIdx(dataShard []byte, idx int, parity [][]byte) error
|
||||||
|
```
|
||||||
|
|
||||||
|
This allows progressively encoding the parity by sending individual data shards.
|
||||||
|
There is no requirement on shards being delivered in order,
|
||||||
|
but when sent in order it allows encoding shards one at a time,
|
||||||
|
effectively allowing the operation to be streaming.
|
||||||
|
|
||||||
|
The result will be the same as encoding all shards at once.
|
||||||
|
There is a minor speed penalty using this method, so send
|
||||||
|
shards at once if they are available.
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
```Go
|
||||||
|
func test() {
|
||||||
|
// Create an encoder with 7 data and 3 parity slices.
|
||||||
|
enc, _ := reedsolomon.New(7, 3)
|
||||||
|
|
||||||
|
// This will be our output parity.
|
||||||
|
parity := make([][]byte, 3)
|
||||||
|
for i := range parity {
|
||||||
|
parity[i] = make([]byte, 10000)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < 7; i++ {
|
||||||
|
// Send data shards one at the time.
|
||||||
|
_ = enc.EncodeIdx(make([]byte, 10000), i, parity)
|
||||||
|
}
|
||||||
|
|
||||||
|
// parity now contains parity, as if all data was sent in one call.
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
# Streaming/Merging
|
||||||
|
|
||||||
|
It might seem like a limitation that all data should be in memory,
|
||||||
|
but an important property is that *as long as the number of data/parity shards are the same,
|
||||||
|
you can merge/split data sets*, and they will remain valid as a separate set.
|
||||||
|
|
||||||
|
```Go
|
||||||
|
// Split the data set of 50000 elements into two of 25000
|
||||||
|
splitA := make([][]byte, 13)
|
||||||
|
splitB := make([][]byte, 13)
|
||||||
|
|
||||||
|
// Merge into a 100000 element set
|
||||||
|
merged := make([][]byte, 13)
|
||||||
|
|
||||||
|
for i := range data {
|
||||||
|
splitA[i] = data[i][:25000]
|
||||||
|
splitB[i] = data[i][25000:]
|
||||||
|
|
||||||
|
// Concatenate it to itself
|
||||||
|
merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...)
|
||||||
|
merged[i] = append(merged[i], data[i]...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Each part should still verify as ok.
|
||||||
|
ok, err := enc.Verify(splitA)
|
||||||
|
if ok && err == nil {
|
||||||
|
log.Println("splitA ok")
|
||||||
|
}
|
||||||
|
|
||||||
|
ok, err = enc.Verify(splitB)
|
||||||
|
if ok && err == nil {
|
||||||
|
log.Println("splitB ok")
|
||||||
|
}
|
||||||
|
|
||||||
|
ok, err = enc.Verify(merged)
|
||||||
|
if ok && err == nil {
|
||||||
|
log.Println("merge ok")
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks.
|
||||||
|
For the best throughput, don't use too small blocks.
|
||||||
|
|
||||||
|
This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data.
|
||||||
|
This doesn't give the same flexibility of a higher number of data shards, but it will be much more performant.
|
||||||
|
|
||||||
|
# Streaming API
|
||||||
|
|
||||||
|
There has been added support for a streaming API, to help perform fully streaming operations,
|
||||||
|
which enables you to do the same operations, but on streams.
|
||||||
|
To use the stream API, use [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function
|
||||||
|
to create the encoding/decoding interfaces.
|
||||||
|
|
||||||
|
You can use [`WithConcurrentStreams`](https://godoc.org/github.com/klauspost/reedsolomon#WithConcurrentStreams)
|
||||||
|
to ready an interface that reads/writes concurrently from the streams.
|
||||||
|
|
||||||
|
You can specify the size of each operation using
|
||||||
|
[`WithStreamBlockSize`](https://godoc.org/github.com/klauspost/reedsolomon#WithStreamBlockSize).
|
||||||
|
This will set the size of each read/write operation.
|
||||||
|
|
||||||
|
Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API.
|
||||||
|
Each stream must supply the same amount of data, similar to how each slice must be similar size with the in-memory API.
|
||||||
|
If an error occurs in relation to a stream,
|
||||||
|
a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError)
|
||||||
|
or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError)
|
||||||
|
will help you determine which stream was the offender.
|
||||||
|
|
||||||
|
There is no buffering or timeouts/retry specified. If you want to add that, you need to add it to the Reader/Writer.
|
||||||
|
|
||||||
|
For complete examples of a streaming encoder and decoder see the
|
||||||
|
[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
|
||||||
|
|
||||||
|
GF16 (more than 256 shards) is not supported by the streaming interface.
|
||||||
|
|
||||||
|
# Advanced Options
|
||||||
|
|
||||||
|
You can modify internal options which affects how jobs are split between and processed by goroutines.
|
||||||
|
|
||||||
|
To create options, use the WithXXX functions. You can supply options to `New`, `NewStream`.
|
||||||
|
If no Options are supplied, default options are used.
|
||||||
|
|
||||||
|
Example of how to supply options:
|
||||||
|
|
||||||
|
```Go
|
||||||
|
enc, err := reedsolomon.New(10, 3, reedsolomon.WithMaxGoroutines(25))
|
||||||
|
```
|
||||||
|
|
||||||
|
# Leopard Compatible GF16
|
||||||
|
|
||||||
|
When you encode more than 256 shards the library will switch to a [Leopard-RS](https://github.com/catid/leopard) implementation.
|
||||||
|
|
||||||
|
This allows encoding up to 65536 shards (data+parity) with the following limitations, similar to leopard:
|
||||||
|
|
||||||
|
* The original and recovery data must not exceed 65536 pieces.
|
||||||
|
* The shard size *must* each be a multiple of 64 bytes.
|
||||||
|
* Each buffer should have the same number of bytes.
|
||||||
|
* Even the last shard must be rounded up to the block size.
|
||||||
|
|
||||||
|
| | Regular | Leopard |
|
||||||
|
|-----------------|---------|---------|
|
||||||
|
| Encode | ✓ | ✓ |
|
||||||
|
| EncodeIdx | ✓ | - |
|
||||||
|
| Verify | ✓ | ✓ |
|
||||||
|
| Reconstruct | ✓ | ✓ |
|
||||||
|
| ReconstructData | ✓ | ✓ |
|
||||||
|
| ReconstructSome | ✓ | ✓ (+) |
|
||||||
|
| Update | ✓ | - |
|
||||||
|
| Split | ✓ | ✓ |
|
||||||
|
| Join | ✓ | ✓ |
|
||||||
|
|
||||||
|
* (+) Same as calling `ReconstructData`.
|
||||||
|
|
||||||
|
The Split/Join functions will help to split an input to the proper sizes.
|
||||||
|
|
||||||
|
Speed can be expected to be `O(N*log(N))`, compared to the `O(N*N)`.
|
||||||
|
Reconstruction matrix calculation is more time-consuming,
|
||||||
|
so be sure to include that as part of any benchmark you run.
|
||||||
|
|
||||||
|
For now SSSE3, AVX2 and AVX512 assembly are available on AMD64 platforms.
|
||||||
|
|
||||||
|
Leopard mode currently always runs as a single goroutine, since multiple
|
||||||
|
goroutines don't provide any worthwhile speedup.
|
||||||
|
|
||||||
|
## Leopard GF8
|
||||||
|
|
||||||
|
It is possible to replace the default reed-solomon encoder with a leopard compatible one.
|
||||||
|
This will typically be faster when dealing with more than 20-30 shards.
|
||||||
|
Note that the limitations listed above also apply to this mode.
|
||||||
|
See table below for speed with different number of shards.
|
||||||
|
|
||||||
|
To enable Leopard GF8 mode use `WithLeopardGF(true)`.
|
||||||
|
|
||||||
|
Benchmark Encoding and Reconstructing *1KB* shards with variable number of shards.
|
||||||
|
All implementations use an inversion cache when available.
|
||||||
|
Speed is total shard size for each operation. Data shard throughput is speed/2.
|
||||||
|
AVX2 is used.
|
||||||
|
|
||||||
|
| Encoder | Shards | Encode | Recover All | Recover One |
|
||||||
|
|--------------|-------------|----------------|--------------|----------------|
|
||||||
|
| Cauchy | 4+4 | 23076.83 MB/s | 5444.02 MB/s | 10834.67 MB/s |
|
||||||
|
| Cauchy | 8+8 | 15206.87 MB/s | 4223.42 MB/s | 16181.62 MB/s |
|
||||||
|
| Cauchy | 16+16 | 7427.47 MB/s | 3305.84 MB/s | 22480.41 MB/s |
|
||||||
|
| Cauchy | 32+32 | 3785.64 MB/s | 2300.07 MB/s | 26181.31 MB/s |
|
||||||
|
| Cauchy | 64+64 | 1911.93 MB/s | 1368.51 MB/s | 27992.93 MB/s |
|
||||||
|
| Cauchy | 128+128 | 963.83 MB/s | 1327.56 MB/s | 32866.86 MB/s |
|
||||||
|
| Leopard GF8 | 4+4 | 17061.28 MB/s | 3099.06 MB/s | 4096.78 MB/s |
|
||||||
|
| Leopard GF8 | 8+8 | 10546.67 MB/s | 2925.92 MB/s | 3964.00 MB/s |
|
||||||
|
| Leopard GF8 | 16+16 | 10961.37 MB/s | 2328.40 MB/s | 3110.22 MB/s |
|
||||||
|
| Leopard GF8 | 32+32 | 7111.47 MB/s | 2374.61 MB/s | 3220.75 MB/s |
|
||||||
|
| Leopard GF8 | 64+64 | 7468.57 MB/s | 2055.41 MB/s | 3061.81 MB/s |
|
||||||
|
| Leopard GF8 | 128+128 | 5479.99 MB/s | 1953.21 MB/s | 2815.15 MB/s |
|
||||||
|
| Leopard GF16 | 256+256 | 6158.66 MB/s | 454.14 MB/s | 506.70 MB/s |
|
||||||
|
| Leopard GF16 | 512+512 | 4418.58 MB/s | 685.75 MB/s | 801.63 MB/s |
|
||||||
|
| Leopard GF16 | 1024+1024 | 4778.05 MB/s | 814.51 MB/s | 1080.19 MB/s |
|
||||||
|
| Leopard GF16 | 2048+2048 | 3417.05 MB/s | 911.64 MB/s | 1179.48 MB/s |
|
||||||
|
| Leopard GF16 | 4096+4096 | 3209.41 MB/s | 729.13 MB/s | 1135.06 MB/s |
|
||||||
|
| Leopard GF16 | 8192+8192 | 2034.11 MB/s | 604.52 MB/s | 842.13 MB/s |
|
||||||
|
| Leopard GF16 | 16384+16384 | 1525.88 MB/s | 486.74 MB/s | 750.01 MB/s |
|
||||||
|
| Leopard GF16 | 32768+32768 | 1138.67 MB/s | 482.81 MB/s | 712.73 MB/s |
|
||||||
|
|
||||||
|
"Traditional" encoding is faster until somewhere between 16 and 32 shards.
|
||||||
|
Leopard provides fast encoding in all cases, but shows a significant overhead for reconstruction.
|
||||||
|
|
||||||
|
Calculating the reconstruction matrix takes a significant amount of computation.
|
||||||
|
With bigger shards that overhead will be relatively smaller. Arguably, fewer shards typically also means bigger shards.
|
||||||
|
Due to the high shard count caching reconstruction matrices generally isn't feasible for Leopard.
|
||||||
|
|
||||||
|
# Performance
|
||||||
|
|
||||||
|
Performance depends mainly on the number of parity shards.
|
||||||
|
In rough terms, doubling the number of parity shards will double the encoding time.
|
||||||
|
|
||||||
|
Here are the throughput numbers with some different selections of data and parity shards.
|
||||||
|
For reference each shard is 1MB random data, and 16 CPU cores are used for encoding.
|
||||||
|
|
||||||
|
| Data | Parity | Go MB/s | SSSE3 MB/s | AVX2 MB/s |
|
||||||
|
|------|--------|---------|------------|-----------|
|
||||||
|
| 5 | 2 | 20,772 | 66,355 | 108,755 |
|
||||||
|
| 8 | 8 | 6,815 | 38,338 | 70,516 |
|
||||||
|
| 10 | 4 | 9,245 | 48,237 | 93,875 |
|
||||||
|
| 50 | 20 | 2,063 | 12,130 | 22,828 |
|
||||||
|
|
||||||
|
The throughput numbers here are based on the size of the encoded data and parity shards.
|
||||||
|
|
||||||
|
If `runtime.GOMAXPROCS()` is set to a value higher than 1,
|
||||||
|
the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
|
||||||
|
|
||||||
|
|
||||||
|
Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
|
||||||
|
```
|
||||||
|
benchmark all MB/s data MB/s speedup
|
||||||
|
BenchmarkReconstruct10x2x10000-8 2011.67 10530.10 5.23x
|
||||||
|
BenchmarkReconstruct50x5x50000-8 4585.41 14301.60 3.12x
|
||||||
|
BenchmarkReconstruct10x2x1M-8 8081.15 28216.41 3.49x
|
||||||
|
BenchmarkReconstruct5x2x1M-8 5780.07 28015.37 4.85x
|
||||||
|
BenchmarkReconstruct10x4x1M-8 4352.56 14367.61 3.30x
|
||||||
|
BenchmarkReconstruct50x20x1M-8 1364.35 4189.79 3.07x
|
||||||
|
BenchmarkReconstruct10x4x16M-8 1484.35 5779.53 3.89x
|
||||||
|
```
|
||||||
|
|
||||||
|
The package will use [GFNI](https://en.wikipedia.org/wiki/AVX-512#GFNI) instructions combined with AVX512 when these are available.
|
||||||
|
This further improves speed by up to 3x over AVX2 code paths.
|
||||||
|
|
||||||
|
## ARM64 NEON
|
||||||
|
|
||||||
|
By exploiting NEON instructions the performance for ARM has been accelerated.
|
||||||
|
Below are the performance numbers for a single core on an EC2 m6g.16xlarge (Graviton2) instance (Amazon Linux 2):
|
||||||
|
|
||||||
|
```
|
||||||
|
BenchmarkGalois128K-64 119562 10028 ns/op 13070.78 MB/s
|
||||||
|
BenchmarkGalois1M-64 14380 83424 ns/op 12569.22 MB/s
|
||||||
|
BenchmarkGaloisXor128K-64 96508 12432 ns/op 10543.29 MB/s
|
||||||
|
BenchmarkGaloisXor1M-64 10000 100322 ns/op 10452.13 MB/s
|
||||||
|
```
|
||||||
|
|
||||||
|
# Performance on ppc64le
|
||||||
|
|
||||||
|
The performance for ppc64le has been accelerated.
|
||||||
|
This gives roughly a 10x performance improvement on this architecture as can be seen below:
|
||||||
|
|
||||||
|
```
|
||||||
|
benchmark old MB/s new MB/s speedup
|
||||||
|
BenchmarkGalois128K-160 948.87 8878.85 9.36x
|
||||||
|
BenchmarkGalois1M-160 968.85 9041.92 9.33x
|
||||||
|
BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x
|
||||||
|
BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x
|
||||||
|
```
|
||||||
|
|
||||||
|
# Legal
|
||||||
|
|
||||||
|
> None of the section below is legal advice. Seek your own legal counsel.
|
||||||
|
> As stated by the [LICENSE](LICENSE) the authors will not be held liable for any use of this library.
|
||||||
|
> Users are encouraged to independently verify they comply with all legal requirements.
|
||||||
|
|
||||||
|
As can be seen in [recent news](https://www.datanami.com/2023/10/16/cloudera-hit-with-240-million-judgement-over-erasure-coding/)
|
||||||
|
there have been lawsuits related to possible patents of aspects of erasure coding functionality.
|
||||||
|
|
||||||
|
As a possible mitigation it is possible to use the tag `nopshufb` when compiling any code which includes this package.
|
||||||
|
This will remove all inclusion and use of `PSHUFB` and equivalent on other platforms.
|
||||||
|
|
||||||
|
This is done by adding `-tags=nopshufb` to `go build` and similar commands that produce binary output.
|
||||||
|
|
||||||
|
The removed code may not be infringing and even after `-tags=nopshufb` there may still be infringing code left.
|
||||||
|
|
||||||
|
# Links
|
||||||
|
* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
|
||||||
|
* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
|
||||||
|
* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation.
|
||||||
|
* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
|
||||||
|
* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
|
||||||
|
* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
|
||||||
|
* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
|
||||||
|
* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSSE3 optimizations.
|
||||||
|
* [Leopard-RS](https://github.com/catid/leopard) C library used as basis for GF16 implementation.
|
||||||
|
|
||||||
|
# License
|
||||||
|
|
||||||
|
This code, as the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) is published under an MIT license. See LICENSE file for more information.
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,583 @@
|
||||||
|
//go:build !noasm && !appengine && !gccgo && !nopshufb
|
||||||
|
|
||||||
|
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
const pshufb = true
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func galMulSSSE3(low, high, in, out []byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func galMulSSSE3Xor(low, high, in, out []byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func galMulAVX2Xor(low, high, in, out []byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func galMulAVX2(low, high, in, out []byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func galMulAVX2Xor_64(low, high, in, out []byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func galMulAVX2_64(low, high, in, out []byte)
|
||||||
|
|
||||||
|
// This is what the assembler routines do in blocks of 16 bytes:
|
||||||
|
/*
|
||||||
|
func galMulSSSE3(low, high, in, out []byte) {
|
||||||
|
for n, input := range in {
|
||||||
|
l := input & 0xf
|
||||||
|
h := input >> 4
|
||||||
|
out[n] = low[l] ^ high[h]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func galMulSSSE3Xor(low, high, in, out []byte) {
|
||||||
|
for n, input := range in {
|
||||||
|
l := input & 0xf
|
||||||
|
h := input >> 4
|
||||||
|
out[n] ^= low[l] ^ high[h]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
// bigSwitchover is the size where 64 bytes are processed per loop.
|
||||||
|
const bigSwitchover = 128
|
||||||
|
|
||||||
|
func galMulSlice(c byte, in, out []byte, o *options) {
|
||||||
|
if c == 1 {
|
||||||
|
copy(out, in)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if o.useAVX2 {
|
||||||
|
if len(in) >= bigSwitchover {
|
||||||
|
galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
|
||||||
|
done := (len(in) >> 6) << 6
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
}
|
||||||
|
if len(in) > 32 {
|
||||||
|
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
|
||||||
|
done := (len(in) >> 5) << 5
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
}
|
||||||
|
} else if o.useSSSE3 {
|
||||||
|
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
|
||||||
|
done := (len(in) >> 4) << 4
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
}
|
||||||
|
out = out[:len(in)]
|
||||||
|
mt := mulTable[c][:256]
|
||||||
|
for i := range in {
|
||||||
|
out[i] = mt[in[i]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func galMulSliceXor(c byte, in, out []byte, o *options) {
|
||||||
|
if c == 1 {
|
||||||
|
sliceXor(in, out, o)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if o.useAVX2 {
|
||||||
|
if len(in) >= bigSwitchover {
|
||||||
|
galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
|
||||||
|
done := (len(in) >> 6) << 6
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
}
|
||||||
|
if len(in) >= 32 {
|
||||||
|
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
|
||||||
|
done := (len(in) >> 5) << 5
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
}
|
||||||
|
} else if o.useSSSE3 {
|
||||||
|
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
|
||||||
|
done := (len(in) >> 4) << 4
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
}
|
||||||
|
if len(in) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
out = out[:len(in)]
|
||||||
|
mt := mulTable[c][:256]
|
||||||
|
for i := range in {
|
||||||
|
out[i] ^= mt[in[i]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// simple slice xor
|
||||||
|
func sliceXor(in, out []byte, o *options) {
|
||||||
|
if o.useSSE2 {
|
||||||
|
if len(in) >= bigSwitchover {
|
||||||
|
if o.useAVX2 {
|
||||||
|
avx2XorSlice_64(in, out)
|
||||||
|
done := (len(in) >> 6) << 6
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
} else {
|
||||||
|
sSE2XorSlice_64(in, out)
|
||||||
|
done := (len(in) >> 6) << 6
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(in) >= 16 {
|
||||||
|
sSE2XorSlice(in, out)
|
||||||
|
done := (len(in) >> 4) << 4
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sliceXorGo(in, out, o)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
out = out[:len(in)]
|
||||||
|
for i := range in {
|
||||||
|
out[i] ^= in[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4-way butterfly
|
||||||
|
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
|
||||||
|
if len(work[0]) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
t01 := &multiply256LUT[log_m01]
|
||||||
|
t23 := &multiply256LUT[log_m23]
|
||||||
|
t02 := &multiply256LUT[log_m02]
|
||||||
|
if o.useAVX512 {
|
||||||
|
if log_m01 == modulus {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
if log_m02 == modulus {
|
||||||
|
ifftDIT4_avx512_7(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT4_avx512_3(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m02 == modulus {
|
||||||
|
ifftDIT4_avx512_5(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT4_avx512_1(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
if log_m02 == modulus {
|
||||||
|
ifftDIT4_avx512_6(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT4_avx512_2(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m02 == modulus {
|
||||||
|
ifftDIT4_avx512_4(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT4_avx512_0(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
} else if o.useAVX2 {
|
||||||
|
if log_m01 == modulus {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
if log_m02 == modulus {
|
||||||
|
ifftDIT4_avx2_7(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT4_avx2_3(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m02 == modulus {
|
||||||
|
ifftDIT4_avx2_5(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT4_avx2_1(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
if log_m02 == modulus {
|
||||||
|
ifftDIT4_avx2_6(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT4_avx2_2(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m02 == modulus {
|
||||||
|
ifftDIT4_avx2_4(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT4_avx2_0(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4-way butterfly
|
||||||
|
func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
|
||||||
|
if len(work[0]) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if false && o.useAvx512GFNI {
|
||||||
|
// Note that these currently require that length is multiple of 64.
|
||||||
|
t01 := gf2p811dMulMatrices[log_m01]
|
||||||
|
t23 := gf2p811dMulMatrices[log_m23]
|
||||||
|
t02 := gf2p811dMulMatrices[log_m02]
|
||||||
|
if log_m01 == modulus8 {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
if log_m02 == modulus8 {
|
||||||
|
ifftDIT48_gfni_7(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT48_gfni_3(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m02 == modulus8 {
|
||||||
|
ifftDIT48_gfni_5(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT48_gfni_1(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
if log_m02 == modulus8 {
|
||||||
|
ifftDIT48_gfni_6(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT48_gfni_2(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m02 == modulus8 {
|
||||||
|
ifftDIT48_gfni_4(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT48_gfni_0(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if o.useAVX2 {
|
||||||
|
// Note that these currently require that length is multiple of 64.
|
||||||
|
t01 := &multiply256LUT8[log_m01]
|
||||||
|
t23 := &multiply256LUT8[log_m23]
|
||||||
|
t02 := &multiply256LUT8[log_m02]
|
||||||
|
if log_m01 == modulus8 {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
if log_m02 == modulus8 {
|
||||||
|
ifftDIT48_avx2_7(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT48_avx2_3(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m02 == modulus8 {
|
||||||
|
ifftDIT48_avx2_5(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT48_avx2_1(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
if log_m02 == modulus8 {
|
||||||
|
ifftDIT48_avx2_6(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT48_avx2_2(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m02 == modulus8 {
|
||||||
|
ifftDIT48_avx2_4(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
ifftDIT48_avx2_0(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
|
||||||
|
if len(work[0]) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
t01 := &multiply256LUT[log_m01]
|
||||||
|
t23 := &multiply256LUT[log_m23]
|
||||||
|
t02 := &multiply256LUT[log_m02]
|
||||||
|
if o.useAVX512 {
|
||||||
|
if log_m02 == modulus {
|
||||||
|
if log_m01 == modulus {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
fftDIT4_avx512_7(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT4_avx512_3(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
fftDIT4_avx512_5(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT4_avx512_1(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m01 == modulus {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
fftDIT4_avx512_6(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT4_avx512_2(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
fftDIT4_avx512_4(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT4_avx512_0(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
} else if o.useAVX2 {
|
||||||
|
if log_m02 == modulus {
|
||||||
|
if log_m01 == modulus {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
fftDIT4_avx2_7(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT4_avx2_3(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
fftDIT4_avx2_5(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT4_avx2_1(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m01 == modulus {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
fftDIT4_avx2_6(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT4_avx2_2(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus {
|
||||||
|
fftDIT4_avx2_4(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT4_avx2_0(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4-way butterfly
|
||||||
|
func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
|
||||||
|
if len(work[0]) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if false && o.useAvx512GFNI {
|
||||||
|
t01 := gf2p811dMulMatrices[log_m01]
|
||||||
|
t23 := gf2p811dMulMatrices[log_m23]
|
||||||
|
t02 := gf2p811dMulMatrices[log_m02]
|
||||||
|
// Note that these currently require that length is multiple of 64.
|
||||||
|
if log_m02 == modulus8 {
|
||||||
|
if log_m01 == modulus8 {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
fftDIT48_gfni_7(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT48_gfni_3(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
fftDIT48_gfni_5(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT48_gfni_1(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m01 == modulus8 {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
fftDIT48_gfni_6(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT48_gfni_2(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
fftDIT48_gfni_4(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT48_gfni_0(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if o.useAVX2 {
|
||||||
|
t01 := &multiply256LUT8[log_m01]
|
||||||
|
t23 := &multiply256LUT8[log_m23]
|
||||||
|
t02 := &multiply256LUT8[log_m02]
|
||||||
|
// Note that these currently require that length is multiple of 64.
|
||||||
|
if log_m02 == modulus8 {
|
||||||
|
if log_m01 == modulus8 {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
fftDIT48_avx2_7(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT48_avx2_3(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
fftDIT48_avx2_5(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT48_avx2_1(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m01 == modulus8 {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
fftDIT48_avx2_6(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT48_avx2_2(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if log_m23 == modulus8 {
|
||||||
|
fftDIT48_avx2_4(work, dist*24, t01, t23, t02)
|
||||||
|
} else {
|
||||||
|
fftDIT48_avx2_0(work, dist*24, t01, t23, t02)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly forward
|
||||||
|
func fftDIT2(x, y []byte, log_m ffe, o *options) {
|
||||||
|
if len(x) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if o.useAVX2 {
|
||||||
|
tmp := &multiply256LUT[log_m]
|
||||||
|
fftDIT2_avx2(x, y, tmp)
|
||||||
|
} else if o.useSSSE3 {
|
||||||
|
tmp := &multiply256LUT[log_m]
|
||||||
|
fftDIT2_ssse3(x, y, tmp)
|
||||||
|
} else {
|
||||||
|
// Reference version:
|
||||||
|
refMulAdd(x, y, log_m)
|
||||||
|
sliceXor(x, y, o)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly forward
|
||||||
|
func fftDIT28(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
if len(x) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if o.useAVX2 {
|
||||||
|
fftDIT28_avx2(x, y, &multiply256LUT8[log_m])
|
||||||
|
if len(x)&63 == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
done := (len(y) >> 6) << 6
|
||||||
|
y = y[done:]
|
||||||
|
x = x[done:]
|
||||||
|
}
|
||||||
|
mulAdd8(x, y, log_m, o)
|
||||||
|
sliceXor(x, y, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly inverse
|
||||||
|
func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
if len(x) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if o.useAVX2 {
|
||||||
|
ifftDIT28_avx2(x, y, &multiply256LUT8[log_m])
|
||||||
|
if len(x)&63 == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
done := (len(y) >> 6) << 6
|
||||||
|
y = y[done:]
|
||||||
|
x = x[done:]
|
||||||
|
}
|
||||||
|
sliceXor(x, y, o)
|
||||||
|
mulAdd8(x, y, log_m, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulAdd8(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
if o.useAVX2 {
|
||||||
|
t := &multiply256LUT8[log_m]
|
||||||
|
galMulAVX2Xor_64(t[:16], t[16:32], y, x)
|
||||||
|
done := (len(y) >> 6) << 6
|
||||||
|
y = y[done:]
|
||||||
|
x = x[done:]
|
||||||
|
} else if o.useSSSE3 {
|
||||||
|
t := &multiply256LUT8[log_m]
|
||||||
|
galMulSSSE3Xor(t[:16], t[16:32], y, x)
|
||||||
|
done := (len(y) >> 4) << 4
|
||||||
|
y = y[done:]
|
||||||
|
x = x[done:]
|
||||||
|
}
|
||||||
|
refMulAdd8(x, y, log_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly
|
||||||
|
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
|
||||||
|
if len(x) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if o.useAVX2 {
|
||||||
|
tmp := &multiply256LUT[log_m]
|
||||||
|
ifftDIT2_avx2(x, y, tmp)
|
||||||
|
} else if o.useSSSE3 {
|
||||||
|
tmp := &multiply256LUT[log_m]
|
||||||
|
ifftDIT2_ssse3(x, y, tmp)
|
||||||
|
} else {
|
||||||
|
// Reference version:
|
||||||
|
sliceXor(x, y, o)
|
||||||
|
refMulAdd(x, y, log_m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulgf16(x, y []byte, log_m ffe, o *options) {
|
||||||
|
if len(x) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if o.useAVX2 {
|
||||||
|
tmp := &multiply256LUT[log_m]
|
||||||
|
mulgf16_avx2(x, y, tmp)
|
||||||
|
} else if o.useSSSE3 {
|
||||||
|
tmp := &multiply256LUT[log_m]
|
||||||
|
mulgf16_ssse3(x, y, tmp)
|
||||||
|
} else {
|
||||||
|
refMul(x, y, log_m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulgf8(out, in []byte, log_m ffe8, o *options) {
|
||||||
|
if o.useAVX2 {
|
||||||
|
t := &multiply256LUT8[log_m]
|
||||||
|
galMulAVX2_64(t[:16], t[16:32], in, out)
|
||||||
|
done := (len(in) >> 6) << 6
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
} else if o.useSSSE3 {
|
||||||
|
t := &multiply256LUT8[log_m]
|
||||||
|
galMulSSSE3(t[:16], t[16:32], in, out)
|
||||||
|
done := (len(in) >> 4) << 4
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
}
|
||||||
|
out = out[:len(in)]
|
||||||
|
mt := mul8LUTs[log_m].Value[:]
|
||||||
|
for i := range in {
|
||||||
|
out[i] = byte(mt[in[i]])
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,310 @@
|
||||||
|
//+build !noasm
|
||||||
|
//+build !appengine
|
||||||
|
//+build !gccgo
|
||||||
|
//+build !nopshufb
|
||||||
|
|
||||||
|
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||||
|
|
||||||
|
// Based on http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf
|
||||||
|
// and http://jerasure.org/jerasure/gf-complete/tree/master
|
||||||
|
|
||||||
|
// func galMulSSSE3Xor(low, high, in, out []byte)
|
||||||
|
TEXT ·galMulSSSE3Xor(SB), 7, $0
|
||||||
|
MOVQ low+0(FP), SI // SI: &low
|
||||||
|
MOVQ high+24(FP), DX // DX: &high
|
||||||
|
MOVOU (SI), X6 // X6 low
|
||||||
|
MOVOU (DX), X7 // X7: high
|
||||||
|
MOVQ $15, BX // BX: low mask
|
||||||
|
MOVQ BX, X8
|
||||||
|
PXOR X5, X5
|
||||||
|
MOVQ in+48(FP), SI // SI: &in
|
||||||
|
MOVQ in_len+56(FP), R9 // R9: len(in)
|
||||||
|
MOVQ out+72(FP), DX // DX: &out
|
||||||
|
PSHUFB X5, X8 // X8: lomask (unpacked)
|
||||||
|
SHRQ $4, R9 // len(in) / 16
|
||||||
|
MOVQ SI, AX
|
||||||
|
MOVQ DX, BX
|
||||||
|
ANDQ $15, AX
|
||||||
|
ANDQ $15, BX
|
||||||
|
CMPQ R9, $0
|
||||||
|
JEQ done_xor
|
||||||
|
ORQ AX, BX
|
||||||
|
CMPQ BX, $0
|
||||||
|
JNZ loopback_xor
|
||||||
|
|
||||||
|
loopback_xor_aligned:
|
||||||
|
MOVOA (SI), X0 // in[x]
|
||||||
|
MOVOA (DX), X4 // out[x]
|
||||||
|
MOVOA X0, X1 // in[x]
|
||||||
|
MOVOA X6, X2 // low copy
|
||||||
|
MOVOA X7, X3 // high copy
|
||||||
|
PSRLQ $4, X1 // X1: high input
|
||||||
|
PAND X8, X0 // X0: low input
|
||||||
|
PAND X8, X1 // X1: high input
|
||||||
|
PSHUFB X0, X2 // X2: mul low part
|
||||||
|
PSHUFB X1, X3 // X3: mul high part
|
||||||
|
PXOR X2, X3 // X3: Result
|
||||||
|
PXOR X4, X3 // X3: Result xor existing out
|
||||||
|
MOVOA X3, (DX) // Store
|
||||||
|
ADDQ $16, SI // in+=16
|
||||||
|
ADDQ $16, DX // out+=16
|
||||||
|
SUBQ $1, R9
|
||||||
|
JNZ loopback_xor_aligned
|
||||||
|
JMP done_xor
|
||||||
|
|
||||||
|
loopback_xor:
|
||||||
|
MOVOU (SI), X0 // in[x]
|
||||||
|
MOVOU (DX), X4 // out[x]
|
||||||
|
MOVOU X0, X1 // in[x]
|
||||||
|
MOVOU X6, X2 // low copy
|
||||||
|
MOVOU X7, X3 // high copy
|
||||||
|
PSRLQ $4, X1 // X1: high input
|
||||||
|
PAND X8, X0 // X0: low input
|
||||||
|
PAND X8, X1 // X1: high input
|
||||||
|
PSHUFB X0, X2 // X2: mul low part
|
||||||
|
PSHUFB X1, X3 // X3: mul high part
|
||||||
|
PXOR X2, X3 // X3: Result
|
||||||
|
PXOR X4, X3 // X3: Result xor existing out
|
||||||
|
MOVOU X3, (DX) // Store
|
||||||
|
ADDQ $16, SI // in+=16
|
||||||
|
ADDQ $16, DX // out+=16
|
||||||
|
SUBQ $1, R9
|
||||||
|
JNZ loopback_xor
|
||||||
|
|
||||||
|
done_xor:
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func galMulSSSE3(low, high, in, out []byte)
|
||||||
|
TEXT ·galMulSSSE3(SB), 7, $0
|
||||||
|
MOVQ low+0(FP), SI // SI: &low
|
||||||
|
MOVQ high+24(FP), DX // DX: &high
|
||||||
|
MOVOU (SI), X6 // X6 low
|
||||||
|
MOVOU (DX), X7 // X7: high
|
||||||
|
MOVQ $15, BX // BX: low mask
|
||||||
|
MOVQ BX, X8
|
||||||
|
PXOR X5, X5
|
||||||
|
MOVQ in+48(FP), SI // SI: &in
|
||||||
|
MOVQ in_len+56(FP), R9 // R9: len(in)
|
||||||
|
MOVQ out+72(FP), DX // DX: &out
|
||||||
|
PSHUFB X5, X8 // X8: lomask (unpacked)
|
||||||
|
MOVQ SI, AX
|
||||||
|
MOVQ DX, BX
|
||||||
|
SHRQ $4, R9 // len(in) / 16
|
||||||
|
ANDQ $15, AX
|
||||||
|
ANDQ $15, BX
|
||||||
|
CMPQ R9, $0
|
||||||
|
JEQ done
|
||||||
|
ORQ AX, BX
|
||||||
|
CMPQ BX, $0
|
||||||
|
JNZ loopback
|
||||||
|
|
||||||
|
loopback_aligned:
|
||||||
|
MOVOA (SI), X0 // in[x]
|
||||||
|
MOVOA X0, X1 // in[x]
|
||||||
|
MOVOA X6, X2 // low copy
|
||||||
|
MOVOA X7, X3 // high copy
|
||||||
|
PSRLQ $4, X1 // X1: high input
|
||||||
|
PAND X8, X0 // X0: low input
|
||||||
|
PAND X8, X1 // X1: high input
|
||||||
|
PSHUFB X0, X2 // X2: mul low part
|
||||||
|
PSHUFB X1, X3 // X3: mul high part
|
||||||
|
PXOR X2, X3 // X3: Result
|
||||||
|
MOVOA X3, (DX) // Store
|
||||||
|
ADDQ $16, SI // in+=16
|
||||||
|
ADDQ $16, DX // out+=16
|
||||||
|
SUBQ $1, R9
|
||||||
|
JNZ loopback_aligned
|
||||||
|
JMP done
|
||||||
|
|
||||||
|
loopback:
|
||||||
|
MOVOU (SI), X0 // in[x]
|
||||||
|
MOVOU X0, X1 // in[x]
|
||||||
|
MOVOA X6, X2 // low copy
|
||||||
|
MOVOA X7, X3 // high copy
|
||||||
|
PSRLQ $4, X1 // X1: high input
|
||||||
|
PAND X8, X0 // X0: low input
|
||||||
|
PAND X8, X1 // X1: high input
|
||||||
|
PSHUFB X0, X2 // X2: mul low part
|
||||||
|
PSHUFB X1, X3 // X3: mul high part
|
||||||
|
PXOR X2, X3 // X3: Result
|
||||||
|
MOVOU X3, (DX) // Store
|
||||||
|
ADDQ $16, SI // in+=16
|
||||||
|
ADDQ $16, DX // out+=16
|
||||||
|
SUBQ $1, R9
|
||||||
|
JNZ loopback
|
||||||
|
|
||||||
|
done:
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func galMulAVX2Xor(low, high, in, out []byte)
|
||||||
|
TEXT ·galMulAVX2Xor(SB), 7, $0
|
||||||
|
MOVQ low+0(FP), SI // SI: &low
|
||||||
|
MOVQ high+24(FP), DX // DX: &high
|
||||||
|
MOVQ $15, BX // BX: low mask
|
||||||
|
MOVQ BX, X5
|
||||||
|
MOVOU (SI), X6 // X6: low
|
||||||
|
MOVOU (DX), X7 // X7: high
|
||||||
|
MOVQ in_len+56(FP), R9 // R9: len(in)
|
||||||
|
|
||||||
|
VINSERTI128 $1, X6, Y6, Y6 // low
|
||||||
|
VINSERTI128 $1, X7, Y7, Y7 // high
|
||||||
|
VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
|
||||||
|
|
||||||
|
SHRQ $5, R9 // len(in) / 32
|
||||||
|
MOVQ out+72(FP), DX // DX: &out
|
||||||
|
MOVQ in+48(FP), SI // SI: &in
|
||||||
|
TESTQ R9, R9
|
||||||
|
JZ done_xor_avx2
|
||||||
|
|
||||||
|
loopback_xor_avx2:
|
||||||
|
VMOVDQU (SI), Y0
|
||||||
|
VMOVDQU (DX), Y4
|
||||||
|
VPSRLQ $4, Y0, Y1 // Y1: high input
|
||||||
|
VPAND Y8, Y0, Y0 // Y0: low input
|
||||||
|
VPAND Y8, Y1, Y1 // Y1: high input
|
||||||
|
VPSHUFB Y0, Y6, Y2 // Y2: mul low part
|
||||||
|
VPSHUFB Y1, Y7, Y3 // Y3: mul high part
|
||||||
|
VPXOR Y3, Y2, Y3 // Y3: Result
|
||||||
|
VPXOR Y4, Y3, Y4 // Y4: Result
|
||||||
|
VMOVDQU Y4, (DX)
|
||||||
|
|
||||||
|
ADDQ $32, SI // in+=32
|
||||||
|
ADDQ $32, DX // out+=32
|
||||||
|
SUBQ $1, R9
|
||||||
|
JNZ loopback_xor_avx2
|
||||||
|
|
||||||
|
done_xor_avx2:
|
||||||
|
VZEROUPPER
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func galMulAVX2(low, high, in, out []byte)
|
||||||
|
TEXT ·galMulAVX2(SB), 7, $0
|
||||||
|
MOVQ low+0(FP), SI // SI: &low
|
||||||
|
MOVQ high+24(FP), DX // DX: &high
|
||||||
|
MOVQ $15, BX // BX: low mask
|
||||||
|
MOVQ BX, X5
|
||||||
|
MOVOU (SI), X6 // X6: low
|
||||||
|
MOVOU (DX), X7 // X7: high
|
||||||
|
MOVQ in_len+56(FP), R9 // R9: len(in)
|
||||||
|
|
||||||
|
VINSERTI128 $1, X6, Y6, Y6 // low
|
||||||
|
VINSERTI128 $1, X7, Y7, Y7 // high
|
||||||
|
VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
|
||||||
|
|
||||||
|
SHRQ $5, R9 // len(in) / 32
|
||||||
|
MOVQ out+72(FP), DX // DX: &out
|
||||||
|
MOVQ in+48(FP), SI // SI: &in
|
||||||
|
TESTQ R9, R9
|
||||||
|
JZ done_avx2
|
||||||
|
|
||||||
|
loopback_avx2:
|
||||||
|
VMOVDQU (SI), Y0
|
||||||
|
VPSRLQ $4, Y0, Y1 // Y1: high input
|
||||||
|
VPAND Y8, Y0, Y0 // Y0: low input
|
||||||
|
VPAND Y8, Y1, Y1 // Y1: high input
|
||||||
|
VPSHUFB Y0, Y6, Y2 // Y2: mul low part
|
||||||
|
VPSHUFB Y1, Y7, Y3 // Y3: mul high part
|
||||||
|
VPXOR Y3, Y2, Y4 // Y4: Result
|
||||||
|
VMOVDQU Y4, (DX)
|
||||||
|
|
||||||
|
ADDQ $32, SI // in+=32
|
||||||
|
ADDQ $32, DX // out+=32
|
||||||
|
SUBQ $1, R9
|
||||||
|
JNZ loopback_avx2
|
||||||
|
|
||||||
|
done_avx2:
|
||||||
|
VZEROUPPER
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func galMulAVX2Xor_64(low, high, in, out []byte)
|
||||||
|
TEXT ·galMulAVX2Xor_64(SB), 7, $0
|
||||||
|
MOVQ low+0(FP), SI // SI: &low
|
||||||
|
MOVQ high+24(FP), DX // DX: &high
|
||||||
|
MOVQ $15, BX // BX: low mask
|
||||||
|
MOVQ BX, X5
|
||||||
|
MOVQ in_len+56(FP), R9 // R9: len(in)
|
||||||
|
|
||||||
|
VBROADCASTI128 (SI), Y6 // low table
|
||||||
|
VBROADCASTI128 (DX), Y7 // high table
|
||||||
|
VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
|
||||||
|
|
||||||
|
SHRQ $6, R9 // len(in) / 64
|
||||||
|
MOVQ out+72(FP), DX // DX: &out
|
||||||
|
MOVQ in+48(FP), SI // SI: &in
|
||||||
|
TESTQ R9, R9
|
||||||
|
JZ done_xor_avx2_64
|
||||||
|
|
||||||
|
loopback_xor_avx2_64:
|
||||||
|
VMOVDQU (SI), Y0
|
||||||
|
VMOVDQU 32(SI), Y10
|
||||||
|
VMOVDQU (DX), Y4
|
||||||
|
VMOVDQU 32(DX), Y14
|
||||||
|
VPSRLQ $4, Y0, Y1 // Y1: high input
|
||||||
|
VPSRLQ $4, Y10, Y11 // Y11: high input 2
|
||||||
|
VPAND Y8, Y0, Y0 // Y0: low input
|
||||||
|
VPAND Y8, Y10, Y10 // Y10: low input 2
|
||||||
|
VPAND Y8, Y1, Y1 // Y1: high input
|
||||||
|
VPAND Y8, Y11, Y11 // Y11: high input 2
|
||||||
|
VPSHUFB Y0, Y6, Y2 // Y2: mul low part
|
||||||
|
VPSHUFB Y10, Y6, Y12 // Y12: mul low part 2
|
||||||
|
VPSHUFB Y1, Y7, Y3 // Y3: mul high part
|
||||||
|
VPSHUFB Y11, Y7, Y13 // Y13: mul high part 2
|
||||||
|
VPXOR Y3, Y2, Y3 // Y3: Result
|
||||||
|
VPXOR Y13, Y12, Y13 // Y13: Result 2
|
||||||
|
VPXOR Y4, Y3, Y4 // Y4: Result
|
||||||
|
VPXOR Y14, Y13, Y14 // Y14: Result 2
|
||||||
|
VMOVDQU Y4, (DX)
|
||||||
|
VMOVDQU Y14, 32(DX)
|
||||||
|
|
||||||
|
ADDQ $64, SI // in+=64
|
||||||
|
ADDQ $64, DX // out+=64
|
||||||
|
SUBQ $1, R9
|
||||||
|
JNZ loopback_xor_avx2_64
|
||||||
|
|
||||||
|
done_xor_avx2_64:
|
||||||
|
VZEROUPPER
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func galMulAVX2_64(low, high, in, out []byte)
|
||||||
|
TEXT ·galMulAVX2_64(SB), 7, $0
|
||||||
|
MOVQ low+0(FP), SI // SI: &low
|
||||||
|
MOVQ high+24(FP), DX // DX: &high
|
||||||
|
MOVQ $15, BX // BX: low mask
|
||||||
|
MOVQ BX, X5
|
||||||
|
MOVQ in_len+56(FP), R9 // R9: len(in)
|
||||||
|
VBROADCASTI128 (SI), Y6 // low table
|
||||||
|
VBROADCASTI128 (DX), Y7 // high table
|
||||||
|
VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
|
||||||
|
|
||||||
|
SHRQ $6, R9 // len(in) / 64
|
||||||
|
MOVQ out+72(FP), DX // DX: &out
|
||||||
|
MOVQ in+48(FP), SI // SI: &in
|
||||||
|
TESTQ R9, R9
|
||||||
|
JZ done_avx2_64
|
||||||
|
|
||||||
|
loopback_avx2_64:
|
||||||
|
VMOVDQU (SI), Y0
|
||||||
|
VMOVDQU 32(SI), Y10
|
||||||
|
VPSRLQ $4, Y0, Y1 // Y1: high input
|
||||||
|
VPSRLQ $4, Y10, Y11 // Y11: high input 2
|
||||||
|
VPAND Y8, Y0, Y0 // Y0: low input
|
||||||
|
VPAND Y8, Y10, Y10 // Y10: low input
|
||||||
|
VPAND Y8, Y1, Y1 // Y1: high input
|
||||||
|
VPAND Y8, Y11, Y11 // Y11: high input 2
|
||||||
|
VPSHUFB Y0, Y6, Y2 // Y2: mul low part
|
||||||
|
VPSHUFB Y10, Y6, Y12 // Y12: mul low part 2
|
||||||
|
VPSHUFB Y1, Y7, Y3 // Y3: mul high part
|
||||||
|
VPSHUFB Y11, Y7, Y13 // Y13: mul high part 2
|
||||||
|
VPXOR Y3, Y2, Y4 // Y4: Result
|
||||||
|
VPXOR Y13, Y12, Y14 // Y14: Result 2
|
||||||
|
VMOVDQU Y4, (DX)
|
||||||
|
VMOVDQU Y14, 32(DX)
|
||||||
|
|
||||||
|
ADDQ $64, SI // in+=64
|
||||||
|
ADDQ $64, DX // out+=64
|
||||||
|
SUBQ $1, R9
|
||||||
|
JNZ loopback_avx2_64
|
||||||
|
|
||||||
|
done_avx2_64:
|
||||||
|
VZEROUPPER
|
||||||
|
RET
|
|
@ -0,0 +1,130 @@
|
||||||
|
//go:build !noasm && !appengine && !gccgo && !nopshufb
|
||||||
|
|
||||||
|
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||||
|
// Copyright 2017, Minio, Inc.
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
const pshufb = true
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func galMulNEON(low, high, in, out []byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func galMulXorNEON(low, high, in, out []byte)
|
||||||
|
|
||||||
|
func galMulSlice(c byte, in, out []byte, o *options) {
|
||||||
|
if c == 1 {
|
||||||
|
copy(out, in)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var done int
|
||||||
|
galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
|
||||||
|
done = (len(in) >> 5) << 5
|
||||||
|
|
||||||
|
remain := len(in) - done
|
||||||
|
if remain > 0 {
|
||||||
|
mt := mulTable[c][:256]
|
||||||
|
for i := done; i < len(in); i++ {
|
||||||
|
out[i] = mt[in[i]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func galMulSliceXor(c byte, in, out []byte, o *options) {
|
||||||
|
if c == 1 {
|
||||||
|
sliceXor(in, out, o)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var done int
|
||||||
|
galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
|
||||||
|
done = (len(in) >> 5) << 5
|
||||||
|
|
||||||
|
remain := len(in) - done
|
||||||
|
if remain > 0 {
|
||||||
|
mt := mulTable[c][:256]
|
||||||
|
for i := done; i < len(in); i++ {
|
||||||
|
out[i] ^= mt[in[i]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ifftDIT4 is the inverse 4-way butterfly for ffe multipliers.
// This build has no dedicated kernel for it; all arguments are forwarded
// unchanged to ifftDIT4Ref.
|
||||||
|
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
|
||||||
|
ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ifftDIT48 is the inverse 4-way butterfly for ffe8 multipliers.
// This build has no dedicated kernel for it; all arguments are forwarded
// unchanged to ifftDIT4Ref8.
|
||||||
|
func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
|
||||||
|
ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fftDIT4 is the forward 4-way butterfly for ffe multipliers.
// This build has no dedicated kernel for it; all arguments are forwarded
// unchanged to fftDIT4Ref.
|
||||||
|
func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
|
||||||
|
fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fftDIT48 is the forward 4-way butterfly for ffe8 multipliers.
// This build has no dedicated kernel for it; all arguments are forwarded
// unchanged to fftDIT4Ref8.
|
||||||
|
func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
|
||||||
|
fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fftDIT2 is the forward 2-way butterfly for ffe multipliers.
// It runs refMulAdd(x, y, log_m) first and then combines x and y with
// xorSliceNEON. The options argument is not used by this variant.
|
||||||
|
func fftDIT2(x, y []byte, log_m ffe, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
refMulAdd(x, y, log_m)
|
||||||
|
// 64 byte aligned, always full.
// NOTE(review): no length check is done here; the comment above is the only
// stated guarantee that the NEON routine covers the whole slice - confirm with callers.
|
||||||
|
xorSliceNEON(x, y)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fftDIT28 is the forward 2-way butterfly for ffe8 multipliers.
// It calls mulAdd8(x, y, log_m, o) and then sliceXor(x, y, o); the order of
// the two calls is the reverse of ifftDIT28.
|
||||||
|
func fftDIT28(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
mulAdd8(x, y, log_m, o)
|
||||||
|
sliceXor(x, y, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ifftDIT2 is the inverse 2-way butterfly for ffe multipliers.
// It combines x and y with xorSliceNEON first and then runs
// refMulAdd(x, y, log_m), the reverse order of fftDIT2. The options argument
// is not used by this variant.
|
||||||
|
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
|
||||||
|
// 64 byte aligned, always full.
// NOTE(review): no length check is done here; the comment above is the only
// stated guarantee that the NEON routine covers the whole slice - confirm with callers.
|
||||||
|
xorSliceNEON(x, y)
|
||||||
|
// Reference version:
|
||||||
|
refMulAdd(x, y, log_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ifftDIT28 is the inverse 2-way butterfly for ffe8 multipliers.
// It calls sliceXor(x, y, o) and then mulAdd8(x, y, log_m, o); the order of
// the two calls is the reverse of fftDIT28.
|
||||||
|
func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
sliceXor(x, y, o)
|
||||||
|
mulAdd8(x, y, log_m, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// mulgf16 multiplies using the ffe multiplier log_m by forwarding x, y and
// log_m to refMul. The options argument is not used by this variant.
func mulgf16(x, y []byte, log_m ffe, o *options) {
|
||||||
|
refMul(x, y, log_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulAdd8(out, in []byte, log_m ffe8, o *options) {
|
||||||
|
t := &multiply256LUT8[log_m]
|
||||||
|
galMulXorNEON(t[:16], t[16:32], in, out)
|
||||||
|
done := (len(in) >> 5) << 5
|
||||||
|
in = in[done:]
|
||||||
|
if len(in) > 0 {
|
||||||
|
out = out[done:]
|
||||||
|
refMulAdd8(in, out, log_m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulgf8(out, in []byte, log_m ffe8, o *options) {
|
||||||
|
var done int
|
||||||
|
t := &multiply256LUT8[log_m]
|
||||||
|
galMulNEON(t[:16], t[16:32], in, out)
|
||||||
|
done = (len(in) >> 5) << 5
|
||||||
|
|
||||||
|
remain := len(in) - done
|
||||||
|
if remain > 0 {
|
||||||
|
mt := mul8LUTs[log_m].Value[:]
|
||||||
|
for i := done; i < len(in); i++ {
|
||||||
|
out[i] ^= byte(mt[in[i]])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,102 @@
|
||||||
|
//+build !noasm
|
||||||
|
//+build !appengine
|
||||||
|
//+build !gccgo
|
||||||
|
//+build !nopshufb
|
||||||
|
|
||||||
|
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||||
|
// Copyright 2017, Minio, Inc.
|
||||||
|
|
||||||
|
#define LOAD(LO1, LO2, HI1, HI2) \
|
||||||
|
VLD1.P 32(R1), [LO1.B16, LO2.B16] \
|
||||||
|
\
|
||||||
|
\ // Get low input and high input
|
||||||
|
VUSHR $4, LO1.B16, HI1.B16 \
|
||||||
|
VUSHR $4, LO2.B16, HI2.B16 \
|
||||||
|
VAND V8.B16, LO1.B16, LO1.B16 \
|
||||||
|
VAND V8.B16, LO2.B16, LO2.B16
|
||||||
|
|
||||||
|
#define GALOIS_MUL(MUL_LO, MUL_HI, OUT1, OUT2, TMP1, TMP2) \
|
||||||
|
\ // Mul low part and mul high part
|
||||||
|
VTBL V0.B16, [MUL_LO.B16], OUT1.B16 \
|
||||||
|
VTBL V10.B16, [MUL_HI.B16], OUT2.B16 \
|
||||||
|
VTBL V1.B16, [MUL_LO.B16], TMP1.B16 \
|
||||||
|
VTBL V11.B16, [MUL_HI.B16], TMP2.B16 \
|
||||||
|
\
|
||||||
|
\ // Combine results
|
||||||
|
VEOR OUT2.B16, OUT1.B16, OUT1.B16 \
|
||||||
|
VEOR TMP2.B16, TMP1.B16, OUT2.B16
|
||||||
|
|
||||||
|
// func galMulNEON(low, high, in, out []byte)
|
||||||
|
TEXT ·galMulNEON(SB), 7, $0
|
||||||
|
MOVD in_base+48(FP), R1
|
||||||
|
MOVD in_len+56(FP), R2 // length of message
|
||||||
|
MOVD out_base+72(FP), R5
|
||||||
|
SUBS $32, R2
|
||||||
|
BMI complete
|
||||||
|
|
||||||
|
MOVD low+0(FP), R10 // R10: &low
|
||||||
|
MOVD high+24(FP), R11 // R11: &high
|
||||||
|
VLD1 (R10), [V6.B16]
|
||||||
|
VLD1 (R11), [V7.B16]
|
||||||
|
|
||||||
|
//
|
||||||
|
// Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
|
||||||
|
// WORD $0x4e010c68 // dup v8.16b, w3
|
||||||
|
//
|
||||||
|
MOVD $0x0f, R3
|
||||||
|
VMOV R3, V8.B[0]
|
||||||
|
VDUP V8.B[0], V8.B16
|
||||||
|
|
||||||
|
loop:
|
||||||
|
// Main loop
|
||||||
|
LOAD(V0, V1, V10, V11)
|
||||||
|
GALOIS_MUL(V6, V7, V4, V5, V14, V15)
|
||||||
|
|
||||||
|
// Store result
|
||||||
|
VST1.P [V4.D2, V5.D2], 32(R5)
|
||||||
|
|
||||||
|
SUBS $32, R2
|
||||||
|
BPL loop
|
||||||
|
|
||||||
|
complete:
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func galMulXorNEON(low, high, in, out []byte)
|
||||||
|
TEXT ·galMulXorNEON(SB), 7, $0
|
||||||
|
MOVD in_base+48(FP), R1
|
||||||
|
MOVD in_len+56(FP), R2 // length of message
|
||||||
|
MOVD out_base+72(FP), R5
|
||||||
|
SUBS $32, R2
|
||||||
|
BMI completeXor
|
||||||
|
|
||||||
|
MOVD low+0(FP), R10 // R10: &low
|
||||||
|
MOVD high+24(FP), R11 // R11: &high
|
||||||
|
VLD1 (R10), [V6.B16]
|
||||||
|
VLD1 (R11), [V7.B16]
|
||||||
|
|
||||||
|
//
|
||||||
|
// Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
|
||||||
|
// WORD $0x4e010c68 // dup v8.16b, w3
|
||||||
|
//
|
||||||
|
MOVD $0x0f, R3
|
||||||
|
VMOV R3, V8.B[0]
|
||||||
|
VDUP V8.B[0], V8.B16
|
||||||
|
|
||||||
|
loopXor:
|
||||||
|
// Main loop
|
||||||
|
VLD1 (R5), [V20.B16, V21.B16]
|
||||||
|
|
||||||
|
LOAD(V0, V1, V10, V11)
|
||||||
|
GALOIS_MUL(V6, V7, V4, V5, V14, V15)
|
||||||
|
|
||||||
|
VEOR V20.B16, V4.B16, V4.B16
|
||||||
|
VEOR V21.B16, V5.B16, V5.B16
|
||||||
|
|
||||||
|
// Store result
|
||||||
|
VST1.P [V4.D2, V5.D2], 32(R5)
|
||||||
|
|
||||||
|
SUBS $32, R2
|
||||||
|
BPL loopXor
|
||||||
|
|
||||||
|
completeXor:
|
||||||
|
RET
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,33 @@
|
||||||
|
//go:build !amd64 || noasm || appengine || gccgo || nogen
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
const maxAvx2Inputs = 1
|
||||||
|
const maxAvx2Outputs = 1
|
||||||
|
const minAvx2Size = 1
|
||||||
|
const avxSizeMask = 0
|
||||||
|
const avx2CodeGen = false
|
||||||
|
|
||||||
|
// galMulSlicesAvx2 is a stub for builds that exclude the generated amd64
// code (avx2CodeGen is false in this file). It always panics.
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
|
||||||
|
panic("codegen not available")
|
||||||
|
}
|
||||||
|
|
||||||
|
// galMulSlicesAvx2Xor is a stub for builds that exclude the generated amd64
// code (avx2CodeGen is false in this file). It always panics.
func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
|
||||||
|
panic("codegen not available")
|
||||||
|
}
|
||||||
|
|
||||||
|
// galMulSlicesGFNI is a stub for builds that exclude the generated amd64
// code. It always panics.
func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
|
||||||
|
panic("codegen not available")
|
||||||
|
}
|
||||||
|
|
||||||
|
// galMulSlicesGFNIXor is a stub for builds that exclude the generated amd64
// code. It always panics.
func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
|
||||||
|
panic("codegen not available")
|
||||||
|
}
|
||||||
|
|
||||||
|
// galMulSlicesAvxGFNI is a stub for builds that exclude the generated amd64
// code. It always panics.
func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
|
||||||
|
panic("codegen not available")
|
||||||
|
}
|
||||||
|
|
||||||
|
// galMulSlicesAvxGFNIXor is a stub for builds that exclude the generated
// amd64 code. It always panics.
func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
|
||||||
|
panic("codegen not available")
|
||||||
|
}
|
2264
vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.go
generated
vendored
Normal file
2264
vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
67987
vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.s
generated
vendored
Normal file
67987
vendor/github.com/klauspost/reedsolomon/galois_gen_nopshufb_amd64.s
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2045
vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
generated
vendored
Normal file
2045
vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1372
vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_amd64.go
generated
vendored
Normal file
1372
vendor/github.com/klauspost/reedsolomon/galois_gen_switch_nopshufb_amd64.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,91 @@
|
||||||
|
//go:build (!amd64 || noasm || appengine || gccgo) && (!arm64 || noasm || appengine || gccgo || nopshufb) && (!ppc64le || noasm || appengine || gccgo || nopshufb)
|
||||||
|
|
||||||
|
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
const pshufb = false
|
||||||
|
|
||||||
|
func galMulSlice(c byte, in, out []byte, o *options) {
|
||||||
|
out = out[:len(in)]
|
||||||
|
if c == 1 {
|
||||||
|
copy(out, in)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
mt := mulTable[c][:256]
|
||||||
|
for n, input := range in {
|
||||||
|
out[n] = mt[input]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func galMulSliceXor(c byte, in, out []byte, o *options) {
|
||||||
|
out = out[:len(in)]
|
||||||
|
if c == 1 {
|
||||||
|
sliceXor(in, out, o)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
mt := mulTable[c][:256]
|
||||||
|
for n, input := range in {
|
||||||
|
out[n] ^= mt[input]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// init clears the useAVX512 flag in defaultOptions for builds that use this
// pure-Go implementation.
func init() {
|
||||||
|
defaultOptions.useAVX512 = false
|
||||||
|
}
|
||||||
|
|
||||||
|
// ifftDIT4 is the inverse 4-way butterfly for ffe multipliers.
// The pure-Go build forwards all arguments unchanged to ifftDIT4Ref.
|
||||||
|
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
|
||||||
|
ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ifftDIT48 is the inverse 4-way butterfly for ffe8 multipliers.
// The pure-Go build forwards all arguments unchanged to ifftDIT4Ref8.
|
||||||
|
func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
|
||||||
|
ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fftDIT4 is the forward 4-way butterfly for ffe multipliers.
// The pure-Go build forwards all arguments unchanged to fftDIT4Ref.
|
||||||
|
func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
|
||||||
|
fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fftDIT48 is the forward 4-way butterfly for ffe8 multipliers.
// The pure-Go build forwards all arguments unchanged to fftDIT4Ref8.
|
||||||
|
func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
|
||||||
|
fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fftDIT2 is the forward 2-way butterfly for ffe multipliers.
// It calls refMulAdd(x, y, log_m) and then sliceXorGo(x, y, o); the order of
// the two calls is the reverse of ifftDIT2.
|
||||||
|
func fftDIT2(x, y []byte, log_m ffe, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
refMulAdd(x, y, log_m)
|
||||||
|
sliceXorGo(x, y, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fftDIT28 is the forward 2-way butterfly for ffe8 multipliers.
// It calls refMulAdd8(x, y, log_m) and then sliceXorGo(x, y, o); the order
// of the two calls is the reverse of ifftDIT28.
|
||||||
|
func fftDIT28(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
refMulAdd8(x, y, log_m)
|
||||||
|
sliceXorGo(x, y, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly inverse
|
||||||
|
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
sliceXorGo(x, y, o)
|
||||||
|
refMulAdd(x, y, log_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly inverse
|
||||||
|
func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
sliceXorGo(x, y, o)
|
||||||
|
refMulAdd8(x, y, log_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulgf16(x, y []byte, log_m ffe, o *options) {
|
||||||
|
refMul(x, y, log_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulgf8(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
refMul8(x, y, log_m)
|
||||||
|
}
|
|
@ -0,0 +1,146 @@
|
||||||
|
// Copyright 2015, Klaus Post, see LICENSE for details
|
||||||
|
|
||||||
|
//go:build nopshufb && !noasm
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
// bigSwitchover is the size where 64 bytes are processed per loop.
|
||||||
|
const bigSwitchover = 128
|
||||||
|
|
||||||
|
const pshufb = false
|
||||||
|
|
||||||
|
// simple slice xor
|
||||||
|
func sliceXor(in, out []byte, o *options) {
|
||||||
|
if o.useSSE2 {
|
||||||
|
if len(in) >= bigSwitchover {
|
||||||
|
if o.useAVX2 {
|
||||||
|
avx2XorSlice_64(in, out)
|
||||||
|
done := (len(in) >> 6) << 6
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
} else {
|
||||||
|
sSE2XorSlice_64(in, out)
|
||||||
|
done := (len(in) >> 6) << 6
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(in) >= 16 {
|
||||||
|
sSE2XorSlice(in, out)
|
||||||
|
done := (len(in) >> 4) << 4
|
||||||
|
in = in[done:]
|
||||||
|
out = out[done:]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sliceXorGo(in, out, o)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
out = out[:len(in)]
|
||||||
|
for i := range in {
|
||||||
|
out[i] ^= in[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func galMulSlice(c byte, in, out []byte, o *options) {
|
||||||
|
out = out[:len(in)]
|
||||||
|
if c == 1 {
|
||||||
|
copy(out, in)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
mt := mulTable[c][:256]
|
||||||
|
for len(in) >= 4 {
|
||||||
|
ii := (*[4]byte)(in)
|
||||||
|
oo := (*[4]byte)(out)
|
||||||
|
oo[0] = mt[ii[0]]
|
||||||
|
oo[1] = mt[ii[1]]
|
||||||
|
oo[2] = mt[ii[2]]
|
||||||
|
oo[3] = mt[ii[3]]
|
||||||
|
in = in[4:]
|
||||||
|
out = out[4:]
|
||||||
|
}
|
||||||
|
for n, input := range in {
|
||||||
|
out[n] = mt[input]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func galMulSliceXor(c byte, in, out []byte, o *options) {
|
||||||
|
out = out[:len(in)]
|
||||||
|
if c == 1 {
|
||||||
|
sliceXor(in, out, o)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
mt := mulTable[c][:256]
|
||||||
|
for len(in) >= 4 {
|
||||||
|
ii := (*[4]byte)(in)
|
||||||
|
oo := (*[4]byte)(out)
|
||||||
|
oo[0] ^= mt[ii[0]]
|
||||||
|
oo[1] ^= mt[ii[1]]
|
||||||
|
oo[2] ^= mt[ii[2]]
|
||||||
|
oo[3] ^= mt[ii[3]]
|
||||||
|
in = in[4:]
|
||||||
|
out = out[4:]
|
||||||
|
}
|
||||||
|
for n, input := range in {
|
||||||
|
out[n] ^= mt[input]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
defaultOptions.useAVX512 = false
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4-way butterfly
|
||||||
|
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
|
||||||
|
ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4-way butterfly
|
||||||
|
func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
|
||||||
|
ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4-way butterfly
|
||||||
|
func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
|
||||||
|
fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4-way butterfly
|
||||||
|
func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
|
||||||
|
fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly forward
|
||||||
|
func fftDIT2(x, y []byte, log_m ffe, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
refMulAdd(x, y, log_m)
|
||||||
|
sliceXor(x, y, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly forward
|
||||||
|
func fftDIT28(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
refMulAdd8(x, y, log_m)
|
||||||
|
sliceXor(x, y, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly inverse
|
||||||
|
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
sliceXor(x, y, o)
|
||||||
|
refMulAdd(x, y, log_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly inverse
|
||||||
|
func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
sliceXor(x, y, o)
|
||||||
|
refMulAdd8(x, y, log_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulgf16(x, y []byte, log_m ffe, o *options) {
|
||||||
|
refMul(x, y, log_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulgf8(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
refMul8(x, y, log_m)
|
||||||
|
}
|
|
@ -0,0 +1,13 @@
|
||||||
|
//go:build !amd64 || noasm || appengine || gccgo || pshufb
|
||||||
|
|
||||||
|
// Copyright 2020, Klaus Post, see LICENSE for details.
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, byteCount int) {
|
||||||
|
panic("codeSomeShardsAvx512 should not be called if built without asm")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, byteCount int) {
|
||||||
|
panic("codeSomeShardsAvx512P should not be called if built without asm")
|
||||||
|
}
|
|
@ -0,0 +1,146 @@
|
||||||
|
//go:build !noasm && !appengine && !gccgo && !nopshufb
|
||||||
|
|
||||||
|
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||||
|
// Copyright 2018, Minio, Inc.
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
const pshufb = true
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func galMulPpc(low, high, in, out []byte)
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func galMulPpcXor(low, high, in, out []byte)
|
||||||
|
|
||||||
|
// This is what the assembler routines do in blocks of 16 bytes:
|
||||||
|
/*
|
||||||
|
func galMulPpc(low, high, in, out []byte) {
|
||||||
|
for n, input := range in {
|
||||||
|
l := input & 0xf
|
||||||
|
h := input >> 4
|
||||||
|
out[n] = low[l] ^ high[h]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func galMulPpcXor(low, high, in, out []byte) {
|
||||||
|
for n, input := range in {
|
||||||
|
l := input & 0xf
|
||||||
|
h := input >> 4
|
||||||
|
out[n] ^= low[l] ^ high[h]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
func galMulSlice(c byte, in, out []byte, o *options) {
|
||||||
|
if c == 1 {
|
||||||
|
copy(out, in)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
done := (len(in) >> 4) << 4
|
||||||
|
if done > 0 {
|
||||||
|
galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
|
||||||
|
}
|
||||||
|
remain := len(in) - done
|
||||||
|
if remain > 0 {
|
||||||
|
mt := mulTable[c][:256]
|
||||||
|
for i := done; i < len(in); i++ {
|
||||||
|
out[i] = mt[in[i]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func galMulSliceXor(c byte, in, out []byte, o *options) {
|
||||||
|
if c == 1 {
|
||||||
|
sliceXor(in, out, o)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
done := (len(in) >> 4) << 4
|
||||||
|
if done > 0 {
|
||||||
|
galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
|
||||||
|
}
|
||||||
|
remain := len(in) - done
|
||||||
|
if remain > 0 {
|
||||||
|
mt := mulTable[c][:256]
|
||||||
|
for i := done; i < len(in); i++ {
|
||||||
|
out[i] ^= mt[in[i]]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4-way butterfly
|
||||||
|
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
|
||||||
|
ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4-way butterfly
|
||||||
|
func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
|
||||||
|
ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4-way butterfly
|
||||||
|
func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
|
||||||
|
fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4-way butterfly
|
||||||
|
func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
|
||||||
|
fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly forward
|
||||||
|
func fftDIT2(x, y []byte, log_m ffe, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
refMulAdd(x, y, log_m)
|
||||||
|
sliceXorGo(x, y, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly forward
|
||||||
|
func fftDIT28(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
mulAdd8(x, y, log_m, o)
|
||||||
|
sliceXorGo(x, y, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly inverse
|
||||||
|
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
sliceXorGo(x, y, o)
|
||||||
|
refMulAdd(x, y, log_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2-way butterfly inverse
|
||||||
|
func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
|
||||||
|
// Reference version:
|
||||||
|
sliceXorGo(x, y, o)
|
||||||
|
mulAdd8(x, y, log_m, o)
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulgf16(x, y []byte, log_m ffe, o *options) {
|
||||||
|
refMul(x, y, log_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulAdd8(out, in []byte, log_m ffe8, o *options) {
|
||||||
|
t := &multiply256LUT8[log_m]
|
||||||
|
galMulPpcXor(t[:16], t[16:32], in, out)
|
||||||
|
done := (len(in) >> 4) << 4
|
||||||
|
in = in[done:]
|
||||||
|
if len(in) > 0 {
|
||||||
|
out = out[done:]
|
||||||
|
refMulAdd8(in, out, log_m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func mulgf8(out, in []byte, log_m ffe8, o *options) {
|
||||||
|
var done int
|
||||||
|
t := &multiply256LUT8[log_m]
|
||||||
|
galMulPpc(t[:16], t[16:32], in, out)
|
||||||
|
done = (len(in) >> 4) << 4
|
||||||
|
|
||||||
|
remain := len(in) - done
|
||||||
|
if remain > 0 {
|
||||||
|
mt := mul8LUTs[log_m].Value[:]
|
||||||
|
for i := done; i < len(in); i++ {
|
||||||
|
out[i] ^= byte(mt[in[i]])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,127 @@
|
||||||
|
//+build !noasm
|
||||||
|
//+build !appengine
|
||||||
|
//+build !gccgo
|
||||||
|
//+build !pshufb
|
||||||
|
|
||||||
|
// Copyright 2015, Klaus Post, see LICENSE for details.
|
||||||
|
// Copyright 2018, Minio, Inc.
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
#define LOW R3
|
||||||
|
#define HIGH R4
|
||||||
|
#define IN R5
|
||||||
|
#define LEN R6
|
||||||
|
#define OUT R7
|
||||||
|
#define CONSTANTS R8
|
||||||
|
#define OFFSET R9
|
||||||
|
#define OFFSET1 R10
|
||||||
|
#define OFFSET2 R11
|
||||||
|
|
||||||
|
#define X6 VS34
|
||||||
|
#define X6_ V2
|
||||||
|
#define X7 VS35
|
||||||
|
#define X7_ V3
|
||||||
|
#define MSG VS36
|
||||||
|
#define MSG_ V4
|
||||||
|
#define MSG_HI VS37
|
||||||
|
#define MSG_HI_ V5
|
||||||
|
#define RESULT VS38
|
||||||
|
#define RESULT_ V6
|
||||||
|
#define ROTATE VS39
|
||||||
|
#define ROTATE_ V7
|
||||||
|
#define MASK VS40
|
||||||
|
#define MASK_ V8
|
||||||
|
#define FLIP VS41
|
||||||
|
#define FLIP_ V9
|
||||||
|
|
||||||
|
// func galMulPpc(low, high, in, out []byte)
//
// For every 16-byte block of in: split each byte into its low and high
// nibble, look the nibbles up in the 16-entry tables low and high, XOR the
// two results and store the block to out.
//
// NOTE(review): the loop body runs before the length test, so one block is
// always processed and a trailing partial block is processed as a full one.
// Callers are expected to pass len(in) as a non-zero multiple of 16.
TEXT ·galMulPpc(SB), NOFRAME|NOSPLIT, $0-96
	// Slice base pointers and len(in) from the argument frame.
	MOVD low+0(FP), LOW
	MOVD high+24(FP), HIGH
	MOVD in+48(FP), IN
	MOVD in_len+56(FP), LEN
	MOVD out+72(FP), OUT

	// Byte offsets of the second and third 16-byte entries of ·constants.
	MOVD $16, OFFSET1
	MOVD $32, OFFSET2

	// ROTATE = 4 in every byte, MASK = 0x0f in every byte, FLIP = permute control.
	MOVD   $·constants(SB), CONSTANTS
	LXVD2X (CONSTANTS)(R0), ROTATE
	LXVD2X (CONSTANTS)(OFFSET1), MASK
	LXVD2X (CONSTANTS)(OFFSET2), FLIP

	// Load both tables and reorder their bytes through FLIP
	// (presumably to undo the lane order LXVD2X yields on little-endian - confirm).
	LXVD2X (LOW)(R0), X6
	LXVD2X (HIGH)(R0), X7
	VPERM  X6_, V31, FLIP_, X6_
	VPERM  X7_, V31, FLIP_, X7_

	MOVD $0, OFFSET

loop:
	LXVD2X (IN)(OFFSET), MSG

	VSRB  MSG_, ROTATE_, MSG_HI_     // high nibble of every byte
	VAND  MSG_, MASK_, MSG_          // low nibble of every byte
	VPERM X6_, V31, MSG_, MSG_       // low-table lookup
	VPERM X7_, V31, MSG_HI_, MSG_HI_ // high-table lookup

	VXOR MSG_, MSG_HI_, MSG_         // combine both halves

	STXVD2X MSG, (OUT)(OFFSET)

	// Advance one block; continue while LEN > OFFSET.
	ADD $16, OFFSET, OFFSET
	CMP LEN, OFFSET
	BGT loop
	RET
|
||||||
|
|
||||||
|
// func galMulPpcXor(low, high, in, out []byte)
//
// Same as galMulPpc, except that the looked-up block is XORed into the
// existing contents of out instead of overwriting them.
//
// NOTE(review): the loop body runs before the length test, so one block is
// always processed and a trailing partial block is processed as a full one.
// Callers are expected to pass len(in) as a non-zero multiple of 16.
TEXT ·galMulPpcXor(SB), NOFRAME|NOSPLIT, $0-96
	// Slice base pointers and len(in) from the argument frame.
	MOVD low+0(FP), LOW
	MOVD high+24(FP), HIGH
	MOVD in+48(FP), IN
	MOVD in_len+56(FP), LEN
	MOVD out+72(FP), OUT

	// Byte offsets of the second and third 16-byte entries of ·constants.
	MOVD $16, OFFSET1
	MOVD $32, OFFSET2

	// ROTATE = 4 in every byte, MASK = 0x0f in every byte, FLIP = permute control.
	MOVD   $·constants(SB), CONSTANTS
	LXVD2X (CONSTANTS)(R0), ROTATE
	LXVD2X (CONSTANTS)(OFFSET1), MASK
	LXVD2X (CONSTANTS)(OFFSET2), FLIP

	// Load both tables and reorder their bytes through FLIP
	// (presumably to undo the lane order LXVD2X yields on little-endian - confirm).
	LXVD2X (LOW)(R0), X6
	LXVD2X (HIGH)(R0), X7
	VPERM  X6_, V31, FLIP_, X6_
	VPERM  X7_, V31, FLIP_, X7_

	MOVD $0, OFFSET

loopXor:
	LXVD2X (IN)(OFFSET), MSG
	LXVD2X (OUT)(OFFSET), RESULT     // current destination block

	VSRB  MSG_, ROTATE_, MSG_HI_     // high nibble of every byte
	VAND  MSG_, MASK_, MSG_          // low nibble of every byte
	VPERM X6_, V31, MSG_, MSG_       // low-table lookup
	VPERM X7_, V31, MSG_HI_, MSG_HI_ // high-table lookup

	VXOR MSG_, MSG_HI_, MSG_         // combine both halves
	VXOR MSG_, RESULT_, RESULT_      // accumulate into destination

	STXVD2X RESULT, (OUT)(OFFSET)

	// Advance one block; continue while LEN > OFFSET.
	ADD $16, OFFSET, OFFSET
	CMP LEN, OFFSET
	BGT loopXor
	RET
|
||||||
|
|
||||||
|
DATA ·constants+0x0(SB)/8, $0x0404040404040404
|
||||||
|
DATA ·constants+0x8(SB)/8, $0x0404040404040404
|
||||||
|
DATA ·constants+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f
|
||||||
|
DATA ·constants+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f
|
||||||
|
DATA ·constants+0x20(SB)/8, $0x0706050403020100
|
||||||
|
DATA ·constants+0x28(SB)/8, $0x0f0e0d0c0b0a0908
|
||||||
|
|
||||||
|
GLOBL ·constants(SB), 8, $48
|
|
@ -0,0 +1,164 @@
|
||||||
|
/**
|
||||||
|
* A thread-safe tree which caches inverted matrices.
|
||||||
|
*
|
||||||
|
* Copyright 2016, Peter Collins
|
||||||
|
*/
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// The tree uses a Reader-Writer mutex to make it thread-safe
|
||||||
|
// when accessing cached matrices and inserting new ones.
|
||||||
|
type inversionTree struct {
|
||||||
|
mutex sync.RWMutex
|
||||||
|
root inversionNode
|
||||||
|
}
|
||||||
|
|
||||||
|
type inversionNode struct {
|
||||||
|
matrix matrix
|
||||||
|
children []*inversionNode
|
||||||
|
}
|
||||||
|
|
||||||
|
// newInversionTree initializes a tree for storing inverted matrices.
|
||||||
|
// Note that the root node is the identity matrix as it implies
|
||||||
|
// there were no errors with the original data.
|
||||||
|
func newInversionTree(dataShards, parityShards int) *inversionTree {
|
||||||
|
identity, _ := identityMatrix(dataShards)
|
||||||
|
return &inversionTree{
|
||||||
|
root: inversionNode{
|
||||||
|
matrix: identity,
|
||||||
|
children: make([]*inversionNode, dataShards+parityShards),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetInvertedMatrix returns the cached inverted matrix or nil if it
|
||||||
|
// is not found in the tree keyed on the indices of invalid rows.
|
||||||
|
func (t *inversionTree) GetInvertedMatrix(invalidIndices []int) matrix {
|
||||||
|
if t == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// Lock the tree for reading before accessing the tree.
|
||||||
|
t.mutex.RLock()
|
||||||
|
defer t.mutex.RUnlock()
|
||||||
|
|
||||||
|
// If no invalid indices were give we should return the root
|
||||||
|
// identity matrix.
|
||||||
|
if len(invalidIndices) == 0 {
|
||||||
|
return t.root.matrix
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursively search for the inverted matrix in the tree, passing in
|
||||||
|
// 0 as the parent index as we start at the root of the tree.
|
||||||
|
return t.root.getInvertedMatrix(invalidIndices, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// errAlreadySet is returned if the root node matrix is overwritten
|
||||||
|
var errAlreadySet = errors.New("the root node identity matrix is already set")
|
||||||
|
|
||||||
|
// InsertInvertedMatrix inserts a new inverted matrix into the tree
|
||||||
|
// keyed by the indices of invalid rows. The total number of shards
|
||||||
|
// is required for creating the proper length lists of child nodes for
|
||||||
|
// each node.
|
||||||
|
func (t *inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error {
|
||||||
|
if t == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// If no invalid indices were given then we are done because the
|
||||||
|
// root node is already set with the identity matrix.
|
||||||
|
if len(invalidIndices) == 0 {
|
||||||
|
return errAlreadySet
|
||||||
|
}
|
||||||
|
|
||||||
|
if !matrix.IsSquare() {
|
||||||
|
return errNotSquare
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lock the tree for writing and reading before accessing the tree.
|
||||||
|
t.mutex.Lock()
|
||||||
|
defer t.mutex.Unlock()
|
||||||
|
|
||||||
|
// Recursively create nodes for the inverted matrix in the tree until
|
||||||
|
// we reach the node to insert the matrix to. We start by passing in
|
||||||
|
// 0 as the parent index as we start at the root of the tree.
|
||||||
|
t.root.insertInvertedMatrix(invalidIndices, matrix, shards, 0)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix {
|
||||||
|
// Get the child node to search next from the list of children. The
|
||||||
|
// list of children starts relative to the parent index passed in
|
||||||
|
// because the indices of invalid rows is sorted (by default). As we
|
||||||
|
// search recursively, the first invalid index gets popped off the list,
|
||||||
|
// so when searching through the list of children, use that first invalid
|
||||||
|
// index to find the child node.
|
||||||
|
firstIndex := invalidIndices[0]
|
||||||
|
node := n.children[firstIndex-parent]
|
||||||
|
|
||||||
|
// If the child node doesn't exist in the list yet, fail fast by
|
||||||
|
// returning, so we can construct and insert the proper inverted matrix.
|
||||||
|
if node == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there's more than one invalid index left in the list we should
|
||||||
|
// keep searching recursively.
|
||||||
|
if len(invalidIndices) > 1 {
|
||||||
|
// Search recursively on the child node by passing in the invalid indices
|
||||||
|
// with the first index popped off the front. Also the parent index to
|
||||||
|
// pass down is the first index plus one.
|
||||||
|
return node.getInvertedMatrix(invalidIndices[1:], firstIndex+1)
|
||||||
|
}
|
||||||
|
// If there aren't any more invalid indices to search, we've found our
|
||||||
|
// node. Return it, however keep in mind that the matrix could still be
|
||||||
|
// nil because intermediary nodes in the tree are created sometimes with
|
||||||
|
// their inversion matrices uninitialized.
|
||||||
|
return node.matrix
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) {
|
||||||
|
// As above, get the child node to search next from the list of children.
|
||||||
|
// The list of children starts relative to the parent index passed in
|
||||||
|
// because the indices of invalid rows is sorted (by default). As we
|
||||||
|
// search recursively, the first invalid index gets popped off the list,
|
||||||
|
// so when searching through the list of children, use that first invalid
|
||||||
|
// index to find the child node.
|
||||||
|
firstIndex := invalidIndices[0]
|
||||||
|
node := n.children[firstIndex-parent]
|
||||||
|
|
||||||
|
// If the child node doesn't exist in the list yet, create a new
|
||||||
|
// node because we have the writer lock and add it to the list
|
||||||
|
// of children.
|
||||||
|
if node == nil {
|
||||||
|
// Make the length of the list of children equal to the number
|
||||||
|
// of shards minus the first invalid index because the list of
|
||||||
|
// invalid indices is sorted, so only this length of errors
|
||||||
|
// are possible in the tree.
|
||||||
|
node = &inversionNode{
|
||||||
|
children: make([]*inversionNode, shards-firstIndex),
|
||||||
|
}
|
||||||
|
// Insert the new node into the tree at the first index relative
|
||||||
|
// to the parent index that was given in this recursive call.
|
||||||
|
n.children[firstIndex-parent] = node
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there's more than one invalid index left in the list we should
|
||||||
|
// keep searching recursively in order to find the node to add our
|
||||||
|
// matrix.
|
||||||
|
if len(invalidIndices) > 1 {
|
||||||
|
// As above, search recursively on the child node by passing in
|
||||||
|
// the invalid indices with the first index popped off the front.
|
||||||
|
// Also the total number of shards and parent index are passed down
|
||||||
|
// which is equal to the first index plus one.
|
||||||
|
node.insertInvertedMatrix(invalidIndices[1:], matrix, shards, firstIndex+1)
|
||||||
|
} else {
|
||||||
|
// If there aren't any more invalid indices to search, we've found our
|
||||||
|
// node. Cache the inverted matrix in this node.
|
||||||
|
node.matrix = matrix
|
||||||
|
}
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,281 @@
|
||||||
|
/**
|
||||||
|
* Matrix Algebra over an 8-bit Galois Field
|
||||||
|
*
|
||||||
|
* Copyright 2015, Klaus Post
|
||||||
|
* Copyright 2015, Backblaze, Inc.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// byte[row][col]
|
||||||
|
type matrix [][]byte
|
||||||
|
|
||||||
|
// newMatrix returns a matrix of zeros.
|
||||||
|
func newMatrix(rows, cols int) (matrix, error) {
|
||||||
|
if rows <= 0 {
|
||||||
|
return nil, errInvalidRowSize
|
||||||
|
}
|
||||||
|
if cols <= 0 {
|
||||||
|
return nil, errInvalidColSize
|
||||||
|
}
|
||||||
|
|
||||||
|
m := matrix(make([][]byte, rows))
|
||||||
|
for i := range m {
|
||||||
|
m[i] = make([]byte, cols)
|
||||||
|
}
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewMatrixData initializes a matrix with the given row-major data.
|
||||||
|
// Note that data is not copied from input.
|
||||||
|
func newMatrixData(data [][]byte) (matrix, error) {
|
||||||
|
m := matrix(data)
|
||||||
|
err := m.Check()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IdentityMatrix returns an identity matrix of the given size.
|
||||||
|
func identityMatrix(size int) (matrix, error) {
|
||||||
|
m, err := newMatrix(size, size)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for i := range m {
|
||||||
|
m[i][i] = 1
|
||||||
|
}
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// errInvalidRowSize will be returned if attempting to create a matrix with negative or zero row number.
|
||||||
|
var errInvalidRowSize = errors.New("invalid row size")
|
||||||
|
|
||||||
|
// errInvalidColSize will be returned if attempting to create a matrix with negative or zero column number.
|
||||||
|
var errInvalidColSize = errors.New("invalid column size")
|
||||||
|
|
||||||
|
// errColSizeMismatch is returned if the size of matrix columns mismatch.
|
||||||
|
var errColSizeMismatch = errors.New("column size is not the same for all rows")
|
||||||
|
|
||||||
|
func (m matrix) Check() error {
|
||||||
|
rows := len(m)
|
||||||
|
if rows == 0 {
|
||||||
|
return errInvalidRowSize
|
||||||
|
}
|
||||||
|
cols := len(m[0])
|
||||||
|
if cols == 0 {
|
||||||
|
return errInvalidColSize
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, col := range m {
|
||||||
|
if len(col) != cols {
|
||||||
|
return errColSizeMismatch
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns a human-readable string of the matrix contents.
|
||||||
|
//
|
||||||
|
// Example: [[1, 2], [3, 4]]
|
||||||
|
func (m matrix) String() string {
|
||||||
|
rowOut := make([]string, 0, len(m))
|
||||||
|
for _, row := range m {
|
||||||
|
colOut := make([]string, 0, len(row))
|
||||||
|
for _, col := range row {
|
||||||
|
colOut = append(colOut, strconv.Itoa(int(col)))
|
||||||
|
}
|
||||||
|
rowOut = append(rowOut, "["+strings.Join(colOut, ", ")+"]")
|
||||||
|
}
|
||||||
|
return "[" + strings.Join(rowOut, ", ") + "]"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multiply multiplies this matrix (the one on the left) by another
|
||||||
|
// matrix (the one on the right) and returns a new matrix with the result.
|
||||||
|
func (m matrix) Multiply(right matrix) (matrix, error) {
|
||||||
|
if len(m[0]) != len(right) {
|
||||||
|
return nil, fmt.Errorf("columns on left (%d) is different than rows on right (%d)", len(m[0]), len(right))
|
||||||
|
}
|
||||||
|
result, _ := newMatrix(len(m), len(right[0]))
|
||||||
|
for r, row := range result {
|
||||||
|
for c := range row {
|
||||||
|
var value byte
|
||||||
|
for i := range m[0] {
|
||||||
|
value ^= galMultiply(m[r][i], right[i][c])
|
||||||
|
}
|
||||||
|
result[r][c] = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Augment returns the concatenation of this matrix and the matrix on the right.
|
||||||
|
func (m matrix) Augment(right matrix) (matrix, error) {
|
||||||
|
if len(m) != len(right) {
|
||||||
|
return nil, errMatrixSize
|
||||||
|
}
|
||||||
|
|
||||||
|
result, _ := newMatrix(len(m), len(m[0])+len(right[0]))
|
||||||
|
for r, row := range m {
|
||||||
|
for c := range row {
|
||||||
|
result[r][c] = m[r][c]
|
||||||
|
}
|
||||||
|
cols := len(m[0])
|
||||||
|
for c := range right[0] {
|
||||||
|
result[r][cols+c] = right[r][c]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// errMatrixSize is returned if matrix dimensions don't match.
|
||||||
|
var errMatrixSize = errors.New("matrix sizes do not match")
|
||||||
|
|
||||||
|
func (m matrix) SameSize(n matrix) error {
|
||||||
|
if len(m) != len(n) {
|
||||||
|
return errMatrixSize
|
||||||
|
}
|
||||||
|
for i := range m {
|
||||||
|
if len(m[i]) != len(n[i]) {
|
||||||
|
return errMatrixSize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SubMatrix returns a part of this matrix. Data is copied.
|
||||||
|
func (m matrix) SubMatrix(rmin, cmin, rmax, cmax int) (matrix, error) {
|
||||||
|
result, err := newMatrix(rmax-rmin, cmax-cmin)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
// OPTME: If used heavily, use copy function to copy slice
|
||||||
|
for r := rmin; r < rmax; r++ {
|
||||||
|
for c := cmin; c < cmax; c++ {
|
||||||
|
result[r-rmin][c-cmin] = m[r][c]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SwapRows Exchanges two rows in the matrix.
|
||||||
|
func (m matrix) SwapRows(r1, r2 int) error {
|
||||||
|
if r1 < 0 || len(m) <= r1 || r2 < 0 || len(m) <= r2 {
|
||||||
|
return errInvalidRowSize
|
||||||
|
}
|
||||||
|
m[r2], m[r1] = m[r1], m[r2]
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsSquare will return true if the matrix is square, otherwise false.
|
||||||
|
func (m matrix) IsSquare() bool {
|
||||||
|
return len(m) == len(m[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
// errSingular is returned if the matrix is singular and cannot be inverted.
|
||||||
|
var errSingular = errors.New("matrix is singular")
|
||||||
|
|
||||||
|
// errNotSquare is returned if attempting to invert a non-square matrix.
|
||||||
|
var errNotSquare = errors.New("only square matrices can be inverted")
|
||||||
|
|
||||||
|
// Invert returns the inverse of this matrix.
|
||||||
|
// Returns ErrSingular when the matrix is singular and doesn't have an inverse.
|
||||||
|
// The matrix must be square, otherwise ErrNotSquare is returned.
|
||||||
|
func (m matrix) Invert() (matrix, error) {
|
||||||
|
if !m.IsSquare() {
|
||||||
|
return nil, errNotSquare
|
||||||
|
}
|
||||||
|
|
||||||
|
size := len(m)
|
||||||
|
work, _ := identityMatrix(size)
|
||||||
|
work, _ = m.Augment(work)
|
||||||
|
|
||||||
|
err := work.gaussianElimination()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return work.SubMatrix(0, size, size, size*2)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m matrix) gaussianElimination() error {
|
||||||
|
rows := len(m)
|
||||||
|
columns := len(m[0])
|
||||||
|
// Clear out the part below the main diagonal and scale the main
|
||||||
|
// diagonal to be 1.
|
||||||
|
for r := 0; r < rows; r++ {
|
||||||
|
// If the element on the diagonal is 0, find a row below
|
||||||
|
// that has a non-zero and swap them.
|
||||||
|
if m[r][r] == 0 {
|
||||||
|
for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
|
||||||
|
if m[rowBelow][r] != 0 {
|
||||||
|
err := m.SwapRows(r, rowBelow)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If we couldn't find one, the matrix is singular.
|
||||||
|
if m[r][r] == 0 {
|
||||||
|
return errSingular
|
||||||
|
}
|
||||||
|
// Scale to 1.
|
||||||
|
if m[r][r] != 1 {
|
||||||
|
scale := galOneOver(m[r][r])
|
||||||
|
for c := 0; c < columns; c++ {
|
||||||
|
m[r][c] = galMultiply(m[r][c], scale)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Make everything below the 1 be a 0 by subtracting
|
||||||
|
// a multiple of it. (Subtraction and addition are
|
||||||
|
// both exclusive or in the Galois field.)
|
||||||
|
for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
|
||||||
|
if m[rowBelow][r] != 0 {
|
||||||
|
scale := m[rowBelow][r]
|
||||||
|
for c := 0; c < columns; c++ {
|
||||||
|
m[rowBelow][c] ^= galMultiply(scale, m[r][c])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now clear the part above the main diagonal.
|
||||||
|
for d := 0; d < rows; d++ {
|
||||||
|
for rowAbove := 0; rowAbove < d; rowAbove++ {
|
||||||
|
if m[rowAbove][d] != 0 {
|
||||||
|
scale := m[rowAbove][d]
|
||||||
|
for c := 0; c < columns; c++ {
|
||||||
|
m[rowAbove][c] ^= galMultiply(scale, m[d][c])
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a Vandermonde matrix, which is guaranteed to have the
|
||||||
|
// property that any subset of rows that forms a square matrix
|
||||||
|
// is invertible.
|
||||||
|
func vandermonde(rows, cols int) (matrix, error) {
|
||||||
|
result, err := newMatrix(rows, cols)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for r, row := range result {
|
||||||
|
for c := range row {
|
||||||
|
result[r][c] = galExp(byte(r), c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
|
@ -0,0 +1,323 @@
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
import (
|
||||||
|
"runtime"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/klauspost/cpuid/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Option allows to override processing parameters.
|
||||||
|
type Option func(*options)
|
||||||
|
|
||||||
|
type options struct {
|
||||||
|
maxGoroutines int
|
||||||
|
minSplitSize int
|
||||||
|
shardSize int
|
||||||
|
perRound int
|
||||||
|
|
||||||
|
useAvxGNFI,
|
||||||
|
useAvx512GFNI,
|
||||||
|
useAVX512,
|
||||||
|
useAVX2,
|
||||||
|
useSSSE3,
|
||||||
|
useSSE2 bool
|
||||||
|
|
||||||
|
useJerasureMatrix bool
|
||||||
|
usePAR1Matrix bool
|
||||||
|
useCauchy bool
|
||||||
|
fastOneParity bool
|
||||||
|
inversionCache bool
|
||||||
|
forcedInversionCache bool
|
||||||
|
customMatrix [][]byte
|
||||||
|
withLeopard leopardMode
|
||||||
|
|
||||||
|
// stream options
|
||||||
|
concReads bool
|
||||||
|
concWrites bool
|
||||||
|
streamBS int
|
||||||
|
}
|
||||||
|
|
||||||
|
var defaultOptions = options{
|
||||||
|
maxGoroutines: 384,
|
||||||
|
minSplitSize: -1,
|
||||||
|
fastOneParity: false,
|
||||||
|
inversionCache: true,
|
||||||
|
|
||||||
|
// Detect CPU capabilities.
|
||||||
|
useSSSE3: cpuid.CPU.Supports(cpuid.SSSE3),
|
||||||
|
useSSE2: cpuid.CPU.Supports(cpuid.SSE2),
|
||||||
|
useAVX2: cpuid.CPU.Supports(cpuid.AVX2),
|
||||||
|
useAVX512: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512BW, cpuid.AVX512VL),
|
||||||
|
useAvx512GFNI: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.GFNI, cpuid.AVX512DQ),
|
||||||
|
useAvxGNFI: cpuid.CPU.Supports(cpuid.AVX, cpuid.GFNI),
|
||||||
|
}
|
||||||
|
|
||||||
|
// leopardMode selects when the leopard GF implementation is used for
// encoding and decoding.
type leopardMode int

const (
	// leopardAsNeeded is the zero value: leopard 16-bit is only switched to
	// when there are more than 256 shards.
	leopardAsNeeded = leopardMode(iota)
	// leopardGF16 uses leopard in 16-bit mode regardless of shard count.
	leopardGF16
	// leopardAlways uses 8-bit leopard for up to 256 shards and 16-bit
	// leopard beyond that.
	leopardAlways
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
if runtime.GOMAXPROCS(0) <= 1 {
|
||||||
|
defaultOptions.maxGoroutines = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithMaxGoroutines is the maximum number of goroutines number for encoding & decoding.
|
||||||
|
// Jobs will be split into this many parts, unless each goroutine would have to process
|
||||||
|
// less than minSplitSize bytes (set with WithMinSplitSize).
|
||||||
|
// For the best speed, keep this well above the GOMAXPROCS number for more fine grained
|
||||||
|
// scheduling.
|
||||||
|
// If n <= 0, it is ignored.
|
||||||
|
func WithMaxGoroutines(n int) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
if n > 0 {
|
||||||
|
o.maxGoroutines = n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithAutoGoroutines will adjust the number of goroutines for optimal speed with a
|
||||||
|
// specific shard size.
|
||||||
|
// Send in the shard size you expect to send. Other shard sizes will work, but may not
|
||||||
|
// run at the optimal speed.
|
||||||
|
// Overwrites WithMaxGoroutines.
|
||||||
|
// If shardSize <= 0, it is ignored.
|
||||||
|
func WithAutoGoroutines(shardSize int) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.shardSize = shardSize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithMinSplitSize is the minimum encoding size in bytes per goroutine.
|
||||||
|
// By default this parameter is determined by CPU cache characteristics.
|
||||||
|
// See WithMaxGoroutines on how jobs are split.
|
||||||
|
// If n <= 0, it is ignored.
|
||||||
|
func WithMinSplitSize(n int) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
if n > 0 {
|
||||||
|
o.minSplitSize = n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithConcurrentStreams will enable concurrent reads and writes on the streams.
|
||||||
|
// Default: Disabled, meaning only one stream will be read/written at the time.
|
||||||
|
// Ignored if not used on a stream input.
|
||||||
|
func WithConcurrentStreams(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.concReads, o.concWrites = enabled, enabled
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithConcurrentStreamReads will enable concurrent reads from the input streams.
|
||||||
|
// Default: Disabled, meaning only one stream will be read at the time.
|
||||||
|
// Ignored if not used on a stream input.
|
||||||
|
func WithConcurrentStreamReads(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.concReads = enabled
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithConcurrentStreamWrites will enable concurrent writes to the the output streams.
|
||||||
|
// Default: Disabled, meaning only one stream will be written at the time.
|
||||||
|
// Ignored if not used on a stream input.
|
||||||
|
func WithConcurrentStreamWrites(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.concWrites = enabled
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithInversionCache allows to control the inversion cache.
|
||||||
|
// This will cache reconstruction matrices so they can be reused.
|
||||||
|
// Enabled by default, or <= 64 shards for Leopard encoding.
|
||||||
|
func WithInversionCache(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.inversionCache = enabled
|
||||||
|
o.forcedInversionCache = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithStreamBlockSize allows to set a custom block size per round of reads/writes.
|
||||||
|
// If not set, any shard size set with WithAutoGoroutines will be used.
|
||||||
|
// If WithAutoGoroutines is also unset, 4MB will be used.
|
||||||
|
// Ignored if not used on stream.
|
||||||
|
func WithStreamBlockSize(n int) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.streamBS = n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithSSSE3 allows to enable/disable SSSE3 instructions.
|
||||||
|
// If not set, SSSE3 will be turned on or off automatically based on CPU ID information.
|
||||||
|
func WithSSSE3(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.useSSSE3 = enabled
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithAVX2 allows to enable/disable AVX2 instructions.
|
||||||
|
// If not set, AVX will be turned on or off automatically based on CPU ID information.
|
||||||
|
// This will also disable AVX GFNI instructions.
|
||||||
|
func WithAVX2(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.useAVX2 = enabled
|
||||||
|
if o.useAvxGNFI {
|
||||||
|
o.useAvxGNFI = enabled
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithSSE2 allows to enable/disable SSE2 instructions.
|
||||||
|
// If not set, SSE2 will be turned on or off automatically based on CPU ID information.
|
||||||
|
func WithSSE2(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.useSSE2 = enabled
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithAVX512 allows to enable/disable AVX512 (and GFNI) instructions.
|
||||||
|
func WithAVX512(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.useAVX512 = enabled
|
||||||
|
o.useAvx512GFNI = enabled
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithGFNI allows to enable/disable AVX512+GFNI instructions.
|
||||||
|
// If not set, GFNI will be turned on or off automatically based on CPU ID information.
|
||||||
|
func WithGFNI(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.useAvx512GFNI = enabled
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithAVXGFNI allows to enable/disable GFNI with AVX instructions.
|
||||||
|
// If not set, GFNI will be turned on or off automatically based on CPU ID information.
|
||||||
|
func WithAVXGFNI(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.useAvxGNFI = enabled
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithJerasureMatrix causes the encoder to build the Reed-Solomon-Vandermonde
|
||||||
|
// matrix in the same way as done by the Jerasure library.
|
||||||
|
// The first row and column of the coding matrix only contains 1's in this method
|
||||||
|
// so the first parity chunk is always equal to XOR of all data chunks.
|
||||||
|
func WithJerasureMatrix() Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.useJerasureMatrix = true
|
||||||
|
o.usePAR1Matrix = false
|
||||||
|
o.useCauchy = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithPAR1Matrix causes the encoder to build the matrix how PARv1
|
||||||
|
// does. Note that the method they use is buggy, and may lead to cases
|
||||||
|
// where recovery is impossible, even if there are enough parity
|
||||||
|
// shards.
|
||||||
|
func WithPAR1Matrix() Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.useJerasureMatrix = false
|
||||||
|
o.usePAR1Matrix = true
|
||||||
|
o.useCauchy = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithCauchyMatrix will make the encoder build a Cauchy style matrix.
|
||||||
|
// The output of this is not compatible with the standard output.
|
||||||
|
// A Cauchy matrix is faster to generate. This does not affect data throughput,
|
||||||
|
// but will result in slightly faster start-up time.
|
||||||
|
func WithCauchyMatrix() Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.useJerasureMatrix = false
|
||||||
|
o.usePAR1Matrix = false
|
||||||
|
o.useCauchy = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithFastOneParityMatrix will switch the matrix to a simple xor
|
||||||
|
// if there is only one parity shard.
|
||||||
|
// The PAR1 matrix already has this property so it has little effect there.
|
||||||
|
func WithFastOneParityMatrix() Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.fastOneParity = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithCustomMatrix causes the encoder to use the manually specified matrix.
|
||||||
|
// customMatrix represents only the parity chunks.
|
||||||
|
// customMatrix must have at least ParityShards rows and DataShards columns.
|
||||||
|
// It can be used for interoperability with libraries which generate
|
||||||
|
// the matrix differently or to implement more complex coding schemes like LRC
|
||||||
|
// (locally reconstructible codes).
|
||||||
|
func WithCustomMatrix(customMatrix [][]byte) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
o.customMatrix = customMatrix
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithLeopardGF16 will always use leopard GF16 for encoding,
|
||||||
|
// even when there is less than 256 shards.
|
||||||
|
// This will likely improve reconstruction time for some setups.
|
||||||
|
// This is not compatible with Leopard output for <= 256 shards.
|
||||||
|
// Note that Leopard places certain restrictions on use see other documentation.
|
||||||
|
func WithLeopardGF16(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
if enabled {
|
||||||
|
o.withLeopard = leopardGF16
|
||||||
|
} else {
|
||||||
|
o.withLeopard = leopardAsNeeded
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithLeopardGF will use leopard GF for encoding, even when there are fewer than
|
||||||
|
// 256 shards.
|
||||||
|
// This will likely improve reconstruction time for some setups.
|
||||||
|
// Note that Leopard places certain restrictions on use see other documentation.
|
||||||
|
func WithLeopardGF(enabled bool) Option {
|
||||||
|
return func(o *options) {
|
||||||
|
if enabled {
|
||||||
|
o.withLeopard = leopardAlways
|
||||||
|
} else {
|
||||||
|
o.withLeopard = leopardAsNeeded
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *options) cpuOptions() string {
|
||||||
|
var res []string
|
||||||
|
if o.useSSE2 {
|
||||||
|
res = append(res, "SSE2")
|
||||||
|
}
|
||||||
|
if o.useAVX2 {
|
||||||
|
res = append(res, "AVX2")
|
||||||
|
}
|
||||||
|
if o.useSSSE3 {
|
||||||
|
res = append(res, "SSSE3")
|
||||||
|
}
|
||||||
|
if o.useAVX512 {
|
||||||
|
res = append(res, "AVX512")
|
||||||
|
}
|
||||||
|
if o.useAvx512GFNI {
|
||||||
|
res = append(res, "AVX512+GFNI")
|
||||||
|
}
|
||||||
|
if o.useAvxGNFI {
|
||||||
|
res = append(res, "AVX+GFNI")
|
||||||
|
}
|
||||||
|
if len(res) == 0 {
|
||||||
|
return "pure Go"
|
||||||
|
}
|
||||||
|
return strings.Join(res, ",")
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,614 @@
|
||||||
|
/**
|
||||||
|
* Reed-Solomon Coding over 8-bit values.
|
||||||
|
*
|
||||||
|
* Copyright 2015, Klaus Post
|
||||||
|
* Copyright 2015, Backblaze, Inc.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// StreamEncoder is an interface to encode Reed-Solomon parity sets for your
// data. It provides a fully streaming interface, and processes data in
// blocks of up to 4MB by default.
//
// For small shard sizes, 10MB and below, it is recommended to use the
// in-memory interface, since the streaming interface has a start up
// overhead.
//
// For all operations, readers and writers should not assume any order or
// size of individual reads/writes.
//
// For usage examples, see "stream-encoder.go" and "streamdecoder.go" in the
// examples folder.
type StreamEncoder interface {
	// Encode generates parity shards for a set of data shards.
	//
	// 'data' holds one reader per data shard and 'parity' holds one writer
	// per parity shard. The number of readers and writers must match the
	// shard counts given to NewStream().
	//
	// Each reader must supply the same number of bytes. The number of bytes
	// written to each parity writer matches the input size.
	//
	// If a data stream returns an error, a StreamReadError type error is
	// returned. If a parity writer returns an error, a StreamWriteError is
	// returned.
	Encode(data []io.Reader, parity []io.Writer) error

	// Verify returns true if the parity shards contain correct data.
	//
	// The number of shards must match the total number of data+parity
	// shards given to NewStream().
	//
	// Each reader must supply the same number of bytes.
	// If a shard stream returns an error, a StreamReadError type error is
	// returned.
	Verify(shards []io.Reader) (bool, error)

	// Reconstruct recreates the missing shards if possible.
	//
	// It takes a list of valid shards (to read) and invalid shards (to
	// write). A shard is marked as missing by setting it to nil in the
	// 'valid' slice and at the same time setting a non-nil writer in 'fill'.
	// An index cannot contain both a non-nil 'valid' and 'fill' entry; if
	// both are provided, ErrReconstructMismatch is returned.
	//
	// If there are too few shards to reconstruct the missing ones,
	// ErrTooFewShards is returned.
	//
	// The reconstructed shard set is complete, but integrity is not
	// verified. Use the Verify function to check if the data set is ok.
	Reconstruct(valid []io.Reader, fill []io.Writer) error

	// Split divides an input stream into the number of shards given to the
	// encoder.
	//
	// The data is split into equally sized shards. If the data size isn't
	// divisible by the number of shards, the last shard contains extra
	// zeros.
	//
	// You must supply the total size of your input. ErrShortData is
	// returned if the indicated number of bytes cannot be retrieved.
	Split(data io.Reader, dst []io.Writer, size int64) (err error)

	// Join concatenates the shards and writes the data segment to dst.
	//
	// Only the data shards are considered.
	//
	// You must supply the exact output size you want.
	// If there are too few shards given, ErrTooFewShards is returned.
	// If the total data size is less than outSize, ErrShortData is
	// returned.
	Join(dst io.Writer, shards []io.Reader, outSize int64) error
}
|
||||||
|
|
||||||
|
// StreamReadError is returned when a read error is encountered
// that relates to a supplied stream.
// This will allow you to find out which reader has failed.
type StreamReadError struct {
	Err    error // The error
	Stream int   // The stream number on which the error occurred
}

// Error returns the error as a string.
// %v is used so that a nil Err is rendered as "<nil>" instead of a
// formatting-error marker; for non-nil errors the output is unchanged.
func (s StreamReadError) Error() string {
	return fmt.Sprintf("error reading stream %d: %v", s.Stream, s.Err)
}

// String returns the error as a string
func (s StreamReadError) String() string {
	return s.Error()
}

// Unwrap returns the underlying error so that errors.Is and errors.As can
// inspect the cause of the read failure.
func (s StreamReadError) Unwrap() error {
	return s.Err
}
|
||||||
|
|
||||||
|
// StreamWriteError is returned when a write error is encountered
// that relates to a supplied stream. This will allow you to
// find out which writer has failed.
type StreamWriteError struct {
	Err    error // The error
	Stream int   // The stream number on which the error occurred
}

// Error returns the error as a string.
// %v is used so that a nil Err is rendered as "<nil>" instead of a
// formatting-error marker; for non-nil errors the output is unchanged.
func (s StreamWriteError) Error() string {
	return fmt.Sprintf("error writing stream %d: %v", s.Stream, s.Err)
}

// String returns the error as a string
func (s StreamWriteError) String() string {
	return s.Error()
}

// Unwrap returns the underlying error so that errors.Is and errors.As can
// inspect the cause of the write failure.
func (s StreamWriteError) Unwrap() error {
	return s.Err
}
|
||||||
|
|
||||||
|
// rsStream contains a matrix for a specific
|
||||||
|
// distribution of datashards and parity shards.
|
||||||
|
// Construct if using NewStream()
|
||||||
|
type rsStream struct {
|
||||||
|
r *reedSolomon
|
||||||
|
o options
|
||||||
|
|
||||||
|
// Shard reader
|
||||||
|
readShards func(dst [][]byte, in []io.Reader) error
|
||||||
|
// Shard writer
|
||||||
|
writeShards func(out []io.Writer, in [][]byte) error
|
||||||
|
|
||||||
|
blockPool sync.Pool
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewStream creates a new encoder and initializes it to
|
||||||
|
// the number of data shards and parity shards that
|
||||||
|
// you want to use. You can reuse this encoder.
|
||||||
|
// Note that the maximum number of data shards is 256.
|
||||||
|
func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
|
||||||
|
if dataShards+parityShards > 256 {
|
||||||
|
return nil, ErrMaxShardNum
|
||||||
|
}
|
||||||
|
|
||||||
|
r := rsStream{o: defaultOptions}
|
||||||
|
for _, opt := range o {
|
||||||
|
opt(&r.o)
|
||||||
|
}
|
||||||
|
// Override block size if shard size is set.
|
||||||
|
if r.o.streamBS == 0 && r.o.shardSize > 0 {
|
||||||
|
r.o.streamBS = r.o.shardSize
|
||||||
|
}
|
||||||
|
if r.o.streamBS <= 0 {
|
||||||
|
r.o.streamBS = 4 << 20
|
||||||
|
}
|
||||||
|
if r.o.shardSize == 0 && r.o.maxGoroutines == defaultOptions.maxGoroutines {
|
||||||
|
o = append(o, WithAutoGoroutines(r.o.streamBS))
|
||||||
|
}
|
||||||
|
|
||||||
|
enc, err := New(dataShards, parityShards, o...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
r.r = enc.(*reedSolomon)
|
||||||
|
|
||||||
|
r.blockPool.New = func() interface{} {
|
||||||
|
return AllocAligned(dataShards+parityShards, r.o.streamBS)
|
||||||
|
}
|
||||||
|
r.readShards = readShards
|
||||||
|
r.writeShards = writeShards
|
||||||
|
if r.o.concReads {
|
||||||
|
r.readShards = cReadShards
|
||||||
|
}
|
||||||
|
if r.o.concWrites {
|
||||||
|
r.writeShards = cWriteShards
|
||||||
|
}
|
||||||
|
|
||||||
|
return &r, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewStreamC creates a new encoder and initializes it to
|
||||||
|
// the number of data shards and parity shards given.
|
||||||
|
//
|
||||||
|
// This functions as 'NewStream', but allows you to enable CONCURRENT reads and writes.
|
||||||
|
func NewStreamC(dataShards, parityShards int, conReads, conWrites bool, o ...Option) (StreamEncoder, error) {
|
||||||
|
return NewStream(dataShards, parityShards, append(o, WithConcurrentStreamReads(conReads), WithConcurrentStreamWrites(conWrites))...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *rsStream) createSlice() [][]byte {
|
||||||
|
out := r.blockPool.Get().([][]byte)
|
||||||
|
for i := range out {
|
||||||
|
out[i] = out[i][:r.o.streamBS]
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encodes parity shards for a set of data shards.
|
||||||
|
//
|
||||||
|
// Input is 'shards' containing readers for data shards followed by parity shards
|
||||||
|
// io.Writer.
|
||||||
|
//
|
||||||
|
// The number of shards must match the number given to NewStream().
|
||||||
|
//
|
||||||
|
// Each reader must supply the same number of bytes.
|
||||||
|
//
|
||||||
|
// The parity shards will be written to the writer.
|
||||||
|
// The number of bytes written will match the input size.
|
||||||
|
//
|
||||||
|
// If a data stream returns an error, a StreamReadError type error
|
||||||
|
// will be returned. If a parity writer returns an error, a
|
||||||
|
// StreamWriteError will be returned.
|
||||||
|
func (r *rsStream) Encode(data []io.Reader, parity []io.Writer) error {
|
||||||
|
if len(data) != r.r.dataShards {
|
||||||
|
return ErrTooFewShards
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(parity) != r.r.parityShards {
|
||||||
|
return ErrTooFewShards
|
||||||
|
}
|
||||||
|
|
||||||
|
all := r.createSlice()
|
||||||
|
defer r.blockPool.Put(all)
|
||||||
|
in := all[:r.r.dataShards]
|
||||||
|
out := all[r.r.dataShards:]
|
||||||
|
read := 0
|
||||||
|
|
||||||
|
for {
|
||||||
|
err := r.readShards(in, data)
|
||||||
|
switch err {
|
||||||
|
case nil:
|
||||||
|
case io.EOF:
|
||||||
|
if read == 0 {
|
||||||
|
return ErrShardNoData
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
out = trimShards(out, shardSize(in))
|
||||||
|
read += shardSize(in)
|
||||||
|
err = r.r.Encode(all)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = r.writeShards(parity, out)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// trimShards resizes every non-empty shard of in to exactly size bytes and
// leaves empty shards empty. The slice is modified in place and returned.
func trimShards(in [][]byte, size int) [][]byte {
	for idx, shard := range in {
		if len(shard) != 0 {
			shard = shard[:size]
		}
		// Only reached with a shorter length when the shard was empty.
		if len(shard) < size {
			shard = shard[:0]
		}
		in[idx] = shard
	}
	return in
}
|
||||||
|
|
||||||
|
func readShards(dst [][]byte, in []io.Reader) error {
|
||||||
|
if len(in) != len(dst) {
|
||||||
|
panic("internal error: in and dst size do not match")
|
||||||
|
}
|
||||||
|
size := -1
|
||||||
|
for i := range in {
|
||||||
|
if in[i] == nil {
|
||||||
|
dst[i] = dst[i][:0]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
n, err := io.ReadFull(in[i], dst[i])
|
||||||
|
// The error is EOF only if no bytes were read.
|
||||||
|
// If an EOF happens after reading some but not all the bytes,
|
||||||
|
// ReadFull returns ErrUnexpectedEOF.
|
||||||
|
switch err {
|
||||||
|
case io.ErrUnexpectedEOF, io.EOF:
|
||||||
|
if size < 0 {
|
||||||
|
size = n
|
||||||
|
} else if n != size {
|
||||||
|
// Shard sizes must match.
|
||||||
|
return ErrShardSize
|
||||||
|
}
|
||||||
|
dst[i] = dst[i][0:n]
|
||||||
|
case nil:
|
||||||
|
continue
|
||||||
|
default:
|
||||||
|
return StreamReadError{Err: err, Stream: i}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if size == 0 {
|
||||||
|
return io.EOF
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeShards(out []io.Writer, in [][]byte) error {
|
||||||
|
if len(out) != len(in) {
|
||||||
|
panic("internal error: in and out size do not match")
|
||||||
|
}
|
||||||
|
for i := range in {
|
||||||
|
if out[i] == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
n, err := out[i].Write(in[i])
|
||||||
|
if err != nil {
|
||||||
|
return StreamWriteError{Err: err, Stream: i}
|
||||||
|
}
|
||||||
|
//
|
||||||
|
if n != len(in[i]) {
|
||||||
|
return StreamWriteError{Err: io.ErrShortWrite, Stream: i}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// readResult carries the outcome of one concurrent shard read.
type readResult struct {
	n    int   // index of the stream that was read
	size int   // number of bytes read from that stream
	err  error // error returned by the read, if any
}
|
||||||
|
|
||||||
|
// cReadShards reads shards concurrently
|
||||||
|
func cReadShards(dst [][]byte, in []io.Reader) error {
|
||||||
|
if len(in) != len(dst) {
|
||||||
|
panic("internal error: in and dst size do not match")
|
||||||
|
}
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(len(in))
|
||||||
|
res := make(chan readResult, len(in))
|
||||||
|
for i := range in {
|
||||||
|
if in[i] == nil {
|
||||||
|
dst[i] = dst[i][:0]
|
||||||
|
wg.Done()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
go func(i int) {
|
||||||
|
defer wg.Done()
|
||||||
|
n, err := io.ReadFull(in[i], dst[i])
|
||||||
|
// The error is EOF only if no bytes were read.
|
||||||
|
// If an EOF happens after reading some but not all the bytes,
|
||||||
|
// ReadFull returns ErrUnexpectedEOF.
|
||||||
|
res <- readResult{size: n, err: err, n: i}
|
||||||
|
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
close(res)
|
||||||
|
size := -1
|
||||||
|
for r := range res {
|
||||||
|
switch r.err {
|
||||||
|
case io.ErrUnexpectedEOF, io.EOF:
|
||||||
|
if size < 0 {
|
||||||
|
size = r.size
|
||||||
|
} else if r.size != size {
|
||||||
|
// Shard sizes must match.
|
||||||
|
return ErrShardSize
|
||||||
|
}
|
||||||
|
dst[r.n] = dst[r.n][0:r.size]
|
||||||
|
case nil:
|
||||||
|
default:
|
||||||
|
return StreamReadError{Err: r.err, Stream: r.n}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if size == 0 {
|
||||||
|
return io.EOF
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// cWriteShards writes shards concurrently
|
||||||
|
func cWriteShards(out []io.Writer, in [][]byte) error {
|
||||||
|
if len(out) != len(in) {
|
||||||
|
panic("internal error: in and out size do not match")
|
||||||
|
}
|
||||||
|
var errs = make(chan error, len(out))
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(len(out))
|
||||||
|
for i := range in {
|
||||||
|
go func(i int) {
|
||||||
|
defer wg.Done()
|
||||||
|
if out[i] == nil {
|
||||||
|
errs <- nil
|
||||||
|
return
|
||||||
|
}
|
||||||
|
n, err := out[i].Write(in[i])
|
||||||
|
if err != nil {
|
||||||
|
errs <- StreamWriteError{Err: err, Stream: i}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if n != len(in[i]) {
|
||||||
|
errs <- StreamWriteError{Err: io.ErrShortWrite, Stream: i}
|
||||||
|
}
|
||||||
|
}(i)
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
close(errs)
|
||||||
|
for err := range errs {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify returns true if the parity shards contain correct data.
|
||||||
|
//
|
||||||
|
// The number of shards must match the number total data+parity shards
|
||||||
|
// given to NewStream().
|
||||||
|
//
|
||||||
|
// Each reader must supply the same number of bytes.
|
||||||
|
// If a shard stream returns an error, a StreamReadError type error
|
||||||
|
// will be returned.
|
||||||
|
func (r *rsStream) Verify(shards []io.Reader) (bool, error) {
|
||||||
|
if len(shards) != r.r.totalShards {
|
||||||
|
return false, ErrTooFewShards
|
||||||
|
}
|
||||||
|
|
||||||
|
read := 0
|
||||||
|
all := r.createSlice()
|
||||||
|
defer r.blockPool.Put(all)
|
||||||
|
for {
|
||||||
|
err := r.readShards(all, shards)
|
||||||
|
if err == io.EOF {
|
||||||
|
if read == 0 {
|
||||||
|
return false, ErrShardNoData
|
||||||
|
}
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
read += shardSize(all)
|
||||||
|
ok, err := r.r.Verify(all)
|
||||||
|
if !ok || err != nil {
|
||||||
|
return ok, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ErrReconstructMismatch is returned by the StreamEncoder when both a
// "valid" and a "fill" stream are supplied for the same index.
// In that case it cannot be determined whether the shard is considered
// valid or should be reconstructed.
var ErrReconstructMismatch = errors.New(
	"valid shards and fill shards are mutually exclusive",
)
|
||||||
|
|
||||||
|
// Reconstruct will recreate the missing shards if possible.
|
||||||
|
//
|
||||||
|
// Given a list of valid shards (to read) and invalid shards (to write)
|
||||||
|
//
|
||||||
|
// You indicate that a shard is missing by setting it to nil in the 'valid'
|
||||||
|
// slice and at the same time setting a non-nil writer in "fill".
|
||||||
|
// An index cannot contain both non-nil 'valid' and 'fill' entry.
|
||||||
|
//
|
||||||
|
// If there are too few shards to reconstruct the missing
|
||||||
|
// ones, ErrTooFewShards will be returned.
|
||||||
|
//
|
||||||
|
// The reconstructed shard set is complete when explicitly asked for all missing shards.
|
||||||
|
// However its integrity is not automatically verified.
|
||||||
|
// Use the Verify function to check in case the data set is complete.
|
||||||
|
func (r *rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
|
||||||
|
if len(valid) != r.r.totalShards {
|
||||||
|
return ErrTooFewShards
|
||||||
|
}
|
||||||
|
if len(fill) != r.r.totalShards {
|
||||||
|
return ErrTooFewShards
|
||||||
|
}
|
||||||
|
|
||||||
|
all := r.createSlice()
|
||||||
|
defer r.blockPool.Put(all)
|
||||||
|
reconDataOnly := true
|
||||||
|
for i := range valid {
|
||||||
|
if valid[i] != nil && fill[i] != nil {
|
||||||
|
return ErrReconstructMismatch
|
||||||
|
}
|
||||||
|
if i >= r.r.dataShards && fill[i] != nil {
|
||||||
|
reconDataOnly = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
read := 0
|
||||||
|
for {
|
||||||
|
err := r.readShards(all, valid)
|
||||||
|
if err == io.EOF {
|
||||||
|
if read == 0 {
|
||||||
|
return ErrShardNoData
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
read += shardSize(all)
|
||||||
|
all = trimShards(all, shardSize(all))
|
||||||
|
|
||||||
|
if reconDataOnly {
|
||||||
|
err = r.r.ReconstructData(all) // just reconstruct missing data shards
|
||||||
|
} else {
|
||||||
|
err = r.r.Reconstruct(all) // reconstruct all missing shards
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = r.writeShards(fill, all)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Join the shards and write the data segment to dst.
|
||||||
|
//
|
||||||
|
// Only the data shards are considered.
|
||||||
|
//
|
||||||
|
// You must supply the exact output size you want.
|
||||||
|
// If there are to few shards given, ErrTooFewShards will be returned.
|
||||||
|
// If the total data size is less than outSize, ErrShortData will be returned.
|
||||||
|
func (r *rsStream) Join(dst io.Writer, shards []io.Reader, outSize int64) error {
|
||||||
|
// Do we have enough shards?
|
||||||
|
if len(shards) < r.r.dataShards {
|
||||||
|
return ErrTooFewShards
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trim off parity shards if any
|
||||||
|
shards = shards[:r.r.dataShards]
|
||||||
|
for i := range shards {
|
||||||
|
if shards[i] == nil {
|
||||||
|
return StreamReadError{Err: ErrShardNoData, Stream: i}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Join all shards
|
||||||
|
src := io.MultiReader(shards...)
|
||||||
|
|
||||||
|
// Copy data to dst
|
||||||
|
n, err := io.CopyN(dst, src, outSize)
|
||||||
|
if err == io.EOF {
|
||||||
|
return ErrShortData
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if n != outSize {
|
||||||
|
return ErrShortData
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split a an input stream into the number of shards given to the encoder.
|
||||||
|
//
|
||||||
|
// The data will be split into equally sized shards.
|
||||||
|
// If the data size isn't dividable by the number of shards,
|
||||||
|
// the last shard will contain extra zeros.
|
||||||
|
//
|
||||||
|
// You must supply the total size of your input.
|
||||||
|
// 'ErrShortData' will be returned if it is unable to retrieve the
|
||||||
|
// number of bytes indicated.
|
||||||
|
func (r *rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
|
||||||
|
if size == 0 {
|
||||||
|
return ErrShortData
|
||||||
|
}
|
||||||
|
if len(dst) != r.r.dataShards {
|
||||||
|
return ErrInvShardNum
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range dst {
|
||||||
|
if dst[i] == nil {
|
||||||
|
return StreamWriteError{Err: ErrShardNoData, Stream: i}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate number of bytes per shard.
|
||||||
|
perShard := (size + int64(r.r.dataShards) - 1) / int64(r.r.dataShards)
|
||||||
|
|
||||||
|
// Pad data to r.Shards*perShard.
|
||||||
|
paddingSize := (int64(r.r.totalShards) * perShard) - size
|
||||||
|
data = io.MultiReader(data, io.LimitReader(zeroPaddingReader{}, paddingSize))
|
||||||
|
|
||||||
|
// Split into equal-length shards and copy.
|
||||||
|
for i := range dst {
|
||||||
|
n, err := io.CopyN(dst[i], data, perShard)
|
||||||
|
if err != io.EOF && err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if n != perShard {
|
||||||
|
return ErrShortData
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// zeroPaddingReader is an io.Reader that never ends and yields only zero
// bytes. Wrap it in io.LimitReader to obtain a fixed amount of padding.
type zeroPaddingReader struct{}

// Compile-time check that zeroPaddingReader satisfies io.Reader.
var _ io.Reader = (*zeroPaddingReader)(nil)

// Read overwrites every byte of p with zero and reports the whole buffer as
// read. It never returns an error.
func (t zeroPaddingReader) Read(p []byte) (n int, err error) {
	for i := range p {
		p[i] = 0
	}
	return len(p), nil
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
//go:build !noasm && !nounsafe && !gccgo && !appengine
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reed-Solomon Coding over 8-bit values.
|
||||||
|
*
|
||||||
|
* Copyright 2023, Klaus Post
|
||||||
|
*/
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AllocAligned allocates 'shards' slices, with 'each' bytes.
// Each slice will start on a 64 byte aligned boundary.
//
// All returned slices are carved out of one backing allocation. Every slice
// has length 'each' and a capacity of 'each' rounded up to the next multiple
// of 64, so appending within capacity never runs into the following shard.
func AllocAligned(shards, each int) [][]byte {
	const (
		alignEach  = 64 // per-shard stride granularity, in bytes
		alignStart = 64 // required alignment of the first shard, in bytes
	)

	// Round the per-shard size up so that consecutive shards stay aligned.
	eachAligned := ((each + alignEach - 1) / alignEach) * alignEach

	// Over-allocate by alignStart-1 bytes so the start can be moved forward
	// to the next aligned address.
	total := make([]byte, eachAligned*shards+alignStart-1)
	if align := uint(uintptr(unsafe.Pointer(&total[0]))) & (alignStart - 1); align > 0 {
		total = total[alignStart-align:]
	}

	res := make([][]byte, shards)
	for i := range res {
		// The three-index slice caps each shard at its own aligned region.
		res[i] = total[:each:eachAligned]
		total = total[eachAligned:]
	}
	return res
}
|
|
@ -0,0 +1,23 @@
|
||||||
|
//go:build noasm || nounsafe || gccgo || appengine
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reed-Solomon Coding over 8-bit values.
|
||||||
|
*
|
||||||
|
* Copyright 2023, Klaus Post
|
||||||
|
*/
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
// AllocAligned allocates 'shards' slices, with 'each' bytes.
//
// All slices are carved out of one backing allocation at a stride of 'each'
// rounded up to the next multiple of 64; that stride is also the capacity of
// every slice. This build cannot use "unsafe" to align the start of the
// allocation, so the first slice begins wherever the allocator placed it.
func AllocAligned(shards, each int) [][]byte {
	const align = 64

	stride := (each + align - 1) / align * align
	backing := make([]byte, stride*shards+align-1)

	out := make([][]byte, shards)
	for i := range out {
		lo := i * stride
		// The three-index slice caps each shard at its own stride.
		out[i] = backing[lo : lo+each : lo+stride]
	}
	return out
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
//go:build !noasm && !appengine && !gccgo
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
//go:noescape
|
||||||
|
func xorSliceNEON(in, out []byte)
|
||||||
|
|
||||||
|
// simple slice xor
|
||||||
|
func sliceXor(in, out []byte, o *options) {
|
||||||
|
xorSliceNEON(in, out)
|
||||||
|
done := (len(in) >> 5) << 5
|
||||||
|
|
||||||
|
remain := len(in) - done
|
||||||
|
if remain > 0 {
|
||||||
|
for i := done; i < len(in); i++ {
|
||||||
|
out[i] ^= in[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
//+build !noasm
|
||||||
|
//+build !appengine
|
||||||
|
//+build !gccgo
|
||||||
|
|
||||||
|
// func xorSliceNEON(in, out []byte)
//
// XORs in into out, 32 bytes per iteration. Only len(in) is consulted; a
// trailing remainder of fewer than 32 bytes is left untouched.
// Flag value 7 is NOPROF|DUPOK|NOSPLIT as defined in textflag.h.
TEXT ·xorSliceNEON(SB), 7, $0
	MOVD in_base+0(FP), R1   // R1 = pointer to in
	MOVD in_len+8(FP), R2    // R2 = length of in
	MOVD out_base+24(FP), R5 // R5 = pointer to out
	SUBS $32, R2
	BMI  xorDone             // fewer than 32 bytes: nothing to do

xorBlock:
	// Load 32 bytes from in (advancing R1) and 32 bytes from out.
	VLD1.P 32(R1), [V0.B16, V1.B16]
	VLD1   (R5), [V20.B16, V21.B16]

	VEOR V20.B16, V0.B16, V4.B16
	VEOR V21.B16, V1.B16, V5.B16

	// Write the result back to out and advance R5.
	VST1.P [V4.D2, V5.D2], 32(R5)

	SUBS $32, R2
	BPL  xorBlock            // another full block remains

xorDone:
	RET
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
//go:build noasm || gccgo || appengine || (!amd64 && !arm64)
|
||||||
|
|
||||||
|
package reedsolomon
|
||||||
|
|
||||||
|
func sliceXor(in, out []byte, o *options) {
|
||||||
|
sliceXorGo(in, out, o)
|
||||||
|
}
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build gc
|
//go:build gc
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@ var X86 struct {
|
||||||
HasAVX512F bool // Advanced vector extension 512 Foundation Instructions
|
HasAVX512F bool // Advanced vector extension 512 Foundation Instructions
|
||||||
HasAVX512CD bool // Advanced vector extension 512 Conflict Detection Instructions
|
HasAVX512CD bool // Advanced vector extension 512 Conflict Detection Instructions
|
||||||
HasAVX512ER bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
|
HasAVX512ER bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
|
||||||
HasAVX512PF bool // Advanced vector extension 512 Prefetch Instructions Instructions
|
HasAVX512PF bool // Advanced vector extension 512 Prefetch Instructions
|
||||||
HasAVX512VL bool // Advanced vector extension 512 Vector Length Extensions
|
HasAVX512VL bool // Advanced vector extension 512 Vector Length Extensions
|
||||||
HasAVX512BW bool // Advanced vector extension 512 Byte and Word Instructions
|
HasAVX512BW bool // Advanced vector extension 512 Byte and Word Instructions
|
||||||
HasAVX512DQ bool // Advanced vector extension 512 Doubleword and Quadword Instructions
|
HasAVX512DQ bool // Advanced vector extension 512 Doubleword and Quadword Instructions
|
||||||
|
@ -54,6 +54,9 @@ var X86 struct {
|
||||||
HasAVX512VBMI2 bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
|
HasAVX512VBMI2 bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
|
||||||
HasAVX512BITALG bool // Advanced vector extension 512 Bit Algorithms
|
HasAVX512BITALG bool // Advanced vector extension 512 Bit Algorithms
|
||||||
HasAVX512BF16 bool // Advanced vector extension 512 BFloat16 Instructions
|
HasAVX512BF16 bool // Advanced vector extension 512 BFloat16 Instructions
|
||||||
|
HasAMXTile bool // Advanced Matrix Extension Tile instructions
|
||||||
|
HasAMXInt8 bool // Advanced Matrix Extension Int8 instructions
|
||||||
|
HasAMXBF16 bool // Advanced Matrix Extension BFloat16 instructions
|
||||||
HasBMI1 bool // Bit manipulation instruction set 1
|
HasBMI1 bool // Bit manipulation instruction set 1
|
||||||
HasBMI2 bool // Bit manipulation instruction set 2
|
HasBMI2 bool // Bit manipulation instruction set 2
|
||||||
HasCX16 bool // Compare and exchange 16 Bytes
|
HasCX16 bool // Compare and exchange 16 Bytes
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build aix
|
//go:build aix
|
||||||
// +build aix
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build gc
|
//go:build gc
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build gc
|
//go:build gc
|
||||||
// +build gc
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build gc
|
//go:build gc
|
||||||
// +build gc
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build (386 || amd64 || amd64p32) && gc
|
//go:build (386 || amd64 || amd64p32) && gc
|
||||||
// +build 386 amd64 amd64p32
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build gccgo
|
//go:build gccgo
|
||||||
// +build gccgo
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build gccgo
|
//go:build gccgo
|
||||||
// +build gccgo
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build (386 || amd64 || amd64p32) && gccgo
|
//go:build (386 || amd64 || amd64p32) && gccgo
|
||||||
// +build 386 amd64 amd64p32
|
|
||||||
// +build gccgo
|
|
||||||
|
|
||||||
#include <cpuid.h>
|
#include <cpuid.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build (386 || amd64 || amd64p32) && gccgo
|
//go:build (386 || amd64 || amd64p32) && gccgo
|
||||||
// +build 386 amd64 amd64p32
|
|
||||||
// +build gccgo
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build !386 && !amd64 && !amd64p32 && !arm64
|
//go:build !386 && !amd64 && !amd64p32 && !arm64
|
||||||
// +build !386,!amd64,!amd64p32,!arm64
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build linux && (mips64 || mips64le)
|
//go:build linux && (mips64 || mips64le)
|
||||||
// +build linux
|
|
||||||
// +build mips64 mips64le
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
|
//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
|
||||||
// +build linux,!arm,!arm64,!mips64,!mips64le,!ppc64,!ppc64le,!s390x
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build linux && (ppc64 || ppc64le)
|
//go:build linux && (ppc64 || ppc64le)
|
||||||
// +build linux
|
|
||||||
// +build ppc64 ppc64le
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build loong64
|
//go:build loong64
|
||||||
// +build loong64
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build mips64 || mips64le
|
//go:build mips64 || mips64le
|
||||||
// +build mips64 mips64le
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build mips || mipsle
|
//go:build mips || mipsle
|
||||||
// +build mips mipsle
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build !linux && arm
|
//go:build !linux && arm
|
||||||
// +build !linux,arm
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build !linux && !netbsd && !openbsd && arm64
|
//go:build !linux && !netbsd && !openbsd && arm64
|
||||||
// +build !linux,!netbsd,!openbsd,arm64
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build !linux && (mips64 || mips64le)
|
//go:build !linux && (mips64 || mips64le)
|
||||||
// +build !linux
|
|
||||||
// +build mips64 mips64le
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build !aix && !linux && (ppc64 || ppc64le)
|
//go:build !aix && !linux && (ppc64 || ppc64le)
|
||||||
// +build !aix
|
|
||||||
// +build !linux
|
|
||||||
// +build ppc64 ppc64le
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build !linux && riscv64
|
//go:build !linux && riscv64
|
||||||
// +build !linux,riscv64
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build ppc64 || ppc64le
|
//go:build ppc64 || ppc64le
|
||||||
// +build ppc64 ppc64le
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,10 +3,9 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build riscv64
|
//go:build riscv64
|
||||||
// +build riscv64
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
const cacheLineSize = 32
|
const cacheLineSize = 64
|
||||||
|
|
||||||
func initOptions() {}
|
func initOptions() {}
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build gc
|
//go:build gc
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build wasm
|
//go:build wasm
|
||||||
// +build wasm
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build 386 || amd64 || amd64p32
|
//go:build 386 || amd64 || amd64p32
|
||||||
// +build 386 amd64 amd64p32
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
@ -37,6 +36,9 @@ func initOptions() {
|
||||||
{Name: "avx512vbmi2", Feature: &X86.HasAVX512VBMI2},
|
{Name: "avx512vbmi2", Feature: &X86.HasAVX512VBMI2},
|
||||||
{Name: "avx512bitalg", Feature: &X86.HasAVX512BITALG},
|
{Name: "avx512bitalg", Feature: &X86.HasAVX512BITALG},
|
||||||
{Name: "avx512bf16", Feature: &X86.HasAVX512BF16},
|
{Name: "avx512bf16", Feature: &X86.HasAVX512BF16},
|
||||||
|
{Name: "amxtile", Feature: &X86.HasAMXTile},
|
||||||
|
{Name: "amxint8", Feature: &X86.HasAMXInt8},
|
||||||
|
{Name: "amxbf16", Feature: &X86.HasAMXBF16},
|
||||||
{Name: "bmi1", Feature: &X86.HasBMI1},
|
{Name: "bmi1", Feature: &X86.HasBMI1},
|
||||||
{Name: "bmi2", Feature: &X86.HasBMI2},
|
{Name: "bmi2", Feature: &X86.HasBMI2},
|
||||||
{Name: "cx16", Feature: &X86.HasCX16},
|
{Name: "cx16", Feature: &X86.HasCX16},
|
||||||
|
@ -138,6 +140,10 @@ func archInit() {
|
||||||
eax71, _, _, _ := cpuid(7, 1)
|
eax71, _, _, _ := cpuid(7, 1)
|
||||||
X86.HasAVX512BF16 = isSet(5, eax71)
|
X86.HasAVX512BF16 = isSet(5, eax71)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
X86.HasAMXTile = isSet(24, edx7)
|
||||||
|
X86.HasAMXInt8 = isSet(25, edx7)
|
||||||
|
X86.HasAMXBF16 = isSet(22, edx7)
|
||||||
}
|
}
|
||||||
|
|
||||||
func isSet(bitpos uint, value uint32) bool {
|
func isSet(bitpos uint, value uint32) bool {
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build (386 || amd64 || amd64p32) && gc
|
//go:build (386 || amd64 || amd64p32) && gc
|
||||||
// +build 386 amd64 amd64p32
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64
|
//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64
|
||||||
// +build armbe arm64be m68k mips mips64 mips64p32 ppc ppc64 s390 s390x shbe sparc sparc64
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm
|
//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm
|
||||||
// +build 386 amd64 amd64p32 alpha arm arm64 loong64 mipsle mips64le mips64p32le nios2 ppc64le riscv riscv64 sh wasm
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"io/ioutil"
|
"os"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -39,7 +39,7 @@ func readHWCAP() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
buf, err := ioutil.ReadFile(procAuxv)
|
buf, err := os.ReadFile(procAuxv)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// e.g. on android /proc/self/auxv is not accessible, so silently
|
// e.g. on android /proc/self/auxv is not accessible, so silently
|
||||||
// ignore the error and leave Initialized = false. On some
|
// ignore the error and leave Initialized = false. On some
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build linux && arm64
|
//go:build linux && arm64
|
||||||
// +build linux,arm64
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build go1.21
|
//go:build go1.21
|
||||||
// +build go1.21
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,6 @@
|
||||||
// gccgo's libgo and thus must not used a CGo method.
|
// gccgo's libgo and thus must not used a CGo method.
|
||||||
|
|
||||||
//go:build aix && gccgo
|
//go:build aix && gccgo
|
||||||
// +build aix,gccgo
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,6 @@
|
||||||
// (See golang.org/issue/32102)
|
// (See golang.org/issue/32102)
|
||||||
|
|
||||||
//go:build aix && ppc64 && gc
|
//go:build aix && ppc64 && gc
|
||||||
// +build aix,ppc64,gc
|
|
||||||
|
|
||||||
package cpu
|
package cpu
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build !go1.19
|
//go:build !go1.19
|
||||||
// +build !go1.19
|
|
||||||
|
|
||||||
package execabs
|
package execabs
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build go1.19
|
//go:build go1.19
|
||||||
// +build go1.19
|
|
||||||
|
|
||||||
package execabs
|
package execabs
|
||||||
|
|
||||||
|
|
|
@ -1,30 +0,0 @@
|
||||||
// Copyright 2020 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// Package unsafeheader contains header declarations for the Go runtime's
|
|
||||||
// slice and string implementations.
|
|
||||||
//
|
|
||||||
// This package allows x/sys to use types equivalent to
|
|
||||||
// reflect.SliceHeader and reflect.StringHeader without introducing
|
|
||||||
// a dependency on the (relatively heavy) "reflect" package.
|
|
||||||
package unsafeheader
|
|
||||||
|
|
||||||
import (
|
|
||||||
"unsafe"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Slice is the runtime representation of a slice.
|
|
||||||
// It cannot be used safely or portably and its representation may change in a later release.
|
|
||||||
type Slice struct {
|
|
||||||
Data unsafe.Pointer
|
|
||||||
Len int
|
|
||||||
Cap int
|
|
||||||
}
|
|
||||||
|
|
||||||
// String is the runtime representation of a string.
|
|
||||||
// It cannot be used safely or portably and its representation may change in a later release.
|
|
||||||
type String struct {
|
|
||||||
Data unsafe.Pointer
|
|
||||||
Len int
|
|
||||||
}
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build go1.5
|
//go:build go1.5
|
||||||
// +build go1.5
|
|
||||||
|
|
||||||
package plan9
|
package plan9
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build !go1.5
|
//go:build !go1.5
|
||||||
// +build !go1.5
|
|
||||||
|
|
||||||
package plan9
|
package plan9
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build plan9 && race
|
//go:build plan9 && race
|
||||||
// +build plan9,race
|
|
||||||
|
|
||||||
package plan9
|
package plan9
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build plan9 && !race
|
//go:build plan9 && !race
|
||||||
// +build plan9,!race
|
|
||||||
|
|
||||||
package plan9
|
package plan9
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build plan9
|
//go:build plan9
|
||||||
// +build plan9
|
|
||||||
|
|
||||||
package plan9
|
package plan9
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build plan9
|
//go:build plan9
|
||||||
// +build plan9
|
|
||||||
|
|
||||||
// Package plan9 contains an interface to the low-level operating system
|
// Package plan9 contains an interface to the low-level operating system
|
||||||
// primitives. OS details vary depending on the underlying system, and
|
// primitives. OS details vary depending on the underlying system, and
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
// Code generated by the command above; see README.md. DO NOT EDIT.
|
// Code generated by the command above; see README.md. DO NOT EDIT.
|
||||||
|
|
||||||
//go:build plan9 && 386
|
//go:build plan9 && 386
|
||||||
// +build plan9,386
|
|
||||||
|
|
||||||
package plan9
|
package plan9
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
// Code generated by the command above; see README.md. DO NOT EDIT.
|
// Code generated by the command above; see README.md. DO NOT EDIT.
|
||||||
|
|
||||||
//go:build plan9 && amd64
|
//go:build plan9 && amd64
|
||||||
// +build plan9,amd64
|
|
||||||
|
|
||||||
package plan9
|
package plan9
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
// Code generated by the command above; see README.md. DO NOT EDIT.
|
// Code generated by the command above; see README.md. DO NOT EDIT.
|
||||||
|
|
||||||
//go:build plan9 && arm
|
//go:build plan9 && arm
|
||||||
// +build plan9,arm
|
|
||||||
|
|
||||||
package plan9
|
package plan9
|
||||||
|
|
||||||
|
|
|
@ -2,9 +2,7 @@
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build (aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos) && go1.9
|
//go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
|
||||||
// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
|
|
||||||
// +build go1.9
|
|
||||||
|
|
||||||
package unix
|
package unix
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build gc
|
//go:build gc
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build (freebsd || netbsd || openbsd) && gc
|
//go:build (freebsd || netbsd || openbsd) && gc
|
||||||
// +build freebsd netbsd openbsd
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build (darwin || dragonfly || freebsd || netbsd || openbsd) && gc
|
//go:build (darwin || dragonfly || freebsd || netbsd || openbsd) && gc
|
||||||
// +build darwin dragonfly freebsd netbsd openbsd
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build (freebsd || netbsd || openbsd) && gc
|
//go:build (freebsd || netbsd || openbsd) && gc
|
||||||
// +build freebsd netbsd openbsd
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build (darwin || freebsd || netbsd || openbsd) && gc
|
//go:build (darwin || freebsd || netbsd || openbsd) && gc
|
||||||
// +build darwin freebsd netbsd openbsd
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build (darwin || freebsd || netbsd || openbsd) && gc
|
//go:build (darwin || freebsd || netbsd || openbsd) && gc
|
||||||
// +build darwin freebsd netbsd openbsd
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build (darwin || freebsd || netbsd || openbsd) && gc
|
//go:build (darwin || freebsd || netbsd || openbsd) && gc
|
||||||
// +build darwin freebsd netbsd openbsd
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build gc
|
//go:build gc
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build gc
|
//go:build gc
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build gc
|
//go:build gc
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build linux && arm64 && gc
|
//go:build linux && arm64 && gc
|
||||||
// +build linux
|
|
||||||
// +build arm64
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build linux && loong64 && gc
|
//go:build linux && loong64 && gc
|
||||||
// +build linux
|
|
||||||
// +build loong64
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,6 @@
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
//go:build linux && (mips64 || mips64le) && gc
|
//go:build linux && (mips64 || mips64le) && gc
|
||||||
// +build linux
|
|
||||||
// +build mips64 mips64le
|
|
||||||
// +build gc
|
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue