chore_: add `klauspost/reedsolomon` module

Patryk Osmaczko authored on 2024-04-04 17:52:49 +02:00; committed by osmaczko
parent bd91f5ab49
commit 4f2adc0ced
398 changed files with 216863 additions and 3038 deletions

go.mod (5 lines changed)

@ -84,6 +84,7 @@ require (
github.com/ipfs/go-log/v2 v2.5.1 github.com/ipfs/go-log/v2 v2.5.1
github.com/jellydator/ttlcache/v3 v3.2.0 github.com/jellydator/ttlcache/v3 v3.2.0
github.com/jmoiron/sqlx v1.3.5 github.com/jmoiron/sqlx v1.3.5
github.com/klauspost/reedsolomon v1.12.1
github.com/ladydascalie/currency v1.6.0 github.com/ladydascalie/currency v1.6.0
github.com/meirf/gopart v0.0.0-20180520194036-37e9492a85a8 github.com/meirf/gopart v0.0.0-20180520194036-37e9492a85a8
github.com/mutecomm/go-sqlcipher/v4 v4.4.2 github.com/mutecomm/go-sqlcipher/v4 v4.4.2
@ -177,7 +178,7 @@ require (
github.com/jackpal/go-nat-pmp v1.0.2 // indirect github.com/jackpal/go-nat-pmp v1.0.2 // indirect
github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect
github.com/klauspost/compress v1.16.7 // indirect github.com/klauspost/compress v1.16.7 // indirect
github.com/klauspost/cpuid/v2 v2.2.5 // indirect github.com/klauspost/cpuid/v2 v2.2.7 // indirect
github.com/koron/go-ssdp v0.0.4 // indirect github.com/koron/go-ssdp v0.0.4 // indirect
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
@ -279,7 +280,7 @@ require (
go.uber.org/fx v1.20.0 // indirect go.uber.org/fx v1.20.0 // indirect
golang.org/x/mod v0.12.0 // indirect golang.org/x/mod v0.12.0 // indirect
golang.org/x/sync v0.3.0 // indirect golang.org/x/sync v0.3.0 // indirect
golang.org/x/sys v0.11.0 // indirect golang.org/x/sys v0.18.0 // indirect
golang.org/x/term v0.11.0 // indirect golang.org/x/term v0.11.0 // indirect
golang.org/x/tools v0.12.1-0.20230818130535-1517d1a3ba60 // indirect golang.org/x/tools v0.12.1-0.20230818130535-1517d1a3ba60 // indirect
golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect
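For reference, the new direct dependency can be consumed from Go code roughly like this (a minimal sketch, not code from this commit; the 10+3 shard layout and the 64-byte shard size are placeholders):

```Go
package main

import (
    "log"

    "github.com/klauspost/reedsolomon"
)

func main() {
    // 10 data shards + 3 parity shards, matching the vendored README example below.
    enc, err := reedsolomon.New(10, 3)
    if err != nil {
        log.Fatal(err)
    }

    // Allocate 13 equally sized shards and compute the parity shards.
    shards := reedsolomon.AllocAligned(13, 64)
    if err := enc.Encode(shards); err != nil {
        log.Fatal(err)
    }
}
```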

go.sum (10 lines changed)

@ -1295,10 +1295,12 @@ github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs
github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg= github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg=
github.com/klauspost/pgzip v1.0.2-0.20170402124221-0bf5dcad4ada/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/klauspost/pgzip v1.0.2-0.20170402124221-0bf5dcad4ada/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/klauspost/reedsolomon v1.12.1 h1:NhWgum1efX1x58daOBGCFWcxtEhOhXKKl1HAPQUp03Q=
github.com/klauspost/reedsolomon v1.12.1/go.mod h1:nEi5Kjb6QqtbofI6s+cbG/j1da11c96IBYBSnVGtuBs=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
@ -2637,8 +2639,8 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=


@ -9,10 +9,7 @@ You can access the CPU information by accessing the shared CPU variable of the c
Package home: https://github.com/klauspost/cpuid Package home: https://github.com/klauspost/cpuid
[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2) [![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
[![Build Status][3]][4] [![Go](https://github.com/klauspost/cpuid/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/cpuid/actions/workflows/go.yml)
[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
[4]: https://travis-ci.org/klauspost/cpuid
## installing ## installing
@ -285,7 +282,12 @@ Exit Code 1
| AMXINT8 | Tile computational operations on 8-bit integers | | AMXINT8 | Tile computational operations on 8-bit integers |
| AMXFP16 | Tile computational operations on FP16 numbers | | AMXFP16 | Tile computational operations on FP16 numbers |
| AMXTILE | Tile architecture | | AMXTILE | Tile architecture |
| APX_F | Intel APX |
| AVX | AVX functions | | AVX | AVX functions |
| AVX10 | If set the Intel AVX10 Converged Vector ISA is supported |
| AVX10_128 | If set indicates that AVX10 128-bit vector support is present |
| AVX10_256 | If set indicates that AVX10 256-bit vector support is present |
| AVX10_512 | If set indicates that AVX10 512-bit vector support is present |
| AVX2 | AVX2 functions | | AVX2 | AVX2 functions |
| AVX512BF16 | AVX-512 BFLOAT16 Instructions | | AVX512BF16 | AVX-512 BFLOAT16 Instructions |
| AVX512BITALG | AVX-512 Bit Algorithms | | AVX512BITALG | AVX-512 Bit Algorithms |
@ -365,6 +367,8 @@ Exit Code 1
| IDPRED_CTRL | IPRED_DIS | | IDPRED_CTRL | IPRED_DIS |
| INT_WBINVD | WBINVD/WBNOINVD are interruptible. | | INT_WBINVD | WBINVD/WBNOINVD are interruptible. |
| INVLPGB | NVLPGB and TLBSYNC instruction supported | | INVLPGB | NVLPGB and TLBSYNC instruction supported |
| KEYLOCKER | Key locker |
| KEYLOCKERW | Key locker wide |
| LAHF | LAHF/SAHF in long mode | | LAHF | LAHF/SAHF in long mode |
| LAM | If set, CPU supports Linear Address Masking | | LAM | If set, CPU supports Linear Address Masking |
| LBRVIRT | LBR virtualization | | LBRVIRT | LBR virtualization |


@ -67,7 +67,7 @@ const (
// Keep index -1 as unknown // Keep index -1 as unknown
UNKNOWN = -1 UNKNOWN = -1
// Add features // x86 features
ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
AESNI // Advanced Encryption Standard New Instructions AESNI // Advanced Encryption Standard New Instructions
AMD3DNOW // AMD 3DNOW AMD3DNOW // AMD 3DNOW
@ -76,7 +76,12 @@ const (
AMXFP16 // Tile computational operations on FP16 numbers AMXFP16 // Tile computational operations on FP16 numbers
AMXINT8 // Tile computational operations on 8-bit integers AMXINT8 // Tile computational operations on 8-bit integers
AMXTILE // Tile architecture AMXTILE // Tile architecture
APX_F // Intel APX
AVX // AVX functions AVX // AVX functions
AVX10 // If set the Intel AVX10 Converged Vector ISA is supported
AVX10_128 // If set indicates that AVX10 128-bit vector support is present
AVX10_256 // If set indicates that AVX10 256-bit vector support is present
AVX10_512 // If set indicates that AVX10 512-bit vector support is present
AVX2 // AVX2 functions AVX2 // AVX2 functions
AVX512BF16 // AVX-512 BFLOAT16 Instructions AVX512BF16 // AVX-512 BFLOAT16 Instructions
AVX512BITALG // AVX-512 Bit Algorithms AVX512BITALG // AVX-512 Bit Algorithms
@ -136,6 +141,7 @@ const (
IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel) IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
IBPB_BRTYPE // Indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes all branch type predictions from the CPU branch predictor
IBRS // AMD: Indirect Branch Restricted Speculation IBRS // AMD: Indirect Branch Restricted Speculation
IBRS_PREFERRED // AMD: IBRS is preferred over software solution IBRS_PREFERRED // AMD: IBRS is preferred over software solution
IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
@ -156,6 +162,8 @@ const (
IDPRED_CTRL // IPRED_DIS IDPRED_CTRL // IPRED_DIS
INT_WBINVD // WBINVD/WBNOINVD are interruptible. INT_WBINVD // WBINVD/WBNOINVD are interruptible.
INVLPGB // NVLPGB and TLBSYNC instruction supported INVLPGB // NVLPGB and TLBSYNC instruction supported
KEYLOCKER // Key locker
KEYLOCKERW // Key locker wide
LAHF // LAHF/SAHF in long mode LAHF // LAHF/SAHF in long mode
LAM // If set, CPU supports Linear Address Masking LAM // If set, CPU supports Linear Address Masking
LBRVIRT // LBR virtualization LBRVIRT // LBR virtualization
@ -190,6 +198,7 @@ const (
RRSBA_CTRL // Restricted RSB Alternate RRSBA_CTRL // Restricted RSB Alternate
RTM // Restricted Transactional Memory RTM // Restricted Transactional Memory
RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
SBPB // Indicates support for the Selective Branch Predictor Barrier
SERIALIZE // Serialize Instruction Execution SERIALIZE // Serialize Instruction Execution
SEV // AMD Secure Encrypted Virtualization supported SEV // AMD Secure Encrypted Virtualization supported
SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
@ -205,6 +214,9 @@ const (
SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
SPEC_CTRL_SSBD // Speculative Store Bypass Disable SPEC_CTRL_SSBD // Speculative Store Bypass Disable
SRBDS_CTRL // SRBDS mitigation MSR available SRBDS_CTRL // SRBDS mitigation MSR available
SRSO_MSR_FIX // Indicates that software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO.
SRSO_NO // Indicates the CPU is not subject to the SRSO vulnerability
SRSO_USER_KERNEL_NO // Indicates the CPU is not subject to the SRSO vulnerability across user/kernel boundaries
SSE // SSE functions SSE // SSE functions
SSE2 // P4 SSE functions SSE2 // P4 SSE functions
SSE3 // Prescott SSE3 functions SSE3 // Prescott SSE3 functions
@ -303,6 +315,8 @@ type CPUInfo struct {
L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
} }
SGX SGXSupport SGX SGXSupport
AMDMemEncryption AMDMemEncryptionSupport
AVX10Level uint8
maxFunc uint32 maxFunc uint32
maxExFunc uint32 maxExFunc uint32
} }
@ -1071,6 +1085,32 @@ func hasSGX(available, lc bool) (rval SGXSupport) {
return return
} }
type AMDMemEncryptionSupport struct {
Available bool
CBitPossition uint32
NumVMPL uint32
PhysAddrReduction uint32
NumEntryptedGuests uint32
MinSevNoEsAsid uint32
}
func hasAMDMemEncryption(available bool) (rval AMDMemEncryptionSupport) {
rval.Available = available
if !available {
return
}
_, b, c, d := cpuidex(0x8000001f, 0)
rval.CBitPossition = b & 0x3f
rval.PhysAddrReduction = (b >> 6) & 0x3F
rval.NumVMPL = (b >> 12) & 0xf
rval.NumEntryptedGuests = c
rval.MinSevNoEsAsid = d
return
}
func support() flagSet { func support() flagSet {
var fs flagSet var fs flagSet
mfi := maxFunctionID() mfi := maxFunctionID()
@ -1165,6 +1205,7 @@ func support() flagSet {
fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
fs.setIf(ecx&(1<<13) != 0, TME) fs.setIf(ecx&(1<<13) != 0, TME)
fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
fs.setIf(ecx&(1<<23) != 0, KEYLOCKER)
fs.setIf(ecx&(1<<27) != 0, MOVDIRI) fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
fs.setIf(ecx&(1<<29) != 0, ENQCMD) fs.setIf(ecx&(1<<29) != 0, ENQCMD)
@ -1202,6 +1243,8 @@ func support() flagSet {
fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8) fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT) fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
fs.setIf(edx1&(1<<14) != 0, PREFETCHI) fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
fs.setIf(edx1&(1<<19) != 0, AVX10)
fs.setIf(edx1&(1<<21) != 0, APX_F)
// Only detect AVX-512 features if XGETBV is supported // Only detect AVX-512 features if XGETBV is supported
if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
@ -1252,6 +1295,19 @@ func support() flagSet {
fs.setIf(edx&(1<<4) != 0, BHI_CTRL) fs.setIf(edx&(1<<4) != 0, BHI_CTRL)
fs.setIf(edx&(1<<5) != 0, MCDT_NO) fs.setIf(edx&(1<<5) != 0, MCDT_NO)
// Add keylocker features.
if fs.inSet(KEYLOCKER) && mfi >= 0x19 {
_, ebx, _, _ := cpuidex(0x19, 0)
fs.setIf(ebx&5 == 5, KEYLOCKERW) // Bit 0 and 2 (1+4)
}
// Add AVX10 features.
if fs.inSet(AVX10) && mfi >= 0x24 {
_, ebx, _, _ := cpuidex(0x24, 0)
fs.setIf(ebx&(1<<16) != 0, AVX10_128)
fs.setIf(ebx&(1<<17) != 0, AVX10_256)
fs.setIf(ebx&(1<<18) != 0, AVX10_512)
}
} }
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
@ -1394,6 +1450,29 @@ func support() flagSet {
fs.setIf((a>>24)&1 == 1, VMSA_REGPROT) fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
} }
if maxExtendedFunction() >= 0x80000021 && vend == AMD {
a, _, _, _ := cpuid(0x80000021)
fs.setIf((a>>31)&1 == 1, SRSO_MSR_FIX)
fs.setIf((a>>30)&1 == 1, SRSO_USER_KERNEL_NO)
fs.setIf((a>>29)&1 == 1, SRSO_NO)
fs.setIf((a>>28)&1 == 1, IBPB_BRTYPE)
fs.setIf((a>>27)&1 == 1, SBPB)
}
if mfi >= 0x20 {
// Microsoft has decided to purposefully hide the information
// of the guest TEE when VMs are being created using Hyper-V.
//
// This leads us to check for the Hyper-V cpuid features
// (0x4000000C), and then for the `ebx` value set.
//
// For Intel TDX, `ebx` is set to `0xbe3`, and 3 is the part
// we're mostly interested in, according to:
// https://github.com/torvalds/linux/blob/d2f51b3516dade79269ff45eae2a7668ae711b25/arch/x86/include/asm/hyperv-tlfs.h#L169-L174
_, ebx, _, _ := cpuid(0x4000000C)
fs.setIf(ebx == 0xbe3, TDX_GUEST)
}
if mfi >= 0x21 { if mfi >= 0x21 {
// Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21). // Intel Trusted Domain Extensions Guests have their own cpuid leaf (0x21).
_, ebx, ecx, edx := cpuid(0x21) _, ebx, ecx, edx := cpuid(0x21)
@ -1404,6 +1483,14 @@ func support() flagSet {
return fs return fs
} }
func (c *CPUInfo) supportAVX10() uint8 {
if c.maxFunc >= 0x24 && c.featureSet.inSet(AVX10) {
_, ebx, _, _ := cpuidex(0x24, 0)
return uint8(ebx)
}
return 0
}
func valAsString(values ...uint32) []byte { func valAsString(values ...uint32) []byte {
r := make([]byte, 4*len(values)) r := make([]byte, 4*len(values))
for i, v := range values { for i, v := range values {


@ -27,10 +27,12 @@ func addInfo(c *CPUInfo, safe bool) {
c.Family, c.Model, c.Stepping = familyModel() c.Family, c.Model, c.Stepping = familyModel()
c.featureSet = support() c.featureSet = support()
c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC)) c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
c.AMDMemEncryption = hasAMDMemEncryption(c.featureSet.inSet(SME) || c.featureSet.inSet(SEV))
c.ThreadsPerCore = threadsPerCore() c.ThreadsPerCore = threadsPerCore()
c.LogicalCores = logicalCores() c.LogicalCores = logicalCores()
c.PhysicalCores = physicalCores() c.PhysicalCores = physicalCores()
c.VendorID, c.VendorString = vendorID() c.VendorID, c.VendorString = vendorID()
c.AVX10Level = c.supportAVX10()
c.cacheSize() c.cacheSize()
c.frequencies() c.frequencies()
} }
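For reference, the new `AMDMemEncryption` and `AVX10Level` fields wired up above are exposed through the package's shared `CPU` variable and can be queried roughly like this (a hedged sketch, not part of this diff):

```Go
package main

import (
    "fmt"

    "github.com/klauspost/cpuid/v2"
)

func main() {
    // Feature flags added in cpuid v2.2.7, detected via the CPUID leaves shown above.
    fmt.Println("AVX10:", cpuid.CPU.Supports(cpuid.AVX10), "level:", cpuid.CPU.AVX10Level)
    fmt.Println("KEYLOCKER:", cpuid.CPU.Supports(cpuid.KEYLOCKER))
    fmt.Println("AMD memory encryption:", cpuid.CPU.AMDMemEncryption.Available)
}
```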


@ -16,210 +16,222 @@ func _() {
_ = x[AMXFP16-6] _ = x[AMXFP16-6]
_ = x[AMXINT8-7] _ = x[AMXINT8-7]
_ = x[AMXTILE-8] _ = x[AMXTILE-8]
_ = x[AVX-9] _ = x[APX_F-9]
_ = x[AVX2-10] _ = x[AVX-10]
_ = x[AVX512BF16-11] _ = x[AVX10-11]
_ = x[AVX512BITALG-12] _ = x[AVX10_128-12]
_ = x[AVX512BW-13] _ = x[AVX10_256-13]
_ = x[AVX512CD-14] _ = x[AVX10_512-14]
_ = x[AVX512DQ-15] _ = x[AVX2-15]
_ = x[AVX512ER-16] _ = x[AVX512BF16-16]
_ = x[AVX512F-17] _ = x[AVX512BITALG-17]
_ = x[AVX512FP16-18] _ = x[AVX512BW-18]
_ = x[AVX512IFMA-19] _ = x[AVX512CD-19]
_ = x[AVX512PF-20] _ = x[AVX512DQ-20]
_ = x[AVX512VBMI-21] _ = x[AVX512ER-21]
_ = x[AVX512VBMI2-22] _ = x[AVX512F-22]
_ = x[AVX512VL-23] _ = x[AVX512FP16-23]
_ = x[AVX512VNNI-24] _ = x[AVX512IFMA-24]
_ = x[AVX512VP2INTERSECT-25] _ = x[AVX512PF-25]
_ = x[AVX512VPOPCNTDQ-26] _ = x[AVX512VBMI-26]
_ = x[AVXIFMA-27] _ = x[AVX512VBMI2-27]
_ = x[AVXNECONVERT-28] _ = x[AVX512VL-28]
_ = x[AVXSLOW-29] _ = x[AVX512VNNI-29]
_ = x[AVXVNNI-30] _ = x[AVX512VP2INTERSECT-30]
_ = x[AVXVNNIINT8-31] _ = x[AVX512VPOPCNTDQ-31]
_ = x[BHI_CTRL-32] _ = x[AVXIFMA-32]
_ = x[BMI1-33] _ = x[AVXNECONVERT-33]
_ = x[BMI2-34] _ = x[AVXSLOW-34]
_ = x[CETIBT-35] _ = x[AVXVNNI-35]
_ = x[CETSS-36] _ = x[AVXVNNIINT8-36]
_ = x[CLDEMOTE-37] _ = x[BHI_CTRL-37]
_ = x[CLMUL-38] _ = x[BMI1-38]
_ = x[CLZERO-39] _ = x[BMI2-39]
_ = x[CMOV-40] _ = x[CETIBT-40]
_ = x[CMPCCXADD-41] _ = x[CETSS-41]
_ = x[CMPSB_SCADBS_SHORT-42] _ = x[CLDEMOTE-42]
_ = x[CMPXCHG8-43] _ = x[CLMUL-43]
_ = x[CPBOOST-44] _ = x[CLZERO-44]
_ = x[CPPC-45] _ = x[CMOV-45]
_ = x[CX16-46] _ = x[CMPCCXADD-46]
_ = x[EFER_LMSLE_UNS-47] _ = x[CMPSB_SCADBS_SHORT-47]
_ = x[ENQCMD-48] _ = x[CMPXCHG8-48]
_ = x[ERMS-49] _ = x[CPBOOST-49]
_ = x[F16C-50] _ = x[CPPC-50]
_ = x[FLUSH_L1D-51] _ = x[CX16-51]
_ = x[FMA3-52] _ = x[EFER_LMSLE_UNS-52]
_ = x[FMA4-53] _ = x[ENQCMD-53]
_ = x[FP128-54] _ = x[ERMS-54]
_ = x[FP256-55] _ = x[F16C-55]
_ = x[FSRM-56] _ = x[FLUSH_L1D-56]
_ = x[FXSR-57] _ = x[FMA3-57]
_ = x[FXSROPT-58] _ = x[FMA4-58]
_ = x[GFNI-59] _ = x[FP128-59]
_ = x[HLE-60] _ = x[FP256-60]
_ = x[HRESET-61] _ = x[FSRM-61]
_ = x[HTT-62] _ = x[FXSR-62]
_ = x[HWA-63] _ = x[FXSROPT-63]
_ = x[HYBRID_CPU-64] _ = x[GFNI-64]
_ = x[HYPERVISOR-65] _ = x[HLE-65]
_ = x[IA32_ARCH_CAP-66] _ = x[HRESET-66]
_ = x[IA32_CORE_CAP-67] _ = x[HTT-67]
_ = x[IBPB-68] _ = x[HWA-68]
_ = x[IBRS-69] _ = x[HYBRID_CPU-69]
_ = x[IBRS_PREFERRED-70] _ = x[HYPERVISOR-70]
_ = x[IBRS_PROVIDES_SMP-71] _ = x[IA32_ARCH_CAP-71]
_ = x[IBS-72] _ = x[IA32_CORE_CAP-72]
_ = x[IBSBRNTRGT-73] _ = x[IBPB-73]
_ = x[IBSFETCHSAM-74] _ = x[IBPB_BRTYPE-74]
_ = x[IBSFFV-75] _ = x[IBRS-75]
_ = x[IBSOPCNT-76] _ = x[IBRS_PREFERRED-76]
_ = x[IBSOPCNTEXT-77] _ = x[IBRS_PROVIDES_SMP-77]
_ = x[IBSOPSAM-78] _ = x[IBS-78]
_ = x[IBSRDWROPCNT-79] _ = x[IBSBRNTRGT-79]
_ = x[IBSRIPINVALIDCHK-80] _ = x[IBSFETCHSAM-80]
_ = x[IBS_FETCH_CTLX-81] _ = x[IBSFFV-81]
_ = x[IBS_OPDATA4-82] _ = x[IBSOPCNT-82]
_ = x[IBS_OPFUSE-83] _ = x[IBSOPCNTEXT-83]
_ = x[IBS_PREVENTHOST-84] _ = x[IBSOPSAM-84]
_ = x[IBS_ZEN4-85] _ = x[IBSRDWROPCNT-85]
_ = x[IDPRED_CTRL-86] _ = x[IBSRIPINVALIDCHK-86]
_ = x[INT_WBINVD-87] _ = x[IBS_FETCH_CTLX-87]
_ = x[INVLPGB-88] _ = x[IBS_OPDATA4-88]
_ = x[LAHF-89] _ = x[IBS_OPFUSE-89]
_ = x[LAM-90] _ = x[IBS_PREVENTHOST-90]
_ = x[LBRVIRT-91] _ = x[IBS_ZEN4-91]
_ = x[LZCNT-92] _ = x[IDPRED_CTRL-92]
_ = x[MCAOVERFLOW-93] _ = x[INT_WBINVD-93]
_ = x[MCDT_NO-94] _ = x[INVLPGB-94]
_ = x[MCOMMIT-95] _ = x[KEYLOCKER-95]
_ = x[MD_CLEAR-96] _ = x[KEYLOCKERW-96]
_ = x[MMX-97] _ = x[LAHF-97]
_ = x[MMXEXT-98] _ = x[LAM-98]
_ = x[MOVBE-99] _ = x[LBRVIRT-99]
_ = x[MOVDIR64B-100] _ = x[LZCNT-100]
_ = x[MOVDIRI-101] _ = x[MCAOVERFLOW-101]
_ = x[MOVSB_ZL-102] _ = x[MCDT_NO-102]
_ = x[MOVU-103] _ = x[MCOMMIT-103]
_ = x[MPX-104] _ = x[MD_CLEAR-104]
_ = x[MSRIRC-105] _ = x[MMX-105]
_ = x[MSRLIST-106] _ = x[MMXEXT-106]
_ = x[MSR_PAGEFLUSH-107] _ = x[MOVBE-107]
_ = x[NRIPS-108] _ = x[MOVDIR64B-108]
_ = x[NX-109] _ = x[MOVDIRI-109]
_ = x[OSXSAVE-110] _ = x[MOVSB_ZL-110]
_ = x[PCONFIG-111] _ = x[MOVU-111]
_ = x[POPCNT-112] _ = x[MPX-112]
_ = x[PPIN-113] _ = x[MSRIRC-113]
_ = x[PREFETCHI-114] _ = x[MSRLIST-114]
_ = x[PSFD-115] _ = x[MSR_PAGEFLUSH-115]
_ = x[RDPRU-116] _ = x[NRIPS-116]
_ = x[RDRAND-117] _ = x[NX-117]
_ = x[RDSEED-118] _ = x[OSXSAVE-118]
_ = x[RDTSCP-119] _ = x[PCONFIG-119]
_ = x[RRSBA_CTRL-120] _ = x[POPCNT-120]
_ = x[RTM-121] _ = x[PPIN-121]
_ = x[RTM_ALWAYS_ABORT-122] _ = x[PREFETCHI-122]
_ = x[SERIALIZE-123] _ = x[PSFD-123]
_ = x[SEV-124] _ = x[RDPRU-124]
_ = x[SEV_64BIT-125] _ = x[RDRAND-125]
_ = x[SEV_ALTERNATIVE-126] _ = x[RDSEED-126]
_ = x[SEV_DEBUGSWAP-127] _ = x[RDTSCP-127]
_ = x[SEV_ES-128] _ = x[RRSBA_CTRL-128]
_ = x[SEV_RESTRICTED-129] _ = x[RTM-129]
_ = x[SEV_SNP-130] _ = x[RTM_ALWAYS_ABORT-130]
_ = x[SGX-131] _ = x[SBPB-131]
_ = x[SGXLC-132] _ = x[SERIALIZE-132]
_ = x[SHA-133] _ = x[SEV-133]
_ = x[SME-134] _ = x[SEV_64BIT-134]
_ = x[SME_COHERENT-135] _ = x[SEV_ALTERNATIVE-135]
_ = x[SPEC_CTRL_SSBD-136] _ = x[SEV_DEBUGSWAP-136]
_ = x[SRBDS_CTRL-137] _ = x[SEV_ES-137]
_ = x[SSE-138] _ = x[SEV_RESTRICTED-138]
_ = x[SSE2-139] _ = x[SEV_SNP-139]
_ = x[SSE3-140] _ = x[SGX-140]
_ = x[SSE4-141] _ = x[SGXLC-141]
_ = x[SSE42-142] _ = x[SHA-142]
_ = x[SSE4A-143] _ = x[SME-143]
_ = x[SSSE3-144] _ = x[SME_COHERENT-144]
_ = x[STIBP-145] _ = x[SPEC_CTRL_SSBD-145]
_ = x[STIBP_ALWAYSON-146] _ = x[SRBDS_CTRL-146]
_ = x[STOSB_SHORT-147] _ = x[SRSO_MSR_FIX-147]
_ = x[SUCCOR-148] _ = x[SRSO_NO-148]
_ = x[SVM-149] _ = x[SRSO_USER_KERNEL_NO-149]
_ = x[SVMDA-150] _ = x[SSE-150]
_ = x[SVMFBASID-151] _ = x[SSE2-151]
_ = x[SVML-152] _ = x[SSE3-152]
_ = x[SVMNP-153] _ = x[SSE4-153]
_ = x[SVMPF-154] _ = x[SSE42-154]
_ = x[SVMPFT-155] _ = x[SSE4A-155]
_ = x[SYSCALL-156] _ = x[SSSE3-156]
_ = x[SYSEE-157] _ = x[STIBP-157]
_ = x[TBM-158] _ = x[STIBP_ALWAYSON-158]
_ = x[TDX_GUEST-159] _ = x[STOSB_SHORT-159]
_ = x[TLB_FLUSH_NESTED-160] _ = x[SUCCOR-160]
_ = x[TME-161] _ = x[SVM-161]
_ = x[TOPEXT-162] _ = x[SVMDA-162]
_ = x[TSCRATEMSR-163] _ = x[SVMFBASID-163]
_ = x[TSXLDTRK-164] _ = x[SVML-164]
_ = x[VAES-165] _ = x[SVMNP-165]
_ = x[VMCBCLEAN-166] _ = x[SVMPF-166]
_ = x[VMPL-167] _ = x[SVMPFT-167]
_ = x[VMSA_REGPROT-168] _ = x[SYSCALL-168]
_ = x[VMX-169] _ = x[SYSEE-169]
_ = x[VPCLMULQDQ-170] _ = x[TBM-170]
_ = x[VTE-171] _ = x[TDX_GUEST-171]
_ = x[WAITPKG-172] _ = x[TLB_FLUSH_NESTED-172]
_ = x[WBNOINVD-173] _ = x[TME-173]
_ = x[WRMSRNS-174] _ = x[TOPEXT-174]
_ = x[X87-175] _ = x[TSCRATEMSR-175]
_ = x[XGETBV1-176] _ = x[TSXLDTRK-176]
_ = x[XOP-177] _ = x[VAES-177]
_ = x[XSAVE-178] _ = x[VMCBCLEAN-178]
_ = x[XSAVEC-179] _ = x[VMPL-179]
_ = x[XSAVEOPT-180] _ = x[VMSA_REGPROT-180]
_ = x[XSAVES-181] _ = x[VMX-181]
_ = x[AESARM-182] _ = x[VPCLMULQDQ-182]
_ = x[ARMCPUID-183] _ = x[VTE-183]
_ = x[ASIMD-184] _ = x[WAITPKG-184]
_ = x[ASIMDDP-185] _ = x[WBNOINVD-185]
_ = x[ASIMDHP-186] _ = x[WRMSRNS-186]
_ = x[ASIMDRDM-187] _ = x[X87-187]
_ = x[ATOMICS-188] _ = x[XGETBV1-188]
_ = x[CRC32-189] _ = x[XOP-189]
_ = x[DCPOP-190] _ = x[XSAVE-190]
_ = x[EVTSTRM-191] _ = x[XSAVEC-191]
_ = x[FCMA-192] _ = x[XSAVEOPT-192]
_ = x[FP-193] _ = x[XSAVES-193]
_ = x[FPHP-194] _ = x[AESARM-194]
_ = x[GPA-195] _ = x[ARMCPUID-195]
_ = x[JSCVT-196] _ = x[ASIMD-196]
_ = x[LRCPC-197] _ = x[ASIMDDP-197]
_ = x[PMULL-198] _ = x[ASIMDHP-198]
_ = x[SHA1-199] _ = x[ASIMDRDM-199]
_ = x[SHA2-200] _ = x[ATOMICS-200]
_ = x[SHA3-201] _ = x[CRC32-201]
_ = x[SHA512-202] _ = x[DCPOP-202]
_ = x[SM3-203] _ = x[EVTSTRM-203]
_ = x[SM4-204] _ = x[FCMA-204]
_ = x[SVE-205] _ = x[FP-205]
_ = x[lastID-206] _ = x[FPHP-206]
_ = x[GPA-207]
_ = x[JSCVT-208]
_ = x[LRCPC-209]
_ = x[PMULL-210]
_ = x[SHA1-211]
_ = x[SHA2-212]
_ = x[SHA3-213]
_ = x[SHA512-214]
_ = x[SM3-215]
_ = x[SM4-216]
_ = x[SVE-217]
_ = x[lastID-218]
_ = x[firstID-0] _ = x[firstID-0]
} }
const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 282, 286, 290, 296, 301, 309, 314, 320, 324, 333, 351, 359, 366, 370, 374, 388, 394, 398, 402, 411, 415, 419, 424, 429, 433, 437, 444, 448, 451, 457, 460, 463, 473, 483, 496, 509, 513, 517, 531, 548, 551, 561, 572, 578, 586, 597, 605, 617, 633, 647, 658, 668, 683, 691, 702, 712, 719, 723, 726, 733, 738, 749, 756, 763, 771, 774, 780, 785, 794, 801, 809, 813, 816, 822, 829, 842, 847, 849, 856, 863, 869, 873, 882, 886, 891, 897, 903, 909, 919, 922, 938, 947, 950, 959, 974, 987, 993, 1007, 1014, 1017, 1022, 1025, 1028, 1040, 1054, 1064, 1067, 1071, 1075, 1079, 1084, 1089, 1094, 1099, 1113, 1124, 1130, 1133, 1138, 1147, 1151, 1156, 1161, 1167, 1174, 1179, 1182, 1191, 1207, 1210, 1216, 1226, 1234, 1238, 1247, 1251, 1263, 1266, 1276, 1279, 1286, 1294, 1301, 1304, 1311, 1314, 1319, 1325, 1333, 1339, 1345, 1353, 1358, 1365, 1372, 1380, 1387, 1392, 1397, 1404, 1408, 1410, 1414, 1417, 1422, 1427, 1432, 1436, 1440, 1444, 1450, 1453, 1456, 1459, 1465} var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 319, 323, 327, 333, 338, 346, 351, 357, 361, 370, 388, 396, 403, 407, 411, 425, 431, 435, 439, 448, 452, 456, 461, 466, 470, 474, 481, 485, 488, 494, 497, 500, 510, 520, 533, 546, 550, 561, 565, 579, 596, 599, 609, 620, 626, 634, 645, 653, 665, 681, 695, 706, 716, 731, 739, 750, 760, 767, 776, 786, 790, 793, 800, 805, 816, 823, 830, 838, 841, 847, 852, 861, 868, 876, 880, 883, 889, 896, 909, 914, 916, 923, 930, 936, 940, 949, 953, 958, 964, 970, 976, 986, 989, 1005, 1009, 1018, 1021, 1030, 1045, 1058, 1064, 1078, 1085, 1088, 1093, 1096, 1099, 1111, 1125, 1135, 1147, 1154, 1173, 1176, 1180, 1184, 1188, 1193, 1198, 1203, 1208, 1222, 1233, 1239, 1242, 1247, 1256, 1260, 1265, 1270, 1276, 1283, 1288, 1291, 1300, 1316, 1319, 1325, 1335, 1343, 1347, 1356, 1360, 1372, 1375, 1385, 1388, 1395, 1403, 1410, 1413, 1420, 1423, 1428, 1434, 1442, 1448, 1454, 1462, 1467, 1474, 1481, 1489, 1496, 1501, 1506, 1513, 1517, 1519, 1523, 1526, 1531, 1536, 1541, 1545, 1549, 1553, 1559, 1562, 1565, 1568, 1574}
func (i FeatureID) String() string { func (i FeatureID) String() string {
if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) { if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {

vendor/github.com/klauspost/reedsolomon/.gitignore (generated, vendored; 26 lines added)

@ -0,0 +1,26 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
.idea

vendor/github.com/klauspost/reedsolomon/LICENSE (generated, vendored; 23 lines added)

@ -0,0 +1,23 @@
The MIT License (MIT)
Copyright (c) 2015 Klaus Post
Copyright (c) 2015 Backblaze
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

vendor/github.com/klauspost/reedsolomon/README.md (generated, vendored; 566 lines added)

@ -0,0 +1,566 @@
# Reed-Solomon
[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/reedsolomon.svg)](https://pkg.go.dev/github.com/klauspost/reedsolomon) [![Go](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/reedsolomon/actions/workflows/go.yml)
Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by
[Backblaze](http://backblaze.com), with some additional optimizations.
For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
For encoding high shard counts (>256) a Leopard implementation is used.
For most platforms this performs close to the original Leopard implementation in terms of speed.
Package home: https://github.com/klauspost/reedsolomon
Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon
# Installation
To get the package use the standard:
```bash
go get -u github.com/klauspost/reedsolomon
```
Using Go modules is recommended.
# Changes
## 2022
* [GFNI](https://github.com/klauspost/reedsolomon/pull/224) support for amd64, for up to 3x faster processing.
* [Leopard GF8](https://github.com/klauspost/reedsolomon#leopard-gf8) mode added, for faster processing of medium shard counts.
* [Leopard GF16](https://github.com/klauspost/reedsolomon#leopard-compatible-gf16) mode added, for up to 65536 shards.
* [WithJerasureMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithJerasureMatrix) allows constructing a [Jerasure](https://github.com/tsuraan/Jerasure) compatible matrix.
## 2021
* Use `GOAMD64=v4` to enable faster AVX2.
* Add progressive shard encoding.
* Wider AVX2 loops
* Limit concurrency on AVX2, since we are likely memory bound.
* Allow 0 parity shards.
* Allow disabling inversion cache.
* Faster AVX2 encoding.
<details>
<summary>See older changes</summary>
## May 2020
* ARM64 optimizations, up to 2.5x faster.
* Added [WithFastOneParityMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithFastOneParityMatrix) for faster operation with 1 parity shard.
* Much better performance when using a limited number of goroutines.
* AVX512 is now using multiple cores.
* Stream processing overhaul, big speedups in most cases.
* AVX512 optimizations
## March 6, 2019
The pure Go implementation is about 30% faster. Minor tweaks to assembler implementations.
## February 8, 2019
AVX512 accelerated version added for Intel Skylake CPUs. This can give up to a 4x speed improvement as compared to AVX2.
See [here](https://github.com/klauspost/reedsolomon#performance-on-avx512) for more details.
## December 18, 2018
Assembly code for ppc64le has been contributed, this boosts performance by about 10x on this platform.
## November 18, 2017
Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt
to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU.
## October 1, 2017
* [Cauchy Matrix](https://godoc.org/github.com/klauspost/reedsolomon#WithCauchyMatrix) is now an option.
Thanks to [templexxx](https://github.com/templexxx) for the basis of this.
* Default maximum number of [goroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithMaxGoroutines)
has been increased for better multi-core scaling.
* After several requests, Reconstruct and ReconstructData now allow slices of zero length but sufficient capacity to
be used instead of allocating new memory.
## August 26, 2017
* The [`Encoder()`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) now contains an `Update`
function contributed by [chenzhongtao](https://github.com/chenzhongtao).
* [Frank Wessels](https://github.com/fwessels) kindly contributed ARM 64 bit assembly,
which gives a huge performance boost on this platform.
## July 20, 2017
`ReconstructData` added to [`Encoder`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) interface.
This can cause compatibility issues if you implement your own Encoder. A simple workaround can be added:
```Go
func (e *YourEnc) ReconstructData(shards [][]byte) error {
return ReconstructData(shards)
}
```
You can of course also do your own implementation.
The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamEncoder)
handles this without modifying the interface.
This is a good lesson on why returning interfaces is not a good design.
</details>
# Usage
This section assumes you know the basics of Reed-Solomon encoding.
A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/).
This package performs the calculation of the parity sets. The usage is therefore relatively simple.
First of all, you need to choose your distribution of data and parity shards.
A 'good' distribution is very subjective, and will depend a lot on your usage scenario.
To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
```Go
enc, err := reedsolomon.New(10, 3)
```
This encoder will work for all parity sets with this distribution of data and parity shards.
If you will primarily be using it with one shard size it is recommended to use
[`WithAutoGoroutines(shardSize)`](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithAutoGoroutines)
as an additional parameter. This will attempt to calculate the optimal number of goroutines to use for the best speed.
It is not required that all shards are this size.
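For example, it could be passed alongside the shard counts like this (a minimal sketch; the 50000-byte shard size is a placeholder):

```Go
// Tune the goroutine count for ~50000-byte shards; other shard sizes still work.
enc, err := reedsolomon.New(10, 3, reedsolomon.WithAutoGoroutines(50000))
```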
Then you send and receive data that is a simple slice of byte slices; `[][]byte`.
In the example above, the top slice must have a length of 13.
```Go
data := make([][]byte, 13)
```
You should then fill the first 10 slices with *equally sized* data,
and create parity shards that will be populated with parity data. In this case we create the data in memory,
but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files.
```Go
// Create all shards, size them at 50000 each
for i := range data {
    data[i] = make([]byte, 50000)
}

// The above allocations can also be done by the encoder:
// data := enc.(reedsolomon.Extended).AllocAligned(50000)

// Fill some data into the data shards
for i, in := range data[:10] {
    for j := range in {
        in[j] = byte((i + j) & 0xff)
    }
}
```
To populate the parity shards, you simply call `Encode()` with your data.
```Go
err = enc.Encode(data)
```
The only case where you should get an error is if the data shards aren't of equal size.
The last 3 shards now contain parity data. You can verify this by calling `Verify()`:
```Go
ok, err = enc.Verify(data)
```
The final (and important) part is to be able to reconstruct missing shards.
For this to work, you need to know which parts of your data are missing.
The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario,
you need to implement a hash check for each shard.
If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set.
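One possible approach, sketched here with `crypto/sha256` from the standard library (this is an assumption about how you might do it, not an API of this package): keep a checksum per shard and nil out any shard that no longer matches before reconstructing.

```Go
// Checksums computed while the shards are known to be good.
sums := make([][32]byte, len(data))
for i, shard := range data {
    sums[i] = sha256.Sum256(shard)
}

// Later, before reconstruction: drop shards that fail verification.
for i, shard := range data {
    if shard != nil && sha256.Sum256(shard) != sums[i] {
        data[i] = nil // mark as missing so Reconstruct can rebuild it
    }
}
```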
To indicate missing data, you set the shard to nil before calling `Reconstruct()`:
```Go
// Delete two data shards
data[3] = nil
data[7] = nil
// Reconstruct the missing shards
err := enc.Reconstruct(data)
```
The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail.
If you are only interested in the data shards (for reading purposes) you can call `ReconstructData()`:
```Go
// Delete two data shards
data[3] = nil
data[7] = nil
// Reconstruct just the missing data shards
err := enc.ReconstructData(data)
```
If you don't need all data shards you can use `ReconstructSome()`:
```Go
// Delete two data shards
data[3] = nil
data[7] = nil
// Reconstruct just shard 3
err := enc.ReconstructSome(data, []bool{false, false, false, true, false, false, false, false})
```
So to sum up reconstruction:
* The number of data/parity shards must match the numbers used for encoding.
* The order of shards must be the same as used when encoding.
* You may only supply data you know is valid.
* Invalid shards should be set to nil.
For complete examples of an encoder and decoder see the
[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
# Splitting/Joining Data
You might have a large slice of data.
To help you split this, there are some helper functions that can split and join a single byte slice.
```Go
bigfile, _ := ioutil.ReadFile("myfile.data")
// Split the file
split, err := enc.Split(bigfile)
```
This will split the file into the number of data shards set when creating the encoder and create empty parity shards.
An important thing to note is that you have to *keep track of the exact input size*.
If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard.
To join a data set, use the `Join()` function, which will join the shards and write it to the `io.Writer` you supply:
```Go
// Join a data set and write it to io.Discard.
err = enc.Join(io.Discard, data, len(bigfile))
```
## Aligned Allocations
For AMD64, aligned inputs can make a big speed difference.
This is an example of the speed difference when inputs are unaligned/aligned:
```
BenchmarkEncode100x20x10000-32 7058 172648 ns/op 6950.57 MB/s
BenchmarkEncode100x20x10000-32 8406 137911 ns/op 8701.24 MB/s
```
This is mostly the case when dealing with odd-sized shards.
To facilitate this the package provides an `AllocAligned(shards, each int) [][]byte`.
This will allocate a number of shards, each with the size `each`.
Each shard will then be aligned to a 64 byte boundary.
Each encoder also has an `AllocAligned(each int) [][]byte` as an extended interface which will return the same,
but with the shard count configured in the encoder.
It is not possible to re-align already allocated slices, for example when using `Split`.
When it is not possible to write to aligned shards, you should not copy to them.
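A hedged sketch of allocating aligned shards with the package-level helper instead of plain `make` (shard counts and sizes mirror the earlier example and are placeholders):

```Go
// 13 shards (10 data + 3 parity) of 50000 bytes, each starting on a 64-byte boundary.
data := reedsolomon.AllocAligned(13, 50000)

// Fill the data shards, then encode as usual.
for i, in := range data[:10] {
    for j := range in {
        in[j] = byte((i + j) & 0xff)
    }
}
err = enc.Encode(data)
```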
# Progressive encoding
It is possible to encode individual shards using EncodeIdx:
```Go
// EncodeIdx will add parity for a single data shard.
// Parity shards should start out as 0. The caller must zero them.
// Data shards must be delivered exactly once. There is no check for this.
// The parity shards will always be updated and the data shards will remain the same.
EncodeIdx(dataShard []byte, idx int, parity [][]byte) error
```
This allows progressively encoding the parity by sending individual data shards.
There is no requirement on shards being delivered in order,
but when sent in order it allows encoding shards one at a time,
effectively allowing the operation to be streaming.
The result will be the same as encoding all shards at once.
There is a minor speed penalty using this method, so send
all shards at once if they are available.
## Example
```Go
func test() {
// Create an encoder with 7 data and 3 parity slices.
enc, _ := reedsolomon.New(7, 3)
// This will be our output parity.
parity := make([][]byte, 3)
for i := range parity {
parity[i] = make([]byte, 10000)
}
for i := 0; i < 7; i++ {
// Send data shards one at a time.
_ = enc.EncodeIdx(make([]byte, 10000), i, parity)
}
// parity now contains parity, as if all data was sent in one call.
}
```
# Streaming/Merging
It might seem like a limitation that all data should be in memory,
but an important property is that *as long as the number of data/parity shards are the same,
you can merge/split data sets*, and they will remain valid as a separate set.
```Go
// Split the data set of 50000 elements into two of 25000
splitA := make([][]byte, 13)
splitB := make([][]byte, 13)
// Merge into a 100000 element set
merged := make([][]byte, 13)
for i := range data {
splitA[i] = data[i][:25000]
splitB[i] = data[i][25000:]
// Concatenate it to itself
merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...)
merged[i] = append(merged[i], data[i]...)
}
// Each part should still verify as ok.
ok, err := enc.Verify(splitA)
if ok && err == nil {
log.Println("splitA ok")
}
ok, err = enc.Verify(splitB)
if ok && err == nil {
log.Println("splitB ok")
}
ok, err = enc.Verify(merged)
if ok && err == nil {
log.Println("merge ok")
}
```
This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks.
For the best throughput, don't use too small blocks.
This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data.
This doesn't give the same flexibility as a higher number of data shards, but it will be much more performant.
# Streaming API
Support has been added for a streaming API, to help perform fully streaming operations,
which enables you to do the same operations, but on streams.
To use the stream API, use [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function
to create the encoding/decoding interfaces.
You can use [`WithConcurrentStreams`](https://godoc.org/github.com/klauspost/reedsolomon#WithConcurrentStreams)
to ready an interface that reads/writes concurrently from the streams.
You can specify the size of each operation using
[`WithStreamBlockSize`](https://godoc.org/github.com/klauspost/reedsolomon#WithStreamBlockSize).
This will set the size of each read/write operation.
Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API.
Each stream must supply the same amount of data, similar to how each slice must be the same size with the in-memory API.
If an error occurs in relation to a stream,
a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError)
or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError)
will help you determine which stream was the offender.
There is no buffering or timeouts/retry specified. If you want to add that, you need to add it to the Reader/Writer.
For complete examples of a streaming encoder and decoder see the
[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
GF16 (more than 256 shards) is not supported by the streaming interface.
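A minimal sketch of the streaming API under these assumptions: 4 data and 2 parity shards, a placeholder shard size, and in-memory `bytes` buffers standing in for real files or network streams (uses `bytes`, `io` and `log` from the standard library):

```Go
// Stream-encode 4 data shards into 2 parity shards.
streamEnc, err := reedsolomon.NewStream(4, 2)
if err != nil {
    log.Fatal(err)
}

const shardSize = 50000
data := make([]io.Reader, 4)
for i := range data {
    data[i] = bytes.NewReader(make([]byte, shardSize)) // placeholder content
}

parityBufs := make([]*bytes.Buffer, 2)
parity := make([]io.Writer, 2)
for i := range parity {
    parityBufs[i] = &bytes.Buffer{}
    parity[i] = parityBufs[i]
}

// Every reader must supply exactly the same number of bytes.
if err := streamEnc.Encode(data, parity); err != nil {
    log.Fatal(err)
}
```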
# Advanced Options
You can modify internal options which affects how jobs are split between and processed by goroutines.
To create options, use the WithXXX functions. You can supply options to `New` and `NewStream`.
If no Options are supplied, default options are used.
Example of how to supply options:
```Go
enc, err := reedsolomon.New(10, 3, WithMaxGoroutines(25))
```
# Leopard Compatible GF16
When you encode more than 256 shards the library will switch to a [Leopard-RS](https://github.com/catid/leopard) implementation.
This allows encoding up to 65536 shards (data+parity) with the following limitations, similar to Leopard:
* The original and recovery data must not exceed 65536 pieces.
* Each shard size *must* be a multiple of 64 bytes (see the sizing sketch below the list).
* Each buffer should have the same number of bytes.
* Even the last shard must be rounded up to the block size.
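A hedged sketch of how a shard length could be rounded up to the required multiple of 64 (the helper name `roundUp64` is hypothetical, not part of the package):

```Go
// roundUp64 pads a shard length up to the next multiple of 64 bytes, as the
// Leopard code paths require. Hypothetical helper, not part of this package.
func roundUp64(n int) int {
    return (n + 63) &^ 63
}
```

For example, `roundUp64(1000)` returns 1024, so a 1000-byte final shard would be allocated as 1024 bytes with zero padding.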
| | Regular | Leopard |
|-----------------|---------|---------|
| Encode | ✓ | ✓ |
| EncodeIdx | ✓ | - |
| Verify | ✓ | ✓ |
| Reconstruct | ✓ | ✓ |
| ReconstructData | ✓ | ✓ |
| ReconstructSome | ✓ | ✓ (+) |
| Update | ✓ | - |
| Split | ✓ | ✓ |
| Join | ✓ | ✓ |
* (+) Same as calling `ReconstructData`.
The Split/Join functions will help to split an input to the proper sizes.
Speed can be expected to be `O(N*log(N))`, compared to the `O(N*N)` of the regular code.
Reconstruction matrix calculation is more time-consuming,
so be sure to include that as part of any benchmark you run.
For now SSSE3, AVX2 and AVX512 assembly are available on AMD64 platforms.
Leopard mode currently always runs as a single goroutine, since multiple
goroutines don't provide any worthwhile speedup.
## Leopard GF8
It is possible to replace the default reed-solomon encoder with a leopard compatible one.
This will typically be faster when dealing with more than 20-30 shards.
Note that the limitations listed above also apply to this mode.
See the table below for speed with different numbers of shards.
To enable Leopard GF8 mode use `WithLeopardGF(true)`.
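For example (a minimal sketch; the shard counts are placeholders):

```Go
// 64 data + 16 parity shards using the Leopard GF8 backend.
enc, err := reedsolomon.New(64, 16, reedsolomon.WithLeopardGF(true))
```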
Benchmark Encoding and Reconstructing *1KB* shards with variable number of shards.
All implementations use the inversion cache when available.
Speed is total shard size for each operation. Data shard throughput is speed/2.
AVX2 is used.
| Encoder | Shards | Encode | Recover All | Recover One |
|--------------|-------------|----------------|--------------|----------------|
| Cauchy | 4+4 | 23076.83 MB/s | 5444.02 MB/s | 10834.67 MB/s |
| Cauchy | 8+8 | 15206.87 MB/s | 4223.42 MB/s | 16181.62 MB/s |
| Cauchy | 16+16 | 7427.47 MB/s | 3305.84 MB/s | 22480.41 MB/s |
| Cauchy | 32+32 | 3785.64 MB/s | 2300.07 MB/s | 26181.31 MB/s |
| Cauchy | 64+64 | 1911.93 MB/s | 1368.51 MB/s | 27992.93 MB/s |
| Cauchy | 128+128 | 963.83 MB/s | 1327.56 MB/s | 32866.86 MB/s |
| Leopard GF8 | 4+4 | 17061.28 MB/s | 3099.06 MB/s | 4096.78 MB/s |
| Leopard GF8 | 8+8 | 10546.67 MB/s | 2925.92 MB/s | 3964.00 MB/s |
| Leopard GF8 | 16+16 | 10961.37 MB/s | 2328.40 MB/s | 3110.22 MB/s |
| Leopard GF8 | 32+32 | 7111.47 MB/s | 2374.61 MB/s | 3220.75 MB/s |
| Leopard GF8 | 64+64 | 7468.57 MB/s | 2055.41 MB/s | 3061.81 MB/s |
| Leopard GF8 | 128+128 | 5479.99 MB/s | 1953.21 MB/s | 2815.15 MB/s |
| Leopard GF16 | 256+256 | 6158.66 MB/s | 454.14 MB/s | 506.70 MB/s |
| Leopard GF16 | 512+512 | 4418.58 MB/s | 685.75 MB/s | 801.63 MB/s |
| Leopard GF16 | 1024+1024 | 4778.05 MB/s | 814.51 MB/s | 1080.19 MB/s |
| Leopard GF16 | 2048+2048 | 3417.05 MB/s | 911.64 MB/s | 1179.48 MB/s |
| Leopard GF16 | 4096+4096 | 3209.41 MB/s | 729.13 MB/s | 1135.06 MB/s |
| Leopard GF16 | 8192+8192 | 2034.11 MB/s | 604.52 MB/s | 842.13 MB/s |
| Leopard GF16 | 16384+16384 | 1525.88 MB/s | 486.74 MB/s | 750.01 MB/s |
| Leopard GF16 | 32768+32768 | 1138.67 MB/s | 482.81 MB/s | 712.73 MB/s |
"Traditional" encoding is faster until somewhere between 16 and 32 shards.
Leopard provides fast encoding in all cases, but shows a significant overhead for reconstruction.
Calculating the reconstruction matrix takes a significant amount of computation.
With bigger shards that overhead will be relatively smaller. Arguably, fewer shards typically also means bigger shards.
Due to the high shard count caching reconstruction matrices generally isn't feasible for Leopard.
# Performance
Performance depends mainly on the number of parity shards.
In rough terms, doubling the number of parity shards will double the encoding time.
Here are the throughput numbers with some different selections of data and parity shards.
For reference each shard is 1MB random data, and 16 CPU cores are used for encoding.
| Data | Parity | Go MB/s | SSSE3 MB/s | AVX2 MB/s |
|------|--------|---------|------------|-----------|
| 5 | 2 | 20,772 | 66,355 | 108,755 |
| 8 | 8 | 6,815 | 38,338 | 70,516 |
| 10 | 4 | 9,245 | 48,237 | 93,875 |
| 50 | 20 | 2,063 | 12,130 | 22,828 |
The throughput numbers here are the size of the encoded data and parity shards.
If `runtime.GOMAXPROCS()` is set to a value higher than 1,
the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
```
benchmark all MB/s data MB/s speedup
BenchmarkReconstruct10x2x10000-8 2011.67 10530.10 5.23x
BenchmarkReconstruct50x5x50000-8 4585.41 14301.60 3.12x
BenchmarkReconstruct10x2x1M-8 8081.15 28216.41 3.49x
BenchmarkReconstruct5x2x1M-8 5780.07 28015.37 4.85x
BenchmarkReconstruct10x4x1M-8 4352.56 14367.61 3.30x
BenchmarkReconstruct50x20x1M-8 1364.35 4189.79 3.07x
BenchmarkReconstruct10x4x16M-8 1484.35 5779.53 3.89x
```
The package will use [GFNI](https://en.wikipedia.org/wiki/AVX-512#GFNI) instructions combined with AVX512 when these are available.
This further improves speed by up to 3x over AVX2 code paths.
## ARM64 NEON
By exploiting NEON instructions the performance for ARM has been accelerated.
Below are the performance numbers for a single core on an EC2 m6g.16xlarge (Graviton2) instance (Amazon Linux 2):
```
BenchmarkGalois128K-64 119562 10028 ns/op 13070.78 MB/s
BenchmarkGalois1M-64 14380 83424 ns/op 12569.22 MB/s
BenchmarkGaloisXor128K-64 96508 12432 ns/op 10543.29 MB/s
BenchmarkGaloisXor1M-64 10000 100322 ns/op 10452.13 MB/s
```
# Performance on ppc64le
The performance for ppc64le has been accelerated.
This gives roughly a 10x performance improvement on this architecture as can be seen below:
```
benchmark old MB/s new MB/s speedup
BenchmarkGalois128K-160 948.87 8878.85 9.36x
BenchmarkGalois1M-160 968.85 9041.92 9.33x
BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x
BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x
```
# Legal
> None of the section below is legal advice. Seek your own legal counsel.
> As stated by the [LICENSE](LICENSE) the authors will not be held liable for any use of this library.
> Users are encouraged to independently verify they comply with all legal requirements.
As can be seen in [recent news](https://www.datanami.com/2023/10/16/cloudera-hit-with-240-million-judgement-over-erasure-coding/)
there have been lawsuits related to possible patents on aspects of erasure coding functionality.
As a possible mitigation, the build tag `nopshufb` can be used when compiling any code that includes this package.
This will remove all inclusion and use of `PSHUFB` and equivalent instructions on other platforms.
This is done by adding `-tags=nopshufb` to `go build` and similar commands that produce binary output.
The removed code may not be infringing and even after `-tags=nopshufb` there may still be infringing code left.
# Links
* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation.
* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
* [Leopard-RS](https://github.com/catid/leopard) C library used as basis for GF16 implementation.
# License
This code, like the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon), is published under an MIT license. See LICENSE file for more information.

vendor/github.com/klauspost/reedsolomon/galois.go (generated, vendored; 979 lines added)

File diff suppressed because one or more lines are too long

vendor/github.com/klauspost/reedsolomon/galois_amd64.go (generated, vendored; 583 lines added)

@ -0,0 +1,583 @@
//go:build !noasm && !appengine && !gccgo && !nopshufb
// Copyright 2015, Klaus Post, see LICENSE for details.
package reedsolomon
const pshufb = true
//go:noescape
func galMulSSSE3(low, high, in, out []byte)
//go:noescape
func galMulSSSE3Xor(low, high, in, out []byte)
//go:noescape
func galMulAVX2Xor(low, high, in, out []byte)
//go:noescape
func galMulAVX2(low, high, in, out []byte)
//go:noescape
func galMulAVX2Xor_64(low, high, in, out []byte)
//go:noescape
func galMulAVX2_64(low, high, in, out []byte)
// This is what the assembler routines do in blocks of 16 bytes:
/*
func galMulSSSE3(low, high, in, out []byte) {
for n, input := range in {
l := input & 0xf
h := input >> 4
out[n] = low[l] ^ high[h]
}
}
func galMulSSSE3Xor(low, high, in, out []byte) {
for n, input := range in {
l := input & 0xf
h := input >> 4
out[n] ^= low[l] ^ high[h]
}
}
*/
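// galMulSlice and galMulSliceXor below dispatch on the detected CPU features:
// inputs of at least bigSwitchover bytes use the 64-bytes-per-iteration AVX2
// kernels, shorter inputs use the 32-byte AVX2 or 16-byte SSSE3 kernels, and
// any remaining tail bytes are finished with the generic mulTable lookup loop.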
// bigSwitchover is the size where 64 bytes are processed per loop.
const bigSwitchover = 128
func galMulSlice(c byte, in, out []byte, o *options) {
if c == 1 {
copy(out, in)
return
}
if o.useAVX2 {
if len(in) >= bigSwitchover {
galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 6) << 6
in = in[done:]
out = out[done:]
}
if len(in) > 32 {
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 5) << 5
in = in[done:]
out = out[done:]
}
} else if o.useSSSE3 {
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 4) << 4
in = in[done:]
out = out[done:]
}
out = out[:len(in)]
mt := mulTable[c][:256]
for i := range in {
out[i] = mt[in[i]]
}
}
func galMulSliceXor(c byte, in, out []byte, o *options) {
if c == 1 {
sliceXor(in, out, o)
return
}
if o.useAVX2 {
if len(in) >= bigSwitchover {
galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 6) << 6
in = in[done:]
out = out[done:]
}
if len(in) >= 32 {
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 5) << 5
in = in[done:]
out = out[done:]
}
} else if o.useSSSE3 {
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done := (len(in) >> 4) << 4
in = in[done:]
out = out[done:]
}
if len(in) == 0 {
return
}
out = out[:len(in)]
mt := mulTable[c][:256]
for i := range in {
out[i] ^= mt[in[i]]
}
}
// simple slice xor
func sliceXor(in, out []byte, o *options) {
if o.useSSE2 {
if len(in) >= bigSwitchover {
if o.useAVX2 {
avx2XorSlice_64(in, out)
done := (len(in) >> 6) << 6
in = in[done:]
out = out[done:]
} else {
sSE2XorSlice_64(in, out)
done := (len(in) >> 6) << 6
in = in[done:]
out = out[done:]
}
}
if len(in) >= 16 {
sSE2XorSlice(in, out)
done := (len(in) >> 4) << 4
in = in[done:]
out = out[done:]
}
} else {
sliceXorGo(in, out, o)
return
}
out = out[:len(in)]
for i := range in {
out[i] ^= in[i]
}
}
// 4-way butterfly
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
if len(work[0]) == 0 {
return
}
t01 := &multiply256LUT[log_m01]
t23 := &multiply256LUT[log_m23]
t02 := &multiply256LUT[log_m02]
if o.useAVX512 {
if log_m01 == modulus {
if log_m23 == modulus {
if log_m02 == modulus {
ifftDIT4_avx512_7(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx512_3(work, dist*24, t01, t23, t02)
}
} else {
if log_m02 == modulus {
ifftDIT4_avx512_5(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx512_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m23 == modulus {
if log_m02 == modulus {
ifftDIT4_avx512_6(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx512_2(work, dist*24, t01, t23, t02)
}
} else {
if log_m02 == modulus {
ifftDIT4_avx512_4(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx512_0(work, dist*24, t01, t23, t02)
}
}
}
return
} else if o.useAVX2 {
if log_m01 == modulus {
if log_m23 == modulus {
if log_m02 == modulus {
ifftDIT4_avx2_7(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx2_3(work, dist*24, t01, t23, t02)
}
} else {
if log_m02 == modulus {
ifftDIT4_avx2_5(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx2_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m23 == modulus {
if log_m02 == modulus {
ifftDIT4_avx2_6(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx2_2(work, dist*24, t01, t23, t02)
}
} else {
if log_m02 == modulus {
ifftDIT4_avx2_4(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx2_0(work, dist*24, t01, t23, t02)
}
}
}
return
}
ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
if len(work[0]) == 0 {
return
}
if false && o.useAvx512GFNI {
// Note that these currently require that length is multiple of 64.
t01 := gf2p811dMulMatrices[log_m01]
t23 := gf2p811dMulMatrices[log_m23]
t02 := gf2p811dMulMatrices[log_m02]
if log_m01 == modulus8 {
if log_m23 == modulus8 {
if log_m02 == modulus8 {
ifftDIT48_gfni_7(work, dist*24, t01, t23, t02)
} else {
ifftDIT48_gfni_3(work, dist*24, t01, t23, t02)
}
} else {
if log_m02 == modulus8 {
ifftDIT48_gfni_5(work, dist*24, t01, t23, t02)
} else {
ifftDIT48_gfni_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m23 == modulus8 {
if log_m02 == modulus8 {
ifftDIT48_gfni_6(work, dist*24, t01, t23, t02)
} else {
ifftDIT48_gfni_2(work, dist*24, t01, t23, t02)
}
} else {
if log_m02 == modulus8 {
ifftDIT48_gfni_4(work, dist*24, t01, t23, t02)
} else {
ifftDIT48_gfni_0(work, dist*24, t01, t23, t02)
}
}
}
return
}
if o.useAVX2 {
// Note that these currently require that length is multiple of 64.
t01 := &multiply256LUT8[log_m01]
t23 := &multiply256LUT8[log_m23]
t02 := &multiply256LUT8[log_m02]
if log_m01 == modulus8 {
if log_m23 == modulus8 {
if log_m02 == modulus8 {
ifftDIT48_avx2_7(work, dist*24, t01, t23, t02)
} else {
ifftDIT48_avx2_3(work, dist*24, t01, t23, t02)
}
} else {
if log_m02 == modulus8 {
ifftDIT48_avx2_5(work, dist*24, t01, t23, t02)
} else {
ifftDIT48_avx2_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m23 == modulus8 {
if log_m02 == modulus8 {
ifftDIT48_avx2_6(work, dist*24, t01, t23, t02)
} else {
ifftDIT48_avx2_2(work, dist*24, t01, t23, t02)
}
} else {
if log_m02 == modulus8 {
ifftDIT48_avx2_4(work, dist*24, t01, t23, t02)
} else {
ifftDIT48_avx2_0(work, dist*24, t01, t23, t02)
}
}
}
return
}
ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
}
func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
if len(work[0]) == 0 {
return
}
t01 := &multiply256LUT[log_m01]
t23 := &multiply256LUT[log_m23]
t02 := &multiply256LUT[log_m02]
if o.useAVX512 {
if log_m02 == modulus {
if log_m01 == modulus {
if log_m23 == modulus {
fftDIT4_avx512_7(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx512_3(work, dist*24, t01, t23, t02)
}
} else {
if log_m23 == modulus {
fftDIT4_avx512_5(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx512_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m01 == modulus {
if log_m23 == modulus {
fftDIT4_avx512_6(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx512_2(work, dist*24, t01, t23, t02)
}
} else {
if log_m23 == modulus {
fftDIT4_avx512_4(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx512_0(work, dist*24, t01, t23, t02)
}
}
}
return
} else if o.useAVX2 {
if log_m02 == modulus {
if log_m01 == modulus {
if log_m23 == modulus {
fftDIT4_avx2_7(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx2_3(work, dist*24, t01, t23, t02)
}
} else {
if log_m23 == modulus {
fftDIT4_avx2_5(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx2_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m01 == modulus {
if log_m23 == modulus {
fftDIT4_avx2_6(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx2_2(work, dist*24, t01, t23, t02)
}
} else {
if log_m23 == modulus {
fftDIT4_avx2_4(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx2_0(work, dist*24, t01, t23, t02)
}
}
}
return
}
fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
if len(work[0]) == 0 {
return
}
if false && o.useAvx512GFNI {
t01 := gf2p811dMulMatrices[log_m01]
t23 := gf2p811dMulMatrices[log_m23]
t02 := gf2p811dMulMatrices[log_m02]
// Note that these currently require that length is multiple of 64.
if log_m02 == modulus8 {
if log_m01 == modulus8 {
if log_m23 == modulus8 {
fftDIT48_gfni_7(work, dist*24, t01, t23, t02)
} else {
fftDIT48_gfni_3(work, dist*24, t01, t23, t02)
}
} else {
if log_m23 == modulus8 {
fftDIT48_gfni_5(work, dist*24, t01, t23, t02)
} else {
fftDIT48_gfni_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m01 == modulus8 {
if log_m23 == modulus8 {
fftDIT48_gfni_6(work, dist*24, t01, t23, t02)
} else {
fftDIT48_gfni_2(work, dist*24, t01, t23, t02)
}
} else {
if log_m23 == modulus8 {
fftDIT48_gfni_4(work, dist*24, t01, t23, t02)
} else {
fftDIT48_gfni_0(work, dist*24, t01, t23, t02)
}
}
}
return
}
if o.useAVX2 {
t01 := &multiply256LUT8[log_m01]
t23 := &multiply256LUT8[log_m23]
t02 := &multiply256LUT8[log_m02]
// Note that these currently require that length is multiple of 64.
if log_m02 == modulus8 {
if log_m01 == modulus8 {
if log_m23 == modulus8 {
fftDIT48_avx2_7(work, dist*24, t01, t23, t02)
} else {
fftDIT48_avx2_3(work, dist*24, t01, t23, t02)
}
} else {
if log_m23 == modulus8 {
fftDIT48_avx2_5(work, dist*24, t01, t23, t02)
} else {
fftDIT48_avx2_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m01 == modulus8 {
if log_m23 == modulus8 {
fftDIT48_avx2_6(work, dist*24, t01, t23, t02)
} else {
fftDIT48_avx2_2(work, dist*24, t01, t23, t02)
}
} else {
if log_m23 == modulus8 {
fftDIT48_avx2_4(work, dist*24, t01, t23, t02)
} else {
fftDIT48_avx2_0(work, dist*24, t01, t23, t02)
}
}
}
return
}
fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
}
// 2-way butterfly forward
func fftDIT2(x, y []byte, log_m ffe, o *options) {
if len(x) == 0 {
return
}
if o.useAVX2 {
tmp := &multiply256LUT[log_m]
fftDIT2_avx2(x, y, tmp)
} else if o.useSSSE3 {
tmp := &multiply256LUT[log_m]
fftDIT2_ssse3(x, y, tmp)
} else {
// Reference version:
refMulAdd(x, y, log_m)
sliceXor(x, y, o)
}
}
// 2-way butterfly forward
func fftDIT28(x, y []byte, log_m ffe8, o *options) {
if len(x) == 0 {
return
}
if o.useAVX2 {
fftDIT28_avx2(x, y, &multiply256LUT8[log_m])
if len(x)&63 == 0 {
return
}
done := (len(y) >> 6) << 6
y = y[done:]
x = x[done:]
}
mulAdd8(x, y, log_m, o)
sliceXor(x, y, o)
}
// 2-way butterfly inverse
func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
if len(x) == 0 {
return
}
if o.useAVX2 {
ifftDIT28_avx2(x, y, &multiply256LUT8[log_m])
if len(x)&63 == 0 {
return
}
done := (len(y) >> 6) << 6
y = y[done:]
x = x[done:]
}
sliceXor(x, y, o)
mulAdd8(x, y, log_m, o)
}
func mulAdd8(x, y []byte, log_m ffe8, o *options) {
if o.useAVX2 {
t := &multiply256LUT8[log_m]
galMulAVX2Xor_64(t[:16], t[16:32], y, x)
done := (len(y) >> 6) << 6
y = y[done:]
x = x[done:]
} else if o.useSSSE3 {
t := &multiply256LUT8[log_m]
galMulSSSE3Xor(t[:16], t[16:32], y, x)
done := (len(y) >> 4) << 4
y = y[done:]
x = x[done:]
}
refMulAdd8(x, y, log_m)
}
// 2-way butterfly
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
if len(x) == 0 {
return
}
if o.useAVX2 {
tmp := &multiply256LUT[log_m]
ifftDIT2_avx2(x, y, tmp)
} else if o.useSSSE3 {
tmp := &multiply256LUT[log_m]
ifftDIT2_ssse3(x, y, tmp)
} else {
// Reference version:
sliceXor(x, y, o)
refMulAdd(x, y, log_m)
}
}
func mulgf16(x, y []byte, log_m ffe, o *options) {
if len(x) == 0 {
return
}
if o.useAVX2 {
tmp := &multiply256LUT[log_m]
mulgf16_avx2(x, y, tmp)
} else if o.useSSSE3 {
tmp := &multiply256LUT[log_m]
mulgf16_ssse3(x, y, tmp)
} else {
refMul(x, y, log_m)
}
}
func mulgf8(out, in []byte, log_m ffe8, o *options) {
if o.useAVX2 {
t := &multiply256LUT8[log_m]
galMulAVX2_64(t[:16], t[16:32], in, out)
done := (len(in) >> 6) << 6
in = in[done:]
out = out[done:]
} else if o.useSSSE3 {
t := &multiply256LUT8[log_m]
galMulSSSE3(t[:16], t[16:32], in, out)
done := (len(in) >> 4) << 4
in = in[done:]
out = out[done:]
}
out = out[:len(in)]
mt := mul8LUTs[log_m].Value[:]
for i := range in {
out[i] = byte(mt[in[i]])
}
}

310
vendor/github.com/klauspost/reedsolomon/galois_amd64.s generated vendored Normal file
View File

@ -0,0 +1,310 @@
//+build !noasm
//+build !appengine
//+build !gccgo
//+build !nopshufb
// Copyright 2015, Klaus Post, see LICENSE for details.
// Based on http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf
// and http://jerasure.org/jerasure/gf-complete/tree/master
// func galMulSSSE3Xor(low, high, in, out []byte)
TEXT ·galMulSSSE3Xor(SB), 7, $0
MOVQ low+0(FP), SI // SI: &low
MOVQ high+24(FP), DX // DX: &high
MOVOU (SI), X6 // X6 low
MOVOU (DX), X7 // X7: high
MOVQ $15, BX // BX: low mask
MOVQ BX, X8
PXOR X5, X5
MOVQ in+48(FP), SI // R11: &in
MOVQ in_len+56(FP), R9 // R9: len(in)
MOVQ out+72(FP), DX // DX: &out
PSHUFB X5, X8 // X8: lomask (unpacked)
SHRQ $4, R9 // len(in) / 16
MOVQ SI, AX
MOVQ DX, BX
ANDQ $15, AX
ANDQ $15, BX
CMPQ R9, $0
JEQ done_xor
ORQ AX, BX
CMPQ BX, $0
JNZ loopback_xor
loopback_xor_aligned:
MOVOA (SI), X0 // in[x]
MOVOA (DX), X4 // out[x]
MOVOA X0, X1 // in[x]
MOVOA X6, X2 // low copy
MOVOA X7, X3 // high copy
PSRLQ $4, X1 // X1: high input
PAND X8, X0 // X0: low input
PAND X8, X1 // X0: high input
PSHUFB X0, X2 // X2: mul low part
PSHUFB X1, X3 // X3: mul high part
PXOR X2, X3 // X3: Result
PXOR X4, X3 // X3: Result xor existing out
MOVOA X3, (DX) // Store
ADDQ $16, SI // in+=16
ADDQ $16, DX // out+=16
SUBQ $1, R9
JNZ loopback_xor_aligned
JMP done_xor
loopback_xor:
MOVOU (SI), X0 // in[x]
MOVOU (DX), X4 // out[x]
MOVOU X0, X1 // in[x]
MOVOU X6, X2 // low copy
MOVOU X7, X3 // high copy
PSRLQ $4, X1 // X1: high input
PAND X8, X0 // X0: low input
PAND X8, X1 // X0: high input
PSHUFB X0, X2 // X2: mul low part
PSHUFB X1, X3 // X3: mul high part
PXOR X2, X3 // X3: Result
PXOR X4, X3 // X3: Result xor existing out
MOVOU X3, (DX) // Store
ADDQ $16, SI // in+=16
ADDQ $16, DX // out+=16
SUBQ $1, R9
JNZ loopback_xor
done_xor:
RET
// func galMulSSSE3(low, high, in, out []byte)
TEXT ·galMulSSSE3(SB), 7, $0
MOVQ low+0(FP), SI // SI: &low
MOVQ high+24(FP), DX // DX: &high
MOVOU (SI), X6 // X6 low
MOVOU (DX), X7 // X7: high
MOVQ $15, BX // BX: low mask
MOVQ BX, X8
PXOR X5, X5
MOVQ in+48(FP), SI // R11: &in
MOVQ in_len+56(FP), R9 // R9: len(in)
MOVQ out+72(FP), DX // DX: &out
PSHUFB X5, X8 // X8: lomask (unpacked)
MOVQ SI, AX
MOVQ DX, BX
SHRQ $4, R9 // len(in) / 16
ANDQ $15, AX
ANDQ $15, BX
CMPQ R9, $0
JEQ done
ORQ AX, BX
CMPQ BX, $0
JNZ loopback
loopback_aligned:
MOVOA (SI), X0 // in[x]
MOVOA X0, X1 // in[x]
MOVOA X6, X2 // low copy
MOVOA X7, X3 // high copy
PSRLQ $4, X1 // X1: high input
PAND X8, X0 // X0: low input
PAND X8, X1 // X0: high input
PSHUFB X0, X2 // X2: mul low part
PSHUFB X1, X3 // X3: mul high part
PXOR X2, X3 // X3: Result
MOVOA X3, (DX) // Store
ADDQ $16, SI // in+=16
ADDQ $16, DX // out+=16
SUBQ $1, R9
JNZ loopback_aligned
JMP done
loopback:
MOVOU (SI), X0 // in[x]
MOVOU X0, X1 // in[x]
MOVOA X6, X2 // low copy
MOVOA X7, X3 // high copy
PSRLQ $4, X1 // X1: high input
PAND X8, X0 // X0: low input
PAND X8, X1 // X0: high input
PSHUFB X0, X2 // X2: mul low part
PSHUFB X1, X3 // X3: mul high part
PXOR X2, X3 // X3: Result
MOVOU X3, (DX) // Store
ADDQ $16, SI // in+=16
ADDQ $16, DX // out+=16
SUBQ $1, R9
JNZ loopback
done:
RET
// func galMulAVX2Xor(low, high, in, out []byte)
TEXT ·galMulAVX2Xor(SB), 7, $0
MOVQ low+0(FP), SI // SI: &low
MOVQ high+24(FP), DX // DX: &high
MOVQ $15, BX // BX: low mask
MOVQ BX, X5
MOVOU (SI), X6 // X6: low
MOVOU (DX), X7 // X7: high
MOVQ in_len+56(FP), R9 // R9: len(in)
VINSERTI128 $1, X6, Y6, Y6 // low
VINSERTI128 $1, X7, Y7, Y7 // high
VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
SHRQ $5, R9 // len(in) / 32
MOVQ out+72(FP), DX // DX: &out
MOVQ in+48(FP), SI // SI: &in
TESTQ R9, R9
JZ done_xor_avx2
loopback_xor_avx2:
VMOVDQU (SI), Y0
VMOVDQU (DX), Y4
VPSRLQ $4, Y0, Y1 // Y1: high input
VPAND Y8, Y0, Y0 // Y0: low input
VPAND Y8, Y1, Y1 // Y1: high input
VPSHUFB Y0, Y6, Y2 // Y2: mul low part
VPSHUFB Y1, Y7, Y3 // Y3: mul high part
VPXOR Y3, Y2, Y3 // Y3: Result
VPXOR Y4, Y3, Y4 // Y4: Result
VMOVDQU Y4, (DX)
ADDQ $32, SI // in+=32
ADDQ $32, DX // out+=32
SUBQ $1, R9
JNZ loopback_xor_avx2
done_xor_avx2:
VZEROUPPER
RET
// func galMulAVX2(low, high, in, out []byte)
TEXT ·galMulAVX2(SB), 7, $0
MOVQ low+0(FP), SI // SI: &low
MOVQ high+24(FP), DX // DX: &high
MOVQ $15, BX // BX: low mask
MOVQ BX, X5
MOVOU (SI), X6 // X6: low
MOVOU (DX), X7 // X7: high
MOVQ in_len+56(FP), R9 // R9: len(in)
VINSERTI128 $1, X6, Y6, Y6 // low
VINSERTI128 $1, X7, Y7, Y7 // high
VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
SHRQ $5, R9 // len(in) / 32
MOVQ out+72(FP), DX // DX: &out
MOVQ in+48(FP), SI // SI: &in
TESTQ R9, R9
JZ done_avx2
loopback_avx2:
VMOVDQU (SI), Y0
VPSRLQ $4, Y0, Y1 // Y1: high input
VPAND Y8, Y0, Y0 // Y0: low input
VPAND Y8, Y1, Y1 // Y1: high input
VPSHUFB Y0, Y6, Y2 // Y2: mul low part
VPSHUFB Y1, Y7, Y3 // Y3: mul high part
VPXOR Y3, Y2, Y4 // Y4: Result
VMOVDQU Y4, (DX)
ADDQ $32, SI // in+=32
ADDQ $32, DX // out+=32
SUBQ $1, R9
JNZ loopback_avx2
done_avx2:
VZEROUPPER
RET
// func galMulAVX2Xor_64(low, high, in, out []byte)
TEXT ·galMulAVX2Xor_64(SB), 7, $0
MOVQ low+0(FP), SI // SI: &low
MOVQ high+24(FP), DX // DX: &high
MOVQ $15, BX // BX: low mask
MOVQ BX, X5
MOVQ in_len+56(FP), R9 // R9: len(in)
VBROADCASTI128 (SI), Y6 // low table
VBROADCASTI128 (DX), Y7 // high table
VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
SHRQ $6, R9 // len(in) / 64
MOVQ out+72(FP), DX // DX: &out
MOVQ in+48(FP), SI // SI: &in
TESTQ R9, R9
JZ done_xor_avx2_64
loopback_xor_avx2_64:
VMOVDQU (SI), Y0
VMOVDQU 32(SI), Y10
VMOVDQU (DX), Y4
VMOVDQU 32(DX), Y14
VPSRLQ $4, Y0, Y1 // Y1: high input
VPSRLQ $4, Y10, Y11 // Y11: high input 2
VPAND Y8, Y0, Y0 // Y0: low input
VPAND Y8, Y10, Y10 // Y10: low input 2
VPAND Y8, Y1, Y1 // Y11: high input
VPAND Y8, Y11, Y11 // Y11: high input 2
VPSHUFB Y0, Y6, Y2 // Y2: mul low part
VPSHUFB Y10, Y6, Y12 // Y12: mul low part 2
VPSHUFB Y1, Y7, Y3 // Y3: mul high part
VPSHUFB Y11, Y7, Y13 // Y13: mul high part 2
VPXOR Y3, Y2, Y3 // Y3: Result
VPXOR Y13, Y12, Y13 // Y13: Result 2
VPXOR Y4, Y3, Y4 // Y4: Result
VPXOR Y14, Y13, Y14 // Y4: Result 2
VMOVDQU Y4, (DX)
VMOVDQU Y14, 32(DX)
ADDQ $64, SI // in+=64
ADDQ $64, DX // out+=64
SUBQ $1, R9
JNZ loopback_xor_avx2_64
done_xor_avx2_64:
VZEROUPPER
RET
// func galMulAVX2_64(low, high, in, out []byte)
TEXT ·galMulAVX2_64(SB), 7, $0
MOVQ low+0(FP), SI // SI: &low
MOVQ high+24(FP), DX // DX: &high
MOVQ $15, BX // BX: low mask
MOVQ BX, X5
MOVQ in_len+56(FP), R9 // R9: len(in)
VBROADCASTI128 (SI), Y6 // low table
VBROADCASTI128 (DX), Y7 // high table
VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
SHRQ $6, R9 // len(in) / 64
MOVQ out+72(FP), DX // DX: &out
MOVQ in+48(FP), SI // SI: &in
TESTQ R9, R9
JZ done_avx2_64
loopback_avx2_64:
VMOVDQU (SI), Y0
VMOVDQU 32(SI), Y10
VPSRLQ $4, Y0, Y1 // Y1: high input
VPSRLQ $4, Y10, Y11 // Y11: high input 2
VPAND Y8, Y0, Y0 // Y0: low input
VPAND Y8, Y10, Y10 // Y10: low input
VPAND Y8, Y1, Y1 // Y1: high input
VPAND Y8, Y11, Y11 // Y11: high input 2
VPSHUFB Y0, Y6, Y2 // Y2: mul low part
VPSHUFB Y10, Y6, Y12 // Y12: mul low part 2
VPSHUFB Y1, Y7, Y3 // Y3: mul high part
VPSHUFB Y11, Y7, Y13 // Y13: mul high part 2
VPXOR Y3, Y2, Y4 // Y4: Result
VPXOR Y13, Y12, Y14 // Y14: Result 2
VMOVDQU Y4, (DX)
VMOVDQU Y14, 32(DX)
ADDQ $64, SI // in+=64
ADDQ $64, DX // out+=64
SUBQ $1, R9
JNZ loopback_avx2_64
done_avx2_64:
VZEROUPPER
RET

130
vendor/github.com/klauspost/reedsolomon/galois_arm64.go generated vendored Normal file
View File

@ -0,0 +1,130 @@
//go:build !noasm && !appengine && !gccgo && !nopshufb
// Copyright 2015, Klaus Post, see LICENSE for details.
// Copyright 2017, Minio, Inc.
package reedsolomon
const pshufb = true
//go:noescape
func galMulNEON(low, high, in, out []byte)
//go:noescape
func galMulXorNEON(low, high, in, out []byte)
func galMulSlice(c byte, in, out []byte, o *options) {
if c == 1 {
copy(out, in)
return
}
var done int
galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
remain := len(in) - done
if remain > 0 {
mt := mulTable[c][:256]
for i := done; i < len(in); i++ {
out[i] = mt[in[i]]
}
}
}
func galMulSliceXor(c byte, in, out []byte, o *options) {
if c == 1 {
sliceXor(in, out, o)
return
}
var done int
galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
remain := len(in) - done
if remain > 0 {
mt := mulTable[c][:256]
for i := done; i < len(in); i++ {
out[i] ^= mt[in[i]]
}
}
}
// 4-way butterfly
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
}
// 2-way butterfly forward
func fftDIT2(x, y []byte, log_m ffe, o *options) {
// Reference version:
refMulAdd(x, y, log_m)
// 64 byte aligned, always full.
xorSliceNEON(x, y)
}
// 2-way butterfly forward
func fftDIT28(x, y []byte, log_m ffe8, o *options) {
// Reference version:
mulAdd8(x, y, log_m, o)
sliceXor(x, y, o)
}
// 2-way butterfly
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
// 64 byte aligned, always full.
xorSliceNEON(x, y)
// Reference version:
refMulAdd(x, y, log_m)
}
// 2-way butterfly inverse
func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
// Reference version:
sliceXor(x, y, o)
mulAdd8(x, y, log_m, o)
}
func mulgf16(x, y []byte, log_m ffe, o *options) {
refMul(x, y, log_m)
}
func mulAdd8(out, in []byte, log_m ffe8, o *options) {
t := &multiply256LUT8[log_m]
galMulXorNEON(t[:16], t[16:32], in, out)
done := (len(in) >> 5) << 5
in = in[done:]
if len(in) > 0 {
out = out[done:]
refMulAdd8(in, out, log_m)
}
}
func mulgf8(out, in []byte, log_m ffe8, o *options) {
var done int
t := &multiply256LUT8[log_m]
galMulNEON(t[:16], t[16:32], in, out)
done = (len(in) >> 5) << 5
remain := len(in) - done
if remain > 0 {
mt := mul8LUTs[log_m].Value[:]
for i := done; i < len(in); i++ {
out[i] ^= byte(mt[in[i]])
}
}
}

102
vendor/github.com/klauspost/reedsolomon/galois_arm64.s generated vendored Normal file
View File

@ -0,0 +1,102 @@
//+build !noasm
//+build !appengine
//+build !gccgo
//+build !nopshufb
// Copyright 2015, Klaus Post, see LICENSE for details.
// Copyright 2017, Minio, Inc.
#define LOAD(LO1, LO2, HI1, HI2) \
VLD1.P 32(R1), [LO1.B16, LO2.B16] \
\
\ // Get low input and high input
VUSHR $4, LO1.B16, HI1.B16 \
VUSHR $4, LO2.B16, HI2.B16 \
VAND V8.B16, LO1.B16, LO1.B16 \
VAND V8.B16, LO2.B16, LO2.B16
#define GALOIS_MUL(MUL_LO, MUL_HI, OUT1, OUT2, TMP1, TMP2) \
\ // Mul low part and mul high part
VTBL V0.B16, [MUL_LO.B16], OUT1.B16 \
VTBL V10.B16, [MUL_HI.B16], OUT2.B16 \
VTBL V1.B16, [MUL_LO.B16], TMP1.B16 \
VTBL V11.B16, [MUL_HI.B16], TMP2.B16 \
\
\ // Combine results
VEOR OUT2.B16, OUT1.B16, OUT1.B16 \
VEOR TMP2.B16, TMP1.B16, OUT2.B16
// func galMulNEON(low, high, in, out []byte)
TEXT ·galMulNEON(SB), 7, $0
MOVD in_base+48(FP), R1
MOVD in_len+56(FP), R2 // length of message
MOVD out_base+72(FP), R5
SUBS $32, R2
BMI complete
MOVD low+0(FP), R10 // R10: &low
MOVD high+24(FP), R11 // R11: &high
VLD1 (R10), [V6.B16]
VLD1 (R11), [V7.B16]
//
// Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
// WORD $0x4e010c68 // dup v8.16b, w3
//
MOVD $0x0f, R3
VMOV R3, V8.B[0]
VDUP V8.B[0], V8.B16
loop:
// Main loop
LOAD(V0, V1, V10, V11)
GALOIS_MUL(V6, V7, V4, V5, V14, V15)
// Store result
VST1.P [V4.D2, V5.D2], 32(R5)
SUBS $32, R2
BPL loop
complete:
RET
// func galMulXorNEON(low, high, in, out []byte)
TEXT ·galMulXorNEON(SB), 7, $0
MOVD in_base+48(FP), R1
MOVD in_len+56(FP), R2 // length of message
MOVD out_base+72(FP), R5
SUBS $32, R2
BMI completeXor
MOVD low+0(FP), R10 // R10: &low
MOVD high+24(FP), R11 // R11: &high
VLD1 (R10), [V6.B16]
VLD1 (R11), [V7.B16]
//
// Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
// WORD $0x4e010c68 // dup v8.16b, w3
//
MOVD $0x0f, R3
VMOV R3, V8.B[0]
VDUP V8.B[0], V8.B16
loopXor:
// Main loop
VLD1 (R5), [V20.B16, V21.B16]
LOAD(V0, V1, V10, V11)
GALOIS_MUL(V6, V7, V4, V5, V14, V15)
VEOR V20.B16, V4.B16, V4.B16
VEOR V21.B16, V5.B16, V5.B16
// Store result
VST1.P [V4.D2, V5.D2], 32(R5)
SUBS $32, R2
BPL loopXor
completeXor:
RET

File diff suppressed because it is too large

128293
vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s generated vendored Normal file

File diff suppressed because it is too large

View File

@ -0,0 +1,33 @@
//go:build !amd64 || noasm || appengine || gccgo || nogen
package reedsolomon
const maxAvx2Inputs = 1
const maxAvx2Outputs = 1
const minAvx2Size = 1
const avxSizeMask = 0
const avx2CodeGen = false
func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
panic("codegen not available")
}
func galMulSlicesAvx2Xor(matrix []byte, in, out [][]byte, start, stop int) int {
panic("codegen not available")
}
func galMulSlicesGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
panic("codegen not available")
}
func galMulSlicesGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
panic("codegen not available")
}
func galMulSlicesAvxGFNI(matrix []uint64, in, out [][]byte, start, stop int) int {
panic("codegen not available")
}
func galMulSlicesAvxGFNIXor(matrix []uint64, in, out [][]byte, start, stop int) int {
panic("codegen not available")
}

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,91 @@
//go:build (!amd64 || noasm || appengine || gccgo) && (!arm64 || noasm || appengine || gccgo || nopshufb) && (!ppc64le || noasm || appengine || gccgo || nopshufb)
// Copyright 2015, Klaus Post, see LICENSE for details.
package reedsolomon
const pshufb = false
func galMulSlice(c byte, in, out []byte, o *options) {
out = out[:len(in)]
if c == 1 {
copy(out, in)
return
}
mt := mulTable[c][:256]
for n, input := range in {
out[n] = mt[input]
}
}
func galMulSliceXor(c byte, in, out []byte, o *options) {
out = out[:len(in)]
if c == 1 {
sliceXor(in, out, o)
return
}
mt := mulTable[c][:256]
for n, input := range in {
out[n] ^= mt[input]
}
}
func init() {
defaultOptions.useAVX512 = false
}
// 4-way butterfly
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
}
// 2-way butterfly forward
func fftDIT2(x, y []byte, log_m ffe, o *options) {
// Reference version:
refMulAdd(x, y, log_m)
sliceXorGo(x, y, o)
}
// 2-way butterfly forward
func fftDIT28(x, y []byte, log_m ffe8, o *options) {
// Reference version:
refMulAdd8(x, y, log_m)
sliceXorGo(x, y, o)
}
// 2-way butterfly inverse
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
// Reference version:
sliceXorGo(x, y, o)
refMulAdd(x, y, log_m)
}
// 2-way butterfly inverse
func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
// Reference version:
sliceXorGo(x, y, o)
refMulAdd8(x, y, log_m)
}
func mulgf16(x, y []byte, log_m ffe, o *options) {
refMul(x, y, log_m)
}
func mulgf8(x, y []byte, log_m ffe8, o *options) {
refMul8(x, y, log_m)
}

View File

@ -0,0 +1,146 @@
// Copyright 2015, Klaus Post, see LICENSE for details
//go:build nopshufb && !noasm
package reedsolomon
// bigSwitchover is the size where 64 bytes are processed per loop.
const bigSwitchover = 128
const pshufb = false
// simple slice xor
func sliceXor(in, out []byte, o *options) {
if o.useSSE2 {
if len(in) >= bigSwitchover {
if o.useAVX2 {
avx2XorSlice_64(in, out)
done := (len(in) >> 6) << 6
in = in[done:]
out = out[done:]
} else {
sSE2XorSlice_64(in, out)
done := (len(in) >> 6) << 6
in = in[done:]
out = out[done:]
}
}
if len(in) >= 16 {
sSE2XorSlice(in, out)
done := (len(in) >> 4) << 4
in = in[done:]
out = out[done:]
}
} else {
sliceXorGo(in, out, o)
return
}
out = out[:len(in)]
for i := range in {
out[i] ^= in[i]
}
}
func galMulSlice(c byte, in, out []byte, o *options) {
out = out[:len(in)]
if c == 1 {
copy(out, in)
return
}
mt := mulTable[c][:256]
for len(in) >= 4 {
ii := (*[4]byte)(in)
oo := (*[4]byte)(out)
oo[0] = mt[ii[0]]
oo[1] = mt[ii[1]]
oo[2] = mt[ii[2]]
oo[3] = mt[ii[3]]
in = in[4:]
out = out[4:]
}
for n, input := range in {
out[n] = mt[input]
}
}
func galMulSliceXor(c byte, in, out []byte, o *options) {
out = out[:len(in)]
if c == 1 {
sliceXor(in, out, o)
return
}
mt := mulTable[c][:256]
for len(in) >= 4 {
ii := (*[4]byte)(in)
oo := (*[4]byte)(out)
oo[0] ^= mt[ii[0]]
oo[1] ^= mt[ii[1]]
oo[2] ^= mt[ii[2]]
oo[3] ^= mt[ii[3]]
in = in[4:]
out = out[4:]
}
for n, input := range in {
out[n] ^= mt[input]
}
}
func init() {
defaultOptions.useAVX512 = false
}
// 4-way butterfly
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
}
// 2-way butterfly forward
func fftDIT2(x, y []byte, log_m ffe, o *options) {
// Reference version:
refMulAdd(x, y, log_m)
sliceXor(x, y, o)
}
// 2-way butterfly forward
func fftDIT28(x, y []byte, log_m ffe8, o *options) {
// Reference version:
refMulAdd8(x, y, log_m)
sliceXor(x, y, o)
}
// 2-way butterfly inverse
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
// Reference version:
sliceXor(x, y, o)
refMulAdd(x, y, log_m)
}
// 2-way butterfly inverse
func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
// Reference version:
sliceXor(x, y, o)
refMulAdd8(x, y, log_m)
}
func mulgf16(x, y []byte, log_m ffe, o *options) {
refMul(x, y, log_m)
}
func mulgf8(x, y []byte, log_m ffe8, o *options) {
refMul8(x, y, log_m)
}

View File

@ -0,0 +1,13 @@
//go:build !amd64 || noasm || appengine || gccgo || pshufb
// Copyright 2020, Klaus Post, see LICENSE for details.
package reedsolomon
func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, byteCount int) {
panic("codeSomeShardsAvx512 should not be called if built without asm")
}
func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, byteCount int) {
panic("codeSomeShardsAvx512P should not be called if built without asm")
}

View File

@ -0,0 +1,146 @@
//go:build !noasm && !appengine && !gccgo && !nopshufb
// Copyright 2015, Klaus Post, see LICENSE for details.
// Copyright 2018, Minio, Inc.
package reedsolomon
const pshufb = true
//go:noescape
func galMulPpc(low, high, in, out []byte)
//go:noescape
func galMulPpcXor(low, high, in, out []byte)
// This is what the assembler routines do in blocks of 16 bytes:
/*
func galMulPpc(low, high, in, out []byte) {
for n, input := range in {
l := input & 0xf
h := input >> 4
out[n] = low[l] ^ high[h]
}
}
func galMulPpcXor(low, high, in, out []byte) {
for n, input := range in {
l := input & 0xf
h := input >> 4
out[n] ^= low[l] ^ high[h]
}
}
*/
func galMulSlice(c byte, in, out []byte, o *options) {
if c == 1 {
copy(out, in)
return
}
done := (len(in) >> 4) << 4
if done > 0 {
galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
}
remain := len(in) - done
if remain > 0 {
mt := mulTable[c][:256]
for i := done; i < len(in); i++ {
out[i] = mt[in[i]]
}
}
}
func galMulSliceXor(c byte, in, out []byte, o *options) {
if c == 1 {
sliceXor(in, out, o)
return
}
done := (len(in) >> 4) << 4
if done > 0 {
galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
}
remain := len(in) - done
if remain > 0 {
mt := mulTable[c][:256]
for i := done; i < len(in); i++ {
out[i] ^= mt[in[i]]
}
}
}
// 4-way butterfly
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
ifftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func ifftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
ifftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
fftDIT4Ref(work, dist, log_m01, log_m23, log_m02, o)
}
// 4-way butterfly
func fftDIT48(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe8, o *options) {
fftDIT4Ref8(work, dist, log_m01, log_m23, log_m02, o)
}
// 2-way butterfly forward
func fftDIT2(x, y []byte, log_m ffe, o *options) {
// Reference version:
refMulAdd(x, y, log_m)
sliceXorGo(x, y, o)
}
// 2-way butterfly forward
func fftDIT28(x, y []byte, log_m ffe8, o *options) {
// Reference version:
mulAdd8(x, y, log_m, o)
sliceXorGo(x, y, o)
}
// 2-way butterfly inverse
func ifftDIT2(x, y []byte, log_m ffe, o *options) {
// Reference version:
sliceXorGo(x, y, o)
refMulAdd(x, y, log_m)
}
// 2-way butterfly inverse
func ifftDIT28(x, y []byte, log_m ffe8, o *options) {
// Reference version:
sliceXorGo(x, y, o)
mulAdd8(x, y, log_m, o)
}
func mulgf16(x, y []byte, log_m ffe, o *options) {
refMul(x, y, log_m)
}
func mulAdd8(out, in []byte, log_m ffe8, o *options) {
t := &multiply256LUT8[log_m]
galMulPpcXor(t[:16], t[16:32], in, out)
done := (len(in) >> 4) << 4
in = in[done:]
if len(in) > 0 {
out = out[done:]
refMulAdd8(in, out, log_m)
}
}
func mulgf8(out, in []byte, log_m ffe8, o *options) {
var done int
t := &multiply256LUT8[log_m]
galMulPpc(t[:16], t[16:32], in, out)
done = (len(in) >> 4) << 4
remain := len(in) - done
if remain > 0 {
mt := mul8LUTs[log_m].Value[:]
for i := done; i < len(in); i++ {
out[i] ^= byte(mt[in[i]])
}
}
}

View File

@ -0,0 +1,127 @@
//+build !noasm
//+build !appengine
//+build !gccgo
//+build !pshufb
// Copyright 2015, Klaus Post, see LICENSE for details.
// Copyright 2018, Minio, Inc.
#include "textflag.h"
#define LOW R3
#define HIGH R4
#define IN R5
#define LEN R6
#define OUT R7
#define CONSTANTS R8
#define OFFSET R9
#define OFFSET1 R10
#define OFFSET2 R11
#define X6 VS34
#define X6_ V2
#define X7 VS35
#define X7_ V3
#define MSG VS36
#define MSG_ V4
#define MSG_HI VS37
#define MSG_HI_ V5
#define RESULT VS38
#define RESULT_ V6
#define ROTATE VS39
#define ROTATE_ V7
#define MASK VS40
#define MASK_ V8
#define FLIP VS41
#define FLIP_ V9
// func galMulPpc(low, high, in, out []byte)
TEXT ·galMulPpc(SB), NOFRAME|NOSPLIT, $0-96
MOVD low+0(FP), LOW
MOVD high+24(FP), HIGH
MOVD in+48(FP), IN
MOVD in_len+56(FP), LEN
MOVD out+72(FP), OUT
MOVD $16, OFFSET1
MOVD $32, OFFSET2
MOVD $·constants(SB), CONSTANTS
LXVD2X (CONSTANTS)(R0), ROTATE
LXVD2X (CONSTANTS)(OFFSET1), MASK
LXVD2X (CONSTANTS)(OFFSET2), FLIP
LXVD2X (LOW)(R0), X6
LXVD2X (HIGH)(R0), X7
VPERM X6_, V31, FLIP_, X6_
VPERM X7_, V31, FLIP_, X7_
MOVD $0, OFFSET
loop:
LXVD2X (IN)(OFFSET), MSG
VSRB MSG_, ROTATE_, MSG_HI_
VAND MSG_, MASK_, MSG_
VPERM X6_, V31, MSG_, MSG_
VPERM X7_, V31, MSG_HI_, MSG_HI_
VXOR MSG_, MSG_HI_, MSG_
STXVD2X MSG, (OUT)(OFFSET)
ADD $16, OFFSET, OFFSET
CMP LEN, OFFSET
BGT loop
RET
// func galMulPpcXor(low, high, in, out []byte)
TEXT ·galMulPpcXor(SB), NOFRAME|NOSPLIT, $0-96
MOVD low+0(FP), LOW
MOVD high+24(FP), HIGH
MOVD in+48(FP), IN
MOVD in_len+56(FP), LEN
MOVD out+72(FP), OUT
MOVD $16, OFFSET1
MOVD $32, OFFSET2
MOVD $·constants(SB), CONSTANTS
LXVD2X (CONSTANTS)(R0), ROTATE
LXVD2X (CONSTANTS)(OFFSET1), MASK
LXVD2X (CONSTANTS)(OFFSET2), FLIP
LXVD2X (LOW)(R0), X6
LXVD2X (HIGH)(R0), X7
VPERM X6_, V31, FLIP_, X6_
VPERM X7_, V31, FLIP_, X7_
MOVD $0, OFFSET
loopXor:
LXVD2X (IN)(OFFSET), MSG
LXVD2X (OUT)(OFFSET), RESULT
VSRB MSG_, ROTATE_, MSG_HI_
VAND MSG_, MASK_, MSG_
VPERM X6_, V31, MSG_, MSG_
VPERM X7_, V31, MSG_HI_, MSG_HI_
VXOR MSG_, MSG_HI_, MSG_
VXOR MSG_, RESULT_, RESULT_
STXVD2X RESULT, (OUT)(OFFSET)
ADD $16, OFFSET, OFFSET
CMP LEN, OFFSET
BGT loopXor
RET
DATA ·constants+0x0(SB)/8, $0x0404040404040404
DATA ·constants+0x8(SB)/8, $0x0404040404040404
DATA ·constants+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f
DATA ·constants+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f
DATA ·constants+0x20(SB)/8, $0x0706050403020100
DATA ·constants+0x28(SB)/8, $0x0f0e0d0c0b0a0908
GLOBL ·constants(SB), 8, $48

View File

@ -0,0 +1,164 @@
/**
* A thread-safe tree which caches inverted matrices.
*
* Copyright 2016, Peter Collins
*/
package reedsolomon
import (
"errors"
"sync"
)
// The tree uses a Reader-Writer mutex to make it thread-safe
// when accessing cached matrices and inserting new ones.
type inversionTree struct {
mutex sync.RWMutex
root inversionNode
}
type inversionNode struct {
matrix matrix
children []*inversionNode
}
// newInversionTree initializes a tree for storing inverted matrices.
// Note that the root node is the identity matrix as it implies
// there were no errors with the original data.
func newInversionTree(dataShards, parityShards int) *inversionTree {
identity, _ := identityMatrix(dataShards)
return &inversionTree{
root: inversionNode{
matrix: identity,
children: make([]*inversionNode, dataShards+parityShards),
},
}
}
// GetInvertedMatrix returns the cached inverted matrix or nil if it
// is not found in the tree keyed on the indices of invalid rows.
func (t *inversionTree) GetInvertedMatrix(invalidIndices []int) matrix {
if t == nil {
return nil
}
// Lock the tree for reading before accessing the tree.
t.mutex.RLock()
defer t.mutex.RUnlock()
// If no invalid indices were given we should return the root
// identity matrix.
if len(invalidIndices) == 0 {
return t.root.matrix
}
// Recursively search for the inverted matrix in the tree, passing in
// 0 as the parent index as we start at the root of the tree.
return t.root.getInvertedMatrix(invalidIndices, 0)
}
// errAlreadySet is returned if the root node matrix is overwritten
var errAlreadySet = errors.New("the root node identity matrix is already set")
// InsertInvertedMatrix inserts a new inverted matrix into the tree
// keyed by the indices of invalid rows. The total number of shards
// is required for creating the proper length lists of child nodes for
// each node.
func (t *inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error {
if t == nil {
return nil
}
// If no invalid indices were given then we are done because the
// root node is already set with the identity matrix.
if len(invalidIndices) == 0 {
return errAlreadySet
}
if !matrix.IsSquare() {
return errNotSquare
}
// Lock the tree for writing and reading before accessing the tree.
t.mutex.Lock()
defer t.mutex.Unlock()
// Recursively create nodes for the inverted matrix in the tree until
// we reach the node to insert the matrix to. We start by passing in
// 0 as the parent index as we start at the root of the tree.
t.root.insertInvertedMatrix(invalidIndices, matrix, shards, 0)
return nil
}
func (n *inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix {
// Get the child node to search next from the list of children. The
// list of children starts relative to the parent index passed in
// because the indices of invalid rows is sorted (by default). As we
// search recursively, the first invalid index gets popped off the list,
// so when searching through the list of children, use that first invalid
// index to find the child node.
firstIndex := invalidIndices[0]
node := n.children[firstIndex-parent]
// If the child node doesn't exist in the list yet, fail fast by
// returning, so we can construct and insert the proper inverted matrix.
if node == nil {
return nil
}
// If there's more than one invalid index left in the list we should
// keep searching recursively.
if len(invalidIndices) > 1 {
// Search recursively on the child node by passing in the invalid indices
// with the first index popped off the front. Also the parent index to
// pass down is the first index plus one.
return node.getInvertedMatrix(invalidIndices[1:], firstIndex+1)
}
// If there aren't any more invalid indices to search, we've found our
// node. Return it, however keep in mind that the matrix could still be
// nil because intermediary nodes in the tree are created sometimes with
// their inversion matrices uninitialized.
return node.matrix
}
func (n *inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) {
// As above, get the child node to search next from the list of children.
// The list of children starts relative to the parent index passed in
// because the indices of invalid rows is sorted (by default). As we
// search recursively, the first invalid index gets popped off the list,
// so when searching through the list of children, use that first invalid
// index to find the child node.
firstIndex := invalidIndices[0]
node := n.children[firstIndex-parent]
// If the child node doesn't exist in the list yet, create a new
// node because we have the writer lock and add it to the list
// of children.
if node == nil {
// Make the length of the list of children equal to the number
// of shards minus the first invalid index because the list of
// invalid indices is sorted, so only this length of errors
// are possible in the tree.
node = &inversionNode{
children: make([]*inversionNode, shards-firstIndex),
}
// Insert the new node into the tree at the first index relative
// to the parent index that was given in this recursive call.
n.children[firstIndex-parent] = node
}
// If there's more than one invalid index left in the list we should
// keep searching recursively in order to find the node to add our
// matrix.
if len(invalidIndices) > 1 {
// As above, search recursively on the child node by passing in
// the invalid indices with the first index popped off the front.
// Also the total number of shards and parent index are passed down
// which is equal to the first index plus one.
node.insertInvertedMatrix(invalidIndices[1:], matrix, shards, firstIndex+1)
} else {
// If there aren't any more invalid indices to search, we've found our
// node. Cache the inverted matrix in this node.
node.matrix = matrix
}
}
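// Illustrative sketch of how the cache above is meant to be used (hypothetical
// shard counts; not code from this file): with 10 data and 3 parity shards, a
// reconstruction for missing rows {2, 7} would first consult the tree and only
// invert and cache on a miss:
//
//	t := newInversionTree(10, 3)
//	if cached := t.GetInvertedMatrix([]int{2, 7}); cached == nil {
//		// ... invert the reduced decode matrix here, then cache it:
//		// _ = t.InsertInvertedMatrix([]int{2, 7}, inverted, 13)
//	}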

1262
vendor/github.com/klauspost/reedsolomon/leopard.go generated vendored Normal file

File diff suppressed because it is too large

1269
vendor/github.com/klauspost/reedsolomon/leopard8.go generated vendored Normal file

File diff suppressed because it is too large

281
vendor/github.com/klauspost/reedsolomon/matrix.go generated vendored Normal file
View File

@ -0,0 +1,281 @@
/**
* Matrix Algebra over an 8-bit Galois Field
*
* Copyright 2015, Klaus Post
* Copyright 2015, Backblaze, Inc.
*/
package reedsolomon
import (
"errors"
"fmt"
"strconv"
"strings"
)
// byte[row][col]
type matrix [][]byte
// newMatrix returns a matrix of zeros.
func newMatrix(rows, cols int) (matrix, error) {
if rows <= 0 {
return nil, errInvalidRowSize
}
if cols <= 0 {
return nil, errInvalidColSize
}
m := matrix(make([][]byte, rows))
for i := range m {
m[i] = make([]byte, cols)
}
return m, nil
}
// NewMatrixData initializes a matrix with the given row-major data.
// Note that data is not copied from input.
func newMatrixData(data [][]byte) (matrix, error) {
m := matrix(data)
err := m.Check()
if err != nil {
return nil, err
}
return m, nil
}
// IdentityMatrix returns an identity matrix of the given size.
func identityMatrix(size int) (matrix, error) {
m, err := newMatrix(size, size)
if err != nil {
return nil, err
}
for i := range m {
m[i][i] = 1
}
return m, nil
}
// errInvalidRowSize will be returned if attempting to create a matrix with negative or zero row number.
var errInvalidRowSize = errors.New("invalid row size")
// errInvalidColSize will be returned if attempting to create a matrix with negative or zero column number.
var errInvalidColSize = errors.New("invalid column size")
// errColSizeMismatch is returned if the size of matrix columns mismatch.
var errColSizeMismatch = errors.New("column size is not the same for all rows")
func (m matrix) Check() error {
rows := len(m)
if rows == 0 {
return errInvalidRowSize
}
cols := len(m[0])
if cols == 0 {
return errInvalidColSize
}
for _, col := range m {
if len(col) != cols {
return errColSizeMismatch
}
}
return nil
}
// String returns a human-readable string of the matrix contents.
//
// Example: [[1, 2], [3, 4]]
func (m matrix) String() string {
rowOut := make([]string, 0, len(m))
for _, row := range m {
colOut := make([]string, 0, len(row))
for _, col := range row {
colOut = append(colOut, strconv.Itoa(int(col)))
}
rowOut = append(rowOut, "["+strings.Join(colOut, ", ")+"]")
}
return "[" + strings.Join(rowOut, ", ") + "]"
}
// Multiply multiplies this matrix (the one on the left) by another
// matrix (the one on the right) and returns a new matrix with the result.
func (m matrix) Multiply(right matrix) (matrix, error) {
if len(m[0]) != len(right) {
return nil, fmt.Errorf("columns on left (%d) is different than rows on right (%d)", len(m[0]), len(right))
}
result, _ := newMatrix(len(m), len(right[0]))
for r, row := range result {
for c := range row {
var value byte
for i := range m[0] {
value ^= galMultiply(m[r][i], right[i][c])
}
result[r][c] = value
}
}
return result, nil
}
// Augment returns the concatenation of this matrix and the matrix on the right.
func (m matrix) Augment(right matrix) (matrix, error) {
if len(m) != len(right) {
return nil, errMatrixSize
}
result, _ := newMatrix(len(m), len(m[0])+len(right[0]))
for r, row := range m {
for c := range row {
result[r][c] = m[r][c]
}
cols := len(m[0])
for c := range right[0] {
result[r][cols+c] = right[r][c]
}
}
return result, nil
}
// errMatrixSize is returned if matrix dimensions don't match.
var errMatrixSize = errors.New("matrix sizes do not match")
func (m matrix) SameSize(n matrix) error {
if len(m) != len(n) {
return errMatrixSize
}
for i := range m {
if len(m[i]) != len(n[i]) {
return errMatrixSize
}
}
return nil
}
// SubMatrix returns a part of this matrix. Data is copied.
func (m matrix) SubMatrix(rmin, cmin, rmax, cmax int) (matrix, error) {
result, err := newMatrix(rmax-rmin, cmax-cmin)
if err != nil {
return nil, err
}
// OPTME: If used heavily, use copy function to copy slice
for r := rmin; r < rmax; r++ {
for c := cmin; c < cmax; c++ {
result[r-rmin][c-cmin] = m[r][c]
}
}
return result, nil
}
// SwapRows exchanges two rows in the matrix.
func (m matrix) SwapRows(r1, r2 int) error {
if r1 < 0 || len(m) <= r1 || r2 < 0 || len(m) <= r2 {
return errInvalidRowSize
}
m[r2], m[r1] = m[r1], m[r2]
return nil
}
// IsSquare will return true if the matrix is square, otherwise false.
func (m matrix) IsSquare() bool {
return len(m) == len(m[0])
}
// errSingular is returned if the matrix is singular and cannot be inverted
var errSingular = errors.New("matrix is singular")
// errNotSquare is returned if attempting to inverse a non-square matrix.
var errNotSquare = errors.New("only square matrices can be inverted")
// Invert returns the inverse of this matrix.
// Returns ErrSingular when the matrix is singular and doesn't have an inverse.
// The matrix must be square, otherwise ErrNotSquare is returned.
func (m matrix) Invert() (matrix, error) {
if !m.IsSquare() {
return nil, errNotSquare
}
size := len(m)
work, _ := identityMatrix(size)
work, _ = m.Augment(work)
err := work.gaussianElimination()
if err != nil {
return nil, err
}
return work.SubMatrix(0, size, size, size*2)
}
func (m matrix) gaussianElimination() error {
rows := len(m)
columns := len(m[0])
// Clear out the part below the main diagonal and scale the main
// diagonal to be 1.
for r := 0; r < rows; r++ {
// If the element on the diagonal is 0, find a row below
// that has a non-zero and swap them.
if m[r][r] == 0 {
for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
if m[rowBelow][r] != 0 {
err := m.SwapRows(r, rowBelow)
if err != nil {
return err
}
break
}
}
}
// If we couldn't find one, the matrix is singular.
if m[r][r] == 0 {
return errSingular
}
// Scale to 1.
if m[r][r] != 1 {
scale := galOneOver(m[r][r])
for c := 0; c < columns; c++ {
m[r][c] = galMultiply(m[r][c], scale)
}
}
// Make everything below the 1 be a 0 by subtracting
// a multiple of it. (Subtraction and addition are
// both exclusive or in the Galois field.)
for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
if m[rowBelow][r] != 0 {
scale := m[rowBelow][r]
for c := 0; c < columns; c++ {
m[rowBelow][c] ^= galMultiply(scale, m[r][c])
}
}
}
}
// Now clear the part above the main diagonal.
for d := 0; d < rows; d++ {
for rowAbove := 0; rowAbove < d; rowAbove++ {
if m[rowAbove][d] != 0 {
scale := m[rowAbove][d]
for c := 0; c < columns; c++ {
m[rowAbove][c] ^= galMultiply(scale, m[d][c])
}
}
}
}
return nil
}
// Create a Vandermonde matrix, which is guaranteed to have the
// property that any subset of rows that forms a square matrix
// is invertible.
func vandermonde(rows, cols int) (matrix, error) {
result, err := newMatrix(rows, cols)
if err != nil {
return nil, err
}
for r, row := range result {
for c := range row {
result[r][c] = galExp(byte(r), c)
}
}
return result, nil
}
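// Illustrative sketch (hypothetical usage of the helpers above): building a
// square Vandermonde matrix and inverting it:
//
//	m, _ := vandermonde(4, 4)
//	inv, err := m.Invert() // returns errSingular if no inverse exists
//	_ = inv
//	_ = err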

323
vendor/github.com/klauspost/reedsolomon/options.go generated vendored Normal file
View File

@ -0,0 +1,323 @@
package reedsolomon
import (
"runtime"
"strings"
"github.com/klauspost/cpuid/v2"
)
// Option allows to override processing parameters.
type Option func(*options)
type options struct {
maxGoroutines int
minSplitSize int
shardSize int
perRound int
useAvxGNFI,
useAvx512GFNI,
useAVX512,
useAVX2,
useSSSE3,
useSSE2 bool
useJerasureMatrix bool
usePAR1Matrix bool
useCauchy bool
fastOneParity bool
inversionCache bool
forcedInversionCache bool
customMatrix [][]byte
withLeopard leopardMode
// stream options
concReads bool
concWrites bool
streamBS int
}
var defaultOptions = options{
maxGoroutines: 384,
minSplitSize: -1,
fastOneParity: false,
inversionCache: true,
// Detect CPU capabilities.
useSSSE3: cpuid.CPU.Supports(cpuid.SSSE3),
useSSE2: cpuid.CPU.Supports(cpuid.SSE2),
useAVX2: cpuid.CPU.Supports(cpuid.AVX2),
useAVX512: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512BW, cpuid.AVX512VL),
useAvx512GFNI: cpuid.CPU.Supports(cpuid.AVX512F, cpuid.GFNI, cpuid.AVX512DQ),
useAvxGNFI: cpuid.CPU.Supports(cpuid.AVX, cpuid.GFNI),
}
// leopardMode controls the use of leopard GF in encoding and decoding.
type leopardMode int
const (
// leopardAsNeeded only switches to leopard 16-bit when there are more than
// 256 shards.
leopardAsNeeded leopardMode = iota
// leopardGF16 uses leopard in 16-bit mode for all shard counts.
leopardGF16
// leopardAlways uses 8-bit leopard for shards less than or equal to 256,
// 16-bit leopard otherwise.
leopardAlways
)
func init() {
if runtime.GOMAXPROCS(0) <= 1 {
defaultOptions.maxGoroutines = 1
}
}
// WithMaxGoroutines sets the maximum number of goroutines to use for encoding & decoding.
// Jobs will be split into this many parts, unless each goroutine would have to process
// less than minSplitSize bytes (set with WithMinSplitSize).
// For the best speed, keep this well above the GOMAXPROCS number for more fine grained
// scheduling.
// If n <= 0, it is ignored.
func WithMaxGoroutines(n int) Option {
return func(o *options) {
if n > 0 {
o.maxGoroutines = n
}
}
}
// WithAutoGoroutines will adjust the number of goroutines for optimal speed with a
// specific shard size.
// Send in the shard size you expect to send. Other shard sizes will work, but may not
// run at the optimal speed.
// Overwrites WithMaxGoroutines.
// If shardSize <= 0, it is ignored.
func WithAutoGoroutines(shardSize int) Option {
return func(o *options) {
o.shardSize = shardSize
}
}
// WithMinSplitSize is the minimum encoding size in bytes per goroutine.
// By default this parameter is determined by CPU cache characteristics.
// See WithMaxGoroutines on how jobs are split.
// If n <= 0, it is ignored.
func WithMinSplitSize(n int) Option {
return func(o *options) {
if n > 0 {
o.minSplitSize = n
}
}
}
// WithConcurrentStreams will enable concurrent reads and writes on the streams.
// Default: Disabled, meaning only one stream will be read/written at a time.
// Ignored if not used on a stream input.
func WithConcurrentStreams(enabled bool) Option {
return func(o *options) {
o.concReads, o.concWrites = enabled, enabled
}
}
// WithConcurrentStreamReads will enable concurrent reads from the input streams.
// Default: Disabled, meaning only one stream will be read at a time.
// Ignored if not used on a stream input.
func WithConcurrentStreamReads(enabled bool) Option {
return func(o *options) {
o.concReads = enabled
}
}
// WithConcurrentStreamWrites will enable concurrent writes to the output streams.
// Default: Disabled, meaning only one stream will be written at a time.
// Ignored if not used on a stream input.
func WithConcurrentStreamWrites(enabled bool) Option {
return func(o *options) {
o.concWrites = enabled
}
}
// WithInversionCache allows to control the inversion cache.
// This will cache reconstruction matrices so they can be reused.
// Enabled by default, or <= 64 shards for Leopard encoding.
func WithInversionCache(enabled bool) Option {
return func(o *options) {
o.inversionCache = enabled
o.forcedInversionCache = true
}
}
// WithStreamBlockSize allows to set a custom block size per round of reads/writes.
// If not set, any shard size set with WithAutoGoroutines will be used.
// If WithAutoGoroutines is also unset, 4MB will be used.
// Ignored if not used on stream.
func WithStreamBlockSize(n int) Option {
return func(o *options) {
o.streamBS = n
}
}
// WithSSSE3 allows to enable/disable SSSE3 instructions.
// If not set, SSSE3 will be turned on or off automatically based on CPU ID information.
func WithSSSE3(enabled bool) Option {
return func(o *options) {
o.useSSSE3 = enabled
}
}
// WithAVX2 allows to enable/disable AVX2 instructions.
// If not set, AVX2 will be turned on or off automatically based on CPU ID information.
// This will also disable AVX GFNI instructions.
func WithAVX2(enabled bool) Option {
return func(o *options) {
o.useAVX2 = enabled
if o.useAvxGNFI {
o.useAvxGNFI = enabled
}
}
}
// WithSSE2 allows to enable/disable SSE2 instructions.
// If not set, SSE2 will be turned on or off automatically based on CPU ID information.
func WithSSE2(enabled bool) Option {
return func(o *options) {
o.useSSE2 = enabled
}
}
// WithAVX512 allows to enable/disable AVX512 (and GFNI) instructions.
func WithAVX512(enabled bool) Option {
return func(o *options) {
o.useAVX512 = enabled
o.useAvx512GFNI = enabled
}
}
// WithGFNI allows to enable/disable AVX512+GFNI instructions.
// If not set, GFNI will be turned on or off automatically based on CPU ID information.
func WithGFNI(enabled bool) Option {
return func(o *options) {
o.useAvx512GFNI = enabled
}
}
// WithAVXGFNI allows to enable/disable GFNI with AVX instructions.
// If not set, GFNI will be turned on or off automatically based on CPU ID information.
func WithAVXGFNI(enabled bool) Option {
return func(o *options) {
o.useAvxGNFI = enabled
}
}
// WithJerasureMatrix causes the encoder to build the Reed-Solomon-Vandermonde
// matrix in the same way as done by the Jerasure library.
// The first row and column of the coding matrix only contains 1's in this method
// so the first parity chunk is always equal to XOR of all data chunks.
func WithJerasureMatrix() Option {
return func(o *options) {
o.useJerasureMatrix = true
o.usePAR1Matrix = false
o.useCauchy = false
}
}
// WithPAR1Matrix causes the encoder to build the matrix how PARv1
// does. Note that the method they use is buggy, and may lead to cases
// where recovery is impossible, even if there are enough parity
// shards.
func WithPAR1Matrix() Option {
return func(o *options) {
o.useJerasureMatrix = false
o.usePAR1Matrix = true
o.useCauchy = false
}
}
// WithCauchyMatrix will make the encoder build a Cauchy style matrix.
// The output of this is not compatible with the standard output.
// A Cauchy matrix is faster to generate. This does not affect data throughput,
// but will result in slightly faster start-up time.
func WithCauchyMatrix() Option {
return func(o *options) {
o.useJerasureMatrix = false
o.usePAR1Matrix = false
o.useCauchy = true
}
}
// WithFastOneParityMatrix will switch the matrix to a simple xor
// if there is only one parity shard.
// The PAR1 matrix already has this property so it has little effect there.
func WithFastOneParityMatrix() Option {
return func(o *options) {
o.fastOneParity = true
}
}
// WithCustomMatrix causes the encoder to use the manually specified matrix.
// customMatrix represents only the parity chunks.
// customMatrix must have at least ParityShards rows and DataShards columns.
// It can be used for interoperability with libraries which generate
// the matrix differently or to implement more complex coding schemes like LRC
// (locally reconstructible codes).
func WithCustomMatrix(customMatrix [][]byte) Option {
return func(o *options) {
o.customMatrix = customMatrix
}
}
// WithLeopardGF16 will always use leopard GF16 for encoding,
// even when there are fewer than 256 shards.
// This will likely improve reconstruction time for some setups.
// This is not compatible with Leopard output for <= 256 shards.
// Note that Leopard places certain restrictions on use; see other documentation.
func WithLeopardGF16(enabled bool) Option {
return func(o *options) {
if enabled {
o.withLeopard = leopardGF16
} else {
o.withLeopard = leopardAsNeeded
}
}
}
// WithLeopardGF will use leopard GF for encoding, even when there are fewer than
// 256 shards.
// This will likely improve reconstruction time for some setups.
// Note that Leopard places certain restrictions on use; see other documentation.
func WithLeopardGF(enabled bool) Option {
return func(o *options) {
if enabled {
o.withLeopard = leopardAlways
} else {
o.withLeopard = leopardAsNeeded
}
}
}
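The two Leopard switches above only select the code path; the constructor and Encoder interface live in reedsolomon.go, whose diff is suppressed in this view. A minimal sketch, assuming the package's documented New/Encode/Verify in-memory API and 64-byte shards (Leopard restricts shard sizes):

package main

import (
	"fmt"
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	// Force the Leopard GF16 path even though 120 total shards would
	// normally be handled by the default GF(256) implementation.
	enc, err := reedsolomon.New(100, 20, reedsolomon.WithLeopardGF16(true))
	if err != nil {
		log.Fatal(err)
	}
	shards := make([][]byte, 120)
	for i := range shards {
		shards[i] = make([]byte, 64) // data shards would be filled by the caller
	}
	if err := enc.Encode(shards); err != nil {
		log.Fatal(err)
	}
	ok, err := enc.Verify(shards)
	fmt.Println("parity consistent:", ok, err)
}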
func (o *options) cpuOptions() string {
var res []string
if o.useSSE2 {
res = append(res, "SSE2")
}
if o.useAVX2 {
res = append(res, "AVX2")
}
if o.useSSSE3 {
res = append(res, "SSSE3")
}
if o.useAVX512 {
res = append(res, "AVX512")
}
if o.useAvx512GFNI {
res = append(res, "AVX512+GFNI")
}
if o.useAvxGNFI {
res = append(res, "AVX+GFNI")
}
if len(res) == 0 {
return "pure Go"
}
return strings.Join(res, ",")
}
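cpuOptions itself is internal, but the effect of the CPU toggles above can be observed by constructing encoders with and without them. A rough sketch, again assuming the package-level New constructor from the suppressed reedsolomon.go; this is mainly useful for benchmarking SIMD against the pure-Go fallback:

package main

import (
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	// Default: SIMD features are auto-detected from CPU ID information.
	fast, err := reedsolomon.New(10, 4)
	if err != nil {
		log.Fatal(err)
	}

	// Explicitly disable the x86 SIMD paths to force the pure-Go code,
	// e.g. to compare performance or reproduce portability issues.
	slow, err := reedsolomon.New(10, 4,
		reedsolomon.WithSSE2(false),
		reedsolomon.WithSSSE3(false),
		reedsolomon.WithAVX2(false),
		reedsolomon.WithAVX512(false),
		reedsolomon.WithGFNI(false),
		reedsolomon.WithAVXGFNI(false),
	)
	if err != nil {
		log.Fatal(err)
	}
	_, _ = fast, slow
}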

1741
vendor/github.com/klauspost/reedsolomon/reedsolomon.go generated vendored Normal file

File diff suppressed because it is too large

614
vendor/github.com/klauspost/reedsolomon/streaming.go generated vendored Normal file
View File

@ -0,0 +1,614 @@
/**
* Reed-Solomon Coding over 8-bit values.
*
* Copyright 2015, Klaus Post
* Copyright 2015, Backblaze, Inc.
*/
package reedsolomon
import (
"errors"
"fmt"
"io"
"sync"
)
// StreamEncoder is an interface to encode Reed-Solomon parity sets for your data.
// It provides a fully streaming interface, and processes data in blocks of up to 4MB.
//
// For small shard sizes, 10MB and below, it is recommended to use the in-memory interface,
// since the streaming interface has a start-up overhead.
//
// For all operations, readers and writers should not assume any order/size of
// individual reads/writes.
//
// For usage examples, see "stream-encoder.go" and "streamdecoder.go" in the examples
// folder.
type StreamEncoder interface {
// Encode parity shards for a set of data shards.
//
// Input is 'data' containing readers for the data shards and 'parity' containing
// the io.Writers that will receive the parity shards.
//
// The number of shards must match the number given to NewStream().
//
// Each reader must supply the same number of bytes.
//
// The parity shards will be written to the writer.
// The number of bytes written will match the input size.
//
// If a data stream returns an error, a StreamReadError type error
// will be returned. If a parity writer returns an error, a
// StreamWriteError will be returned.
Encode(data []io.Reader, parity []io.Writer) error
// Verify returns true if the parity shards contain correct data.
//
// The number of shards must match the total number of data+parity shards
// given to NewStream().
//
// Each reader must supply the same number of bytes.
// If a shard stream returns an error, a StreamReadError type error
// will be returned.
Verify(shards []io.Reader) (bool, error)
// Reconstruct will recreate the missing shards if possible.
//
// Given a list of valid shards (to read) and invalid shards (to write)
//
// You indicate that a shard is missing by setting it to nil in the 'valid'
// slice and at the same time setting a non-nil writer in "fill".
// An index cannot contain both a non-nil 'valid' and 'fill' entry.
// If both are provided, 'ErrReconstructMismatch' is returned.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
//
// The reconstructed shard set is complete, but integrity is not verified.
// Use the Verify function to check if the data set is ok.
Reconstruct(valid []io.Reader, fill []io.Writer) error
// Split an input stream into the number of shards given to the encoder.
//
// The data will be split into equally sized shards.
// If the data size isn't divisible by the number of shards,
// the last shard will contain extra zeros.
//
// You must supply the total size of your input.
// 'ErrShortData' will be returned if it is unable to retrieve the
// number of bytes indicated.
Split(data io.Reader, dst []io.Writer, size int64) (err error)
// Join the shards and write the data segment to dst.
//
// Only the data shards are considered.
//
// You must supply the exact output size you want.
// If there are too few shards given, ErrTooFewShards will be returned.
// If the total data size is less than outSize, ErrShortData will be returned.
Join(dst io.Writer, shards []io.Reader, outSize int64) error
}
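A minimal end-to-end sketch of the interface above, using in-memory readers and buffers (shard contents and sizes are arbitrary; every data reader must just supply the same number of bytes):

package main

import (
	"bytes"
	"fmt"
	"io"
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	const dataShards, parityShards = 4, 2
	enc, err := reedsolomon.NewStream(dataShards, parityShards)
	if err != nil {
		log.Fatal(err)
	}

	// Four equally sized data shards, read from memory here; files work the same way.
	payload := []byte("0123456789abcdef0123456789abcdef")
	data := make([]io.Reader, dataShards)
	for i := range data {
		data[i] = bytes.NewReader(payload)
	}

	// One writer per parity shard; each receives as many bytes as a data shard supplied.
	bufs := make([]*bytes.Buffer, parityShards)
	parity := make([]io.Writer, parityShards)
	for i := range parity {
		bufs[i] = new(bytes.Buffer)
		parity[i] = bufs[i]
	}

	if err := enc.Encode(data, parity); err != nil {
		log.Fatal(err)
	}
	fmt.Println("parity bytes per shard:", bufs[0].Len())
}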
// StreamReadError is returned when a read error is encountered
// that relates to a supplied stream.
// This will allow you to find out which reader has failed.
type StreamReadError struct {
Err error // The error
Stream int // The stream number on which the error occurred
}
// Error returns the error as a string
func (s StreamReadError) Error() string {
return fmt.Sprintf("error reading stream %d: %s", s.Stream, s.Err)
}
// String returns the error as a string
func (s StreamReadError) String() string {
return s.Error()
}
// StreamWriteError is returned when a write error is encountered
// that relates to a supplied stream. This will allow you to
// find out which writer has failed.
type StreamWriteError struct {
Err error // The error
Stream int // The stream number on which the error occurred
}
// Error returns the error as a string
func (s StreamWriteError) Error() string {
return fmt.Sprintf("error writing stream %d: %s", s.Stream, s.Err)
}
// String returns the error as a string
func (s StreamWriteError) String() string {
return s.Error()
}
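Both error types carry the index of the offending stream, so callers can pinpoint the broken reader or writer with errors.As. A small sketch; the failing reader and its "disk gone" error are made up for illustration:

package main

import (
	"errors"
	"fmt"
	"io"
	"log"
	"strings"

	"github.com/klauspost/reedsolomon"
)

// failingReader returns a non-EOF error once its payload is exhausted.
type failingReader struct{ r io.Reader }

func (f failingReader) Read(p []byte) (int, error) {
	n, err := f.r.Read(p)
	if err == io.EOF {
		return n, errors.New("disk gone")
	}
	return n, err
}

func main() {
	enc, err := reedsolomon.NewStream(2, 1)
	if err != nil {
		log.Fatal(err)
	}
	data := []io.Reader{
		strings.NewReader("abcd"),
		failingReader{strings.NewReader("efgh")},
	}
	err = enc.Encode(data, []io.Writer{io.Discard})

	var sre reedsolomon.StreamReadError
	if errors.As(err, &sre) {
		fmt.Printf("stream %d failed: %v\n", sre.Stream, sre.Err)
	}
}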
// rsStream contains a matrix for a specific
// distribution of data shards and parity shards.
// Construct it using NewStream().
type rsStream struct {
r *reedSolomon
o options
// Shard reader
readShards func(dst [][]byte, in []io.Reader) error
// Shard writer
writeShards func(out []io.Writer, in [][]byte) error
blockPool sync.Pool
}
// NewStream creates a new encoder and initializes it to
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
// Note that the maximum total number of shards is 256.
func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
if dataShards+parityShards > 256 {
return nil, ErrMaxShardNum
}
r := rsStream{o: defaultOptions}
for _, opt := range o {
opt(&r.o)
}
// Use the shard size as the stream block size if no block size was set.
if r.o.streamBS == 0 && r.o.shardSize > 0 {
r.o.streamBS = r.o.shardSize
}
if r.o.streamBS <= 0 {
r.o.streamBS = 4 << 20
}
if r.o.shardSize == 0 && r.o.maxGoroutines == defaultOptions.maxGoroutines {
o = append(o, WithAutoGoroutines(r.o.streamBS))
}
enc, err := New(dataShards, parityShards, o...)
if err != nil {
return nil, err
}
r.r = enc.(*reedSolomon)
r.blockPool.New = func() interface{} {
return AllocAligned(dataShards+parityShards, r.o.streamBS)
}
r.readShards = readShards
r.writeShards = writeShards
if r.o.concReads {
r.readShards = cReadShards
}
if r.o.concWrites {
r.writeShards = cWriteShards
}
return &r, err
}
// NewStreamC creates a new encoder and initializes it to
// the number of data shards and parity shards given.
//
// This functions as 'NewStream', but allows you to enable CONCURRENT reads and writes.
func NewStreamC(dataShards, parityShards int, conReads, conWrites bool, o ...Option) (StreamEncoder, error) {
return NewStream(dataShards, parityShards, append(o, WithConcurrentStreamReads(conReads), WithConcurrentStreamWrites(conWrites))...)
}
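As the implementation above shows, NewStreamC is just NewStream plus the two concurrency options; a brief sketch of the two equivalent spellings:

package main

import (
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	// Concurrent reads and writes via the convenience constructor...
	encA, err := reedsolomon.NewStreamC(10, 4, true, true)
	if err != nil {
		log.Fatal(err)
	}

	// ...or via the options it appends under the hood.
	encB, err := reedsolomon.NewStream(10, 4,
		reedsolomon.WithConcurrentStreamReads(true),
		reedsolomon.WithConcurrentStreamWrites(true),
	)
	if err != nil {
		log.Fatal(err)
	}
	_, _ = encA, encB
}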
func (r *rsStream) createSlice() [][]byte {
out := r.blockPool.Get().([][]byte)
for i := range out {
out[i] = out[i][:r.o.streamBS]
}
return out
}
// Encodes parity shards for a set of data shards.
//
// Input is 'data' containing readers for the data shards and 'parity' containing
// the io.Writers that will receive the parity shards.
//
// The number of shards must match the number given to NewStream().
//
// Each reader must supply the same number of bytes.
//
// The parity shards will be written to the writer.
// The number of bytes written will match the input size.
//
// If a data stream returns an error, a StreamReadError type error
// will be returned. If a parity writer returns an error, a
// StreamWriteError will be returned.
func (r *rsStream) Encode(data []io.Reader, parity []io.Writer) error {
if len(data) != r.r.dataShards {
return ErrTooFewShards
}
if len(parity) != r.r.parityShards {
return ErrTooFewShards
}
all := r.createSlice()
defer r.blockPool.Put(all)
in := all[:r.r.dataShards]
out := all[r.r.dataShards:]
read := 0
for {
err := r.readShards(in, data)
switch err {
case nil:
case io.EOF:
if read == 0 {
return ErrShardNoData
}
return nil
default:
return err
}
out = trimShards(out, shardSize(in))
read += shardSize(in)
err = r.r.Encode(all)
if err != nil {
return err
}
err = r.writeShards(parity, out)
if err != nil {
return err
}
}
}
// Trim the shards so they are all the same size
func trimShards(in [][]byte, size int) [][]byte {
for i := range in {
if len(in[i]) != 0 {
in[i] = in[i][0:size]
}
if len(in[i]) < size {
in[i] = in[i][:0]
}
}
return in
}
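The helper's contract is easy to miss: every non-empty shard is resliced to exactly 'size' bytes (its backing buffer is at least that large), while empty shards stay empty so later stages treat them as missing. A standalone copy run on toy input makes this visible; it is for illustration only, since the real function is package-internal:

package main

import "fmt"

// trimShards is copied here for illustration; in the package it is unexported.
func trimShards(in [][]byte, size int) [][]byte {
	for i := range in {
		if len(in[i]) != 0 {
			in[i] = in[i][0:size]
		}
		if len(in[i]) < size {
			in[i] = in[i][:0]
		}
	}
	return in
}

func main() {
	shards := [][]byte{
		make([]byte, 8), // longer than size: resliced down to 4 bytes
		make([]byte, 4), // exactly size: unchanged
		{},              // empty (missing shard): stays empty
	}
	for i, s := range trimShards(shards, 4) {
		fmt.Printf("shard %d: len=%d\n", i, len(s))
	}
}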
func readShards(dst [][]byte, in []io.Reader) error {
if len(in) != len(dst) {
panic("internal error: in and dst size do not match")
}
size := -1
for i := range in {
if in[i] == nil {
dst[i] = dst[i][:0]
continue
}
n, err := io.ReadFull(in[i], dst[i])
// The error is EOF only if no bytes were read.
// If an EOF happens after reading some but not all the bytes,
// ReadFull returns ErrUnexpectedEOF.
switch err {
case io.ErrUnexpectedEOF, io.EOF:
if size < 0 {
size = n
} else if n != size {
// Shard sizes must match.
return ErrShardSize
}
dst[i] = dst[i][0:n]
case nil:
continue
default:
return StreamReadError{Err: err, Stream: i}
}
}
if size == 0 {
return io.EOF
}
return nil
}
func writeShards(out []io.Writer, in [][]byte) error {
if len(out) != len(in) {
panic("internal error: in and out size do not match")
}
for i := range in {
if out[i] == nil {
continue
}
n, err := out[i].Write(in[i])
if err != nil {
return StreamWriteError{Err: err, Stream: i}
}
//
if n != len(in[i]) {
return StreamWriteError{Err: io.ErrShortWrite, Stream: i}
}
}
return nil
}
type readResult struct {
n int
size int
err error
}
// cReadShards reads shards concurrently
func cReadShards(dst [][]byte, in []io.Reader) error {
if len(in) != len(dst) {
panic("internal error: in and dst size do not match")
}
var wg sync.WaitGroup
wg.Add(len(in))
res := make(chan readResult, len(in))
for i := range in {
if in[i] == nil {
dst[i] = dst[i][:0]
wg.Done()
continue
}
go func(i int) {
defer wg.Done()
n, err := io.ReadFull(in[i], dst[i])
// The error is EOF only if no bytes were read.
// If an EOF happens after reading some but not all the bytes,
// ReadFull returns ErrUnexpectedEOF.
res <- readResult{size: n, err: err, n: i}
}(i)
}
wg.Wait()
close(res)
size := -1
for r := range res {
switch r.err {
case io.ErrUnexpectedEOF, io.EOF:
if size < 0 {
size = r.size
} else if r.size != size {
// Shard sizes must match.
return ErrShardSize
}
dst[r.n] = dst[r.n][0:r.size]
case nil:
default:
return StreamReadError{Err: r.err, Stream: r.n}
}
}
if size == 0 {
return io.EOF
}
return nil
}
// cWriteShards writes shards concurrently
func cWriteShards(out []io.Writer, in [][]byte) error {
if len(out) != len(in) {
panic("internal error: in and out size do not match")
}
var errs = make(chan error, len(out))
var wg sync.WaitGroup
wg.Add(len(out))
for i := range in {
go func(i int) {
defer wg.Done()
if out[i] == nil {
errs <- nil
return
}
n, err := out[i].Write(in[i])
if err != nil {
errs <- StreamWriteError{Err: err, Stream: i}
return
}
if n != len(in[i]) {
errs <- StreamWriteError{Err: io.ErrShortWrite, Stream: i}
}
}(i)
}
wg.Wait()
close(errs)
for err := range errs {
if err != nil {
return err
}
}
return nil
}
// Verify returns true if the parity shards contain correct data.
//
// The number of shards must match the total number of data+parity shards
// given to NewStream().
//
// Each reader must supply the same number of bytes.
// If a shard stream returns an error, a StreamReadError type error
// will be returned.
func (r *rsStream) Verify(shards []io.Reader) (bool, error) {
if len(shards) != r.r.totalShards {
return false, ErrTooFewShards
}
read := 0
all := r.createSlice()
defer r.blockPool.Put(all)
for {
err := r.readShards(all, shards)
if err == io.EOF {
if read == 0 {
return false, ErrShardNoData
}
return true, nil
}
if err != nil {
return false, err
}
read += shardSize(all)
ok, err := r.r.Verify(all)
if !ok || err != nil {
return ok, err
}
}
}
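A short sketch of Verify in practice: encode parity into buffers first, then hand all data+parity shards back as readers (contents here are arbitrary test data):

package main

import (
	"bytes"
	"fmt"
	"io"
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	enc, err := reedsolomon.NewStream(3, 2)
	if err != nil {
		log.Fatal(err)
	}

	payload := []byte("0123456789abcdef")
	dataReaders := func() []io.Reader {
		return []io.Reader{
			bytes.NewReader(payload),
			bytes.NewReader(payload),
			bytes.NewReader(payload),
		}
	}

	var p1, p2 bytes.Buffer
	if err := enc.Encode(dataReaders(), []io.Writer{&p1, &p2}); err != nil {
		log.Fatal(err)
	}

	// Verify wants all data+parity shards, in order, as readers.
	shards := append(dataReaders(),
		bytes.NewReader(p1.Bytes()),
		bytes.NewReader(p2.Bytes()),
	)
	ok, err := enc.Verify(shards)
	fmt.Println("parity consistent:", ok, err)
}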
// ErrReconstructMismatch is returned by the StreamEncoder if you supply
// "valid" and "fill" streams on the same index, since it is then impossible
// to tell whether you consider the shard valid or want it reconstructed.
var ErrReconstructMismatch = errors.New("valid shards and fill shards are mutually exclusive")
// Reconstruct will recreate the missing shards if possible.
//
// Given a list of valid shards (to read) and invalid shards (to write)
//
// You indicate that a shard is missing by setting it to nil in the 'valid'
// slice and at the same time setting a non-nil writer in "fill".
// An index cannot contain both a non-nil 'valid' and 'fill' entry.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
//
// The reconstructed shard set is complete when all missing shards have been requested for fill.
// However, its integrity is not automatically verified.
// Use the Verify function to check whether the data set is ok.
func (r *rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
if len(valid) != r.r.totalShards {
return ErrTooFewShards
}
if len(fill) != r.r.totalShards {
return ErrTooFewShards
}
all := r.createSlice()
defer r.blockPool.Put(all)
reconDataOnly := true
for i := range valid {
if valid[i] != nil && fill[i] != nil {
return ErrReconstructMismatch
}
if i >= r.r.dataShards && fill[i] != nil {
reconDataOnly = false
}
}
read := 0
for {
err := r.readShards(all, valid)
if err == io.EOF {
if read == 0 {
return ErrShardNoData
}
return nil
}
if err != nil {
return err
}
read += shardSize(all)
all = trimShards(all, shardSize(all))
if reconDataOnly {
err = r.r.ReconstructData(all) // just reconstruct missing data shards
} else {
err = r.r.Reconstruct(all) // reconstruct all missing shards
}
if err != nil {
return err
}
err = r.writeShards(fill, all)
if err != nil {
return err
}
}
}
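A compact sketch of the nil/fill convention described above: shard B is dropped from 'valid' and recovered into a buffer supplied via 'fill' (shard contents are arbitrary, but all shards must be the same length):

package main

import (
	"bytes"
	"fmt"
	"io"
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	enc, err := reedsolomon.NewStream(2, 1)
	if err != nil {
		log.Fatal(err)
	}

	shardA := bytes.Repeat([]byte("a"), 32)
	shardB := bytes.Repeat([]byte("b"), 32)
	var parity bytes.Buffer
	err = enc.Encode(
		[]io.Reader{bytes.NewReader(shardA), bytes.NewReader(shardB)},
		[]io.Writer{&parity},
	)
	if err != nil {
		log.Fatal(err)
	}

	// Pretend shard B was lost: nil at its index in 'valid', a writer in 'fill'.
	var restored bytes.Buffer
	valid := []io.Reader{bytes.NewReader(shardA), nil, bytes.NewReader(parity.Bytes())}
	fill := []io.Writer{nil, &restored, nil}
	if err := enc.Reconstruct(valid, fill); err != nil {
		log.Fatal(err)
	}
	fmt.Println("recovered shard B:", bytes.Equal(restored.Bytes(), shardB))
}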
// Join the shards and write the data segment to dst.
//
// Only the data shards are considered.
//
// You must supply the exact output size you want.
// If there are too few shards given, ErrTooFewShards will be returned.
// If the total data size is less than outSize, ErrShortData will be returned.
func (r *rsStream) Join(dst io.Writer, shards []io.Reader, outSize int64) error {
// Do we have enough shards?
if len(shards) < r.r.dataShards {
return ErrTooFewShards
}
// Trim off parity shards if any
shards = shards[:r.r.dataShards]
for i := range shards {
if shards[i] == nil {
return StreamReadError{Err: ErrShardNoData, Stream: i}
}
}
// Join all shards
src := io.MultiReader(shards...)
// Copy data to dst
n, err := io.CopyN(dst, src, outSize)
if err == io.EOF {
return ErrShortData
}
if err != nil {
return err
}
if n != outSize {
return ErrShortData
}
return nil
}
// Split an input stream into the number of shards given to the encoder.
//
// The data will be split into equally sized shards.
// If the data size isn't divisible by the number of shards,
// the last shard will contain extra zeros.
//
// You must supply the total size of your input.
// 'ErrShortData' will be returned if it is unable to retrieve the
// number of bytes indicated.
func (r *rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
if size == 0 {
return ErrShortData
}
if len(dst) != r.r.dataShards {
return ErrInvShardNum
}
for i := range dst {
if dst[i] == nil {
return StreamWriteError{Err: ErrShardNoData, Stream: i}
}
}
// Calculate number of bytes per shard.
perShard := (size + int64(r.r.dataShards) - 1) / int64(r.r.dataShards)
// Pad data to totalShards*perShard bytes.
paddingSize := (int64(r.r.totalShards) * perShard) - size
data = io.MultiReader(data, io.LimitReader(zeroPaddingReader{}, paddingSize))
// Split into equal-length shards and copy.
for i := range dst {
n, err := io.CopyN(dst[i], data, perShard)
if err != io.EOF && err != nil {
return err
}
if n != perShard {
return ErrShortData
}
}
return nil
}
type zeroPaddingReader struct{}
var _ io.Reader = &zeroPaddingReader{}
func (t zeroPaddingReader) Read(p []byte) (n int, err error) {
n = len(p)
for i := 0; i < n; i++ {
p[i] = 0
}
return n, nil
}
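Split and Join round-trip an arbitrary stream; the sketch below pushes 1030 bytes through 4 data shards, which forces the zero padding described above, and then reassembles the original by telling Join the exact size:

package main

import (
	"bytes"
	"fmt"
	"io"
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	enc, err := reedsolomon.NewStream(4, 2)
	if err != nil {
		log.Fatal(err)
	}

	original := bytes.Repeat([]byte("0123456789"), 103) // 1030 bytes -> 4 shards of 258 bytes, zero padded

	// Split into 4 data shard writers.
	bufs := make([]*bytes.Buffer, 4)
	dst := make([]io.Writer, 4)
	for i := range bufs {
		bufs[i] = new(bytes.Buffer)
		dst[i] = bufs[i]
	}
	if err := enc.Split(bytes.NewReader(original), dst, int64(len(original))); err != nil {
		log.Fatal(err)
	}

	// Join only needs the data shards and the exact original size.
	readers := make([]io.Reader, 4)
	for i := range readers {
		readers[i] = bytes.NewReader(bufs[i].Bytes())
	}
	var out bytes.Buffer
	if err := enc.Join(&out, readers, int64(len(original))); err != nil {
		log.Fatal(err)
	}
	fmt.Println("round trip ok:", bytes.Equal(out.Bytes(), original))
}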

41
vendor/github.com/klauspost/reedsolomon/unsafe.go generated vendored Normal file
View File

@ -0,0 +1,41 @@
//go:build !noasm && !nounsafe && !gccgo && !appengine
/**
* Reed-Solomon Coding over 8-bit values.
*
* Copyright 2023, Klaus Post
*/
package reedsolomon
import (
"unsafe"
)
// AllocAligned allocates 'shards' slices, with 'each' bytes.
// Each slice will start on a 64 byte aligned boundary.
func AllocAligned(shards, each int) [][]byte {
if false {
res := make([][]byte, shards)
for i := range res {
res[i] = make([]byte, each)
}
return res
}
const (
alignEach = 64
alignStart = 64
)
eachAligned := ((each + alignEach - 1) / alignEach) * alignEach
total := make([]byte, eachAligned*shards+63)
align := uint(uintptr(unsafe.Pointer(&total[0]))) & (alignStart - 1)
if align > 0 {
total = total[alignStart-align:]
}
res := make([][]byte, shards)
for i := range res {
res[i] = total[:each:eachAligned]
total = total[eachAligned:]
}
return res
}
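AllocAligned is exported, so callers can pre-allocate shard buffers the SIMD kernels like. A quick check of the alignment guarantee; with the no-unsafe fallback further down, alignment is best-effort, so "aligned" may print false there:

package main

import (
	"fmt"
	"unsafe"

	"github.com/klauspost/reedsolomon"
)

func main() {
	shards := reedsolomon.AllocAligned(6, 1000)
	for i, s := range shards {
		addr := uintptr(unsafe.Pointer(&s[0]))
		fmt.Printf("shard %d: len=%d cap=%d 64-byte aligned=%v\n", i, len(s), cap(s), addr%64 == 0)
	}
}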

View File

@ -0,0 +1,23 @@
//go:build noasm || nounsafe || gccgo || appengine
/**
* Reed-Solomon Coding over 8-bit values.
*
* Copyright 2023, Klaus Post
*/
package reedsolomon
// AllocAligned allocates 'shards' slices, with 'each' bytes.
// Each slice will start on a 64 byte aligned boundary.
func AllocAligned(shards, each int) [][]byte {
eachAligned := ((each + 63) / 64) * 64
total := make([]byte, eachAligned*shards+63)
// We cannot do initial align without "unsafe", just use native alignment.
res := make([][]byte, shards)
for i := range res {
res[i] = total[:each:eachAligned]
total = total[eachAligned:]
}
return res
}

19
vendor/github.com/klauspost/reedsolomon/xor_arm64.go generated vendored Normal file
View File

@ -0,0 +1,19 @@
//go:build !noasm && !appengine && !gccgo
package reedsolomon
//go:noescape
func xorSliceNEON(in, out []byte)
// sliceXor does a simple slice xor: the NEON kernel handles 32-byte blocks, the tail is done byte by byte.
func sliceXor(in, out []byte, o *options) {
xorSliceNEON(in, out)
done := (len(in) >> 5) << 5
remain := len(in) - done
if remain > 0 {
for i := done; i < len(in); i++ {
out[i] ^= in[i]
}
}
}
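The wrapper above leaves at most 31 trailing bytes for Go to clean up, since the NEON kernel only consumes whole 32-byte blocks. A pure-Go sketch of the same split; this is not part of the package (xorBlocks32 stands in for the assembly, and sliceXorGo in xor_noasm.go plays a similar role on other platforms):

package main

import "fmt"

// xorBlocks32 stands in for the NEON kernel: it XORs full 32-byte blocks only.
func xorBlocks32(in, out []byte) {
	n := (len(in) >> 5) << 5 // round down to a multiple of 32
	for i := 0; i < n; i++ {
		out[i] ^= in[i]
	}
}

// sliceXor mirrors the structure of the arm64 wrapper above:
// bulk work in the "kernel", remainder handled byte by byte.
func sliceXor(in, out []byte) {
	xorBlocks32(in, out)
	done := (len(in) >> 5) << 5
	for i := done; i < len(in); i++ {
		out[i] ^= in[i]
	}
}

func main() {
	in := make([]byte, 70)
	out := make([]byte, 70)
	for i := range in {
		in[i] = byte(i)
	}
	sliceXor(in, out)
	fmt.Println(out[0], out[69]) // out started as zeros, so out now equals in
}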

29
vendor/github.com/klauspost/reedsolomon/xor_arm64.s generated vendored Normal file
View File

@ -0,0 +1,29 @@
//+build !noasm
//+build !appengine
//+build !gccgo
// func xorSliceNEON(in, out []byte)
TEXT ·xorSliceNEON(SB), 7, $0
MOVD in_base+0(FP), R1
MOVD in_len+8(FP), R2 // length of message
MOVD out_base+24(FP), R5
SUBS $32, R2
BMI completeXor
loopXor:
// Main loop
VLD1.P 32(R1), [V0.B16, V1.B16]
VLD1 (R5), [V20.B16, V21.B16]
VEOR V20.B16, V0.B16, V4.B16
VEOR V21.B16, V1.B16, V5.B16
// Store result
VST1.P [V4.D2, V5.D2], 32(R5)
SUBS $32, R2
BPL loopXor
completeXor:
RET

7
vendor/github.com/klauspost/reedsolomon/xor_noasm.go generated vendored Normal file
View File

@ -0,0 +1,7 @@
//go:build noasm || gccgo || appengine || (!amd64 && !arm64)
package reedsolomon
func sliceXor(in, out []byte, o *options) {
sliceXorGo(in, out, o)
}

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build gc //go:build gc
// +build gc
#include "textflag.h" #include "textflag.h"

5
vendor/golang.org/x/sys/cpu/cpu.go generated vendored
View File

@ -38,7 +38,7 @@ var X86 struct {
HasAVX512F bool // Advanced vector extension 512 Foundation Instructions HasAVX512F bool // Advanced vector extension 512 Foundation Instructions
HasAVX512CD bool // Advanced vector extension 512 Conflict Detection Instructions HasAVX512CD bool // Advanced vector extension 512 Conflict Detection Instructions
HasAVX512ER bool // Advanced vector extension 512 Exponential and Reciprocal Instructions HasAVX512ER bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
HasAVX512PF bool // Advanced vector extension 512 Prefetch Instructions Instructions HasAVX512PF bool // Advanced vector extension 512 Prefetch Instructions
HasAVX512VL bool // Advanced vector extension 512 Vector Length Extensions HasAVX512VL bool // Advanced vector extension 512 Vector Length Extensions
HasAVX512BW bool // Advanced vector extension 512 Byte and Word Instructions HasAVX512BW bool // Advanced vector extension 512 Byte and Word Instructions
HasAVX512DQ bool // Advanced vector extension 512 Doubleword and Quadword Instructions HasAVX512DQ bool // Advanced vector extension 512 Doubleword and Quadword Instructions
@ -54,6 +54,9 @@ var X86 struct {
HasAVX512VBMI2 bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2 HasAVX512VBMI2 bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
HasAVX512BITALG bool // Advanced vector extension 512 Bit Algorithms HasAVX512BITALG bool // Advanced vector extension 512 Bit Algorithms
HasAVX512BF16 bool // Advanced vector extension 512 BFloat16 Instructions HasAVX512BF16 bool // Advanced vector extension 512 BFloat16 Instructions
HasAMXTile bool // Advanced Matrix Extension Tile instructions
HasAMXInt8 bool // Advanced Matrix Extension Int8 instructions
HasAMXBF16 bool // Advanced Matrix Extension BFloat16 instructions
HasBMI1 bool // Bit manipulation instruction set 1 HasBMI1 bool // Bit manipulation instruction set 1
HasBMI2 bool // Bit manipulation instruction set 2 HasBMI2 bool // Bit manipulation instruction set 2
HasCX16 bool // Compare and exchange 16 Bytes HasCX16 bool // Compare and exchange 16 Bytes
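The new AMX flags are read the same way as the existing golang.org/x/sys/cpu feature bits; a tiny check program:

package main

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

func main() {
	// The AMX flags added in this hunk sit next to the existing AVX-512 ones.
	fmt.Println("AVX2:     ", cpu.X86.HasAVX2)
	fmt.Println("AVX512F:  ", cpu.X86.HasAVX512F)
	fmt.Println("AMX tile: ", cpu.X86.HasAMXTile)
	fmt.Println("AMX int8: ", cpu.X86.HasAMXInt8)
	fmt.Println("AMX bf16: ", cpu.X86.HasAMXBF16)
}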

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build aix //go:build aix
// +build aix
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build gc //go:build gc
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build gc //go:build gc
// +build gc
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build gc //go:build gc
// +build gc
package cpu package cpu

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (386 || amd64 || amd64p32) && gc //go:build (386 || amd64 || amd64p32) && gc
// +build 386 amd64 amd64p32
// +build gc
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build gccgo //go:build gccgo
// +build gccgo
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build gccgo //go:build gccgo
// +build gccgo
package cpu package cpu

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (386 || amd64 || amd64p32) && gccgo //go:build (386 || amd64 || amd64p32) && gccgo
// +build 386 amd64 amd64p32
// +build gccgo
#include <cpuid.h> #include <cpuid.h>
#include <stdint.h> #include <stdint.h>

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (386 || amd64 || amd64p32) && gccgo //go:build (386 || amd64 || amd64p32) && gccgo
// +build 386 amd64 amd64p32
// +build gccgo
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build !386 && !amd64 && !amd64p32 && !arm64 //go:build !386 && !amd64 && !amd64p32 && !arm64
// +build !386,!amd64,!amd64p32,!arm64
package cpu package cpu

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build linux && (mips64 || mips64le) //go:build linux && (mips64 || mips64le)
// +build linux
// +build mips64 mips64le
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x //go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
// +build linux,!arm,!arm64,!mips64,!mips64le,!ppc64,!ppc64le,!s390x
package cpu package cpu

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build linux && (ppc64 || ppc64le) //go:build linux && (ppc64 || ppc64le)
// +build linux
// +build ppc64 ppc64le
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build loong64 //go:build loong64
// +build loong64
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build mips64 || mips64le //go:build mips64 || mips64le
// +build mips64 mips64le
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build mips || mipsle //go:build mips || mipsle
// +build mips mipsle
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build !linux && arm //go:build !linux && arm
// +build !linux,arm
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build !linux && !netbsd && !openbsd && arm64 //go:build !linux && !netbsd && !openbsd && arm64
// +build !linux,!netbsd,!openbsd,arm64
package cpu package cpu

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build !linux && (mips64 || mips64le) //go:build !linux && (mips64 || mips64le)
// +build !linux
// +build mips64 mips64le
package cpu package cpu

View File

@ -3,9 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build !aix && !linux && (ppc64 || ppc64le) //go:build !aix && !linux && (ppc64 || ppc64le)
// +build !aix
// +build !linux
// +build ppc64 ppc64le
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build !linux && riscv64 //go:build !linux && riscv64
// +build !linux,riscv64
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build ppc64 || ppc64le //go:build ppc64 || ppc64le
// +build ppc64 ppc64le
package cpu package cpu

View File

@ -3,10 +3,9 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build riscv64 //go:build riscv64
// +build riscv64
package cpu package cpu
const cacheLineSize = 32 const cacheLineSize = 64
func initOptions() {} func initOptions() {}

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build gc //go:build gc
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build wasm //go:build wasm
// +build wasm
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build 386 || amd64 || amd64p32 //go:build 386 || amd64 || amd64p32
// +build 386 amd64 amd64p32
package cpu package cpu
@ -37,6 +36,9 @@ func initOptions() {
{Name: "avx512vbmi2", Feature: &X86.HasAVX512VBMI2}, {Name: "avx512vbmi2", Feature: &X86.HasAVX512VBMI2},
{Name: "avx512bitalg", Feature: &X86.HasAVX512BITALG}, {Name: "avx512bitalg", Feature: &X86.HasAVX512BITALG},
{Name: "avx512bf16", Feature: &X86.HasAVX512BF16}, {Name: "avx512bf16", Feature: &X86.HasAVX512BF16},
{Name: "amxtile", Feature: &X86.HasAMXTile},
{Name: "amxint8", Feature: &X86.HasAMXInt8},
{Name: "amxbf16", Feature: &X86.HasAMXBF16},
{Name: "bmi1", Feature: &X86.HasBMI1}, {Name: "bmi1", Feature: &X86.HasBMI1},
{Name: "bmi2", Feature: &X86.HasBMI2}, {Name: "bmi2", Feature: &X86.HasBMI2},
{Name: "cx16", Feature: &X86.HasCX16}, {Name: "cx16", Feature: &X86.HasCX16},
@ -138,6 +140,10 @@ func archInit() {
eax71, _, _, _ := cpuid(7, 1) eax71, _, _, _ := cpuid(7, 1)
X86.HasAVX512BF16 = isSet(5, eax71) X86.HasAVX512BF16 = isSet(5, eax71)
} }
X86.HasAMXTile = isSet(24, edx7)
X86.HasAMXInt8 = isSet(25, edx7)
X86.HasAMXBF16 = isSet(22, edx7)
} }
func isSet(bitpos uint, value uint32) bool { func isSet(bitpos uint, value uint32) bool {

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (386 || amd64 || amd64p32) && gc //go:build (386 || amd64 || amd64p32) && gc
// +build 386 amd64 amd64p32
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64 //go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64
// +build armbe arm64be m68k mips mips64 mips64p32 ppc ppc64 s390 s390x shbe sparc sparc64
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm //go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm
// +build 386 amd64 amd64p32 alpha arm arm64 loong64 mipsle mips64le mips64p32le nios2 ppc64le riscv riscv64 sh wasm
package cpu package cpu

View File

@ -5,7 +5,7 @@
package cpu package cpu
import ( import (
"io/ioutil" "os"
) )
const ( const (
@ -39,7 +39,7 @@ func readHWCAP() error {
return nil return nil
} }
buf, err := ioutil.ReadFile(procAuxv) buf, err := os.ReadFile(procAuxv)
if err != nil { if err != nil {
// e.g. on android /proc/self/auxv is not accessible, so silently // e.g. on android /proc/self/auxv is not accessible, so silently
// ignore the error and leave Initialized = false. On some // ignore the error and leave Initialized = false. On some

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build linux && arm64 //go:build linux && arm64
// +build linux,arm64
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build go1.21 //go:build go1.21
// +build go1.21
package cpu package cpu

View File

@ -9,7 +9,6 @@
// gccgo's libgo and thus must not used a CGo method. // gccgo's libgo and thus must not used a CGo method.
//go:build aix && gccgo //go:build aix && gccgo
// +build aix,gccgo
package cpu package cpu

View File

@ -7,7 +7,6 @@
// (See golang.org/issue/32102) // (See golang.org/issue/32102)
//go:build aix && ppc64 && gc //go:build aix && ppc64 && gc
// +build aix,ppc64,gc
package cpu package cpu

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build !go1.19 //go:build !go1.19
// +build !go1.19
package execabs package execabs

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build go1.19 //go:build go1.19
// +build go1.19
package execabs package execabs

View File

@ -1,30 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package unsafeheader contains header declarations for the Go runtime's
// slice and string implementations.
//
// This package allows x/sys to use types equivalent to
// reflect.SliceHeader and reflect.StringHeader without introducing
// a dependency on the (relatively heavy) "reflect" package.
package unsafeheader
import (
"unsafe"
)
// Slice is the runtime representation of a slice.
// It cannot be used safely or portably and its representation may change in a later release.
type Slice struct {
Data unsafe.Pointer
Len int
Cap int
}
// String is the runtime representation of a string.
// It cannot be used safely or portably and its representation may change in a later release.
type String struct {
Data unsafe.Pointer
Len int
}

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build go1.5 //go:build go1.5
// +build go1.5
package plan9 package plan9

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build !go1.5 //go:build !go1.5
// +build !go1.5
package plan9 package plan9

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build plan9 && race //go:build plan9 && race
// +build plan9,race
package plan9 package plan9

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build plan9 && !race //go:build plan9 && !race
// +build plan9,!race
package plan9 package plan9

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build plan9 //go:build plan9
// +build plan9
package plan9 package plan9

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build plan9 //go:build plan9
// +build plan9
// Package plan9 contains an interface to the low-level operating system // Package plan9 contains an interface to the low-level operating system
// primitives. OS details vary depending on the underlying system, and // primitives. OS details vary depending on the underlying system, and

View File

@ -2,7 +2,6 @@
// Code generated by the command above; see README.md. DO NOT EDIT. // Code generated by the command above; see README.md. DO NOT EDIT.
//go:build plan9 && 386 //go:build plan9 && 386
// +build plan9,386
package plan9 package plan9

View File

@ -2,7 +2,6 @@
// Code generated by the command above; see README.md. DO NOT EDIT. // Code generated by the command above; see README.md. DO NOT EDIT.
//go:build plan9 && amd64 //go:build plan9 && amd64
// +build plan9,amd64
package plan9 package plan9

View File

@ -2,7 +2,6 @@
// Code generated by the command above; see README.md. DO NOT EDIT. // Code generated by the command above; see README.md. DO NOT EDIT.
//go:build plan9 && arm //go:build plan9 && arm
// +build plan9,arm
package plan9 package plan9

View File

@ -2,9 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos) && go1.9 //go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos
// +build aix darwin dragonfly freebsd linux netbsd openbsd solaris zos
// +build go1.9
package unix package unix

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build gc //go:build gc
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (freebsd || netbsd || openbsd) && gc //go:build (freebsd || netbsd || openbsd) && gc
// +build freebsd netbsd openbsd
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (darwin || dragonfly || freebsd || netbsd || openbsd) && gc //go:build (darwin || dragonfly || freebsd || netbsd || openbsd) && gc
// +build darwin dragonfly freebsd netbsd openbsd
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (freebsd || netbsd || openbsd) && gc //go:build (freebsd || netbsd || openbsd) && gc
// +build freebsd netbsd openbsd
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (darwin || freebsd || netbsd || openbsd) && gc //go:build (darwin || freebsd || netbsd || openbsd) && gc
// +build darwin freebsd netbsd openbsd
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (darwin || freebsd || netbsd || openbsd) && gc //go:build (darwin || freebsd || netbsd || openbsd) && gc
// +build darwin freebsd netbsd openbsd
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,8 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (darwin || freebsd || netbsd || openbsd) && gc //go:build (darwin || freebsd || netbsd || openbsd) && gc
// +build darwin freebsd netbsd openbsd
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build gc //go:build gc
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build gc //go:build gc
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,7 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build gc //go:build gc
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,9 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build linux && arm64 && gc //go:build linux && arm64 && gc
// +build linux
// +build arm64
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,9 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build linux && loong64 && gc //go:build linux && loong64 && gc
// +build linux
// +build loong64
// +build gc
#include "textflag.h" #include "textflag.h"

View File

@ -3,9 +3,6 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build linux && (mips64 || mips64le) && gc //go:build linux && (mips64 || mips64le) && gc
// +build linux
// +build mips64 mips64le
// +build gc
#include "textflag.h" #include "textflag.h"

Some files were not shown because too many files have changed in this diff