From 5593e2b2ddcaeea40d28d7007369105a3bff7e66 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Thu, 2 Dec 2021 07:39:56 -0800 Subject: [PATCH] Unroll pure go xor loop (#172) * Unroll pure go xor loop Testing with `go test -bench=x1x -tags=noasm -short` ``` before: BenchmarkEncode2x1x1M-32 13658 87980 ns/op 35754.96 MB/s after: BenchmarkEncode2x1x1M-32 21633 55498 ns/op 56682.24 MB/s ``` --- galois_amd64.go | 2 +- galois_arm64.go | 2 +- galois_noasm.go | 23 ++++++++++++++++++----- reedsolomon_test.go | 1 + 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/galois_amd64.go b/galois_amd64.go index 2db86219..03754d2a 100644 --- a/galois_amd64.go +++ b/galois_amd64.go @@ -114,7 +114,7 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { } } -// slice galois add +// simple slice xor func sliceXor(in, out []byte, o *options) { if o.useSSE2 { if len(in) >= bigSwitchover { diff --git a/galois_arm64.go b/galois_arm64.go index 93acd747..df79a987 100644 --- a/galois_arm64.go +++ b/galois_arm64.go @@ -51,7 +51,7 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { } } -// slice galois add +// simple slice xor func sliceXor(in, out []byte, o *options) { galXorNEON(in, out) diff --git a/galois_noasm.go b/galois_noasm.go index 6d3a5a78..7ef78f84 100644 --- a/galois_noasm.go +++ b/galois_noasm.go @@ -7,6 +7,8 @@ package reedsolomon +import "encoding/binary" + func galMulSlice(c byte, in, out []byte, o *options) { out = out[:len(in)] if c == 1 { @@ -22,9 +24,7 @@ func galMulSlice(c byte, in, out []byte, o *options) { func galMulSliceXor(c byte, in, out []byte, o *options) { out = out[:len(in)] if c == 1 { - for n, input := range in { - out[n] ^= input - } + sliceXor(in, out, o) return } mt := mulTable[c][:256] @@ -33,8 +33,21 @@ func galMulSliceXor(c byte, in, out []byte, o *options) { } } -// slice galois add -func sliceXor(in, out []byte, o *options) { +// simple slice xor +func sliceXor(in, out []byte, _ *options) { + for len(out) >= 32 { + inS := in[:32] + v0 := binary.LittleEndian.Uint64(out[:]) ^ binary.LittleEndian.Uint64(inS[:]) + v1 := binary.LittleEndian.Uint64(out[8:]) ^ binary.LittleEndian.Uint64(inS[8:]) + v2 := binary.LittleEndian.Uint64(out[16:]) ^ binary.LittleEndian.Uint64(inS[16:]) + v3 := binary.LittleEndian.Uint64(out[24:]) ^ binary.LittleEndian.Uint64(inS[24:]) + binary.LittleEndian.PutUint64(out[:], v0) + binary.LittleEndian.PutUint64(out[8:], v1) + binary.LittleEndian.PutUint64(out[16:], v2) + binary.LittleEndian.PutUint64(out[24:], v3) + out = out[32:] + in = in[32:] + } for n, input := range in { out[n] ^= input } diff --git a/reedsolomon_test.go b/reedsolomon_test.go index 411d55e6..ce5d1765 100644 --- a/reedsolomon_test.go +++ b/reedsolomon_test.go @@ -28,6 +28,7 @@ func TestMain(m *testing.M) { } func testOptions(o ...Option) []Option { + o = append(o, WithFastOneParityMatrix()) if *noSSSE3 { o = append(o, withSSSE3(false)) }