commit 9384c46598f70b326111a4b43debb6635ffd234a
Author: Barak Michener
Date:   Mon Nov 15 16:12:34 2021 -0800

    countlines in go asm, v1

diff --git a/benchmark_test.go b/benchmark_test.go
new file mode 100644
index 0000000..cf668e7
--- /dev/null
+++ b/benchmark_test.go
@@ -0,0 +1,61 @@
+package countlines
+
+import (
+	"math/rand"
+	"testing"
+)
+
+type size struct {
+	name string
+	l    int
+}
+
+var sizes = []size{
+	{"32", 32},
+	{"128", 128},
+	{"1K", 1 * 1024},
+	{"16K", 16 * 1024},
+	{"128K", 128 * 1024},
+	{"1M", 1024 * 1024},
+	{"16M", 16 * 1024 * 1024},
+	{"128M", 128 * 1024 * 1024},
+	{"512M", 512 * 1024 * 1024},
+}
+
+func randRead64(s []uint64) {
+	for i := range s {
+		s[i] = uint64(rand.Int63())
+	}
+}
+
+func BenchmarkCountNewlines(b *testing.B) {
+	for _, size := range sizes {
+		b.Run(size.name, func(b *testing.B) {
+			s := make([]byte, size.l)
+			rand.Read(s)
+
+			b.SetBytes(int64(size.l))
+			b.ResetTimer()
+
+			for i := 0; i < b.N; i++ {
+				CountNewlines(s)
+			}
+		})
+	}
+}
+
+func BenchmarkCountNewlinesGo(b *testing.B) {
+	for _, size := range sizes {
+		b.Run(size.name, func(b *testing.B) {
+			s := make([]byte, size.l)
+			rand.Read(s)
+
+			b.SetBytes(int64(size.l))
+			b.ResetTimer()
+
+			for i := 0; i < b.N; i++ {
+				countNewlinesGo(s)
+			}
+		})
+	}
+}
diff --git a/countlines_amd64.go b/countlines_amd64.go
new file mode 100644
index 0000000..30a4c1e
--- /dev/null
+++ b/countlines_amd64.go
@@ -0,0 +1,22 @@
+//go:build amd64 && !gccgo && !appengine
+
+package countlines
+
+// CountNewlines returns the number of '\n' bytes in s.
+//
+// On amd64 the work is done by the assembly routine in countlines_amd64.s,
+// which uses AVX instructions without a runtime CPU feature check.
+func CountNewlines(s []byte) uint64 {
+	if len(s) == 0 {
+		// The assembly routine needs a valid first byte and a non-zero length.
+		return 0
+	}
+
+	return countNewlinesASM(&s[0], uint64(len(s)))
+}
+
+// countNewlinesASM is implemented in countlines_amd64.s. It counts the '\n'
+// bytes in the len bytes starting at src; len must be greater than zero.
+//
+//go:noescape
+func countNewlinesASM(src *byte, len uint64) (ret uint64)
diff --git a/countlines_amd64.s b/countlines_amd64.s
new file mode 100644
index 0000000..306ac06
--- /dev/null
+++ b/countlines_amd64.s
@@ -0,0 +1,102 @@
+// +build amd64,!gccgo,!appengine
+
+#include "textflag.h"
+
+// func countNewlinesASM(src *byte, len uint64) (ret uint64)
+//
+// Counts the bytes equal to '\n' (0x0A) in the len bytes starting at src.
+// The buffer is consumed from its end towards its start: BX holds the number
+// of bytes still to be examined and every load is addressed relative to
+// SI+BX. len must be non-zero, because the tail loop reads a byte before it
+// tests the remaining count.
+TEXT ·countNewlinesASM(SB),NOSPLIT,$0-24
+	MOVQ src+0(FP), SI
+	MOVQ len+8(FP), BX
+
+	XORQ AX, AX // AX: newlines found by the byte-at-a-time tail
+	XORQ DX, DX
+	XORPD X0, X0 // X0: all zero, reference operand for VPSADBW
+	XORPD X2, X2
+	XORPD X3, X3 // X3: two 64-bit partial sums from the vector loops
+
+	MOVQ $0x0A0A0A0A0A0A0A0A, R10 // prep '\n'
+	PINSRQ $1, R10, X1
+	PINSRQ $0, R10, X1 // X1: sixteen copies of '\n'
+
+	CMPQ BX, $16
+	JB tail
+
+	CMPQ BX, $64
+	JB loop
+
+// 64 bytes per iteration: the four 16-byte blocks ending at SI+BX.
+bigloop:
+	VMOVDQU -16(SI)(BX*1), X11
+	VMOVDQU -32(SI)(BX*1), X10
+	VMOVDQU -48(SI)(BX*1), X9
+	VMOVDQU -64(SI)(BX*1), X8
+
+	VPCMPEQB X11, X1, X11
+	VPCMPEQB X10, X1, X10
+	VPCMPEQB X9, X1, X9
+	VPCMPEQB X8, X1, X8
+
+	// Each compare leaves 0xFF (-1) in the matching byte lanes, so after the
+	// four byte-wise adds a lane holds minus its match count (0 to -4).
+	VPADDB X0, X11, X2
+	VPADDB X2, X10, X2
+	VPADDB X2, X9, X2
+	VPADDB X2, X8, X2
+	PSIGNB X2, X2 // flip the negative lanes to positive counts
+	VPSADBW X2, X0, X2 // add up the byte lanes of each 8-byte half
+	VPADDQ X2, X3, X3
+
+	SUBQ $64, BX
+	JZ ret
+
+	CMPQ BX, $64
+	JAE bigloop
+
+	// Fewer than 64 bytes are left. The 16-byte loop loads the 16 bytes that
+	// end at SI+BX, so it may only be entered with BX >= 16: with a smaller
+	// remainder it would read in front of src and BX would wrap below zero.
+	CMPQ BX, $16
+	JB tail
+
+// 16 bytes per iteration.
+loop:
+	VMOVDQU -16(SI)(BX*1), X11
+
+	VPCMPEQB X11, X1, X11
+	VPADDB X0, X11, X2
+	PSIGNB X2, X2
+	VPSADBW X2, X0, X2
+	VPADDQ X2, X3, X3
+
+	SUBQ $16, BX
+	JZ ret
+
+	CMPQ BX, $16
+	JAE loop
+
+// One byte per iteration for the last 1..15 bytes.
+tail:
+	MOVB -1(SI)(BX*1), DX
+	CMPB DX, $0x0A
+	JNZ next
+
+	INCQ AX
+next:
+
+	SUBQ $1, BX
+	JNZ tail
+
+
+// Fold both halves of X3 into the scalar count and return it.
+ret:
+	PEXTRQ $0, X3, CX
+	PEXTRQ $1, X3, DX
+	ADDQ CX, AX
+	ADDQ DX, AX
+	MOVQ AX, ret+16(FP)
+	RET
diff --git a/countlines_generic.go b/countlines_generic.go
new file mode 100644
index 0000000..aad9afe
--- /dev/null
+++ b/countlines_generic.go
@@ -0,0 +1,8 @@
+//go:build !amd64 || gccgo || appengine
+
+package countlines
+
+// CountNewlines returns the number of '\n' bytes in s using the pure Go loop.
+func CountNewlines(s []byte) uint64 {
+	return countNewlinesGo(s)
+}
diff --git a/countlines_go.go b/countlines_go.go
new file mode 100644
index 0000000..824a65a
--- /dev/null
+++ b/countlines_go.go
@@ -0,0 +1,18 @@
+package countlines
+
+// Alternative implementation on top of the standard library, kept for reference:
+//
+//func countNewlinesGo(s []byte) uint64 {
+//return uint64(bytes.Count(s, []byte{'\n'}))
+//}
+
+// countNewlinesGo returns the number of '\n' bytes in s. It is the portable
+// implementation and the reference the assembly version is tested against.
+func countNewlinesGo(s []byte) (out uint64) {
+	for _, x := range s {
+		if x == '\n' {
+			out++
+		}
+	}
+	return out
+}
diff --git a/countlines_test.go b/countlines_test.go
new file mode 100644
index 0000000..ff1eaa8
--- /dev/null
+++ b/countlines_test.go
@@ -0,0 +1,88 @@
+package countlines
+
+import (
+	"testing"
+	"testing/quick"
+)
+
+type testVector struct {
+	n uint64
+	b []byte
+}
+
+// repTestVector builds a test vector whose data is s repeated repeatTimes
+// times; expected is the number of newlines in a single copy of s.
+func repTestVector(expected uint64, s []byte, repeatTimes int) testVector {
+	b := make([]byte, repeatTimes*len(s))
+	for i := 0; i < repeatTimes; i++ {
+		for j, v := range s {
+			b[i*len(s)+j] = v
+		}
+	}
+	return testVector{expected * uint64(repeatTimes), b}
+}
+
+var testVectors = []testVector{
+	repTestVector(1, []byte{0xa, 0x5e, 0x74, 0x15, 0x4e, 0xf3, 0xeb, 0xa6, 0x66, 0x83, 0x78, 0xfc, 0xfe, 0xd, 0x3e, 0xbd, 0xa8, 0x57, 0x93, 0x9e, 0x2b, 0x3d, 0xed, 0x99, 0xc9, 0xf9, 0x81, 0x10, 0x7f, 0xb0, 0xb0,
+		0xad, 0x1e, 0x2a, 0x84, 0xd0}, 1),
+	repTestVector(1, []byte{0x01, 0x02, 0x03, 0x0A}, 128),
+	repTestVector(0, []byte{0xf5, 0xc, 0x36, 0x9e, 0x86, 0xca, 0xf9}, 1),
+	repTestVector(0, []byte{0xf1, 0x35, 0xe5, 0xa3, 0x3c, 0x9f, 0x2c, 0x93, 0xbd, 0x72, 0xcf, 0x95, 0x16, 0x34, 0x37, 0xc5, 0xfd, 0xe4, 0x5d, 0x75, 0xb8, 0x2f, 0x5f, 0x53, 0x19, 0x2d, 0x6, 0xc3, 0xdb, 0x6d, 0xd4,
+		0xb5, 0xc0, 0x24, 0x95, 0x8e, 0x8d, 0x76, 0x20, 0xc5, 0x2b, 0x92, 0xc0, 0xa1, 0x3d, 0xee}, 1),
+}
+
+// testCountNewlines runs count over testVectors and reports every mismatch.
+func testCountNewlines(t *testing.T, count func(s []byte) uint64) {
+	for _, tc := range testVectors {
+		if n := count(tc.b); n != tc.n {
+			t.Errorf("Expected %d, got %d", tc.n, n)
+		}
+	}
+}
+
+func TestCountNewlines(t *testing.T) {
+	testCountNewlines(t, CountNewlines)
+}
+
+func TestCountNewlinesGo(t *testing.T) {
+	testCountNewlines(t, countNewlinesGo)
+}
+
+func TestCountNewlinesCompare(t *testing.T) {
+	for _, tc := range testVectors {
+		if a, b := CountNewlines(tc.b), countNewlinesGo(tc.b); a != b {
+			t.Errorf("CountNewlines(%[1]v) = %[2]d; countNewlinesGo(%[1]v) = %[3]d", tc.b, a, b)
+		}
+	}
+
+	if err := quick.CheckEqual(countNewlinesGo, CountNewlines, &quick.Config{
+		MaxCountScale: 1000,
+	}); err != nil {
+		t.Error(err)
+	}
+}
+
+// TestCountNewlinesLengths checks every length from 0 to 4*64+16 so that each
+// mix of 64-byte blocks, 16-byte blocks and single-byte tail handled by the
+// assembly implementation is covered, including a remainder of 8 to 15 bytes
+// after a 64-byte block.
+func TestCountNewlinesLengths(t *testing.T) {
+	buf := make([]byte, 4*64+16)
+	for i := range buf {
+		buf[i] = 'x'
+		if i%3 == 0 {
+			buf[i] = '\n'
+		}
+	}
+
+	for n := 0; n <= len(buf); n++ {
+		// Every third byte, starting with the first, is a newline.
+		want := uint64((n + 2) / 3)
+		if got := CountNewlines(buf[:n]); got != want {
+			t.Errorf("CountNewlines on %d bytes = %d, want %d", n, got, want)
+		}
+		if got := countNewlinesGo(buf[:n]); got != want {
+			t.Errorf("countNewlinesGo on %d bytes = %d, want %d", n, got, want)
+		}
+	}
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..575c697
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module git.barakmich.com/barak/go-countlines
+
+go 1.17
+
+require github.com/klauspost/cpuid v1.3.1 // indirect
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..f568b65
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,2 @@
+github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s=
+github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4=