countlines in go asm, v1

This commit is contained in:
Barak Michener 2021-11-15 16:12:34 -08:00
commit 9384c46598
8 changed files with 250 additions and 0 deletions

61
benchmark_test.go Normal file
View file

@ -0,0 +1,61 @@
package countlines
import (
"math/rand"
"testing"
)
// size describes one benchmark case: a display name for the sub-benchmark
// and the length in bytes of the buffer it runs against.
type size struct {
// name is passed to b.Run as the sub-benchmark name.
name string
// l is the buffer length in bytes.
l int
}
// sizes enumerates the buffer lengths the benchmarks run against, from
// 32 bytes up to 512 MiB.
var sizes = []size{
	{name: "32", l: 32},
	{name: "128", l: 128},
	{name: "1K", l: 1 << 10},
	{name: "16K", l: 16 << 10},
	{name: "128K", l: 128 << 10},
	{name: "1M", l: 1 << 20},
	{name: "16M", l: 16 << 20},
	{name: "128M", l: 128 << 20},
	{name: "512M", l: 512 << 20},
}
// randRead64 fills every element of s with a pseudo-random value from
// math/rand's default source. It is a no-op for an empty or nil slice.
//
// rand.Uint64 is used instead of uint64(rand.Int63()) because Int63 only
// yields non-negative 63-bit values, which would leave the most significant
// bit of every element permanently zero.
func randRead64(s []uint64) {
	for i := range s {
		s[i] = rand.Uint64()
	}
}
// BenchmarkCountNewlines times CountNewlines on a random buffer for every
// entry in sizes. Each entry runs as its own sub-benchmark and reports
// throughput through SetBytes; buffer setup is excluded from the timing.
func BenchmarkCountNewlines(b *testing.B) {
	for _, sz := range sizes {
		b.Run(sz.name, func(sub *testing.B) {
			buf := make([]byte, sz.l)
			rand.Read(buf)
			sub.SetBytes(int64(len(buf)))
			sub.ResetTimer()
			for iter := sub.N; iter > 0; iter-- {
				CountNewlines(buf)
			}
		})
	}
}
// BenchmarkCountNewlinesGo times the pure-Go countNewlinesGo on a random
// buffer for every entry in sizes, so its throughput can be compared with
// BenchmarkCountNewlines. Buffer setup is excluded from the timing.
func BenchmarkCountNewlinesGo(b *testing.B) {
	for _, sz := range sizes {
		b.Run(sz.name, func(sub *testing.B) {
			buf := make([]byte, sz.l)
			rand.Read(buf)
			sub.SetBytes(int64(len(buf)))
			sub.ResetTimer()
			for iter := sub.N; iter > 0; iter-- {
				countNewlinesGo(buf)
			}
		})
	}
}

14
countlines_amd64.go Normal file
View file

@ -0,0 +1,14 @@
//go:build amd64 && !gccgo && !appengine
package countlines
// CountNewlines returns the number of '\n' bytes in s.
//
// This is the amd64 implementation: non-empty input is handed to the
// assembly routine countNewlinesASM. Empty input returns 0 without calling
// into assembly, since there is no first element to take the address of.
func CountNewlines(s []byte) uint64 {
	n := len(s)
	if n > 0 {
		return countNewlinesASM(&s[0], uint64(n))
	}
	return 0
}
// countNewlinesASM is implemented in assembly (countlines_amd64.s). It
// returns the number of 0x0A ('\n') bytes among the len bytes starting at
// src. CountNewlines only calls it with len > 0.
//
//go:noescape
func countNewlinesASM(src *byte, len uint64) (ret uint64)

86
countlines_amd64.s Normal file
View file

@ -0,0 +1,86 @@
// +build amd64,!gccgo,!appengine
#include "textflag.h"
// func countNewlinesASM(src *byte, len uint64) (ret uint64)
//
// Counts the 0x0A ('\n') bytes in the len bytes starting at src. The buffer
// is consumed from its END towards its start: BX holds the number of bytes
// not yet processed, and every load is addressed relative to SI+BX.
//
// Register use:
//   SI  base pointer (src)
//   BX  remaining byte count
//   AX  scalar count accumulated by the byte-at-a-time tail
//   X0  all zeroes (operand for VPADDB / VPSADBW)
//   X1  sixteen copies of 0x0A
//   X3  two 64-bit partial sums accumulated by the vector loops
//
// The caller must pass len > 0: with len == 0 the tail would read src[-1]
// and the decrement of BX would wrap around.
//
// The routine uses VEX-encoded instructions (VMOVDQU, VPCMPEQB, ...) and
// performs no CPU feature detection itself.
TEXT ·countNewlinesASM(SB),NOSPLIT,$0-24
	MOVQ src+0(FP), SI
	MOVQ len+8(FP), BX
	XORQ AX, AX
	XORQ DX, DX
	XORPD X0, X0
	XORPD X2, X2
	XORPD X3, X3
	MOVQ $0x0A0A0A0A0A0A0A0A, R10 // prep '\n'
	PINSRQ $1, R10, X1
	PINSRQ $0, R10, X1
	CMPQ BX, $16
	JB tail                      // fewer than 16 bytes: scalar only
	CMPQ BX, $64
	JB loop                      // 16..63 bytes: 16-byte loop

bigloop:
	// Process the last 64 unprocessed bytes as four 16-byte lanes.
	VMOVDQU -16(SI)(BX*1), X11
	VMOVDQU -32(SI)(BX*1), X10
	VMOVDQU -48(SI)(BX*1), X9
	VMOVDQU -64(SI)(BX*1), X8
	// Matching bytes become 0xFF (-1), everything else 0.
	VPCMPEQB X11, X1, X11
	VPCMPEQB X10, X1, X10
	VPCMPEQB X9, X1, X9
	VPCMPEQB X8, X1, X8
	// Sum the four masks byte-wise: each byte ends up in -4..0.
	VPADDB X0, X11, X2
	VPADDB X2, X10, X2
	VPADDB X2, X9, X2
	VPADDB X2, X8, X2
	PSIGNB X2, X2                // negate the negative bytes -> 0..4 per byte
	VPSADBW X2, X0, X2           // horizontal sum into two 64-bit lanes
	VPADDQ X2, X3, X3
	SUBQ $64, BX
	JZ ret
	CMPQ BX, $64
	JAE bigloop
	// The 16-byte loop below loads at SI+BX-16, so it may only be entered
	// with at least 16 bytes left; anything shorter must go to the tail.
	CMPQ BX, $16
	JB tail

loop:
	// Process the last 16 unprocessed bytes.
	VMOVDQU -16(SI)(BX*1), X11
	VPCMPEQB X11, X1, X11
	VPADDB X0, X11, X2
	PSIGNB X2, X2
	VPSADBW X2, X0, X2
	VPADDQ X2, X3, X3
	SUBQ $16, BX
	JZ ret
	CMPQ BX, $16
	JAE loop

tail:
	// Byte-at-a-time handling of the remaining 1..15 bytes.
	MOVB -1(SI)(BX*1), DX
	CMPB DX, $0x0A
	JNZ next
	INCQ AX

next:
	SUBQ $1, BX
	JNZ tail

ret:
	// Fold both 64-bit vector lanes into the scalar count.
	PEXTRQ $0, X3, CX
	PEXTRQ $1, X3, DX
	ADDQ CX, AX
	ADDQ DX, AX
	MOVQ AX, ret+16(FP)
	RET

8
countlines_generic.go Normal file
View file

@ -0,0 +1,8 @@
//go:build !amd64 || gccgo || appengine
package countlines
// CountNewlines returns the number of '\n' bytes in s.
//
// This is the portable fallback, built when the target is not amd64 or when
// compiling with gccgo or for appengine; it delegates to countNewlinesGo.
func CountNewlines(s []byte) uint64 {
return countNewlinesGo(s)
}

14
countlines_go.go Normal file
View file

@ -0,0 +1,14 @@
package countlines
//func countNewlinesGo(s []byte) uint64 {
//return uint64(bytes.Count(s, []byte{'\n'}))
//}
// countNewlinesGo is the pure-Go reference implementation: it walks s one
// byte at a time and returns how many of those bytes equal '\n'. A nil or
// empty slice yields 0.
func countNewlinesGo(s []byte) (out uint64) {
	for i := 0; i < len(s); i++ {
		if s[i] != '\n' {
			continue
		}
		out++
	}
	return out
}

60
countlines_test.go Normal file
View file

@ -0,0 +1,60 @@
package countlines
import (
"testing"
"testing/quick"
)
// testVector pairs an input buffer with the newline count expected for it.
type testVector struct {
// n is the expected result of counting newlines in b.
n uint64
// b is the input handed to the function under test.
b []byte
}
// repTestVector builds a testVector whose buffer is s repeated repeatTimes
// times back to back. expected is the newline count of a single copy of s;
// the vector's expected count is expected multiplied by repeatTimes.
func repTestVector(expected uint64, s []byte, repeatTimes int) testVector {
	unit := len(s)
	buf := make([]byte, repeatTimes*unit)
	for off := 0; off < len(buf); off += unit {
		copy(buf[off:off+unit], s)
	}
	total := expected * uint64(repeatTimes)
	return testVector{n: total, b: buf}
}
// testVectors is the fixed set of inputs shared by the tests in this file.
// The resulting buffer lengths are 36, 512, 7 and 46 bytes.
//
// NOTE(review): none of these lengths is of the form 64k+8 .. 64k+15 (for
// k >= 1), so a 64-byte chunk followed by an 8..15 byte remainder is not
// exercised by this table.
var testVectors = []testVector{
// 36 bytes with a single newline, at index 0.
repTestVector(1, []byte{0xa, 0x5e, 0x74, 0x15, 0x4e, 0xf3, 0xeb, 0xa6, 0x66, 0x83, 0x78, 0xfc, 0xfe, 0xd, 0x3e, 0xbd, 0xa8, 0x57, 0x93, 0x9e, 0x2b, 0x3d, 0xed, 0x99, 0xc9, 0xf9, 0x81, 0x10, 0x7f, 0xb0, 0xb0,
0xad, 0x1e, 0x2a, 0x84, 0xd0}, 1),
// A 4-byte pattern ending in a newline, repeated 128 times: 512 bytes, 128 newlines.
repTestVector(1, []byte{0x01, 0x02, 0x03, 0x0A}, 128),
// 7 bytes, no newline.
repTestVector(0, []byte{0xf5, 0xc, 0x36, 0x9e, 0x86, 0xca, 0xf9}, 1),
// 46 bytes, no newline.
repTestVector(0, []byte{0xf1, 0x35, 0xe5, 0xa3, 0x3c, 0x9f, 0x2c, 0x93, 0xbd, 0x72, 0xcf, 0x95, 0x16, 0x34, 0x37, 0xc5, 0xfd, 0xe4, 0x5d, 0x75, 0xb8, 0x2f, 0x5f, 0x53, 0x19, 0x2d, 0x6, 0xc3, 0xdb, 0x6d, 0xd4,
0xb5, 0xc0, 0x24, 0x95, 0x8e, 0x8d, 0x76, 0x20, 0xc5, 0x2b, 0x92, 0xc0, 0xa1, 0x3d, 0xee}, 1),
}
// testCountNewlines runs count over every entry of testVectors and reports
// an error for each entry whose result differs from the expected count.
func testCountNewlines(t *testing.T, count func(s []byte) uint64) {
	for i := range testVectors {
		want := testVectors[i].n
		got := count(testVectors[i].b)
		if got == want {
			continue
		}
		t.Errorf("Expected %d, got %d", want, got)
	}
}
// TestCountNewlines checks the exported CountNewlines against the shared
// test vectors.
func TestCountNewlines(t *testing.T) {
testCountNewlines(t, CountNewlines)
}
// TestCountNewlinesGo checks the pure-Go countNewlinesGo against the shared
// test vectors.
func TestCountNewlinesGo(t *testing.T) {
testCountNewlines(t, countNewlinesGo)
}
// TestCountNewlinesCompare cross-checks CountNewlines against the reference
// countNewlinesGo: first on the fixed test vectors, then on randomly
// generated inputs via testing/quick with MaxCountScale set to 1000.
func TestCountNewlinesCompare(t *testing.T) {
	for i := range testVectors {
		input := testVectors[i].b
		fast := CountNewlines(input)
		ref := countNewlinesGo(input)
		if fast == ref {
			continue
		}
		t.Errorf("CountNewlines(%[1]v) = %[2]d; countNewlinesGo(%[1]v) = %[3]d", input, fast, ref)
	}

	cfg := quick.Config{MaxCountScale: 1000}
	err := quick.CheckEqual(countNewlinesGo, CountNewlines, &cfg)
	if err != nil {
		t.Error(err)
	}
}

5
go.mod Normal file
View file

@ -0,0 +1,5 @@
module git.barakmich.com/barak/go-countlines
go 1.17
require github.com/klauspost/cpuid v1.3.1 // indirect

2
go.sum Normal file
View file

@ -0,0 +1,2 @@
github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s=
github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4=