countlines in go asm, v1
This commit is contained in:
commit
9384c46598
8 changed files with 250 additions and 0 deletions
61
benchmark_test.go
Normal file
61
benchmark_test.go
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
package countlines
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// size describes one benchmark case: a label for the sub-benchmark and the
// length in bytes of the buffer that is scanned.
type size struct {
	// name is the sub-benchmark label passed to b.Run.
	name string
	// l is the buffer length in bytes.
	l int
}
|
||||
|
||||
var sizes = []size{
|
||||
{"32", 32},
|
||||
{"128", 128},
|
||||
{"1K", 1 * 1024},
|
||||
{"16K", 16 * 1024},
|
||||
{"128K", 128 * 1024},
|
||||
{"1M", 1024 * 1024},
|
||||
{"16M", 16 * 1024 * 1024},
|
||||
{"128M", 128 * 1024 * 1024},
|
||||
{"512M", 512 * 1024 * 1024},
|
||||
}
|
||||
|
||||
// randRead64 overwrites every element of s with a pseudo-random 64-bit value
// drawn from the default math/rand source.
//
// rand.Uint64 is used rather than rand.Int63 so that all 64 bits, including
// the most significant one, are randomised; Int63 always leaves the top bit
// clear.
func randRead64(s []uint64) {
	for i := range s {
		s[i] = rand.Uint64()
	}
}
|
||||
|
||||
func BenchmarkCountNewlines(b *testing.B) {
|
||||
for _, size := range sizes {
|
||||
b.Run(size.name, func(b *testing.B) {
|
||||
s := make([]byte, size.l)
|
||||
rand.Read(s)
|
||||
|
||||
b.SetBytes(int64(size.l))
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
CountNewlines(s)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCountNewlinesGo(b *testing.B) {
|
||||
for _, size := range sizes {
|
||||
b.Run(size.name, func(b *testing.B) {
|
||||
s := make([]byte, size.l)
|
||||
rand.Read(s)
|
||||
|
||||
b.SetBytes(int64(size.l))
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
countNewlinesGo(s)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
14
countlines_amd64.go
Normal file
14
countlines_amd64.go
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
//go:build amd64 && !gccgo && !appengine
|
||||
|
||||
package countlines
|
||||
|
||||
// CountNewlines returns the number of '\n' (0x0A) bytes in s.
//
// An empty slice is answered here without calling into assembly, because
// there is no first element whose address could be passed on.
func CountNewlines(s []byte) uint64 {
	if n := len(s); n > 0 {
		return countNewlinesASM(&s[0], uint64(n))
	}
	return 0
}

// countNewlinesASM counts the 0x0A bytes in the len bytes starting at src.
// It has no Go body; the implementation lives in countlines_amd64.s.
//
//go:noescape
func countNewlinesASM(src *byte, len uint64) (ret uint64)
|
||||
86
countlines_amd64.s
Normal file
86
countlines_amd64.s
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
// +build amd64,!gccgo,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func countNewlinesASM(src *byte, len uint64) (ret uint64)
//
// Counts the 0x0A ('\n') bytes in the len bytes starting at src.
//
// The buffer is consumed from its end towards its start: BX holds the number
// of bytes still to be examined and every load is addressed relative to
// SI+BX. Three stages are used:
//   bigloop - 64 bytes per iteration (four 16-byte vectors)
//   loop    - 16 bytes per iteration
//   tail    - one byte per iteration
//
// Register use:
//   SI  base pointer           BX  bytes remaining
//   AX  scalar count (tail)    DX  scratch byte / high lane on exit
//   X0  all zeroes             X1  sixteen copies of 0x0A
//   X2  per-iteration scratch  X3  running vector total (two 64-bit lanes)
//
// NOTE(review): the V-prefixed instructions are VEX encoded and no CPU feature
// check is visible in this file - confirm that every target supports AVX.
TEXT ·countNewlinesASM(SB),NOSPLIT,$0-24
	MOVQ src+0(FP), SI
	MOVQ len+8(FP), BX

	XORQ AX, AX
	XORQ DX, DX
	XORPD X0, X0
	XORPD X2, X2
	XORPD X3, X3

	MOVQ $0x0A0A0A0A0A0A0A0A, R10 // prep '\n'
	PINSRQ $1, R10, X1
	PINSRQ $0, R10, X1

	// Nothing to scan: go straight to the exit, which returns 0 because
	// AX and X3 are still zero. Without this the tail would read src[-1].
	TESTQ BX, BX
	JZ ret

	CMPQ BX, $16
	JB tail

	CMPQ BX, $64
	JB loop

bigloop:
	// Load the last 64 remaining bytes as four unaligned 16-byte vectors.
	VMOVDQU -16(SI)(BX*1), X11
	VMOVDQU -32(SI)(BX*1), X10
	VMOVDQU -48(SI)(BX*1), X9
	VMOVDQU -64(SI)(BX*1), X8

	// Each byte lane becomes 0xFF (-1) where the input byte equals '\n'.
	VPCMPEQB X11, X1, X11
	VPCMPEQB X10, X1, X10
	VPCMPEQB X9, X1, X9
	VPCMPEQB X8, X1, X8

	// Add the four masks lane-wise: every lane ends up in 0..-4.
	VPADDB X0, X11, X2
	VPADDB X2, X10, X2
	VPADDB X2, X9, X2
	VPADDB X2, X8, X2
	// Negate the negative lanes, turning them into counts 0..4.
	PSIGNB X2, X2
	// Sum the byte lanes against zero into the two 64-bit halves of X2,
	// then accumulate into the running total.
	VPSADBW X2, X0, X2
	VPADDQ X2, X3, X3

	SUBQ $64, BX
	JZ ret

	CMPQ BX, $64
	JAE bigloop

	// The 16-byte loop below needs at least 16 bytes. Comparing against
	// anything smaller lets a remainder of 8..15 bytes fall through, which
	// loads from before src and makes BX wrap around on the subtraction.
	CMPQ BX, $16
	JB tail

loop:
	// Same scheme as bigloop for a single 16-byte vector.
	VMOVDQU -16(SI)(BX*1), X11

	VPCMPEQB X11, X1, X11
	VPADDB X0, X11, X2
	PSIGNB X2, X2
	VPSADBW X2, X0, X2
	VPADDQ X2, X3, X3

	SUBQ $16, BX
	JZ ret

	CMPQ BX, $16
	JAE loop

tail:
	// Fewer than 16 bytes left: compare them one at a time.
	MOVB -1(SI)(BX*1), DX
	CMPB DX, $0x0A
	JNZ next

	INCQ AX
next:

	SUBQ $1, BX
	JNZ tail


ret:
	// Fold both 64-bit lanes of the vector total into the scalar count.
	PEXTRQ $0, X3, CX
	PEXTRQ $1, X3, DX
	ADDQ CX, AX
	ADDQ DX, AX
	MOVQ AX, ret+16(FP)
	RET
|
||||
8
countlines_generic.go
Normal file
8
countlines_generic.go
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
//go:build !amd64 || gccgo || appengine
|
||||
|
||||
package countlines
|
||||
|
||||
// CountBytes function counts number of non-zero bits in slice of 8bit unsigned integers.
|
||||
func CountNewlines(s []byte) uint64 {
|
||||
return countNewlinesGo(s)
|
||||
}
|
||||
14
countlines_go.go
Normal file
14
countlines_go.go
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
package countlines
|
||||
|
||||
//func countNewlinesGo(s []byte) uint64 {
|
||||
//return uint64(bytes.Count(s, []byte{'\n'}))
|
||||
//}
|
||||
|
||||
// countNewlinesGo is the pure-Go reference implementation: it walks s once
// and returns how many of its bytes are '\n'.
func countNewlinesGo(s []byte) (out uint64) {
	for i := 0; i < len(s); i++ {
		if s[i] != '\n' {
			continue
		}
		out++
	}
	return out
}
|
||||
60
countlines_test.go
Normal file
60
countlines_test.go
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
package countlines
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/quick"
|
||||
)
|
||||
|
||||
// testVector pairs an input buffer with the newline count expected for it.
type testVector struct {
	// n is the expected number of newline bytes in b.
	n uint64
	// b is the input handed to the function under test.
	b []byte
}
|
||||
|
||||
func repTestVector(expected uint64, s []byte, repeatTimes int) testVector {
|
||||
b := make([]byte, repeatTimes*len(s))
|
||||
for i := 0; i < repeatTimes; i++ {
|
||||
for j, v := range s {
|
||||
b[i*len(s)+j] = v
|
||||
}
|
||||
}
|
||||
return testVector{expected * uint64(repeatTimes), b}
|
||||
}
|
||||
|
||||
var testVectors = []testVector{
|
||||
repTestVector(1, []byte{0xa, 0x5e, 0x74, 0x15, 0x4e, 0xf3, 0xeb, 0xa6, 0x66, 0x83, 0x78, 0xfc, 0xfe, 0xd, 0x3e, 0xbd, 0xa8, 0x57, 0x93, 0x9e, 0x2b, 0x3d, 0xed, 0x99, 0xc9, 0xf9, 0x81, 0x10, 0x7f, 0xb0, 0xb0,
|
||||
0xad, 0x1e, 0x2a, 0x84, 0xd0}, 1),
|
||||
repTestVector(1, []byte{0x01, 0x02, 0x03, 0x0A}, 128),
|
||||
repTestVector(0, []byte{0xf5, 0xc, 0x36, 0x9e, 0x86, 0xca, 0xf9}, 1),
|
||||
repTestVector(0, []byte{0xf1, 0x35, 0xe5, 0xa3, 0x3c, 0x9f, 0x2c, 0x93, 0xbd, 0x72, 0xcf, 0x95, 0x16, 0x34, 0x37, 0xc5, 0xfd, 0xe4, 0x5d, 0x75, 0xb8, 0x2f, 0x5f, 0x53, 0x19, 0x2d, 0x6, 0xc3, 0xdb, 0x6d, 0xd4,
|
||||
0xb5, 0xc0, 0x24, 0x95, 0x8e, 0x8d, 0x76, 0x20, 0xc5, 0x2b, 0x92, 0xc0, 0xa1, 0x3d, 0xee}, 1),
|
||||
}
|
||||
|
||||
func testCountNewlines(t *testing.T, count func(s []byte) uint64) {
|
||||
for _, tc := range testVectors {
|
||||
if n := count(tc.b); n != tc.n {
|
||||
t.Errorf("Expected %d, got %d", tc.n, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCountNewlines(t *testing.T) {
|
||||
testCountNewlines(t, CountNewlines)
|
||||
}
|
||||
|
||||
func TestCountNewlinesGo(t *testing.T) {
|
||||
testCountNewlines(t, countNewlinesGo)
|
||||
}
|
||||
|
||||
func TestCountNewlinesCompare(t *testing.T) {
|
||||
for _, tc := range testVectors {
|
||||
if a, b := CountNewlines(tc.b), countNewlinesGo(tc.b); a != b {
|
||||
t.Errorf("CountNewlines(%[1]v) = %[2]d; countNewlinesGo(%[1]v) = %[3]d", tc.b, a, b)
|
||||
}
|
||||
}
|
||||
|
||||
if err := quick.CheckEqual(countNewlinesGo, CountNewlines, &quick.Config{
|
||||
MaxCountScale: 1000,
|
||||
}); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
||||
5
go.mod
Normal file
5
go.mod
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
module git.barakmich.com/barak/go-countlines
|
||||
|
||||
go 1.17
|
||||
|
||||
require github.com/klauspost/cpuid v1.3.1 // indirect
|
||||
2
go.sum
Normal file
2
go.sum
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s=
|
||||
github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4=
|
||||
Loading…
Add table
Add a link
Reference in a new issue