go-countlines/countlines_arm64.s
2021-11-15 17:01:21 -08:00

74 lines
1 KiB
ArmAsm

//go:build arm64 && !gccgo && !appengine
// +build arm64,!gccgo,!appengine

#include "textflag.h"
// countNewlinesASM counts the bytes equal to '\n' (0x0A) in the `len` bytes
// starting at `src` and stores the count in `ret`.
//
// NOTE(review): the Go prototype lives in another file. From the frame
// offsets used here the arguments are an 8-byte pointer (src+0), an 8-byte
// length (len+8) and an 8-byte result (ret+16) - confirm against the Go
// declaration.
//
// Register roles:
//   R0  = running newline count (the result)
//   R1  = read cursor, advanced by every post-indexed load
//   R2  = bytes remaining
//   R3  = scratch (per-block count / current tail byte)
//   V23 = 0x0A replicated into all 16 byte lanes
//   V7, V8, V16-V19 = scratch vectors
//
// Strategy: 64 bytes per iteration while at least 64 remain, then 16 bytes
// per iteration while at least 16 remain, then one byte at a time.
TEXT ·countNewlinesASM(SB),NOSPLIT,$0-24
	MOVD	src+0(FP), R1
	MOVD	len+8(FP), R2
	MOVD	ZR, R0
	// Empty input: return 0 without touching memory. Without this guard the
	// byte loop below would run with R2 == 0, wrap to -1 and never stop.
	CBZ	R2, done
	VMOVI	$0x0A, V23.B16
	CMP	$16, R2
	BLT	tail
	CMP	$64, R2
	BLT	loop

bigloop:
	// Invariant: R2 >= 64 here.
	VLD1.P	64(R1), [V16.B16, V17.B16, V18.B16, V19.B16]
	// Each lane becomes 0xFF (-1) where the byte is '\n', else 0x00.
	VCMEQ	V16.B16, V23.B16, V16.B16
	VCMEQ	V17.B16, V23.B16, V17.B16
	VCMEQ	V18.B16, V23.B16, V18.B16
	VCMEQ	V19.B16, V23.B16, V19.B16
	// Lane-wise sum of the four masks: every lane ends up in 0..-4.
	VADD	V16.B16, V17.B16, V17.B16
	VADD	V18.B16, V19.B16, V19.B16
	VADD	V17.B16, V19.B16, V7.B16
	// Negate so each lane holds a match count of 0..4, then add the 16 lanes
	// together (at most 64, so the result fits the low halfword of V8).
	WORD	$0x6E20B8E7 // VNEG V7.B16, V7.B16
	VUADDLV	V7.B16, V8
	VMOV	V8.H[0], R3
	ADD	R3, R0, R0
	SUB	$64, R2
	CBZ	R2, done
	CMP	$64, R2
	BGE	bigloop
	CMP	$16, R2
	BLT	tail

loop:
	// Invariant: 16 <= R2 < 64 here.
	VLD1.P	16(R1), [V7.B16]
	// Build the match mask first; negating the raw bytes would add garbage
	// to the count instead of the number of newlines.
	VCMEQ	V7.B16, V23.B16, V7.B16
	WORD	$0x6E20B8E7 // VNEG V7.B16, V7.B16
	VUADDLV	V7.B16, V8
	VMOV	V8.H[0], R3
	ADD	R3, R0, R0
	SUB	$16, R2
	CBZ	R2, done
	CMP	$16, R2
	BGE	loop

tail:
	// Invariant: 1 <= R2 < 16 here. The post-indexed load reads the byte at
	// R1 and then advances R1 by one, so each iteration sees a new byte.
	MOVBU.P	1(R1), R3
	CMP	$0x0A, R3
	BNE	next
	ADD	$1, R0
next:
	SUB	$1, R2
	CBNZ	R2, tail

done:
	MOVD	R0, ret+16(FP)
	RET