From 235bcd7a0cf72f8a7073316f3ba13cf44d2708b5 Mon Sep 17 00:00:00 2001
From: Barak Michener
Date: Mon, 15 Nov 2021 16:56:39 -0800
Subject: [PATCH] initial, untested, arm64

---
 countlines_arm64.s | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 countlines_arm64.s

diff --git a/countlines_arm64.s b/countlines_arm64.s
new file mode 100644
index 0000000..9076c5d
--- /dev/null
+++ b/countlines_arm64.s
@@ -0,0 +1,91 @@
+// +build arm64,!gccgo,!appengine
+
+#include "textflag.h"
+
+// countNewlinesASM counts the 0x0A ('\n') bytes in a buffer using NEON.
+//
+// Arguments are read from src+0(FP) (buffer address) and len+8(FP) (byte
+// count); the total is stored to ret+16(FP). The Go declaration is not in
+// this file -- confirm that it matches this 24-byte argument layout.
+//
+// Register use:
+//   R0   running newline count
+//   R1   read cursor, post-incremented by every load
+//   R2   bytes remaining
+//   R3   scratch (per-block count, or the current tail byte)
+//   V23  sixteen copies of 0x0A
+//   V7, V8, V16-V19 scratch vectors
+TEXT ·countNewlinesASM(SB),NOSPLIT,$0-24
+    MOVD    src+0(FP), R1
+    MOVD    len+8(FP), R2
+
+    MOVD    ZR, R0
+    CBZ     R2, done                // empty input: nothing to read
+    VMOVI   $0x0A, V23.B16
+
+    // The length is unsigned, so use unsigned branches (BLO/BHS).
+    CMP     $16, R2
+    BLO     tail
+
+    CMP     $64, R2
+    BLO     loop
+
+bigloop:
+    // 64 bytes per iteration; R2 >= 64 here.
+    VLD1.P  64(R1), [V16.B16, V17.B16, V18.B16, V19.B16]
+
+    // Matching lanes become 0xFF (-1), all other lanes 0x00.
+    VCMEQ   V16.B16, V23.B16, V16.B16
+    VCMEQ   V17.B16, V23.B16, V17.B16
+    VCMEQ   V18.B16, V23.B16, V18.B16
+    VCMEQ   V19.B16, V23.B16, V19.B16
+
+    // Each lane of V7 ends up in -4..0; negating gives a per-lane count 0..4.
+    VADD    V16.B16, V17.B16, V17.B16
+    VADD    V18.B16, V19.B16, V19.B16
+    VADD    V17.B16, V19.B16, V7.B16
+    WORD    $0x6E20B8E7             // NEG V7.16B, V7.16B
+
+    // The horizontal sum is at most 64, so it fits the 16-bit result lane.
+    VUADDLV V7.B16, V8
+    VMOV    V8.H[0], R3
+    ADD     R3, R0, R0
+
+    SUB     $64, R2
+    CBZ     R2, done
+
+    CMP     $64, R2
+    BHS     bigloop
+
+    CMP     $16, R2
+    BLO     tail
+
+loop:
+    // 16 bytes per iteration; R2 >= 16 here.
+    VLD1.P  16(R1), [V7.B16]
+    VCMEQ   V7.B16, V23.B16, V7.B16 // without this the raw bytes would be summed
+    WORD    $0x6E20B8E7             // NEG V7.16B, V7.16B
+    VUADDLV V7.B16, V8
+    VMOV    V8.H[0], R3
+    ADD     R3, R0, R0
+
+    SUB     $16, R2
+    CBZ     R2, done
+
+    CMP     $16, R2
+    BHS     loop
+
+tail:
+    // One byte per iteration; R2 is never zero on entry to this loop.
+    MOVBU.P 1(R1), R3               // load *R1, then R1++
+    CMP     $0x0A, R3
+    BNE     next
+
+    ADD     $1, R0
+next:
+    SUB     $1, R2
+    CBNZ    R2, tail
+
+done:
+    MOVD    R0, ret+16(FP)
+    RET