// +build amd64,!gccgo,!appengine

#include "textflag.h"

// countNewlinesASM counts the bytes equal to 0x0A ('\n') in the len bytes
// starting at src and stores the count in ret.
//
// Frame layout (three 8-byte words, 24 bytes total):
//	src+0(FP)   pointer to the first byte
//	len+8(FP)   number of bytes to scan
//	ret+16(FP)  result: number of newline bytes found
// NOTE(review): the matching Go prototype is not visible in this file;
// presumably func countNewlinesASM(src *byte, len int) (ret int) -- confirm.
//
// The buffer is consumed from its END towards its start: BX holds the number
// of bytes not yet examined, and every load is addressed relative to SI+BX.
//
// Register roles:
//	SI   base pointer (never modified)
//	BX   bytes remaining (counts down to 0)
//	AX   count accumulated by the scalar tail; final result
//	DX   scratch byte in the tail, scratch in the epilogue
//	X0   all-zero vector
//	X1   sixteen copies of '\n'
//	X2   per-iteration byte-lane match counts
//	X3   two 64-bit running totals (low and high half)
//	X8-X11 loaded data / compare masks
//
// Counting trick: VPCMPEQB yields 0xFF (-1) in every matching byte lane.
// Adding up to four such masks gives 0..-4 per lane, PSIGNB X2, X2 flips the
// negative lanes to 0..4, and VPSADBW against zero sums the sixteen lanes
// into two 64-bit partial sums that are accumulated in X3.
//
// CPU requirements: the V-prefixed instructions need AVX, PSIGNB needs SSSE3,
// and PINSRQ/PEXTRQ need SSE4.1. No feature check is performed here.
TEXT ·countNewlinesASM(SB),NOSPLIT,$0-24
	MOVQ src+0(FP), SI
	MOVQ len+8(FP), BX
	XORQ AX, AX
	XORQ DX, DX
	XORPD X0, X0
	XORPD X2, X2
	XORPD X3, X3

	// Empty input: nothing to scan. Without this guard the tail loop would
	// read src[-1] and decrement BX from 0, wrapping around and never ending.
	TESTQ BX, BX
	JZ ret

	// Broadcast '\n' into all 16 byte lanes of X1.
	MOVQ $0x0A0A0A0A0A0A0A0A, R10
	PINSRQ $1, R10, X1
	PINSRQ $0, R10, X1

	// Pick the widest loop the remaining length allows.
	CMPQ BX, $16
	JB tail
	CMPQ BX, $64
	JB loop

bigloop:
	// Invariant: BX >= 64. Examine the last 64 unprocessed bytes.
	VMOVDQU -16(SI)(BX*1), X11
	VMOVDQU -32(SI)(BX*1), X10
	VMOVDQU -48(SI)(BX*1), X9
	VMOVDQU -64(SI)(BX*1), X8
	VPCMPEQB X11, X1, X11
	VPCMPEQB X10, X1, X10
	VPCMPEQB X9, X1, X9
	VPCMPEQB X8, X1, X8
	// Sum the four masks lane-wise: each lane ends up in 0..-4.
	VPADDB X0, X11, X2
	VPADDB X2, X10, X2
	VPADDB X2, X9, X2
	VPADDB X2, X8, X2
	PSIGNB X2, X2        // negate negative lanes: 0..-4 becomes 0..4
	VPSADBW X2, X0, X2   // horizontal sum into two 64-bit lanes
	VPADDQ X2, X3, X3
	SUBQ $64, BX
	JZ ret
	CMPQ BX, $64
	JAE bigloop
	// Fewer than 64 bytes left. The 16-byte loop below loads a full 16 bytes
	// ending at SI+BX, so it may only be entered with BX >= 16; anything
	// shorter must go to the byte-wise tail to avoid reading before src.
	CMPQ BX, $16
	JB tail

loop:
	// Invariant: BX >= 16. Examine the last 16 unprocessed bytes.
	VMOVDQU -16(SI)(BX*1), X11
	VPCMPEQB X11, X1, X11
	VPADDB X0, X11, X2
	PSIGNB X2, X2
	VPSADBW X2, X0, X2
	VPADDQ X2, X3, X3
	SUBQ $16, BX
	JZ ret
	CMPQ BX, $16
	JAE loop

tail:
	// Invariant: 1 <= BX <= 15. Examine the last unprocessed byte.
	MOVB -1(SI)(BX*1), DX
	CMPB DX, $0x0A
	JNZ next
	INCQ AX

next:
	SUBQ $1, BX
	JNZ tail

ret:
	// Fold both 64-bit vector totals into the scalar count and return it.
	PEXTRQ $0, X3, CX
	PEXTRQ $1, X3, DX
	ADDQ CX, AX
	ADDQ DX, AX
	MOVQ AX, ret+16(FP)
	RET