// Code generated by command: go run decode_asm.go -pkg base64 -out ../base64/decode_amd64.s -stubs ../base64/decode_amd64.go. DO NOT EDIT.

//go:build !purego

#include "textflag.h"

// Bitmask table indexed by the high nibble of each input byte; ANDed with the
// low-nibble table loaded from lut to detect bytes outside the alphabet.
DATA b64_dec_lut_hi<>+0(SB)/8, $0x0804080402011010
DATA b64_dec_lut_hi<>+8(SB)/8, $0x1010101010101010
DATA b64_dec_lut_hi<>+16(SB)/8, $0x0804080402011010
DATA b64_dec_lut_hi<>+24(SB)/8, $0x1010101010101010
GLOBL b64_dec_lut_hi<>(SB), RODATA|NOPTR, $32

// VPMADDUBSW multipliers (0x40, 0x01 per word): merge adjacent 6-bit values
// into 12-bit words.
DATA b64_dec_madd1<>+0(SB)/8, $0x0140014001400140
DATA b64_dec_madd1<>+8(SB)/8, $0x0140014001400140
DATA b64_dec_madd1<>+16(SB)/8, $0x0140014001400140
DATA b64_dec_madd1<>+24(SB)/8, $0x0140014001400140
GLOBL b64_dec_madd1<>(SB), RODATA|NOPTR, $32

// VPMADDWD multipliers (0x1000, 0x0001 per dword): merge adjacent 12-bit
// words into 24-bit dwords.
DATA b64_dec_madd2<>+0(SB)/8, $0x0001100000011000
DATA b64_dec_madd2<>+8(SB)/8, $0x0001100000011000
DATA b64_dec_madd2<>+16(SB)/8, $0x0001100000011000
DATA b64_dec_madd2<>+24(SB)/8, $0x0001100000011000
GLOBL b64_dec_madd2<>(SB), RODATA|NOPTR, $32

// Shuffle masks that compact the 3 payload bytes of each dword into 24
// contiguous output bytes.
DATA b64_dec_shuf_lo<>+0(SB)/8, $0x0000000000000000
DATA b64_dec_shuf_lo<>+8(SB)/8, $0x0600010200000000
GLOBL b64_dec_shuf_lo<>(SB), RODATA|NOPTR, $16

DATA b64_dec_shuf<>+0(SB)/8, $0x090a040506000102
DATA b64_dec_shuf<>+8(SB)/8, $0x000000000c0d0e08
DATA b64_dec_shuf<>+16(SB)/8, $0x0c0d0e08090a0405
DATA b64_dec_shuf<>+24(SB)/8, $0x0000000000000000
GLOBL b64_dec_shuf<>(SB), RODATA|NOPTR, $32

// func decodeAVX2(dst []byte, src []byte, lut *int8) (int, int)
// Requires: AVX, AVX2, SSE4.1
TEXT ·decodeAVX2(SB), NOSPLIT, $0-72
	MOVQ dst_base+0(FP), AX
	MOVQ src_base+24(FP), DX
	MOVQ lut+48(FP), SI
	MOVQ src_len+32(FP), DI

	// Y8 = 32 x 0x2f ('/'): doubles as the '/' comparand and as the nibble
	// mask, since VPSHUFB only reads the low 4 bits of each index byte.
	MOVB         $0x2f, CL
	PINSRB       $0x00, CX, X8
	VPBROADCASTB X8, Y8

	// CX = dst offset, BX = src offset, Y7 = zero.
	XORQ  CX, CX
	XORQ  BX, BX
	VPXOR Y7, Y7, Y7

	// Y6 = roll-offset table (lut[0:16]) and Y4 = low-nibble class table
	// (lut[16:32]), each broadcast to both lanes; Y5 = high-nibble classes.
	VPERMQ  $0x44, (SI), Y6
	VPERMQ  $0x44, 16(SI), Y4
	VMOVDQA b64_dec_lut_hi<>+0(SB), Y5

loop:
	VMOVDQU (DX)(BX*1), Y0

	// Split each byte into nibbles and look up its character class in both
	// tables; a nonzero AND of the two lookups flags an invalid byte.
	VPSRLD  $0x04, Y0, Y2
	VPAND   Y8, Y0, Y3
	VPSHUFB Y3, Y4, Y3
	VPAND   Y8, Y2, Y2
	VPSHUFB Y2, Y5, Y9
	VPTEST  Y9, Y3
	JNE     done

	// Translate ASCII to 6-bit values: add the roll offset selected by the
	// high nibble, decrementing the index where the byte is '/'.
	VPCMPEQB Y8, Y0, Y3
	VPADDB   Y3, Y2, Y2
	VPSHUFB  Y2, Y6, Y2
	VPADDB   Y0, Y2, Y0

	// Pack 4 x 6 bits into 3 bytes per dword, then shuffle the 24 payload
	// bytes to the front of the vector and zero the rest.
	VPMADDUBSW   b64_dec_madd1<>+0(SB), Y0, Y0
	VPMADDWD     b64_dec_madd2<>+0(SB), Y0, Y0
	VEXTRACTI128 $0x01, Y0, X1
	VPSHUFB      b64_dec_shuf_lo<>+0(SB), X1, X1
	VPSHUFB      b64_dec_shuf<>+0(SB), Y0, Y0
	VPBLENDD     $0x08, Y1, Y0, Y1
	VPBLENDD     $0xc0, Y7, Y1, Y1

	// Store 32 bytes (the low 24 are decoded output), advance dst by 24 and
	// src by 32, and stop once fewer than 0x2d (45) source bytes remain.
	VMOVDQU Y1, (AX)(CX*1)
	ADDQ    $0x18, CX
	ADDQ    $0x20, BX
	SUBQ    $0x20, DI
	CMPQ    DI, $0x2d
	JB      done
	JMP     loop

done:
	// Return (dst bytes written, src bytes consumed).
	MOVQ CX, ret+56(FP)
	MOVQ BX, ret1+64(FP)
	VZEROUPPER
	RET

// func decodeAVX2URI(dst []byte, src []byte, lut *int8) (int, int)
// Requires: AVX, AVX2, SSE4.1
TEXT ·decodeAVX2URI(SB), NOSPLIT, $0-72
	// Y0 = 32 x '/' (0x2f), Y1 = 32 x '_' (0x5f): the URI alphabet's '_' is
	// rewritten to '/' so the standard decode path below applies unchanged.
	MOVB         $0x2f, AL
	PINSRB       $0x00, AX, X0
	VPBROADCASTB X0, Y0
	MOVB         $0x5f, AL
	PINSRB       $0x00, AX, X1
	VPBROADCASTB X1, Y1
	MOVQ         dst_base+0(FP), AX
	MOVQ         src_base+24(FP), DX
	MOVQ         lut+48(FP), SI
	MOVQ         src_len+32(FP), DI
	MOVB         $0x2f, CL
	PINSRB       $0x00, CX, X10
	VPBROADCASTB X10, Y10
	XORQ         CX, CX
	XORQ         BX, BX
	VPXOR        Y9, Y9, Y9
	VPERMQ       $0x44, (SI), Y8
	VPERMQ       $0x44, 16(SI), Y6
	VMOVDQA      b64_dec_lut_hi<>+0(SB), Y7

loop:
	VMOVDQU (DX)(BX*1), Y2

	// Replace '_' with '/', then run the same validate/translate/pack
	// pipeline as decodeAVX2 with a different register allocation.
	VPCMPEQB     Y2, Y1, Y4
	VPBLENDVB    Y4, Y0, Y2, Y2
	VPSRLD       $0x04, Y2, Y4
	VPAND        Y10, Y2, Y5
	VPSHUFB      Y5, Y6, Y5
	VPAND        Y10, Y4, Y4
	VPSHUFB      Y4, Y7, Y11
	VPTEST       Y11, Y5
	JNE          done
	VPCMPEQB     Y10, Y2, Y5
	VPADDB       Y5, Y4, Y4
	VPSHUFB      Y4, Y8, Y4
	VPADDB       Y2, Y4, Y2
	VPMADDUBSW   b64_dec_madd1<>+0(SB), Y2, Y2
	VPMADDWD     b64_dec_madd2<>+0(SB), Y2, Y2
	VEXTRACTI128 $0x01, Y2, X3
	VPSHUFB      b64_dec_shuf_lo<>+0(SB), X3, X3
	VPSHUFB      b64_dec_shuf<>+0(SB), Y2, Y2
	VPBLENDD     $0x08, Y3, Y2, Y3
	VPBLENDD     $0xc0, Y9, Y3, Y3
	VMOVDQU      Y3, (AX)(CX*1)
	ADDQ         $0x18, CX
	ADDQ         $0x20, BX
	SUBQ         $0x20, DI
	CMPQ         DI, $0x2d
	JB           done
	JMP          loop

done:
	MOVQ CX, ret+56(FP)
	MOVQ BX, ret1+64(FP)
	VZEROUPPER
	RET
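
// The signature comments above correspond to the Go stubs generated alongside
// this file (-stubs ../base64/decode_amd64.go); they are assumed to be plain
// //go:noescape declarations:
//
//	//go:noescape
//	func decodeAVX2(dst []byte, src []byte, lut *int8) (int, int)
//
//	//go:noescape
//	func decodeAVX2URI(dst []byte, src []byte, lut *int8) (int, int)
//
// A hypothetical caller (not part of this generated file) would decode the
// bulk of the input with the kernel, then finish the short tail, and any
// invalid byte the kernel stopped at, with a scalar decoder:
//
//	var lut [32]int8 // roll offsets in lut[0:16], low-nibble classes in lut[16:32]
//	n, k := decodeAVX2(dst, src, &lut[0])
//	// ...scalar-decode src[k:] into dst[n:].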