sgemv.s (1361B)
#-----------------------------------------------------------------------
# Compiler output (see .ident below) for sgemv.cpp.
# Target: x86-64 ELF, GNU as, Intel operand order (dst, src).
#
# Both routines perform, for every row i in [0, n):
#       y[i] = y[i] + A[i*n + 0]*x[0] + ... + A[i*n + n-1]*x[n-1]
# in single precision, accumulating left to right in one scalar register.
# The names A, x, y, n are labels chosen for these comments; the original
# parameter names are not visible in this file. Arguments are read from
# rdi, rsi, rdx, rcx (System V AMD64 integer-argument order).
#-----------------------------------------------------------------------
        .file   "sgemv.cpp"
        .intel_syntax noprefix
        .text

#-----------------------------------------------------------------------
# sgemv_noaliasing(float const*, float const*, float*, unsigned long)
# In:    rdi = A (n*n floats, rows n floats apart), rsi = x, rdx = y,
#        rcx = n
# Out:   none (y updated in place)
# Clobb: rax, rdx, rdi, r8, r9, xmm0, xmm1, flags
# Stack: unused (leaf routine)
# Note:  the row sum lives in xmm1 for the whole inner loop and is
#        written to y[i] exactly once per row.
#-----------------------------------------------------------------------
        .p2align 4
        .globl  _Z16sgemv_noaliasingPKfS0_Pfm
        .type   _Z16sgemv_noaliasingPKfS0_Pfm, @function
_Z16sgemv_noaliasingPKfS0_Pfm:
        .cfi_startproc
        test    rcx, rcx
        jz      .Lnoalias_done                  # n == 0: nothing to do
        lea     r8, [0+rcx*4]                   # r8 = bytes per row of A
        lea     r9, [rdx+rcx*4]                 # r9 = &y[n], outer-loop sentinel
        .p2align 4,,10
        .p2align 3
.Lnoalias_row:                                  # rdi = &A[i*n], rdx = &y[i]
        movss   xmm1, DWORD PTR [rdx]           # acc = y[i]
        xor     eax, eax                        # j = 0
        .p2align 4,,10
        .p2align 3
.Lnoalias_col:
        movss   xmm0, DWORD PTR [rdi+rax*4]     # A[i*n + j]
        mulss   xmm0, DWORD PTR [rsi+rax*4]     #   * x[j]
        addss   xmm1, xmm0                      # acc += product
        add     rax, 1
        cmp     rax, rcx
        jne     .Lnoalias_col                   # until j == n
        movss   DWORD PTR [rdx], xmm1           # y[i] = acc (single store per row)
        add     rdx, 4                          # next y element
        add     rdi, r8                         # next row of A
        cmp     rdx, r9
        jne     .Lnoalias_row
.Lnoalias_done:
        ret
        .cfi_endproc
        .size   _Z16sgemv_noaliasingPKfS0_Pfm, .-_Z16sgemv_noaliasingPKfS0_Pfm

#-----------------------------------------------------------------------
# sgemv_aliasing(float const*, float const*, float*, unsigned long)
# In:    rdi = A (n*n floats, rows n floats apart), rsi = x, rdx = y,
#        rcx = n
# Out:   none (y updated in place)
# Clobb: rax, rdx, rdi, r8, r9, xmm0, xmm1, flags
# Stack: unused (leaf routine)
# Note:  same arithmetic as the routine above, but the running sum is
#        written back to y[i] after every multiply-add, so later loads
#        from A or x observe it if those buffers overlap y.
#-----------------------------------------------------------------------
        .p2align 4
        .globl  _Z14sgemv_aliasingPKfS0_Pfm
        .type   _Z14sgemv_aliasingPKfS0_Pfm, @function
_Z14sgemv_aliasingPKfS0_Pfm:
        .cfi_startproc
        test    rcx, rcx
        jz      .Lalias_done                    # n == 0: nothing to do
        lea     r8, [0+rcx*4]                   # r8 = bytes per row of A
        lea     r9, [rdx+rcx*4]                 # r9 = &y[n], outer-loop sentinel
        .p2align 4,,10
        .p2align 3
.Lalias_row:                                    # rdi = &A[i*n], rdx = &y[i]
        movss   xmm1, DWORD PTR [rdx]           # acc = y[i]
        xor     eax, eax                        # j = 0
        .p2align 4,,10
        .p2align 3
.Lalias_col:
        movss   xmm0, DWORD PTR [rdi+rax*4]     # A[i*n + j]
        mulss   xmm0, DWORD PTR [rsi+rax*4]     #   * x[j]
        addss   xmm1, xmm0                      # acc += product
        movss   DWORD PTR [rdx], xmm1           # publish partial sum to y[i]
        add     rax, 1
        cmp     rax, rcx
        jne     .Lalias_col                     # until j == n
        add     rdx, 4                          # next y element
        add     rdi, r8                         # next row of A
        cmp     rdx, r9
        jne     .Lalias_row
.Lalias_done:
        ret
        .cfi_endproc
        .size   _Z14sgemv_aliasingPKfS0_Pfm, .-_Z14sgemv_aliasingPKfS0_Pfm

        .ident  "GCC: (GNU) 11.2.0"
        .section        .note.GNU-stack,"",@progbits