cs205-lecture-examples

Example code used during Harvard CS205 lectures
git clone https://git.0xfab.ch/cs205-lecture-examples.git
Log | Files | Refs | README | LICENSE

sgemv.s (1361B)


      1 	.file	"sgemv.cpp"	# name of the source file this assembly was generated from
      2 	.intel_syntax noprefix	# Intel operand order (dst, src); registers written without '%'
      3 	.text	# everything below is emitted into the code section
      4 	.p2align 4	# align the function entry to 2^4 = 16 bytes
      5 	.globl	_Z16sgemv_noaliasingPKfS0_Pfm	# make the symbol visible to the linker
      6 	.type	_Z16sgemv_noaliasingPKfS0_Pfm, @function
        	# ---------------------------------------------------------------
        	# Mangled C++ name; it demangles to
        	#   sgemv_noaliasing(float const*, float const*, float*, unsigned long)
        	# Registers as used below (System V AMD64 argument order):
        	#   rdi = 1st arg, const float* (presumably the matrix, read n floats
        	#         at a time and advanced by n floats per output element)
        	#   rsi = 2nd arg, const float* (presumably the input vector, n floats)
        	#   rdx = 3rd arg, float*       (output, n floats, updated in place)
        	#   rcx = 4th arg, n
        	# Effect: for each of the n floats at rdx,
        	#   [rdx] = [rdx] + sum_j [rdi + 4*j] * [rsi + 4*j],  j = 0 .. n-1
        	# accumulated in that order in single precision.
        	# The running sum lives in xmm1 for the whole inner loop and is
        	# stored once per output element.
        	# Modifies: rax, rdx, rdi, r8, r9, xmm0, xmm1, flags. No stack use.
        	# ---------------------------------------------------------------
      7 _Z16sgemv_noaliasingPKfS0_Pfm:
      8 .LFB20:
      9 	.cfi_startproc	# begin call-frame (unwind) information for this function
     10 	test	rcx, rcx	# n == 0 ?
     11 	je	.L1	# nothing to do: return immediately
     12 	lea	r8, 0[0+rcx*4]	# r8 = n * 4 = byte length of n floats
     13 	lea	r9, [rdx+r8]	# r9 = one-past-the-end address of the output
     14 	.p2align 4,,10	# align loop head to 16 bytes if that costs at most 10 padding bytes ...
     15 	.p2align 3	# ... otherwise settle for 8-byte alignment
     16 .L3:	# outer loop: one iteration per output element
     17 	movss	xmm1, DWORD PTR [rdx]	# xmm1 = current output value (accumulator start)
     18 	xor	eax, eax	# j = 0 (32-bit write also clears the upper half of rax)
     19 	.p2align 4,,10
     20 	.p2align 3
     21 .L4:	# inner loop: scalar multiply-accumulate over n elements
     22 	movss	xmm0, DWORD PTR [rdi+rax*4]	# xmm0 = rdi[j]
     23 	mulss	xmm0, DWORD PTR [rsi+rax*4]	# xmm0 *= rsi[j]
     24 	add	rax, 1	# ++j
     25 	addss	xmm1, xmm0	# accumulator += product
     26 	cmp	rcx, rax
     27 	jne	.L4	# repeat until j == n
     28 	movss	DWORD PTR [rdx], xmm1	# single store of the finished sum, after the inner loop
     29 	add	rdx, 4	# advance to the next output float
     30 	add	rdi, r8	# advance the 1st-arg pointer by n floats
     31 	cmp	r9, rdx
     32 	jne	.L3	# repeat until the output end pointer is reached
     33 .L1:
     34 	ret
     35 	.cfi_endproc	# end of call-frame information
     36 .LFE20:
     37 	.size	_Z16sgemv_noaliasingPKfS0_Pfm, .-_Z16sgemv_noaliasingPKfS0_Pfm	# symbol size = bytes from entry label to here
     38 	.p2align 4	# align the function entry to 2^4 = 16 bytes
     39 	.globl	_Z14sgemv_aliasingPKfS0_Pfm	# make the symbol visible to the linker
     40 	.type	_Z14sgemv_aliasingPKfS0_Pfm, @function
        	# ---------------------------------------------------------------
        	# Mangled C++ name; it demangles to
        	#   sgemv_aliasing(float const*, float const*, float*, unsigned long)
        	# Registers as used below (System V AMD64 argument order):
        	#   rdi = 1st arg, const float* (presumably the matrix, read n floats
        	#         at a time and advanced by n floats per output element)
        	#   rsi = 2nd arg, const float* (presumably the input vector, n floats)
        	#   rdx = 3rd arg, float*       (output, n floats, updated in place)
        	#   rcx = 4th arg, n
        	# Effect: for each of the n floats at rdx, the products
        	#   [rdi + 4*j] * [rsi + 4*j],  j = 0 .. n-1
        	# are added one at a time to a running sum that starts at [rdx].
        	# The running sum is written back to [rdx] on EVERY inner iteration,
        	# before the next loads from rdi/rsi, so a load that overlaps the
        	# output would see the partial sum.
        	# NOTE(review): presumably the compiler could not prove the output
        	# does not overlap the inputs, hence the per-iteration store; confirm
        	# against sgemv.cpp.
        	# Modifies: rax, rdx, rdi, r8, r9, xmm0, xmm1, flags. No stack use.
        	# ---------------------------------------------------------------
     41 _Z14sgemv_aliasingPKfS0_Pfm:
     42 .LFB21:
     43 	.cfi_startproc	# begin call-frame (unwind) information for this function
     44 	test	rcx, rcx	# n == 0 ?
     45 	je	.L10	# nothing to do: return immediately
     46 	lea	r8, 0[0+rcx*4]	# r8 = n * 4 = byte length of n floats
     47 	lea	r9, [rdx+r8]	# r9 = one-past-the-end address of the output
     48 	.p2align 4,,10	# align loop head to 16 bytes if that costs at most 10 padding bytes ...
     49 	.p2align 3	# ... otherwise settle for 8-byte alignment
     50 .L12:	# outer loop: one iteration per output element
     51 	movss	xmm1, DWORD PTR [rdx]	# xmm1 = current output value (accumulator start)
     52 	xor	eax, eax	# j = 0 (32-bit write also clears the upper half of rax)
     53 	.p2align 4,,10
     54 	.p2align 3
     55 .L13:	# inner loop: scalar multiply-accumulate over n elements
     56 	movss	xmm0, DWORD PTR [rdi+rax*4]	# xmm0 = rdi[j]
     57 	mulss	xmm0, DWORD PTR [rsi+rax*4]	# xmm0 *= rsi[j]
     58 	add	rax, 1	# ++j
     59 	addss	xmm1, xmm0	# accumulator += product
     60 	movss	DWORD PTR [rdx], xmm1	# write the partial sum back to memory inside the loop
     61 	cmp	rcx, rax
     62 	jne	.L13	# repeat until j == n
     63 	add	rdx, 4	# advance to the next output float
     64 	add	rdi, r8	# advance the 1st-arg pointer by n floats
     65 	cmp	r9, rdx
     66 	jne	.L12	# repeat until the output end pointer is reached
     67 .L10:
     68 	ret
     69 	.cfi_endproc	# end of call-frame information
     70 .LFE21:
     71 	.size	_Z14sgemv_aliasingPKfS0_Pfm, .-_Z14sgemv_aliasingPKfS0_Pfm	# symbol size = bytes from entry label to here
     72 	.ident	"GCC: (GNU) 11.2.0"	# records the compiler that produced this file
     73 	.section	.note.GNU-stack,"",@progbits	# empty marker section: this object does not need an executable stack