stencil_clangvsgcc

बनाया गया Diff कभी समाप्त नहीं होता
281 हटाए गए
295 लाइनें
223 जोड़े गए
236 लाइनें
.file "stencil_2d.cpp"
.text
.intel_syntax noprefix
.intel_syntax noprefix
.file "stencil_2d.cpp"
# Start of file scope inline assembly
.globl _ZSt21ios_base_library_initv

# End of file scope inline assembly
.section .rodata.cst16,"aM",@progbits,16
.p2align 4, 0x0 # -- Begin function main
.LCPI0_0:
.long 0x3f800000 # float 1
.long 0x3f800000 # float 1
.long 0x3f800000 # float 1
.long 0x3f800000 # float 1
.section .rodata.cst4,"aM",@progbits,4
.p2align 2, 0x0
.LCPI0_1:
.long 0x3e000000 # float 0.125
.LCPI0_2:
.long 0x3f000000 # float 0.5
.text
.text
#APP
.globl _ZSt21ios_base_library_initv
#NO_APP
#-----------------------------------------------------------------------
# _ZNSt6vectorIfSaIfEED2Ev
#   Itanium-mangled name of std::vector<float, std::allocator<float>>::~vector()
#   (D2 = base object destructor). Emitted weak, in its own COMDAT section.
#
# In:    rdi = this
# Does:  loads the qword at [rdi]; if it is zero, returns immediately.
#        Otherwise tail-jumps to _ZdlPvm (operator delete(void*, unsigned long))
#        with rdi = that pointer and rsi = (qword at 16[rdi]) - pointer.
#        NOTE(review): [rdi] and 16[rdi] are presumably the vector's start and
#        end-of-storage pointers (libstdc++ layout) -- confirm against the
#        library headers; only the two loads are visible here.
# Clobb: rax, rsi, rdi, flags (before the tail call)
#-----------------------------------------------------------------------
.section .text._ZNSt6vectorIfSaIfEED2Ev,"axG",@progbits,_ZNSt6vectorIfSaIfEED5Ev,comdat
.align 2
.p2align 4
.weak _ZNSt6vectorIfSaIfEED2Ev
.type _ZNSt6vectorIfSaIfEED2Ev, @function
_ZNSt6vectorIfSaIfEED2Ev:
.LFB2685:
.cfi_startproc
endbr64
# rax = first qword of *this; zero means there is nothing to free
mov rax, QWORD PTR [rdi]
test rax, rax
je .L1
# rsi = byte count handed to sized delete: 16[this] - [this]
mov rsi, QWORD PTR 16[rdi]
mov rdi, rax
sub rsi, rax
# tail call: operator delete returns directly to our caller
jmp _ZdlPvm@PLT
.p2align 4,,10
.p2align 3
.L1:
ret
.cfi_endproc
.LFE2685:
.size _ZNSt6vectorIfSaIfEED2Ev, .-_ZNSt6vectorIfSaIfEED2Ev
# The complete-object destructor (D1) is an alias of the base-object one (D2)
.weak _ZNSt6vectorIfSaIfEED1Ev
.set _ZNSt6vectorIfSaIfEED1Ev,_ZNSt6vectorIfSaIfEED2Ev
#-----------------------------------------------------------------------
# main
# NOTE(review): the text from here to the end of main does not look like
# the output of one compilation. Two listings of main appear to be
# interleaved line by line:
#   - GCC-style spelling:   `QWORD PTR 16[rsp]`, labels .L5/.L6/.LEHB0,
#                           LSDA label .LLSDA2420
#   - clang-style spelling: `qword ptr [rsp + 15]`, labels .LBB0_1/.Ltmp0,
#                           LSDA label .Lexception0
# Evidence: `.globl main`, `main:`, `.cfi_startproc` and most calls occur
# twice, and the file ends with both a GCC and a clang `.ident` string.
# Read each spelling as its own listing. The physical line order is NOT an
# execution order, and the text is not expected to assemble as-is.
#-----------------------------------------------------------------------
.section .text.unlikely,"ax",@progbits
.LCOLDB6:
.section .text.startup,"ax",@progbits
.LHOTB6:
.p2align 4
.globl main
.globl main
.type main, @function
.p2align 4, 0x90
main:
.type main,@function
.LFB2420:
main: # @main
.Lfunc_begin0:
.cfi_startproc
.cfi_startproc
.cfi_personality 0x9b,DW.ref.__gxx_personality_v0
.cfi_personality 155, DW.ref.__gxx_personality_v0
.cfi_lsda 0x1b,.LLSDA2420
.cfi_lsda 27, .Lexception0
endbr64
# %bb.0:
# Prologues of both listings: callee-saved pushes and stack reservation.
# The GCC-style lines also copy the qword at fs:40 to 72[rsp]; that slot is
# compared against fs:40 again before returning (see the jne to .L24, which
# calls __stack_chk_fail).
push r12
push rbp
.cfi_def_cfa_offset 16
.cfi_def_cfa_offset 16
.cfi_offset 12, -16
push r15
mov edi, 4194304
push rbp
.cfi_def_cfa_offset 24
.cfi_def_cfa_offset 24
.cfi_offset 6, -24
push r14
push rbx
.cfi_def_cfa_offset 32
.cfi_def_cfa_offset 32
.cfi_offset 3, -32
push r12
sub rsp, 80
.cfi_def_cfa_offset 40
.cfi_def_cfa_offset 112
push rbx
mov rax, QWORD PTR fs:40
.cfi_def_cfa_offset 48
mov QWORD PTR 72[rsp], rax
sub rsp, 16
xor eax, eax
.cfi_def_cfa_offset 64
.LEHB0:
.cfi_offset rbx, -48
.cfi_offset r12, -40
.cfi_offset r14, -32
.cfi_offset r15, -24
.cfi_offset rbp, -16
# First allocation: both listings call _Znwm (operator new(unsigned long))
# with edi = 4194304 bytes.
mov edi, 4194304
call _Znwm@PLT
call _Znwm@PLT
.LEHE0:
# Fill loops over the first buffer, 16 bytes per store from xmm0.
#   clang-style: xmm0 = .LCPI0_0 (four floats 1.0), 8 stores per iteration.
#   GCC-style:   xmm0 = .LC2 (1065353216 = 0x3f800000 = 1.0f) broadcast with
#                shufps, 2 stores per iteration until rax reaches rbx.
mov r15, rax
movss xmm0, DWORD PTR .LC2[rip]
mov eax, 28
lea rbx, 4194304[rax]
movapd xmm0, xmmword ptr [rip + .LCPI0_0] # xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
mov QWORD PTR 16[rsp], rax
.p2align 4, 0x90
mov r12, rax
.LBB0_1: # =>This Inner Loop Header: Depth=1
mov QWORD PTR 32[rsp], rbx
movupd xmmword ptr [r15 + 4*rax - 112], xmm0
shufps xmm0, xmm0, 0
movupd xmmword ptr [r15 + 4*rax - 96], xmm0
.L5:
movupd xmmword ptr [r15 + 4*rax - 80], xmm0
movups XMMWORD PTR [rax], xmm0
movupd xmmword ptr [r15 + 4*rax - 64], xmm0
movupd xmmword ptr [r15 + 4*rax - 48], xmm0
movupd xmmword ptr [r15 + 4*rax - 32], xmm0
movupd xmmword ptr [r15 + 4*rax - 16], xmm0
movupd xmmword ptr [r15 + 4*rax], xmm0
add rax, 32
add rax, 32
movups XMMWORD PTR -16[rax], xmm0
cmp rax, 1048604
cmp rbx, rax
jne .LBB0_1
jne .L5
# %bb.2:
# Second allocation (again 4194304 bytes via _Znwm), then
# memset(ptr, 0, 4194304) on the new buffer.
pxor xmm0, xmm0
.Ltmp0:
mov edi, 4194304
mov edi, 4194304
mov QWORD PTR 24[rsp], rbx
movups XMMWORD PTR 56[rsp], xmm0
.LEHB1:
call _Znwm@PLT
call _Znwm@PLT
.LEHE1:
.Ltmp1:
# %bb.3:
mov rbx, rax
xor ebp, ebp
mov edx, 4194304
mov edx, 4194304
xor esi, esi
mov rdi, rax
mov rdi, rax
lea rbp, 4194304[rax]
xor esi, esi
call memset@PLT
call memset@PLT
# Stencil loops. Scalar constants in both spellings are 0.125f
# (.LCPI0_1 / .LC3 = 1040187392) and 0.5f (.LCPI0_2 / .LC4 = 1056964608).
# The outermost loop runs 12 times: GCC-style counts r9d down from 12,
# clang-style counts ebp up and compares with 12. Each result is stored as
# a float and also widened (cvtss2sd) and added to a double accumulator
# (xmm1 in the GCC-style lines, xmm0 in the clang-style lines).
mov r9d, 12
xorpd xmm0, xmm0
pxor xmm1, xmm1
movss xmm1, dword ptr [rip + .LCPI0_1] # xmm1 = [1.25E-1,0.0E+0,0.0E+0,0.0E+0]
movss xmm4, DWORD PTR .LC3[rip]
movss xmm2, dword ptr [rip + .LCPI0_2] # xmm2 = [5.0E-1,0.0E+0,0.0E+0,0.0E+0]
movss xmm3, DWORD PTR .LC4[rip]
.p2align 4, 0x90
mov r8, rax
.LBB0_4: # =>This Loop Header: Depth=1
.L6:
# Child Loop BB0_7 Depth 2
lea rdi, 4100[r8]
# Child Loop BB0_8 Depth 3
lea rcx, 8184[r12]
mov r14, rbx
mov esi, 1024
mov rbx, r15
.p2align 4,,10
lea rax, [r15 + 4]
.p2align 3
mov ecx, 2
.L10:
mov rdx, r14
lea rax, -4088[rcx]
.p2align 4, 0x90
mov rdx, rdi
.LBB0_7: # Parent Loop BB0_4 Depth=1
.p2align 4,,10
# => This Loop Header: Depth=2
.p2align 3
# Child Loop BB0_8 Depth 3
.L7:
mov esi, 1025
movss xmm0, DWORD PTR [rax]
.p2align 4, 0x90
addss xmm0, DWORD PTR 8[rax]
.LBB0_8: # Parent Loop BB0_4 Depth=1
add rax, 4
# Parent Loop BB0_7 Depth=2
add rdx, 4
# => This Inner Loop Header: Depth=3
addss xmm0, DWORD PTR -4096[rax]
movss xmm3, dword ptr [rax + 4*rsi - 8] # xmm3 = mem[0],zero,zero,zero
addss xmm0, DWORD PTR 4096[rax]
addss xmm3, dword ptr [rax + 4*rsi]
movss xmm2, DWORD PTR [rax]
addss xmm3, dword ptr [rax + 4*rsi - 4100]
mulss xmm0, xmm4
addss xmm3, dword ptr [rax + 4*rsi + 4092]
mulss xmm2, xmm3
mulss xmm3, xmm1
addss xmm0, xmm2
movss xmm4, dword ptr [rax + 4*rsi - 4] # xmm4 = mem[0],zero,zero,zero
movss DWORD PTR -4[rdx], xmm0
mulss xmm4, xmm2
cvtss2sd xmm0, xmm0
addss xmm4, xmm3
addsd xmm1, xmm0
movss dword ptr [rdx + 4*rsi], xmm4
cmp rax, rcx
xorps xmm3, xmm3
jne .L7
cvtss2sd xmm3, xmm4
add rsi, 1024
addsd xmm0, xmm3
add rdi, 4096
inc rsi
lea rcx, 4096[rax]
cmp rsi, 2047
cmp rsi, 1047552
jne .LBB0_8
jne .L10
# %bb.6: # in Loop: Header=BB0_7 Depth=2
sub r9d, 1
inc rcx
je .L9
add rdx, 4096
mov rax, rbx
add rax, 4096
mov rbx, rbp
cmp rcx, 1024
mov rbp, rax
jne .LBB0_7
mov rax, r12
# %bb.9: # in Loop: Header=BB0_4 Depth=1
mov r12, r8
inc ebp
mov r8, rax
mov r15, r14
jmp .L6
cmp ebp, 12
.L9:
jne .LBB0_4
# Output: the double accumulator is passed in xmm0 to
# _ZNSo9_M_insertIdEERSoT_ (std::ostream::_M_insert<double>) with
# rdi = std::cout. A newline (byte 10) is then written, through
# __ostream_insert when the qword at offset 16 of the stream sub-object is
# non-zero, otherwise through _ZNSo3putEc (std::ostream::put(char)).
movapd xmm0, xmm1
# %bb.10:
lea rdi, _ZSt4cout[rip]
.Ltmp3:
mov QWORD PTR 16[rsp], r8
mov rdi, qword ptr [rip + _ZSt4cout@GOTPCREL]
mov QWORD PTR 24[rsp], rbp
mov QWORD PTR 32[rsp], rbp
mov QWORD PTR 48[rsp], r12
mov QWORD PTR 56[rsp], rbx
mov QWORD PTR 64[rsp], rbx
.LEHB2:
call _ZNSo9_M_insertIdEERSoT_@PLT
call _ZNSo9_M_insertIdEERSoT_@PLT
.Ltmp4:
# %bb.11:
mov byte ptr [rsp + 15], 10
mov rcx, qword ptr [rax]
mov rcx, qword ptr [rcx - 24]
cmp qword ptr [rax + rcx + 16], 0
je .LBB0_13
# %bb.12:
.Ltmp5:
lea rsi, [rsp + 15]
mov edx, 1
mov rdi, rax
mov rdi, rax
mov rax, QWORD PTR [rax]
mov BYTE PTR 15[rsp], 10
mov rax, QWORD PTR -24[rax]
cmp QWORD PTR 16[rdi+rax], 0
je .L11
lea rsi, 15[rsp]
mov edx, 1
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l@PLT
# Cleanup and epilogues. GCC-style lines run the vector destructor on the
# objects at 48[rsp] and 16[rsp] and check the saved fs:40 value before
# returning; clang-style lines pass rbx and r14 to _ZdlPv
# (operator delete(void*)). Both return 0 in eax.
.L12:
.Ltmp6:
lea rdi, 48[rsp]
jmp .LBB0_14
call _ZNSt6vectorIfSaIfEED1Ev
.LBB0_13:
lea rdi, 16[rsp]
.Ltmp7:
call _ZNSt6vectorIfSaIfEED1Ev
mov rdi, rax
mov rax, QWORD PTR 72[rsp]
mov esi, 10
sub rax, QWORD PTR fs:40
call _ZNSo3putEc@PLT
jne .L24
.Ltmp8:
add rsp, 80
.LBB0_14:
.cfi_remember_state
mov rdi, rbx
.cfi_def_cfa_offset 32
call _ZdlPv@PLT
mov rdi, r14
call _ZdlPv@PLT
xor eax, eax
xor eax, eax
add rsp, 16
.cfi_def_cfa_offset 48
pop rbx
pop rbx
.cfi_def_cfa_offset 40
pop r12
.cfi_def_cfa_offset 32
pop r14
.cfi_def_cfa_offset 24
.cfi_def_cfa_offset 24
pop rbp
pop r15
.cfi_def_cfa_offset 16
.cfi_def_cfa_offset 16
pop r12
pop rbp
.cfi_def_cfa_offset 8
.cfi_def_cfa_offset 8
ret
ret
# Out-of-line tails of both listings: the put('\n') path (.L11), the
# __stack_chk_fail call (.L24), and exception landing pads. The GCC-style
# pads (.L18, .L17) save rax in rbx and jump to .L13/.L14; the clang-style
# pads (.Ltmp2, .Ltmp9) free buffers with _ZdlPv and call _Unwind_Resume.
.L11:
.LBB0_5:
.cfi_restore_state
.cfi_def_cfa_offset 64
mov esi, 10
.Ltmp2:
call _ZNSo3putEc@PLT
mov r12, rax
.LEHE2:
mov r14, r15
jmp .L12
mov rdi, r14
.L24:
call _ZdlPv@PLT
call __stack_chk_fail@PLT
mov rdi, r12
.L18:
call _Unwind_Resume@PLT
endbr64
.LBB0_15:
mov rbx, rax
.Ltmp9:
jmp .L13
mov r12, rax
.L17:
endbr64
mov rbx, rax
jmp .L14
.globl __gxx_personality_v0
.section .gcc_except_table,"a",@progbits
.LLSDA2420:
.byte 0xff
.byte 0xff
.byte 0x1
.uleb128 .LLSDACSE2420-.LLSDACSB2420
.LLSDACSB2420:
.uleb128 .LEHB0-.LFB2420
.uleb128 .LEHE0-.LEHB0
.uleb128 0
.uleb128 0
.uleb128 .LEHB1-.LFB2420
.uleb128 .LEHE1-.LEHB1
.uleb128 .L17-.LFB2420
.uleb128 0
.uleb128 .LEHB2-.LFB2420
.uleb128 .LEHE2-.LEHB2
.uleb128 .L18-.LFB2420
.uleb128 0
.LLSDACSE2420:
.section .text.startup
.cfi_endproc
#-----------------------------------------------------------------------
# main.cold
# GCC-style cold part of main, placed in .text.unlikely, holding the
# exception cleanup reached from main's landing pads:
#   .L13: run the vector destructor on the object at 48[rsp], fall through
#   .L14: run it on the object at 16[rsp], compare 72[rsp] with fs:40
#         (mismatch -> .L25 -> __stack_chk_fail), then call
#         _Unwind_Resume with rdi = rbx.
# NOTE(review): lines in clang-style spelling are interleaved here too
# (`call _ZdlPv@PLT` on rbx/r14, `mov rdi, r12`, the second _Unwind_Resume
# call, `.Lfunc_end0`, `.size main, .Lfunc_end0-main`, and the duplicated
# `.cfi_endproc`). They appear to be the end of the other listing of main
# and are not part of main.cold.
#-----------------------------------------------------------------------
.section .text.unlikely
.cfi_startproc
.cfi_personality 0x9b,DW.ref.__gxx_personality_v0
.cfi_lsda 0x1b,.LLSDAC2420
.type main.cold, @function
main.cold:
.LFSB2420:
.L13:
.cfi_def_cfa_offset 112
.cfi_offset 3, -32
.cfi_offset 6, -24
.cfi_offset 12, -16
lea rdi, 48[rsp]
call _ZNSt6vectorIfSaIfEED1Ev
.L14:
lea rdi, 16[rsp]
call _ZNSt6vectorIfSaIfEED1Ev
# compare the saved fs:40 value before resuming unwinding
mov rax, QWORD PTR 72[rsp]
sub rax, QWORD PTR fs:40
jne .L25
mov rdi, rbx
mov rdi, rbx
.LEHB3:
call _ZdlPv@PLT
mov rdi, r14
call _ZdlPv@PLT
mov rdi, r12
call _Unwind_Resume@PLT
call _Unwind_Resume@PLT
.LEHE3:
.Lfunc_end0:
.L25:
.size main, .Lfunc_end0-main
call __stack_chk_fail@PLT
.cfi_endproc
.cfi_endproc
.LFE2420:
.section .gcc_except_table,"a",@progbits
.section .gcc_except_table
.p2align 2, 0x0
.LLSDAC2420:
GCC_except_table0:
.byte 0xff
.Lexception0:
.byte 0xff
.byte 255 # @LPStart Encoding = omit
.byte 0x1
.byte 255 # @TType Encoding = omit
.uleb128 .LLSDACSEC2420-.LLSDACSBC2420
.byte 1 # Call site Encoding = uleb128
.LLSDACSBC2420:
.uleb128 .Lcst_end0-.Lcst_begin0
.uleb128 .LEHB3-.LCOLDB6
.Lcst_begin0:
.uleb128 .LEHE3-.LEHB3
.uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 <<
.uleb128 0
.uleb128 .Ltmp0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Ltmp0
.uleb128 0
.byte 0 # has no landing pad
.LLSDACSEC2420:
.byte 0 # On action: cleanup
.section .text.unlikely
.uleb128 .Ltmp0-.Lfunc_begin0 # >> Call Site 2 <<
.section .text.startup
.uleb128 .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1
.size main, .-main
.uleb128 .Ltmp2-.Lfunc_begin0 # jumps to .Ltmp2
.section .text.unlikely
.byte 0 # On action: cleanup
.size main.cold, .-main.cold
.uleb128 .Ltmp1-.Lfunc_begin0 # >> Call Site 3 <<
.LCOLDE6:
.uleb128 .Ltmp3-.Ltmp1 # Call between .Ltmp1 and .Ltmp3
.section .text.startup
.byte 0 # has no landing pad
.LHOTE6:
.byte 0 # On action: cleanup
.section .rodata.cst4,"aM",@progbits,4
.uleb128 .Ltmp3-.Lfunc_begin0 # >> Call Site 4 <<
.align 4
.uleb128 .Ltmp8-.Ltmp3 # Call between .Ltmp3 and .Ltmp8
.LC2:
.uleb128 .Ltmp9-.Lfunc_begin0 # jumps to .Ltmp9
.long 1065353216
.byte 0 # On action: cleanup
.align 4
.uleb128 .Ltmp8-.Lfunc_begin0 # >> Call Site 5 <<
.LC3:
.uleb128 .Lfunc_end0-.Ltmp8 # Call between .Ltmp8 and .Lfunc_end0
.long 1040187392
.byte 0 # has no landing pad
.align 4
.byte 0 # On action: cleanup
.LC4:
.Lcst_end0:
.long 1056964608
.p2align 2, 0x0
# -- End function
.hidden DW.ref.__gxx_personality_v0
.hidden DW.ref.__gxx_personality_v0
.weak DW.ref.__gxx_personality_v0
.weak DW.ref.__gxx_personality_v0
.section .data.rel.local.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat
.section .data.DW.ref.__gxx_personality_v0,"awG",@progbits,DW.ref.__gxx_personality_v0,comdat
.align 8
.p2align 3, 0x0
.type DW.ref.__gxx_personality_v0, @object
.type DW.ref.__gxx_personality_v0,@object
.size DW.ref.__gxx_personality_v0, 8
.size DW.ref.__gxx_personality_v0, 8
DW.ref.__gxx_personality_v0:
DW.ref.__gxx_personality_v0:
.quad __gxx_personality_v0
.quad __gxx_personality_v0
.ident "GCC: (Ubuntu 13.3.0-6ubuntu2~24.04.1) 13.3.0"
.ident "Ubuntu clang version 18.1.3 (1ubuntu1)"
.section .note.GNU-stack,"",@progbits
.section ".note.GNU-stack","",@progbits
.section .note.gnu.property,"a"
.addrsig
.align 8
.addrsig_sym __gxx_personality_v0
.long 1f - 0f
.addrsig_sym _Unwind_Resume
.long 4f - 1f
.addrsig_sym _ZSt4cout
.long 5
0:
.string "GNU"
1:
.align 8
.long 0xc0000002
.long 3f - 2f
2:
.long 0x3
3:
.align 8
4: