Untitled Diff
163 linee
; Assembly listing for method HexConverter:EncodeToUtf16_Ssse3(ReadOnlySpan`1,Span`1,int)
; Assembly listing for method HexConverter:EncodeToUtf16_Ssse3(ReadOnlySpan`1,Span`1,int)
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; optimized code
; optimized code
; rsp based frame
; rsp based frame
; fully interruptible
; fully interruptible
; No PGO data
; No PGO data
; Final local variable assignments
; Final local variable assignments
;
;
; V00 arg0 [V00,T12] ( 4, 8 ) byref -> rcx ld-addr-op
; V00 arg0 [V00,T12] ( 4, 8 ) byref -> rcx ld-addr-op
; V01 arg1 [V01,T01] ( 5, 36 ) byref -> rdx
; V01 arg1 [V01,T01] ( 5, 36 ) byref -> rdx
; V02 arg2 [V02,T13] ( 4, 7 ) int -> r8
; V02 arg2 [V02,T13] ( 4, 7 ) int -> r8
; V03 loc0 [V03,T00] ( 12, 62 ) long -> r9
; V03 loc0 [V03,T00] ( 12, 62 ) long -> r9
; V04 loc1 [V04,T21] ( 2, 9 ) simd16 -> mm0
; V04 loc1 [V04,T21] ( 2, 9 ) simd16 -> mm0
; V05 loc2 [V05,T22] ( 2, 9 ) simd16 -> mm1
; V05 loc2 [V05,T22] ( 2, 9 ) simd16 -> mm1
; V06 loc3 [V06,T05] ( 2, 16 ) int -> r11
; V06 loc3 [V06,T05] ( 2, 16 ) int -> r11
; V07 loc4 [V07,T18] ( 3, 24 ) simd16 -> mm2
; V07 loc4 [V07,T18] ( 3, 24 ) simd16 -> mm4
; V08 loc5 [V08,T19] ( 2, 16 ) simd16 -> mm3
; V08 loc5 [V08,T19] ( 2, 16 ) simd16 -> mm5
; V09 loc6 [V09,T20] ( 2, 16 ) simd16 -> mm2
; V09 loc6 [V09,T20] ( 2, 16 ) simd16 -> mm4
; V10 loc7 [V10,T17] ( 4, 32 ) simd16 -> mm2
; V10 loc7 [V10,T17] ( 4, 32 ) simd16 -> mm4
; V11 OutArgs [V11 ] ( 1, 1 ) lclBlk (32) [rsp+00H] "OutgoingArgSpace"
; V11 OutArgs [V11 ] ( 1, 1 ) lclBlk (32) [rsp+00H] "OutgoingArgSpace"
; V12 tmp1 [V12,T23] ( 3, 2 ) simd16 -> mm1
; V12 tmp1 [V12,T25] ( 3, 2 ) simd16 -> mm1
;* V13 tmp2 [V13 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
;* V13 tmp2 [V13 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
;* V14 tmp3 [V14 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
;* V14 tmp3 [V14 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
;* V15 tmp4 [V15 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
;* V15 tmp4 [V15 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
;* V16 tmp5 [V16 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
;* V16 tmp5 [V16 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
;* V17 tmp6 [V17 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
;* V17 tmp6 [V17 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
;* V18 tmp7 [V18 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
;* V18 tmp7 [V18 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
;* V19 tmp8 [V19 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
;* V19 tmp8 [V19 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
;* V20 tmp9 [V20 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
;* V20 tmp9 [V20 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
;* V21 tmp10 [V21 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V21 tmp10 [V21 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
;* V22 tmp11 [V22 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
;* V22 tmp11 [V22 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
;* V23 tmp12 [V23 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
;* V23 tmp12 [V23 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
; V24 tmp13 [V24,T03] ( 3, 24 ) ubyte -> r10 "Inlining Arg"
; V24 tmp13 [V24,T03] ( 3, 24 ) ubyte -> r10 "Inlining Arg"
; V25 tmp14 [V25,T08] ( 3, 12 ) int -> r10 "Inline stloc first use temp"
; V25 tmp14 [V25,T08] ( 3, 12 ) int -> r10 "Inline stloc first use temp"
; V26 tmp15 [V26,T09] ( 3, 12 ) int -> r10 "Inline stloc first use temp"
; V26 tmp15 [V26,T09] ( 3, 12 ) int -> r10 "Inline stloc first use temp"
;* V27 tmp16 [V27 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
;* V27 tmp16 [V27 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
; V28 tmp17 [V28,T02] ( 4, 32 ) int -> rdi "Inlining Arg"
; V28 tmp17 [V28,T02] ( 4, 32 ) int -> rdi "Inlining Arg"
; V29 tmp18 [V29,T06] ( 3, 13 ) byref -> rax V41._pointer(offs=0x00) P-INDEP "field V00._pointer (fldOffset=0x0)"
; V29 tmp18 [V29,T06] ( 3, 13 ) byref -> rax V41._pointer(offs=0x00) P-INDEP "field V00._pointer (fldOffset=0x0)"
; V30 tmp19 [V30,T16] ( 3, 3 ) int -> rcx V41._length(offs=0x08) P-INDEP "field V00._length (fldOffset=0x8)"
; V30 tmp19 [V30,T16] ( 3, 3 ) int -> rcx V41._length(offs=0x08) P-INDEP "field V00._length (fldOffset=0x8)"
;* V31 tmp20 [V31 ] ( 0, 0 ) byref -> zero-ref V42._pointer(offs=0x00) P-INDEP "field V01._pointer (fldOffset=0x0)"
;* V31 tmp20 [V31 ] ( 0, 0 ) byref -> zero-ref V42._pointer(offs=0x00) P-INDEP "field V01._pointer (fldOffset=0x0)"
;* V32 tmp21 [V32 ] ( 0, 0 ) int -> zero-ref V42._length(offs=0x08) P-INDEP "field V01._length (fldOffset=0x8)"
;* V32 tmp21 [V32 ] ( 0, 0 ) int -> zero-ref V42._length(offs=0x08) P-INDEP "field V01._length (fldOffset=0x8)"
;* V33 tmp22 [V33 ] ( 0, 0 ) byref -> zero-ref V13._pointer(offs=0x00) P-INDEP "field V13._pointer (fldOffset=0x0)"
;* V33 tmp22 [V33 ] ( 0, 0 ) byref -> zero-ref V13._pointer(offs=0x00) P-INDEP "field V13._pointer (fldOffset=0x0)"
;* V34 tmp23 [V34 ] ( 0, 0 ) int -> zero-ref V13._length(offs=0x08) P-INDEP "field V13._length (fldOffset=0x8)"
;* V34 tmp23 [V34 ] ( 0, 0 ) int -> zero-ref V13._length(offs=0x08) P-INDEP "field V13._length (fldOffset=0x8)"
; V35 tmp24 [V35,T04] ( 2, 16 ) byref -> r11 V16._pointer(offs=0x00) P-INDEP "field V16._pointer (fldOffset=0x0)"
; V35 tmp24 [V35,T04] ( 2, 16 ) byref -> r11 V16._pointer(offs=0x00) P-INDEP "field V16._pointer (fldOffset=0x0)"
;* V36 tmp25 [V36 ] ( 0, 0 ) int -> zero-ref V16._length(offs=0x08) P-INDEP "field V16._length (fldOffset=0x8)"
;* V36 tmp25 [V36 ] ( 0, 0 ) int -> zero-ref V16._length(offs=0x08) P-INDEP "field V16._length (fldOffset=0x8)"
;* V37 tmp26 [V37 ] ( 0, 0 ) byref -> zero-ref V22._pointer(offs=0x00) P-INDEP "field V22._pointer (fldOffset=0x0)"
;* V37 tmp26 [V37 ] ( 0, 0 ) byref -> zero-ref V22._pointer(offs=0x00) P-INDEP "field V22._pointer (fldOffset=0x0)"
;* V38 tmp27 [V38 ] ( 0, 0 ) int -> zero-ref V22._length(offs=0x08) P-INDEP "field V22._length (fldOffset=0x8)"
;* V38 tmp27 [V38 ] ( 0, 0 ) int -> zero-ref V22._length(offs=0x08) P-INDEP "field V22._length (fldOffset=0x8)"
; V39 tmp28 [V39,T07] ( 3, 12 ) byref -> r11 V27._pointer(offs=0x00) P-INDEP "field V27._pointer (fldOffset=0x0)"
; V39 tmp28 [V39,T07] ( 3, 12 ) byref -> r11 V27._pointer(offs=0x00) P-INDEP "field V27._pointer (fldOffset=0x0)"
; V40 tmp29 [V40,T10] ( 3, 12 ) int -> rsi V27._length(offs=0x08) P-INDEP "field V27._length (fldOffset=0x8)"
; V40 tmp29 [V40,T10] ( 3, 12 ) int -> rsi V27._length(offs=0x08) P-INDEP "field V27._length (fldOffset=0x8)"
;* V41 tmp30 [V41 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref"
;* V41 tmp30 [V41 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref"
;* V42 tmp31 [V42 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref"
;* V42 tmp31 [V42 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref"
; V43 cse0 [V43,T14] ( 2, 9 ) long -> r10 "CSE - moderate"
; V43 cse0 [V43,T14] ( 2, 9 ) long -> r10 "CSE - moderate"
; V44 cse1 [V44,T11] ( 3, 12 ) int -> rbx "CSE - moderate"
; V44 cse1 [V44,T11] ( 3, 12 ) int -> rbx "CSE - moderate"
; V45 cse2 [V45,T15] ( 3, 6 ) long -> rcx "CSE - moderate"
; V45 cse2 [V45,T23] ( 2, 9 ) simd16 -> mm2 "CSE - moderate"
; V46 cse3 [V46,T24] ( 2, 9 ) simd16 -> mm3 "CSE - moderate"
; V47 cse4 [V47,T15] ( 3, 6 ) long -> rcx "CSE - moderate"
;
;
; Lcl frame size = 40
; Lcl frame size = 40
; Prolog. The pasted text interleaves two listings of the same method line by
; line, so every label and instruction in this group appears twice.
; rdi, rsi, rbp and rbx are saved before the method overwrites them (they are
; callee-saved under the Windows x64 convention named in the listing header).
G_M58342_IG01:
G_M58342_IG01:
push rdi
push rdi
push rsi
push rsi
push rbp
push rbp
push rbx
push rbx
; Reserve the 40-byte frame ("Lcl frame size = 40"). Return address (8) +
; four pushes (32) + 40 = 80 bytes, so rsp stays 16-byte aligned for the call
; in IG11.
sub rsp, 40
sub rsp, 40
vzeroupper
vzeroupper
;; bbWeight=1 PerfScore 5.25
;; bbWeight=1 PerfScore 5.25
; Load the two fields of arg0, which is passed by reference in rcx:
;   rax = arg0._pointer (offset 0), ecx = arg0._length (offset 8).
; The second load overwrites rcx, so the byref itself is dead afterwards.
; (Each line appears twice because two listings are interleaved.)
G_M58342_IG02:
G_M58342_IG02:
mov rax, bword ptr [rcx]
mov rax, bword ptr [rcx]
mov ecx, dword ptr [rcx+8]
mov ecx, dword ptr [rcx+8]
;; bbWeight=1 PerfScore 4.00
;; bbWeight=1 PerfScore 4.00
; Initialise the loop state and choose the digit table.
;   r9   = 0, the input byte index (loc0 in the variable table)
;   xmm0 = RWD00, the byte-shuffle mask used by the vector loop in IG07
; arg2 (r8d) == 0 branches to IG05; any other value falls through to IG04.
; (Each line appears twice because two listings are interleaved.)
G_M58342_IG03:
G_M58342_IG03:
xor r9d, r9d
xor r9d, r9d
vmovupd xmm0, xmmword ptr [reloc @RWD00]
vmovupd xmm0, xmmword ptr [reloc @RWD00]
test r8d, r8d
test r8d, r8d
je SHORT G_M58342_IG05
je SHORT G_M58342_IG05
;; bbWeight=1 PerfScore 4.50
;; bbWeight=1 PerfScore 4.50
; arg2 != 0 path: xmm1 = RWD16, whose 16 bytes are the ASCII characters
; "0123456789abcdef" (lowercase digit table), then skip over IG05.
; (Each line appears twice because two listings are interleaved.)
G_M58342_IG04:
G_M58342_IG04:
vmovupd xmm1, xmmword ptr [reloc @RWD16]
vmovupd xmm1, xmmword ptr [reloc @RWD16]
jmp SHORT G_M58342_IG06
jmp SHORT G_M58342_IG06
;; bbWeight=0.50 PerfScore 2.50
;; bbWeight=0.50 PerfScore 2.50
; arg2 == 0 path: xmm1 = RWD32, whose 16 bytes are the ASCII characters
; "0123456789ABCDEF" (uppercase digit table). Falls through to IG06.
; (Each line appears twice because two listings are interleaved.)
G_M58342_IG05:
G_M58342_IG05:
vmovupd xmm1, xmmword ptr [reloc @RWD32]
vmovupd xmm1, xmmword ptr [reloc @RWD32]
;; bbWeight=0.50 PerfScore 1.50
;; bbWeight=0.50 PerfScore 1.50
; Loop preheader. Two listings of the same method are interleaved here: the
; first line of each pair belongs to the 74-instruction listing, the second to
; the 76-instruction listing (see the "Total bytes of code" trailer lines).
G_M58342_IG06:
G_M58342_IG06:
; The next two loads have no counterpart in the first listing. They hoist the
; nibble mask (RWD48) and the 16-bit lane mask (RWD64) into xmm2/xmm3, matching
; the "cse2 -> mm2" / "cse3 -> mm3" rows that only the second variable table
; has. They account for the +2 instructions and the higher PerfScore of this
; group (7.00 vs 1.00).
vmovupd xmm2, xmmword ptr [reloc @RWD48]
vmovupd xmm3, xmmword ptr [reloc @RWD64]
; r10 = (long)(arg0._length - 3): the limit against which the vector loop
; compares r9 after each 4-byte step.
lea r10d, [rcx-3]
lea r10d, [rcx-3]
movsxd r10, r10d
movsxd r10, r10d
; Loop-head alignment padding differs between the listings (3 bytes vs 1 byte).
align [3 bytes]
align [1 bytes]
;; bbWeight=1 PerfScore 1.00
;; bbWeight=1 PerfScore 7.00
; Vector loop: each iteration turns 4 input bytes into 8 UTF-16 hex characters
; (16 bytes of output).
; Two listings are interleaved: the first line of each pair is the
; 74-instruction listing (working registers xmm2/xmm3, masks read from memory),
; the second is the 76-instruction listing (working registers xmm4/xmm5, masks
; taken from xmm2/xmm3 that were loaded once in IG06).
; NOTE(review): the body executes once before r9 is compared with r10, so the
; caller presumably guarantees at least 4 input bytes and enough destination
; space; no bounds check is emitted in this loop. Confirm against the C# source.
G_M58342_IG07:
G_M58342_IG07:
; r11d = 4 input bytes at arg0._pointer + r9.
mov r11d, dword ptr [rax+r9]
mov r11d, dword ptr [rax+r9]
vmovd xmm2, r11d
vmovd xmm4, r11d
; Spread the bytes with the RWD00 mask: input byte k lands in byte 2 of dword k,
; every other byte is zeroed (mask bytes of FFh zero the lane).
vpshufb xmm2, xmm2, xmm0
vpshufb xmm4, xmm4, xmm0
; Shift a copy right by 2 bytes and then by 4 bits inside each dword, so the
; high nibble of input byte k ends up in the low nibble of byte 0 of dword k.
vpsrldq xmm3, xmm2, 2
vpsrldq xmm5, xmm4, 2
vpsrld xmm3, xmm3, 4
vpsrld xmm5, xmm5, 4
vpor xmm2, xmm2, xmm3
vpor xmm4, xmm4, xmm5
; Keep only the low nibble of every byte (mask 0Fh): byte 0 of each dword holds
; the high nibble, byte 2 holds the low nibble.
vpand xmm2, xmm2, xmmword ptr [reloc @RWD48]
vpand xmm4, xmm4, xmm2
; Table lookup: each nibble indexes the 16-entry ASCII digit table in xmm1.
vpshufb xmm2, xmm1, xmm2
vpshufb xmm4, xmm1, xmm4
; Clear the odd bytes (mask 00FFh per 16-bit lane) so each digit becomes one
; UTF-16 code unit; the odd bytes otherwise hold table[0].
vpand xmm2, xmm2, xmmword ptr [reloc @RWD64]
vpand xmm4, xmm4, xmm3
; r11 = arg1._pointer, reloaded every iteration; store 16 bytes at byte offset
; 4*r9 (two 2-byte characters per input byte).
mov r11, bword ptr [rdx]
mov r11, bword ptr [rdx]
vmovupd xmmword ptr [r11+4*r9], xmm2
vmovupd xmmword ptr [r11+4*r9], xmm4
; Advance by 4 input bytes; continue while r9 < length - 3 (signed compare).
add r9, 4
add r9, 4
cmp r9, r10
cmp r9, r10
jl SHORT G_M58342_IG07
jl SHORT G_M58342_IG07
;; bbWeight=8 PerfScore 150.67
;; bbWeight=8 PerfScore 124.00
; Tail guard: widen arg0._length to 64 bits and go straight to the epilog
; (IG10) when the vector loop already consumed every input byte (r9 >= length).
; Otherwise fall through to the scalar loop in IG09.
; (Each line appears twice because two listings are interleaved.)
G_M58342_IG08:
G_M58342_IG08:
movsxd rcx, ecx
movsxd rcx, ecx
cmp r9, rcx
cmp r9, rcx
jge SHORT G_M58342_IG10
jge SHORT G_M58342_IG10
align [0 bytes]
align [0 bytes]
;; bbWeight=1 PerfScore 1.75
;; bbWeight=1 PerfScore 1.75
; Scalar tail loop: converts one remaining input byte per iteration into two
; bounds-checked UTF-16 characters.
; (Each line appears twice because two listings are interleaved; this group is
; identical in both.)
G_M58342_IG09:
G_M58342_IG09:
; r10 = input byte b at arg0._pointer + r9 (zero-extended).
movzx r10, byte ptr [rax+r9]
movzx r10, byte ptr [rax+r9]
; r11 = arg1._pointer, esi = arg1._length, edi = 2*r9 (index of the first
; output character for this byte).
mov r11, bword ptr [rdx]
mov r11, bword ptr [rdx]
mov esi, dword ptr [rdx+8]
mov esi, dword ptr [rdx+8]
lea edi, [r9+r9]
lea edi, [r9+r9]
; Branch-free nibble-to-ASCII conversion of both nibbles at once:
;   diff   = ((b & F0h) << 4) + (b & 0Fh) - 8989h
;   packed = (((-diff) & 7070h) >> 4) + diff + B9B9h
; leaving the high-nibble digit in bits 15..8 and the low-nibble digit in
; bits 7..0 of r10d.
mov ebx, r10d
mov ebx, r10d
and ebx, 240
and ebx, 240
shl ebx, 4
shl ebx, 4
and r10d, 15
and r10d, 15
lea r10d, [rbx+r10-8989H]
lea r10d, [rbx+r10-8989H]
mov ebx, r10d
mov ebx, r10d
neg ebx
neg ebx
and ebx, 0x7070
and ebx, 0x7070
shr ebx, 4
shr ebx, 4
lea r10d, [rbx+r10+B9B9H]
lea r10d, [rbx+r10+B9B9H]
; OR arg2 into both digits. NOTE(review): presumably a casing mask (e.g. 2020h
; to force lowercase); confirm against the C# source.
or r10d, r8d
or r10d, r8d
; Bounds-check index 2*r9+1 against arg1._length (unsigned), then store the
; low-nibble digit there as a 16-bit character.
lea ebx, [rdi+1]
lea ebx, [rdi+1]
cmp ebx, esi
cmp ebx, esi
jae SHORT G_M58342_IG11
jae SHORT G_M58342_IG11
movsxd rbx, ebx
movsxd rbx, ebx
mov ebp, r10d
mov ebp, r10d
and ebp, 255
and ebp, 255
mov word ptr [r11+2*rbx], bp
mov word ptr [r11+2*rbx], bp
; Bounds-check index 2*r9, then store the high-nibble digit (packed >> 8).
cmp edi, esi
cmp edi, esi
jae SHORT G_M58342_IG11
jae SHORT G_M58342_IG11
movsxd rsi, edi
movsxd rsi, edi
shr r10d, 8
shr r10d, 8
mov word ptr [r11+2*rsi], r10w
mov word ptr [r11+2*rsi], r10w
; Next input byte; continue while r9 < arg0._length (signed compare).
inc r9
inc r9
cmp r9, rcx
cmp r9, rcx
jl SHORT G_M58342_IG09
jl SHORT G_M58342_IG09
;; bbWeight=4 PerfScore 95.00
;; bbWeight=4 PerfScore 95.00
; Epilog: release the 40-byte frame and restore the saved registers in the
; reverse order of the prolog pushes, then return.
; (Each line appears twice because two listings are interleaved.)
G_M58342_IG10:
G_M58342_IG10:
add rsp, 40
add rsp, 40
pop rbx
pop rbx
pop rbp
pop rbp
pop rsi
pop rsi
pop rdi
pop rdi
ret
ret
;; bbWeight=1 PerfScore 3.25
;; bbWeight=1 PerfScore 3.25
; Cold path (bbWeight=0) reached from the two unsigned bounds checks in IG09:
; calls the runtime's range-check-failure helper. The int3 after the call
; suggests the helper is not expected to return.
; (Each line appears twice because two listings are interleaved.)
G_M58342_IG11:
G_M58342_IG11:
call CORINFO_HELP_RNGCHKFAIL
call CORINFO_HELP_RNGCHKFAIL
int3
int3
;; bbWeight=0 PerfScore 0.00
;; bbWeight=0 PerfScore 0.00
; Read-only 16-byte constants referenced by the code above. Each line appears
; twice because two listings are interleaved; the data is identical in both.
; RWD00: byte-shuffle mask. Source byte k is placed in byte 2 of dword k and
; every other byte (index FFh) is zeroed.
RWD00 dq FF01FFFFFF00FFFFh, FF03FFFFFF02FFFFh
RWD00 dq FF01FFFFFF00FFFFh, FF03FFFFFF02FFFFh
; RWD16: ASCII "0123456789abcdef" (lowercase digit table, loaded in IG04).
RWD16 dq 3736353433323130h, 6665646362613938h
RWD16 dq 3736353433323130h, 6665646362613938h
; RWD32: ASCII "0123456789ABCDEF" (uppercase digit table, loaded in IG05).
RWD32 dq 3736353433323130h, 4645444342413938h
RWD32 dq 3736353433323130h, 4645444342413938h
; RWD48: 0Fh in every byte; keeps the low nibble of each byte.
RWD48 dq 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh
RWD48 dq 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh
; RWD64: 00FFh in every 16-bit lane; keeps the low byte of each UTF-16 unit.
RWD64 dq 00FF00FF00FF00FFh, 00FF00FF00FF00FFh
RWD64 dq 00FF00FF00FF00FFh, 00FF00FF00FF00FFh
; Total bytes of code 266, prolog size 11, PerfScore 296.92, instruction count 74, allocated bytes for code 275 (MethodHash=a96a1c19) for method HexConverter:EncodeToUtf16_Ssse3(ReadOnlySpan`1,Span`1,int)
; Total bytes of code 274, prolog size 11, PerfScore 277.05, instruction count 76, allocated bytes for code 283 (MethodHash=a96a1c19) for method HexConverter:EncodeToUtf16_Ssse3(ReadOnlySpan`1,Span`1,int)
; ============================================================
; ============================================================