bank conflict debug

Created Diff never expires
2つのテキストは同一です
これら2つのテキストの間に違いはありません
0 削除
合計
削除
単語
合計
削除
この機能を引き続き使用するには、アップグレードしてください
Diffchecker logo
Diffchecker Pro
317
0 追加
合計
追加
単語
合計
追加
この機能を引き続き使用するには、アップグレードしてください
Diffchecker logo
Diffchecker Pro
317


Fatbin ptx code:
Fatbin ptx code:
================
================
arch = sm_70
arch = sm_70
code version = [8,2]
code version = [8,2]
host = linux
host = linux
compile_size = 64bit
compile_size = 64bit
compressed
compressed


Fatbin elf code:
Fatbin elf code:
================
================
arch = sm_70
arch = sm_70
code version = [1,7]
code version = [1,7]
host = linux
host = linux
compile_size = 64bit
compile_size = 64bit


code for sm_70
code for sm_70
Function : _ZN3cub45CUB_200200_700_720_750_800_860_870_890_900_NS11EmptyKernelIvEEvv
Function : _ZN3cub45CUB_200200_700_720_750_800_860_870_890_900_NS11EmptyKernelIvEEvv
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM70 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM70)"
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM70 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM70)"
/*0000*/ MOV R1, c[0x0][0x28] ; /* 0x00000a0000017a02 */
/*0000*/ MOV R1, c[0x0][0x28] ; /* 0x00000a0000017a02 */
/* 0x000fc60000000f00 */
/* 0x000fc60000000f00 */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/* 0x000fe200000e00ff */
/*0020*/ EXIT ; /* 0x000000000000794d */
/*0020*/ EXIT ; /* 0x000000000000794d */
/* 0x000fea0003800000 */
/* 0x000fea0003800000 */
/*0030*/ BRA 0x30; /* 0xfffffff000007947 */
/*0030*/ BRA 0x30; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/* 0x000fc0000383ffff */
/*0040*/ NOP; /* 0x0000000000007918 */
/*0040*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0050*/ NOP; /* 0x0000000000007918 */
/*0050*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0060*/ NOP; /* 0x0000000000007918 */
/*0060*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0070*/ NOP; /* 0x0000000000007918 */
/*0070*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
..........
..........




Function : _Z22copy_kernel_vectorizedIN7cutlass6half_tES1_N4cute6LayoutINS2_5tupleIJNS2_1CILi8EEENS5_ILi32EEEEEENS4_IJS7_NS5_ILi1EEEEEEEENS3_INS4_IJS9_S9_EEENS4_IJNS5_ILi0EEESD_EEEEEEvPKT_PKT0_T1_T2_
Function : _Z22copy_kernel_vectorizedIN7cutlass6half_tES1_N4cute6LayoutINS2_5tupleIJNS2_1CILi8EEENS5_ILi32EEEEEENS4_IJS7_NS5_ILi1EEEEEEEENS3_INS4_IJS9_S9_EEENS4_IJNS5_ILi0EEESD_EEEEEEvPKT_PKT0_T1_T2_
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM70 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM70)"
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM70 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM70)"
/*0000*/ IMAD.MOV.U32 R1, RZ, RZ, c[0x0][0x28] ; /* 0x00000a00ff017624 */
/*0000*/ IMAD.MOV.U32 R1, RZ, RZ, c[0x0][0x28] ; /* 0x00000a00ff017624 */
/* 0x000fc400078e00ff */
/* 0x000fc400078e00ff */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/* 0x000fe200000e00ff */
/*0020*/ S2R R0, SR_TID.X ; /* 0x0000000000007919 */
/*0020*/ S2R R0, SR_TID.X ; /* 0x0000000000007919 */
/* 0x000e220000002100 */
/* 0x000e220000002100 */
/*0030*/ IMAD.MOV.U32 R3, RZ, RZ, 0x2 ; /* 0x00000002ff037424 */
/*0030*/ IMAD.MOV.U32 R3, RZ, RZ, 0x2 ; /* 0x00000002ff037424 */
/* 0x000fe200078e00ff */
/* 0x000fe200078e00ff */
/*0040*/ MOV R4, RZ ; /* 0x000000ff00047202 */
/*0040*/ MOV R4, RZ ; /* 0x000000ff00047202 */
/* 0x000fc60000000f00 */
/* 0x000fc60000000f00 */
/*0050*/ IMAD.WIDE.U32 R2, R0, R3, c[0x0][0x160] ; /* 0x0000580000027625 */
/*0050*/ IMAD.WIDE.U32 R2, R0, R3, c[0x0][0x160] ; /* 0x0000580000027625 */
/* 0x001fc800078e0003 */
/* 0x001fc800078e0003 */
/*0060*/ IMAD.SHL.U32 R0, R0, 0x2, RZ ; /* 0x0000000200007824 */
/*0060*/ IMAD.SHL.U32 R0, R0, 0x2, RZ ; /* 0x0000000200007824 */
/* 0x000fe200078e00ff */
/* 0x000fe200078e00ff */
/*0070*/ IADD3 R2, P0, R2, -0x200, RZ ; /* 0xfffffe0002027810 */
/*0070*/ IADD3 R2, P0, R2, -0x200, RZ ; /* 0xfffffe0002027810 */
/* 0x000fc80007f1e0ff */
/* 0x000fc80007f1e0ff */
/*0080*/ IADD3.X R3, R3, -0x1, RZ, P0, !PT ; /* 0xffffffff03037810 */
/*0080*/ IADD3.X R3, R3, -0x1, RZ, P0, !PT ; /* 0xffffffff03037810 */
/* 0x000fc400007fe4ff */
/* 0x000fc400007fe4ff */
/*0090*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/*0090*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/* 0x000fe200000e00ff */
/*00a0*/ IADD3 R4, R4, 0x800, RZ ; /* 0x0000080004047810 */
/*00a0*/ IADD3 R4, R4, 0x800, RZ ; /* 0x0000080004047810 */
/* 0x000fe40007ffe0ff */
/* 0x000fe40007ffe0ff */
/*00b0*/ IADD3 R2, P1, R2, 0x200000, RZ ; /* 0x0020000002027810 */
/*00b0*/ IADD3 R2, P1, R2, 0x200000, RZ ; /* 0x0020000002027810 */
/* 0x000fe40007f3e0ff */
/* 0x000fe40007f3e0ff */
/*00c0*/ ISETP.NE.AND P0, PT, R4, 0x1000, PT ; /* 0x000010000400780c */
/*00c0*/ ISETP.NE.AND P0, PT, R4, 0x1000, PT ; /* 0x000010000400780c */
/* 0x000fe40003f05270 */
/* 0x000fe40003f05270 */
/*00d0*/ IADD3.X R3, RZ, R3, RZ, P1, !PT ; /* 0x00000003ff037210 */
/*00d0*/ IADD3.X R3, RZ, R3, RZ, P1, !PT ; /* 0x00000003ff037210 */
/* 0x000fd40000ffe4ff */
/* 0x000fd40000ffe4ff */
/*00e0*/ @P0 BRA 0x90 ; /* 0xffffffa000000947 */
/*00e0*/ @P0 BRA 0x90 ; /* 0xffffffa000000947 */
/* 0x000fea000383ffff */
/* 0x000fea000383ffff */
/*00f0*/ LDG.E.U16.CONSTANT.SYS R5, [R2+-0x200] ; /* 0xfffe000002057381 */
/*00f0*/ LDG.E.U16.CONSTANT.SYS R5, [R2+-0x200] ; /* 0xfffe000002057381 */
/* 0x000ea800001e6500 */
/* 0x000ea800001e6500 */
/*0100*/ LDG.E.U16.CONSTANT.SYS R7, [R2] ; /* 0x0000000002077381 */
/*0100*/ LDG.E.U16.CONSTANT.SYS R7, [R2] ; /* 0x0000000002077381 */
/* 0x000ee800001e6500 */
/* 0x000ee800001e6500 */
/*0110*/ STS.U16 [R0], R5 ; /* 0x0000000500007388 */
/*0110*/ STS.U16 [R0], R5 ; /* 0x0000000500007388 */
/* 0x004fe80000000400 */
/* 0x004fe80000000400 */
/*0120*/ STS.U16 [R0+0x200], R7 ; /* 0x0002000700007388 */
/*0120*/ STS.U16 [R0+0x200], R7 ; /* 0x0002000700007388 */
/* 0x008fe20000000400 */
/* 0x008fe20000000400 */
/*0130*/ EXIT ; /* 0x000000000000794d */
/*0130*/ EXIT ; /* 0x000000000000794d */
/* 0x000fea0003800000 */
/* 0x000fea0003800000 */
/*0140*/ BRA 0x140; /* 0xfffffff000007947 */
/*0140*/ BRA 0x140; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/* 0x000fc0000383ffff */
/*0150*/ NOP; /* 0x0000000000007918 */
/*0150*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0160*/ NOP; /* 0x0000000000007918 */
/*0160*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0170*/ NOP; /* 0x0000000000007918 */
/*0170*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
..........
..........






Fatbin ptx code:
Fatbin ptx code:
================
================
arch = sm_72
arch = sm_72
code version = [8,2]
code version = [8,2]
host = linux
host = linux
compile_size = 64bit
compile_size = 64bit
compressed
compressed


Fatbin elf code:
Fatbin elf code:
================
================
arch = sm_72
arch = sm_72
code version = [1,7]
code version = [1,7]
host = linux
host = linux
compile_size = 64bit
compile_size = 64bit


code for sm_72
code for sm_72
Function : _ZN3cub45CUB_200200_700_720_750_800_860_870_890_900_NS11EmptyKernelIvEEvv
Function : _ZN3cub45CUB_200200_700_720_750_800_860_870_890_900_NS11EmptyKernelIvEEvv
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM72 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM72)"
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM72 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM72)"
/*0000*/ MOV R1, c[0x0][0x28] ; /* 0x00000a0000017a02 */
/*0000*/ MOV R1, c[0x0][0x28] ; /* 0x00000a0000017a02 */
/* 0x000fc60000000f00 */
/* 0x000fc60000000f00 */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/* 0x000fe200000e00ff */
/*0020*/ EXIT ; /* 0x000000000000794d */
/*0020*/ EXIT ; /* 0x000000000000794d */
/* 0x000fea0003800000 */
/* 0x000fea0003800000 */
/*0030*/ BRA 0x30; /* 0xfffffff000007947 */
/*0030*/ BRA 0x30; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/* 0x000fc0000383ffff */
/*0040*/ NOP; /* 0x0000000000007918 */
/*0040*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0050*/ NOP; /* 0x0000000000007918 */
/*0050*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0060*/ NOP; /* 0x0000000000007918 */
/*0060*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0070*/ NOP; /* 0x0000000000007918 */
/*0070*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
..........
..........




Function : _Z22copy_kernel_vectorizedIN7cutlass6half_tES1_N4cute6LayoutINS2_5tupleIJNS2_1CILi8EEENS5_ILi32EEEEEENS4_IJS7_NS5_ILi1EEEEEEEENS3_INS4_IJS9_S9_EEENS4_IJNS5_ILi0EEESD_EEEEEEvPKT_PKT0_T1_T2_
Function : _Z22copy_kernel_vectorizedIN7cutlass6half_tES1_N4cute6LayoutINS2_5tupleIJNS2_1CILi8EEENS5_ILi32EEEEEENS4_IJS7_NS5_ILi1EEEEEEEENS3_INS4_IJS9_S9_EEENS4_IJNS5_ILi0EEESD_EEEEEEvPKT_PKT0_T1_T2_
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM72 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM72)"
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM72 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM72)"
/*0000*/ IMAD.MOV.U32 R1, RZ, RZ, c[0x0][0x28] ; /* 0x00000a00ff017624 */
/*0000*/ IMAD.MOV.U32 R1, RZ, RZ, c[0x0][0x28] ; /* 0x00000a00ff017624 */
/* 0x000fc400078e00ff */
/* 0x000fc400078e00ff */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/*0010*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/* 0x000fe200000e00ff */
/*0020*/ S2R R0, SR_TID.X ; /* 0x0000000000007919 */
/*0020*/ S2R R0, SR_TID.X ; /* 0x0000000000007919 */
/* 0x000e220000002100 */
/* 0x000e220000002100 */
/*0030*/ IMAD.MOV.U32 R3, RZ, RZ, 0x2 ; /* 0x00000002ff037424 */
/*0030*/ IMAD.MOV.U32 R3, RZ, RZ, 0x2 ; /* 0x00000002ff037424 */
/* 0x000fe200078e00ff */
/* 0x000fe200078e00ff */
/*0040*/ MOV R4, RZ ; /* 0x000000ff00047202 */
/*0040*/ MOV R4, RZ ; /* 0x000000ff00047202 */
/* 0x000fc60000000f00 */
/* 0x000fc60000000f00 */
/*0050*/ IMAD.WIDE.U32 R2, R0, R3, c[0x0][0x160] ; /* 0x0000580000027625 */
/*0050*/ IMAD.WIDE.U32 R2, R0, R3, c[0x0][0x160] ; /* 0x0000580000027625 */
/* 0x001fc800078e0003 */
/* 0x001fc800078e0003 */
/*0060*/ IMAD.SHL.U32 R0, R0, 0x2, RZ ; /* 0x0000000200007824 */
/*0060*/ IMAD.SHL.U32 R0, R0, 0x2, RZ ; /* 0x0000000200007824 */
/* 0x000fe200078e00ff */
/* 0x000fe200078e00ff */
/*0070*/ IADD3 R2, P0, R2, -0x200, RZ ; /* 0xfffffe0002027810 */
/*0070*/ IADD3 R2, P0, R2, -0x200, RZ ; /* 0xfffffe0002027810 */
/* 0x000fc80007f1e0ff */
/* 0x000fc80007f1e0ff */
/*0080*/ IADD3.X R3, R3, -0x1, RZ, P0, !PT ; /* 0xffffffff03037810 */
/*0080*/ IADD3.X R3, R3, -0x1, RZ, P0, !PT ; /* 0xffffffff03037810 */
/* 0x000fc400007fe4ff */
/* 0x000fc400007fe4ff */
/*0090*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/*0090*/ @!PT SHFL.IDX PT, RZ, RZ, RZ, RZ ; /* 0x000000fffffff389 */
/* 0x000fe200000e00ff */
/* 0x000fe200000e00ff */
/*00a0*/ IADD3 R4, R4, 0x800, RZ ; /* 0x0000080004047810 */
/*00a0*/ IADD3 R4, R4, 0x800, RZ ; /* 0x0000080004047810 */
/* 0x000fe40007ffe0ff */
/* 0x000fe40007ffe0ff */
/*00b0*/ IADD3 R2, P1, R2, 0x200000, RZ ; /* 0x0020000002027810 */
/*00b0*/ IADD3 R2, P1, R2, 0x200000, RZ ; /* 0x0020000002027810 */
/* 0x000fe40007f3e0ff */
/* 0x000fe40007f3e0ff */
/*00c0*/ ISETP.NE.AND P0, PT, R4, 0x1000, PT ; /* 0x000010000400780c */
/*00c0*/ ISETP.NE.AND P0, PT, R4, 0x1000, PT ; /* 0x000010000400780c */
/* 0x000fe40003f05270 */
/* 0x000fe40003f05270 */
/*00d0*/ IADD3.X R3, RZ, R3, RZ, P1, !PT ; /* 0x00000003ff037210 */
/*00d0*/ IADD3.X R3, RZ, R3, RZ, P1, !PT ; /* 0x00000003ff037210 */
/* 0x000fd40000ffe4ff */
/* 0x000fd40000ffe4ff */
/*00e0*/ @P0 BRA 0x90 ; /* 0xffffffa000000947 */
/*00e0*/ @P0 BRA 0x90 ; /* 0xffffffa000000947 */
/* 0x000fea000383ffff */
/* 0x000fea000383ffff */
/*00f0*/ LDG.E.U16.CONSTANT.SYS R5, [R2+-0x200] ; /* 0xfffe000002057381 */
/*00f0*/ LDG.E.U16.CONSTANT.SYS R5, [R2+-0x200] ; /* 0xfffe000002057381 */
/* 0x000ea800001e6500 */
/* 0x000ea800001e6500 */
/*0100*/ LDG.E.U16.CONSTANT.SYS R7, [R2] ; /* 0x0000000002077381 */
/*0100*/ LDG.E.U16.CONSTANT.SYS R7, [R2] ; /* 0x0000000002077381 */
/* 0x000ee800001e6500 */
/* 0x000ee800001e6500 */
/*0110*/ STS.U16 [R0], R5 ; /* 0x0000000500007388 */
/*0110*/ STS.U16 [R0], R5 ; /* 0x0000000500007388 */
/* 0x004fe80000000400 */
/* 0x004fe80000000400 */
/*0120*/ STS.U16 [R0+0x200], R7 ; /* 0x0002000700007388 */
/*0120*/ STS.U16 [R0+0x200], R7 ; /* 0x0002000700007388 */
/* 0x008fe20000000400 */
/* 0x008fe20000000400 */
/*0130*/ EXIT ; /* 0x000000000000794d */
/*0130*/ EXIT ; /* 0x000000000000794d */
/* 0x000fea0003800000 */
/* 0x000fea0003800000 */
/*0140*/ BRA 0x140; /* 0xfffffff000007947 */
/*0140*/ BRA 0x140; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/* 0x000fc0000383ffff */
/*0150*/ NOP; /* 0x0000000000007918 */
/*0150*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0160*/ NOP; /* 0x0000000000007918 */
/*0160*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0170*/ NOP; /* 0x0000000000007918 */
/*0170*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
..........
..........






Fatbin ptx code:
Fatbin ptx code:
================
================
arch = sm_75
arch = sm_75
code version = [8,2]
code version = [8,2]
host = linux
host = linux
compile_size = 64bit
compile_size = 64bit
compressed
compressed


Fatbin elf code:
Fatbin elf code:
================
================
arch = sm_75
arch = sm_75
code version = [1,7]
code version = [1,7]
host = linux
host = linux
compile_size = 64bit
compile_size = 64bit


code for sm_75
code for sm_75
Function : _ZN3cub45CUB_200200_700_720_750_800_860_870_890_900_NS11EmptyKernelIvEEvv
Function : _ZN3cub45CUB_200200_700_720_750_800_860_870_890_900_NS11EmptyKernelIvEEvv
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM75 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM75)"
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM75 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM75)"
/*0000*/ MOV R1, c[0x0][0x28] ; /* 0x00000a0000017a02 */
/*0000*/ MOV R1, c[0x0][0x28] ; /* 0x00000a0000017a02 */
/* 0x000fc60000000f00 */
/* 0x000fc60000000f00 */
/*0010*/ EXIT ; /* 0x000000000000794d */
/*0010*/ EXIT ; /* 0x000000000000794d */
/* 0x000fea0003800000 */
/* 0x000fea0003800000 */
/*0020*/ BRA 0x20; /* 0xfffffff000007947 */
/*0020*/ BRA 0x20; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/* 0x000fc0000383ffff */
/*0030*/ NOP; /* 0x0000000000007918 */
/*0030*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0040*/ NOP; /* 0x0000000000007918 */
/*0040*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0050*/ NOP; /* 0x0000000000007918 */
/*0050*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0060*/ NOP; /* 0x0000000000007918 */
/*0060*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0070*/ NOP; /* 0x0000000000007918 */
/*0070*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
..........
..........




Function : _Z22copy_kernel_vectorizedIN7cutlass6half_tES1_N4cute6LayoutINS2_5tupleIJNS2_1CILi8EEENS5_ILi32EEEEEENS4_IJS7_NS5_ILi1EEEEEEEENS3_INS4_IJS9_S9_EEENS4_IJNS5_ILi0EEESD_EEEEEEvPKT_PKT0_T1_T2_
Function : _Z22copy_kernel_vectorizedIN7cutlass6half_tES1_N4cute6LayoutINS2_5tupleIJNS2_1CILi8EEENS5_ILi32EEEEEENS4_IJS7_NS5_ILi1EEEEEEEENS3_INS4_IJS9_S9_EEENS4_IJNS5_ILi0EEESD_EEEEEEvPKT_PKT0_T1_T2_
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM75 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM75)"
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM75 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM75)"
/*0000*/ IMAD.MOV.U32 R1, RZ, RZ, c[0x0][0x28] ; /* 0x00000a00ff017624 */
/*0000*/ IMAD.MOV.U32 R1, RZ, RZ, c[0x0][0x28] ; /* 0x00000a00ff017624 */
/* 0x000fc400078e00ff */
/* 0x000fc400078e00ff */
/*0010*/ S2R R0, SR_TID.X ; /* 0x0000000000007919 */
/*0010*/ S2R R0, SR_TID.X ; /* 0x0000000000007919 */
/* 0x000e220000002100 */
/* 0x000e220000002100 */
/*0020*/ IMAD.MOV.U32 R3, RZ, RZ, 0x2 ; /* 0x00000002ff037424 */
/*0020*/ IMAD.MOV.U32 R3, RZ, RZ, 0x2 ; /* 0x00000002ff037424 */
/* 0x000fe200078e00ff */
/* 0x000fe200078e00ff */
/*0030*/ MOV R4, RZ ; /* 0x000000ff00047202 */
/*0030*/ MOV R4, RZ ; /* 0x000000ff00047202 */
/* 0x000fc60000000f00 */
/* 0x000fc60000000f00 */
/*0040*/ IMAD.WIDE.U32 R2, R0, R3, c[0x0][0x160] ; /* 0x0000580000027625 */
/*0040*/ IMAD.WIDE.U32 R2, R0, R3, c[0x0][0x160] ; /* 0x0000580000027625 */
/* 0x001fc800078e0003 */
/* 0x001fc800078e0003 */
/*0050*/ IMAD.SHL.U32 R0, R0, 0x2, RZ ; /* 0x0000000200007824 */
/*0050*/ IMAD.SHL.U32 R0, R0, 0x2, RZ ; /* 0x0000000200007824 */
/* 0x000fe200078e00ff */
/* 0x000fe200078e00ff */
/*0060*/ IADD3 R2, P0, R2, -0x200, RZ ; /* 0xfffffe0002027810 */
/*0060*/ IADD3 R2, P0, R2, -0x200, RZ ; /* 0xfffffe0002027810 */
/* 0x000fc80007f1e0ff */
/* 0x000fc80007f1e0ff */
/*0070*/ IADD3.X R3, R3, -0x1, RZ, P0, !PT ; /* 0xffffffff03037810 */
/*0070*/ IADD3.X R3, R3, -0x1, RZ, P0, !PT ; /* 0xffffffff03037810 */
/* 0x000fc400007fe4ff */
/* 0x000fc400007fe4ff */
/*0080*/ IADD3 R4, R4, 0x800, RZ ; /* 0x0000080004047810 */
/*0080*/ IADD3 R4, R4, 0x800, RZ ; /* 0x0000080004047810 */
/* 0x000fe40007ffe0ff */
/* 0x000fe40007ffe0ff */
/*0090*/ IADD3 R2, P1, R2, 0x200000, RZ ; /* 0x0020000002027810 */
/*0090*/ IADD3 R2, P1, R2, 0x200000, RZ ; /* 0x0020000002027810 */
/* 0x000fe40007f3e0ff */
/* 0x000fe40007f3e0ff */
/*00a0*/ ISETP.NE.AND P0, PT, R4, 0x1000, PT ; /* 0x000010000400780c */
/*00a0*/ ISETP.NE.AND P0, PT, R4, 0x1000, PT ; /* 0x000010000400780c */
/* 0x000fe40003f05270 */
/* 0x000fe40003f05270 */
/*00b0*/ IADD3.X R3, RZ, R3, RZ, P1, !PT ; /* 0x00000003ff037210 */
/*00b0*/ IADD3.X R3, RZ, R3, RZ, P1, !PT ; /* 0x00000003ff037210 */
/* 0x000fd40000ffe4ff */
/* 0x000fd40000ffe4ff */
/*00c0*/ @P0 BRA 0x80 ; /* 0xffffffb000000947 */
/*00c0*/ @P0 BRA 0x80 ; /* 0xffffffb000000947 */
/* 0x000fea000383ffff */
/* 0x000fea000383ffff */
/*00d0*/ LDG.E.U16.CONSTANT.SYS R5, [R2+-0x200] ; /* 0xfffe000002057381 */
/*00d0*/ LDG.E.U16.CONSTANT.SYS R5, [R2+-0x200] ; /* 0xfffe000002057381 */
/* 0x000ea800001e6500 */
/* 0x000ea800001e6500 */
/*00e0*/ LDG.E.U16.CONSTANT.SYS R7, [R2] ; /* 0x0000000002077381 */
/*00e0*/ LDG.E.U16.CONSTANT.SYS R7, [R2] ; /* 0x0000000002077381 */
/* 0x000ee800001e6500 */
/* 0x000ee800001e6500 */
/*00f0*/ STS.U16 [R0], R5 ; /* 0x0000000500007388 */
/*00f0*/ STS.U16 [R0], R5 ; /* 0x0000000500007388 */
/* 0x004fe80000000400 */
/* 0x004fe80000000400 */
/*0100*/ STS.U16 [R0+0x200], R7 ; /* 0x0002000700007388 */
/*0100*/ STS.U16 [R0+0x200], R7 ; /* 0x0002000700007388 */
/* 0x008fe20000000400 */
/* 0x008fe20000000400 */
/*0110*/ EXIT ; /* 0x000000000000794d */
/*0110*/ EXIT ; /* 0x000000000000794d */
/* 0x000fea0003800000 */
/* 0x000fea0003800000 */
/*0120*/ BRA 0x120; /* 0xfffffff000007947 */
/*0120*/ BRA 0x120; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/* 0x000fc0000383ffff */
/*0130*/ NOP; /* 0x0000000000007918 */
/*0130*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0140*/ NOP; /* 0x0000000000007918 */
/*0140*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0150*/ NOP; /* 0x0000000000007918 */
/*0150*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0160*/ NOP; /* 0x0000000000007918 */
/*0160*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0170*/ NOP; /* 0x0000000000007918 */
/*0170*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
..........
..........






Fatbin ptx code:
Fatbin ptx code:
================
================
arch = sm_80
arch = sm_80
code version = [8,2]
code version = [8,2]
host = linux
host = linux
compile_size = 64bit
compile_size = 64bit
compressed
compressed


Fatbin elf code:
Fatbin elf code:
================
================
arch = sm_80
arch = sm_80
code version = [1,7]
code version = [1,7]
host = linux
host = linux
compile_size = 64bit
compile_size = 64bit


code for sm_80
code for sm_80
Function : _ZN3cub45CUB_200200_700_720_750_800_860_870_890_900_NS11EmptyKernelIvEEvv
Function : _ZN3cub45CUB_200200_700_720_750_800_860_870_890_900_NS11EmptyKernelIvEEvv
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM80 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM80)"
.headerflags @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM80 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM80)"
/*0000*/ MOV R1, c[0x0][0x28] ; /* 0x00000a0000017a02 */
/*0000*/ MOV R1, c[0x0][0x28] ; /* 0x00000a0000017a02 */
/* 0x000fc40000000f00 */
/* 0x000fc40000000f00 */
/*0010*/ EXIT ; /* 0x000000000000794d */
/*0010*/ EXIT ; /* 0x000000000000794d */
/* 0x000fea0003800000 */
/* 0x000fea0003800000 */
/*0020*/ BRA 0x20; /* 0xfffffff000007947 */
/*0020*/ BRA 0x20; /* 0xfffffff000007947 */
/* 0x000fc0000383ffff */
/* 0x000fc0000383ffff */
/*0030*/ NOP; /* 0x0000000000007918 */
/*0030*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0040*/ NOP; /* 0x0000000000007918 */
/*0040*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0050*/ NOP; /* 0x0000000000007918 */
/*0050*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0060*/ NOP; /* 0x0000000000007918 */
/*0060*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0070*/ NOP; /* 0x0000000000007918 */
/*0070*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0080*/ NOP; /* 0x0000000000007918 */
/*0080*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*0090*/ NOP; /* 0x0000000000007918 */
/*0090*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*00a0*/ NOP; /* 0x0000000000007918 */
/*00a0*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/*00b0*/ NOP; /* 0x0000000000007918 */
/*00b0*/ NOP; /* 0x0000000000007918 */
/* 0x000fc00000000000 */
/* 0x000fc00000000000 */
/
/