//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: UNKNOWN
// Unknown Toolkit Version
// Based on NVVM 7.0.1
//

.version 8.5
.target sm_86, texmode_independent
.address_size 64

	// .globl	ConcatImageFromPlanarKernel

//-----------------------------------------------------------------------
// ConcatImageFromPlanarKernel
//
// One thread handles one pixel (col, row) of a 2048 x 2048 grid and
// writes one 4 x f32 vector:
//
//   dst[row*2048 + col] = { plane0, plane1, plane2, w }
//
// where planeK is the f32 at param_0 + K*16777216 bytes + 4*(row*2048+col)
// (16777216 = 2048 * 2048 * 4, i.e. three consecutive f32 planes), and
// w is the 4th component of the vector read from param_1 at the clamped
// pixel position, replaced by +0.0 when any of that vector's four
// components is Inf or NaN.
//
// In:   param_0 = global pointer, 4-byte aligned  (planar f32 input)
//       param_1 = global pointer, 16-byte aligned (4 x f32 input)
//       param_2 = global pointer, 16-byte aligned (4 x f32 output)
// Out:  one st.global.v4.f32 per in-range thread; nothing otherwise.
// NOTE(review): %envreg3 / %envreg4 are driver-defined; presumably they
// carry the OpenCL global work offset for x / y -- confirm with the host.
//-----------------------------------------------------------------------
.entry ConcatImageFromPlanarKernel(
	.param .u64 .ptr .global .align 4 ConcatImageFromPlanarKernel_param_0,
	.param .u64 .ptr .global .align 16 ConcatImageFromPlanarKernel_param_1,
	.param .u64 .ptr .global .align 16 ConcatImageFromPlanarKernel_param_2
)
{
	.reg .pred	%oob_x, %oob_y, %oob;
	.reg .pred	%nf_x, %nf_y, %nf_z, %nf_w, %nf_any;
	.reg .f32	%sx, %sy, %sz, %sw;
	.reg .f32	%ax, %ay, %az, %aw;
	.reg .f32	%ch0, %ch1, %ch2, %ow;
	.reg .b32	%col, %row, %lin;
	.reg .b32	%ccol, %crow, %clin;
	.reg .b32	%lid, %base, %bdim, %bid;
	.reg .b64	%planar, %src, %dst;
	.reg .b64	%boff, %planar_px, %src_px, %dst_px;

	// ---- kernel arguments ----
	ld.param.u64	%planar, [ConcatImageFromPlanarKernel_param_0];
	ld.param.u64	%src, [ConcatImageFromPlanarKernel_param_1];
	ld.param.u64	%dst, [ConcatImageFromPlanarKernel_param_2];

	// ---- col = ntid.x * ctaid.x + (tid.x + envreg3) ----
	mov.u32 	%lid, %tid.x;
	mov.b32 	%base, %envreg3;
	add.s32 	%lid, %lid, %base;
	mov.u32 	%bdim, %ntid.x;
	mov.u32 	%bid, %ctaid.x;
	mad.lo.s32 	%col, %bdim, %bid, %lid;

	// ---- row = ntid.y * ctaid.y + (tid.y + envreg4) ----
	mov.u32 	%lid, %tid.y;
	mov.b32 	%base, %envreg4;
	add.s32 	%lid, %lid, %base;
	mov.u32 	%bdim, %ntid.y;
	mov.u32 	%bid, %ctaid.y;
	mad.lo.s32 	%row, %bdim, %bid, %lid;

	// ---- guard: threads with col > 2047 or row > 2047 do nothing ----
	// (signed compares; negative indices are not rejected here)
	setp.gt.s32 	%oob_x, %col, 2047;
	setp.gt.s32 	%oob_y, %row, 2047;
	or.pred  	%oob, %oob_x, %oob_y;
	@%oob bra 	$L__done;

	// ---- lin = row * 2048 + col : shared index for planar input and output ----
	shl.b32 	%lin, %row, 11;
	add.s32 	%lin, %lin, %col;

	// planar_px = param_0 + 4 * lin   (sign-extended index)
	mul.wide.s32 	%boff, %lin, 4;
	add.s64 	%planar_px, %planar, %boff;

	// dst_px = param_2 + 16 * lin     (sign-extended index)
	mul.wide.s32 	%boff, %lin, 16;
	add.s64 	%dst_px, %dst, %boff;

	// ---- clamped index for the 4 x f32 input: both coords forced into [0, 2047] ----
	max.s32 	%ccol, %col, 0;
	min.s32 	%ccol, %ccol, 2047;
	max.s32 	%crow, %row, 0;
	min.s32 	%crow, %crow, 2047;
	shl.b32 	%clin, %crow, 11;
	or.b32  	%clin, %clin, %ccol;	// ccol < 2048, so OR == ADD here

	// src_px = param_1 + 16 * clin    (zero-extended index)
	mul.wide.u32 	%boff, %clin, 16;
	add.s64 	%src_px, %src, %boff;
	ld.global.nc.v4.f32 	{%sx, %sy, %sz, %sw}, [%src_px];

	// ---- non-finite test: |v| >= +Inf, or unordered (NaN), per component ----
	abs.ftz.f32 	%ax, %sx;
	abs.ftz.f32 	%ay, %sy;
	abs.ftz.f32 	%az, %sz;
	abs.ftz.f32 	%aw, %sw;
	setp.geu.ftz.f32 	%nf_x, %ax, 0f7F800000;
	setp.geu.ftz.f32 	%nf_y, %ay, 0f7F800000;
	setp.geu.ftz.f32 	%nf_z, %az, 0f7F800000;
	setp.geu.ftz.f32 	%nf_w, %aw, 0f7F800000;
	or.pred  	%nf_any, %nf_x, %nf_y;
	or.pred  	%nf_any, %nf_any, %nf_z;
	or.pred  	%nf_any, %nf_any, %nf_w;

	// 4th output lane: +0.0 if any source component is Inf/NaN, else source .w
	selp.f32 	%ow, 0f00000000, %sw, %nf_any;

	// ---- gather the three planes (16777216 bytes apart) ----
	ld.global.nc.f32 	%ch0, [%planar_px];
	ld.global.nc.f32 	%ch1, [%planar_px+16777216];
	ld.global.nc.f32 	%ch2, [%planar_px+33554432];

	// ---- write the interleaved vector ----
	st.global.v4.f32 	[%dst_px], {%ch0, %ch1, %ch2, %ow};

$L__done:
	ret;

}