//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//
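// Contents (inferred from the symbols and code below): bilinear pixel
// interpolation device helpers, an RGB-to-gray kernel, bilinear and
// nearest-neighbor image/flow resize kernels, image fill kernels, float2
// plane packing/unpacking kernels, a scalar multiply kernel, and 255-range
// scale/unscale conversion kernels.
//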

.version 4.1
.target sm_30
.address_size 64


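// _Z32__d_bilinear_interp_pixel_float4P6float4iiiff demangles to
// __d_bilinear_interp_pixel_float4(float4*, int, int, int, float, float).
// Judging from the code, the parameters are likely (src, stride, width,
// height, x, y): the helper clamps the four integer neighbors of the
// fractional sample position (x, y) to [0, width-1] x [0, height-1] and
// returns the bilinear blend of the corresponding float4 texels, roughly
//   p(x0,y0)*(1-dx)*(1-dy) + p(x0,y1)*(1-dx)*dy
// + p(x1,y0)*dx*(1-dy)     + p(x1,y1)*dx*dy
// with dx, dy the clamped fractional parts of x and y.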
.visible .func  (.param .align 16 .b8 func_retval0[16]) _Z32__d_bilinear_interp_pixel_float4P6float4iiiff(
	.param .b64 _Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_0,
	.param .b32 _Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_1,
	.param .b32 _Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_2,
	.param .b32 _Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_3,
	.param .b32 _Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_4,
	.param .b32 _Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_5
)
{
	.reg .pred 	%p<3>;
	.reg .s32 	%r<30>;
	.reg .f32 	%f<73>;
	.reg .s64 	%rd<10>;


	ld.param.u64 	%rd1, [_Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_0];
	ld.param.u32 	%r1, [_Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_1];
	ld.param.u32 	%r2, [_Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_2];
	ld.param.u32 	%r3, [_Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_3];
	ld.param.f32 	%f1, [_Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_4];
	ld.param.f32 	%f2, [_Z32__d_bilinear_interp_pixel_float4P6float4iiiff_param_5];
	cvt.rzi.ftz.s32.f32	%r4, %f1;
	cvt.rzi.ftz.s32.f32	%r5, %f2;
	cvt.rn.f32.s32	%f3, %r4;
	sub.ftz.f32 	%f4, %f1, %f3;
	cvt.rzi.ftz.s32.f32	%r6, %f4;
	mov.u32 	%r7, 1;
	min.s32 	%r8, %r6, %r7;
	setp.gt.s32	%p1, %r8, 0;
	mov.u32 	%r9, 0;
	cvt.rn.f32.s32	%f5, %r8;
	selp.f32	%f6, %f5, 0f00000000, %p1;
	mov.f32 	%f7, 0f00000000;
	cvt.rn.f32.s32	%f8, %r5;
	sub.ftz.f32 	%f9, %f2, %f8;
	cvt.rzi.ftz.s32.f32	%r10, %f9;
	min.s32 	%r11, %r10, %r7;
	setp.gt.s32	%p2, %r11, 0;
	cvt.rn.f32.s32	%f10, %r11;
	selp.f32	%f11, %f10, 0f00000000, %p2;
	add.s32 	%r12, %r2, -1;
	add.s32 	%r13, %r3, -1;
	min.s32 	%r14, %r13, %r5;
	max.s32 	%r15, %r9, %r14;
	mov.f32 	%f12, 0f3F800000;
	sub.ftz.f32 	%f13, %f12, %f11;
	min.s32 	%r16, %r12, %r4;
	max.s32 	%r17, %r9, %r16;
	sub.ftz.f32 	%f14, %f12, %f6;
	abs.ftz.f32 	%f15, %f14;
	abs.ftz.f32 	%f16, %f13;
	mul.ftz.f32 	%f17, %f15, %f16;
	mul.lo.s32 	%r18, %r15, %r1;
	add.s32 	%r19, %r18, %r17;
	mul.wide.s32 	%rd2, %r19, 16;
	add.s64 	%rd3, %rd1, %rd2;
	ld.v4.f32 	{%f18, %f19, %f20, %f21}, [%rd3];
	fma.rn.ftz.f32 	%f23, %f18, %f17, 0f00000000;
	fma.rn.ftz.f32 	%f25, %f19, %f17, 0f00000000;
	fma.rn.ftz.f32 	%f27, %f20, %f17, 0f00000000;
	fma.rn.ftz.f32 	%f29, %f21, %f17, 0f00000000;
	add.s32 	%r20, %r5, 1;
	min.s32 	%r21, %r13, %r20;
	max.s32 	%r22, %r9, %r21;
	sub.ftz.f32 	%f30, %f7, %f11;
	abs.ftz.f32 	%f31, %f30;
	mul.ftz.f32 	%f32, %f15, %f31;
	mul.lo.s32 	%r23, %r22, %r1;
	add.s32 	%r24, %r23, %r17;
	mul.wide.s32 	%rd4, %r24, 16;
	add.s64 	%rd5, %rd1, %rd4;
	ld.v4.f32 	{%f33, %f34, %f35, %f36}, [%rd5];
	fma.rn.ftz.f32 	%f38, %f33, %f32, %f23;
	fma.rn.ftz.f32 	%f40, %f34, %f32, %f25;
	fma.rn.ftz.f32 	%f42, %f35, %f32, %f27;
	fma.rn.ftz.f32 	%f44, %f36, %f32, %f29;
	add.s32 	%r25, %r4, 1;
	min.s32 	%r26, %r12, %r25;
	max.s32 	%r27, %r9, %r26;
	sub.ftz.f32 	%f45, %f7, %f6;
	abs.ftz.f32 	%f46, %f45;
	mul.ftz.f32 	%f47, %f46, %f16;
	add.s32 	%r28, %r18, %r27;
	mul.wide.s32 	%rd6, %r28, 16;
	add.s64 	%rd7, %rd1, %rd6;
	ld.v4.f32 	{%f48, %f49, %f50, %f51}, [%rd7];
	fma.rn.ftz.f32 	%f53, %f48, %f47, %f38;
	fma.rn.ftz.f32 	%f55, %f49, %f47, %f40;
	fma.rn.ftz.f32 	%f57, %f50, %f47, %f42;
	fma.rn.ftz.f32 	%f59, %f51, %f47, %f44;
	mul.ftz.f32 	%f60, %f46, %f31;
	add.s32 	%r29, %r23, %r27;
	mul.wide.s32 	%rd8, %r29, 16;
	add.s64 	%rd9, %rd1, %rd8;
	ld.v4.f32 	{%f61, %f62, %f63, %f64}, [%rd9];
	fma.rn.ftz.f32 	%f66, %f61, %f60, %f53;
	fma.rn.ftz.f32 	%f68, %f62, %f60, %f55;
	fma.rn.ftz.f32 	%f70, %f63, %f60, %f57;
	fma.rn.ftz.f32 	%f72, %f64, %f60, %f59;
	st.param.f32	[func_retval0+0], %f66;
	st.param.f32	[func_retval0+4], %f68;
	st.param.f32	[func_retval0+8], %f70;
	st.param.f32	[func_retval0+12], %f72;
	ret;
}

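// _Z32__d_bilinear_interp_pixel_float2P6float2iiiff appears to be the float2
// counterpart of the helper above: same clamping and bilinear weighting, but
// reading and returning 8-byte float2 texels.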
.visible .func  (.param .align 8 .b8 func_retval0[8]) _Z32__d_bilinear_interp_pixel_float2P6float2iiiff(
	.param .b64 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_0,
	.param .b32 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_1,
	.param .b32 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_2,
	.param .b32 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_3,
	.param .b32 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_4,
	.param .b32 _Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_5
)
{
	.reg .pred 	%p<3>;
	.reg .s32 	%r<30>;
	.reg .f32 	%f<49>;
	.reg .s64 	%rd<10>;


	ld.param.u64 	%rd1, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_0];
	ld.param.u32 	%r1, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_1];
	ld.param.u32 	%r2, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_2];
	ld.param.u32 	%r3, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_3];
	ld.param.f32 	%f1, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_4];
	ld.param.f32 	%f2, [_Z32__d_bilinear_interp_pixel_float2P6float2iiiff_param_5];
	cvt.rzi.ftz.s32.f32	%r4, %f1;
	cvt.rzi.ftz.s32.f32	%r5, %f2;
	cvt.rn.f32.s32	%f3, %r4;
	sub.ftz.f32 	%f4, %f1, %f3;
	cvt.rzi.ftz.s32.f32	%r6, %f4;
	mov.u32 	%r7, 1;
	min.s32 	%r8, %r6, %r7;
	setp.gt.s32	%p1, %r8, 0;
	mov.u32 	%r9, 0;
	cvt.rn.f32.s32	%f5, %r8;
	selp.f32	%f6, %f5, 0f00000000, %p1;
	mov.f32 	%f7, 0f00000000;
	cvt.rn.f32.s32	%f8, %r5;
	sub.ftz.f32 	%f9, %f2, %f8;
	cvt.rzi.ftz.s32.f32	%r10, %f9;
	min.s32 	%r11, %r10, %r7;
	setp.gt.s32	%p2, %r11, 0;
	cvt.rn.f32.s32	%f10, %r11;
	selp.f32	%f11, %f10, 0f00000000, %p2;
	add.s32 	%r12, %r2, -1;
	add.s32 	%r13, %r3, -1;
	min.s32 	%r14, %r13, %r5;
	max.s32 	%r15, %r9, %r14;
	mov.f32 	%f12, 0f3F800000;
	sub.ftz.f32 	%f13, %f12, %f11;
	min.s32 	%r16, %r12, %r4;
	max.s32 	%r17, %r9, %r16;
	sub.ftz.f32 	%f14, %f12, %f6;
	abs.ftz.f32 	%f15, %f14;
	abs.ftz.f32 	%f16, %f13;
	mul.ftz.f32 	%f17, %f15, %f16;
	mul.lo.s32 	%r18, %r15, %r1;
	add.s32 	%r19, %r18, %r17;
	mul.wide.s32 	%rd2, %r19, 8;
	add.s64 	%rd3, %rd1, %rd2;
	ld.v2.f32 	{%f18, %f19}, [%rd3];
	fma.rn.ftz.f32 	%f21, %f18, %f17, 0f00000000;
	fma.rn.ftz.f32 	%f23, %f19, %f17, 0f00000000;
	add.s32 	%r20, %r5, 1;
	min.s32 	%r21, %r13, %r20;
	max.s32 	%r22, %r9, %r21;
	sub.ftz.f32 	%f24, %f7, %f11;
	abs.ftz.f32 	%f25, %f24;
	mul.ftz.f32 	%f26, %f15, %f25;
	mul.lo.s32 	%r23, %r22, %r1;
	add.s32 	%r24, %r23, %r17;
	mul.wide.s32 	%rd4, %r24, 8;
	add.s64 	%rd5, %rd1, %rd4;
	ld.v2.f32 	{%f27, %f28}, [%rd5];
	fma.rn.ftz.f32 	%f30, %f27, %f26, %f21;
	fma.rn.ftz.f32 	%f32, %f28, %f26, %f23;
	add.s32 	%r25, %r4, 1;
	min.s32 	%r26, %r12, %r25;
	max.s32 	%r27, %r9, %r26;
	sub.ftz.f32 	%f33, %f7, %f6;
	abs.ftz.f32 	%f34, %f33;
	mul.ftz.f32 	%f35, %f34, %f16;
	add.s32 	%r28, %r18, %r27;
	mul.wide.s32 	%rd6, %r28, 8;
	add.s64 	%rd7, %rd1, %rd6;
	ld.v2.f32 	{%f36, %f37}, [%rd7];
	fma.rn.ftz.f32 	%f39, %f36, %f35, %f30;
	fma.rn.ftz.f32 	%f41, %f37, %f35, %f32;
	mul.ftz.f32 	%f42, %f34, %f25;
	add.s32 	%r29, %r23, %r27;
	mul.wide.s32 	%rd8, %r29, 8;
	add.s64 	%rd9, %rd1, %rd8;
	ld.v2.f32 	{%f43, %f44}, [%rd9];
	fma.rn.ftz.f32 	%f46, %f43, %f42, %f39;
	fma.rn.ftz.f32 	%f48, %f44, %f42, %f41;
	st.param.f32	[func_retval0+0], %f46;
	st.param.f32	[func_retval0+4], %f48;
	ret;
}

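// _d_image_rgb2gray_kernel_div255: parameter roles inferred from the loads
// below are (dst, dst_stride, src, src_stride, width, height, wr, wg, wb).
// Each in-bounds thread reads one float4 RGBA pixel from src, forms
// wr*R + wg*G + wb*B, divides by 255 using the approximate divide, and
// stores the single float result to dst at the same (x, y).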
.visible .entry _d_image_rgb2gray_kernel_div255(
	.param .u64 _d_image_rgb2gray_kernel_div255_param_0,
	.param .u32 _d_image_rgb2gray_kernel_div255_param_1,
	.param .u64 _d_image_rgb2gray_kernel_div255_param_2,
	.param .u32 _d_image_rgb2gray_kernel_div255_param_3,
	.param .u32 _d_image_rgb2gray_kernel_div255_param_4,
	.param .u32 _d_image_rgb2gray_kernel_div255_param_5,
	.param .f32 _d_image_rgb2gray_kernel_div255_param_6,
	.param .f32 _d_image_rgb2gray_kernel_div255_param_7,
	.param .f32 _d_image_rgb2gray_kernel_div255_param_8
)
{
	.reg .pred 	%p<4>;
	.reg .s32 	%r<15>;
	.reg .f32 	%f<16>;
	.reg .s64 	%rd<9>;


	ld.param.u64 	%rd1, [_d_image_rgb2gray_kernel_div255_param_0];
	ld.param.u32 	%r3, [_d_image_rgb2gray_kernel_div255_param_1];
	ld.param.u64 	%rd2, [_d_image_rgb2gray_kernel_div255_param_2];
	ld.param.u32 	%r4, [_d_image_rgb2gray_kernel_div255_param_3];
	ld.param.u32 	%r5, [_d_image_rgb2gray_kernel_div255_param_4];
	ld.param.u32 	%r6, [_d_image_rgb2gray_kernel_div255_param_5];
	ld.param.f32 	%f1, [_d_image_rgb2gray_kernel_div255_param_6];
	ld.param.f32 	%f2, [_d_image_rgb2gray_kernel_div255_param_7];
	ld.param.f32 	%f3, [_d_image_rgb2gray_kernel_div255_param_8];
	mov.u32 	%r7, %ntid.x;
	mov.u32 	%r8, %ctaid.x;
	mov.u32 	%r9, %tid.x;
	mad.lo.s32 	%r1, %r7, %r8, %r9;
	mov.u32 	%r10, %ntid.y;
	mov.u32 	%r11, %ctaid.y;
	mov.u32 	%r12, %tid.y;
	mad.lo.s32 	%r2, %r10, %r11, %r12;
	setp.lt.s32	%p1, %r1, %r5;
	setp.lt.s32	%p2, %r2, %r6;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB2_2;
	bra.uni 	BB2_1;

BB2_1:
	cvta.to.global.u64 	%rd3, %rd1;
	cvta.to.global.u64 	%rd4, %rd2;
	mad.lo.s32 	%r13, %r2, %r4, %r1;
	mul.wide.s32 	%rd5, %r13, 16;
	add.s64 	%rd6, %rd4, %rd5;
	ld.global.v4.f32 	{%f4, %f5, %f6, %f7}, [%rd6];
	mul.ftz.f32 	%f9, %f5, %f2;
	fma.rn.ftz.f32 	%f11, %f4, %f1, %f9;
	fma.rn.ftz.f32 	%f13, %f6, %f3, %f11;
	mov.f32 	%f14, 0f437F0000;
	div.approx.ftz.f32 	%f15, %f13, %f14;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	mul.wide.s32 	%rd7, %r14, 4;
	add.s64 	%rd8, %rd3, %rd7;
	st.global.f32 	[%rd8], %f15;

BB2_2:
	ret;
}

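// _d_bilinear_resize_kernel_float4: parameter roles inferred from the code
// are (dst, dst_stride, dst_width, dst_height, src, src_stride, src_width,
// src_height, scale_x, scale_y). Each destination pixel (x, y) is mapped
// back to the source as ((x+1)/scale_x - 1, (y+1)/scale_y - 1) with
// approximate reciprocals, sampled with the same clamped bilinear blend as
// the helpers above (inlined here), and written out as float4.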
.visible .entry _d_bilinear_resize_kernel_float4(
	.param .u64 _d_bilinear_resize_kernel_float4_param_0,
	.param .u32 _d_bilinear_resize_kernel_float4_param_1,
	.param .u32 _d_bilinear_resize_kernel_float4_param_2,
	.param .u32 _d_bilinear_resize_kernel_float4_param_3,
	.param .u64 _d_bilinear_resize_kernel_float4_param_4,
	.param .u32 _d_bilinear_resize_kernel_float4_param_5,
	.param .u32 _d_bilinear_resize_kernel_float4_param_6,
	.param .u32 _d_bilinear_resize_kernel_float4_param_7,
	.param .f32 _d_bilinear_resize_kernel_float4_param_8,
	.param .f32 _d_bilinear_resize_kernel_float4_param_9
)
{
	.reg .pred 	%p<6>;
	.reg .s32 	%r<44>;
	.reg .f32 	%f<79>;
	.reg .s64 	%rd<15>;


	ld.param.u64 	%rd1, [_d_bilinear_resize_kernel_float4_param_0];
	ld.param.u32 	%r3, [_d_bilinear_resize_kernel_float4_param_1];
	ld.param.u32 	%r7, [_d_bilinear_resize_kernel_float4_param_2];
	ld.param.u32 	%r8, [_d_bilinear_resize_kernel_float4_param_3];
	ld.param.u64 	%rd2, [_d_bilinear_resize_kernel_float4_param_4];
	ld.param.u32 	%r4, [_d_bilinear_resize_kernel_float4_param_5];
	ld.param.u32 	%r5, [_d_bilinear_resize_kernel_float4_param_6];
	ld.param.u32 	%r6, [_d_bilinear_resize_kernel_float4_param_7];
	ld.param.f32 	%f1, [_d_bilinear_resize_kernel_float4_param_8];
	ld.param.f32 	%f2, [_d_bilinear_resize_kernel_float4_param_9];
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB3_2;
	bra.uni 	BB3_1;

BB3_1:
	cvta.to.global.u64 	%rd3, %rd1;
	cvta.to.global.u64 	%rd4, %rd2;
	add.s32 	%r15, %r1, 1;
	mov.u32 	%r16, 1;
	cvt.rn.f32.s32	%f3, %r15;
	rcp.approx.ftz.f32 	%f4, %f1;
	mov.f32 	%f5, 0f3F800000;
	fma.rn.ftz.f32 	%f6, %f3, %f4, 0fBF800000;
	add.s32 	%r17, %r2, 1;
	cvt.rn.f32.s32	%f7, %r17;
	rcp.approx.ftz.f32 	%f8, %f2;
	fma.rn.ftz.f32 	%f9, %f7, %f8, 0fBF800000;
	mad.lo.s32 	%r18, %r2, %r3, %r1;
	mul.wide.s32 	%rd5, %r18, 16;
	add.s64 	%rd6, %rd3, %rd5;
	cvt.rzi.ftz.s32.f32	%r19, %f6;
	cvt.rzi.ftz.s32.f32	%r20, %f9;
	cvt.rn.f32.s32	%f10, %r19;
	sub.ftz.f32 	%f11, %f6, %f10;
	cvt.rzi.ftz.s32.f32	%r21, %f11;
	min.s32 	%r22, %r21, %r16;
	setp.gt.s32	%p4, %r22, 0;
	mov.u32 	%r23, 0;
	cvt.rn.f32.s32	%f12, %r22;
	selp.f32	%f13, %f12, 0f00000000, %p4;
	mov.f32 	%f14, 0f00000000;
	cvt.rn.f32.s32	%f15, %r20;
	sub.ftz.f32 	%f16, %f9, %f15;
	cvt.rzi.ftz.s32.f32	%r24, %f16;
	min.s32 	%r25, %r24, %r16;
	setp.gt.s32	%p5, %r25, 0;
	cvt.rn.f32.s32	%f17, %r25;
	selp.f32	%f18, %f17, 0f00000000, %p5;
	add.s32 	%r26, %r6, -1;
	min.s32 	%r27, %r26, %r20;
	max.s32 	%r28, %r23, %r27;
	sub.ftz.f32 	%f19, %f5, %f18;
	add.s32 	%r29, %r5, -1;
	min.s32 	%r30, %r29, %r19;
	max.s32 	%r31, %r23, %r30;
	sub.ftz.f32 	%f20, %f5, %f13;
	abs.ftz.f32 	%f21, %f20;
	abs.ftz.f32 	%f22, %f19;
	mul.ftz.f32 	%f23, %f21, %f22;
	mul.lo.s32 	%r32, %r28, %r4;
	add.s32 	%r33, %r32, %r31;
	mul.wide.s32 	%rd7, %r33, 16;
	add.s64 	%rd8, %rd4, %rd7;
	ld.global.v4.f32 	{%f24, %f25, %f26, %f27}, [%rd8];
	fma.rn.ftz.f32 	%f29, %f24, %f23, 0f00000000;
	fma.rn.ftz.f32 	%f31, %f25, %f23, 0f00000000;
	fma.rn.ftz.f32 	%f33, %f26, %f23, 0f00000000;
	fma.rn.ftz.f32 	%f35, %f27, %f23, 0f00000000;
	add.s32 	%r34, %r20, 1;
	min.s32 	%r35, %r26, %r34;
	max.s32 	%r36, %r23, %r35;
	sub.ftz.f32 	%f36, %f14, %f18;
	abs.ftz.f32 	%f37, %f36;
	mul.ftz.f32 	%f38, %f21, %f37;
	mul.lo.s32 	%r37, %r36, %r4;
	add.s32 	%r38, %r37, %r31;
	mul.wide.s32 	%rd9, %r38, 16;
	add.s64 	%rd10, %rd4, %rd9;
	ld.global.v4.f32 	{%f39, %f40, %f41, %f42}, [%rd10];
	fma.rn.ftz.f32 	%f44, %f39, %f38, %f29;
	fma.rn.ftz.f32 	%f46, %f40, %f38, %f31;
	fma.rn.ftz.f32 	%f48, %f41, %f38, %f33;
	fma.rn.ftz.f32 	%f50, %f42, %f38, %f35;
	add.s32 	%r39, %r19, 1;
	min.s32 	%r40, %r29, %r39;
	max.s32 	%r41, %r23, %r40;
	sub.ftz.f32 	%f51, %f14, %f13;
	abs.ftz.f32 	%f52, %f51;
	mul.ftz.f32 	%f53, %f52, %f22;
	add.s32 	%r42, %r32, %r41;
	mul.wide.s32 	%rd11, %r42, 16;
	add.s64 	%rd12, %rd4, %rd11;
	ld.global.v4.f32 	{%f54, %f55, %f56, %f57}, [%rd12];
	fma.rn.ftz.f32 	%f59, %f54, %f53, %f44;
	fma.rn.ftz.f32 	%f61, %f55, %f53, %f46;
	fma.rn.ftz.f32 	%f63, %f56, %f53, %f48;
	fma.rn.ftz.f32 	%f65, %f57, %f53, %f50;
	mul.ftz.f32 	%f66, %f52, %f37;
	add.s32 	%r43, %r37, %r41;
	mul.wide.s32 	%rd13, %r43, 16;
	add.s64 	%rd14, %rd4, %rd13;
	ld.global.v4.f32 	{%f67, %f68, %f69, %f70}, [%rd14];
	fma.rn.ftz.f32 	%f72, %f70, %f66, %f65;
	fma.rn.ftz.f32 	%f74, %f69, %f66, %f63;
	fma.rn.ftz.f32 	%f76, %f68, %f66, %f61;
	fma.rn.ftz.f32 	%f78, %f67, %f66, %f59;
	st.global.v4.f32 	[%rd6], {%f78, %f76, %f74, %f72};

BB3_2:
	ret;
}

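// _d_resize_flow_nn_kernel_f2p_f2p: nearest-neighbor resize of a packed
// float2 flow field. Parameters appear to be (dst, dst_stride, dst_width,
// dst_height, src, src_stride, src_width, src_height, scale_x, scale_y,
// flow_scale); src_width and src_height are not referenced here. The source
// coordinate is ((x+1)/scale - 1) per axis, truncated toward zero, and both
// flow components are multiplied by flow_scale before the v2 store.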
.visible .entry _d_resize_flow_nn_kernel_f2p_f2p(
	.param .u64 _d_resize_flow_nn_kernel_f2p_f2p_param_0,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_1,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_2,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_3,
	.param .u64 _d_resize_flow_nn_kernel_f2p_f2p_param_4,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_5,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_6,
	.param .u32 _d_resize_flow_nn_kernel_f2p_f2p_param_7,
	.param .f32 _d_resize_flow_nn_kernel_f2p_f2p_param_8,
	.param .f32 _d_resize_flow_nn_kernel_f2p_f2p_param_9,
	.param .f32 _d_resize_flow_nn_kernel_f2p_f2p_param_10
)
{
	.reg .pred 	%p<4>;
	.reg .s32 	%r<19>;
	.reg .f32 	%f<16>;
	.reg .s64 	%rd<9>;


	ld.param.u64 	%rd1, [_d_resize_flow_nn_kernel_f2p_f2p_param_0];
	ld.param.u32 	%r3, [_d_resize_flow_nn_kernel_f2p_f2p_param_1];
	ld.param.u32 	%r5, [_d_resize_flow_nn_kernel_f2p_f2p_param_2];
	ld.param.u32 	%r6, [_d_resize_flow_nn_kernel_f2p_f2p_param_3];
	ld.param.u64 	%rd2, [_d_resize_flow_nn_kernel_f2p_f2p_param_4];
	ld.param.u32 	%r4, [_d_resize_flow_nn_kernel_f2p_f2p_param_5];
	ld.param.f32 	%f1, [_d_resize_flow_nn_kernel_f2p_f2p_param_8];
	ld.param.f32 	%f2, [_d_resize_flow_nn_kernel_f2p_f2p_param_9];
	ld.param.f32 	%f3, [_d_resize_flow_nn_kernel_f2p_f2p_param_10];
	mov.u32 	%r7, %ntid.x;
	mov.u32 	%r8, %ctaid.x;
	mov.u32 	%r9, %tid.x;
	mad.lo.s32 	%r1, %r7, %r8, %r9;
	mov.u32 	%r10, %ntid.y;
	mov.u32 	%r11, %ctaid.y;
	mov.u32 	%r12, %tid.y;
	mad.lo.s32 	%r2, %r10, %r11, %r12;
	setp.lt.s32	%p1, %r1, %r5;
	setp.lt.s32	%p2, %r2, %r6;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB4_2;
	bra.uni 	BB4_1;

BB4_1:
	cvta.to.global.u64 	%rd3, %rd1;
	cvta.to.global.u64 	%rd4, %rd2;
	add.s32 	%r13, %r1, 1;
	cvt.rn.f32.s32	%f4, %r13;
	rcp.approx.ftz.f32 	%f5, %f1;
	fma.rn.ftz.f32 	%f6, %f4, %f5, 0fBF800000;
	add.s32 	%r14, %r2, 1;
	cvt.rn.f32.s32	%f7, %r14;
	rcp.approx.ftz.f32 	%f8, %f2;
	fma.rn.ftz.f32 	%f9, %f7, %f8, 0fBF800000;
	mad.lo.s32 	%r15, %r2, %r3, %r1;
	mul.wide.s32 	%rd5, %r15, 8;
	add.s64 	%rd6, %rd3, %rd5;
	cvt.rzi.ftz.s32.f32	%r16, %f6;
	cvt.rzi.ftz.s32.f32	%r17, %f9;
	mad.lo.s32 	%r18, %r17, %r4, %r16;
	mul.wide.s32 	%rd7, %r18, 8;
	add.s64 	%rd8, %rd4, %rd7;
	ld.global.v2.f32 	{%f10, %f11}, [%rd8];
	mul.ftz.f32 	%f13, %f11, %f3;
	mul.ftz.f32 	%f15, %f10, %f3;
	st.global.v2.f32 	[%rd6], {%f15, %f13};

BB4_2:
	ret;
}

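// _d_resize_flow_kernel_f2p_f1p: bilinear resize of a packed float2 flow
// field into two separate float planes. Parameters appear to be (dst_u,
// dst_v, dst_stride, dst_width, dst_height, src, src_stride, src_width,
// src_height, scale_x, scale_y, u_scale, v_scale). The flow vector is
// interpolated as in the kernels above; the first component is scaled by
// u_scale and stored to the first plane, the second by v_scale to the
// second plane.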
.visible .entry _d_resize_flow_kernel_f2p_f1p(
	.param .u64 _d_resize_flow_kernel_f2p_f1p_param_0,
	.param .u64 _d_resize_flow_kernel_f2p_f1p_param_1,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_2,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_3,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_4,
	.param .u64 _d_resize_flow_kernel_f2p_f1p_param_5,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_6,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_7,
	.param .u32 _d_resize_flow_kernel_f2p_f1p_param_8,
	.param .f32 _d_resize_flow_kernel_f2p_f1p_param_9,
	.param .f32 _d_resize_flow_kernel_f2p_f1p_param_10,
	.param .f32 _d_resize_flow_kernel_f2p_f1p_param_11,
	.param .f32 _d_resize_flow_kernel_f2p_f1p_param_12
)
{
	.reg .pred 	%p<6>;
	.reg .s32 	%r<44>;
	.reg .f32 	%f<59>;
	.reg .s64 	%rd<18>;


	ld.param.u64 	%rd1, [_d_resize_flow_kernel_f2p_f1p_param_0];
	ld.param.u64 	%rd2, [_d_resize_flow_kernel_f2p_f1p_param_1];
	ld.param.u32 	%r3, [_d_resize_flow_kernel_f2p_f1p_param_2];
	ld.param.u32 	%r7, [_d_resize_flow_kernel_f2p_f1p_param_3];
	ld.param.u32 	%r8, [_d_resize_flow_kernel_f2p_f1p_param_4];
	ld.param.u64 	%rd3, [_d_resize_flow_kernel_f2p_f1p_param_5];
	ld.param.u32 	%r4, [_d_resize_flow_kernel_f2p_f1p_param_6];
	ld.param.u32 	%r5, [_d_resize_flow_kernel_f2p_f1p_param_7];
	ld.param.u32 	%r6, [_d_resize_flow_kernel_f2p_f1p_param_8];
	ld.param.f32 	%f1, [_d_resize_flow_kernel_f2p_f1p_param_9];
	ld.param.f32 	%f2, [_d_resize_flow_kernel_f2p_f1p_param_10];
	ld.param.f32 	%f3, [_d_resize_flow_kernel_f2p_f1p_param_11];
	ld.param.f32 	%f4, [_d_resize_flow_kernel_f2p_f1p_param_12];
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB5_2;
	bra.uni 	BB5_1;

BB5_1:
	cvta.to.global.u64 	%rd4, %rd2;
	cvta.to.global.u64 	%rd5, %rd1;
	cvta.to.global.u64 	%rd6, %rd3;
	add.s32 	%r15, %r1, 1;
	mov.u32 	%r16, 1;
	cvt.rn.f32.s32	%f5, %r15;
	rcp.approx.ftz.f32 	%f6, %f1;
	mov.f32 	%f7, 0f3F800000;
	fma.rn.ftz.f32 	%f8, %f5, %f6, 0fBF800000;
	add.s32 	%r17, %r2, 1;
	cvt.rn.f32.s32	%f9, %r17;
	rcp.approx.ftz.f32 	%f10, %f2;
	fma.rn.ftz.f32 	%f11, %f9, %f10, 0fBF800000;
	cvt.rzi.ftz.s32.f32	%r18, %f8;
	cvt.rzi.ftz.s32.f32	%r19, %f11;
	cvt.rn.f32.s32	%f12, %r18;
	sub.ftz.f32 	%f13, %f8, %f12;
	cvt.rzi.ftz.s32.f32	%r20, %f13;
	min.s32 	%r21, %r20, %r16;
	setp.gt.s32	%p4, %r21, 0;
	mov.u32 	%r22, 0;
	cvt.rn.f32.s32	%f14, %r21;
	selp.f32	%f15, %f14, 0f00000000, %p4;
	mov.f32 	%f16, 0f00000000;
	cvt.rn.f32.s32	%f17, %r19;
	sub.ftz.f32 	%f18, %f11, %f17;
	cvt.rzi.ftz.s32.f32	%r23, %f18;
	min.s32 	%r24, %r23, %r16;
	setp.gt.s32	%p5, %r24, 0;
	cvt.rn.f32.s32	%f19, %r24;
	selp.f32	%f20, %f19, 0f00000000, %p5;
	add.s32 	%r25, %r6, -1;
	min.s32 	%r26, %r25, %r19;
	max.s32 	%r27, %r22, %r26;
	sub.ftz.f32 	%f21, %f7, %f20;
	add.s32 	%r28, %r5, -1;
	min.s32 	%r29, %r28, %r18;
	max.s32 	%r30, %r22, %r29;
	sub.ftz.f32 	%f22, %f7, %f15;
	abs.ftz.f32 	%f23, %f22;
	abs.ftz.f32 	%f24, %f21;
	mul.ftz.f32 	%f25, %f23, %f24;
	mul.lo.s32 	%r31, %r27, %r4;
	add.s32 	%r32, %r31, %r30;
	mul.wide.s32 	%rd7, %r32, 8;
	add.s64 	%rd8, %rd6, %rd7;
	ld.global.v2.f32 	{%f26, %f27}, [%rd8];
	fma.rn.ftz.f32 	%f29, %f26, %f25, 0f00000000;
	fma.rn.ftz.f32 	%f31, %f27, %f25, 0f00000000;
	add.s32 	%r33, %r19, 1;
	min.s32 	%r34, %r25, %r33;
	max.s32 	%r35, %r22, %r34;
	sub.ftz.f32 	%f32, %f16, %f20;
	abs.ftz.f32 	%f33, %f32;
	mul.ftz.f32 	%f34, %f23, %f33;
	mul.lo.s32 	%r36, %r35, %r4;
	add.s32 	%r37, %r36, %r30;
	mul.wide.s32 	%rd9, %r37, 8;
	add.s64 	%rd10, %rd6, %rd9;
	ld.global.v2.f32 	{%f35, %f36}, [%rd10];
	fma.rn.ftz.f32 	%f38, %f35, %f34, %f29;
	fma.rn.ftz.f32 	%f40, %f36, %f34, %f31;
	add.s32 	%r38, %r18, 1;
	min.s32 	%r39, %r28, %r38;
	max.s32 	%r40, %r22, %r39;
	sub.ftz.f32 	%f41, %f16, %f15;
	abs.ftz.f32 	%f42, %f41;
	mul.ftz.f32 	%f43, %f42, %f24;
	add.s32 	%r41, %r31, %r40;
	mul.wide.s32 	%rd11, %r41, 8;
	add.s64 	%rd12, %rd6, %rd11;
	ld.global.v2.f32 	{%f44, %f45}, [%rd12];
	fma.rn.ftz.f32 	%f47, %f44, %f43, %f38;
	fma.rn.ftz.f32 	%f49, %f45, %f43, %f40;
	mul.ftz.f32 	%f50, %f42, %f33;
	add.s32 	%r42, %r36, %r40;
	mul.wide.s32 	%rd13, %r42, 8;
	add.s64 	%rd14, %rd6, %rd13;
	ld.global.v2.f32 	{%f51, %f52}, [%rd14];
	fma.rn.ftz.f32 	%f54, %f51, %f50, %f47;
	fma.rn.ftz.f32 	%f56, %f52, %f50, %f49;
	mul.ftz.f32 	%f57, %f54, %f3;
	mad.lo.s32 	%r43, %r2, %r3, %r1;
	mul.wide.s32 	%rd15, %r43, 4;
	add.s64 	%rd16, %rd5, %rd15;
	st.global.f32 	[%rd16], %f57;
	mul.ftz.f32 	%f58, %f56, %f4;
	add.s64 	%rd17, %rd4, %rd15;
	st.global.f32 	[%rd17], %f58;

BB5_2:
	ret;
}

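// _d_resize_flow_kernel_f2p_f2p: the same bilinear flow resize as above,
// but with a packed float2 image on both input and output; the interpolated
// vector is scaled componentwise by the last two float parameters before
// the v2 store.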
.visible .entry _d_resize_flow_kernel_f2p_f2p(
	.param .u64 _d_resize_flow_kernel_f2p_f2p_param_0,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_1,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_2,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_3,
	.param .u64 _d_resize_flow_kernel_f2p_f2p_param_4,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_5,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_6,
	.param .u32 _d_resize_flow_kernel_f2p_f2p_param_7,
	.param .f32 _d_resize_flow_kernel_f2p_f2p_param_8,
	.param .f32 _d_resize_flow_kernel_f2p_f2p_param_9,
	.param .f32 _d_resize_flow_kernel_f2p_f2p_param_10,
	.param .f32 _d_resize_flow_kernel_f2p_f2p_param_11
)
{
	.reg .pred 	%p<6>;
	.reg .s32 	%r<44>;
	.reg .f32 	%f<59>;
	.reg .s64 	%rd<15>;


	ld.param.u64 	%rd1, [_d_resize_flow_kernel_f2p_f2p_param_0];
	ld.param.u32 	%r3, [_d_resize_flow_kernel_f2p_f2p_param_1];
	ld.param.u32 	%r7, [_d_resize_flow_kernel_f2p_f2p_param_2];
	ld.param.u32 	%r8, [_d_resize_flow_kernel_f2p_f2p_param_3];
	ld.param.u64 	%rd2, [_d_resize_flow_kernel_f2p_f2p_param_4];
	ld.param.u32 	%r4, [_d_resize_flow_kernel_f2p_f2p_param_5];
	ld.param.u32 	%r5, [_d_resize_flow_kernel_f2p_f2p_param_6];
	ld.param.u32 	%r6, [_d_resize_flow_kernel_f2p_f2p_param_7];
	ld.param.f32 	%f1, [_d_resize_flow_kernel_f2p_f2p_param_8];
	ld.param.f32 	%f2, [_d_resize_flow_kernel_f2p_f2p_param_9];
	ld.param.f32 	%f3, [_d_resize_flow_kernel_f2p_f2p_param_10];
	ld.param.f32 	%f4, [_d_resize_flow_kernel_f2p_f2p_param_11];
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB6_2;
	bra.uni 	BB6_1;

BB6_1:
	cvta.to.global.u64 	%rd3, %rd1;
	cvta.to.global.u64 	%rd4, %rd2;
	add.s32 	%r15, %r1, 1;
	mov.u32 	%r16, 1;
	cvt.rn.f32.s32	%f5, %r15;
	rcp.approx.ftz.f32 	%f6, %f1;
	mov.f32 	%f7, 0f3F800000;
	fma.rn.ftz.f32 	%f8, %f5, %f6, 0fBF800000;
	add.s32 	%r17, %r2, 1;
	cvt.rn.f32.s32	%f9, %r17;
	rcp.approx.ftz.f32 	%f10, %f2;
	fma.rn.ftz.f32 	%f11, %f9, %f10, 0fBF800000;
	cvt.rzi.ftz.s32.f32	%r18, %f8;
	cvt.rzi.ftz.s32.f32	%r19, %f11;
	cvt.rn.f32.s32	%f12, %r18;
	sub.ftz.f32 	%f13, %f8, %f12;
	cvt.rzi.ftz.s32.f32	%r20, %f13;
	min.s32 	%r21, %r20, %r16;
	setp.gt.s32	%p4, %r21, 0;
	mov.u32 	%r22, 0;
	cvt.rn.f32.s32	%f14, %r21;
	selp.f32	%f15, %f14, 0f00000000, %p4;
	mov.f32 	%f16, 0f00000000;
	cvt.rn.f32.s32	%f17, %r19;
	sub.ftz.f32 	%f18, %f11, %f17;
	cvt.rzi.ftz.s32.f32	%r23, %f18;
	min.s32 	%r24, %r23, %r16;
	setp.gt.s32	%p5, %r24, 0;
	cvt.rn.f32.s32	%f19, %r24;
	selp.f32	%f20, %f19, 0f00000000, %p5;
	add.s32 	%r25, %r6, -1;
	min.s32 	%r26, %r25, %r19;
	max.s32 	%r27, %r22, %r26;
	sub.ftz.f32 	%f21, %f7, %f20;
	add.s32 	%r28, %r5, -1;
	min.s32 	%r29, %r28, %r18;
	max.s32 	%r30, %r22, %r29;
	sub.ftz.f32 	%f22, %f7, %f15;
	abs.ftz.f32 	%f23, %f22;
	abs.ftz.f32 	%f24, %f21;
	mul.ftz.f32 	%f25, %f23, %f24;
	mul.lo.s32 	%r31, %r27, %r4;
	add.s32 	%r32, %r31, %r30;
	mul.wide.s32 	%rd5, %r32, 8;
	add.s64 	%rd6, %rd4, %rd5;
	ld.global.v2.f32 	{%f26, %f27}, [%rd6];
	fma.rn.ftz.f32 	%f29, %f26, %f25, 0f00000000;
	fma.rn.ftz.f32 	%f31, %f27, %f25, 0f00000000;
	add.s32 	%r33, %r19, 1;
	min.s32 	%r34, %r25, %r33;
	max.s32 	%r35, %r22, %r34;
	sub.ftz.f32 	%f32, %f16, %f20;
	abs.ftz.f32 	%f33, %f32;
	mul.ftz.f32 	%f34, %f23, %f33;
	mul.lo.s32 	%r36, %r35, %r4;
	add.s32 	%r37, %r36, %r30;
	mul.wide.s32 	%rd7, %r37, 8;
	add.s64 	%rd8, %rd4, %rd7;
	ld.global.v2.f32 	{%f35, %f36}, [%rd8];
	fma.rn.ftz.f32 	%f38, %f35, %f34, %f29;
	fma.rn.ftz.f32 	%f40, %f36, %f34, %f31;
	add.s32 	%r38, %r18, 1;
	min.s32 	%r39, %r28, %r38;
	max.s32 	%r40, %r22, %r39;
	sub.ftz.f32 	%f41, %f16, %f15;
	abs.ftz.f32 	%f42, %f41;
	mul.ftz.f32 	%f43, %f42, %f24;
	add.s32 	%r41, %r31, %r40;
	mul.wide.s32 	%rd9, %r41, 8;
	add.s64 	%rd10, %rd4, %rd9;
	ld.global.v2.f32 	{%f44, %f45}, [%rd10];
	fma.rn.ftz.f32 	%f47, %f44, %f43, %f38;
	fma.rn.ftz.f32 	%f49, %f45, %f43, %f40;
	mul.ftz.f32 	%f50, %f42, %f33;
	add.s32 	%r42, %r36, %r40;
	mul.wide.s32 	%rd11, %r42, 8;
	add.s64 	%rd12, %rd4, %rd11;
	ld.global.v2.f32 	{%f51, %f52}, [%rd12];
	fma.rn.ftz.f32 	%f54, %f51, %f50, %f47;
	fma.rn.ftz.f32 	%f56, %f52, %f50, %f49;
	mad.lo.s32 	%r43, %r2, %r3, %r1;
	mul.wide.s32 	%rd13, %r43, 8;
	add.s64 	%rd14, %rd3, %rd13;
	mul.ftz.f32 	%f57, %f56, %f4;
	mul.ftz.f32 	%f58, %f54, %f3;
	st.global.v2.f32 	[%rd14], {%f58, %f57};

BB6_2:
	ret;
}

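// _d_image_init_kernel_float(dst, stride, value, width, height): fills every
// in-bounds pixel of a single-channel float image with the given value
// (parameter roles inferred from the code).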
.visible .entry _d_image_init_kernel_float(
	.param .u64 _d_image_init_kernel_float_param_0,
	.param .u32 _d_image_init_kernel_float_param_1,
	.param .f32 _d_image_init_kernel_float_param_2,
	.param .u32 _d_image_init_kernel_float_param_3,
	.param .u32 _d_image_init_kernel_float_param_4
)
{
	.reg .pred 	%p<4>;
	.reg .s32 	%r<13>;
	.reg .f32 	%f<2>;
	.reg .s64 	%rd<5>;


	ld.param.u64 	%rd1, [_d_image_init_kernel_float_param_0];
	ld.param.u32 	%r3, [_d_image_init_kernel_float_param_1];
	ld.param.f32 	%f1, [_d_image_init_kernel_float_param_2];
	ld.param.u32 	%r4, [_d_image_init_kernel_float_param_3];
	ld.param.u32 	%r5, [_d_image_init_kernel_float_param_4];
	mov.u32 	%r6, %ntid.x;
	mov.u32 	%r7, %ctaid.x;
	mov.u32 	%r8, %tid.x;
	mad.lo.s32 	%r1, %r6, %r7, %r8;
	mov.u32 	%r9, %ntid.y;
	mov.u32 	%r10, %ctaid.y;
	mov.u32 	%r11, %tid.y;
	mad.lo.s32 	%r2, %r9, %r10, %r11;
	setp.lt.s32	%p1, %r1, %r4;
	setp.lt.s32	%p2, %r2, %r5;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB7_2;
	bra.uni 	BB7_1;

BB7_1:
	cvta.to.global.u64 	%rd2, %rd1;
	mad.lo.s32 	%r12, %r2, %r3, %r1;
	mul.wide.s32 	%rd3, %r12, 4;
	add.s64 	%rd4, %rd2, %rd3;
	st.global.f32 	[%rd4], %f1;

BB7_2:
	ret;
}

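// _d_image_init_kernel_float2: float2 variant of the fill kernel above;
// stores the 8-byte {value.x, value.y} constant to every in-bounds pixel.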
.visible .entry _d_image_init_kernel_float2(
	.param .u64 _d_image_init_kernel_float2_param_0,
	.param .u32 _d_image_init_kernel_float2_param_1,
	.param .align 8 .b8 _d_image_init_kernel_float2_param_2[8],
	.param .u32 _d_image_init_kernel_float2_param_3,
	.param .u32 _d_image_init_kernel_float2_param_4
)
{
	.reg .pred 	%p<4>;
	.reg .s32 	%r<13>;
	.reg .f32 	%f<3>;
	.reg .s64 	%rd<5>;


	ld.param.u64 	%rd1, [_d_image_init_kernel_float2_param_0];
	ld.param.u32 	%r3, [_d_image_init_kernel_float2_param_1];
	ld.param.f32 	%f2, [_d_image_init_kernel_float2_param_2+4];
	ld.param.f32 	%f1, [_d_image_init_kernel_float2_param_2];
	ld.param.u32 	%r4, [_d_image_init_kernel_float2_param_3];
	ld.param.u32 	%r5, [_d_image_init_kernel_float2_param_4];
	mov.u32 	%r6, %ntid.x;
	mov.u32 	%r7, %ctaid.x;
	mov.u32 	%r8, %tid.x;
	mad.lo.s32 	%r1, %r6, %r7, %r8;
	mov.u32 	%r9, %ntid.y;
	mov.u32 	%r10, %ctaid.y;
	mov.u32 	%r11, %tid.y;
	mad.lo.s32 	%r2, %r9, %r10, %r11;
	setp.lt.s32	%p1, %r1, %r4;
	setp.lt.s32	%p2, %r2, %r5;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB8_2;
	bra.uni 	BB8_1;

BB8_1:
	cvta.to.global.u64 	%rd2, %rd1;
	mad.lo.s32 	%r12, %r2, %r3, %r1;
	mul.wide.s32 	%rd3, %r12, 8;
	add.s64 	%rd4, %rd2, %rd3;
	st.global.v2.f32 	[%rd4], {%f1, %f2};

BB8_2:
	ret;
}

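// _d_image_cvt_vec2_to_sg2_kernel_float_float2: splits a packed float2 image
// into two single-channel float planes. Parameters appear to be (dst0, dst1,
// dst_stride, src, src_stride, width, height); the .x component is written
// to dst0 and the .y component to dst1.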
.visible .entry _d_image_cvt_vec2_to_sg2_kernel_float_float2(
	.param .u64 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_0,
	.param .u64 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_1,
	.param .u32 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_2,
	.param .u64 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_3,
	.param .u32 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_4,
	.param .u32 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_5,
	.param .u32 _d_image_cvt_vec2_to_sg2_kernel_float_float2_param_6
)
{
	.reg .pred 	%p<4>;
	.reg .s32 	%r<15>;
	.reg .f32 	%f<5>;
	.reg .s64 	%rd<12>;


	ld.param.u64 	%rd1, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_0];
	ld.param.u64 	%rd2, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_1];
	ld.param.u32 	%r3, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_2];
	ld.param.u64 	%rd3, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_3];
	ld.param.u32 	%r4, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_4];
	ld.param.u32 	%r5, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_5];
	ld.param.u32 	%r6, [_d_image_cvt_vec2_to_sg2_kernel_float_float2_param_6];
	mov.u32 	%r7, %ntid.x;
	mov.u32 	%r8, %ctaid.x;
	mov.u32 	%r9, %tid.x;
	mad.lo.s32 	%r1, %r7, %r8, %r9;
	mov.u32 	%r10, %ntid.y;
	mov.u32 	%r11, %ctaid.y;
	mov.u32 	%r12, %tid.y;
	mad.lo.s32 	%r2, %r10, %r11, %r12;
	setp.lt.s32	%p1, %r1, %r5;
	setp.lt.s32	%p2, %r2, %r6;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB9_2;
	bra.uni 	BB9_1;

BB9_1:
	cvta.to.global.u64 	%rd4, %rd2;
	cvta.to.global.u64 	%rd5, %rd1;
	cvta.to.global.u64 	%rd6, %rd3;
	mad.lo.s32 	%r13, %r2, %r4, %r1;
	mul.wide.s32 	%rd7, %r13, 8;
	add.s64 	%rd8, %rd6, %rd7;
	ld.global.v2.f32 	{%f1, %f2}, [%rd8];
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	mul.wide.s32 	%rd9, %r14, 4;
	add.s64 	%rd10, %rd5, %rd9;
	st.global.f32 	[%rd10], %f1;
	add.s64 	%rd11, %rd4, %rd9;
	st.global.f32 	[%rd11], %f2;

BB9_2:
	ret;
}

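// _d_image_cvt_sg2_to_vec2_kernel_float2_float: the inverse of the kernel
// above; reads two single-channel float planes and writes the interleaved
// float2 image. Parameters appear to be (dst, dst_stride, src0, src1,
// src_stride, width, height).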
.visible .entry _d_image_cvt_sg2_to_vec2_kernel_float2_float(
	.param .u64 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_0,
	.param .u32 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_1,
	.param .u64 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_2,
	.param .u64 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_3,
	.param .u32 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_4,
	.param .u32 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_5,
	.param .u32 _d_image_cvt_sg2_to_vec2_kernel_float2_float_param_6
)
{
	.reg .pred 	%p<4>;
	.reg .s32 	%r<15>;
	.reg .f32 	%f<3>;
	.reg .s64 	%rd<12>;


	ld.param.u64 	%rd1, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_0];
	ld.param.u32 	%r3, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_1];
	ld.param.u64 	%rd2, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_2];
	ld.param.u64 	%rd3, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_3];
	ld.param.u32 	%r4, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_4];
	ld.param.u32 	%r5, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_5];
	ld.param.u32 	%r6, [_d_image_cvt_sg2_to_vec2_kernel_float2_float_param_6];
	mov.u32 	%r7, %ntid.x;
	mov.u32 	%r8, %ctaid.x;
	mov.u32 	%r9, %tid.x;
	mad.lo.s32 	%r1, %r7, %r8, %r9;
	mov.u32 	%r10, %ntid.y;
	mov.u32 	%r11, %ctaid.y;
	mov.u32 	%r12, %tid.y;
	mad.lo.s32 	%r2, %r10, %r11, %r12;
	setp.lt.s32	%p1, %r1, %r5;
	setp.lt.s32	%p2, %r2, %r6;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB10_2;
	bra.uni 	BB10_1;

BB10_1:
	cvta.to.global.u64 	%rd4, %rd1;
	cvta.to.global.u64 	%rd5, %rd3;
	cvta.to.global.u64 	%rd6, %rd2;
	mad.lo.s32 	%r13, %r2, %r4, %r1;
	mul.wide.s32 	%rd7, %r13, 4;
	add.s64 	%rd8, %rd6, %rd7;
	add.s64 	%rd9, %rd5, %rd7;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	mul.wide.s32 	%rd10, %r14, 8;
	add.s64 	%rd11, %rd4, %rd10;
	ld.global.f32 	%f1, [%rd9];
	ld.global.f32 	%f2, [%rd8];
	st.global.v2.f32 	[%rd11], {%f2, %f1};

BB10_2:
	ret;
}

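// _d_image_mutiply_scalar_kernel_float2: multiplies each float2 pixel of the
// source image by a scalar and writes the result to the destination.
// Parameters appear to be (dst, dst_stride, src, src_stride, scalar, width,
// height).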
.visible .entry _d_image_mutiply_scalar_kernel_float2(
	.param .u64 _d_image_mutiply_scalar_kernel_float2_param_0,
	.param .u32 _d_image_mutiply_scalar_kernel_float2_param_1,
	.param .u64 _d_image_mutiply_scalar_kernel_float2_param_2,
	.param .u32 _d_image_mutiply_scalar_kernel_float2_param_3,
	.param .f32 _d_image_mutiply_scalar_kernel_float2_param_4,
	.param .u32 _d_image_mutiply_scalar_kernel_float2_param_5,
	.param .u32 _d_image_mutiply_scalar_kernel_float2_param_6
)
{
	.reg .pred 	%p<4>;
	.reg .s32 	%r<15>;
	.reg .f32 	%f<8>;
	.reg .s64 	%rd<9>;


	ld.param.u64 	%rd1, [_d_image_mutiply_scalar_kernel_float2_param_0];
	ld.param.u32 	%r3, [_d_image_mutiply_scalar_kernel_float2_param_1];
	ld.param.u64 	%rd2, [_d_image_mutiply_scalar_kernel_float2_param_2];
	ld.param.u32 	%r4, [_d_image_mutiply_scalar_kernel_float2_param_3];
	ld.param.f32 	%f1, [_d_image_mutiply_scalar_kernel_float2_param_4];
	ld.param.u32 	%r5, [_d_image_mutiply_scalar_kernel_float2_param_5];
	ld.param.u32 	%r6, [_d_image_mutiply_scalar_kernel_float2_param_6];
	mov.u32 	%r7, %ntid.x;
	mov.u32 	%r8, %ctaid.x;
	mov.u32 	%r9, %tid.x;
	mad.lo.s32 	%r1, %r7, %r8, %r9;
	mov.u32 	%r10, %ntid.y;
	mov.u32 	%r11, %ctaid.y;
	mov.u32 	%r12, %tid.y;
	mad.lo.s32 	%r2, %r10, %r11, %r12;
	setp.lt.s32	%p1, %r1, %r5;
	setp.lt.s32	%p2, %r2, %r6;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB11_2;
	bra.uni 	BB11_1;

BB11_1:
	cvta.to.global.u64 	%rd3, %rd1;
	cvta.to.global.u64 	%rd4, %rd2;
	mad.lo.s32 	%r13, %r2, %r3, %r1;
	mad.lo.s32 	%r14, %r2, %r4, %r1;
	mul.wide.s32 	%rd5, %r13, 8;
	add.s64 	%rd6, %rd3, %rd5;
	mul.wide.s32 	%rd7, %r14, 8;
	add.s64 	%rd8, %rd4, %rd7;
	ld.global.v2.f32 	{%f2, %f3}, [%rd8];
	mul.ftz.f32 	%f5, %f3, %f1;
	mul.ftz.f32 	%f7, %f2, %f1;
	st.global.v2.f32 	[%rd6], {%f7, %f5};

BB11_2:
	ret;
}

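// scale_source_kernel(dst, src, width, height, is_float): converts a source
// image into a float4 image scaled to the 0..255 range (parameter roles
// inferred from the code; width also serves as the row stride). When the
// flag is non-zero the source is read as float4, otherwise as four packed
// 16-bit halves converted to float. R, G and B are multiplied by 255 and
// alpha is set to 255 in the output.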
.visible .entry scale_source_kernel(
	.param .u64 scale_source_kernel_param_0,
	.param .u64 scale_source_kernel_param_1,
	.param .u32 scale_source_kernel_param_2,
	.param .u32 scale_source_kernel_param_3,
	.param .u32 scale_source_kernel_param_4
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<9>;
	.reg .s32 	%r<13>;
	.reg .f32 	%f<24>;
	.reg .s64 	%rd<12>;


	ld.param.u64 	%rd4, [scale_source_kernel_param_0];
	ld.param.u64 	%rd5, [scale_source_kernel_param_1];
	ld.param.u32 	%r3, [scale_source_kernel_param_2];
	ld.param.u32 	%r5, [scale_source_kernel_param_3];
	ld.param.u32 	%r4, [scale_source_kernel_param_4];
	cvta.to.global.u64 	%rd1, %rd5;
	mov.u32 	%r6, %ntid.x;
	mov.u32 	%r7, %ctaid.x;
	mov.u32 	%r8, %tid.x;
	mad.lo.s32 	%r1, %r6, %r7, %r8;
	mov.u32 	%r9, %ntid.y;
	mov.u32 	%r10, %ctaid.y;
	mov.u32 	%r11, %tid.y;
	mad.lo.s32 	%r2, %r9, %r10, %r11;
	setp.lt.s32	%p1, %r1, %r3;
	setp.lt.s32	%p2, %r2, %r5;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB12_5;
	bra.uni 	BB12_1;

BB12_1:
	cvta.to.global.u64 	%rd2, %rd4;
	mad.lo.s32 	%r12, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r12;
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB12_3;

	shl.b64 	%rd6, %rd3, 4;
	add.s64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd7];
	mov.f32 	%f4, %f16;
	mov.f32 	%f23, %f15;
	mov.f32 	%f22, %f14;
	mov.f32 	%f21, %f13;
	bra.uni 	BB12_4;

BB12_3:
	shl.b64 	%rd8, %rd3, 3;
	add.s64 	%rd9, %rd1, %rd8;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd9];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f21, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f22, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f23, %temp;
	}

BB12_4:
	shl.b64 	%rd10, %rd3, 4;
	add.s64 	%rd11, %rd2, %rd10;
	mul.ftz.f32 	%f17, %f21, 0f437F0000;
	mul.ftz.f32 	%f18, %f22, 0f437F0000;
	mul.ftz.f32 	%f19, %f23, 0f437F0000;
	mov.f32 	%f20, 0f437F0000;
	st.global.v4.f32 	[%rd11], {%f17, %f18, %f19, %f20};

BB12_5:
	ret;
}

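// unscale_dest_kernel(dst, src, width, height, is_float): the inverse of
// scale_source_kernel; reads a float4 pixel, divides R, G and B by 255 with
// the approximate divide, and writes either a float4 with alpha = 1.0 (flag
// non-zero) or a packed half4 with alpha = 1.0 (flag zero). Width again
// doubles as the row stride.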
.visible .entry unscale_dest_kernel(
	.param .u64 unscale_dest_kernel_param_0,
	.param .u64 unscale_dest_kernel_param_1,
	.param .u32 unscale_dest_kernel_param_2,
	.param .u32 unscale_dest_kernel_param_3,
	.param .u32 unscale_dest_kernel_param_4
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<13>;
	.reg .f32 	%f<14>;
	.reg .s64 	%rd<12>;


	ld.param.u64 	%rd4, [unscale_dest_kernel_param_0];
	ld.param.u64 	%rd3, [unscale_dest_kernel_param_1];
	ld.param.u32 	%r3, [unscale_dest_kernel_param_2];
	ld.param.u32 	%r5, [unscale_dest_kernel_param_3];
	ld.param.u32 	%r4, [unscale_dest_kernel_param_4];
	cvta.to.global.u64 	%rd1, %rd4;
	mov.u32 	%r6, %ntid.x;
	mov.u32 	%r7, %ctaid.x;
	mov.u32 	%r8, %tid.x;
	mad.lo.s32 	%r1, %r6, %r7, %r8;
	mov.u32 	%r9, %ntid.y;
	mov.u32 	%r10, %ctaid.y;
	mov.u32 	%r11, %tid.y;
	mad.lo.s32 	%r2, %r9, %r10, %r11;
	setp.lt.s32	%p1, %r1, %r3;
	setp.lt.s32	%p2, %r2, %r5;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB13_4;
	bra.uni 	BB13_1;

BB13_1:
	cvta.to.global.u64 	%rd5, %rd3;
	mad.lo.s32 	%r12, %r2, %r3, %r1;
	cvt.s64.s32	%rd2, %r12;
	mul.wide.s32 	%rd6, %r12, 16;
	add.s64 	%rd7, %rd5, %rd6;
	ld.global.v4.f32 	{%f4, %f5, %f6, %f7}, [%rd7];
	mov.f32 	%f8, 0f437F0000;
	div.approx.ftz.f32 	%f1, %f4, %f8;
	div.approx.ftz.f32 	%f2, %f5, %f8;
	div.approx.ftz.f32 	%f3, %f6, %f8;
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB13_3;

	shl.b64 	%rd8, %rd2, 4;
	add.s64 	%rd9, %rd1, %rd8;
	mov.f32 	%f12, 0f3F800000;
	st.global.v4.f32 	[%rd9], {%f1, %f2, %f3, %f12};
	bra.uni 	BB13_4;

BB13_3:
	shl.b64 	%rd10, %rd2, 3;
	add.s64 	%rd11, %rd1, %rd10;
	mov.f32 	%f13, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs1, %temp;
	}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs2, %temp;
	}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs3, %temp;
	}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs4, %temp;
	}
	st.global.v4.u16 	[%rd11], {%rs4, %rs3, %rs2, %rs1};

BB13_4:
	ret;
}


