//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//

.version 4.1
.target sm_30
.address_size 64


//-----------------------------------------------------------------------------
// debug_copy_normalized_packed_float_array_to_float4_image_kernel
//
// Reads one float per thread from a packed float array (4-byte elements,
// see the *4 scaling at the load address), clamps it to [0, 1], scales by
// 255.0, and splats the result into the RGB channels of a float4 image
// (16-byte elements, see the *16 scaling at the store address) with alpha
// forced to 255.0.
//
// Parameter roles below are inferred from register usage — TODO confirm
// against the CUDA-side launch code:
//   param_0 (rd1): dst float4 image base pointer
//   param_1 (r5):  dst row stride, in float4 elements
//   param_2 (rd2): src packed float array base pointer
//   param_3 (r6):  src row stride, in float elements
//   param_4 (r7) / param_5 (r8):  copy-region width / height bound on (x, y)
//   param_6 (r9) / param_7 (r10): dst image width / height bound on dst (x, y)
//   param_8 (r11) / param_9 (r12): dst x / y offset added to the thread index
//-----------------------------------------------------------------------------
.visible .entry debug_copy_normalized_packed_float_array_to_float4_image_kernel(
	.param .u64 debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_0,
	.param .u32 debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_1,
	.param .u64 debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_2,
	.param .u32 debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_3,
	.param .u32 debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_4,
	.param .u32 debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_5,
	.param .u32 debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_6,
	.param .u32 debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_7,
	.param .u32 debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_8,
	.param .u32 debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_9
)
{
	.reg .pred 	%p<14>;
	.reg .s32 	%r<21>;
	.reg .f32 	%f<8>;
	.reg .s64 	%rd<9>;


	ld.param.u64 	%rd1, [debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_0];
	ld.param.u32 	%r5, [debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_1];
	ld.param.u64 	%rd2, [debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_2];
	ld.param.u32 	%r6, [debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_3];
	ld.param.u32 	%r7, [debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_4];
	ld.param.u32 	%r8, [debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_5];
	ld.param.u32 	%r9, [debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_6];
	ld.param.u32 	%r10, [debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_7];
	ld.param.u32 	%r11, [debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_8];
	ld.param.u32 	%r12, [debug_copy_normalized_packed_float_array_to_float4_image_kernel_param_9];
	// Global thread coordinates: x = ntid.x * ctaid.x + tid.x, y likewise.
	mov.u32 	%r13, %ntid.x;
	mov.u32 	%r14, %ctaid.x;
	mov.u32 	%r15, %tid.x;
	mad.lo.s32 	%r1, %r13, %r14, %r15;	// r1 = x (source-relative column)
	mov.u32 	%r16, %ntid.y;
	mov.u32 	%r17, %ctaid.y;
	mov.u32 	%r18, %tid.y;
	mad.lo.s32 	%r2, %r16, %r17, %r18;	// r2 = y (source-relative row)
	add.s32 	%r3, %r1, %r11;	// r3 = dst_x = x + x_offset
	add.s32 	%r4, %r2, %r12;	// r4 = dst_y = y + y_offset
	// Bounds predicate chain: x < r7 && y < r8 && dst_x >= 0 && dst_x < r9
	//                         && dst_y >= 0 && dst_y < r10
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	setp.gt.s32	%p4, %r3, -1;	// dst_x >= 0, expressed as dst_x > -1
	and.pred  	%p5, %p3, %p4;
	setp.lt.s32	%p6, %r3, %r9;
	and.pred  	%p7, %p5, %p6;
	setp.gt.s32	%p8, %r4, -1;	// dst_y >= 0
	and.pred  	%p9, %p7, %p8;
	setp.lt.s32	%p10, %r4, %r10;
	and.pred  	%p11, %p9, %p10;
	@!%p11 bra 	BB0_5;	// out of bounds: exit without touching memory
	bra.uni 	BB0_1;

BB0_1:
	// Load src[y * src_stride + x]; *4 = sizeof(float).
	cvta.to.global.u64 	%rd3, %rd2;
	mad.lo.s32 	%r19, %r2, %r6, %r1;
	mul.wide.s32 	%rd4, %r19, 4;
	add.s64 	%rd5, %rd3, %rd4;
	ld.global.f32 	%f1, [%rd5];
	// leu = (f1 <= 1.0f) OR unordered (NaN); 0f3F800000 = 1.0f.
	// Fall-through therefore means f1 > 1.0f: saturate to 255.
	setp.leu.ftz.f32	%p12, %f1, 0f3F800000;
	@%p12 bra 	BB0_3;

	mov.f32 	%f7, 0f437F0000;	// 0f437F0000 = 255.0f (value > 1 clamps high)
	bra.uni 	BB0_4;

BB0_3:
	// f1 <= 1 (or NaN): result = (f1 < 0) ? 0 : f1 * 255.
	setp.lt.ftz.f32	%p13, %f1, 0f00000000;
	mul.ftz.f32 	%f4, %f1, 0f437F0000;
	selp.f32	%f7, 0f00000000, %f4, %p13;

BB0_4:
	// Store dst[dst_y * dst_stride + dst_x] = {v, v, v, 255}; *16 = sizeof(float4).
	cvta.to.global.u64 	%rd6, %rd1;
	mad.lo.s32 	%r20, %r4, %r5, %r3;
	mul.wide.s32 	%rd7, %r20, 16;
	add.s64 	%rd8, %rd6, %rd7;
	mov.f32 	%f6, 0f437F0000;	// alpha = 255.0f
	st.global.v4.f32 	[%rd8], {%f7, %f7, %f7, %f6};

BB0_5:
	ret;
}

//-----------------------------------------------------------------------------
// debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel
//
// Reads one float2 per thread from a packed array (8-byte elements, see the
// *8 scaling at the load address), selects one of the two components based
// on param_10, clamps it to [0, 1], scales by 255.0, and splats it into the
// RGB channels of a float4 image (16-byte elements) with alpha forced to
// 255.0. Same structure as the float-array variant above, plus the
// component select.
//
// Parameter roles below are inferred from register usage — TODO confirm
// against the CUDA-side launch code:
//   param_0 (rd1): dst float4 image base pointer
//   param_1 (r5):  dst row stride, in float4 elements
//   param_2 (rd2): src packed float2 array base pointer
//   param_3 (r6):  src row stride, in float2 elements
//   param_4 (r8) / param_5 (r9):   copy-region width / height bound on (x, y)
//   param_6 (r10) / param_7 (r11): dst image width / height bound on dst (x, y)
//   param_8 (r12) / param_9 (r13): dst x / y offset added to the thread index
//   param_10 (r7): component selector — 0 picks element 1 of the float2,
//                  nonzero picks element 0 (see the selp below)
//-----------------------------------------------------------------------------
.visible .entry debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel(
	.param .u64 debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_0,
	.param .u32 debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_1,
	.param .u64 debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_2,
	.param .u32 debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_3,
	.param .u32 debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_4,
	.param .u32 debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_5,
	.param .u32 debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_6,
	.param .u32 debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_7,
	.param .u32 debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_8,
	.param .u32 debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_9,
	.param .u32 debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_10
)
{
	.reg .pred 	%p<15>;
	.reg .s32 	%r<22>;
	.reg .f32 	%f<12>;
	.reg .s64 	%rd<9>;


	ld.param.u64 	%rd1, [debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_0];
	ld.param.u32 	%r5, [debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_1];
	ld.param.u64 	%rd2, [debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_2];
	ld.param.u32 	%r6, [debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_3];
	ld.param.u32 	%r8, [debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_4];
	ld.param.u32 	%r9, [debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_5];
	ld.param.u32 	%r10, [debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_6];
	ld.param.u32 	%r11, [debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_7];
	ld.param.u32 	%r12, [debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_8];
	ld.param.u32 	%r13, [debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_9];
	ld.param.u32 	%r7, [debug_copy_normalized_packed_float2_uvarray_to_float4_image_kernel_param_10];
	// Global thread coordinates: x = ntid.x * ctaid.x + tid.x, y likewise.
	mov.u32 	%r14, %ntid.x;
	mov.u32 	%r15, %ctaid.x;
	mov.u32 	%r16, %tid.x;
	mad.lo.s32 	%r1, %r14, %r15, %r16;	// r1 = x (source-relative column)
	mov.u32 	%r17, %ntid.y;
	mov.u32 	%r18, %ctaid.y;
	mov.u32 	%r19, %tid.y;
	mad.lo.s32 	%r2, %r17, %r18, %r19;	// r2 = y (source-relative row)
	add.s32 	%r3, %r1, %r12;	// r3 = dst_x = x + x_offset
	add.s32 	%r4, %r2, %r13;	// r4 = dst_y = y + y_offset
	// Bounds predicate chain: x < r8 && y < r9 && dst_x >= 0 && dst_x < r10
	//                         && dst_y >= 0 && dst_y < r11
	setp.lt.s32	%p1, %r1, %r8;
	setp.lt.s32	%p2, %r2, %r9;
	and.pred  	%p3, %p1, %p2;
	setp.gt.s32	%p4, %r3, -1;	// dst_x >= 0, expressed as dst_x > -1
	and.pred  	%p5, %p3, %p4;
	setp.lt.s32	%p6, %r3, %r10;
	and.pred  	%p7, %p5, %p6;
	setp.gt.s32	%p8, %r4, -1;	// dst_y >= 0
	and.pred  	%p9, %p7, %p8;
	setp.lt.s32	%p10, %r4, %r11;
	and.pred  	%p11, %p9, %p10;
	@!%p11 bra 	BB1_5;	// out of bounds: exit without touching memory
	bra.uni 	BB1_1;

BB1_1:
	// Load src[y * src_stride + x]; *8 = sizeof(float2).
	cvta.to.global.u64 	%rd3, %rd2;
	mad.lo.s32 	%r20, %r2, %r6, %r1;
	mul.wide.s32 	%rd4, %r20, 8;
	add.s64 	%rd5, %rd3, %rd4;
	setp.eq.s32	%p12, %r7, 0;
	ld.global.v2.f32 	{%f4, %f5}, [%rd5];	// f4 = element 0, f5 = element 1
	selp.f32	%f1, %f5, %f4, %p12;	// param_10 == 0 ? element 1 : element 0
	// leu = (f1 <= 1.0f) OR unordered (NaN); 0f3F800000 = 1.0f.
	// Fall-through therefore means f1 > 1.0f: saturate to 255.
	setp.leu.ftz.f32	%p13, %f1, 0f3F800000;
	@%p13 bra 	BB1_3;

	mov.f32 	%f11, 0f437F0000;	// 0f437F0000 = 255.0f (value > 1 clamps high)
	bra.uni 	BB1_4;

BB1_3:
	// f1 <= 1 (or NaN): result = (f1 < 0) ? 0 : f1 * 255.
	setp.lt.ftz.f32	%p14, %f1, 0f00000000;
	mul.ftz.f32 	%f8, %f1, 0f437F0000;
	selp.f32	%f11, 0f00000000, %f8, %p14;

BB1_4:
	// Store dst[dst_y * dst_stride + dst_x] = {v, v, v, 255}; *16 = sizeof(float4).
	cvta.to.global.u64 	%rd6, %rd1;
	mad.lo.s32 	%r21, %r4, %r5, %r3;
	mul.wide.s32 	%rd7, %r21, 16;
	add.s64 	%rd8, %rd6, %rd7;
	mov.f32 	%f10, 0f437F0000;	// alpha = 255.0f
	st.global.v4.f32 	[%rd8], {%f11, %f11, %f11, %f10};

BB1_5:
	ret;
}

//-----------------------------------------------------------------------------
// debug_copy_normalized_float4_image_to_float4_image_kernel
//
// Reads one float4 per thread from the source image (16-byte elements),
// independently clamps each of the first three components (x, y, z) to
// [0, 1] and scales by 255.0, then writes the result to the destination
// float4 image with alpha forced to 255.0. The source alpha (f17) is loaded
// but discarded. Same bounds logic as the kernels above.
//
// Parameter roles below are inferred from register usage — TODO confirm
// against the CUDA-side launch code:
//   param_0 (rd1): dst float4 image base pointer
//   param_1 (r5):  dst row stride, in float4 elements
//   param_2 (rd2): src float4 image base pointer
//   param_3 (r6):  src row stride, in float4 elements
//   param_4 (r7) / param_5 (r8):  copy-region width / height bound on (x, y)
//   param_6 (r9) / param_7 (r10): dst image width / height bound on dst (x, y)
//   param_8 (r11) / param_9 (r12): dst x / y offset added to the thread index
//-----------------------------------------------------------------------------
.visible .entry debug_copy_normalized_float4_image_to_float4_image_kernel(
	.param .u64 debug_copy_normalized_float4_image_to_float4_image_kernel_param_0,
	.param .u32 debug_copy_normalized_float4_image_to_float4_image_kernel_param_1,
	.param .u64 debug_copy_normalized_float4_image_to_float4_image_kernel_param_2,
	.param .u32 debug_copy_normalized_float4_image_to_float4_image_kernel_param_3,
	.param .u32 debug_copy_normalized_float4_image_to_float4_image_kernel_param_4,
	.param .u32 debug_copy_normalized_float4_image_to_float4_image_kernel_param_5,
	.param .u32 debug_copy_normalized_float4_image_to_float4_image_kernel_param_6,
	.param .u32 debug_copy_normalized_float4_image_to_float4_image_kernel_param_7,
	.param .u32 debug_copy_normalized_float4_image_to_float4_image_kernel_param_8,
	.param .u32 debug_copy_normalized_float4_image_to_float4_image_kernel_param_9
)
{
	.reg .pred 	%p<18>;
	.reg .s32 	%r<21>;
	.reg .f32 	%f<28>;
	.reg .s64 	%rd<9>;


	ld.param.u64 	%rd1, [debug_copy_normalized_float4_image_to_float4_image_kernel_param_0];
	ld.param.u32 	%r5, [debug_copy_normalized_float4_image_to_float4_image_kernel_param_1];
	ld.param.u64 	%rd2, [debug_copy_normalized_float4_image_to_float4_image_kernel_param_2];
	ld.param.u32 	%r6, [debug_copy_normalized_float4_image_to_float4_image_kernel_param_3];
	ld.param.u32 	%r7, [debug_copy_normalized_float4_image_to_float4_image_kernel_param_4];
	ld.param.u32 	%r8, [debug_copy_normalized_float4_image_to_float4_image_kernel_param_5];
	ld.param.u32 	%r9, [debug_copy_normalized_float4_image_to_float4_image_kernel_param_6];
	ld.param.u32 	%r10, [debug_copy_normalized_float4_image_to_float4_image_kernel_param_7];
	ld.param.u32 	%r11, [debug_copy_normalized_float4_image_to_float4_image_kernel_param_8];
	ld.param.u32 	%r12, [debug_copy_normalized_float4_image_to_float4_image_kernel_param_9];
	// Global thread coordinates: x = ntid.x * ctaid.x + tid.x, y likewise.
	mov.u32 	%r13, %ntid.x;
	mov.u32 	%r14, %ctaid.x;
	mov.u32 	%r15, %tid.x;
	mad.lo.s32 	%r1, %r13, %r14, %r15;	// r1 = x (source-relative column)
	mov.u32 	%r16, %ntid.y;
	mov.u32 	%r17, %ctaid.y;
	mov.u32 	%r18, %tid.y;
	mad.lo.s32 	%r2, %r16, %r17, %r18;	// r2 = y (source-relative row)
	add.s32 	%r3, %r1, %r11;	// r3 = dst_x = x + x_offset
	add.s32 	%r4, %r2, %r12;	// r4 = dst_y = y + y_offset
	// Bounds predicate chain: x < r7 && y < r8 && dst_x >= 0 && dst_x < r9
	//                         && dst_y >= 0 && dst_y < r10
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	setp.gt.s32	%p4, %r3, -1;	// dst_x >= 0, expressed as dst_x > -1
	and.pred  	%p5, %p3, %p4;
	setp.lt.s32	%p6, %r3, %r9;
	and.pred  	%p7, %p5, %p6;
	setp.gt.s32	%p8, %r4, -1;	// dst_y >= 0
	and.pred  	%p9, %p7, %p8;
	setp.lt.s32	%p10, %r4, %r10;
	and.pred  	%p11, %p9, %p10;
	@!%p11 bra 	BB2_11;	// out of bounds: exit without touching memory
	bra.uni 	BB2_1;

BB2_1:
	// Load src[y * src_stride + x]; *16 = sizeof(float4).
	// f14..f17 = components x, y, z, w (w is never used afterwards).
	cvta.to.global.u64 	%rd3, %rd2;
	mad.lo.s32 	%r19, %r2, %r6, %r1;
	mul.wide.s32 	%rd4, %r19, 16;
	add.s64 	%rd5, %rd3, %rd4;
	ld.global.v4.f32 	{%f14, %f15, %f16, %f17}, [%rd5];
	// Component x: leu = (f14 <= 1.0f) OR NaN; 0f3F800000 = 1.0f.
	// Fall-through means f14 > 1.0f: saturate to 255 (0f437F0000 = 255.0f).
	setp.leu.ftz.f32	%p12, %f14, 0f3F800000;
	@%p12 bra 	BB2_3;

	mov.f32 	%f25, 0f437F0000;
	bra.uni 	BB2_4;

BB2_3:
	// f14 <= 1 (or NaN): result = (f14 < 0) ? 0 : f14 * 255.
	setp.lt.ftz.f32	%p13, %f14, 0f00000000;
	mul.ftz.f32 	%f18, %f14, 0f437F0000;
	selp.f32	%f25, 0f00000000, %f18, %p13;

BB2_4:
	// Component y: same clamp-to-[0,1] * 255 as above.
	setp.leu.ftz.f32	%p14, %f15, 0f3F800000;
	@%p14 bra 	BB2_6;

	mov.f32 	%f26, 0f437F0000;
	bra.uni 	BB2_7;

BB2_6:
	setp.lt.ftz.f32	%p15, %f15, 0f00000000;
	mul.ftz.f32 	%f20, %f15, 0f437F0000;
	selp.f32	%f26, 0f00000000, %f20, %p15;

BB2_7:
	// Component z: same clamp-to-[0,1] * 255 as above.
	setp.leu.ftz.f32	%p16, %f16, 0f3F800000;
	@%p16 bra 	BB2_9;

	mov.f32 	%f27, 0f437F0000;
	bra.uni 	BB2_10;

BB2_9:
	setp.lt.ftz.f32	%p17, %f16, 0f00000000;
	mul.ftz.f32 	%f22, %f16, 0f437F0000;
	selp.f32	%f27, 0f00000000, %f22, %p17;

BB2_10:
	// Store dst[dst_y * dst_stride + dst_x] = {rx, ry, rz, 255}.
	cvta.to.global.u64 	%rd6, %rd1;
	mad.lo.s32 	%r20, %r4, %r5, %r3;
	mul.wide.s32 	%rd7, %r20, 16;
	add.s64 	%rd8, %rd6, %rd7;
	mov.f32 	%f24, 0f437F0000;	// alpha = 255.0f (source alpha discarded)
	st.global.v4.f32 	[%rd8], {%f25, %f26, %f27, %f24};

BB2_11:
	ret;
}


