//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//

// Module header: PTX ISA version 4.1, target architecture sm_30, 64-bit addresses.
.version 4.1
.target sm_30
.address_size 64

// Texture references sampled by ShaderKernel_IRIDASPrimary with tex.2d:
// texture0_RECT is fetched at the thread's pixel-centre coordinates,
// texture2_2D is fetched with a coordinate derived from the weighted texel sum.
.global .texref texture0_RECT;
.global .texref texture2_2D;
// ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local has been demoted

// ShaderKernel_IRIDASPrimary
//
// One thread per pixel (x, y), where
//   x = %ntid.x * %ctaid.x + %tid.x
//   y = %ntid.y * %ctaid.y + %tid.y
// Threads with x >= param_3 or y >= param_4 only take part in the shared-memory
// staging step and the barrier, then return without touching the output.
//
// Parameters, as used by the code below:
//   param_0  u64  destination buffer (converted to a global address)
//   param_1  u32  destination row pitch in pixels (pixel index = y * param_1 + x)
//   param_2  u32  output format: non-zero -> 4 x f32 per pixel (16 bytes),
//                 zero -> 4 x f16 per pixel (8 bytes)
//   param_3  u32  exclusive upper bound for x
//   param_4  u32  exclusive upper bound for y
//   param_5  u64  table of 12 float4 entries (192 bytes), staged into shared memory
//   param_6  u64  not read by this kernel
//   param_7  u64  not read by this kernel
//   param_8  u64  array of 32-bit switches, read at byte offsets 0..36
//
// Shared table layout (byte offset: use in this kernel):
//     0: weights .x/.y/.z for the weighted sum of texel components z/y/x
//    16: .x/.y/.z scale the (component - sum) differences for terms 1/2/3
//    32, 48, 64: per-component multipliers for terms 1/2/3
//    80, 96, 112: per-component addends for terms 1/2/3
//   128, 144, 160: per-component exponents for terms 1/2/3
//   176: .x/.y/.z blend factors for terms 1/2/3
//
// Switches at param_8 (byte offset: effect when non-zero):
//    0: use the three-term path mixed by texture2_2D weights
//    4, 8, 12: apply the signed power to term 1/2/3
//   16: (only when switch 0 is zero) run the single-term path; otherwise the
//       texel is passed through unchanged
//   20: apply the signed power in the single-term path
//   24: apply the final blend in the single-term path
//   28, 32, 36: apply the final blend to term 1/2/3
//
// NOTE(review): the staging copy is indexed by %tid.x alone, so all 12 entries
// are filled only when %ntid.x >= 12 -- confirm against the launch configuration.
.visible .entry ShaderKernel_IRIDASPrimary(
	.param .u64 ShaderKernel_IRIDASPrimary_param_0,
	.param .u32 ShaderKernel_IRIDASPrimary_param_1,
	.param .u32 ShaderKernel_IRIDASPrimary_param_2,
	.param .u32 ShaderKernel_IRIDASPrimary_param_3,
	.param .u32 ShaderKernel_IRIDASPrimary_param_4,
	.param .u64 ShaderKernel_IRIDASPrimary_param_5,
	.param .u64 ShaderKernel_IRIDASPrimary_param_6,
	.param .u64 ShaderKernel_IRIDASPrimary_param_7,
	.param .u64 ShaderKernel_IRIDASPrimary_param_8
)
{
	.reg .pred 	%p<40>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<32>;
	.reg .f32 	%f<355>;
	.reg .s64 	%rd<24>;
	// demoted variable
	.shared .align 16 .b8 ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local[192];

	ld.param.u64 	%rd3, [ShaderKernel_IRIDASPrimary_param_0];
	ld.param.u32 	%r4, [ShaderKernel_IRIDASPrimary_param_1];
	ld.param.u32 	%r5, [ShaderKernel_IRIDASPrimary_param_2];
	ld.param.u32 	%r6, [ShaderKernel_IRIDASPrimary_param_3];
	ld.param.u32 	%r7, [ShaderKernel_IRIDASPrimary_param_4];
	ld.param.u64 	%rd4, [ShaderKernel_IRIDASPrimary_param_5];
	ld.param.u64 	%rd5, [ShaderKernel_IRIDASPrimary_param_8];
	cvta.to.global.u64 	%rd1, %rd5;
	// %r2 = x, %r3 = y (global pixel coordinates of this thread).
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r1, %tid.x;
	mad.lo.s32 	%r2, %r8, %r9, %r1;
	mov.u32 	%r10, %ntid.y;
	mov.u32 	%r11, %ctaid.y;
	mov.u32 	%r12, %tid.y;
	mad.lo.s32 	%r3, %r10, %r11, %r12;
	// %p3 = pixel is inside the param_3 x param_4 rectangle.
	setp.lt.s32	%p1, %r2, %r6;
	setp.lt.s32	%p2, %r3, %r7;
	and.pred  	%p3, %p1, %p2;

	// Stage the 12-entry float4 table into shared memory: threads with
	// %tid.x <= 11 each copy entry %tid.x. This runs BEFORE the bounds exit so
	// that blocks straddling the right edge still fill every entry and every
	// thread of the block reaches the barrier.
	setp.gt.u32	%p4, %r1, 11;
	@%p4 bra 	BB0_3;

	cvta.to.global.u64 	%rd6, %rd4;
	mul.wide.u32 	%rd7, %r1, 16;
	mov.u64 	%rd8, ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local;
	add.s64 	%rd9, %rd8, %rd7;
	add.s64 	%rd10, %rd6, %rd7;
	ld.global.v4.f32 	{%f135, %f136, %f137, %f138}, [%rd10];
	st.shared.v4.f32 	[%rd9], {%f135, %f136, %f137, %f138};

BB0_3:
	// Make the staged table visible to the whole block.
	bar.sync 	0;
	// Out-of-range threads are done once they have passed the barrier.
	@!%p3 bra 	BB0_61;

	// Sample texture0_RECT at the pixel centre (x + 0.5, y + 0.5).
	cvt.rn.f32.s32	%f143, %r2;
	add.ftz.f32 	%f1, %f143, 0f3F000000;
	cvt.rn.f32.s32	%f144, %r3;
	add.ftz.f32 	%f2, %f144, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f145, %f146, %f147, %f148}, [texture0_RECT, {%f1, %f2}];
	// inline asm
	// %f14 = weighted sum of the first three texel components (table entry 0);
	// %f15/%f16/%f17 = each component minus that sum.
	ld.shared.v4.f32 	{%f151, %f152, %f153, %f154}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local];
	mul.ftz.f32 	%f155, %f146, %f152;
	fma.rn.ftz.f32 	%f156, %f147, %f151, %f155;
	fma.rn.ftz.f32 	%f14, %f145, %f153, %f156;
	sub.ftz.f32 	%f15, %f147, %f14;
	sub.ftz.f32 	%f16, %f146, %f14;
	sub.ftz.f32 	%f17, %f145, %f14;
	// Switch 0 selects the three-term path; the result registers default to
	// the unmodified texel for the pass-through case.
	ld.global.u32 	%r13, [%rd1];
	setp.eq.s32	%p5, %r13, 0;
	mov.f32 	%f354, %f145;
	mov.f32 	%f353, %f146;
	mov.f32 	%f352, %f147;
	@%p5 bra 	BB0_44;

	// Three-term path: fetch mixing weights (%f157, %f158, %f159) from
	// texture2_2D at u = %f14 * 0f3F7FF000 + 0f3A000000, v = 0f3D800000 (0.0625).
	fma.rn.ftz.f32 	%f161, %f14, 0f3F7FF000, 0f3A000000;
	mov.f32 	%f162, 0f3D800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f157, %f158, %f159, %f160}, [texture2_2D, {%f161, %f162}];
	// inline asm
	// Term 1: scale the differences by entry@16.x, add the sum back, then
	// multiply by entry@32 and add entry@80 per component.
	ld.shared.v4.f32 	{%f163, %f164, %f165, %f166}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+16];
	fma.rn.ftz.f32 	%f168, %f163, %f15, %f14;
	fma.rn.ftz.f32 	%f169, %f163, %f16, %f14;
	fma.rn.ftz.f32 	%f170, %f163, %f17, %f14;
	ld.shared.v4.f32 	{%f171, %f172, %f173, %f174}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+32];
	ld.global.u32 	%r14, [%rd1+4];
	setp.eq.s32	%p6, %r14, 0;
	ld.shared.v4.f32 	{%f175, %f176, %f177, %f178}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+80];
	fma.rn.ftz.f32 	%f334, %f168, %f171, %f175;
	fma.rn.ftz.f32 	%f335, %f169, %f172, %f176;
	fma.rn.ftz.f32 	%f336, %f170, %f173, %f177;
	// Entries @160 (term-3 exponents) and @176 (blend factors) are loaded here
	// for use further down.
	ld.shared.v4.f32 	{%f185, %f186, %f187, %f188}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+160];
	ld.shared.v4.f32 	{%f189, %f190, %f191, %f192}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+176];
	@%p6 bra 	BB0_15;

	// Signed power for term 1: each component v becomes
	// sign(v) * ex2(lg2(|v|) * e), or 0 when |v| is not greater than 0
	// (setp.gtu also takes the power branch for NaN). Exponents come from @128.
	abs.ftz.f32 	%f33, %f335;
	abs.ftz.f32 	%f34, %f336;
	abs.ftz.f32 	%f35, %f334;
	setp.gtu.ftz.f32	%p7, %f35, 0f00000000;
	@%p7 bra 	BB0_7;

	mov.f32 	%f331, 0f00000000;
	bra.uni 	BB0_8;

BB0_7:
	ld.shared.f32 	%f194, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+128];
	lg2.approx.ftz.f32 	%f195, %f35;
	mul.ftz.f32 	%f196, %f195, %f194;
	ex2.approx.ftz.f32 	%f331, %f196;

BB0_8:
	setp.gtu.ftz.f32	%p8, %f33, 0f00000000;
	@%p8 bra 	BB0_10;

	mov.f32 	%f332, 0f00000000;
	bra.uni 	BB0_11;

BB0_10:
	ld.shared.f32 	%f198, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+132];
	lg2.approx.ftz.f32 	%f199, %f33;
	mul.ftz.f32 	%f200, %f199, %f198;
	ex2.approx.ftz.f32 	%f332, %f200;

BB0_11:
	setp.gtu.ftz.f32	%p9, %f34, 0f00000000;
	@%p9 bra 	BB0_13;

	mov.f32 	%f333, 0f00000000;
	bra.uni 	BB0_14;

BB0_13:
	ld.shared.f32 	%f202, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+136];
	lg2.approx.ftz.f32 	%f203, %f34;
	mul.ftz.f32 	%f204, %f203, %f202;
	ex2.approx.ftz.f32 	%f333, %f204;

BB0_14:
	// Re-apply the sign of the pre-power value (-1.0 when negative, else +1.0).
	setp.lt.ftz.f32	%p10, %f334, 0f00000000;
	selp.f32	%f205, 0fBF800000, 0f3F800000, %p10;
	setp.lt.ftz.f32	%p11, %f335, 0f00000000;
	selp.f32	%f206, 0fBF800000, 0f3F800000, %p11;
	setp.lt.ftz.f32	%p12, %f336, 0f00000000;
	selp.f32	%f207, 0fBF800000, 0f3F800000, %p12;
	mul.ftz.f32 	%f334, %f331, %f205;
	mul.ftz.f32 	%f335, %f332, %f206;
	mul.ftz.f32 	%f336, %f333, %f207;

BB0_15:
	// Switch @28: blend term 1 around its own weighted sum by factor entry@176.x.
	ld.global.u32 	%r15, [%rd1+28];
	setp.eq.s32	%p13, %r15, 0;
	@%p13 bra 	BB0_17;

	mul.ftz.f32 	%f208, %f335, %f152;
	fma.rn.ftz.f32 	%f209, %f334, %f151, %f208;
	fma.rn.ftz.f32 	%f210, %f336, %f153, %f209;
	sub.ftz.f32 	%f211, %f334, %f210;
	sub.ftz.f32 	%f212, %f335, %f210;
	sub.ftz.f32 	%f213, %f336, %f210;
	fma.rn.ftz.f32 	%f334, %f189, %f211, %f210;
	fma.rn.ftz.f32 	%f335, %f189, %f212, %f210;
	fma.rn.ftz.f32 	%f336, %f189, %f213, %f210;

BB0_17:
	// Term 2: same recipe with scale entry@16.y, multiplier @48, addend @96.
	fma.rn.ftz.f32 	%f214, %f164, %f15, %f14;
	fma.rn.ftz.f32 	%f215, %f164, %f16, %f14;
	fma.rn.ftz.f32 	%f216, %f164, %f17, %f14;
	ld.shared.v4.f32 	{%f217, %f218, %f219, %f220}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+48];
	ld.global.u32 	%r16, [%rd1+8];
	setp.eq.s32	%p14, %r16, 0;
	ld.shared.v4.f32 	{%f221, %f222, %f223, %f224}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+96];
	fma.rn.ftz.f32 	%f340, %f214, %f217, %f221;
	fma.rn.ftz.f32 	%f341, %f215, %f218, %f222;
	fma.rn.ftz.f32 	%f342, %f216, %f219, %f223;
	@%p14 bra 	BB0_28;

	// Signed power for term 2, exponents from @144.
	abs.ftz.f32 	%f57, %f341;
	abs.ftz.f32 	%f58, %f342;
	abs.ftz.f32 	%f59, %f340;
	setp.gtu.ftz.f32	%p15, %f59, 0f00000000;
	@%p15 bra 	BB0_20;

	mov.f32 	%f337, 0f00000000;
	bra.uni 	BB0_21;

BB0_20:
	ld.shared.f32 	%f232, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+144];
	lg2.approx.ftz.f32 	%f233, %f59;
	mul.ftz.f32 	%f234, %f233, %f232;
	ex2.approx.ftz.f32 	%f337, %f234;

BB0_21:
	setp.gtu.ftz.f32	%p16, %f57, 0f00000000;
	@%p16 bra 	BB0_23;

	mov.f32 	%f338, 0f00000000;
	bra.uni 	BB0_24;

BB0_23:
	ld.shared.f32 	%f236, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+148];
	lg2.approx.ftz.f32 	%f237, %f57;
	mul.ftz.f32 	%f238, %f237, %f236;
	ex2.approx.ftz.f32 	%f338, %f238;

BB0_24:
	setp.gtu.ftz.f32	%p17, %f58, 0f00000000;
	@%p17 bra 	BB0_26;

	mov.f32 	%f339, 0f00000000;
	bra.uni 	BB0_27;

BB0_26:
	ld.shared.f32 	%f240, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+152];
	lg2.approx.ftz.f32 	%f241, %f58;
	mul.ftz.f32 	%f242, %f241, %f240;
	ex2.approx.ftz.f32 	%f339, %f242;

BB0_27:
	setp.lt.ftz.f32	%p18, %f340, 0f00000000;
	selp.f32	%f243, 0fBF800000, 0f3F800000, %p18;
	setp.lt.ftz.f32	%p19, %f341, 0f00000000;
	selp.f32	%f244, 0fBF800000, 0f3F800000, %p19;
	setp.lt.ftz.f32	%p20, %f342, 0f00000000;
	selp.f32	%f245, 0fBF800000, 0f3F800000, %p20;
	mul.ftz.f32 	%f340, %f337, %f243;
	mul.ftz.f32 	%f341, %f338, %f244;
	mul.ftz.f32 	%f342, %f339, %f245;

BB0_28:
	// Switch @32: blend term 2 around its weighted sum by factor entry@176.y.
	ld.global.u32 	%r17, [%rd1+32];
	setp.eq.s32	%p21, %r17, 0;
	@%p21 bra 	BB0_30;

	mul.ftz.f32 	%f246, %f341, %f152;
	fma.rn.ftz.f32 	%f247, %f340, %f151, %f246;
	fma.rn.ftz.f32 	%f248, %f342, %f153, %f247;
	sub.ftz.f32 	%f249, %f340, %f248;
	sub.ftz.f32 	%f250, %f341, %f248;
	sub.ftz.f32 	%f251, %f342, %f248;
	fma.rn.ftz.f32 	%f340, %f190, %f249, %f248;
	fma.rn.ftz.f32 	%f341, %f190, %f250, %f248;
	fma.rn.ftz.f32 	%f342, %f190, %f251, %f248;

BB0_30:
	// Partial mix: %f78..%f80 = weight1 * term1 + weight2 * term2.
	mul.ftz.f32 	%f252, %f157, %f334;
	mul.ftz.f32 	%f253, %f157, %f335;
	mul.ftz.f32 	%f254, %f157, %f336;
	fma.rn.ftz.f32 	%f78, %f158, %f340, %f252;
	fma.rn.ftz.f32 	%f79, %f158, %f341, %f253;
	fma.rn.ftz.f32 	%f80, %f158, %f342, %f254;
	// Term 3: scale entry@16.z, multiplier @64, addend @112.
	fma.rn.ftz.f32 	%f255, %f165, %f15, %f14;
	fma.rn.ftz.f32 	%f256, %f165, %f16, %f14;
	fma.rn.ftz.f32 	%f257, %f165, %f17, %f14;
	ld.shared.v4.f32 	{%f258, %f259, %f260, %f261}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+64];
	ld.global.u32 	%r18, [%rd1+12];
	setp.eq.s32	%p22, %r18, 0;
	ld.shared.v4.f32 	{%f262, %f263, %f264, %f265}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+112];
	fma.rn.ftz.f32 	%f346, %f255, %f258, %f262;
	fma.rn.ftz.f32 	%f347, %f256, %f259, %f263;
	fma.rn.ftz.f32 	%f348, %f257, %f260, %f264;
	@%p22 bra 	BB0_41;

	// Signed power for term 3, exponents %f185..%f187 (entry @160).
	abs.ftz.f32 	%f84, %f347;
	abs.ftz.f32 	%f85, %f348;
	abs.ftz.f32 	%f86, %f346;
	setp.gtu.ftz.f32	%p23, %f86, 0f00000000;
	@%p23 bra 	BB0_33;

	mov.f32 	%f343, 0f00000000;
	bra.uni 	BB0_34;

BB0_33:
	lg2.approx.ftz.f32 	%f273, %f86;
	mul.ftz.f32 	%f274, %f273, %f185;
	ex2.approx.ftz.f32 	%f343, %f274;

BB0_34:
	setp.gtu.ftz.f32	%p24, %f84, 0f00000000;
	@%p24 bra 	BB0_36;

	mov.f32 	%f344, 0f00000000;
	bra.uni 	BB0_37;

BB0_36:
	lg2.approx.ftz.f32 	%f276, %f84;
	mul.ftz.f32 	%f277, %f276, %f186;
	ex2.approx.ftz.f32 	%f344, %f277;

BB0_37:
	setp.gtu.ftz.f32	%p25, %f85, 0f00000000;
	@%p25 bra 	BB0_39;

	mov.f32 	%f345, 0f00000000;
	bra.uni 	BB0_40;

BB0_39:
	lg2.approx.ftz.f32 	%f279, %f85;
	mul.ftz.f32 	%f280, %f279, %f187;
	ex2.approx.ftz.f32 	%f345, %f280;

BB0_40:
	setp.lt.ftz.f32	%p26, %f346, 0f00000000;
	selp.f32	%f281, 0fBF800000, 0f3F800000, %p26;
	setp.lt.ftz.f32	%p27, %f347, 0f00000000;
	selp.f32	%f282, 0fBF800000, 0f3F800000, %p27;
	setp.lt.ftz.f32	%p28, %f348, 0f00000000;
	selp.f32	%f283, 0fBF800000, 0f3F800000, %p28;
	mul.ftz.f32 	%f346, %f343, %f281;
	mul.ftz.f32 	%f347, %f344, %f282;
	mul.ftz.f32 	%f348, %f345, %f283;

BB0_41:
	// Switch @36: blend term 3 around its weighted sum by factor entry@176.z.
	ld.global.u32 	%r19, [%rd1+36];
	setp.eq.s32	%p29, %r19, 0;
	@%p29 bra 	BB0_43;

	mul.ftz.f32 	%f284, %f347, %f152;
	fma.rn.ftz.f32 	%f285, %f346, %f151, %f284;
	fma.rn.ftz.f32 	%f286, %f348, %f153, %f285;
	sub.ftz.f32 	%f287, %f346, %f286;
	sub.ftz.f32 	%f288, %f347, %f286;
	sub.ftz.f32 	%f289, %f348, %f286;
	fma.rn.ftz.f32 	%f346, %f191, %f287, %f286;
	fma.rn.ftz.f32 	%f347, %f191, %f288, %f286;
	fma.rn.ftz.f32 	%f348, %f191, %f289, %f286;

BB0_43:
	// Final mix: result = partial mix + weight3 * term3.
	fma.rn.ftz.f32 	%f352, %f159, %f346, %f78;
	fma.rn.ftz.f32 	%f353, %f159, %f347, %f79;
	fma.rn.ftz.f32 	%f354, %f159, %f348, %f80;
	bra.uni 	BB0_58;

BB0_44:
	// Single-term path (switch 0 is zero). When switch @16 is also zero the
	// texel is stored unchanged.
	ld.global.u32 	%r20, [%rd1+16];
	setp.eq.s32	%p30, %r20, 0;
	@%p30 bra 	BB0_58;

	// Scale by entry@16.x, multiply by entry@32, add entry@80.
	ld.shared.f32 	%f290, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+16];
	fma.rn.ftz.f32 	%f291, %f290, %f15, %f14;
	fma.rn.ftz.f32 	%f292, %f290, %f16, %f14;
	fma.rn.ftz.f32 	%f293, %f290, %f17, %f14;
	ld.shared.v4.f32 	{%f294, %f295, %f296, %f297}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+32];
	ld.global.u32 	%r21, [%rd1+20];
	setp.eq.s32	%p31, %r21, 0;
	ld.shared.v4.f32 	{%f298, %f299, %f300, %f301}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+80];
	fma.rn.ftz.f32 	%f352, %f291, %f294, %f298;
	fma.rn.ftz.f32 	%f353, %f292, %f295, %f299;
	fma.rn.ftz.f32 	%f354, %f293, %f296, %f300;
	ld.shared.v4.f32 	{%f308, %f309, %f310, %f311}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+128];
	@%p31 bra 	BB0_56;

	// Signed power, exponents %f308..%f310 (entry @128).
	abs.ftz.f32 	%f114, %f353;
	abs.ftz.f32 	%f115, %f354;
	abs.ftz.f32 	%f116, %f352;
	setp.gtu.ftz.f32	%p32, %f116, 0f00000000;
	@%p32 bra 	BB0_48;

	mov.f32 	%f349, 0f00000000;
	bra.uni 	BB0_49;

BB0_48:
	lg2.approx.ftz.f32 	%f313, %f116;
	mul.ftz.f32 	%f314, %f313, %f308;
	ex2.approx.ftz.f32 	%f349, %f314;

BB0_49:
	setp.gtu.ftz.f32	%p33, %f114, 0f00000000;
	@%p33 bra 	BB0_51;

	mov.f32 	%f350, 0f00000000;
	bra.uni 	BB0_52;

BB0_51:
	lg2.approx.ftz.f32 	%f316, %f114;
	mul.ftz.f32 	%f317, %f316, %f309;
	ex2.approx.ftz.f32 	%f350, %f317;

BB0_52:
	setp.gtu.ftz.f32	%p34, %f115, 0f00000000;
	@%p34 bra 	BB0_54;

	mov.f32 	%f351, 0f00000000;
	bra.uni 	BB0_55;

BB0_54:
	lg2.approx.ftz.f32 	%f319, %f115;
	mul.ftz.f32 	%f320, %f319, %f310;
	ex2.approx.ftz.f32 	%f351, %f320;

BB0_55:
	setp.lt.ftz.f32	%p35, %f352, 0f00000000;
	selp.f32	%f321, 0fBF800000, 0f3F800000, %p35;
	setp.lt.ftz.f32	%p36, %f353, 0f00000000;
	selp.f32	%f322, 0fBF800000, 0f3F800000, %p36;
	setp.lt.ftz.f32	%p37, %f354, 0f00000000;
	selp.f32	%f323, 0fBF800000, 0f3F800000, %p37;
	mul.ftz.f32 	%f352, %f349, %f321;
	mul.ftz.f32 	%f353, %f350, %f322;
	mul.ftz.f32 	%f354, %f351, %f323;

BB0_56:
	// Switch @24: blend around the weighted sum by factor entry@176.x.
	ld.global.u32 	%r22, [%rd1+24];
	setp.eq.s32	%p38, %r22, 0;
	@%p38 bra 	BB0_58;

	mul.ftz.f32 	%f324, %f353, %f152;
	fma.rn.ftz.f32 	%f325, %f352, %f151, %f324;
	fma.rn.ftz.f32 	%f326, %f354, %f153, %f325;
	sub.ftz.f32 	%f327, %f352, %f326;
	sub.ftz.f32 	%f328, %f353, %f326;
	sub.ftz.f32 	%f329, %f354, %f326;
	ld.shared.f32 	%f330, [ShaderKernel_IRIDASPrimary$__cuda_local_var_180709_584_non_const_p_local+176];
	fma.rn.ftz.f32 	%f352, %f330, %f327, %f326;
	fma.rn.ftz.f32 	%f353, %f330, %f328, %f326;
	fma.rn.ftz.f32 	%f354, %f330, %f329, %f326;

BB0_58:
	// Store the result at pixel index y * param_1 + x; the fourth texel
	// component (%f148) is written through unchanged.
	mad.lo.s32 	%r31, %r3, %r4, %r2;
	cvt.s64.s32	%rd2, %r31;
	setp.eq.s32	%p39, %r5, 0;
	@%p39 bra 	BB0_60;

	// param_2 != 0: four f32 values, 16 bytes per pixel.
	cvta.to.global.u64 	%rd18, %rd3;
	shl.b64 	%rd19, %rd2, 4;
	add.s64 	%rd20, %rd18, %rd19;
	st.global.v4.f32 	[%rd20], {%f354, %f353, %f352, %f148};
	bra.uni 	BB0_61;

BB0_60:
	// param_2 == 0: convert to four f16 values, 8 bytes per pixel.
	cvta.to.global.u64 	%rd21, %rd3;
	shl.b64 	%rd22, %rd2, 3;
	add.s64 	%rd23, %rd21, %rd22;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f148;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f352;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f353;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f354;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd23], {%rs4, %rs3, %rs2, %rs1};

BB0_61:
	ret;
}


