//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Wed Jul 10 12:41:20 2013 (1373485280)
// Cuda compilation tools, release 5.5, V5.5.0
//

.version 3.2
.target sm_30
.address_size 64

	.file	1 "D:/singlebarrel/releases/2014.03/shared/adobe/Iridas/IRIDASLIB/GPU/IRIDASPrimary.cu", 1399785249, 6569
	.file	2 "d:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\iridas\\iridaslib\\gpu\\IrGPGPUShaders.h", 1399785249, 44561
	.file	3 "d:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\mediacore\\external\\3rdparty\\nvidia\\cuda\\win\\include\\device_functions.h", 1399785281, 191626
// Texture reference sampled once per pixel by ShaderKernel_IRIDASPrimary with
// coordinates (x + 0.5, y + 0.5); presumably the source image - bound by the host.
.global .texref texture0_RECT;
// Texture reference sampled by ShaderKernel_IRIDASPrimary at (u, 0.0625), where u is
// derived from a weighted sum of the source texel; its .x/.y/.z components are used
// as blend weights for the three processing stages.
.global .texref texture2_2D;
// ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local has been demoted
// (it is declared as a .shared array inside the kernel body instead of at module scope).
// NUL-terminated ASCII string "__CUDA_FTZ"; not referenced by any code visible in this module.
.global .align 1 .b8 $str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};

// float POWs(float base, float exponent)      (mangled: _Z5POWs_ff)
//
// Guarded approximate power function. All compares flush denormals (.ftz):
//   base == 0 and exponent == 0  ->  bit pattern 0x7FFFFFFF (a NaN)
//   exponent < 0                 ->  1.0
//   otherwise                    ->  ex2.approx(exponent * lg2.approx(base))
// A NaN exponent fails both tests and therefore takes the lg2/ex2 path.
//
// In:  _Z5POWs_ff_param_0 = base, _Z5POWs_ff_param_1 = exponent
// Out: func_retval0       = result
.visible .func  (.param .b32 func_retval0) _Z5POWs_ff(
	.param .b32 _Z5POWs_ff_param_0,
	.param .b32 _Z5POWs_ff_param_1
)
{
	.reg .pred 	%p<4>;
	.reg .f32 	%f<6>;


	ld.param.f32 	%f1, [_Z5POWs_ff_param_0];	// %f1 = base
	ld.param.f32 	%f2, [_Z5POWs_ff_param_1];	// %f2 = exponent
	.loc 2 978 1
	// Case 1: both operands are zero -> return the NaN bit pattern.
	mov.f32 	%f5, 0f7FFFFFFF;
	setp.eq.ftz.f32	%p1, %f1, 0f00000000;
	setp.eq.and.ftz.f32	%p2, %f2, 0f00000000, %p1;
	@%p2 bra 	POWs_done;

	.loc 2 978 1
	// Case 2: strictly negative exponent -> return 1.0.
	// (ordered "<" is the exact complement of the unordered ">=" test)
	mov.f32 	%f5, 0f3F800000;
	setp.lt.ftz.f32	%p3, %f2, 0f00000000;
	@%p3 bra 	POWs_done;

	.loc 3 3600 10
	// Case 3: base^exponent = 2^(log2(base) * exponent), approximate hardware ops.
	lg2.approx.ftz.f32 	%f3, %f1;
	mul.ftz.f32 	%f4, %f3, %f2;
	ex2.approx.ftz.f32 	%f5, %f4;

POWs_done:
	st.param.f32	[func_retval0+0], %f5;
	.loc 2 978 38
	ret;
}

// ---------------------------------------------------------------------------
// ShaderKernel_IRIDASPrimary
//
// One thread per pixel: x = ntid.x*ctaid.x + tid.x, y = ntid.y*ctaid.y + tid.y.
// Each in-range thread samples texture0_RECT at (x+0.5, y+0.5), optionally runs
// the texel's x/y/z components through one or three parametric stages, and
// writes the result (the fourth component is passed through untouched) to the
// output buffer.
//
// Parameters (as used by the code below):
//   param_0 (u64)  output buffer; pixel index = y*param_1 + x
//   param_1 (u32)  multiplier applied to y when forming the pixel index
//                  (presumably the output row pitch in pixels - confirm with host)
//   param_2 (u32)  != 0: store 4 x f32 (16 bytes/pixel)
//                  == 0: store 4 x f16 (8 bytes/pixel)
//   param_3 (u32)  exclusive upper bound for x
//   param_4 (u32)  exclusive upper bound for y
//   param_5 (u64)  global pointer to 12 float4 values (192 bytes) copied into
//                  the shared table P[0..11] at kernel start
//   param_6, param_7   never read by this kernel
//   param_8 (u64)  global pointer to 32-bit flags F[0..9] (byte offsets 0..36)
//
// Shared table P (byte offset = 16*index):
//   P[0].xyz        weights of the weighted sum S = z*P0.x + y*P0.y + x*P0.z
//                   (texel .z pairs with parameter .x, texel .x with parameter .z)
//   P[1].x/.y/.z    per-stage factor k: c = S + k*(texel - S)
//   P[2..4]         per-stage multiplier        P[5..7]   per-stage addend
//   P[8..10]        per-stage exponent          P[11].x/.y/.z  per-stage second factor
//
// Flags F:
//   F[0]  != 0: three-stage path, stages blended with texture2_2D weights
//   F[1..3]     enable the power step of stage 0..2   (three-stage path)
//   F[7..9]     enable the second S-relative scale of stage 0..2
//   F[4]        F[0]==0 only: enable the single-stage path (else texel is copied)
//   F[5], F[6]  single-stage path: enable power step / second scale
//
// The power step is an inlined copy of _Z5POWs_ff applied to |c| and then
// multiplied by sign(c) (-1 when c < 0, +1 otherwise).
//
// FIX: the shared-table fill and bar.sync now execute BEFORE the pixel bounds
// check. Previously out-of-range threads returned first, so in an edge block
// with fewer than 12 in-range columns some of P[0..11] was never written (and
// those threads never reached the barrier), leaving in-range threads to read
// uninitialised shared memory.
// NOTE(review): the fill still needs ntid.x >= 12; smaller blocks leave part of
// P unwritten - confirm the launch configuration on the host side.
// ---------------------------------------------------------------------------
.visible .entry ShaderKernel_IRIDASPrimary(
	.param .u64 ShaderKernel_IRIDASPrimary_param_0,
	.param .u32 ShaderKernel_IRIDASPrimary_param_1,
	.param .u32 ShaderKernel_IRIDASPrimary_param_2,
	.param .u32 ShaderKernel_IRIDASPrimary_param_3,
	.param .u32 ShaderKernel_IRIDASPrimary_param_4,
	.param .u64 ShaderKernel_IRIDASPrimary_param_5,
	.param .u64 ShaderKernel_IRIDASPrimary_param_6,
	.param .u64 ShaderKernel_IRIDASPrimary_param_7,
	.param .u64 ShaderKernel_IRIDASPrimary_param_8
)
{
	.reg .pred 	%p<76>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<32>;
	.reg .f32 	%f<372>;
	.reg .s64 	%rd<24>;
	// demoted variable
	.shared .align 16 .b8 ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local[192];

	// ---- Load kernel arguments -------------------------------------------
	ld.param.u64 	%rd4, [ShaderKernel_IRIDASPrimary_param_0];	// output buffer
	ld.param.u32 	%r4, [ShaderKernel_IRIDASPrimary_param_1];	// y multiplier for pixel index
	ld.param.u32 	%r5, [ShaderKernel_IRIDASPrimary_param_2];	// f32 (!=0) / f16 (==0) output
	ld.param.u32 	%r6, [ShaderKernel_IRIDASPrimary_param_3];	// x bound
	ld.param.u32 	%r7, [ShaderKernel_IRIDASPrimary_param_4];	// y bound
	ld.param.u64 	%rd6, [ShaderKernel_IRIDASPrimary_param_5];	// parameter table (global)
	ld.param.u64 	%rd5, [ShaderKernel_IRIDASPrimary_param_8];	// flag array (global)
	cvta.to.global.u64 	%rd1, %rd5;		// %rd1 = flags F
	cvta.to.global.u64 	%rd2, %rd6;		// %rd2 = parameter table source
	.loc 1 64 1
	// ---- Pixel coordinates: %r2 = x, %r3 = y; %r1 = tid.x ------------------
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r1, %tid.x;
	mad.lo.s32 	%r2, %r8, %r9, %r1;
	mov.u32 	%r10, %ntid.y;
	mov.u32 	%r11, %ctaid.y;
	mov.u32 	%r12, %tid.y;
	mad.lo.s32 	%r3, %r10, %r11, %r12;
	.loc 1 64 1
	// ---- Cooperative fill of the shared table ------------------------------
	// Every thread with tid.x <= 11 (in every row of the block, and regardless
	// of whether its pixel is in range) copies one float4: P[tid.x].
	setp.gt.u32	%p4, %r1, 11;
	@%p4 bra 	BB1_3;

	.loc 1 64 1
	mul.wide.u32 	%rd7, %r1, 16;
	mov.u64 	%rd8, ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local;
	add.s64 	%rd9, %rd8, %rd7;
	add.s64 	%rd10, %rd2, %rd7;
	ld.global.v4.f32 	{%f153, %f154, %f155, %f156}, [%rd10];
	st.shared.v4.f32 	[%rd9], {%f153, %f154, %f155, %f156};

BB1_3:
	.loc 1 64 1
	// All threads of the block reach this barrier; P is complete afterwards.
	bar.sync 	0;
	.loc 1 64 1
	// ---- Bounds check: only now may out-of-range threads leave -------------
	setp.lt.s32	%p1, %r2, %r6;
	setp.lt.s32	%p2, %r3, %r7;
	and.pred  	%p3, %p1, %p2;
	.loc 1 64 1
	@!%p3 bra 	BB1_85;

	.loc 1 64 1
	// Sample position (x + 0.5, y + 0.5).
	cvt.rn.f32.s32	%f151, %r2;
	add.ftz.f32 	%f1, %f151, 0f3F000000;
	cvt.rn.f32.s32	%f152, %r3;
	add.ftz.f32 	%f2, %f152, 0f3F000000;
	.loc 1 64 105
	// Source texel: %f161 = .x, %f162 = .y, %f163 = .z, %f164 = .w
	// inline asm
	tex.2d.v4.f32.f32 {%f161, %f162, %f163, %f164}, [texture0_RECT, {%f1, %f2}];
	// inline asm
	.loc 1 64 1
	// S (%f10) = z*P0.x + y*P0.y + x*P0.z
	ld.shared.v4.f32 	{%f167, %f168, %f169, %f170}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local];
	mul.ftz.f32 	%f171, %f162, %f168;
	fma.rn.ftz.f32 	%f172, %f163, %f167, %f171;
	fma.rn.ftz.f32 	%f10, %f161, %f169, %f172;
	.loc 1 64 1
	// Differences from S: %f11 = z - S, %f12 = y - S, %f13 = x - S
	sub.ftz.f32 	%f11, %f163, %f10;
	sub.ftz.f32 	%f12, %f162, %f10;
	sub.ftz.f32 	%f13, %f161, %f10;
	.loc 1 64 1
	// F[0] selects the three-stage path.
	ld.global.u32 	%r13, [%rd1];
	setp.eq.s32	%p5, %r13, 0;
	.loc 1 64 105
	// Default result = unmodified texel (%f371 = x, %f370 = y, %f369 = z).
	mov.f32 	%f371, %f161;
	mov.f32 	%f370, %f162;
	mov.f32 	%f369, %f163;
	.loc 1 64 1
	@%p5 bra 	BB1_62;

	// =======================================================================
	// Three-stage path (F[0] != 0)
	// =======================================================================
	.loc 1 64 1
	// Blend weights: sample texture2_2D at (S*1023/1024 + 1/2048, 0.0625).
	fma.rn.ftz.f32 	%f177, %f10, 0f3F7FC000, 0f3A000000;
	mov.f32 	%f178, 0f3D800000;
	.loc 1 64 152
	// inline asm
	tex.2d.v4.f32.f32 {%f173, %f174, %f175, %f176}, [texture2_2D, {%f177, %f178}];
	// inline asm
	.loc 1 64 1
	// ---- Stage 0: c = S + P1.x*(texel - S); c = c*P2 + P5 ------------------
	ld.shared.f32 	%f179, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+16];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f180, %f179, %f11, %f10;
	fma.rn.ftz.f32 	%f181, %f179, %f12, %f10;
	fma.rn.ftz.f32 	%f182, %f179, %f13, %f10;
	.loc 1 64 1
	ld.global.u32 	%r14, [%rd1+4];			// F[1]: stage-0 power step
	setp.eq.s32	%p6, %r14, 0;
	.loc 1 64 1
	ld.shared.v4.f32 	{%f183, %f184, %f185, %f186}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+80];
	.loc 1 64 1
	ld.shared.v4.f32 	{%f188, %f189, %f190, %f191}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+32];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f351, %f180, %f188, %f183;
	fma.rn.ftz.f32 	%f352, %f181, %f189, %f184;
	fma.rn.ftz.f32 	%f353, %f182, %f190, %f185;
	.loc 1 64 1
	@%p6 bra 	BB1_21;

	.loc 1 64 1
	// Stage-0 power step: c = sign(c) * POWs(|c|, P8.{x,y,z}).
	// %f21..%f23 hold the signs, %f24..%f26 the magnitudes.
	setp.lt.ftz.f32	%p7, %f351, 0f00000000;
	selp.f32	%f21, 0fBF800000, 0f3F800000, %p7;
	setp.lt.ftz.f32	%p8, %f352, 0f00000000;
	selp.f32	%f22, 0fBF800000, 0f3F800000, %p8;
	setp.lt.ftz.f32	%p9, %f353, 0f00000000;
	selp.f32	%f23, 0fBF800000, 0f3F800000, %p9;
	.loc 3 2750 10
	abs.ftz.f32 	%f24, %f352;
	abs.ftz.f32 	%f25, %f353;
	abs.ftz.f32 	%f26, %f351;
	.loc 2 978 1
	setp.eq.ftz.f32	%p10, %f26, 0f00000000;
	.loc 1 64 1
	ld.shared.f32 	%f27, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+128];
	.loc 2 978 1
	setp.eq.ftz.f32	%p11, %f27, 0f00000000;
	and.pred  	%p12, %p10, %p11;
	.loc 2 978 1
	@!%p12 bra 	BB1_7;
	bra.uni 	BB1_6;

BB1_6:
	// |c| == 0 and exponent == 0 -> NaN bit pattern
	mov.f32 	%f348, 0f7FFFFFFF;
	bra.uni 	BB1_10;

BB1_7:
	.loc 2 978 1
	setp.geu.ftz.f32	%p13, %f27, 0f00000000;
	@%p13 bra 	BB1_9;

	// exponent < 0 -> 1.0
	mov.f32 	%f348, 0f3F800000;
	bra.uni 	BB1_10;

BB1_9:
	.loc 3 3600 10
	// 2^(exponent * log2(|c|))
	lg2.approx.ftz.f32 	%f197, %f26;
	mul.ftz.f32 	%f198, %f27, %f197;
	ex2.approx.ftz.f32 	%f348, %f198;

BB1_10:
	.loc 1 64 1
	// Same three-way power for the second component (exponent P8.y).
	ld.shared.f32 	%f30, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+132];
	.loc 2 978 1
	setp.eq.ftz.f32	%p14, %f30, 0f00000000;
	setp.eq.ftz.f32	%p15, %f24, 0f00000000;
	and.pred  	%p16, %p15, %p14;
	.loc 2 978 1
	@!%p16 bra 	BB1_12;
	bra.uni 	BB1_11;

BB1_11:
	mov.f32 	%f349, 0f7FFFFFFF;
	bra.uni 	BB1_15;

BB1_12:
	.loc 2 978 1
	setp.geu.ftz.f32	%p17, %f30, 0f00000000;
	@%p17 bra 	BB1_14;

	mov.f32 	%f349, 0f3F800000;
	bra.uni 	BB1_15;

BB1_14:
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f201, %f24;
	mul.ftz.f32 	%f202, %f30, %f201;
	ex2.approx.ftz.f32 	%f349, %f202;

BB1_15:
	.loc 1 64 1
	// Same three-way power for the third component (exponent P8.z).
	ld.shared.f32 	%f33, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+136];
	.loc 2 978 1
	setp.eq.ftz.f32	%p18, %f33, 0f00000000;
	setp.eq.ftz.f32	%p19, %f25, 0f00000000;
	and.pred  	%p20, %p19, %p18;
	.loc 2 978 1
	@!%p20 bra 	BB1_17;
	bra.uni 	BB1_16;

BB1_16:
	mov.f32 	%f350, 0f7FFFFFFF;
	bra.uni 	BB1_20;

BB1_17:
	.loc 2 978 1
	setp.geu.ftz.f32	%p21, %f33, 0f00000000;
	@%p21 bra 	BB1_19;

	mov.f32 	%f350, 0f3F800000;
	bra.uni 	BB1_20;

BB1_19:
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f205, %f25;
	mul.ftz.f32 	%f206, %f33, %f205;
	ex2.approx.ftz.f32 	%f350, %f206;

BB1_20:
	.loc 1 64 1
	// Re-apply the signs.
	mul.ftz.f32 	%f351, %f348, %f21;
	mul.ftz.f32 	%f352, %f349, %f22;
	mul.ftz.f32 	%f353, %f350, %f23;

BB1_21:
	.loc 1 64 1
	// F[7]: stage-0 second scale, c = S' + P11.x*(c - S') with S' = weighted sum of c.
	ld.global.u32 	%r15, [%rd1+28];
	setp.eq.s32	%p22, %r15, 0;
	@%p22 bra 	BB1_23;

	.loc 1 64 1
	ld.shared.v4.f32 	{%f209, %f210, %f211, %f212}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local];
	.loc 1 64 1
	mul.ftz.f32 	%f214, %f352, %f210;
	fma.rn.ftz.f32 	%f216, %f351, %f209, %f214;
	fma.rn.ftz.f32 	%f218, %f353, %f211, %f216;
	.loc 1 64 1
	sub.ftz.f32 	%f219, %f351, %f218;
	sub.ftz.f32 	%f220, %f352, %f218;
	sub.ftz.f32 	%f221, %f353, %f218;
	.loc 1 64 1
	ld.shared.f32 	%f222, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+176];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f351, %f222, %f219, %f218;
	fma.rn.ftz.f32 	%f352, %f222, %f220, %f218;
	fma.rn.ftz.f32 	%f353, %f222, %f221, %f218;

BB1_23:
	.loc 1 64 1
	// Accumulator (%f48..%f50) = weight.x * stage-0 result.
	mul.ftz.f32 	%f48, %f173, %f351;
	mul.ftz.f32 	%f49, %f173, %f352;
	mul.ftz.f32 	%f50, %f173, %f353;
	.loc 1 64 1
	// ---- Stage 1: c = S + P1.y*(texel - S); c = c*P3 + P6 ------------------
	ld.shared.f32 	%f223, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+20];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f224, %f223, %f11, %f10;
	fma.rn.ftz.f32 	%f225, %f223, %f12, %f10;
	fma.rn.ftz.f32 	%f226, %f223, %f13, %f10;
	.loc 1 64 1
	ld.global.u32 	%r16, [%rd1+8];			// F[2]: stage-1 power step
	setp.eq.s32	%p23, %r16, 0;
	.loc 1 64 1
	ld.shared.v4.f32 	{%f227, %f228, %f229, %f230}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+96];
	.loc 1 64 1
	ld.shared.v4.f32 	{%f232, %f233, %f234, %f235}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+48];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f357, %f224, %f232, %f227;
	fma.rn.ftz.f32 	%f358, %f225, %f233, %f228;
	fma.rn.ftz.f32 	%f359, %f226, %f234, %f229;
	.loc 1 64 1
	@%p23 bra 	BB1_40;

	.loc 1 64 1
	// Stage-1 power step: c = sign(c) * POWs(|c|, P9.{x,y,z}).
	setp.lt.ftz.f32	%p24, %f357, 0f00000000;
	selp.f32	%f54, 0fBF800000, 0f3F800000, %p24;
	setp.lt.ftz.f32	%p25, %f358, 0f00000000;
	selp.f32	%f55, 0fBF800000, 0f3F800000, %p25;
	setp.lt.ftz.f32	%p26, %f359, 0f00000000;
	selp.f32	%f56, 0fBF800000, 0f3F800000, %p26;
	.loc 3 2750 10
	abs.ftz.f32 	%f57, %f358;
	abs.ftz.f32 	%f58, %f359;
	abs.ftz.f32 	%f59, %f357;
	.loc 2 978 1
	setp.eq.ftz.f32	%p27, %f59, 0f00000000;
	.loc 1 64 1
	ld.shared.f32 	%f60, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+144];
	.loc 2 978 1
	setp.eq.ftz.f32	%p28, %f60, 0f00000000;
	and.pred  	%p29, %p27, %p28;
	.loc 2 978 1
	@!%p29 bra 	BB1_26;
	bra.uni 	BB1_25;

BB1_25:
	mov.f32 	%f354, 0f7FFFFFFF;
	bra.uni 	BB1_29;

BB1_26:
	.loc 2 978 1
	setp.geu.ftz.f32	%p30, %f60, 0f00000000;
	@%p30 bra 	BB1_28;

	mov.f32 	%f354, 0f3F800000;
	bra.uni 	BB1_29;

BB1_28:
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f241, %f59;
	mul.ftz.f32 	%f242, %f60, %f241;
	ex2.approx.ftz.f32 	%f354, %f242;

BB1_29:
	.loc 1 64 1
	ld.shared.f32 	%f63, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+148];
	.loc 2 978 1
	setp.eq.ftz.f32	%p31, %f63, 0f00000000;
	setp.eq.ftz.f32	%p32, %f57, 0f00000000;
	and.pred  	%p33, %p32, %p31;
	.loc 2 978 1
	@!%p33 bra 	BB1_31;
	bra.uni 	BB1_30;

BB1_30:
	mov.f32 	%f355, 0f7FFFFFFF;
	bra.uni 	BB1_34;

BB1_31:
	.loc 2 978 1
	setp.geu.ftz.f32	%p34, %f63, 0f00000000;
	@%p34 bra 	BB1_33;

	mov.f32 	%f355, 0f3F800000;
	bra.uni 	BB1_34;

BB1_33:
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f245, %f57;
	mul.ftz.f32 	%f246, %f63, %f245;
	ex2.approx.ftz.f32 	%f355, %f246;

BB1_34:
	.loc 1 64 1
	ld.shared.f32 	%f66, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+152];
	.loc 2 978 1
	setp.eq.ftz.f32	%p35, %f66, 0f00000000;
	setp.eq.ftz.f32	%p36, %f58, 0f00000000;
	and.pred  	%p37, %p36, %p35;
	.loc 2 978 1
	@!%p37 bra 	BB1_36;
	bra.uni 	BB1_35;

BB1_35:
	mov.f32 	%f356, 0f7FFFFFFF;
	bra.uni 	BB1_39;

BB1_36:
	.loc 2 978 1
	setp.geu.ftz.f32	%p38, %f66, 0f00000000;
	@%p38 bra 	BB1_38;

	mov.f32 	%f356, 0f3F800000;
	bra.uni 	BB1_39;

BB1_38:
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f249, %f58;
	mul.ftz.f32 	%f250, %f66, %f249;
	ex2.approx.ftz.f32 	%f356, %f250;

BB1_39:
	.loc 1 64 1
	// Re-apply the signs.
	mul.ftz.f32 	%f357, %f354, %f54;
	mul.ftz.f32 	%f358, %f355, %f55;
	mul.ftz.f32 	%f359, %f356, %f56;

BB1_40:
	.loc 1 64 1
	// F[8]: stage-1 second scale with factor P11.y.
	ld.global.u32 	%r17, [%rd1+32];
	setp.eq.s32	%p39, %r17, 0;
	@%p39 bra 	BB1_42;

	.loc 1 64 1
	ld.shared.v4.f32 	{%f253, %f254, %f255, %f256}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local];
	.loc 1 64 1
	mul.ftz.f32 	%f258, %f358, %f254;
	fma.rn.ftz.f32 	%f260, %f357, %f253, %f258;
	fma.rn.ftz.f32 	%f262, %f359, %f255, %f260;
	.loc 1 64 1
	sub.ftz.f32 	%f263, %f357, %f262;
	sub.ftz.f32 	%f264, %f358, %f262;
	sub.ftz.f32 	%f265, %f359, %f262;
	.loc 1 64 1
	ld.shared.f32 	%f266, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+180];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f357, %f266, %f263, %f262;
	fma.rn.ftz.f32 	%f358, %f266, %f264, %f262;
	fma.rn.ftz.f32 	%f359, %f266, %f265, %f262;

BB1_42:
	.loc 1 64 1
	// Accumulator (%f81..%f83) += weight.y * stage-1 result.
	fma.rn.ftz.f32 	%f81, %f174, %f357, %f48;
	fma.rn.ftz.f32 	%f82, %f174, %f358, %f49;
	fma.rn.ftz.f32 	%f83, %f174, %f359, %f50;
	.loc 1 64 1
	// ---- Stage 2: c = S + P1.z*(texel - S); c = c*P4 + P7 ------------------
	ld.shared.f32 	%f267, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+24];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f268, %f267, %f11, %f10;
	fma.rn.ftz.f32 	%f269, %f267, %f12, %f10;
	fma.rn.ftz.f32 	%f270, %f267, %f13, %f10;
	.loc 1 64 1
	ld.global.u32 	%r18, [%rd1+12];		// F[3]: stage-2 power step
	setp.eq.s32	%p40, %r18, 0;
	.loc 1 64 1
	ld.shared.v4.f32 	{%f271, %f272, %f273, %f274}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+112];
	.loc 1 64 1
	ld.shared.v4.f32 	{%f276, %f277, %f278, %f279}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+64];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f363, %f268, %f276, %f271;
	fma.rn.ftz.f32 	%f364, %f269, %f277, %f272;
	fma.rn.ftz.f32 	%f365, %f270, %f278, %f273;
	.loc 1 64 1
	@%p40 bra 	BB1_59;

	.loc 1 64 1
	// Stage-2 power step: c = sign(c) * POWs(|c|, P10.{x,y,z}).
	setp.lt.ftz.f32	%p41, %f363, 0f00000000;
	selp.f32	%f87, 0fBF800000, 0f3F800000, %p41;
	setp.lt.ftz.f32	%p42, %f364, 0f00000000;
	selp.f32	%f88, 0fBF800000, 0f3F800000, %p42;
	setp.lt.ftz.f32	%p43, %f365, 0f00000000;
	selp.f32	%f89, 0fBF800000, 0f3F800000, %p43;
	.loc 3 2750 10
	abs.ftz.f32 	%f90, %f364;
	abs.ftz.f32 	%f91, %f365;
	abs.ftz.f32 	%f92, %f363;
	.loc 2 978 1
	setp.eq.ftz.f32	%p44, %f92, 0f00000000;
	.loc 1 64 1
	ld.shared.f32 	%f93, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+160];
	.loc 2 978 1
	setp.eq.ftz.f32	%p45, %f93, 0f00000000;
	and.pred  	%p46, %p44, %p45;
	.loc 2 978 1
	@!%p46 bra 	BB1_45;
	bra.uni 	BB1_44;

BB1_44:
	mov.f32 	%f360, 0f7FFFFFFF;
	bra.uni 	BB1_48;

BB1_45:
	.loc 2 978 1
	setp.geu.ftz.f32	%p47, %f93, 0f00000000;
	@%p47 bra 	BB1_47;

	mov.f32 	%f360, 0f3F800000;
	bra.uni 	BB1_48;

BB1_47:
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f285, %f92;
	mul.ftz.f32 	%f286, %f93, %f285;
	ex2.approx.ftz.f32 	%f360, %f286;

BB1_48:
	.loc 1 64 1
	ld.shared.f32 	%f96, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+164];
	.loc 2 978 1
	setp.eq.ftz.f32	%p48, %f96, 0f00000000;
	setp.eq.ftz.f32	%p49, %f90, 0f00000000;
	and.pred  	%p50, %p49, %p48;
	.loc 2 978 1
	@!%p50 bra 	BB1_50;
	bra.uni 	BB1_49;

BB1_49:
	mov.f32 	%f361, 0f7FFFFFFF;
	bra.uni 	BB1_53;

BB1_50:
	.loc 2 978 1
	setp.geu.ftz.f32	%p51, %f96, 0f00000000;
	@%p51 bra 	BB1_52;

	mov.f32 	%f361, 0f3F800000;
	bra.uni 	BB1_53;

BB1_52:
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f289, %f90;
	mul.ftz.f32 	%f290, %f96, %f289;
	ex2.approx.ftz.f32 	%f361, %f290;

BB1_53:
	.loc 1 64 1
	ld.shared.f32 	%f99, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+168];
	.loc 2 978 1
	setp.eq.ftz.f32	%p52, %f99, 0f00000000;
	setp.eq.ftz.f32	%p53, %f91, 0f00000000;
	and.pred  	%p54, %p53, %p52;
	.loc 2 978 1
	@!%p54 bra 	BB1_55;
	bra.uni 	BB1_54;

BB1_54:
	mov.f32 	%f362, 0f7FFFFFFF;
	bra.uni 	BB1_58;

BB1_55:
	.loc 2 978 1
	setp.geu.ftz.f32	%p55, %f99, 0f00000000;
	@%p55 bra 	BB1_57;

	mov.f32 	%f362, 0f3F800000;
	bra.uni 	BB1_58;

BB1_57:
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f293, %f91;
	mul.ftz.f32 	%f294, %f99, %f293;
	ex2.approx.ftz.f32 	%f362, %f294;

BB1_58:
	.loc 1 64 1
	// Re-apply the signs.
	mul.ftz.f32 	%f363, %f360, %f87;
	mul.ftz.f32 	%f364, %f361, %f88;
	mul.ftz.f32 	%f365, %f362, %f89;

BB1_59:
	.loc 1 64 1
	// F[9]: stage-2 second scale with factor P11.z.
	ld.global.u32 	%r19, [%rd1+36];
	setp.eq.s32	%p56, %r19, 0;
	@%p56 bra 	BB1_61;

	.loc 1 64 1
	ld.shared.v4.f32 	{%f297, %f298, %f299, %f300}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local];
	.loc 1 64 1
	mul.ftz.f32 	%f302, %f364, %f298;
	fma.rn.ftz.f32 	%f304, %f363, %f297, %f302;
	fma.rn.ftz.f32 	%f306, %f365, %f299, %f304;
	.loc 1 64 1
	sub.ftz.f32 	%f307, %f363, %f306;
	sub.ftz.f32 	%f308, %f364, %f306;
	sub.ftz.f32 	%f309, %f365, %f306;
	.loc 1 64 1
	ld.shared.f32 	%f310, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+184];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f363, %f310, %f307, %f306;
	fma.rn.ftz.f32 	%f364, %f310, %f308, %f306;
	fma.rn.ftz.f32 	%f365, %f310, %f309, %f306;

BB1_61:
	.loc 1 64 1
	// Final result = accumulator + weight.z * stage-2 result.
	fma.rn.ftz.f32 	%f369, %f175, %f363, %f81;
	fma.rn.ftz.f32 	%f370, %f175, %f364, %f82;
	fma.rn.ftz.f32 	%f371, %f175, %f365, %f83;
	bra.uni 	BB1_82;

	// =======================================================================
	// Single-stage path (F[0] == 0); skipped entirely unless F[4] != 0
	// =======================================================================
BB1_62:
	.loc 1 64 1
	ld.global.u32 	%r20, [%rd1+16];
	setp.eq.s32	%p57, %r20, 0;
	@%p57 bra 	BB1_82;

	.loc 1 64 1
	// c = S + P1.x*(texel - S); c = c*P2 + P5  (same parameters as stage 0)
	ld.shared.f32 	%f311, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+16];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f312, %f311, %f11, %f10;
	fma.rn.ftz.f32 	%f313, %f311, %f12, %f10;
	fma.rn.ftz.f32 	%f314, %f311, %f13, %f10;
	.loc 1 64 1
	ld.global.u32 	%r21, [%rd1+20];		// F[5]: power step
	setp.eq.s32	%p58, %r21, 0;
	.loc 1 64 1
	ld.shared.v4.f32 	{%f315, %f316, %f317, %f318}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+80];
	.loc 1 64 1
	ld.shared.v4.f32 	{%f320, %f321, %f322, %f323}, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+32];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f369, %f312, %f320, %f315;
	fma.rn.ftz.f32 	%f370, %f313, %f321, %f316;
	fma.rn.ftz.f32 	%f371, %f314, %f322, %f317;
	.loc 1 64 1
	@%p58 bra 	BB1_80;

	.loc 1 64 1
	// Power step: c = sign(c) * POWs(|c|, P8.{x,y,z}).
	setp.lt.ftz.f32	%p59, %f369, 0f00000000;
	selp.f32	%f120, 0fBF800000, 0f3F800000, %p59;
	setp.lt.ftz.f32	%p60, %f370, 0f00000000;
	selp.f32	%f121, 0fBF800000, 0f3F800000, %p60;
	setp.lt.ftz.f32	%p61, %f371, 0f00000000;
	selp.f32	%f122, 0fBF800000, 0f3F800000, %p61;
	.loc 3 2750 10
	abs.ftz.f32 	%f123, %f370;
	abs.ftz.f32 	%f124, %f371;
	abs.ftz.f32 	%f125, %f369;
	.loc 2 978 1
	setp.eq.ftz.f32	%p62, %f125, 0f00000000;
	.loc 1 64 1
	ld.shared.f32 	%f126, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+128];
	.loc 2 978 1
	setp.eq.ftz.f32	%p63, %f126, 0f00000000;
	and.pred  	%p64, %p62, %p63;
	.loc 2 978 1
	@!%p64 bra 	BB1_66;
	bra.uni 	BB1_65;

BB1_65:
	mov.f32 	%f366, 0f7FFFFFFF;
	bra.uni 	BB1_69;

BB1_66:
	.loc 2 978 1
	setp.geu.ftz.f32	%p65, %f126, 0f00000000;
	@%p65 bra 	BB1_68;

	mov.f32 	%f366, 0f3F800000;
	bra.uni 	BB1_69;

BB1_68:
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f329, %f125;
	mul.ftz.f32 	%f330, %f126, %f329;
	ex2.approx.ftz.f32 	%f366, %f330;

BB1_69:
	.loc 1 64 1
	ld.shared.f32 	%f129, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+132];
	.loc 2 978 1
	setp.eq.ftz.f32	%p66, %f129, 0f00000000;
	setp.eq.ftz.f32	%p67, %f123, 0f00000000;
	and.pred  	%p68, %p67, %p66;
	.loc 2 978 1
	@!%p68 bra 	BB1_71;
	bra.uni 	BB1_70;

BB1_70:
	mov.f32 	%f367, 0f7FFFFFFF;
	bra.uni 	BB1_74;

BB1_71:
	.loc 2 978 1
	setp.geu.ftz.f32	%p69, %f129, 0f00000000;
	@%p69 bra 	BB1_73;

	mov.f32 	%f367, 0f3F800000;
	bra.uni 	BB1_74;

BB1_73:
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f333, %f123;
	mul.ftz.f32 	%f334, %f129, %f333;
	ex2.approx.ftz.f32 	%f367, %f334;

BB1_74:
	.loc 1 64 1
	ld.shared.f32 	%f132, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+136];
	.loc 2 978 1
	setp.eq.ftz.f32	%p70, %f132, 0f00000000;
	setp.eq.ftz.f32	%p71, %f124, 0f00000000;
	and.pred  	%p72, %p71, %p70;
	.loc 2 978 1
	@!%p72 bra 	BB1_76;
	bra.uni 	BB1_75;

BB1_75:
	mov.f32 	%f368, 0f7FFFFFFF;
	bra.uni 	BB1_79;

BB1_76:
	.loc 2 978 1
	setp.geu.ftz.f32	%p73, %f132, 0f00000000;
	@%p73 bra 	BB1_78;

	mov.f32 	%f368, 0f3F800000;
	bra.uni 	BB1_79;

BB1_78:
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f337, %f124;
	mul.ftz.f32 	%f338, %f132, %f337;
	ex2.approx.ftz.f32 	%f368, %f338;

BB1_79:
	.loc 1 64 1
	// Re-apply the signs.
	mul.ftz.f32 	%f369, %f366, %f120;
	mul.ftz.f32 	%f370, %f367, %f121;
	mul.ftz.f32 	%f371, %f368, %f122;

BB1_80:
	.loc 1 64 1
	// F[6]: second scale with factor P11.x; reuses P0 already held in %f167..%f169.
	ld.global.u32 	%r22, [%rd1+24];
	setp.eq.s32	%p74, %r22, 0;
	@%p74 bra 	BB1_82;

	.loc 1 64 1
	mul.ftz.f32 	%f341, %f370, %f168;
	fma.rn.ftz.f32 	%f342, %f369, %f167, %f341;
	fma.rn.ftz.f32 	%f343, %f371, %f169, %f342;
	.loc 1 64 1
	sub.ftz.f32 	%f344, %f369, %f343;
	sub.ftz.f32 	%f345, %f370, %f343;
	sub.ftz.f32 	%f346, %f371, %f343;
	.loc 1 64 1
	ld.shared.f32 	%f347, [ShaderKernel_IRIDASPrimary$__cuda_local_var_170302_584_non_const_p_local+176];
	.loc 1 64 1
	fma.rn.ftz.f32 	%f369, %f347, %f344, %f343;
	fma.rn.ftz.f32 	%f370, %f347, %f345, %f343;
	fma.rn.ftz.f32 	%f371, %f347, %f346, %f343;

	// =======================================================================
	// Store: pixel index = y*param_1 + x; the texel's .w (%f164) is passed through
	// =======================================================================
BB1_82:
	.loc 1 64 1
	mad.lo.s32 	%r31, %r3, %r4, %r2;
	.loc 1 64 1
	cvt.s64.s32	%rd3, %r31;
	.loc 1 64 1
	setp.eq.s32	%p75, %r5, 0;
	@%p75 bra 	BB1_84;

	// param_2 != 0: four f32 values, 16 bytes per pixel.
	cvta.to.global.u64 	%rd18, %rd4;
	.loc 1 64 1
	shl.b64 	%rd19, %rd3, 4;
	add.s64 	%rd20, %rd18, %rd19;
	st.global.v4.f32 	[%rd20], {%f371, %f370, %f369, %f164};
	bra.uni 	BB1_85;

BB1_84:
	// param_2 == 0: convert to four f16 values (round-to-nearest), 8 bytes per pixel.
	cvta.to.global.u64 	%rd21, %rd4;
	.loc 1 64 1
	shl.b64 	%rd22, %rd3, 3;
	add.s64 	%rd23, %rd21, %rd22;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f371;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f370;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f369;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f164;
	mov.b16 	%rs4, %temp;
}
	.loc 1 64 241
	st.global.v4.u16 	[%rd23], {%rs1, %rs2, %rs3, %rs4};

BB1_85:
	.loc 1 64 2
	ret;
}


