//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Wed Jul 10 12:41:20 2013 (1373485280)
// Cuda compilation tools, release 5.5, V5.5.0
//

// Module-level directives: PTX ISA version, target architecture and
// pointer width used by every function in this module.
.version 3.2
.target sm_30
.address_size 64

// Source files referenced by the ".loc <file> <line> <col>" debug directives
// in the function bodies (file index 1 and 2).
	.file	1 "D:/singlebarrel/releases/2014.03/shared/adobe/MediaCore/Renderers/RendererGPU/Src/Effects/Basic3D.cu", 1399785316, 16134
	.file	2 "d:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\mediacore\\external\\3rdparty\\nvidia\\cuda\\win\\include\\device_functions.h", 1399785281, 191626
// Texture reference sampled by the tex.2d instructions in this module.
.global .texref inTexture;
// 11 bytes: the ASCII characters "__CUDA_FTZ" followed by a NUL terminator.
.global .align 1 .b8 $str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};

// -----------------------------------------------------------------------------
// _Z12QuadCoverage6float2S_S_S_Pv
//   Mangled name decodes to: QuadCoverage(float2, float2, float2, float2, void*)
//
// Purpose
//   Returns the average of inTexture samples taken over the quadrilateral whose
//   four corners are passed in param_0..param_3. The quad is subdivided
//   adaptively with an explicit stack kept in local memory (no recursion).
//
// Parameters
//   param_0..param_3  corner 0..3 of the quad; f32 x at +0, f32 y at +4.
//   param_4           64-bit value; it is never read in this body.
//
// Return value (func_retval0, 16 bytes)
//   Four f32 at +0/+4/+8/+12: the per-channel sum of all texture samples
//   multiplied by an approximate reciprocal (div.approx) of the sample count.
//
// Local depot layout (__local_depot0, 256 bytes)
//   +0   .. +223  quad stack, 32 bytes per entry (4 corners x 2 f32), base %rd5
//   +224 .. +239  u32 array of depth values, indexed by %r34, base %rd6
//   +240 .. +255  u32 array of "quads still pending at this slot" counters,
//                 indexed by %r34, base %rd7
//
// Long-lived registers
//   %f256,%f257 / %f254,%f255 / %f252,%f253 / %f250,%f251
//                 (x, y) of corner 0 / 1 / 2 / 3 of the quad being processed
//   %f258..%f261  running sum of the four texture channels
//   %f262         number of samples accumulated so far (kept as f32)
//   %r33          index of the top entry of the quad stack
//   %r34          current index into the two u32 arrays
//   %r3           split mask of the current quad (bit 0 / bit 1, see BB0_1)
//   %r4           depth value loaded for the current quad
//   %r35          number of child quads written by a subdivision (2 or 4)
//
// Float immediates used below
//   0f3E800000 = 0.25   0f3F000000 = 0.5   0f3F800000 = 1.0
//   0f40000000 = 2.0    0f40800000 = 4.0   0f00000000 = 0.0
//
// All floating-point arithmetic uses the .ftz (flush-to-zero) modifier.
// -----------------------------------------------------------------------------
.visible .func  (.param .align 16 .b8 func_retval0[16]) _Z12QuadCoverage6float2S_S_S_Pv(
	.param .align 8 .b8 _Z12QuadCoverage6float2S_S_S_Pv_param_0[8],
	.param .align 8 .b8 _Z12QuadCoverage6float2S_S_S_Pv_param_1[8],
	.param .align 8 .b8 _Z12QuadCoverage6float2S_S_S_Pv_param_2[8],
	.param .align 8 .b8 _Z12QuadCoverage6float2S_S_S_Pv_param_3[8],
	.param .b64 _Z12QuadCoverage6float2S_S_S_Pv_param_4
)
{
	.local .align 8 .b8 	__local_depot0[256];
	.reg .b64 	%SP;
	.reg .b64 	%SPL;
	.reg .pred 	%p<16>;
	.reg .s32 	%r<36>;
	.reg .f32 	%f<263>;
	.reg .s64 	%rd<45>;


	// ---- Entry: load the four corners and seed the work stack -------------
	mov.u64 	%SPL, __local_depot0;
	ld.param.f32 	%f257, [_Z12QuadCoverage6float2S_S_S_Pv_param_0+4];
	ld.param.f32 	%f256, [_Z12QuadCoverage6float2S_S_S_Pv_param_0];
	ld.param.f32 	%f255, [_Z12QuadCoverage6float2S_S_S_Pv_param_1+4];
	ld.param.f32 	%f254, [_Z12QuadCoverage6float2S_S_S_Pv_param_1];
	ld.param.f32 	%f253, [_Z12QuadCoverage6float2S_S_S_Pv_param_2+4];
	ld.param.f32 	%f252, [_Z12QuadCoverage6float2S_S_S_Pv_param_2];
	ld.param.f32 	%f251, [_Z12QuadCoverage6float2S_S_S_Pv_param_3+4];
	ld.param.f32 	%f250, [_Z12QuadCoverage6float2S_S_S_Pv_param_3];
	// %rd5 = quad stack base, %rd6 = depth array base, %rd7 = counter array base.
	add.u64 	%rd5, %SPL, 0;
	add.u64 	%rd6, %SPL, 224;
	add.u64 	%rd7, %SPL, 240;
	mov.u32 	%r34, 0;
	.loc 1 29 1
	// depth[0] = 0
	st.local.u32 	[%rd6], %r34;
	mov.u32 	%r16, 1;
	.loc 1 29 1
	// counter[0] = 1 (the input quad itself)
	st.local.u32 	[%rd7], %r16;
	// stack[0] = input quad (corner 0..3)
	st.local.v2.f32 	[%rd5], {%f256, %f257};
	st.local.v2.f32 	[%rd5+8], {%f254, %f255};
	st.local.f32 	[%rd5+16], %f252;
	st.local.f32 	[%rd5+20], %f253;
	st.local.v2.f32 	[%rd5+24], {%f250, %f251};
	// Sample count, stack index and the four channel sums all start at zero.
	mov.f32 	%f262, 0f00000000;
	mov.u32 	%r33, %r34;
	mov.f32 	%f261, %f262;
	mov.f32 	%f260, %f262;
	mov.f32 	%f259, %f262;
	mov.f32 	%f258, %f262;

// ---- Loop head: classify the current quad ---------------------------------
// Edge midpoints:  m01 = (%f17,%f21)  m12 = (%f23,%f25)
//                  m23 = (%f27,%f29)  m30 = (%f30,%f31)
// Centre (midpoint of m01 and m23) = (%f32,%f33)
// %f34 = squared distance m30 <-> m12, %f35 = squared distance m23 <-> m01
BB0_1:
	.loc 1 29 1
	add.ftz.f32 	%f16, %f256, %f254;
	mul.ftz.f32 	%f17, %f16, 0f3F000000;
	add.ftz.f32 	%f20, %f257, %f255;
	mul.ftz.f32 	%f21, %f20, 0f3F000000;
	add.ftz.f32 	%f90, %f254, %f252;
	mul.ftz.f32 	%f23, %f90, 0f3F000000;
	add.ftz.f32 	%f91, %f255, %f253;
	mul.ftz.f32 	%f25, %f91, 0f3F000000;
	add.ftz.f32 	%f92, %f252, %f250;
	mul.ftz.f32 	%f27, %f92, 0f3F000000;
	add.ftz.f32 	%f93, %f253, %f251;
	mul.ftz.f32 	%f29, %f93, 0f3F000000;
	add.ftz.f32 	%f94, %f250, %f256;
	mul.ftz.f32 	%f30, %f94, 0f3F000000;
	add.ftz.f32 	%f95, %f251, %f257;
	mul.ftz.f32 	%f31, %f95, 0f3F000000;
	add.ftz.f32 	%f96, %f17, %f27;
	mul.ftz.f32 	%f32, %f96, 0f3F000000;
	add.ftz.f32 	%f97, %f21, %f29;
	mul.ftz.f32 	%f33, %f97, 0f3F000000;
	sub.ftz.f32 	%f98, %f30, %f23;
	sub.ftz.f32 	%f99, %f31, %f25;
	mul.ftz.f32 	%f100, %f99, %f99;
	fma.rn.ftz.f32 	%f34, %f98, %f98, %f100;
	.loc 1 29 1
	// Split-mask bit 0: squared m30-m12 distance exceeds 1.0.
	setp.gt.ftz.f32	%p1, %f34, 0f3F800000;
	selp.u32	%r17, 1, 0, %p1;
	.loc 1 29 1
	sub.ftz.f32 	%f101, %f27, %f17;
	sub.ftz.f32 	%f102, %f29, %f21;
	mul.ftz.f32 	%f103, %f102, %f102;
	fma.rn.ftz.f32 	%f35, %f101, %f101, %f103;
	.loc 1 29 1
	// Split-mask bit 1: squared m23-m01 distance exceeds 1.0.
	setp.gt.ftz.f32	%p2, %f35, 0f3F800000;
	.loc 1 29 1
	or.b32  	%r18, %r17, 2;
	.loc 1 29 1
	selp.b32	%r3, %r18, %r17, %p2;
	.loc 1 29 1
	setp.eq.s32	%p3, %r3, 0;
	// %rd4 = &counter[%r34]; consumed by the single-sample path (BB0_16).
	mul.wide.s32 	%rd8, %r34, 4;
	add.s64 	%rd4, %rd7, %rd8;
	.loc 1 29 1
	// Mask == 0: neither distance exceeds 1.0 -> one sample at the centre.
	@%p3 bra 	BB0_16;

	// %r4 = depth[%r34]; a quad whose depth value is 2 is never subdivided further.
	mul.wide.s32 	%rd9, %r34, 4;
	add.s64 	%rd10, %rd6, %rd9;
	.loc 1 29 1
	ld.local.u32 	%r4, [%rd10];
	setp.eq.s32	%p4, %r4, 2;
	@%p4 bra 	BB0_10;

	// Both squared distances below 4.0 -> sample the sub-quads directly
	// instead of pushing them.
	setp.lt.ftz.f32	%p5, %f34, 0f40800000;
	setp.lt.ftz.f32	%p6, %f35, 0f40800000;
	and.pred  	%p7, %p5, %p6;
	.loc 1 29 1
	@%p7 bra 	BB0_10;

	// ---- Subdivide: children overwrite the stack starting at slot %r33 ----
	.loc 1 29 1
	// Mask == 3 -> four children.
	setp.eq.s32	%p8, %r3, 3;
	@%p8 bra 	BB0_8;

	.loc 1 29 1
	setp.eq.s32	%p9, %r3, 2;
	mul.wide.s32 	%rd11, %r33, 32;
	add.s64 	%rd12, %rd5, %rd11;
	.loc 1 29 1
	// Both two-way splits keep corner 0 as the first corner of child 0.
	st.local.v2.f32 	[%rd12], {%f256, %f257};
	.loc 1 29 1
	@%p9 bra 	BB0_7;

	// Mask == 1: child 0 = {c0, m01, m23, c3}, child 1 = {m01, c1, c2, m23}.
	mul.wide.s32 	%rd13, %r33, 32;
	add.s64 	%rd14, %rd5, %rd13;
	.loc 1 29 1
	st.local.v2.f32 	[%rd14+8], {%f17, %f21};
	st.local.v2.f32 	[%rd14+16], {%f27, %f29};
	st.local.f32 	[%rd14+24], %f250;
	st.local.f32 	[%rd14+28], %f251;
	st.local.v2.f32 	[%rd14+32], {%f17, %f21};
	st.local.f32 	[%rd14+40], %f254;
	st.local.f32 	[%rd14+44], %f255;
	st.local.v2.f32 	[%rd14+48], {%f252, %f253};
	st.local.f32 	[%rd14+56], %f27;
	st.local.f32 	[%rd14+60], %f29;
	mov.u32 	%r35, 2;
	bra.uni 	BB0_9;

// Mask == 2: child 0 = {c0, c1, m12, m30}, child 1 = {m30, m12, c2, c3}.
BB0_7:
	mul.wide.s32 	%rd15, %r33, 32;
	add.s64 	%rd16, %rd5, %rd15;
	.loc 1 29 1
	st.local.v2.f32 	[%rd16+8], {%f254, %f255};
	st.local.v2.f32 	[%rd16+16], {%f23, %f25};
	st.local.f32 	[%rd16+24], %f30;
	st.local.f32 	[%rd16+28], %f31;
	st.local.v2.f32 	[%rd16+32], {%f30, %f31};
	st.local.f32 	[%rd16+40], %f23;
	st.local.f32 	[%rd16+44], %f25;
	st.local.v2.f32 	[%rd16+48], {%f252, %f253};
	st.local.f32 	[%rd16+56], %f250;
	st.local.f32 	[%rd16+60], %f251;
	mov.u32 	%r35, 2;
	bra.uni 	BB0_9;

// Mask == 3: four children, each starting at the centre:
//   {centre, m30, c0, m01}, {centre, m01, c1, m12},
//   {centre, m12, c2, m23}, {centre, m23, c3, m30}.
BB0_8:
	mul.wide.s32 	%rd17, %r33, 32;
	add.s64 	%rd18, %rd5, %rd17;
	.loc 1 29 1
	st.local.v2.f32 	[%rd18], {%f32, %f33};
	st.local.v2.f32 	[%rd18+8], {%f30, %f31};
	st.local.f32 	[%rd18+16], %f256;
	st.local.f32 	[%rd18+20], %f257;
	st.local.v2.f32 	[%rd18+24], {%f17, %f21};
	st.local.f32 	[%rd18+32], %f32;
	st.local.f32 	[%rd18+36], %f33;
	st.local.v2.f32 	[%rd18+40], {%f17, %f21};
	st.local.f32 	[%rd18+48], %f254;
	st.local.f32 	[%rd18+52], %f255;
	st.local.v2.f32 	[%rd18+56], {%f23, %f25};
	st.local.f32 	[%rd18+64], %f32;
	st.local.f32 	[%rd18+68], %f33;
	st.local.v2.f32 	[%rd18+72], {%f23, %f25};
	st.local.f32 	[%rd18+80], %f252;
	st.local.f32 	[%rd18+84], %f253;
	st.local.v2.f32 	[%rd18+88], {%f27, %f29};
	st.local.f32 	[%rd18+96], %f32;
	st.local.f32 	[%rd18+100], %f33;
	st.local.v2.f32 	[%rd18+104], {%f27, %f29};
	st.local.f32 	[%rd18+112], %f250;
	st.local.f32 	[%rd18+116], %f251;
	st.local.v2.f32 	[%rd18+120], {%f30, %f31};
	mov.u32 	%r35, 4;

// ---- Bookkeeping after a subdivision --------------------------------------
// The parent slot was overwritten by child 0, so the stack top moves up by
// (%r35 - 1). The parent's counter is decremented; if it is still non-zero
// the children get the next array slot, otherwise they reuse the parent's.
// The children's slot then receives depth = %r4 + 1 and counter = %r35.
BB0_9:
	.loc 1 29 1
	add.s32 	%r22, %r33, %r35;
	add.s32 	%r33, %r22, -1;
	mul.wide.s32 	%rd19, %r34, 4;
	add.s64 	%rd20, %rd7, %rd19;
	.loc 1 29 1
	ld.local.u32 	%r23, [%rd20];
	add.s32 	%r24, %r23, -1;
	st.local.u32 	[%rd20], %r24;
	.loc 1 29 1
	setp.ne.s32	%p10, %r24, 0;
	selp.u32	%r25, 1, 0, %p10;
	add.s32 	%r34, %r25, %r34;
	mul.wide.s32 	%rd21, %r34, 4;
	add.s64 	%rd22, %rd6, %rd21;
	.loc 1 29 1
	add.s32 	%r26, %r4, 1;
	st.local.u32 	[%rd22], %r26;
	add.s64 	%rd23, %rd7, %rd21;
	.loc 1 29 1
	st.local.u32 	[%rd23], %r35;
	bra.uni 	BB0_17;

// ---- Sample the sub-quads directly (no push) ------------------------------
// Reached when the depth value is 2 or both squared distances are below 4.0.
// The current quad is popped, then one texture sample is taken at the
// corner-average (sum of 4 corners * 0.25) of each sub-quad selected by %r3.
BB0_10:
	.loc 1 29 1
	add.s32 	%r33, %r33, -1;
	.loc 1 29 1
	setp.eq.s32	%p11, %r3, 3;
	@%p11 bra 	BB0_14;

	.loc 1 29 1
	setp.eq.s32	%p12, %r3, 2;
	@%p12 bra 	BB0_13;

	// Mask == 1, sample 1: corner-average of {c0, m01, m23, c3}.
	.loc 1 29 1
	add.ftz.f32 	%f116, %f256, %f17;
	add.ftz.f32 	%f117, %f116, %f27;
	add.ftz.f32 	%f118, %f117, %f250;
	mul.ftz.f32 	%f108, %f118, 0f3E800000;
	add.ftz.f32 	%f119, %f257, %f21;
	add.ftz.f32 	%f120, %f119, %f29;
	add.ftz.f32 	%f121, %f120, %f251;
	mul.ftz.f32 	%f109, %f121, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f104, %f105, %f106, %f107}, [inTexture, {%f108, %f109}];
	// inline asm
	add.ftz.f32 	%f122, %f258, %f104;
	add.ftz.f32 	%f123, %f259, %f105;
	add.ftz.f32 	%f124, %f260, %f106;
	add.ftz.f32 	%f125, %f261, %f107;
	// Mask == 1, sample 2: corner-average of {m01, c1, c2, m23}.
	.loc 1 29 1
	add.ftz.f32 	%f126, %f17, %f254;
	add.ftz.f32 	%f127, %f126, %f252;
	add.ftz.f32 	%f128, %f127, %f27;
	mul.ftz.f32 	%f114, %f128, 0f3E800000;
	add.ftz.f32 	%f129, %f21, %f255;
	add.ftz.f32 	%f130, %f129, %f253;
	add.ftz.f32 	%f131, %f130, %f29;
	mul.ftz.f32 	%f115, %f131, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f110, %f111, %f112, %f113}, [inTexture, {%f114, %f115}];
	// inline asm
	add.ftz.f32 	%f258, %f122, %f110;
	add.ftz.f32 	%f259, %f123, %f111;
	add.ftz.f32 	%f260, %f124, %f112;
	add.ftz.f32 	%f261, %f125, %f113;
	.loc 1 29 1
	// Two samples were added.
	add.ftz.f32 	%f262, %f262, 0f40000000;
	bra.uni 	BB0_15;

// Mask == 2: samples at the corner-averages of {c0, c1, m12, m30} and
// {m30, m12, c2, c3}.
BB0_13:
	.loc 1 29 1
	add.ftz.f32 	%f249, %f257, %f255;
	add.ftz.f32 	%f248, %f256, %f254;
	.loc 1 29 1
	add.ftz.f32 	%f144, %f248, %f23;
	add.ftz.f32 	%f145, %f144, %f30;
	mul.ftz.f32 	%f136, %f145, 0f3E800000;
	add.ftz.f32 	%f146, %f249, %f25;
	add.ftz.f32 	%f147, %f146, %f31;
	mul.ftz.f32 	%f137, %f147, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f132, %f133, %f134, %f135}, [inTexture, {%f136, %f137}];
	// inline asm
	add.ftz.f32 	%f148, %f258, %f132;
	add.ftz.f32 	%f149, %f259, %f133;
	add.ftz.f32 	%f150, %f260, %f134;
	add.ftz.f32 	%f151, %f261, %f135;
	.loc 1 29 1
	add.ftz.f32 	%f152, %f30, %f23;
	add.ftz.f32 	%f153, %f152, %f252;
	add.ftz.f32 	%f154, %f153, %f250;
	mul.ftz.f32 	%f142, %f154, 0f3E800000;
	add.ftz.f32 	%f155, %f31, %f25;
	add.ftz.f32 	%f156, %f155, %f253;
	add.ftz.f32 	%f157, %f156, %f251;
	mul.ftz.f32 	%f143, %f157, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f138, %f139, %f140, %f141}, [inTexture, {%f142, %f143}];
	// inline asm
	add.ftz.f32 	%f258, %f148, %f138;
	add.ftz.f32 	%f259, %f149, %f139;
	add.ftz.f32 	%f260, %f150, %f140;
	add.ftz.f32 	%f261, %f151, %f141;
	.loc 1 29 1
	// Two samples were added.
	add.ftz.f32 	%f262, %f262, 0f40000000;
	bra.uni 	BB0_15;

// Mask == 3: four samples, one at the corner-average of each of the four
// centre-anchored sub-quads (same sub-quads as written in BB0_8).
BB0_14:
	.loc 1 29 1
	// Four samples are added below.
	add.ftz.f32 	%f262, %f262, 0f40800000;
	// Sample 1: {centre, m30, c0, m01}.
	add.ftz.f32 	%f182, %f32, %f30;
	add.ftz.f32 	%f183, %f182, %f256;
	add.ftz.f32 	%f184, %f183, %f17;
	mul.ftz.f32 	%f162, %f184, 0f3E800000;
	add.ftz.f32 	%f185, %f33, %f31;
	add.ftz.f32 	%f186, %f185, %f257;
	add.ftz.f32 	%f187, %f186, %f21;
	mul.ftz.f32 	%f163, %f187, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f158, %f159, %f160, %f161}, [inTexture, {%f162, %f163}];
	// inline asm
	add.ftz.f32 	%f188, %f258, %f158;
	add.ftz.f32 	%f189, %f259, %f159;
	add.ftz.f32 	%f190, %f260, %f160;
	add.ftz.f32 	%f191, %f261, %f161;
	// Sample 2: {centre, m01, c1, m12}.
	.loc 1 29 1
	add.ftz.f32 	%f192, %f32, %f17;
	add.ftz.f32 	%f193, %f192, %f254;
	add.ftz.f32 	%f194, %f193, %f23;
	mul.ftz.f32 	%f168, %f194, 0f3E800000;
	add.ftz.f32 	%f195, %f33, %f21;
	add.ftz.f32 	%f196, %f195, %f255;
	add.ftz.f32 	%f197, %f196, %f25;
	mul.ftz.f32 	%f169, %f197, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f164, %f165, %f166, %f167}, [inTexture, {%f168, %f169}];
	// inline asm
	add.ftz.f32 	%f198, %f188, %f164;
	add.ftz.f32 	%f199, %f189, %f165;
	add.ftz.f32 	%f200, %f190, %f166;
	add.ftz.f32 	%f201, %f191, %f167;
	// Sample 3: {centre, m12, c2, m23}.
	.loc 1 29 1
	add.ftz.f32 	%f202, %f32, %f23;
	add.ftz.f32 	%f203, %f202, %f252;
	add.ftz.f32 	%f204, %f203, %f27;
	mul.ftz.f32 	%f174, %f204, 0f3E800000;
	add.ftz.f32 	%f205, %f33, %f25;
	add.ftz.f32 	%f206, %f205, %f253;
	add.ftz.f32 	%f207, %f206, %f29;
	mul.ftz.f32 	%f175, %f207, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f170, %f171, %f172, %f173}, [inTexture, {%f174, %f175}];
	// inline asm
	add.ftz.f32 	%f208, %f198, %f170;
	add.ftz.f32 	%f209, %f199, %f171;
	add.ftz.f32 	%f210, %f200, %f172;
	add.ftz.f32 	%f211, %f201, %f173;
	// Sample 4: {centre, m23, c3, m30}.
	.loc 1 29 1
	add.ftz.f32 	%f212, %f32, %f27;
	add.ftz.f32 	%f213, %f212, %f250;
	add.ftz.f32 	%f214, %f213, %f30;
	mul.ftz.f32 	%f180, %f214, 0f3E800000;
	add.ftz.f32 	%f215, %f33, %f29;
	add.ftz.f32 	%f216, %f215, %f251;
	add.ftz.f32 	%f217, %f216, %f31;
	mul.ftz.f32 	%f181, %f217, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f176, %f177, %f178, %f179}, [inTexture, {%f180, %f181}];
	// inline asm
	add.ftz.f32 	%f258, %f208, %f176;
	add.ftz.f32 	%f259, %f209, %f177;
	add.ftz.f32 	%f260, %f210, %f178;
	add.ftz.f32 	%f261, %f211, %f179;

// ---- Bookkeeping after a quad was consumed by direct sampling -------------
// counter[%r34] is decremented; when it reaches zero %r34 steps back by one.
BB0_15:
	mul.wide.s32 	%rd40, %r34, 4;
	add.s64 	%rd41, %rd7, %rd40;
	.loc 1 29 1
	ld.local.u32 	%r27, [%rd41];
	add.s32 	%r28, %r27, -1;
	st.local.u32 	[%rd41], %r28;
	.loc 1 29 1
	setp.eq.s32	%p13, %r28, 0;
	selp.b32	%r29, -1, 0, %p13;
	add.s32 	%r34, %r29, %r34;
	bra.uni 	BB0_17;

// ---- Split mask == 0: pop the quad and take one sample at its centre ------
// Uses %rd4 (= &counter[%r34], computed in BB0_1) for the same
// decrement-and-step-back bookkeeping as BB0_15.
BB0_16:
	.loc 1 29 1
	add.s32 	%r33, %r33, -1;
	add.ftz.f32 	%f262, %f262, 0f3F800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f218, %f219, %f220, %f221}, [inTexture, {%f32, %f33}];
	// inline asm
	add.ftz.f32 	%f258, %f258, %f218;
	add.ftz.f32 	%f259, %f259, %f219;
	add.ftz.f32 	%f260, %f260, %f220;
	add.ftz.f32 	%f261, %f261, %f221;
	.loc 1 29 1
	ld.local.u32 	%r30, [%rd4];
	add.s32 	%r31, %r30, -1;
	st.local.u32 	[%rd4], %r31;
	.loc 1 29 1
	setp.eq.s32	%p14, %r31, 0;
	selp.b32	%r32, -1, 0, %p14;
	add.s32 	%r34, %r32, %r34;

// ---- Loop tail: continue while the stack index is >= 0 --------------------
BB0_17:
	.loc 1 29 10
	setp.gt.s32	%p15, %r33, -1;
	@%p15 bra 	BB0_19;

	// Stack empty: scale each channel sum by approx(1.0 / sample count) and
	// return the four results.
	mov.f32 	%f224, 0f3F800000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f225, %f224, %f262;
	.loc 1 29 83
	mul.ftz.f32 	%f226, %f258, %f225;
	mul.ftz.f32 	%f227, %f259, %f225;
	mul.ftz.f32 	%f228, %f260, %f225;
	mul.ftz.f32 	%f229, %f261, %f225;
	st.param.f32	[func_retval0+0], %f226;
	st.param.f32	[func_retval0+4], %f227;
	st.param.f32	[func_retval0+8], %f228;
	st.param.f32	[func_retval0+12], %f229;
	ret;

// ---- Reload the four corners from stack[%r33] and iterate again -----------
BB0_19:
	mul.wide.s32 	%rd43, %r33, 32;
	add.s64 	%rd44, %rd5, %rd43;
	.loc 1 29 1
	ld.local.v2.f32 	{%f230, %f231}, [%rd44+24];
	mov.f32 	%f251, %f231;
	mov.f32 	%f250, %f230;
	ld.local.v2.f32 	{%f232, %f233}, [%rd44+16];
	mov.f32 	%f253, %f233;
	mov.f32 	%f252, %f232;
	ld.local.v2.f32 	{%f234, %f235}, [%rd44+8];
	mov.f32 	%f255, %f235;
	mov.f32 	%f254, %f234;
	ld.local.v2.f32 	{%f236, %f237}, [%rd44];
	mov.f32 	%f257, %f237;
	mov.f32 	%f256, %f236;
	bra.uni 	BB0_1;
}

.visible .func  (.param .align 16 .b8 func_retval0[16]) _Z10SampleQuad6float2S_S_S_Pv(
	.param .align 8 .b8 _Z10SampleQuad6float2S_S_S_Pv_param_0[8],
	.param .align 8 .b8 _Z10SampleQuad6float2S_S_S_Pv_param_1[8],
	.param .align 8 .b8 _Z10SampleQuad6float2S_S_S_Pv_param_2[8],
	.param .align 8 .b8 _Z10SampleQuad6float2S_S_S_Pv_param_3[8],
	.param .b64 _Z10SampleQuad6float2S_S_S_Pv_param_4
)
{
	.local .align 8 .b8 	__local_depot1[256];
	.reg .b64 	%SP;
	.reg .b64 	%SPL;
	.reg .pred 	%p<23>;
	.reg .s32 	%r<40>;
	.reg .f32 	%f<837>;
	.reg .s64 	%rd<137>;


	mov.u64 	%SPL, __local_depot1;
	ld.param.f32 	%f5, [_Z10SampleQuad6float2S_S_S_Pv_param_0+4];
	ld.param.f32 	%f2, [_Z10SampleQuad6float2S_S_S_Pv_param_0];
	ld.param.f32 	%f4, [_Z10SampleQuad6float2S_S_S_Pv_param_1+4];
	ld.param.f32 	%f1, [_Z10SampleQuad6float2S_S_S_Pv_param_1];
	ld.param.f32 	%f9, [_Z10SampleQuad6float2S_S_S_Pv_param_2+4];
	ld.param.f32 	%f7, [_Z10SampleQuad6float2S_S_S_Pv_param_2];
	ld.param.f32 	%f13, [_Z10SampleQuad6float2S_S_S_Pv_param_3+4];
	ld.param.f32 	%f11, [_Z10SampleQuad6float2S_S_S_Pv_param_3];
	add.u64 	%rd4, %SPL, 0;
	add.u64 	%rd5, %SPL, 224;
	add.u64 	%rd6, %SPL, 240;
	.loc 1 29 1
	add.ftz.f32 	%f134, %f2, %f1;
	mul.ftz.f32 	%f3, %f134, 0f3F000000;
	add.ftz.f32 	%f135, %f5, %f4;
	mul.ftz.f32 	%f6, %f135, 0f3F000000;
	add.ftz.f32 	%f136, %f1, %f7;
	mul.ftz.f32 	%f8, %f136, 0f3F000000;
	add.ftz.f32 	%f137, %f4, %f9;
	mul.ftz.f32 	%f10, %f137, 0f3F000000;
	add.ftz.f32 	%f138, %f7, %f11;
	mul.ftz.f32 	%f12, %f138, 0f3F000000;
	add.ftz.f32 	%f139, %f9, %f13;
	mul.ftz.f32 	%f14, %f139, 0f3F000000;
	add.ftz.f32 	%f140, %f11, %f2;
	mul.ftz.f32 	%f15, %f140, 0f3F000000;
	add.ftz.f32 	%f141, %f13, %f5;
	mul.ftz.f32 	%f16, %f141, 0f3F000000;
	add.ftz.f32 	%f142, %f3, %f12;
	mul.ftz.f32 	%f17, %f142, 0f3F000000;
	add.ftz.f32 	%f143, %f6, %f14;
	mul.ftz.f32 	%f18, %f143, 0f3F000000;
	sub.ftz.f32 	%f144, %f15, %f8;
	sub.ftz.f32 	%f145, %f16, %f10;
	mul.ftz.f32 	%f146, %f145, %f145;
	fma.rn.ftz.f32 	%f19, %f144, %f144, %f146;
	.loc 1 29 1
	setp.gt.ftz.f32	%p1, %f19, 0f3F8020C5;
	selp.u32	%r14, 1, 0, %p1;
	.loc 1 29 1
	sub.ftz.f32 	%f147, %f3, %f12;
	sub.ftz.f32 	%f148, %f6, %f14;
	mul.ftz.f32 	%f149, %f148, %f148;
	fma.rn.ftz.f32 	%f20, %f147, %f147, %f149;
	.loc 1 29 1
	setp.gt.ftz.f32	%p2, %f20, 0f3F8020C5;
	.loc 1 29 1
	or.b32  	%r15, %r14, 2;
	.loc 1 29 1
	selp.b32	%r16, %r15, %r14, %p2;
	.loc 1 29 1
	setp.eq.s32	%p3, %r16, 0;
	@%p3 bra 	BB1_25;

	.loc 1 29 1
	setp.gt.ftz.f32	%p4, %f19, 0f40800000;
	setp.gt.ftz.f32	%p5, %f20, 0f40800000;
	or.pred  	%p6, %p4, %p5;
	.loc 1 29 1
	@%p6 bra 	BB1_5;

	ld.param.f32 	%f798, [_Z10SampleQuad6float2S_S_S_Pv_param_3+4];
	ld.param.f32 	%f797, [_Z10SampleQuad6float2S_S_S_Pv_param_3];
	ld.param.f32 	%f796, [_Z10SampleQuad6float2S_S_S_Pv_param_2+4];
	ld.param.f32 	%f795, [_Z10SampleQuad6float2S_S_S_Pv_param_2];
	ld.param.f32 	%f794, [_Z10SampleQuad6float2S_S_S_Pv_param_1+4];
	ld.param.f32 	%f793, [_Z10SampleQuad6float2S_S_S_Pv_param_1];
	ld.param.f32 	%f792, [_Z10SampleQuad6float2S_S_S_Pv_param_0+4];
	ld.param.f32 	%f791, [_Z10SampleQuad6float2S_S_S_Pv_param_0];
	.loc 1 29 1
	add.ftz.f32 	%f174, %f17, %f3;
	add.ftz.f32 	%f175, %f174, %f791;
	add.ftz.f32 	%f176, %f175, %f15;
	mul.ftz.f32 	%f154, %f176, 0f3E800000;
	add.ftz.f32 	%f177, %f18, %f6;
	add.ftz.f32 	%f178, %f177, %f792;
	add.ftz.f32 	%f179, %f178, %f16;
	mul.ftz.f32 	%f155, %f179, 0f3E800000;
	.loc 1 29 217
	// inline asm
	tex.2d.v4.f32.f32 {%f150, %f151, %f152, %f153}, [inTexture, {%f154, %f155}];
	// inline asm
	.loc 1 29 1
	add.ftz.f32 	%f180, %f174, %f793;
	add.ftz.f32 	%f181, %f180, %f8;
	mul.ftz.f32 	%f160, %f181, 0f3E800000;
	add.ftz.f32 	%f182, %f177, %f794;
	add.ftz.f32 	%f183, %f182, %f10;
	mul.ftz.f32 	%f161, %f183, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f156, %f157, %f158, %f159}, [inTexture, {%f160, %f161}];
	// inline asm
	add.ftz.f32 	%f184, %f150, %f156;
	add.ftz.f32 	%f185, %f151, %f157;
	add.ftz.f32 	%f186, %f152, %f158;
	add.ftz.f32 	%f187, %f153, %f159;
	.loc 1 29 1
	add.ftz.f32 	%f188, %f17, %f12;
	add.ftz.f32 	%f189, %f188, %f795;
	add.ftz.f32 	%f190, %f189, %f8;
	mul.ftz.f32 	%f166, %f190, 0f3E800000;
	add.ftz.f32 	%f191, %f18, %f14;
	add.ftz.f32 	%f192, %f191, %f796;
	add.ftz.f32 	%f193, %f192, %f10;
	mul.ftz.f32 	%f167, %f193, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f162, %f163, %f164, %f165}, [inTexture, {%f166, %f167}];
	// inline asm
	add.ftz.f32 	%f194, %f184, %f162;
	add.ftz.f32 	%f195, %f185, %f163;
	add.ftz.f32 	%f196, %f186, %f164;
	add.ftz.f32 	%f197, %f187, %f165;
	.loc 1 29 1
	add.ftz.f32 	%f198, %f188, %f797;
	add.ftz.f32 	%f199, %f198, %f15;
	mul.ftz.f32 	%f172, %f199, 0f3E800000;
	add.ftz.f32 	%f200, %f191, %f798;
	add.ftz.f32 	%f201, %f200, %f16;
	mul.ftz.f32 	%f173, %f201, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f168, %f169, %f170, %f171}, [inTexture, {%f172, %f173}];
	// inline asm
	add.ftz.f32 	%f202, %f194, %f168;
	add.ftz.f32 	%f203, %f195, %f169;
	add.ftz.f32 	%f204, %f196, %f170;
	add.ftz.f32 	%f205, %f197, %f171;
	.loc 1 29 1
	mul.ftz.f32 	%f816, %f202, 0f3E800000;
	mul.ftz.f32 	%f817, %f203, 0f3E800000;
	mul.ftz.f32 	%f818, %f204, 0f3E800000;
	mul.ftz.f32 	%f819, %f205, 0f3E800000;
	.loc 2 2770 10
	max.ftz.f32 	%f25, %f19, %f20;
	.loc 1 29 1
	setp.geu.ftz.f32	%p7, %f25, 0f3FB851EC;
	@%p7 bra 	BB1_4;

	.loc 1 29 1
	add.ftz.f32 	%f302, %f25, 0fBF800000;
	mov.f32 	%f303, 0f3EE147B0;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f304, %f302, %f303;
	.loc 1 29 1
	add.ftz.f32 	%f305, %f17, 0fBF000000;
	.loc 2 2740 10
	cvt.rmi.ftz.f32.f32	%f306, %f305;
	.loc 1 29 1
	add.ftz.f32 	%f307, %f18, 0fBF000000;
	.loc 2 2740 10
	cvt.rmi.ftz.f32.f32	%f308, %f307;
	sub.ftz.f32 	%f309, %f305, %f306;
	sub.ftz.f32 	%f310, %f307, %f308;
	fma.rn.ftz.f32 	%f311, %f309, 0fBF000000, 0f3F800000;
	mov.f32 	%f312, 0f3F800000;
	fma.rn.ftz.f32 	%f313, %f311, %f309, 0fBF000000;
	mul.ftz.f32 	%f314, %f313, %f309;
	fma.rn.ftz.f32 	%f315, %f309, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f316, %f315, %f309;
	fma.rn.ftz.f32 	%f317, %f316, %f309, 0f3F800000;
	sub.ftz.f32 	%f318, %f312, %f309;
	fma.rn.ftz.f32 	%f319, %f318, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f320, %f319, %f318;
	fma.rn.ftz.f32 	%f321, %f320, %f318, 0f3F800000;
	fma.rn.ftz.f32 	%f322, %f318, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f323, %f322, %f318, 0fBF000000;
	mul.ftz.f32 	%f324, %f323, %f318;
	fma.rn.ftz.f32 	%f325, %f310, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f326, %f325, %f310, 0fBF000000;
	mul.ftz.f32 	%f327, %f326, %f310;
	fma.rn.ftz.f32 	%f328, %f310, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f329, %f328, %f310;
	fma.rn.ftz.f32 	%f330, %f329, %f310, 0f3F800000;
	sub.ftz.f32 	%f331, %f312, %f310;
	fma.rn.ftz.f32 	%f332, %f331, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f333, %f332, %f331;
	fma.rn.ftz.f32 	%f334, %f333, %f331, 0f3F800000;
	fma.rn.ftz.f32 	%f335, %f331, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f336, %f335, %f331, 0fBF000000;
	mul.ftz.f32 	%f337, %f336, %f331;
	add.ftz.f32 	%f338, %f306, 0fBF000000;
	add.ftz.f32 	%f339, %f308, 0fBF000000;
	add.ftz.f32 	%f258, %f338, 0f00000000;
	add.ftz.f32 	%f229, %f339, 0f00000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f206, %f207, %f208, %f209}, [inTexture, {%f258, %f229}];
	// inline asm
	mul.ftz.f32 	%f340, %f314, %f327;
	add.ftz.f32 	%f288, %f338, 0f3F800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f212, %f213, %f214, %f215}, [inTexture, {%f288, %f229}];
	// inline asm
	mul.ftz.f32 	%f341, %f317, %f327;
	mul.ftz.f32 	%f342, %f212, %f341;
	mul.ftz.f32 	%f343, %f213, %f341;
	mul.ftz.f32 	%f344, %f214, %f341;
	mul.ftz.f32 	%f345, %f215, %f341;
	fma.rn.ftz.f32 	%f346, %f206, %f340, %f342;
	fma.rn.ftz.f32 	%f347, %f207, %f340, %f343;
	fma.rn.ftz.f32 	%f348, %f208, %f340, %f344;
	fma.rn.ftz.f32 	%f349, %f209, %f340, %f345;
	add.ftz.f32 	%f294, %f338, 0f40000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f218, %f219, %f220, %f221}, [inTexture, {%f294, %f229}];
	// inline asm
	mul.ftz.f32 	%f350, %f321, %f327;
	fma.rn.ftz.f32 	%f351, %f218, %f350, %f346;
	fma.rn.ftz.f32 	%f352, %f219, %f350, %f347;
	fma.rn.ftz.f32 	%f353, %f220, %f350, %f348;
	fma.rn.ftz.f32 	%f354, %f221, %f350, %f349;
	add.ftz.f32 	%f300, %f338, 0f40400000;
	// inline asm
	tex.2d.v4.f32.f32 {%f224, %f225, %f226, %f227}, [inTexture, {%f300, %f229}];
	// inline asm
	mul.ftz.f32 	%f355, %f324, %f327;
	fma.rn.ftz.f32 	%f356, %f224, %f355, %f351;
	fma.rn.ftz.f32 	%f357, %f225, %f355, %f352;
	fma.rn.ftz.f32 	%f358, %f226, %f355, %f353;
	fma.rn.ftz.f32 	%f359, %f227, %f355, %f354;
	add.ftz.f32 	%f253, %f339, 0f3F800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f230, %f231, %f232, %f233}, [inTexture, {%f258, %f253}];
	// inline asm
	mul.ftz.f32 	%f360, %f314, %f330;
	fma.rn.ftz.f32 	%f361, %f230, %f360, %f356;
	fma.rn.ftz.f32 	%f362, %f231, %f360, %f357;
	fma.rn.ftz.f32 	%f363, %f232, %f360, %f358;
	fma.rn.ftz.f32 	%f364, %f233, %f360, %f359;
	// inline asm
	tex.2d.v4.f32.f32 {%f236, %f237, %f238, %f239}, [inTexture, {%f288, %f253}];
	// inline asm
	mul.ftz.f32 	%f365, %f317, %f330;
	fma.rn.ftz.f32 	%f366, %f236, %f365, %f361;
	fma.rn.ftz.f32 	%f367, %f237, %f365, %f362;
	fma.rn.ftz.f32 	%f368, %f238, %f365, %f363;
	fma.rn.ftz.f32 	%f369, %f239, %f365, %f364;
	// inline asm
	tex.2d.v4.f32.f32 {%f242, %f243, %f244, %f245}, [inTexture, {%f294, %f253}];
	// inline asm
	mul.ftz.f32 	%f370, %f321, %f330;
	fma.rn.ftz.f32 	%f371, %f242, %f370, %f366;
	fma.rn.ftz.f32 	%f372, %f243, %f370, %f367;
	fma.rn.ftz.f32 	%f373, %f244, %f370, %f368;
	fma.rn.ftz.f32 	%f374, %f245, %f370, %f369;
	// inline asm
	tex.2d.v4.f32.f32 {%f248, %f249, %f250, %f251}, [inTexture, {%f300, %f253}];
	// inline asm
	mul.ftz.f32 	%f375, %f324, %f330;
	fma.rn.ftz.f32 	%f376, %f248, %f375, %f371;
	fma.rn.ftz.f32 	%f377, %f249, %f375, %f372;
	fma.rn.ftz.f32 	%f378, %f250, %f375, %f373;
	fma.rn.ftz.f32 	%f379, %f251, %f375, %f374;
	add.ftz.f32 	%f277, %f339, 0f40000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f254, %f255, %f256, %f257}, [inTexture, {%f258, %f277}];
	// inline asm
	mul.ftz.f32 	%f380, %f314, %f334;
	fma.rn.ftz.f32 	%f381, %f254, %f380, %f376;
	fma.rn.ftz.f32 	%f382, %f255, %f380, %f377;
	fma.rn.ftz.f32 	%f383, %f256, %f380, %f378;
	fma.rn.ftz.f32 	%f384, %f257, %f380, %f379;
	// inline asm
	tex.2d.v4.f32.f32 {%f260, %f261, %f262, %f263}, [inTexture, {%f288, %f277}];
	// inline asm
	mul.ftz.f32 	%f385, %f317, %f334;
	fma.rn.ftz.f32 	%f386, %f260, %f385, %f381;
	fma.rn.ftz.f32 	%f387, %f261, %f385, %f382;
	fma.rn.ftz.f32 	%f388, %f262, %f385, %f383;
	fma.rn.ftz.f32 	%f389, %f263, %f385, %f384;
	// inline asm
	tex.2d.v4.f32.f32 {%f266, %f267, %f268, %f269}, [inTexture, {%f294, %f277}];
	// inline asm
	mul.ftz.f32 	%f390, %f321, %f334;
	fma.rn.ftz.f32 	%f391, %f266, %f390, %f386;
	fma.rn.ftz.f32 	%f392, %f267, %f390, %f387;
	fma.rn.ftz.f32 	%f393, %f268, %f390, %f388;
	fma.rn.ftz.f32 	%f394, %f269, %f390, %f389;
	// inline asm
	tex.2d.v4.f32.f32 {%f272, %f273, %f274, %f275}, [inTexture, {%f300, %f277}];
	// inline asm
	mul.ftz.f32 	%f395, %f324, %f334;
	fma.rn.ftz.f32 	%f396, %f272, %f395, %f391;
	fma.rn.ftz.f32 	%f397, %f273, %f395, %f392;
	fma.rn.ftz.f32 	%f398, %f274, %f395, %f393;
	fma.rn.ftz.f32 	%f399, %f275, %f395, %f394;
	add.ftz.f32 	%f282, %f338, 0f80000000;
	add.ftz.f32 	%f301, %f339, 0f40400000;
	// inline asm
	tex.2d.v4.f32.f32 {%f278, %f279, %f280, %f281}, [inTexture, {%f282, %f301}];
	// inline asm
	mul.ftz.f32 	%f400, %f314, %f337;
	fma.rn.ftz.f32 	%f401, %f278, %f400, %f396;
	fma.rn.ftz.f32 	%f402, %f279, %f400, %f397;
	fma.rn.ftz.f32 	%f403, %f280, %f400, %f398;
	fma.rn.ftz.f32 	%f404, %f281, %f400, %f399;
	// inline asm
	tex.2d.v4.f32.f32 {%f284, %f285, %f286, %f287}, [inTexture, {%f288, %f301}];
	// inline asm
	mul.ftz.f32 	%f405, %f317, %f337;
	fma.rn.ftz.f32 	%f406, %f284, %f405, %f401;
	fma.rn.ftz.f32 	%f407, %f285, %f405, %f402;
	fma.rn.ftz.f32 	%f408, %f286, %f405, %f403;
	fma.rn.ftz.f32 	%f409, %f287, %f405, %f404;
	// inline asm
	tex.2d.v4.f32.f32 {%f290, %f291, %f292, %f293}, [inTexture, {%f294, %f301}];
	// inline asm
	mul.ftz.f32 	%f410, %f321, %f337;
	fma.rn.ftz.f32 	%f411, %f290, %f410, %f406;
	fma.rn.ftz.f32 	%f412, %f291, %f410, %f407;
	fma.rn.ftz.f32 	%f413, %f292, %f410, %f408;
	fma.rn.ftz.f32 	%f414, %f293, %f410, %f409;
	// inline asm
	tex.2d.v4.f32.f32 {%f296, %f297, %f298, %f299}, [inTexture, {%f300, %f301}];
	// inline asm
	mul.ftz.f32 	%f415, %f324, %f337;
	fma.rn.ftz.f32 	%f416, %f296, %f415, %f411;
	fma.rn.ftz.f32 	%f417, %f297, %f415, %f412;
	fma.rn.ftz.f32 	%f418, %f298, %f415, %f413;
	fma.rn.ftz.f32 	%f419, %f299, %f415, %f414;
	.loc 1 29 1
	sub.ftz.f32 	%f420, %f816, %f416;
	fma.rn.ftz.f32 	%f816, %f304, %f420, %f416;
	sub.ftz.f32 	%f421, %f817, %f417;
	fma.rn.ftz.f32 	%f817, %f304, %f421, %f417;
	sub.ftz.f32 	%f422, %f818, %f418;
	fma.rn.ftz.f32 	%f818, %f304, %f422, %f418;
	sub.ftz.f32 	%f423, %f819, %f419;
	fma.rn.ftz.f32 	%f819, %f304, %f423, %f419;

BB1_4:
	.loc 1 29 1
	mov.f32 	%f836, %f819;
	mov.f32 	%f835, %f818;
	mov.f32 	%f834, %f817;
	mov.f32 	%f833, %f816;
	bra.uni 	BB1_26;

BB1_5:
	ld.param.f32 	%f814, [_Z10SampleQuad6float2S_S_S_Pv_param_3+4];
	ld.param.f32 	%f813, [_Z10SampleQuad6float2S_S_S_Pv_param_3];
	ld.param.f32 	%f812, [_Z10SampleQuad6float2S_S_S_Pv_param_2+4];
	ld.param.f32 	%f811, [_Z10SampleQuad6float2S_S_S_Pv_param_2];
	ld.param.f32 	%f810, [_Z10SampleQuad6float2S_S_S_Pv_param_1+4];
	ld.param.f32 	%f809, [_Z10SampleQuad6float2S_S_S_Pv_param_1];
	ld.param.f32 	%f808, [_Z10SampleQuad6float2S_S_S_Pv_param_0+4];
	ld.param.f32 	%f807, [_Z10SampleQuad6float2S_S_S_Pv_param_0];
	mov.f32 	%f827, %f808;
	mov.f32 	%f826, %f807;
	mov.f32 	%f825, %f810;
	mov.f32 	%f824, %f809;
	mov.f32 	%f823, %f812;
	mov.f32 	%f822, %f811;
	mov.f32 	%f821, %f814;
	mov.f32 	%f820, %f813;
	mov.u32 	%r38, 0;
	.loc 1 29 1
	st.local.u32 	[%rd5], %r38;
	mov.u32 	%r19, 1;
	.loc 1 29 1
	st.local.u32 	[%rd6], %r19;
	st.local.v2.f32 	[%rd4], {%f807, %f808};
	st.local.v2.f32 	[%rd4+8], {%f809, %f810};
	st.local.f32 	[%rd4+16], %f811;
	st.local.f32 	[%rd4+20], %f812;
	st.local.v2.f32 	[%rd4+24], {%f813, %f814};
	mov.f32 	%f832, 0f00000000;
	mov.u32 	%r37, %r38;
	mov.f32 	%f831, %f832;
	mov.f32 	%f830, %f832;
	mov.f32 	%f829, %f832;
	mov.f32 	%f828, %f832;

BB1_6:
	.loc 1 29 1
	add.ftz.f32 	%f53, %f826, %f824;
	mul.ftz.f32 	%f54, %f53, 0f3F000000;
	add.ftz.f32 	%f57, %f827, %f825;
	mul.ftz.f32 	%f58, %f57, 0f3F000000;
	add.ftz.f32 	%f429, %f824, %f822;
	mul.ftz.f32 	%f60, %f429, 0f3F000000;
	add.ftz.f32 	%f430, %f825, %f823;
	mul.ftz.f32 	%f62, %f430, 0f3F000000;
	add.ftz.f32 	%f431, %f822, %f820;
	mul.ftz.f32 	%f64, %f431, 0f3F000000;
	add.ftz.f32 	%f432, %f823, %f821;
	mul.ftz.f32 	%f66, %f432, 0f3F000000;
	add.ftz.f32 	%f433, %f820, %f826;
	mul.ftz.f32 	%f67, %f433, 0f3F000000;
	add.ftz.f32 	%f434, %f821, %f827;
	mul.ftz.f32 	%f68, %f434, 0f3F000000;
	add.ftz.f32 	%f435, %f54, %f64;
	mul.ftz.f32 	%f69, %f435, 0f3F000000;
	add.ftz.f32 	%f436, %f58, %f66;
	mul.ftz.f32 	%f70, %f436, 0f3F000000;
	sub.ftz.f32 	%f437, %f67, %f60;
	sub.ftz.f32 	%f438, %f68, %f62;
	mul.ftz.f32 	%f439, %f438, %f438;
	fma.rn.ftz.f32 	%f71, %f437, %f437, %f439;
	.loc 1 29 1
	setp.gt.ftz.f32	%p8, %f71, 0f3F800000;
	selp.u32	%r20, 1, 0, %p8;
	.loc 1 29 1
	sub.ftz.f32 	%f440, %f64, %f54;
	sub.ftz.f32 	%f441, %f66, %f58;
	mul.ftz.f32 	%f442, %f441, %f441;
	fma.rn.ftz.f32 	%f72, %f440, %f440, %f442;
	.loc 1 29 1
	setp.gt.ftz.f32	%p9, %f72, 0f3F800000;
	.loc 1 29 1
	or.b32  	%r21, %r20, 2;
	.loc 1 29 1
	selp.b32	%r3, %r21, %r20, %p9;
	.loc 1 29 1
	setp.eq.s32	%p10, %r3, 0;
	@%p10 bra 	BB1_21;

	mul.wide.s32 	%rd49, %r38, 4;
	add.s64 	%rd50, %rd5, %rd49;
	.loc 1 29 1
	ld.local.u32 	%r4, [%rd50];
	setp.eq.s32	%p11, %r4, 2;
	@%p11 bra 	BB1_15;

	setp.lt.ftz.f32	%p12, %f71, 0f40800000;
	setp.lt.ftz.f32	%p13, %f72, 0f40800000;
	and.pred  	%p14, %p12, %p13;
	.loc 1 29 1
	@%p14 bra 	BB1_15;

	.loc 1 29 1
	setp.eq.s32	%p15, %r3, 3;
	@%p15 bra 	BB1_13;

	.loc 1 29 1
	setp.eq.s32	%p16, %r3, 2;
	mul.wide.s32 	%rd53, %r37, 32;
	add.s64 	%rd54, %rd4, %rd53;
	.loc 1 29 1
	st.local.v2.f32 	[%rd54], {%f826, %f827};
	.loc 1 29 1
	@%p16 bra 	BB1_12;

	mul.wide.s32 	%rd57, %r37, 32;
	add.s64 	%rd58, %rd4, %rd57;
	.loc 1 29 1
	st.local.v2.f32 	[%rd58+8], {%f54, %f58};
	st.local.v2.f32 	[%rd58+16], {%f64, %f66};
	st.local.f32 	[%rd58+24], %f820;
	st.local.f32 	[%rd58+28], %f821;
	st.local.v2.f32 	[%rd58+32], {%f54, %f58};
	st.local.f32 	[%rd58+40], %f824;
	st.local.f32 	[%rd58+44], %f825;
	st.local.v2.f32 	[%rd58+48], {%f822, %f823};
	st.local.f32 	[%rd58+56], %f64;
	st.local.f32 	[%rd58+60], %f66;
	mov.u32 	%r39, 2;
	bra.uni 	BB1_14;

BB1_12:
	mul.wide.s32 	%rd61, %r37, 32;
	add.s64 	%rd62, %rd4, %rd61;
	.loc 1 29 1
	st.local.v2.f32 	[%rd62+8], {%f824, %f825};
	st.local.v2.f32 	[%rd62+16], {%f60, %f62};
	st.local.f32 	[%rd62+24], %f67;
	st.local.f32 	[%rd62+28], %f68;
	st.local.v2.f32 	[%rd62+32], {%f67, %f68};
	st.local.f32 	[%rd62+40], %f60;
	st.local.f32 	[%rd62+44], %f62;
	st.local.v2.f32 	[%rd62+48], {%f822, %f823};
	st.local.f32 	[%rd62+56], %f820;
	st.local.f32 	[%rd62+60], %f821;
	mov.u32 	%r39, 2;
	bra.uni 	BB1_14;

BB1_13:
	mul.wide.s32 	%rd65, %r37, 32;
	add.s64 	%rd66, %rd4, %rd65;
	.loc 1 29 1
	st.local.v2.f32 	[%rd66], {%f69, %f70};
	st.local.v2.f32 	[%rd66+8], {%f67, %f68};
	st.local.f32 	[%rd66+16], %f826;
	st.local.f32 	[%rd66+20], %f827;
	st.local.v2.f32 	[%rd66+24], {%f54, %f58};
	st.local.f32 	[%rd66+32], %f69;
	st.local.f32 	[%rd66+36], %f70;
	st.local.v2.f32 	[%rd66+40], {%f54, %f58};
	st.local.f32 	[%rd66+48], %f824;
	st.local.f32 	[%rd66+52], %f825;
	st.local.v2.f32 	[%rd66+56], {%f60, %f62};
	st.local.f32 	[%rd66+64], %f69;
	st.local.f32 	[%rd66+68], %f70;
	st.local.v2.f32 	[%rd66+72], {%f60, %f62};
	st.local.f32 	[%rd66+80], %f822;
	st.local.f32 	[%rd66+84], %f823;
	st.local.v2.f32 	[%rd66+88], {%f64, %f66};
	st.local.f32 	[%rd66+96], %f69;
	st.local.f32 	[%rd66+100], %f70;
	st.local.v2.f32 	[%rd66+104], {%f64, %f66};
	st.local.f32 	[%rd66+112], %f820;
	st.local.f32 	[%rd66+116], %f821;
	st.local.v2.f32 	[%rd66+120], {%f67, %f68};
	mov.u32 	%r39, 4;

BB1_14:
	.loc 1 29 1
	add.s32 	%r25, %r37, %r39;
	add.s32 	%r37, %r25, -1;
	mul.wide.s32 	%rd69, %r38, 4;
	add.s64 	%rd70, %rd6, %rd69;
	.loc 1 29 1
	ld.local.u32 	%r26, [%rd70];
	add.s32 	%r27, %r26, -1;
	st.local.u32 	[%rd70], %r27;
	.loc 1 29 1
	setp.ne.s32	%p17, %r27, 0;
	selp.u32	%r28, 1, 0, %p17;
	add.s32 	%r38, %r28, %r38;
	mul.wide.s32 	%rd73, %r38, 4;
	add.s64 	%rd74, %rd5, %rd73;
	.loc 1 29 1
	add.s32 	%r29, %r4, 1;
	st.local.u32 	[%rd74], %r29;
	add.s64 	%rd75, %rd6, %rd73;
	.loc 1 29 1
	st.local.u32 	[%rd75], %r39;
	bra.uni 	BB1_22;

BB1_15:
	.loc 1 29 1
	setp.eq.s32	%p18, %r3, 3;
	@%p18 bra 	BB1_19;

	.loc 1 29 1
	setp.eq.s32	%p19, %r3, 2;
	@%p19 bra 	BB1_18;

	.loc 1 29 1
	add.ftz.f32 	%f455, %f826, %f54;
	add.ftz.f32 	%f456, %f455, %f64;
	add.ftz.f32 	%f457, %f456, %f820;
	mul.ftz.f32 	%f447, %f457, 0f3E800000;
	add.ftz.f32 	%f458, %f827, %f58;
	add.ftz.f32 	%f459, %f458, %f66;
	add.ftz.f32 	%f460, %f459, %f821;
	mul.ftz.f32 	%f448, %f460, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f443, %f444, %f445, %f446}, [inTexture, {%f447, %f448}];
	// inline asm
	add.ftz.f32 	%f461, %f828, %f443;
	add.ftz.f32 	%f462, %f829, %f444;
	add.ftz.f32 	%f463, %f830, %f445;
	add.ftz.f32 	%f464, %f831, %f446;
	.loc 1 29 1
	add.ftz.f32 	%f465, %f54, %f824;
	add.ftz.f32 	%f466, %f465, %f822;
	add.ftz.f32 	%f467, %f466, %f64;
	mul.ftz.f32 	%f453, %f467, 0f3E800000;
	add.ftz.f32 	%f468, %f58, %f825;
	add.ftz.f32 	%f469, %f468, %f823;
	add.ftz.f32 	%f470, %f469, %f66;
	mul.ftz.f32 	%f454, %f470, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f449, %f450, %f451, %f452}, [inTexture, {%f453, %f454}];
	// inline asm
	add.ftz.f32 	%f828, %f461, %f449;
	add.ftz.f32 	%f829, %f462, %f450;
	add.ftz.f32 	%f830, %f463, %f451;
	add.ftz.f32 	%f831, %f464, %f452;
	.loc 1 29 1
	add.ftz.f32 	%f832, %f832, 0f40000000;
	bra.uni 	BB1_20;

BB1_18:
	.loc 1 29 1
	add.ftz.f32 	%f815, %f827, %f825;
	add.ftz.f32 	%f790, %f826, %f824;
	.loc 1 29 1
	add.ftz.f32 	%f483, %f790, %f60;
	add.ftz.f32 	%f484, %f483, %f67;
	mul.ftz.f32 	%f475, %f484, 0f3E800000;
	add.ftz.f32 	%f485, %f815, %f62;
	add.ftz.f32 	%f486, %f485, %f68;
	mul.ftz.f32 	%f476, %f486, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f471, %f472, %f473, %f474}, [inTexture, {%f475, %f476}];
	// inline asm
	add.ftz.f32 	%f487, %f828, %f471;
	add.ftz.f32 	%f488, %f829, %f472;
	add.ftz.f32 	%f489, %f830, %f473;
	add.ftz.f32 	%f490, %f831, %f474;
	.loc 1 29 1
	add.ftz.f32 	%f491, %f67, %f60;
	add.ftz.f32 	%f492, %f491, %f822;
	add.ftz.f32 	%f493, %f492, %f820;
	mul.ftz.f32 	%f481, %f493, 0f3E800000;
	add.ftz.f32 	%f494, %f68, %f62;
	add.ftz.f32 	%f495, %f494, %f823;
	add.ftz.f32 	%f496, %f495, %f821;
	mul.ftz.f32 	%f482, %f496, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f477, %f478, %f479, %f480}, [inTexture, {%f481, %f482}];
	// inline asm
	add.ftz.f32 	%f828, %f487, %f477;
	add.ftz.f32 	%f829, %f488, %f478;
	add.ftz.f32 	%f830, %f489, %f479;
	add.ftz.f32 	%f831, %f490, %f480;
	.loc 1 29 1
	add.ftz.f32 	%f832, %f832, 0f40000000;
	bra.uni 	BB1_20;

BB1_19:
	.loc 1 29 1
	add.ftz.f32 	%f832, %f832, 0f40800000;
	add.ftz.f32 	%f521, %f69, %f67;
	add.ftz.f32 	%f522, %f521, %f826;
	add.ftz.f32 	%f523, %f522, %f54;
	mul.ftz.f32 	%f501, %f523, 0f3E800000;
	add.ftz.f32 	%f524, %f70, %f68;
	add.ftz.f32 	%f525, %f524, %f827;
	add.ftz.f32 	%f526, %f525, %f58;
	mul.ftz.f32 	%f502, %f526, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f497, %f498, %f499, %f500}, [inTexture, {%f501, %f502}];
	// inline asm
	add.ftz.f32 	%f527, %f828, %f497;
	add.ftz.f32 	%f528, %f829, %f498;
	add.ftz.f32 	%f529, %f830, %f499;
	add.ftz.f32 	%f530, %f831, %f500;
	.loc 1 29 1
	add.ftz.f32 	%f531, %f69, %f54;
	add.ftz.f32 	%f532, %f531, %f824;
	add.ftz.f32 	%f533, %f532, %f60;
	mul.ftz.f32 	%f507, %f533, 0f3E800000;
	add.ftz.f32 	%f534, %f70, %f58;
	add.ftz.f32 	%f535, %f534, %f825;
	add.ftz.f32 	%f536, %f535, %f62;
	mul.ftz.f32 	%f508, %f536, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f503, %f504, %f505, %f506}, [inTexture, {%f507, %f508}];
	// inline asm
	add.ftz.f32 	%f537, %f527, %f503;
	add.ftz.f32 	%f538, %f528, %f504;
	add.ftz.f32 	%f539, %f529, %f505;
	add.ftz.f32 	%f540, %f530, %f506;
	.loc 1 29 1
	add.ftz.f32 	%f541, %f69, %f60;
	add.ftz.f32 	%f542, %f541, %f822;
	add.ftz.f32 	%f543, %f542, %f64;
	mul.ftz.f32 	%f513, %f543, 0f3E800000;
	add.ftz.f32 	%f544, %f70, %f62;
	add.ftz.f32 	%f545, %f544, %f823;
	add.ftz.f32 	%f546, %f545, %f66;
	mul.ftz.f32 	%f514, %f546, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f509, %f510, %f511, %f512}, [inTexture, {%f513, %f514}];
	// inline asm
	add.ftz.f32 	%f547, %f537, %f509;
	add.ftz.f32 	%f548, %f538, %f510;
	add.ftz.f32 	%f549, %f539, %f511;
	add.ftz.f32 	%f550, %f540, %f512;
	.loc 1 29 1
	add.ftz.f32 	%f551, %f69, %f64;
	add.ftz.f32 	%f552, %f551, %f820;
	add.ftz.f32 	%f553, %f552, %f67;
	mul.ftz.f32 	%f519, %f553, 0f3E800000;
	add.ftz.f32 	%f554, %f70, %f66;
	add.ftz.f32 	%f555, %f554, %f821;
	add.ftz.f32 	%f556, %f555, %f68;
	mul.ftz.f32 	%f520, %f556, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f515, %f516, %f517, %f518}, [inTexture, {%f519, %f520}];
	// inline asm
	add.ftz.f32 	%f828, %f547, %f515;
	add.ftz.f32 	%f829, %f548, %f516;
	add.ftz.f32 	%f830, %f549, %f517;
	add.ftz.f32 	%f831, %f550, %f518;

BB1_20:
	.loc 1 29 1
	add.s32 	%r37, %r37, -1;
	mul.wide.s32 	%rd94, %r38, 4;
	add.s64 	%rd95, %rd6, %rd94;
	.loc 1 29 1
	ld.local.u32 	%r30, [%rd95];
	add.s32 	%r31, %r30, -1;
	st.local.u32 	[%rd95], %r31;
	.loc 1 29 1
	setp.eq.s32	%p20, %r31, 0;
	selp.b32	%r32, -1, 0, %p20;
	add.s32 	%r38, %r32, %r38;
	bra.uni 	BB1_22;

BB1_21:
	.loc 1 29 1
	add.s32 	%r37, %r37, -1;
	add.ftz.f32 	%f832, %f832, 0f3F800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f557, %f558, %f559, %f560}, [inTexture, {%f69, %f70}];
	// inline asm
	add.ftz.f32 	%f828, %f828, %f557;
	add.ftz.f32 	%f829, %f829, %f558;
	add.ftz.f32 	%f830, %f830, %f559;
	add.ftz.f32 	%f831, %f831, %f560;
	mul.wide.s32 	%rd99, %r38, 4;
	add.s64 	%rd100, %rd6, %rd99;
	.loc 1 29 1
	ld.local.u32 	%r33, [%rd100];
	add.s32 	%r34, %r33, -1;
	st.local.u32 	[%rd100], %r34;
	.loc 1 29 1
	setp.eq.s32	%p21, %r34, 0;
	selp.b32	%r35, -1, 0, %p21;
	add.s32 	%r38, %r35, %r38;

BB1_22:
	.loc 1 29 10
	setp.gt.s32	%p22, %r37, -1;
	@%p22 bra 	BB1_24;

	mov.f32 	%f563, 0f3F800000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f564, %f563, %f832;
	.loc 1 29 83
	mul.ftz.f32 	%f833, %f828, %f564;
	mul.ftz.f32 	%f834, %f829, %f564;
	mul.ftz.f32 	%f835, %f830, %f564;
	mul.ftz.f32 	%f836, %f831, %f564;
	bra.uni 	BB1_26;

BB1_24:
	mul.wide.s32 	%rd103, %r37, 32;
	add.s64 	%rd104, %rd4, %rd103;
	.loc 1 29 1
	ld.local.v2.f32 	{%f565, %f566}, [%rd104+24];
	mov.f32 	%f821, %f566;
	mov.f32 	%f820, %f565;
	ld.local.v2.f32 	{%f567, %f568}, [%rd104+16];
	mov.f32 	%f823, %f568;
	mov.f32 	%f822, %f567;
	ld.local.v2.f32 	{%f569, %f570}, [%rd104+8];
	mov.f32 	%f825, %f570;
	mov.f32 	%f824, %f569;
	ld.local.v2.f32 	{%f571, %f572}, [%rd104];
	mov.f32 	%f827, %f572;
	mov.f32 	%f826, %f571;
	bra.uni 	BB1_6;

BB1_25:
	.loc 1 29 1
	add.ftz.f32 	%f669, %f17, 0fBF000000;
	.loc 2 2740 10
	cvt.rmi.ftz.f32.f32	%f670, %f669;
	.loc 1 29 1
	add.ftz.f32 	%f671, %f18, 0fBF000000;
	.loc 2 2740 10
	cvt.rmi.ftz.f32.f32	%f672, %f671;
	sub.ftz.f32 	%f673, %f669, %f670;
	sub.ftz.f32 	%f674, %f671, %f672;
	fma.rn.ftz.f32 	%f675, %f673, 0fBF000000, 0f3F800000;
	mov.f32 	%f676, 0f3F800000;
	fma.rn.ftz.f32 	%f677, %f675, %f673, 0fBF000000;
	mul.ftz.f32 	%f678, %f677, %f673;
	fma.rn.ftz.f32 	%f679, %f673, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f680, %f679, %f673;
	fma.rn.ftz.f32 	%f681, %f680, %f673, 0f3F800000;
	sub.ftz.f32 	%f682, %f676, %f673;
	fma.rn.ftz.f32 	%f683, %f682, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f684, %f683, %f682;
	fma.rn.ftz.f32 	%f685, %f684, %f682, 0f3F800000;
	fma.rn.ftz.f32 	%f686, %f682, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f687, %f686, %f682, 0fBF000000;
	mul.ftz.f32 	%f688, %f687, %f682;
	fma.rn.ftz.f32 	%f689, %f674, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f690, %f689, %f674, 0fBF000000;
	mul.ftz.f32 	%f691, %f690, %f674;
	fma.rn.ftz.f32 	%f692, %f674, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f693, %f692, %f674;
	fma.rn.ftz.f32 	%f694, %f693, %f674, 0f3F800000;
	sub.ftz.f32 	%f695, %f676, %f674;
	fma.rn.ftz.f32 	%f696, %f695, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f697, %f696, %f695;
	fma.rn.ftz.f32 	%f698, %f697, %f695, 0f3F800000;
	fma.rn.ftz.f32 	%f699, %f695, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f700, %f699, %f695, 0fBF000000;
	mul.ftz.f32 	%f701, %f700, %f695;
	add.ftz.f32 	%f702, %f670, 0fBF000000;
	add.ftz.f32 	%f703, %f672, 0fBF000000;
	add.ftz.f32 	%f625, %f702, 0f00000000;
	add.ftz.f32 	%f596, %f703, 0f00000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f573, %f574, %f575, %f576}, [inTexture, {%f625, %f596}];
	// inline asm
	mul.ftz.f32 	%f704, %f678, %f691;
	add.ftz.f32 	%f655, %f702, 0f3F800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f579, %f580, %f581, %f582}, [inTexture, {%f655, %f596}];
	// inline asm
	mul.ftz.f32 	%f705, %f681, %f691;
	mul.ftz.f32 	%f706, %f579, %f705;
	mul.ftz.f32 	%f707, %f580, %f705;
	mul.ftz.f32 	%f708, %f581, %f705;
	mul.ftz.f32 	%f709, %f582, %f705;
	fma.rn.ftz.f32 	%f710, %f573, %f704, %f706;
	fma.rn.ftz.f32 	%f711, %f574, %f704, %f707;
	fma.rn.ftz.f32 	%f712, %f575, %f704, %f708;
	fma.rn.ftz.f32 	%f713, %f576, %f704, %f709;
	add.ftz.f32 	%f661, %f702, 0f40000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f585, %f586, %f587, %f588}, [inTexture, {%f661, %f596}];
	// inline asm
	mul.ftz.f32 	%f714, %f685, %f691;
	fma.rn.ftz.f32 	%f715, %f585, %f714, %f710;
	fma.rn.ftz.f32 	%f716, %f586, %f714, %f711;
	fma.rn.ftz.f32 	%f717, %f587, %f714, %f712;
	fma.rn.ftz.f32 	%f718, %f588, %f714, %f713;
	add.ftz.f32 	%f667, %f702, 0f40400000;
	// inline asm
	tex.2d.v4.f32.f32 {%f591, %f592, %f593, %f594}, [inTexture, {%f667, %f596}];
	// inline asm
	mul.ftz.f32 	%f719, %f688, %f691;
	fma.rn.ftz.f32 	%f720, %f591, %f719, %f715;
	fma.rn.ftz.f32 	%f721, %f592, %f719, %f716;
	fma.rn.ftz.f32 	%f722, %f593, %f719, %f717;
	fma.rn.ftz.f32 	%f723, %f594, %f719, %f718;
	add.ftz.f32 	%f620, %f703, 0f3F800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f597, %f598, %f599, %f600}, [inTexture, {%f625, %f620}];
	// inline asm
	mul.ftz.f32 	%f724, %f678, %f694;
	fma.rn.ftz.f32 	%f725, %f597, %f724, %f720;
	fma.rn.ftz.f32 	%f726, %f598, %f724, %f721;
	fma.rn.ftz.f32 	%f727, %f599, %f724, %f722;
	fma.rn.ftz.f32 	%f728, %f600, %f724, %f723;
	// inline asm
	tex.2d.v4.f32.f32 {%f603, %f604, %f605, %f606}, [inTexture, {%f655, %f620}];
	// inline asm
	mul.ftz.f32 	%f729, %f681, %f694;
	fma.rn.ftz.f32 	%f730, %f603, %f729, %f725;
	fma.rn.ftz.f32 	%f731, %f604, %f729, %f726;
	fma.rn.ftz.f32 	%f732, %f605, %f729, %f727;
	fma.rn.ftz.f32 	%f733, %f606, %f729, %f728;
	// inline asm
	tex.2d.v4.f32.f32 {%f609, %f610, %f611, %f612}, [inTexture, {%f661, %f620}];
	// inline asm
	mul.ftz.f32 	%f734, %f685, %f694;
	fma.rn.ftz.f32 	%f735, %f609, %f734, %f730;
	fma.rn.ftz.f32 	%f736, %f610, %f734, %f731;
	fma.rn.ftz.f32 	%f737, %f611, %f734, %f732;
	fma.rn.ftz.f32 	%f738, %f612, %f734, %f733;
	// inline asm
	tex.2d.v4.f32.f32 {%f615, %f616, %f617, %f618}, [inTexture, {%f667, %f620}];
	// inline asm
	mul.ftz.f32 	%f739, %f688, %f694;
	fma.rn.ftz.f32 	%f740, %f615, %f739, %f735;
	fma.rn.ftz.f32 	%f741, %f616, %f739, %f736;
	fma.rn.ftz.f32 	%f742, %f617, %f739, %f737;
	fma.rn.ftz.f32 	%f743, %f618, %f739, %f738;
	add.ftz.f32 	%f644, %f703, 0f40000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f621, %f622, %f623, %f624}, [inTexture, {%f625, %f644}];
	// inline asm
	mul.ftz.f32 	%f744, %f678, %f698;
	fma.rn.ftz.f32 	%f745, %f621, %f744, %f740;
	fma.rn.ftz.f32 	%f746, %f622, %f744, %f741;
	fma.rn.ftz.f32 	%f747, %f623, %f744, %f742;
	fma.rn.ftz.f32 	%f748, %f624, %f744, %f743;
	// inline asm
	tex.2d.v4.f32.f32 {%f627, %f628, %f629, %f630}, [inTexture, {%f655, %f644}];
	// inline asm
	mul.ftz.f32 	%f749, %f681, %f698;
	fma.rn.ftz.f32 	%f750, %f627, %f749, %f745;
	fma.rn.ftz.f32 	%f751, %f628, %f749, %f746;
	fma.rn.ftz.f32 	%f752, %f629, %f749, %f747;
	fma.rn.ftz.f32 	%f753, %f630, %f749, %f748;
	// inline asm
	tex.2d.v4.f32.f32 {%f633, %f634, %f635, %f636}, [inTexture, {%f661, %f644}];
	// inline asm
	mul.ftz.f32 	%f754, %f685, %f698;
	fma.rn.ftz.f32 	%f755, %f633, %f754, %f750;
	fma.rn.ftz.f32 	%f756, %f634, %f754, %f751;
	fma.rn.ftz.f32 	%f757, %f635, %f754, %f752;
	fma.rn.ftz.f32 	%f758, %f636, %f754, %f753;
	// inline asm
	tex.2d.v4.f32.f32 {%f639, %f640, %f641, %f642}, [inTexture, {%f667, %f644}];
	// inline asm
	mul.ftz.f32 	%f759, %f688, %f698;
	fma.rn.ftz.f32 	%f760, %f639, %f759, %f755;
	fma.rn.ftz.f32 	%f761, %f640, %f759, %f756;
	fma.rn.ftz.f32 	%f762, %f641, %f759, %f757;
	fma.rn.ftz.f32 	%f763, %f642, %f759, %f758;
	add.ftz.f32 	%f649, %f702, 0f80000000;
	add.ftz.f32 	%f668, %f703, 0f40400000;
	// inline asm
	tex.2d.v4.f32.f32 {%f645, %f646, %f647, %f648}, [inTexture, {%f649, %f668}];
	// inline asm
	mul.ftz.f32 	%f764, %f678, %f701;
	fma.rn.ftz.f32 	%f765, %f645, %f764, %f760;
	fma.rn.ftz.f32 	%f766, %f646, %f764, %f761;
	fma.rn.ftz.f32 	%f767, %f647, %f764, %f762;
	fma.rn.ftz.f32 	%f768, %f648, %f764, %f763;
	// inline asm
	tex.2d.v4.f32.f32 {%f651, %f652, %f653, %f654}, [inTexture, {%f655, %f668}];
	// inline asm
	mul.ftz.f32 	%f769, %f681, %f701;
	fma.rn.ftz.f32 	%f770, %f651, %f769, %f765;
	fma.rn.ftz.f32 	%f771, %f652, %f769, %f766;
	fma.rn.ftz.f32 	%f772, %f653, %f769, %f767;
	fma.rn.ftz.f32 	%f773, %f654, %f769, %f768;
	// inline asm
	tex.2d.v4.f32.f32 {%f657, %f658, %f659, %f660}, [inTexture, {%f661, %f668}];
	// inline asm
	mul.ftz.f32 	%f774, %f685, %f701;
	fma.rn.ftz.f32 	%f775, %f657, %f774, %f770;
	fma.rn.ftz.f32 	%f776, %f658, %f774, %f771;
	fma.rn.ftz.f32 	%f777, %f659, %f774, %f772;
	fma.rn.ftz.f32 	%f778, %f660, %f774, %f773;
	// inline asm
	tex.2d.v4.f32.f32 {%f663, %f664, %f665, %f666}, [inTexture, {%f667, %f668}];
	// inline asm
	mul.ftz.f32 	%f779, %f688, %f701;
	fma.rn.ftz.f32 	%f833, %f663, %f779, %f775;
	fma.rn.ftz.f32 	%f834, %f664, %f779, %f776;
	fma.rn.ftz.f32 	%f835, %f665, %f779, %f777;
	fma.rn.ftz.f32 	%f836, %f666, %f779, %f778;

BB1_26:
	st.param.f32	[func_retval0+0], %f833;
	st.param.f32	[func_retval0+4], %f834;
	st.param.f32	[func_retval0+8], %f835;
	st.param.f32	[func_retval0+12], %f836;
	.loc 1 29 1
	ret;
}

.visible .entry cuda_kernel_renderquad(
	.param .u64 cuda_kernel_renderquad_param_0,
	.param .u32 cuda_kernel_renderquad_param_1,
	.param .u32 cuda_kernel_renderquad_param_2,
	.param .f32 cuda_kernel_renderquad_param_3,
	.param .f32 cuda_kernel_renderquad_param_4,
	.param .u32 cuda_kernel_renderquad_param_5,
	.param .u32 cuda_kernel_renderquad_param_6,
	.param .u32 cuda_kernel_renderquad_param_7,
	.param .align 16 .b8 cuda_kernel_renderquad_param_8[16],
	.param .align 16 .b8 cuda_kernel_renderquad_param_9[16],
	.param .align 16 .b8 cuda_kernel_renderquad_param_10[16],
	.param .align 16 .b8 cuda_kernel_renderquad_param_11[16],
	.param .align 8 .b8 cuda_kernel_renderquad_param_12[8],
	.param .align 8 .b8 cuda_kernel_renderquad_param_13[8],
	.param .align 8 .b8 cuda_kernel_renderquad_param_14[8],
	.param .align 8 .b8 cuda_kernel_renderquad_param_15[8],
	.param .align 8 .b8 cuda_kernel_renderquad_param_16[8],
	.param .align 8 .b8 cuda_kernel_renderquad_param_17[8],
	.param .align 8 .b8 cuda_kernel_renderquad_param_18[8],
	.param .align 8 .b8 cuda_kernel_renderquad_param_19[8],
	.param .u32 cuda_kernel_renderquad_param_20,
	.param .u64 cuda_kernel_renderquad_param_21
)
{
	.local .align 8 .b8 	__local_depot2[256];
	.reg .b64 	%SP;
	.reg .b64 	%SPL;
	.reg .pred 	%p<35>;
	.reg .s16 	%rs<6>;
	.reg .s32 	%r<75>;
	.reg .f32 	%f<1180>;
	.reg .s64 	%rd<149>;


	mov.u64 	%SPL, __local_depot2;
	cvta.local.u64 	%SP, %SPL;
	ld.param.u64 	%rd4, [cuda_kernel_renderquad_param_0];
	ld.param.u32 	%r19, [cuda_kernel_renderquad_param_1];
	ld.param.u32 	%r20, [cuda_kernel_renderquad_param_2];
	ld.param.f32 	%f183, [cuda_kernel_renderquad_param_3];
	ld.param.f32 	%f184, [cuda_kernel_renderquad_param_4];
	ld.param.u32 	%r16, [cuda_kernel_renderquad_param_6];
	ld.param.u32 	%r17, [cuda_kernel_renderquad_param_7];
	ld.param.f32 	%f3, [cuda_kernel_renderquad_param_8+8];
	ld.param.f32 	%f2, [cuda_kernel_renderquad_param_8+4];
	ld.param.f32 	%f1, [cuda_kernel_renderquad_param_8];
	ld.param.f32 	%f6, [cuda_kernel_renderquad_param_9+8];
	ld.param.f32 	%f5, [cuda_kernel_renderquad_param_9+4];
	ld.param.f32 	%f4, [cuda_kernel_renderquad_param_9];
	ld.param.f32 	%f9, [cuda_kernel_renderquad_param_10+8];
	ld.param.f32 	%f8, [cuda_kernel_renderquad_param_10+4];
	ld.param.f32 	%f7, [cuda_kernel_renderquad_param_10];
	ld.param.f32 	%f12, [cuda_kernel_renderquad_param_11+8];
	ld.param.f32 	%f11, [cuda_kernel_renderquad_param_11+4];
	ld.param.f32 	%f10, [cuda_kernel_renderquad_param_11];
	ld.param.f32 	%f202, [cuda_kernel_renderquad_param_12+4];
	ld.param.f32 	%f201, [cuda_kernel_renderquad_param_12];
	ld.param.f32 	%f204, [cuda_kernel_renderquad_param_13+4];
	ld.param.f32 	%f203, [cuda_kernel_renderquad_param_13];
	ld.param.f32 	%f206, [cuda_kernel_renderquad_param_14+4];
	ld.param.f32 	%f205, [cuda_kernel_renderquad_param_14];
	ld.param.f32 	%f208, [cuda_kernel_renderquad_param_15+4];
	ld.param.f32 	%f207, [cuda_kernel_renderquad_param_15];
	ld.param.f32 	%f210, [cuda_kernel_renderquad_param_16+4];
	ld.param.f32 	%f209, [cuda_kernel_renderquad_param_16];
	ld.param.f32 	%f212, [cuda_kernel_renderquad_param_17+4];
	ld.param.f32 	%f211, [cuda_kernel_renderquad_param_17];
	ld.param.f32 	%f214, [cuda_kernel_renderquad_param_18+4];
	ld.param.f32 	%f213, [cuda_kernel_renderquad_param_18];
	ld.param.f32 	%f216, [cuda_kernel_renderquad_param_19+4];
	ld.param.f32 	%f215, [cuda_kernel_renderquad_param_19];
	cvta.to.global.u64 	%rd1, %rd4;
	.loc 1 29 1
	mov.u32 	%r21, %ntid.x;
	mov.u32 	%r22, %ctaid.x;
	mov.u32 	%r23, %tid.x;
	mad.lo.s32 	%r1, %r21, %r22, %r23;
	mov.u32 	%r24, %ntid.y;
	mov.u32 	%r25, %ctaid.y;
	mov.u32 	%r26, %tid.y;
	mad.lo.s32 	%r2, %r24, %r25, %r26;
	.loc 1 29 1
	setp.ge.s32	%p1, %r2, %r20;
	setp.ge.s32	%p2, %r1, %r19;
	or.pred  	%p3, %p2, %p1;
	.loc 1 29 1
	@%p3 bra 	BB2_50;

	.loc 1 29 1
	cvt.rn.f32.s32	%f13, %r1;
	add.ftz.f32 	%f217, %f13, 0f3F000000;
	cvt.rn.f32.s32	%f14, %r2;
	add.ftz.f32 	%f218, %f14, 0f3F000000;
	sub.ftz.f32 	%f219, %f202, %f218;
	mul.ftz.f32 	%f220, %f209, %f219;
	sub.ftz.f32 	%f221, %f201, %f217;
	mul.ftz.f32 	%f222, %f210, %f221;
	sub.ftz.f32 	%f223, %f220, %f222;
	add.ftz.f32 	%f224, %f223, 0f3F000000;
	.loc 2 2820 10
	cvt.ftz.sat.f32.f32	%f225, %f224;
	mov.f32 	%f226, 0f3F800000;
	sub.ftz.f32 	%f227, %f226, %f225;
	.loc 1 29 1
	sub.ftz.f32 	%f228, %f204, %f218;
	mul.ftz.f32 	%f229, %f211, %f228;
	sub.ftz.f32 	%f230, %f203, %f217;
	mul.ftz.f32 	%f231, %f212, %f230;
	sub.ftz.f32 	%f232, %f229, %f231;
	add.ftz.f32 	%f233, %f232, 0f3F000000;
	.loc 2 2820 10
	cvt.ftz.sat.f32.f32	%f234, %f233;
	sub.ftz.f32 	%f235, %f226, %f234;
	mul.ftz.f32 	%f236, %f227, %f235;
	.loc 1 29 1
	sub.ftz.f32 	%f237, %f206, %f218;
	mul.ftz.f32 	%f238, %f213, %f237;
	sub.ftz.f32 	%f239, %f205, %f217;
	mul.ftz.f32 	%f240, %f214, %f239;
	sub.ftz.f32 	%f241, %f238, %f240;
	add.ftz.f32 	%f242, %f241, 0f3F000000;
	.loc 2 2820 10
	cvt.ftz.sat.f32.f32	%f243, %f242;
	sub.ftz.f32 	%f244, %f226, %f243;
	mul.ftz.f32 	%f245, %f236, %f244;
	.loc 1 29 1
	sub.ftz.f32 	%f246, %f208, %f218;
	mul.ftz.f32 	%f247, %f215, %f246;
	sub.ftz.f32 	%f248, %f207, %f217;
	mul.ftz.f32 	%f249, %f216, %f248;
	sub.ftz.f32 	%f250, %f247, %f249;
	add.ftz.f32 	%f251, %f250, 0f3F000000;
	.loc 2 2820 10
	cvt.ftz.sat.f32.f32	%f252, %f251;
	sub.ftz.f32 	%f253, %f226, %f252;
	mul.ftz.f32 	%f15, %f245, %f253;
	.loc 1 29 1
	setp.eq.ftz.f32	%p4, %f15, 0f00000000;
	.loc 1 29 1
	mad.lo.s32 	%r27, %r2, %r16, %r1;
	mul.wide.s32 	%rd5, %r27, 16;
	add.s64 	%rd2, %rd1, %rd5;
	mul.wide.s32 	%rd6, %r27, 8;
	add.s64 	%rd3, %rd1, %rd6;
	.loc 1 29 1
	@%p4 bra 	BB2_47;

	.loc 1 29 1
	sub.ftz.f32 	%f254, %f13, %f10;
	sub.ftz.f32 	%f255, %f14, %f11;
	mul.ftz.f32 	%f256, %f255, %f255;
	fma.rn.ftz.f32 	%f257, %f254, %f254, %f256;
	mov.f32 	%f258, 0f00000000;
	.loc 1 29 1
	sub.ftz.f32 	%f259, %f258, %f12;
	fma.rn.ftz.f32 	%f260, %f259, %f259, %f257;
	.loc 2 2775 10
	rsqrt.approx.ftz.f32 	%f261, %f260;
	mul.ftz.f32 	%f262, %f254, %f261;
	mul.ftz.f32 	%f263, %f255, %f261;
	mul.ftz.f32 	%f264, %f259, %f261;
	mul.ftz.f32 	%f265, %f263, %f9;
	mul.ftz.f32 	%f266, %f8, %f264;
	sub.ftz.f32 	%f267, %f265, %f266;
	mul.ftz.f32 	%f268, %f264, %f7;
	mul.ftz.f32 	%f269, %f9, %f262;
	sub.ftz.f32 	%f270, %f268, %f269;
	mul.ftz.f32 	%f271, %f262, %f8;
	mul.ftz.f32 	%f272, %f7, %f263;
	sub.ftz.f32 	%f273, %f271, %f272;
	mul.ftz.f32 	%f274, %f5, %f270;
	fma.rn.ftz.f32 	%f275, %f4, %f267, %f274;
	fma.rn.ftz.f32 	%f276, %f6, %f273, %f275;
	rcp.approx.ftz.f32 	%f277, %f276;
	sub.ftz.f32 	%f278, %f10, %f1;
	sub.ftz.f32 	%f279, %f11, %f2;
	mul.ftz.f32 	%f280, %f279, %f270;
	fma.rn.ftz.f32 	%f281, %f278, %f267, %f280;
	sub.ftz.f32 	%f282, %f12, %f3;
	fma.rn.ftz.f32 	%f283, %f282, %f273, %f281;
	mul.ftz.f32 	%f284, %f283, %f277;
	mul.ftz.f32 	%f285, %f279, %f6;
	mul.ftz.f32 	%f286, %f5, %f282;
	sub.ftz.f32 	%f287, %f285, %f286;
	mul.ftz.f32 	%f288, %f282, %f4;
	mul.ftz.f32 	%f289, %f6, %f278;
	sub.ftz.f32 	%f290, %f288, %f289;
	mul.ftz.f32 	%f291, %f278, %f5;
	mul.ftz.f32 	%f292, %f4, %f279;
	sub.ftz.f32 	%f293, %f291, %f292;
	mul.ftz.f32 	%f294, %f263, %f290;
	fma.rn.ftz.f32 	%f295, %f262, %f287, %f294;
	fma.rn.ftz.f32 	%f296, %f264, %f293, %f295;
	mul.ftz.f32 	%f297, %f296, %f277;
	.loc 1 29 1
	add.s32 	%r28, %r1, 1;
	cvt.rn.f32.s32	%f298, %r28;
	sub.ftz.f32 	%f299, %f298, %f10;
	fma.rn.ftz.f32 	%f300, %f299, %f299, %f256;
	fma.rn.ftz.f32 	%f301, %f259, %f259, %f300;
	.loc 2 2775 10
	rsqrt.approx.ftz.f32 	%f302, %f301;
	mul.ftz.f32 	%f303, %f299, %f302;
	mul.ftz.f32 	%f304, %f255, %f302;
	mul.ftz.f32 	%f305, %f259, %f302;
	mul.ftz.f32 	%f306, %f304, %f9;
	mul.ftz.f32 	%f307, %f8, %f305;
	sub.ftz.f32 	%f308, %f306, %f307;
	mul.ftz.f32 	%f309, %f305, %f7;
	mul.ftz.f32 	%f310, %f9, %f303;
	sub.ftz.f32 	%f311, %f309, %f310;
	mul.ftz.f32 	%f312, %f303, %f8;
	mul.ftz.f32 	%f313, %f7, %f304;
	sub.ftz.f32 	%f314, %f312, %f313;
	mul.ftz.f32 	%f315, %f5, %f311;
	fma.rn.ftz.f32 	%f316, %f4, %f308, %f315;
	fma.rn.ftz.f32 	%f317, %f6, %f314, %f316;
	rcp.approx.ftz.f32 	%f318, %f317;
	mul.ftz.f32 	%f319, %f279, %f311;
	fma.rn.ftz.f32 	%f320, %f278, %f308, %f319;
	fma.rn.ftz.f32 	%f321, %f282, %f314, %f320;
	mul.ftz.f32 	%f322, %f321, %f318;
	mul.ftz.f32 	%f323, %f304, %f290;
	fma.rn.ftz.f32 	%f324, %f303, %f287, %f323;
	fma.rn.ftz.f32 	%f325, %f305, %f293, %f324;
	mul.ftz.f32 	%f326, %f325, %f318;
	.loc 1 29 1
	add.s32 	%r29, %r2, 1;
	cvt.rn.f32.s32	%f327, %r29;
	sub.ftz.f32 	%f328, %f327, %f11;
	mul.ftz.f32 	%f329, %f328, %f328;
	fma.rn.ftz.f32 	%f330, %f299, %f299, %f329;
	fma.rn.ftz.f32 	%f331, %f259, %f259, %f330;
	.loc 2 2775 10
	rsqrt.approx.ftz.f32 	%f332, %f331;
	mul.ftz.f32 	%f333, %f299, %f332;
	mul.ftz.f32 	%f334, %f328, %f332;
	mul.ftz.f32 	%f335, %f259, %f332;
	mul.ftz.f32 	%f336, %f334, %f9;
	mul.ftz.f32 	%f337, %f8, %f335;
	sub.ftz.f32 	%f338, %f336, %f337;
	mul.ftz.f32 	%f339, %f335, %f7;
	mul.ftz.f32 	%f340, %f9, %f333;
	sub.ftz.f32 	%f341, %f339, %f340;
	mul.ftz.f32 	%f342, %f333, %f8;
	mul.ftz.f32 	%f343, %f7, %f334;
	sub.ftz.f32 	%f344, %f342, %f343;
	mul.ftz.f32 	%f345, %f5, %f341;
	fma.rn.ftz.f32 	%f346, %f4, %f338, %f345;
	fma.rn.ftz.f32 	%f347, %f6, %f344, %f346;
	rcp.approx.ftz.f32 	%f348, %f347;
	mul.ftz.f32 	%f349, %f279, %f341;
	fma.rn.ftz.f32 	%f350, %f278, %f338, %f349;
	fma.rn.ftz.f32 	%f351, %f282, %f344, %f350;
	mul.ftz.f32 	%f352, %f351, %f348;
	mul.ftz.f32 	%f353, %f334, %f290;
	fma.rn.ftz.f32 	%f354, %f333, %f287, %f353;
	fma.rn.ftz.f32 	%f355, %f335, %f293, %f354;
	mul.ftz.f32 	%f356, %f355, %f348;
	.loc 1 29 1
	fma.rn.ftz.f32 	%f357, %f254, %f254, %f329;
	fma.rn.ftz.f32 	%f358, %f259, %f259, %f357;
	.loc 2 2775 10
	rsqrt.approx.ftz.f32 	%f359, %f358;
	mul.ftz.f32 	%f360, %f254, %f359;
	mul.ftz.f32 	%f361, %f328, %f359;
	mul.ftz.f32 	%f362, %f259, %f359;
	mul.ftz.f32 	%f363, %f361, %f9;
	mul.ftz.f32 	%f364, %f8, %f362;
	sub.ftz.f32 	%f365, %f363, %f364;
	mul.ftz.f32 	%f366, %f362, %f7;
	mul.ftz.f32 	%f367, %f9, %f360;
	sub.ftz.f32 	%f368, %f366, %f367;
	mul.ftz.f32 	%f369, %f360, %f8;
	mul.ftz.f32 	%f370, %f7, %f361;
	sub.ftz.f32 	%f371, %f369, %f370;
	mul.ftz.f32 	%f372, %f5, %f368;
	fma.rn.ftz.f32 	%f373, %f4, %f365, %f372;
	fma.rn.ftz.f32 	%f374, %f6, %f371, %f373;
	rcp.approx.ftz.f32 	%f375, %f374;
	mul.ftz.f32 	%f376, %f279, %f368;
	fma.rn.ftz.f32 	%f377, %f278, %f365, %f376;
	fma.rn.ftz.f32 	%f378, %f282, %f371, %f377;
	mul.ftz.f32 	%f379, %f378, %f375;
	mul.ftz.f32 	%f380, %f361, %f290;
	fma.rn.ftz.f32 	%f381, %f360, %f287, %f380;
	fma.rn.ftz.f32 	%f382, %f362, %f293, %f381;
	mul.ftz.f32 	%f383, %f382, %f375;
	.loc 1 29 1
	mul.ftz.f32 	%f16, %f284, %f183;
	mul.ftz.f32 	%f17, %f297, %f184;
	mul.ftz.f32 	%f1160, %f322, %f183;
	mul.ftz.f32 	%f1161, %f326, %f184;
	mul.ftz.f32 	%f1158, %f352, %f183;
	mul.ftz.f32 	%f1159, %f356, %f184;
	mul.ftz.f32 	%f1156, %f379, %f183;
	mul.ftz.f32 	%f1157, %f383, %f184;
	.loc 1 29 1
	add.ftz.f32 	%f384, %f16, %f1160;
	mul.ftz.f32 	%f30, %f384, 0f3F000000;
	add.ftz.f32 	%f385, %f17, %f1161;
	mul.ftz.f32 	%f31, %f385, 0f3F000000;
	add.ftz.f32 	%f386, %f1160, %f1158;
	mul.ftz.f32 	%f32, %f386, 0f3F000000;
	add.ftz.f32 	%f387, %f1161, %f1159;
	mul.ftz.f32 	%f33, %f387, 0f3F000000;
	add.ftz.f32 	%f388, %f1158, %f1156;
	mul.ftz.f32 	%f34, %f388, 0f3F000000;
	add.ftz.f32 	%f389, %f1159, %f1157;
	mul.ftz.f32 	%f35, %f389, 0f3F000000;
	add.ftz.f32 	%f390, %f1156, %f16;
	mul.ftz.f32 	%f36, %f390, 0f3F000000;
	add.ftz.f32 	%f391, %f1157, %f17;
	mul.ftz.f32 	%f37, %f391, 0f3F000000;
	add.ftz.f32 	%f392, %f30, %f34;
	mul.ftz.f32 	%f38, %f392, 0f3F000000;
	add.ftz.f32 	%f393, %f31, %f35;
	mul.ftz.f32 	%f39, %f393, 0f3F000000;
	sub.ftz.f32 	%f394, %f36, %f32;
	sub.ftz.f32 	%f395, %f37, %f33;
	mul.ftz.f32 	%f396, %f395, %f395;
	fma.rn.ftz.f32 	%f40, %f394, %f394, %f396;
	.loc 1 29 1
	setp.gt.ftz.f32	%p5, %f40, 0f3F8020C5;
	selp.u32	%r30, 1, 0, %p5;
	.loc 1 29 1
	sub.ftz.f32 	%f397, %f30, %f34;
	sub.ftz.f32 	%f398, %f31, %f35;
	mul.ftz.f32 	%f399, %f398, %f398;
	fma.rn.ftz.f32 	%f41, %f397, %f397, %f399;
	.loc 1 29 1
	setp.gt.ftz.f32	%p6, %f41, 0f3F8020C5;
	.loc 1 29 1
	or.b32  	%r31, %r30, 2;
	.loc 1 29 1
	selp.b32	%r32, %r31, %r30, %p6;
	.loc 1 29 1
	setp.eq.s32	%p7, %r32, 0;
	@%p7 bra 	BB2_27;

	.loc 1 29 1
	setp.gt.ftz.f32	%p8, %f40, 0f40800000;
	setp.gt.ftz.f32	%p9, %f41, 0f40800000;
	or.pred  	%p10, %p8, %p9;
	.loc 1 29 1
	@%p10 bra 	BB2_7;

	.loc 1 29 1
	add.ftz.f32 	%f424, %f38, %f30;
	add.ftz.f32 	%f425, %f424, %f16;
	add.ftz.f32 	%f426, %f425, %f36;
	mul.ftz.f32 	%f404, %f426, 0f3E800000;
	add.ftz.f32 	%f427, %f39, %f31;
	add.ftz.f32 	%f428, %f427, %f17;
	add.ftz.f32 	%f429, %f428, %f37;
	mul.ftz.f32 	%f405, %f429, 0f3E800000;
	.loc 1 29 217
	// inline asm
	tex.2d.v4.f32.f32 {%f400, %f401, %f402, %f403}, [inTexture, {%f404, %f405}];
	// inline asm
	.loc 1 29 1
	add.ftz.f32 	%f430, %f424, %f1160;
	add.ftz.f32 	%f431, %f430, %f32;
	mul.ftz.f32 	%f410, %f431, 0f3E800000;
	add.ftz.f32 	%f432, %f427, %f1161;
	add.ftz.f32 	%f433, %f432, %f33;
	mul.ftz.f32 	%f411, %f433, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f406, %f407, %f408, %f409}, [inTexture, {%f410, %f411}];
	// inline asm
	add.ftz.f32 	%f434, %f400, %f406;
	add.ftz.f32 	%f435, %f401, %f407;
	add.ftz.f32 	%f436, %f402, %f408;
	add.ftz.f32 	%f437, %f403, %f409;
	.loc 1 29 1
	add.ftz.f32 	%f438, %f38, %f34;
	add.ftz.f32 	%f439, %f438, %f1158;
	add.ftz.f32 	%f440, %f439, %f32;
	mul.ftz.f32 	%f416, %f440, 0f3E800000;
	add.ftz.f32 	%f441, %f39, %f35;
	add.ftz.f32 	%f442, %f441, %f1159;
	add.ftz.f32 	%f443, %f442, %f33;
	mul.ftz.f32 	%f417, %f443, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f412, %f413, %f414, %f415}, [inTexture, {%f416, %f417}];
	// inline asm
	add.ftz.f32 	%f444, %f434, %f412;
	add.ftz.f32 	%f445, %f435, %f413;
	add.ftz.f32 	%f446, %f436, %f414;
	add.ftz.f32 	%f447, %f437, %f415;
	.loc 1 29 1
	add.ftz.f32 	%f448, %f438, %f1156;
	add.ftz.f32 	%f449, %f448, %f36;
	mul.ftz.f32 	%f422, %f449, 0f3E800000;
	add.ftz.f32 	%f450, %f441, %f1157;
	add.ftz.f32 	%f451, %f450, %f37;
	mul.ftz.f32 	%f423, %f451, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f418, %f419, %f420, %f421}, [inTexture, {%f422, %f423}];
	// inline asm
	add.ftz.f32 	%f452, %f444, %f418;
	add.ftz.f32 	%f453, %f445, %f419;
	add.ftz.f32 	%f454, %f446, %f420;
	add.ftz.f32 	%f455, %f447, %f421;
	.loc 1 29 1
	mul.ftz.f32 	%f1152, %f452, 0f3E800000;
	mul.ftz.f32 	%f1153, %f453, 0f3E800000;
	mul.ftz.f32 	%f1154, %f454, 0f3E800000;
	mul.ftz.f32 	%f1155, %f455, 0f3E800000;
	.loc 2 2770 10
	max.ftz.f32 	%f46, %f40, %f41;
	.loc 1 29 1
	setp.geu.ftz.f32	%p11, %f46, 0f3FB851EC;
	@%p11 bra 	BB2_6;

	mov.f32 	%f1149, 0f3F800000;
	.loc 1 29 1
	add.ftz.f32 	%f552, %f46, 0fBF800000;
	mov.f32 	%f553, 0f3EE147B0;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f554, %f552, %f553;
	.loc 1 29 1
	add.ftz.f32 	%f555, %f38, 0fBF000000;
	.loc 2 2740 10
	cvt.rmi.ftz.f32.f32	%f556, %f555;
	.loc 1 29 1
	add.ftz.f32 	%f557, %f39, 0fBF000000;
	.loc 2 2740 10
	cvt.rmi.ftz.f32.f32	%f558, %f557;
	sub.ftz.f32 	%f559, %f555, %f556;
	sub.ftz.f32 	%f560, %f557, %f558;
	fma.rn.ftz.f32 	%f561, %f559, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f563, %f561, %f559, 0fBF000000;
	mul.ftz.f32 	%f564, %f563, %f559;
	fma.rn.ftz.f32 	%f565, %f559, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f566, %f565, %f559;
	fma.rn.ftz.f32 	%f567, %f566, %f559, 0f3F800000;
	sub.ftz.f32 	%f568, %f1149, %f559;
	fma.rn.ftz.f32 	%f569, %f568, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f570, %f569, %f568;
	fma.rn.ftz.f32 	%f571, %f570, %f568, 0f3F800000;
	fma.rn.ftz.f32 	%f572, %f568, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f573, %f572, %f568, 0fBF000000;
	mul.ftz.f32 	%f574, %f573, %f568;
	fma.rn.ftz.f32 	%f575, %f560, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f576, %f575, %f560, 0fBF000000;
	mul.ftz.f32 	%f577, %f576, %f560;
	fma.rn.ftz.f32 	%f578, %f560, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f579, %f578, %f560;
	fma.rn.ftz.f32 	%f580, %f579, %f560, 0f3F800000;
	sub.ftz.f32 	%f581, %f1149, %f560;
	fma.rn.ftz.f32 	%f582, %f581, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f583, %f582, %f581;
	fma.rn.ftz.f32 	%f584, %f583, %f581, 0f3F800000;
	fma.rn.ftz.f32 	%f585, %f581, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f586, %f585, %f581, 0fBF000000;
	mul.ftz.f32 	%f587, %f586, %f581;
	add.ftz.f32 	%f588, %f556, 0fBF000000;
	add.ftz.f32 	%f589, %f558, 0fBF000000;
	add.ftz.f32 	%f508, %f588, 0f00000000;
	add.ftz.f32 	%f479, %f589, 0f00000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f456, %f457, %f458, %f459}, [inTexture, {%f508, %f479}];
	// inline asm
	mul.ftz.f32 	%f590, %f564, %f577;
	add.ftz.f32 	%f538, %f588, 0f3F800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f462, %f463, %f464, %f465}, [inTexture, {%f538, %f479}];
	// inline asm
	mul.ftz.f32 	%f591, %f567, %f577;
	mul.ftz.f32 	%f592, %f462, %f591;
	mul.ftz.f32 	%f593, %f463, %f591;
	mul.ftz.f32 	%f594, %f464, %f591;
	mul.ftz.f32 	%f595, %f465, %f591;
	fma.rn.ftz.f32 	%f596, %f456, %f590, %f592;
	fma.rn.ftz.f32 	%f597, %f457, %f590, %f593;
	fma.rn.ftz.f32 	%f598, %f458, %f590, %f594;
	fma.rn.ftz.f32 	%f599, %f459, %f590, %f595;
	add.ftz.f32 	%f544, %f588, 0f40000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f468, %f469, %f470, %f471}, [inTexture, {%f544, %f479}];
	// inline asm
	mul.ftz.f32 	%f600, %f571, %f577;
	fma.rn.ftz.f32 	%f601, %f468, %f600, %f596;
	fma.rn.ftz.f32 	%f602, %f469, %f600, %f597;
	fma.rn.ftz.f32 	%f603, %f470, %f600, %f598;
	fma.rn.ftz.f32 	%f604, %f471, %f600, %f599;
	add.ftz.f32 	%f550, %f588, 0f40400000;
	// inline asm
	tex.2d.v4.f32.f32 {%f474, %f475, %f476, %f477}, [inTexture, {%f550, %f479}];
	// inline asm
	mul.ftz.f32 	%f605, %f574, %f577;
	fma.rn.ftz.f32 	%f606, %f474, %f605, %f601;
	fma.rn.ftz.f32 	%f607, %f475, %f605, %f602;
	fma.rn.ftz.f32 	%f608, %f476, %f605, %f603;
	fma.rn.ftz.f32 	%f609, %f477, %f605, %f604;
	add.ftz.f32 	%f503, %f589, 0f3F800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f480, %f481, %f482, %f483}, [inTexture, {%f508, %f503}];
	// inline asm
	mul.ftz.f32 	%f610, %f564, %f580;
	fma.rn.ftz.f32 	%f611, %f480, %f610, %f606;
	fma.rn.ftz.f32 	%f612, %f481, %f610, %f607;
	fma.rn.ftz.f32 	%f613, %f482, %f610, %f608;
	fma.rn.ftz.f32 	%f614, %f483, %f610, %f609;
	// inline asm
	tex.2d.v4.f32.f32 {%f486, %f487, %f488, %f489}, [inTexture, {%f538, %f503}];
	// inline asm
	mul.ftz.f32 	%f615, %f567, %f580;
	fma.rn.ftz.f32 	%f616, %f486, %f615, %f611;
	fma.rn.ftz.f32 	%f617, %f487, %f615, %f612;
	fma.rn.ftz.f32 	%f618, %f488, %f615, %f613;
	fma.rn.ftz.f32 	%f619, %f489, %f615, %f614;
	// inline asm
	tex.2d.v4.f32.f32 {%f492, %f493, %f494, %f495}, [inTexture, {%f544, %f503}];
	// inline asm
	mul.ftz.f32 	%f620, %f571, %f580;
	fma.rn.ftz.f32 	%f621, %f492, %f620, %f616;
	fma.rn.ftz.f32 	%f622, %f493, %f620, %f617;
	fma.rn.ftz.f32 	%f623, %f494, %f620, %f618;
	fma.rn.ftz.f32 	%f624, %f495, %f620, %f619;
	// inline asm
	tex.2d.v4.f32.f32 {%f498, %f499, %f500, %f501}, [inTexture, {%f550, %f503}];
	// inline asm
	mul.ftz.f32 	%f625, %f574, %f580;
	fma.rn.ftz.f32 	%f626, %f498, %f625, %f621;
	fma.rn.ftz.f32 	%f627, %f499, %f625, %f622;
	fma.rn.ftz.f32 	%f628, %f500, %f625, %f623;
	fma.rn.ftz.f32 	%f629, %f501, %f625, %f624;
	add.ftz.f32 	%f527, %f589, 0f40000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f504, %f505, %f506, %f507}, [inTexture, {%f508, %f527}];
	// inline asm
	mul.ftz.f32 	%f630, %f564, %f584;
	fma.rn.ftz.f32 	%f631, %f504, %f630, %f626;
	fma.rn.ftz.f32 	%f632, %f505, %f630, %f627;
	fma.rn.ftz.f32 	%f633, %f506, %f630, %f628;
	fma.rn.ftz.f32 	%f634, %f507, %f630, %f629;
	// inline asm
	tex.2d.v4.f32.f32 {%f510, %f511, %f512, %f513}, [inTexture, {%f538, %f527}];
	// inline asm
	mul.ftz.f32 	%f635, %f567, %f584;
	fma.rn.ftz.f32 	%f636, %f510, %f635, %f631;
	fma.rn.ftz.f32 	%f637, %f511, %f635, %f632;
	fma.rn.ftz.f32 	%f638, %f512, %f635, %f633;
	fma.rn.ftz.f32 	%f639, %f513, %f635, %f634;
	// inline asm
	tex.2d.v4.f32.f32 {%f516, %f517, %f518, %f519}, [inTexture, {%f544, %f527}];
	// inline asm
	mul.ftz.f32 	%f640, %f571, %f584;
	fma.rn.ftz.f32 	%f641, %f516, %f640, %f636;
	fma.rn.ftz.f32 	%f642, %f517, %f640, %f637;
	fma.rn.ftz.f32 	%f643, %f518, %f640, %f638;
	fma.rn.ftz.f32 	%f644, %f519, %f640, %f639;
	// inline asm
	tex.2d.v4.f32.f32 {%f522, %f523, %f524, %f525}, [inTexture, {%f550, %f527}];
	// inline asm
	mul.ftz.f32 	%f645, %f574, %f584;
	fma.rn.ftz.f32 	%f646, %f522, %f645, %f641;
	fma.rn.ftz.f32 	%f647, %f523, %f645, %f642;
	fma.rn.ftz.f32 	%f648, %f524, %f645, %f643;
	fma.rn.ftz.f32 	%f649, %f525, %f645, %f644;
	add.ftz.f32 	%f532, %f588, 0f80000000;
	add.ftz.f32 	%f551, %f589, 0f40400000;
	// inline asm
	tex.2d.v4.f32.f32 {%f528, %f529, %f530, %f531}, [inTexture, {%f532, %f551}];
	// inline asm
	mul.ftz.f32 	%f650, %f564, %f587;
	fma.rn.ftz.f32 	%f651, %f528, %f650, %f646;
	fma.rn.ftz.f32 	%f652, %f529, %f650, %f647;
	fma.rn.ftz.f32 	%f653, %f530, %f650, %f648;
	fma.rn.ftz.f32 	%f654, %f531, %f650, %f649;
	// inline asm
	tex.2d.v4.f32.f32 {%f534, %f535, %f536, %f537}, [inTexture, {%f538, %f551}];
	// inline asm
	mul.ftz.f32 	%f655, %f567, %f587;
	fma.rn.ftz.f32 	%f656, %f534, %f655, %f651;
	fma.rn.ftz.f32 	%f657, %f535, %f655, %f652;
	fma.rn.ftz.f32 	%f658, %f536, %f655, %f653;
	fma.rn.ftz.f32 	%f659, %f537, %f655, %f654;
	// inline asm
	tex.2d.v4.f32.f32 {%f540, %f541, %f542, %f543}, [inTexture, {%f544, %f551}];
	// inline asm
	mul.ftz.f32 	%f660, %f571, %f587;
	fma.rn.ftz.f32 	%f661, %f540, %f660, %f656;
	fma.rn.ftz.f32 	%f662, %f541, %f660, %f657;
	fma.rn.ftz.f32 	%f663, %f542, %f660, %f658;
	fma.rn.ftz.f32 	%f664, %f543, %f660, %f659;
	// inline asm
	tex.2d.v4.f32.f32 {%f546, %f547, %f548, %f549}, [inTexture, {%f550, %f551}];
	// inline asm
	mul.ftz.f32 	%f665, %f574, %f587;
	fma.rn.ftz.f32 	%f666, %f546, %f665, %f661;
	fma.rn.ftz.f32 	%f667, %f547, %f665, %f662;
	fma.rn.ftz.f32 	%f668, %f548, %f665, %f663;
	fma.rn.ftz.f32 	%f669, %f549, %f665, %f664;
	.loc 1 29 1
	sub.ftz.f32 	%f670, %f1152, %f666;
	fma.rn.ftz.f32 	%f1152, %f554, %f670, %f666;
	sub.ftz.f32 	%f671, %f1153, %f667;
	fma.rn.ftz.f32 	%f1153, %f554, %f671, %f667;
	sub.ftz.f32 	%f672, %f1154, %f668;
	fma.rn.ftz.f32 	%f1154, %f554, %f672, %f668;
	sub.ftz.f32 	%f673, %f1155, %f669;
	fma.rn.ftz.f32 	%f1155, %f554, %f673, %f669;

BB2_6:
	.loc 1 29 1
	mov.f32 	%f1172, %f1155;
	mov.f32 	%f1171, %f1154;
	mov.f32 	%f1170, %f1153;
	mov.f32 	%f1169, %f1152;
	bra.uni 	BB2_28;

BB2_7:
	// Prepare the state used by the loop at BB2_8. Three regions of the local
	// frame are addressed from %SP:
	//   %rd52 -> [%SP+0]    32-byte entries, each holding four float pairs
	//   %rd50 -> [%SP+224]  u32 array indexed by %r73
	//   %rd51 -> [%SP+240]  u32 array indexed by %r73
	// %f1168 starts at 0.0; the sampling blocks add the number of texture
	// fetches they perform to it and BB2_24 divides the colour sums by it.
	mov.f32 	%f1168, 0f00000000;
	add.u64 	%rd47, %SP, 0;
	add.u64 	%rd48, %SP, 224;
	add.u64 	%rd49, %SP, 240;
	cvta.to.local.u64 	%rd50, %rd48;
	mov.u32 	%r73, 0;
	.loc 1 29 1
	// first u32 at %SP+224 = 0 (BB2_16 later writes "parent value + 1" here)
	st.local.u32 	[%rd50], %r73;
	cvta.to.local.u64 	%rd51, %rd49;
	mov.u32 	%r35, 1;
	.loc 1 29 1
	// first u32 at %SP+240 = 1 (a countdown: decremented once per processed entry)
	st.local.u32 	[%rd51], %r35;
	cvta.to.local.u64 	%rd52, %rd47;
	.loc 1 29 1
	// entry 0 = {(%f16,%f17), (%f1160,%f1161), (%f1158,%f1159), (%f1156,%f1157)}
	// NOTE(review): presumably the four corners of the quad being sampled --
	// these registers are defined before this excerpt, confirm there.
	st.local.v2.f32 	[%rd52], {%f16, %f17};
	st.local.v2.f32 	[%rd52+8], {%f1160, %f1161};
	st.local.f32 	[%rd52+16], %f1158;
	st.local.f32 	[%rd52+20], %f1159;
	st.local.v2.f32 	[%rd52+24], {%f1156, %f1157};
	.loc 1 29 1
	// Working copy of the first pair, entry index %r72 = 0, and the four
	// colour sums %f1164..%f1167 = 0.0.
	mov.f32 	%f1163, %f17;
	mov.f32 	%f1162, %f16;
	mov.u32 	%r72, %r73;
	mov.f32 	%f1167, %f1168;
	mov.f32 	%f1166, %f1168;
	mov.f32 	%f1165, %f1168;
	mov.f32 	%f1164, %f1168;

BB2_8:
	// Loop head. The current entry is held in registers as four points:
	//   P0 = (%f1162,%f1163)   P1 = (%f1160,%f1161)
	//   P2 = (%f1158,%f1159)   P3 = (%f1156,%f1157)
	// This block derives the four edge midpoints (0f3F000000 = 0.5), the
	// centre, and a 2-bit mask %r5 that the following blocks dispatch on.
	.loc 1 29 1
	// M01 = (%f77,%f81) = 0.5 * (P0 + P1)
	add.ftz.f32 	%f76, %f1162, %f1160;
	mul.ftz.f32 	%f77, %f76, 0f3F000000;
	add.ftz.f32 	%f80, %f1163, %f1161;
	mul.ftz.f32 	%f81, %f80, 0f3F000000;
	// M12 = (%f83,%f85) = 0.5 * (P1 + P2)
	add.ftz.f32 	%f679, %f1160, %f1158;
	mul.ftz.f32 	%f83, %f679, 0f3F000000;
	add.ftz.f32 	%f680, %f1161, %f1159;
	mul.ftz.f32 	%f85, %f680, 0f3F000000;
	// M23 = (%f87,%f89) = 0.5 * (P2 + P3)
	add.ftz.f32 	%f681, %f1158, %f1156;
	mul.ftz.f32 	%f87, %f681, 0f3F000000;
	add.ftz.f32 	%f682, %f1159, %f1157;
	mul.ftz.f32 	%f89, %f682, 0f3F000000;
	// M30 = (%f90,%f91) = 0.5 * (P3 + P0)
	add.ftz.f32 	%f683, %f1156, %f1162;
	mul.ftz.f32 	%f90, %f683, 0f3F000000;
	add.ftz.f32 	%f684, %f1157, %f1163;
	mul.ftz.f32 	%f91, %f684, 0f3F000000;
	// C = (%f92,%f93) = 0.5 * (M01 + M23)
	add.ftz.f32 	%f685, %f77, %f87;
	mul.ftz.f32 	%f92, %f685, 0f3F000000;
	add.ftz.f32 	%f686, %f81, %f89;
	mul.ftz.f32 	%f93, %f686, 0f3F000000;
	// %f94 = |M30 - M12|^2 (squared distance between two opposite midpoints)
	sub.ftz.f32 	%f687, %f90, %f83;
	sub.ftz.f32 	%f688, %f91, %f85;
	mul.ftz.f32 	%f689, %f688, %f688;
	fma.rn.ftz.f32 	%f94, %f687, %f687, %f689;
	.loc 1 29 1
	// mask bit 0 = (%f94 > 1.0)
	setp.gt.ftz.f32	%p12, %f94, 0f3F800000;
	selp.u32	%r36, 1, 0, %p12;
	.loc 1 29 1
	// %f95 = |M23 - M01|^2 (squared distance between the other two midpoints)
	sub.ftz.f32 	%f690, %f87, %f77;
	sub.ftz.f32 	%f691, %f89, %f81;
	mul.ftz.f32 	%f692, %f691, %f691;
	fma.rn.ftz.f32 	%f95, %f690, %f690, %f692;
	.loc 1 29 1
	// mask bit 1 = (%f95 > 1.0)
	setp.gt.ftz.f32	%p13, %f95, 0f3F800000;
	.loc 1 29 1
	or.b32  	%r37, %r36, 2;
	.loc 1 29 1
	selp.b32	%r5, %r37, %r36, %p13;
	.loc 1 29 1
	// %r5 == 0: neither squared extent exceeds 1.0, so take the single-sample
	// path at BB2_23 (one fetch at C). Otherwise fall through.
	setp.eq.s32	%p14, %r5, 0;
	@%p14 bra 	BB2_23;

	mul.wide.s32 	%rd55, %r73, 4;
	add.s64 	%rd56, %rd50, %rd55;
	.loc 1 29 1
	ld.local.u32 	%r6, [%rd56];
	setp.eq.s32	%p15, %r6, 2;
	@%p15 bra 	BB2_17;

	setp.lt.ftz.f32	%p16, %f94, 0f40800000;
	setp.lt.ftz.f32	%p17, %f95, 0f40800000;
	and.pred  	%p18, %p16, %p17;
	.loc 1 29 1
	@%p18 bra 	BB2_17;

	.loc 1 29 1
	setp.eq.s32	%p19, %r5, 3;
	@%p19 bra 	BB2_15;

	.loc 1 29 1
	setp.eq.s32	%p20, %r5, 2;
	mul.wide.s32 	%rd59, %r72, 32;
	add.s64 	%rd60, %rd52, %rd59;
	.loc 1 29 1
	st.local.v2.f32 	[%rd60], {%f1162, %f1163};
	.loc 1 29 1
	@%p20 bra 	BB2_14;

	mul.wide.s32 	%rd63, %r72, 32;
	add.s64 	%rd64, %rd52, %rd63;
	.loc 1 29 1
	st.local.v2.f32 	[%rd64+8], {%f77, %f81};
	st.local.v2.f32 	[%rd64+16], {%f87, %f89};
	st.local.f32 	[%rd64+24], %f1156;
	st.local.f32 	[%rd64+28], %f1157;
	st.local.v2.f32 	[%rd64+32], {%f77, %f81};
	st.local.f32 	[%rd64+40], %f1160;
	st.local.f32 	[%rd64+44], %f1161;
	st.local.v2.f32 	[%rd64+48], {%f1158, %f1159};
	st.local.f32 	[%rd64+56], %f87;
	st.local.f32 	[%rd64+60], %f89;
	mov.u32 	%r74, 2;
	bra.uni 	BB2_16;

BB2_14:
	mul.wide.s32 	%rd67, %r72, 32;
	add.s64 	%rd68, %rd52, %rd67;
	.loc 1 29 1
	st.local.v2.f32 	[%rd68+8], {%f1160, %f1161};
	st.local.v2.f32 	[%rd68+16], {%f83, %f85};
	st.local.f32 	[%rd68+24], %f90;
	st.local.f32 	[%rd68+28], %f91;
	st.local.v2.f32 	[%rd68+32], {%f90, %f91};
	st.local.f32 	[%rd68+40], %f83;
	st.local.f32 	[%rd68+44], %f85;
	st.local.v2.f32 	[%rd68+48], {%f1158, %f1159};
	st.local.f32 	[%rd68+56], %f1156;
	st.local.f32 	[%rd68+60], %f1157;
	mov.u32 	%r74, 2;
	bra.uni 	BB2_16;

BB2_15:
	mul.wide.s32 	%rd71, %r72, 32;
	add.s64 	%rd72, %rd52, %rd71;
	.loc 1 29 1
	st.local.v2.f32 	[%rd72], {%f92, %f93};
	st.local.v2.f32 	[%rd72+8], {%f90, %f91};
	st.local.f32 	[%rd72+16], %f1162;
	st.local.f32 	[%rd72+20], %f1163;
	st.local.v2.f32 	[%rd72+24], {%f77, %f81};
	st.local.f32 	[%rd72+32], %f92;
	st.local.f32 	[%rd72+36], %f93;
	st.local.v2.f32 	[%rd72+40], {%f77, %f81};
	st.local.f32 	[%rd72+48], %f1160;
	st.local.f32 	[%rd72+52], %f1161;
	st.local.v2.f32 	[%rd72+56], {%f83, %f85};
	st.local.f32 	[%rd72+64], %f92;
	st.local.f32 	[%rd72+68], %f93;
	st.local.v2.f32 	[%rd72+72], {%f83, %f85};
	st.local.f32 	[%rd72+80], %f1158;
	st.local.f32 	[%rd72+84], %f1159;
	st.local.v2.f32 	[%rd72+88], {%f87, %f89};
	st.local.f32 	[%rd72+96], %f92;
	st.local.f32 	[%rd72+100], %f93;
	st.local.v2.f32 	[%rd72+104], {%f87, %f89};
	st.local.f32 	[%rd72+112], %f1156;
	st.local.f32 	[%rd72+116], %f1157;
	st.local.v2.f32 	[%rd72+120], {%f90, %f91};
	mov.u32 	%r74, 4;

BB2_16:
	// Bookkeeping after the predecessor blocks overwrote entry %r72 with
	// %r74 new entries (%r74 is set to 2 or 4 just before the jump here).
	.loc 1 29 1
	// New top index: the consumed entry is replaced by %r74 entries.
	add.s32 	%r41, %r72, %r74;
	add.s32 	%r72, %r41, -1;
	mul.wide.s32 	%rd75, %r73, 4;
	add.s64 	%rd76, %rd51, %rd75;
	.loc 1 29 1
	// Decrement the countdown stored at %rd51[%r73].
	ld.local.u32 	%r42, [%rd76];
	add.s32 	%r43, %r42, -1;
	st.local.u32 	[%rd76], %r43;
	.loc 1 29 1
	// If the countdown is still non-zero, move to the next slot (%r73 + 1);
	// if it reached zero, the current slot is reused.
	setp.ne.s32	%p21, %r43, 0;
	selp.u32	%r44, 1, 0, %p21;
	add.s32 	%r73, %r44, %r73;
	mul.wide.s32 	%rd79, %r73, 4;
	add.s64 	%rd80, %rd50, %rd79;
	.loc 1 29 1
	// %rd50[%r73] = %r6 + 1, where %r6 is the value loaded from %rd50 for the
	// entry that was just split (compared against 2 before splitting).
	add.s32 	%r45, %r6, 1;
	st.local.u32 	[%rd80], %r45;
	add.s64 	%rd81, %rd51, %rd79;
	.loc 1 29 1
	// %rd51[%r73] = number of entries just pushed.
	st.local.u32 	[%rd81], %r74;
	bra.uni 	BB2_24;

BB2_17:
	.loc 1 29 1
	add.s32 	%r72, %r72, -1;
	.loc 1 29 1
	setp.eq.s32	%p22, %r5, 3;
	@%p22 bra 	BB2_21;

	.loc 1 29 1
	setp.eq.s32	%p23, %r5, 2;
	@%p23 bra 	BB2_20;

	.loc 1 29 1
	add.ftz.f32 	%f705, %f1162, %f77;
	add.ftz.f32 	%f706, %f705, %f87;
	add.ftz.f32 	%f707, %f706, %f1156;
	mul.ftz.f32 	%f697, %f707, 0f3E800000;
	add.ftz.f32 	%f708, %f1163, %f81;
	add.ftz.f32 	%f709, %f708, %f89;
	add.ftz.f32 	%f710, %f709, %f1157;
	mul.ftz.f32 	%f698, %f710, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f693, %f694, %f695, %f696}, [inTexture, {%f697, %f698}];
	// inline asm
	add.ftz.f32 	%f711, %f1164, %f693;
	add.ftz.f32 	%f712, %f1165, %f694;
	add.ftz.f32 	%f713, %f1166, %f695;
	add.ftz.f32 	%f714, %f1167, %f696;
	.loc 1 29 1
	add.ftz.f32 	%f715, %f77, %f1160;
	add.ftz.f32 	%f716, %f715, %f1158;
	add.ftz.f32 	%f717, %f716, %f87;
	mul.ftz.f32 	%f703, %f717, 0f3E800000;
	add.ftz.f32 	%f718, %f81, %f1161;
	add.ftz.f32 	%f719, %f718, %f1159;
	add.ftz.f32 	%f720, %f719, %f89;
	mul.ftz.f32 	%f704, %f720, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f699, %f700, %f701, %f702}, [inTexture, {%f703, %f704}];
	// inline asm
	add.ftz.f32 	%f1164, %f711, %f699;
	add.ftz.f32 	%f1165, %f712, %f700;
	add.ftz.f32 	%f1166, %f713, %f701;
	add.ftz.f32 	%f1167, %f714, %f702;
	.loc 1 29 1
	add.ftz.f32 	%f1168, %f1168, 0f40000000;
	bra.uni 	BB2_22;

BB2_20:
	.loc 1 29 1
	add.ftz.f32 	%f1148, %f1163, %f1161;
	add.ftz.f32 	%f1147, %f1162, %f1160;
	.loc 1 29 1
	add.ftz.f32 	%f733, %f1147, %f83;
	add.ftz.f32 	%f734, %f733, %f90;
	mul.ftz.f32 	%f725, %f734, 0f3E800000;
	add.ftz.f32 	%f735, %f1148, %f85;
	add.ftz.f32 	%f736, %f735, %f91;
	mul.ftz.f32 	%f726, %f736, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f721, %f722, %f723, %f724}, [inTexture, {%f725, %f726}];
	// inline asm
	add.ftz.f32 	%f737, %f1164, %f721;
	add.ftz.f32 	%f738, %f1165, %f722;
	add.ftz.f32 	%f739, %f1166, %f723;
	add.ftz.f32 	%f740, %f1167, %f724;
	.loc 1 29 1
	add.ftz.f32 	%f741, %f90, %f83;
	add.ftz.f32 	%f742, %f741, %f1158;
	add.ftz.f32 	%f743, %f742, %f1156;
	mul.ftz.f32 	%f731, %f743, 0f3E800000;
	add.ftz.f32 	%f744, %f91, %f85;
	add.ftz.f32 	%f745, %f744, %f1159;
	add.ftz.f32 	%f746, %f745, %f1157;
	mul.ftz.f32 	%f732, %f746, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f727, %f728, %f729, %f730}, [inTexture, {%f731, %f732}];
	// inline asm
	add.ftz.f32 	%f1164, %f737, %f727;
	add.ftz.f32 	%f1165, %f738, %f728;
	add.ftz.f32 	%f1166, %f739, %f729;
	add.ftz.f32 	%f1167, %f740, %f730;
	.loc 1 29 1
	add.ftz.f32 	%f1168, %f1168, 0f40000000;
	bra.uni 	BB2_22;

BB2_21:
	.loc 1 29 1
	add.ftz.f32 	%f1168, %f1168, 0f40800000;
	add.ftz.f32 	%f771, %f92, %f90;
	add.ftz.f32 	%f772, %f771, %f1162;
	add.ftz.f32 	%f773, %f772, %f77;
	mul.ftz.f32 	%f751, %f773, 0f3E800000;
	add.ftz.f32 	%f774, %f93, %f91;
	add.ftz.f32 	%f775, %f774, %f1163;
	add.ftz.f32 	%f776, %f775, %f81;
	mul.ftz.f32 	%f752, %f776, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f747, %f748, %f749, %f750}, [inTexture, {%f751, %f752}];
	// inline asm
	add.ftz.f32 	%f777, %f1164, %f747;
	add.ftz.f32 	%f778, %f1165, %f748;
	add.ftz.f32 	%f779, %f1166, %f749;
	add.ftz.f32 	%f780, %f1167, %f750;
	.loc 1 29 1
	add.ftz.f32 	%f781, %f92, %f77;
	add.ftz.f32 	%f782, %f781, %f1160;
	add.ftz.f32 	%f783, %f782, %f83;
	mul.ftz.f32 	%f757, %f783, 0f3E800000;
	add.ftz.f32 	%f784, %f93, %f81;
	add.ftz.f32 	%f785, %f784, %f1161;
	add.ftz.f32 	%f786, %f785, %f85;
	mul.ftz.f32 	%f758, %f786, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f753, %f754, %f755, %f756}, [inTexture, {%f757, %f758}];
	// inline asm
	add.ftz.f32 	%f787, %f777, %f753;
	add.ftz.f32 	%f788, %f778, %f754;
	add.ftz.f32 	%f789, %f779, %f755;
	add.ftz.f32 	%f790, %f780, %f756;
	.loc 1 29 1
	add.ftz.f32 	%f791, %f92, %f83;
	add.ftz.f32 	%f792, %f791, %f1158;
	add.ftz.f32 	%f793, %f792, %f87;
	mul.ftz.f32 	%f763, %f793, 0f3E800000;
	add.ftz.f32 	%f794, %f93, %f85;
	add.ftz.f32 	%f795, %f794, %f1159;
	add.ftz.f32 	%f796, %f795, %f89;
	mul.ftz.f32 	%f764, %f796, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f759, %f760, %f761, %f762}, [inTexture, {%f763, %f764}];
	// inline asm
	add.ftz.f32 	%f797, %f787, %f759;
	add.ftz.f32 	%f798, %f788, %f760;
	add.ftz.f32 	%f799, %f789, %f761;
	add.ftz.f32 	%f800, %f790, %f762;
	.loc 1 29 1
	add.ftz.f32 	%f801, %f92, %f87;
	add.ftz.f32 	%f802, %f801, %f1156;
	add.ftz.f32 	%f803, %f802, %f90;
	mul.ftz.f32 	%f769, %f803, 0f3E800000;
	add.ftz.f32 	%f804, %f93, %f89;
	add.ftz.f32 	%f805, %f804, %f1157;
	add.ftz.f32 	%f806, %f805, %f91;
	mul.ftz.f32 	%f770, %f806, 0f3E800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f765, %f766, %f767, %f768}, [inTexture, {%f769, %f770}];
	// inline asm
	add.ftz.f32 	%f1164, %f797, %f765;
	add.ftz.f32 	%f1165, %f798, %f766;
	add.ftz.f32 	%f1166, %f799, %f767;
	add.ftz.f32 	%f1167, %f800, %f768;

BB2_22:
	// Common tail of the multi-sample paths (BB2_20, BB2_21 and the block
	// before them): one entry has been consumed, so decrement the countdown
	// at %rd51[%r73].
	mul.wide.s32 	%rd100, %r73, 4;
	add.s64 	%rd101, %rd51, %rd100;
	.loc 1 29 1
	ld.local.u32 	%r46, [%rd101];
	add.s32 	%r47, %r46, -1;
	st.local.u32 	[%rd101], %r47;
	.loc 1 29 1
	// When the countdown reaches zero, step %r73 back by one slot.
	setp.eq.s32	%p24, %r47, 0;
	selp.b32	%r48, -1, 0, %p24;
	add.s32 	%r73, %r48, %r73;
	bra.uni 	BB2_24;

BB2_23:
	// Single-sample path, reached from BB2_8 when the mask %r5 is 0.
	.loc 1 29 1
	// Consume the current entry and count one sample (0f3F800000 = 1.0).
	add.s32 	%r72, %r72, -1;
	add.ftz.f32 	%f1168, %f1168, 0f3F800000;
	// Fetch inTexture at the centre (%f92,%f93) computed in BB2_8.
	// inline asm
	tex.2d.v4.f32.f32 {%f807, %f808, %f809, %f810}, [inTexture, {%f92, %f93}];
	// inline asm
	// Add the four fetched components to the running sums.
	add.ftz.f32 	%f1164, %f1164, %f807;
	add.ftz.f32 	%f1165, %f1165, %f808;
	add.ftz.f32 	%f1166, %f1166, %f809;
	add.ftz.f32 	%f1167, %f1167, %f810;
	mul.wide.s32 	%rd105, %r73, 4;
	add.s64 	%rd106, %rd51, %rd105;
	.loc 1 29 1
	// Same countdown bookkeeping as BB2_22: decrement %rd51[%r73] and step
	// %r73 back by one when it reaches zero.
	ld.local.u32 	%r49, [%rd106];
	add.s32 	%r50, %r49, -1;
	st.local.u32 	[%rd106], %r50;
	.loc 1 29 1
	setp.eq.s32	%p25, %r50, 0;
	selp.b32	%r51, -1, 0, %p25;
	add.s32 	%r73, %r51, %r73;

BB2_24:
	.loc 1 29 10
	// Entries remain while the top index %r72 is >= 0: reload one at BB2_26.
	setp.gt.s32	%p26, %r72, -1;
	@%p26 bra 	BB2_26;

	// No entries left: average the sums. %f814 = 1.0 / sample count (approx.),
	// then scale each of the four sums into the result %f1169..%f1172.
	mov.f32 	%f1133, 0f3F800000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f814, %f1133, %f1168;
	.loc 1 29 83
	mul.ftz.f32 	%f1169, %f1164, %f814;
	mul.ftz.f32 	%f1170, %f1165, %f814;
	mul.ftz.f32 	%f1171, %f1166, %f814;
	mul.ftz.f32 	%f1172, %f1167, %f814;
	bra.uni 	BB2_28;

BB2_26:
	// Reload entry %r72 (32 bytes per entry, base %rd52) into the working
	// registers read by BB2_8, then run the loop body again.
	mul.wide.s32 	%rd109, %r72, 32;
	add.s64 	%rd110, %rd52, %rd109;
	.loc 1 29 1
	// offset 24: fourth pair -> (%f1156,%f1157)
	ld.local.v2.f32 	{%f815, %f816}, [%rd110+24];
	mov.f32 	%f1157, %f816;
	mov.f32 	%f1156, %f815;
	// offset 16: third pair -> (%f1158,%f1159)
	ld.local.v2.f32 	{%f817, %f818}, [%rd110+16];
	mov.f32 	%f1159, %f818;
	mov.f32 	%f1158, %f817;
	// offset 8: second pair -> (%f1160,%f1161)
	ld.local.v2.f32 	{%f819, %f820}, [%rd110+8];
	mov.f32 	%f1161, %f820;
	mov.f32 	%f1160, %f819;
	// offset 0: first pair -> (%f1162,%f1163)
	ld.local.v2.f32 	{%f821, %f822}, [%rd110];
	mov.f32 	%f1163, %f822;
	mov.f32 	%f1162, %f821;
	bra.uni 	BB2_8;

BB2_27:
	mov.f32 	%f1151, 0f3F800000;
	.loc 1 29 1
	add.ftz.f32 	%f919, %f38, 0fBF000000;
	.loc 2 2740 10
	cvt.rmi.ftz.f32.f32	%f920, %f919;
	.loc 1 29 1
	add.ftz.f32 	%f921, %f39, 0fBF000000;
	.loc 2 2740 10
	cvt.rmi.ftz.f32.f32	%f922, %f921;
	sub.ftz.f32 	%f923, %f919, %f920;
	sub.ftz.f32 	%f924, %f921, %f922;
	fma.rn.ftz.f32 	%f925, %f923, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f927, %f925, %f923, 0fBF000000;
	mul.ftz.f32 	%f928, %f927, %f923;
	fma.rn.ftz.f32 	%f929, %f923, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f930, %f929, %f923;
	fma.rn.ftz.f32 	%f931, %f930, %f923, 0f3F800000;
	sub.ftz.f32 	%f932, %f1151, %f923;
	fma.rn.ftz.f32 	%f933, %f932, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f934, %f933, %f932;
	fma.rn.ftz.f32 	%f935, %f934, %f932, 0f3F800000;
	fma.rn.ftz.f32 	%f936, %f932, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f937, %f936, %f932, 0fBF000000;
	mul.ftz.f32 	%f938, %f937, %f932;
	fma.rn.ftz.f32 	%f939, %f924, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f940, %f939, %f924, 0fBF000000;
	mul.ftz.f32 	%f941, %f940, %f924;
	fma.rn.ftz.f32 	%f942, %f924, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f943, %f942, %f924;
	fma.rn.ftz.f32 	%f944, %f943, %f924, 0f3F800000;
	sub.ftz.f32 	%f945, %f1151, %f924;
	fma.rn.ftz.f32 	%f946, %f945, 0f3FC00000, 0fC0200000;
	mul.ftz.f32 	%f947, %f946, %f945;
	fma.rn.ftz.f32 	%f948, %f947, %f945, 0f3F800000;
	fma.rn.ftz.f32 	%f949, %f945, 0fBF000000, 0f3F800000;
	fma.rn.ftz.f32 	%f950, %f949, %f945, 0fBF000000;
	mul.ftz.f32 	%f951, %f950, %f945;
	add.ftz.f32 	%f952, %f920, 0fBF000000;
	add.ftz.f32 	%f953, %f922, 0fBF000000;
	add.ftz.f32 	%f875, %f952, 0f00000000;
	add.ftz.f32 	%f846, %f953, 0f00000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f823, %f824, %f825, %f826}, [inTexture, {%f875, %f846}];
	// inline asm
	mul.ftz.f32 	%f954, %f928, %f941;
	add.ftz.f32 	%f905, %f952, 0f3F800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f829, %f830, %f831, %f832}, [inTexture, {%f905, %f846}];
	// inline asm
	mul.ftz.f32 	%f955, %f931, %f941;
	mul.ftz.f32 	%f956, %f829, %f955;
	mul.ftz.f32 	%f957, %f830, %f955;
	mul.ftz.f32 	%f958, %f831, %f955;
	mul.ftz.f32 	%f959, %f832, %f955;
	fma.rn.ftz.f32 	%f960, %f823, %f954, %f956;
	fma.rn.ftz.f32 	%f961, %f824, %f954, %f957;
	fma.rn.ftz.f32 	%f962, %f825, %f954, %f958;
	fma.rn.ftz.f32 	%f963, %f826, %f954, %f959;
	add.ftz.f32 	%f911, %f952, 0f40000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f835, %f836, %f837, %f838}, [inTexture, {%f911, %f846}];
	// inline asm
	mul.ftz.f32 	%f964, %f935, %f941;
	fma.rn.ftz.f32 	%f965, %f835, %f964, %f960;
	fma.rn.ftz.f32 	%f966, %f836, %f964, %f961;
	fma.rn.ftz.f32 	%f967, %f837, %f964, %f962;
	fma.rn.ftz.f32 	%f968, %f838, %f964, %f963;
	add.ftz.f32 	%f917, %f952, 0f40400000;
	// inline asm
	tex.2d.v4.f32.f32 {%f841, %f842, %f843, %f844}, [inTexture, {%f917, %f846}];
	// inline asm
	mul.ftz.f32 	%f969, %f938, %f941;
	fma.rn.ftz.f32 	%f970, %f841, %f969, %f965;
	fma.rn.ftz.f32 	%f971, %f842, %f969, %f966;
	fma.rn.ftz.f32 	%f972, %f843, %f969, %f967;
	fma.rn.ftz.f32 	%f973, %f844, %f969, %f968;
	add.ftz.f32 	%f870, %f953, 0f3F800000;
	// inline asm
	tex.2d.v4.f32.f32 {%f847, %f848, %f849, %f850}, [inTexture, {%f875, %f870}];
	// inline asm
	mul.ftz.f32 	%f974, %f928, %f944;
	fma.rn.ftz.f32 	%f975, %f847, %f974, %f970;
	fma.rn.ftz.f32 	%f976, %f848, %f974, %f971;
	fma.rn.ftz.f32 	%f977, %f849, %f974, %f972;
	fma.rn.ftz.f32 	%f978, %f850, %f974, %f973;
	// inline asm
	tex.2d.v4.f32.f32 {%f853, %f854, %f855, %f856}, [inTexture, {%f905, %f870}];
	// inline asm
	mul.ftz.f32 	%f979, %f931, %f944;
	fma.rn.ftz.f32 	%f980, %f853, %f979, %f975;
	fma.rn.ftz.f32 	%f981, %f854, %f979, %f976;
	fma.rn.ftz.f32 	%f982, %f855, %f979, %f977;
	fma.rn.ftz.f32 	%f983, %f856, %f979, %f978;
	// inline asm
	tex.2d.v4.f32.f32 {%f859, %f860, %f861, %f862}, [inTexture, {%f911, %f870}];
	// inline asm
	mul.ftz.f32 	%f984, %f935, %f944;
	fma.rn.ftz.f32 	%f985, %f859, %f984, %f980;
	fma.rn.ftz.f32 	%f986, %f860, %f984, %f981;
	fma.rn.ftz.f32 	%f987, %f861, %f984, %f982;
	fma.rn.ftz.f32 	%f988, %f862, %f984, %f983;
	// inline asm
	tex.2d.v4.f32.f32 {%f865, %f866, %f867, %f868}, [inTexture, {%f917, %f870}];
	// inline asm
	mul.ftz.f32 	%f989, %f938, %f944;
	fma.rn.ftz.f32 	%f990, %f865, %f989, %f985;
	fma.rn.ftz.f32 	%f991, %f866, %f989, %f986;
	fma.rn.ftz.f32 	%f992, %f867, %f989, %f987;
	fma.rn.ftz.f32 	%f993, %f868, %f989, %f988;
	add.ftz.f32 	%f894, %f953, 0f40000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f871, %f872, %f873, %f874}, [inTexture, {%f875, %f894}];
	// inline asm
	mul.ftz.f32 	%f994, %f928, %f948;
	fma.rn.ftz.f32 	%f995, %f871, %f994, %f990;
	fma.rn.ftz.f32 	%f996, %f872, %f994, %f991;
	fma.rn.ftz.f32 	%f997, %f873, %f994, %f992;
	fma.rn.ftz.f32 	%f998, %f874, %f994, %f993;
	// inline asm
	tex.2d.v4.f32.f32 {%f877, %f878, %f879, %f880}, [inTexture, {%f905, %f894}];
	// inline asm
	mul.ftz.f32 	%f999, %f931, %f948;
	fma.rn.ftz.f32 	%f1000, %f877, %f999, %f995;
	fma.rn.ftz.f32 	%f1001, %f878, %f999, %f996;
	fma.rn.ftz.f32 	%f1002, %f879, %f999, %f997;
	fma.rn.ftz.f32 	%f1003, %f880, %f999, %f998;
	// inline asm
	tex.2d.v4.f32.f32 {%f883, %f884, %f885, %f886}, [inTexture, {%f911, %f894}];
	// inline asm
	mul.ftz.f32 	%f1004, %f935, %f948;
	fma.rn.ftz.f32 	%f1005, %f883, %f1004, %f1000;
	fma.rn.ftz.f32 	%f1006, %f884, %f1004, %f1001;
	fma.rn.ftz.f32 	%f1007, %f885, %f1004, %f1002;
	fma.rn.ftz.f32 	%f1008, %f886, %f1004, %f1003;
	// inline asm
	tex.2d.v4.f32.f32 {%f889, %f890, %f891, %f892}, [inTexture, {%f917, %f894}];
	// inline asm
	mul.ftz.f32 	%f1009, %f938, %f948;
	fma.rn.ftz.f32 	%f1010, %f889, %f1009, %f1005;
	fma.rn.ftz.f32 	%f1011, %f890, %f1009, %f1006;
	fma.rn.ftz.f32 	%f1012, %f891, %f1009, %f1007;
	fma.rn.ftz.f32 	%f1013, %f892, %f1009, %f1008;
	add.ftz.f32 	%f899, %f952, 0f80000000;
	add.ftz.f32 	%f918, %f953, 0f40400000;
	// inline asm
	tex.2d.v4.f32.f32 {%f895, %f896, %f897, %f898}, [inTexture, {%f899, %f918}];
	// inline asm
	mul.ftz.f32 	%f1014, %f928, %f951;
	fma.rn.ftz.f32 	%f1015, %f895, %f1014, %f1010;
	fma.rn.ftz.f32 	%f1016, %f896, %f1014, %f1011;
	fma.rn.ftz.f32 	%f1017, %f897, %f1014, %f1012;
	fma.rn.ftz.f32 	%f1018, %f898, %f1014, %f1013;
	// inline asm
	tex.2d.v4.f32.f32 {%f901, %f902, %f903, %f904}, [inTexture, {%f905, %f918}];
	// inline asm
	mul.ftz.f32 	%f1019, %f931, %f951;
	fma.rn.ftz.f32 	%f1020, %f901, %f1019, %f1015;
	fma.rn.ftz.f32 	%f1021, %f902, %f1019, %f1016;
	fma.rn.ftz.f32 	%f1022, %f903, %f1019, %f1017;
	fma.rn.ftz.f32 	%f1023, %f904, %f1019, %f1018;
	// inline asm
	tex.2d.v4.f32.f32 {%f907, %f908, %f909, %f910}, [inTexture, {%f911, %f918}];
	// inline asm
	mul.ftz.f32 	%f1024, %f935, %f951;
	fma.rn.ftz.f32 	%f1025, %f907, %f1024, %f1020;
	fma.rn.ftz.f32 	%f1026, %f908, %f1024, %f1021;
	fma.rn.ftz.f32 	%f1027, %f909, %f1024, %f1022;
	fma.rn.ftz.f32 	%f1028, %f910, %f1024, %f1023;
	// inline asm
	tex.2d.v4.f32.f32 {%f913, %f914, %f915, %f916}, [inTexture, {%f917, %f918}];
	// inline asm
	mul.ftz.f32 	%f1029, %f938, %f951;
	fma.rn.ftz.f32 	%f1169, %f913, %f1029, %f1025;
	fma.rn.ftz.f32 	%f1170, %f914, %f1029, %f1026;
	fma.rn.ftz.f32 	%f1171, %f915, %f1029, %f1027;
	fma.rn.ftz.f32 	%f1172, %f916, %f1029, %f1028;

BB2_28:
	// Join point of the sampling paths; the sampled colour is %f1169..%f1172.
	// Scales all four components by %f15 (defined before this excerpt) and,
	// when kernel parameter 20 is non-zero, adds an extra term computed from
	// the three-float parameters 8, 9, 10 and 11.
	ld.param.u32 	%r70, [cuda_kernel_renderquad_param_20];
	.loc 1 29 1
	// (%f1174,%f1175,%f1176) = first three components * %f15; %f152 = fourth * %f15
	mul.ftz.f32 	%f1174, %f1169, %f15;
	mul.ftz.f32 	%f1175, %f1170, %f15;
	mul.ftz.f32 	%f1176, %f1171, %f15;
	mul.ftz.f32 	%f152, %f1172, %f15;
	.loc 1 29 1
	// parameter 20 == 0: skip the extra term entirely.
	setp.eq.s32	%p27, %r70, 0;
	@%p27 bra 	BB2_33;

	// Load the four 3-vectors: O = param_8, A = param_9, B = param_10, E = param_11.
	ld.param.f32 	%f1145, [cuda_kernel_renderquad_param_8+8];
	ld.param.f32 	%f1144, [cuda_kernel_renderquad_param_8+4];
	ld.param.f32 	%f1143, [cuda_kernel_renderquad_param_8];
	ld.param.f32 	%f1142, [cuda_kernel_renderquad_param_9+8];
	ld.param.f32 	%f1141, [cuda_kernel_renderquad_param_9];
	ld.param.f32 	%f1140, [cuda_kernel_renderquad_param_9+4];
	ld.param.f32 	%f1139, [cuda_kernel_renderquad_param_10];
	ld.param.f32 	%f1138, [cuda_kernel_renderquad_param_10+4];
	ld.param.f32 	%f1137, [cuda_kernel_renderquad_param_10+8];
	ld.param.f32 	%f1136, [cuda_kernel_renderquad_param_11+8];
	ld.param.f32 	%f1135, [cuda_kernel_renderquad_param_11+4];
	ld.param.f32 	%f1134, [cuda_kernel_renderquad_param_11];
	.loc 1 29 1
	// a = A / |A| -> (%f1034,%f1035,%f1036), using rsqrt of the squared length.
	mul.ftz.f32 	%f1030, %f1140, %f1140;
	fma.rn.ftz.f32 	%f1031, %f1141, %f1141, %f1030;
	fma.rn.ftz.f32 	%f1032, %f1142, %f1142, %f1031;
	.loc 2 2775 10
	rsqrt.approx.ftz.f32 	%f1033, %f1032;
	.loc 1 29 122
	mul.ftz.f32 	%f1034, %f1141, %f1033;
	mul.ftz.f32 	%f1035, %f1140, %f1033;
	mul.ftz.f32 	%f1036, %f1142, %f1033;
	.loc 1 29 1
	// b = B / |B| -> (%f1041,%f1042,%f1043)
	mul.ftz.f32 	%f1037, %f1138, %f1138;
	fma.rn.ftz.f32 	%f1038, %f1139, %f1139, %f1037;
	fma.rn.ftz.f32 	%f1039, %f1137, %f1137, %f1038;
	.loc 2 2775 10
	rsqrt.approx.ftz.f32 	%f1040, %f1039;
	.loc 1 29 122
	mul.ftz.f32 	%f1041, %f1139, %f1040;
	mul.ftz.f32 	%f1042, %f1138, %f1040;
	mul.ftz.f32 	%f1043, %f1137, %f1040;
	.loc 1 29 1
	// n = a x b (cross product) -> (%f1046,%f1049,%f1052)
	mul.ftz.f32 	%f1044, %f1035, %f1043;
	mul.ftz.f32 	%f1045, %f1042, %f1036;
	sub.ftz.f32 	%f1046, %f1044, %f1045;
	mul.ftz.f32 	%f1047, %f1036, %f1041;
	mul.ftz.f32 	%f1048, %f1043, %f1034;
	sub.ftz.f32 	%f1049, %f1047, %f1048;
	mul.ftz.f32 	%f1050, %f1034, %f1042;
	mul.ftz.f32 	%f1051, %f1041, %f1035;
	sub.ftz.f32 	%f1052, %f1050, %f1051;
	// p = O + %f16 * a + %f17 * b -> (%f1054,%f1056,%f1058)
	// (%f16 and %f17 are defined before this excerpt.)
	fma.rn.ftz.f32 	%f1053, %f1034, %f16, %f1143;
	fma.rn.ftz.f32 	%f1054, %f1041, %f17, %f1053;
	fma.rn.ftz.f32 	%f1055, %f1035, %f16, %f1144;
	fma.rn.ftz.f32 	%f1056, %f1042, %f17, %f1055;
	fma.rn.ftz.f32 	%f1057, %f1036, %f16, %f1145;
	fma.rn.ftz.f32 	%f1058, %f1043, %f17, %f1057;
	// u = p + (100, 100, 100)   (0f42C80000 = 100.0)
	add.ftz.f32 	%f1059, %f1054, 0f42C80000;
	add.ftz.f32 	%f1060, %f1056, 0f42C80000;
	add.ftz.f32 	%f1061, %f1058, 0f42C80000;
	// w = p - E
	sub.ftz.f32 	%f1062, %f1054, %f1134;
	sub.ftz.f32 	%f1063, %f1056, %f1135;
	sub.ftz.f32 	%f1064, %f1058, %f1136;
	// u normalised -> (%f1069,%f1070,%f1071)
	mul.ftz.f32 	%f1065, %f1060, %f1060;
	fma.rn.ftz.f32 	%f1066, %f1059, %f1059, %f1065;
	fma.rn.ftz.f32 	%f1067, %f1061, %f1061, %f1066;
	.loc 2 2775 10
	rsqrt.approx.ftz.f32 	%f1068, %f1067;
	.loc 1 29 116
	mul.ftz.f32 	%f1069, %f1059, %f1068;
	mul.ftz.f32 	%f1070, %f1060, %f1068;
	mul.ftz.f32 	%f1071, %f1061, %f1068;
	.loc 1 29 1
	// w normalised -> (%f1076,%f1077,%f1078)
	mul.ftz.f32 	%f1072, %f1063, %f1063;
	fma.rn.ftz.f32 	%f1073, %f1062, %f1062, %f1072;
	fma.rn.ftz.f32 	%f1074, %f1064, %f1064, %f1073;
	.loc 2 2775 10
	rsqrt.approx.ftz.f32 	%f1075, %f1074;
	.loc 1 29 116
	mul.ftz.f32 	%f1076, %f1062, %f1075;
	mul.ftz.f32 	%f1077, %f1063, %f1075;
	mul.ftz.f32 	%f1078, %f1064, %f1075;
	.loc 1 29 1
	// r = 2 * dot(n, u) * n - u -> (%f1086,%f1087,%f1088)
	mul.ftz.f32 	%f1079, %f1049, %f1070;
	fma.rn.ftz.f32 	%f1080, %f1046, %f1069, %f1079;
	fma.rn.ftz.f32 	%f1081, %f1052, %f1071, %f1080;
	add.ftz.f32 	%f1082, %f1081, %f1081;
	mul.ftz.f32 	%f1083, %f1046, %f1082;
	mul.ftz.f32 	%f1084, %f1049, %f1082;
	mul.ftz.f32 	%f1085, %f1052, %f1082;
	sub.ftz.f32 	%f1086, %f1083, %f1069;
	sub.ftz.f32 	%f1087, %f1084, %f1070;
	sub.ftz.f32 	%f1088, %f1085, %f1071;
	// %f153 = max(dot(r, w), 0.0)
	mul.ftz.f32 	%f1089, %f1087, %f1077;
	fma.rn.ftz.f32 	%f1090, %f1086, %f1076, %f1089;
	fma.rn.ftz.f32 	%f1091, %f1088, %f1078, %f1090;
	mov.f32 	%f1092, 0f00000000;
	.loc 2 2770 10
	max.ftz.f32 	%f153, %f1091, %f1092;
	.loc 1 29 1
	// Branch to the negative-base variant when %f153 < 0 or is NaN.
	// NOTE(review): after the max with 0.0 this branch looks unreachable --
	// confirm against PTX max semantics for NaN inputs.
	setp.ltu.ftz.f32	%p28, %f153, 0f00000000;
	@%p28 bra 	BB2_31;

	// %f1173 = %f153 ^ 17.6, computed as ex2(17.6 * lg2(%f153))
	// (0f418CCCCD ~= 17.6).
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f1093, %f153;
	mul.ftz.f32 	%f1094, %f1093, 0f418CCCCD;
	ex2.approx.ftz.f32 	%f1173, %f1094;
	bra.uni 	BB2_32;

BB2_31:
	// Negative-base variant: %f1173 = -((-%f153) ^ 17.6).
	.loc 1 29 103
	neg.ftz.f32 	%f1095, %f153;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f1096, %f1095;
	mul.ftz.f32 	%f1097, %f1096, 0f418CCCCD;
	ex2.approx.ftz.f32 	%f1098, %f1097;
	.loc 1 29 175
	neg.ftz.f32 	%f1173, %f1098;

BB2_32:
	.loc 1 29 1
	// Add %f1173 * %f152 to each of the first three components; the fourth
	// component %f152 itself is left unchanged.
	// NOTE(review): this looks like a specular-highlight term -- confirm in Basic3D.cu.
	fma.rn.ftz.f32 	%f1174, %f1173, %f152, %f1174;
	fma.rn.ftz.f32 	%f1175, %f1173, %f152, %f1175;
	fma.rn.ftz.f32 	%f1176, %f1173, %f152, %f1176;

BB2_33:
	.loc 2 2820 10
	// %f163 = %f152 (the fourth component) saturated to [0, 1].
	cvt.ftz.sat.f32.f32	%f163, %f152;
	.loc 1 29 1
	// Non-zero result: continue at BB2_35, which divides by %f163.
	setp.neu.ftz.f32	%p29, %f163, 0f00000000;
	@%p29 bra 	BB2_35;

	// %f163 == 0: output zeros for the first three components and skip the
	// division and power steps.
	mov.f32 	%f1179, 0f00000000;
	mov.f32 	%f1178, %f1179;
	mov.f32 	%f1177, %f1179;
	bra.uni 	BB2_44;

BB2_35:
	// For each of the first three components: divide by the clamped fourth
	// component %f163, then apply a sign-preserving power with exponent
	// 0f3EE8BA2E (~= 0.4545 = 1/2.2), computed as ex2(e * lg2(|v|)).
	// Results land in %f1177, %f1178, %f1179.
	mov.f32 	%f1146, 0f3F800000;
	.loc 2 3606 10
	// %f164 = 1.0 / %f163 (approximate); shared by all three components.
	div.approx.ftz.f32 	%f164, %f1146, %f163;
	.loc 1 29 1
	// Component 1: v = %f1174 * %f164. Negative (or NaN) v goes to BB2_37.
	mul.ftz.f32 	%f165, %f1174, %f164;
	setp.ltu.ftz.f32	%p30, %f165, 0f00000000;
	@%p30 bra 	BB2_37;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f1100, %f165;
	mul.ftz.f32 	%f1101, %f1100, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f1177, %f1101;
	bra.uni 	BB2_38;

BB2_37:
	// v < 0: result = -((-v) ^ e)
	.loc 1 29 187
	neg.ftz.f32 	%f1102, %f165;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f1103, %f1102;
	mul.ftz.f32 	%f1104, %f1103, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f1105, %f1104;
	.loc 1 29 226
	neg.ftz.f32 	%f1177, %f1105;

BB2_38:
	.loc 1 29 1
	// Component 2: v = %f1175 * %f164, same treatment.
	mul.ftz.f32 	%f169, %f1175, %f164;
	setp.ltu.ftz.f32	%p31, %f169, 0f00000000;
	@%p31 bra 	BB2_40;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f1106, %f169;
	mul.ftz.f32 	%f1107, %f1106, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f1178, %f1107;
	bra.uni 	BB2_41;

BB2_40:
	// v < 0: result = -((-v) ^ e)
	.loc 1 29 187
	neg.ftz.f32 	%f1108, %f169;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f1109, %f1108;
	mul.ftz.f32 	%f1110, %f1109, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f1111, %f1110;
	.loc 1 29 226
	neg.ftz.f32 	%f1178, %f1111;

BB2_41:
	.loc 1 29 1
	// Component 3: v = %f1176 * %f164, same treatment.
	mul.ftz.f32 	%f173, %f1176, %f164;
	setp.ltu.ftz.f32	%p32, %f173, 0f00000000;
	@%p32 bra 	BB2_43;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f1112, %f173;
	mul.ftz.f32 	%f1113, %f1112, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f1179, %f1113;
	bra.uni 	BB2_44;

BB2_43:
	// v < 0: result = -((-v) ^ e)
	.loc 1 29 187
	neg.ftz.f32 	%f1114, %f173;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f1115, %f1114;
	mul.ftz.f32 	%f1116, %f1115, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f1117, %f1116;
	.loc 1 29 226
	neg.ftz.f32 	%f1179, %f1117;

BB2_44:
	// Output stage. Kernel parameter 7 selects the destination pixel format:
	// non-zero -> four 32-bit floats (here), zero -> four 16-bit halves (BB2_46).
	ld.param.u32 	%r71, [cuda_kernel_renderquad_param_7];
	.loc 1 29 1
	setp.eq.s32	%p33, %r71, 0;
	@%p33 bra 	BB2_46;

	// 16 bytes per pixel: address = %rd1 + %r27 * 16.
	// (%rd1 and %r27 are computed before this excerpt.)
	mul.wide.s32 	%rd144, %r27, 16;
	add.s64 	%rd145, %rd1, %rd144;
	.loc 1 29 1
	// Store the three processed components plus the clamped fourth (%f163).
	st.global.v4.f32 	[%rd145], {%f1177, %f1178, %f1179, %f163};
	bra.uni 	BB2_50;

BB2_46:
	// Half-precision output path (parameter 7 == 0): convert each of the four
	// result components from f32 to f16 (round-to-nearest, flush-to-zero).
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1177;
	mov.b16 	%rs1, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1178;
	mov.b16 	%rs2, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1179;
	mov.b16 	%rs3, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f163;
	mov.b16 	%rs4, %temp;
}
	// 8 bytes per pixel: address = %rd1 + %r27 * 8.
	mul.wide.s32 	%rd147, %r27, 8;
	add.s64 	%rd148, %rd1, %rd147;
	.loc 1 29 242
	st.global.v4.u16 	[%rd148], {%rs1, %rs2, %rs3, %rs4};
	bra.uni 	BB2_50;

BB2_47:
	// Writes an all-zero pixel. The branch that reaches this label lies before
	// this excerpt, as do the definitions of %r17, %rd2 and %rd3.
	// NOTE(review): %r17 presumably holds the same format flag as parameter 7
	// (non-zero -> float output) -- confirm at its definition.
	.loc 1 29 1
	setp.eq.s32	%p34, %r17, 0;
	@%p34 bra 	BB2_49;

	// %r17 != 0: store four 0.0f values at [%rd2].
	mov.f32 	%f1121, 0f00000000;
	.loc 1 29 1
	st.global.v4.f32 	[%rd2], {%f1121, %f1121, %f1121, %f1121};
	bra.uni 	BB2_50;

BB2_49:
	// %r17 == 0: convert 0.0f to f16 once and store it four times at [%rd3].
	mov.f32 	%f1122, 0f00000000;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1122;
	mov.b16 	%rs5, %temp;
}
	.loc 1 29 242
	st.global.v4.u16 	[%rd3], {%rs5, %rs5, %rs5, %rs5};

BB2_50:
	// Single exit point of the kernel.
	.loc 1 29 2
	ret;
}

//
// cuda_kernel_prepsource
//
// One thread handles one texel of a 2D grid:
//   x = ntid.x * ctaid.x + tid.x,   y = ntid.y * ctaid.y + tid.y
//
// Parameters (roles as used by the code below):
//   param_0  source buffer address (converted to a global-space address)
//   param_1  destination buffer address (converted to a global-space address)
//   param_2  exclusive upper bound for x; threads with x >= param_2 do nothing
//   param_3  exclusive upper bound for y; threads with y >= param_3 do nothing
//   param_4  source row pitch in texels       (index = y * param_4 + x)
//   param_5  destination row pitch in texels  (index = y * param_5 + x)
//   param_6  format flag: 0      -> texels are 4 x f16 (8 bytes)
//                         non-0  -> texels are 4 x f32 (16 bytes)
//
// Per texel (components c0..c3 in memory order):
//   a     = saturate(c3)                       // clamp to [0, 1]
//   out_k = sign-preserving pow(c_k, 2.2) * a  // k = 0, 1, 2
//   store { out_0, out_1, out_2, a } in the same format as the input.
// NOTE(review): c3 is presumably alpha and the 2.2 exponent presumably a
// gamma-to-linear curve - confirm against Basic3D.cu.
//
.visible .entry cuda_kernel_prepsource(
	.param .u64 cuda_kernel_prepsource_param_0,
	.param .u64 cuda_kernel_prepsource_param_1,
	.param .u32 cuda_kernel_prepsource_param_2,
	.param .u32 cuda_kernel_prepsource_param_3,
	.param .u32 cuda_kernel_prepsource_param_4,
	.param .u32 cuda_kernel_prepsource_param_5,
	.param .u32 cuda_kernel_prepsource_param_6
)
{
	.reg .pred 	%offX, %offGrid, %isHalf;
	.reg .pred 	%neg<3>;
	.reg .b16 	%hin<4>;
	.reg .b16 	%hout<4>;
	.reg .s32 	%blkDim, %blkIdx, %thrIdx, %gx, %gy;
	.reg .s32 	%limX, %limY, %srcPitch, %dstPitch, %fmt;
	.reg .s32 	%srcIdx, %dstIdx, %texel;
	.reg .f32 	%a;
	.reg .f32 	%c<4>;
	.reg .f32 	%mag<3>;
	.reg .f32 	%lg<3>;
	.reg .f32 	%sc<3>;
	.reg .f32 	%pw<3>;
	.reg .f32 	%sg<3>;
	.reg .f32 	%out<3>;
	.reg .s64 	%srcGen, %dstGen, %src, %dst;
	.reg .s64 	%srcOff, %dstOff, %srcAddr, %dstAddr;

	// ---- Fetch kernel arguments -------------------------------------
	ld.param.u64 	%srcGen, [cuda_kernel_prepsource_param_0];
	ld.param.u64 	%dstGen, [cuda_kernel_prepsource_param_1];
	ld.param.u32 	%limX, [cuda_kernel_prepsource_param_2];
	ld.param.u32 	%limY, [cuda_kernel_prepsource_param_3];
	ld.param.u32 	%srcPitch, [cuda_kernel_prepsource_param_4];
	ld.param.u32 	%dstPitch, [cuda_kernel_prepsource_param_5];
	ld.param.u32 	%fmt, [cuda_kernel_prepsource_param_6];
	cvta.to.global.u64 	%src, %srcGen;
	cvta.to.global.u64 	%dst, %dstGen;

	// ---- Texel coordinates of this thread ---------------------------
	.loc 1 29 1
	mov.u32 	%blkDim, %ntid.x;
	mov.u32 	%blkIdx, %ctaid.x;
	mov.u32 	%thrIdx, %tid.x;
	mad.lo.s32 	%gx, %blkDim, %blkIdx, %thrIdx;
	mov.u32 	%blkDim, %ntid.y;
	mov.u32 	%blkIdx, %ctaid.y;
	mov.u32 	%thrIdx, %tid.y;
	mad.lo.s32 	%gy, %blkDim, %blkIdx, %thrIdx;

	// ---- Guard: leave when (x >= param_2) || (y >= param_3) ---------
	.loc 1 29 1
	setp.ge.s32 	%offX, %gx, %limX;
	setp.ge.or.s32 	%offGrid, %gy, %limY, %offX;
	@%offGrid bra 	PREPSRC_EXIT;

	// ---- Addressing -------------------------------------------------
	// Texel size is 8 bytes for the f16 format, 16 bytes for f32.
	.loc 1 29 1
	setp.eq.s32 	%isHalf, %fmt, 0;
	selp.s32 	%texel, 8, 16, %isHalf;
	mad.lo.s32 	%srcIdx, %gy, %srcPitch, %gx;
	mad.lo.s32 	%dstIdx, %gy, %dstPitch, %gx;
	mul.wide.s32 	%srcOff, %srcIdx, %texel;
	mul.wide.s32 	%dstOff, %dstIdx, %texel;
	add.s64 	%srcAddr, %src, %srcOff;
	add.s64 	%dstAddr, %dst, %dstOff;

	// ---- Load the four components as f32 ----------------------------
	@%isHalf bra 	PREPSRC_LOAD_F16;

	ld.global.v4.f32 	{%c0, %c1, %c2, %c3}, [%srcAddr];
	bra.uni 	PREPSRC_SHADE;

PREPSRC_LOAD_F16:
	.loc 1 29 1
	ld.global.v4.u16 	{%hin0, %hin1, %hin2, %hin3}, [%srcAddr];
	// Widen each half to single precision.
	.loc 2 3518 10
	cvt.f32.f16 	%c0, %hin0;
	cvt.f32.f16 	%c1, %hin1;
	cvt.f32.f16 	%c2, %hin2;
	cvt.f32.f16 	%c3, %hin3;

PREPSRC_SHADE:
	// a = clamp(c3, 0, 1)
	.loc 2 2820 10
	cvt.ftz.sat.f32.f32	%a, %c3;

	// Each of c0..c2 goes through a sign-preserving power curve:
	//   v <  0 (or NaN):  -exp2(2.2 * log2(-v))
	//   otherwise      :   exp2(2.2 * log2( v))
	// 0f400CCCCD is the f32 encoding of 2.2. The predicate picks between
	// negating and passing the value through, before and after the power.

	// -- component 0 --
	.loc 1 29 1
	setp.ltu.ftz.f32	%neg0, %c0, 0f00000000;
	.loc 1 29 143
	@%neg0 neg.ftz.f32 	%mag0, %c0;
	@!%neg0 mov.f32 	%mag0, %c0;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%lg0, %mag0;
	mul.ftz.f32 	%sc0, %lg0, 0f400CCCCD;
	ex2.approx.ftz.f32 	%pw0, %sc0;
	.loc 1 29 181
	@%neg0 neg.ftz.f32 	%sg0, %pw0;
	@!%neg0 mov.f32 	%sg0, %pw0;
	mul.ftz.f32 	%out0, %sg0, %a;

	// -- component 1 --
	.loc 1 29 1
	setp.ltu.ftz.f32	%neg1, %c1, 0f00000000;
	.loc 1 29 143
	@%neg1 neg.ftz.f32 	%mag1, %c1;
	@!%neg1 mov.f32 	%mag1, %c1;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%lg1, %mag1;
	mul.ftz.f32 	%sc1, %lg1, 0f400CCCCD;
	ex2.approx.ftz.f32 	%pw1, %sc1;
	.loc 1 29 181
	@%neg1 neg.ftz.f32 	%sg1, %pw1;
	@!%neg1 mov.f32 	%sg1, %pw1;
	mul.ftz.f32 	%out1, %sg1, %a;

	// -- component 2 --
	.loc 1 29 1
	setp.ltu.ftz.f32	%neg2, %c2, 0f00000000;
	.loc 1 29 143
	@%neg2 neg.ftz.f32 	%mag2, %c2;
	@!%neg2 mov.f32 	%mag2, %c2;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%lg2, %mag2;
	mul.ftz.f32 	%sc2, %lg2, 0f400CCCCD;
	ex2.approx.ftz.f32 	%pw2, %sc2;
	.loc 1 29 181
	@%neg2 neg.ftz.f32 	%sg2, %pw2;
	@!%neg2 mov.f32 	%sg2, %pw2;
	mul.ftz.f32 	%out2, %sg2, %a;

	// ---- Store in the same format that was loaded -------------------
	.loc 1 29 1
	@%isHalf bra 	PREPSRC_STORE_F16;

	.loc 1 29 1
	st.global.v4.f32 	[%dstAddr], {%out0, %out1, %out2, %a};
	bra.uni 	PREPSRC_EXIT;

PREPSRC_STORE_F16:
	// Narrow to half with round-to-nearest-even before the packed store.
	.loc 2 3513 10
	cvt.rn.ftz.f16.f32 	%hout0, %out0;
	cvt.rn.ftz.f16.f32 	%hout1, %out1;
	cvt.rn.ftz.f16.f32 	%hout2, %out2;
	cvt.rn.ftz.f16.f32 	%hout3, %a;
	.loc 1 29 242
	st.global.v4.u16 	[%dstAddr], {%hout0, %hout1, %hout2, %hout3};

PREPSRC_EXIT:
	.loc 1 29 2
	ret;
}


