//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//

// PTX ISA version this module is written for.
.version 4.1
// Target architecture the module is compiled for.
.target sm_30
// All addresses in this module are 64 bits wide.
.address_size 64

// Module-scope texture reference; StabilizerWarpKernel fetches from it with tex.2d.
.global .texref inputTex;

//
// QuadBoundingBox(float2 const*, float*, float*, float*, float*)
//
// Reduces the four (x, y) float pairs stored at param_0 (byte offsets 0..28)
// to their per-axis minimum and maximum.
//   param_0 : quad vertices, 4 x (x, y) floats
//   param_1 : receives the minimum of the four x values
//   param_2 : receives the maximum of the four x values
//   param_3 : receives the minimum of the four y values
//   param_4 : receives the maximum of the four y values
//
// All memory accesses use generic addressing. The running bounds are kept in
// memory rather than in registers: every step re-loads the current bound,
// folds in one vertex component and stores the result back. The load/store
// order below is deliberate and must not be rearranged, so that the result is
// the same even when the pointers overlap.
//
.visible .func _Z15QuadBoundingBoxPK6float2PfS2_S2_S2_(
	.param .b64 _Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_0,
	.param .b64 _Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_1,
	.param .b64 _Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_2,
	.param .b64 _Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_3,
	.param .b64 _Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_4
)
{
	.reg .f32 	%bound, %coord;
	.reg .u64 	%quad, %xmin, %xmax, %ymin, %ymax;

	ld.param.u64 	%quad, [_Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_0];
	ld.param.u64 	%xmin, [_Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_1];
	ld.param.u64 	%xmax, [_Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_2];
	ld.param.u64 	%ymin, [_Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_3];
	ld.param.u64 	%ymax, [_Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_4];

	// Vertex 0 seeds all four bounds (the max slot is written before the min slot).
	ld.f32 	%coord, [%quad];
	st.f32 	[%xmax], %coord;
	st.f32 	[%xmin], %coord;
	ld.f32 	%coord, [%quad+4];
	st.f32 	[%ymax], %coord;
	st.f32 	[%ymin], %coord;

	// Fold in vertex 1: x at +8, y at +12.
	ld.f32 	%bound, [%xmin];
	ld.f32 	%coord, [%quad+8];
	min.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%xmin], %bound;
	ld.f32 	%bound, [%xmax];
	ld.f32 	%coord, [%quad+8];
	max.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%xmax], %bound;
	ld.f32 	%bound, [%ymin];
	ld.f32 	%coord, [%quad+12];
	min.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%ymin], %bound;
	ld.f32 	%bound, [%ymax];
	ld.f32 	%coord, [%quad+12];
	max.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%ymax], %bound;

	// Fold in vertex 2: x at +16, y at +20.
	ld.f32 	%bound, [%xmin];
	ld.f32 	%coord, [%quad+16];
	min.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%xmin], %bound;
	ld.f32 	%bound, [%xmax];
	ld.f32 	%coord, [%quad+16];
	max.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%xmax], %bound;
	ld.f32 	%bound, [%ymin];
	ld.f32 	%coord, [%quad+20];
	min.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%ymin], %bound;
	ld.f32 	%bound, [%ymax];
	ld.f32 	%coord, [%quad+20];
	max.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%ymax], %bound;

	// Fold in vertex 3: x at +24, y at +28.
	ld.f32 	%bound, [%xmin];
	ld.f32 	%coord, [%quad+24];
	min.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%xmin], %bound;
	ld.f32 	%bound, [%xmax];
	ld.f32 	%coord, [%quad+24];
	max.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%xmax], %bound;
	ld.f32 	%bound, [%ymin];
	ld.f32 	%coord, [%quad+28];
	min.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%ymin], %bound;
	ld.f32 	%bound, [%ymax];
	ld.f32 	%coord, [%quad+28];
	max.ftz.f32 	%bound, %bound, %coord;
	st.f32 	[%ymax], %bound;
	ret;
}

//
// QuadContainsPoint(float2 const*, float2 const*)
//
//   param_0 : quad, four (x, y) float pairs q0..q3 at byte offsets 0, 8, 16, 24
//   param_1 : point p, one (x, y) float pair
//   returns : 32-bit 1 when every edge test passes, otherwise 0
//
// The vertices are visited in the order q0 -> q1 -> q3 -> q2 -> q0. For each
// consecutive pair (A, B) the test is
//     (A.x - p.x) * (B.y - p.y)  >=  (B.x - p.x) * (A.y - p.y)
// The comparison used to reject is setp.ltu (less-than OR unordered), so a NaN
// in any product also produces 0.
//
.visible .func  (.param .b32 func_retval0) _Z17QuadContainsPointPK6float2S1_(
	.param .b64 _Z17QuadContainsPointPK6float2S1__param_0,
	.param .b64 _Z17QuadContainsPointPK6float2S1__param_1
)
{
	.reg .pred 	%outside;
	.reg .u32 	%result;
	.reg .u64 	%quad, %pt;
	.reg .f32 	%px, %py;
	.reg .f32 	%q0x, %q0y, %q1x, %q1y, %q2x, %q2y, %q3x, %q3y;
	.reg .f32 	%d0x, %d0y, %d1x, %d1y, %d2x, %d2y, %d3x, %d3y;
	.reg .f32 	%lhs, %rhs;

	ld.param.u64 	%quad, [_Z17QuadContainsPointPK6float2S1__param_0];
	ld.param.u64 	%pt, [_Z17QuadContainsPointPK6float2S1__param_1];

	// Assume "not contained"; only the fall-through path at the end flips this.
	mov.u32 	%result, 0;

	// All five loads are issued before the first early-out.
	ld.v2.f32 	{%px, %py}, [%pt];
	ld.v2.f32 	{%q0x, %q0y}, [%quad];
	ld.v2.f32 	{%q1x, %q1y}, [%quad+8];
	ld.v2.f32 	{%q3x, %q3y}, [%quad+24];
	ld.v2.f32 	{%q2x, %q2y}, [%quad+16];

	// Edge q0 -> q1.
	sub.ftz.f32 	%d0x, %q0x, %px;
	sub.ftz.f32 	%d0y, %q0y, %py;
	sub.ftz.f32 	%d1x, %q1x, %px;
	sub.ftz.f32 	%d1y, %q1y, %py;
	mul.ftz.f32 	%lhs, %d0x, %d1y;
	mul.ftz.f32 	%rhs, %d1x, %d0y;
	setp.ltu.ftz.f32	%outside, %lhs, %rhs;
	@%outside bra 	QCP_DONE;

	// Edge q1 -> q3.
	sub.ftz.f32 	%d3x, %q3x, %px;
	sub.ftz.f32 	%d3y, %q3y, %py;
	mul.ftz.f32 	%lhs, %d1x, %d3y;
	mul.ftz.f32 	%rhs, %d3x, %d1y;
	setp.ltu.ftz.f32	%outside, %lhs, %rhs;
	@%outside bra 	QCP_DONE;

	// Edge q3 -> q2.
	sub.ftz.f32 	%d2x, %q2x, %px;
	sub.ftz.f32 	%d2y, %q2y, %py;
	mul.ftz.f32 	%lhs, %d3x, %d2y;
	mul.ftz.f32 	%rhs, %d2x, %d3y;
	setp.ltu.ftz.f32	%outside, %lhs, %rhs;
	@%outside bra 	QCP_DONE;

	// Edge q2 -> q0.
	mul.ftz.f32 	%lhs, %d2x, %d0y;
	mul.ftz.f32 	%rhs, %d0x, %d2y;
	setp.ltu.ftz.f32	%outside, %lhs, %rhs;
	@%outside bra 	QCP_DONE;

	// Every edge test passed.
	mov.u32 	%result, 1;

QCP_DONE:
	st.param.b32	[func_retval0+0], %result;
	ret;
}

//
// computeInverseTransform(float2 const*, float*)
//
//   param_0 : quad, four (x, y) float pairs p0..p3 at byte offsets 0, 8, 16, 24
//   param_1 : out, nine floats written at byte offsets 0..32
//
// Step 1 builds eight coefficients a, b, d, e, g, h from the quad, with
// c = p0.x and f = p0.y:
//     s = p0 - p1 + p3 - p2
//   * if |s.x| < ~1e-5 OR |s.y| < ~1e-5 (0f3727C5AC):
//         a = p1.x - p0.x   b = p3.x - p1.x
//         d = p1.y - p0.y   e = p3.y - p1.y   g = h = 0
//     NOTE(review): this shortcut is taken when EITHER component is small;
//     confirm against the CUDA source that "or" (not "and") is intended.
//   * otherwise, with d1 = p1 - p3, d2 = p2 - p3, det = d1.x*d2.y - d2.x*d1.y:
//         g = (s.x*d2.y - d2.x*s.y) / det     h = (d1.x*s.y - s.x*d1.y) / det
//         a = g*p1.x + (p1.x - p0.x)          b = h*p2.x + (p2.x - p0.x)
//         d = g*p1.y + (p1.y - p0.y)          e = h*p2.y + (p2.y - p0.y)
//     det is not checked for zero; divisions are div.approx.
//
// Step 2 stores the nine cofactor expressions of [a b c; d e f; g h 1]:
//     out[0] = e - f*h     out[3] = c*h - b     out[6] = b*f - c*e
//     out[1] = f*g - d     out[4] = a - c*g     out[7] = c*d - a*f
//     out[2] = d*h - e*g   out[5] = b*g - a*h   out[8] = a*e - b*d
// All quad loads complete before the first store to out.
//
.visible .func _Z23computeInverseTransformPK6float2Pf(
	.param .b64 _Z23computeInverseTransformPK6float2Pf_param_0,
	.param .b64 _Z23computeInverseTransformPK6float2Pf_param_1
)
{
	.reg .pred 	%small_x, %small_y, %affine;
	.reg .u64 	%quad, %out;
	.reg .f32 	%p0x, %p0y, %p1x, %p1y, %p2x, %p2y, %p3x, %p3y;
	.reg .f32 	%sx, %sy, %mag;
	.reg .f32 	%dx1, %dy1, %dx2, %dy2, %det, %num;
	.reg .f32 	%ma, %mb, %md, %me, %mg, %mh;
	.reg .f32 	%t0, %t1;

	ld.param.u64 	%quad, [_Z23computeInverseTransformPK6float2Pf_param_0];
	ld.param.u64 	%out, [_Z23computeInverseTransformPK6float2Pf_param_1];

	ld.v2.f32 	{%p1x, %p1y}, [%quad+8];
	ld.v2.f32 	{%p0x, %p0y}, [%quad];
	ld.v2.f32 	{%p3x, %p3y}, [%quad+24];
	ld.v2.f32 	{%p2x, %p2y}, [%quad+16];

	// s = ((p0 - p1) + p3) - p2, evaluated in exactly this association.
	sub.ftz.f32 	%sx, %p0x, %p1x;
	add.ftz.f32 	%sx, %sx, %p3x;
	sub.ftz.f32 	%sx, %sx, %p2x;
	sub.ftz.f32 	%sy, %p0y, %p1y;
	add.ftz.f32 	%sy, %sy, %p3y;
	sub.ftz.f32 	%sy, %sy, %p2y;

	// affine = (|s.x| < eps) || (|s.y| < eps); ordered compares, so NaN -> false.
	abs.ftz.f32 	%mag, %sx;
	setp.lt.ftz.f32	%small_x, %mag, 0f3727C5AC;
	abs.ftz.f32 	%mag, %sy;
	setp.lt.ftz.f32	%small_y, %mag, 0f3727C5AC;
	or.pred  	%affine, %small_x, %small_y;
	@%affine bra 	CIT_AFFINE;

	// General case: solve for g and h, then derive a, b, d, e.
	sub.ftz.f32 	%dx1, %p1x, %p3x;
	sub.ftz.f32 	%dy2, %p2y, %p3y;
	mul.ftz.f32 	%t0, %dx1, %dy2;
	sub.ftz.f32 	%dy1, %p1y, %p3y;
	sub.ftz.f32 	%dx2, %p2x, %p3x;
	mul.ftz.f32 	%t1, %dx2, %dy1;
	sub.ftz.f32 	%det, %t0, %t1;
	mul.ftz.f32 	%t0, %sx, %dy2;
	mul.ftz.f32 	%t1, %dx2, %sy;
	sub.ftz.f32 	%num, %t0, %t1;
	div.approx.ftz.f32 	%mg, %num, %det;
	mul.ftz.f32 	%t0, %dx1, %sy;
	mul.ftz.f32 	%t1, %sx, %dy1;
	sub.ftz.f32 	%num, %t0, %t1;
	div.approx.ftz.f32 	%mh, %num, %det;
	sub.ftz.f32 	%t0, %p1x, %p0x;
	fma.rn.ftz.f32 	%ma, %mg, %p1x, %t0;
	sub.ftz.f32 	%t0, %p2x, %p0x;
	fma.rn.ftz.f32 	%mb, %mh, %p2x, %t0;
	sub.ftz.f32 	%t0, %p1y, %p0y;
	fma.rn.ftz.f32 	%md, %mg, %p1y, %t0;
	sub.ftz.f32 	%t0, %p2y, %p0y;
	fma.rn.ftz.f32 	%me, %mh, %p2y, %t0;
	bra.uni 	CIT_EMIT;

CIT_AFFINE:
	// Shortcut case: no perspective terms.
	sub.ftz.f32 	%ma, %p1x, %p0x;
	sub.ftz.f32 	%md, %p1y, %p0y;
	sub.ftz.f32 	%mb, %p3x, %p1x;
	sub.ftz.f32 	%me, %p3y, %p1y;
	mov.f32 	%mh, 0f00000000;
	mov.f32 	%mg, %mh;

CIT_EMIT:
	// out[0] = e - f*h
	mul.ftz.f32 	%t0, %p0y, %mh;
	sub.ftz.f32 	%t0, %me, %t0;
	st.f32 	[%out], %t0;
	// out[1] = f*g - d
	mul.ftz.f32 	%t0, %p0y, %mg;
	sub.ftz.f32 	%t0, %t0, %md;
	st.f32 	[%out+4], %t0;
	// out[2] = d*h - e*g
	mul.ftz.f32 	%t1, %me, %mg;
	mul.ftz.f32 	%t0, %md, %mh;
	sub.ftz.f32 	%t0, %t0, %t1;
	st.f32 	[%out+8], %t0;
	// out[3] = c*h - b
	mul.ftz.f32 	%t0, %p0x, %mh;
	sub.ftz.f32 	%t0, %t0, %mb;
	st.f32 	[%out+12], %t0;
	// out[4] = a - c*g
	mul.ftz.f32 	%t0, %p0x, %mg;
	sub.ftz.f32 	%t0, %ma, %t0;
	st.f32 	[%out+16], %t0;
	// out[5] = b*g - a*h
	mul.ftz.f32 	%t1, %ma, %mh;
	mul.ftz.f32 	%t0, %mb, %mg;
	sub.ftz.f32 	%t0, %t0, %t1;
	st.f32 	[%out+20], %t0;
	// out[6] = b*f - c*e
	mul.ftz.f32 	%t1, %p0x, %me;
	mul.ftz.f32 	%t0, %mb, %p0y;
	sub.ftz.f32 	%t0, %t0, %t1;
	st.f32 	[%out+24], %t0;
	// out[7] = c*d - a*f
	mul.ftz.f32 	%t1, %ma, %p0y;
	mul.ftz.f32 	%t0, %p0x, %md;
	sub.ftz.f32 	%t0, %t0, %t1;
	st.f32 	[%out+28], %t0;
	// out[8] = a*e - b*d
	mul.ftz.f32 	%t1, %mb, %md;
	mul.ftz.f32 	%t0, %ma, %me;
	sub.ftz.f32 	%t0, %t0, %t1;
	st.f32 	[%out+32], %t0;
	ret;
}

//
// StabilizerWarpKernel
//
// One CTA processes one cell (ctaid.x, ctaid.y) of a 2-D array of float-pair
// vertices (param_8). The cell's four corner vertices form a quad; the threads
// of the CTA stride (by ntid.x / ntid.y) over the integer bounding rectangle
// of that quad scaled by param_18. For every integer position that lies inside
// the quad, the position is mapped back through the quad's inverse
// affine/projective transform, inputTex is sampled, and the four fetched
// channels are stored to the output buffer.
//
// Parameters, as used by the code below:
//   param_0      : not read anywhere in this kernel body
//   param_1      : output format selector; 0 -> four f16 values (8 bytes per
//                  pixel), non-zero -> four f32 values (16 bytes per pixel)
//   param_2      : output buffer address (converted with cvta.to.global)
//   param_3/4    : x / y sizes; exclusive upper bounds for the output position
//                  when param_13 == 0, and the extents used for the centring
//                  offset when param_13 != 0
//   param_5      : multiplier applied to the output y when forming the linear
//                  pixel index (index = y * param_5 + x)
//   param_6/7    : sizes used to scale and clamp the texture coordinates
//                  (presumably the texture width / height - confirm with caller)
//   param_8      : vertex array in global memory, 8 bytes per vertex (x, y),
//                  indexed as row * param_9 + column
//   param_9/10   : vertex count per row / number of vertex rows; CTAs with
//                  ctaid.x >= param_9 - 1 or ctaid.y >= param_10 - 1 exit at once
//   param_11     : four floats; [2]-[0] and [3]-[1] (rounded up) give the
//                  extents used as limits, [0] and [1] feed the centre
//                  computation (looks like an x0,y0,x1,y1 rectangle - confirm)
//   param_12     : four floats combined with param_11[0..1] to form a centre
//   param_13     : mode flag; 0 selects bounds [0, param_3) x [0, param_4),
//                  non-zero selects [param_14, param_16) x [param_15, param_17)
//   param_14..17 : explicit output bounds used when param_13 != 0
//   param_18     : float scale; vertex coordinates are multiplied by it to get
//                  pixel positions, pixel positions are divided by it to get back
//
.visible .entry StabilizerWarpKernel(
	.param .u64 StabilizerWarpKernel_param_0,
	.param .u32 StabilizerWarpKernel_param_1,
	.param .u64 StabilizerWarpKernel_param_2,
	.param .u32 StabilizerWarpKernel_param_3,
	.param .u32 StabilizerWarpKernel_param_4,
	.param .u32 StabilizerWarpKernel_param_5,
	.param .u32 StabilizerWarpKernel_param_6,
	.param .u32 StabilizerWarpKernel_param_7,
	.param .u64 StabilizerWarpKernel_param_8,
	.param .u32 StabilizerWarpKernel_param_9,
	.param .u32 StabilizerWarpKernel_param_10,
	.param .align 16 .b8 StabilizerWarpKernel_param_11[16],
	.param .align 16 .b8 StabilizerWarpKernel_param_12[16],
	.param .u32 StabilizerWarpKernel_param_13,
	.param .u32 StabilizerWarpKernel_param_14,
	.param .u32 StabilizerWarpKernel_param_15,
	.param .u32 StabilizerWarpKernel_param_16,
	.param .u32 StabilizerWarpKernel_param_17,
	.param .f32 StabilizerWarpKernel_param_18
)
{
	.reg .pred 	%p<31>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<73>;
	.reg .f32 	%f<214>;
	.reg .s64 	%rd<16>;


	// ---- Load kernel arguments (param_0 is never loaded) ----
	ld.param.u32 	%r24, [StabilizerWarpKernel_param_1];
	ld.param.u64 	%rd2, [StabilizerWarpKernel_param_2];
	ld.param.u32 	%r25, [StabilizerWarpKernel_param_3];
	ld.param.u32 	%r26, [StabilizerWarpKernel_param_4];
	ld.param.u32 	%r27, [StabilizerWarpKernel_param_5];
	ld.param.u32 	%r28, [StabilizerWarpKernel_param_6];
	ld.param.u32 	%r29, [StabilizerWarpKernel_param_7];
	ld.param.u64 	%rd3, [StabilizerWarpKernel_param_8];
	ld.param.u32 	%r30, [StabilizerWarpKernel_param_9];
	ld.param.u32 	%r36, [StabilizerWarpKernel_param_10];
	// param_11[0..3] -> %f1..%f4
	ld.param.f32 	%f4, [StabilizerWarpKernel_param_11+12];
	ld.param.f32 	%f3, [StabilizerWarpKernel_param_11+8];
	ld.param.f32 	%f2, [StabilizerWarpKernel_param_11+4];
	ld.param.f32 	%f1, [StabilizerWarpKernel_param_11];
	// param_12[0..3] -> %f63..%f66
	ld.param.f32 	%f66, [StabilizerWarpKernel_param_12+12];
	ld.param.f32 	%f65, [StabilizerWarpKernel_param_12+8];
	ld.param.f32 	%f64, [StabilizerWarpKernel_param_12+4];
	ld.param.f32 	%f63, [StabilizerWarpKernel_param_12];
	ld.param.u32 	%r31, [StabilizerWarpKernel_param_13];
	ld.param.u32 	%r32, [StabilizerWarpKernel_param_14];
	ld.param.u32 	%r33, [StabilizerWarpKernel_param_15];
	ld.param.u32 	%r34, [StabilizerWarpKernel_param_16];
	ld.param.u32 	%r35, [StabilizerWarpKernel_param_17];
	ld.param.f32 	%f67, [StabilizerWarpKernel_param_18];
	// ---- Guard: only CTAs with ctaid.x < param_9 - 1 and ctaid.y < param_10 - 1 do work ----
	add.s32 	%r1, %r30, -1;
	mov.u32 	%r2, %ctaid.x;
	setp.lt.s32	%p3, %r2, %r1;
	add.s32 	%r3, %r36, -1;
	mov.u32 	%r4, %ctaid.y;
	setp.lt.s32	%p4, %r4, %r3;
	and.pred  	%p5, %p3, %p4;
	@!%p5 bra 	BB3_22;
	bra.uni 	BB3_1;

BB3_1:
	cvta.to.global.u64 	%rd4, %rd3;
	// %r37 / %r38 = ceil(param_11[2] - param_11[0]) / ceil(param_11[3] - param_11[1]);
	// %f5 / %f6 are the same values as floats, %f72 / %f73 are those times param_18.
	sub.ftz.f32 	%f68, %f3, %f1;
	cvt.rpi.ftz.f32.f32	%f69, %f68;
	cvt.rzi.ftz.s32.f32	%r37, %f69;
	sub.ftz.f32 	%f70, %f4, %f2;
	cvt.rpi.ftz.f32.f32	%f71, %f70;
	cvt.rzi.ftz.s32.f32	%r38, %f71;
	cvt.rn.f32.s32	%f5, %r37;
	mul.ftz.f32 	%f72, %f5, %f67;
	cvt.rn.f32.s32	%f6, %r38;
	mul.ftz.f32 	%f73, %f6, %f67;
	// %rd6 -> vertex (ctaid.x, ctaid.y), %rd8 -> vertex (ctaid.x, ctaid.y + 1);
	// element index is shifted left by 1 and scaled by 4 bytes = 8 bytes per vertex.
	mad.lo.s32 	%r39, %r4, %r30, %r2;
	shl.b32 	%r40, %r39, 1;
	mul.wide.s32 	%rd5, %r40, 4;
	add.s64 	%rd6, %rd4, %rd5;
	add.s32 	%r41, %r4, 1;
	mad.lo.s32 	%r42, %r41, %r30, %r2;
	shl.b32 	%r43, %r42, 1;
	mul.wide.s32 	%rd7, %r43, 4;
	add.s64 	%rd8, %rd4, %rd7;
	// Load the four corners and accumulate their bounding box:
	//   v0 = (%f8, %f10)  at %rd6      v1 = (%f7, %f9)   at %rd6+8
	//   v2 = (%f11, %f12) at %rd8      v3 = (%f13, %f14) at %rd8+8
	// Final bounds: min x = %f82, max x = %f83, min y = %f84, max y = %f85.
	ld.global.f32 	%f7, [%rd6+8];
	ld.global.f32 	%f8, [%rd6];
	min.ftz.f32 	%f74, %f8, %f7;
	max.ftz.f32 	%f75, %f8, %f7;
	ld.global.f32 	%f9, [%rd6+12];
	ld.global.f32 	%f10, [%rd6+4];
	min.ftz.f32 	%f76, %f10, %f9;
	max.ftz.f32 	%f77, %f10, %f9;
	ld.global.f32 	%f11, [%rd8];
	min.ftz.f32 	%f78, %f74, %f11;
	max.ftz.f32 	%f79, %f75, %f11;
	ld.global.f32 	%f12, [%rd8+4];
	min.ftz.f32 	%f80, %f76, %f12;
	max.ftz.f32 	%f81, %f77, %f12;
	ld.global.f32 	%f13, [%rd8+8];
	min.ftz.f32 	%f82, %f78, %f13;
	max.ftz.f32 	%f83, %f79, %f13;
	ld.global.f32 	%f14, [%rd8+12];
	min.ftz.f32 	%f84, %f80, %f14;
	max.ftz.f32 	%f85, %f81, %f14;
	// Centre x (%f91) = ((param_12[0] + param_11[0]) + (param_12[2] + param_11[0])) * param_18 / 2.0
	add.ftz.f32 	%f86, %f65, %f1;
	add.ftz.f32 	%f87, %f63, %f1;
	add.ftz.f32 	%f88, %f87, %f86;
	mul.ftz.f32 	%f89, %f88, %f67;
	mov.f32 	%f90, 0f40000000;
	div.approx.ftz.f32 	%f91, %f89, %f90;
	// Centre y (%f96): same formula with param_12[1], param_12[3] and param_11[1].
	add.ftz.f32 	%f92, %f66, %f2;
	add.ftz.f32 	%f93, %f64, %f2;
	add.ftz.f32 	%f94, %f93, %f92;
	mul.ftz.f32 	%f95, %f94, %f67;
	div.approx.ftz.f32 	%f96, %f95, %f90;
	// %r5 / %r6 = centre truncated toward zero.
	cvt.rzi.ftz.s32.f32	%r5, %f91;
	cvt.rzi.ftz.s32.f32	%r6, %f96;
	// %p6 = (param_13 == 0). Half extents %f98 / %f100 = (size - 1) * 0.5, where
	// size is %r37 / %r38 when %p6 is set and param_3 / param_4 otherwise.
	setp.eq.s32	%p6, %r31, 0;
	selp.b32	%r44, %r37, %r25, %p6;
	add.s32 	%r45, %r44, -1;
	cvt.rn.f32.s32	%f97, %r45;
	mul.ftz.f32 	%f98, %f97, 0f3F000000;
	selp.b32	%r46, %r38, %r26, %p6;
	add.s32 	%r47, %r46, -1;
	cvt.rn.f32.s32	%f99, %r47;
	mul.ftz.f32 	%f100, %f99, 0f3F000000;
	cvt.rzi.ftz.s32.f32	%r7, %f98;
	cvt.rzi.ftz.s32.f32	%r8, %f100;
	// Sub-pixel offsets: %f15 / %f16 = (centre - trunc(centre)) - (half - trunc(half)) for x / y.
	cvt.rn.f32.s32	%f101, %r5;
	sub.ftz.f32 	%f102, %f91, %f101;
	cvt.rn.f32.s32	%f103, %r7;
	sub.ftz.f32 	%f104, %f98, %f103;
	sub.ftz.f32 	%f15, %f102, %f104;
	cvt.rn.f32.s32	%f105, %r6;
	sub.ftz.f32 	%f106, %f96, %f105;
	cvt.rn.f32.s32	%f107, %r8;
	sub.ftz.f32 	%f108, %f100, %f107;
	sub.ftz.f32 	%f16, %f106, %f108;
	cvt.rzi.ftz.s32.f32	%r48, %f72;
	cvt.rzi.ftz.s32.f32	%r49, %f73;
	// Integer iteration rectangle for this cell:
	//   start x %r9  = floor(min x * param_18 - 1)     start y %r10 = floor(min y * param_18 - 1)
	//   end   x %r11 = min(ceil(max x * param_18 + 1), %r48)
	//   end   y %r12 = min(ceil(max y * param_18 + 1), %r49)
	fma.rn.ftz.f32 	%f109, %f82, %f67, 0fBF800000;
	cvt.rmi.ftz.f32.f32	%f110, %f109;
	cvt.rzi.ftz.s32.f32	%r9, %f110;
	fma.rn.ftz.f32 	%f111, %f84, %f67, 0fBF800000;
	cvt.rmi.ftz.f32.f32	%f112, %f111;
	cvt.rzi.ftz.s32.f32	%r10, %f112;
	fma.rn.ftz.f32 	%f113, %f83, %f67, 0f3F800000;
	cvt.rpi.ftz.f32.f32	%f114, %f113;
	cvt.rzi.ftz.s32.f32	%r50, %f114;
	min.s32 	%r11, %r50, %r48;
	fma.rn.ftz.f32 	%f115, %f85, %f67, 0f3F800000;
	cvt.rpi.ftz.f32.f32	%f116, %f115;
	cvt.rzi.ftz.s32.f32	%r51, %f116;
	min.s32 	%r12, %r51, %r49;
	// Texture-coordinate scales: %f17 = (param_6 - 1) / (param_9 - 1),
	// %f18 = (param_7 - 1) / (param_10 - 1). The guard above ensures both divisors are > 0.
	add.s32 	%r52, %r28, -1;
	cvt.rn.f32.s32	%f117, %r52;
	cvt.rn.f32.s32	%f118, %r1;
	div.approx.ftz.f32 	%f17, %f117, %f118;
	add.s32 	%r53, %r29, -1;
	cvt.rn.f32.s32	%f119, %r53;
	cvt.rn.f32.s32	%f120, %r3;
	div.approx.ftz.f32 	%f18, %f119, %f120;
	// ---- Transform coefficients for the quad (same arithmetic as
	//      computeInverseTransform with p0..p3 = v0..v3) ----
	// s = v0 - v1 + v3 - v2  ->  %f19 (x), %f20 (y)
	sub.ftz.f32 	%f121, %f8, %f7;
	add.ftz.f32 	%f122, %f121, %f13;
	sub.ftz.f32 	%f19, %f122, %f11;
	sub.ftz.f32 	%f123, %f10, %f9;
	add.ftz.f32 	%f124, %f123, %f14;
	sub.ftz.f32 	%f20, %f124, %f12;
	// %p30 = (|s.x| < ~1e-5) || (|s.y| < ~1e-5)
	// NOTE(review): shortcut is taken when EITHER component is small - confirm intent.
	abs.ftz.f32 	%f125, %f19;
	setp.geu.ftz.f32	%p7, %f125, 0f3727C5AC;
	@%p7 bra 	BB3_3;

	mov.pred 	%p30, -1;
	bra.uni 	BB3_4;

BB3_3:
	abs.ftz.f32 	%f126, %f20;
	setp.lt.ftz.f32	%p30, %f126, 0f3727C5AC;

BB3_4:
	// a (%f208) and d (%f210) start as v1 - v0; the general path below adds g*v1 on top.
	sub.ftz.f32 	%f208, %f7, %f8;
	sub.ftz.f32 	%f210, %f9, %f10;
	@%p30 bra 	BB3_6;

	// General path: det = (v1.x-v3.x)*(v2.y-v3.y) - (v2.x-v3.x)*(v1.y-v3.y),
	// g = %f212, h = %f213, then a, b (%f209), d, e (%f211). det is not checked for zero.
	sub.ftz.f32 	%f127, %f7, %f13;
	sub.ftz.f32 	%f128, %f12, %f14;
	mul.ftz.f32 	%f129, %f127, %f128;
	sub.ftz.f32 	%f130, %f9, %f14;
	sub.ftz.f32 	%f131, %f11, %f13;
	mul.ftz.f32 	%f132, %f131, %f130;
	sub.ftz.f32 	%f133, %f129, %f132;
	mul.ftz.f32 	%f134, %f19, %f128;
	mul.ftz.f32 	%f135, %f131, %f20;
	sub.ftz.f32 	%f136, %f134, %f135;
	div.approx.ftz.f32 	%f212, %f136, %f133;
	mul.ftz.f32 	%f137, %f127, %f20;
	mul.ftz.f32 	%f138, %f19, %f130;
	sub.ftz.f32 	%f139, %f137, %f138;
	div.approx.ftz.f32 	%f213, %f139, %f133;
	fma.rn.ftz.f32 	%f208, %f212, %f7, %f208;
	sub.ftz.f32 	%f140, %f11, %f8;
	fma.rn.ftz.f32 	%f209, %f213, %f11, %f140;
	fma.rn.ftz.f32 	%f210, %f212, %f9, %f210;
	sub.ftz.f32 	%f141, %f12, %f10;
	fma.rn.ftz.f32 	%f211, %f213, %f12, %f141;
	bra.uni 	BB3_7;

BB3_6:
	// Shortcut path: b = v3.x - v1.x, e = v3.y - v1.y, g = h = 0.
	sub.ftz.f32 	%f209, %f13, %f7;
	sub.ftz.f32 	%f211, %f14, %f9;
	mov.f32 	%f213, 0f00000000;
	mov.f32 	%f212, %f213;

BB3_7:
	// First row for this thread: %r71 = tid.y + start y. Nothing to do if already past end y.
	mov.u32 	%r54, %tid.y;
	add.s32 	%r71, %r54, %r10;
	setp.gt.s32	%p9, %r71, %r12;
	@%p9 bra 	BB3_22;

	// param_6 / param_7 and ctaid are re-read here (same values as %r28 / %r29 / %r2 / %r4).
	ld.param.u32 	%r69, [StabilizerWarpKernel_param_7];
	ld.param.u32 	%r68, [StabilizerWarpKernel_param_6];
	mov.u32 	%r67, %ctaid.x;
	mov.u32 	%r66, %ctaid.y;
	// Nine inverse-transform coefficients, with c = v0.x (%f8) and f = v0.y (%f10):
	//   %f37 = e - f*h     %f40 = c*h - b     %f43 = b*f - c*e
	//   %f38 = f*g - d     %f41 = a - c*g     %f44 = c*d - a*f
	//   %f39 = d*h - e*g   %f42 = b*g - a*h   %f45 = a*e - b*d
	mul.ftz.f32 	%f144, %f10, %f213;
	sub.ftz.f32 	%f37, %f211, %f144;
	mul.ftz.f32 	%f145, %f10, %f212;
	sub.ftz.f32 	%f38, %f145, %f210;
	mul.ftz.f32 	%f146, %f211, %f212;
	mul.ftz.f32 	%f147, %f210, %f213;
	sub.ftz.f32 	%f39, %f147, %f146;
	mul.ftz.f32 	%f148, %f8, %f213;
	sub.ftz.f32 	%f40, %f148, %f209;
	mul.ftz.f32 	%f149, %f8, %f212;
	sub.ftz.f32 	%f41, %f208, %f149;
	mul.ftz.f32 	%f150, %f208, %f213;
	mul.ftz.f32 	%f151, %f209, %f212;
	sub.ftz.f32 	%f42, %f151, %f150;
	mul.ftz.f32 	%f152, %f8, %f211;
	mul.ftz.f32 	%f153, %f209, %f10;
	sub.ftz.f32 	%f43, %f153, %f152;
	mul.ftz.f32 	%f154, %f208, %f10;
	mul.ftz.f32 	%f155, %f8, %f210;
	sub.ftz.f32 	%f44, %f155, %f154;
	mul.ftz.f32 	%f156, %f209, %f210;
	mul.ftz.f32 	%f157, %f208, %f211;
	sub.ftz.f32 	%f45, %f157, %f156;
	// Loop-invariant values:
	//   %r14 / %r16 = integer offset added to row / column to get the output y / x
	//   %r15        = first column for this thread (tid.x + start x)
	//   %f46 / %f47 = ctaid.x / ctaid.y as floats
	//   %f48 / %f49 = param_6 / param_7 plus 0fBF800347 (about -1.0001): upper clamp limits
	sub.s32 	%r14, %r8, %r6;
	mov.u32 	%r55, %tid.x;
	add.s32 	%r15, %r55, %r9;
	sub.s32 	%r16, %r7, %r5;
	cvt.rn.f32.s32	%f46, %r67;
	cvt.rn.f32.s32	%f47, %r66;
	cvt.rn.f32.s32	%f158, %r68;
	add.ftz.f32 	%f48, %f158, 0fBF800347;
	cvt.rn.f32.s32	%f159, %r69;
	add.ftz.f32 	%f49, %f159, 0fBF800347;

BB3_9:
	// ---- Row loop: %r71 is the current row, advanced by ntid.y at BB3_21 ----
	// Skip the row unless output y (%r18) lies in [lo, hi):
	//   lo = 0 / hi = param_4 when param_13 == 0, else lo = param_15 / hi = param_17.
	selp.b32	%r56, 0, %r33, %p6;
	add.s32 	%r18, %r14, %r71;
	setp.lt.s32	%p11, %r18, %r56;
	selp.b32	%r57, %r26, %r35, %p6;
	setp.ge.s32	%p12, %r18, %r57;
	or.pred  	%p13, %p11, %p12;
	@%p13 bra 	BB3_21;

	// %f50 = (row + %f16) / param_18 : y of this row in vertex coordinates.
	setp.gt.s32	%p14, %r15, %r11;
	cvt.rn.f32.s32	%f160, %r71;
	add.ftz.f32 	%f161, %f160, %f16;
	div.approx.ftz.f32 	%f50, %f161, %f67;
	@%p14 bra 	BB3_21;

	mov.u32 	%r72, %r15;

BB3_12:
	// ---- Column loop: %r20 is the current column, advanced by ntid.x at BB3_20 ----
	// Skip the pixel unless output x (%r21) lies in [lo, hi):
	//   lo = 0 / hi = param_3 when param_13 == 0, else lo = param_14 / hi = param_16.
	mov.u32 	%r20, %r72;
	selp.b32	%r58, 0, %r32, %p6;
	add.s32 	%r21, %r16, %r20;
	setp.lt.s32	%p16, %r21, %r58;
	selp.b32	%r59, %r25, %r34, %p6;
	setp.ge.s32	%p17, %r21, %r59;
	or.pred  	%p18, %p16, %p17;
	@%p18 bra 	BB3_20;

	// %f54 = (column + %f15) / param_18 : x in vertex coordinates.
	// Point-in-quad test, edges visited as v0 -> v1 -> v3 -> v2 -> v0 (same arithmetic
	// as QuadContainsPoint). setp.ltu rejects on "less than or NaN".
	cvt.rn.f32.s32	%f162, %r20;
	add.ftz.f32 	%f163, %f162, %f15;
	div.approx.ftz.f32 	%f54, %f163, %f67;
	sub.ftz.f32 	%f55, %f8, %f54;
	sub.ftz.f32 	%f56, %f7, %f54;
	sub.ftz.f32 	%f164, %f9, %f50;
	mul.ftz.f32 	%f165, %f55, %f164;
	sub.ftz.f32 	%f166, %f10, %f50;
	mul.ftz.f32 	%f167, %f56, %f166;
	setp.ltu.ftz.f32	%p19, %f165, %f167;
	@%p19 bra 	BB3_20;

	// Edge v1 -> v3.
	sub.ftz.f32 	%f57, %f13, %f54;
	sub.ftz.f32 	%f168, %f14, %f50;
	mul.ftz.f32 	%f169, %f56, %f168;
	mul.ftz.f32 	%f171, %f57, %f164;
	setp.ltu.ftz.f32	%p20, %f169, %f171;
	@%p20 bra 	BB3_20;

	// Edge v3 -> v2.
	sub.ftz.f32 	%f58, %f11, %f54;
	sub.ftz.f32 	%f172, %f12, %f50;
	mul.ftz.f32 	%f173, %f57, %f172;
	mul.ftz.f32 	%f175, %f58, %f168;
	setp.ltu.ftz.f32	%p21, %f173, %f175;
	@%p21 bra 	BB3_20;

	// Edge v2 -> v0, combined with the limits x < %f5 and y < %f6.
	sub.ftz.f32 	%f204, %f8, %f54;
	sub.ftz.f32 	%f203, %f10, %f50;
	setp.lt.ftz.f32	%p22, %f50, %f6;
	mul.ftz.f32 	%f177, %f58, %f203;
	mul.ftz.f32 	%f179, %f204, %f172;
	setp.ge.ftz.f32	%p23, %f177, %f179;
	setp.lt.ftz.f32	%p24, %f54, %f5;
	and.pred  	%p25, %p23, %p24;
	and.pred  	%p26, %p25, %p22;
	@!%p26 bra 	BB3_20;
	bra.uni 	BB3_17;

BB3_17:
	// ---- Pixel is inside the quad: inverse-map, sample, store ----
	// %r64 = output y * param_5, %r63 = output x; %p27 = (param_1 == 0).
	mul.ftz.f32 	%f207, %f42, %f50;
	mul.ftz.f32 	%f206, %f41, %f50;
	add.s32 	%r65, %r14, %r71;
	mul.lo.s32 	%r64, %r65, %r27;
	add.s32 	%r63, %r16, %r20;
	mul.ftz.f32 	%f205, %f40, %f50;
	setp.eq.s32	%p27, %r24, 0;
	// w = %f39*x + %f42*y + %f45
	// u = (%f37*x + %f40*y + %f43) / w        v = (%f38*x + %f41*y + %f44) / w
	fma.rn.ftz.f32 	%f186, %f39, %f54, %f207;
	add.ftz.f32 	%f187, %f186, %f45;
	fma.rn.ftz.f32 	%f188, %f37, %f54, %f205;
	add.ftz.f32 	%f189, %f188, %f43;
	div.approx.ftz.f32 	%f190, %f189, %f187;
	fma.rn.ftz.f32 	%f191, %f38, %f54, %f206;
	add.ftz.f32 	%f192, %f191, %f44;
	div.approx.ftz.f32 	%f193, %f192, %f187;
	// Texture coordinate = clamp((ctaid + uv) * scale - 0.5, ~1e-4, size - ~1.0001) + 0.5
	add.ftz.f32 	%f194, %f46, %f190;
	fma.rn.ftz.f32 	%f195, %f194, %f17, 0fBF000000;
	add.ftz.f32 	%f196, %f47, %f193;
	fma.rn.ftz.f32 	%f197, %f196, %f18, 0fBF000000;
	mov.f32 	%f198, 0f38D1B717;
	max.ftz.f32 	%f199, %f198, %f195;
	min.ftz.f32 	%f200, %f48, %f199;
	max.ftz.f32 	%f201, %f198, %f197;
	min.ftz.f32 	%f202, %f49, %f201;
	add.ftz.f32 	%f184, %f200, 0f3F000000;
	add.ftz.f32 	%f185, %f202, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f180, %f181, %f182, %f183}, [inputTex, {%f184, %f185}];
	// inline asm
	// Linear output pixel index, sign-extended to 64 bits.
	add.s32 	%r60, %r63, %r64;
	cvt.s64.s32	%rd1, %r60;
	@%p27 bra 	BB3_19;

	// param_1 != 0: store four f32 values, 16 bytes per pixel.
	cvta.to.global.u64 	%rd10, %rd2;
	shl.b64 	%rd11, %rd1, 4;
	add.s64 	%rd12, %rd10, %rd11;
	st.global.v4.f32 	[%rd12], {%f180, %f181, %f182, %f183};
	bra.uni 	BB3_20;

BB3_19:
	// param_1 == 0: convert each channel to f16 (round-to-nearest) and store
	// four 16-bit values, 8 bytes per pixel.
	cvta.to.global.u64 	%rd13, %rd2;
	shl.b64 	%rd14, %rd1, 3;
	add.s64 	%rd15, %rd13, %rd14;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f180;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f181;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f182;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f183;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd15], {%rs1, %rs2, %rs3, %rs4};

BB3_20:
	// Next column for this thread: advance by ntid.x while column <= end x.
	mov.u32 	%r61, %ntid.x;
	add.s32 	%r22, %r61, %r20;
	setp.le.s32	%p28, %r22, %r11;
	mov.u32 	%r72, %r22;
	@%p28 bra 	BB3_12;

BB3_21:
	// Next row for this thread: advance by ntid.y while row <= end y.
	mov.u32 	%r62, %ntid.y;
	add.s32 	%r71, %r62, %r71;
	setp.le.s32	%p29, %r71, %r12;
	@%p29 bra 	BB3_9;

BB3_22:
	ret;
}


