//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Wed Jul 10 12:41:20 2013 (1373485280)
// Cuda compilation tools, release 5.5, V5.5.0
//

// PTX ISA version this module is written against.
.version 3.2
// Target architecture for code generation.
.target sm_30
// Addresses are 64 bits wide throughout this module.
.address_size 64

// Debug file table: indices 1 and 2 are the file numbers used by the `.loc`
// directives in the function bodies below.
	.file	1 "D:/singlebarrel/releases/2014.03/shared/adobe/MediaCore/Renderers/RendererGPU/Src/Effects/StabilizerWarp.cu", 1399785316, 10108
	.file	2 "d:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\mediacore\\external\\3rdparty\\nvidia\\cuda\\win\\include\\device_functions.h", 1399785281, 191626
// Texture reference sampled by the tex.2d instruction in StabilizerWarpKernel.
.global .texref inputTex;
// NUL-terminated ASCII bytes spelling "__CUDA_FTZ". No instruction in the
// portion of the file shown here references this symbol.
.global .align 1 .b8 $str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};

// QuadBoundingBox(const float2*, float*, float*, float*, float*)
//
// Computes the axis-aligned bounds of four consecutive (x, y) float pairs.
//   param_0 : pointer to the four points (8 floats, x at even offsets).
//   param_1 : receives the minimum x        param_2 : receives the maximum x
//   param_3 : receives the minimum y        param_4 : receives the maximum y
//
// All pointers are generic-space addresses and may alias one another, so each
// running bound is written back and re-read from memory on every step, and
// each input coordinate is re-read before both its min and its max update.
// Do not reorder or merge these memory operations.
.visible .func _Z15QuadBoundingBoxPK6float2PfS2_S2_S2_(
	.param .b64 _Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_0,
	.param .b64 _Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_1,
	.param .b64 _Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_2,
	.param .b64 _Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_3,
	.param .b64 _Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_4
)
{
	.reg .f32 	%coord, %bound, %merged;
	.reg .u64 	%quad, %loX, %hiX, %loY, %hiY;

	ld.param.u64 	%quad, [_Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_0];
	ld.param.u64 	%loX, [_Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_1];
	ld.param.u64 	%hiX, [_Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_2];
	ld.param.u64 	%loY, [_Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_3];
	ld.param.u64 	%hiY, [_Z15QuadBoundingBoxPK6float2PfS2_S2_S2__param_4];
	.loc 1 29 1

	// ---- point 0 seeds every bound ----
	ld.f32 	%coord, [%quad];
	st.f32 	[%hiX], %coord;
	st.f32 	[%loX], %coord;
	ld.f32 	%coord, [%quad+4];
	st.f32 	[%hiY], %coord;
	st.f32 	[%loY], %coord;

	// ---- point 1 (byte offsets 8 / 12) ----
	ld.f32 	%coord, [%quad+8];
	ld.f32 	%bound, [%loX];
	min.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%loX], %merged;
	ld.f32 	%coord, [%quad+8];
	ld.f32 	%bound, [%hiX];
	max.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%hiX], %merged;
	ld.f32 	%coord, [%quad+12];
	ld.f32 	%bound, [%loY];
	min.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%loY], %merged;
	ld.f32 	%coord, [%quad+12];
	ld.f32 	%bound, [%hiY];
	max.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%hiY], %merged;

	// ---- point 2 (byte offsets 16 / 20) ----
	ld.f32 	%coord, [%quad+16];
	ld.f32 	%bound, [%loX];
	min.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%loX], %merged;
	ld.f32 	%coord, [%quad+16];
	ld.f32 	%bound, [%hiX];
	max.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%hiX], %merged;
	ld.f32 	%coord, [%quad+20];
	ld.f32 	%bound, [%loY];
	min.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%loY], %merged;
	ld.f32 	%coord, [%quad+20];
	ld.f32 	%bound, [%hiY];
	max.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%hiY], %merged;

	// ---- point 3 (byte offsets 24 / 28) ----
	ld.f32 	%coord, [%quad+24];
	ld.f32 	%bound, [%loX];
	min.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%loX], %merged;
	ld.f32 	%coord, [%quad+24];
	ld.f32 	%bound, [%hiX];
	max.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%hiX], %merged;
	ld.f32 	%coord, [%quad+28];
	ld.f32 	%bound, [%loY];
	min.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%loY], %merged;
	ld.f32 	%coord, [%quad+28];
	ld.f32 	%bound, [%hiY];
	max.ftz.f32 	%merged, %bound, %coord;
	st.f32 	[%hiY], %merged;

	.loc 1 29 2
	ret;
}

// QuadContainsPoint(const float2*, const float2*) -> 32-bit 0/1
//
//   param_0 : pointer to four (x, y) float pairs; they are walked in the
//             order 0 -> 1 -> 3 -> 2 -> 0.
//   param_1 : pointer to the (x, y) point being tested.
//
// For each edge (a, b) of that walk, with a and b taken relative to the test
// point, the point is rejected when  a.x*b.y < b.x*a.y  or when the compare
// is unordered (NaN).  The function returns 1 only if no edge rejects.
//
// Every load is issued up front and the rest is pure arithmetic, so the four
// edge tests are evaluated unconditionally and combined with OR.
.visible .func  (.param .b32 func_retval0) _Z17QuadContainsPointPK6float2S1_(
	.param .b64 _Z17QuadContainsPointPK6float2S1__param_0,
	.param .b64 _Z17QuadContainsPointPK6float2S1__param_1
)
{
	.reg .pred 	%out01, %out13, %out32, %out20, %reject;
	.reg .u32 	%result;
	.reg .f32 	%px, %py;
	.reg .f32 	%d0x, %d0y, %d1x, %d1y, %d2x, %d2y, %d3x, %d3y;
	.reg .f32 	%lhs, %rhs;
	.reg .u64 	%quad, %pt;

	ld.param.u64 	%quad, [_Z17QuadContainsPointPK6float2S1__param_0];
	ld.param.u64 	%pt, [_Z17QuadContainsPointPK6float2S1__param_1];
	.loc 1 29 1

	// Fetch the test point and all four corners.
	ld.v2.f32 	{%px, %py}, [%pt];
	ld.v2.f32 	{%d0x, %d0y}, [%quad];
	ld.v2.f32 	{%d1x, %d1y}, [%quad+8];
	ld.v2.f32 	{%d2x, %d2y}, [%quad+16];
	ld.v2.f32 	{%d3x, %d3y}, [%quad+24];

	// Express each corner relative to the test point.
	sub.ftz.f32 	%d0x, %d0x, %px;
	sub.ftz.f32 	%d0y, %d0y, %py;
	sub.ftz.f32 	%d1x, %d1x, %px;
	sub.ftz.f32 	%d1y, %d1y, %py;
	sub.ftz.f32 	%d3x, %d3x, %px;
	sub.ftz.f32 	%d3y, %d3y, %py;
	sub.ftz.f32 	%d2x, %d2x, %px;
	sub.ftz.f32 	%d2y, %d2y, %py;

	// Edge 0 -> 1
	mul.ftz.f32 	%lhs, %d0x, %d1y;
	mul.ftz.f32 	%rhs, %d1x, %d0y;
	setp.ltu.ftz.f32	%out01, %lhs, %rhs;

	// Edge 1 -> 3
	mul.ftz.f32 	%lhs, %d1x, %d3y;
	mul.ftz.f32 	%rhs, %d3x, %d1y;
	setp.ltu.ftz.f32	%out13, %lhs, %rhs;

	// Edge 3 -> 2
	mul.ftz.f32 	%lhs, %d3x, %d2y;
	mul.ftz.f32 	%rhs, %d2x, %d3y;
	setp.ltu.ftz.f32	%out32, %lhs, %rhs;

	// Edge 2 -> 0
	mul.ftz.f32 	%lhs, %d2x, %d0y;
	mul.ftz.f32 	%rhs, %d0x, %d2y;
	setp.ltu.ftz.f32	%out20, %lhs, %rhs;

	// Any failing edge rejects the point.
	or.pred  	%reject, %out01, %out13;
	or.pred  	%reject, %reject, %out32;
	or.pred  	%reject, %reject, %out20;
	selp.u32 	%result, 0, 1, %reject;

	st.param.b32	[func_retval0+0], %result;
	.loc 1 29 1
	ret;
}

// computeInverseTransform(const float2*, float*)
//
//   param_0 : pointer to four (x, y) float pairs v0..v3 (byte offsets 0/8/16/24).
//   param_1 : pointer to nine floats that receive the result, out[0..8].
//
// Steps:
//   1. s = v0 - v1 + v3 - v2.
//   2. If |s.x| or |s.y| is below 0f3727C5AC (about 1e-5), use the simple
//      branch: g = h = 0, (a, d) = v1 - v0, (b, e) = v3 - v1.
//      Otherwise, with det = (v1-v3).x*(v2-v3).y - (v2-v3).x*(v1-v3).y:
//        g = (s.x*(v2-v3).y - (v2-v3).x*s.y) / det
//        h = ((v1-v3).x*s.y - s.x*(v1-v3).y) / det
//        a = g*v1.x + (v1.x - v0.x)      b = h*v2.x + (v2.x - v0.x)
//        d = g*v1.y + (v1.y - v0.y)      e = h*v2.y + (v2.y - v0.y)
//   3. Store nine 2x2 cofactor-style products built from a, b, d, e, g, h and
//      v0 (the exact formula is noted at each store).
//
// All loads from param_0 complete before the first store through param_1.
.visible .func _Z23computeInverseTransformPK6float2Pf(
	.param .b64 _Z23computeInverseTransformPK6float2Pf_param_0,
	.param .b64 _Z23computeInverseTransformPK6float2Pf_param_1
)
{
	.reg .pred 	%tinyX, %tinyY, %simple;
	.reg .f32 	%v0x, %v0y, %v1x, %v1y, %v2x, %v2y, %v3x, %v3y;
	.reg .f32 	%sumX, %sumY, %mag;
	.reg .f32 	%e13x, %e13y, %e23x, %e23y, %det, %num;
	.reg .f32 	%cA, %cB, %cD, %cE, %cG, %cH;
	.reg .f32 	%lhs, %rhs, %out;
	.reg .u64 	%quad, %dst;

	ld.param.u64 	%quad, [_Z23computeInverseTransformPK6float2Pf_param_0];
	ld.param.u64 	%dst, [_Z23computeInverseTransformPK6float2Pf_param_1];
	.loc 1 29 1

	ld.v2.f32 	{%v0x, %v0y}, [%quad];
	ld.v2.f32 	{%v1x, %v1y}, [%quad+8];
	ld.v2.f32 	{%v2x, %v2y}, [%quad+16];
	ld.v2.f32 	{%v3x, %v3y}, [%quad+24];

	// s = ((v0 - v1) + v3) - v2, evaluated in exactly this association.
	sub.ftz.f32 	%sumX, %v0x, %v1x;
	add.ftz.f32 	%sumX, %sumX, %v3x;
	sub.ftz.f32 	%sumX, %sumX, %v2x;
	sub.ftz.f32 	%sumY, %v0y, %v1y;
	add.ftz.f32 	%sumY, %sumY, %v3y;
	sub.ftz.f32 	%sumY, %sumY, %v2y;

	// simple = |s.x| < eps  OR  |s.y| < eps   (ordered compares: NaN -> false)
	abs.ftz.f32 	%mag, %sumX;
	setp.lt.ftz.f32	%tinyX, %mag, 0f3727C5AC;
	abs.ftz.f32 	%mag, %sumY;
	setp.lt.ftz.f32	%tinyY, %mag, 0f3727C5AC;
	or.pred  	%simple, %tinyX, %tinyY;

	// (a, d) start as v1 - v0 on both paths.
	sub.ftz.f32 	%cA, %v1x, %v0x;
	sub.ftz.f32 	%cD, %v1y, %v0y;
	@%simple bra 	CIT_SIMPLE;

	// ---- general path ----
	sub.ftz.f32 	%e13x, %v1x, %v3x;
	sub.ftz.f32 	%e23y, %v2y, %v3y;
	sub.ftz.f32 	%e13y, %v1y, %v3y;
	sub.ftz.f32 	%e23x, %v2x, %v3x;
	mul.ftz.f32 	%lhs, %e13x, %e23y;
	mul.ftz.f32 	%rhs, %e23x, %e13y;
	sub.ftz.f32 	%det, %lhs, %rhs;

	mul.ftz.f32 	%lhs, %sumX, %e23y;
	mul.ftz.f32 	%rhs, %e23x, %sumY;
	sub.ftz.f32 	%num, %lhs, %rhs;
	.loc 2 3606 10
	div.approx.ftz.f32 	%cG, %num, %det;

	.loc 1 29 1
	mul.ftz.f32 	%lhs, %e13x, %sumY;
	mul.ftz.f32 	%rhs, %sumX, %e13y;
	sub.ftz.f32 	%num, %lhs, %rhs;
	.loc 2 3606 10
	div.approx.ftz.f32 	%cH, %num, %det;

	.loc 1 29 1
	fma.rn.ftz.f32 	%cA, %cG, %v1x, %cA;
	sub.ftz.f32 	%cB, %v2x, %v0x;
	fma.rn.ftz.f32 	%cB, %cH, %v2x, %cB;
	fma.rn.ftz.f32 	%cD, %cG, %v1y, %cD;
	sub.ftz.f32 	%cE, %v2y, %v0y;
	fma.rn.ftz.f32 	%cE, %cH, %v2y, %cE;
	bra.uni 	CIT_STORE;

CIT_SIMPLE:
	// ---- simple path: no perspective terms ----
	.loc 1 29 1
	sub.ftz.f32 	%cB, %v3x, %v1x;
	sub.ftz.f32 	%cE, %v3y, %v1y;
	mov.f32 	%cH, 0f00000000;
	mov.f32 	%cG, %cH;

CIT_STORE:
	.loc 1 29 1
	// out[0] = e - v0.y*h
	mul.ftz.f32 	%rhs, %v0y, %cH;
	sub.ftz.f32 	%out, %cE, %rhs;
	st.f32 	[%dst], %out;
	// out[1] = v0.y*g - d
	mul.ftz.f32 	%lhs, %v0y, %cG;
	sub.ftz.f32 	%out, %lhs, %cD;
	st.f32 	[%dst+4], %out;
	// out[2] = d*h - e*g
	mul.ftz.f32 	%rhs, %cE, %cG;
	mul.ftz.f32 	%lhs, %cD, %cH;
	sub.ftz.f32 	%out, %lhs, %rhs;
	st.f32 	[%dst+8], %out;
	// out[3] = v0.x*h - b
	mul.ftz.f32 	%lhs, %v0x, %cH;
	sub.ftz.f32 	%out, %lhs, %cB;
	st.f32 	[%dst+12], %out;
	// out[4] = a - v0.x*g
	mul.ftz.f32 	%rhs, %v0x, %cG;
	sub.ftz.f32 	%out, %cA, %rhs;
	st.f32 	[%dst+16], %out;
	// out[5] = b*g - a*h
	mul.ftz.f32 	%rhs, %cA, %cH;
	mul.ftz.f32 	%lhs, %cB, %cG;
	sub.ftz.f32 	%out, %lhs, %rhs;
	st.f32 	[%dst+20], %out;
	// out[6] = b*v0.y - v0.x*e
	mul.ftz.f32 	%rhs, %v0x, %cE;
	mul.ftz.f32 	%lhs, %cB, %v0y;
	sub.ftz.f32 	%out, %lhs, %rhs;
	st.f32 	[%dst+24], %out;
	// out[7] = v0.x*d - a*v0.y
	mul.ftz.f32 	%rhs, %cA, %v0y;
	mul.ftz.f32 	%lhs, %v0x, %cD;
	sub.ftz.f32 	%out, %lhs, %rhs;
	st.f32 	[%dst+28], %out;
	// out[8] = a*e - b*d
	mul.ftz.f32 	%rhs, %cB, %cD;
	mul.ftz.f32 	%lhs, %cA, %cE;
	sub.ftz.f32 	%out, %lhs, %rhs;
	st.f32 	[%dst+32], %out;
	.loc 1 29 2
	ret;
}

// StabilizerWarpKernel -- kernel entry point.
//
// Each CTA works on one cell (%ctaid.x, %ctaid.y) of a grid of 2-D points held
// in global memory. It loads the cell's four corner points, derives a 3x3
// mapping from them (same arithmetic as computeInverseTransform above, inlined),
// then walks an integer rectangle around the scaled cell. For every coordinate
// that lies inside the quad it samples `inputTex` and stores the four-component
// texel into the output buffer.
//
// Parameter usage as visible in this body:
//   param_0      : never read.
//   param_1      : output format flag; 0 -> four f16 values (8 bytes/pixel),
//                  nonzero -> four f32 values (16 bytes/pixel).
//   param_2      : output buffer address (converted to a global address).
//   param_3 / 4  : exclusive x / y output limits when param_13 == 0; also the
//                  sizes used for the centring offset when param_13 != 0.
//   param_5      : output row stride in pixels (pixel index = x + y * param_5).
//   param_6 / 7  : (value - 1) is divided by (param_9/10 - 1) to scale cell
//                  coordinates to texture coordinates; (value - 1.0001) is the
//                  upper clamp of the texture coordinate.
//   param_8      : address of the point grid; two floats per point.
//   param_9 / 10 : grid dimensions in points (columns / rows).
//   param_11     : four floats {f1, f2, f3, f4}; ceil(f3-f1), ceil(f4-f2) give
//                  a width and height.
//   param_12     : four floats {f5, f6, f7, f8}; combined with f1/f2 to form a
//                  centre point.
//   param_13     : mode flag selecting which limits and sizes are used.
//   param_14..17 : x-min, y-min, x-limit, y-limit used when param_13 != 0.
//   param_18     : scale factor applied to grid-space coordinates.
// NOTE(review): the meanings above are read off the arithmetic only; confirm
// the intended names against the CUDA source.
.visible .entry StabilizerWarpKernel(
	.param .u64 StabilizerWarpKernel_param_0,
	.param .u32 StabilizerWarpKernel_param_1,
	.param .u64 StabilizerWarpKernel_param_2,
	.param .u32 StabilizerWarpKernel_param_3,
	.param .u32 StabilizerWarpKernel_param_4,
	.param .u32 StabilizerWarpKernel_param_5,
	.param .u32 StabilizerWarpKernel_param_6,
	.param .u32 StabilizerWarpKernel_param_7,
	.param .u64 StabilizerWarpKernel_param_8,
	.param .u32 StabilizerWarpKernel_param_9,
	.param .u32 StabilizerWarpKernel_param_10,
	.param .align 16 .b8 StabilizerWarpKernel_param_11[16],
	.param .align 16 .b8 StabilizerWarpKernel_param_12[16],
	.param .u32 StabilizerWarpKernel_param_13,
	.param .u32 StabilizerWarpKernel_param_14,
	.param .u32 StabilizerWarpKernel_param_15,
	.param .u32 StabilizerWarpKernel_param_16,
	.param .u32 StabilizerWarpKernel_param_17,
	.param .f32 StabilizerWarpKernel_param_18
)
{
	.reg .pred 	%p<30>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<79>;
	.reg .f32 	%f<218>;
	.reg .s64 	%rd<15>;


	// Load every parameter except param_0 into registers.
	ld.param.u32 	%r29, [StabilizerWarpKernel_param_1];
	ld.param.u64 	%rd2, [StabilizerWarpKernel_param_2];
	ld.param.u32 	%r30, [StabilizerWarpKernel_param_3];
	ld.param.u32 	%r31, [StabilizerWarpKernel_param_4];
	ld.param.u32 	%r32, [StabilizerWarpKernel_param_5];
	ld.param.u32 	%r33, [StabilizerWarpKernel_param_6];
	ld.param.u32 	%r34, [StabilizerWarpKernel_param_7];
	ld.param.u64 	%rd3, [StabilizerWarpKernel_param_8];
	ld.param.u32 	%r35, [StabilizerWarpKernel_param_9];
	ld.param.u32 	%r41, [StabilizerWarpKernel_param_10];
	ld.param.f32 	%f4, [StabilizerWarpKernel_param_11+12];
	ld.param.f32 	%f3, [StabilizerWarpKernel_param_11+8];
	ld.param.f32 	%f2, [StabilizerWarpKernel_param_11+4];
	ld.param.f32 	%f1, [StabilizerWarpKernel_param_11];
	ld.param.f32 	%f8, [StabilizerWarpKernel_param_12+12];
	ld.param.f32 	%f7, [StabilizerWarpKernel_param_12+8];
	ld.param.f32 	%f6, [StabilizerWarpKernel_param_12+4];
	ld.param.f32 	%f5, [StabilizerWarpKernel_param_12];
	ld.param.u32 	%r36, [StabilizerWarpKernel_param_13];
	ld.param.u32 	%r73, [StabilizerWarpKernel_param_14];
	ld.param.u32 	%r75, [StabilizerWarpKernel_param_15];
	ld.param.u32 	%r39, [StabilizerWarpKernel_param_16];
	ld.param.u32 	%r40, [StabilizerWarpKernel_param_17];
	ld.param.f32 	%f71, [StabilizerWarpKernel_param_18];
	cvta.to.global.u64 	%rd1, %rd3;
	// Guard: only CTAs with ctaid.x < param_9 - 1 and ctaid.y < param_10 - 1
	// do any work; all others fall through to the final ret.
	.loc 1 29 1
	add.s32 	%r1, %r35, -1;
	.loc 1 29 1
	mov.u32 	%r2, %ctaid.x;
	.loc 1 29 1
	setp.lt.s32	%p3, %r2, %r1;
	add.s32 	%r3, %r41, -1;
	.loc 1 29 1
	mov.u32 	%r4, %ctaid.y;
	.loc 1 29 1
	setp.lt.s32	%p4, %r4, %r3;
	and.pred  	%p5, %p3, %p4;
	@!%p5 bra 	BB3_24;
	bra.uni 	BB3_1;

BB3_1:
	// r42 = (int)ceil(f3 - f1), r43 = (int)ceil(f4 - f2); f9 / f10 are the same
	// values as floats, f76 / f77 are those values multiplied by param_18.
	.loc 1 29 1
	sub.ftz.f32 	%f72, %f3, %f1;
	.loc 2 2815 10
	cvt.rpi.ftz.f32.f32	%f73, %f72;
	.loc 2 3251 10
	cvt.rzi.ftz.s32.f32	%r42, %f73;
	.loc 1 29 1
	sub.ftz.f32 	%f74, %f4, %f2;
	.loc 2 2815 10
	cvt.rpi.ftz.f32.f32	%f75, %f74;
	.loc 2 3251 10
	cvt.rzi.ftz.s32.f32	%r43, %f75;
	.loc 1 29 1
	cvt.rn.f32.s32	%f9, %r42;
	mul.ftz.f32 	%f76, %f9, %f71;
	cvt.rn.f32.s32	%f10, %r43;
	mul.ftz.f32 	%f77, %f10, %f71;
	// rd5 -> grid point (ctaid.x, ctaid.y): element index (y*param_9 + x)*2,
	// four bytes per float.
	.loc 1 29 1
	mad.lo.s32 	%r44, %r4, %r35, %r2;
	shl.b32 	%r45, %r44, 1;
	mul.wide.s32 	%rd4, %r45, 4;
	add.s64 	%rd5, %rd1, %rd4;
	// rd7 -> grid point (ctaid.x, ctaid.y + 1), i.e. the same column one row down.
	.loc 1 29 1
	add.s32 	%r46, %r4, 1;
	mad.lo.s32 	%r47, %r46, %r35, %r2;
	shl.b32 	%r48, %r47, 1;
	mul.wide.s32 	%rd6, %r48, 4;
	add.s64 	%rd7, %rd1, %rd6;
	// Load the four corner points and fold them into a bounding box:
	//   (f12, f14) = point at rd5        (f11, f13) = next point in the row
	//   (f15, f16) = point at rd7        (f17, f18) = next point in that row
	//   f86 / f87 = min / max x          f88 / f89 = min / max y
	.loc 1 29 1
	ld.global.f32 	%f11, [%rd5+8];
	ld.global.f32 	%f12, [%rd5];
	.loc 2 2765 10
	min.ftz.f32 	%f78, %f12, %f11;
	.loc 2 2770 10
	max.ftz.f32 	%f79, %f12, %f11;
	.loc 1 29 1
	ld.global.f32 	%f13, [%rd5+12];
	ld.global.f32 	%f14, [%rd5+4];
	.loc 2 2765 10
	min.ftz.f32 	%f80, %f14, %f13;
	.loc 2 2770 10
	max.ftz.f32 	%f81, %f14, %f13;
	.loc 1 29 1
	ld.global.f32 	%f15, [%rd7];
	.loc 2 2765 10
	min.ftz.f32 	%f82, %f78, %f15;
	.loc 2 2770 10
	max.ftz.f32 	%f83, %f79, %f15;
	.loc 1 29 1
	ld.global.f32 	%f16, [%rd7+4];
	.loc 2 2765 10
	min.ftz.f32 	%f84, %f80, %f16;
	.loc 2 2770 10
	max.ftz.f32 	%f85, %f81, %f16;
	.loc 1 29 1
	ld.global.f32 	%f17, [%rd7+8];
	.loc 2 2765 10
	min.ftz.f32 	%f86, %f82, %f17;
	.loc 2 2770 10
	max.ftz.f32 	%f87, %f83, %f17;
	.loc 1 29 1
	ld.global.f32 	%f18, [%rd7+12];
	.loc 2 2765 10
	min.ftz.f32 	%f88, %f84, %f18;
	.loc 2 2770 10
	max.ftz.f32 	%f89, %f85, %f18;
	// f95  = ((f5 + f1) + (f7 + f1)) * param_18 / 2
	// f100 = ((f6 + f2) + (f8 + f2)) * param_18 / 2
	// (0f40000000 is 2.0; the division uses div.approx.)
	.loc 1 29 1
	add.ftz.f32 	%f90, %f7, %f1;
	add.ftz.f32 	%f91, %f5, %f1;
	add.ftz.f32 	%f92, %f91, %f90;
	mul.ftz.f32 	%f93, %f92, %f71;
	mov.f32 	%f94, 0f40000000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f95, %f93, %f94;
	.loc 1 29 1
	add.ftz.f32 	%f96, %f8, %f2;
	add.ftz.f32 	%f97, %f6, %f2;
	add.ftz.f32 	%f98, %f97, %f96;
	mul.ftz.f32 	%f99, %f98, %f71;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f100, %f99, %f94;
	// r5 / r6 = f95 / f100 truncated toward zero.
	.loc 2 3251 10
	cvt.rzi.ftz.s32.f32	%r5, %f95;
	cvt.rzi.ftz.s32.f32	%r6, %f100;
	// f102 = (W - 1) * 0.5 and f104 = (H - 1) * 0.5, where W/H are r42/r43
	// when param_13 == 0 and param_3/param_4 otherwise; r7 / r8 are the
	// truncated values.
	.loc 1 29 1
	setp.eq.s32	%p6, %r36, 0;
	selp.b32	%r49, %r42, %r30, %p6;
	add.s32 	%r50, %r49, -1;
	cvt.rn.f32.s32	%f101, %r50;
	.loc 1 29 66
	mul.ftz.f32 	%f102, %f101, 0f3F000000;
	.loc 1 29 1
	selp.b32	%r51, %r43, %r31, %p6;
	add.s32 	%r52, %r51, -1;
	cvt.rn.f32.s32	%f103, %r52;
	.loc 1 29 66
	mul.ftz.f32 	%f104, %f103, 0f3F000000;
	.loc 2 3251 10
	cvt.rzi.ftz.s32.f32	%r7, %f102;
	cvt.rzi.ftz.s32.f32	%r8, %f104;
	// f19 / f20: difference between the truncation remainders of (f95, f102)
	// and (f100, f104). They are added to the integer loop coordinates below.
	.loc 1 29 1
	cvt.rn.f32.s32	%f105, %r5;
	sub.ftz.f32 	%f106, %f95, %f105;
	cvt.rn.f32.s32	%f107, %r7;
	sub.ftz.f32 	%f108, %f102, %f107;
	sub.ftz.f32 	%f19, %f106, %f108;
	cvt.rn.f32.s32	%f109, %r6;
	sub.ftz.f32 	%f110, %f100, %f109;
	cvt.rn.f32.s32	%f111, %r8;
	sub.ftz.f32 	%f112, %f104, %f111;
	sub.ftz.f32 	%f20, %f110, %f112;
	.loc 2 3251 10
	cvt.rzi.ftz.s32.f32	%r53, %f76;
	cvt.rzi.ftz.s32.f32	%r54, %f77;
	// Integer scan rectangle for this cell:
	//   r9  = (int)floor(minX * scale - 1)     r10 = (int)floor(minY * scale - 1)
	//   r11 = min((int)ceil(maxX * scale + 1), r53)
	//   r12 = min((int)ceil(maxY * scale + 1), r54)
	.loc 1 29 1
	fma.rn.ftz.f32 	%f113, %f86, %f71, 0fBF800000;
	.loc 2 2740 10
	cvt.rmi.ftz.f32.f32	%f114, %f113;
	.loc 2 3251 10
	cvt.rzi.ftz.s32.f32	%r9, %f114;
	.loc 1 29 1
	fma.rn.ftz.f32 	%f115, %f88, %f71, 0fBF800000;
	.loc 2 2740 10
	cvt.rmi.ftz.f32.f32	%f116, %f115;
	.loc 2 3251 10
	cvt.rzi.ftz.s32.f32	%r10, %f116;
	.loc 1 29 1
	fma.rn.ftz.f32 	%f117, %f87, %f71, 0f3F800000;
	.loc 2 2815 10
	cvt.rpi.ftz.f32.f32	%f118, %f117;
	.loc 2 3251 10
	cvt.rzi.ftz.s32.f32	%r55, %f118;
	.loc 2 2621 10
	min.s32 	%r11, %r55, %r53;
	.loc 1 29 1
	fma.rn.ftz.f32 	%f119, %f89, %f71, 0f3F800000;
	.loc 2 2815 10
	cvt.rpi.ftz.f32.f32	%f120, %f119;
	.loc 2 3251 10
	cvt.rzi.ftz.s32.f32	%r56, %f120;
	.loc 2 2621 10
	min.s32 	%r12, %r56, %r54;
	// Output window [r73, r74) x [r75, r76):
	//   param_13 != 0 -> (param_14, param_16) x (param_15, param_17)
	//   param_13 == 0 -> (0, param_3) x (0, param_4)
	.loc 1 29 1
	setp.ne.s32	%p7, %r36, 0;
	mov.u32 	%r74, %r39;
	mov.u32 	%r76, %r40;
	@%p7 bra 	BB3_3;

	mov.u32 	%r75, 0;
	mov.u32 	%r73, %r75;
	mov.u32 	%r74, %r30;
	mov.u32 	%r76, %r31;

BB3_3:
	// f21 = (param_6 - 1) / (param_9 - 1), f22 = (param_7 - 1) / (param_10 - 1).
	// The divisors are nonzero here because of the entry guard.
	.loc 1 29 1
	add.s32 	%r59, %r33, -1;
	cvt.rn.f32.s32	%f121, %r59;
	cvt.rn.f32.s32	%f122, %r1;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f21, %f121, %f122;
	.loc 1 29 1
	add.s32 	%r60, %r34, -1;
	cvt.rn.f32.s32	%f123, %r60;
	cvt.rn.f32.s32	%f124, %r3;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f22, %f123, %f124;
	// ---- Inlined copy of the computeInverseTransform arithmetic ----
	// (f23, f24) = p0 - p1 + p3 - p2 with p0=(f12,f14), p1=(f11,f13),
	// p2=(f15,f16), p3=(f17,f18).
	.loc 1 29 1
	sub.ftz.f32 	%f125, %f12, %f11;
	add.ftz.f32 	%f126, %f125, %f17;
	sub.ftz.f32 	%f23, %f126, %f15;
	sub.ftz.f32 	%f127, %f14, %f13;
	add.ftz.f32 	%f128, %f127, %f18;
	sub.ftz.f32 	%f24, %f128, %f16;
	// p29 = |f23| < eps OR |f24| < eps, with eps = 0f3727C5AC (about 1e-5).
	.loc 2 2750 10
	abs.ftz.f32 	%f129, %f23;
	.loc 1 29 7
	setp.geu.ftz.f32	%p8, %f129, 0f3727C5AC;
	@%p8 bra 	BB3_5;

	mov.pred 	%p29, -1;
	bra.uni 	BB3_6;

BB3_5:
	.loc 2 2750 10
	abs.ftz.f32 	%f130, %f24;
	.loc 1 29 85
	setp.lt.ftz.f32	%p29, %f130, 0f3727C5AC;

BB3_6:
	// (f212, f214) = p1 - p0; used as-is on the simple path and as the fma
	// addend on the general path.
	.loc 1 29 1
	sub.ftz.f32 	%f212, %f11, %f12;
	sub.ftz.f32 	%f214, %f13, %f14;
	.loc 1 29 85
	@%p29 bra 	BB3_8;

	// General path: f137 is the shared denominator, f216 / f217 the two
	// quotients that feed the perspective terms.
	.loc 1 29 1
	sub.ftz.f32 	%f131, %f11, %f17;
	sub.ftz.f32 	%f132, %f16, %f18;
	mul.ftz.f32 	%f133, %f131, %f132;
	sub.ftz.f32 	%f134, %f13, %f18;
	sub.ftz.f32 	%f135, %f15, %f17;
	mul.ftz.f32 	%f136, %f135, %f134;
	sub.ftz.f32 	%f137, %f133, %f136;
	mul.ftz.f32 	%f138, %f23, %f132;
	mul.ftz.f32 	%f139, %f135, %f24;
	sub.ftz.f32 	%f140, %f138, %f139;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f216, %f140, %f137;
	.loc 1 29 1
	mul.ftz.f32 	%f141, %f131, %f24;
	mul.ftz.f32 	%f142, %f23, %f134;
	sub.ftz.f32 	%f143, %f141, %f142;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f217, %f143, %f137;
	.loc 1 29 1
	fma.rn.ftz.f32 	%f212, %f216, %f11, %f212;
	sub.ftz.f32 	%f144, %f15, %f12;
	fma.rn.ftz.f32 	%f213, %f217, %f15, %f144;
	fma.rn.ftz.f32 	%f214, %f216, %f13, %f214;
	sub.ftz.f32 	%f145, %f16, %f14;
	fma.rn.ftz.f32 	%f215, %f217, %f16, %f145;
	bra.uni 	BB3_9;

BB3_8:
	// Simple path: both quotients are zero and (f213, f215) = p3 - p1.
	.loc 1 29 1
	sub.ftz.f32 	%f213, %f17, %f11;
	sub.ftz.f32 	%f215, %f18, %f13;
	mov.f32 	%f217, 0f00000000;
	mov.f32 	%f216, %f217;

BB3_9:
	// Nine mapping coefficients f41..f49, kept in registers. They correspond,
	// in order, to the nine values computeInverseTransform stores.
	.loc 1 29 1
	mul.ftz.f32 	%f148, %f14, %f217;
	sub.ftz.f32 	%f41, %f215, %f148;
	mul.ftz.f32 	%f149, %f14, %f216;
	sub.ftz.f32 	%f42, %f149, %f214;
	mul.ftz.f32 	%f150, %f215, %f216;
	mul.ftz.f32 	%f151, %f214, %f217;
	sub.ftz.f32 	%f43, %f151, %f150;
	mul.ftz.f32 	%f152, %f12, %f217;
	sub.ftz.f32 	%f44, %f152, %f213;
	mul.ftz.f32 	%f153, %f12, %f216;
	sub.ftz.f32 	%f45, %f212, %f153;
	mul.ftz.f32 	%f154, %f212, %f217;
	mul.ftz.f32 	%f155, %f213, %f216;
	sub.ftz.f32 	%f46, %f155, %f154;
	mul.ftz.f32 	%f156, %f12, %f215;
	mul.ftz.f32 	%f157, %f213, %f14;
	sub.ftz.f32 	%f47, %f157, %f156;
	mul.ftz.f32 	%f158, %f212, %f14;
	mul.ftz.f32 	%f159, %f12, %f214;
	sub.ftz.f32 	%f48, %f159, %f158;
	mul.ftz.f32 	%f160, %f213, %f214;
	mul.ftz.f32 	%f161, %f212, %f215;
	sub.ftz.f32 	%f49, %f161, %f160;
	// Row loop start: r77 = tid.y + r10. Skip everything if it already
	// exceeds the last row r12.
	.loc 1 29 1
	mov.u32 	%r61, %tid.y;
	add.s32 	%r77, %r61, %r10;
	.loc 1 29 1
	setp.gt.s32	%p10, %r77, %r12;
	@%p10 bra 	BB3_24;

	// Loop-invariant setup. r68 / r69 re-read param_6 / param_7 and r67 / r66
	// re-read ctaid.x / ctaid.y (the same values are already in r33 / r34 and
	// r2 / r4).
	ld.param.u32 	%r69, [StabilizerWarpKernel_param_7];
	ld.param.u32 	%r68, [StabilizerWarpKernel_param_6];
	.loc 1 29 1
	mov.u32 	%r67, %ctaid.x;
	mov.u32 	%r66, %ctaid.y;
	// r18 / r20: integer offsets added to the loop row / column to obtain the
	// output row / column. r19: first column for this thread (tid.x + r9).
	.loc 1 29 1
	sub.s32 	%r18, %r8, %r6;
	.loc 1 29 1
	mov.u32 	%r62, %tid.x;
	add.s32 	%r19, %r62, %r9;
	.loc 1 29 1
	sub.s32 	%r20, %r7, %r5;
	// f50 / f51 = cell indices as floats; f52 / f53 = param_6 / param_7 minus
	// about 1.0001 (0fBF800347), used as the upper clamp for texture coordinates.
	.loc 1 29 1
	cvt.rn.f32.s32	%f50, %r67;
	cvt.rn.f32.s32	%f51, %r66;
	cvt.rn.f32.s32	%f162, %r68;
	add.ftz.f32 	%f52, %f162, 0fBF800347;
	cvt.rn.f32.s32	%f163, %r69;
	add.ftz.f32 	%f53, %f163, 0fBF800347;

BB3_11:
	// ---- Row loop body: r77 runs from tid.y + r10 to r12 in steps of ntid.y ----
	// Skip the row if its output row r22 is outside [r75, r76).
	.loc 1 29 1
	add.s32 	%r22, %r18, %r77;
	.loc 1 29 1
	setp.lt.s32	%p11, %r22, %r75;
	setp.ge.s32	%p12, %r22, %r76;
	or.pred  	%p13, %p11, %p12;
	.loc 1 29 1
	@%p13 bra 	BB3_23;

	// f54 = (row + f20) / scale: the row coordinate mapped back to grid space.
	setp.gt.s32	%p14, %r19, %r11;
	.loc 1 29 1
	cvt.rn.f32.s32	%f164, %r77;
	add.ftz.f32 	%f165, %f164, %f20;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f54, %f165, %f71;
	.loc 1 29 1
	@%p14 bra 	BB3_23;

	mov.u32 	%r78, %r19;

BB3_14:
	// ---- Column loop body: r24 runs from tid.x + r9 to r11 in steps of ntid.x ----
	// Skip the pixel if its output column r25 is outside [r73, r74).
	.loc 1 29 1
	mov.u32 	%r24, %r78;
	add.s32 	%r25, %r20, %r24;
	.loc 1 29 1
	setp.lt.s32	%p15, %r25, %r73;
	setp.ge.s32	%p16, %r25, %r74;
	or.pred  	%p17, %p15, %p16;
	.loc 1 29 1
	@%p17 bra 	BB3_22;

	// f58 = (column + f19) / scale. What follows is the same four-edge
	// containment test as QuadContainsPoint (edges p0-p1, p1-p3, p3-p2, p2-p0),
	// with (f58, f54) as the test point; any failing edge skips the pixel.
	.loc 1 29 1
	cvt.rn.f32.s32	%f166, %r24;
	add.ftz.f32 	%f167, %f166, %f19;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f58, %f167, %f71;
	.loc 1 29 1
	sub.ftz.f32 	%f59, %f12, %f58;
	sub.ftz.f32 	%f60, %f11, %f58;
	sub.ftz.f32 	%f168, %f13, %f54;
	.loc 1 29 1
	mul.ftz.f32 	%f169, %f59, %f168;
	.loc 1 29 1
	sub.ftz.f32 	%f170, %f14, %f54;
	.loc 1 29 1
	mul.ftz.f32 	%f171, %f60, %f170;
	setp.ltu.ftz.f32	%p18, %f169, %f171;
	@%p18 bra 	BB3_22;

	.loc 1 29 1
	sub.ftz.f32 	%f61, %f17, %f58;
	sub.ftz.f32 	%f172, %f18, %f54;
	.loc 1 29 1
	mul.ftz.f32 	%f173, %f60, %f172;
	mul.ftz.f32 	%f175, %f61, %f168;
	setp.ltu.ftz.f32	%p19, %f173, %f175;
	@%p19 bra 	BB3_22;

	.loc 1 29 1
	sub.ftz.f32 	%f62, %f15, %f58;
	sub.ftz.f32 	%f176, %f16, %f54;
	.loc 1 29 1
	mul.ftz.f32 	%f177, %f61, %f176;
	mul.ftz.f32 	%f179, %f62, %f172;
	setp.ltu.ftz.f32	%p20, %f177, %f179;
	@%p20 bra 	BB3_22;

	// Last edge (p2-p0). f211 / f207 recompute the same values as f170 / f59.
	// The pixel is also skipped when f58 >= f9 or f54 >= f10 (or either
	// compare is unordered).
	.loc 1 29 1
	sub.ftz.f32 	%f211, %f14, %f54;
	sub.ftz.f32 	%f207, %f12, %f58;
	.loc 1 29 1
	mul.ftz.f32 	%f181, %f62, %f211;
	mul.ftz.f32 	%f183, %f207, %f176;
	setp.ltu.ftz.f32	%p21, %f181, %f183;
	setp.geu.ftz.f32	%p22, %f58, %f9;
	or.pred  	%p23, %p22, %p21;
	setp.geu.ftz.f32	%p24, %f54, %f10;
	or.pred  	%p25, %p23, %p24;
	@%p25 bra 	BB3_22;

	// Point accepted. Apply the mapping to (x, y) = (f58, f54):
	//   w = f43*x + f46*y + f49
	//   u = (f41*x + f44*y + f47) / w
	//   v = (f42*x + f45*y + f48) / w
	// r71 = output row * param_5, r65 = output column.
	.loc 1 29 1
	mul.ftz.f32 	%f210, %f46, %f54;
	.loc 1 29 1
	add.s32 	%r72, %r18, %r77;
	.loc 1 29 1
	mul.lo.s32 	%r71, %r72, %r32;
	.loc 1 29 1
	mul.ftz.f32 	%f209, %f45, %f54;
	mul.ftz.f32 	%f208, %f44, %f54;
	.loc 1 29 1
	add.s32 	%r65, %r20, %r24;
	setp.eq.s32	%p26, %r29, 0;
	.loc 1 29 1
	fma.rn.ftz.f32 	%f190, %f43, %f58, %f210;
	add.ftz.f32 	%f191, %f190, %f49;
	fma.rn.ftz.f32 	%f192, %f41, %f58, %f208;
	add.ftz.f32 	%f193, %f192, %f47;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f194, %f193, %f191;
	.loc 1 29 1
	fma.rn.ftz.f32 	%f195, %f42, %f58, %f209;
	add.ftz.f32 	%f196, %f195, %f48;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f197, %f196, %f191;
	// Texture coordinate: (cell index + u or v) * f21 or f22 - 0.5, clamped to
	// [0f38D1B717 (about 1e-4), f52 or f53], then + 0.5.
	.loc 1 29 1
	add.ftz.f32 	%f198, %f50, %f194;
	fma.rn.ftz.f32 	%f199, %f198, %f21, 0fBF000000;
	add.ftz.f32 	%f200, %f51, %f197;
	fma.rn.ftz.f32 	%f201, %f200, %f22, 0fBF000000;
	mov.f32 	%f202, 0f38D1B717;
	.loc 2 2770 10
	max.ftz.f32 	%f203, %f202, %f199;
	.loc 2 2765 10
	min.ftz.f32 	%f204, %f52, %f203;
	.loc 2 2770 10
	max.ftz.f32 	%f205, %f202, %f201;
	.loc 2 2765 10
	min.ftz.f32 	%f206, %f53, %f205;
	.loc 1 29 1
	add.ftz.f32 	%f188, %f204, 0f3F000000;
	add.ftz.f32 	%f189, %f206, 0f3F000000;
	// Fetch four f32 components from inputTex at (f188, f189).
	.loc 1 29 254
	// inline asm
	tex.2d.v4.f32.f32 {%f184, %f185, %f186, %f187}, [inputTex, {%f188, %f189}];
	// inline asm
	// r26 = linear output pixel index = column + row * param_5.
	.loc 1 29 1
	add.s32 	%r26, %r65, %r71;
	.loc 1 29 1
	@%p26 bra 	BB3_21;

	// param_1 != 0: store the texel as four f32 values, 16 bytes per pixel.
	cvta.to.global.u64 	%rd9, %rd2;
	mul.wide.s32 	%rd10, %r26, 16;
	add.s64 	%rd11, %rd9, %rd10;
	.loc 1 29 1
	st.global.v4.f32 	[%rd11], {%f184, %f185, %f186, %f187};
	bra.uni 	BB3_22;

BB3_21:
	// param_1 == 0: convert each component to f16 (round-to-nearest, ftz) and
	// store four 16-bit values, 8 bytes per pixel.
	cvta.to.global.u64 	%rd12, %rd2;
	mul.wide.s32 	%rd13, %r26, 8;
	add.s64 	%rd14, %rd12, %rd13;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f184;
	mov.b16 	%rs1, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f185;
	mov.b16 	%rs2, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f186;
	mov.b16 	%rs3, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f187;
	mov.b16 	%rs4, %temp;
}
	.loc 1 29 232
	st.global.v4.u16 	[%rd14], {%rs1, %rs2, %rs3, %rs4};

BB3_22:
	// Advance the column by ntid.x and repeat while it is <= r11.
	.loc 1 29 61
	mov.u32 	%r63, %ntid.x;
	add.s32 	%r27, %r63, %r24;
	.loc 1 29 1
	setp.le.s32	%p27, %r27, %r11;
	mov.u32 	%r78, %r27;
	@%p27 bra 	BB3_14;

BB3_23:
	// Advance the row by ntid.y and repeat while it is <= r12.
	.loc 1 29 61
	mov.u32 	%r64, %ntid.y;
	add.s32 	%r77, %r64, %r77;
	.loc 1 29 1
	setp.le.s32	%p28, %r77, %r12;
	@%p28 bra 	BB3_11;

BB3_24:
	// Common exit for guarded-out CTAs and finished threads.
	.loc 1 29 2
	ret;
}


