//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//

.version 4.1
.target sm_30
.address_size 64

// Module-scope texture references. The functions in this file name them
// directly in their tex.2d instructions.
// im1Tex / im2Tex: sampled by __d_blk_match_cost_texmem_lerp.
.global .texref im1Tex;
.global .texref im2Tex;
// uvTex: sampled by __d_accumulate_valid_uv.
.global .texref uvTex;
// lum1Tex / lum2Tex: sampled by __d_lk_update_motion_vector and
// _d_lucas_kanade_kernel.
.global .texref lum1Tex;
.global .texref lum2Tex;
// fwdUVTex / bwdUVTex: sampled by __d_fetch_valid_fwduv and
// __d_fetch_valid_bwduv respectively.
.global .texref fwdUVTex;
.global .texref bwdUVTex;
// im1RGBTex / im2RGBTex: NOTE(review) no user is visible in the reviewed part
// of this file; presumably referenced further down - confirm.
.global .texref im1RGBTex;
.global .texref im2RGBTex;

// float __d_atomic_min(float *slot, float candidate)
//
// Compare-and-swap loop that tries to replace *slot with `candidate` while
// the value observed in *slot is greater than `candidate`.
//   param_0 : generic address of the 32-bit float to update
//   param_1 : candidate value
//   return  : the last value observed in *slot (the initial plain load when
//             no swap was attempted, otherwise the value returned by the
//             most recent atom.cas)
// The ordering test is a float compare (.ftz); the swap itself works on the
// raw 32-bit patterns.
.visible .func  (.param .b32 func_retval0) _Z14__d_atomic_minPff(
	.param .b64 _Z14__d_atomic_minPff_param_0,
	.param .b32 _Z14__d_atomic_minPff_param_1
)
{
	.reg .pred 	%is_above, %is_changed, %retry;
	.reg .s32 	%cand_bits, %seen_bits, %expect_bits;
	.reg .f32 	%cand, %seen;
	.reg .s64 	%slot;

	ld.param.f32 	%cand, [_Z14__d_atomic_minPff_param_1];
	ld.param.u64 	%slot, [_Z14__d_atomic_minPff_param_0];
	mov.b32 	%cand_bits, %cand;

	// Initial snapshot of *slot; skip the loop unless it is greater than
	// the candidate (and its bit pattern differs from the candidate's).
	ld.u32 	%seen_bits, [%slot];
	mov.b32 	%seen, %seen_bits;
	setp.ne.s32 	%is_changed, %seen_bits, %cand_bits;
	setp.gt.ftz.f32 	%is_above, %seen, %cand;
	and.pred 	%retry, %is_above, %is_changed;
	@!%retry bra 	AMIN_DONE;

AMIN_SWAP:
	// Swap only if *slot still holds the value we last observed.
	mov.u32 	%expect_bits, %seen_bits;
	atom.cas.b32 	%seen_bits, [%slot], %expect_bits, %cand_bits;
	mov.b32 	%seen, %seen_bits;
	// Go round again only when the CAS lost the race (returned bits differ
	// from the expected ones) and the new occupant is still greater.
	setp.gt.ftz.f32 	%is_above, %seen, %cand;
	setp.ne.s32 	%is_changed, %seen_bits, %expect_bits;
	and.pred 	%retry, %is_above, %is_changed;
	@%retry bra 	AMIN_SWAP;

AMIN_DONE:
	st.param.f32 	[func_retval0+0], %seen;
	ret;
}

// float __d_lk_update_motion_vector(float2 *mv, int, int, int, int, int, int,
//                                   float, void *, void *)
//
// Reads the float2 at param_0, accumulates per-pixel statistics from lum1Tex
// and lum2Tex over a clipped window, solves a 2x2 system for an increment and
// writes mv + increment back to param_0.
// NOTE(review): the name suggests one Lucas-Kanade refinement step - confirm
// against the CUDA source.
//
// Parameters as used by the code below:
//   param_0 : pointer to the float2 that is read and (on the update path)
//             rewritten
//   param_1 : int, multiplied by (param_5 - param_6) to form the x base f1
//   param_2 : int, multiplied by (param_5 - param_6) to form the y base f2
//   param_3 : int, upper limit used for the x range checks / clipping
//   param_4 : int, upper limit used for the y range checks / clipping
//   param_5 : int, window extent; also the minimum sample count required
//   param_6 : int, subtracted from param_5 to form the base multiplier
//   param_7 : float threshold on the mean absolute difference
//   param_8, param_9 : never loaded in this function
// Return value (f252):
//   0f7F7FFFFF (FLT_MAX) when a range check fails or too few samples were
//   accumulated; 0.0 when the mean absolute difference is below param_7 (no
//   update written); otherwise the mean absolute difference (update written).
.visible .func  (.param .b32 func_retval0) _Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1_(
	.param .b64 _Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_0,
	.param .b32 _Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_1,
	.param .b32 _Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_2,
	.param .b32 _Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_3,
	.param .b32 _Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_4,
	.param .b32 _Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_5,
	.param .b32 _Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_6,
	.param .b32 _Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_7,
	.param .b64 _Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_8,
	.param .b64 _Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_9
)
{
	.reg .pred 	%p<23>;
	.reg .s32 	%r<46>;
	.reg .f32 	%f<253>;
	.reg .s64 	%rd<14>;


	ld.param.u64 	%rd1, [_Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_0];
	ld.param.u32 	%r21, [_Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_1];
	ld.param.u32 	%r22, [_Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_2];
	ld.param.u32 	%r18, [_Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_3];
	ld.param.u32 	%r19, [_Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_4];
	ld.param.u32 	%r20, [_Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_5];
	ld.param.u32 	%r23, [_Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_6];
	ld.param.f32 	%f71, [_Z27__d_lk_update_motion_vectorP6float2iiiiiifPvS1__param_7];
	// f1/f2 = window origin in the first image: (param_5 - param_6) * param_1/2.
	sub.s32 	%r24, %r20, %r23;
	mul.lo.s32 	%r25, %r24, %r21;
	cvt.rn.f32.s32	%f1, %r25;
	mul.lo.s32 	%r26, %r24, %r22;
	cvt.rn.f32.s32	%f2, %r26;
	// (f72, f73) = current motion vector; (f5, f6) = origin displaced by it.
	ld.v2.f32 	{%f72, %f73}, [%rd1];
	add.ftz.f32 	%f5, %f1, %f72;
	add.ftz.f32 	%f6, %f2, %f73;
	cvt.rn.f32.s32	%f7, %r20;
	// Range checks. Each failing check returns f252 = FLT_MAX. The compares
	// are the unordered variants, so a NaN operand passes the check.
	// Check 1: f5 + param_5 > 0.
	add.ftz.f32 	%f74, %f5, %f7;
	setp.gtu.ftz.f32	%p1, %f74, 0f00000000;
	mov.f32 	%f252, 0f7F7FFFFF;
	@%p1 bra 	BB1_1;
	bra.uni 	BB1_34;

BB1_1:
	// Check 2: f5 < param_3 - 1.
	add.s32 	%r27, %r18, -1;
	cvt.rn.f32.s32	%f76, %r27;
	setp.ltu.ftz.f32	%p2, %f5, %f76;
	@%p2 bra 	BB1_2;
	bra.uni 	BB1_34;

BB1_2:
	// Check 3: f6 + param_5 > 0.
	add.ftz.f32 	%f78, %f6, %f7;
	setp.gtu.ftz.f32	%p3, %f78, 0f00000000;
	@%p3 bra 	BB1_3;
	bra.uni 	BB1_34;

BB1_3:
	// Check 4: f6 < param_4 - 1.
	add.s32 	%r28, %r19, -1;
	cvt.rn.f32.s32	%f80, %r28;
	setp.ltu.ftz.f32	%p4, %f6, %f80;
	@%p4 bra 	BB1_4;
	bra.uni 	BB1_34;

BB1_4:
	// Row start r41: 0, or -trunc(f6) when the displaced origin is negative.
	setp.lt.ftz.f32	%p5, %f6, 0f00000000;
	@%p5 bra 	BB1_6;

	mov.u32 	%r41, 0;
	bra.uni 	BB1_7;

BB1_6:
	cvt.rzi.ftz.s32.f32	%r30, %f6;
	neg.s32 	%r41, %r30;

BB1_7:
	// Row end r40/r4: param_5, reduced by the overshoot when
	// max(f2, f6) + param_5 reaches param_4.
	setp.gt.ftz.f32	%p6, %f2, %f6;
	selp.f32	%f82, %f2, %f6, %p6;
	add.ftz.f32 	%f8, %f82, %f7;
	cvt.rn.f32.s32	%f9, %r19;
	setp.ltu.ftz.f32	%p7, %f8, %f9;
	mov.u32 	%r40, %r20;
	@%p7 bra 	BB1_9;

	sub.ftz.f32 	%f83, %f8, %f9;
	sub.ftz.f32 	%f84, %f7, %f83;
	cvt.rzi.ftz.s32.f32	%r3, %f84;
	mov.u32 	%r40, %r3;

BB1_9:
	// Column start r44/r6: 0, or -trunc(f5) when f5 is negative.
	mov.u32 	%r4, %r40;
	setp.lt.ftz.f32	%p8, %f5, 0f00000000;
	@%p8 bra 	BB1_11;

	mov.u32 	%r44, 0;
	bra.uni 	BB1_12;

BB1_11:
	cvt.rzi.ftz.s32.f32	%r32, %f5;
	neg.s32 	%r44, %r32;

BB1_12:
	// Column end r39: param_5, reduced by the overshoot when
	// max(f1, f5) + param_5 reaches param_3.
	mov.u32 	%r6, %r44;
	setp.gt.ftz.f32	%p9, %f1, %f5;
	selp.f32	%f85, %f1, %f5, %p9;
	add.ftz.f32 	%f86, %f85, %f7;
	cvt.rn.f32.s32	%f10, %r18;
	setp.ltu.ftz.f32	%p10, %f86, %f10;
	mov.u32 	%r39, %r20;
	@%p10 bra 	BB1_14;

	sub.ftz.f32 	%f90, %f86, %f10;
	sub.ftz.f32 	%f91, %f7, %f90;
	cvt.rzi.ftz.s32.f32	%r39, %f91;

BB1_14:
	// Zero the accumulators:
	//   f246 = sum gx*gx   f245 = sum gx*gy   f244 = sum gy*gy
	//   f243 = sum gx*d    f242 = sum gy*d
	//   f241 = sum d*d     f240 = sum mean|d|   r45 = sample count
	// where d is the per-channel lum1 - lum2 difference and gx/gy are the
	// halved central differences of lum2Tex (first three channels only).
	mov.f32 	%f246, 0f00000000;
	setp.lt.s32	%p12, %r41, %r4;
	@%p12 bra 	BB1_16;

	// Empty row range: skip the loops with everything at zero.
	mov.f32 	%f245, %f246;
	mov.f32 	%f244, %f246;
	mov.f32 	%f243, %f246;
	mov.f32 	%f242, %f246;
	mov.f32 	%f241, %f246;
	mov.f32 	%f240, %f246;
	mov.u32 	%r45, 0;
	bra.uni 	BB1_21;

BB1_16:
	mov.f32 	%f245, %f246;
	mov.f32 	%f244, %f246;
	mov.f32 	%f243, %f246;
	mov.f32 	%f242, %f246;
	mov.f32 	%f241, %f246;
	mov.f32 	%f240, %f246;
	mov.u32 	%r45, 0;

BB1_17:
	// Row loop over r41 in [row start, r4). Skip the row when the column
	// range is empty.
	setp.ge.s32	%p13, %r6, %r39;
	@%p13 bra 	BB1_20;

	// Per-row constants: f18 = row, f19 / f20 = lum2 y coordinate of the
	// row below / above (texel centre, +0.5).
	cvt.rn.f32.s32	%f18, %r41;
	add.s32 	%r35, %r41, 1;
	cvt.rn.f32.s32	%f107, %r35;
	add.ftz.f32 	%f108, %f6, %f107;
	add.ftz.f32 	%f19, %f108, 0f3F000000;
	add.s32 	%r36, %r41, -1;
	cvt.rn.f32.s32	%f109, %r36;
	add.ftz.f32 	%f110, %f6, %f109;
	add.ftz.f32 	%f20, %f110, 0f3F000000;
	mov.u32 	%r43, %r6;

BB1_19:
	// Column loop over r11 in [column start, r39).
	mov.u32 	%r11, %r43;
	// lum1Tex sample at (f1 + col + 0.5, f2 + row + 0.5).
	cvt.rn.f32.s32	%f147, %r11;
	add.ftz.f32 	%f148, %f1, %f147;
	add.ftz.f32 	%f115, %f148, 0f3F000000;
	add.ftz.f32 	%f149, %f2, %f18;
	add.ftz.f32 	%f116, %f149, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f111, %f112, %f113, %f114}, [lum1Tex, {%f115, %f116}];
	// inline asm
	// lum2Tex sample at the displaced position (f5 + col + 0.5, f6 + row + 0.5).
	add.ftz.f32 	%f151, %f5, %f147;
	add.ftz.f32 	%f145, %f151, 0f3F000000;
	add.ftz.f32 	%f153, %f6, %f18;
	add.ftz.f32 	%f134, %f153, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f117, %f118, %f119, %f120}, [lum2Tex, {%f145, %f134}];
	// inline asm
	// Per-channel difference d = lum1 - lum2 (fourth channel ignored).
	sub.ftz.f32 	%f154, %f111, %f117;
	sub.ftz.f32 	%f155, %f112, %f118;
	sub.ftz.f32 	%f156, %f113, %f119;
	// f240 += (|d0| + |d1| + |d2|) / 3.
	abs.ftz.f32 	%f157, %f154;
	abs.ftz.f32 	%f158, %f155;
	add.ftz.f32 	%f159, %f157, %f158;
	abs.ftz.f32 	%f160, %f156;
	add.ftz.f32 	%f161, %f159, %f160;
	mov.f32 	%f162, 0f40400000;
	div.approx.ftz.f32 	%f163, %f161, %f162;
	add.ftz.f32 	%f240, %f240, %f163;
	// f241 += d0^2 + d1^2 + d2^2; count this sample.
	mul.ftz.f32 	%f164, %f155, %f155;
	fma.rn.ftz.f32 	%f165, %f154, %f154, %f164;
	fma.rn.ftz.f32 	%f166, %f156, %f156, %f165;
	add.ftz.f32 	%f241, %f241, %f166;
	add.s32 	%r45, %r45, 1;
	// Horizontal neighbours of the lum2 sample (col + 1 and col - 1).
	add.s32 	%r14, %r11, 1;
	cvt.rn.f32.s32	%f167, %r14;
	add.ftz.f32 	%f168, %f5, %f167;
	add.ftz.f32 	%f127, %f168, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f123, %f124, %f125, %f126}, [lum2Tex, {%f127, %f134}];
	// inline asm
	add.s32 	%r37, %r11, -1;
	cvt.rn.f32.s32	%f169, %r37;
	add.ftz.f32 	%f170, %f5, %f169;
	add.ftz.f32 	%f133, %f170, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f129, %f130, %f131, %f132}, [lum2Tex, {%f133, %f134}];
	// inline asm
	// gx = (right - left) / 2 per channel -> f175, f176, f177.
	sub.ftz.f32 	%f171, %f123, %f129;
	sub.ftz.f32 	%f172, %f124, %f130;
	sub.ftz.f32 	%f173, %f125, %f131;
	mov.f32 	%f174, 0f40000000;
	div.approx.ftz.f32 	%f175, %f171, %f174;
	div.approx.ftz.f32 	%f176, %f172, %f174;
	div.approx.ftz.f32 	%f177, %f173, %f174;
	// Vertical neighbours of the lum2 sample (row + 1 and row - 1).
	// inline asm
	tex.2d.v4.f32.f32 {%f135, %f136, %f137, %f138}, [lum2Tex, {%f145, %f19}];
	// inline asm
	// inline asm
	tex.2d.v4.f32.f32 {%f141, %f142, %f143, %f144}, [lum2Tex, {%f145, %f20}];
	// inline asm
	// gy = (below - above) / 2 per channel -> f181, f182, f183.
	sub.ftz.f32 	%f178, %f135, %f141;
	sub.ftz.f32 	%f179, %f136, %f142;
	sub.ftz.f32 	%f180, %f137, %f143;
	div.approx.ftz.f32 	%f181, %f178, %f174;
	div.approx.ftz.f32 	%f182, %f179, %f174;
	div.approx.ftz.f32 	%f183, %f180, %f174;
	// f246 += sum gx*gx
	fma.rn.ftz.f32 	%f184, %f175, %f175, %f246;
	fma.rn.ftz.f32 	%f185, %f176, %f176, %f184;
	fma.rn.ftz.f32 	%f246, %f177, %f177, %f185;
	// f245 += sum gx*gy
	fma.rn.ftz.f32 	%f186, %f175, %f181, %f245;
	fma.rn.ftz.f32 	%f187, %f176, %f182, %f186;
	fma.rn.ftz.f32 	%f245, %f177, %f183, %f187;
	// f244 += sum gy*gy
	fma.rn.ftz.f32 	%f188, %f181, %f181, %f244;
	fma.rn.ftz.f32 	%f189, %f182, %f182, %f188;
	fma.rn.ftz.f32 	%f244, %f183, %f183, %f189;
	// f243 += sum gx*d
	fma.rn.ftz.f32 	%f190, %f175, %f154, %f243;
	fma.rn.ftz.f32 	%f191, %f176, %f155, %f190;
	fma.rn.ftz.f32 	%f243, %f177, %f156, %f191;
	// f242 += sum gy*d
	fma.rn.ftz.f32 	%f192, %f181, %f154, %f242;
	fma.rn.ftz.f32 	%f193, %f182, %f155, %f192;
	fma.rn.ftz.f32 	%f242, %f183, %f156, %f193;
	setp.lt.s32	%p14, %r14, %r39;
	mov.u32 	%r43, %r14;
	@%p14 bra 	BB1_19;

BB1_20:
	add.s32 	%r41, %r41, 1;
	setp.lt.s32	%p15, %r41, %r4;
	@%p15 bra 	BB1_17;

BB1_21:
	// Need strictly more than param_5 samples; otherwise return FLT_MAX.
	setp.gt.s32	%p16, %r45, %r20;
	@%p16 bra 	BB1_22;
	bra.uni 	BB1_34;

BB1_22:
	// Turn every sum into a mean over the sample count:
	//   f49 = <gx*gx>  f50 = <gx*gy>  f51 = <gy*gy>
	//   f52 = <gx*d>   f53 = <gy*d>   f54 = <d*d>   f252 = <mean|d|>
	cvt.rn.f32.s32	%f195, %r45;
	div.approx.ftz.f32 	%f49, %f246, %f195;
	div.approx.ftz.f32 	%f50, %f245, %f195;
	div.approx.ftz.f32 	%f51, %f244, %f195;
	div.approx.ftz.f32 	%f52, %f243, %f195;
	div.approx.ftz.f32 	%f53, %f242, %f195;
	div.approx.ftz.f32 	%f54, %f241, %f195;
	div.approx.ftz.f32 	%f252, %f240, %f195;
	// Mean absolute difference below param_7: return 0 without updating.
	setp.geu.ftz.f32	%p17, %f252, %f71;
	@%p17 bra 	BB1_24;

	mov.f32 	%f252, 0f00000000;
	bra.uni 	BB1_34;

BB1_24:
	// Eigenvalues of [[f49, f50], [f50, f51]]:
	//   (trace +/- sqrt(trace^2 - 4*det)) / 2  -> f205, f207.
	add.ftz.f32 	%f196, %f49, %f51;
	mul.ftz.f32 	%f197, %f50, %f50;
	mul.ftz.f32 	%f198, %f49, %f51;
	sub.ftz.f32 	%f199, %f198, %f197;
	mul.ftz.f32 	%f200, %f199, 0fC0800000;
	fma.rn.ftz.f32 	%f201, %f196, %f196, %f200;
	sqrt.rn.ftz.f32 	%f202, %f201;
	add.ftz.f32 	%f203, %f196, %f202;
	mov.f32 	%f204, 0f40000000;
	div.approx.ftz.f32 	%f205, %f203, %f204;
	sub.ftz.f32 	%f206, %f196, %f202;
	div.approx.ftz.f32 	%f207, %f206, %f204;
	// f56 / f57 = larger / smaller eigenvalue magnitude.
	abs.ftz.f32 	%f208, %f205;
	abs.ftz.f32 	%f209, %f207;
	setp.gt.ftz.f32	%p18, %f208, %f209;
	selp.f32	%f56, %f208, %f209, %p18;
	setp.lt.ftz.f32	%p19, %f208, %f209;
	selp.f32	%f57, %f208, %f209, %p19;
	// Ratio f247 = f56 / f57, or FLT_MAX when the smaller one is zero.
	setp.neu.ftz.f32	%p20, %f57, 0f00000000;
	@%p20 bra 	BB1_26;

	mov.f32 	%f247, 0f7F7FFFFF;
	bra.uni 	BB1_27;

BB1_26:
	div.approx.ftz.f32 	%f247, %f56, %f57;

BB1_27:
	// Ratio > 10.0 takes the single-direction path at BB1_29.
	setp.gt.ftz.f32	%p21, %f247, 0f41200000;
	@%p21 bra 	BB1_29;

	// Otherwise add 0.1 * ratio * <d*d> to both diagonal entries and apply
	// the inverse of the resulting 2x2 matrix to (f52, f53).
	mul.ftz.f32 	%f211, %f247, 0f3DCCCCCD;
	fma.rn.ftz.f32 	%f212, %f211, %f54, %f49;
	fma.rn.ftz.f32 	%f213, %f211, %f54, %f51;
	mul.ftz.f32 	%f214, %f212, %f213;
	add.ftz.f32 	%f215, %f50, 0f00000000;
	mul.ftz.f32 	%f216, %f215, %f215;
	sub.ftz.f32 	%f217, %f214, %f216;
	rcp.approx.ftz.f32 	%f218, %f217;
	neg.ftz.f32 	%f219, %f215;
	mul.ftz.f32 	%f220, %f213, %f218;
	mul.ftz.f32 	%f221, %f218, %f219;
	mul.ftz.f32 	%f222, %f212, %f218;
	mul.ftz.f32 	%f223, %f221, %f53;
	fma.rn.ftz.f32 	%f250, %f220, %f52, %f223;
	mul.ftz.f32 	%f224, %f222, %f53;
	fma.rn.ftz.f32 	%f251, %f221, %f52, %f224;
	bra.uni 	BB1_33;

BB1_29:
	// Unit direction (f248, f249): (0, 1) when f50 == 0, otherwise
	// (1, t) / sqrt(1 + t^2) with t = (f56 - f49) / f50.
	setp.neu.ftz.f32	%p22, %f50, 0f00000000;
	@%p22 bra 	BB1_31;

	mov.f32 	%f249, 0f3F800000;
	mov.f32 	%f248, 0f00000000;
	bra.uni 	BB1_32;

BB1_31:
	sub.ftz.f32 	%f225, %f56, %f49;
	div.approx.ftz.f32 	%f226, %f225, %f50;
	fma.rn.ftz.f32 	%f227, %f226, %f226, 0f3F800000;
	mov.f32 	%f228, 0f3F800000;
	sqrt.rn.ftz.f32 	%f229, %f227;
	div.approx.ftz.f32 	%f248, %f228, %f229;
	div.approx.ftz.f32 	%f249, %f226, %f229;

BB1_32:
	// Project (f52, f53) on that direction, divide by (f56 + 1e-6) and scale
	// the direction by the result.
	mul.ftz.f32 	%f232, %f249, %f53;
	fma.rn.ftz.f32 	%f233, %f248, %f52, %f232;
	add.ftz.f32 	%f234, %f56, 0f358637BD;
	div.approx.ftz.f32 	%f235, %f233, %f234;
	mul.ftz.f32 	%f250, %f248, %f235;
	mul.ftz.f32 	%f251, %f249, %f235;

BB1_33:
	// Write back the motion vector plus the increment (f250, f251).
	add.ftz.f32 	%f236, %f72, %f250;
	add.ftz.f32 	%f237, %f73, %f251;
	st.v2.f32 	[%rd1], {%f236, %f237};

BB1_34:
	st.param.f32	[func_retval0+0], %f252;
	ret;
}

// bool __d_has_exist(float2 *list, int n)
//
// Reports whether any of list[0 .. n-1] compares equal, component by
// component, to list[n].
//   param_0 : generic address of the float2 array
//   param_1 : n; index of the probe entry and number of entries scanned
//   return  : 1 when a match exists, 0 otherwise (always 0 for n <= 0, in
//             which case no memory is read)
// The scan always walks all n entries; a hit only latches the flag.
.visible .func  (.param .b32 func_retval0) _Z13__d_has_existP6float2i(
	.param .b64 _Z13__d_has_existP6float2i_param_0,
	.param .b32 _Z13__d_has_existP6float2i_param_1
)
{
	.reg .pred 	%empty, %same_x, %same_y, %same, %more;
	.reg .s16 	%found;
	.reg .s32 	%count, %idx, %found32, %result;
	.reg .f32 	%key_x, %key_y, %cur_x, %cur_y;
	.reg .s64 	%base, %key_off, %key_ptr, %cursor;

	ld.param.u64 	%base, [_Z13__d_has_existP6float2i_param_0];
	ld.param.u32 	%count, [_Z13__d_has_existP6float2i_param_1];
	mov.u16 	%found, 0;
	setp.lt.s32 	%empty, %count, 1;
	@%empty bra 	HAS_EXIST_DONE;

	// Probe entry list[n] (8 bytes per float2).
	mul.wide.s32 	%key_off, %count, 8;
	add.s64 	%key_ptr, %base, %key_off;
	ld.v2.f32 	{%key_x, %key_y}, [%key_ptr];
	mov.u32 	%idx, 0;
	mov.u64 	%cursor, %base;

HAS_EXIST_SCAN:
	ld.f32 	%cur_x, [%cursor];
	ld.f32 	%cur_y, [%cursor+4];
	// Ordered equality: a NaN component never matches.
	setp.eq.ftz.f32 	%same_x, %key_x, %cur_x;
	setp.eq.ftz.f32 	%same_y, %key_y, %cur_y;
	and.pred 	%same, %same_x, %same_y;
	@%same mov.u16 	%found, 1;
	add.s64 	%cursor, %cursor, 8;
	add.s32 	%idx, %idx, 1;
	setp.lt.s32 	%more, %idx, %count;
	@%more bra 	HAS_EXIST_SCAN;

HAS_EXIST_DONE:
	// Widen the flag to the 32-bit return slot.
	cvt.u32.u16 	%found32, %found;
	cvt.s32.s8 	%result, %found32;
	st.param.b32 	[func_retval0+0], %result;
	ret;
}

// float __d_blk_match_cost_texmem_lerp(float2 p1, float2 p2, int size,
//                                      float budget, void *, void *)
//
// Accumulates a truncated per-pixel difference between a size x size block
// of im1Tex anchored at p1 and a block of im2Tex anchored at p2.
//   param_0 : p1 (x at +0, y at +4)
//   param_1 : p2 (x at +0, y at +4)
//   param_2 : block edge length; nothing is sampled when it is <= 0
//   param_3 : budget; accumulation stops as soon as the running cost
//             compares greater than it
//   param_4, param_5 : never loaded in this function
//   return  : the accumulated cost (0.0 for an empty block)
// Per pixel: mean of |difference| over the first three channels, limited to
// at most 1.0 and then to at most 0.3 (0f3E99999A).
.visible .func  (.param .b32 func_retval0) _Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0_(
	.param .align 8 .b8 _Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_0[8],
	.param .align 8 .b8 _Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_1[8],
	.param .b32 _Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_2,
	.param .b32 _Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_3,
	.param .b64 _Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_4,
	.param .b64 _Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_5
)
{
	.reg .pred 	%no_rows, %row_done, %more_rows, %over_one, %under_cap, %within;
	.reg .s32 	%size, %row, %col;
	.reg .f32 	%p1x, %p1y, %p2x, %p2y, %budget, %cost;
	.reg .f32 	%row_f, %col_f, %u1, %v1, %u2, %v2;
	.reg .f32 	%a0, %a1, %a2, %a3, %b0, %b1, %b2, %b3;
	.reg .f32 	%d0, %d1, %d2, %m0, %m1, %m2, %sum01, %sum012;
	.reg .f32 	%three, %mean, %clamped, %mag, %term;

	ld.param.f32 	%p1x, [_Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_0];
	ld.param.f32 	%p1y, [_Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_0+4];
	ld.param.f32 	%p2x, [_Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_1];
	ld.param.f32 	%p2y, [_Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_1+4];
	ld.param.u32 	%size, [_Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_2];
	ld.param.f32 	%budget, [_Z30__d_blk_match_cost_texmem_lerp6float2S_ifPvS0__param_3];

	mov.f32 	%cost, 0f00000000;
	setp.lt.s32 	%no_rows, %size, 1;
	@%no_rows bra 	BMC_DONE;

	mov.u32 	%row, 0;

BMC_ROW:
	// Texel-centre y coordinates of this row in both images.
	cvt.rn.f32.s32 	%row_f, %row;
	add.ftz.f32 	%v1, %p1y, %row_f;
	add.ftz.f32 	%v1, %v1, 0f3F000000;
	add.ftz.f32 	%v2, %p2y, %row_f;
	add.ftz.f32 	%v2, %v2, 0f3F000000;
	mov.u32 	%col, 0;

BMC_COL:
	setp.ge.s32 	%row_done, %col, %size;
	@%row_done bra 	BMC_NEXT_ROW;

	cvt.rn.f32.s32 	%col_f, %col;
	add.ftz.f32 	%u1, %p1x, %col_f;
	add.ftz.f32 	%u1, %u1, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%a0, %a1, %a2, %a3}, [im1Tex, {%u1, %v1}];
	// inline asm
	add.ftz.f32 	%u2, %p2x, %col_f;
	add.ftz.f32 	%u2, %u2, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%b0, %b1, %b2, %b3}, [im2Tex, {%u2, %v2}];
	// inline asm

	// Mean absolute difference over the first three channels.
	sub.ftz.f32 	%d0, %a0, %b0;
	sub.ftz.f32 	%d1, %a1, %b1;
	sub.ftz.f32 	%d2, %a2, %b2;
	abs.ftz.f32 	%m0, %d0;
	abs.ftz.f32 	%m1, %d1;
	add.ftz.f32 	%sum01, %m0, %m1;
	abs.ftz.f32 	%m2, %d2;
	add.ftz.f32 	%sum012, %sum01, %m2;
	mov.f32 	%three, 0f40400000;
	div.approx.ftz.f32 	%mean, %sum012, %three;

	// Limit to 1.0, take the magnitude, then limit to 0.3.
	setp.gt.ftz.f32 	%over_one, %mean, 0f3F800000;
	selp.f32 	%clamped, 0f3F800000, %mean, %over_one;
	abs.ftz.f32 	%mag, %clamped;
	setp.lt.ftz.f32 	%under_cap, %mag, 0f3E99999A;
	selp.f32 	%term, %mag, 0f3E99999A, %under_cap;
	add.ftz.f32 	%cost, %cost, %term;

	add.s32 	%col, %col, 1;
	// Keep going while cost <= budget (or the compare is unordered);
	// otherwise abandon both loops.
	setp.leu.ftz.f32 	%within, %cost, %budget;
	@%within bra 	BMC_COL;
	bra.uni 	BMC_DONE;

BMC_NEXT_ROW:
	add.s32 	%row, %row, 1;
	setp.lt.s32 	%more_rows, %row, %size;
	@%more_rows bra 	BMC_ROW;

BMC_DONE:
	st.param.f32 	[func_retval0+0], %cost;
	ret;
}

// bool __d_is_invalid_flow(float2 uv)
//
// Returns 1 when |uv.x| >= 1.0e8 or |uv.y| >= 1.0e8 (0f4CBEBC20), else 0.
//   param_0 : the float2 to test (x at +0, y at +4)
// Both compares are ordered, so a NaN component on its own does not mark
// the vector invalid.
.visible .func  (.param .b32 func_retval0) _Z19__d_is_invalid_flow6float2(
	.param .align 8 .b8 _Z19__d_is_invalid_flow6float2_param_0[8]
)
{
	.reg .pred 	%x_bad, %y_bad, %any_bad;
	.reg .s32 	%flag;
	.reg .f32 	%fx, %fy, %mag_x, %mag_y;

	ld.param.f32 	%fx, [_Z19__d_is_invalid_flow6float2_param_0];
	ld.param.f32 	%fy, [_Z19__d_is_invalid_flow6float2_param_0+4];

	abs.ftz.f32 	%mag_x, %fx;
	abs.ftz.f32 	%mag_y, %fy;
	setp.ge.ftz.f32 	%x_bad, %mag_x, 0f4CBEBC20;
	setp.ge.ftz.f32 	%y_bad, %mag_y, 0f4CBEBC20;
	or.pred 	%any_bad, %x_bad, %y_bad;

	selp.u32 	%flag, 1, 0, %any_bad;
	st.param.b32 	[func_retval0+0], %flag;
	ret;
}

// int __d_accumulate_valid_uv(float2 *sum, int count, int x, int y,
//                             int dx, int dy, void *)
//
// Samples uvTex at (x + k*dx, y + k*dy) for k = 1, 2, ... until a sample
// passes the validity test (both components below 1.0e8 in magnitude) or
// k = 9 has been tried. A valid sample is added into *sum.
//   param_0 : generic address of the float2 accumulator
//   param_1 : running count
//   param_2 / param_3 : base coordinates
//   param_4 / param_5 : step added to the coordinates per retry
//   param_6 : never loaded in this function
//   return  : param_1 + 1 when a valid sample was accumulated, param_1
//             unchanged otherwise
//
// Review fix: the retry loop used to start by sampling the k = 1 coordinates
// a second time. The coordinates are now advanced to k = 2 and the counter
// starts at 2, so the sequence of distinct samples (k = 1 .. 9) and the
// result are unchanged while one texture fetch is saved per retry sequence.
.visible .func  (.param .b32 func_retval0) _Z23__d_accumulate_valid_uvP6float2iiiiiPv(
	.param .b64 _Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_0,
	.param .b32 _Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_1,
	.param .b32 _Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_2,
	.param .b32 _Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_3,
	.param .b32 _Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_4,
	.param .b32 _Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_5,
	.param .b64 _Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_6
)
{
	.reg .pred 	%p<7>;
	.reg .s16 	%rs<7>;
	.reg .s32 	%r<21>;
	.reg .f32 	%f<31>;
	.reg .s64 	%rd<4>;


	ld.param.u64 	%rd1, [_Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_0];
	ld.param.u32 	%r20, [_Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_1];
	ld.param.u32 	%r15, [_Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_2];
	ld.param.u32 	%r16, [_Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_3];
	ld.param.u32 	%r12, [_Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_4];
	ld.param.u32 	%r13, [_Z23__d_accumulate_valid_uvP6float2iiiiiPv_param_5];
	// First sample (k = 1) at base + step.
	add.s32 	%r17, %r12, %r15;
	cvt.rn.f32.s32	%f13, %r17;
	add.s32 	%r18, %r13, %r16;
	cvt.rn.f32.s32	%f14, %r18;
	// inline asm
	tex.2d.v4.f32.f32 {%f9, %f10, %f11, %f12}, [uvTex, {%f13, %f14}];
	// inline asm
	// Next retry samples k = 2; r19 holds the k of the next retry.
	add.s32 	%r17, %r17, %r12;
	add.s32 	%r18, %r18, %r13;
	mov.u32 	%r19, 2;
	mov.f32 	%f30, %f10;
	mov.f32 	%f29, %f9;

BB5_1:
	// rs6 = 1 when |f29| >= 1.0e8 or |f30| >= 1.0e8 (sample invalid).
	abs.ftz.f32 	%f15, %f29;
	setp.ltu.ftz.f32	%p1, %f15, 0f4CBEBC20;
	@%p1 bra 	BB5_3;

	mov.u16 	%rs6, 1;
	bra.uni 	BB5_4;

BB5_3:
	abs.ftz.f32 	%f16, %f30;
	setp.ge.ftz.f32	%p2, %f16, 0f4CBEBC20;
	selp.u16	%rs6, 1, 0, %p2;

BB5_4:
	// Invalid and retries left (k < 10): fetch the next sample.
	and.b16  	%rs4, %rs6, 1;
	setp.eq.b16	%p3, %rs4, 1;
	setp.lt.s32	%p4, %r19, 10;
	and.pred  	%p5, %p3, %p4;
	@%p5 bra 	BB5_8;

	// Invalid with no retries left: return the count unchanged.
	@%p3 bra 	BB5_7;

	// Valid: *sum += sample, count += 1.
	ld.v2.f32 	{%f17, %f18}, [%rd1];
	add.s32 	%r20, %r20, 1;
	add.ftz.f32 	%f20, %f18, %f30;
	add.ftz.f32 	%f22, %f17, %f29;
	st.v2.f32 	[%rd1], {%f22, %f20};

BB5_7:
	st.param.b32	[func_retval0+0], %r20;
	ret;

BB5_8:
	// Sample the current retry position, then advance it by one step.
	cvt.rn.f32.s32	%f27, %r17;
	cvt.rn.f32.s32	%f28, %r18;
	// inline asm
	tex.2d.v4.f32.f32 {%f23, %f24, %f25, %f26}, [uvTex, {%f27, %f28}];
	// inline asm
	add.s32 	%r19, %r19, 1;
	add.s32 	%r18, %r18, %r13;
	add.s32 	%r17, %r17, %r12;
	mov.f32 	%f30, %f24;
	mov.f32 	%f29, %f23;
	bra.uni 	BB5_1;
}

// int __d_fetch_valid_fwduv(float2 *list, int n, int *count, float2 *sum,
//                           int x, int y, int dx, int dy, void *)
//
// Samples fwdUVTex at (x + k*dx, y + k*dy) for k = 1, 2, ... into list[n]
// until the stored sample passes the validity test (both components below
// 1.0e8 in magnitude) or k = 4 has been tried.
// On a valid sample: *sum += sample, *count += 1, and list[0 .. n-1] is
// scanned for an entry equal to list[n].
//   param_0 : generic address of the float2 list
//   param_1 : n, the slot written and the number of earlier entries scanned
//   param_2 : generic address of the int counter
//   param_3 : generic address of the float2 accumulator
//   param_4 / param_5 : base coordinates
//   param_6 / param_7 : step added to the coordinates per retry
//   param_8 : never loaded in this function
//   return  : n + 1 when a valid sample was stored and no earlier entry
//             equals it; n otherwise (list[n] is still overwritten)
//
// Review fix: the retry loop used to start by sampling the k = 1 coordinates
// a second time and storing the identical value again. The coordinates are
// now advanced to k = 2 and the counter starts at 2, so the distinct samples
// (k = 1 .. 4) and all results are unchanged while the duplicate fetch and
// store are gone.
.visible .func  (.param .b32 func_retval0) _Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv(
	.param .b64 _Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_0,
	.param .b32 _Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_1,
	.param .b64 _Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_2,
	.param .b64 _Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_3,
	.param .b32 _Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_4,
	.param .b32 _Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_5,
	.param .b32 _Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_6,
	.param .b32 _Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_7,
	.param .b64 _Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_8
)
{
	.reg .pred 	%p<12>;
	.reg .s16 	%rs<15>;
	.reg .s32 	%r<28>;
	.reg .f32 	%f<29>;
	.reg .s64 	%rd<20>;


	ld.param.u64 	%rd7, [_Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_0];
	ld.param.u32 	%r27, [_Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_1];
	ld.param.u64 	%rd8, [_Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_2];
	ld.param.u64 	%rd9, [_Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_3];
	ld.param.u32 	%r17, [_Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_4];
	ld.param.u32 	%r18, [_Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_5];
	ld.param.u32 	%r14, [_Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_6];
	ld.param.u32 	%r15, [_Z21__d_fetch_valid_fwduvP6float2iPiS0_iiiiPv_param_7];
	// rd1 = &list[n] (8 bytes per float2).
	mul.wide.s32 	%rd11, %r27, 8;
	add.s64 	%rd1, %rd7, %rd11;
	// First sample (k = 1) at base + step, stored into list[n].
	add.s32 	%r23, %r14, %r17;
	cvt.rn.f32.s32	%f10, %r23;
	add.s32 	%r24, %r15, %r18;
	cvt.rn.f32.s32	%f11, %r24;
	// inline asm
	tex.2d.v4.f32.f32 {%f6, %f7, %f8, %f9}, [fwdUVTex, {%f10, %f11}];
	// inline asm
	st.v2.f32 	[%rd1], {%f6, %f7};
	// Next retry samples k = 2; r25 holds the k of the next retry.
	add.s32 	%r23, %r23, %r14;
	add.s32 	%r24, %r24, %r15;
	mov.u32 	%r25, 2;

BB6_1:
	// rs13 = 1 when the sample in list[n] is invalid
	// (|x| >= 1.0e8 or |y| >= 1.0e8).
	ld.v2.f32 	{%f12, %f13}, [%rd1];
	abs.ftz.f32 	%f14, %f12;
	setp.ltu.ftz.f32	%p1, %f14, 0f4CBEBC20;
	@%p1 bra 	BB6_3;

	mov.u16 	%rs13, 1;
	bra.uni 	BB6_4;

BB6_3:
	abs.ftz.f32 	%f15, %f13;
	setp.ge.ftz.f32	%p2, %f15, 0f4CBEBC20;
	selp.u16	%rs13, 1, 0, %p2;

BB6_4:
	// Invalid and retries left (k < 5): fetch the next sample.
	and.b16  	%rs7, %rs13, 1;
	setp.eq.b16	%p3, %rs7, 1;
	setp.lt.s32	%p4, %r25, 5;
	and.pred  	%p5, %p3, %p4;
	@%p5 bra 	BB6_15;

	// Invalid with no retries left: return n unchanged.
	@%p3 bra 	BB6_14;

	// Valid: *sum += sample, *count += 1.
	ld.v2.f32 	{%f16, %f17}, [%rd9];
	add.ftz.f32 	%f19, %f17, %f13;
	add.ftz.f32 	%f21, %f16, %f12;
	st.v2.f32 	[%rd9], {%f21, %f19};
	ld.u32 	%r19, [%rd8];
	add.s32 	%r20, %r19, 1;
	st.u32 	[%rd8], %r20;
	// With n <= 0 there is nothing to compare against.
	setp.gt.s32	%p7, %r27, 0;
	@%p7 bra 	BB6_8;

	mov.u16 	%rs14, 0;
	bra.uni 	BB6_13;

BB6_8:
	// Reload list[n] as the probe (f3, f4) and scan list[0 .. n-1].
	ld.f32 	%f3, [%rd1];
	mul.wide.s32 	%rd13, %r27, 8;
	add.s64 	%rd14, %rd7, %rd13;
	ld.f32 	%f4, [%rd14+4];
	mov.u16 	%rs14, 0;
	mov.u32 	%r26, 0;
	mov.u64 	%rd18, 0;
	mov.u64 	%rd19, %rd7;

BB6_9:
	// rs14 latches to 1 when both components of an earlier entry are equal
	// to the probe; the scan does not stop early.
	mov.u64 	%rd3, %rd19;
	shl.b64 	%rd15, %rd18, 3;
	add.s64 	%rd16, %rd7, %rd15;
	ld.f32 	%f22, [%rd3];
	ld.f32 	%f5, [%rd16+4];
	setp.neu.ftz.f32	%p8, %f3, %f22;
	@%p8 bra 	BB6_12;

	setp.neu.ftz.f32	%p9, %f4, %f5;
	@%p9 bra 	BB6_12;

	mov.u16 	%rs14, 1;

BB6_12:
	add.s64 	%rd5, %rd3, 8;
	add.s32 	%r26, %r26, 1;
	setp.lt.s32	%p10, %r26, %r27;
	add.s64 	%rd18, %rd18, 1;
	mov.u64 	%rd19, %rd5;
	@%p10 bra 	BB6_9;

BB6_13:
	// n += 1 only when no earlier entry matched.
	and.b16  	%rs12, %rs14, 255;
	setp.eq.s16	%p11, %rs12, 0;
	selp.u32	%r22, 1, 0, %p11;
	add.s32 	%r27, %r22, %r27;

BB6_14:
	st.param.b32	[func_retval0+0], %r27;
	ret;

BB6_15:
	// Sample the current retry position into list[n], then advance it.
	cvt.rn.f32.s32	%f27, %r23;
	cvt.rn.f32.s32	%f28, %r24;
	add.s32 	%r25, %r25, 1;
	add.s32 	%r24, %r24, %r15;
	add.s32 	%r23, %r23, %r14;
	// inline asm
	tex.2d.v4.f32.f32 {%f23, %f24, %f25, %f26}, [fwdUVTex, {%f27, %f28}];
	// inline asm
	st.v2.f32 	[%rd1], {%f23, %f24};
	bra.uni 	BB6_1;
}

// int __d_fetch_valid_bwduv(float2 *list, int n, int *count, float2 *sum,
//                           int x, int y, int dx, int dy, void *)
//
// Samples bwdUVTex at (x + k*dx, y + k*dy) for k = 1, 2, ... into list[n]
// until the stored sample passes the validity test (both components below
// 1.0e8 in magnitude) or k = 4 has been tried.
// On a valid sample: *sum += sample, *count += 1, and list[0 .. n-1] is
// scanned for an entry equal to list[n].
//   param_0 : generic address of the float2 list
//   param_1 : n, the slot written and the number of earlier entries scanned
//   param_2 : generic address of the int counter
//   param_3 : generic address of the float2 accumulator
//   param_4 / param_5 : base coordinates
//   param_6 / param_7 : step added to the coordinates per retry
//   param_8 : never loaded in this function
//   return  : n + 1 when a valid sample was stored and no earlier entry
//             equals it; n otherwise (list[n] is still overwritten)
//
// Review fix: the retry loop used to start by sampling the k = 1 coordinates
// a second time and storing the identical value again. The coordinates are
// now advanced to k = 2 and the counter starts at 2, so the distinct samples
// (k = 1 .. 4) and all results are unchanged while the duplicate fetch and
// store are gone.
.visible .func  (.param .b32 func_retval0) _Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv(
	.param .b64 _Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_0,
	.param .b32 _Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_1,
	.param .b64 _Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_2,
	.param .b64 _Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_3,
	.param .b32 _Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_4,
	.param .b32 _Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_5,
	.param .b32 _Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_6,
	.param .b32 _Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_7,
	.param .b64 _Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_8
)
{
	.reg .pred 	%p<12>;
	.reg .s16 	%rs<15>;
	.reg .s32 	%r<28>;
	.reg .f32 	%f<29>;
	.reg .s64 	%rd<20>;


	ld.param.u64 	%rd7, [_Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_0];
	ld.param.u32 	%r27, [_Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_1];
	ld.param.u64 	%rd8, [_Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_2];
	ld.param.u64 	%rd9, [_Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_3];
	ld.param.u32 	%r17, [_Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_4];
	ld.param.u32 	%r18, [_Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_5];
	ld.param.u32 	%r14, [_Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_6];
	ld.param.u32 	%r15, [_Z21__d_fetch_valid_bwduvP6float2iPiS0_iiiiPv_param_7];
	// rd1 = &list[n] (8 bytes per float2).
	mul.wide.s32 	%rd11, %r27, 8;
	add.s64 	%rd1, %rd7, %rd11;
	// First sample (k = 1) at base + step, stored into list[n].
	add.s32 	%r23, %r14, %r17;
	cvt.rn.f32.s32	%f10, %r23;
	add.s32 	%r24, %r15, %r18;
	cvt.rn.f32.s32	%f11, %r24;
	// inline asm
	tex.2d.v4.f32.f32 {%f6, %f7, %f8, %f9}, [bwdUVTex, {%f10, %f11}];
	// inline asm
	st.v2.f32 	[%rd1], {%f6, %f7};
	// Next retry samples k = 2; r25 holds the k of the next retry.
	add.s32 	%r23, %r23, %r14;
	add.s32 	%r24, %r24, %r15;
	mov.u32 	%r25, 2;

BB7_1:
	// rs13 = 1 when the sample in list[n] is invalid
	// (|x| >= 1.0e8 or |y| >= 1.0e8).
	ld.v2.f32 	{%f12, %f13}, [%rd1];
	abs.ftz.f32 	%f14, %f12;
	setp.ltu.ftz.f32	%p1, %f14, 0f4CBEBC20;
	@%p1 bra 	BB7_3;

	mov.u16 	%rs13, 1;
	bra.uni 	BB7_4;

BB7_3:
	abs.ftz.f32 	%f15, %f13;
	setp.ge.ftz.f32	%p2, %f15, 0f4CBEBC20;
	selp.u16	%rs13, 1, 0, %p2;

BB7_4:
	// Invalid and retries left (k < 5): fetch the next sample.
	and.b16  	%rs7, %rs13, 1;
	setp.eq.b16	%p3, %rs7, 1;
	setp.lt.s32	%p4, %r25, 5;
	and.pred  	%p5, %p3, %p4;
	@%p5 bra 	BB7_15;

	// Invalid with no retries left: return n unchanged.
	@%p3 bra 	BB7_14;

	// Valid: *sum += sample, *count += 1.
	ld.v2.f32 	{%f16, %f17}, [%rd9];
	add.ftz.f32 	%f19, %f17, %f13;
	add.ftz.f32 	%f21, %f16, %f12;
	st.v2.f32 	[%rd9], {%f21, %f19};
	ld.u32 	%r19, [%rd8];
	add.s32 	%r20, %r19, 1;
	st.u32 	[%rd8], %r20;
	// With n <= 0 there is nothing to compare against.
	setp.gt.s32	%p7, %r27, 0;
	@%p7 bra 	BB7_8;

	mov.u16 	%rs14, 0;
	bra.uni 	BB7_13;

BB7_8:
	// Reload list[n] as the probe (f3, f4) and scan list[0 .. n-1].
	ld.f32 	%f3, [%rd1];
	mul.wide.s32 	%rd13, %r27, 8;
	add.s64 	%rd14, %rd7, %rd13;
	ld.f32 	%f4, [%rd14+4];
	mov.u16 	%rs14, 0;
	mov.u32 	%r26, 0;
	mov.u64 	%rd18, 0;
	mov.u64 	%rd19, %rd7;

BB7_9:
	// rs14 latches to 1 when both components of an earlier entry are equal
	// to the probe; the scan does not stop early.
	mov.u64 	%rd3, %rd19;
	shl.b64 	%rd15, %rd18, 3;
	add.s64 	%rd16, %rd7, %rd15;
	ld.f32 	%f22, [%rd3];
	ld.f32 	%f5, [%rd16+4];
	setp.neu.ftz.f32	%p8, %f3, %f22;
	@%p8 bra 	BB7_12;

	setp.neu.ftz.f32	%p9, %f4, %f5;
	@%p9 bra 	BB7_12;

	mov.u16 	%rs14, 1;

BB7_12:
	add.s64 	%rd5, %rd3, 8;
	add.s32 	%r26, %r26, 1;
	setp.lt.s32	%p10, %r26, %r27;
	add.s64 	%rd18, %rd18, 1;
	mov.u64 	%rd19, %rd5;
	@%p10 bra 	BB7_9;

BB7_13:
	// n += 1 only when no earlier entry matched.
	and.b16  	%rs12, %rs14, 255;
	setp.eq.s16	%p11, %rs12, 0;
	selp.u32	%r22, 1, 0, %p11;
	add.s32 	%r27, %r22, %r27;

BB7_14:
	st.param.b32	[func_retval0+0], %r27;
	ret;

BB7_15:
	// Sample the current retry position into list[n], then advance it.
	cvt.rn.f32.s32	%f27, %r23;
	cvt.rn.f32.s32	%f28, %r24;
	add.s32 	%r25, %r25, 1;
	add.s32 	%r24, %r24, %r15;
	add.s32 	%r23, %r23, %r14;
	// inline asm
	tex.2d.v4.f32.f32 {%f23, %f24, %f25, %f26}, [bwdUVTex, {%f27, %f28}];
	// inline asm
	st.v2.f32 	[%rd1], {%f23, %f24};
	bra.uni 	BB7_1;
}

// Kernel: _d_compose_multichannel_data_kernel(dst, src, stride, x_limit, y_limit)
//
// One thread per element. Thread (px, py) reads the 16-byte float4 at
// src[py * stride + px], divides its first three components by 255.0
// (approximate division) and writes them to dst at the same index with the
// fourth component set to 0.0.
//   param_0 : destination buffer (converted to a global address)
//   param_1 : source buffer (converted to a global address)
//   param_2 : stride in float4 elements between consecutive py values
//   param_3 : exclusive upper bound on px
//   param_4 : exclusive upper bound on py
// Threads outside the bounds return without touching memory.
.visible .entry _d_compose_multichannel_data_kernel(
	.param .u64 _d_compose_multichannel_data_kernel_param_0,
	.param .u64 _d_compose_multichannel_data_kernel_param_1,
	.param .u32 _d_compose_multichannel_data_kernel_param_2,
	.param .u32 _d_compose_multichannel_data_kernel_param_3,
	.param .u32 _d_compose_multichannel_data_kernel_param_4
)
{
	.reg .pred 	%in_x, %in_y, %inside;
	.reg .s32 	%stride, %x_limit, %y_limit, %px, %py, %elem;
	.reg .s32 	%blk_w, %blk_ix, %thr_ix, %blk_h, %blk_iy, %thr_iy;
	.reg .f32 	%c0, %c1, %c2, %c3, %n0, %n1, %n2, %scale, %zero;
	.reg .s64 	%dst_arg, %src_arg, %dst, %src, %byte_off, %dst_px, %src_px;

	ld.param.u64 	%dst_arg, [_d_compose_multichannel_data_kernel_param_0];
	ld.param.u64 	%src_arg, [_d_compose_multichannel_data_kernel_param_1];
	ld.param.u32 	%stride, [_d_compose_multichannel_data_kernel_param_2];
	ld.param.u32 	%x_limit, [_d_compose_multichannel_data_kernel_param_3];
	ld.param.u32 	%y_limit, [_d_compose_multichannel_data_kernel_param_4];

	// px = ntid.x * ctaid.x + tid.x
	mov.u32 	%blk_w, %ntid.x;
	mov.u32 	%blk_ix, %ctaid.x;
	mov.u32 	%thr_ix, %tid.x;
	mad.lo.s32 	%px, %blk_w, %blk_ix, %thr_ix;
	// py = ntid.y * ctaid.y + tid.y
	mov.u32 	%blk_h, %ntid.y;
	mov.u32 	%blk_iy, %ctaid.y;
	mov.u32 	%thr_iy, %tid.y;
	mad.lo.s32 	%py, %blk_h, %blk_iy, %thr_iy;

	setp.lt.s32 	%in_x, %px, %x_limit;
	setp.lt.s32 	%in_y, %py, %y_limit;
	and.pred 	%inside, %in_x, %in_y;
	@!%inside bra 	COMPOSE_EXIT;

	cvta.to.global.u64 	%dst, %dst_arg;
	cvta.to.global.u64 	%src, %src_arg;
	// Same element offset in both buffers: (py * stride + px) * 16 bytes.
	mad.lo.s32 	%elem, %py, %stride, %px;
	mul.wide.s32 	%byte_off, %elem, 16;
	add.s64 	%src_px, %src, %byte_off;
	add.s64 	%dst_px, %dst, %byte_off;

	ld.global.v4.f32 	{%c0, %c1, %c2, %c3}, [%src_px];
	mov.f32 	%scale, 0f437F0000;
	div.approx.ftz.f32 	%n2, %c2, %scale;
	div.approx.ftz.f32 	%n1, %c1, %scale;
	div.approx.ftz.f32 	%n0, %c0, %scale;
	mov.f32 	%zero, 0f00000000;
	st.global.v4.f32 	[%dst_px], {%n0, %n1, %n2, %zero};

COMPOSE_EXIT:
	ret;
}

.visible .entry _d_lucas_kanade_kernel(
	.param .u64 _d_lucas_kanade_kernel_param_0,
	.param .u32 _d_lucas_kanade_kernel_param_1,
	.param .u32 _d_lucas_kanade_kernel_param_2,
	.param .u32 _d_lucas_kanade_kernel_param_3,
	.param .u32 _d_lucas_kanade_kernel_param_4,
	.param .u32 _d_lucas_kanade_kernel_param_5,
	.param .u32 _d_lucas_kanade_kernel_param_6,
	.param .u32 _d_lucas_kanade_kernel_param_7,
	.param .u32 _d_lucas_kanade_kernel_param_8,
	.param .f32 _d_lucas_kanade_kernel_param_9,
	.param .u64 _d_lucas_kanade_kernel_param_10,
	.param .u64 _d_lucas_kanade_kernel_param_11
)
{
	.reg .pred 	%p<32>;
	.reg .s32 	%r<70>;
	.reg .f32 	%f<296>;
	.reg .s64 	%rd<22>;


	ld.param.u64 	%rd1, [_d_lucas_kanade_kernel_param_0];
	ld.param.u32 	%r22, [_d_lucas_kanade_kernel_param_1];
	ld.param.u32 	%r28, [_d_lucas_kanade_kernel_param_2];
	ld.param.u32 	%r29, [_d_lucas_kanade_kernel_param_3];
	ld.param.u32 	%r23, [_d_lucas_kanade_kernel_param_4];
	ld.param.u32 	%r24, [_d_lucas_kanade_kernel_param_5];
	ld.param.u32 	%r25, [_d_lucas_kanade_kernel_param_6];
	ld.param.u32 	%r26, [_d_lucas_kanade_kernel_param_7];
	ld.param.u32 	%r27, [_d_lucas_kanade_kernel_param_8];
	ld.param.f32 	%f86, [_d_lucas_kanade_kernel_param_9];
	mov.u32 	%r30, %ntid.x;
	mov.u32 	%r31, %ctaid.x;
	mov.u32 	%r32, %tid.x;
	mad.lo.s32 	%r1, %r30, %r31, %r32;
	mov.u32 	%r33, %ntid.y;
	mov.u32 	%r34, %ctaid.y;
	mov.u32 	%r35, %tid.y;
	mad.lo.s32 	%r2, %r33, %r34, %r35;
	setp.lt.s32	%p1, %r1, %r28;
	setp.lt.s32	%p2, %r2, %r29;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB9_40;
	bra.uni 	BB9_1;

BB9_1:
	cvta.to.global.u64 	%rd2, %rd1;
	mad.lo.s32 	%r36, %r2, %r22, %r1;
	mul.wide.s32 	%rd3, %r36, 8;
	add.s64 	%rd4, %rd2, %rd3;
	ld.global.v2.f32 	{%f87, %f88}, [%rd4];
	setp.lt.s32	%p4, %r27, 1;
	mov.f32 	%f292, %f88;
	mov.f32 	%f280, %f87;
	@%p4 bra 	BB9_39;

	sub.s32 	%r38, %r25, %r26;
	mul.lo.s32 	%r39, %r1, %r38;
	cvt.rn.f32.s32	%f3, %r39;
	mul.lo.s32 	%r40, %r2, %r38;
	cvt.rn.f32.s32	%f4, %r40;
	cvt.rn.f32.s32	%f5, %r25;
	add.s32 	%r41, %r23, -1;
	cvt.rn.f32.s32	%f6, %r41;
	add.s32 	%r42, %r24, -1;
	cvt.rn.f32.s32	%f7, %r42;
	cvt.rn.f32.s32	%f8, %r24;
	cvt.rn.f32.s32	%f9, %r23;
	mov.f32 	%f89, 0f7F7FFFFF;
	mov.u32 	%r61, 0;
	mov.f32 	%f271, %f89;
	mov.f32 	%f281, %f280;
	mov.f32 	%f283, %f280;
	mov.f32 	%f293, %f292;
	mov.f32 	%f295, %f292;

BB9_3:
	mov.f32 	%f287, %f293;
	mov.f32 	%f294, %f287;
	mov.f32 	%f275, %f281;
	mov.f32 	%f282, %f275;
	add.ftz.f32 	%f90, %f3, %f282;
	add.ftz.f32 	%f91, %f90, %f5;
	setp.gtu.ftz.f32	%p5, %f91, 0f00000000;
	setp.ltu.ftz.f32	%p6, %f90, %f6;
	and.pred  	%p7, %p5, %p6;
	@%p7 bra 	BB9_5;

	mov.f32 	%f270, %f89;
	bra.uni 	BB9_38;

BB9_5:
	add.ftz.f32 	%f93, %f4, %f294;
	add.ftz.f32 	%f94, %f93, %f5;
	setp.gtu.ftz.f32	%p8, %f94, 0f00000000;
	setp.ltu.ftz.f32	%p9, %f93, %f7;
	and.pred  	%p10, %p8, %p9;
	@%p10 bra 	BB9_7;

	mov.f32 	%f267, %f89;
	mov.f32 	%f270, %f267;
	bra.uni 	BB9_38;

BB9_7:
	setp.lt.ftz.f32	%p11, %f93, 0f00000000;
	@%p11 bra 	BB9_9;

	mov.u32 	%r65, 0;
	bra.uni 	BB9_10;

BB9_9:
	cvt.rzi.ftz.s32.f32	%r44, %f93;
	neg.s32 	%r65, %r44;

BB9_10:
	setp.gt.ftz.f32	%p12, %f4, %f93;
	selp.f32	%f99, %f4, %f93, %p12;
	add.ftz.f32 	%f15, %f99, %f5;
	setp.ltu.ftz.f32	%p13, %f15, %f8;
	mov.u32 	%r64, %r25;
	@%p13 bra 	BB9_12;

	sub.ftz.f32 	%f100, %f15, %f8;
	sub.ftz.f32 	%f101, %f5, %f100;
	cvt.rzi.ftz.s32.f32	%r6, %f101;
	mov.u32 	%r64, %r6;

BB9_12:
	mov.u32 	%r7, %r64;
	setp.lt.ftz.f32	%p14, %f90, 0f00000000;
	@%p14 bra 	BB9_14;

	mov.u32 	%r68, 0;
	bra.uni 	BB9_15;

BB9_14:
	cvt.rzi.ftz.s32.f32	%r46, %f90;
	neg.s32 	%r68, %r46;

BB9_15:
	mov.u32 	%r9, %r68;
	setp.gt.ftz.f32	%p15, %f3, %f90;
	selp.f32	%f105, %f3, %f90, %p15;
	add.ftz.f32 	%f16, %f105, %f5;
	setp.ltu.ftz.f32	%p16, %f16, %f9;
	mov.u32 	%r63, %r25;
	@%p16 bra 	BB9_17;

	sub.ftz.f32 	%f106, %f16, %f9;
	sub.ftz.f32 	%f107, %f5, %f106;
	cvt.rzi.ftz.s32.f32	%r63, %f107;

BB9_17:
	mov.f32 	%f261, 0f00000000;
	setp.lt.s32	%p17, %r65, %r7;
	@%p17 bra 	BB9_19;

	mov.f32 	%f260, %f261;
	mov.f32 	%f259, %f261;
	mov.f32 	%f258, %f261;
	mov.f32 	%f257, %f261;
	mov.f32 	%f256, %f261;
	mov.f32 	%f255, %f261;
	mov.u32 	%r69, 0;
	bra.uni 	BB9_24;

BB9_19:
	mov.f32 	%f260, %f261;
	mov.f32 	%f259, %f261;
	mov.f32 	%f258, %f261;
	mov.f32 	%f257, %f261;
	mov.f32 	%f256, %f261;
	mov.f32 	%f255, %f261;
	mov.u32 	%r69, 0;

BB9_20:
	setp.ge.s32	%p18, %r9, %r63;
	@%p18 bra 	BB9_23;

	add.s32 	%r49, %r65, 1;
	cvt.rn.f32.s32	%f123, %r49;
	add.ftz.f32 	%f124, %f93, %f123;
	add.ftz.f32 	%f25, %f124, 0f3F000000;
	add.s32 	%r50, %r65, -1;
	cvt.rn.f32.s32	%f125, %r50;
	add.ftz.f32 	%f126, %f93, %f125;
	add.ftz.f32 	%f26, %f126, 0f3F000000;
	mov.u32 	%r67, %r9;

BB9_22:
	mov.u32 	%r14, %r67;
	cvt.rn.f32.s32	%f254, %r65;
	cvt.rn.f32.s32	%f163, %r14;
	add.ftz.f32 	%f164, %f3, %f163;
	add.ftz.f32 	%f131, %f164, 0f3F000000;
	add.ftz.f32 	%f165, %f4, %f254;
	add.ftz.f32 	%f132, %f165, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f127, %f128, %f129, %f130}, [lum1Tex, {%f131, %f132}];
	// inline asm
	add.ftz.f32 	%f167, %f90, %f163;
	add.ftz.f32 	%f161, %f167, 0f3F000000;
	add.ftz.f32 	%f169, %f93, %f254;
	add.ftz.f32 	%f150, %f169, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f133, %f134, %f135, %f136}, [lum2Tex, {%f161, %f150}];
	// inline asm
	sub.ftz.f32 	%f170, %f127, %f133;
	sub.ftz.f32 	%f171, %f128, %f134;
	sub.ftz.f32 	%f172, %f129, %f135;
	abs.ftz.f32 	%f173, %f170;
	abs.ftz.f32 	%f174, %f171;
	add.ftz.f32 	%f175, %f173, %f174;
	abs.ftz.f32 	%f176, %f172;
	add.ftz.f32 	%f177, %f175, %f176;
	mov.f32 	%f178, 0f40400000;
	div.approx.ftz.f32 	%f179, %f177, %f178;
	add.ftz.f32 	%f255, %f255, %f179;
	mul.ftz.f32 	%f180, %f171, %f171;
	fma.rn.ftz.f32 	%f181, %f170, %f170, %f180;
	fma.rn.ftz.f32 	%f182, %f172, %f172, %f181;
	add.ftz.f32 	%f256, %f256, %f182;
	add.s32 	%r69, %r69, 1;
	add.s32 	%r17, %r14, 1;
	cvt.rn.f32.s32	%f183, %r17;
	add.ftz.f32 	%f184, %f90, %f183;
	add.ftz.f32 	%f143, %f184, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f139, %f140, %f141, %f142}, [lum2Tex, {%f143, %f150}];
	// inline asm
	add.s32 	%r51, %r14, -1;
	cvt.rn.f32.s32	%f185, %r51;
	add.ftz.f32 	%f186, %f90, %f185;
	add.ftz.f32 	%f149, %f186, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f145, %f146, %f147, %f148}, [lum2Tex, {%f149, %f150}];
	// inline asm
	sub.ftz.f32 	%f187, %f139, %f145;
	sub.ftz.f32 	%f188, %f140, %f146;
	sub.ftz.f32 	%f189, %f141, %f147;
	mov.f32 	%f190, 0f40000000;
	div.approx.ftz.f32 	%f191, %f187, %f190;
	div.approx.ftz.f32 	%f192, %f188, %f190;
	div.approx.ftz.f32 	%f193, %f189, %f190;
	// inline asm
	tex.2d.v4.f32.f32 {%f151, %f152, %f153, %f154}, [lum2Tex, {%f161, %f25}];
	// inline asm
	// inline asm
	tex.2d.v4.f32.f32 {%f157, %f158, %f159, %f160}, [lum2Tex, {%f161, %f26}];
	// inline asm
	sub.ftz.f32 	%f194, %f151, %f157;
	sub.ftz.f32 	%f195, %f152, %f158;
	sub.ftz.f32 	%f196, %f153, %f159;
	div.approx.ftz.f32 	%f197, %f194, %f190;
	div.approx.ftz.f32 	%f198, %f195, %f190;
	div.approx.ftz.f32 	%f199, %f196, %f190;
	fma.rn.ftz.f32 	%f200, %f191, %f191, %f261;
	fma.rn.ftz.f32 	%f201, %f192, %f192, %f200;
	fma.rn.ftz.f32 	%f261, %f193, %f193, %f201;
	fma.rn.ftz.f32 	%f202, %f191, %f197, %f260;
	fma.rn.ftz.f32 	%f203, %f192, %f198, %f202;
	fma.rn.ftz.f32 	%f260, %f193, %f199, %f203;
	fma.rn.ftz.f32 	%f204, %f197, %f197, %f259;
	fma.rn.ftz.f32 	%f205, %f198, %f198, %f204;
	fma.rn.ftz.f32 	%f259, %f199, %f199, %f205;
	fma.rn.ftz.f32 	%f206, %f191, %f170, %f258;
	fma.rn.ftz.f32 	%f207, %f192, %f171, %f206;
	fma.rn.ftz.f32 	%f258, %f193, %f172, %f207;
	fma.rn.ftz.f32 	%f208, %f197, %f170, %f257;
	fma.rn.ftz.f32 	%f209, %f198, %f171, %f208;
	fma.rn.ftz.f32 	%f257, %f199, %f172, %f209;
	setp.lt.s32	%p19, %r17, %r63;
	mov.u32 	%r67, %r17;
	@%p19 bra 	BB9_22;

BB9_23:
	add.s32 	%r65, %r65, 1;
	setp.lt.s32	%p20, %r65, %r7;
	@%p20 bra 	BB9_20;

BB9_24:
	setp.gt.s32	%p21, %r69, %r25;
	@%p21 bra 	BB9_26;

	mov.f32 	%f268, %f89;
	mov.f32 	%f270, %f268;
	bra.uni 	BB9_38;

BB9_26:
	cvt.rn.f32.s32	%f211, %r69;
	div.approx.ftz.f32 	%f55, %f261, %f211;
	div.approx.ftz.f32 	%f56, %f260, %f211;
	div.approx.ftz.f32 	%f57, %f259, %f211;
	div.approx.ftz.f32 	%f58, %f258, %f211;
	div.approx.ftz.f32 	%f59, %f257, %f211;
	div.approx.ftz.f32 	%f60, %f256, %f211;
	div.approx.ftz.f32 	%f61, %f255, %f211;
	setp.geu.ftz.f32	%p22, %f61, %f86;
	@%p22 bra 	BB9_28;

	mov.f32 	%f252, 0f00000000;
	mov.f32 	%f270, %f252;
	bra.uni 	BB9_38;

BB9_28:
	add.ftz.f32 	%f212, %f55, %f57;
	mul.ftz.f32 	%f213, %f56, %f56;
	mul.ftz.f32 	%f214, %f55, %f57;
	sub.ftz.f32 	%f215, %f214, %f213;
	mul.ftz.f32 	%f216, %f215, 0fC0800000;
	fma.rn.ftz.f32 	%f217, %f212, %f212, %f216;
	sqrt.rn.ftz.f32 	%f218, %f217;
	add.ftz.f32 	%f219, %f212, %f218;
	mov.f32 	%f220, 0f40000000;
	div.approx.ftz.f32 	%f221, %f219, %f220;
	sub.ftz.f32 	%f222, %f212, %f218;
	div.approx.ftz.f32 	%f223, %f222, %f220;
	abs.ftz.f32 	%f224, %f221;
	abs.ftz.f32 	%f225, %f223;
	setp.gt.ftz.f32	%p23, %f224, %f225;
	selp.f32	%f62, %f224, %f225, %p23;
	setp.lt.ftz.f32	%p24, %f224, %f225;
	selp.f32	%f63, %f224, %f225, %p24;
	setp.neu.ftz.f32	%p25, %f63, 0f00000000;
	@%p25 bra 	BB9_30;

	mov.f32 	%f262, 0f7F7FFFFF;
	bra.uni 	BB9_31;

BB9_30:
	div.approx.ftz.f32 	%f262, %f62, %f63;

BB9_31:
	setp.gt.ftz.f32	%p26, %f262, 0f41200000;
	@%p26 bra 	BB9_33;

	mul.ftz.f32 	%f227, %f262, 0f3DCCCCCD;
	fma.rn.ftz.f32 	%f228, %f227, %f60, %f55;
	fma.rn.ftz.f32 	%f229, %f227, %f60, %f57;
	mul.ftz.f32 	%f230, %f228, %f229;
	add.ftz.f32 	%f231, %f56, 0f00000000;
	mul.ftz.f32 	%f232, %f231, %f231;
	sub.ftz.f32 	%f233, %f230, %f232;
	rcp.approx.ftz.f32 	%f234, %f233;
	neg.ftz.f32 	%f235, %f231;
	mul.ftz.f32 	%f236, %f229, %f234;
	mul.ftz.f32 	%f237, %f234, %f235;
	mul.ftz.f32 	%f238, %f228, %f234;
	mul.ftz.f32 	%f239, %f237, %f59;
	fma.rn.ftz.f32 	%f265, %f236, %f58, %f239;
	mul.ftz.f32 	%f240, %f238, %f59;
	fma.rn.ftz.f32 	%f266, %f237, %f58, %f240;
	bra.uni 	BB9_37;

BB9_33:
	setp.neu.ftz.f32	%p27, %f56, 0f00000000;
	@%p27 bra 	BB9_35;

	mov.f32 	%f264, 0f3F800000;
	mov.f32 	%f263, 0f00000000;
	bra.uni 	BB9_36;

BB9_35:
	sub.ftz.f32 	%f241, %f62, %f55;
	div.approx.ftz.f32 	%f242, %f241, %f56;
	fma.rn.ftz.f32 	%f243, %f242, %f242, 0f3F800000;
	mov.f32 	%f244, 0f3F800000;
	sqrt.rn.ftz.f32 	%f245, %f243;
	div.approx.ftz.f32 	%f263, %f244, %f245;
	div.approx.ftz.f32 	%f264, %f242, %f245;

BB9_36:
	mul.ftz.f32 	%f248, %f264, %f59;
	fma.rn.ftz.f32 	%f249, %f263, %f58, %f248;
	add.ftz.f32 	%f250, %f62, 0f358637BD;
	div.approx.ftz.f32 	%f251, %f249, %f250;
	mul.ftz.f32 	%f265, %f263, %f251;
	mul.ftz.f32 	%f266, %f264, %f251;

BB9_37:
	add.ftz.f32 	%f282, %f282, %f265;
	add.ftz.f32 	%f294, %f294, %f266;
	mov.f32 	%f270, %f61;

BB9_38:
	mov.f32 	%f293, %f294;
	mov.f32 	%f281, %f282;
	mov.f32 	%f80, %f270;
	setp.lt.ftz.f32	%p28, %f80, %f271;
	selp.f32	%f295, %f293, %f295, %p28;
	selp.f32	%f283, %f281, %f283, %p28;
	selp.f32	%f271, %f80, %f271, %p28;
	add.s32 	%r61, %r61, 1;
	setp.lt.s32	%p29, %r61, %r27;
	setp.gt.ftz.f32	%p30, %f80, 0f00000000;
	and.pred  	%p31, %p30, %p29;
	mov.f32 	%f280, %f283;
	mov.f32 	%f292, %f295;
	@%p31 bra 	BB9_3;

BB9_39:
	ld.param.u64 	%rd21, [_d_lucas_kanade_kernel_param_0];
	cvta.to.global.u64 	%rd20, %rd21;
	mul.wide.s32 	%rd18, %r36, 8;
	add.s64 	%rd19, %rd20, %rd18;
	st.global.v2.f32 	[%rd19], {%f280, %f292};

BB9_40:
	ret;
}

//-----------------------------------------------------------------------
// _d_block_vec_smoothing_kernel_texmem_time
//
// One thread per (x, y) position, where
//   x = ntid.x * ctaid.x + tid.x,  y = ntid.y * ctaid.y + tid.y.
// The thread gathers the distinct 2-component vectors found in the 3x3
// neighbourhood of (x, y) in uvTex, scores each distinct candidate with
// a clamped absolute-difference sum between im1Tex and im2Tex plus a
// term proportional to its distance from the neighbourhood mean, and
// writes the lowest-scoring candidate to the output buffer.
//
// Parameters (as used by the code below):
//   param_0 (u64): output buffer (converted with cvta.to.global); one
//                  float2 (8 bytes) is stored at element y*param_1 + x.
//   param_1 (u32): row pitch of the output, in float2 elements.
//   param_2 (u32): exclusive upper bound for x.
//   param_3 (u32): exclusive upper bound for y.
//   param_4 (u32): side length of the square window summed per
//                  candidate; (param_4 - param_5) scales (x, y) to
//                  image coordinates and param_4^2 scales the penalty.
//                  Presumably the block size -- TODO confirm with caller.
//   param_5 (u32): subtracted from param_4 to form the coordinate step.
//                  Presumably the block overlap -- TODO confirm.
//   param_6 (f32): penalty weight; used as (param_6 + 1.0).
//   param_7 (f32): fraction t in the sampling offsets: im1Tex is sampled
//                  at base - t*vec, im2Tex at base + (1 - t)*vec.
//   param_8..param_10 (u64): never read by this kernel.
//
// Local storage: __local_depot10 is 72 bytes = 9 float2 slots holding
// the distinct candidate vectors; slot 0 is always the centre sample.
//-----------------------------------------------------------------------
.visible .entry _d_block_vec_smoothing_kernel_texmem_time(
	.param .u64 _d_block_vec_smoothing_kernel_texmem_time_param_0,
	.param .u32 _d_block_vec_smoothing_kernel_texmem_time_param_1,
	.param .u32 _d_block_vec_smoothing_kernel_texmem_time_param_2,
	.param .u32 _d_block_vec_smoothing_kernel_texmem_time_param_3,
	.param .u32 _d_block_vec_smoothing_kernel_texmem_time_param_4,
	.param .u32 _d_block_vec_smoothing_kernel_texmem_time_param_5,
	.param .f32 _d_block_vec_smoothing_kernel_texmem_time_param_6,
	.param .f32 _d_block_vec_smoothing_kernel_texmem_time_param_7,
	.param .u64 _d_block_vec_smoothing_kernel_texmem_time_param_8,
	.param .u64 _d_block_vec_smoothing_kernel_texmem_time_param_9,
	.param .u64 _d_block_vec_smoothing_kernel_texmem_time_param_10
)
{
	.local .align 8 .b8 	__local_depot10[72];
	.reg .b64 	%SP;
	.reg .b64 	%SPL;
	.reg .pred 	%p<46>;
	.reg .s16 	%rs<45>;
	.reg .s32 	%r<103>;
	.reg .f32 	%f<211>;
	.reg .s64 	%rd<67>;


	// %rd19 = base address of the 9-slot candidate array in local memory.
	mov.u64 	%SPL, __local_depot10;
	ld.param.u64 	%rd18, [_d_block_vec_smoothing_kernel_texmem_time_param_0];
	ld.param.u32 	%r31, [_d_block_vec_smoothing_kernel_texmem_time_param_1];
	ld.param.u32 	%r34, [_d_block_vec_smoothing_kernel_texmem_time_param_2];
	ld.param.u32 	%r35, [_d_block_vec_smoothing_kernel_texmem_time_param_3];
	ld.param.u32 	%r32, [_d_block_vec_smoothing_kernel_texmem_time_param_4];
	ld.param.u32 	%r33, [_d_block_vec_smoothing_kernel_texmem_time_param_5];
	ld.param.f32 	%f67, [_d_block_vec_smoothing_kernel_texmem_time_param_6];
	ld.param.f32 	%f68, [_d_block_vec_smoothing_kernel_texmem_time_param_7];
	add.u64 	%rd19, %SPL, 0;
	// %r1 = x, %r2 = y (global thread coordinates).
	mov.u32 	%r36, %ntid.x;
	mov.u32 	%r37, %ctaid.x;
	mov.u32 	%r38, %tid.x;
	mad.lo.s32 	%r1, %r36, %r37, %r38;
	mov.u32 	%r39, %ntid.y;
	mov.u32 	%r40, %ctaid.y;
	mov.u32 	%r41, %tid.y;
	mad.lo.s32 	%r2, %r39, %r40, %r41;
	// Threads with x >= param_2 or y >= param_3 return without writing.
	setp.lt.s32	%p1, %r1, %r34;
	setp.lt.s32	%p2, %r2, %r35;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB10_49;
	bra.uni 	BB10_1;

BB10_1:
	// Zero all 72 bytes of the candidate array, one byte per store.
	// %rs38 (= 0) is reused afterwards as the "duplicate found" flag of
	// the first search loop (BB10_2).
	mov.u16 	%rs38, 0;
	st.local.u8 	[%rd19+71], %rs38;
	st.local.u8 	[%rd19+70], %rs38;
	st.local.u8 	[%rd19+69], %rs38;
	st.local.u8 	[%rd19+68], %rs38;
	st.local.u8 	[%rd19+67], %rs38;
	st.local.u8 	[%rd19+66], %rs38;
	st.local.u8 	[%rd19+65], %rs38;
	st.local.u8 	[%rd19+64], %rs38;
	st.local.u8 	[%rd19+63], %rs38;
	st.local.u8 	[%rd19+62], %rs38;
	st.local.u8 	[%rd19+61], %rs38;
	st.local.u8 	[%rd19+60], %rs38;
	st.local.u8 	[%rd19+59], %rs38;
	st.local.u8 	[%rd19+58], %rs38;
	st.local.u8 	[%rd19+57], %rs38;
	st.local.u8 	[%rd19+56], %rs38;
	st.local.u8 	[%rd19+55], %rs38;
	st.local.u8 	[%rd19+54], %rs38;
	st.local.u8 	[%rd19+53], %rs38;
	st.local.u8 	[%rd19+52], %rs38;
	st.local.u8 	[%rd19+51], %rs38;
	st.local.u8 	[%rd19+50], %rs38;
	st.local.u8 	[%rd19+49], %rs38;
	st.local.u8 	[%rd19+48], %rs38;
	st.local.u8 	[%rd19+47], %rs38;
	st.local.u8 	[%rd19+46], %rs38;
	st.local.u8 	[%rd19+45], %rs38;
	st.local.u8 	[%rd19+44], %rs38;
	st.local.u8 	[%rd19+43], %rs38;
	st.local.u8 	[%rd19+42], %rs38;
	st.local.u8 	[%rd19+41], %rs38;
	st.local.u8 	[%rd19+40], %rs38;
	st.local.u8 	[%rd19+39], %rs38;
	st.local.u8 	[%rd19+38], %rs38;
	st.local.u8 	[%rd19+37], %rs38;
	st.local.u8 	[%rd19+36], %rs38;
	st.local.u8 	[%rd19+35], %rs38;
	st.local.u8 	[%rd19+34], %rs38;
	st.local.u8 	[%rd19+33], %rs38;
	st.local.u8 	[%rd19+32], %rs38;
	st.local.u8 	[%rd19+31], %rs38;
	st.local.u8 	[%rd19+30], %rs38;
	st.local.u8 	[%rd19+29], %rs38;
	st.local.u8 	[%rd19+28], %rs38;
	st.local.u8 	[%rd19+27], %rs38;
	st.local.u8 	[%rd19+26], %rs38;
	st.local.u8 	[%rd19+25], %rs38;
	st.local.u8 	[%rd19+24], %rs38;
	st.local.u8 	[%rd19+23], %rs38;
	st.local.u8 	[%rd19+22], %rs38;
	st.local.u8 	[%rd19+21], %rs38;
	st.local.u8 	[%rd19+20], %rs38;
	st.local.u8 	[%rd19+19], %rs38;
	st.local.u8 	[%rd19+18], %rs38;
	st.local.u8 	[%rd19+17], %rs38;
	st.local.u8 	[%rd19+16], %rs38;
	st.local.u8 	[%rd19+15], %rs38;
	st.local.u8 	[%rd19+14], %rs38;
	st.local.u8 	[%rd19+13], %rs38;
	st.local.u8 	[%rd19+12], %rs38;
	st.local.u8 	[%rd19+11], %rs38;
	st.local.u8 	[%rd19+10], %rs38;
	st.local.u8 	[%rd19+9], %rs38;
	st.local.u8 	[%rd19+8], %rs38;
	st.local.u8 	[%rd19+7], %rs38;
	st.local.u8 	[%rd19+6], %rs38;
	st.local.u8 	[%rd19+5], %rs38;
	st.local.u8 	[%rd19+4], %rs38;
	st.local.u8 	[%rd19+3], %rs38;
	st.local.u8 	[%rd19+2], %rs38;
	st.local.u8 	[%rd19+1], %rs38;
	st.local.u8 	[%rd19], %rs38;
	// Texture coordinates: %f73 = x, %f79 = x-1, %f86 = y (as floats).
	cvt.rn.f32.s32	%f86, %r2;
	cvt.rn.f32.s32	%f73, %r1;
	add.s32 	%r43, %r1, -1;
	cvt.rn.f32.s32	%f79, %r43;
	// Sample 1: uvTex(x, y) -> slot 0 (the centre vector).
	// inline asm
	tex.2d.v4.f32.f32 {%f69, %f70, %f71, %f72}, [uvTex, {%f73, %f86}];
	// inline asm
	st.local.v2.f32 	[%rd19], {%f69, %f70};
	// Sample 2: uvTex(x-1, y).
	// inline asm
	tex.2d.v4.f32.f32 {%f75, %f76, %f77, %f78}, [uvTex, {%f79, %f86}];
	// inline asm
	// %r3 = number of distinct vectors so far: 1 if sample 2 equals
	// sample 1 in both components, otherwise 2.
	setp.eq.ftz.f32	%p4, %f75, %f69;
	setp.eq.ftz.f32	%p5, %f76, %f70;
	and.pred  	%p6, %p4, %p5;
	selp.b32	%r3, 1, 2, %p6;
	// (%f89, %f90) = component sums of samples 1 and 2; every later
	// sample is added to these sums whether or not it is a duplicate.
	add.ftz.f32 	%f87, %f69, 0f00000000;
	add.ftz.f32 	%f88, %f70, 0f00000000;
	add.ftz.f32 	%f89, %f87, %f75;
	add.ftz.f32 	%f90, %f88, %f76;
	// %rd26 = address of slot[%r3], where sample 3 will be written.
	mul.wide.u32 	%rd25, %r3, 8;
	add.s64 	%rd26, %rd19, %rd25;
	add.s32 	%r44, %r1, 1;
	cvt.rn.f32.s32	%f85, %r44;
	// Sample 2 is stored in slot 1 unconditionally; when it duplicated
	// sample 1 (%r3 == 1) the store of sample 3 below overwrites it.
	st.local.v2.f32 	[%rd19+8], {%f75, %f76};
	// Sample 3: uvTex(x+1, y) -> slot[%r3].
	// inline asm
	tex.2d.v4.f32.f32 {%f81, %f82, %f83, %f84}, [uvTex, {%f85, %f86}];
	// inline asm
	st.local.v2.f32 	[%rd26], {%f81, %f82};
	ld.local.f32 	%f7, [%rd26];
	add.ftz.f32 	%f5, %f89, %f7;
	add.ftz.f32 	%f6, %f90, %f82;
	mov.u32 	%r92, 0;
	mov.u64 	%rd66, %rd19;

	// Search slots 0..%r3-1 for a vector equal to sample 3; %rs38 is set
	// to 1 on a match. The same search pattern is repeated after every
	// following sample.
BB10_2:
	ld.local.f32 	%f91, [%rd66];
	setp.neu.ftz.f32	%p7, %f7, %f91;
	@%p7 bra 	BB10_5;

	ld.local.f32 	%f92, [%rd66+4];
	setp.neu.ftz.f32	%p8, %f82, %f92;
	@%p8 bra 	BB10_5;

	mov.u16 	%rs38, 1;

BB10_5:
	add.s64 	%rd66, %rd66, 8;
	add.s32 	%r92, %r92, 1;
	setp.lt.s32	%p9, %r92, %r3;
	@%p9 bra 	BB10_2;

	// %r6 = distinct count after sample 3 (incremented only when no
	// match was found, so a duplicate's slot is reused by the next store).
	and.b16  	%rs19, %rs38, 255;
	setp.eq.s16	%p10, %rs19, 0;
	mov.u16 	%rs39, 0;
	selp.u32	%r46, 1, 0, %p10;
	add.s32 	%r6, %r46, %r3;
	add.s32 	%r51, %r2, -1;
	mul.wide.u32 	%rd29, %r6, 8;
	add.s64 	%rd30, %rd19, %rd29;
	cvt.rn.f32.s32	%f98, %r51;
	// Sample 4: uvTex(x, y-1) -> slot[%r6].
	// inline asm
	tex.2d.v4.f32.f32 {%f93, %f94, %f95, %f96}, [uvTex, {%f73, %f98}];
	// inline asm
	st.local.v2.f32 	[%rd30], {%f93, %f94};
	ld.local.f32 	%f12, [%rd30];
	add.ftz.f32 	%f10, %f5, %f12;
	add.ftz.f32 	%f11, %f6, %f94;
	mov.u32 	%r93, 0;
	mov.u64 	%rd65, %rd19;

	// Duplicate search for sample 4 over slots 0..%r6-1.
BB10_7:
	ld.local.f32 	%f99, [%rd65];
	setp.neu.ftz.f32	%p11, %f12, %f99;
	@%p11 bra 	BB10_10;

	ld.local.f32 	%f100, [%rd65+4];
	setp.neu.ftz.f32	%p12, %f94, %f100;
	@%p12 bra 	BB10_10;

	mov.u16 	%rs39, 1;

BB10_10:
	add.s64 	%rd65, %rd65, 8;
	add.s32 	%r93, %r93, 1;
	setp.lt.s32	%p13, %r93, %r6;
	@%p13 bra 	BB10_7;

	// %r9 = distinct count after sample 4.
	and.b16  	%rs22, %rs39, 255;
	setp.eq.s16	%p14, %rs22, 0;
	mov.u16 	%rs40, 0;
	selp.u32	%r53, 1, 0, %p14;
	add.s32 	%r9, %r53, %r6;
	mul.wide.u32 	%rd32, %r9, 8;
	add.s64 	%rd33, %rd19, %rd32;
	// Sample 5: uvTex(x-1, y-1) -> slot[%r9].
	// inline asm
	tex.2d.v4.f32.f32 {%f101, %f102, %f103, %f104}, [uvTex, {%f79, %f98}];
	// inline asm
	st.local.v2.f32 	[%rd33], {%f101, %f102};
	ld.local.f32 	%f16, [%rd33];
	add.ftz.f32 	%f14, %f10, %f16;
	add.ftz.f32 	%f15, %f11, %f102;
	mov.u32 	%r94, 0;
	mov.u64 	%rd64, %rd19;

	// Duplicate search for sample 5 over slots 0..%r9-1.
BB10_12:
	ld.local.f32 	%f107, [%rd64];
	setp.neu.ftz.f32	%p15, %f16, %f107;
	@%p15 bra 	BB10_15;

	ld.local.f32 	%f108, [%rd64+4];
	setp.neu.ftz.f32	%p16, %f102, %f108;
	@%p16 bra 	BB10_15;

	mov.u16 	%rs40, 1;

BB10_15:
	add.s64 	%rd64, %rd64, 8;
	add.s32 	%r94, %r94, 1;
	setp.lt.s32	%p17, %r94, %r9;
	@%p17 bra 	BB10_12;

	// %r12 = distinct count after sample 5.
	and.b16  	%rs25, %rs40, 255;
	setp.eq.s16	%p18, %rs25, 0;
	mov.u16 	%rs41, 0;
	selp.u32	%r55, 1, 0, %p18;
	add.s32 	%r12, %r55, %r9;
	mul.wide.u32 	%rd35, %r12, 8;
	add.s64 	%rd36, %rd19, %rd35;
	// Sample 6: uvTex(x+1, y-1) -> slot[%r12].
	// inline asm
	tex.2d.v4.f32.f32 {%f109, %f110, %f111, %f112}, [uvTex, {%f85, %f98}];
	// inline asm
	st.local.v2.f32 	[%rd36], {%f109, %f110};
	ld.local.f32 	%f20, [%rd36];
	add.ftz.f32 	%f18, %f14, %f20;
	add.ftz.f32 	%f19, %f15, %f110;
	mov.u32 	%r95, 0;
	mov.u64 	%rd63, %rd19;

	// Duplicate search for sample 6 over slots 0..%r12-1.
BB10_17:
	ld.local.f32 	%f115, [%rd63];
	setp.neu.ftz.f32	%p19, %f20, %f115;
	@%p19 bra 	BB10_20;

	ld.local.f32 	%f116, [%rd63+4];
	setp.neu.ftz.f32	%p20, %f110, %f116;
	@%p20 bra 	BB10_20;

	mov.u16 	%rs41, 1;

BB10_20:
	add.s64 	%rd63, %rd63, 8;
	add.s32 	%r95, %r95, 1;
	setp.lt.s32	%p21, %r95, %r12;
	@%p21 bra 	BB10_17;

	// %r15 = distinct count after sample 6.
	and.b16  	%rs28, %rs41, 255;
	setp.eq.s16	%p22, %rs28, 0;
	mov.u16 	%rs42, 0;
	selp.u32	%r57, 1, 0, %p22;
	add.s32 	%r15, %r57, %r12;
	add.s32 	%r62, %r2, 1;
	mul.wide.u32 	%rd38, %r15, 8;
	add.s64 	%rd39, %rd19, %rd38;
	cvt.rn.f32.s32	%f122, %r62;
	// Sample 7: uvTex(x, y+1) -> slot[%r15].
	// inline asm
	tex.2d.v4.f32.f32 {%f117, %f118, %f119, %f120}, [uvTex, {%f73, %f122}];
	// inline asm
	st.local.v2.f32 	[%rd39], {%f117, %f118};
	ld.local.f32 	%f25, [%rd39];
	add.ftz.f32 	%f23, %f18, %f25;
	add.ftz.f32 	%f24, %f19, %f118;
	mov.u32 	%r96, 0;
	mov.u64 	%rd62, %rd19;

	// Duplicate search for sample 7 over slots 0..%r15-1.
BB10_22:
	ld.local.f32 	%f123, [%rd62];
	setp.neu.ftz.f32	%p23, %f25, %f123;
	@%p23 bra 	BB10_25;

	ld.local.f32 	%f124, [%rd62+4];
	setp.neu.ftz.f32	%p24, %f118, %f124;
	@%p24 bra 	BB10_25;

	mov.u16 	%rs42, 1;

BB10_25:
	add.s64 	%rd62, %rd62, 8;
	add.s32 	%r96, %r96, 1;
	setp.lt.s32	%p25, %r96, %r15;
	@%p25 bra 	BB10_22;

	// %r18 = distinct count after sample 7.
	and.b16  	%rs31, %rs42, 255;
	setp.eq.s16	%p26, %rs31, 0;
	mov.u16 	%rs43, 0;
	selp.u32	%r64, 1, 0, %p26;
	add.s32 	%r18, %r64, %r15;
	mul.wide.s32 	%rd41, %r18, 8;
	add.s64 	%rd42, %rd19, %rd41;
	// Sample 8: uvTex(x-1, y+1) -> slot[%r18].
	// inline asm
	tex.2d.v4.f32.f32 {%f125, %f126, %f127, %f128}, [uvTex, {%f79, %f122}];
	// inline asm
	st.local.v2.f32 	[%rd42], {%f125, %f126};
	ld.local.f32 	%f29, [%rd42];
	add.ftz.f32 	%f27, %f23, %f29;
	add.ftz.f32 	%f28, %f24, %f126;
	mov.u32 	%r97, 0;
	mov.u64 	%rd61, %rd19;

	// Duplicate search for sample 8 over slots 0..%r18-1.
BB10_27:
	ld.local.f32 	%f131, [%rd61];
	setp.neu.ftz.f32	%p27, %f29, %f131;
	@%p27 bra 	BB10_30;

	ld.local.f32 	%f132, [%rd61+4];
	setp.neu.ftz.f32	%p28, %f126, %f132;
	@%p28 bra 	BB10_30;

	mov.u16 	%rs43, 1;

BB10_30:
	add.s64 	%rd61, %rd61, 8;
	add.s32 	%r97, %r97, 1;
	setp.lt.s32	%p29, %r97, %r18;
	@%p29 bra 	BB10_27;

	// %r21 = distinct count after sample 8.
	and.b16  	%rs33, %rs43, 255;
	setp.eq.s16	%p30, %rs33, 0;
	selp.u32	%r65, 1, 0, %p30;
	add.s32 	%r21, %r65, %r18;
	mul.wide.s32 	%rd44, %r21, 8;
	add.s64 	%rd14, %rd19, %rd44;
	// Sample 9: uvTex(x+1, y+1) -> slot[%r21].
	// inline asm
	tex.2d.v4.f32.f32 {%f133, %f134, %f135, %f136}, [uvTex, {%f85, %f122}];
	// inline asm
	st.local.v2.f32 	[%rd14], {%f133, %f134};
	ld.local.f32 	%f139, [%rd14];
	// (%f31, %f32) = component sums over all nine samples.
	add.ftz.f32 	%f31, %f27, %f139;
	add.ftz.f32 	%f32, %f28, %f134;
	// The last search is only entered when %r21 > 0; otherwise the
	// "found" flag %rs44 stays 0.
	setp.gt.s32	%p31, %r21, 0;
	@%p31 bra 	BB10_33;

	mov.u16 	%rs44, 0;
	bra.uni 	BB10_38;

BB10_33:
	ld.local.f32 	%f33, [%rd14];
	mov.u32 	%r98, 0;
	mov.u16 	%rs44, 0;
	mov.u64 	%rd60, %rd19;

	// Duplicate search for sample 9 over slots 0..%r21-1.
BB10_34:
	mov.u64 	%rd15, %rd60;
	ld.local.f32 	%f140, [%rd15];
	setp.neu.ftz.f32	%p32, %f33, %f140;
	@%p32 bra 	BB10_37;

	ld.local.f32 	%f141, [%rd15+4];
	setp.neu.ftz.f32	%p33, %f134, %f141;
	@%p33 bra 	BB10_37;

	mov.u16 	%rs44, 1;

BB10_37:
	add.s64 	%rd17, %rd15, 8;
	add.s32 	%r98, %r98, 1;
	setp.lt.s32	%p34, %r98, %r21;
	mov.u64 	%rd60, %rd17;
	@%p34 bra 	BB10_34;

BB10_38:
	// %r24 = final number of distinct candidate vectors.
	and.b16  	%rs37, %rs44, 255;
	setp.eq.s16	%p35, %rs37, 0;
	selp.u32	%r67, 1, 0, %p35;
	add.s32 	%r24, %r67, %r21;
	// Default result (%f201, %f208) = slot 0. With fewer than two
	// distinct candidates the scoring loop is skipped and slot 0 is
	// written out directly.
	ld.local.v2.f32 	{%f142, %f143}, [%rd19];
	setp.lt.s32	%p36, %r24, 2;
	mov.f32 	%f35, %f143;
	mov.f32 	%f34, %f142;
	mov.f32 	%f201, %f34;
	mov.f32 	%f208, %f35;
	@%p36 bra 	BB10_48;

	// This second test (%r24 < 1) cannot succeed after the %r24 < 2 test
	// above fell through; it is kept exactly as the compiler emitted it.
	setp.lt.s32	%p37, %r24, 1;
	mov.f32 	%f199, %f34;
	mov.f32 	%f201, %f199;
	mov.f32 	%f206, %f35;
	mov.f32 	%f208, %f206;
	@%p37 bra 	BB10_48;

	// Loop-invariant setup for candidate scoring:
	//   (%f36, %f37) = nine-sample sums / 9.0 (neighbourhood mean)
	//   (%f38, %f39) = (x, y) * (param_4 - param_5), as floats
	//   %f40 = 1.0 - param_7        %f41 = param_6 + 1.0
	//   %f42 = (float)(param_4 * param_4)
	//   %f195 = best score so far, initialised to 1.0e8
	//   %r99 = index of the next slot to load (slot 0 is scored first)
	//   (%f203, %f210) = current candidate, (%f202, %f209) = current best
	mov.f32 	%f145, 0f41100000;
	div.approx.ftz.f32 	%f36, %f31, %f145;
	div.approx.ftz.f32 	%f37, %f32, %f145;
	sub.s32 	%r69, %r32, %r33;
	mul.lo.s32 	%r74, %r1, %r69;
	mul.lo.s32 	%r79, %r2, %r69;
	cvt.rn.f32.s32	%f38, %r74;
	cvt.rn.f32.s32	%f39, %r79;
	mov.f32 	%f146, 0f3F800000;
	sub.ftz.f32 	%f40, %f146, %f68;
	add.ftz.f32 	%f41, %f67, 0f3F800000;
	mul.lo.s32 	%r80, %r32, %r32;
	cvt.rn.f32.s32	%f42, %r80;
	mov.f32 	%f195, 0f4CBEBC20;
	mov.u32 	%r99, 1;
	mov.f32 	%f203, %f34;
	mov.f32 	%f202, %f34;
	mov.f32 	%f210, %f35;
	mov.f32 	%f209, %f35;

	// Per-candidate loop head. Window origins for this candidate:
	//   im1Tex: (%f49, %f51) = base - param_7 * vec
	//   im2Tex: (%f52, %f53) = base + (1 - param_7) * vec
	// %f196 accumulates the candidate's difference sum (starts at 0).
BB10_41:
	mul.ftz.f32 	%f147, %f203, %f68;
	sub.ftz.f32 	%f49, %f38, %f147;
	mul.ftz.f32 	%f148, %f210, %f68;
	sub.ftz.f32 	%f51, %f39, %f148;
	fma.rn.ftz.f32 	%f52, %f203, %f40, %f38;
	fma.rn.ftz.f32 	%f53, %f210, %f40, %f39;
	mov.f32 	%f196, 0f00000000;
	// With param_4 <= 0 the window is empty and the sum stays 0.
	setp.gt.s32	%p38, %r32, 0;
	@%p38 bra 	BB10_42;
	bra.uni 	BB10_47;

BB10_42:
	mov.u32 	%r81, 0;
	mov.u32 	%r102, %r81;

	// Row loop: %r102 = row index; %f55 / %f56 = im1 / im2 row
	// coordinates with a +0.5 offset added.
BB10_43:
	cvt.rn.f32.s32	%f151, %r102;
	add.ftz.f32 	%f152, %f51, %f151;
	add.ftz.f32 	%f55, %f152, 0f3F000000;
	add.ftz.f32 	%f153, %f53, %f151;
	add.ftz.f32 	%f56, %f153, 0f3F000000;
	mov.u32 	%r101, %r81;

	// Column loop test: while column %r27 < param_4 run the body at
	// BB10_46; otherwise advance to the next row, or leave for BB10_47
	// once all rows are done.
BB10_44:
	mov.u32 	%r27, %r101;
	setp.lt.s32	%p39, %r27, %r32;
	@%p39 bra 	BB10_46;

	add.s32 	%r102, %r102, 1;
	setp.lt.s32	%p40, %r102, %r32;
	@%p40 bra 	BB10_43;
	bra.uni 	BB10_47;

	// Window body: fetch one texel from each image and add
	//   min(|min((|d0| + |d1| + |d2|) / 3.0, 1.0)|, 0.3)
	// to %f196, where d0..d2 are the differences of the first three
	// texture components.
BB10_46:
	cvt.rn.f32.s32	%f166, %r27;
	add.ftz.f32 	%f167, %f49, %f166;
	add.ftz.f32 	%f158, %f167, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f154, %f155, %f156, %f157}, [im1Tex, {%f158, %f55}];
	// inline asm
	add.ftz.f32 	%f168, %f52, %f166;
	add.ftz.f32 	%f164, %f168, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f160, %f161, %f162, %f163}, [im2Tex, {%f164, %f56}];
	// inline asm
	sub.ftz.f32 	%f169, %f154, %f160;
	sub.ftz.f32 	%f170, %f155, %f161;
	sub.ftz.f32 	%f171, %f156, %f162;
	abs.ftz.f32 	%f172, %f169;
	abs.ftz.f32 	%f173, %f170;
	add.ftz.f32 	%f174, %f172, %f173;
	abs.ftz.f32 	%f175, %f171;
	add.ftz.f32 	%f176, %f174, %f175;
	mov.f32 	%f177, 0f40400000;
	div.approx.ftz.f32 	%f178, %f176, %f177;
	setp.gt.ftz.f32	%p41, %f178, 0f3F800000;
	selp.f32	%f179, 0f3F800000, %f178, %p41;
	abs.ftz.f32 	%f180, %f179;
	setp.lt.ftz.f32	%p42, %f180, 0f3E99999A;
	selp.f32	%f181, %f180, 0f3E99999A, %p42;
	add.ftz.f32 	%f196, %f196, %f181;
	add.s32 	%r29, %r27, 1;
	// Continue only while the running sum is <= the best score (or the
	// comparison is unordered); otherwise fall through to BB10_47,
	// abandoning the rest of this candidate's window.
	setp.leu.ftz.f32	%p43, %f196, %f195;
	mov.u32 	%r101, %r29;
	@%p43 bra 	BB10_44;

	// Final score for the candidate:
	//   %f192 = %f196 + ((|u - mean_u| + |v - mean_v|) / 400.0)
	//                   * (param_6 + 1.0) / 255.0 * param_4^2
	// The candidate replaces the current best when %f192 < %f195.
BB10_47:
	sub.ftz.f32 	%f182, %f203, %f36;
	abs.ftz.f32 	%f183, %f182;
	sub.ftz.f32 	%f184, %f210, %f37;
	abs.ftz.f32 	%f185, %f184;
	add.ftz.f32 	%f186, %f183, %f185;
	mov.f32 	%f187, 0f43C80000;
	div.approx.ftz.f32 	%f188, %f186, %f187;
	mul.ftz.f32 	%f189, %f188, %f41;
	mov.f32 	%f190, 0f437F0000;
	div.approx.ftz.f32 	%f191, %f189, %f190;
	fma.rn.ftz.f32 	%f192, %f191, %f42, %f196;
	setp.lt.ftz.f32	%p44, %f192, %f195;
	selp.f32	%f208, %f210, %f209, %p44;
	selp.f32	%f201, %f203, %f202, %p44;
	selp.f32	%f195, %f192, %f195, %p44;
	// More candidates left (%r99 < %r24)? Load the next one at BB10_50.
	setp.lt.s32	%p45, %r99, %r24;
	@%p45 bra 	BB10_50;

	// Store the selected vector at out[y * param_1 + x] (8 bytes each).
BB10_48:
	cvta.to.global.u64 	%rd49, %rd18;
	mad.lo.s32 	%r91, %r2, %r31, %r1;
	mul.wide.s32 	%rd50, %r91, 8;
	add.s64 	%rd51, %rd49, %rd50;
	st.global.v2.f32 	[%rd51], {%f201, %f208};

BB10_49:
	ret;

	// Loop latch: load slot[%r99] as the next candidate, advance %r99,
	// and carry the current best forward into (%f202, %f209).
BB10_50:
	mul.wide.s32 	%rd52, %r99, 8;
	add.s64 	%rd53, %rd19, %rd52;
	ld.local.v2.f32 	{%f193, %f194}, [%rd53];
	add.s32 	%r99, %r99, 1;
	mov.f32 	%f210, %f194;
	mov.f32 	%f203, %f193;
	mov.f32 	%f202, %f201;
	mov.f32 	%f209, %f208;
	bra.uni 	BB10_41;
}

//-----------------------------------------------------------------------
// _d_refine_mv_blk_to_pix_kernel_texmem_time
//
// One thread per (x, y) position, where
//   x = ntid.x * ctaid.x + tid.x,  y = ntid.y * ctaid.y + tid.y.
// The thread maps (x, y) to uvTex coordinates by integer division with
// (param_4 - param_5), gathers the distinct 2-component vectors in the
// 3x3 neighbourhood there, scores each distinct candidate with a single
// clamped absolute difference between im1Tex and im2Tex at (x, y), and
// writes the lowest-scoring candidate to the output buffer.
//
// Parameters (as used by the code below):
//   param_0 (u64): output buffer (converted with cvta.to.global); one
//                  float2 (8 bytes) is stored at element y*param_1 + x.
//   param_1 (u32): row pitch of the output, in float2 elements.
//   param_2 (u32): exclusive upper bound for x.
//   param_3 (u32): exclusive upper bound for y.
//   param_4, param_5 (u32): only their difference is used, as the
//                  divisor that maps (x, y) to uvTex coordinates.
//                  Presumably block size and overlap -- TODO confirm.
//   param_6 (f32): fraction t in the sampling offsets: im1Tex is sampled
//                  at (x, y) - t*vec, im2Tex at (x, y) + (1 - t)*vec.
//   param_7..param_9 (u64): never read by this kernel.
//
// NOTE(review): the div.s32 by (param_4 - param_5) has no visible guard
// against a zero divisor -- confirm the host code guarantees
// param_4 != param_5.
//
// Local storage: __local_depot11 is 72 bytes = 9 float2 slots holding
// the distinct candidate vectors; slot 0 is always the centre sample.
//-----------------------------------------------------------------------
.visible .entry _d_refine_mv_blk_to_pix_kernel_texmem_time(
	.param .u64 _d_refine_mv_blk_to_pix_kernel_texmem_time_param_0,
	.param .u32 _d_refine_mv_blk_to_pix_kernel_texmem_time_param_1,
	.param .u32 _d_refine_mv_blk_to_pix_kernel_texmem_time_param_2,
	.param .u32 _d_refine_mv_blk_to_pix_kernel_texmem_time_param_3,
	.param .u32 _d_refine_mv_blk_to_pix_kernel_texmem_time_param_4,
	.param .u32 _d_refine_mv_blk_to_pix_kernel_texmem_time_param_5,
	.param .f32 _d_refine_mv_blk_to_pix_kernel_texmem_time_param_6,
	.param .u64 _d_refine_mv_blk_to_pix_kernel_texmem_time_param_7,
	.param .u64 _d_refine_mv_blk_to_pix_kernel_texmem_time_param_8,
	.param .u64 _d_refine_mv_blk_to_pix_kernel_texmem_time_param_9
)
{
	.local .align 8 .b8 	__local_depot11[72];
	.reg .b64 	%SP;
	.reg .b64 	%SPL;
	.reg .pred 	%p<40>;
	.reg .s16 	%rs<45>;
	.reg .s32 	%r<85>;
	.reg .f32 	%f<158>;
	.reg .s64 	%rd<70>;


	// %rd21 = base address of the 9-slot candidate array in local memory.
	mov.u64 	%SPL, __local_depot11;
	ld.param.u64 	%rd20, [_d_refine_mv_blk_to_pix_kernel_texmem_time_param_0];
	ld.param.u32 	%r28, [_d_refine_mv_blk_to_pix_kernel_texmem_time_param_1];
	ld.param.u32 	%r31, [_d_refine_mv_blk_to_pix_kernel_texmem_time_param_2];
	ld.param.u32 	%r32, [_d_refine_mv_blk_to_pix_kernel_texmem_time_param_3];
	ld.param.u32 	%r29, [_d_refine_mv_blk_to_pix_kernel_texmem_time_param_4];
	ld.param.u32 	%r30, [_d_refine_mv_blk_to_pix_kernel_texmem_time_param_5];
	ld.param.f32 	%f41, [_d_refine_mv_blk_to_pix_kernel_texmem_time_param_6];
	add.u64 	%rd21, %SPL, 0;
	// %r1 = x, %r2 = y (global thread coordinates).
	mov.u32 	%r33, %ntid.x;
	mov.u32 	%r34, %ctaid.x;
	mov.u32 	%r35, %tid.x;
	mad.lo.s32 	%r1, %r33, %r34, %r35;
	mov.u32 	%r36, %ntid.y;
	mov.u32 	%r37, %ctaid.y;
	mov.u32 	%r38, %tid.y;
	mad.lo.s32 	%r2, %r36, %r37, %r38;
	// Threads with x >= param_2 or y >= param_3 return without writing.
	setp.lt.s32	%p1, %r1, %r31;
	setp.lt.s32	%p2, %r2, %r32;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB11_42;
	bra.uni 	BB11_1;

BB11_1:
	// (%r41, %r3) = (x, y) / (param_4 - param_5): the uvTex coordinates
	// of the centre sample (signed integer division).
	sub.s32 	%r40, %r29, %r30;
	div.s32 	%r41, %r1, %r40;
	div.s32 	%r3, %r2, %r40;
	// Zero all 72 bytes of the candidate array, one byte per store.
	// %rs38 (= 0) is reused afterwards as the "duplicate found" flag of
	// the first search loop (BB11_2).
	mov.u16 	%rs38, 0;
	st.local.u8 	[%rd21+71], %rs38;
	st.local.u8 	[%rd21+70], %rs38;
	st.local.u8 	[%rd21+69], %rs38;
	st.local.u8 	[%rd21+68], %rs38;
	st.local.u8 	[%rd21+67], %rs38;
	st.local.u8 	[%rd21+66], %rs38;
	st.local.u8 	[%rd21+65], %rs38;
	st.local.u8 	[%rd21+64], %rs38;
	st.local.u8 	[%rd21+63], %rs38;
	st.local.u8 	[%rd21+62], %rs38;
	st.local.u8 	[%rd21+61], %rs38;
	st.local.u8 	[%rd21+60], %rs38;
	st.local.u8 	[%rd21+59], %rs38;
	st.local.u8 	[%rd21+58], %rs38;
	st.local.u8 	[%rd21+57], %rs38;
	st.local.u8 	[%rd21+56], %rs38;
	st.local.u8 	[%rd21+55], %rs38;
	st.local.u8 	[%rd21+54], %rs38;
	st.local.u8 	[%rd21+53], %rs38;
	st.local.u8 	[%rd21+52], %rs38;
	st.local.u8 	[%rd21+51], %rs38;
	st.local.u8 	[%rd21+50], %rs38;
	st.local.u8 	[%rd21+49], %rs38;
	st.local.u8 	[%rd21+48], %rs38;
	st.local.u8 	[%rd21+47], %rs38;
	st.local.u8 	[%rd21+46], %rs38;
	st.local.u8 	[%rd21+45], %rs38;
	st.local.u8 	[%rd21+44], %rs38;
	st.local.u8 	[%rd21+43], %rs38;
	st.local.u8 	[%rd21+42], %rs38;
	st.local.u8 	[%rd21+41], %rs38;
	st.local.u8 	[%rd21+40], %rs38;
	st.local.u8 	[%rd21+39], %rs38;
	st.local.u8 	[%rd21+38], %rs38;
	st.local.u8 	[%rd21+37], %rs38;
	st.local.u8 	[%rd21+36], %rs38;
	st.local.u8 	[%rd21+35], %rs38;
	st.local.u8 	[%rd21+34], %rs38;
	st.local.u8 	[%rd21+33], %rs38;
	st.local.u8 	[%rd21+32], %rs38;
	st.local.u8 	[%rd21+31], %rs38;
	st.local.u8 	[%rd21+30], %rs38;
	st.local.u8 	[%rd21+29], %rs38;
	st.local.u8 	[%rd21+28], %rs38;
	st.local.u8 	[%rd21+27], %rs38;
	st.local.u8 	[%rd21+26], %rs38;
	st.local.u8 	[%rd21+25], %rs38;
	st.local.u8 	[%rd21+24], %rs38;
	st.local.u8 	[%rd21+23], %rs38;
	st.local.u8 	[%rd21+22], %rs38;
	st.local.u8 	[%rd21+21], %rs38;
	st.local.u8 	[%rd21+20], %rs38;
	st.local.u8 	[%rd21+19], %rs38;
	st.local.u8 	[%rd21+18], %rs38;
	st.local.u8 	[%rd21+17], %rs38;
	st.local.u8 	[%rd21+16], %rs38;
	st.local.u8 	[%rd21+15], %rs38;
	st.local.u8 	[%rd21+14], %rs38;
	st.local.u8 	[%rd21+13], %rs38;
	st.local.u8 	[%rd21+12], %rs38;
	st.local.u8 	[%rd21+11], %rs38;
	st.local.u8 	[%rd21+10], %rs38;
	st.local.u8 	[%rd21+9], %rs38;
	st.local.u8 	[%rd21+8], %rs38;
	st.local.u8 	[%rd21+7], %rs38;
	st.local.u8 	[%rd21+6], %rs38;
	st.local.u8 	[%rd21+5], %rs38;
	st.local.u8 	[%rd21+4], %rs38;
	st.local.u8 	[%rd21+3], %rs38;
	st.local.u8 	[%rd21+2], %rs38;
	st.local.u8 	[%rd21+1], %rs38;
	st.local.u8 	[%rd21], %rs38;
	// Texture coordinates as floats: %f46 = bx, %f52 = bx-1, %f59 = by,
	// where (bx, by) = (%r41, %r3).
	cvt.rn.f32.s32	%f46, %r41;
	cvt.rn.f32.s32	%f59, %r3;
	add.s32 	%r42, %r41, -1;
	cvt.rn.f32.s32	%f52, %r42;
	// Sample 1: uvTex(bx, by) -> slot 0 (the centre vector).
	// inline asm
	tex.2d.v4.f32.f32 {%f42, %f43, %f44, %f45}, [uvTex, {%f46, %f59}];
	// inline asm
	st.local.v2.f32 	[%rd21], {%f42, %f43};
	// Sample 2: uvTex(bx-1, by).
	// inline asm
	tex.2d.v4.f32.f32 {%f48, %f49, %f50, %f51}, [uvTex, {%f52, %f59}];
	// inline asm
	// %r4 = number of distinct vectors so far: 1 if sample 2 equals
	// sample 1 in both components, otherwise 2.
	setp.eq.ftz.f32	%p4, %f48, %f42;
	setp.eq.ftz.f32	%p5, %f49, %f43;
	and.pred  	%p6, %p4, %p5;
	selp.b32	%r4, 1, 2, %p6;
	add.s32 	%r43, %r41, 1;
	cvt.rn.f32.s32	%f58, %r43;
	// Sample 2 is stored in slot 1 unconditionally; when it duplicated
	// sample 1 (%r4 == 1) the store of sample 3 below overwrites it.
	st.local.v2.f32 	[%rd21+8], {%f48, %f49};
	// Sample 3: uvTex(bx+1, by) -> slot[%r4].
	// inline asm
	tex.2d.v4.f32.f32 {%f54, %f55, %f56, %f57}, [uvTex, {%f58, %f59}];
	// inline asm
	mul.wide.u32 	%rd28, %r4, 8;
	add.s64 	%rd29, %rd21, %rd28;
	st.local.v2.f32 	[%rd29], {%f54, %f55};
	mov.u32 	%r77, 0;
	mov.u64 	%rd69, %rd21;

	// Search slots 0..%r4-1 for a vector equal to sample 3; %rs38 is set
	// to 1 on a match. The same search pattern is repeated after every
	// following sample.
BB11_2:
	ld.local.f32 	%f60, [%rd69];
	setp.neu.ftz.f32	%p7, %f54, %f60;
	@%p7 bra 	BB11_5;

	ld.local.f32 	%f61, [%rd69+4];
	setp.neu.ftz.f32	%p8, %f55, %f61;
	@%p8 bra 	BB11_5;

	mov.u16 	%rs38, 1;

BB11_5:
	add.s64 	%rd69, %rd69, 8;
	add.s32 	%r77, %r77, 1;
	setp.lt.s32	%p9, %r77, %r4;
	@%p9 bra 	BB11_2;

	// %r7 = distinct count after sample 3 (incremented only when no
	// match was found, so a duplicate's slot is reused by the next store).
	and.b16  	%rs19, %rs38, 255;
	setp.eq.s16	%p10, %rs19, 0;
	mov.u16 	%rs39, 0;
	selp.u32	%r45, 1, 0, %p10;
	add.s32 	%r7, %r45, %r4;
	add.s32 	%r46, %r3, -1;
	cvt.rn.f32.s32	%f67, %r46;
	// Sample 4: uvTex(bx, by-1) -> slot[%r7].
	// inline asm
	tex.2d.v4.f32.f32 {%f62, %f63, %f64, %f65}, [uvTex, {%f46, %f67}];
	// inline asm
	mul.wide.u32 	%rd31, %r7, 8;
	add.s64 	%rd32, %rd21, %rd31;
	st.local.v2.f32 	[%rd32], {%f62, %f63};
	mov.u32 	%r78, 0;
	mov.u64 	%rd68, %rd21;

	// Duplicate search for sample 4 over slots 0..%r7-1.
BB11_7:
	ld.local.f32 	%f68, [%rd68];
	setp.neu.ftz.f32	%p11, %f62, %f68;
	@%p11 bra 	BB11_10;

	ld.local.f32 	%f69, [%rd68+4];
	setp.neu.ftz.f32	%p12, %f63, %f69;
	@%p12 bra 	BB11_10;

	mov.u16 	%rs39, 1;

BB11_10:
	add.s64 	%rd68, %rd68, 8;
	add.s32 	%r78, %r78, 1;
	setp.lt.s32	%p13, %r78, %r7;
	@%p13 bra 	BB11_7;

	// %r10 = distinct count after sample 4.
	and.b16  	%rs22, %rs39, 255;
	setp.eq.s16	%p14, %rs22, 0;
	mov.u16 	%rs40, 0;
	selp.u32	%r48, 1, 0, %p14;
	add.s32 	%r10, %r48, %r7;
	mul.wide.u32 	%rd34, %r10, 8;
	add.s64 	%rd35, %rd21, %rd34;
	// Sample 5: uvTex(bx-1, by-1) -> slot[%r10].
	// inline asm
	tex.2d.v4.f32.f32 {%f70, %f71, %f72, %f73}, [uvTex, {%f52, %f67}];
	// inline asm
	st.local.v2.f32 	[%rd35], {%f70, %f71};
	mov.u32 	%r79, 0;
	mov.u64 	%rd67, %rd21;

	// Duplicate search for sample 5 over slots 0..%r10-1.
BB11_12:
	ld.local.f32 	%f76, [%rd67];
	setp.neu.ftz.f32	%p15, %f70, %f76;
	@%p15 bra 	BB11_15;

	ld.local.f32 	%f77, [%rd67+4];
	setp.neu.ftz.f32	%p16, %f71, %f77;
	@%p16 bra 	BB11_15;

	mov.u16 	%rs40, 1;

BB11_15:
	add.s64 	%rd67, %rd67, 8;
	add.s32 	%r79, %r79, 1;
	setp.lt.s32	%p17, %r79, %r10;
	@%p17 bra 	BB11_12;

	// %r13 = distinct count after sample 5.
	and.b16  	%rs25, %rs40, 255;
	setp.eq.s16	%p18, %rs25, 0;
	mov.u16 	%rs41, 0;
	selp.u32	%r50, 1, 0, %p18;
	add.s32 	%r13, %r50, %r10;
	mul.wide.u32 	%rd37, %r13, 8;
	add.s64 	%rd38, %rd21, %rd37;
	// Sample 6: uvTex(bx+1, by-1) -> slot[%r13].
	// inline asm
	tex.2d.v4.f32.f32 {%f78, %f79, %f80, %f81}, [uvTex, {%f58, %f67}];
	// inline asm
	st.local.v2.f32 	[%rd38], {%f78, %f79};
	mov.u32 	%r80, 0;
	mov.u64 	%rd66, %rd21;

	// Duplicate search for sample 6 over slots 0..%r13-1.
BB11_17:
	ld.local.f32 	%f84, [%rd66];
	setp.neu.ftz.f32	%p19, %f78, %f84;
	@%p19 bra 	BB11_20;

	ld.local.f32 	%f85, [%rd66+4];
	setp.neu.ftz.f32	%p20, %f79, %f85;
	@%p20 bra 	BB11_20;

	mov.u16 	%rs41, 1;

BB11_20:
	add.s64 	%rd66, %rd66, 8;
	add.s32 	%r80, %r80, 1;
	setp.lt.s32	%p21, %r80, %r13;
	@%p21 bra 	BB11_17;

	// %r16 = distinct count after sample 6.
	and.b16  	%rs28, %rs41, 255;
	setp.eq.s16	%p22, %rs28, 0;
	mov.u16 	%rs42, 0;
	selp.u32	%r52, 1, 0, %p22;
	add.s32 	%r16, %r52, %r13;
	add.s32 	%r53, %r3, 1;
	cvt.rn.f32.s32	%f91, %r53;
	mul.wide.u32 	%rd40, %r16, 8;
	add.s64 	%rd41, %rd21, %rd40;
	// Sample 7: uvTex(bx, by+1) -> slot[%r16].
	// inline asm
	tex.2d.v4.f32.f32 {%f86, %f87, %f88, %f89}, [uvTex, {%f46, %f91}];
	// inline asm
	st.local.v2.f32 	[%rd41], {%f86, %f87};
	mov.u32 	%r81, 0;
	mov.u64 	%rd65, %rd21;

	// Duplicate search for sample 7 over slots 0..%r16-1.
BB11_22:
	ld.local.f32 	%f92, [%rd65];
	setp.neu.ftz.f32	%p23, %f86, %f92;
	@%p23 bra 	BB11_25;

	ld.local.f32 	%f93, [%rd65+4];
	setp.neu.ftz.f32	%p24, %f87, %f93;
	@%p24 bra 	BB11_25;

	mov.u16 	%rs42, 1;

BB11_25:
	add.s64 	%rd65, %rd65, 8;
	add.s32 	%r81, %r81, 1;
	setp.lt.s32	%p25, %r81, %r16;
	@%p25 bra 	BB11_22;

	// %r19 = distinct count after sample 7.
	and.b16  	%rs31, %rs42, 255;
	setp.eq.s16	%p26, %rs31, 0;
	mov.u16 	%rs43, 0;
	selp.u32	%r55, 1, 0, %p26;
	add.s32 	%r19, %r55, %r16;
	mul.wide.s32 	%rd43, %r19, 8;
	add.s64 	%rd44, %rd21, %rd43;
	// Sample 8: uvTex(bx-1, by+1) -> slot[%r19].
	// inline asm
	tex.2d.v4.f32.f32 {%f94, %f95, %f96, %f97}, [uvTex, {%f52, %f91}];
	// inline asm
	st.local.v2.f32 	[%rd44], {%f94, %f95};
	mov.u32 	%r82, 0;
	mov.u64 	%rd64, %rd21;

	// Duplicate search for sample 8 over slots 0..%r19-1.
BB11_27:
	ld.local.f32 	%f100, [%rd64];
	setp.neu.ftz.f32	%p27, %f94, %f100;
	@%p27 bra 	BB11_30;

	ld.local.f32 	%f101, [%rd64+4];
	setp.neu.ftz.f32	%p28, %f95, %f101;
	@%p28 bra 	BB11_30;

	mov.u16 	%rs43, 1;

BB11_30:
	add.s64 	%rd64, %rd64, 8;
	add.s32 	%r82, %r82, 1;
	setp.lt.s32	%p29, %r82, %r19;
	@%p29 bra 	BB11_27;

	// %r22 = distinct count after sample 8.
	and.b16  	%rs33, %rs43, 255;
	setp.eq.s16	%p30, %rs33, 0;
	selp.u32	%r56, 1, 0, %p30;
	add.s32 	%r22, %r56, %r19;
	mul.wide.s32 	%rd46, %r22, 8;
	add.s64 	%rd47, %rd21, %rd46;
	// The last search is only entered when %r22 > 0; otherwise the
	// "found" flag %rs44 stays 0.
	setp.gt.s32	%p31, %r22, 0;
	// Sample 9: uvTex(bx+1, by+1) -> slot[%r22].
	// inline asm
	tex.2d.v4.f32.f32 {%f102, %f103, %f104, %f105}, [uvTex, {%f58, %f91}];
	// inline asm
	st.local.v2.f32 	[%rd47], {%f102, %f103};
	@%p31 bra 	BB11_33;

	mov.u16 	%rs44, 0;
	bra.uni 	BB11_38;

BB11_33:
	mov.u32 	%r83, 0;
	mov.u16 	%rs44, 0;
	mov.u64 	%rd63, %rd21;

	// Duplicate search for sample 9 over slots 0..%r22-1.
BB11_34:
	ld.local.f32 	%f108, [%rd63];
	setp.neu.ftz.f32	%p32, %f102, %f108;
	@%p32 bra 	BB11_37;

	ld.local.f32 	%f109, [%rd63+4];
	setp.neu.ftz.f32	%p33, %f103, %f109;
	@%p33 bra 	BB11_37;

	mov.u16 	%rs44, 1;

BB11_37:
	add.s64 	%rd63, %rd63, 8;
	add.s32 	%r83, %r83, 1;
	setp.lt.s32	%p34, %r83, %r22;
	@%p34 bra 	BB11_34;

BB11_38:
	// %r25 = final number of distinct candidate vectors.
	and.b16  	%rs37, %rs44, 255;
	setp.eq.s16	%p35, %rs37, 0;
	selp.u32	%r58, 1, 0, %p35;
	add.s32 	%r25, %r58, %r22;
	// Default result (%f149, %f155) = slot 0; the scoring loop is
	// skipped only when %r25 < 1.
	ld.local.v2.f32 	{%f110, %f111}, [%rd21];
	setp.lt.s32	%p36, %r25, 1;
	mov.f32 	%f155, %f111;
	mov.f32 	%f149, %f110;
	@%p36 bra 	BB11_41;

	// Loop-invariant setup for candidate scoring:
	//   (%f26, %f27) = (x, y) as floats     %f28 = 1.0 - param_6
	//   %f145 = best score so far, initialised to 1.0e8
	//   %r84 = index of the next slot to load (slot 0 is scored first)
	//   (%f151, %f157) = current candidate, (%f150, %f155) = current best
	cvt.rn.f32.s32	%f26, %r1;
	cvt.rn.f32.s32	%f27, %r2;
	mov.f32 	%f113, 0f3F800000;
	sub.ftz.f32 	%f28, %f113, %f41;
	mov.f32 	%f145, 0f4CBEBC20;
	mov.u32 	%r84, 1;
	mov.f32 	%f151, %f149;
	mov.f32 	%f150, %f149;
	mov.f32 	%f157, %f155;

	// Per-candidate loop. %rd21 is reused here as the pointer to the
	// slot currently being scored (copied to %rd18, advanced in BB11_43).
BB11_40:
	mov.f32 	%f156, %f155;
	mov.f32 	%f30, %f157;
	mov.f32 	%f32, %f156;
	mov.f32 	%f29, %f151;
	mov.f32 	%f31, %f150;
	mov.u64 	%rd18, %rd21;
	// im1Tex sample position: (x, y) - param_6 * vec + 0.5.
	mul.ftz.f32 	%f126, %f29, %f41;
	sub.ftz.f32 	%f127, %f26, %f126;
	add.ftz.f32 	%f118, %f127, 0f3F000000;
	mul.ftz.f32 	%f128, %f30, %f41;
	sub.ftz.f32 	%f129, %f27, %f128;
	add.ftz.f32 	%f119, %f129, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f114, %f115, %f116, %f117}, [im1Tex, {%f118, %f119}];
	// inline asm
	// im2Tex sample position: (x, y) + (1 - param_6) * vec + 0.5.
	fma.rn.ftz.f32 	%f130, %f29, %f28, %f26;
	add.ftz.f32 	%f124, %f130, 0f3F000000;
	fma.rn.ftz.f32 	%f131, %f30, %f28, %f27;
	add.ftz.f32 	%f125, %f131, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f120, %f121, %f122, %f123}, [im2Tex, {%f124, %f125}];
	// inline asm
	// Score %f142 = min((|d0| + |d1| + |d2|) / 3.0, 1.0) over the first
	// three texture components.
	sub.ftz.f32 	%f132, %f114, %f120;
	sub.ftz.f32 	%f133, %f115, %f121;
	sub.ftz.f32 	%f134, %f116, %f122;
	abs.ftz.f32 	%f135, %f132;
	abs.ftz.f32 	%f136, %f133;
	add.ftz.f32 	%f137, %f135, %f136;
	abs.ftz.f32 	%f138, %f134;
	add.ftz.f32 	%f139, %f137, %f138;
	mov.f32 	%f140, 0f40400000;
	div.approx.ftz.f32 	%f141, %f139, %f140;
	setp.gt.ftz.f32	%p37, %f141, 0f3F800000;
	selp.f32	%f142, 0f3F800000, %f141, %p37;
	// Keep this candidate when its score is strictly below the best.
	setp.lt.ftz.f32	%p38, %f142, %f145;
	selp.f32	%f145, %f142, %f145, %p38;
	selp.f32	%f155, %f30, %f32, %p38;
	selp.f32	%f149, %f29, %f31, %p38;
	// More candidates left (%r84 < %r25)? Load the next one at BB11_43.
	setp.lt.s32	%p39, %r84, %r25;
	@%p39 bra 	BB11_43;

	// Store the selected vector at out[y * param_1 + x] (8 bytes each).
BB11_41:
	cvta.to.global.u64 	%rd52, %rd20;
	mad.lo.s32 	%r76, %r2, %r28, %r1;
	mul.wide.s32 	%rd53, %r76, 8;
	add.s64 	%rd54, %rd52, %rd53;
	st.global.v2.f32 	[%rd54], {%f149, %f155};

BB11_42:
	ret;

	// Loop latch: advance the slot pointer by one float2, load it as the
	// next candidate, advance %r84, and carry the best x component
	// forward into %f150.
BB11_43:
	add.s64 	%rd21, %rd18, 8;
	ld.local.v2.f32 	{%f143, %f144}, [%rd18+8];
	add.s32 	%r84, %r84, 1;
	mov.f32 	%f157, %f144;
	mov.f32 	%f151, %f143;
	mov.f32 	%f150, %f149;
	bra.uni 	BB11_40;
}

// _d_pixel_vec_smoothing_kernel_texmem_time
//
// One thread per pixel (x from the .x block/thread ids, y from the .y ids).
// The thread samples uvTex at its own pixel and at the 8 surrounding pixels,
// keeps the distinct (x, y) sample pairs in a 9-entry per-thread local list,
// scores every list entry, and stores the lowest-cost entry to the output.
//
//   param_0 : output base address; elements are 8-byte (v2.f32) pairs
//   param_1 : output row pitch in elements (used as y * param_1 + x)
//   param_2 : exclusive upper bound on x
//   param_3 : exclusive upper bound on y
//   param_4 : f32; (param_4 + 1.0) scales the deviation-from-mean cost term
//   param_5 : f32; candidate vector is scaled by param_5 for the im1Tex
//             lookup and by (1.0 - param_5) for the im2Tex lookup
//   param_6..param_8 : declared but never loaded by this kernel
.visible .entry _d_pixel_vec_smoothing_kernel_texmem_time(
	.param .u64 _d_pixel_vec_smoothing_kernel_texmem_time_param_0,
	.param .u32 _d_pixel_vec_smoothing_kernel_texmem_time_param_1,
	.param .u32 _d_pixel_vec_smoothing_kernel_texmem_time_param_2,
	.param .u32 _d_pixel_vec_smoothing_kernel_texmem_time_param_3,
	.param .f32 _d_pixel_vec_smoothing_kernel_texmem_time_param_4,
	.param .f32 _d_pixel_vec_smoothing_kernel_texmem_time_param_5,
	.param .u64 _d_pixel_vec_smoothing_kernel_texmem_time_param_6,
	.param .u64 _d_pixel_vec_smoothing_kernel_texmem_time_param_7,
	.param .u64 _d_pixel_vec_smoothing_kernel_texmem_time_param_8
)
{
	// 72 bytes = 9 candidate slots of 8 bytes (one v2.f32 pair each).
	.local .align 8 .b8 	__local_depot12[72];
	.reg .b64 	%SP;
	.reg .b64 	%SPL;
	.reg .pred 	%p<40>;
	.reg .s16 	%rs<45>;
	.reg .s32 	%r<80>;
	.reg .f32 	%f<184>;
	.reg .s64 	%rd<69>;


	mov.u64 	%SPL, __local_depot12;
	ld.param.u64 	%rd20, [_d_pixel_vec_smoothing_kernel_texmem_time_param_0];
	ld.param.u32 	%r27, [_d_pixel_vec_smoothing_kernel_texmem_time_param_1];
	ld.param.u32 	%r28, [_d_pixel_vec_smoothing_kernel_texmem_time_param_2];
	ld.param.u32 	%r29, [_d_pixel_vec_smoothing_kernel_texmem_time_param_3];
	ld.param.f32 	%f53, [_d_pixel_vec_smoothing_kernel_texmem_time_param_4];
	ld.param.f32 	%f54, [_d_pixel_vec_smoothing_kernel_texmem_time_param_5];
	// %rd21 = base of the candidate list (later reused as the scoring cursor).
	add.u64 	%rd21, %SPL, 0;
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y likewise.
	mov.u32 	%r30, %ntid.x;
	mov.u32 	%r31, %ctaid.x;
	mov.u32 	%r32, %tid.x;
	mad.lo.s32 	%r1, %r30, %r31, %r32;
	mov.u32 	%r33, %ntid.y;
	mov.u32 	%r34, %ctaid.y;
	mov.u32 	%r35, %tid.y;
	mad.lo.s32 	%r2, %r33, %r34, %r35;
	// Threads with x >= param_2 or y >= param_3 exit without side effects.
	setp.lt.s32	%p1, %r1, %r28;
	setp.lt.s32	%p2, %r2, %r29;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB12_42;
	bra.uni 	BB12_1;

BB12_1:
	// Zero all 72 bytes of the candidate list, one byte per store.
	// %rs38 stays 0 afterwards and doubles as the "duplicate found" flag
	// of the first search loop (BB12_2).
	mov.u16 	%rs38, 0;
	st.local.u8 	[%rd21+71], %rs38;
	st.local.u8 	[%rd21+70], %rs38;
	st.local.u8 	[%rd21+69], %rs38;
	st.local.u8 	[%rd21+68], %rs38;
	st.local.u8 	[%rd21+67], %rs38;
	st.local.u8 	[%rd21+66], %rs38;
	st.local.u8 	[%rd21+65], %rs38;
	st.local.u8 	[%rd21+64], %rs38;
	st.local.u8 	[%rd21+63], %rs38;
	st.local.u8 	[%rd21+62], %rs38;
	st.local.u8 	[%rd21+61], %rs38;
	st.local.u8 	[%rd21+60], %rs38;
	st.local.u8 	[%rd21+59], %rs38;
	st.local.u8 	[%rd21+58], %rs38;
	st.local.u8 	[%rd21+57], %rs38;
	st.local.u8 	[%rd21+56], %rs38;
	st.local.u8 	[%rd21+55], %rs38;
	st.local.u8 	[%rd21+54], %rs38;
	st.local.u8 	[%rd21+53], %rs38;
	st.local.u8 	[%rd21+52], %rs38;
	st.local.u8 	[%rd21+51], %rs38;
	st.local.u8 	[%rd21+50], %rs38;
	st.local.u8 	[%rd21+49], %rs38;
	st.local.u8 	[%rd21+48], %rs38;
	st.local.u8 	[%rd21+47], %rs38;
	st.local.u8 	[%rd21+46], %rs38;
	st.local.u8 	[%rd21+45], %rs38;
	st.local.u8 	[%rd21+44], %rs38;
	st.local.u8 	[%rd21+43], %rs38;
	st.local.u8 	[%rd21+42], %rs38;
	st.local.u8 	[%rd21+41], %rs38;
	st.local.u8 	[%rd21+40], %rs38;
	st.local.u8 	[%rd21+39], %rs38;
	st.local.u8 	[%rd21+38], %rs38;
	st.local.u8 	[%rd21+37], %rs38;
	st.local.u8 	[%rd21+36], %rs38;
	st.local.u8 	[%rd21+35], %rs38;
	st.local.u8 	[%rd21+34], %rs38;
	st.local.u8 	[%rd21+33], %rs38;
	st.local.u8 	[%rd21+32], %rs38;
	st.local.u8 	[%rd21+31], %rs38;
	st.local.u8 	[%rd21+30], %rs38;
	st.local.u8 	[%rd21+29], %rs38;
	st.local.u8 	[%rd21+28], %rs38;
	st.local.u8 	[%rd21+27], %rs38;
	st.local.u8 	[%rd21+26], %rs38;
	st.local.u8 	[%rd21+25], %rs38;
	st.local.u8 	[%rd21+24], %rs38;
	st.local.u8 	[%rd21+23], %rs38;
	st.local.u8 	[%rd21+22], %rs38;
	st.local.u8 	[%rd21+21], %rs38;
	st.local.u8 	[%rd21+20], %rs38;
	st.local.u8 	[%rd21+19], %rs38;
	st.local.u8 	[%rd21+18], %rs38;
	st.local.u8 	[%rd21+17], %rs38;
	st.local.u8 	[%rd21+16], %rs38;
	st.local.u8 	[%rd21+15], %rs38;
	st.local.u8 	[%rd21+14], %rs38;
	st.local.u8 	[%rd21+13], %rs38;
	st.local.u8 	[%rd21+12], %rs38;
	st.local.u8 	[%rd21+11], %rs38;
	st.local.u8 	[%rd21+10], %rs38;
	st.local.u8 	[%rd21+9], %rs38;
	st.local.u8 	[%rd21+8], %rs38;
	st.local.u8 	[%rd21+7], %rs38;
	st.local.u8 	[%rd21+6], %rs38;
	st.local.u8 	[%rd21+5], %rs38;
	st.local.u8 	[%rd21+4], %rs38;
	st.local.u8 	[%rd21+3], %rs38;
	st.local.u8 	[%rd21+2], %rs38;
	st.local.u8 	[%rd21+1], %rs38;
	st.local.u8 	[%rd21], %rs38;
	// Float copies of the coordinates: %f59 = x, %f72 = y, %f65 = x-1.
	// (%f71 = x+1, %f84 = y-1 and %f108 = y+1 are produced further down.)
	cvt.rn.f32.s32	%f72, %r2;
	cvt.rn.f32.s32	%f59, %r1;
	add.s32 	%r37, %r1, -1;
	cvt.rn.f32.s32	%f65, %r37;
	// Sample 0: uvTex(x, y) -> slot 0. Only the first two components are kept.
	// inline asm
	tex.2d.v4.f32.f32 {%f55, %f56, %f57, %f58}, [uvTex, {%f59, %f72}];
	// inline asm
	st.local.v2.f32 	[%rd21], {%f55, %f56};
	// Sample 1: uvTex(x-1, y) -> slot 1 (stored unconditionally at L+8 below).
	// inline asm
	tex.2d.v4.f32.f32 {%f61, %f62, %f63, %f64}, [uvTex, {%f65, %f72}];
	// inline asm
	// %r3 = list length so far: 1 if sample 1 equals sample 0 in both
	// components (slot 1 will then be overwritten), otherwise 2.
	setp.eq.ftz.f32	%p4, %f61, %f55;
	setp.eq.ftz.f32	%p5, %f62, %f56;
	and.pred  	%p6, %p4, %p5;
	selp.b32	%r3, 1, 2, %p6;
	// Running component sums over ALL samples (duplicates included);
	// they are divided by 9 later to form the neighbourhood mean.
	add.ftz.f32 	%f73, %f55, 0f00000000;
	add.ftz.f32 	%f74, %f56, 0f00000000;
	add.ftz.f32 	%f75, %f73, %f61;
	add.ftz.f32 	%f76, %f74, %f62;
	// %rd28 = address of slot %r3, where the next sample is written.
	mul.wide.u32 	%rd27, %r3, 8;
	add.s64 	%rd28, %rd21, %rd27;
	add.s32 	%r38, %r1, 1;
	cvt.rn.f32.s32	%f71, %r38;
	st.local.v2.f32 	[%rd21+8], {%f61, %f62};
	// Sample 2: uvTex(x+1, y) -> slot %r3.
	// inline asm
	tex.2d.v4.f32.f32 {%f67, %f68, %f69, %f70}, [uvTex, {%f71, %f72}];
	// inline asm
	st.local.v2.f32 	[%rd28], {%f67, %f68};
	ld.local.f32 	%f8, [%rd28];
	add.ftz.f32 	%f6, %f75, %f8;
	add.ftz.f32 	%f7, %f76, %f68;
	mov.u32 	%r72, 0;
	mov.u64 	%rd68, %rd21;

	// Search slots [0, %r3) for an entry equal to sample 2.
	// setp.neu skips the entry when the values differ or either is NaN.
	// %rs38 becomes 1 when a match is found. The loop body runs at least once.
BB12_2:
	ld.local.f32 	%f77, [%rd68];
	setp.neu.ftz.f32	%p7, %f8, %f77;
	@%p7 bra 	BB12_5;

	ld.local.f32 	%f78, [%rd68+4];
	setp.neu.ftz.f32	%p8, %f68, %f78;
	@%p8 bra 	BB12_5;

	mov.u16 	%rs38, 1;

BB12_5:
	add.s64 	%rd68, %rd68, 8;
	add.s32 	%r72, %r72, 1;
	setp.lt.s32	%p9, %r72, %r3;
	@%p9 bra 	BB12_2;

	// %r6 = list length after sample 2: grows by one only when no match
	// was found. The same pattern repeats for each remaining neighbour.
	and.b16  	%rs19, %rs38, 255;
	setp.eq.s16	%p10, %rs19, 0;
	mov.u16 	%rs39, 0;
	selp.u32	%r40, 1, 0, %p10;
	add.s32 	%r6, %r40, %r3;
	add.s32 	%r45, %r2, -1;
	mul.wide.u32 	%rd31, %r6, 8;
	add.s64 	%rd32, %rd21, %rd31;
	cvt.rn.f32.s32	%f84, %r45;
	// Sample 3: uvTex(x, y-1) -> slot %r6.
	// inline asm
	tex.2d.v4.f32.f32 {%f79, %f80, %f81, %f82}, [uvTex, {%f59, %f84}];
	// inline asm
	st.local.v2.f32 	[%rd32], {%f79, %f80};
	ld.local.f32 	%f13, [%rd32];
	add.ftz.f32 	%f11, %f6, %f13;
	add.ftz.f32 	%f12, %f7, %f80;
	mov.u32 	%r73, 0;
	mov.u64 	%rd67, %rd21;

	// Search slots [0, %r6) for sample 3; %rs39 = match flag.
BB12_7:
	ld.local.f32 	%f85, [%rd67];
	setp.neu.ftz.f32	%p11, %f13, %f85;
	@%p11 bra 	BB12_10;

	ld.local.f32 	%f86, [%rd67+4];
	setp.neu.ftz.f32	%p12, %f80, %f86;
	@%p12 bra 	BB12_10;

	mov.u16 	%rs39, 1;

BB12_10:
	add.s64 	%rd67, %rd67, 8;
	add.s32 	%r73, %r73, 1;
	setp.lt.s32	%p13, %r73, %r6;
	@%p13 bra 	BB12_7;

	// %r9 = list length after sample 3.
	and.b16  	%rs22, %rs39, 255;
	setp.eq.s16	%p14, %rs22, 0;
	mov.u16 	%rs40, 0;
	selp.u32	%r47, 1, 0, %p14;
	add.s32 	%r9, %r47, %r6;
	mul.wide.u32 	%rd34, %r9, 8;
	add.s64 	%rd35, %rd21, %rd34;
	// Sample 4: uvTex(x-1, y-1) -> slot %r9.
	// inline asm
	tex.2d.v4.f32.f32 {%f87, %f88, %f89, %f90}, [uvTex, {%f65, %f84}];
	// inline asm
	st.local.v2.f32 	[%rd35], {%f87, %f88};
	ld.local.f32 	%f17, [%rd35];
	add.ftz.f32 	%f15, %f11, %f17;
	add.ftz.f32 	%f16, %f12, %f88;
	mov.u32 	%r74, 0;
	mov.u64 	%rd66, %rd21;

	// Search slots [0, %r9) for sample 4; %rs40 = match flag.
BB12_12:
	ld.local.f32 	%f93, [%rd66];
	setp.neu.ftz.f32	%p15, %f17, %f93;
	@%p15 bra 	BB12_15;

	ld.local.f32 	%f94, [%rd66+4];
	setp.neu.ftz.f32	%p16, %f88, %f94;
	@%p16 bra 	BB12_15;

	mov.u16 	%rs40, 1;

BB12_15:
	add.s64 	%rd66, %rd66, 8;
	add.s32 	%r74, %r74, 1;
	setp.lt.s32	%p17, %r74, %r9;
	@%p17 bra 	BB12_12;

	// %r12 = list length after sample 4.
	and.b16  	%rs25, %rs40, 255;
	setp.eq.s16	%p18, %rs25, 0;
	mov.u16 	%rs41, 0;
	selp.u32	%r49, 1, 0, %p18;
	add.s32 	%r12, %r49, %r9;
	mul.wide.u32 	%rd37, %r12, 8;
	add.s64 	%rd38, %rd21, %rd37;
	// Sample 5: uvTex(x+1, y-1) -> slot %r12.
	// inline asm
	tex.2d.v4.f32.f32 {%f95, %f96, %f97, %f98}, [uvTex, {%f71, %f84}];
	// inline asm
	st.local.v2.f32 	[%rd38], {%f95, %f96};
	ld.local.f32 	%f21, [%rd38];
	add.ftz.f32 	%f19, %f15, %f21;
	add.ftz.f32 	%f20, %f16, %f96;
	mov.u32 	%r75, 0;
	mov.u64 	%rd65, %rd21;

	// Search slots [0, %r12) for sample 5; %rs41 = match flag.
BB12_17:
	ld.local.f32 	%f101, [%rd65];
	setp.neu.ftz.f32	%p19, %f21, %f101;
	@%p19 bra 	BB12_20;

	ld.local.f32 	%f102, [%rd65+4];
	setp.neu.ftz.f32	%p20, %f96, %f102;
	@%p20 bra 	BB12_20;

	mov.u16 	%rs41, 1;

BB12_20:
	add.s64 	%rd65, %rd65, 8;
	add.s32 	%r75, %r75, 1;
	setp.lt.s32	%p21, %r75, %r12;
	@%p21 bra 	BB12_17;

	// %r15 = list length after sample 5.
	and.b16  	%rs28, %rs41, 255;
	setp.eq.s16	%p22, %rs28, 0;
	mov.u16 	%rs42, 0;
	selp.u32	%r51, 1, 0, %p22;
	add.s32 	%r15, %r51, %r12;
	add.s32 	%r56, %r2, 1;
	mul.wide.u32 	%rd40, %r15, 8;
	add.s64 	%rd41, %rd21, %rd40;
	cvt.rn.f32.s32	%f108, %r56;
	// Sample 6: uvTex(x, y+1) -> slot %r15.
	// inline asm
	tex.2d.v4.f32.f32 {%f103, %f104, %f105, %f106}, [uvTex, {%f59, %f108}];
	// inline asm
	st.local.v2.f32 	[%rd41], {%f103, %f104};
	ld.local.f32 	%f26, [%rd41];
	add.ftz.f32 	%f24, %f19, %f26;
	add.ftz.f32 	%f25, %f20, %f104;
	mov.u32 	%r76, 0;
	mov.u64 	%rd64, %rd21;

	// Search slots [0, %r15) for sample 6; %rs42 = match flag.
BB12_22:
	ld.local.f32 	%f109, [%rd64];
	setp.neu.ftz.f32	%p23, %f26, %f109;
	@%p23 bra 	BB12_25;

	ld.local.f32 	%f110, [%rd64+4];
	setp.neu.ftz.f32	%p24, %f104, %f110;
	@%p24 bra 	BB12_25;

	mov.u16 	%rs42, 1;

BB12_25:
	add.s64 	%rd64, %rd64, 8;
	add.s32 	%r76, %r76, 1;
	setp.lt.s32	%p25, %r76, %r15;
	@%p25 bra 	BB12_22;

	// %r18 = list length after sample 6.
	and.b16  	%rs31, %rs42, 255;
	setp.eq.s16	%p26, %rs31, 0;
	mov.u16 	%rs43, 0;
	selp.u32	%r58, 1, 0, %p26;
	add.s32 	%r18, %r58, %r15;
	mul.wide.s32 	%rd43, %r18, 8;
	add.s64 	%rd44, %rd21, %rd43;
	// Sample 7: uvTex(x-1, y+1) -> slot %r18.
	// inline asm
	tex.2d.v4.f32.f32 {%f111, %f112, %f113, %f114}, [uvTex, {%f65, %f108}];
	// inline asm
	st.local.v2.f32 	[%rd44], {%f111, %f112};
	ld.local.f32 	%f30, [%rd44];
	add.ftz.f32 	%f28, %f24, %f30;
	add.ftz.f32 	%f29, %f25, %f112;
	mov.u32 	%r77, 0;
	mov.u64 	%rd63, %rd21;

	// Search slots [0, %r18) for sample 7; %rs43 = match flag.
BB12_27:
	ld.local.f32 	%f117, [%rd63];
	setp.neu.ftz.f32	%p27, %f30, %f117;
	@%p27 bra 	BB12_30;

	ld.local.f32 	%f118, [%rd63+4];
	setp.neu.ftz.f32	%p28, %f112, %f118;
	@%p28 bra 	BB12_30;

	mov.u16 	%rs43, 1;

BB12_30:
	add.s64 	%rd63, %rd63, 8;
	add.s32 	%r77, %r77, 1;
	setp.lt.s32	%p29, %r77, %r18;
	@%p29 bra 	BB12_27;

	// %r21 = list length after sample 7.
	and.b16  	%rs33, %rs43, 255;
	setp.eq.s16	%p30, %rs33, 0;
	selp.u32	%r59, 1, 0, %p30;
	add.s32 	%r21, %r59, %r18;
	mul.wide.s32 	%rd46, %r21, 8;
	add.s64 	%rd14, %rd21, %rd46;
	// Sample 8: uvTex(x+1, y+1) -> slot %r21.
	// inline asm
	tex.2d.v4.f32.f32 {%f119, %f120, %f121, %f122}, [uvTex, {%f71, %f108}];
	// inline asm
	st.local.v2.f32 	[%rd14], {%f119, %f120};
	ld.local.f32 	%f125, [%rd14];
	// %f32 / %f33 = final component sums over all nine samples.
	add.ftz.f32 	%f32, %f28, %f125;
	add.ftz.f32 	%f33, %f29, %f120;
	// The last search is guarded: it is skipped when %r21 <= 0.
	setp.gt.s32	%p31, %r21, 0;
	@%p31 bra 	BB12_33;

	mov.u16 	%rs44, 0;
	bra.uni 	BB12_38;

BB12_33:
	ld.local.f32 	%f34, [%rd14];
	mov.u32 	%r78, 0;
	mov.u16 	%rs44, 0;
	mov.u64 	%rd62, %rd21;

	// Search slots [0, %r21) for sample 8; %rs44 = match flag.
BB12_34:
	ld.local.f32 	%f126, [%rd62];
	setp.neu.ftz.f32	%p32, %f34, %f126;
	@%p32 bra 	BB12_37;

	ld.local.f32 	%f127, [%rd62+4];
	setp.neu.ftz.f32	%p33, %f120, %f127;
	@%p33 bra 	BB12_37;

	mov.u16 	%rs44, 1;

BB12_37:
	add.s64 	%rd62, %rd62, 8;
	add.s32 	%r78, %r78, 1;
	setp.lt.s32	%p34, %r78, %r21;
	@%p34 bra 	BB12_34;

BB12_38:
	// %r24 = final number of distinct candidates in the list.
	and.b16  	%rs37, %rs44, 255;
	setp.eq.s16	%p35, %rs37, 0;
	selp.u32	%r61, 1, 0, %p35;
	add.s32 	%r24, %r61, %r21;
	// Default result (%f175, %f181) = slot 0. With fewer than two
	// candidates there is nothing to score, so it is stored as-is.
	ld.local.v2.f32 	{%f128, %f129}, [%rd21];
	setp.lt.s32	%p36, %r24, 2;
	mov.f32 	%f181, %f129;
	mov.f32 	%f175, %f128;
	@%p36 bra 	BB12_41;

	// Loop invariants for scoring:
	//   %f37, %f38 = component sums / 9.0 (0f41100000)
	//   %f39       = 1.0 - param_5
	//   %f40       = param_4 + 1.0
	//   %f171      = best cost so far, starts at 0f4CBEBC20 (1.0e8)
	//   %r79       = index of the NEXT candidate (current one is %r79 - 1)
	mov.f32 	%f131, 0f41100000;
	div.approx.ftz.f32 	%f37, %f32, %f131;
	div.approx.ftz.f32 	%f38, %f33, %f131;
	mov.f32 	%f132, 0f3F800000;
	sub.ftz.f32 	%f39, %f132, %f54;
	add.ftz.f32 	%f40, %f53, 0f3F800000;
	mov.f32 	%f171, 0f4CBEBC20;
	mov.u32 	%r79, 1;
	mov.f32 	%f177, %f175;
	mov.f32 	%f176, %f175;
	mov.f32 	%f183, %f181;

	// Score one candidate. (%f41, %f42) is the candidate being scored,
	// (%f43, %f44) the best pair so far, %rd18 the candidate's slot address.
BB12_40:
	mov.f32 	%f182, %f181;
	mov.f32 	%f42, %f183;
	mov.f32 	%f44, %f182;
	mov.f32 	%f41, %f177;
	mov.f32 	%f43, %f176;
	mov.u64 	%rd18, %rd21;
	// im1Tex lookup at (x - cand.x*param_5 + 0.5, y - cand.y*param_5 + 0.5).
	mul.ftz.f32 	%f145, %f41, %f54;
	sub.ftz.f32 	%f146, %f59, %f145;
	add.ftz.f32 	%f137, %f146, 0f3F000000;
	mul.ftz.f32 	%f147, %f42, %f54;
	sub.ftz.f32 	%f148, %f72, %f147;
	add.ftz.f32 	%f138, %f148, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f133, %f134, %f135, %f136}, [im1Tex, {%f137, %f138}];
	// inline asm
	// im2Tex lookup at (x + cand.x*(1-param_5) + 0.5, y + cand.y*(1-param_5) + 0.5).
	fma.rn.ftz.f32 	%f149, %f41, %f39, %f59;
	add.ftz.f32 	%f143, %f149, 0f3F000000;
	fma.rn.ftz.f32 	%f150, %f42, %f39, %f72;
	add.ftz.f32 	%f144, %f150, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f139, %f140, %f141, %f142}, [im2Tex, {%f143, %f144}];
	// inline asm
	// Matching term %f161 = min(1.0, sum of |difference| over the first
	// three texel components / 3.0). The fourth component is ignored.
	sub.ftz.f32 	%f151, %f133, %f139;
	sub.ftz.f32 	%f152, %f134, %f140;
	sub.ftz.f32 	%f153, %f135, %f141;
	abs.ftz.f32 	%f154, %f151;
	abs.ftz.f32 	%f155, %f152;
	add.ftz.f32 	%f156, %f154, %f155;
	abs.ftz.f32 	%f157, %f153;
	add.ftz.f32 	%f158, %f156, %f157;
	mov.f32 	%f159, 0f40400000;
	div.approx.ftz.f32 	%f160, %f158, %f159;
	setp.gt.ftz.f32	%p37, %f160, 0f3F800000;
	selp.f32	%f161, 0f3F800000, %f160, %p37;
	// Deviation term: L1 distance of the candidate from the 9-sample mean,
	// times 8.0 (0f41000000), times (param_4 + 1.0).
	// Total cost %f168 = deviation term + matching term.
	sub.ftz.f32 	%f162, %f41, %f37;
	sub.ftz.f32 	%f163, %f42, %f38;
	abs.ftz.f32 	%f164, %f162;
	abs.ftz.f32 	%f165, %f163;
	add.ftz.f32 	%f166, %f164, %f165;
	mul.ftz.f32 	%f167, %f166, 0f41000000;
	fma.rn.ftz.f32 	%f168, %f167, %f40, %f161;
	// Keep the candidate only on a strictly lower cost, so the earliest
	// candidate wins ties.
	setp.lt.ftz.f32	%p38, %f168, %f171;
	selp.f32	%f171, %f168, %f171, %p38;
	selp.f32	%f181, %f42, %f44, %p38;
	selp.f32	%f175, %f41, %f43, %p38;
	setp.lt.s32	%p39, %r79, %r24;
	@%p39 bra 	BB12_43;

BB12_41:
	// Store the selected pair at output[y * param_1 + x] (8-byte elements).
	cvta.to.global.u64 	%rd51, %rd20;
	mad.lo.s32 	%r71, %r2, %r27, %r1;
	mul.wide.s32 	%rd52, %r71, 8;
	add.s64 	%rd53, %rd51, %rd52;
	st.global.v2.f32 	[%rd53], {%f175, %f181};

BB12_42:
	ret;

BB12_43:
	// Advance to the next candidate: move the cursor %rd21 one slot forward,
	// load that slot into (%f177, %f183), carry the best pair, and loop.
	add.s64 	%rd21, %rd18, 8;
	ld.local.v2.f32 	{%f169, %f170}, [%rd18+8];
	add.s32 	%r79, %r79, 1;
	mov.f32 	%f183, %f170;
	mov.f32 	%f177, %f169;
	mov.f32 	%f176, %f175;
	bra.uni 	BB12_40;
}

// _d_warp_flow_without_matching_cost_kernel_firstpass
//
// One thread per source pixel (x, y). The thread reads the v2.f32 vector
// stored for its pixel, projects the pixel along that vector scaled by
// param_7, rounds the target to integer coordinates, and - if the target is
// inside the image - atomically bumps the counter of the target pixel.
// Only the thread that observes the counter at 0 writes its vector to the
// destination buffer at the target pixel.
//
//   param_0 : destination base address (v2.f32 elements)
//   param_1 : source base address (v2.f32 elements)
//   param_2 : row pitch in elements, used for BOTH source and destination
//   param_3 : counter base address (u32 elements)
//   param_4 : counter row pitch in elements
//   param_5 : exclusive upper bound on x (source and target)
//   param_6 : exclusive upper bound on y (source and target)
//   param_7 : f32 scale applied to the vector before adding it to (x, y)
//   param_8 : declared but never loaded by this kernel
//
// A vector is treated as invalid, and skipped, when either component has
// magnitude >= 0f4CBEBC20 (1.0e8) OR is NaN. The NaN case is handled by
// using the unordered compare (setp.geu): with the ordered setp.ge a NaN
// component passed the test, cvt.rzi.s32.f32 then turned the NaN target
// coordinate into 0, and the vector was counted and stored at column/row 0.
.visible .entry _d_warp_flow_without_matching_cost_kernel_firstpass(
	.param .u64 _d_warp_flow_without_matching_cost_kernel_firstpass_param_0,
	.param .u64 _d_warp_flow_without_matching_cost_kernel_firstpass_param_1,
	.param .u32 _d_warp_flow_without_matching_cost_kernel_firstpass_param_2,
	.param .u64 _d_warp_flow_without_matching_cost_kernel_firstpass_param_3,
	.param .u32 _d_warp_flow_without_matching_cost_kernel_firstpass_param_4,
	.param .u32 _d_warp_flow_without_matching_cost_kernel_firstpass_param_5,
	.param .u32 _d_warp_flow_without_matching_cost_kernel_firstpass_param_6,
	.param .f32 _d_warp_flow_without_matching_cost_kernel_firstpass_param_7,
	.param .f32 _d_warp_flow_without_matching_cost_kernel_firstpass_param_8
)
{
	.reg .pred 	%p<18>;
	.reg .s32 	%r<25>;
	.reg .f32 	%f<30>;
	.reg .s64 	%rd<13>;


	ld.param.u64 	%rd1, [_d_warp_flow_without_matching_cost_kernel_firstpass_param_0];
	ld.param.u64 	%rd2, [_d_warp_flow_without_matching_cost_kernel_firstpass_param_1];
	ld.param.u32 	%r5, [_d_warp_flow_without_matching_cost_kernel_firstpass_param_2];
	ld.param.u64 	%rd3, [_d_warp_flow_without_matching_cost_kernel_firstpass_param_3];
	ld.param.u32 	%r6, [_d_warp_flow_without_matching_cost_kernel_firstpass_param_4];
	ld.param.u32 	%r7, [_d_warp_flow_without_matching_cost_kernel_firstpass_param_5];
	ld.param.u32 	%r8, [_d_warp_flow_without_matching_cost_kernel_firstpass_param_6];
	ld.param.f32 	%f13, [_d_warp_flow_without_matching_cost_kernel_firstpass_param_7];
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y likewise.
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	// Threads outside [0, param_5) x [0, param_6) do nothing.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB13_10;
	bra.uni 	BB13_1;

BB13_1:
	// (%f14, %f15) = source[y * param_2 + x].
	cvta.to.global.u64 	%rd4, %rd2;
	mad.lo.s32 	%r15, %r2, %r5, %r1;
	mul.wide.s32 	%rd5, %r15, 8;
	add.s64 	%rd6, %rd4, %rd5;
	ld.global.v2.f32 	{%f14, %f15}, [%rd6];
	// Reject invalid vectors: |component| >= 1.0e8, or NaN (geu is true
	// when the compare is unordered).
	abs.ftz.f32 	%f16, %f14;
	setp.geu.ftz.f32	%p4, %f16, 0f4CBEBC20;
	@%p4 bra 	BB13_10;

	abs.ftz.f32 	%f17, %f15;
	setp.geu.ftz.f32	%p5, %f17, 0f4CBEBC20;
	@%p5 bra 	BB13_10;

	// Target position: (%f5, %f6) = (x, y) + param_7 * (%f14, %f15).
	cvt.rn.f32.s32	%f18, %r1;
	cvt.rn.f32.s32	%f19, %r2;
	fma.rn.ftz.f32 	%f5, %f14, %f13, %f18;
	fma.rn.ftz.f32 	%f6, %f15, %f13, %f19;
	// Round %f5 to the nearest integer, halves away from zero:
	//   %f21 = 0.5 carrying the sign bit of %f5 (0x80000000 mask | 0x3F000000)
	//   %f23 = trunc(%f5 + %f21)
	//   |%f5| > 2^23 (0f4B000000): already integral, keep %f5
	//   |%f5| < 0.5: use trunc(%f5) instead
	abs.ftz.f32 	%f20, %f5;
	mov.b32 	 %r16, %f5;
	and.b32  	%r17, %r16, -2147483648;
	or.b32  	%r18, %r17, 1056964608;
	mov.b32 	 %f21, %r18;
	add.ftz.f32 	%f22, %f21, %f5;
	cvt.rzi.ftz.f32.f32	%f23, %f22;
	setp.gt.ftz.f32	%p6, %f20, 0f4B000000;
	selp.f32	%f28, %f5, %f23, %p6;
	setp.geu.ftz.f32	%p7, %f20, 0f3F000000;
	@%p7 bra 	BB13_5;

	cvt.rzi.ftz.f32.f32	%f28, %f5;

BB13_5:
	// %r3 = target column. Then the same rounding sequence for %f6.
	cvt.rzi.ftz.s32.f32	%r3, %f28;
	mov.b32 	 %r19, %f6;
	and.b32  	%r20, %r19, -2147483648;
	or.b32  	%r21, %r20, 1056964608;
	mov.b32 	 %f24, %r21;
	add.ftz.f32 	%f25, %f24, %f6;
	cvt.rzi.ftz.f32.f32	%f26, %f25;
	abs.ftz.f32 	%f27, %f6;
	setp.gt.ftz.f32	%p8, %f27, 0f4B000000;
	selp.f32	%f29, %f6, %f26, %p8;
	setp.geu.ftz.f32	%p9, %f27, 0f3F000000;
	@%p9 bra 	BB13_7;

	cvt.rzi.ftz.f32.f32	%f29, %f6;

BB13_7:
	// %r4 = target row. Skip targets outside [0, param_5) x [0, param_6).
	setp.ge.s32	%p10, %r3, %r7;
	setp.lt.s32	%p11, %r3, 0;
	or.pred  	%p12, %p11, %p10;
	cvt.rzi.ftz.s32.f32	%r4, %f29;
	setp.lt.s32	%p13, %r4, 0;
	or.pred  	%p14, %p12, %p13;
	setp.ge.s32	%p15, %r4, %r8;
	or.pred  	%p16, %p14, %p15;
	@%p16 bra 	BB13_10;

	// Count this hit: counter[row * param_4 + col] += 1. atom returns the
	// value held before the add, so %r23 == 0 identifies the first hit.
	cvta.to.global.u64 	%rd7, %rd3;
	mad.lo.s32 	%r22, %r4, %r6, %r3;
	mul.wide.s32 	%rd8, %r22, 4;
	add.s64 	%rd9, %rd7, %rd8;
	atom.global.add.u32 	%r23, [%rd9], 1;
	setp.ne.s32	%p17, %r23, 0;
	@%p17 bra 	BB13_10;

	// First hit only: destination[row * param_2 + col] = (%f14, %f15).
	cvta.to.global.u64 	%rd10, %rd1;
	mad.lo.s32 	%r24, %r4, %r5, %r3;
	mul.wide.s32 	%rd11, %r24, 8;
	add.s64 	%rd12, %rd10, %rd11;
	st.global.v2.f32 	[%rd12], {%f14, %f15};

BB13_10:
	ret;
}

// _d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate
//
// One thread per pixel (x, y). When the u32 counter stored for the pixel is
// 2 or more, the pixel's v2.f32 entry in the vector buffer is overwritten
// with the pair (0f5368D4A5, 0f5368D4A5), i.e. roughly (1.0e12, 1.0e12).
// Pixels whose counter is below 2 are left untouched.
//
//   param_0 : vector buffer base address (v2.f32 elements)
//   param_1 : vector buffer row pitch in elements
//   param_2 : counter buffer base address (u32 elements)
//   param_3 : counter buffer row pitch in elements
//   param_4 : exclusive upper bound on x
//   param_5 : exclusive upper bound on y
.visible .entry _d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate(
	.param .u64 _d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_0,
	.param .u32 _d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_1,
	.param .u64 _d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_2,
	.param .u32 _d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_3,
	.param .u32 _d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_4,
	.param .u32 _d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_5
)
{
	.reg .pred 	%x_ok, %y_ok, %inside, %at_most_one;
	.reg .s32 	%vec_pitch, %cnt_pitch, %x_end, %y_end;
	.reg .s32 	%blk_dim, %blk_idx, %thr_idx, %px, %py;
	.reg .s32 	%cnt_index, %cnt_value, %vec_index;
	.reg .f32 	%marker;
	.reg .s64 	%vec_arg, %cnt_arg, %vec_base, %cnt_base;
	.reg .s64 	%cnt_offset, %cnt_addr, %vec_offset, %vec_addr;

	// Kernel arguments.
	ld.param.u64 	%vec_arg, [_d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_0];
	ld.param.u32 	%vec_pitch, [_d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_1];
	ld.param.u64 	%cnt_arg, [_d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_2];
	ld.param.u32 	%cnt_pitch, [_d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_3];
	ld.param.u32 	%x_end, [_d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_4];
	ld.param.u32 	%y_end, [_d_warp_flow_without_matching_cost_kernel_firstpass_deduplicate_param_5];

	// px = ntid.x * ctaid.x + tid.x
	mov.u32 	%blk_dim, %ntid.x;
	mov.u32 	%blk_idx, %ctaid.x;
	mov.u32 	%thr_idx, %tid.x;
	mad.lo.s32 	%px, %blk_dim, %blk_idx, %thr_idx;

	// py = ntid.y * ctaid.y + tid.y
	mov.u32 	%blk_dim, %ntid.y;
	mov.u32 	%blk_idx, %ctaid.y;
	mov.u32 	%thr_idx, %tid.y;
	mad.lo.s32 	%py, %blk_dim, %blk_idx, %thr_idx;

	// Leave unless px < param_4 and py < param_5 (signed compares).
	setp.lt.s32 	%x_ok, %px, %x_end;
	setp.lt.s32 	%y_ok, %py, %y_end;
	and.pred 	%inside, %x_ok, %y_ok;
	@!%inside bra 	FIRSTPASS_DEDUP_DONE;

	// cnt_value = counter[py * param_3 + px]
	cvta.to.global.u64 	%cnt_base, %cnt_arg;
	mad.lo.s32 	%cnt_index, %py, %cnt_pitch, %px;
	mul.wide.s32 	%cnt_offset, %cnt_index, 4;
	add.s64 	%cnt_addr, %cnt_base, %cnt_offset;
	ld.global.u32 	%cnt_value, [%cnt_addr];

	// Nothing to do while the counter is below 2.
	setp.lt.s32 	%at_most_one, %cnt_value, 2;
	@%at_most_one bra 	FIRSTPASS_DEDUP_DONE;

	// vector[py * param_1 + px] = (marker, marker)
	cvta.to.global.u64 	%vec_base, %vec_arg;
	mad.lo.s32 	%vec_index, %py, %vec_pitch, %px;
	mul.wide.s32 	%vec_offset, %vec_index, 8;
	add.s64 	%vec_addr, %vec_base, %vec_offset;
	mov.f32 	%marker, 0f5368D4A5;
	st.global.v2.f32 	[%vec_addr], {%marker, %marker};

FIRSTPASS_DEDUP_DONE:
	ret;
}

.visible .entry _d_warp_flow_kernel_make_splats(
	.param .u64 _d_warp_flow_kernel_make_splats_param_0,
	.param .u32 _d_warp_flow_kernel_make_splats_param_1,
	.param .u64 _d_warp_flow_kernel_make_splats_param_2,
	.param .u64 _d_warp_flow_kernel_make_splats_param_3,
	.param .u32 _d_warp_flow_kernel_make_splats_param_4,
	.param .u64 _d_warp_flow_kernel_make_splats_param_5,
	.param .u32 _d_warp_flow_kernel_make_splats_param_6,
	.param .u32 _d_warp_flow_kernel_make_splats_param_7,
	.param .u32 _d_warp_flow_kernel_make_splats_param_8,
	.param .f32 _d_warp_flow_kernel_make_splats_param_9,
	.param .f32 _d_warp_flow_kernel_make_splats_param_10,
	.param .u64 _d_warp_flow_kernel_make_splats_param_11,
	.param .u64 _d_warp_flow_kernel_make_splats_param_12,
	.param .u64 _d_warp_flow_kernel_make_splats_param_13
)
{
	.local .align 4 .b8 	__local_depot15[16];
	.reg .b64 	%SP;
	.reg .b64 	%SPL;
	.reg .pred 	%p<74>;
	.reg .s16 	%rs<48>;
	.reg .s32 	%r<105>;
	.reg .f32 	%f<303>;
	.reg .s64 	%rd<41>;


	mov.u64 	%SPL, __local_depot15;
	ld.param.u64 	%rd4, [_d_warp_flow_kernel_make_splats_param_0];
	ld.param.u32 	%r42, [_d_warp_flow_kernel_make_splats_param_1];
	ld.param.u64 	%rd5, [_d_warp_flow_kernel_make_splats_param_2];
	ld.param.u64 	%rd6, [_d_warp_flow_kernel_make_splats_param_3];
	ld.param.u32 	%r43, [_d_warp_flow_kernel_make_splats_param_4];
	ld.param.u64 	%rd7, [_d_warp_flow_kernel_make_splats_param_5];
	ld.param.u32 	%r44, [_d_warp_flow_kernel_make_splats_param_6];
	ld.param.u32 	%r45, [_d_warp_flow_kernel_make_splats_param_7];
	ld.param.u32 	%r46, [_d_warp_flow_kernel_make_splats_param_8];
	ld.param.f32 	%f117, [_d_warp_flow_kernel_make_splats_param_9];
	ld.param.f32 	%f118, [_d_warp_flow_kernel_make_splats_param_10];
	add.u64 	%rd8, %SPL, 0;
	mov.u32 	%r47, %ntid.x;
	mov.u32 	%r48, %ctaid.x;
	mov.u32 	%r49, %tid.x;
	mad.lo.s32 	%r1, %r47, %r48, %r49;
	mov.u32 	%r50, %ntid.y;
	mov.u32 	%r51, %ctaid.y;
	mov.u32 	%r52, %tid.y;
	mad.lo.s32 	%r2, %r50, %r51, %r52;
	setp.lt.s32	%p4, %r1, %r45;
	setp.lt.s32	%p5, %r2, %r46;
	and.pred  	%p6, %p4, %p5;
	@!%p6 bra 	BB15_75;
	bra.uni 	BB15_1;

BB15_1:
	cvta.to.global.u64 	%rd9, %rd7;
	mad.lo.s32 	%r54, %r2, %r44, %r1;
	mul.wide.s32 	%rd10, %r54, 8;
	add.s64 	%rd11, %rd9, %rd10;
	ld.global.v2.f32 	{%f119, %f120}, [%rd11];
	cvt.rn.f32.s32	%f3, %r1;
	cvt.rn.f32.s32	%f4, %r2;
	mov.u32 	%r94, 0;
	abs.ftz.f32 	%f121, %f119;
	abs.ftz.f32 	%f122, %f120;
	fma.rn.ftz.f32 	%f5, %f119, %f117, %f3;
	fma.rn.ftz.f32 	%f6, %f120, %f117, %f4;
	mul.ftz.f32 	%f141, %f119, %f117;
	mul.ftz.f32 	%f143, %f120, %f117;
	add.ftz.f32 	%f228, %f118, 0f3F800000;
	cvta.to.global.u64 	%rd26, %rd4;

BB15_2:
	mul.wide.s32 	%rd12, %r94, 4;
	add.s64 	%rd2, %rd8, %rd12;
	mov.u32 	%r55, 1399379109;
	st.local.u32 	[%rd2], %r55;
	setp.ge.ftz.f32	%p7, %f121, 0f4CBEBC20;
	@%p7 bra 	BB15_73;

	setp.ge.ftz.f32	%p8, %f122, 0f4CBEBC20;
	@%p8 bra 	BB15_73;

	and.b32  	%r56, %r94, 1;
	setp.eq.b32	%p9, %r56, 1;
	@!%p9 bra 	BB15_6;
	bra.uni 	BB15_5;

BB15_5:
	cvt.rpi.ftz.f32.f32	%f283, %f5;
	bra.uni 	BB15_7;

BB15_6:
	cvt.rmi.ftz.f32.f32	%f283, %f5;

BB15_7:
	cvt.rzi.ftz.s32.f32	%r4, %f283;
	and.b32  	%r57, %r94, 2;
	setp.eq.s32	%p10, %r57, 0;
	@%p10 bra 	BB15_9;

	cvt.rpi.ftz.f32.f32	%f284, %f6;
	bra.uni 	BB15_10;

BB15_9:
	cvt.rmi.ftz.f32.f32	%f284, %f6;

BB15_10:
	setp.lt.s32	%p11, %r4, %r45;
	setp.gt.s32	%p12, %r4, -1;
	and.pred  	%p13, %p12, %p11;
	cvt.rzi.ftz.s32.f32	%r5, %f284;
	setp.gt.s32	%p14, %r5, -1;
	and.pred  	%p15, %p13, %p14;
	setp.lt.s32	%p16, %r5, %r46;
	and.pred  	%p17, %p15, %p16;
	@!%p17 bra 	BB15_73;
	bra.uni 	BB15_11;

BB15_11:
	cvt.rn.f32.s32	%f13, %r4;
	sub.ftz.f32 	%f142, %f13, %f141;
	add.ftz.f32 	%f127, %f142, 0f3F000000;
	cvt.rn.f32.s32	%f140, %r5;
	sub.ftz.f32 	%f144, %f140, %f143;
	add.ftz.f32 	%f128, %f144, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f123, %f124, %f125, %f126}, [im1Tex, {%f127, %f128}];
	// inline asm
	mov.f32 	%f145, 0f3F800000;
	sub.ftz.f32 	%f146, %f145, %f117;
	fma.rn.ftz.f32 	%f147, %f119, %f146, %f13;
	add.ftz.f32 	%f133, %f147, 0f3F000000;
	fma.rn.ftz.f32 	%f148, %f120, %f146, %f140;
	add.ftz.f32 	%f134, %f148, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f129, %f130, %f131, %f132}, [im1Tex, {%f133, %f134}];
	// inline asm
	sub.ftz.f32 	%f149, %f123, %f129;
	sub.ftz.f32 	%f150, %f124, %f130;
	sub.ftz.f32 	%f151, %f125, %f131;
	abs.ftz.f32 	%f152, %f149;
	abs.ftz.f32 	%f153, %f150;
	add.ftz.f32 	%f154, %f152, %f153;
	abs.ftz.f32 	%f155, %f151;
	add.ftz.f32 	%f156, %f154, %f155;
	mov.f32 	%f157, 0f40400000;
	div.approx.ftz.f32 	%f158, %f156, %f157;
	setp.gt.ftz.f32	%p18, %f158, 0f3F800000;
	selp.f32	%f15, 0f3F800000, %f158, %p18;
	add.s32 	%r59, %r4, -1;
	cvt.rn.f32.s32	%f139, %r59;
	// inline asm
	tex.2d.v4.f32.f32 {%f135, %f136, %f137, %f138}, [uvTex, {%f139, %f140}];
	// inline asm
	mov.u32 	%r95, 1;
	mov.f32 	%f286, %f136;
	mov.f32 	%f285, %f135;

BB15_12:
	abs.ftz.f32 	%f159, %f285;
	setp.ltu.ftz.f32	%p19, %f159, 0f4CBEBC20;
	@%p19 bra 	BB15_14;

	mov.u16 	%rs40, 1;
	bra.uni 	BB15_15;

BB15_14:
	abs.ftz.f32 	%f160, %f286;
	setp.ge.ftz.f32	%p20, %f160, 0f4CBEBC20;
	selp.u16	%rs40, 1, 0, %p20;

BB15_15:
	and.b16  	%rs18, %rs40, 1;
	setp.eq.b16	%p21, %rs18, 1;
	setp.lt.s32	%p22, %r95, 10;
	and.pred  	%p23, %p21, %p22;
	@%p23 bra 	BB15_83;

	setp.eq.s16	%p24, %rs40, 0;
	@%p24 bra 	BB15_18;

	mov.f32 	%f302, 0f00000000;
	mov.f32 	%f301, %f302;
	mov.u32 	%r103, 0;
	bra.uni 	BB15_19;

BB15_18:
	add.ftz.f32 	%f301, %f285, 0f00000000;
	add.ftz.f32 	%f302, %f286, 0f00000000;
	mov.u32 	%r103, 1;

BB15_19:
	add.s32 	%r63, %r4, 1;
	mov.u32 	%r96, 1;
	cvt.rn.f32.s32	%f167, %r63;
	// inline asm
	tex.2d.v4.f32.f32 {%f163, %f164, %f165, %f166}, [uvTex, {%f167, %f140}];
	// inline asm
	mov.f32 	%f288, %f164;
	mov.f32 	%f287, %f163;

BB15_20:
	abs.ftz.f32 	%f169, %f287;
	setp.ltu.ftz.f32	%p25, %f169, 0f4CBEBC20;
	@%p25 bra 	BB15_22;

	mov.u16 	%rs41, 1;
	bra.uni 	BB15_23;

BB15_22:
	abs.ftz.f32 	%f170, %f288;
	setp.ge.ftz.f32	%p26, %f170, 0f4CBEBC20;
	selp.u16	%rs41, 1, 0, %p26;

BB15_23:
	and.b16  	%rs20, %rs41, 1;
	setp.eq.b16	%p27, %rs20, 1;
	setp.lt.s32	%p28, %r96, 10;
	and.pred  	%p29, %p27, %p28;
	@%p29 bra 	BB15_82;

	@%p27 bra 	BB15_26;

	add.ftz.f32 	%f301, %f301, %f287;
	add.ftz.f32 	%f302, %f302, %f288;
	add.s32 	%r103, %r103, 1;

BB15_26:
	add.s32 	%r65, %r5, -1;
	cvt.rn.f32.s32	%f176, %r65;
	// inline asm
	tex.2d.v4.f32.f32 {%f171, %f172, %f173, %f174}, [uvTex, {%f13, %f176}];
	// inline asm
	mov.u32 	%r97, 1;
	mov.f32 	%f290, %f172;
	mov.f32 	%f289, %f171;

BB15_27:
	abs.ftz.f32 	%f177, %f289;
	setp.ltu.ftz.f32	%p31, %f177, 0f4CBEBC20;
	@%p31 bra 	BB15_29;

	mov.u16 	%rs42, 1;
	bra.uni 	BB15_30;

BB15_29:
	abs.ftz.f32 	%f178, %f290;
	setp.ge.ftz.f32	%p32, %f178, 0f4CBEBC20;
	selp.u16	%rs42, 1, 0, %p32;

BB15_30:
	and.b16  	%rs23, %rs42, 1;
	setp.eq.b16	%p33, %rs23, 1;
	setp.lt.s32	%p34, %r97, 10;
	and.pred  	%p35, %p33, %p34;
	@%p35 bra 	BB15_81;

	@%p33 bra 	BB15_33;

	add.ftz.f32 	%f301, %f301, %f289;
	add.ftz.f32 	%f302, %f302, %f290;
	add.s32 	%r103, %r103, 1;

BB15_33:
	// inline asm
	tex.2d.v4.f32.f32 {%f179, %f180, %f181, %f182}, [uvTex, {%f139, %f176}];
	// inline asm
	mov.u32 	%r98, 1;
	mov.f32 	%f292, %f180;
	mov.f32 	%f291, %f179;

BB15_34:
	abs.ftz.f32 	%f185, %f291;
	setp.ltu.ftz.f32	%p37, %f185, 0f4CBEBC20;
	@%p37 bra 	BB15_36;

	mov.u16 	%rs43, 1;
	bra.uni 	BB15_37;

BB15_36:
	abs.ftz.f32 	%f186, %f292;
	setp.ge.ftz.f32	%p38, %f186, 0f4CBEBC20;
	selp.u16	%rs43, 1, 0, %p38;

BB15_37:
	and.b16  	%rs26, %rs43, 1;
	setp.eq.b16	%p39, %rs26, 1;
	setp.lt.s32	%p40, %r98, 10;
	and.pred  	%p41, %p39, %p40;
	@%p41 bra 	BB15_80;

	@%p39 bra 	BB15_40;

	add.ftz.f32 	%f301, %f301, %f291;
	add.ftz.f32 	%f302, %f302, %f292;
	add.s32 	%r103, %r103, 1;

BB15_40:
	// inline asm
	tex.2d.v4.f32.f32 {%f187, %f188, %f189, %f190}, [uvTex, {%f167, %f176}];
	// inline asm
	mov.u32 	%r99, 1;
	mov.f32 	%f294, %f188;
	mov.f32 	%f293, %f187;

BB15_41:
	abs.ftz.f32 	%f193, %f293;
	setp.ltu.ftz.f32	%p43, %f193, 0f4CBEBC20;
	@%p43 bra 	BB15_43;

	mov.u16 	%rs44, 1;
	bra.uni 	BB15_44;

BB15_43:
	abs.ftz.f32 	%f194, %f294;
	setp.ge.ftz.f32	%p44, %f194, 0f4CBEBC20;
	selp.u16	%rs44, 1, 0, %p44;

BB15_44:
	and.b16  	%rs29, %rs44, 1;
	setp.eq.b16	%p45, %rs29, 1;
	setp.lt.s32	%p46, %r99, 10;
	and.pred  	%p47, %p45, %p46;
	@%p47 bra 	BB15_79;

	@%p45 bra 	BB15_47;

	add.ftz.f32 	%f301, %f301, %f293;
	add.ftz.f32 	%f302, %f302, %f294;
	add.s32 	%r103, %r103, 1;

BB15_47:
	add.s32 	%r69, %r5, 1;
	mov.u32 	%r100, 1;
	cvt.rn.f32.s32	%f200, %r69;
	// inline asm
	tex.2d.v4.f32.f32 {%f195, %f196, %f197, %f198}, [uvTex, {%f13, %f200}];
	// inline asm
	mov.f32 	%f296, %f196;
	mov.f32 	%f295, %f195;

BB15_48:
	abs.ftz.f32 	%f201, %f295;
	setp.ltu.ftz.f32	%p49, %f201, 0f4CBEBC20;
	@%p49 bra 	BB15_50;

	mov.u16 	%rs45, 1;
	bra.uni 	BB15_51;

BB15_50:
	abs.ftz.f32 	%f202, %f296;
	setp.ge.ftz.f32	%p50, %f202, 0f4CBEBC20;
	selp.u16	%rs45, 1, 0, %p50;

BB15_51:
	and.b16  	%rs32, %rs45, 1;
	setp.eq.b16	%p51, %rs32, 1;
	setp.lt.s32	%p52, %r100, 10;
	and.pred  	%p53, %p51, %p52;
	@%p53 bra 	BB15_78;

	@%p51 bra 	BB15_54;

	add.ftz.f32 	%f301, %f301, %f295;
	add.ftz.f32 	%f302, %f302, %f296;
	add.s32 	%r103, %r103, 1;

BB15_54:
	// inline asm
	tex.2d.v4.f32.f32 {%f203, %f204, %f205, %f206}, [uvTex, {%f139, %f200}];
	// inline asm
	mov.u32 	%r101, 1;
	mov.f32 	%f298, %f204;
	mov.f32 	%f297, %f203;

BB15_55:
	abs.ftz.f32 	%f209, %f297;
	setp.ltu.ftz.f32	%p55, %f209, 0f4CBEBC20;
	@%p55 bra 	BB15_57;

	mov.u16 	%rs46, 1;
	bra.uni 	BB15_58;

BB15_57:
	abs.ftz.f32 	%f210, %f298;
	setp.ge.ftz.f32	%p56, %f210, 0f4CBEBC20;
	selp.u16	%rs46, 1, 0, %p56;

BB15_58:
	and.b16  	%rs35, %rs46, 1;
	setp.eq.b16	%p57, %rs35, 1;
	setp.lt.s32	%p58, %r101, 10;
	and.pred  	%p59, %p57, %p58;
	@%p59 bra 	BB15_77;

	@%p57 bra 	BB15_61;

	add.ftz.f32 	%f301, %f301, %f297;
	add.ftz.f32 	%f302, %f302, %f298;
	add.s32 	%r103, %r103, 1;

BB15_61:
	// inline asm
	tex.2d.v4.f32.f32 {%f211, %f212, %f213, %f214}, [uvTex, {%f167, %f200}];
	// inline asm
	mov.u32 	%r102, 1;
	mov.f32 	%f300, %f212;
	mov.f32 	%f299, %f211;

BB15_62:
	abs.ftz.f32 	%f217, %f299;
	setp.ltu.ftz.f32	%p61, %f217, 0f4CBEBC20;
	@%p61 bra 	BB15_64;

	mov.u16 	%rs47, 1;
	bra.uni 	BB15_65;

BB15_64:
	abs.ftz.f32 	%f218, %f300;
	setp.ge.ftz.f32	%p62, %f218, 0f4CBEBC20;
	selp.u16	%rs47, 1, 0, %p62;

BB15_65:
	and.b16  	%rs38, %rs47, 1;
	setp.eq.b16	%p63, %rs38, 1;
	setp.lt.s32	%p64, %r102, 10;
	and.pred  	%p65, %p63, %p64;
	@%p65 bra 	BB15_76;

	@%p63 bra 	BB15_68;

	add.ftz.f32 	%f301, %f301, %f299;
	add.ftz.f32 	%f302, %f302, %f300;
	add.s32 	%r103, %r103, 1;

BB15_68:
	setp.eq.s32	%p67, %r103, 0;
	@%p67 bra 	BB15_73;

	cvt.rn.f32.s32	%f219, %r103;
	div.approx.ftz.f32 	%f220, %f301, %f219;
	div.approx.ftz.f32 	%f221, %f302, %f219;
	sub.ftz.f32 	%f222, %f119, %f220;
	sub.ftz.f32 	%f223, %f120, %f221;
	abs.ftz.f32 	%f224, %f222;
	abs.ftz.f32 	%f225, %f223;
	add.ftz.f32 	%f226, %f224, %f225;
	mul.ftz.f32 	%f227, %f226, 0f41000000;
	fma.rn.ftz.f32 	%f100, %f227, %f228, %f15;
	mad.lo.s32 	%r72, %r5, %r42, %r4;
	mul.wide.s32 	%rd27, %r72, 4;
	add.s64 	%rd3, %rd26, %rd27;
	mov.b32 	 %r29, %f100;
	ld.global.u32 	%r104, [%rd3];
	mov.b32 	 %f229, %r104;
	setp.lt.ftz.f32	%p73, %f100, %f229;
	setp.ne.s32	%p68, %r104, %r29;
	and.pred  	%p69, %p73, %p68;
	@!%p69 bra 	BB15_71;
	bra.uni 	BB15_70;

BB15_70:
	mov.u32 	%r31, %r104;
	atom.global.cas.b32 	%r104, [%rd3], %r31, %r29;
	mov.b32 	 %f230, %r104;
	setp.lt.ftz.f32	%p73, %f100, %f230;
	setp.ne.s32	%p70, %r104, %r31;
	and.pred  	%p71, %p73, %p70;
	@%p71 bra 	BB15_70;

BB15_71:
	@!%p73 bra 	BB15_73;
	bra.uni 	BB15_72;

BB15_72:
	st.local.f32 	[%rd2], %f100;

BB15_73:
	add.s32 	%r94, %r94, 1;
	setp.lt.s32	%p72, %r94, 4;
	@%p72 bra 	BB15_2;

	cvta.to.global.u64 	%rd28, %rd5;
	cvta.to.global.u64 	%rd29, %rd6;
	mad.lo.s32 	%r81, %r2, %r43, %r1;
	mul.wide.s32 	%rd30, %r81, 8;
	add.s64 	%rd31, %rd28, %rd30;
	ld.local.f32 	%f231, [%rd8+8];
	ld.local.f32 	%f232, [%rd8+12];
	ld.local.f32 	%f233, [%rd8];
	ld.local.f32 	%f234, [%rd8+4];
	st.global.v2.f32 	[%rd31], {%f233, %f234};
	add.s64 	%rd32, %rd29, %rd30;
	st.global.v2.f32 	[%rd32], {%f231, %f232};

BB15_75:
	ret;

BB15_76:
	add.s32 	%r82, %r102, %r4;
	cvt.rn.f32.s32	%f239, %r82;
	add.s32 	%r83, %r102, %r5;
	cvt.rn.f32.s32	%f240, %r83;
	// inline asm
	tex.2d.v4.f32.f32 {%f235, %f236, %f237, %f238}, [uvTex, {%f239, %f240}];
	// inline asm
	add.s32 	%r102, %r102, 1;
	mov.f32 	%f300, %f236;
	mov.f32 	%f299, %f235;
	bra.uni 	BB15_62;

BB15_77:
	sub.s32 	%r84, %r4, %r101;
	cvt.rn.f32.s32	%f245, %r84;
	add.s32 	%r85, %r101, %r5;
	cvt.rn.f32.s32	%f246, %r85;
	// inline asm
	tex.2d.v4.f32.f32 {%f241, %f242, %f243, %f244}, [uvTex, {%f245, %f246}];
	// inline asm
	add.s32 	%r101, %r101, 1;
	mov.f32 	%f298, %f242;
	mov.f32 	%f297, %f241;
	bra.uni 	BB15_55;

BB15_78:
	add.s32 	%r86, %r100, %r5;
	cvt.rn.f32.s32	%f252, %r86;
	// inline asm
	tex.2d.v4.f32.f32 {%f247, %f248, %f249, %f250}, [uvTex, {%f13, %f252}];
	// inline asm
	add.s32 	%r100, %r100, 1;
	mov.f32 	%f296, %f248;
	mov.f32 	%f295, %f247;
	bra.uni 	BB15_48;

BB15_79:
	add.s32 	%r87, %r99, %r4;
	cvt.rn.f32.s32	%f257, %r87;
	sub.s32 	%r88, %r5, %r99;
	cvt.rn.f32.s32	%f258, %r88;
	// inline asm
	tex.2d.v4.f32.f32 {%f253, %f254, %f255, %f256}, [uvTex, {%f257, %f258}];
	// inline asm
	add.s32 	%r99, %r99, 1;
	mov.f32 	%f294, %f254;
	mov.f32 	%f293, %f253;
	bra.uni 	BB15_41;

BB15_80:
	sub.s32 	%r89, %r4, %r98;
	cvt.rn.f32.s32	%f263, %r89;
	sub.s32 	%r90, %r5, %r98;
	cvt.rn.f32.s32	%f264, %r90;
	// inline asm
	tex.2d.v4.f32.f32 {%f259, %f260, %f261, %f262}, [uvTex, {%f263, %f264}];
	// inline asm
	add.s32 	%r98, %r98, 1;
	mov.f32 	%f292, %f260;
	mov.f32 	%f291, %f259;
	bra.uni 	BB15_34;

BB15_81:
	sub.s32 	%r91, %r5, %r97;
	cvt.rn.f32.s32	%f270, %r91;
	// inline asm
	tex.2d.v4.f32.f32 {%f265, %f266, %f267, %f268}, [uvTex, {%f13, %f270}];
	// inline asm
	add.s32 	%r97, %r97, 1;
	mov.f32 	%f290, %f266;
	mov.f32 	%f289, %f265;
	bra.uni 	BB15_27;

BB15_82:
	add.s32 	%r92, %r96, %r4;
	cvt.rn.f32.s32	%f275, %r92;
	// inline asm
	tex.2d.v4.f32.f32 {%f271, %f272, %f273, %f274}, [uvTex, {%f275, %f140}];
	// inline asm
	add.s32 	%r96, %r96, 1;
	mov.f32 	%f288, %f272;
	mov.f32 	%f287, %f271;
	bra.uni 	BB15_20;

BB15_83:
	sub.s32 	%r93, %r4, %r95;
	cvt.rn.f32.s32	%f281, %r93;
	// inline asm
	tex.2d.v4.f32.f32 {%f277, %f278, %f279, %f280}, [uvTex, {%f281, %f140}];
	// inline asm
	add.s32 	%r95, %r95, 1;
	mov.f32 	%f286, %f278;
	mov.f32 	%f285, %f277;
	bra.uni 	BB15_12;
}

//
// _d_warp_flow_kernel_splatter
//
// Each thread handles one integer position (x, y) built from
// ntid * ctaid + tid in the x and y dimensions. It loads a 2-float vector
// (u, v) for that position, computes the target position
//     (tx, ty) = (x + u * param_10, y + v * param_10)
// and tries to write (u, v) to each of the four integer positions
// surrounding the target: (floor tx, floor ty), (ceil tx, floor ty),
// (floor tx, ceil ty), (ceil tx, ceil ty).
//
// A write to a given position only happens when that position lies inside
// [0, param_8) x [0, param_9), the thread's key for that corner is not
// 1.0e12, and an atomic compare-and-swap on the matching 32-bit word of
// param_0 returns a value that compares equal to the key. The CAS replaces
// a word equal to the key with the bit pattern of 1.0e12.
//
// Parameters (as used by the code below):
//   param_0  : pointer to 32-bit words, row stride param_1 elements;
//              target of the atomic compare-and-swap.
//   param_1  : row stride (in 4-byte elements) for param_0.
//   param_2  : pointer to 2-float entries, row stride param_4; entry (x, y)
//              supplies the keys for the floor-ty corners
//              (.x -> floor tx, .y -> ceil tx).
//   param_3  : pointer to 2-float entries, row stride param_4; entry (x, y)
//              supplies the keys for the ceil-ty corners
//              (.x -> floor tx, .y -> ceil tx).
//   param_4  : row stride (in 8-byte elements) for param_2 and param_3.
//   param_5  : destination pointer, 2-float entries, row stride param_7.
//   param_6  : source pointer, 2-float entries (u, v), row stride param_7.
//   param_7  : row stride (in 8-byte elements) for param_5 and param_6.
//   param_8  : exclusive upper bound for x and for target x coordinates.
//   param_9  : exclusive upper bound for y and for target y coordinates.
//   param_10 : scale applied to (u, v) before adding to (x, y).
//
// NOTE(review): param_8 / param_9 are presumably image width / height, the
// 1.0e8 threshold presumably marks an invalid input vector, and 1.0e12
// presumably marks an already-claimed slot in param_0 -- confirm against
// the originating CUDA source.
//
.visible .entry _d_warp_flow_kernel_splatter(
	.param .u64 _d_warp_flow_kernel_splatter_param_0,
	.param .u32 _d_warp_flow_kernel_splatter_param_1,
	.param .u64 _d_warp_flow_kernel_splatter_param_2,
	.param .u64 _d_warp_flow_kernel_splatter_param_3,
	.param .u32 _d_warp_flow_kernel_splatter_param_4,
	.param .u64 _d_warp_flow_kernel_splatter_param_5,
	.param .u64 _d_warp_flow_kernel_splatter_param_6,
	.param .u32 _d_warp_flow_kernel_splatter_param_7,
	.param .u32 _d_warp_flow_kernel_splatter_param_8,
	.param .u32 _d_warp_flow_kernel_splatter_param_9,
	.param .f32 _d_warp_flow_kernel_splatter_param_10
)
{
	.reg .pred 	%p<46>;
	.reg .s32 	%r<40>;
	.reg .f32 	%f<36>;
	.reg .s64 	%rd<35>;


	// Load all kernel parameters into registers.
	ld.param.u64 	%rd2, [_d_warp_flow_kernel_splatter_param_0];
	ld.param.u32 	%r11, [_d_warp_flow_kernel_splatter_param_1];
	ld.param.u64 	%rd3, [_d_warp_flow_kernel_splatter_param_2];
	ld.param.u64 	%rd4, [_d_warp_flow_kernel_splatter_param_3];
	ld.param.u32 	%r12, [_d_warp_flow_kernel_splatter_param_4];
	ld.param.u64 	%rd6, [_d_warp_flow_kernel_splatter_param_5];
	ld.param.u64 	%rd5, [_d_warp_flow_kernel_splatter_param_6];
	ld.param.u32 	%r13, [_d_warp_flow_kernel_splatter_param_7];
	ld.param.u32 	%r14, [_d_warp_flow_kernel_splatter_param_8];
	ld.param.u32 	%r15, [_d_warp_flow_kernel_splatter_param_9];
	ld.param.f32 	%f13, [_d_warp_flow_kernel_splatter_param_10];
	// %rd1 = global-space address of the destination buffer (param_5).
	cvta.to.global.u64 	%rd1, %rd6;
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r16, %ntid.x;
	mov.u32 	%r17, %ctaid.x;
	mov.u32 	%r18, %tid.x;
	mad.lo.s32 	%r1, %r16, %r17, %r18;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r19, %ntid.y;
	mov.u32 	%r20, %ctaid.y;
	mov.u32 	%r21, %tid.y;
	mad.lo.s32 	%r2, %r19, %r20, %r21;
	// Exit unless x < param_8 and y < param_9.
	setp.lt.s32	%p1, %r1, %r14;
	setp.lt.s32	%p2, %r2, %r15;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB16_15;
	bra.uni 	BB16_1;

BB16_1:
	// Load (u, v) = (%f14, %f15) from param_6 at entry y * param_7 + x.
	cvta.to.global.u64 	%rd7, %rd5;
	mad.lo.s32 	%r22, %r2, %r13, %r1;
	mul.wide.s32 	%rd8, %r22, 8;
	add.s64 	%rd9, %rd7, %rd8;
	ld.global.v2.f32 	{%f14, %f15}, [%rd9];
	// Exit when |u| >= 1.0e8 (0f4CBEBC20).
	abs.ftz.f32 	%f16, %f14;
	setp.ge.ftz.f32	%p4, %f16, 0f4CBEBC20;
	@%p4 bra 	BB16_15;

	// Exit when |v| >= 1.0e8.
	abs.ftz.f32 	%f17, %f15;
	setp.ge.ftz.f32	%p5, %f17, 0f4CBEBC20;
	@%p5 bra 	BB16_15;

	// Load the four per-corner keys from param_2 and param_3 at entry
	// y * param_4 + x: (%f18, %f19) from param_2, (%f20, %f21) from param_3.
	cvta.to.global.u64 	%rd10, %rd4;
	cvta.to.global.u64 	%rd11, %rd3;
	mad.lo.s32 	%r23, %r2, %r12, %r1;
	mul.wide.s32 	%rd12, %r23, 8;
	add.s64 	%rd13, %rd11, %rd12;
	ld.global.v2.f32 	{%f18, %f19}, [%rd13];
	add.s64 	%rd14, %rd10, %rd12;
	ld.global.v2.f32 	{%f20, %f21}, [%rd14];
	// Target position: %f9 = tx = u * param_10 + x, %f10 = ty = v * param_10 + y.
	cvt.rn.f32.s32	%f22, %r1;
	fma.rn.ftz.f32 	%f9, %f14, %f13, %f22;
	cvt.rn.f32.s32	%f23, %r2;
	fma.rn.ftz.f32 	%f10, %f15, %f13, %f23;
	// %r3 = floor(tx), %r4 = floor(ty) (round toward -inf, then convert to s32).
	cvt.rmi.ftz.f32.f32	%f24, %f9;
	cvt.rzi.ftz.s32.f32	%r3, %f24;
	cvt.rmi.ftz.f32.f32	%f25, %f10;
	cvt.rzi.ftz.s32.f32	%r4, %f25;
	// %p8  = 0 <= floor(tx) < param_8   (reused in BB16_9)
	// %p9  = floor(ty) >= 0             (reused in BB16_6)
	// %p11 = floor(ty) < param_9        (reused in BB16_6)
	setp.gt.s32	%p6, %r3, -1;
	setp.lt.s32	%p7, %r3, %r14;
	and.pred  	%p8, %p6, %p7;
	setp.gt.s32	%p9, %r4, -1;
	and.pred  	%p10, %p8, %p9;
	setp.lt.s32	%p11, %r4, %r15;
	and.pred  	%p12, %p10, %p11;
	// Corner (floor tx, floor ty): proceed only when it is in bounds and the
	// key %f18 is not equal to 1.0e12 (0f5368D4A5).
	setp.neu.ftz.f32	%p13, %f18, 0f5368D4A5;
	and.pred  	%p14, %p12, %p13;
	@!%p14 bra 	BB16_6;
	bra.uni 	BB16_4;

BB16_4:
	// Address of word floor(ty) * param_1 + floor(tx) in param_0.
	mad.lo.s32 	%r24, %r4, %r11, %r3;
	cvta.to.global.u64 	%rd15, %rd2;
	mul.wide.s32 	%rd16, %r24, 4;
	add.s64 	%rd17, %rd15, %rd16;
	// CAS: if the word equals the bits of %f18, replace it with 1399379109
	// (the bit pattern of 1.0e12); %r26 receives the previous word.
	mov.b32 	 %r25, %f18;
	atom.global.cas.b32 	%r26, [%rd17], %r25, 1399379109;
	// Skip the store unless the previous value compares equal to %f18.
	mov.b32 	 %f26, %r26;
	setp.neu.ftz.f32	%p15, %f18, %f26;
	@%p15 bra 	BB16_6;

	// Store (u, v) to param_5 at entry floor(ty) * param_7 + floor(tx).
	mad.lo.s32 	%r27, %r4, %r13, %r3;
	mul.wide.s32 	%rd18, %r27, 8;
	add.s64 	%rd19, %rd1, %rd18;
	st.global.v2.f32 	[%rd19], {%f14, %f15};

BB16_6:
	// Corner (ceil tx, floor ty): %r5 = ceil(tx) (round toward +inf).
	cvt.rpi.ftz.f32.f32	%f27, %f9;
	cvt.rzi.ftz.s32.f32	%r5, %f27;
	// %p18 = 0 <= ceil(tx) < param_8   (reused in BB16_12)
	setp.gt.s32	%p16, %r5, -1;
	setp.lt.s32	%p17, %r5, %r14;
	and.pred  	%p18, %p16, %p17;
	// Combine with the floor(ty) bounds computed earlier (%p9, %p11).
	and.pred  	%p20, %p18, %p9;
	and.pred  	%p22, %p20, %p11;
	// Key for this corner is %f19; skip when it equals 1.0e12.
	setp.neu.ftz.f32	%p23, %f19, 0f5368D4A5;
	and.pred  	%p24, %p22, %p23;
	@!%p24 bra 	BB16_9;
	bra.uni 	BB16_7;

BB16_7:
	// Address of word floor(ty) * param_1 + ceil(tx) in param_0.
	mad.lo.s32 	%r28, %r4, %r11, %r5;
	cvta.to.global.u64 	%rd20, %rd2;
	mul.wide.s32 	%rd21, %r28, 4;
	add.s64 	%rd22, %rd20, %rd21;
	// CAS against the bits of %f19, swapping in the 1.0e12 bit pattern.
	mov.b32 	 %r29, %f19;
	atom.global.cas.b32 	%r30, [%rd22], %r29, 1399379109;
	// Skip the store unless the previous value compares equal to %f19.
	mov.b32 	 %f29, %r30;
	setp.neu.ftz.f32	%p25, %f19, %f29;
	@%p25 bra 	BB16_9;

	// Store (u, v) to param_5 at entry floor(ty) * param_7 + ceil(tx).
	mad.lo.s32 	%r31, %r4, %r13, %r5;
	mul.wide.s32 	%rd23, %r31, 8;
	add.s64 	%rd24, %rd1, %rd23;
	st.global.v2.f32 	[%rd24], {%f14, %f15};

BB16_9:
	// Corner (floor tx, ceil ty): %r8 = ceil(ty).
	cvt.rpi.ftz.f32.f32	%f31, %f10;
	cvt.rzi.ftz.s32.f32	%r8, %f31;
	// %p29 = ceil(ty) >= 0, %p31 = ceil(ty) < param_9 (both reused in BB16_12);
	// %p8 is the floor(tx) bounds check from the entry block.
	setp.gt.s32	%p29, %r8, -1;
	and.pred  	%p30, %p8, %p29;
	setp.lt.s32	%p31, %r8, %r15;
	and.pred  	%p32, %p30, %p31;
	// Key for this corner is %f20; skip when it equals 1.0e12.
	setp.neu.ftz.f32	%p33, %f20, 0f5368D4A5;
	and.pred  	%p34, %p32, %p33;
	@!%p34 bra 	BB16_12;
	bra.uni 	BB16_10;

BB16_10:
	// Address of word ceil(ty) * param_1 + floor(tx) in param_0.
	mad.lo.s32 	%r32, %r8, %r11, %r3;
	cvta.to.global.u64 	%rd25, %rd2;
	mul.wide.s32 	%rd26, %r32, 4;
	add.s64 	%rd27, %rd25, %rd26;
	// CAS against the bits of %f20, swapping in the 1.0e12 bit pattern.
	mov.b32 	 %r33, %f20;
	atom.global.cas.b32 	%r34, [%rd27], %r33, 1399379109;
	// Skip the store unless the previous value compares equal to %f20.
	mov.b32 	 %f32, %r34;
	setp.neu.ftz.f32	%p35, %f20, %f32;
	@%p35 bra 	BB16_12;

	// Store (u, v) to param_5 at entry ceil(ty) * param_7 + floor(tx).
	mad.lo.s32 	%r35, %r8, %r13, %r3;
	mul.wide.s32 	%rd28, %r35, 8;
	add.s64 	%rd29, %rd1, %rd28;
	st.global.v2.f32 	[%rd29], {%f14, %f15};

BB16_12:
	// Corner (ceil tx, ceil ty): reuse the ceil(tx) bounds (%p18) and the
	// ceil(ty) bounds (%p29, %p31) computed above.
	and.pred  	%p40, %p18, %p29;
	and.pred  	%p42, %p40, %p31;
	// Key for this corner is %f21; skip when it equals 1.0e12.
	setp.neu.ftz.f32	%p43, %f21, 0f5368D4A5;
	and.pred  	%p44, %p42, %p43;
	@!%p44 bra 	BB16_15;
	bra.uni 	BB16_13;

BB16_13:
	// Address of word ceil(ty) * param_1 + ceil(tx) in param_0.
	mad.lo.s32 	%r36, %r8, %r11, %r5;
	cvta.to.global.u64 	%rd30, %rd2;
	mul.wide.s32 	%rd31, %r36, 4;
	add.s64 	%rd32, %rd30, %rd31;
	// CAS against the bits of %f21, swapping in the 1.0e12 bit pattern.
	mov.b32 	 %r37, %f21;
	atom.global.cas.b32 	%r38, [%rd32], %r37, 1399379109;
	// Skip the store unless the previous value compares equal to %f21.
	mov.b32 	 %f35, %r38;
	setp.neu.ftz.f32	%p45, %f21, %f35;
	@%p45 bra 	BB16_15;

	// Store (u, v) to param_5 at entry ceil(ty) * param_7 + ceil(tx).
	mad.lo.s32 	%r39, %r8, %r13, %r5;
	mul.wide.s32 	%rd33, %r39, 8;
	add.s64 	%rd34, %rd1, %rd33;
	st.global.v2.f32 	[%rd34], {%f14, %f15};

BB16_15:
	// Common exit for out-of-range threads, rejected inputs and normal completion.
	ret;
}

.visible .entry _d_flow_fill_holes_joint_with_datacost(
	.param .u64 _d_flow_fill_holes_joint_with_datacost_param_0,
	.param .u64 _d_flow_fill_holes_joint_with_datacost_param_1,
	.param .u32 _d_flow_fill_holes_joint_with_datacost_param_2,
	.param .u32 _d_flow_fill_holes_joint_with_datacost_param_3,
	.param .u32 _d_flow_fill_holes_joint_with_datacost_param_4,
	.param .f32 _d_flow_fill_holes_joint_with_datacost_param_5,
	.param .f32 _d_flow_fill_holes_joint_with_datacost_param_6,
	.param .u64 _d_flow_fill_holes_joint_with_datacost_param_7,
	.param .u64 _d_flow_fill_holes_joint_with_datacost_param_8,
	.param .u64 _d_flow_fill_holes_joint_with_datacost_param_9,
	.param .u64 _d_flow_fill_holes_joint_with_datacost_param_10
)
{
	.local .align 8 .b8 	__local_depot17[128];
	.reg .b64 	%SP;
	.reg .b64 	%SPL;
	.reg .pred 	%p<184>;
	.reg .s16 	%rs<195>;
	.reg .s32 	%r<349>;
	.reg .f32 	%f<634>;
	.reg .s64 	%rd<146>;


	mov.u64 	%SPL, __local_depot17;
	ld.param.u64 	%rd45, [_d_flow_fill_holes_joint_with_datacost_param_0];
	ld.param.u64 	%rd46, [_d_flow_fill_holes_joint_with_datacost_param_1];
	ld.param.u32 	%r123, [_d_flow_fill_holes_joint_with_datacost_param_2];
	ld.param.u32 	%r124, [_d_flow_fill_holes_joint_with_datacost_param_3];
	ld.param.u32 	%r125, [_d_flow_fill_holes_joint_with_datacost_param_4];
	ld.param.f32 	%f213, [_d_flow_fill_holes_joint_with_datacost_param_5];
	add.u64 	%rd47, %SPL, 0;
	add.u64 	%rd48, %SPL, 64;
	mov.u32 	%r126, %ntid.x;
	mov.u32 	%r127, %ctaid.x;
	mov.u32 	%r128, %tid.x;
	mad.lo.s32 	%r1, %r126, %r127, %r128;
	mov.u32 	%r129, %ntid.y;
	mov.u32 	%r130, %ctaid.y;
	mov.u32 	%r131, %tid.y;
	mad.lo.s32 	%r2, %r129, %r130, %r131;
	setp.lt.s32	%p1, %r1, %r124;
	setp.lt.s32	%p2, %r2, %r125;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB17_246;
	bra.uni 	BB17_1;

BB17_1:
	cvta.to.global.u64 	%rd51, %rd46;
	cvta.to.global.u64 	%rd52, %rd45;
	cvt.rn.f32.s32	%f225, %r1;
	cvt.rn.f32.s32	%f226, %r2;
	// inline asm
	tex.2d.v4.f32.f32 {%f215, %f216, %f217, %f218}, [fwdUVTex, {%f225, %f226}];
	// inline asm
	// inline asm
	tex.2d.v4.f32.f32 {%f221, %f222, %f223, %f224}, [bwdUVTex, {%f225, %f226}];
	// inline asm
	abs.ftz.f32 	%f227, %f215;
	mad.lo.s32 	%r132, %r2, %r123, %r1;
	mul.wide.s32 	%rd55, %r132, 8;
	add.s64 	%rd3, %rd52, %rd55;
	add.s64 	%rd4, %rd51, %rd55;
	setp.ge.ftz.f32	%p4, %f227, 0f4CBEBC20;
	@%p4 bra 	BB17_5;

	abs.ftz.f32 	%f228, %f216;
	setp.ge.ftz.f32	%p5, %f228, 0f4CBEBC20;
	@%p5 bra 	BB17_5;

	abs.ftz.f32 	%f229, %f221;
	setp.ge.ftz.f32	%p6, %f229, 0f4CBEBC20;
	@%p6 bra 	BB17_5;

	abs.ftz.f32 	%f230, %f222;
	setp.ltu.ftz.f32	%p7, %f230, 0f4CBEBC20;
	@%p7 bra 	BB17_245;

BB17_5:
	mov.u16 	%rs69, 0;
	st.local.u8 	[%rd47+63], %rs69;
	st.local.u8 	[%rd47+62], %rs69;
	st.local.u8 	[%rd47+61], %rs69;
	st.local.u8 	[%rd47+60], %rs69;
	st.local.u8 	[%rd47+59], %rs69;
	st.local.u8 	[%rd47+58], %rs69;
	st.local.u8 	[%rd47+57], %rs69;
	st.local.u8 	[%rd47+56], %rs69;
	st.local.u8 	[%rd47+55], %rs69;
	st.local.u8 	[%rd47+54], %rs69;
	st.local.u8 	[%rd47+53], %rs69;
	st.local.u8 	[%rd47+52], %rs69;
	st.local.u8 	[%rd47+51], %rs69;
	st.local.u8 	[%rd47+50], %rs69;
	st.local.u8 	[%rd47+49], %rs69;
	st.local.u8 	[%rd47+48], %rs69;
	st.local.u8 	[%rd47+47], %rs69;
	st.local.u8 	[%rd47+46], %rs69;
	st.local.u8 	[%rd47+45], %rs69;
	st.local.u8 	[%rd47+44], %rs69;
	st.local.u8 	[%rd47+43], %rs69;
	st.local.u8 	[%rd47+42], %rs69;
	st.local.u8 	[%rd47+41], %rs69;
	st.local.u8 	[%rd47+40], %rs69;
	st.local.u8 	[%rd47+39], %rs69;
	st.local.u8 	[%rd47+38], %rs69;
	st.local.u8 	[%rd47+37], %rs69;
	st.local.u8 	[%rd47+36], %rs69;
	st.local.u8 	[%rd47+35], %rs69;
	st.local.u8 	[%rd47+34], %rs69;
	st.local.u8 	[%rd47+33], %rs69;
	st.local.u8 	[%rd47+32], %rs69;
	st.local.u8 	[%rd47+31], %rs69;
	st.local.u8 	[%rd47+30], %rs69;
	st.local.u8 	[%rd47+29], %rs69;
	st.local.u8 	[%rd47+28], %rs69;
	st.local.u8 	[%rd47+27], %rs69;
	st.local.u8 	[%rd47+26], %rs69;
	st.local.u8 	[%rd47+25], %rs69;
	st.local.u8 	[%rd47+24], %rs69;
	st.local.u8 	[%rd47+23], %rs69;
	st.local.u8 	[%rd47+22], %rs69;
	st.local.u8 	[%rd47+21], %rs69;
	st.local.u8 	[%rd47+20], %rs69;
	st.local.u8 	[%rd47+19], %rs69;
	st.local.u8 	[%rd47+18], %rs69;
	st.local.u8 	[%rd47+17], %rs69;
	st.local.u8 	[%rd47+16], %rs69;
	st.local.u8 	[%rd47+15], %rs69;
	st.local.u8 	[%rd47+14], %rs69;
	st.local.u8 	[%rd47+13], %rs69;
	st.local.u8 	[%rd47+12], %rs69;
	st.local.u8 	[%rd47+11], %rs69;
	st.local.u8 	[%rd47+10], %rs69;
	st.local.u8 	[%rd47+9], %rs69;
	st.local.u8 	[%rd47+8], %rs69;
	st.local.u8 	[%rd47+7], %rs69;
	st.local.u8 	[%rd47+6], %rs69;
	st.local.u8 	[%rd47+5], %rs69;
	st.local.u8 	[%rd47+4], %rs69;
	st.local.u8 	[%rd47+3], %rs69;
	st.local.u8 	[%rd47+2], %rs69;
	st.local.u8 	[%rd47+1], %rs69;
	st.local.u8 	[%rd47], %rs69;
	add.s32 	%r134, %r1, -1;
	cvt.rn.f32.s32	%f235, %r134;
	// inline asm
	tex.2d.v4.f32.f32 {%f231, %f232, %f233, %f234}, [fwdUVTex, {%f235, %f226}];
	// inline asm
	st.local.v2.f32 	[%rd47], {%f231, %f232};
	mov.u32 	%r259, 1;

BB17_6:
	ld.local.v2.f32 	{%f237, %f238}, [%rd47];
	abs.ftz.f32 	%f239, %f237;
	setp.ltu.ftz.f32	%p8, %f239, 0f4CBEBC20;
	@%p8 bra 	BB17_8;

	mov.u16 	%rs167, 1;
	bra.uni 	BB17_9;

BB17_8:
	abs.ftz.f32 	%f240, %f238;
	setp.ge.ftz.f32	%p9, %f240, 0f4CBEBC20;
	selp.u16	%rs167, 1, 0, %p9;

BB17_9:
	and.b16  	%rs71, %rs167, 1;
	setp.eq.b16	%p10, %rs71, 1;
	setp.lt.s32	%p11, %r259, 5;
	and.pred  	%p12, %p10, %p11;
	@%p12 bra 	BB17_244;

	setp.eq.s16	%p13, %rs167, 0;
	@%p13 bra 	BB17_12;

	mov.f32 	%f593, 0f00000000;
	mov.f32 	%f592, %f593;
	mov.u32 	%r302, 0;
	bra.uni 	BB17_13;

BB17_12:
	add.ftz.f32 	%f592, %f237, 0f00000000;
	add.ftz.f32 	%f593, %f238, 0f00000000;
	mov.u32 	%r302, 1;

BB17_13:
	mov.u32 	%r4, %r302;
	mul.wide.u32 	%rd58, %r4, 8;
	add.s64 	%rd59, %rd47, %rd58;
	add.s32 	%r138, %r1, 1;
	mov.u32 	%r260, 1;
	cvt.rn.f32.s32	%f247, %r138;
	// inline asm
	tex.2d.v4.f32.f32 {%f243, %f244, %f245, %f246}, [fwdUVTex, {%f247, %f226}];
	// inline asm
	st.local.v2.f32 	[%rd59], {%f243, %f244};
	mov.f32 	%f585, %f244;

BB17_14:
	mul.wide.u32 	%rd60, %r4, 8;
	add.s64 	%rd61, %rd47, %rd60;
	ld.local.v2.f32 	{%f249, %f250}, [%rd61];
	abs.ftz.f32 	%f251, %f249;
	setp.ltu.ftz.f32	%p14, %f251, 0f4CBEBC20;
	@%p14 bra 	BB17_16;

	mov.u16 	%rs168, 1;
	bra.uni 	BB17_17;

BB17_16:
	abs.ftz.f32 	%f252, %f250;
	setp.ge.ftz.f32	%p15, %f252, 0f4CBEBC20;
	selp.u16	%rs168, 1, 0, %p15;

BB17_17:
	and.b16  	%rs73, %rs168, 1;
	setp.eq.b16	%p16, %rs73, 1;
	setp.lt.s32	%p17, %r260, 5;
	and.pred  	%p18, %p16, %p17;
	@%p18 bra 	BB17_243;

	mov.u32 	%r300, %r4;
	mov.u32 	%r301, %r4;
	@%p16 bra 	BB17_27;

	add.ftz.f32 	%f592, %f592, %f249;
	add.ftz.f32 	%f593, %f593, %f250;
	setp.ne.s32	%p20, %r4, 0;
	@%p20 bra 	BB17_21;

	mov.u32 	%r261, 1;
	bra.uni 	BB17_26;

BB17_21:
	mul.wide.u32 	%rd62, %r4, 8;
	add.s64 	%rd63, %rd47, %rd62;
	ld.local.f32 	%f253, [%rd47];
	ld.local.f32 	%f254, [%rd63];
	setp.eq.ftz.f32	%p21, %f254, %f253;
	@%p21 bra 	BB17_23;

	mov.u32 	%r261, 1;
	bra.uni 	BB17_26;

BB17_23:
	ld.local.f32 	%f255, [%rd47+4];
	setp.eq.ftz.f32	%p22, %f585, %f255;
	@%p22 bra 	BB17_25;

	mov.u32 	%r261, 1;
	bra.uni 	BB17_26;

BB17_25:
	mov.u32 	%r261, 0;

BB17_26:
	add.s32 	%r7, %r4, 1;
	add.s32 	%r300, %r261, %r4;
	mov.u32 	%r301, %r7;

BB17_27:
	mov.u32 	%r298, %r300;
	mov.u32 	%r299, %r301;
	mul.wide.s32 	%rd65, %r298, 8;
	add.s64 	%rd66, %rd47, %rd65;
	add.s32 	%r144, %r2, -1;
	cvt.rn.f32.s32	%f261, %r144;
	// inline asm
	tex.2d.v4.f32.f32 {%f256, %f257, %f258, %f259}, [fwdUVTex, {%f225, %f261}];
	// inline asm
	st.local.v2.f32 	[%rd66], {%f256, %f257};
	mov.u32 	%r262, 1;
	mov.f32 	%f586, %f257;

BB17_28:
	mul.wide.s32 	%rd67, %r298, 8;
	add.s64 	%rd68, %rd47, %rd67;
	ld.local.v2.f32 	{%f262, %f263}, [%rd68];
	abs.ftz.f32 	%f264, %f262;
	setp.ltu.ftz.f32	%p23, %f264, 0f4CBEBC20;
	@%p23 bra 	BB17_30;

	mov.u16 	%rs169, 1;
	bra.uni 	BB17_31;

BB17_30:
	abs.ftz.f32 	%f265, %f263;
	setp.ge.ftz.f32	%p24, %f265, 0f4CBEBC20;
	selp.u16	%rs169, 1, 0, %p24;

BB17_31:
	and.b16  	%rs76, %rs169, 1;
	setp.eq.b16	%p25, %rs76, 1;
	setp.lt.s32	%p26, %r262, 5;
	and.pred  	%p27, %p25, %p26;
	@%p27 bra 	BB17_242;

	@%p25 bra 	BB17_41;

	add.ftz.f32 	%f592, %f592, %f262;
	add.ftz.f32 	%f593, %f593, %f263;
	setp.gt.s32	%p29, %r298, 0;
	@%p29 bra 	BB17_35;

	mov.u16 	%rs170, 0;
	bra.uni 	BB17_40;

BB17_35:
	mul.wide.s32 	%rd69, %r298, 8;
	add.s64 	%rd70, %rd47, %rd69;
	ld.local.f32 	%f33, [%rd70];
	mov.u16 	%rs170, 0;
	mov.u32 	%r263, 0;
	mov.u64 	%rd132, %rd47;

BB17_36:
	ld.local.f32 	%f266, [%rd132];
	setp.neu.ftz.f32	%p30, %f33, %f266;
	@%p30 bra 	BB17_39;

	ld.local.f32 	%f267, [%rd132+4];
	setp.neu.ftz.f32	%p31, %f586, %f267;
	@%p31 bra 	BB17_39;

	mov.u16 	%rs170, 1;

BB17_39:
	add.s64 	%rd132, %rd132, 8;
	add.s32 	%r263, %r263, 1;
	setp.lt.s32	%p32, %r263, %r298;
	@%p32 bra 	BB17_36;

BB17_40:
	add.s32 	%r299, %r299, 1;
	and.b16  	%rs81, %rs170, 255;
	setp.eq.s16	%p33, %rs81, 0;
	selp.u32	%r146, 1, 0, %p33;
	add.s32 	%r298, %r146, %r298;

BB17_41:
	mov.u32 	%r296, %r298;
	mov.u32 	%r297, %r299;
	mul.wide.s32 	%rd72, %r296, 8;
	add.s64 	%rd8, %rd47, %rd72;
	// inline asm
	tex.2d.v4.f32.f32 {%f268, %f269, %f270, %f271}, [fwdUVTex, {%f235, %f261}];
	// inline asm
	st.local.v2.f32 	[%rd8], {%f268, %f269};
	mov.u32 	%r264, 1;
	mov.f32 	%f587, %f269;

BB17_42:
	ld.local.v2.f32 	{%f274, %f275}, [%rd8];
	abs.ftz.f32 	%f276, %f274;
	setp.ltu.ftz.f32	%p34, %f276, 0f4CBEBC20;
	@%p34 bra 	BB17_44;

	mov.u16 	%rs171, 1;
	bra.uni 	BB17_45;

BB17_44:
	abs.ftz.f32 	%f277, %f275;
	setp.ge.ftz.f32	%p35, %f277, 0f4CBEBC20;
	selp.u16	%rs171, 1, 0, %p35;

BB17_45:
	and.b16  	%rs83, %rs171, 1;
	setp.eq.b16	%p36, %rs83, 1;
	setp.lt.s32	%p37, %r264, 5;
	and.pred  	%p38, %p36, %p37;
	@%p38 bra 	BB17_241;

	@%p36 bra 	BB17_55;

	add.ftz.f32 	%f592, %f592, %f274;
	add.ftz.f32 	%f593, %f593, %f275;
	setp.gt.s32	%p40, %r296, 0;
	@%p40 bra 	BB17_49;

	mov.u16 	%rs172, 0;
	bra.uni 	BB17_54;

BB17_49:
	ld.local.f32 	%f42, [%rd8];
	mov.u16 	%rs172, 0;
	mov.u32 	%r265, 0;
	mov.u64 	%rd131, %rd47;

BB17_50:
	ld.local.f32 	%f278, [%rd131];
	setp.neu.ftz.f32	%p41, %f42, %f278;
	@%p41 bra 	BB17_53;

	ld.local.f32 	%f279, [%rd131+4];
	setp.neu.ftz.f32	%p42, %f587, %f279;
	@%p42 bra 	BB17_53;

	mov.u16 	%rs172, 1;

BB17_53:
	add.s64 	%rd131, %rd131, 8;
	add.s32 	%r265, %r265, 1;
	setp.lt.s32	%p43, %r265, %r296;
	@%p43 bra 	BB17_50;

BB17_54:
	add.s32 	%r297, %r297, 1;
	and.b16  	%rs88, %rs172, 255;
	setp.eq.s16	%p44, %rs88, 0;
	selp.u32	%r149, 1, 0, %p44;
	add.s32 	%r296, %r149, %r296;

BB17_55:
	mov.u32 	%r294, %r296;
	mov.u32 	%r295, %r297;
	mul.wide.s32 	%rd74, %r294, 8;
	add.s64 	%rd11, %rd47, %rd74;
	// inline asm
	tex.2d.v4.f32.f32 {%f280, %f281, %f282, %f283}, [fwdUVTex, {%f247, %f261}];
	// inline asm
	st.local.v2.f32 	[%rd11], {%f280, %f281};
	mov.u32 	%r266, 1;
	mov.f32 	%f588, %f281;

BB17_56:
	ld.local.v2.f32 	{%f286, %f287}, [%rd11];
	abs.ftz.f32 	%f288, %f286;
	setp.ltu.ftz.f32	%p45, %f288, 0f4CBEBC20;
	@%p45 bra 	BB17_58;

	mov.u16 	%rs173, 1;
	bra.uni 	BB17_59;

BB17_58:
	abs.ftz.f32 	%f289, %f287;
	setp.ge.ftz.f32	%p46, %f289, 0f4CBEBC20;
	selp.u16	%rs173, 1, 0, %p46;

BB17_59:
	and.b16  	%rs90, %rs173, 1;
	setp.eq.b16	%p47, %rs90, 1;
	setp.lt.s32	%p48, %r266, 5;
	and.pred  	%p49, %p47, %p48;
	@%p49 bra 	BB17_240;

	@%p47 bra 	BB17_69;

	add.ftz.f32 	%f592, %f592, %f286;
	add.ftz.f32 	%f593, %f593, %f287;
	setp.gt.s32	%p51, %r294, 0;
	@%p51 bra 	BB17_63;

	mov.u16 	%rs174, 0;
	bra.uni 	BB17_68;

BB17_63:
	ld.local.f32 	%f51, [%rd11];
	mov.u16 	%rs174, 0;
	mov.u32 	%r267, 0;
	mov.u64 	%rd130, %rd47;

BB17_64:
	ld.local.f32 	%f290, [%rd130];
	setp.neu.ftz.f32	%p52, %f51, %f290;
	@%p52 bra 	BB17_67;

	ld.local.f32 	%f291, [%rd130+4];
	setp.neu.ftz.f32	%p53, %f588, %f291;
	@%p53 bra 	BB17_67;

	mov.u16 	%rs174, 1;

BB17_67:
	add.s64 	%rd130, %rd130, 8;
	add.s32 	%r267, %r267, 1;
	setp.lt.s32	%p54, %r267, %r294;
	@%p54 bra 	BB17_64;

BB17_68:
	add.s32 	%r295, %r295, 1;
	and.b16  	%rs95, %rs174, 255;
	setp.eq.s16	%p55, %rs95, 0;
	selp.u32	%r152, 1, 0, %p55;
	add.s32 	%r294, %r152, %r294;

BB17_69:
	mov.u32 	%r292, %r294;
	mov.u32 	%r293, %r295;
	mul.wide.s32 	%rd76, %r292, 8;
	add.s64 	%rd14, %rd47, %rd76;
	add.s32 	%r154, %r2, 1;
	mov.u32 	%r268, 1;
	cvt.rn.f32.s32	%f297, %r154;
	// inline asm
	tex.2d.v4.f32.f32 {%f292, %f293, %f294, %f295}, [fwdUVTex, {%f225, %f297}];
	// inline asm
	st.local.v2.f32 	[%rd14], {%f292, %f293};
	mov.f32 	%f589, %f293;

BB17_70:
	ld.local.v2.f32 	{%f298, %f299}, [%rd14];
	abs.ftz.f32 	%f300, %f298;
	setp.ltu.ftz.f32	%p56, %f300, 0f4CBEBC20;
	@%p56 bra 	BB17_72;

	mov.u16 	%rs175, 1;
	bra.uni 	BB17_73;

BB17_72:
	abs.ftz.f32 	%f301, %f299;
	setp.ge.ftz.f32	%p57, %f301, 0f4CBEBC20;
	selp.u16	%rs175, 1, 0, %p57;

BB17_73:
	and.b16  	%rs97, %rs175, 1;
	setp.eq.b16	%p58, %rs97, 1;
	setp.lt.s32	%p59, %r268, 5;
	and.pred  	%p60, %p58, %p59;
	@%p60 bra 	BB17_239;

	@%p58 bra 	BB17_83;

	add.ftz.f32 	%f592, %f592, %f298;
	add.ftz.f32 	%f593, %f593, %f299;
	setp.gt.s32	%p62, %r292, 0;
	@%p62 bra 	BB17_77;

	mov.u16 	%rs176, 0;
	bra.uni 	BB17_82;

BB17_77:
	ld.local.f32 	%f61, [%rd14];
	mov.u16 	%rs176, 0;
	mov.u32 	%r269, 0;
	mov.u64 	%rd129, %rd47;

BB17_78:
	ld.local.f32 	%f302, [%rd129];
	setp.neu.ftz.f32	%p63, %f61, %f302;
	@%p63 bra 	BB17_81;

	ld.local.f32 	%f303, [%rd129+4];
	setp.neu.ftz.f32	%p64, %f589, %f303;
	@%p64 bra 	BB17_81;

	mov.u16 	%rs176, 1;

BB17_81:
	add.s64 	%rd129, %rd129, 8;
	add.s32 	%r269, %r269, 1;
	setp.lt.s32	%p65, %r269, %r292;
	@%p65 bra 	BB17_78;

BB17_82:
	add.s32 	%r293, %r293, 1;
	and.b16  	%rs102, %rs176, 255;
	setp.eq.s16	%p66, %rs102, 0;
	selp.u32	%r156, 1, 0, %p66;
	add.s32 	%r292, %r156, %r292;

BB17_83:
	mov.u32 	%r290, %r292;
	mov.u32 	%r291, %r293;
	mul.wide.s32 	%rd78, %r290, 8;
	add.s64 	%rd17, %rd47, %rd78;
	// inline asm
	tex.2d.v4.f32.f32 {%f304, %f305, %f306, %f307}, [fwdUVTex, {%f235, %f297}];
	// inline asm
	st.local.v2.f32 	[%rd17], {%f304, %f305};
	mov.u32 	%r270, 1;
	mov.f32 	%f590, %f305;

BB17_84:
	ld.local.v2.f32 	{%f310, %f311}, [%rd17];
	abs.ftz.f32 	%f312, %f310;
	setp.ltu.ftz.f32	%p67, %f312, 0f4CBEBC20;
	@%p67 bra 	BB17_86;

	mov.u16 	%rs177, 1;
	bra.uni 	BB17_87;

BB17_86:
	abs.ftz.f32 	%f313, %f311;
	setp.ge.ftz.f32	%p68, %f313, 0f4CBEBC20;
	selp.u16	%rs177, 1, 0, %p68;

BB17_87:
	and.b16  	%rs104, %rs177, 1;
	setp.eq.b16	%p69, %rs104, 1;
	setp.lt.s32	%p70, %r270, 5;
	and.pred  	%p71, %p69, %p70;
	@%p71 bra 	BB17_238;

	@%p69 bra 	BB17_97;

	add.ftz.f32 	%f592, %f592, %f310;
	add.ftz.f32 	%f593, %f593, %f311;
	setp.gt.s32	%p73, %r290, 0;
	@%p73 bra 	BB17_91;

	mov.u16 	%rs178, 0;
	bra.uni 	BB17_96;

BB17_91:
	ld.local.f32 	%f70, [%rd17];
	mov.u16 	%rs178, 0;
	mov.u32 	%r271, 0;
	mov.u64 	%rd128, %rd47;

BB17_92:
	ld.local.f32 	%f314, [%rd128];
	setp.neu.ftz.f32	%p74, %f70, %f314;
	@%p74 bra 	BB17_95;

	ld.local.f32 	%f315, [%rd128+4];
	setp.neu.ftz.f32	%p75, %f590, %f315;
	@%p75 bra 	BB17_95;

	mov.u16 	%rs178, 1;

BB17_95:
	add.s64 	%rd128, %rd128, 8;
	add.s32 	%r271, %r271, 1;
	setp.lt.s32	%p76, %r271, %r290;
	@%p76 bra 	BB17_92;

BB17_96:
	add.s32 	%r291, %r291, 1;
	and.b16  	%rs109, %rs178, 255;
	setp.eq.s16	%p77, %rs109, 0;
	selp.u32	%r159, 1, 0, %p77;
	add.s32 	%r290, %r159, %r290;

BB17_97:
	mov.u32 	%r288, %r290;
	mov.u32 	%r289, %r291;
	mul.wide.s32 	%rd80, %r288, 8;
	add.s64 	%rd20, %rd47, %rd80;
	// inline asm
	tex.2d.v4.f32.f32 {%f316, %f317, %f318, %f319}, [fwdUVTex, {%f247, %f297}];
	// inline asm
	st.local.v2.f32 	[%rd20], {%f316, %f317};
	mov.u32 	%r272, 1;
	mov.f32 	%f591, %f317;

BB17_98:
	ld.local.v2.f32 	{%f322, %f323}, [%rd20];
	abs.ftz.f32 	%f324, %f322;
	setp.ltu.ftz.f32	%p78, %f324, 0f4CBEBC20;
	@%p78 bra 	BB17_100;

	mov.u16 	%rs179, 1;
	bra.uni 	BB17_101;

BB17_100:
	abs.ftz.f32 	%f325, %f323;
	setp.ge.ftz.f32	%p79, %f325, 0f4CBEBC20;
	selp.u16	%rs179, 1, 0, %p79;

BB17_101:
	and.b16  	%rs111, %rs179, 1;
	setp.eq.b16	%p80, %rs111, 1;
	setp.lt.s32	%p81, %r272, 5;
	and.pred  	%p82, %p80, %p81;
	@%p82 bra 	BB17_237;

	@%p80 bra 	BB17_111;

	add.ftz.f32 	%f592, %f592, %f322;
	add.ftz.f32 	%f593, %f593, %f323;
	setp.gt.s32	%p84, %r288, 0;
	@%p84 bra 	BB17_105;

	mov.u16 	%rs180, 0;
	bra.uni 	BB17_110;

BB17_105:
	ld.local.f32 	%f79, [%rd20];
	mov.u16 	%rs180, 0;
	mov.u32 	%r273, 0;
	mov.u64 	%rd127, %rd47;

BB17_106:
	mov.u64 	%rd21, %rd127;
	ld.local.f32 	%f326, [%rd21];
	setp.neu.ftz.f32	%p85, %f79, %f326;
	@%p85 bra 	BB17_109;

	ld.local.f32 	%f327, [%rd21+4];
	setp.neu.ftz.f32	%p86, %f591, %f327;
	@%p86 bra 	BB17_109;

	mov.u16 	%rs180, 1;

BB17_109:
	add.s64 	%rd22, %rd21, 8;
	add.s32 	%r273, %r273, 1;
	setp.lt.s32	%p87, %r273, %r288;
	mov.u64 	%rd127, %rd22;
	@%p87 bra 	BB17_106;

BB17_110:
	add.s32 	%r289, %r289, 1;
	and.b16  	%rs116, %rs180, 255;
	setp.eq.s16	%p88, %rs116, 0;
	selp.u32	%r162, 1, 0, %p88;
	add.s32 	%r288, %r162, %r288;

BB17_111:
	mov.u16 	%rs165, 0;
	cvt.rn.f32.s32	%f334, %r289;
	div.approx.ftz.f32 	%f82, %f592, %f334;
	div.approx.ftz.f32 	%f83, %f593, %f334;
	st.local.u8 	[%rd48+63], %rs165;
	st.local.u8 	[%rd48+62], %rs165;
	st.local.u8 	[%rd48+61], %rs165;
	st.local.u8 	[%rd48+60], %rs165;
	st.local.u8 	[%rd48+59], %rs165;
	st.local.u8 	[%rd48+58], %rs165;
	st.local.u8 	[%rd48+57], %rs165;
	st.local.u8 	[%rd48+56], %rs165;
	st.local.u8 	[%rd48+55], %rs165;
	st.local.u8 	[%rd48+54], %rs165;
	st.local.u8 	[%rd48+53], %rs165;
	st.local.u8 	[%rd48+52], %rs165;
	st.local.u8 	[%rd48+51], %rs165;
	st.local.u8 	[%rd48+50], %rs165;
	st.local.u8 	[%rd48+49], %rs165;
	st.local.u8 	[%rd48+48], %rs165;
	st.local.u8 	[%rd48+47], %rs165;
	st.local.u8 	[%rd48+46], %rs165;
	st.local.u8 	[%rd48+45], %rs165;
	st.local.u8 	[%rd48+44], %rs165;
	st.local.u8 	[%rd48+43], %rs165;
	st.local.u8 	[%rd48+42], %rs165;
	st.local.u8 	[%rd48+41], %rs165;
	st.local.u8 	[%rd48+40], %rs165;
	st.local.u8 	[%rd48+39], %rs165;
	st.local.u8 	[%rd48+38], %rs165;
	st.local.u8 	[%rd48+37], %rs165;
	st.local.u8 	[%rd48+36], %rs165;
	st.local.u8 	[%rd48+35], %rs165;
	st.local.u8 	[%rd48+34], %rs165;
	st.local.u8 	[%rd48+33], %rs165;
	st.local.u8 	[%rd48+32], %rs165;
	st.local.u8 	[%rd48+31], %rs165;
	st.local.u8 	[%rd48+30], %rs165;
	st.local.u8 	[%rd48+29], %rs165;
	st.local.u8 	[%rd48+28], %rs165;
	st.local.u8 	[%rd48+27], %rs165;
	st.local.u8 	[%rd48+26], %rs165;
	st.local.u8 	[%rd48+25], %rs165;
	st.local.u8 	[%rd48+24], %rs165;
	st.local.u8 	[%rd48+23], %rs165;
	st.local.u8 	[%rd48+22], %rs165;
	st.local.u8 	[%rd48+21], %rs165;
	st.local.u8 	[%rd48+20], %rs165;
	st.local.u8 	[%rd48+19], %rs165;
	st.local.u8 	[%rd48+18], %rs165;
	st.local.u8 	[%rd48+17], %rs165;
	st.local.u8 	[%rd48+16], %rs165;
	st.local.u8 	[%rd48+15], %rs165;
	st.local.u8 	[%rd48+14], %rs165;
	st.local.u8 	[%rd48+13], %rs165;
	st.local.u8 	[%rd48+12], %rs165;
	st.local.u8 	[%rd48+11], %rs165;
	st.local.u8 	[%rd48+10], %rs165;
	st.local.u8 	[%rd48+9], %rs165;
	st.local.u8 	[%rd48+8], %rs165;
	st.local.u8 	[%rd48+7], %rs165;
	st.local.u8 	[%rd48+6], %rs165;
	st.local.u8 	[%rd48+5], %rs165;
	st.local.u8 	[%rd48+4], %rs165;
	st.local.u8 	[%rd48+3], %rs165;
	st.local.u8 	[%rd48+2], %rs165;
	st.local.u8 	[%rd48+1], %rs165;
	st.local.u8 	[%rd48], %rs165;
	// inline asm
	tex.2d.v4.f32.f32 {%f328, %f329, %f330, %f331}, [bwdUVTex, {%f235, %f226}];
	// inline asm
	st.local.v2.f32 	[%rd48], {%f328, %f329};
	mov.u32 	%r303, 1;

BB17_112:
	ld.local.v2.f32 	{%f335, %f336}, [%rd48];
	abs.ftz.f32 	%f337, %f335;
	setp.ltu.ftz.f32	%p89, %f337, 0f4CBEBC20;
	@%p89 bra 	BB17_114;

	mov.u16 	%rs181, 1;
	bra.uni 	BB17_115;

BB17_114:
	abs.ftz.f32 	%f338, %f336;
	setp.ge.ftz.f32	%p90, %f338, 0f4CBEBC20;
	selp.u16	%rs181, 1, 0, %p90;

BB17_115:
	and.b16  	%rs119, %rs181, 1;
	setp.eq.b16	%p91, %rs119, 1;
	setp.lt.s32	%p92, %r303, 5;
	and.pred  	%p93, %p91, %p92;
	@%p93 bra 	BB17_236;

	setp.eq.s16	%p94, %rs181, 0;
	@%p94 bra 	BB17_118;

	mov.f32 	%f602, 0f00000000;
	mov.f32 	%f601, %f602;
	mov.u32 	%r346, 0;
	bra.uni 	BB17_119;

BB17_118:
	add.ftz.f32 	%f601, %f335, 0f00000000;
	add.ftz.f32 	%f602, %f336, 0f00000000;
	mov.u32 	%r346, 1;

BB17_119:
	mov.u32 	%r54, %r346;
	mul.wide.u32 	%rd83, %r54, 8;
	add.s64 	%rd24, %rd48, %rd83;
	// inline asm
	tex.2d.v4.f32.f32 {%f341, %f342, %f343, %f344}, [bwdUVTex, {%f247, %f226}];
	// inline asm
	st.local.v2.f32 	[%rd24], {%f341, %f342};
	mov.u32 	%r304, 1;
	mov.f32 	%f594, %f342;

BB17_120:
	ld.local.v2.f32 	{%f347, %f348}, [%rd24];
	abs.ftz.f32 	%f349, %f347;
	setp.ltu.ftz.f32	%p95, %f349, 0f4CBEBC20;
	@%p95 bra 	BB17_122;

	mov.u16 	%rs182, 1;
	bra.uni 	BB17_123;

BB17_122:
	abs.ftz.f32 	%f350, %f348;
	setp.ge.ftz.f32	%p96, %f350, 0f4CBEBC20;
	selp.u16	%rs182, 1, 0, %p96;

BB17_123:
	and.b16  	%rs121, %rs182, 1;
	setp.eq.b16	%p97, %rs121, 1;
	setp.lt.s32	%p98, %r304, 5;
	and.pred  	%p99, %p97, %p98;
	@%p99 bra 	BB17_235;

	mov.u32 	%r344, %r54;
	mov.u32 	%r345, %r54;
	@%p97 bra 	BB17_133;

	add.ftz.f32 	%f601, %f601, %f347;
	add.ftz.f32 	%f602, %f602, %f348;
	setp.ne.s32	%p101, %r54, 0;
	@%p101 bra 	BB17_127;

	mov.u32 	%r305, 1;
	bra.uni 	BB17_132;

BB17_127:
	ld.local.f32 	%f351, [%rd24];
	ld.local.f32 	%f352, [%rd48];
	setp.eq.ftz.f32	%p102, %f351, %f352;
	@%p102 bra 	BB17_129;

	mov.u32 	%r305, 1;
	bra.uni 	BB17_132;

BB17_129:
	ld.local.f32 	%f353, [%rd48+4];
	setp.eq.ftz.f32	%p103, %f594, %f353;
	@%p103 bra 	BB17_131;

	mov.u32 	%r305, 1;
	bra.uni 	BB17_132;

BB17_131:
	mov.u32 	%r305, 0;

BB17_132:
	add.s32 	%r57, %r54, 1;
	add.s32 	%r344, %r305, %r54;
	mov.u32 	%r345, %r57;

BB17_133:
	mov.u32 	%r342, %r344;
	mov.u32 	%r343, %r345;
	mul.wide.s32 	%rd85, %r342, 8;
	add.s64 	%rd25, %rd48, %rd85;
	// inline asm
	tex.2d.v4.f32.f32 {%f354, %f355, %f356, %f357}, [bwdUVTex, {%f225, %f261}];
	// inline asm
	st.local.v2.f32 	[%rd25], {%f354, %f355};
	mov.u32 	%r306, 1;
	mov.f32 	%f595, %f355;

BB17_134:
	ld.local.v2.f32 	{%f360, %f361}, [%rd25];
	abs.ftz.f32 	%f362, %f360;
	setp.ltu.ftz.f32	%p104, %f362, 0f4CBEBC20;
	@%p104 bra 	BB17_136;

	mov.u16 	%rs183, 1;
	bra.uni 	BB17_137;

BB17_136:
	abs.ftz.f32 	%f363, %f361;
	setp.ge.ftz.f32	%p105, %f363, 0f4CBEBC20;
	selp.u16	%rs183, 1, 0, %p105;

BB17_137:
	and.b16  	%rs124, %rs183, 1;
	setp.eq.b16	%p106, %rs124, 1;
	setp.lt.s32	%p107, %r306, 5;
	and.pred  	%p108, %p106, %p107;
	@%p108 bra 	BB17_234;

	@%p106 bra 	BB17_147;

	add.ftz.f32 	%f601, %f601, %f360;
	add.ftz.f32 	%f602, %f602, %f361;
	setp.gt.s32	%p110, %r342, 0;
	@%p110 bra 	BB17_141;

	mov.u16 	%rs184, 0;
	bra.uni 	BB17_146;

BB17_141:
	ld.local.f32 	%f104, [%rd25];
	mov.u16 	%rs184, 0;
	mov.u32 	%r307, 0;
	mov.u64 	%rd145, %rd48;

BB17_142:
	ld.local.f32 	%f364, [%rd145];
	setp.neu.ftz.f32	%p111, %f104, %f364;
	@%p111 bra 	BB17_145;

	ld.local.f32 	%f365, [%rd145+4];
	setp.neu.ftz.f32	%p112, %f595, %f365;
	@%p112 bra 	BB17_145;

	mov.u16 	%rs184, 1;

BB17_145:
	add.s64 	%rd145, %rd145, 8;
	add.s32 	%r307, %r307, 1;
	setp.lt.s32	%p113, %r307, %r342;
	@%p113 bra 	BB17_142;

BB17_146:
	add.s32 	%r343, %r343, 1;
	and.b16  	%rs129, %rs184, 255;
	setp.eq.s16	%p114, %rs129, 0;
	selp.u32	%r173, 1, 0, %p114;
	add.s32 	%r342, %r173, %r342;

BB17_147:
	mov.u32 	%r340, %r342;
	mov.u32 	%r341, %r343;
	mul.wide.s32 	%rd87, %r340, 8;
	add.s64 	%rd28, %rd48, %rd87;
	// inline asm
	tex.2d.v4.f32.f32 {%f366, %f367, %f368, %f369}, [bwdUVTex, {%f235, %f261}];
	// inline asm
	st.local.v2.f32 	[%rd28], {%f366, %f367};
	mov.u32 	%r308, 1;
	mov.f32 	%f596, %f367;

BB17_148:
	ld.local.v2.f32 	{%f372, %f373}, [%rd28];
	abs.ftz.f32 	%f374, %f372;
	setp.ltu.ftz.f32	%p115, %f374, 0f4CBEBC20;
	@%p115 bra 	BB17_150;

	mov.u16 	%rs185, 1;
	bra.uni 	BB17_151;

BB17_150:
	abs.ftz.f32 	%f375, %f373;
	setp.ge.ftz.f32	%p116, %f375, 0f4CBEBC20;
	selp.u16	%rs185, 1, 0, %p116;

BB17_151:
	and.b16  	%rs131, %rs185, 1;
	setp.eq.b16	%p117, %rs131, 1;
	setp.lt.s32	%p118, %r308, 5;
	and.pred  	%p119, %p117, %p118;
	@%p119 bra 	BB17_233;

	@%p117 bra 	BB17_161;

	add.ftz.f32 	%f601, %f601, %f372;
	add.ftz.f32 	%f602, %f602, %f373;
	setp.gt.s32	%p121, %r340, 0;
	@%p121 bra 	BB17_155;

	mov.u16 	%rs186, 0;
	bra.uni 	BB17_160;

BB17_155:
	ld.local.f32 	%f113, [%rd28];
	mov.u16 	%rs186, 0;
	mov.u32 	%r309, 0;
	mov.u64 	%rd144, %rd48;

BB17_156:
	ld.local.f32 	%f376, [%rd144];
	setp.neu.ftz.f32	%p122, %f113, %f376;
	@%p122 bra 	BB17_159;

	ld.local.f32 	%f377, [%rd144+4];
	setp.neu.ftz.f32	%p123, %f596, %f377;
	@%p123 bra 	BB17_159;

	mov.u16 	%rs186, 1;

BB17_159:
	add.s64 	%rd144, %rd144, 8;
	add.s32 	%r309, %r309, 1;
	setp.lt.s32	%p124, %r309, %r340;
	@%p124 bra 	BB17_156;

BB17_160:
	add.s32 	%r341, %r341, 1;
	and.b16  	%rs136, %rs186, 255;
	setp.eq.s16	%p125, %rs136, 0;
	selp.u32	%r176, 1, 0, %p125;
	add.s32 	%r340, %r176, %r340;

BB17_161:
	mov.u32 	%r338, %r340;
	mov.u32 	%r339, %r341;
	mul.wide.s32 	%rd89, %r338, 8;
	add.s64 	%rd31, %rd48, %rd89;
	// inline asm
	tex.2d.v4.f32.f32 {%f378, %f379, %f380, %f381}, [bwdUVTex, {%f247, %f261}];
	// inline asm
	st.local.v2.f32 	[%rd31], {%f378, %f379};
	mov.u32 	%r310, 1;
	mov.f32 	%f597, %f379;

BB17_162:
	ld.local.v2.f32 	{%f384, %f385}, [%rd31];
	abs.ftz.f32 	%f386, %f384;
	setp.ltu.ftz.f32	%p126, %f386, 0f4CBEBC20;
	@%p126 bra 	BB17_164;

	mov.u16 	%rs187, 1;
	bra.uni 	BB17_165;

BB17_164:
	abs.ftz.f32 	%f387, %f385;
	setp.ge.ftz.f32	%p127, %f387, 0f4CBEBC20;
	selp.u16	%rs187, 1, 0, %p127;

BB17_165:
	and.b16  	%rs138, %rs187, 1;
	setp.eq.b16	%p128, %rs138, 1;
	setp.lt.s32	%p129, %r310, 5;
	and.pred  	%p130, %p128, %p129;
	@%p130 bra 	BB17_232;

	@%p128 bra 	BB17_175;

	add.ftz.f32 	%f601, %f601, %f384;
	add.ftz.f32 	%f602, %f602, %f385;
	setp.gt.s32	%p132, %r338, 0;
	@%p132 bra 	BB17_169;

	mov.u16 	%rs188, 0;
	bra.uni 	BB17_174;

BB17_169:
	ld.local.f32 	%f122, [%rd31];
	mov.u16 	%rs188, 0;
	mov.u32 	%r311, 0;
	mov.u64 	%rd143, %rd48;

BB17_170:
	ld.local.f32 	%f388, [%rd143];
	setp.neu.ftz.f32	%p133, %f122, %f388;
	@%p133 bra 	BB17_173;

	ld.local.f32 	%f389, [%rd143+4];
	setp.neu.ftz.f32	%p134, %f597, %f389;
	@%p134 bra 	BB17_173;

	mov.u16 	%rs188, 1;

BB17_173:
	add.s64 	%rd143, %rd143, 8;
	add.s32 	%r311, %r311, 1;
	setp.lt.s32	%p135, %r311, %r338;
	@%p135 bra 	BB17_170;

BB17_174:
	add.s32 	%r339, %r339, 1;
	and.b16  	%rs143, %rs188, 255;
	setp.eq.s16	%p136, %rs143, 0;
	selp.u32	%r179, 1, 0, %p136;
	add.s32 	%r338, %r179, %r338;

BB17_175:
	mov.u32 	%r336, %r338;
	mov.u32 	%r337, %r339;
	mul.wide.s32 	%rd91, %r336, 8;
	add.s64 	%rd34, %rd48, %rd91;
	// inline asm
	tex.2d.v4.f32.f32 {%f390, %f391, %f392, %f393}, [bwdUVTex, {%f225, %f297}];
	// inline asm
	st.local.v2.f32 	[%rd34], {%f390, %f391};
	mov.u32 	%r312, 1;
	mov.f32 	%f598, %f391;

BB17_176:
	ld.local.v2.f32 	{%f396, %f397}, [%rd34];
	abs.ftz.f32 	%f398, %f396;
	setp.ltu.ftz.f32	%p137, %f398, 0f4CBEBC20;
	@%p137 bra 	BB17_178;

	mov.u16 	%rs189, 1;
	bra.uni 	BB17_179;

BB17_178:
	abs.ftz.f32 	%f399, %f397;
	setp.ge.ftz.f32	%p138, %f399, 0f4CBEBC20;
	selp.u16	%rs189, 1, 0, %p138;

BB17_179:
	and.b16  	%rs145, %rs189, 1;
	setp.eq.b16	%p139, %rs145, 1;
	setp.lt.s32	%p140, %r312, 5;
	and.pred  	%p141, %p139, %p140;
	@%p141 bra 	BB17_231;

	@%p139 bra 	BB17_189;

	add.ftz.f32 	%f601, %f601, %f396;
	add.ftz.f32 	%f602, %f602, %f397;
	setp.gt.s32	%p143, %r336, 0;
	@%p143 bra 	BB17_183;

	mov.u16 	%rs190, 0;
	bra.uni 	BB17_188;

BB17_183:
	ld.local.f32 	%f131, [%rd34];
	mov.u16 	%rs190, 0;
	mov.u32 	%r313, 0;
	mov.u64 	%rd142, %rd48;

BB17_184:
	ld.local.f32 	%f400, [%rd142];
	setp.neu.ftz.f32	%p144, %f131, %f400;
	@%p144 bra 	BB17_187;

	ld.local.f32 	%f401, [%rd142+4];
	setp.neu.ftz.f32	%p145, %f598, %f401;
	@%p145 bra 	BB17_187;

	mov.u16 	%rs190, 1;

BB17_187:
	add.s64 	%rd142, %rd142, 8;
	add.s32 	%r313, %r313, 1;
	setp.lt.s32	%p146, %r313, %r336;
	@%p146 bra 	BB17_184;

BB17_188:
	add.s32 	%r337, %r337, 1;
	and.b16  	%rs150, %rs190, 255;
	setp.eq.s16	%p147, %rs150, 0;
	selp.u32	%r182, 1, 0, %p147;
	add.s32 	%r336, %r182, %r336;

BB17_189:
	mov.u32 	%r334, %r336;
	mov.u32 	%r335, %r337;
	mul.wide.s32 	%rd93, %r334, 8;
	add.s64 	%rd37, %rd48, %rd93;
	// inline asm
	tex.2d.v4.f32.f32 {%f402, %f403, %f404, %f405}, [bwdUVTex, {%f235, %f297}];
	// inline asm
	st.local.v2.f32 	[%rd37], {%f402, %f403};
	mov.u32 	%r314, 1;
	mov.f32 	%f599, %f403;

BB17_190:
	ld.local.v2.f32 	{%f408, %f409}, [%rd37];
	abs.ftz.f32 	%f410, %f408;
	setp.ltu.ftz.f32	%p148, %f410, 0f4CBEBC20;
	@%p148 bra 	BB17_192;

	mov.u16 	%rs191, 1;
	bra.uni 	BB17_193;

BB17_192:
	abs.ftz.f32 	%f411, %f409;
	setp.ge.ftz.f32	%p149, %f411, 0f4CBEBC20;
	selp.u16	%rs191, 1, 0, %p149;

BB17_193:
	and.b16  	%rs152, %rs191, 1;
	setp.eq.b16	%p150, %rs152, 1;
	setp.lt.s32	%p151, %r314, 5;
	and.pred  	%p152, %p150, %p151;
	@%p152 bra 	BB17_230;

	@%p150 bra 	BB17_203;

	add.ftz.f32 	%f601, %f601, %f408;
	add.ftz.f32 	%f602, %f602, %f409;
	setp.gt.s32	%p154, %r334, 0;
	@%p154 bra 	BB17_197;

	mov.u16 	%rs192, 0;
	bra.uni 	BB17_202;

BB17_197:
	ld.local.f32 	%f140, [%rd37];
	mov.u32 	%r315, 0;
	mov.u16 	%rs192, 0;
	mov.u64 	%rd141, %rd48;

BB17_198:
	ld.local.f32 	%f412, [%rd141];
	setp.neu.ftz.f32	%p155, %f140, %f412;
	@%p155 bra 	BB17_201;

	ld.local.f32 	%f413, [%rd141+4];
	setp.neu.ftz.f32	%p156, %f599, %f413;
	@%p156 bra 	BB17_201;

	mov.u16 	%rs192, 1;

BB17_201:
	add.s64 	%rd141, %rd141, 8;
	add.s32 	%r315, %r315, 1;
	setp.lt.s32	%p157, %r315, %r334;
	@%p157 bra 	BB17_198;

BB17_202:
	add.s32 	%r335, %r335, 1;
	and.b16  	%rs157, %rs192, 255;
	setp.eq.s16	%p158, %rs157, 0;
	selp.u32	%r185, 1, 0, %p158;
	add.s32 	%r334, %r185, %r334;

BB17_203:
	mov.u32 	%r332, %r334;
	mov.u32 	%r333, %r335;
	mul.wide.s32 	%rd95, %r332, 8;
	add.s64 	%rd40, %rd48, %rd95;
	// inline asm
	tex.2d.v4.f32.f32 {%f414, %f415, %f416, %f417}, [bwdUVTex, {%f247, %f297}];
	// inline asm
	st.local.v2.f32 	[%rd40], {%f414, %f415};
	mov.u32 	%r316, 1;
	mov.f32 	%f600, %f415;

BB17_204:
	ld.local.v2.f32 	{%f420, %f421}, [%rd40];
	abs.ftz.f32 	%f422, %f420;
	setp.ltu.ftz.f32	%p159, %f422, 0f4CBEBC20;
	@%p159 bra 	BB17_206;

	mov.u16 	%rs193, 1;
	bra.uni 	BB17_207;

BB17_206:
	abs.ftz.f32 	%f423, %f421;
	setp.ge.ftz.f32	%p160, %f423, 0f4CBEBC20;
	selp.u16	%rs193, 1, 0, %p160;

BB17_207:
	and.b16  	%rs159, %rs193, 1;
	setp.eq.b16	%p161, %rs159, 1;
	setp.lt.s32	%p162, %r316, 5;
	and.pred  	%p163, %p161, %p162;
	@%p163 bra 	BB17_229;

	@%p161 bra 	BB17_217;

	add.ftz.f32 	%f601, %f601, %f420;
	add.ftz.f32 	%f602, %f602, %f421;
	setp.gt.s32	%p165, %r332, 0;
	@%p165 bra 	BB17_211;

	mov.u16 	%rs194, 0;
	bra.uni 	BB17_216;

BB17_211:
	ld.local.f32 	%f149, [%rd40];
	mov.u32 	%r317, 0;
	mov.u16 	%rs194, 0;
	mov.u64 	%rd140, %rd48;

BB17_212:
	ld.local.f32 	%f424, [%rd140];
	setp.neu.ftz.f32	%p166, %f149, %f424;
	@%p166 bra 	BB17_215;

	ld.local.f32 	%f425, [%rd140+4];
	setp.neu.ftz.f32	%p167, %f600, %f425;
	@%p167 bra 	BB17_215;

	mov.u16 	%rs194, 1;

BB17_215:
	add.s64 	%rd140, %rd140, 8;
	add.s32 	%r317, %r317, 1;
	setp.lt.s32	%p168, %r317, %r332;
	@%p168 bra 	BB17_212;

BB17_216:
	add.s32 	%r333, %r333, 1;
	and.b16  	%rs164, %rs194, 255;
	setp.eq.s16	%p169, %rs164, 0;
	selp.u32	%r188, 1, 0, %p169;
	add.s32 	%r332, %r188, %r332;

BB17_217:
	ld.local.v2.f32 	{%f426, %f427}, [%rd47];
	ld.local.v2.f32 	{%f428, %f429}, [%rd48];
	setp.gt.s32	%p170, %r332, 1;
	setp.gt.s32	%p171, %r288, 1;
	or.pred  	%p172, %p171, %p170;
	setp.gt.s32	%p173, %r288, 0;
	and.pred  	%p174, %p172, %p173;
	mov.f32 	%f629, %f429;
	mov.f32 	%f628, %f428;
	mov.f32 	%f623, %f427;
	mov.f32 	%f611, %f426;
	@!%p174 bra 	BB17_223;
	bra.uni 	BB17_218;

BB17_218:
	ld.param.f32 	%f584, [_d_flow_fill_holes_joint_with_datacost_param_6];
	cvt.rn.f32.s32	%f431, %r333;
	div.approx.ftz.f32 	%f156, %f601, %f431;
	div.approx.ftz.f32 	%f157, %f602, %f431;
	mov.f32 	%f432, 0f3F800000;
	sub.ftz.f32 	%f158, %f432, %f213;
	add.ftz.f32 	%f159, %f584, 0f3F800000;
	mov.f32 	%f603, 0f4CBEBC20;
	mov.u32 	%r347, 1;
	mov.f32 	%f615, %f611;
	mov.f32 	%f614, %f611;
	mov.f32 	%f627, %f623;

BB17_219:
	mov.f32 	%f626, %f623;
	mov.f32 	%f619, %f626;
	mov.f32 	%f625, %f619;
	mov.f32 	%f607, %f614;
	mov.f32 	%f613, %f607;
	setp.lt.s32	%p175, %r332, 1;
	@%p175 bra 	BB17_222;

	fma.rn.ftz.f32 	%f433, %f615, %f158, %f225;
	add.ftz.f32 	%f167, %f433, 0f3F000000;
	fma.rn.ftz.f32 	%f434, %f627, %f158, %f226;
	add.ftz.f32 	%f168, %f434, 0f3F000000;
	sub.ftz.f32 	%f435, %f615, %f82;
	abs.ftz.f32 	%f436, %f435;
	sub.ftz.f32 	%f437, %f627, %f83;
	abs.ftz.f32 	%f438, %f437;
	add.ftz.f32 	%f439, %f436, %f438;
	mul.ftz.f32 	%f440, %f439, 0f41000000;
	mul.ftz.f32 	%f169, %f440, %f159;
	mov.u32 	%r348, 0;
	mov.u64 	%rd139, %rd48;
	mov.f32 	%f612, %f613;
	mov.f32 	%f624, %f625;

BB17_221:
	mov.u64 	%rd43, %rd139;
	ld.local.v2.f32 	{%f453, %f454}, [%rd43];
	fma.rn.ftz.f32 	%f456, %f453, %f213, %f225;
	add.ftz.f32 	%f445, %f456, 0f3F000000;
	fma.rn.ftz.f32 	%f458, %f454, %f213, %f226;
	add.ftz.f32 	%f446, %f458, 0f3F000000;
	// inline asm
	tex.2d.v4.f32.f32 {%f441, %f442, %f443, %f444}, [im1Tex, {%f445, %f446}];
	// inline asm
	// inline asm
	tex.2d.v4.f32.f32 {%f447, %f448, %f449, %f450}, [im2Tex, {%f167, %f168}];
	// inline asm
	sub.ftz.f32 	%f459, %f441, %f447;
	sub.ftz.f32 	%f460, %f442, %f448;
	sub.ftz.f32 	%f461, %f443, %f449;
	abs.ftz.f32 	%f462, %f459;
	abs.ftz.f32 	%f463, %f460;
	add.ftz.f32 	%f464, %f462, %f463;
	abs.ftz.f32 	%f465, %f461;
	add.ftz.f32 	%f466, %f464, %f465;
	mov.f32 	%f467, 0f40400000;
	div.approx.ftz.f32 	%f468, %f466, %f467;
	setp.gt.ftz.f32	%p176, %f468, 0f3F800000;
	selp.f32	%f469, 0f3F800000, %f468, %p176;
	sub.ftz.f32 	%f470, %f453, %f156;
	sub.ftz.f32 	%f471, %f454, %f157;
	abs.ftz.f32 	%f472, %f470;
	abs.ftz.f32 	%f473, %f471;
	add.ftz.f32 	%f474, %f472, %f473;
	mul.ftz.f32 	%f475, %f474, 0f41000000;
	add.ftz.f32 	%f476, %f469, %f169;
	fma.rn.ftz.f32 	%f477, %f475, %f159, %f476;
	setp.lt.ftz.f32	%p177, %f477, %f603;
	selp.f32	%f603, %f477, %f603, %p177;
	selp.f32	%f624, %f627, %f624, %p177;
	selp.f32	%f612, %f615, %f612, %p177;
	selp.f32	%f629, %f454, %f629, %p177;
	selp.f32	%f628, %f453, %f628, %p177;
	add.s64 	%rd44, %rd43, 8;
	add.s32 	%r348, %r348, 1;
	setp.lt.s32	%p178, %r348, %r332;
	mov.u64 	%rd139, %rd44;
	mov.f32 	%f613, %f612;
	mov.f32 	%f625, %f624;
	@%p178 bra 	BB17_221;

BB17_222:
	mov.f32 	%f623, %f625;
	mov.f32 	%f611, %f613;
	setp.lt.s32	%p179, %r347, %r288;
	@%p179 bra 	BB17_228;

BB17_223:
	mov.f32 	%f631, %f216;
	mov.f32 	%f630, %f215;
	abs.ftz.f32 	%f478, %f611;
	setp.ge.ftz.f32	%p180, %f478, 0f4CBEBC20;
	@%p180 bra 	BB17_225;

	abs.ftz.f32 	%f479, %f623;
	setp.ge.ftz.f32	%p181, %f479, 0f4CBEBC20;
	selp.f32	%f631, %f216, %f623, %p181;
	selp.f32	%f630, %f215, %f611, %p181;

BB17_225:
	mov.f32 	%f633, %f222;
	mov.f32 	%f632, %f221;
	abs.ftz.f32 	%f480, %f628;
	setp.ge.ftz.f32	%p182, %f480, 0f4CBEBC20;
	@%p182 bra 	BB17_227;

	abs.ftz.f32 	%f481, %f629;
	setp.ge.ftz.f32	%p183, %f481, 0f4CBEBC20;
	selp.f32	%f633, %f222, %f629, %p183;
	selp.f32	%f632, %f221, %f628, %p183;

BB17_227:
	st.global.v2.f32 	[%rd3], {%f630, %f631};
	st.global.v2.f32 	[%rd4], {%f632, %f633};
	bra.uni 	BB17_246;

BB17_228:
	mul.wide.s32 	%rd100, %r347, 8;
	add.s64 	%rd101, %rd47, %rd100;
	ld.local.v2.f32 	{%f482, %f483}, [%rd101];
	add.s32 	%r347, %r347, 1;
	mov.f32 	%f627, %f483;
	mov.f32 	%f615, %f482;
	mov.f32 	%f614, %f611;
	bra.uni 	BB17_219;

BB17_229:
	add.s32 	%r195, %r316, %r1;
	cvt.rn.f32.s32	%f488, %r195;
	add.s32 	%r196, %r316, %r2;
	cvt.rn.f32.s32	%f489, %r196;
	add.s32 	%r316, %r316, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f484, %f485, %f486, %f487}, [bwdUVTex, {%f488, %f489}];
	// inline asm
	mov.f32 	%f600, %f485;
	st.local.v2.f32 	[%rd40], {%f484, %f485};
	bra.uni 	BB17_204;

BB17_230:
	sub.s32 	%r201, %r1, %r314;
	cvt.rn.f32.s32	%f494, %r201;
	add.s32 	%r202, %r314, %r2;
	cvt.rn.f32.s32	%f495, %r202;
	add.s32 	%r314, %r314, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f490, %f491, %f492, %f493}, [bwdUVTex, {%f494, %f495}];
	// inline asm
	mov.f32 	%f599, %f491;
	st.local.v2.f32 	[%rd37], {%f490, %f491};
	bra.uni 	BB17_190;

BB17_231:
	add.s32 	%r203, %r312, %r2;
	cvt.rn.f32.s32	%f501, %r203;
	add.s32 	%r312, %r312, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f496, %f497, %f498, %f499}, [bwdUVTex, {%f225, %f501}];
	// inline asm
	mov.f32 	%f598, %f497;
	st.local.v2.f32 	[%rd34], {%f496, %f497};
	bra.uni 	BB17_176;

BB17_232:
	add.s32 	%r208, %r310, %r1;
	cvt.rn.f32.s32	%f506, %r208;
	sub.s32 	%r209, %r2, %r310;
	cvt.rn.f32.s32	%f507, %r209;
	add.s32 	%r310, %r310, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f502, %f503, %f504, %f505}, [bwdUVTex, {%f506, %f507}];
	// inline asm
	mov.f32 	%f597, %f503;
	st.local.v2.f32 	[%rd31], {%f502, %f503};
	bra.uni 	BB17_162;

BB17_233:
	sub.s32 	%r214, %r1, %r308;
	cvt.rn.f32.s32	%f512, %r214;
	sub.s32 	%r215, %r2, %r308;
	cvt.rn.f32.s32	%f513, %r215;
	add.s32 	%r308, %r308, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f508, %f509, %f510, %f511}, [bwdUVTex, {%f512, %f513}];
	// inline asm
	mov.f32 	%f596, %f509;
	st.local.v2.f32 	[%rd28], {%f508, %f509};
	bra.uni 	BB17_148;

BB17_234:
	sub.s32 	%r216, %r2, %r306;
	cvt.rn.f32.s32	%f519, %r216;
	add.s32 	%r306, %r306, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f514, %f515, %f516, %f517}, [bwdUVTex, {%f225, %f519}];
	// inline asm
	mov.f32 	%f595, %f515;
	st.local.v2.f32 	[%rd25], {%f514, %f515};
	bra.uni 	BB17_134;

BB17_235:
	add.s32 	%r221, %r304, %r1;
	cvt.rn.f32.s32	%f524, %r221;
	add.s32 	%r304, %r304, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f520, %f521, %f522, %f523}, [bwdUVTex, {%f524, %f226}];
	// inline asm
	mov.f32 	%f594, %f521;
	st.local.v2.f32 	[%rd24], {%f520, %f521};
	bra.uni 	BB17_120;

BB17_236:
	sub.s32 	%r226, %r1, %r303;
	cvt.rn.f32.s32	%f530, %r226;
	add.s32 	%r303, %r303, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f526, %f527, %f528, %f529}, [bwdUVTex, {%f530, %f226}];
	// inline asm
	st.local.v2.f32 	[%rd48], {%f526, %f527};
	bra.uni 	BB17_112;

BB17_237:
	add.s32 	%r231, %r272, %r1;
	cvt.rn.f32.s32	%f536, %r231;
	add.s32 	%r232, %r272, %r2;
	cvt.rn.f32.s32	%f537, %r232;
	add.s32 	%r272, %r272, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f532, %f533, %f534, %f535}, [fwdUVTex, {%f536, %f537}];
	// inline asm
	mov.f32 	%f591, %f533;
	st.local.v2.f32 	[%rd20], {%f532, %f533};
	bra.uni 	BB17_98;

BB17_238:
	sub.s32 	%r237, %r1, %r270;
	cvt.rn.f32.s32	%f542, %r237;
	add.s32 	%r238, %r270, %r2;
	cvt.rn.f32.s32	%f543, %r238;
	add.s32 	%r270, %r270, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f538, %f539, %f540, %f541}, [fwdUVTex, {%f542, %f543}];
	// inline asm
	mov.f32 	%f590, %f539;
	st.local.v2.f32 	[%rd17], {%f538, %f539};
	bra.uni 	BB17_84;

BB17_239:
	add.s32 	%r239, %r268, %r2;
	cvt.rn.f32.s32	%f549, %r239;
	add.s32 	%r268, %r268, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f544, %f545, %f546, %f547}, [fwdUVTex, {%f225, %f549}];
	// inline asm
	mov.f32 	%f589, %f545;
	st.local.v2.f32 	[%rd14], {%f544, %f545};
	bra.uni 	BB17_70;

BB17_240:
	add.s32 	%r244, %r266, %r1;
	cvt.rn.f32.s32	%f554, %r244;
	sub.s32 	%r245, %r2, %r266;
	cvt.rn.f32.s32	%f555, %r245;
	add.s32 	%r266, %r266, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f550, %f551, %f552, %f553}, [fwdUVTex, {%f554, %f555}];
	// inline asm
	mov.f32 	%f588, %f551;
	st.local.v2.f32 	[%rd11], {%f550, %f551};
	bra.uni 	BB17_56;

BB17_241:
	sub.s32 	%r250, %r1, %r264;
	cvt.rn.f32.s32	%f560, %r250;
	sub.s32 	%r251, %r2, %r264;
	cvt.rn.f32.s32	%f561, %r251;
	add.s32 	%r264, %r264, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f556, %f557, %f558, %f559}, [fwdUVTex, {%f560, %f561}];
	// inline asm
	mov.f32 	%f587, %f557;
	st.local.v2.f32 	[%rd8], {%f556, %f557};
	bra.uni 	BB17_42;

BB17_242:
	sub.s32 	%r252, %r2, %r262;
	cvt.rn.f32.s32	%f567, %r252;
	mul.wide.s32 	%rd116, %r298, 8;
	add.s64 	%rd117, %rd47, %rd116;
	add.s32 	%r262, %r262, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f562, %f563, %f564, %f565}, [fwdUVTex, {%f225, %f567}];
	// inline asm
	mov.f32 	%f586, %f563;
	st.local.v2.f32 	[%rd117], {%f562, %f563};
	bra.uni 	BB17_28;

BB17_243:
	add.s32 	%r257, %r260, %r1;
	cvt.rn.f32.s32	%f572, %r257;
	mul.wide.u32 	%rd119, %r4, 8;
	add.s64 	%rd120, %rd47, %rd119;
	add.s32 	%r260, %r260, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f568, %f569, %f570, %f571}, [fwdUVTex, {%f572, %f226}];
	// inline asm
	mov.f32 	%f585, %f569;
	st.local.v2.f32 	[%rd120], {%f568, %f569};
	bra.uni 	BB17_14;

BB17_244:
	sub.s32 	%r258, %r1, %r259;
	cvt.rn.f32.s32	%f578, %r258;
	add.s32 	%r259, %r259, 1;
	// inline asm
	tex.2d.v4.f32.f32 {%f574, %f575, %f576, %f577}, [fwdUVTex, {%f578, %f226}];
	// inline asm
	st.local.v2.f32 	[%rd47], {%f574, %f575};
	bra.uni 	BB17_6;

BB17_245:
	st.global.v2.f32 	[%rd3], {%f215, %f216};
	st.global.v2.f32 	[%rd4], {%f221, %f222};

BB17_246:
	ret;
}

//
// _d_frame_interp_by_two_flow_kernel
//
// One thread per (col, row), where col = ntid.x * ctaid.x + tid.x and
// row = ntid.y * ctaid.y + tid.y.  A thread does nothing unless
// col < param_6 and row < param_7 (signed compares).
//
// For a live thread:
//   1. idx_uv = row * param_4 + col selects an 8-byte (two-float) element.
//   2. The element at param_3[idx_uv] is added to (col, row) + 0.5 and the
//      result is used as the coordinate for a fetch from im1RGBTex.
//   3. The element at param_2[idx_uv] is used the same way for im2RGBTex.
//   4. The four-float element at param_0[row * param_1 + col] (16 bytes per
//      element) receives, per channel,
//          im1_sample * (1 - param_5) + im2_sample * param_5.
//
// param_8 and param_9 are declared but never read by this kernel.
//
.visible .entry _d_frame_interp_by_two_flow_kernel(
	.param .u64 _d_frame_interp_by_two_flow_kernel_param_0,
	.param .u32 _d_frame_interp_by_two_flow_kernel_param_1,
	.param .u64 _d_frame_interp_by_two_flow_kernel_param_2,
	.param .u64 _d_frame_interp_by_two_flow_kernel_param_3,
	.param .u32 _d_frame_interp_by_two_flow_kernel_param_4,
	.param .f32 _d_frame_interp_by_two_flow_kernel_param_5,
	.param .u32 _d_frame_interp_by_two_flow_kernel_param_6,
	.param .u32 _d_frame_interp_by_two_flow_kernel_param_7,
	.param .u64 _d_frame_interp_by_two_flow_kernel_param_8,
	.param .u64 _d_frame_interp_by_two_flow_kernel_param_9
)
{
	.reg .pred 	%p_col, %p_live;
	.reg .s32 	%dim_x, %blk_x, %thr_x, %dim_y, %blk_y, %thr_y;
	.reg .s32 	%col, %row, %lim_x, %lim_y;
	.reg .s32 	%stride_uv, %stride_out, %uv_idx, %out_idx;
	.reg .f32 	%fcol, %frow, %wt, %one, %inv_wt;
	.reg .f32 	%du1, %dv1, %du2, %dv2;
	.reg .f32 	%u1, %v1, %u2, %v2;
	.reg .f32 	%a<4>;
	.reg .f32 	%b<4>;
	.reg .f32 	%m<4>;
	.reg .f32 	%o<4>;
	.reg .s64 	%uv_off, %uv1_ptr, %uv2_ptr, %out_ptr;


	// Global thread coordinates.
	mov.u32 	%dim_x, %ntid.x;
	mov.u32 	%blk_x, %ctaid.x;
	mov.u32 	%thr_x, %tid.x;
	mad.lo.s32 	%col, %dim_x, %blk_x, %thr_x;
	mov.u32 	%dim_y, %ntid.y;
	mov.u32 	%blk_y, %ctaid.y;
	mov.u32 	%thr_y, %tid.y;
	mad.lo.s32 	%row, %dim_y, %blk_y, %thr_y;

	// Bounds guard: live only when (col < param_6) && (row < param_7).
	ld.param.u32 	%lim_x, [_d_frame_interp_by_two_flow_kernel_param_6];
	ld.param.u32 	%lim_y, [_d_frame_interp_by_two_flow_kernel_param_7];
	setp.lt.s32	%p_col, %col, %lim_x;
	setp.lt.and.s32	%p_live, %row, %lim_y, %p_col;
	@!%p_live bra 	BB18_EXIT;

	// Byte offset shared by both two-float input arrays: (row * param_4 + col) * 8.
	ld.param.u32 	%stride_uv, [_d_frame_interp_by_two_flow_kernel_param_4];
	mad.lo.s32 	%uv_idx, %row, %stride_uv, %col;
	mul.wide.s32 	%uv_off, %uv_idx, 8;
	cvt.rn.f32.s32	%fcol, %col;
	cvt.rn.f32.s32	%frow, %row;

	// First fetch: offset pair from param_3, coordinate = (col, row) + pair + 0.5.
	ld.param.u64 	%uv1_ptr, [_d_frame_interp_by_two_flow_kernel_param_3];
	cvta.to.global.u64 	%uv1_ptr, %uv1_ptr;
	add.s64 	%uv1_ptr, %uv1_ptr, %uv_off;
	ld.global.v2.f32 	{%du1, %dv1}, [%uv1_ptr];
	add.ftz.f32 	%u1, %fcol, %du1;
	add.ftz.f32 	%u1, %u1, 0f3F000000;
	add.ftz.f32 	%v1, %frow, %dv1;
	add.ftz.f32 	%v1, %v1, 0f3F000000;
	tex.2d.v4.f32.f32 {%a0, %a1, %a2, %a3}, [im1RGBTex, {%u1, %v1}];

	// Second fetch: offset pair from param_2, same coordinate construction.
	ld.param.u64 	%uv2_ptr, [_d_frame_interp_by_two_flow_kernel_param_2];
	cvta.to.global.u64 	%uv2_ptr, %uv2_ptr;
	add.s64 	%uv2_ptr, %uv2_ptr, %uv_off;
	ld.global.v2.f32 	{%du2, %dv2}, [%uv2_ptr];
	add.ftz.f32 	%u2, %fcol, %du2;
	add.ftz.f32 	%u2, %u2, 0f3F000000;
	add.ftz.f32 	%v2, %frow, %dv2;
	add.ftz.f32 	%v2, %v2, 0f3F000000;
	tex.2d.v4.f32.f32 {%b0, %b1, %b2, %b3}, [im2RGBTex, {%u2, %v2}];

	// Destination address: param_0 + (row * param_1 + col) * 16.
	ld.param.u64 	%out_ptr, [_d_frame_interp_by_two_flow_kernel_param_0];
	cvta.to.global.u64 	%out_ptr, %out_ptr;
	ld.param.u32 	%stride_out, [_d_frame_interp_by_two_flow_kernel_param_1];
	mad.lo.s32 	%out_idx, %row, %stride_out, %col;
	mad.wide.s32 	%out_ptr, %out_idx, 16, %out_ptr;

	// Blend weights: wt = param_5, inv_wt = 1.0 - wt.
	ld.param.f32 	%wt, [_d_frame_interp_by_two_flow_kernel_param_5];
	mov.f32 	%one, 0f3F800000;
	sub.ftz.f32 	%inv_wt, %one, %wt;

	// Per channel: out = a * inv_wt + (b * wt); the product b * wt is rounded
	// on its own before the fused multiply-add.
	mul.ftz.f32 	%m0, %b0, %wt;
	mul.ftz.f32 	%m1, %b1, %wt;
	mul.ftz.f32 	%m2, %b2, %wt;
	mul.ftz.f32 	%m3, %b3, %wt;
	fma.rn.ftz.f32 	%o0, %a0, %inv_wt, %m0;
	fma.rn.ftz.f32 	%o1, %a1, %inv_wt, %m1;
	fma.rn.ftz.f32 	%o2, %a2, %inv_wt, %m2;
	fma.rn.ftz.f32 	%o3, %a3, %inv_wt, %m3;
	st.global.v4.f32 	[%out_ptr], {%o0, %o1, %o2, %o3};

BB18_EXIT:
	ret;
}


