//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//

.version 4.1
.target sm_30
.address_size 64

// Colour-conversion tables held in the .const state space.
//
// Every 36-byte table below is nine 4-byte values written out as raw
// little-endian bytes. The VideoLimiter kernel reads kRGB32f_To_601YPbPr with
// ld.const.f32 at byte offsets +0 .. +32 and consumes three consecutive floats
// per output component. Decoded, the first three entries of
// kRGB32f_To_601YPbPr are approximately 0.299, 0.587 and 0.114.
//
// NOTE(review): the names suggest 3x3 matrices converting between RGB and the
// 601 / 709 YPbPr / YCbCr encodings (8u, 32f and "FullRange" variants) --
// confirm against the CUDA source. Only kRGB32f_To_601YPbPr is referenced in
// the part of this file that was visible during review.
.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 33, 201, 44, 190, 111, 155, 169, 190, 0, 0, 0, 63, 0, 0, 0, 63, 70, 94, 214, 190, 232, 134, 166, 189};
.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 188, 116, 179, 63, 0, 0, 128, 63, 152, 50, 176, 190, 158, 209, 54, 191, 0, 0, 128, 63, 229, 208, 226, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70, 246, 130, 66, 145, 141, 0, 67, 94, 186, 199, 65, 33, 48, 23, 194, 240, 103, 148, 194, 0, 0, 224, 66, 0, 0, 224, 66, 111, 146, 187, 194, 70, 182, 145, 193};
.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 182, 23, 205, 59, 37, 160, 149, 59, 40, 15, 201, 186, 156, 239, 80, 187, 37, 160, 149, 59, 236, 155, 1, 60, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219, 121, 131, 62, 152, 14, 1, 63, 18, 131, 200, 61, 174, 199, 23, 190, 238, 252, 148, 190, 197, 224, 224, 62, 197, 224, 224, 62, 217, 78, 188, 190, 174, 71, 146, 189};
.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 160, 74, 204, 63, 127, 10, 149, 63, 254, 148, 200, 190, 184, 30, 80, 191, 127, 10, 149, 63, 78, 26, 1, 64, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 166, 27, 44, 190, 39, 241, 168, 190, 250, 254, 254, 62, 250, 254, 254, 62, 43, 135, 213, 190, 59, 223, 165, 189};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0, 0, 128, 63, 0, 0, 0, 0, 72, 193, 178, 63, 0, 0, 128, 63, 143, 130, 175, 190, 225, 26, 54, 191, 0, 0, 128, 63, 20, 238, 225, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113, 125, 152, 66, 92, 175, 21, 67, 92, 143, 232, 65, 158, 111, 43, 194, 49, 72, 168, 194, 0, 0, 254, 66, 0, 0, 254, 66, 170, 177, 212, 194, 88, 57, 165, 193};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129, 128, 128, 59, 0, 0, 0, 0, 188, 116, 179, 59, 129, 128, 128, 59, 194, 50, 176, 186, 179, 209, 54, 187, 129, 128, 128, 59, 229, 208, 226, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208, 179, 89, 62, 89, 23, 55, 63, 152, 221, 147, 61, 186, 164, 234, 189, 210, 86, 197, 190, 0, 0, 0, 63, 0, 0, 0, 63, 190, 134, 232, 190, 16, 202, 59, 189};
.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 12, 147, 201, 63, 0, 0, 128, 63, 221, 209, 63, 190, 243, 173, 239, 190, 0, 0, 128, 63, 77, 132, 237, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106, 60, 58, 66, 6, 161, 28, 67, 244, 253, 124, 65, 223, 79, 205, 193, 8, 172, 172, 194, 0, 0, 224, 66, 0, 0, 224, 66, 195, 117, 203, 194, 236, 81, 36, 193};
.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 239, 94, 230, 59, 37, 160, 149, 59, 33, 57, 91, 186, 178, 245, 8, 187, 37, 160, 149, 59, 82, 185, 7, 60, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCrFullRange_To_RGB32f[36] = {131, 128, 128, 59, 0, 0, 0, 0, 28, 147, 201, 59, 131, 128, 128, 59, 61, 210, 63, 186, 248, 173, 239, 186, 131, 128, 128, 59, 82, 132, 237, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207, 247, 58, 62, 53, 62, 29, 63, 231, 251, 125, 61, 184, 30, 206, 189, 23, 89, 173, 190, 197, 224, 224, 62, 197, 224, 224, 62, 12, 66, 204, 190, 195, 245, 36, 189};
.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 147, 120, 229, 63, 127, 10, 149, 63, 53, 94, 90, 190, 205, 108, 8, 191, 127, 10, 149, 63, 154, 49, 7, 64, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0, 0, 128, 63, 23, 100, 203, 61, 1, 77, 68, 62, 0, 0, 0, 0, 18, 103, 125, 63, 10, 158, 226, 189, 0, 0, 0, 0, 61, 98, 148, 189, 249, 191, 123, 63};
.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0, 0, 128, 63, 122, 165, 236, 189, 179, 237, 84, 190, 0, 0, 0, 0, 204, 98, 130, 63, 216, 188, 234, 61, 0, 0, 0, 0, 74, 179, 153, 61, 234, 61, 131, 63};
// Two 12-byte tables of three floats each. Decoded: kYCbCrOffset is
// {16.0, 128.0, 128.0} and kYCbCrFullRangeOffset is {0.0, 128.0, 128.0}.
.const .align 4 .b8 kYCbCrOffset[12] = {0, 0, 128, 65, 0, 0, 0, 67, 0, 0, 0, 67};
.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 67};

// float CalcShadowsWeight(float, float, float)
//
// With level = param_0, start = param_1, width = param_2 the result is:
//   1.0                       when level <= start
//   0.0                       when level >  start and start + width <= level
//   (level - start) / width   otherwise (approximate division; this case also
//                             catches NaN operands, because both comparisons
//                             are of the "or unordered" kind)
//
// The selection is done with selp rather than branches; the quotient is
// computed unconditionally, which is harmless because div.approx cannot trap.
//
// NOTE(review): the ramp runs from 0 (just above start) to 1 (just below
// start + width) while the plateaus are 1 below and 0 above, so the result
// jumps at both ends of the transition. Confirm against the CUDA source
// whether 1 - (level - start) / width was intended.
.visible .func  (.param .b32 func_retval0) _Z17CalcShadowsWeightfff(
	.param .b32 _Z17CalcShadowsWeightfff_param_0,
	.param .b32 _Z17CalcShadowsWeightfff_param_1,
	.param .b32 _Z17CalcShadowsWeightfff_param_2
)
{
	.reg .pred 	%above_start, %below_stop;
	.reg .f32 	%level, %start, %width, %stop, %delta, %weight;

	ld.param.f32 	%level, [_Z17CalcShadowsWeightfff_param_0];
	ld.param.f32 	%start, [_Z17CalcShadowsWeightfff_param_1];
	ld.param.f32 	%width, [_Z17CalcShadowsWeightfff_param_2];

	// Candidate value for the transition zone.
	sub.ftz.f32 	%delta, %level, %start;
	div.approx.ftz.f32 	%weight, %delta, %width;

	// At or beyond start + width the weight is 0.
	add.ftz.f32 	%stop, %start, %width;
	setp.gtu.ftz.f32	%below_stop, %stop, %level;
	selp.f32	%weight, %weight, 0f00000000, %below_stop;

	// At or below start the weight is 1; this test has the final say.
	setp.gtu.ftz.f32	%above_start, %level, %start;
	selp.f32	%weight, %weight, 0f3F800000, %above_start;

	st.param.f32	[func_retval0+0], %weight;
	ret;
}

// float CalcHighlightsWeight(float, float, float)
//
// With level = param_0, top = param_1, width = param_2 and
// knee = top - width, the result is:
//   0.0                      when knee > level
//   1.0                      when knee <= level and level > top
//   (level - knee) / width   otherwise (approximate division; this case also
//                            catches NaN operands, because both comparisons
//                            are of the "or unordered" kind)
//
// The selection is done with selp rather than branches; the quotient is
// computed unconditionally, which is harmless because div.approx cannot trap.
.visible .func  (.param .b32 func_retval0) _Z20CalcHighlightsWeightfff(
	.param .b32 _Z20CalcHighlightsWeightfff_param_0,
	.param .b32 _Z20CalcHighlightsWeightfff_param_1,
	.param .b32 _Z20CalcHighlightsWeightfff_param_2
)
{
	.reg .pred 	%reached_knee, %not_above_top;
	.reg .f32 	%level, %top, %width, %knee, %delta, %weight;

	ld.param.f32 	%level, [_Z20CalcHighlightsWeightfff_param_0];
	ld.param.f32 	%top, [_Z20CalcHighlightsWeightfff_param_1];
	ld.param.f32 	%width, [_Z20CalcHighlightsWeightfff_param_2];

	// Candidate value for the transition zone.
	sub.ftz.f32 	%knee, %top, %width;
	sub.ftz.f32 	%delta, %level, %knee;
	div.approx.ftz.f32 	%weight, %delta, %width;

	// Strictly above top the weight is 1.
	setp.leu.ftz.f32	%not_above_top, %level, %top;
	selp.f32	%weight, %weight, 0f3F800000, %not_above_top;

	// Strictly below the knee the weight is 0; this test has the final say.
	setp.leu.ftz.f32	%reached_knee, %knee, %level;
	selp.f32	%weight, %weight, 0f00000000, %reached_knee;

	st.param.f32	[func_retval0+0], %weight;
	ret;
}

// SmartLimitRatioMethod(float x 9, float*, float*, float*) -> 32-bit value
//
// Summary of what the body computes (p0..p11 are the parameters in order;
// decimal constants are approximate decodings of the hex float literals):
//
//   u = -0.3198*p1 + 0.9475*p2
//   v = -0.9475*p1 - 0.3198*p2
//   t = atan2(v, u), plus 2*pi if negative, then multiplied by 1/(2*pi)
//
//   Two scale factors are chosen from t:
//     sA = 0.2, sB = 0.8 by default;
//     sA = sB = 0.5 when t is in [0.167 - 0.0833, 0.2503) and t is in neither
//     [1 - 0.0833, 1.01) nor [0, 0.0833).
//
//   over  = (p6 > p4) ? p6 - p4 : 0
//   under = (p7 < p3) ? p3 - p7 : 0
//
//   if over > 0:
//     k    = (p8 == 0) ? 1 : 1 - over*sB/p8
//     *p9  = p0 - ((k < 0.99 or k is NaN) ? over*sA : over) / 100
//     *p10 = k*p1
//     *p11 = k*p2
//   if under > 0: the same with "under" in place of "over", except that
//     *p9 = p0 + (...)/100. These stores overwrite the ones above when both
//     cases fire.
//
// When neither excess is positive nothing is stored through p9..p11.
// p5 is never loaded. The return value is always 1.
//
// NOTE(review): the roles of the parameters (luma, chroma pair, limits, chroma
// magnitude) are not established by this function alone -- confirm against the
// CUDA source before relying on any naming.
.visible .func  (.param .b32 func_retval0) _Z21SmartLimitRatioMethodfffffffffPfS_S_(
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_0,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_1,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_2,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_3,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_4,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_5,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_6,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_7,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_8,
	.param .b64 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_9,
	.param .b64 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_10,
	.param .b64 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_11
)
{
	.reg .pred 	%p<28>;
	.reg .s32 	%r<14>;
	.reg .f32 	%f<99>;
	.reg .s64 	%rd<4>;


	// Load p0..p4 and p6..p8 (p5 is skipped) and the three output pointers.
	ld.param.f32 	%f16, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_0];
	ld.param.f32 	%f17, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_1];
	ld.param.f32 	%f18, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_2];
	ld.param.f32 	%f19, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_3];
	ld.param.f32 	%f20, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_4];
	ld.param.f32 	%f21, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_6];
	ld.param.f32 	%f22, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_7];
	ld.param.f32 	%f23, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_8];
	ld.param.u64 	%rd1, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_9];
	ld.param.u64 	%rd2, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_10];
	ld.param.u64 	%rd3, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_11];
	// u (%f25) and v (%f27): linear combinations of p1 and p2.
	mul.ftz.f32 	%f24, %f18, 0f3F728F61;
	fma.rn.ftz.f32 	%f25, %f17, 0fBEA3B6E9, %f24;
	mul.ftz.f32 	%f26, %f18, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f27, %f17, 0fBF728F61, %f26;
	// Inlined atan2(v, u). %f1 = |u|, %f2 = |v|, %r1 = raw bits of u (its
	// sign is tested later), %r2 = sign bit of v (OR-ed into the result).
	abs.ftz.f32 	%f1, %f25;
	abs.ftz.f32 	%f2, %f27;
	setp.eq.ftz.f32	%p1, %f1, 0f00000000;
	setp.eq.ftz.f32	%p2, %f2, 0f00000000;
	and.pred  	%p3, %p1, %p2;
	mov.b32 	 %r1, %f25;
	mov.b32 	 %r3, %f27;
	and.b32  	%r2, %r3, -2147483648;
	// Both inputs zero: handled in BB2_4.
	@%p3 bra 	BB2_4;

	// Both inputs infinite: handled in BB2_3.
	setp.eq.ftz.f32	%p4, %f1, 0f7F800000;
	setp.eq.ftz.f32	%p5, %f2, 0f7F800000;
	and.pred  	%p6, %p4, %p5;
	@%p6 bra 	BB2_3;

	// General case: rational approximation of atan on q = min/max (in [0,1]),
	// with q*q in %f31.
	max.ftz.f32 	%f28, %f2, %f1;
	min.ftz.f32 	%f29, %f2, %f1;
	div.full.ftz.f32 	%f30, %f29, %f28;
	mul.rn.ftz.f32 	%f31, %f30, %f30;
	mov.f32 	%f32, 0fC0B59883;
	mov.f32 	%f33, 0fBF52C7EA;
	fma.rn.ftz.f32 	%f34, %f31, %f33, %f32;
	mov.f32 	%f35, 0fC0D21907;
	fma.rn.ftz.f32 	%f36, %f34, %f31, %f35;
	mul.ftz.f32 	%f37, %f36, %f31;
	mul.ftz.f32 	%f38, %f37, %f30;
	add.ftz.f32 	%f39, %f31, 0f41355DC0;
	mov.f32 	%f40, 0f41E6BD60;
	fma.rn.ftz.f32 	%f41, %f39, %f31, %f40;
	mov.f32 	%f42, 0f419D92C8;
	fma.rn.ftz.f32 	%f43, %f41, %f31, %f42;
	rcp.approx.ftz.f32 	%f44, %f43;
	fma.rn.ftz.f32 	%f45, %f38, %f44, %f30;
	// |v| > |u|: use pi/2 - angle.
	mov.f32 	%f46, 0f3FC90FDB;
	sub.ftz.f32 	%f47, %f46, %f45;
	setp.gt.ftz.f32	%p7, %f2, %f1;
	selp.f32	%f48, %f47, %f45, %p7;
	// u has its sign bit set: use pi - angle.
	mov.f32 	%f49, 0f40490FDB;
	sub.ftz.f32 	%f50, %f49, %f48;
	setp.lt.s32	%p8, %r1, 0;
	selp.f32	%f51, %f50, %f48, %p8;
	// Apply the sign of v.
	mov.b32 	 %r4, %f51;
	or.b32  	%r5, %r4, %r2;
	mov.b32 	 %f52, %r5;
	// If |u| + |v| is NaN, that NaN is the result instead.
	add.ftz.f32 	%f53, %f1, %f2;
	setp.gtu.ftz.f32	%p9, %f53, 0f7F800000;
	selp.f32	%f94, %f53, %f52, %p9;
	bra.uni 	BB2_5;

BB2_3:
	// Both infinite: pi/4 (0x3F490FDB), or 3*pi/4 when u is negative, with the
	// sign of v.
	shr.s32 	%r6, %r1, 31;
	and.b32  	%r7, %r6, 13483017;
	add.s32 	%r8, %r7, 1061752795;
	or.b32  	%r9, %r8, %r2;
	mov.b32 	 %f94, %r9;
	bra.uni 	BB2_5;

BB2_4:
	// Both zero: 0, or pi (0x40490FDB) when u has its sign bit set, with the
	// sign of v.
	shr.s32 	%r10, %r1, 31;
	and.b32  	%r11, %r10, 1078530011;
	or.b32  	%r12, %r11, %r2;
	mov.b32 	 %f94, %r12;

BB2_5:
	// t (%f7) = (angle < 0 ? angle + 2*pi : angle) * 1/(2*pi).
	add.ftz.f32 	%f54, %f94, 0f40C90FDB;
	setp.lt.ftz.f32	%p10, %f94, 0f00000000;
	selp.f32	%f55, %f54, %f94, %p10;
	mul.ftz.f32 	%f7, %f55, 0f3E22F983;
	// Range test 1: 1 - 0.0833 <= t < 1.01 keeps the default factors.
	// %f98 = 1.0 also serves later as the default multiplier in BB2_17.
	setp.lt.ftz.f32	%p11, %f7, 0f3F8147AE;
	mov.f32 	%f98, 0f3F800000;
	sub.ftz.f32 	%f57, %f98, 0f3DAA9931;
	setp.ge.ftz.f32	%p12, %f7, %f57;
	and.pred  	%p13, %p12, %p11;
	// Default factors: %f95 = 0.2, %f96 = 0.8.
	mov.f32 	%f95, 0f3E4CCCCD;
	mov.f32 	%f96, 0f3F4CCCCD;
	@!%p13 bra 	BB2_6;
	bra.uni 	BB2_9;

BB2_6:
	// Range test 2: 0 <= t < 0.0833 also keeps the default factors.
	setp.lt.ftz.f32	%p14, %f7, 0f3DAA9931;
	setp.ge.ftz.f32	%p15, %f7, 0f00000000;
	and.pred  	%p16, %p15, %p14;
	@!%p16 bra 	BB2_7;
	bra.uni 	BB2_9;

BB2_7:
	// Range test 3: 0.167 - 0.0833 <= t < 0.2503 selects the 0.5 / 0.5 pair.
	setp.lt.ftz.f32	%p17, %f7, 0f3E802752;
	mov.f32 	%f58, 0f3E2B020C;
	sub.ftz.f32 	%f59, %f58, 0f3DAA9931;
	setp.ge.ftz.f32	%p18, %f7, %f59;
	and.pred  	%p19, %p18, %p17;
	@!%p19 bra 	BB2_9;
	bra.uni 	BB2_8;

BB2_8:
	mov.f32 	%f96, 0f3F000000;
	mov.f32 	%f95, %f96;

BB2_9:
	// over (%f10) = p6 > p4 ? p6 - p4 : 0;  under (%f11) = p7 < p3 ? p3 - p7 : 0.
	sub.ftz.f32 	%f68, %f21, %f20;
	setp.gt.ftz.f32	%p20, %f21, %f20;
	selp.f32	%f10, %f68, 0f00000000, %p20;
	sub.ftz.f32 	%f69, %f19, %f22;
	setp.lt.ftz.f32	%p21, %f22, %f19;
	selp.f32	%f11, %f69, 0f00000000, %p21;
	// Skip the "over" handling unless over > 0.
	setp.leu.ftz.f32	%p22, %f10, 0f00000000;
	@%p22 bra 	BB2_14;

	// p8 == 0: multiplier k (%f97) = 1, which avoids the division.
	setp.neu.ftz.f32	%p23, %f23, 0f00000000;
	@%p23 bra 	BB2_12;

	mov.f32 	%f97, 0f3F800000;
	bra.uni 	BB2_13;

BB2_12:
	// k = 1 - over * %f96 / p8.
	mul.ftz.f32 	%f71, %f10, %f96;
	div.approx.ftz.f32 	%f72, %f71, %f23;
	mov.f32 	%f73, 0f3F800000;
	sub.ftz.f32 	%f97, %f73, %f72;

BB2_13:
	// *p9 = p0 - (k < 0.99 or NaN ? over * %f95 : over) / 100;
	// *p10 = k * p1;  *p11 = k * p2.
	mul.ftz.f32 	%f74, %f10, %f95;
	setp.ltu.ftz.f32	%p24, %f97, 0f3F7D70A4;
	selp.f32	%f75, %f74, %f10, %p24;
	mov.f32 	%f76, 0f42C80000;
	div.approx.ftz.f32 	%f77, %f75, %f76;
	sub.ftz.f32 	%f78, %f16, %f77;
	st.f32 	[%rd1], %f78;
	mul.ftz.f32 	%f79, %f97, %f17;
	st.f32 	[%rd2], %f79;
	mul.ftz.f32 	%f80, %f97, %f18;
	st.f32 	[%rd3], %f80;

BB2_14:
	// Skip the "under" handling unless under > 0.
	setp.leu.ftz.f32	%p25, %f11, 0f00000000;
	@%p25 bra 	BB2_18;

	// p8 == 0: fall through to BB2_17 with %f98 still holding 1.0.
	setp.neu.ftz.f32	%p26, %f23, 0f00000000;
	@%p26 bra 	BB2_16;
	bra.uni 	BB2_17;

BB2_16:
	// k (%f98) = 1 - under * %f96 / p8.
	mul.ftz.f32 	%f82, %f11, %f96;
	div.approx.ftz.f32 	%f83, %f82, %f23;
	mov.f32 	%f84, 0f3F800000;
	sub.ftz.f32 	%f98, %f84, %f83;

BB2_17:
	// *p9 = p0 + (k < 0.99 or NaN ? under * %f95 : under) / 100;
	// *p10 = k * p1;  *p11 = k * p2. These start again from the original
	// p0/p1/p2 and overwrite any stores made in BB2_13.
	mul.ftz.f32 	%f85, %f11, %f95;
	setp.ltu.ftz.f32	%p27, %f98, 0f3F7D70A4;
	selp.f32	%f86, %f85, %f11, %p27;
	mov.f32 	%f87, 0f42C80000;
	div.approx.ftz.f32 	%f88, %f86, %f87;
	add.ftz.f32 	%f89, %f88, %f16;
	st.f32 	[%rd1], %f89;
	mul.ftz.f32 	%f90, %f98, %f17;
	st.f32 	[%rd2], %f90;
	mul.ftz.f32 	%f91, %f98, %f18;
	st.f32 	[%rd3], %f91;

BB2_18:
	// Always returns 1.
	mov.u32 	%r13, 1;
	st.param.b32	[func_retval0+0], %r13;
	ret;
}

// float clamp<float>(float, float, float)
//
// Returns min(max(param_0, param_1), param_2), with denormals flushed to zero.
// The max is applied first, so if param_1 > param_2 the result is param_2.
// NOTE(review): presumably param_1 is the lower bound and param_2 the upper
// bound of the clamped range -- confirm against the CUDA source.
.visible .func  (.param .b32 func_retval0) _Z5clampIfET_S0_S0_S0_(
	.param .b32 _Z5clampIfET_S0_S0_S0__param_0,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_1,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_2
)
{
	.reg .f32 	%value, %bound_a, %bound_b, %raised, %clamped;

	ld.param.f32 	%bound_b, [_Z5clampIfET_S0_S0_S0__param_2];
	ld.param.f32 	%bound_a, [_Z5clampIfET_S0_S0_S0__param_1];
	ld.param.f32 	%value, [_Z5clampIfET_S0_S0_S0__param_0];

	// Operand order is kept as value-first so NaN / signed-zero handling of
	// max and min is unchanged.
	max.ftz.f32 	%raised, %value, %bound_a;
	min.ftz.f32 	%clamped, %raised, %bound_b;

	st.param.f32	[func_retval0+0], %clamped;
	ret;
}

.visible .entry VideoLimiter(
	.param .u64 VideoLimiter_param_0,
	.param .u32 VideoLimiter_param_1,
	.param .u32 VideoLimiter_param_2,
	.param .u32 VideoLimiter_param_3,
	.param .u32 VideoLimiter_param_4,
	.param .u32 VideoLimiter_param_5,
	.param .u32 VideoLimiter_param_6,
	.param .f32 VideoLimiter_param_7,
	.param .f32 VideoLimiter_param_8,
	.param .f32 VideoLimiter_param_9,
	.param .f32 VideoLimiter_param_10,
	.param .f32 VideoLimiter_param_11,
	.param .f32 VideoLimiter_param_12,
	.param .f32 VideoLimiter_param_13,
	.param .f32 VideoLimiter_param_14,
	.param .f32 VideoLimiter_param_15,
	.param .f32 VideoLimiter_param_16
)
{
	.reg .pred 	%p<202>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<106>;
	.reg .f32 	%f<973>;
	.reg .s64 	%rd<14>;


	ld.param.u32 	%r17, [VideoLimiter_param_3];
	ld.param.u32 	%r18, [VideoLimiter_param_4];
	ld.param.u32 	%r15, [VideoLimiter_param_5];
	ld.param.u32 	%r16, [VideoLimiter_param_6];
	ld.param.f32 	%f253, [VideoLimiter_param_7];
	ld.param.f32 	%f254, [VideoLimiter_param_8];
	ld.param.f32 	%f255, [VideoLimiter_param_9];
	ld.param.f32 	%f256, [VideoLimiter_param_10];
	ld.param.f32 	%f257, [VideoLimiter_param_11];
	ld.param.f32 	%f258, [VideoLimiter_param_12];
	mov.u32 	%r19, %ntid.x;
	mov.u32 	%r20, %ctaid.x;
	mov.u32 	%r21, %tid.x;
	mad.lo.s32 	%r1, %r19, %r20, %r21;
	mov.u32 	%r22, %ntid.y;
	mov.u32 	%r23, %ctaid.y;
	mov.u32 	%r24, %tid.y;
	mad.lo.s32 	%r2, %r22, %r23, %r24;
	setp.lt.s32	%p1, %r1, %r17;
	setp.lt.s32	%p2, %r2, %r18;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB4_203;
	bra.uni 	BB4_1;

BB4_1:
	ld.param.u32 	%r105, [VideoLimiter_param_2];
	ld.param.u32 	%r104, [VideoLimiter_param_1];
	ld.param.u64 	%rd13, [VideoLimiter_param_0];
	cvta.to.global.u64 	%rd4, %rd13;
	mad.lo.s32 	%r25, %r2, %r104, %r1;
	mul.wide.s32 	%rd5, %r25, 16;
	add.s64 	%rd1, %rd4, %rd5;
	mul.wide.s32 	%rd6, %r25, 8;
	add.s64 	%rd2, %rd4, %rd6;
	setp.eq.s32	%p4, %r105, 0;
	@%p4 bra 	BB4_3;

	ld.global.v4.f32 	{%f262, %f263, %f264, %f265}, [%rd1];
	mov.f32 	%f789, %f265;
	mov.f32 	%f788, %f264;
	mov.f32 	%f787, %f263;
	mov.f32 	%f786, %f262;
	bra.uni 	BB4_4;

BB4_3:
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd2];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f786, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f787, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f788, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f789, %temp;
	}

BB4_4:
	ld.param.f32 	%f785, [VideoLimiter_param_15];
	ld.const.f32 	%f266, [kRGB32f_To_601YPbPr];
	ld.const.f32 	%f267, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f268, %f787, %f267;
	fma.rn.ftz.f32 	%f269, %f788, %f266, %f268;
	ld.const.f32 	%f270, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f972, %f786, %f270, %f269;
	ld.const.f32 	%f271, [kRGB32f_To_601YPbPr+12];
	ld.const.f32 	%f272, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f273, %f787, %f272;
	fma.rn.ftz.f32 	%f274, %f788, %f271, %f273;
	ld.const.f32 	%f275, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f930, %f786, %f275, %f274;
	ld.const.f32 	%f276, [kRGB32f_To_601YPbPr+24];
	ld.const.f32 	%f277, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f278, %f787, %f277;
	fma.rn.ftz.f32 	%f279, %f788, %f276, %f278;
	ld.const.f32 	%f280, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f878, %f786, %f280, %f279;
	mul.ftz.f32 	%f281, %f930, 0f3F5F3CB4;
	mul.ftz.f32 	%f282, %f878, 0f3F9D70A4;
	mul.ftz.f32 	%f16, %f972, 0f42C80000;
	mul.ftz.f32 	%f283, %f282, %f282;
	fma.rn.ftz.f32 	%f284, %f281, %f281, %f283;
	sqrt.approx.ftz.f32 	%f285, %f284;
	mul.ftz.f32 	%f17, %f285, 0f42C80000;
	add.ftz.f32 	%f18, %f16, %f17;
	sub.ftz.f32 	%f19, %f16, %f17;
	setp.gtu.ftz.f32	%p5, %f972, %f785;
	@%p5 bra 	BB4_6;

	mov.f32 	%f790, 0f3F800000;
	bra.uni 	BB4_9;

BB4_6:
	ld.param.f32 	%f783, [VideoLimiter_param_16];
	ld.param.f32 	%f782, [VideoLimiter_param_15];
	add.ftz.f32 	%f287, %f782, %f783;
	setp.gtu.ftz.f32	%p6, %f287, %f972;
	@%p6 bra 	BB4_8;

	mov.f32 	%f790, 0f00000000;
	bra.uni 	BB4_9;

BB4_8:
	ld.param.f32 	%f784, [VideoLimiter_param_16];
	ld.param.f32 	%f781, [VideoLimiter_param_15];
	sub.ftz.f32 	%f289, %f972, %f781;
	div.approx.ftz.f32 	%f790, %f289, %f784;

BB4_9:
	ld.param.f32 	%f777, [VideoLimiter_param_16];
	ld.param.f32 	%f776, [VideoLimiter_param_15];
	sub.ftz.f32 	%f290, %f776, %f777;
	setp.leu.ftz.f32	%p7, %f290, %f972;
	@%p7 bra 	BB4_11;

	mov.f32 	%f791, 0f00000000;
	bra.uni 	BB4_14;

BB4_11:
	ld.param.f32 	%f778, [VideoLimiter_param_15];
	setp.leu.ftz.f32	%p8, %f972, %f778;
	@%p8 bra 	BB4_13;

	mov.f32 	%f791, 0f3F800000;
	bra.uni 	BB4_14;

BB4_13:
	ld.param.f32 	%f779, [VideoLimiter_param_16];
	sub.ftz.f32 	%f292, %f972, %f290;
	div.approx.ftz.f32 	%f791, %f292, %f779;

BB4_14:
	ld.param.f32 	%f780, [VideoLimiter_param_16];
	ld.param.f32 	%f775, [VideoLimiter_param_14];
	setp.gt.ftz.f32	%p9, %f775, %f780;
	selp.f32	%f295, 0f40000000, 0f3F800000, %p9;
	sub.ftz.f32 	%f296, %f295, %f790;
	sub.ftz.f32 	%f297, %f296, %f791;
	div.approx.ftz.f32 	%f24, %f790, %f295;
	div.approx.ftz.f32 	%f25, %f297, %f295;
	div.approx.ftz.f32 	%f26, %f791, %f295;
	setp.gt.s32	%p10, %r16, 1;
	@%p10 bra 	BB4_17;

	setp.eq.s32	%p14, %r16, 0;
	@%p14 bra 	BB4_164;

	setp.eq.s32	%p15, %r16, 1;
	mov.f32 	%f859, %f878;
	mov.f32 	%f911, %f930;
	mov.f32 	%f958, %f972;
	@%p15 bra 	BB4_128;
	bra.uni 	BB4_200;

BB4_17:
	setp.eq.s32	%p11, %r16, 2;
	@%p11 bra 	BB4_92;

	setp.eq.s32	%p12, %r16, 3;
	@%p12 bra 	BB4_56;

	setp.ne.s32	%p13, %r16, 4;
	mov.f32 	%f859, %f878;
	mov.f32 	%f911, %f930;
	mov.f32 	%f958, %f972;
	@%p13 bra 	BB4_200;

	or.b32  	%r26, %r15, 2;
	setp.eq.s32	%p16, %r26, 2;
	@%p16 bra 	BB4_21;
	bra.uni 	BB4_25;

BB4_21:
	setp.gt.ftz.f32	%p17, %f16, %f254;
	@%p17 bra 	BB4_24;

	setp.geu.ftz.f32	%p18, %f16, %f253;
	@%p18 bra 	BB4_25;

	mov.f32 	%f298, 0f42C80000;
	div.approx.ftz.f32 	%f972, %f253, %f298;
	bra.uni 	BB4_25;

BB4_24:
	mov.f32 	%f299, 0f42C80000;
	div.approx.ftz.f32 	%f972, %f254, %f299;

BB4_25:
	mov.f32 	%f29, %f972;
	add.s32 	%r27, %r15, -1;
	setp.gt.u32	%p19, %r27, 1;
	mov.f32 	%f877, %f878;
	mov.f32 	%f929, %f930;
	@%p19 bra 	BB4_36;

	setp.leu.ftz.f32	%p20, %f18, %f256;
	@%p20 bra 	BB4_31;

	setp.neu.ftz.f32	%p21, %f17, 0f00000000;
	@%p21 bra 	BB4_29;

	mov.f32 	%f792, 0f00000000;
	bra.uni 	BB4_30;

BB4_29:
	sub.ftz.f32 	%f301, %f18, %f256;
	div.approx.ftz.f32 	%f302, %f301, %f17;
	mov.f32 	%f303, 0f3F800000;
	sub.ftz.f32 	%f792, %f303, %f302;

BB4_30:
	setp.lt.ftz.f32	%p22, %f792, 0f00000000;
	selp.f32	%f304, 0f00000000, %f792, %p22;
	mul.ftz.f32 	%f930, %f930, %f304;
	mul.ftz.f32 	%f878, %f878, %f304;

BB4_31:
	mov.f32 	%f929, %f930;
	mov.f32 	%f877, %f878;
	setp.geu.ftz.f32	%p23, %f19, %f255;
	@%p23 bra 	BB4_36;

	setp.neu.ftz.f32	%p24, %f17, 0f00000000;
	@%p24 bra 	BB4_34;

	mov.f32 	%f793, 0f00000000;
	bra.uni 	BB4_35;

BB4_34:
	sub.ftz.f32 	%f306, %f255, %f19;
	div.approx.ftz.f32 	%f307, %f306, %f17;
	mov.f32 	%f308, 0f3F800000;
	sub.ftz.f32 	%f793, %f308, %f307;

BB4_35:
	setp.lt.ftz.f32	%p25, %f793, 0f00000000;
	selp.f32	%f309, 0f00000000, %f793, %p25;
	mul.ftz.f32 	%f929, %f929, %f309;
	mul.ftz.f32 	%f877, %f877, %f309;

BB4_36:
	mov.f32 	%f41, %f929;
	mov.f32 	%f40, %f877;
	setp.ne.s32	%p26, %r15, 3;
	mov.f32 	%f859, %f40;
	mov.f32 	%f911, %f41;
	mov.f32 	%f958, %f29;
	@%p26 bra 	BB4_200;

	setp.gt.ftz.f32	%p27, %f18, %f258;
	setp.lt.ftz.f32	%p28, %f19, %f257;
	or.pred  	%p29, %p27, %p28;
	mov.f32 	%f859, %f40;
	mov.f32 	%f911, %f41;
	mov.f32 	%f958, %f29;
	@!%p29 bra 	BB4_200;
	bra.uni 	BB4_38;

BB4_38:
	mul.ftz.f32 	%f310, %f40, 0f3F728F61;
	fma.rn.ftz.f32 	%f311, %f41, 0fBEA3B6E9, %f310;
	mul.ftz.f32 	%f312, %f40, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f313, %f41, 0fBF728F61, %f312;
	abs.ftz.f32 	%f42, %f311;
	abs.ftz.f32 	%f43, %f313;
	setp.eq.ftz.f32	%p30, %f42, 0f00000000;
	setp.eq.ftz.f32	%p31, %f43, 0f00000000;
	and.pred  	%p32, %p30, %p31;
	mov.b32 	 %r3, %f311;
	mov.b32 	 %r28, %f313;
	and.b32  	%r4, %r28, -2147483648;
	@%p32 bra 	BB4_42;

	setp.eq.ftz.f32	%p33, %f42, 0f7F800000;
	setp.eq.ftz.f32	%p34, %f43, 0f7F800000;
	and.pred  	%p35, %p33, %p34;
	@%p35 bra 	BB4_41;

	max.ftz.f32 	%f314, %f43, %f42;
	min.ftz.f32 	%f315, %f43, %f42;
	div.full.ftz.f32 	%f316, %f315, %f314;
	mul.rn.ftz.f32 	%f317, %f316, %f316;
	mov.f32 	%f318, 0fC0B59883;
	mov.f32 	%f319, 0fBF52C7EA;
	fma.rn.ftz.f32 	%f320, %f317, %f319, %f318;
	mov.f32 	%f321, 0fC0D21907;
	fma.rn.ftz.f32 	%f322, %f320, %f317, %f321;
	mul.ftz.f32 	%f323, %f322, %f317;
	mul.ftz.f32 	%f324, %f323, %f316;
	add.ftz.f32 	%f325, %f317, 0f41355DC0;
	mov.f32 	%f326, 0f41E6BD60;
	fma.rn.ftz.f32 	%f327, %f325, %f317, %f326;
	mov.f32 	%f328, 0f419D92C8;
	fma.rn.ftz.f32 	%f329, %f327, %f317, %f328;
	rcp.approx.ftz.f32 	%f330, %f329;
	fma.rn.ftz.f32 	%f331, %f324, %f330, %f316;
	mov.f32 	%f332, 0f3FC90FDB;
	sub.ftz.f32 	%f333, %f332, %f331;
	setp.gt.ftz.f32	%p36, %f43, %f42;
	selp.f32	%f334, %f333, %f331, %p36;
	mov.f32 	%f335, 0f40490FDB;
	sub.ftz.f32 	%f336, %f335, %f334;
	setp.lt.s32	%p37, %r3, 0;
	selp.f32	%f337, %f336, %f334, %p37;
	mov.b32 	 %r29, %f337;
	or.b32  	%r30, %r29, %r4;
	mov.b32 	 %f338, %r30;
	add.ftz.f32 	%f339, %f42, %f43;
	setp.gtu.ftz.f32	%p38, %f339, 0f7F800000;
	selp.f32	%f794, %f339, %f338, %p38;
	bra.uni 	BB4_43;

BB4_41:
	shr.s32 	%r31, %r3, 31;
	and.b32  	%r32, %r31, 13483017;
	add.s32 	%r33, %r32, 1061752795;
	or.b32  	%r34, %r33, %r4;
	mov.b32 	 %f794, %r34;
	bra.uni 	BB4_43;

BB4_42:
	shr.s32 	%r35, %r3, 31;
	and.b32  	%r36, %r35, 1078530011;
	or.b32  	%r37, %r36, %r4;
	mov.b32 	 %f794, %r37;

BB4_43:
	add.ftz.f32 	%f340, %f794, 0f40C90FDB;
	setp.lt.ftz.f32	%p39, %f794, 0f00000000;
	selp.f32	%f341, %f340, %f794, %p39;
	mul.ftz.f32 	%f48, %f341, 0f3E22F983;
	setp.lt.ftz.f32	%p40, %f48, 0f3F8147AE;
	mov.f32 	%f798, 0f3F800000;
	sub.ftz.f32 	%f343, %f798, 0f3DAA9931;
	setp.ge.ftz.f32	%p41, %f48, %f343;
	and.pred  	%p42, %p41, %p40;
	mov.f32 	%f795, 0f3F4CCCCD;
	mov.f32 	%f796, 0f3E4CCCCD;
	@!%p42 bra 	BB4_44;
	bra.uni 	BB4_47;

BB4_44:
	setp.lt.ftz.f32	%p43, %f48, 0f3DAA9931;
	setp.ge.ftz.f32	%p44, %f48, 0f00000000;
	and.pred  	%p45, %p44, %p43;
	@!%p45 bra 	BB4_45;
	bra.uni 	BB4_47;

BB4_45:
	setp.lt.ftz.f32	%p46, %f48, 0f3E802752;
	mov.f32 	%f344, 0f3E2B020C;
	sub.ftz.f32 	%f345, %f344, 0f3DAA9931;
	setp.ge.ftz.f32	%p47, %f48, %f345;
	and.pred  	%p48, %p47, %p46;
	@!%p48 bra 	BB4_47;
	bra.uni 	BB4_46;

BB4_46:
	mov.f32 	%f796, 0f3F000000;
	mov.f32 	%f795, %f796;

BB4_47:
	sub.ftz.f32 	%f354, %f18, %f258;
	selp.f32	%f51, %f354, 0f00000000, %p27;
	sub.ftz.f32 	%f355, %f257, %f19;
	selp.f32	%f52, %f355, 0f00000000, %p28;
	setp.leu.ftz.f32	%p51, %f51, 0f00000000;
	mov.f32 	%f876, %f40;
	mov.f32 	%f928, %f41;
	mov.f32 	%f971, %f29;
	@%p51 bra 	BB4_52;

	setp.neu.ftz.f32	%p52, %f17, 0f00000000;
	@%p52 bra 	BB4_50;

	mov.f32 	%f797, 0f3F800000;
	bra.uni 	BB4_51;

BB4_50:
	mul.ftz.f32 	%f357, %f51, %f795;
	div.approx.ftz.f32 	%f358, %f357, %f17;
	mov.f32 	%f359, 0f3F800000;
	sub.ftz.f32 	%f797, %f359, %f358;

BB4_51:
	mul.ftz.f32 	%f360, %f51, %f796;
	setp.ltu.ftz.f32	%p53, %f797, 0f3F7D70A4;
	selp.f32	%f361, %f360, %f51, %p53;
	mov.f32 	%f362, 0f42C80000;
	div.approx.ftz.f32 	%f363, %f361, %f362;
	sub.ftz.f32 	%f55, %f29, %f363;
	mul.ftz.f32 	%f56, %f41, %f797;
	mul.ftz.f32 	%f57, %f40, %f797;
	mov.f32 	%f876, %f57;
	mov.f32 	%f928, %f56;
	mov.f32 	%f971, %f55;

BB4_52:
	mov.f32 	%f945, %f971;
	mov.f32 	%f958, %f945;
	mov.f32 	%f894, %f928;
	mov.f32 	%f911, %f894;
	mov.f32 	%f842, %f876;
	mov.f32 	%f859, %f842;
	setp.leu.ftz.f32	%p54, %f52, 0f00000000;
	@%p54 bra 	BB4_200;

	setp.neu.ftz.f32	%p55, %f17, 0f00000000;
	@%p55 bra 	BB4_54;
	bra.uni 	BB4_55;

BB4_54:
	mul.ftz.f32 	%f365, %f52, %f795;
	div.approx.ftz.f32 	%f366, %f365, %f17;
	mov.f32 	%f367, 0f3F800000;
	sub.ftz.f32 	%f798, %f367, %f366;

BB4_55:
	mul.ftz.f32 	%f368, %f52, %f796;
	setp.ltu.ftz.f32	%p56, %f798, 0f3F7D70A4;
	selp.f32	%f369, %f368, %f52, %p56;
	mov.f32 	%f370, 0f42C80000;
	div.approx.ftz.f32 	%f371, %f369, %f370;
	add.ftz.f32 	%f958, %f29, %f371;
	mul.ftz.f32 	%f911, %f41, %f798;
	mul.ftz.f32 	%f859, %f40, %f798;
	bra.uni 	BB4_200;

BB4_56:
	or.b32  	%r38, %r15, 2;
	setp.eq.s32	%p57, %r38, 2;
	mov.f32 	%f970, %f972;
	@%p57 bra 	BB4_57;
	bra.uni 	BB4_61;

BB4_57:
	setp.gt.ftz.f32	%p58, %f16, %f254;
	@%p58 bra 	BB4_60;

	setp.geu.ftz.f32	%p59, %f16, %f253;
	mov.f32 	%f970, %f972;
	@%p59 bra 	BB4_61;

	mov.f32 	%f372, 0f42C80000;
	div.approx.ftz.f32 	%f373, %f253, %f372;
	sub.ftz.f32 	%f374, %f373, %f972;
	fma.rn.ftz.f32 	%f375, %f26, %f374, %f972;
	fma.rn.ftz.f32 	%f970, %f24, %f374, %f375;
	bra.uni 	BB4_61;

BB4_60:
	mov.f32 	%f376, 0f42C80000;
	div.approx.ftz.f32 	%f377, %f254, %f376;
	sub.ftz.f32 	%f378, %f972, %f377;
	mul.ftz.f32 	%f379, %f26, %f378;
	sub.ftz.f32 	%f380, %f972, %f379;
	mul.ftz.f32 	%f381, %f24, %f378;
	sub.ftz.f32 	%f970, %f380, %f381;

BB4_61:
	mov.f32 	%f68, %f970;
	add.s32 	%r39, %r15, -1;
	setp.gt.u32	%p60, %r39, 1;
	mov.f32 	%f874, %f878;
	mov.f32 	%f926, %f930;
	@%p60 bra 	BB4_72;

	setp.leu.ftz.f32	%p61, %f18, %f256;
	mov.f32 	%f875, %f878;
	mov.f32 	%f927, %f930;
	@%p61 bra 	BB4_67;

	setp.neu.ftz.f32	%p62, %f17, 0f00000000;
	@%p62 bra 	BB4_65;

	mov.f32 	%f799, 0f00000000;
	bra.uni 	BB4_66;

BB4_65:
	sub.ftz.f32 	%f383, %f18, %f256;
	div.approx.ftz.f32 	%f384, %f383, %f17;
	mov.f32 	%f385, 0f3F800000;
	sub.ftz.f32 	%f799, %f385, %f384;

BB4_66:
	mul.ftz.f32 	%f386, %f930, %f799;
	sub.ftz.f32 	%f387, %f930, %f386;
	mul.ftz.f32 	%f388, %f26, %f387;
	sub.ftz.f32 	%f389, %f930, %f388;
	mul.ftz.f32 	%f390, %f878, %f799;
	sub.ftz.f32 	%f391, %f878, %f390;
	mul.ftz.f32 	%f392, %f26, %f391;
	sub.ftz.f32 	%f393, %f878, %f392;
	mul.ftz.f32 	%f394, %f389, %f799;
	sub.ftz.f32 	%f395, %f389, %f394;
	mul.ftz.f32 	%f396, %f24, %f395;
	sub.ftz.f32 	%f927, %f389, %f396;
	mul.ftz.f32 	%f397, %f393, %f799;
	sub.ftz.f32 	%f398, %f393, %f397;
	mul.ftz.f32 	%f399, %f24, %f398;
	sub.ftz.f32 	%f875, %f393, %f399;

BB4_67:
	mov.f32 	%f926, %f927;
	mov.f32 	%f874, %f875;
	setp.geu.ftz.f32	%p63, %f19, %f255;
	@%p63 bra 	BB4_72;

	setp.neu.ftz.f32	%p64, %f17, 0f00000000;
	@%p64 bra 	BB4_70;

	mov.f32 	%f800, 0f00000000;
	bra.uni 	BB4_71;

BB4_70:
	sub.ftz.f32 	%f401, %f255, %f19;
	div.approx.ftz.f32 	%f402, %f401, %f17;
	mov.f32 	%f403, 0f3F800000;
	sub.ftz.f32 	%f800, %f403, %f402;

BB4_71:
	mul.ftz.f32 	%f404, %f926, %f800;
	sub.ftz.f32 	%f405, %f926, %f404;
	fma.rn.ftz.f32 	%f406, %f26, %f405, %f926;
	mul.ftz.f32 	%f407, %f874, %f800;
	sub.ftz.f32 	%f408, %f874, %f407;
	fma.rn.ftz.f32 	%f409, %f26, %f408, %f874;
	mul.ftz.f32 	%f410, %f406, %f800;
	sub.ftz.f32 	%f411, %f406, %f410;
	fma.rn.ftz.f32 	%f926, %f24, %f411, %f406;
	mul.ftz.f32 	%f412, %f409, %f800;
	sub.ftz.f32 	%f413, %f409, %f412;
	fma.rn.ftz.f32 	%f874, %f24, %f413, %f409;

BB4_72:
	mov.f32 	%f80, %f926;
	mov.f32 	%f79, %f874;
	setp.ne.s32	%p65, %r15, 3;
	mov.f32 	%f859, %f79;
	mov.f32 	%f911, %f80;
	mov.f32 	%f958, %f68;
	@%p65 bra 	BB4_200;

	mul.ftz.f32 	%f414, %f79, 0f3F728F61;
	fma.rn.ftz.f32 	%f415, %f80, 0fBEA3B6E9, %f414;
	mul.ftz.f32 	%f416, %f79, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f417, %f80, 0fBF728F61, %f416;
	abs.ftz.f32 	%f81, %f415;
	abs.ftz.f32 	%f82, %f417;
	setp.eq.ftz.f32	%p66, %f81, 0f00000000;
	setp.eq.ftz.f32	%p67, %f82, 0f00000000;
	and.pred  	%p68, %p66, %p67;
	mov.b32 	 %r5, %f415;
	mov.b32 	 %r40, %f417;
	and.b32  	%r6, %r40, -2147483648;
	@%p68 bra 	BB4_77;

	setp.eq.ftz.f32	%p69, %f81, 0f7F800000;
	setp.eq.ftz.f32	%p70, %f82, 0f7F800000;
	and.pred  	%p71, %p69, %p70;
	@%p71 bra 	BB4_76;

	max.ftz.f32 	%f418, %f82, %f81;
	min.ftz.f32 	%f419, %f82, %f81;
	div.full.ftz.f32 	%f420, %f419, %f418;
	mul.rn.ftz.f32 	%f421, %f420, %f420;
	mov.f32 	%f422, 0fC0B59883;
	mov.f32 	%f423, 0fBF52C7EA;
	fma.rn.ftz.f32 	%f424, %f421, %f423, %f422;
	mov.f32 	%f425, 0fC0D21907;
	fma.rn.ftz.f32 	%f426, %f424, %f421, %f425;
	mul.ftz.f32 	%f427, %f426, %f421;
	mul.ftz.f32 	%f428, %f427, %f420;
	add.ftz.f32 	%f429, %f421, 0f41355DC0;
	mov.f32 	%f430, 0f41E6BD60;
	fma.rn.ftz.f32 	%f431, %f429, %f421, %f430;
	mov.f32 	%f432, 0f419D92C8;
	fma.rn.ftz.f32 	%f433, %f431, %f421, %f432;
	rcp.approx.ftz.f32 	%f434, %f433;
	fma.rn.ftz.f32 	%f435, %f428, %f434, %f420;
	mov.f32 	%f436, 0f3FC90FDB;
	sub.ftz.f32 	%f437, %f436, %f435;
	setp.gt.ftz.f32	%p72, %f82, %f81;
	selp.f32	%f438, %f437, %f435, %p72;
	mov.f32 	%f439, 0f40490FDB;
	sub.ftz.f32 	%f440, %f439, %f438;
	setp.lt.s32	%p73, %r5, 0;
	selp.f32	%f441, %f440, %f438, %p73;
	mov.b32 	 %r41, %f441;
	or.b32  	%r42, %r41, %r6;
	mov.b32 	 %f442, %r42;
	add.ftz.f32 	%f443, %f81, %f82;
	setp.gtu.ftz.f32	%p74, %f443, 0f7F800000;
	selp.f32	%f801, %f443, %f442, %p74;
	bra.uni 	BB4_78;

BB4_76:
	shr.s32 	%r43, %r5, 31;
	and.b32  	%r44, %r43, 13483017;
	add.s32 	%r45, %r44, 1061752795;
	or.b32  	%r46, %r45, %r6;
	mov.b32 	 %f801, %r46;
	bra.uni 	BB4_78;

BB4_77:
	shr.s32 	%r47, %r5, 31;
	and.b32  	%r48, %r47, 1078530011;
	or.b32  	%r49, %r48, %r6;
	mov.b32 	 %f801, %r49;

BB4_78:
	add.ftz.f32 	%f444, %f801, 0f40C90FDB;
	setp.lt.ftz.f32	%p75, %f801, 0f00000000;
	selp.f32	%f445, %f444, %f801, %p75;
	mul.ftz.f32 	%f87, %f445, 0f3E22F983;
	setp.lt.ftz.f32	%p76, %f87, 0f3F8147AE;
	mov.f32 	%f805, 0f3F800000;
	sub.ftz.f32 	%f447, %f805, 0f3DAA9931;
	setp.ge.ftz.f32	%p77, %f87, %f447;
	and.pred  	%p78, %p77, %p76;
	mov.f32 	%f802, 0f3E4CCCCD;
	mov.f32 	%f803, 0f3F4CCCCD;
	@!%p78 bra 	BB4_79;
	bra.uni 	BB4_82;

BB4_79:
	setp.lt.ftz.f32	%p79, %f87, 0f3DAA9931;
	setp.ge.ftz.f32	%p80, %f87, 0f00000000;
	and.pred  	%p81, %p80, %p79;
	@!%p81 bra 	BB4_80;
	bra.uni 	BB4_82;

BB4_80:
	setp.lt.ftz.f32	%p82, %f87, 0f3E802752;
	mov.f32 	%f448, 0f3E2B020C;
	sub.ftz.f32 	%f449, %f448, 0f3DAA9931;
	setp.ge.ftz.f32	%p83, %f87, %f449;
	and.pred  	%p84, %p83, %p82;
	@!%p84 bra 	BB4_82;
	bra.uni 	BB4_81;

BB4_81:
	mov.f32 	%f803, 0f3F000000;
	mov.f32 	%f802, %f803;

BB4_82:
	sub.ftz.f32 	%f458, %f18, %f258;
	setp.gt.ftz.f32	%p85, %f18, %f258;
	selp.f32	%f90, %f458, 0f00000000, %p85;
	sub.ftz.f32 	%f459, %f257, %f19;
	setp.lt.ftz.f32	%p86, %f19, %f257;
	selp.f32	%f91, %f459, 0f00000000, %p86;
	setp.leu.ftz.f32	%p87, %f90, 0f00000000;
	mov.f32 	%f873, %f79;
	mov.f32 	%f925, %f80;
	mov.f32 	%f969, %f68;
	@%p87 bra 	BB4_87;

	setp.neu.ftz.f32	%p88, %f17, 0f00000000;
	@%p88 bra 	BB4_85;

	mov.f32 	%f804, 0f3F800000;
	bra.uni 	BB4_86;

BB4_85:
	mul.ftz.f32 	%f461, %f90, %f803;
	div.approx.ftz.f32 	%f462, %f461, %f17;
	mov.f32 	%f463, 0f3F800000;
	sub.ftz.f32 	%f804, %f463, %f462;

BB4_86:
	mul.ftz.f32 	%f464, %f90, %f802;
	setp.ltu.ftz.f32	%p89, %f804, 0f3F7D70A4;
	selp.f32	%f465, %f464, %f90, %p89;
	mov.f32 	%f466, 0f42C80000;
	div.approx.ftz.f32 	%f467, %f465, %f466;
	sub.ftz.f32 	%f94, %f68, %f467;
	mul.ftz.f32 	%f95, %f80, %f804;
	mul.ftz.f32 	%f96, %f79, %f804;
	mov.f32 	%f873, %f96;
	mov.f32 	%f925, %f95;
	mov.f32 	%f969, %f94;

BB4_87:
	mov.f32 	%f948, %f969;
	mov.f32 	%f968, %f948;
	mov.f32 	%f898, %f925;
	mov.f32 	%f924, %f898;
	mov.f32 	%f846, %f873;
	mov.f32 	%f872, %f846;
	setp.leu.ftz.f32	%p90, %f91, 0f00000000;
	@%p90 bra 	BB4_91;

	setp.neu.ftz.f32	%p91, %f17, 0f00000000;
	@%p91 bra 	BB4_89;
	bra.uni 	BB4_90;

BB4_89:
	mul.ftz.f32 	%f469, %f91, %f803;
	div.approx.ftz.f32 	%f470, %f469, %f17;
	mov.f32 	%f471, 0f3F800000;
	sub.ftz.f32 	%f805, %f471, %f470;

BB4_90:
	mul.ftz.f32 	%f472, %f91, %f802;
	setp.ltu.ftz.f32	%p92, %f805, 0f3F7D70A4;
	selp.f32	%f473, %f472, %f91, %p92;
	mov.f32 	%f474, 0f42C80000;
	div.approx.ftz.f32 	%f475, %f473, %f474;
	add.ftz.f32 	%f968, %f68, %f475;
	mul.ftz.f32 	%f924, %f80, %f805;
	mul.ftz.f32 	%f872, %f79, %f805;

BB4_91:
	sub.ftz.f32 	%f476, %f968, %f68;
	add.ftz.f32 	%f477, %f26, %f24;
	fma.rn.ftz.f32 	%f958, %f477, %f476, %f68;
	sub.ftz.f32 	%f478, %f924, %f80;
	fma.rn.ftz.f32 	%f911, %f477, %f478, %f80;
	sub.ftz.f32 	%f479, %f872, %f79;
	fma.rn.ftz.f32 	%f859, %f477, %f479, %f79;
	bra.uni 	BB4_200;

BB4_92:
	or.b32  	%r50, %r15, 2;
	setp.eq.s32	%p93, %r50, 2;
	mov.f32 	%f967, %f972;
	@%p93 bra 	BB4_93;
	bra.uni 	BB4_97;

BB4_93:
	setp.gt.ftz.f32	%p94, %f16, %f254;
	@%p94 bra 	BB4_96;

	setp.geu.ftz.f32	%p95, %f16, %f253;
	mov.f32 	%f967, %f972;
	@%p95 bra 	BB4_97;

	mov.f32 	%f480, 0f42C80000;
	div.approx.ftz.f32 	%f481, %f253, %f480;
	sub.ftz.f32 	%f482, %f481, %f972;
	fma.rn.ftz.f32 	%f967, %f24, %f482, %f972;
	bra.uni 	BB4_97;

BB4_96:
	mov.f32 	%f483, 0f42C80000;
	div.approx.ftz.f32 	%f484, %f254, %f483;
	sub.ftz.f32 	%f485, %f972, %f484;
	mul.ftz.f32 	%f486, %f24, %f485;
	sub.ftz.f32 	%f967, %f972, %f486;

BB4_97:
	mov.f32 	%f113, %f967;
	add.s32 	%r51, %r15, -1;
	setp.gt.u32	%p96, %r51, 1;
	mov.f32 	%f870, %f878;
	mov.f32 	%f922, %f930;
	@%p96 bra 	BB4_108;

	setp.leu.ftz.f32	%p97, %f18, %f256;
	mov.f32 	%f871, %f878;
	mov.f32 	%f923, %f930;
	@%p97 bra 	BB4_103;

	setp.neu.ftz.f32	%p98, %f17, 0f00000000;
	@%p98 bra 	BB4_101;

	mov.f32 	%f806, 0f00000000;
	bra.uni 	BB4_102;

BB4_101:
	sub.ftz.f32 	%f488, %f18, %f256;
	div.approx.ftz.f32 	%f489, %f488, %f17;
	mov.f32 	%f490, 0f3F800000;
	sub.ftz.f32 	%f806, %f490, %f489;

BB4_102:
	mul.ftz.f32 	%f491, %f930, %f806;
	sub.ftz.f32 	%f492, %f930, %f491;
	mul.ftz.f32 	%f493, %f24, %f492;
	sub.ftz.f32 	%f923, %f930, %f493;
	mul.ftz.f32 	%f494, %f878, %f806;
	sub.ftz.f32 	%f495, %f878, %f494;
	mul.ftz.f32 	%f496, %f24, %f495;
	sub.ftz.f32 	%f871, %f878, %f496;

BB4_103:
	mov.f32 	%f922, %f923;
	mov.f32 	%f870, %f871;
	setp.geu.ftz.f32	%p99, %f19, %f255;
	@%p99 bra 	BB4_108;

	setp.neu.ftz.f32	%p100, %f17, 0f00000000;
	@%p100 bra 	BB4_106;

	mov.f32 	%f807, 0f00000000;
	bra.uni 	BB4_107;

BB4_106:
	sub.ftz.f32 	%f498, %f255, %f19;
	div.approx.ftz.f32 	%f499, %f498, %f17;
	mov.f32 	%f500, 0f3F800000;
	sub.ftz.f32 	%f807, %f500, %f499;

BB4_107:
	mul.ftz.f32 	%f501, %f922, %f807;
	sub.ftz.f32 	%f502, %f922, %f501;
	fma.rn.ftz.f32 	%f922, %f24, %f502, %f922;
	mul.ftz.f32 	%f503, %f870, %f807;
	sub.ftz.f32 	%f504, %f870, %f503;
	fma.rn.ftz.f32 	%f870, %f24, %f504, %f870;

BB4_108:
	mov.f32 	%f125, %f922;
	mov.f32 	%f124, %f870;
	setp.ne.s32	%p101, %r15, 3;
	mov.f32 	%f859, %f124;
	mov.f32 	%f911, %f125;
	mov.f32 	%f958, %f113;
	@%p101 bra 	BB4_200;

	mul.ftz.f32 	%f505, %f124, 0f3F728F61;
	fma.rn.ftz.f32 	%f506, %f125, 0fBEA3B6E9, %f505;
	mul.ftz.f32 	%f507, %f124, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f508, %f125, 0fBF728F61, %f507;
	abs.ftz.f32 	%f126, %f506;
	abs.ftz.f32 	%f127, %f508;
	setp.eq.ftz.f32	%p102, %f126, 0f00000000;
	setp.eq.ftz.f32	%p103, %f127, 0f00000000;
	and.pred  	%p104, %p102, %p103;
	mov.b32 	 %r7, %f506;
	mov.b32 	 %r52, %f508;
	and.b32  	%r8, %r52, -2147483648;
	@%p104 bra 	BB4_113;

	setp.eq.ftz.f32	%p105, %f126, 0f7F800000;
	setp.eq.ftz.f32	%p106, %f127, 0f7F800000;
	and.pred  	%p107, %p105, %p106;
	@%p107 bra 	BB4_112;

	max.ftz.f32 	%f509, %f127, %f126;
	min.ftz.f32 	%f510, %f127, %f126;
	div.full.ftz.f32 	%f511, %f510, %f509;
	mul.rn.ftz.f32 	%f512, %f511, %f511;
	mov.f32 	%f513, 0fC0B59883;
	mov.f32 	%f514, 0fBF52C7EA;
	fma.rn.ftz.f32 	%f515, %f512, %f514, %f513;
	mov.f32 	%f516, 0fC0D21907;
	fma.rn.ftz.f32 	%f517, %f515, %f512, %f516;
	mul.ftz.f32 	%f518, %f517, %f512;
	mul.ftz.f32 	%f519, %f518, %f511;
	add.ftz.f32 	%f520, %f512, 0f41355DC0;
	mov.f32 	%f521, 0f41E6BD60;
	fma.rn.ftz.f32 	%f522, %f520, %f512, %f521;
	mov.f32 	%f523, 0f419D92C8;
	fma.rn.ftz.f32 	%f524, %f522, %f512, %f523;
	rcp.approx.ftz.f32 	%f525, %f524;
	fma.rn.ftz.f32 	%f526, %f519, %f525, %f511;
	mov.f32 	%f527, 0f3FC90FDB;
	sub.ftz.f32 	%f528, %f527, %f526;
	setp.gt.ftz.f32	%p108, %f127, %f126;
	selp.f32	%f529, %f528, %f526, %p108;
	mov.f32 	%f530, 0f40490FDB;
	sub.ftz.f32 	%f531, %f530, %f529;
	setp.lt.s32	%p109, %r7, 0;
	selp.f32	%f532, %f531, %f529, %p109;
	mov.b32 	 %r53, %f532;
	or.b32  	%r54, %r53, %r8;
	mov.b32 	 %f533, %r54;
	add.ftz.f32 	%f534, %f126, %f127;
	setp.gtu.ftz.f32	%p110, %f534, 0f7F800000;
	selp.f32	%f808, %f534, %f533, %p110;
	bra.uni 	BB4_114;

BB4_112:
	shr.s32 	%r55, %r7, 31;
	and.b32  	%r56, %r55, 13483017;
	add.s32 	%r57, %r56, 1061752795;
	or.b32  	%r58, %r57, %r8;
	mov.b32 	 %f808, %r58;
	bra.uni 	BB4_114;

BB4_113:
	shr.s32 	%r59, %r7, 31;
	and.b32  	%r60, %r59, 1078530011;
	or.b32  	%r61, %r60, %r8;
	mov.b32 	 %f808, %r61;

BB4_114:
	add.ftz.f32 	%f535, %f808, 0f40C90FDB;
	setp.lt.ftz.f32	%p111, %f808, 0f00000000;
	selp.f32	%f536, %f535, %f808, %p111;
	mul.ftz.f32 	%f132, %f536, 0f3E22F983;
	setp.lt.ftz.f32	%p112, %f132, 0f3F8147AE;
	mov.f32 	%f812, 0f3F800000;
	sub.ftz.f32 	%f538, %f812, 0f3DAA9931;
	setp.ge.ftz.f32	%p113, %f132, %f538;
	and.pred  	%p114, %p113, %p112;
	mov.f32 	%f809, 0f3E4CCCCD;
	mov.f32 	%f810, 0f3F4CCCCD;
	@!%p114 bra 	BB4_115;
	bra.uni 	BB4_118;

BB4_115:
	setp.lt.ftz.f32	%p115, %f132, 0f3DAA9931;
	setp.ge.ftz.f32	%p116, %f132, 0f00000000;
	and.pred  	%p117, %p116, %p115;
	@!%p117 bra 	BB4_116;
	bra.uni 	BB4_118;

BB4_116:
	setp.lt.ftz.f32	%p118, %f132, 0f3E802752;
	mov.f32 	%f539, 0f3E2B020C;
	sub.ftz.f32 	%f540, %f539, 0f3DAA9931;
	setp.ge.ftz.f32	%p119, %f132, %f540;
	and.pred  	%p120, %p119, %p118;
	@!%p120 bra 	BB4_118;
	bra.uni 	BB4_117;

BB4_117:
	mov.f32 	%f810, 0f3F000000;
	mov.f32 	%f809, %f810;

BB4_118:
	sub.ftz.f32 	%f549, %f18, %f258;
	setp.gt.ftz.f32	%p121, %f18, %f258;
	selp.f32	%f135, %f549, 0f00000000, %p121;
	sub.ftz.f32 	%f550, %f257, %f19;
	setp.lt.ftz.f32	%p122, %f19, %f257;
	selp.f32	%f136, %f550, 0f00000000, %p122;
	setp.leu.ftz.f32	%p123, %f135, 0f00000000;
	mov.f32 	%f869, %f124;
	mov.f32 	%f921, %f125;
	mov.f32 	%f966, %f113;
	@%p123 bra 	BB4_123;

	setp.neu.ftz.f32	%p124, %f17, 0f00000000;
	@%p124 bra 	BB4_121;

	mov.f32 	%f811, 0f3F800000;
	bra.uni 	BB4_122;

BB4_121:
	mul.ftz.f32 	%f552, %f135, %f810;
	div.approx.ftz.f32 	%f553, %f552, %f17;
	mov.f32 	%f554, 0f3F800000;
	sub.ftz.f32 	%f811, %f554, %f553;

BB4_122:
	mul.ftz.f32 	%f555, %f135, %f809;
	setp.ltu.ftz.f32	%p125, %f811, 0f3F7D70A4;
	selp.f32	%f556, %f555, %f135, %p125;
	mov.f32 	%f557, 0f42C80000;
	div.approx.ftz.f32 	%f558, %f556, %f557;
	sub.ftz.f32 	%f139, %f113, %f558;
	mul.ftz.f32 	%f140, %f125, %f811;
	mul.ftz.f32 	%f141, %f124, %f811;
	mov.f32 	%f869, %f141;
	mov.f32 	%f921, %f140;
	mov.f32 	%f966, %f139;

BB4_123:
	mov.f32 	%f951, %f966;
	mov.f32 	%f965, %f951;
	mov.f32 	%f902, %f921;
	mov.f32 	%f920, %f902;
	mov.f32 	%f850, %f869;
	mov.f32 	%f868, %f850;
	setp.leu.ftz.f32	%p126, %f136, 0f00000000;
	@%p126 bra 	BB4_127;

	setp.neu.ftz.f32	%p127, %f17, 0f00000000;
	@%p127 bra 	BB4_125;
	bra.uni 	BB4_126;

BB4_125:
	mul.ftz.f32 	%f560, %f136, %f810;
	div.approx.ftz.f32 	%f561, %f560, %f17;
	mov.f32 	%f562, 0f3F800000;
	sub.ftz.f32 	%f812, %f562, %f561;

BB4_126:
	mul.ftz.f32 	%f563, %f136, %f809;
	setp.ltu.ftz.f32	%p128, %f812, 0f3F7D70A4;
	selp.f32	%f564, %f563, %f136, %p128;
	mov.f32 	%f565, 0f42C80000;
	div.approx.ftz.f32 	%f566, %f564, %f565;
	add.ftz.f32 	%f965, %f113, %f566;
	mul.ftz.f32 	%f920, %f125, %f812;
	mul.ftz.f32 	%f868, %f124, %f812;

BB4_127:
	sub.ftz.f32 	%f567, %f965, %f113;
	fma.rn.ftz.f32 	%f958, %f24, %f567, %f113;
	sub.ftz.f32 	%f568, %f920, %f125;
	fma.rn.ftz.f32 	%f911, %f24, %f568, %f125;
	sub.ftz.f32 	%f569, %f868, %f124;
	fma.rn.ftz.f32 	%f859, %f24, %f569, %f124;
	bra.uni 	BB4_200;

BB4_128:
	or.b32  	%r62, %r15, 2;
	setp.eq.s32	%p129, %r62, 2;
	mov.f32 	%f964, %f972;
	@%p129 bra 	BB4_129;
	bra.uni 	BB4_133;

BB4_129:
	setp.gt.ftz.f32	%p130, %f16, %f254;
	@%p130 bra 	BB4_132;

	setp.geu.ftz.f32	%p131, %f16, %f253;
	mov.f32 	%f964, %f972;
	@%p131 bra 	BB4_133;

	mov.f32 	%f570, 0f42C80000;
	div.approx.ftz.f32 	%f571, %f253, %f570;
	sub.ftz.f32 	%f572, %f571, %f972;
	fma.rn.ftz.f32 	%f964, %f25, %f572, %f972;
	bra.uni 	BB4_133;

BB4_132:
	mov.f32 	%f573, 0f42C80000;
	div.approx.ftz.f32 	%f574, %f254, %f573;
	sub.ftz.f32 	%f575, %f972, %f574;
	mul.ftz.f32 	%f576, %f25, %f575;
	sub.ftz.f32 	%f964, %f972, %f576;

BB4_133:
	mov.f32 	%f158, %f964;
	add.s32 	%r63, %r15, -1;
	setp.gt.u32	%p132, %r63, 1;
	mov.f32 	%f866, %f878;
	mov.f32 	%f918, %f930;
	@%p132 bra 	BB4_144;

	setp.leu.ftz.f32	%p133, %f18, %f256;
	mov.f32 	%f867, %f878;
	mov.f32 	%f919, %f930;
	@%p133 bra 	BB4_139;

	setp.neu.ftz.f32	%p134, %f17, 0f00000000;
	@%p134 bra 	BB4_137;

	mov.f32 	%f813, 0f00000000;
	bra.uni 	BB4_138;

BB4_137:
	sub.ftz.f32 	%f578, %f18, %f256;
	div.approx.ftz.f32 	%f579, %f578, %f17;
	mov.f32 	%f580, 0f3F800000;
	sub.ftz.f32 	%f813, %f580, %f579;

BB4_138:
	mul.ftz.f32 	%f581, %f930, %f813;
	sub.ftz.f32 	%f582, %f930, %f581;
	mul.ftz.f32 	%f583, %f25, %f582;
	sub.ftz.f32 	%f919, %f930, %f583;
	mul.ftz.f32 	%f584, %f878, %f813;
	sub.ftz.f32 	%f585, %f878, %f584;
	mul.ftz.f32 	%f586, %f25, %f585;
	sub.ftz.f32 	%f867, %f878, %f586;

BB4_139:
	mov.f32 	%f918, %f919;
	mov.f32 	%f866, %f867;
	setp.geu.ftz.f32	%p135, %f19, %f255;
	@%p135 bra 	BB4_144;

	setp.neu.ftz.f32	%p136, %f17, 0f00000000;
	@%p136 bra 	BB4_142;

	mov.f32 	%f814, 0f00000000;
	bra.uni 	BB4_143;

BB4_142:
	sub.ftz.f32 	%f588, %f255, %f19;
	div.approx.ftz.f32 	%f589, %f588, %f17;
	mov.f32 	%f590, 0f3F800000;
	sub.ftz.f32 	%f814, %f590, %f589;

BB4_143:
	mul.ftz.f32 	%f591, %f918, %f814;
	sub.ftz.f32 	%f592, %f918, %f591;
	fma.rn.ftz.f32 	%f918, %f25, %f592, %f918;
	mul.ftz.f32 	%f593, %f866, %f814;
	sub.ftz.f32 	%f594, %f866, %f593;
	fma.rn.ftz.f32 	%f866, %f25, %f594, %f866;

BB4_144:
	mov.f32 	%f170, %f918;
	mov.f32 	%f169, %f866;
	setp.ne.s32	%p137, %r15, 3;
	mov.f32 	%f859, %f169;
	mov.f32 	%f911, %f170;
	mov.f32 	%f958, %f158;
	@%p137 bra 	BB4_200;

	mul.ftz.f32 	%f595, %f169, 0f3F728F61;
	fma.rn.ftz.f32 	%f596, %f170, 0fBEA3B6E9, %f595;
	mul.ftz.f32 	%f597, %f169, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f598, %f170, 0fBF728F61, %f597;
	abs.ftz.f32 	%f171, %f596;
	abs.ftz.f32 	%f172, %f598;
	setp.eq.ftz.f32	%p138, %f171, 0f00000000;
	setp.eq.ftz.f32	%p139, %f172, 0f00000000;
	and.pred  	%p140, %p138, %p139;
	mov.b32 	 %r9, %f596;
	mov.b32 	 %r64, %f598;
	and.b32  	%r10, %r64, -2147483648;
	@%p140 bra 	BB4_149;

	setp.eq.ftz.f32	%p141, %f171, 0f7F800000;
	setp.eq.ftz.f32	%p142, %f172, 0f7F800000;
	and.pred  	%p143, %p141, %p142;
	@%p143 bra 	BB4_148;

	max.ftz.f32 	%f599, %f172, %f171;
	min.ftz.f32 	%f600, %f172, %f171;
	div.full.ftz.f32 	%f601, %f600, %f599;
	mul.rn.ftz.f32 	%f602, %f601, %f601;
	mov.f32 	%f603, 0fC0B59883;
	mov.f32 	%f604, 0fBF52C7EA;
	fma.rn.ftz.f32 	%f605, %f602, %f604, %f603;
	mov.f32 	%f606, 0fC0D21907;
	fma.rn.ftz.f32 	%f607, %f605, %f602, %f606;
	mul.ftz.f32 	%f608, %f607, %f602;
	mul.ftz.f32 	%f609, %f608, %f601;
	add.ftz.f32 	%f610, %f602, 0f41355DC0;
	mov.f32 	%f611, 0f41E6BD60;
	fma.rn.ftz.f32 	%f612, %f610, %f602, %f611;
	mov.f32 	%f613, 0f419D92C8;
	fma.rn.ftz.f32 	%f614, %f612, %f602, %f613;
	rcp.approx.ftz.f32 	%f615, %f614;
	fma.rn.ftz.f32 	%f616, %f609, %f615, %f601;
	mov.f32 	%f617, 0f3FC90FDB;
	sub.ftz.f32 	%f618, %f617, %f616;
	setp.gt.ftz.f32	%p144, %f172, %f171;
	selp.f32	%f619, %f618, %f616, %p144;
	mov.f32 	%f620, 0f40490FDB;
	sub.ftz.f32 	%f621, %f620, %f619;
	setp.lt.s32	%p145, %r9, 0;
	selp.f32	%f622, %f621, %f619, %p145;
	mov.b32 	 %r65, %f622;
	or.b32  	%r66, %r65, %r10;
	mov.b32 	 %f623, %r66;
	add.ftz.f32 	%f624, %f171, %f172;
	setp.gtu.ftz.f32	%p146, %f624, 0f7F800000;
	selp.f32	%f815, %f624, %f623, %p146;
	bra.uni 	BB4_150;

BB4_148:
	shr.s32 	%r67, %r9, 31;
	and.b32  	%r68, %r67, 13483017;
	add.s32 	%r69, %r68, 1061752795;
	or.b32  	%r70, %r69, %r10;
	mov.b32 	 %f815, %r70;
	bra.uni 	BB4_150;

BB4_149:
	shr.s32 	%r71, %r9, 31;
	and.b32  	%r72, %r71, 1078530011;
	or.b32  	%r73, %r72, %r10;
	mov.b32 	 %f815, %r73;

BB4_150:
	add.ftz.f32 	%f625, %f815, 0f40C90FDB;
	setp.lt.ftz.f32	%p147, %f815, 0f00000000;
	selp.f32	%f626, %f625, %f815, %p147;
	mul.ftz.f32 	%f177, %f626, 0f3E22F983;
	setp.lt.ftz.f32	%p148, %f177, 0f3F8147AE;
	mov.f32 	%f819, 0f3F800000;
	sub.ftz.f32 	%f628, %f819, 0f3DAA9931;
	setp.ge.ftz.f32	%p149, %f177, %f628;
	and.pred  	%p150, %p149, %p148;
	mov.f32 	%f816, 0f3E4CCCCD;
	mov.f32 	%f817, 0f3F4CCCCD;
	@!%p150 bra 	BB4_151;
	bra.uni 	BB4_154;

BB4_151:
	setp.lt.ftz.f32	%p151, %f177, 0f3DAA9931;
	setp.ge.ftz.f32	%p152, %f177, 0f00000000;
	and.pred  	%p153, %p152, %p151;
	@!%p153 bra 	BB4_152;
	bra.uni 	BB4_154;

BB4_152:
	setp.lt.ftz.f32	%p154, %f177, 0f3E802752;
	mov.f32 	%f629, 0f3E2B020C;
	sub.ftz.f32 	%f630, %f629, 0f3DAA9931;
	setp.ge.ftz.f32	%p155, %f177, %f630;
	and.pred  	%p156, %p155, %p154;
	@!%p156 bra 	BB4_154;
	bra.uni 	BB4_153;

BB4_153:
	mov.f32 	%f817, 0f3F000000;
	mov.f32 	%f816, %f817;

BB4_154:
	sub.ftz.f32 	%f639, %f18, %f258;
	setp.gt.ftz.f32	%p157, %f18, %f258;
	selp.f32	%f180, %f639, 0f00000000, %p157;
	sub.ftz.f32 	%f640, %f257, %f19;
	setp.lt.ftz.f32	%p158, %f19, %f257;
	selp.f32	%f181, %f640, 0f00000000, %p158;
	setp.leu.ftz.f32	%p159, %f180, 0f00000000;
	mov.f32 	%f865, %f169;
	mov.f32 	%f917, %f170;
	mov.f32 	%f963, %f158;
	@%p159 bra 	BB4_159;

	setp.neu.ftz.f32	%p160, %f17, 0f00000000;
	@%p160 bra 	BB4_157;

	mov.f32 	%f818, 0f3F800000;
	bra.uni 	BB4_158;

BB4_157:
	mul.ftz.f32 	%f642, %f180, %f817;
	div.approx.ftz.f32 	%f643, %f642, %f17;
	mov.f32 	%f644, 0f3F800000;
	sub.ftz.f32 	%f818, %f644, %f643;

BB4_158:
	mul.ftz.f32 	%f645, %f180, %f816;
	setp.ltu.ftz.f32	%p161, %f818, 0f3F7D70A4;
	selp.f32	%f646, %f645, %f180, %p161;
	mov.f32 	%f647, 0f42C80000;
	div.approx.ftz.f32 	%f648, %f646, %f647;
	sub.ftz.f32 	%f184, %f158, %f648;
	mul.ftz.f32 	%f185, %f170, %f818;
	mul.ftz.f32 	%f186, %f169, %f818;
	mov.f32 	%f865, %f186;
	mov.f32 	%f917, %f185;
	mov.f32 	%f963, %f184;

BB4_159:
	mov.f32 	%f954, %f963;
	mov.f32 	%f962, %f954;
	mov.f32 	%f906, %f917;
	mov.f32 	%f916, %f906;
	mov.f32 	%f854, %f865;
	mov.f32 	%f864, %f854;
	setp.leu.ftz.f32	%p162, %f181, 0f00000000;
	@%p162 bra 	BB4_163;

	setp.neu.ftz.f32	%p163, %f17, 0f00000000;
	@%p163 bra 	BB4_161;
	bra.uni 	BB4_162;

BB4_161:
	mul.ftz.f32 	%f650, %f181, %f817;
	div.approx.ftz.f32 	%f651, %f650, %f17;
	mov.f32 	%f652, 0f3F800000;
	sub.ftz.f32 	%f819, %f652, %f651;

BB4_162:
	mul.ftz.f32 	%f653, %f181, %f816;
	setp.ltu.ftz.f32	%p164, %f819, 0f3F7D70A4;
	selp.f32	%f654, %f653, %f181, %p164;
	mov.f32 	%f655, 0f42C80000;
	div.approx.ftz.f32 	%f656, %f654, %f655;
	add.ftz.f32 	%f962, %f158, %f656;
	mul.ftz.f32 	%f916, %f170, %f819;
	mul.ftz.f32 	%f864, %f169, %f819;

BB4_163:
	sub.ftz.f32 	%f657, %f962, %f158;
	fma.rn.ftz.f32 	%f958, %f25, %f657, %f158;
	sub.ftz.f32 	%f658, %f916, %f170;
	fma.rn.ftz.f32 	%f911, %f25, %f658, %f170;
	sub.ftz.f32 	%f659, %f864, %f169;
	fma.rn.ftz.f32 	%f859, %f25, %f659, %f169;
	bra.uni 	BB4_200;

BB4_164:
	or.b32  	%r74, %r15, 2;
	setp.eq.s32	%p165, %r74, 2;
	mov.f32 	%f961, %f972;
	@%p165 bra 	BB4_165;
	bra.uni 	BB4_169;

BB4_165:
	setp.gt.ftz.f32	%p166, %f16, %f254;
	@%p166 bra 	BB4_168;

	setp.geu.ftz.f32	%p167, %f16, %f253;
	mov.f32 	%f961, %f972;
	@%p167 bra 	BB4_169;

	mov.f32 	%f660, 0f42C80000;
	div.approx.ftz.f32 	%f661, %f253, %f660;
	sub.ftz.f32 	%f662, %f661, %f972;
	fma.rn.ftz.f32 	%f961, %f26, %f662, %f972;
	bra.uni 	BB4_169;

BB4_168:
	mov.f32 	%f663, 0f42C80000;
	div.approx.ftz.f32 	%f664, %f254, %f663;
	sub.ftz.f32 	%f665, %f972, %f664;
	mul.ftz.f32 	%f666, %f26, %f665;
	sub.ftz.f32 	%f961, %f972, %f666;

BB4_169:
	mov.f32 	%f203, %f961;
	add.s32 	%r75, %r15, -1;
	setp.gt.u32	%p168, %r75, 1;
	mov.f32 	%f862, %f878;
	mov.f32 	%f914, %f930;
	@%p168 bra 	BB4_180;

	setp.leu.ftz.f32	%p169, %f18, %f256;
	mov.f32 	%f863, %f878;
	mov.f32 	%f915, %f930;
	@%p169 bra 	BB4_175;

	setp.neu.ftz.f32	%p170, %f17, 0f00000000;
	@%p170 bra 	BB4_173;

	mov.f32 	%f820, 0f00000000;
	bra.uni 	BB4_174;

BB4_173:
	sub.ftz.f32 	%f668, %f18, %f256;
	div.approx.ftz.f32 	%f669, %f668, %f17;
	mov.f32 	%f670, 0f3F800000;
	sub.ftz.f32 	%f820, %f670, %f669;

BB4_174:
	mul.ftz.f32 	%f671, %f930, %f820;
	sub.ftz.f32 	%f672, %f930, %f671;
	mul.ftz.f32 	%f673, %f26, %f672;
	sub.ftz.f32 	%f915, %f930, %f673;
	mul.ftz.f32 	%f674, %f878, %f820;
	sub.ftz.f32 	%f675, %f878, %f674;
	mul.ftz.f32 	%f676, %f26, %f675;
	sub.ftz.f32 	%f863, %f878, %f676;

BB4_175:
	mov.f32 	%f914, %f915;
	mov.f32 	%f862, %f863;
	setp.geu.ftz.f32	%p171, %f19, %f255;
	@%p171 bra 	BB4_180;

	setp.neu.ftz.f32	%p172, %f17, 0f00000000;
	@%p172 bra 	BB4_178;

	mov.f32 	%f821, 0f00000000;
	bra.uni 	BB4_179;

BB4_178:
	sub.ftz.f32 	%f678, %f255, %f19;
	div.approx.ftz.f32 	%f679, %f678, %f17;
	mov.f32 	%f680, 0f3F800000;
	sub.ftz.f32 	%f821, %f680, %f679;

BB4_179:
	mul.ftz.f32 	%f681, %f914, %f821;
	sub.ftz.f32 	%f682, %f914, %f681;
	fma.rn.ftz.f32 	%f914, %f26, %f682, %f914;
	mul.ftz.f32 	%f683, %f862, %f821;
	sub.ftz.f32 	%f684, %f862, %f683;
	fma.rn.ftz.f32 	%f862, %f26, %f684, %f862;

BB4_180:
	mov.f32 	%f215, %f914;
	mov.f32 	%f214, %f862;
	setp.ne.s32	%p173, %r15, 3;
	mov.f32 	%f859, %f214;
	mov.f32 	%f911, %f215;
	mov.f32 	%f958, %f203;
	@%p173 bra 	BB4_200;

	mul.ftz.f32 	%f685, %f214, 0f3F728F61;
	fma.rn.ftz.f32 	%f686, %f215, 0fBEA3B6E9, %f685;
	mul.ftz.f32 	%f687, %f214, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f688, %f215, 0fBF728F61, %f687;
	abs.ftz.f32 	%f216, %f686;
	abs.ftz.f32 	%f217, %f688;
	setp.eq.ftz.f32	%p174, %f216, 0f00000000;
	setp.eq.ftz.f32	%p175, %f217, 0f00000000;
	and.pred  	%p176, %p174, %p175;
	mov.b32 	 %r11, %f686;
	mov.b32 	 %r76, %f688;
	and.b32  	%r12, %r76, -2147483648;
	@%p176 bra 	BB4_185;

	setp.eq.ftz.f32	%p177, %f216, 0f7F800000;
	setp.eq.ftz.f32	%p178, %f217, 0f7F800000;
	and.pred  	%p179, %p177, %p178;
	@%p179 bra 	BB4_184;

	max.ftz.f32 	%f689, %f217, %f216;
	min.ftz.f32 	%f690, %f217, %f216;
	div.full.ftz.f32 	%f691, %f690, %f689;
	mul.rn.ftz.f32 	%f692, %f691, %f691;
	mov.f32 	%f693, 0fC0B59883;
	mov.f32 	%f694, 0fBF52C7EA;
	fma.rn.ftz.f32 	%f695, %f692, %f694, %f693;
	mov.f32 	%f696, 0fC0D21907;
	fma.rn.ftz.f32 	%f697, %f695, %f692, %f696;
	mul.ftz.f32 	%f698, %f697, %f692;
	mul.ftz.f32 	%f699, %f698, %f691;
	add.ftz.f32 	%f700, %f692, 0f41355DC0;
	mov.f32 	%f701, 0f41E6BD60;
	fma.rn.ftz.f32 	%f702, %f700, %f692, %f701;
	mov.f32 	%f703, 0f419D92C8;
	fma.rn.ftz.f32 	%f704, %f702, %f692, %f703;
	rcp.approx.ftz.f32 	%f705, %f704;
	fma.rn.ftz.f32 	%f706, %f699, %f705, %f691;
	mov.f32 	%f707, 0f3FC90FDB;
	sub.ftz.f32 	%f708, %f707, %f706;
	setp.gt.ftz.f32	%p180, %f217, %f216;
	selp.f32	%f709, %f708, %f706, %p180;
	mov.f32 	%f710, 0f40490FDB;
	sub.ftz.f32 	%f711, %f710, %f709;
	setp.lt.s32	%p181, %r11, 0;
	selp.f32	%f712, %f711, %f709, %p181;
	mov.b32 	 %r77, %f712;
	or.b32  	%r78, %r77, %r12;
	mov.b32 	 %f713, %r78;
	add.ftz.f32 	%f714, %f216, %f217;
	setp.gtu.ftz.f32	%p182, %f714, 0f7F800000;
	selp.f32	%f822, %f714, %f713, %p182;
	bra.uni 	BB4_186;

BB4_184:
	shr.s32 	%r79, %r11, 31;
	and.b32  	%r80, %r79, 13483017;
	add.s32 	%r81, %r80, 1061752795;
	or.b32  	%r82, %r81, %r12;
	mov.b32 	 %f822, %r82;
	bra.uni 	BB4_186;

BB4_185:
	shr.s32 	%r83, %r11, 31;
	and.b32  	%r84, %r83, 1078530011;
	or.b32  	%r85, %r84, %r12;
	mov.b32 	 %f822, %r85;

BB4_186:
	add.ftz.f32 	%f715, %f822, 0f40C90FDB;
	setp.lt.ftz.f32	%p183, %f822, 0f00000000;
	selp.f32	%f716, %f715, %f822, %p183;
	mul.ftz.f32 	%f222, %f716, 0f3E22F983;
	setp.lt.ftz.f32	%p184, %f222, 0f3F8147AE;
	mov.f32 	%f826, 0f3F800000;
	sub.ftz.f32 	%f718, %f826, 0f3DAA9931;
	setp.ge.ftz.f32	%p185, %f222, %f718;
	and.pred  	%p186, %p185, %p184;
	mov.f32 	%f823, 0f3E4CCCCD;
	mov.f32 	%f824, 0f3F4CCCCD;
	@!%p186 bra 	BB4_187;
	bra.uni 	BB4_190;

BB4_187:
	setp.lt.ftz.f32	%p187, %f222, 0f3DAA9931;
	setp.ge.ftz.f32	%p188, %f222, 0f00000000;
	and.pred  	%p189, %p188, %p187;
	@!%p189 bra 	BB4_188;
	bra.uni 	BB4_190;

BB4_188:
	setp.lt.ftz.f32	%p190, %f222, 0f3E802752;
	mov.f32 	%f719, 0f3E2B020C;
	sub.ftz.f32 	%f720, %f719, 0f3DAA9931;
	setp.ge.ftz.f32	%p191, %f222, %f720;
	and.pred  	%p192, %p191, %p190;
	@!%p192 bra 	BB4_190;
	bra.uni 	BB4_189;

BB4_189:
	mov.f32 	%f824, 0f3F000000;
	mov.f32 	%f823, %f824;

BB4_190:
	sub.ftz.f32 	%f729, %f18, %f258;
	setp.gt.ftz.f32	%p193, %f18, %f258;
	selp.f32	%f225, %f729, 0f00000000, %p193;
	sub.ftz.f32 	%f730, %f257, %f19;
	setp.lt.ftz.f32	%p194, %f19, %f257;
	selp.f32	%f226, %f730, 0f00000000, %p194;
	setp.leu.ftz.f32	%p195, %f225, 0f00000000;
	mov.f32 	%f861, %f214;
	mov.f32 	%f913, %f215;
	mov.f32 	%f960, %f203;
	@%p195 bra 	BB4_195;

	setp.neu.ftz.f32	%p196, %f17, 0f00000000;
	@%p196 bra 	BB4_193;

	mov.f32 	%f825, 0f3F800000;
	bra.uni 	BB4_194;

BB4_193:
	mul.ftz.f32 	%f732, %f225, %f824;
	div.approx.ftz.f32 	%f733, %f732, %f17;
	mov.f32 	%f734, 0f3F800000;
	sub.ftz.f32 	%f825, %f734, %f733;

BB4_194:
	mul.ftz.f32 	%f735, %f225, %f823;
	setp.ltu.ftz.f32	%p197, %f825, 0f3F7D70A4;
	selp.f32	%f736, %f735, %f225, %p197;
	mov.f32 	%f737, 0f42C80000;
	div.approx.ftz.f32 	%f738, %f736, %f737;
	sub.ftz.f32 	%f229, %f203, %f738;
	mul.ftz.f32 	%f230, %f215, %f825;
	mul.ftz.f32 	%f231, %f214, %f825;
	mov.f32 	%f861, %f231;
	mov.f32 	%f913, %f230;
	mov.f32 	%f960, %f229;

BB4_195:
	mov.f32 	%f957, %f960;
	mov.f32 	%f959, %f957;
	mov.f32 	%f910, %f913;
	mov.f32 	%f912, %f910;
	mov.f32 	%f858, %f861;
	mov.f32 	%f860, %f858;
	setp.leu.ftz.f32	%p198, %f226, 0f00000000;
	@%p198 bra 	BB4_199;

	setp.neu.ftz.f32	%p199, %f17, 0f00000000;
	@%p199 bra 	BB4_197;
	bra.uni 	BB4_198;

BB4_197:
	mul.ftz.f32 	%f740, %f226, %f824;
	div.approx.ftz.f32 	%f741, %f740, %f17;
	mov.f32 	%f742, 0f3F800000;
	sub.ftz.f32 	%f826, %f742, %f741;

BB4_198:
	mul.ftz.f32 	%f743, %f226, %f823;
	setp.ltu.ftz.f32	%p200, %f826, 0f3F7D70A4;
	selp.f32	%f744, %f743, %f226, %p200;
	mov.f32 	%f745, 0f42C80000;
	div.approx.ftz.f32 	%f746, %f744, %f745;
	add.ftz.f32 	%f959, %f203, %f746;
	mul.ftz.f32 	%f912, %f215, %f826;
	mul.ftz.f32 	%f860, %f214, %f826;

BB4_199:
	sub.ftz.f32 	%f747, %f959, %f203;
	fma.rn.ftz.f32 	%f958, %f26, %f747, %f203;
	sub.ftz.f32 	%f748, %f912, %f215;
	fma.rn.ftz.f32 	%f911, %f26, %f748, %f215;
	sub.ftz.f32 	%f749, %f860, %f214;
	fma.rn.ftz.f32 	%f859, %f26, %f749, %f214;

BB4_200:
	ld.const.f32 	%f750, [k601YPbPr_To_RGB32f];
	ld.const.f32 	%f751, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f752, %f911, %f751;
	fma.rn.ftz.f32 	%f753, %f958, %f750, %f752;
	ld.const.f32 	%f754, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f250, %f859, %f754, %f753;
	ld.const.f32 	%f755, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f756, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f757, %f911, %f756;
	fma.rn.ftz.f32 	%f758, %f958, %f755, %f757;
	ld.const.f32 	%f759, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f251, %f859, %f759, %f758;
	ld.const.f32 	%f760, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f761, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f762, %f911, %f761;
	fma.rn.ftz.f32 	%f763, %f958, %f760, %f762;
	ld.const.f32 	%f764, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f252, %f859, %f764, %f763;
	@%p4 bra 	BB4_202;

	mul.wide.s32 	%rd8, %r25, 16;
	add.s64 	%rd9, %rd4, %rd8;
	st.global.v4.f32 	[%rd9], {%f252, %f251, %f250, %f789};
	bra.uni 	BB4_203;

BB4_202:
	mul.wide.s32 	%rd11, %r25, 8;
	add.s64 	%rd12, %rd4, %rd11;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f789;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f250;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f251;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f252;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd12], {%rs12, %rs11, %rs10, %rs9};

BB4_203:
	ret;
}


