//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Wed Jul 10 12:41:20 2013 (1373485280)
// Cuda compilation tools, release 5.5, V5.5.0
//

.version 3.2
.target sm_30
.address_size 64

	.file	1 "D:/singlebarrel/releases/2014.03/shared/adobe/MediaCore/Renderers/RendererGPU/Src/Effects/VideoLimiter.cu", 1399785316, 19757
	.file	2 "d:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\mediacore\\external\\3rdparty\\nvidia\\cuda\\win\\include\\device_functions.h", 1399785281, 191626
	.file	3 "d:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\mediacore\\external\\3rdparty\\nvidia\\cuda\\win\\include\\math_functions.h", 1399785281, 404374
	.file	4 "D:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\MediaCore\\GPUFoundation\\API\\Inc\\GPUFoundation/KernelSupport/KernelCore.h", 1399785310, 7840
// Constant-memory lookup tables, emitted as raw bytes.
// Each 36-byte array holds nine little-endian IEEE-754 binary32 values; the VideoLimiter
// kernel in this file reads kRGB32f_To_601YPbPr that way (ld.const.f32 at byte offsets
// 0, 4, ..., 32). The first three entries of kRGB32f_To_601YPbPr decode to approximately
// 0.299, 0.587 and 0.114.
// NOTE(review): the names suggest 3x3 colour-conversion matrices (BT.601 / BT.709, video
// and full range, 8-bit and float scaling) — confirm layout and intent against the .cu/.h
// sources; only kRGB32f_To_601YPbPr is referenced in the part of the file reviewed here.
.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 33, 201, 44, 190, 111, 155, 169, 190, 0, 0, 0, 63, 0, 0, 0, 63, 70, 94, 214, 190, 232, 134, 166, 189};
.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 188, 116, 179, 63, 0, 0, 128, 63, 152, 50, 176, 190, 158, 209, 54, 191, 0, 0, 128, 63, 229, 208, 226, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70, 246, 130, 66, 145, 141, 0, 67, 94, 186, 199, 65, 33, 48, 23, 194, 240, 103, 148, 194, 0, 0, 224, 66, 0, 0, 224, 66, 111, 146, 187, 194, 70, 182, 145, 193};
.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 182, 23, 205, 59, 37, 160, 149, 59, 40, 15, 201, 186, 156, 239, 80, 187, 37, 160, 149, 59, 236, 155, 1, 60, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219, 121, 131, 62, 152, 14, 1, 63, 18, 131, 200, 61, 174, 199, 23, 190, 238, 252, 148, 190, 197, 224, 224, 62, 197, 224, 224, 62, 217, 78, 188, 190, 174, 71, 146, 189};
.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 160, 74, 204, 63, 127, 10, 149, 63, 254, 148, 200, 190, 184, 30, 80, 191, 127, 10, 149, 63, 78, 26, 1, 64, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 166, 27, 44, 190, 39, 241, 168, 190, 250, 254, 254, 62, 250, 254, 254, 62, 43, 135, 213, 190, 59, 223, 165, 189};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0, 0, 128, 63, 0, 0, 0, 0, 72, 193, 178, 63, 0, 0, 128, 63, 143, 130, 175, 190, 225, 26, 54, 191, 0, 0, 128, 63, 20, 238, 225, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113, 125, 152, 66, 92, 175, 21, 67, 92, 143, 232, 65, 158, 111, 43, 194, 49, 72, 168, 194, 0, 0, 254, 66, 0, 0, 254, 66, 170, 177, 212, 194, 88, 57, 165, 193};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129, 128, 128, 59, 0, 0, 0, 0, 188, 116, 179, 59, 129, 128, 128, 59, 194, 50, 176, 186, 179, 209, 54, 187, 129, 128, 128, 59, 229, 208, 226, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208, 179, 89, 62, 89, 23, 55, 63, 152, 221, 147, 61, 186, 164, 234, 189, 210, 86, 197, 190, 0, 0, 0, 63, 0, 0, 0, 63, 190, 134, 232, 190, 16, 202, 59, 189};
.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 12, 147, 201, 63, 0, 0, 128, 63, 221, 209, 63, 190, 243, 173, 239, 190, 0, 0, 128, 63, 77, 132, 237, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106, 60, 58, 66, 6, 161, 28, 67, 244, 253, 124, 65, 223, 79, 205, 193, 8, 172, 172, 194, 0, 0, 224, 66, 0, 0, 224, 66, 195, 117, 203, 194, 236, 81, 36, 193};
.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 239, 94, 230, 59, 37, 160, 149, 59, 33, 57, 91, 186, 178, 245, 8, 187, 37, 160, 149, 59, 82, 185, 7, 60, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCrFullRange_To_RGB32f[36] = {131, 128, 128, 59, 0, 0, 0, 0, 28, 147, 201, 59, 131, 128, 128, 59, 61, 210, 63, 186, 248, 173, 239, 186, 131, 128, 128, 59, 82, 132, 237, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207, 247, 58, 62, 53, 62, 29, 63, 231, 251, 125, 61, 147, 24, 206, 61, 23, 89, 173, 190, 197, 224, 224, 62, 197, 224, 224, 62, 12, 66, 204, 190, 195, 245, 36, 189};
.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 147, 120, 229, 63, 127, 10, 149, 63, 53, 94, 90, 190, 205, 108, 8, 191, 127, 10, 149, 63, 154, 49, 7, 64, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0, 0, 128, 63, 23, 100, 203, 61, 1, 77, 68, 62, 0, 0, 0, 0, 18, 103, 125, 63, 10, 158, 226, 189, 0, 0, 0, 0, 61, 98, 148, 189, 249, 191, 123, 63};
.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0, 0, 128, 63, 122, 165, 236, 189, 179, 237, 84, 190, 0, 0, 0, 0, 204, 98, 130, 63, 216, 188, 234, 61, 0, 0, 0, 0, 74, 179, 153, 61, 234, 61, 131, 63};
// 12-byte tables: three binary32 values each.
// kYCbCrOffset decodes to {16.0, 128.0, 128.0}; kYCbCrFullRangeOffset to {0.0, 128.0, 128.0}.
.const .align 4 .b8 kYCbCrOffset[12] = {0, 0, 128, 65, 0, 0, 0, 67, 0, 0, 0, 67};
.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 67};
// NUL-terminated ASCII string "__CUDA_FTZ" placed in global memory.
.global .align 1 .b8 $str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};

// CalcShadowsWeight(float, float, float) -> float   (demangled from _Z17CalcShadowsWeightfff)
//
// Piecewise weight, evaluated without branches. With x = param_0, start = param_1 and
// width = param_2:
//     x <= start              -> 1.0
//     start + width <= x      -> 0.0
//     otherwise               -> (x - start) / width      (div.approx, flush-to-zero)
// Both comparisons are "greater-or-unordered", so a NaN operand selects the quotient.
// The quotient is always computed and then discarded by selp when a plateau applies.
// NOTE(review): the ramp starts at 0 just above `start` while the plateau at x <= start
// is 1.0, so the weight is discontinuous there — confirm against VideoLimiter.cu whether
// 1 - (x - start) / width was intended. Behaviour is kept exactly as generated.
.visible .func  (.param .b32 func_retval0) _Z17CalcShadowsWeightfff(
	.param .b32 _Z17CalcShadowsWeightfff_param_0,
	.param .b32 _Z17CalcShadowsWeightfff_param_1,
	.param .b32 _Z17CalcShadowsWeightfff_param_2
)
{
	.reg .pred 	%above_start, %below_stop;
	.reg .f32 	%x, %start, %width, %stop, %offset, %ramp, %weight;

	ld.param.f32 	%x, [_Z17CalcShadowsWeightfff_param_0];
	ld.param.f32 	%start, [_Z17CalcShadowsWeightfff_param_1];
	ld.param.f32 	%width, [_Z17CalcShadowsWeightfff_param_2];

	.loc 1 44 1
	// Region tests: x > start (or unordered), and start + width > x (or unordered).
	setp.gtu.ftz.f32	%above_start, %x, %start;
	add.ftz.f32 	%stop, %start, %width;
	setp.gtu.ftz.f32	%below_stop, %stop, %x;

	// Candidate value for the transition band.
	sub.ftz.f32 	%offset, %x, %start;
	.loc 2 3606 10
	div.approx.ftz.f32 	%ramp, %offset, %width;

	.loc 1 44 1
	// Inner choice first (ramp vs. 0.0), then the outer plateau (1.0) overrides it.
	selp.f32	%weight, %ramp, 0f00000000, %below_stop;
	selp.f32	%weight, %weight, 0f3F800000, %above_start;

	st.param.f32	[func_retval0+0], %weight;
	.loc 1 44 8
	ret;
}

// CalcHighlightsWeight(float, float, float) -> float   (demangled from _Z20CalcHighlightsWeightfff)
//
// Piecewise weight, evaluated without branches. With x = param_0, stop = param_1,
// width = param_2 and start = stop - width:
//     x < start               -> 0.0
//     x > stop                -> 1.0
//     otherwise               -> (x - start) / width      (div.approx, flush-to-zero)
// Both comparisons are "less-or-equal-or-unordered", so a NaN operand selects the quotient.
// The quotient is always computed and then discarded by selp when a plateau applies.
.visible .func  (.param .b32 func_retval0) _Z20CalcHighlightsWeightfff(
	.param .b32 _Z20CalcHighlightsWeightfff_param_0,
	.param .b32 _Z20CalcHighlightsWeightfff_param_1,
	.param .b32 _Z20CalcHighlightsWeightfff_param_2
)
{
	.reg .pred 	%reached_start, %within_stop;
	.reg .f32 	%x, %stop, %width, %start, %offset, %ramp, %weight;

	ld.param.f32 	%x, [_Z20CalcHighlightsWeightfff_param_0];
	ld.param.f32 	%stop, [_Z20CalcHighlightsWeightfff_param_1];
	ld.param.f32 	%width, [_Z20CalcHighlightsWeightfff_param_2];

	.loc 1 44 1
	// Lower edge of the transition band.
	sub.ftz.f32 	%start, %stop, %width;

	// Region tests: start <= x (or unordered), and x <= stop (or unordered).
	setp.leu.ftz.f32	%reached_start, %start, %x;
	setp.leu.ftz.f32	%within_stop, %x, %stop;

	// Candidate value for the transition band.
	sub.ftz.f32 	%offset, %x, %start;
	.loc 2 3606 10
	div.approx.ftz.f32 	%ramp, %offset, %width;

	.loc 1 44 1
	// Inner choice first (ramp vs. 1.0), then the outer plateau (0.0) overrides it.
	selp.f32	%weight, %ramp, 0f3F800000, %within_stop;
	selp.f32	%weight, %weight, 0f00000000, %reached_start;

	st.param.f32	[func_retval0+0], %weight;
	.loc 1 44 8
	ret;
}

// SmartLimitRatioMethod: nine float arguments (param_0..param_8) followed by three float
// pointers (param_9..param_11), per the mangled name. Always returns the 32-bit value 1.
//
// Outline of what the code below does:
//   1. Rotates the pair (param_1, param_2) by a fixed 2x2 matrix and takes a quadrant-aware
//      arctangent of the result, normalised to turns (angle / 2*pi).
//   2. Picks two factors from that angle: (0.5, 0.5) inside one band, (0.2, 0.8) otherwise.
//   3. Computes an upper excess  max-style(param_6 - param_4)  and a lower excess
//      (param_3 - param_7), each clamped to 0 when not positive.
//   4. For each positive excess, derives a scale = 1 - excess*factor/param_8 (1.0 when
//      param_8 == 0) and stores an adjusted param_0 plus scaled param_1/param_2 through
//      the three pointers.
// Observations a caller should know:
//   - param_5 is never read.
//   - If neither excess is positive, nothing is written through the pointers.
//   - The lower-excess branch recomputes from the ORIGINAL param_0/1/2, so when both
//     excesses are positive its stores overwrite those of the upper-excess branch.
//   - Stores use st.f32 with no state-space qualifier.
// NOTE(review): presumably param_0 is luma and param_1/param_2 are chroma components, with
// param_8 the chroma magnitude — confirm against VideoLimiter.cu.
.visible .func  (.param .b32 func_retval0) _Z21SmartLimitRatioMethodfffffffffPfS_S_(
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_0,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_1,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_2,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_3,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_4,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_5,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_6,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_7,
	.param .b32 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_8,
	.param .b64 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_9,
	.param .b64 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_10,
	.param .b64 _Z21SmartLimitRatioMethodfffffffffPfS_S__param_11
)
{
	.reg .pred 	%p<28>;
	.reg .s32 	%r<14>;
	.reg .f32 	%f<99>;
	.reg .s64 	%rd<4>;


	// Load every argument except param_5, which this function does not use.
	ld.param.f32 	%f20, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_0];
	ld.param.f32 	%f21, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_1];
	ld.param.f32 	%f22, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_2];
	ld.param.f32 	%f23, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_3];
	ld.param.f32 	%f24, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_4];
	ld.param.f32 	%f25, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_6];
	ld.param.f32 	%f26, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_7];
	ld.param.f32 	%f27, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_8];
	ld.param.u64 	%rd1, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_9];
	ld.param.u64 	%rd2, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_10];
	ld.param.u64 	%rd3, [_Z21SmartLimitRatioMethodfffffffffPfS_S__param_11];
	.loc 1 44 1
	// Fixed rotation of (param_1, param_2); 0f3F728F61 ~= 0.9475, 0fBEA3B6E9 ~= -0.3198:
	//   %f29 = -0.3198*param_1 + 0.9475*param_2
	//   %f31 = -0.9475*param_1 - 0.3198*param_2
	mul.ftz.f32 	%f28, %f22, 0f3F728F61;
	fma.rn.ftz.f32 	%f29, %f21, 0fBEA3B6E9, %f28;
	mul.ftz.f32 	%f30, %f22, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f31, %f21, 0fBF728F61, %f30;
	.loc 3 8449 10
	// Two-argument arctangent of (%f31, %f29), result in %f94. The .loc points into
	// math_functions.h; this appears to be the inlined atan2f(%f31, %f29).
	// %r1 keeps the raw bits of %f29 (its sign picks the half-plane); %r2 keeps only the
	// sign bit of %f31, which is OR-ed onto the final angle.
	abs.ftz.f32 	%f1, %f29;
	abs.ftz.f32 	%f2, %f31;
	setp.eq.ftz.f32	%p1, %f1, 0f00000000;
	setp.eq.ftz.f32	%p2, %f2, 0f00000000;
	and.pred  	%p3, %p1, %p2;
	mov.b32 	 %r1, %f29;
	mov.b32 	 %r3, %f31;
	and.b32  	%r2, %r3, -2147483648;
	// Both magnitudes zero -> special case at BB2_4.
	@%p3 bra 	BB2_4;

	// Both magnitudes infinite -> special case at BB2_3.
	setp.eq.ftz.f32	%p4, %f1, 0f7F800000;
	setp.eq.ftz.f32	%p5, %f2, 0f7F800000;
	and.pred  	%p6, %p4, %p5;
	@%p6 bra 	BB2_3;

	// General case: q = min/max lies in [0, 1]; a rational polynomial in q*q refines q.
	max.ftz.f32 	%f32, %f2, %f1;
	min.ftz.f32 	%f33, %f2, %f1;
	div.full.ftz.f32 	%f34, %f33, %f32;
	mul.rn.ftz.f32 	%f35, %f34, %f34;
	mov.f32 	%f36, 0fC0B59883;
	mov.f32 	%f37, 0fBF52C7EA;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f38, %f35, %f37, %f36;
	mov.f32 	%f39, 0fC0D21907;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f40, %f38, %f35, %f39;
	mul.ftz.f32 	%f41, %f40, %f35;
	mul.ftz.f32 	%f42, %f41, %f34;
	add.ftz.f32 	%f43, %f35, 0f41355DC0;
	mov.f32 	%f44, 0f41E6BD60;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f45, %f43, %f35, %f44;
	mov.f32 	%f46, 0f419D92C8;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f47, %f45, %f35, %f46;
	rcp.approx.ftz.f32 	%f48, %f47;
	fma.rn.ftz.f32 	%f49, %f42, %f48, %f34;
	// 0f3FC90FDB = pi/2: use pi/2 - t when |%f31| > |%f29|.
	mov.f32 	%f50, 0f3FC90FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f51, %f50, %f49;
	setp.gt.ftz.f32	%p7, %f2, %f1;
	selp.f32	%f52, %f51, %f49, %p7;
	// 0f40490FDB = pi: use pi - t when %f29 has its sign bit set.
	mov.f32 	%f53, 0f40490FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f54, %f53, %f52;
	setp.lt.s32	%p8, %r1, 0;
	selp.f32	%f55, %f54, %f52, %p8;
	// Apply the sign of %f31 to the angle.
	mov.b32 	 %r4, %f55;
	or.b32  	%r5, %r4, %r2;
	mov.b32 	 %f56, %r5;
	// |a| + |b| compares unordered against +inf only when an input is NaN; in that case
	// the (NaN) sum is returned instead of the computed angle.
	add.ftz.f32 	%f57, %f1, %f2;
	setp.gtu.ftz.f32	%p9, %f57, 0f7F800000;
	selp.f32	%f94, %f57, %f56, %p9;
	bra.uni 	BB2_5;

BB2_3:
	.loc 3 8449 10
	// Both infinite: 1061752795 = 0x3F490FDB (pi/4); adding 13483017 gives 0x4016CBE4
	// (3*pi/4) when %f29 is negative. The sign of %f31 is then OR-ed in.
	shr.s32 	%r6, %r1, 31;
	and.b32  	%r7, %r6, 13483017;
	add.s32 	%r8, %r7, 1061752795;
	or.b32  	%r9, %r8, %r2;
	mov.b32 	 %f94, %r9;
	bra.uni 	BB2_5;

BB2_4:
	.loc 3 8449 10
	// Both zero: 0 when %f29 is non-negative, 1078530011 = 0x40490FDB (pi) when negative;
	// the sign of %f31 is then OR-ed in.
	shr.s32 	%r10, %r1, 31;
	and.b32  	%r11, %r10, 1078530011;
	or.b32  	%r12, %r11, %r2;
	mov.b32 	 %f94, %r12;

BB2_5:
	.loc 1 44 1
	// Wrap negative angles by adding 2*pi (0f40C90FDB) ...
	add.ftz.f32 	%f58, %f94, 0f40C90FDB;
	.loc 1 44 1
	setp.lt.ftz.f32	%p10, %f94, 0f00000000;
	selp.f32	%f59, %f58, %f94, %p10;
	.loc 1 44 1
	// ... then scale by 1/(2*pi) (0f3E22F983) so %f7 is the angle in turns.
	mul.ftz.f32 	%f7, %f59, 0f3E22F983;
	.loc 1 44 1
	// Band test 1: 1 - 0.0833 <= %f7 < 1.01  (0f3DAA9931 ~= 0.0833, 0f3F8147AE ~= 1.01).
	// %f98 = 1.0 is also the default scale used later at BB2_17 when param_8 == 0.
	setp.lt.ftz.f32	%p11, %f7, 0f3F8147AE;
	mov.f32 	%f98, 0f3F800000;
	sub.ftz.f32 	%f61, %f98, 0f3DAA9931;
	setp.ge.ftz.f32	%p12, %f7, %f61;
	and.pred  	%p13, %p12, %p11;
	.loc 1 44 1
	// Default factors: %f95 = 0.2 (applied to the param_0 adjustment),
	//                  %f96 = 0.8 (applied to the scale computation).
	mov.f32 	%f95, 0f3E4CCCCD;
	mov.f32 	%f96, 0f3F4CCCCD;
	.loc 1 44 1
	@!%p13 bra 	BB2_6;
	bra.uni 	BB2_9;

BB2_6:
	// Band test 2: 0 <= %f7 < 0.0833 — also keeps the default factors.
	setp.lt.ftz.f32	%p14, %f7, 0f3DAA9931;
	setp.ge.ftz.f32	%p15, %f7, 0f00000000;
	and.pred  	%p16, %p15, %p14;
	.loc 1 44 1
	@!%p16 bra 	BB2_7;
	bra.uni 	BB2_9;

BB2_7:
	.loc 1 44 1
	// Band test 3: 0.167 - 0.0833 <= %f7 < 0.2503  (0f3E2B020C ~= 0.167,
	// 0f3E802752 ~= 0.2503). Only this band changes the factors.
	setp.lt.ftz.f32	%p17, %f7, 0f3E802752;
	mov.f32 	%f62, 0f3E2B020C;
	sub.ftz.f32 	%f63, %f62, 0f3DAA9931;
	setp.ge.ftz.f32	%p18, %f7, %f63;
	and.pred  	%p19, %p18, %p17;
	.loc 1 44 1
	@!%p19 bra 	BB2_9;
	bra.uni 	BB2_8;

BB2_8:
	// Inside band 3: both factors become 0.5.
	mov.f32 	%f96, 0f3F000000;
	mov.f32 	%f95, %f96;

BB2_9:
	.loc 1 44 1
	// %f10 = param_6 - param_4 when param_6 > param_4, else 0   (upper excess)
	// %f11 = param_3 - param_7 when param_7 < param_3, else 0   (lower excess)
	sub.ftz.f32 	%f72, %f25, %f24;
	setp.gt.ftz.f32	%p20, %f25, %f24;
	selp.f32	%f10, %f72, 0f00000000, %p20;
	sub.ftz.f32 	%f73, %f23, %f26;
	setp.lt.ftz.f32	%p21, %f26, %f23;
	selp.f32	%f11, %f73, 0f00000000, %p21;
	.loc 1 44 1
	// Skip the upper-excess handling unless %f10 > 0.
	setp.leu.ftz.f32	%p22, %f10, 0f00000000;
	@%p22 bra 	BB2_14;

	.loc 1 44 1
	mul.ftz.f32 	%f12, %f10, %f95;
	mul.ftz.f32 	%f13, %f10, %f96;
	.loc 1 44 1
	// Guard the division: param_8 == 0 yields scale 1.0.
	setp.neu.ftz.f32	%p23, %f27, 0f00000000;
	@%p23 bra 	BB2_12;

	mov.f32 	%f97, 0f3F800000;
	bra.uni 	BB2_13;

BB2_12:
	.loc 2 3606 10
	// scale = 1 - (excess * %f96) / param_8
	div.approx.ftz.f32 	%f75, %f13, %f27;
	mov.f32 	%f76, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f97, %f76, %f75;

BB2_13:
	.loc 1 44 1
	// If scale < 0.99 (or unordered) subtract the factored excess, otherwise the full
	// excess; the amount is divided by 100 (0f42C80000) before being taken off param_0.
	setp.ltu.ftz.f32	%p24, %f97, 0f3F7D70A4;
	selp.f32	%f77, %f12, %f10, %p24;
	mov.f32 	%f78, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f79, %f77, %f78;
	.loc 1 44 23
	sub.ftz.f32 	%f80, %f20, %f79;
	st.f32 	[%rd1], %f80;
	.loc 1 44 1
	// Scaled param_1 / param_2 go to the second and third output pointers.
	mul.ftz.f32 	%f81, %f97, %f21;
	st.f32 	[%rd2], %f81;
	mul.ftz.f32 	%f82, %f97, %f22;
	st.f32 	[%rd3], %f82;

BB2_14:
	.loc 1 44 1
	// Skip the lower-excess handling unless %f11 > 0.
	setp.leu.ftz.f32	%p25, %f11, 0f00000000;
	@%p25 bra 	BB2_18;

	.loc 1 44 1
	mul.ftz.f32 	%f16, %f11, %f95;
	mul.ftz.f32 	%f17, %f11, %f96;
	.loc 1 44 1
	// param_8 == 0 falls through to BB2_17 with %f98 still holding 1.0.
	setp.neu.ftz.f32	%p26, %f27, 0f00000000;
	@%p26 bra 	BB2_16;
	bra.uni 	BB2_17;

BB2_16:
	.loc 2 3606 10
	// scale = 1 - (excess * %f96) / param_8
	div.approx.ftz.f32 	%f84, %f17, %f27;
	mov.f32 	%f85, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f98, %f85, %f84;

BB2_17:
	.loc 1 44 1
	// Mirror of BB2_13, but the amount is ADDED to the original param_0 and the products
	// use the original param_1/param_2, overwriting any values stored at BB2_13.
	setp.ltu.ftz.f32	%p27, %f98, 0f3F7D70A4;
	selp.f32	%f86, %f16, %f11, %p27;
	mov.f32 	%f87, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f88, %f86, %f87;
	.loc 1 44 23
	add.ftz.f32 	%f89, %f88, %f20;
	st.f32 	[%rd1], %f89;
	.loc 1 44 1
	mul.ftz.f32 	%f90, %f98, %f21;
	st.f32 	[%rd2], %f90;
	mul.ftz.f32 	%f91, %f98, %f22;
	st.f32 	[%rd3], %f91;

BB2_18:
	// Unconditional return value of 1.
	mov.u32 	%r13, 1;
	st.param.b32	[func_retval0+0], %r13;
	.loc 1 44 1
	ret;
}

// clamp<float>(float, float, float) -> float   (demangled from _Z5clampIfET_S0_S0_S0_)
//
// Returns min(max(param_0, param_1), param_2), i.e. param_0 limited to the interval
// [param_1, param_2]. Both steps use flush-to-zero arithmetic.
// The bounds are not validated: when param_1 > param_2 the result is param_2.
.visible .func  (.param .b32 func_retval0) _Z5clampIfET_S0_S0_S0_(
	.param .b32 _Z5clampIfET_S0_S0_S0__param_0,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_1,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_2
)
{
	.reg .f32 	%value, %lower, %upper, %raised, %bounded;

	ld.param.f32 	%value, [_Z5clampIfET_S0_S0_S0__param_0];
	ld.param.f32 	%lower, [_Z5clampIfET_S0_S0_S0__param_1];
	ld.param.f32 	%upper, [_Z5clampIfET_S0_S0_S0__param_2];

	// Lift the value to at least the lower bound ...
	.loc 2 2770 10
	max.ftz.f32 	%raised, %value, %lower;

	// ... then cap it at the upper bound.
	.loc 2 2765 10
	min.ftz.f32 	%bounded, %raised, %upper;

	st.param.f32	[func_retval0+0], %bounded;
	.loc 4 146 40
	ret;
}

.visible .entry VideoLimiter(
	.param .u64 VideoLimiter_param_0,
	.param .u32 VideoLimiter_param_1,
	.param .u32 VideoLimiter_param_2,
	.param .u32 VideoLimiter_param_3,
	.param .u32 VideoLimiter_param_4,
	.param .u32 VideoLimiter_param_5,
	.param .u32 VideoLimiter_param_6,
	.param .f32 VideoLimiter_param_7,
	.param .f32 VideoLimiter_param_8,
	.param .f32 VideoLimiter_param_9,
	.param .f32 VideoLimiter_param_10,
	.param .f32 VideoLimiter_param_11,
	.param .f32 VideoLimiter_param_12,
	.param .f32 VideoLimiter_param_13,
	.param .f32 VideoLimiter_param_14,
	.param .f32 VideoLimiter_param_15,
	.param .f32 VideoLimiter_param_16
)
{
	.reg .pred 	%p<201>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<106>;
	.reg .f32 	%f<974>;
	.reg .s64 	%rd<13>;


	ld.param.u64 	%rd4, [VideoLimiter_param_0];
	ld.param.u32 	%r17, [VideoLimiter_param_3];
	ld.param.u32 	%r18, [VideoLimiter_param_4];
	ld.param.u32 	%r15, [VideoLimiter_param_5];
	ld.param.u32 	%r16, [VideoLimiter_param_6];
	ld.param.f32 	%f287, [VideoLimiter_param_7];
	ld.param.f32 	%f288, [VideoLimiter_param_8];
	ld.param.f32 	%f289, [VideoLimiter_param_9];
	ld.param.f32 	%f290, [VideoLimiter_param_10];
	ld.param.f32 	%f291, [VideoLimiter_param_11];
	ld.param.f32 	%f292, [VideoLimiter_param_12];
	cvta.to.global.u64 	%rd1, %rd4;
	.loc 1 44 1
	mov.u32 	%r19, %ntid.x;
	mov.u32 	%r20, %ctaid.x;
	mov.u32 	%r21, %tid.x;
	mad.lo.s32 	%r1, %r19, %r20, %r21;
	mov.u32 	%r22, %ntid.y;
	mov.u32 	%r23, %ctaid.y;
	mov.u32 	%r24, %tid.y;
	mad.lo.s32 	%r2, %r22, %r23, %r24;
	.loc 1 44 1
	setp.lt.s32	%p1, %r1, %r17;
	setp.lt.s32	%p2, %r2, %r18;
	and.pred  	%p3, %p1, %p2;
	.loc 1 44 1
	@!%p3 bra 	BB4_202;
	bra.uni 	BB4_1;

BB4_1:
	ld.param.u32 	%r105, [VideoLimiter_param_2];
	ld.param.u32 	%r104, [VideoLimiter_param_1];
	.loc 1 44 1
	mad.lo.s32 	%r25, %r2, %r104, %r1;
	mul.wide.s32 	%rd5, %r25, 16;
	add.s64 	%rd2, %rd1, %rd5;
	mul.wide.s32 	%rd6, %r25, 8;
	add.s64 	%rd3, %rd1, %rd6;
	.loc 1 44 1
	setp.eq.s32	%p4, %r105, 0;
	@%p4 bra 	BB4_3;

	ld.global.v4.f32 	{%f296, %f297, %f298, %f299}, [%rd2];
	mov.f32 	%f793, %f299;
	mov.f32 	%f792, %f298;
	mov.f32 	%f791, %f297;
	mov.f32 	%f790, %f296;
	bra.uni 	BB4_4;

BB4_3:
	.loc 1 44 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd3];
	.loc 2 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f790, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f791, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f792, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f793, %temp;
	}

BB4_4:
	ld.param.f32 	%f789, [VideoLimiter_param_15];
	ld.const.f32 	%f300, [kRGB32f_To_601YPbPr];
	ld.const.f32 	%f301, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f302, %f791, %f301;
	fma.rn.ftz.f32 	%f303, %f792, %f300, %f302;
	ld.const.f32 	%f304, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f973, %f790, %f304, %f303;
	ld.const.f32 	%f305, [kRGB32f_To_601YPbPr+12];
	ld.const.f32 	%f306, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f307, %f791, %f306;
	fma.rn.ftz.f32 	%f308, %f792, %f305, %f307;
	ld.const.f32 	%f309, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f932, %f790, %f309, %f308;
	ld.const.f32 	%f310, [kRGB32f_To_601YPbPr+24];
	ld.const.f32 	%f311, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f312, %f791, %f311;
	fma.rn.ftz.f32 	%f313, %f792, %f310, %f312;
	ld.const.f32 	%f314, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f881, %f790, %f314, %f313;
	.loc 1 44 1
	mul.ftz.f32 	%f315, %f932, 0f3F5F3CB4;
	mul.ftz.f32 	%f316, %f881, 0f3F9D70A4;
	mul.ftz.f32 	%f17, %f973, 0f42C80000;
	mul.ftz.f32 	%f317, %f316, %f316;
	fma.rn.ftz.f32 	%f318, %f315, %f315, %f317;
	.loc 2 3055 10
	sqrt.approx.ftz.f32 	%f319, %f318;
	.loc 1 44 61
	mul.ftz.f32 	%f18, %f319, 0f42C80000;
	.loc 1 44 1
	add.ftz.f32 	%f19, %f17, %f18;
	sub.ftz.f32 	%f20, %f17, %f18;
	.loc 1 44 1
	setp.gtu.ftz.f32	%p5, %f973, %f789;
	@%p5 bra 	BB4_6;

	mov.f32 	%f794, 0f3F800000;
	bra.uni 	BB4_9;

BB4_6:
	ld.param.f32 	%f787, [VideoLimiter_param_16];
	ld.param.f32 	%f786, [VideoLimiter_param_15];
	.loc 1 44 1
	add.ftz.f32 	%f321, %f786, %f787;
	setp.ltu.ftz.f32	%p6, %f973, %f321;
	@%p6 bra 	BB4_8;

	mov.f32 	%f794, 0f00000000;
	bra.uni 	BB4_9;

BB4_8:
	ld.param.f32 	%f788, [VideoLimiter_param_16];
	ld.param.f32 	%f785, [VideoLimiter_param_15];
	.loc 1 44 1
	sub.ftz.f32 	%f323, %f973, %f785;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f794, %f323, %f788;

BB4_9:
	ld.param.f32 	%f781, [VideoLimiter_param_16];
	ld.param.f32 	%f780, [VideoLimiter_param_15];
	.loc 1 44 1
	sub.ftz.f32 	%f324, %f780, %f781;
	setp.geu.ftz.f32	%p7, %f973, %f324;
	@%p7 bra 	BB4_11;

	mov.f32 	%f795, 0f00000000;
	bra.uni 	BB4_14;

BB4_11:
	ld.param.f32 	%f782, [VideoLimiter_param_15];
	.loc 1 44 1
	setp.leu.ftz.f32	%p8, %f973, %f782;
	@%p8 bra 	BB4_13;

	mov.f32 	%f795, 0f3F800000;
	bra.uni 	BB4_14;

BB4_13:
	ld.param.f32 	%f783, [VideoLimiter_param_16];
	.loc 1 44 1
	sub.ftz.f32 	%f326, %f973, %f324;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f795, %f326, %f783;

BB4_14:
	ld.param.f32 	%f784, [VideoLimiter_param_16];
	ld.param.f32 	%f779, [VideoLimiter_param_14];
	.loc 1 44 1
	setp.gt.ftz.f32	%p9, %f779, %f784;
	selp.f32	%f329, 0f40000000, 0f3F800000, %p9;
	sub.ftz.f32 	%f330, %f329, %f794;
	sub.ftz.f32 	%f331, %f330, %f795;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f25, %f794, %f329;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f26, %f331, %f329;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f27, %f795, %f329;
	.loc 1 44 1
	setp.eq.s32	%p10, %r16, 0;
	@%p10 bra 	BB4_163;

	setp.eq.s32	%p11, %r16, 1;
	@%p11 bra 	BB4_127;

	setp.eq.s32	%p12, %r16, 2;
	@%p12 bra 	BB4_91;

	setp.eq.s32	%p13, %r16, 3;
	@%p13 bra 	BB4_55;

	setp.ne.s32	%p14, %r16, 4;
	mov.f32 	%f862, %f881;
	mov.f32 	%f913, %f932;
	mov.f32 	%f959, %f973;
	@%p14 bra 	BB4_199;

	.loc 1 44 1
	or.b32  	%r26, %r15, 2;
	setp.eq.s32	%p15, %r26, 2;
	@%p15 bra 	BB4_20;
	bra.uni 	BB4_24;

BB4_20:
	.loc 1 44 1
	setp.gt.ftz.f32	%p16, %f17, %f288;
	@%p16 bra 	BB4_23;

	.loc 1 44 1
	setp.geu.ftz.f32	%p17, %f17, %f287;
	@%p17 bra 	BB4_24;

	mov.f32 	%f332, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f973, %f287, %f332;
	bra.uni 	BB4_24;

BB4_23:
	mov.f32 	%f333, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f973, %f288, %f333;

BB4_24:
	.loc 1 44 1
	mov.f32 	%f30, %f973;
	add.s32 	%r27, %r15, -1;
	setp.gt.u32	%p18, %r27, 1;
	mov.f32 	%f880, %f881;
	mov.f32 	%f931, %f932;
	@%p18 bra 	BB4_35;

	.loc 1 44 1
	setp.leu.ftz.f32	%p19, %f19, %f290;
	@%p19 bra 	BB4_30;

	.loc 1 44 1
	sub.ftz.f32 	%f31, %f19, %f290;
	setp.neu.ftz.f32	%p20, %f18, 0f00000000;
	@%p20 bra 	BB4_28;

	mov.f32 	%f796, 0f00000000;
	bra.uni 	BB4_29;

BB4_28:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f335, %f31, %f18;
	mov.f32 	%f336, 0f3F800000;
	.loc 1 44 138
	sub.ftz.f32 	%f796, %f336, %f335;

BB4_29:
	.loc 1 44 1
	setp.lt.ftz.f32	%p21, %f796, 0f00000000;
	selp.f32	%f337, 0f00000000, %f796, %p21;
	.loc 1 44 1
	mul.ftz.f32 	%f932, %f932, %f337;
	mul.ftz.f32 	%f881, %f881, %f337;

BB4_30:
	.loc 1 44 1
	mov.f32 	%f931, %f932;
	mov.f32 	%f880, %f881;
	setp.geu.ftz.f32	%p22, %f20, %f289;
	@%p22 bra 	BB4_35;

	.loc 1 44 1
	sub.ftz.f32 	%f38, %f289, %f20;
	setp.neu.ftz.f32	%p23, %f18, 0f00000000;
	@%p23 bra 	BB4_33;

	mov.f32 	%f797, 0f00000000;
	bra.uni 	BB4_34;

BB4_33:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f339, %f38, %f18;
	mov.f32 	%f340, 0f3F800000;
	.loc 1 44 138
	sub.ftz.f32 	%f797, %f340, %f339;

BB4_34:
	.loc 1 44 1
	setp.lt.ftz.f32	%p24, %f797, 0f00000000;
	selp.f32	%f341, 0f00000000, %f797, %p24;
	.loc 1 44 1
	mul.ftz.f32 	%f931, %f931, %f341;
	mul.ftz.f32 	%f880, %f880, %f341;

BB4_35:
	.loc 1 44 1
	mov.f32 	%f44, %f931;
	mov.f32 	%f43, %f880;
	setp.ne.s32	%p25, %r15, 3;
	mov.f32 	%f862, %f43;
	mov.f32 	%f913, %f44;
	mov.f32 	%f959, %f30;
	@%p25 bra 	BB4_199;

	.loc 1 44 1
	setp.gt.ftz.f32	%p26, %f19, %f292;
	setp.lt.ftz.f32	%p27, %f20, %f291;
	or.pred  	%p28, %p26, %p27;
	.loc 1 44 1
	mov.f32 	%f862, %f43;
	mov.f32 	%f913, %f44;
	mov.f32 	%f959, %f30;
	@!%p28 bra 	BB4_199;
	bra.uni 	BB4_37;

BB4_37:
	.loc 1 44 1
	mul.ftz.f32 	%f342, %f43, 0f3F728F61;
	fma.rn.ftz.f32 	%f343, %f44, 0fBEA3B6E9, %f342;
	mul.ftz.f32 	%f344, %f43, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f345, %f44, 0fBF728F61, %f344;
	.loc 3 8449 10
	abs.ftz.f32 	%f45, %f343;
	abs.ftz.f32 	%f46, %f345;
	setp.eq.ftz.f32	%p29, %f45, 0f00000000;
	setp.eq.ftz.f32	%p30, %f46, 0f00000000;
	and.pred  	%p31, %p29, %p30;
	mov.b32 	 %r3, %f343;
	mov.b32 	 %r28, %f345;
	and.b32  	%r4, %r28, -2147483648;
	@%p31 bra 	BB4_41;

	setp.eq.ftz.f32	%p32, %f45, 0f7F800000;
	setp.eq.ftz.f32	%p33, %f46, 0f7F800000;
	and.pred  	%p34, %p32, %p33;
	@%p34 bra 	BB4_40;

	max.ftz.f32 	%f346, %f46, %f45;
	min.ftz.f32 	%f347, %f46, %f45;
	div.full.ftz.f32 	%f348, %f347, %f346;
	mul.rn.ftz.f32 	%f349, %f348, %f348;
	mov.f32 	%f350, 0fC0B59883;
	mov.f32 	%f351, 0fBF52C7EA;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f352, %f349, %f351, %f350;
	mov.f32 	%f353, 0fC0D21907;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f354, %f352, %f349, %f353;
	mul.ftz.f32 	%f355, %f354, %f349;
	mul.ftz.f32 	%f356, %f355, %f348;
	add.ftz.f32 	%f357, %f349, 0f41355DC0;
	mov.f32 	%f358, 0f41E6BD60;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f359, %f357, %f349, %f358;
	mov.f32 	%f360, 0f419D92C8;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f361, %f359, %f349, %f360;
	rcp.approx.ftz.f32 	%f362, %f361;
	fma.rn.ftz.f32 	%f363, %f356, %f362, %f348;
	mov.f32 	%f364, 0f3FC90FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f365, %f364, %f363;
	setp.gt.ftz.f32	%p35, %f46, %f45;
	selp.f32	%f366, %f365, %f363, %p35;
	mov.f32 	%f367, 0f40490FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f368, %f367, %f366;
	setp.lt.s32	%p36, %r3, 0;
	selp.f32	%f369, %f368, %f366, %p36;
	mov.b32 	 %r29, %f369;
	or.b32  	%r30, %r29, %r4;
	mov.b32 	 %f370, %r30;
	add.ftz.f32 	%f371, %f45, %f46;
	setp.gtu.ftz.f32	%p37, %f371, 0f7F800000;
	selp.f32	%f798, %f371, %f370, %p37;
	bra.uni 	BB4_42;

BB4_40:
	.loc 3 8449 10
	shr.s32 	%r31, %r3, 31;
	and.b32  	%r32, %r31, 13483017;
	add.s32 	%r33, %r32, 1061752795;
	or.b32  	%r34, %r33, %r4;
	mov.b32 	 %f798, %r34;
	bra.uni 	BB4_42;

BB4_41:
	.loc 3 8449 10
	shr.s32 	%r35, %r3, 31;
	and.b32  	%r36, %r35, 1078530011;
	or.b32  	%r37, %r36, %r4;
	mov.b32 	 %f798, %r37;

BB4_42:
	.loc 1 44 1
	add.ftz.f32 	%f372, %f798, 0f40C90FDB;
	.loc 1 44 1
	setp.lt.ftz.f32	%p38, %f798, 0f00000000;
	selp.f32	%f373, %f372, %f798, %p38;
	.loc 1 44 1
	mul.ftz.f32 	%f51, %f373, 0f3E22F983;
	.loc 1 44 1
	setp.lt.ftz.f32	%p39, %f51, 0f3F8147AE;
	mov.f32 	%f802, 0f3F800000;
	sub.ftz.f32 	%f375, %f802, 0f3DAA9931;
	setp.ge.ftz.f32	%p40, %f51, %f375;
	and.pred  	%p41, %p40, %p39;
	.loc 1 44 1
	mov.f32 	%f799, 0f3E4CCCCD;
	mov.f32 	%f800, 0f3F4CCCCD;
	.loc 1 44 1
	@!%p41 bra 	BB4_43;
	bra.uni 	BB4_46;

BB4_43:
	setp.lt.ftz.f32	%p42, %f51, 0f3DAA9931;
	setp.ge.ftz.f32	%p43, %f51, 0f00000000;
	and.pred  	%p44, %p43, %p42;
	.loc 1 44 1
	@!%p44 bra 	BB4_44;
	bra.uni 	BB4_46;

BB4_44:
	.loc 1 44 1
	setp.lt.ftz.f32	%p45, %f51, 0f3E802752;
	mov.f32 	%f376, 0f3E2B020C;
	sub.ftz.f32 	%f377, %f376, 0f3DAA9931;
	setp.ge.ftz.f32	%p46, %f51, %f377;
	and.pred  	%p47, %p46, %p45;
	.loc 1 44 1
	@!%p47 bra 	BB4_46;
	bra.uni 	BB4_45;

BB4_45:
	mov.f32 	%f800, 0f3F000000;
	mov.f32 	%f799, %f800;

BB4_46:
	.loc 1 44 1
	sub.ftz.f32 	%f386, %f19, %f292;
	selp.f32	%f54, %f386, 0f00000000, %p26;
	sub.ftz.f32 	%f387, %f291, %f20;
	selp.f32	%f55, %f387, 0f00000000, %p27;
	.loc 1 44 1
	setp.leu.ftz.f32	%p50, %f54, 0f00000000;
	mov.f32 	%f879, %f43;
	mov.f32 	%f930, %f44;
	mov.f32 	%f972, %f30;
	@%p50 bra 	BB4_51;

	.loc 1 44 1
	mul.ftz.f32 	%f56, %f54, %f799;
	mul.ftz.f32 	%f57, %f54, %f800;
	.loc 1 44 1
	setp.neu.ftz.f32	%p51, %f18, 0f00000000;
	@%p51 bra 	BB4_49;

	mov.f32 	%f801, 0f3F800000;
	bra.uni 	BB4_50;

BB4_49:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f389, %f57, %f18;
	mov.f32 	%f390, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f801, %f390, %f389;

BB4_50:
	.loc 1 44 1
	setp.ltu.ftz.f32	%p52, %f801, 0f3F7D70A4;
	selp.f32	%f391, %f56, %f54, %p52;
	mov.f32 	%f392, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f393, %f391, %f392;
	.loc 1 44 23
	sub.ftz.f32 	%f60, %f30, %f393;
	.loc 1 44 1
	mul.ftz.f32 	%f61, %f44, %f801;
	mul.ftz.f32 	%f62, %f43, %f801;
	mov.f32 	%f879, %f62;
	mov.f32 	%f930, %f61;
	mov.f32 	%f972, %f60;

BB4_51:
	.loc 1 44 1
	mov.f32 	%f946, %f972;
	mov.f32 	%f959, %f946;
	mov.f32 	%f896, %f930;
	mov.f32 	%f913, %f896;
	mov.f32 	%f845, %f879;
	mov.f32 	%f862, %f845;
	setp.leu.ftz.f32	%p53, %f55, 0f00000000;
	@%p53 bra 	BB4_199;

	.loc 1 44 1
	mul.ftz.f32 	%f66, %f55, %f799;
	mul.ftz.f32 	%f67, %f55, %f800;
	.loc 1 44 1
	setp.neu.ftz.f32	%p54, %f18, 0f00000000;
	@%p54 bra 	BB4_53;
	bra.uni 	BB4_54;

BB4_53:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f395, %f67, %f18;
	mov.f32 	%f396, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f802, %f396, %f395;

BB4_54:
	.loc 1 44 1
	setp.ltu.ftz.f32	%p55, %f802, 0f3F7D70A4;
	selp.f32	%f397, %f66, %f55, %p55;
	mov.f32 	%f398, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f399, %f397, %f398;
	.loc 1 44 23
	add.ftz.f32 	%f959, %f30, %f399;
	.loc 1 44 1
	mul.ftz.f32 	%f913, %f44, %f802;
	mul.ftz.f32 	%f862, %f43, %f802;
	bra.uni 	BB4_199;

BB4_55:
	.loc 1 44 1
	or.b32  	%r38, %r15, 2;
	setp.eq.s32	%p56, %r38, 2;
	mov.f32 	%f971, %f973;
	@%p56 bra 	BB4_56;
	bra.uni 	BB4_60;

BB4_56:
	.loc 1 44 1
	setp.gt.ftz.f32	%p57, %f17, %f288;
	@%p57 bra 	BB4_59;

	.loc 1 44 1
	setp.geu.ftz.f32	%p58, %f17, %f287;
	mov.f32 	%f971, %f973;
	@%p58 bra 	BB4_60;

	mov.f32 	%f400, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f401, %f287, %f400;
	.loc 1 44 52
	sub.ftz.f32 	%f402, %f401, %f973;
	.loc 1 44 1
	fma.rn.ftz.f32 	%f403, %f27, %f402, %f973;
	fma.rn.ftz.f32 	%f971, %f25, %f402, %f403;
	bra.uni 	BB4_60;

BB4_59:
	mov.f32 	%f404, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f405, %f288, %f404;
	.loc 1 44 103
	sub.ftz.f32 	%f406, %f973, %f405;
	.loc 1 44 1
	mul.ftz.f32 	%f407, %f27, %f406;
	sub.ftz.f32 	%f408, %f973, %f407;
	mul.ftz.f32 	%f409, %f25, %f406;
	sub.ftz.f32 	%f971, %f408, %f409;

BB4_60:
	.loc 1 44 1
	mov.f32 	%f75, %f971;
	add.s32 	%r39, %r15, -1;
	setp.gt.u32	%p59, %r39, 1;
	mov.f32 	%f877, %f881;
	mov.f32 	%f928, %f932;
	@%p59 bra 	BB4_71;

	.loc 1 44 1
	setp.leu.ftz.f32	%p60, %f19, %f290;
	mov.f32 	%f878, %f881;
	mov.f32 	%f929, %f932;
	@%p60 bra 	BB4_66;

	.loc 1 44 1
	sub.ftz.f32 	%f76, %f19, %f290;
	setp.neu.ftz.f32	%p61, %f18, 0f00000000;
	@%p61 bra 	BB4_64;

	mov.f32 	%f803, 0f00000000;
	bra.uni 	BB4_65;

BB4_64:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f411, %f76, %f18;
	mov.f32 	%f412, 0f3F800000;
	.loc 1 44 138
	sub.ftz.f32 	%f803, %f412, %f411;

BB4_65:
	.loc 1 44 1
	mul.ftz.f32 	%f413, %f932, %f803;
	sub.ftz.f32 	%f414, %f932, %f413;
	mul.ftz.f32 	%f415, %f27, %f414;
	sub.ftz.f32 	%f416, %f932, %f415;
	mul.ftz.f32 	%f417, %f881, %f803;
	sub.ftz.f32 	%f418, %f881, %f417;
	mul.ftz.f32 	%f419, %f27, %f418;
	sub.ftz.f32 	%f420, %f881, %f419;
	mul.ftz.f32 	%f421, %f416, %f803;
	sub.ftz.f32 	%f422, %f416, %f421;
	mul.ftz.f32 	%f423, %f25, %f422;
	sub.ftz.f32 	%f929, %f416, %f423;
	mul.ftz.f32 	%f424, %f420, %f803;
	sub.ftz.f32 	%f425, %f420, %f424;
	mul.ftz.f32 	%f426, %f25, %f425;
	sub.ftz.f32 	%f878, %f420, %f426;

BB4_66:
	.loc 1 44 1
	mov.f32 	%f928, %f929;
	mov.f32 	%f877, %f878;
	setp.geu.ftz.f32	%p62, %f20, %f289;
	@%p62 bra 	BB4_71;

	.loc 1 44 1
	sub.ftz.f32 	%f83, %f289, %f20;
	setp.neu.ftz.f32	%p63, %f18, 0f00000000;
	@%p63 bra 	BB4_69;

	mov.f32 	%f804, 0f00000000;
	bra.uni 	BB4_70;

BB4_69:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f428, %f83, %f18;
	mov.f32 	%f429, 0f3F800000;
	.loc 1 44 138
	sub.ftz.f32 	%f804, %f429, %f428;

BB4_70:
	.loc 1 44 1
	mul.ftz.f32 	%f430, %f928, %f804;
	sub.ftz.f32 	%f431, %f928, %f430;
	fma.rn.ftz.f32 	%f432, %f27, %f431, %f928;
	mul.ftz.f32 	%f433, %f877, %f804;
	sub.ftz.f32 	%f434, %f877, %f433;
	fma.rn.ftz.f32 	%f435, %f27, %f434, %f877;
	mul.ftz.f32 	%f436, %f432, %f804;
	sub.ftz.f32 	%f437, %f432, %f436;
	fma.rn.ftz.f32 	%f928, %f25, %f437, %f432;
	mul.ftz.f32 	%f438, %f435, %f804;
	sub.ftz.f32 	%f439, %f435, %f438;
	fma.rn.ftz.f32 	%f877, %f25, %f439, %f435;

BB4_71:
	.loc 1 44 1
	mov.f32 	%f89, %f928;
	mov.f32 	%f88, %f877;
	setp.ne.s32	%p64, %r15, 3;
	mov.f32 	%f862, %f88;
	mov.f32 	%f913, %f89;
	mov.f32 	%f959, %f75;
	@%p64 bra 	BB4_199;

	.loc 1 44 1
	mul.ftz.f32 	%f440, %f88, 0f3F728F61;
	fma.rn.ftz.f32 	%f441, %f89, 0fBEA3B6E9, %f440;
	mul.ftz.f32 	%f442, %f88, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f443, %f89, 0fBF728F61, %f442;
	.loc 3 8449 10
	abs.ftz.f32 	%f90, %f441;
	abs.ftz.f32 	%f91, %f443;
	setp.eq.ftz.f32	%p65, %f90, 0f00000000;
	setp.eq.ftz.f32	%p66, %f91, 0f00000000;
	and.pred  	%p67, %p65, %p66;
	mov.b32 	 %r5, %f441;
	mov.b32 	 %r40, %f443;
	and.b32  	%r6, %r40, -2147483648;
	@%p67 bra 	BB4_76;

	setp.eq.ftz.f32	%p68, %f90, 0f7F800000;
	setp.eq.ftz.f32	%p69, %f91, 0f7F800000;
	and.pred  	%p70, %p68, %p69;
	@%p70 bra 	BB4_75;

	max.ftz.f32 	%f444, %f91, %f90;
	min.ftz.f32 	%f445, %f91, %f90;
	div.full.ftz.f32 	%f446, %f445, %f444;
	mul.rn.ftz.f32 	%f447, %f446, %f446;
	mov.f32 	%f448, 0fC0B59883;
	mov.f32 	%f449, 0fBF52C7EA;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f450, %f447, %f449, %f448;
	mov.f32 	%f451, 0fC0D21907;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f452, %f450, %f447, %f451;
	mul.ftz.f32 	%f453, %f452, %f447;
	mul.ftz.f32 	%f454, %f453, %f446;
	add.ftz.f32 	%f455, %f447, 0f41355DC0;
	mov.f32 	%f456, 0f41E6BD60;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f457, %f455, %f447, %f456;
	mov.f32 	%f458, 0f419D92C8;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f459, %f457, %f447, %f458;
	rcp.approx.ftz.f32 	%f460, %f459;
	fma.rn.ftz.f32 	%f461, %f454, %f460, %f446;
	mov.f32 	%f462, 0f3FC90FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f463, %f462, %f461;
	setp.gt.ftz.f32	%p71, %f91, %f90;
	selp.f32	%f464, %f463, %f461, %p71;
	mov.f32 	%f465, 0f40490FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f466, %f465, %f464;
	setp.lt.s32	%p72, %r5, 0;
	selp.f32	%f467, %f466, %f464, %p72;
	mov.b32 	 %r41, %f467;
	or.b32  	%r42, %r41, %r6;
	mov.b32 	 %f468, %r42;
	add.ftz.f32 	%f469, %f90, %f91;
	setp.gtu.ftz.f32	%p73, %f469, 0f7F800000;
	selp.f32	%f805, %f469, %f468, %p73;
	bra.uni 	BB4_77;

BB4_75:
	.loc 3 8449 10
	shr.s32 	%r43, %r5, 31;
	and.b32  	%r44, %r43, 13483017;
	add.s32 	%r45, %r44, 1061752795;
	or.b32  	%r46, %r45, %r6;
	mov.b32 	 %f805, %r46;
	bra.uni 	BB4_77;

BB4_76:
	.loc 3 8449 10
	shr.s32 	%r47, %r5, 31;
	and.b32  	%r48, %r47, 1078530011;
	or.b32  	%r49, %r48, %r6;
	mov.b32 	 %f805, %r49;

BB4_77:
	.loc 1 44 1
	add.ftz.f32 	%f470, %f805, 0f40C90FDB;
	.loc 1 44 1
	setp.lt.ftz.f32	%p74, %f805, 0f00000000;
	selp.f32	%f471, %f470, %f805, %p74;
	.loc 1 44 1
	mul.ftz.f32 	%f96, %f471, 0f3E22F983;
	.loc 1 44 1
	setp.lt.ftz.f32	%p75, %f96, 0f3F8147AE;
	mov.f32 	%f809, 0f3F800000;
	sub.ftz.f32 	%f473, %f809, 0f3DAA9931;
	setp.ge.ftz.f32	%p76, %f96, %f473;
	and.pred  	%p77, %p76, %p75;
	.loc 1 44 1
	mov.f32 	%f806, 0f3E4CCCCD;
	mov.f32 	%f807, 0f3F4CCCCD;
	.loc 1 44 1
	@!%p77 bra 	BB4_78;
	bra.uni 	BB4_81;

BB4_78:
	setp.lt.ftz.f32	%p78, %f96, 0f3DAA9931;
	setp.ge.ftz.f32	%p79, %f96, 0f00000000;
	and.pred  	%p80, %p79, %p78;
	.loc 1 44 1
	@!%p80 bra 	BB4_79;
	bra.uni 	BB4_81;

BB4_79:
	.loc 1 44 1
	setp.lt.ftz.f32	%p81, %f96, 0f3E802752;
	mov.f32 	%f474, 0f3E2B020C;
	sub.ftz.f32 	%f475, %f474, 0f3DAA9931;
	setp.ge.ftz.f32	%p82, %f96, %f475;
	and.pred  	%p83, %p82, %p81;
	.loc 1 44 1
	@!%p83 bra 	BB4_81;
	bra.uni 	BB4_80;

BB4_80:
	mov.f32 	%f807, 0f3F000000;
	mov.f32 	%f806, %f807;

BB4_81:
	.loc 1 44 1
	sub.ftz.f32 	%f484, %f19, %f292;
	setp.gt.ftz.f32	%p84, %f19, %f292;
	selp.f32	%f99, %f484, 0f00000000, %p84;
	sub.ftz.f32 	%f485, %f291, %f20;
	setp.lt.ftz.f32	%p85, %f20, %f291;
	selp.f32	%f100, %f485, 0f00000000, %p85;
	.loc 1 44 1
	setp.leu.ftz.f32	%p86, %f99, 0f00000000;
	mov.f32 	%f876, %f88;
	mov.f32 	%f927, %f89;
	mov.f32 	%f970, %f75;
	@%p86 bra 	BB4_86;

	.loc 1 44 1
	mul.ftz.f32 	%f101, %f99, %f806;
	mul.ftz.f32 	%f102, %f99, %f807;
	.loc 1 44 1
	setp.neu.ftz.f32	%p87, %f18, 0f00000000;
	@%p87 bra 	BB4_84;

	mov.f32 	%f808, 0f3F800000;
	bra.uni 	BB4_85;

BB4_84:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f487, %f102, %f18;
	mov.f32 	%f488, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f808, %f488, %f487;

BB4_85:
	.loc 1 44 1
	setp.ltu.ftz.f32	%p88, %f808, 0f3F7D70A4;
	selp.f32	%f489, %f101, %f99, %p88;
	mov.f32 	%f490, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f491, %f489, %f490;
	.loc 1 44 23
	sub.ftz.f32 	%f105, %f75, %f491;
	.loc 1 44 1
	mul.ftz.f32 	%f106, %f89, %f808;
	mul.ftz.f32 	%f107, %f88, %f808;
	mov.f32 	%f876, %f107;
	mov.f32 	%f927, %f106;
	mov.f32 	%f970, %f105;

BB4_86:
	.loc 1 44 1
	mov.f32 	%f949, %f970;
	mov.f32 	%f969, %f949;
	mov.f32 	%f900, %f927;
	mov.f32 	%f926, %f900;
	mov.f32 	%f849, %f876;
	mov.f32 	%f875, %f849;
	setp.leu.ftz.f32	%p89, %f100, 0f00000000;
	@%p89 bra 	BB4_90;

	.loc 1 44 1
	mul.ftz.f32 	%f111, %f100, %f806;
	mul.ftz.f32 	%f112, %f100, %f807;
	.loc 1 44 1
	setp.neu.ftz.f32	%p90, %f18, 0f00000000;
	@%p90 bra 	BB4_88;
	bra.uni 	BB4_89;

BB4_88:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f493, %f112, %f18;
	mov.f32 	%f494, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f809, %f494, %f493;

BB4_89:
	.loc 1 44 1
	setp.ltu.ftz.f32	%p91, %f809, 0f3F7D70A4;
	selp.f32	%f495, %f111, %f100, %p91;
	mov.f32 	%f496, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f497, %f495, %f496;
	.loc 1 44 23
	add.ftz.f32 	%f969, %f75, %f497;
	.loc 1 44 1
	mul.ftz.f32 	%f926, %f89, %f809;
	mul.ftz.f32 	%f875, %f88, %f809;

BB4_90:
	.loc 1 44 1
	sub.ftz.f32 	%f498, %f969, %f75;
	add.ftz.f32 	%f499, %f27, %f25;
	fma.rn.ftz.f32 	%f959, %f499, %f498, %f75;
	sub.ftz.f32 	%f500, %f926, %f89;
	fma.rn.ftz.f32 	%f913, %f499, %f500, %f89;
	sub.ftz.f32 	%f501, %f875, %f88;
	fma.rn.ftz.f32 	%f862, %f499, %f501, %f88;
	bra.uni 	BB4_199;

BB4_91:
	.loc 1 44 1
	or.b32  	%r50, %r15, 2;
	setp.eq.s32	%p92, %r50, 2;
	mov.f32 	%f968, %f973;
	@%p92 bra 	BB4_92;
	bra.uni 	BB4_96;

BB4_92:
	.loc 1 44 1
	setp.gt.ftz.f32	%p93, %f17, %f288;
	@%p93 bra 	BB4_95;

	.loc 1 44 1
	setp.geu.ftz.f32	%p94, %f17, %f287;
	mov.f32 	%f968, %f973;
	@%p94 bra 	BB4_96;

	mov.f32 	%f502, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f503, %f287, %f502;
	.loc 1 44 51
	sub.ftz.f32 	%f504, %f503, %f973;
	.loc 1 44 1
	fma.rn.ftz.f32 	%f968, %f25, %f504, %f973;
	bra.uni 	BB4_96;

BB4_95:
	mov.f32 	%f505, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f506, %f288, %f505;
	.loc 1 44 102
	sub.ftz.f32 	%f507, %f973, %f506;
	.loc 1 44 1
	mul.ftz.f32 	%f508, %f25, %f507;
	sub.ftz.f32 	%f968, %f973, %f508;

BB4_96:
	.loc 1 44 1
	mov.f32 	%f126, %f968;
	add.s32 	%r51, %r15, -1;
	setp.gt.u32	%p95, %r51, 1;
	mov.f32 	%f873, %f881;
	mov.f32 	%f924, %f932;
	@%p95 bra 	BB4_107;

	.loc 1 44 1
	setp.leu.ftz.f32	%p96, %f19, %f290;
	mov.f32 	%f874, %f881;
	mov.f32 	%f925, %f932;
	@%p96 bra 	BB4_102;

	.loc 1 44 1
	sub.ftz.f32 	%f127, %f19, %f290;
	setp.neu.ftz.f32	%p97, %f18, 0f00000000;
	@%p97 bra 	BB4_100;

	mov.f32 	%f810, 0f00000000;
	bra.uni 	BB4_101;

BB4_100:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f510, %f127, %f18;
	mov.f32 	%f511, 0f3F800000;
	.loc 1 44 137
	sub.ftz.f32 	%f810, %f511, %f510;

BB4_101:
	.loc 1 44 1
	mul.ftz.f32 	%f512, %f932, %f810;
	sub.ftz.f32 	%f513, %f932, %f512;
	mul.ftz.f32 	%f514, %f25, %f513;
	sub.ftz.f32 	%f925, %f932, %f514;
	mul.ftz.f32 	%f515, %f881, %f810;
	sub.ftz.f32 	%f516, %f881, %f515;
	mul.ftz.f32 	%f517, %f25, %f516;
	sub.ftz.f32 	%f874, %f881, %f517;

BB4_102:
	.loc 1 44 1
	mov.f32 	%f924, %f925;
	mov.f32 	%f873, %f874;
	setp.geu.ftz.f32	%p98, %f20, %f289;
	@%p98 bra 	BB4_107;

	.loc 1 44 1
	sub.ftz.f32 	%f134, %f289, %f20;
	setp.neu.ftz.f32	%p99, %f18, 0f00000000;
	@%p99 bra 	BB4_105;

	mov.f32 	%f811, 0f00000000;
	bra.uni 	BB4_106;

BB4_105:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f519, %f134, %f18;
	mov.f32 	%f520, 0f3F800000;
	.loc 1 44 138
	sub.ftz.f32 	%f811, %f520, %f519;

BB4_106:
	.loc 1 44 1
	mul.ftz.f32 	%f521, %f924, %f811;
	sub.ftz.f32 	%f522, %f924, %f521;
	fma.rn.ftz.f32 	%f924, %f25, %f522, %f924;
	mul.ftz.f32 	%f523, %f873, %f811;
	sub.ftz.f32 	%f524, %f873, %f523;
	fma.rn.ftz.f32 	%f873, %f25, %f524, %f873;

BB4_107:
	.loc 1 44 1
	mov.f32 	%f140, %f924;
	mov.f32 	%f139, %f873;
	setp.ne.s32	%p100, %r15, 3;
	mov.f32 	%f862, %f139;
	mov.f32 	%f913, %f140;
	mov.f32 	%f959, %f126;
	@%p100 bra 	BB4_199;

	.loc 1 44 1
	mul.ftz.f32 	%f525, %f139, 0f3F728F61;
	fma.rn.ftz.f32 	%f526, %f140, 0fBEA3B6E9, %f525;
	mul.ftz.f32 	%f527, %f139, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f528, %f140, 0fBF728F61, %f527;
	.loc 3 8449 10
	abs.ftz.f32 	%f141, %f526;
	abs.ftz.f32 	%f142, %f528;
	setp.eq.ftz.f32	%p101, %f141, 0f00000000;
	setp.eq.ftz.f32	%p102, %f142, 0f00000000;
	and.pred  	%p103, %p101, %p102;
	mov.b32 	 %r7, %f526;
	mov.b32 	 %r52, %f528;
	and.b32  	%r8, %r52, -2147483648;
	@%p103 bra 	BB4_112;

	setp.eq.ftz.f32	%p104, %f141, 0f7F800000;
	setp.eq.ftz.f32	%p105, %f142, 0f7F800000;
	and.pred  	%p106, %p104, %p105;
	@%p106 bra 	BB4_111;

	max.ftz.f32 	%f529, %f142, %f141;
	min.ftz.f32 	%f530, %f142, %f141;
	div.full.ftz.f32 	%f531, %f530, %f529;
	mul.rn.ftz.f32 	%f532, %f531, %f531;
	mov.f32 	%f533, 0fC0B59883;
	mov.f32 	%f534, 0fBF52C7EA;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f535, %f532, %f534, %f533;
	mov.f32 	%f536, 0fC0D21907;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f537, %f535, %f532, %f536;
	mul.ftz.f32 	%f538, %f537, %f532;
	mul.ftz.f32 	%f539, %f538, %f531;
	add.ftz.f32 	%f540, %f532, 0f41355DC0;
	mov.f32 	%f541, 0f41E6BD60;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f542, %f540, %f532, %f541;
	mov.f32 	%f543, 0f419D92C8;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f544, %f542, %f532, %f543;
	rcp.approx.ftz.f32 	%f545, %f544;
	fma.rn.ftz.f32 	%f546, %f539, %f545, %f531;
	mov.f32 	%f547, 0f3FC90FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f548, %f547, %f546;
	setp.gt.ftz.f32	%p107, %f142, %f141;
	selp.f32	%f549, %f548, %f546, %p107;
	mov.f32 	%f550, 0f40490FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f551, %f550, %f549;
	setp.lt.s32	%p108, %r7, 0;
	selp.f32	%f552, %f551, %f549, %p108;
	mov.b32 	 %r53, %f552;
	or.b32  	%r54, %r53, %r8;
	mov.b32 	 %f553, %r54;
	add.ftz.f32 	%f554, %f141, %f142;
	setp.gtu.ftz.f32	%p109, %f554, 0f7F800000;
	selp.f32	%f812, %f554, %f553, %p109;
	bra.uni 	BB4_113;

BB4_111:
	.loc 3 8449 10
	shr.s32 	%r55, %r7, 31;
	and.b32  	%r56, %r55, 13483017;
	add.s32 	%r57, %r56, 1061752795;
	or.b32  	%r58, %r57, %r8;
	mov.b32 	 %f812, %r58;
	bra.uni 	BB4_113;

BB4_112:
	.loc 3 8449 10
	shr.s32 	%r59, %r7, 31;
	and.b32  	%r60, %r59, 1078530011;
	or.b32  	%r61, %r60, %r8;
	mov.b32 	 %f812, %r61;

BB4_113:
	.loc 1 44 1
	add.ftz.f32 	%f555, %f812, 0f40C90FDB;
	.loc 1 44 1
	setp.lt.ftz.f32	%p110, %f812, 0f00000000;
	selp.f32	%f556, %f555, %f812, %p110;
	.loc 1 44 1
	mul.ftz.f32 	%f147, %f556, 0f3E22F983;
	.loc 1 44 1
	setp.lt.ftz.f32	%p111, %f147, 0f3F8147AE;
	mov.f32 	%f816, 0f3F800000;
	sub.ftz.f32 	%f558, %f816, 0f3DAA9931;
	setp.ge.ftz.f32	%p112, %f147, %f558;
	and.pred  	%p113, %p112, %p111;
	.loc 1 44 1
	mov.f32 	%f813, 0f3E4CCCCD;
	mov.f32 	%f814, 0f3F4CCCCD;
	.loc 1 44 1
	@!%p113 bra 	BB4_114;
	bra.uni 	BB4_117;

BB4_114:
	setp.lt.ftz.f32	%p114, %f147, 0f3DAA9931;
	setp.ge.ftz.f32	%p115, %f147, 0f00000000;
	and.pred  	%p116, %p115, %p114;
	.loc 1 44 1
	@!%p116 bra 	BB4_115;
	bra.uni 	BB4_117;

BB4_115:
	.loc 1 44 1
	setp.lt.ftz.f32	%p117, %f147, 0f3E802752;
	mov.f32 	%f559, 0f3E2B020C;
	sub.ftz.f32 	%f560, %f559, 0f3DAA9931;
	setp.ge.ftz.f32	%p118, %f147, %f560;
	and.pred  	%p119, %p118, %p117;
	.loc 1 44 1
	@!%p119 bra 	BB4_117;
	bra.uni 	BB4_116;

BB4_116:
	mov.f32 	%f814, 0f3F000000;
	mov.f32 	%f813, %f814;

BB4_117:
	.loc 1 44 1
	sub.ftz.f32 	%f569, %f19, %f292;
	setp.gt.ftz.f32	%p120, %f19, %f292;
	selp.f32	%f150, %f569, 0f00000000, %p120;
	sub.ftz.f32 	%f570, %f291, %f20;
	setp.lt.ftz.f32	%p121, %f20, %f291;
	selp.f32	%f151, %f570, 0f00000000, %p121;
	.loc 1 44 1
	setp.leu.ftz.f32	%p122, %f150, 0f00000000;
	mov.f32 	%f872, %f139;
	mov.f32 	%f923, %f140;
	mov.f32 	%f967, %f126;
	@%p122 bra 	BB4_122;

	.loc 1 44 1
	mul.ftz.f32 	%f152, %f150, %f813;
	mul.ftz.f32 	%f153, %f150, %f814;
	.loc 1 44 1
	setp.neu.ftz.f32	%p123, %f18, 0f00000000;
	@%p123 bra 	BB4_120;

	mov.f32 	%f815, 0f3F800000;
	bra.uni 	BB4_121;

BB4_120:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f572, %f153, %f18;
	mov.f32 	%f573, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f815, %f573, %f572;

BB4_121:
	.loc 1 44 1
	setp.ltu.ftz.f32	%p124, %f815, 0f3F7D70A4;
	selp.f32	%f574, %f152, %f150, %p124;
	mov.f32 	%f575, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f576, %f574, %f575;
	.loc 1 44 23
	sub.ftz.f32 	%f156, %f126, %f576;
	.loc 1 44 1
	mul.ftz.f32 	%f157, %f140, %f815;
	mul.ftz.f32 	%f158, %f139, %f815;
	mov.f32 	%f872, %f158;
	mov.f32 	%f923, %f157;
	mov.f32 	%f967, %f156;

BB4_122:
	.loc 1 44 1
	mov.f32 	%f952, %f967;
	mov.f32 	%f966, %f952;
	mov.f32 	%f904, %f923;
	mov.f32 	%f922, %f904;
	mov.f32 	%f853, %f872;
	mov.f32 	%f871, %f853;
	setp.leu.ftz.f32	%p125, %f151, 0f00000000;
	@%p125 bra 	BB4_126;

	.loc 1 44 1
	mul.ftz.f32 	%f162, %f151, %f813;
	mul.ftz.f32 	%f163, %f151, %f814;
	.loc 1 44 1
	setp.neu.ftz.f32	%p126, %f18, 0f00000000;
	@%p126 bra 	BB4_124;
	bra.uni 	BB4_125;

BB4_124:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f578, %f163, %f18;
	mov.f32 	%f579, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f816, %f579, %f578;

BB4_125:
	.loc 1 44 1
	setp.ltu.ftz.f32	%p127, %f816, 0f3F7D70A4;
	selp.f32	%f580, %f162, %f151, %p127;
	mov.f32 	%f581, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f582, %f580, %f581;
	.loc 1 44 23
	add.ftz.f32 	%f966, %f126, %f582;
	.loc 1 44 1
	mul.ftz.f32 	%f922, %f140, %f816;
	mul.ftz.f32 	%f871, %f139, %f816;

BB4_126:
	.loc 1 44 1
	sub.ftz.f32 	%f583, %f966, %f126;
	fma.rn.ftz.f32 	%f959, %f25, %f583, %f126;
	sub.ftz.f32 	%f584, %f922, %f140;
	fma.rn.ftz.f32 	%f913, %f25, %f584, %f140;
	sub.ftz.f32 	%f585, %f871, %f139;
	fma.rn.ftz.f32 	%f862, %f25, %f585, %f139;
	bra.uni 	BB4_199;

BB4_127:
	.loc 1 44 1
	or.b32  	%r62, %r15, 2;
	setp.eq.s32	%p128, %r62, 2;
	mov.f32 	%f965, %f973;
	@%p128 bra 	BB4_128;
	bra.uni 	BB4_132;

BB4_128:
	.loc 1 44 1
	setp.gt.ftz.f32	%p129, %f17, %f288;
	@%p129 bra 	BB4_131;

	.loc 1 44 1
	setp.geu.ftz.f32	%p130, %f17, %f287;
	mov.f32 	%f965, %f973;
	@%p130 bra 	BB4_132;

	mov.f32 	%f586, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f587, %f287, %f586;
	.loc 1 44 51
	sub.ftz.f32 	%f588, %f587, %f973;
	.loc 1 44 1
	fma.rn.ftz.f32 	%f965, %f26, %f588, %f973;
	bra.uni 	BB4_132;

BB4_131:
	mov.f32 	%f589, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f590, %f288, %f589;
	.loc 1 44 102
	sub.ftz.f32 	%f591, %f973, %f590;
	.loc 1 44 1
	mul.ftz.f32 	%f592, %f26, %f591;
	sub.ftz.f32 	%f965, %f973, %f592;

BB4_132:
	.loc 1 44 1
	mov.f32 	%f959, %f965;
	add.s32 	%r63, %r15, -1;
	setp.gt.u32	%p131, %r63, 1;
	mov.f32 	%f869, %f881;
	mov.f32 	%f920, %f932;
	@%p131 bra 	BB4_143;

	.loc 1 44 1
	setp.leu.ftz.f32	%p132, %f19, %f290;
	mov.f32 	%f870, %f881;
	mov.f32 	%f921, %f932;
	@%p132 bra 	BB4_138;

	.loc 1 44 1
	sub.ftz.f32 	%f178, %f19, %f290;
	setp.neu.ftz.f32	%p133, %f18, 0f00000000;
	@%p133 bra 	BB4_136;

	mov.f32 	%f817, 0f00000000;
	bra.uni 	BB4_137;

BB4_136:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f594, %f178, %f18;
	mov.f32 	%f595, 0f3F800000;
	.loc 1 44 137
	sub.ftz.f32 	%f817, %f595, %f594;

BB4_137:
	.loc 1 44 1
	mul.ftz.f32 	%f596, %f932, %f817;
	sub.ftz.f32 	%f597, %f932, %f596;
	mul.ftz.f32 	%f598, %f26, %f597;
	sub.ftz.f32 	%f921, %f932, %f598;
	mul.ftz.f32 	%f599, %f881, %f817;
	sub.ftz.f32 	%f600, %f881, %f599;
	mul.ftz.f32 	%f601, %f26, %f600;
	sub.ftz.f32 	%f870, %f881, %f601;

BB4_138:
	.loc 1 44 1
	mov.f32 	%f920, %f921;
	mov.f32 	%f869, %f870;
	setp.geu.ftz.f32	%p134, %f20, %f289;
	@%p134 bra 	BB4_143;

	.loc 1 44 1
	sub.ftz.f32 	%f185, %f289, %f20;
	setp.neu.ftz.f32	%p135, %f18, 0f00000000;
	@%p135 bra 	BB4_141;

	mov.f32 	%f818, 0f00000000;
	bra.uni 	BB4_142;

BB4_141:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f603, %f185, %f18;
	mov.f32 	%f604, 0f3F800000;
	.loc 1 44 137
	sub.ftz.f32 	%f818, %f604, %f603;

BB4_142:
	.loc 1 44 1
	mul.ftz.f32 	%f605, %f920, %f818;
	sub.ftz.f32 	%f606, %f920, %f605;
	fma.rn.ftz.f32 	%f920, %f26, %f606, %f920;
	mul.ftz.f32 	%f607, %f869, %f818;
	sub.ftz.f32 	%f608, %f869, %f607;
	fma.rn.ftz.f32 	%f869, %f26, %f608, %f869;

BB4_143:
	.loc 1 44 1
	mov.f32 	%f913, %f920;
	mov.f32 	%f862, %f869;
	setp.ne.s32	%p136, %r15, 3;
	@%p136 bra 	BB4_199;

	.loc 1 44 1
	mul.ftz.f32 	%f609, %f862, 0f3F728F61;
	fma.rn.ftz.f32 	%f610, %f913, 0fBEA3B6E9, %f609;
	mul.ftz.f32 	%f611, %f862, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f612, %f913, 0fBF728F61, %f611;
	.loc 3 8449 10
	abs.ftz.f32 	%f192, %f610;
	abs.ftz.f32 	%f193, %f612;
	setp.eq.ftz.f32	%p137, %f192, 0f00000000;
	setp.eq.ftz.f32	%p138, %f193, 0f00000000;
	and.pred  	%p139, %p137, %p138;
	mov.b32 	 %r9, %f610;
	mov.b32 	 %r64, %f612;
	and.b32  	%r10, %r64, -2147483648;
	@%p139 bra 	BB4_148;

	setp.eq.ftz.f32	%p140, %f192, 0f7F800000;
	setp.eq.ftz.f32	%p141, %f193, 0f7F800000;
	and.pred  	%p142, %p140, %p141;
	@%p142 bra 	BB4_147;

	max.ftz.f32 	%f613, %f193, %f192;
	min.ftz.f32 	%f614, %f193, %f192;
	div.full.ftz.f32 	%f615, %f614, %f613;
	mul.rn.ftz.f32 	%f616, %f615, %f615;
	mov.f32 	%f617, 0fC0B59883;
	mov.f32 	%f618, 0fBF52C7EA;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f619, %f616, %f618, %f617;
	mov.f32 	%f620, 0fC0D21907;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f621, %f619, %f616, %f620;
	mul.ftz.f32 	%f622, %f621, %f616;
	mul.ftz.f32 	%f623, %f622, %f615;
	add.ftz.f32 	%f624, %f616, 0f41355DC0;
	mov.f32 	%f625, 0f41E6BD60;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f626, %f624, %f616, %f625;
	mov.f32 	%f627, 0f419D92C8;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f628, %f626, %f616, %f627;
	rcp.approx.ftz.f32 	%f629, %f628;
	fma.rn.ftz.f32 	%f630, %f623, %f629, %f615;
	mov.f32 	%f631, 0f3FC90FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f632, %f631, %f630;
	setp.gt.ftz.f32	%p143, %f193, %f192;
	selp.f32	%f633, %f632, %f630, %p143;
	mov.f32 	%f634, 0f40490FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f635, %f634, %f633;
	setp.lt.s32	%p144, %r9, 0;
	selp.f32	%f636, %f635, %f633, %p144;
	mov.b32 	 %r65, %f636;
	or.b32  	%r66, %r65, %r10;
	mov.b32 	 %f637, %r66;
	add.ftz.f32 	%f638, %f192, %f193;
	setp.gtu.ftz.f32	%p145, %f638, 0f7F800000;
	selp.f32	%f819, %f638, %f637, %p145;
	bra.uni 	BB4_149;

BB4_147:
	.loc 3 8449 10
	shr.s32 	%r67, %r9, 31;
	and.b32  	%r68, %r67, 13483017;
	add.s32 	%r69, %r68, 1061752795;
	or.b32  	%r70, %r69, %r10;
	mov.b32 	 %f819, %r70;
	bra.uni 	BB4_149;

BB4_148:
	.loc 3 8449 10
	shr.s32 	%r71, %r9, 31;
	and.b32  	%r72, %r71, 1078530011;
	or.b32  	%r73, %r72, %r10;
	mov.b32 	 %f819, %r73;

BB4_149:
	.loc 1 44 1
	add.ftz.f32 	%f639, %f819, 0f40C90FDB;
	.loc 1 44 1
	setp.lt.ftz.f32	%p146, %f819, 0f00000000;
	selp.f32	%f640, %f639, %f819, %p146;
	.loc 1 44 1
	mul.ftz.f32 	%f198, %f640, 0f3E22F983;
	.loc 1 44 1
	setp.lt.ftz.f32	%p147, %f198, 0f3F8147AE;
	mov.f32 	%f823, 0f3F800000;
	sub.ftz.f32 	%f642, %f823, 0f3DAA9931;
	setp.ge.ftz.f32	%p148, %f198, %f642;
	and.pred  	%p149, %p148, %p147;
	.loc 1 44 1
	mov.f32 	%f820, 0f3E4CCCCD;
	mov.f32 	%f821, 0f3F4CCCCD;
	.loc 1 44 1
	@!%p149 bra 	BB4_150;
	bra.uni 	BB4_153;

BB4_150:
	setp.lt.ftz.f32	%p150, %f198, 0f3DAA9931;
	setp.ge.ftz.f32	%p151, %f198, 0f00000000;
	and.pred  	%p152, %p151, %p150;
	.loc 1 44 1
	@!%p152 bra 	BB4_151;
	bra.uni 	BB4_153;

BB4_151:
	.loc 1 44 1
	setp.lt.ftz.f32	%p153, %f198, 0f3E802752;
	mov.f32 	%f643, 0f3E2B020C;
	sub.ftz.f32 	%f644, %f643, 0f3DAA9931;
	setp.ge.ftz.f32	%p154, %f198, %f644;
	and.pred  	%p155, %p154, %p153;
	.loc 1 44 1
	@!%p155 bra 	BB4_153;
	bra.uni 	BB4_152;

BB4_152:
	mov.f32 	%f821, 0f3F000000;
	mov.f32 	%f820, %f821;

BB4_153:
	.loc 1 44 1
	sub.ftz.f32 	%f653, %f19, %f292;
	setp.gt.ftz.f32	%p156, %f19, %f292;
	selp.f32	%f201, %f653, 0f00000000, %p156;
	sub.ftz.f32 	%f654, %f291, %f20;
	setp.lt.ftz.f32	%p157, %f20, %f291;
	selp.f32	%f202, %f654, 0f00000000, %p157;
	.loc 1 44 1
	setp.leu.ftz.f32	%p158, %f201, 0f00000000;
	mov.f32 	%f868, %f862;
	mov.f32 	%f919, %f913;
	mov.f32 	%f964, %f959;
	@%p158 bra 	BB4_158;

	.loc 1 44 1
	mul.ftz.f32 	%f203, %f201, %f820;
	mul.ftz.f32 	%f204, %f201, %f821;
	.loc 1 44 1
	setp.neu.ftz.f32	%p159, %f18, 0f00000000;
	@%p159 bra 	BB4_156;

	mov.f32 	%f822, 0f3F800000;
	bra.uni 	BB4_157;

BB4_156:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f656, %f204, %f18;
	mov.f32 	%f657, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f822, %f657, %f656;

BB4_157:
	.loc 1 44 1
	setp.ltu.ftz.f32	%p160, %f822, 0f3F7D70A4;
	selp.f32	%f658, %f203, %f201, %p160;
	mov.f32 	%f659, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f660, %f658, %f659;
	.loc 1 44 23
	sub.ftz.f32 	%f964, %f959, %f660;
	.loc 1 44 1
	mul.ftz.f32 	%f919, %f913, %f822;
	mul.ftz.f32 	%f868, %f862, %f822;

BB4_158:
	.loc 1 44 1
	mov.f32 	%f963, %f964;
	mov.f32 	%f918, %f919;
	mov.f32 	%f867, %f868;
	setp.leu.ftz.f32	%p161, %f202, 0f00000000;
	@%p161 bra 	BB4_162;

	.loc 1 44 1
	mul.ftz.f32 	%f213, %f202, %f820;
	mul.ftz.f32 	%f214, %f202, %f821;
	.loc 1 44 1
	setp.neu.ftz.f32	%p162, %f18, 0f00000000;
	@%p162 bra 	BB4_160;
	bra.uni 	BB4_161;

BB4_160:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f662, %f214, %f18;
	mov.f32 	%f663, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f823, %f663, %f662;

BB4_161:
	.loc 1 44 1
	setp.ltu.ftz.f32	%p163, %f823, 0f3F7D70A4;
	selp.f32	%f664, %f213, %f202, %p163;
	mov.f32 	%f665, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f666, %f664, %f665;
	.loc 1 44 23
	add.ftz.f32 	%f963, %f959, %f666;
	.loc 1 44 1
	mul.ftz.f32 	%f918, %f913, %f823;
	mul.ftz.f32 	%f867, %f862, %f823;

BB4_162:
	.loc 1 44 1
	sub.ftz.f32 	%f667, %f963, %f959;
	fma.rn.ftz.f32 	%f959, %f26, %f667, %f959;
	sub.ftz.f32 	%f668, %f918, %f913;
	fma.rn.ftz.f32 	%f913, %f26, %f668, %f913;
	sub.ftz.f32 	%f669, %f867, %f862;
	fma.rn.ftz.f32 	%f862, %f26, %f669, %f862;
	bra.uni 	BB4_199;

BB4_163:
	.loc 1 44 1
	or.b32  	%r74, %r15, 2;
	setp.eq.s32	%p164, %r74, 2;
	mov.f32 	%f962, %f973;
	@%p164 bra 	BB4_164;
	bra.uni 	BB4_168;

BB4_164:
	.loc 1 44 1
	setp.gt.ftz.f32	%p165, %f17, %f288;
	@%p165 bra 	BB4_167;

	.loc 1 44 1
	setp.geu.ftz.f32	%p166, %f17, %f287;
	mov.f32 	%f962, %f973;
	@%p166 bra 	BB4_168;

	mov.f32 	%f670, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f671, %f287, %f670;
	.loc 1 44 51
	sub.ftz.f32 	%f672, %f671, %f973;
	.loc 1 44 1
	fma.rn.ftz.f32 	%f962, %f27, %f672, %f973;
	bra.uni 	BB4_168;

BB4_167:
	mov.f32 	%f673, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f674, %f288, %f673;
	.loc 1 44 102
	sub.ftz.f32 	%f675, %f973, %f674;
	.loc 1 44 1
	mul.ftz.f32 	%f676, %f27, %f675;
	sub.ftz.f32 	%f962, %f973, %f676;

BB4_168:
	.loc 1 44 1
	mov.f32 	%f959, %f962;
	add.s32 	%r75, %r15, -1;
	setp.gt.u32	%p167, %r75, 1;
	mov.f32 	%f865, %f881;
	mov.f32 	%f916, %f932;
	@%p167 bra 	BB4_179;

	.loc 1 44 1
	setp.leu.ftz.f32	%p168, %f19, %f290;
	mov.f32 	%f866, %f881;
	mov.f32 	%f917, %f932;
	@%p168 bra 	BB4_174;

	.loc 1 44 1
	sub.ftz.f32 	%f229, %f19, %f290;
	setp.neu.ftz.f32	%p169, %f18, 0f00000000;
	@%p169 bra 	BB4_172;

	mov.f32 	%f824, 0f00000000;
	bra.uni 	BB4_173;

BB4_172:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f678, %f229, %f18;
	mov.f32 	%f679, 0f3F800000;
	.loc 1 44 137
	sub.ftz.f32 	%f824, %f679, %f678;

BB4_173:
	.loc 1 44 1
	mul.ftz.f32 	%f680, %f932, %f824;
	sub.ftz.f32 	%f681, %f932, %f680;
	mul.ftz.f32 	%f682, %f27, %f681;
	sub.ftz.f32 	%f917, %f932, %f682;
	mul.ftz.f32 	%f683, %f881, %f824;
	sub.ftz.f32 	%f684, %f881, %f683;
	mul.ftz.f32 	%f685, %f27, %f684;
	sub.ftz.f32 	%f866, %f881, %f685;

BB4_174:
	.loc 1 44 1
	mov.f32 	%f916, %f917;
	mov.f32 	%f865, %f866;
	setp.geu.ftz.f32	%p170, %f20, %f289;
	@%p170 bra 	BB4_179;

	.loc 1 44 1
	sub.ftz.f32 	%f236, %f289, %f20;
	setp.neu.ftz.f32	%p171, %f18, 0f00000000;
	@%p171 bra 	BB4_177;

	mov.f32 	%f825, 0f00000000;
	bra.uni 	BB4_178;

BB4_177:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f687, %f236, %f18;
	mov.f32 	%f688, 0f3F800000;
	.loc 1 44 137
	sub.ftz.f32 	%f825, %f688, %f687;

BB4_178:
	.loc 1 44 1
	mul.ftz.f32 	%f689, %f916, %f825;
	sub.ftz.f32 	%f690, %f916, %f689;
	fma.rn.ftz.f32 	%f916, %f27, %f690, %f916;
	mul.ftz.f32 	%f691, %f865, %f825;
	sub.ftz.f32 	%f692, %f865, %f691;
	fma.rn.ftz.f32 	%f865, %f27, %f692, %f865;

BB4_179:
	.loc 1 44 1
	mov.f32 	%f913, %f916;
	mov.f32 	%f862, %f865;
	setp.ne.s32	%p172, %r15, 3;
	@%p172 bra 	BB4_199;

	.loc 1 44 1
	mul.ftz.f32 	%f693, %f862, 0f3F728F61;
	fma.rn.ftz.f32 	%f694, %f913, 0fBEA3B6E9, %f693;
	mul.ftz.f32 	%f695, %f862, 0fBEA3B6E9;
	fma.rn.ftz.f32 	%f696, %f913, 0fBF728F61, %f695;
	.loc 3 8449 10
	abs.ftz.f32 	%f243, %f694;
	abs.ftz.f32 	%f244, %f696;
	setp.eq.ftz.f32	%p173, %f243, 0f00000000;
	setp.eq.ftz.f32	%p174, %f244, 0f00000000;
	and.pred  	%p175, %p173, %p174;
	mov.b32 	 %r11, %f694;
	mov.b32 	 %r76, %f696;
	and.b32  	%r12, %r76, -2147483648;
	@%p175 bra 	BB4_184;

	setp.eq.ftz.f32	%p176, %f243, 0f7F800000;
	setp.eq.ftz.f32	%p177, %f244, 0f7F800000;
	and.pred  	%p178, %p176, %p177;
	@%p178 bra 	BB4_183;

	max.ftz.f32 	%f697, %f244, %f243;
	min.ftz.f32 	%f698, %f244, %f243;
	div.full.ftz.f32 	%f699, %f698, %f697;
	mul.rn.ftz.f32 	%f700, %f699, %f699;
	mov.f32 	%f701, 0fC0B59883;
	mov.f32 	%f702, 0fBF52C7EA;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f703, %f700, %f702, %f701;
	mov.f32 	%f704, 0fC0D21907;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f705, %f703, %f700, %f704;
	mul.ftz.f32 	%f706, %f705, %f700;
	mul.ftz.f32 	%f707, %f706, %f699;
	add.ftz.f32 	%f708, %f700, 0f41355DC0;
	mov.f32 	%f709, 0f41E6BD60;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f710, %f708, %f700, %f709;
	mov.f32 	%f711, 0f419D92C8;
	.loc 3 8449 10
	fma.rn.ftz.f32 	%f712, %f710, %f700, %f711;
	rcp.approx.ftz.f32 	%f713, %f712;
	fma.rn.ftz.f32 	%f714, %f707, %f713, %f699;
	mov.f32 	%f715, 0f3FC90FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f716, %f715, %f714;
	setp.gt.ftz.f32	%p179, %f244, %f243;
	selp.f32	%f717, %f716, %f714, %p179;
	mov.f32 	%f718, 0f40490FDB;
	.loc 3 8449 10
	sub.ftz.f32 	%f719, %f718, %f717;
	setp.lt.s32	%p180, %r11, 0;
	selp.f32	%f720, %f719, %f717, %p180;
	mov.b32 	 %r77, %f720;
	or.b32  	%r78, %r77, %r12;
	mov.b32 	 %f721, %r78;
	add.ftz.f32 	%f722, %f243, %f244;
	setp.gtu.ftz.f32	%p181, %f722, 0f7F800000;
	selp.f32	%f826, %f722, %f721, %p181;
	bra.uni 	BB4_185;

BB4_183:
	.loc 3 8449 10
	shr.s32 	%r79, %r11, 31;
	and.b32  	%r80, %r79, 13483017;
	add.s32 	%r81, %r80, 1061752795;
	or.b32  	%r82, %r81, %r12;
	mov.b32 	 %f826, %r82;
	bra.uni 	BB4_185;

BB4_184:
	.loc 3 8449 10
	shr.s32 	%r83, %r11, 31;
	and.b32  	%r84, %r83, 1078530011;
	or.b32  	%r85, %r84, %r12;
	mov.b32 	 %f826, %r85;

BB4_185:
	.loc 1 44 1
	add.ftz.f32 	%f723, %f826, 0f40C90FDB;
	.loc 1 44 1
	setp.lt.ftz.f32	%p182, %f826, 0f00000000;
	selp.f32	%f724, %f723, %f826, %p182;
	.loc 1 44 1
	mul.ftz.f32 	%f249, %f724, 0f3E22F983;
	.loc 1 44 1
	setp.lt.ftz.f32	%p183, %f249, 0f3F8147AE;
	mov.f32 	%f830, 0f3F800000;
	sub.ftz.f32 	%f726, %f830, 0f3DAA9931;
	setp.ge.ftz.f32	%p184, %f249, %f726;
	and.pred  	%p185, %p184, %p183;
	.loc 1 44 1
	mov.f32 	%f827, 0f3E4CCCCD;
	mov.f32 	%f828, 0f3F4CCCCD;
	.loc 1 44 1
	@!%p185 bra 	BB4_186;
	bra.uni 	BB4_189;

BB4_186:
	setp.lt.ftz.f32	%p186, %f249, 0f3DAA9931;
	setp.ge.ftz.f32	%p187, %f249, 0f00000000;
	and.pred  	%p188, %p187, %p186;
	.loc 1 44 1
	@!%p188 bra 	BB4_187;
	bra.uni 	BB4_189;

BB4_187:
	.loc 1 44 1
	setp.lt.ftz.f32	%p189, %f249, 0f3E802752;
	mov.f32 	%f727, 0f3E2B020C;
	sub.ftz.f32 	%f728, %f727, 0f3DAA9931;
	setp.ge.ftz.f32	%p190, %f249, %f728;
	and.pred  	%p191, %p190, %p189;
	.loc 1 44 1
	@!%p191 bra 	BB4_189;
	bra.uni 	BB4_188;

BB4_188:
	mov.f32 	%f828, 0f3F000000;
	mov.f32 	%f827, %f828;

BB4_189:
	.loc 1 44 1
	sub.ftz.f32 	%f737, %f19, %f292;
	setp.gt.ftz.f32	%p192, %f19, %f292;
	selp.f32	%f252, %f737, 0f00000000, %p192;
	sub.ftz.f32 	%f738, %f291, %f20;
	setp.lt.ftz.f32	%p193, %f20, %f291;
	selp.f32	%f253, %f738, 0f00000000, %p193;
	.loc 1 44 1
	setp.leu.ftz.f32	%p194, %f252, 0f00000000;
	mov.f32 	%f864, %f862;
	mov.f32 	%f915, %f913;
	mov.f32 	%f961, %f959;
	@%p194 bra 	BB4_194;

	.loc 1 44 1
	mul.ftz.f32 	%f254, %f252, %f827;
	mul.ftz.f32 	%f255, %f252, %f828;
	.loc 1 44 1
	setp.neu.ftz.f32	%p195, %f18, 0f00000000;
	@%p195 bra 	BB4_192;

	mov.f32 	%f829, 0f3F800000;
	bra.uni 	BB4_193;

BB4_192:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f740, %f255, %f18;
	mov.f32 	%f741, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f829, %f741, %f740;

BB4_193:
	.loc 1 44 1
	setp.ltu.ftz.f32	%p196, %f829, 0f3F7D70A4;
	selp.f32	%f742, %f254, %f252, %p196;
	mov.f32 	%f743, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f744, %f742, %f743;
	.loc 1 44 23
	sub.ftz.f32 	%f961, %f959, %f744;
	.loc 1 44 1
	mul.ftz.f32 	%f915, %f913, %f829;
	mul.ftz.f32 	%f864, %f862, %f829;

BB4_194:
	.loc 1 44 1
	mov.f32 	%f960, %f961;
	mov.f32 	%f914, %f915;
	mov.f32 	%f863, %f864;
	setp.leu.ftz.f32	%p197, %f253, 0f00000000;
	@%p197 bra 	BB4_198;

	.loc 1 44 1
	mul.ftz.f32 	%f264, %f253, %f827;
	mul.ftz.f32 	%f265, %f253, %f828;
	.loc 1 44 1
	setp.neu.ftz.f32	%p198, %f18, 0f00000000;
	@%p198 bra 	BB4_196;
	bra.uni 	BB4_197;

BB4_196:
	.loc 2 3606 10
	div.approx.ftz.f32 	%f746, %f265, %f18;
	mov.f32 	%f747, 0f3F800000;
	.loc 1 44 72
	sub.ftz.f32 	%f830, %f747, %f746;

BB4_197:
	.loc 1 44 1
	setp.ltu.ftz.f32	%p199, %f830, 0f3F7D70A4;
	selp.f32	%f748, %f264, %f253, %p199;
	mov.f32 	%f749, 0f42C80000;
	.loc 2 3606 10
	div.approx.ftz.f32 	%f750, %f748, %f749;
	.loc 1 44 23
	add.ftz.f32 	%f960, %f959, %f750;
	.loc 1 44 1
	mul.ftz.f32 	%f914, %f913, %f830;
	mul.ftz.f32 	%f863, %f862, %f830;

BB4_198:
	.loc 1 44 1
	sub.ftz.f32 	%f751, %f960, %f959;
	fma.rn.ftz.f32 	%f959, %f27, %f751, %f959;
	sub.ftz.f32 	%f752, %f914, %f913;
	fma.rn.ftz.f32 	%f913, %f27, %f752, %f913;
	sub.ftz.f32 	%f753, %f863, %f862;
	fma.rn.ftz.f32 	%f862, %f27, %f753, %f862;

BB4_199:
	.loc 1 44 1
	ld.const.f32 	%f754, [k601YPbPr_To_RGB32f];
	ld.const.f32 	%f755, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f756, %f913, %f755;
	fma.rn.ftz.f32 	%f757, %f959, %f754, %f756;
	ld.const.f32 	%f758, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f280, %f862, %f758, %f757;
	ld.const.f32 	%f759, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f760, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f761, %f913, %f760;
	fma.rn.ftz.f32 	%f762, %f959, %f759, %f761;
	ld.const.f32 	%f763, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f281, %f862, %f763, %f762;
	ld.const.f32 	%f764, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f765, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f766, %f913, %f765;
	fma.rn.ftz.f32 	%f767, %f959, %f764, %f766;
	ld.const.f32 	%f768, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f282, %f862, %f768, %f767;
	.loc 1 44 1
	@%p4 bra 	BB4_201;

	mul.wide.s32 	%rd8, %r25, 16;
	add.s64 	%rd9, %rd1, %rd8;
	.loc 1 44 1
	st.global.v4.f32 	[%rd9], {%f282, %f281, %f280, %f793};
	bra.uni 	BB4_202;

BB4_201:
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f282;
	mov.b16 	%rs9, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f281;
	mov.b16 	%rs10, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f280;
	mov.b16 	%rs11, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f793;
	mov.b16 	%rs12, %temp;
}
	mul.wide.s32 	%rd11, %r25, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 1 44 231
	st.global.v4.u16 	[%rd12], {%rs9, %rs10, %rs11, %rs12};

BB4_202:
	.loc 1 44 2
	ret;
}


