//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Wed Jul 10 12:41:20 2013 (1373485280)
// Cuda compilation tools, release 5.5, V5.5.0
//

.version 3.2
.target sm_30
.address_size 64

	.file	1 "D:/singlebarrel/releases/2014.03/shared/adobe/MediaCore/GPUFoundation/Src/ImageProcessing/PixelFormatConvert_420.cu", 1399785311, 9697
	.file	2 "D:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\MediaCore\\GPUFoundation\\Inc\\ImageProcessing/PixelFormatConvert_Common.h", 1399785310, 21667
	.file	3 "d:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\mediacore\\external\\3rdparty\\nvidia\\cuda\\win\\include\\device_functions.h", 1399785281, 191626
	.file	4 "D:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\MediaCore\\GPUFoundation\\API\\Inc\\GPUFoundation/KernelSupport/KernelCore.h", 1399785310, 7840
// Module-scope texture references. The tex.2d fetches in
// _Z29PixelFormatConvert_420_To_444... address them with two f32 coordinates
// and keep only the first returned component.
//   inYPlaneTexture             - fetched once per thread.
//   inFrameCoded{U,V}PlaneTexture - used when (param_14 & 67092480) is
//                                 8388608 or 33554432.
//   inUpper{U,V}PlaneTexture    - otherwise, when the thread's y index is even.
//   inLower{U,V}PlaneTexture    - otherwise, when the thread's y index is odd.
.global .texref inYPlaneTexture;
.global .texref inFrameCodedUPlaneTexture;
.global .texref inFrameCodedVPlaneTexture;
.global .texref inUpperUPlaneTexture;
.global .texref inUpperVPlaneTexture;
.global .texref inLowerUPlaneTexture;
.global .texref inLowerVPlaneTexture;
// Colour-conversion coefficient tables in constant memory.
// Each table is 36 bytes, i.e. nine 4-byte values, written as raw bytes with the
// least-significant byte first (for example 0, 0, 128, 63 is 0x3F800000 = 1.0f).
// _Z23ColorSpaceConvertMatrixjj returns the address of one of these tables.
// NOTE(review): presumably each table is a row-major 3x3 f32 matrix named
// <source>_To_<destination>; the layout is inferred from the names and size -
// confirm against the code that multiplies by them.
.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 33, 201, 44, 190, 111, 155, 169, 190, 0, 0, 0, 63, 0, 0, 0, 63, 70, 94, 214, 190, 232, 134, 166, 189};
.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 188, 116, 179, 63, 0, 0, 128, 63, 152, 50, 176, 190, 158, 209, 54, 191, 0, 0, 128, 63, 229, 208, 226, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70, 246, 130, 66, 145, 141, 0, 67, 94, 186, 199, 65, 33, 48, 23, 194, 240, 103, 148, 194, 0, 0, 224, 66, 0, 0, 224, 66, 111, 146, 187, 194, 70, 182, 145, 193};
.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 182, 23, 205, 59, 37, 160, 149, 59, 40, 15, 201, 186, 156, 239, 80, 187, 37, 160, 149, 59, 236, 155, 1, 60, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219, 121, 131, 62, 152, 14, 1, 63, 18, 131, 200, 61, 174, 199, 23, 190, 238, 252, 148, 190, 197, 224, 224, 62, 197, 224, 224, 62, 217, 78, 188, 190, 174, 71, 146, 189};
.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 160, 74, 204, 63, 127, 10, 149, 63, 254, 148, 200, 190, 184, 30, 80, 191, 127, 10, 149, 63, 78, 26, 1, 64, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 166, 27, 44, 190, 39, 241, 168, 190, 250, 254, 254, 62, 250, 254, 254, 62, 43, 135, 213, 190, 59, 223, 165, 189};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0, 0, 128, 63, 0, 0, 0, 0, 72, 193, 178, 63, 0, 0, 128, 63, 143, 130, 175, 190, 225, 26, 54, 191, 0, 0, 128, 63, 20, 238, 225, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113, 125, 152, 66, 92, 175, 21, 67, 92, 143, 232, 65, 158, 111, 43, 194, 49, 72, 168, 194, 0, 0, 254, 66, 0, 0, 254, 66, 170, 177, 212, 194, 88, 57, 165, 193};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129, 128, 128, 59, 0, 0, 0, 0, 188, 116, 179, 59, 129, 128, 128, 59, 194, 50, 176, 186, 179, 209, 54, 187, 129, 128, 128, 59, 229, 208, 226, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208, 179, 89, 62, 89, 23, 55, 63, 152, 221, 147, 61, 186, 164, 234, 189, 210, 86, 197, 190, 0, 0, 0, 63, 0, 0, 0, 63, 190, 134, 232, 190, 16, 202, 59, 189};
.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 12, 147, 201, 63, 0, 0, 128, 63, 221, 209, 63, 190, 243, 173, 239, 190, 0, 0, 128, 63, 77, 132, 237, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106, 60, 58, 66, 6, 161, 28, 67, 244, 253, 124, 65, 223, 79, 205, 193, 8, 172, 172, 194, 0, 0, 224, 66, 0, 0, 224, 66, 195, 117, 203, 194, 236, 81, 36, 193};
.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 239, 94, 230, 59, 37, 160, 149, 59, 33, 57, 91, 186, 178, 245, 8, 187, 37, 160, 149, 59, 82, 185, 7, 60, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCrFullRange_To_RGB32f[36] = {131, 128, 128, 59, 0, 0, 0, 0, 28, 147, 201, 59, 131, 128, 128, 59, 61, 210, 63, 186, 248, 173, 239, 186, 131, 128, 128, 59, 82, 132, 237, 59, 0, 0, 0, 0};
// RGB -> 709 YCbCr coefficients for 8-bit data: nine 4-byte values stored as raw
// bytes, least-significant byte first. _Z23ColorSpaceConvertMatrixjj returns the
// address of this table.
// Bytes 12..15 hold the fourth value (0xBDCE1893 = -0.1006). Its sign bit must be
// set: the same slot is negative in every other RGB->YCbCr/YPbPr table in this
// file (e.g. kRGB32f_To_709YCbCr, kRGB8u_To_601YCbCr), and the product
// k709YCbCr_To_RGB8u * kRGB8u_To_709YCbCr is the identity only with the negative
// value (0.2126 + 2.112 * x = 0  =>  x = -0.1006). A final byte of 61 instead of
// 189 would encode +0.1006.
.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207, 247, 58, 62, 53, 62, 29, 63, 231, 251, 125, 61, 147, 24, 206, 189, 23, 89, 173, 190, 197, 224, 224, 62, 197, 224, 224, 62, 12, 66, 204, 190, 195, 245, 36, 189};
// Further 36-byte coefficient tables (nine 4-byte values each, least-significant
// byte first) whose addresses _Z23ColorSpaceConvertMatrixjj can return.
.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 147, 120, 229, 63, 127, 10, 149, 63, 53, 94, 90, 190, 205, 108, 8, 191, 127, 10, 149, 63, 154, 49, 7, 64, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0, 0, 128, 63, 23, 100, 203, 61, 1, 77, 68, 62, 0, 0, 0, 0, 18, 103, 125, 63, 10, 158, 226, 189, 0, 0, 0, 0, 61, 98, 148, 189, 249, 191, 123, 63};
.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0, 0, 128, 63, 122, 165, 236, 189, 179, 237, 84, 190, 0, 0, 0, 0, 204, 98, 130, 63, 216, 188, 234, 61, 0, 0, 0, 0, 74, 179, 153, 61, 234, 61, 131, 63};
// Three-element f32 offset vectors, read with ld.const.f32 at byte offsets 0, 4
// and 8. The readers scale them by (max value / 255) before subtracting or adding
// them, and pick kYCbCrFullRangeOffset when (format & 4096) == 0, kYCbCrOffset
// otherwise.
//   kYCbCrOffset          = { 16.0, 128.0, 128.0 }
//   kYCbCrFullRangeOffset = {  0.0, 128.0, 128.0 }
.const .align 4 .b8 kYCbCrOffset[12] = {0, 0, 128, 65, 0, 0, 0, 67, 0, 0, 0, 67};
.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 67};
// NUL-terminated ASCII string "__CUDA_FTZ" (10 characters + terminator).
.global .align 1 .b8 $str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};

// _Z13MaxDepthValuej - demangles to MaxDepthValue(unsigned int).
// Looks only at bits 3..7 of the argument (arg & 248) and returns an f32:
//     0  -> 255.0    (0f437F0000)
//     8  -> 1023.0   (0f447FC000)
//     16 -> 32768.0  (0f47000000)
//     anything else -> 1.0 (0f3F800000)
// Written as a branch-free selp chain; later selects override earlier ones, so
// the three tested values are mutually exclusive and the order is irrelevant.
.visible .func  (.param .b32 func_retval0) _Z13MaxDepthValuej(
	.param .b32 _Z13MaxDepthValuej_param_0
)
{
	.reg .pred 	%isDepth0, %isDepth8, %isDepth16;
	.reg .s32 	%format, %depthBits;
	.reg .f32 	%maxValue;

	ld.param.u32 	%format, [_Z13MaxDepthValuej_param_0];
	.loc 2 51 1
	and.b32  	%depthBits, %format, 248;

	// Default: 1.0, replaced by 32768.0 when the field is 16.
	setp.eq.s32	%isDepth16, %depthBits, 16;
	selp.f32	%maxValue, 0f47000000, 0f3F800000, %isDepth16;

	// Field 8 -> 1023.0.
	setp.eq.s32	%isDepth8, %depthBits, 8;
	selp.f32	%maxValue, 0f447FC000, %maxValue, %isDepth8;

	// Field 0 -> 255.0.
	setp.eq.s32	%isDepth0, %depthBits, 0;
	selp.f32	%maxValue, 0f437F0000, %maxValue, %isDepth0;

	st.param.f32	[func_retval0+0], %maxValue;
	.loc 2 51 1
	ret;
}

// _Z23ColorSpaceConvertMatrixjj - demangles to
// ColorSpaceConvertMatrix(unsigned int, unsigned int).
//
// Picks one of the 36-byte .const coefficient tables from the bit patterns of the
// two arguments and returns its generic address (cvta.const) in func_retval0, or
// 0 when no table matches.
//
//   param_0 (%r9)  - bits 256, 512, 2048 and 4096 are tested.
//   param_1 (%r10) - bits 256, 512, 2048 and 4096 are tested.
//
// NOTE(review): judging only by the names of the tables selected, param_0 looks
// like the source format and param_1 the destination format, with
//   512  = YCbCr/YPbPr (clear = RGB),   256  = 32-bit float (clear = 8-bit),
//   2048 = 709 (clear = 601),           4096 = video range (clear = full range).
// These meanings are inferred, not stated in this file - confirm against the
// pixel-format header.
.visible .func  (.param .b64 func_retval0) _Z23ColorSpaceConvertMatrixjj(
	.param .b32 _Z23ColorSpaceConvertMatrixjj_param_0,
	.param .b32 _Z23ColorSpaceConvertMatrixjj_param_1
)
{
	.reg .pred 	%p<33>;
	.reg .s32 	%r<16>;
	.reg .s64 	%rd<32>;


	ld.param.u32 	%r9, [_Z23ColorSpaceConvertMatrixjj_param_0];
	ld.param.u32 	%r10, [_Z23ColorSpaceConvertMatrixjj_param_1];
	.loc 2 51 1
	and.b32  	%r11, %r9, 512;
	setp.eq.s32	%p1, %r11, 0;
	.loc 2 51 1
	// %r1 = param_1 & 256, reused by most branches below.
	and.b32  	%r1, %r10, 256;
	.loc 2 51 1
	// param_0 bit 512 clear -> BB1_29 (the kRGB*_To_* tables).
	@%p1 bra 	BB1_29;

	// ---- param_0 bit 512 set: the k601*/k709*_To_* tables ----
	.loc 2 51 1
	and.b32  	%r12, %r9, 2048;
	setp.eq.s32	%p2, %r12, 0;
	.loc 2 51 1
	and.b32  	%r2, %r9, 4096;
	.loc 2 51 1
	and.b32  	%r3, %r10, 512;
	.loc 2 51 1
	// param_0 bit 2048 clear -> BB1_15 (k601* tables); set -> k709* tables.
	@%p2 bra 	BB1_15;

	setp.eq.s32	%p3, %r2, 0;
	.loc 2 51 1
	// param_0 bit 4096 clear -> BB1_13 (k709YCbCrFullRange_To_RGB32f or 0).
	@%p3 bra 	BB1_13;

	setp.eq.s32	%p4, %r3, 0;
	.loc 2 51 1
	and.b32  	%r4, %r9, 256;
	.loc 2 51 1
	// param_1 bit 512 clear -> BB1_7 (709 -> RGB tables).
	@%p4 bra 	BB1_7;

	// param_1 bit 512 set: only k709YCbCr_To_601YCbCr is possible.
	// Requires param_0 bit 256 clear, param_1 bit 4096 set and param_1 bit 256 clear.
	.loc 2 51 1
	setp.ne.s32	%p5, %r4, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p5 bra 	BB1_47;

	.loc 2 51 1
	and.b32  	%r13, %r10, 4096;
	setp.eq.s32	%p6, %r13, 0;
	.loc 2 51 1
	setp.ne.s32	%p7, %r1, 0;
	or.pred  	%p8, %p6, %p7;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p8 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k709YCbCr_To_601YCbCr;
	bra.uni 	BB1_47;

BB1_7:
	// 709 source, param_1 bit 512 clear. param_0 bit 256 clear -> BB1_10.
	setp.eq.s32	%p9, %r4, 0;
	.loc 2 51 1
	@%p9 bra 	BB1_10;

	// param_0 bit 256 set: k709YPbPr_To_RGB32f when param_1 bit 256 is set, else 0.
	setp.eq.s32	%p10, %r1, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p10 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k709YPbPr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_10:
	// param_1 bit 256 set -> k709YCbCr_To_RGB32f, clear -> k709YCbCr_To_RGB8u.
	setp.eq.s32	%p11, %r1, 0;
	.loc 2 51 1
	@%p11 bra 	BB1_12;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k709YCbCr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_12:
	.loc 2 51 1
	cvta.const.u64 	%rd31, k709YCbCr_To_RGB8u;
	bra.uni 	BB1_47;

BB1_13:
	// 709 source with param_0 bit 4096 clear: k709YCbCrFullRange_To_RGB32f only when
	// param_1 bit 512 is clear and param_1 bit 256 is set; otherwise 0.
	.loc 2 51 1
	setp.ne.s32	%p12, %r3, 0;
	setp.eq.s32	%p13, %r1, 0;
	.loc 2 51 1
	or.pred  	%p14, %p13, %p12;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p14 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k709YCbCrFullRange_To_RGB32f;
	bra.uni 	BB1_47;

BB1_15:
	// ---- param_0 bit 512 set, bit 2048 clear: k601* tables ----
	// param_0 bit 4096 clear -> BB1_25 (k601YCbCrFullRange_To_* tables).
	setp.eq.s32	%p15, %r2, 0;
	.loc 2 51 1
	@%p15 bra 	BB1_25;

	setp.eq.s32	%p16, %r3, 0;
	.loc 2 51 1
	and.b32  	%r5, %r9, 256;
	.loc 2 51 1
	// param_1 bit 512 clear -> BB1_19 (601 -> RGB tables).
	@%p16 bra 	BB1_19;

	// param_1 bit 512 set: k601YCbCr_To_709YCbCr when bit 256 is clear in both
	// arguments, else 0. Unlike the 709 -> 601 case, param_1 bit 4096 is not tested.
	or.b32  	%r14, %r5, %r1;
	.loc 2 51 1
	setp.ne.s32	%p17, %r14, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p17 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YCbCr_To_709YCbCr;
	bra.uni 	BB1_47;

BB1_19:
	// param_0 bit 256 clear -> BB1_22.
	setp.eq.s32	%p18, %r5, 0;
	.loc 2 51 1
	@%p18 bra 	BB1_22;

	// param_0 bit 256 set: k601YPbPr_To_RGB32f when param_1 bit 256 is set, else 0.
	setp.eq.s32	%p19, %r1, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p19 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YPbPr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_22:
	// param_1 bit 256 set -> k601YCbCr_To_RGB32f, clear -> k601YCbCr_To_RGB8u.
	setp.eq.s32	%p20, %r1, 0;
	.loc 2 51 1
	@%p20 bra 	BB1_24;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YCbCr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_24:
	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YCbCr_To_RGB8u;
	bra.uni 	BB1_47;

BB1_25:
	// 601 source with param_0 bit 4096 clear: 0 when param_1 bit 512 is set,
	// otherwise k601YCbCrFullRange_To_RGB32f / _RGB8u chosen by param_1 bit 256.
	.loc 2 51 1
	setp.ne.s32	%p21, %r3, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p21 bra 	BB1_47;

	setp.eq.s32	%p22, %r1, 0;
	.loc 2 51 1
	@%p22 bra 	BB1_28;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YCbCrFullRange_To_RGB32f;
	bra.uni 	BB1_47;

BB1_28:
	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YCbCrFullRange_To_RGB8u;
	bra.uni 	BB1_47;

BB1_29:
	// ---- param_0 bit 512 clear: kRGB*_To_* tables ----
	// param_1 bit 512 is never tested on this side.
	.loc 2 51 1
	and.b32  	%r15, %r9, 256;
	setp.eq.s32	%p23, %r15, 0;
	.loc 2 51 1
	and.b32  	%r6, %r10, 2048;
	.loc 2 51 1
	// param_0 bit 256 clear -> BB1_40 (kRGB8u_* tables); set -> kRGB32f_* tables.
	@%p23 bra 	BB1_40;

	// param_1 bit 256 clear -> BB1_34 (kRGB32f_To_*YCbCr*); set -> *YPbPr tables.
	setp.eq.s32	%p24, %r1, 0;
	.loc 2 51 1
	@%p24 bra 	BB1_34;

	// param_1 bit 2048 set -> kRGB32f_To_709YPbPr, clear -> kRGB32f_To_601YPbPr.
	setp.eq.s32	%p25, %r6, 0;
	.loc 2 51 1
	@%p25 bra 	BB1_33;

	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB32f_To_709YPbPr;
	bra.uni 	BB1_47;

BB1_33:
	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB32f_To_601YPbPr;
	bra.uni 	BB1_47;

BB1_34:
	setp.eq.s32	%p26, %r6, 0;
	.loc 2 51 1
	and.b32  	%r7, %r10, 4096;
	.loc 2 51 1
	// param_1 bit 2048 clear -> BB1_37 (601 targets).
	@%p26 bra 	BB1_37;

	// 709 target: kRGB32f_To_709YCbCr when param_1 bit 4096 is set, else 0.
	setp.eq.s32	%p27, %r7, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p27 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB32f_To_709YCbCr;
	bra.uni 	BB1_47;

BB1_37:
	// param_1 bit 4096 set -> kRGB32f_To_601YCbCr, clear -> ...FullRange.
	setp.eq.s32	%p28, %r7, 0;
	.loc 2 51 1
	@%p28 bra 	BB1_39;

	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB32f_To_601YCbCr;
	bra.uni 	BB1_47;

BB1_39:
	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB32f_To_601YCbCrFullRange;
	bra.uni 	BB1_47;

BB1_40:
	// param_0 bits 512 and 256 both clear: kRGB8u_* tables.
	// Returns 0 when param_1 bit 256 is set.
	.loc 2 51 1
	setp.ne.s32	%p29, %r1, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p29 bra 	BB1_47;

	setp.eq.s32	%p30, %r6, 0;
	.loc 2 51 1
	and.b32  	%r8, %r10, 4096;
	.loc 2 51 1
	// param_1 bit 2048 clear -> BB1_44 (601 targets).
	@%p30 bra 	BB1_44;

	// 709 target: returns 0 when param_1 bit 4096 is SET and kRGB8u_To_709YCbCr when
	// it is CLEAR.
	// NOTE(review): this is the opposite polarity to the kRGB32f_To_709YCbCr branch
	// (0 when the bit is clear) and to BB1_44, where the bit-set case selects the
	// non-FullRange table. Looks like an inverted condition in the original
	// source - confirm before relying on this path.
	.loc 2 51 1
	setp.ne.s32	%p31, %r8, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p31 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB8u_To_709YCbCr;
	bra.uni 	BB1_47;

BB1_44:
	// param_1 bit 4096 set -> kRGB8u_To_601YCbCr, clear -> ...FullRange.
	setp.eq.s32	%p32, %r8, 0;
	.loc 2 51 1
	@%p32 bra 	BB1_46;

	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB8u_To_601YCbCr;
	bra.uni 	BB1_47;

BB1_46:
	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB8u_To_601YCbCrFullRange;

BB1_47:
	// Common exit: %rd31 is the selected table address or 0.
	st.param.b64	[func_retval0+0], %rd31;
	.loc 2 51 1
	ret;
}

// _Z5clampIfET_S0_S0_S0_ - demangles to float clamp<float>(float, float, float).
// Returns min(max(param_0, param_1), param_2), both steps flushing denormals
// to zero (.ftz). param_1 therefore acts as the lower bound and param_2 as the
// upper bound; when param_1 > param_2 the result is param_2.
.visible .func  (.param .b32 func_retval0) _Z5clampIfET_S0_S0_S0_(
	.param .b32 _Z5clampIfET_S0_S0_S0__param_0,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_1,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_2
)
{
	.reg .f32 	%value, %lowerBound, %upperBound, %raised, %clamped;

	ld.param.f32 	%value, [_Z5clampIfET_S0_S0_S0__param_0];
	ld.param.f32 	%lowerBound, [_Z5clampIfET_S0_S0_S0__param_1];
	ld.param.f32 	%upperBound, [_Z5clampIfET_S0_S0_S0__param_2];

	// Raise to the lower bound first, then cap at the upper bound.
	.loc 3 2770 10
	max.ftz.f32 	%raised, %value, %lowerBound;
	.loc 3 2765 10
	min.ftz.f32 	%clamped, %raised, %upperBound;

	st.param.f32	[func_retval0+0], %clamped;
	.loc 4 146 39
	ret;
}

// _Z23UnpremultiplyComponents6float4j - demangles to
// UnpremultiplyComponents(float4, unsigned int).
//
//   param_0 - four f32 components at byte offsets 0, 4, 8, 12 (c0..c3).
//   param_1 - format bits; (param_1 & 768), (param_1 & 248) and (param_1 & 4096)
//             are examined.
//   returns - four f32 components in the same layout.
//
// Steps:
//   1. maxValue = 255 / 1023 / 32768 / 1 for (param_1 & 248) = 0 / 8 / 16 / other.
//   2. If (param_1 & 768) == 512: subtract offset[i] * (maxValue / 255) from
//      c1..c3, where offset is kYCbCrFullRangeOffset when (param_1 & 4096) == 0
//      and kYCbCrOffset otherwise.
//   3. If c0 + (-8e-6) is greater than 0 (or unordered): multiply c1..c3 by
//      maxValue / c0 and keep c0. Otherwise set all four components to 0.
//   4. If (param_1 & 768) == 512: add the same scaled offsets back with a fused
//      multiply-add.
// c0 is the divisor (presumably the alpha channel - inferred from the function
// name, not shown here).
.visible .func  (.param .align 16 .b8 func_retval0[16]) _Z23UnpremultiplyComponents6float4j(
	.param .align 16 .b8 _Z23UnpremultiplyComponents6float4j_param_0[16],
	.param .b32 _Z23UnpremultiplyComponents6float4j_param_1
)
{
	.reg .pred 	%skipOffsets, %isDepth0, %isDepth8, %isDepth16, %rangeBitClear, %divisorUsable;
	.reg .s32 	%format, %spaceBits, %depthBits, %rangeBit;
	.reg .f32 	%c0, %c1, %c2, %c3;
	.reg .f32 	%maxValue, %depthScale, %gain, %biased, %scaledOffset;
	.reg .f32 	%videoOffset, %fullOffset, %offset1, %offset2, %offset3;

	ld.param.f32 	%c0, [_Z23UnpremultiplyComponents6float4j_param_0];
	ld.param.f32 	%c1, [_Z23UnpremultiplyComponents6float4j_param_0+4];
	ld.param.f32 	%c2, [_Z23UnpremultiplyComponents6float4j_param_0+8];
	ld.param.f32 	%c3, [_Z23UnpremultiplyComponents6float4j_param_0+12];
	ld.param.u32 	%format, [_Z23UnpremultiplyComponents6float4j_param_1];
	.loc 2 51 1

	// Step 1: maximum component value for the depth field, computed once.
	and.b32  	%depthBits, %format, 248;
	setp.eq.s32	%isDepth16, %depthBits, 16;
	selp.f32	%maxValue, 0f47000000, 0f3F800000, %isDepth16;
	setp.eq.s32	%isDepth8, %depthBits, 8;
	selp.f32	%maxValue, 0f447FC000, %maxValue, %isDepth8;
	setp.eq.s32	%isDepth0, %depthBits, 0;
	selp.f32	%maxValue, 0f437F0000, %maxValue, %isDepth0;

	// Offsets are only applied when (format & 768) == 512.
	and.b32  	%spaceBits, %format, 768;
	setp.ne.s32	%skipOffsets, %spaceBits, 512;
	@%skipOffsets bra 	UNPREMUL_DIVIDE;

	// Step 2: pick the offset vector and remove it, scaled by maxValue / 255.
	.loc 2 51 119
	and.b32  	%rangeBit, %format, 4096;
	setp.eq.s32	%rangeBitClear, %rangeBit, 0;
	.loc 3 3606 10
	div.approx.ftz.f32 	%depthScale, %maxValue, 0f437F0000;
	.loc 2 51 119
	ld.const.f32 	%videoOffset, [kYCbCrOffset];
	ld.const.f32 	%fullOffset, [kYCbCrFullRangeOffset];
	selp.f32	%offset1, %fullOffset, %videoOffset, %rangeBitClear;
	mul.ftz.f32 	%scaledOffset, %offset1, %depthScale;
	sub.ftz.f32 	%c1, %c1, %scaledOffset;
	ld.const.f32 	%videoOffset, [kYCbCrOffset+4];
	ld.const.f32 	%fullOffset, [kYCbCrFullRangeOffset+4];
	selp.f32	%offset2, %fullOffset, %videoOffset, %rangeBitClear;
	mul.ftz.f32 	%scaledOffset, %offset2, %depthScale;
	sub.ftz.f32 	%c2, %c2, %scaledOffset;
	ld.const.f32 	%videoOffset, [kYCbCrOffset+8];
	ld.const.f32 	%fullOffset, [kYCbCrFullRangeOffset+8];
	selp.f32	%offset3, %fullOffset, %videoOffset, %rangeBitClear;
	mul.ftz.f32 	%scaledOffset, %offset3, %depthScale;
	sub.ftz.f32 	%c3, %c3, %scaledOffset;

UNPREMUL_DIVIDE:
	// Step 3: 0fB70637BD is about -8e-6, so the divisor must exceed that margin.
	// setp.gtu is also true for NaN, which then takes the divide path.
	.loc 2 51 1
	add.ftz.f32 	%biased, %c0, 0fB70637BD;
	setp.gtu.ftz.f32	%divisorUsable, %biased, 0f00000000;
	@%divisorUsable bra 	UNPREMUL_SCALE;

	// Divisor too small: the whole pixel becomes zero.
	mov.f32 	%c0, 0f00000000;
	mov.f32 	%c1, 0f00000000;
	mov.f32 	%c2, 0f00000000;
	mov.f32 	%c3, 0f00000000;
	bra.uni 	UNPREMUL_RESTORE;

UNPREMUL_SCALE:
	.loc 3 3606 10
	div.approx.ftz.f32 	%gain, %maxValue, %c0;
	.loc 2 51 1
	mul.ftz.f32 	%c1, %c1, %gain;
	mul.ftz.f32 	%c2, %c2, %gain;
	mul.ftz.f32 	%c3, %c3, %gain;

UNPREMUL_RESTORE:
	// Step 4: %offset1..3 and %depthScale are only defined on the path that ran
	// step 2, which is exactly the path that reaches the fma instructions.
	.loc 2 51 1
	@%skipOffsets bra 	UNPREMUL_DONE;

	.loc 2 51 120
	fma.rn.ftz.f32 	%c1, %offset1, %depthScale, %c1;
	fma.rn.ftz.f32 	%c2, %offset2, %depthScale, %c2;
	fma.rn.ftz.f32 	%c3, %offset3, %depthScale, %c3;

UNPREMUL_DONE:
	st.param.f32	[func_retval0+0], %c0;
	st.param.f32	[func_retval0+4], %c1;
	st.param.f32	[func_retval0+8], %c2;
	st.param.f32	[func_retval0+12], %c3;
	.loc 2 51 1
	ret;
}

// _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii - demangles to
// WritePixelFormat_444(float4, float4*, int, unsigned int, DevicePixelFormat, int, int).
//
// Stores one four-component pixel (param_0, components c0..c3 at byte offsets
// 0/4/8/12) into the buffer at param_1, at element
//     index = param_6 * param_2 + param_5
// (presumably y * pitch + x - inferred, not stated here). The element size and
// encoding depend on (param_3 & 248):
//     0     -> 4 bytes : c0..c3 truncated toward zero to u32, low byte of each stored
//     8     -> 4 bytes : (u32(c0) << 2) + (u32(c1) << 12) + (u32(c2) << 22); c3 unused
//     16    -> 8 bytes : c0..c3 truncated to u32, low 16 bits of each stored
//     other -> param_4 != 0 : 16 bytes, c0..c3 stored as f32
//              param_4 == 0 : 8 bytes, c0..c3 rounded to nearest-even f16
// All stores use generic addressing. No return value.
.visible .func _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii(
	.param .align 16 .b8 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0[16],
	.param .b64 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_1,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_2,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_3,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_4,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_5,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_6
)
{
	.reg .pred 	%isDepth0, %isDepth8, %isDepth16, %useHalf;
	.reg .s16 	%narrow<4>;
	.reg .b16 	%half<4>;
	.reg .s32 	%arg2, %format, %arg4, %arg5, %arg6, %index, %depthBits;
	.reg .s32 	%whole<4>, %field<3>, %packed;
	.reg .f32 	%c<4>;
	.reg .s64 	%base, %byteOffset, %dst;

	ld.param.f32 	%c0, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0];
	ld.param.f32 	%c1, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0+4];
	ld.param.f32 	%c2, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0+8];
	ld.param.f32 	%c3, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0+12];
	ld.param.u64 	%base, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_1];
	ld.param.u32 	%arg2, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_2];
	ld.param.u32 	%format, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_3];
	ld.param.u32 	%arg4, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_4];
	ld.param.u32 	%arg5, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_5];
	ld.param.u32 	%arg6, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_6];

	.loc 2 51 1
	mad.lo.s32 	%index, %arg6, %arg2, %arg5;
	and.b32  	%depthBits, %format, 248;

	// Dispatch on the depth field; the element address is formed in each branch
	// because the element size differs.
	setp.eq.s32	%isDepth16, %depthBits, 16;
	@%isDepth16 bra 	WRITE444_U16;
	setp.eq.s32	%isDepth8, %depthBits, 8;
	@%isDepth8 bra 	WRITE444_PACKED;
	setp.eq.s32	%isDepth0, %depthBits, 0;
	@%isDepth0 bra 	WRITE444_U8;

	// Any other depth value: floating-point output, width chosen by param_4.
	setp.eq.s32	%useHalf, %arg4, 0;
	@%useHalf bra 	WRITE444_F16;

	// 16-byte element: four f32 values stored unchanged.
	mul.wide.s32 	%byteOffset, %index, 16;
	add.s64 	%dst, %base, %byteOffset;
	st.v4.f32 	[%dst], {%c0, %c1, %c2, %c3};
	ret;

WRITE444_F16:
	// 8-byte element: four f16 values, round-to-nearest-even, denormals flushed.
	.loc 3 3513 10
	cvt.rn.ftz.f16.f32 	%half0, %c0;
	cvt.rn.ftz.f16.f32 	%half1, %c1;
	cvt.rn.ftz.f16.f32 	%half2, %c2;
	cvt.rn.ftz.f16.f32 	%half3, %c3;
	.loc 2 51 231
	mul.wide.s32 	%byteOffset, %index, 8;
	add.s64 	%dst, %base, %byteOffset;
	st.v4.u16 	[%dst], {%half0, %half1, %half2, %half3};
	ret;

WRITE444_U16:
	// 8-byte element: four 16-bit integers, each truncated toward zero.
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%whole0, %c0;
	cvt.rzi.ftz.u32.f32	%whole1, %c1;
	cvt.rzi.ftz.u32.f32	%whole2, %c2;
	cvt.rzi.ftz.u32.f32	%whole3, %c3;
	cvt.u16.u32	%narrow0, %whole0;
	cvt.u16.u32	%narrow1, %whole1;
	cvt.u16.u32	%narrow2, %whole2;
	cvt.u16.u32	%narrow3, %whole3;
	.loc 2 51 1
	mul.wide.s32 	%byteOffset, %index, 8;
	add.s64 	%dst, %base, %byteOffset;
	st.v4.u16 	[%dst], {%narrow0, %narrow1, %narrow2, %narrow3};
	ret;

WRITE444_PACKED:
	// 4-byte element: three fields at bit positions 2, 12 and 22, combined with
	// integer adds (not ORs), so an out-of-range component carries into the next
	// field. c3 is not written.
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%whole0, %c0;
	cvt.rzi.ftz.u32.f32	%whole1, %c1;
	cvt.rzi.ftz.u32.f32	%whole2, %c2;
	.loc 2 51 65
	shl.b32 	%field0, %whole0, 2;
	.loc 2 51 127
	shl.b32 	%field1, %whole1, 12;
	.loc 2 51 191
	shl.b32 	%field2, %whole2, 22;
	add.s32 	%packed, %field1, %field0;
	add.s32 	%packed, %packed, %field2;
	.loc 2 51 1
	mul.wide.s32 	%byteOffset, %index, 4;
	add.s64 	%dst, %base, %byteOffset;
	st.u32 	[%dst], %packed;
	ret;

WRITE444_U8:
	// 4-byte element: four bytes; st.v4.u8 keeps the low 8 bits of each value.
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%whole0, %c0;
	cvt.rzi.ftz.u32.f32	%whole1, %c1;
	cvt.rzi.ftz.u32.f32	%whole2, %c2;
	cvt.rzi.ftz.u32.f32	%whole3, %c3;
	cvt.u16.u32	%narrow0, %whole0;
	cvt.u16.u32	%narrow1, %whole1;
	cvt.u16.u32	%narrow2, %whole2;
	cvt.u16.u32	%narrow3, %whole3;
	.loc 2 51 1
	mul.wide.s32 	%byteOffset, %index, 4;
	add.s64 	%dst, %base, %byteOffset;
	st.v4.u8 	[%dst], {%narrow0, %narrow1, %narrow2, %narrow3};
	.loc 2 51 2
	ret;
}

.visible .func _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii(
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_0,
	.param .b64 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_1,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_2,
	.param .b64 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_3,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_4,
	.param .b64 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_5,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_6,
	.param .b64 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_7,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_8,
	.param .b64 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_9,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_10,
	.param .b64 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_11,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_12,
	.param .b64 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_13,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_14,
	.param .b64 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_15,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_16,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_17,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_18,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_19,
	.param .b32 _Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_20
)
{
	.reg .pred 	%p<185>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<203>;
	.reg .f32 	%f<529>;
	.reg .s64 	%rd<49>;


	ld.param.u32 	%r38, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_0];
	ld.param.u32 	%r39, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_2];
	ld.param.u32 	%r40, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_4];
	ld.param.u32 	%r41, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_6];
	ld.param.u32 	%r42, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_8];
	ld.param.u32 	%r43, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_10];
	ld.param.u32 	%r44, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_12];
	ld.param.u32 	%r45, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_14];
	ld.param.u64 	%rd10, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_15];
	ld.param.u32 	%r46, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_16];
	ld.param.u32 	%r47, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_17];
	ld.param.u32 	%r48, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_18];
	ld.param.u32 	%r49, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_19];
	ld.param.u32 	%r50, [_Z29PixelFormatConvert_420_To_444iPviS_iS_iS_iS_iS_iS_jP6float4ij17DevicePixelFormatii_param_20];
	.loc 1 129 1
	mov.u32 	%r51, %ctaid.x;
	mov.u32 	%r52, %ntid.x;
	mov.u32 	%r53, %tid.x;
	mad.lo.s32 	%r1, %r52, %r51, %r53;
	mov.u32 	%r54, %ntid.y;
	mov.u32 	%r55, %ctaid.y;
	mov.u32 	%r56, %tid.y;
	mad.lo.s32 	%r2, %r54, %r55, %r56;
	.loc 1 129 1
	setp.lt.s32	%p1, %r2, %r50;
	setp.lt.s32	%p2, %r1, %r49;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	@!%p3 bra 	BB5_236;
	bra.uni 	BB5_1;

BB5_1:
	.loc 1 129 1
	cvt.rn.f32.s32	%f183, %r1;
	add.ftz.f32 	%f184, %f183, 0f3F000000;
	mul.ftz.f32 	%f185, %f184, 0f3F000000;
	.loc 1 129 1
	and.b32  	%r61, %r45, 67092480;
	setp.eq.s32	%p4, %r61, 4194304;
	setp.eq.s32	%p5, %r61, 8388608;
	or.pred  	%p6, %p4, %p5;
	.loc 1 129 1
	add.ftz.f32 	%f186, %f185, 0f3E800000;
	.loc 1 129 1
	selp.f32	%f1, %f186, %f185, %p6;
	.loc 1 129 1
	add.s32 	%r62, %r1, %r38;
	cvt.rn.f32.s32	%f187, %r62;
	add.ftz.f32 	%f181, %f187, 0f3F000000;
	cvt.rn.f32.s32	%f188, %r2;
	add.ftz.f32 	%f182, %f188, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r57, %r58, %r59, %r60}, [inYPlaneTexture, {%f181, %f182}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f3, %r57;
	.loc 1 129 1
	@%p5 bra 	BB5_3;

	setp.ne.s32	%p7, %r61, 33554432;
	@%p7 bra 	BB5_4;

BB5_3:
	.loc 1 129 1
	cvt.rn.f32.s32	%f193, %r39;
	add.ftz.f32 	%f189, %f193, %f1;
	mul.ftz.f32 	%f192, %f182, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r63, %r64, %r65, %r66}, [inFrameCodedUPlaneTexture, {%f189, %f192}];
	// inline asm
	.loc 1 129 1
	cvt.rn.f32.s32	%f194, %r40;
	add.ftz.f32 	%f191, %f194, %f1;
	// inline asm
	tex.2d.v4.u32.f32 {%r67, %r68, %r69, %r70}, [inFrameCodedVPlaneTexture, {%f191, %f192}];
	// inline asm
	mov.u32 	%r202, %r67;
	mov.u32 	%r201, %r63;
	bra.uni 	BB5_7;

BB5_4:
	.loc 1 129 1
	and.b32  	%r5, %r2, 1;
	setp.eq.s32	%p8, %r5, 0;
	.loc 1 129 1
	@%p8 bra 	BB5_6;

	.loc 1 129 1
	selp.u32	%r79, 1, 0, %p8;
	shl.b32 	%r80, %r79, 1;
	shr.s32 	%r81, %r2, 31;
	shr.u32 	%r82, %r81, 30;
	add.s32 	%r83, %r2, %r82;
	and.b32  	%r84, %r83, 2147483644;
	sub.s32 	%r85, %r2, %r84;
	add.s32 	%r86, %r85, %r80;
	shl.b32 	%r87, %r86, 1;
	add.s32 	%r88, %r87, -1;
	shr.s32 	%r89, %r83, 2;
	cvt.rn.f32.s32	%f199, %r89;
	cvt.rn.f32.s32	%f200, %r88;
	mov.f32 	%f201, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f202, %f200, %f201;
	.loc 1 129 110
	add.ftz.f32 	%f198, %f199, %f202;
	.loc 1 129 1
	cvt.rn.f32.s32	%f203, %r43;
	add.ftz.f32 	%f195, %f203, %f1;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r71, %r72, %r73, %r74}, [inLowerUPlaneTexture, {%f195, %f198}];
	// inline asm
	.loc 1 129 1
	cvt.rn.f32.s32	%f204, %r44;
	add.ftz.f32 	%f197, %f204, %f1;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r75, %r76, %r77, %r78}, [inLowerVPlaneTexture, {%f197, %f198}];
	// inline asm
	mov.u32 	%r202, %r75;
	mov.u32 	%r201, %r71;
	bra.uni 	BB5_7;

BB5_6:
	.loc 1 129 1
	selp.u32	%r98, 1, 0, %p8;
	shl.b32 	%r99, %r98, 1;
	shr.s32 	%r100, %r2, 31;
	shr.u32 	%r101, %r100, 30;
	add.s32 	%r102, %r2, %r101;
	and.b32  	%r103, %r102, 2147483644;
	sub.s32 	%r104, %r2, %r103;
	add.s32 	%r105, %r104, %r99;
	shl.b32 	%r106, %r105, 1;
	add.s32 	%r107, %r106, -1;
	shr.s32 	%r108, %r102, 2;
	cvt.rn.f32.s32	%f209, %r108;
	cvt.rn.f32.s32	%f210, %r107;
	mov.f32 	%f211, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f212, %f210, %f211;
	.loc 1 129 110
	add.ftz.f32 	%f208, %f209, %f212;
	.loc 1 129 1
	cvt.rn.f32.s32	%f213, %r41;
	add.ftz.f32 	%f205, %f213, %f1;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r90, %r91, %r92, %r93}, [inUpperUPlaneTexture, {%f205, %f208}];
	// inline asm
	.loc 1 129 1
	cvt.rn.f32.s32	%f214, %r42;
	add.ftz.f32 	%f207, %f214, %f1;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r94, %r95, %r96, %r97}, [inUpperVPlaneTexture, {%f207, %f208}];
	// inline asm
	mov.u32 	%r202, %r94;
	mov.u32 	%r201, %r90;

BB5_7:
	.loc 1 129 1
	mov.b32 	 %f215, %r202;
	mul.ftz.f32 	%f216, %f215, 0f437F0000;
	mov.b32 	 %f217, %r201;
	mul.ftz.f32 	%f218, %f217, 0f437F0000;
	and.b32  	%r109, %r45, 8192;
	setp.eq.s32	%p11, %r109, 0;
	selp.f32	%f458, 0f00000000, %f216, %p11;
	mul.ftz.f32 	%f219, %f3, 0f437F0000;
	selp.f32	%f5, %f219, %f218, %p11;
	selp.f32	%f6, %f218, %f219, %p11;
	selp.f32	%f523, %f216, 0f00000000, %p11;
	and.b32  	%r110, %r45, 768;
	setp.ne.s32	%p12, %r110, 512;
	mov.f32 	%f490, %f5;
	mov.f32 	%f522, %f6;
	@%p12 bra 	BB5_15;

	and.b32  	%r111, %r47, 768;
	setp.eq.s32	%p13, %r111, 512;
	mov.f32 	%f459, %f5;
	mov.f32 	%f490, %f459;
	mov.f32 	%f491, %f6;
	mov.f32 	%f522, %f491;
	@%p13 bra 	BB5_15;

	.loc 2 51 1
	and.b32  	%r12, %r45, 248;
	setp.ne.s32	%p14, %r12, 0;
	@%p14 bra 	BB5_11;

	mov.f32 	%f428, 0f437F0000;
	bra.uni 	BB5_14;

BB5_11:
	.loc 2 51 1
	setp.ne.s32	%p15, %r12, 8;
	@%p15 bra 	BB5_13;

	mov.f32 	%f428, 0f447FC000;
	bra.uni 	BB5_14;

BB5_13:
	.loc 2 51 1
	setp.eq.s32	%p16, %r12, 16;
	.loc 2 51 1
	selp.f32	%f428, 0f47000000, 0f3F800000, %p16;

BB5_14:
	and.b32  	%r112, %r45, 4096;
	setp.eq.s32	%p17, %r112, 0;
	ld.const.f32 	%f222, [kYCbCrOffset];
	ld.const.f32 	%f223, [kYCbCrFullRangeOffset];
	selp.f32	%f224, %f223, %f222, %p17;
	mov.f32 	%f225, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f226, %f428, %f225;
	mul.ftz.f32 	%f227, %f224, %f226;
	sub.ftz.f32 	%f490, %f5, %f227;
	ld.const.f32 	%f228, [kYCbCrOffset+4];
	ld.const.f32 	%f229, [kYCbCrFullRangeOffset+4];
	selp.f32	%f230, %f229, %f228, %p17;
	mul.ftz.f32 	%f231, %f230, %f226;
	sub.ftz.f32 	%f522, %f6, %f231;
	ld.const.f32 	%f232, [kYCbCrOffset+8];
	ld.const.f32 	%f233, [kYCbCrFullRangeOffset+8];
	selp.f32	%f234, %f233, %f232, %p17;
	mul.ftz.f32 	%f235, %f234, %f226;
	sub.ftz.f32 	%f523, %f523, %f235;

BB5_15:
	mov.f32 	%f520, %f522;
	mov.f32 	%f488, %f490;
	and.b32  	%r113, %r47, 1024;
	setp.eq.s32	%p18, %r113, 0;
	shr.u32 	%r114, %r45, 10;
	and.b32  	%r115, %r114, 1;
	setp.eq.b32	%p19, %r115, 1;
	and.pred  	%p20, %p18, %p19;
	@!%p20 bra 	BB5_25;
	bra.uni 	BB5_16;

BB5_16:
	setp.ltu.ftz.f32	%p21, %f488, 0f00000000;
	@%p21 bra 	BB5_18;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f236, %f488;
	mul.ftz.f32 	%f237, %f236, 0f3EE66666;
	ex2.approx.ftz.f32 	%f489, %f237;
	bra.uni 	BB5_19;

BB5_18:
	neg.ftz.f32 	%f238, %f488;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f239, %f238;
	mul.ftz.f32 	%f240, %f239, 0f3EE66666;
	ex2.approx.ftz.f32 	%f241, %f240;
	neg.ftz.f32 	%f489, %f241;

BB5_19:
	mov.f32 	%f488, %f489;
	setp.ltu.ftz.f32	%p22, %f520, 0f00000000;
	@%p22 bra 	BB5_21;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f242, %f520;
	mul.ftz.f32 	%f243, %f242, 0f3EE66666;
	ex2.approx.ftz.f32 	%f521, %f243;
	bra.uni 	BB5_22;

BB5_21:
	neg.ftz.f32 	%f244, %f520;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f245, %f244;
	mul.ftz.f32 	%f246, %f245, 0f3EE66666;
	ex2.approx.ftz.f32 	%f247, %f246;
	neg.ftz.f32 	%f521, %f247;

BB5_22:
	mov.f32 	%f520, %f521;
	setp.ltu.ftz.f32	%p23, %f523, 0f00000000;
	@%p23 bra 	BB5_24;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f248, %f523;
	mul.ftz.f32 	%f249, %f248, 0f3EE66666;
	ex2.approx.ftz.f32 	%f523, %f249;
	bra.uni 	BB5_25;

BB5_24:
	neg.ftz.f32 	%f250, %f523;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f251, %f250;
	mul.ftz.f32 	%f252, %f251, 0f3EE66666;
	ex2.approx.ftz.f32 	%f253, %f252;
	neg.ftz.f32 	%f523, %f253;

BB5_25:
	mov.f32 	%f25, %f520;
	mov.f32 	%f24, %f488;
	xor.b32  	%r13, %r47, %r45;
	and.b32  	%r116, %r13, 2560;
	and.b32  	%r14, %r45, 512;
	setp.ne.s32	%p24, %r116, 0;
	@%p24 bra 	BB5_27;

	setp.eq.s32	%p25, %r14, 0;
	and.b32  	%r117, %r45, 256;
	setp.eq.s32	%p26, %r117, 0;
	selp.u32	%r118, 1, 0, %p26;
	selp.b32	%r119, 0, %r118, %p25;
	and.b32  	%r120, %r47, 256;
	setp.eq.s32	%p27, %r120, 0;
	selp.u32	%r121, 1, 0, %p27;
	shr.u32 	%r122, %r47, 9;
	and.b32  	%r123, %r122, %r121;
	setp.eq.s32	%p28, %r119, %r123;
	@%p28 bra 	BB5_131;

BB5_27:
	setp.eq.s32	%p29, %r14, 0;
	.loc 2 51 1
	@%p29 bra 	BB5_45;

	.loc 2 51 1
	and.b32  	%r124, %r45, 2048;
	setp.eq.s32	%p30, %r124, 0;
	@%p30 bra 	BB5_37;

	.loc 2 51 1
	and.b32  	%r125, %r45, 4096;
	setp.eq.s32	%p31, %r125, 0;
	.loc 2 51 1
	@%p31 bra 	BB5_36;

	.loc 2 51 1
	and.b32  	%r126, %r47, 512;
	setp.eq.s32	%p32, %r126, 0;
	.loc 2 51 1
	and.b32  	%r15, %r45, 256;
	.loc 2 51 1
	@%p32 bra 	BB5_33;

	.loc 2 51 1
	setp.ne.s32	%p33, %r15, 0;
	@%p33 bra 	BB5_54;

	.loc 2 51 1
	and.b32  	%r127, %r47, 4096;
	setp.eq.s32	%p34, %r127, 0;
	.loc 2 51 1
	shr.u32 	%r128, %r47, 8;
	and.b32  	%r129, %r128, 1;
	setp.eq.b32	%p35, %r129, 1;
	or.pred  	%p36, %p34, %p35;
	mov.u64 	%rd48, k709YCbCr_To_601YCbCr;
	.loc 2 51 1
	@%p36 bra 	BB5_54;
	bra.uni 	BB5_56;

BB5_33:
	setp.eq.s32	%p37, %r15, 0;
	.loc 2 51 1
	@%p37 bra 	BB5_35;

	.loc 2 51 1
	and.b32  	%r130, %r47, 256;
	setp.eq.s32	%p38, %r130, 0;
	mov.u64 	%rd48, k709YPbPr_To_RGB32f;
	.loc 2 51 1
	@%p38 bra 	BB5_54;
	bra.uni 	BB5_56;

BB5_35:
	.loc 2 51 1
	and.b32  	%r131, %r47, 256;
	setp.eq.s32	%p39, %r131, 0;
	.loc 2 51 1
	mov.u64 	%rd26, k709YCbCr_To_RGB32f;
	mov.u64 	%rd27, k709YCbCr_To_RGB8u;
	selp.b64	%rd48, %rd27, %rd26, %p39;
	bra.uni 	BB5_56;

BB5_36:
	.loc 2 51 1
	and.b32  	%r132, %r47, 256;
	setp.eq.s32	%p40, %r132, 0;
	.loc 2 51 1
	shr.u32 	%r133, %r47, 9;
	and.b32  	%r134, %r133, 1;
	setp.eq.b32	%p41, %r134, 1;
	or.pred  	%p42, %p40, %p41;
	mov.u64 	%rd48, k709YCbCrFullRange_To_RGB32f;
	.loc 2 51 1
	@%p42 bra 	BB5_54;
	bra.uni 	BB5_56;

BB5_37:
	.loc 2 51 1
	and.b32  	%r135, %r45, 4096;
	setp.eq.s32	%p43, %r135, 0;
	.loc 2 51 1
	@%p43 bra 	BB5_43;

	.loc 2 51 1
	and.b32  	%r136, %r47, 512;
	setp.eq.s32	%p44, %r136, 0;
	.loc 2 51 1
	and.b32  	%r16, %r45, 256;
	.loc 2 51 1
	@%p44 bra 	BB5_40;

	.loc 2 51 1
	setp.ne.s32	%p45, %r16, 0;
	shr.u32 	%r137, %r47, 8;
	and.b32  	%r138, %r137, 1;
	setp.eq.b32	%p46, %r138, 1;
	or.pred  	%p47, %p45, %p46;
	mov.u64 	%rd48, k601YCbCr_To_709YCbCr;
	.loc 2 51 1
	@%p47 bra 	BB5_54;
	bra.uni 	BB5_56;

BB5_40:
	setp.eq.s32	%p48, %r16, 0;
	.loc 2 51 1
	@%p48 bra 	BB5_42;

	.loc 2 51 1
	and.b32  	%r139, %r47, 256;
	setp.eq.s32	%p49, %r139, 0;
	mov.u64 	%rd48, k601YPbPr_To_RGB32f;
	.loc 2 51 1
	@%p49 bra 	BB5_54;
	bra.uni 	BB5_56;

BB5_42:
	.loc 2 51 1
	and.b32  	%r140, %r47, 256;
	setp.eq.s32	%p50, %r140, 0;
	.loc 2 51 1
	mov.u64 	%rd31, k601YCbCr_To_RGB32f;
	mov.u64 	%rd32, k601YCbCr_To_RGB8u;
	selp.b64	%rd48, %rd32, %rd31, %p50;
	bra.uni 	BB5_56;

BB5_43:
	.loc 2 51 1
	and.b32  	%r141, %r47, 512;
	setp.ne.s32	%p51, %r141, 0;
	@%p51 bra 	BB5_54;

	.loc 2 51 1
	and.b32  	%r142, %r47, 256;
	setp.eq.s32	%p52, %r142, 0;
	.loc 2 51 1
	mov.u64 	%rd33, k601YCbCrFullRange_To_RGB32f;
	mov.u64 	%rd34, k601YCbCrFullRange_To_RGB8u;
	selp.b64	%rd48, %rd34, %rd33, %p52;
	bra.uni 	BB5_56;

BB5_45:
	.loc 2 51 1
	and.b32  	%r143, %r45, 256;
	setp.eq.s32	%p53, %r143, 0;
	@%p53 bra 	BB5_51;

	.loc 2 51 1
	and.b32  	%r144, %r47, 256;
	setp.eq.s32	%p54, %r144, 0;
	.loc 2 51 1
	@%p54 bra 	BB5_48;

	.loc 2 51 1
	and.b32  	%r145, %r47, 2048;
	setp.eq.s32	%p55, %r145, 0;
	.loc 2 51 1
	mov.u64 	%rd35, kRGB32f_To_709YPbPr;
	mov.u64 	%rd36, kRGB32f_To_601YPbPr;
	selp.b64	%rd48, %rd36, %rd35, %p55;
	bra.uni 	BB5_56;

BB5_48:
	.loc 2 51 1
	and.b32  	%r146, %r47, 2048;
	setp.eq.s32	%p56, %r146, 0;
	.loc 2 51 1
	and.b32  	%r17, %r47, 4096;
	.loc 2 51 1
	@%p56 bra 	BB5_50;

	setp.eq.s32	%p57, %r17, 0;
	mov.u64 	%rd48, kRGB32f_To_709YCbCr;
	.loc 2 51 1
	@%p57 bra 	BB5_54;
	bra.uni 	BB5_56;

BB5_50:
	setp.eq.s32	%p58, %r17, 0;
	.loc 2 51 1
	mov.u64 	%rd38, kRGB32f_To_601YCbCr;
	mov.u64 	%rd39, kRGB32f_To_601YCbCrFullRange;
	selp.b64	%rd48, %rd39, %rd38, %p58;
	bra.uni 	BB5_56;

BB5_51:
	.loc 2 51 1
	and.b32  	%r147, %r47, 256;
	setp.ne.s32	%p59, %r147, 0;
	@%p59 bra 	BB5_54;

	.loc 2 51 1
	and.b32  	%r148, %r47, 2048;
	setp.eq.s32	%p60, %r148, 0;
	.loc 2 51 1
	and.b32  	%r18, %r47, 4096;
	.loc 2 51 1
	@%p60 bra 	BB5_55;

	setp.eq.s32	%p61, %r18, 0;
	mov.u64 	%rd48, kRGB8u_To_709YCbCr;
	.loc 2 51 1
	@%p61 bra 	BB5_56;

BB5_54:
	mov.u64 	%rd48, 0;
	bra.uni 	BB5_56;

BB5_55:
	setp.eq.s32	%p62, %r18, 0;
	.loc 2 51 1
	mov.u64 	%rd42, kRGB8u_To_601YCbCr;
	mov.u64 	%rd43, kRGB8u_To_601YCbCrFullRange;
	selp.b64	%rd48, %rd43, %rd42, %p62;

BB5_56:
	ld.const.f32 	%f254, [%rd48];
	ld.const.f32 	%f255, [%rd48+4];
	mul.ftz.f32 	%f256, %f25, %f255;
	fma.rn.ftz.f32 	%f257, %f24, %f254, %f256;
	ld.const.f32 	%f258, [%rd48+8];
	fma.rn.ftz.f32 	%f27, %f523, %f258, %f257;
	ld.const.f32 	%f259, [%rd48+12];
	ld.const.f32 	%f260, [%rd48+16];
	mul.ftz.f32 	%f261, %f25, %f260;
	fma.rn.ftz.f32 	%f262, %f24, %f259, %f261;
	ld.const.f32 	%f263, [%rd48+20];
	fma.rn.ftz.f32 	%f28, %f523, %f263, %f262;
	ld.const.f32 	%f264, [%rd48+24];
	ld.const.f32 	%f265, [%rd48+28];
	mul.ftz.f32 	%f266, %f25, %f265;
	fma.rn.ftz.f32 	%f267, %f24, %f264, %f266;
	ld.const.f32 	%f268, [%rd48+32];
	fma.rn.ftz.f32 	%f523, %f523, %f268, %f267;
	and.b32  	%r150, %r13, 248;
	setp.eq.s32	%p63, %r150, 0;
	mov.f32 	%f487, %f27;
	mov.f32 	%f519, %f28;
	@%p63 bra 	BB5_143;

	and.b32  	%r19, %r45, 256;
	and.b32  	%r20, %r47, 248;
	setp.eq.s32	%p64, %r20, 0;
	shr.u32 	%r151, %r19, 8;
	and.b32  	%r152, %r151, 1;
	setp.eq.b32	%p65, %r152, 1;
	and.pred  	%p66, %p64, %p65;
	@%p66 bra 	BB5_120;

	and.b32  	%r21, %r45, 248;
	setp.eq.s32	%p67, %r21, 0;
	and.b32  	%r22, %r47, 256;
	shr.u32 	%r153, %r22, 8;
	and.b32  	%r154, %r153, 1;
	setp.eq.b32	%p68, %r154, 1;
	and.pred  	%p69, %p67, %p68;
	@%p69 bra 	BB5_120;

	setp.ne.s32	%p70, %r22, 0;
	setp.eq.s32	%p71, %r19, 0;
	and.pred  	%p72, %p71, %p70;
	@%p72 bra 	BB5_93;

	setp.ne.s32	%p73, %r19, 0;
	setp.eq.s32	%p74, %r22, 0;
	and.pred  	%p75, %p74, %p73;
	@%p75 bra 	BB5_67;

	.loc 2 51 1
	setp.ne.s32	%p76, %r21, 0;
	@%p76 bra 	BB5_63;

	mov.f32 	%f429, 0f437F0000;
	bra.uni 	BB5_66;

BB5_63:
	.loc 2 51 1
	setp.ne.s32	%p77, %r21, 8;
	@%p77 bra 	BB5_65;

	mov.f32 	%f429, 0f447FC000;
	bra.uni 	BB5_66;

BB5_65:
	.loc 2 51 1
	setp.eq.s32	%p78, %r21, 16;
	.loc 2 51 1
	selp.f32	%f429, 0f47000000, 0f3F800000, %p78;

BB5_66:
	mov.f32 	%f271, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f443, %f429, %f271;
	mov.f32 	%f444, %f443;
	mov.f32 	%f445, %f443;
	mov.f32 	%f446, %f443;
	bra.uni 	BB5_119;

BB5_67:
	.loc 2 51 1
	setp.ne.s32	%p79, %r20, 0;
	@%p79 bra 	BB5_69;

	mov.f32 	%f430, 0f437F0000;
	bra.uni 	BB5_72;

BB5_69:
	.loc 2 51 1
	setp.ne.s32	%p80, %r20, 8;
	@%p80 bra 	BB5_71;

	mov.f32 	%f430, 0f447FC000;
	bra.uni 	BB5_72;

BB5_71:
	.loc 2 51 1
	setp.eq.s32	%p81, %r20, 16;
	.loc 2 51 1
	selp.f32	%f430, 0f47000000, 0f3F800000, %p81;

BB5_72:
	.loc 2 51 1
	setp.ne.s32	%p82, %r21, 0;
	@%p82 bra 	BB5_74;

	mov.f32 	%f431, 0f437F0000;
	bra.uni 	BB5_77;

BB5_74:
	.loc 2 51 1
	setp.ne.s32	%p83, %r21, 8;
	@%p83 bra 	BB5_76;

	mov.f32 	%f431, 0f447FC000;
	bra.uni 	BB5_77;

BB5_76:
	.loc 2 51 1
	setp.eq.s32	%p84, %r21, 16;
	.loc 2 51 1
	selp.f32	%f431, 0f47000000, 0f3F800000, %p84;

BB5_77:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f446, %f430, %f431;
	.loc 2 51 1
	@%p79 bra 	BB5_79;

	mov.f32 	%f432, 0f437F0000;
	bra.uni 	BB5_82;

BB5_79:
	.loc 2 51 1
	setp.ne.s32	%p86, %r20, 8;
	@%p86 bra 	BB5_81;

	mov.f32 	%f432, 0f447FC000;
	bra.uni 	BB5_82;

BB5_81:
	.loc 2 51 1
	setp.eq.s32	%p87, %r20, 16;
	.loc 2 51 1
	selp.f32	%f432, 0f47000000, 0f3F800000, %p87;

BB5_82:
	mov.f32 	%f278, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f445, %f432, %f278;
	.loc 2 51 1
	@%p79 bra 	BB5_84;

	mov.f32 	%f433, 0f437F0000;
	bra.uni 	BB5_87;

BB5_84:
	.loc 2 51 1
	setp.ne.s32	%p89, %r20, 8;
	@%p89 bra 	BB5_86;

	mov.f32 	%f433, 0f447FC000;
	bra.uni 	BB5_87;

BB5_86:
	.loc 2 51 1
	setp.eq.s32	%p90, %r20, 16;
	.loc 2 51 1
	selp.f32	%f433, 0f47000000, 0f3F800000, %p90;

BB5_87:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f444, %f433, %f278;
	.loc 2 51 1
	@%p79 bra 	BB5_89;

	mov.f32 	%f434, %f278;
	bra.uni 	BB5_92;

BB5_89:
	.loc 2 51 1
	setp.ne.s32	%p92, %r20, 8;
	@%p92 bra 	BB5_91;

	mov.f32 	%f282, 0f447FC000;
	.loc 2 51 1
	mov.f32 	%f434, %f282;
	bra.uni 	BB5_92;

BB5_91:
	.loc 2 51 1
	setp.eq.s32	%p93, %r20, 16;
	.loc 2 51 1
	selp.f32	%f44, 0f47000000, 0f3F800000, %p93;
	mov.f32 	%f434, %f44;

BB5_92:
	.loc 2 51 1
	mov.f32 	%f45, %f434;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f443, %f45, %f278;
	bra.uni 	BB5_119;

BB5_93:
	.loc 2 51 1
	setp.ne.s32	%p94, %r20, 0;
	@%p94 bra 	BB5_95;

	mov.f32 	%f435, 0f437F0000;
	bra.uni 	BB5_98;

BB5_95:
	.loc 2 51 1
	setp.ne.s32	%p95, %r20, 8;
	@%p95 bra 	BB5_97;

	mov.f32 	%f435, 0f447FC000;
	bra.uni 	BB5_98;

BB5_97:
	.loc 2 51 1
	setp.eq.s32	%p96, %r20, 16;
	.loc 2 51 1
	selp.f32	%f435, 0f47000000, 0f3F800000, %p96;

BB5_98:
	.loc 2 51 1
	setp.ne.s32	%p97, %r21, 0;
	@%p97 bra 	BB5_100;

	mov.f32 	%f436, 0f437F0000;
	bra.uni 	BB5_103;

BB5_100:
	.loc 2 51 1
	setp.ne.s32	%p98, %r21, 8;
	@%p98 bra 	BB5_102;

	mov.f32 	%f436, 0f447FC000;
	bra.uni 	BB5_103;

BB5_102:
	.loc 2 51 1
	setp.eq.s32	%p99, %r21, 16;
	.loc 2 51 1
	selp.f32	%f436, 0f47000000, 0f3F800000, %p99;

BB5_103:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f446, %f435, %f436;
	.loc 2 51 1
	@%p97 bra 	BB5_105;

	mov.f32 	%f437, 0f437F0000;
	bra.uni 	BB5_108;

BB5_105:
	.loc 2 51 1
	setp.ne.s32	%p101, %r21, 8;
	@%p101 bra 	BB5_107;

	mov.f32 	%f437, 0f447FC000;
	bra.uni 	BB5_108;

BB5_107:
	.loc 2 51 1
	setp.eq.s32	%p102, %r21, 16;
	.loc 2 51 1
	selp.f32	%f437, 0f47000000, 0f3F800000, %p102;

BB5_108:
	mov.f32 	%f291, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f445, %f291, %f437;
	.loc 2 51 1
	@%p97 bra 	BB5_110;

	mov.f32 	%f438, 0f437F0000;
	bra.uni 	BB5_113;

BB5_110:
	.loc 2 51 1
	setp.ne.s32	%p104, %r21, 8;
	@%p104 bra 	BB5_112;

	mov.f32 	%f438, 0f447FC000;
	bra.uni 	BB5_113;

BB5_112:
	.loc 2 51 1
	setp.eq.s32	%p105, %r21, 16;
	.loc 2 51 1
	selp.f32	%f438, 0f47000000, 0f3F800000, %p105;

BB5_113:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f444, %f291, %f438;
	.loc 2 51 1
	@%p97 bra 	BB5_115;

	mov.f32 	%f439, %f291;
	bra.uni 	BB5_118;

BB5_115:
	.loc 2 51 1
	setp.ne.s32	%p107, %r21, 8;
	@%p107 bra 	BB5_117;

	mov.f32 	%f295, 0f447FC000;
	.loc 2 51 1
	mov.f32 	%f439, %f295;
	bra.uni 	BB5_118;

BB5_117:
	.loc 2 51 1
	setp.eq.s32	%p108, %r21, 16;
	.loc 2 51 1
	selp.f32	%f58, 0f47000000, 0f3F800000, %p108;
	mov.f32 	%f439, %f58;

BB5_118:
	.loc 2 51 1
	mov.f32 	%f59, %f439;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f443, %f291, %f59;

BB5_119:
	mul.ftz.f32 	%f523, %f523, %f443;
	mul.ftz.f32 	%f519, %f28, %f444;
	mul.ftz.f32 	%f487, %f27, %f445;
	mul.ftz.f32 	%f458, %f458, %f446;
	bra.uni 	BB5_143;

BB5_120:
	.loc 2 51 1
	setp.ne.s32	%p109, %r20, 0;
	@%p109 bra 	BB5_122;

	mov.f32 	%f447, 0f437F0000;
	bra.uni 	BB5_125;

BB5_122:
	.loc 2 51 1
	setp.ne.s32	%p110, %r20, 8;
	@%p110 bra 	BB5_124;

	mov.f32 	%f447, 0f447FC000;
	bra.uni 	BB5_125;

BB5_124:
	.loc 2 51 1
	setp.eq.s32	%p111, %r20, 16;
	.loc 2 51 1
	selp.f32	%f447, 0f47000000, 0f3F800000, %p111;

BB5_125:
	.loc 2 51 1
	and.b32  	%r23, %r45, 248;
	setp.ne.s32	%p112, %r23, 0;
	@%p112 bra 	BB5_127;

	mov.f32 	%f448, 0f437F0000;
	bra.uni 	BB5_130;

BB5_127:
	.loc 2 51 1
	setp.ne.s32	%p113, %r23, 8;
	@%p113 bra 	BB5_129;

	mov.f32 	%f448, 0f447FC000;
	bra.uni 	BB5_130;

BB5_129:
	.loc 2 51 1
	setp.eq.s32	%p114, %r23, 16;
	.loc 2 51 1
	selp.f32	%f448, 0f47000000, 0f3F800000, %p114;

BB5_130:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f72, %f447, %f448;
	mul.ftz.f32 	%f458, %f458, %f72;
	mov.f32 	%f487, %f27;
	mov.f32 	%f519, %f28;
	bra.uni 	BB5_143;

BB5_131:
	and.b32  	%r155, %r13, 248;
	setp.eq.s32	%p115, %r155, 0;
	mov.f32 	%f487, %f24;
	mov.f32 	%f519, %f25;
	@%p115 bra 	BB5_143;

	.loc 2 51 1
	and.b32  	%r24, %r47, 248;
	setp.ne.s32	%p116, %r24, 0;
	@%p116 bra 	BB5_134;

	mov.f32 	%f449, 0f437F0000;
	bra.uni 	BB5_137;

BB5_134:
	.loc 2 51 1
	setp.ne.s32	%p117, %r24, 8;
	@%p117 bra 	BB5_136;

	mov.f32 	%f449, 0f447FC000;
	bra.uni 	BB5_137;

BB5_136:
	.loc 2 51 1
	setp.eq.s32	%p118, %r24, 16;
	.loc 2 51 1
	selp.f32	%f449, 0f47000000, 0f3F800000, %p118;

BB5_137:
	.loc 2 51 1
	and.b32  	%r25, %r45, 248;
	setp.ne.s32	%p119, %r25, 0;
	@%p119 bra 	BB5_139;

	mov.f32 	%f450, 0f437F0000;
	bra.uni 	BB5_142;

BB5_139:
	.loc 2 51 1
	setp.ne.s32	%p120, %r25, 8;
	@%p120 bra 	BB5_141;

	mov.f32 	%f450, 0f447FC000;
	bra.uni 	BB5_142;

BB5_141:
	.loc 2 51 1
	setp.eq.s32	%p121, %r25, 16;
	.loc 2 51 1
	selp.f32	%f450, 0f47000000, 0f3F800000, %p121;

BB5_142:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f306, %f449, %f450;
	mul.ftz.f32 	%f458, %f458, %f306;
	mul.ftz.f32 	%f487, %f24, %f306;
	mul.ftz.f32 	%f519, %f25, %f306;
	mul.ftz.f32 	%f523, %f523, %f306;

BB5_143:
	mov.f32 	%f517, %f519;
	mov.f32 	%f485, %f487;
	and.b32  	%r156, %r45, 1024;
	setp.eq.s32	%p122, %r156, 0;
	shr.u32 	%r157, %r47, 10;
	and.b32  	%r158, %r157, 1;
	setp.eq.b32	%p123, %r158, 1;
	and.pred  	%p124, %p122, %p123;
	@!%p124 bra 	BB5_153;
	bra.uni 	BB5_144;

BB5_144:
	setp.ltu.ftz.f32	%p125, %f485, 0f00000000;
	@%p125 bra 	BB5_146;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f307, %f485;
	mul.ftz.f32 	%f308, %f307, 0f400E38E4;
	ex2.approx.ftz.f32 	%f486, %f308;
	bra.uni 	BB5_147;

BB5_146:
	neg.ftz.f32 	%f309, %f485;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f310, %f309;
	mul.ftz.f32 	%f311, %f310, 0f400E38E4;
	ex2.approx.ftz.f32 	%f312, %f311;
	neg.ftz.f32 	%f486, %f312;

BB5_147:
	mov.f32 	%f485, %f486;
	setp.ltu.ftz.f32	%p126, %f517, 0f00000000;
	@%p126 bra 	BB5_149;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f313, %f517;
	mul.ftz.f32 	%f314, %f313, 0f400E38E4;
	ex2.approx.ftz.f32 	%f518, %f314;
	bra.uni 	BB5_150;

BB5_149:
	neg.ftz.f32 	%f315, %f517;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f316, %f315;
	mul.ftz.f32 	%f317, %f316, 0f400E38E4;
	ex2.approx.ftz.f32 	%f318, %f317;
	neg.ftz.f32 	%f518, %f318;

BB5_150:
	mov.f32 	%f517, %f518;
	setp.ltu.ftz.f32	%p127, %f523, 0f00000000;
	@%p127 bra 	BB5_152;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f319, %f523;
	mul.ftz.f32 	%f320, %f319, 0f400E38E4;
	ex2.approx.ftz.f32 	%f523, %f320;
	bra.uni 	BB5_153;

BB5_152:
	neg.ftz.f32 	%f321, %f523;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f322, %f321;
	mul.ftz.f32 	%f323, %f322, 0f400E38E4;
	ex2.approx.ftz.f32 	%f324, %f323;
	neg.ftz.f32 	%f523, %f324;

BB5_153:
	mov.f32 	%f516, %f517;
	mov.f32 	%f484, %f485;
	setp.eq.s32	%p128, %r110, 512;
	and.b32  	%r26, %r47, 768;
	setp.ne.s32	%p129, %r26, 512;
	or.pred  	%p130, %p128, %p129;
	@%p130 bra 	BB5_160;

	.loc 2 51 1
	and.b32  	%r27, %r47, 248;
	setp.ne.s32	%p131, %r27, 0;
	@%p131 bra 	BB5_156;

	mov.f32 	%f451, 0f437F0000;
	bra.uni 	BB5_159;

BB5_156:
	.loc 2 51 1
	setp.ne.s32	%p132, %r27, 8;
	@%p132 bra 	BB5_158;

	mov.f32 	%f451, 0f447FC000;
	bra.uni 	BB5_159;

BB5_158:
	.loc 2 51 1
	setp.eq.s32	%p133, %r27, 16;
	.loc 2 51 1
	selp.f32	%f451, 0f47000000, 0f3F800000, %p133;

BB5_159:
	and.b32  	%r160, %r47, 4096;
	setp.eq.s32	%p134, %r160, 0;
	ld.const.f32 	%f327, [kYCbCrOffset];
	ld.const.f32 	%f328, [kYCbCrFullRangeOffset];
	selp.f32	%f329, %f328, %f327, %p134;
	mov.f32 	%f330, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f331, %f451, %f330;
	fma.rn.ftz.f32 	%f484, %f329, %f331, %f484;
	ld.const.f32 	%f332, [kYCbCrOffset+4];
	ld.const.f32 	%f333, [kYCbCrFullRangeOffset+4];
	selp.f32	%f334, %f333, %f332, %p134;
	fma.rn.ftz.f32 	%f516, %f334, %f331, %f516;
	ld.const.f32 	%f335, [kYCbCrOffset+8];
	ld.const.f32 	%f336, [kYCbCrFullRangeOffset+8];
	selp.f32	%f337, %f336, %f335, %p134;
	fma.rn.ftz.f32 	%f523, %f337, %f331, %f523;

BB5_160:
	mov.f32 	%f515, %f516;
	mov.f32 	%f483, %f484;
	and.b32  	%r162, %r13, 7;
	setp.eq.s32	%p135, %r162, 0;
	mov.f32 	%f479, %f483;
	mov.f32 	%f511, %f515;
	@%p135 bra 	BB5_220;

	and.b32  	%r28, %r45, 7;
	setp.ne.s32	%p136, %r28, 2;
	@%p136 bra 	BB5_163;

	or.b32  	%r163, %r47, 4;
	and.b32  	%r164, %r163, 7;
	setp.eq.s32	%p137, %r164, 4;
	@%p137 bra 	BB5_164;

BB5_163:
	or.b32  	%r165, %r28, 4;
	setp.eq.s32	%p138, %r165, 4;
	@%p138 bra 	BB5_164;
	bra.uni 	BB5_170;

BB5_164:
	.loc 2 51 1
	and.b32  	%r29, %r47, 248;
	setp.ne.s32	%p139, %r29, 0;
	@%p139 bra 	BB5_167;

	mov.f32 	%f458, 0f437F0000;

BB5_166:
	.loc 2 51 1
	mov.f32 	%f479, %f483;
	mov.f32 	%f511, %f515;
	bra.uni 	BB5_220;

BB5_167:
	.loc 2 51 1
	setp.ne.s32	%p140, %r29, 8;
	@%p140 bra 	BB5_169;

	mov.f32 	%f458, 0f447FC000;
	bra.uni 	BB5_166;

BB5_169:
	.loc 2 51 1
	setp.eq.s32	%p141, %r29, 16;
	.loc 2 51 1
	selp.f32	%f458, 0f47000000, 0f3F800000, %p141;
	bra.uni 	BB5_166;

BB5_170:
	setp.ne.s32	%p142, %r28, 1;
	@%p142 bra 	BB5_198;

	and.b32  	%r166, %r47, 7;
	setp.gt.u32	%p143, %r166, 4;
	@%p143 bra 	BB5_198;

	mov.u32 	%r167, 1;
	shl.b32 	%r168, %r167, %r166;
	and.b32  	%r169, %r168, 21;
	setp.ne.s32	%p144, %r169, 0;
	@%p144 bra 	BB5_173;
	bra.uni 	BB5_198;

BB5_173:
	.loc 2 51 1
	and.b32  	%r31, %r47, 248;
	setp.ne.s32	%p145, %r31, 0;
	@%p145 bra 	BB5_175;

	mov.f32 	%f452, 0f437F0000;
	bra.uni 	BB5_178;

BB5_175:
	.loc 2 51 1
	setp.ne.s32	%p146, %r31, 8;
	@%p146 bra 	BB5_177;

	mov.f32 	%f452, 0f447FC000;
	bra.uni 	BB5_178;

BB5_177:
	.loc 2 51 1
	setp.eq.s32	%p147, %r31, 16;
	.loc 2 51 1
	selp.f32	%f452, 0f47000000, 0f3F800000, %p147;

BB5_178:
	rcp.approx.ftz.f32 	%f342, %f452;
	mul.ftz.f32 	%f112, %f458, %f342;
	@%p129 bra 	BB5_185;

	.loc 2 51 1
	@%p145 bra 	BB5_181;

	mov.f32 	%f453, 0f437F0000;
	bra.uni 	BB5_184;

BB5_181:
	.loc 2 51 1
	setp.ne.s32	%p150, %r31, 8;
	@%p150 bra 	BB5_183;

	mov.f32 	%f453, 0f447FC000;
	bra.uni 	BB5_184;

BB5_183:
	.loc 2 51 1
	setp.eq.s32	%p151, %r31, 16;
	.loc 2 51 1
	selp.f32	%f453, 0f47000000, 0f3F800000, %p151;

BB5_184:
	and.b32  	%r170, %r47, 4096;
	setp.eq.s32	%p152, %r170, 0;
	ld.const.f32 	%f345, [kYCbCrOffset];
	ld.const.f32 	%f346, [kYCbCrFullRangeOffset];
	selp.f32	%f347, %f346, %f345, %p152;
	mov.f32 	%f348, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f349, %f453, %f348;
	mul.ftz.f32 	%f350, %f347, %f349;
	sub.ftz.f32 	%f483, %f483, %f350;
	ld.const.f32 	%f351, [kYCbCrOffset+4];
	ld.const.f32 	%f352, [kYCbCrFullRangeOffset+4];
	selp.f32	%f353, %f352, %f351, %p152;
	mul.ftz.f32 	%f354, %f353, %f349;
	sub.ftz.f32 	%f515, %f515, %f354;
	ld.const.f32 	%f355, [kYCbCrOffset+8];
	ld.const.f32 	%f356, [kYCbCrFullRangeOffset+8];
	selp.f32	%f357, %f356, %f355, %p152;
	mul.ftz.f32 	%f358, %f357, %f349;
	sub.ftz.f32 	%f523, %f523, %f358;

BB5_185:
	mul.ftz.f32 	%f481, %f483, %f112;
	mul.ftz.f32 	%f513, %f515, %f112;
	mul.ftz.f32 	%f523, %f523, %f112;
	@%p129 bra 	BB5_192;

	.loc 2 51 1
	@%p145 bra 	BB5_188;

	mov.f32 	%f454, 0f437F0000;
	bra.uni 	BB5_191;

BB5_188:
	.loc 2 51 1
	setp.ne.s32	%p155, %r31, 8;
	@%p155 bra 	BB5_190;

	mov.f32 	%f454, 0f447FC000;
	bra.uni 	BB5_191;

BB5_190:
	.loc 2 51 1
	setp.eq.s32	%p156, %r31, 16;
	.loc 2 51 1
	selp.f32	%f454, 0f47000000, 0f3F800000, %p156;

BB5_191:
	and.b32  	%r171, %r47, 4096;
	setp.eq.s32	%p157, %r171, 0;
	ld.const.f32 	%f361, [kYCbCrOffset];
	ld.const.f32 	%f362, [kYCbCrFullRangeOffset];
	selp.f32	%f363, %f362, %f361, %p157;
	mov.f32 	%f364, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f365, %f454, %f364;
	fma.rn.ftz.f32 	%f481, %f363, %f365, %f481;
	ld.const.f32 	%f366, [kYCbCrOffset+4];
	ld.const.f32 	%f367, [kYCbCrFullRangeOffset+4];
	selp.f32	%f368, %f367, %f366, %p157;
	fma.rn.ftz.f32 	%f513, %f368, %f365, %f513;
	ld.const.f32 	%f369, [kYCbCrOffset+8];
	ld.const.f32 	%f370, [kYCbCrFullRangeOffset+8];
	selp.f32	%f371, %f370, %f369, %p157;
	fma.rn.ftz.f32 	%f523, %f371, %f365, %f523;

BB5_192:
	mov.f32 	%f511, %f513;
	mov.f32 	%f479, %f481;
	or.b32  	%r172, %r166, 4;
	setp.eq.s32	%p158, %r172, 4;
	@%p158 bra 	BB5_193;
	bra.uni 	BB5_220;

BB5_193:
	.loc 2 51 1
	@%p145 bra 	BB5_195;

	mov.f32 	%f458, 0f437F0000;
	bra.uni 	BB5_220;

BB5_195:
	.loc 2 51 1
	setp.ne.s32	%p160, %r31, 8;
	@%p160 bra 	BB5_197;

	mov.f32 	%f458, 0f447FC000;
	bra.uni 	BB5_220;

BB5_197:
	.loc 2 51 1
	setp.eq.s32	%p161, %r31, 16;
	.loc 2 51 1
	selp.f32	%f458, 0f47000000, 0f3F800000, %p161;
	bra.uni 	BB5_220;

BB5_198:
	.loc 2 51 1
	mov.f32 	%f482, %f483;
	mov.f32 	%f514, %f515;
	@%p129 bra 	BB5_205;

	.loc 2 51 1
	and.b32  	%r32, %r47, 248;
	setp.ne.s32	%p163, %r32, 0;
	@%p163 bra 	BB5_201;

	mov.f32 	%f455, 0f437F0000;
	bra.uni 	BB5_204;

BB5_201:
	.loc 2 51 1
	setp.ne.s32	%p164, %r32, 8;
	@%p164 bra 	BB5_203;

	mov.f32 	%f455, 0f447FC000;
	bra.uni 	BB5_204;

BB5_203:
	.loc 2 51 1
	setp.eq.s32	%p165, %r32, 16;
	.loc 2 51 1
	selp.f32	%f455, 0f47000000, 0f3F800000, %p165;

BB5_204:
	.loc 2 51 119
	and.b32  	%r173, %r47, 4096;
	setp.eq.s32	%p166, %r173, 0;
	ld.const.f32 	%f376, [kYCbCrOffset];
	ld.const.f32 	%f377, [kYCbCrFullRangeOffset];
	selp.f32	%f378, %f377, %f376, %p166;
	mov.f32 	%f379, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f380, %f455, %f379;
	.loc 2 51 119
	mul.ftz.f32 	%f381, %f378, %f380;
	sub.ftz.f32 	%f482, %f483, %f381;
	ld.const.f32 	%f382, [kYCbCrOffset+4];
	ld.const.f32 	%f383, [kYCbCrFullRangeOffset+4];
	selp.f32	%f384, %f383, %f382, %p166;
	mul.ftz.f32 	%f385, %f384, %f380;
	sub.ftz.f32 	%f514, %f515, %f385;
	ld.const.f32 	%f386, [kYCbCrOffset+8];
	ld.const.f32 	%f387, [kYCbCrFullRangeOffset+8];
	selp.f32	%f388, %f387, %f386, %p166;
	mul.ftz.f32 	%f389, %f388, %f380;
	sub.ftz.f32 	%f523, %f523, %f389;

BB5_205:
	.loc 2 51 1
	add.ftz.f32 	%f390, %f458, 0fB70637BD;
	setp.gtu.ftz.f32	%p167, %f390, 0f00000000;
	@%p167 bra 	BB5_207;

	mov.f32 	%f523, 0f00000000;
	mov.f32 	%f512, %f523;
	mov.f32 	%f480, %f523;
	mov.f32 	%f458, %f523;
	bra.uni 	BB5_213;

BB5_207:
	.loc 2 51 1
	and.b32  	%r33, %r47, 248;
	setp.ne.s32	%p168, %r33, 0;
	@%p168 bra 	BB5_209;

	mov.f32 	%f456, 0f437F0000;
	bra.uni 	BB5_212;

BB5_209:
	.loc 2 51 1
	setp.ne.s32	%p169, %r33, 8;
	@%p169 bra 	BB5_211;

	mov.f32 	%f456, 0f447FC000;
	bra.uni 	BB5_212;

BB5_211:
	.loc 2 51 1
	setp.eq.s32	%p170, %r33, 16;
	.loc 2 51 1
	selp.f32	%f456, 0f47000000, 0f3F800000, %p170;

BB5_212:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f397, %f456, %f458;
	.loc 2 51 1
	mul.ftz.f32 	%f480, %f482, %f397;
	mul.ftz.f32 	%f512, %f514, %f397;
	mul.ftz.f32 	%f523, %f523, %f397;

BB5_213:
	.loc 2 51 1
	mov.f32 	%f511, %f512;
	mov.f32 	%f479, %f480;
	@%p129 bra 	BB5_220;

	.loc 2 51 1
	and.b32  	%r34, %r47, 248;
	setp.ne.s32	%p172, %r34, 0;
	@%p172 bra 	BB5_216;

	mov.f32 	%f457, 0f437F0000;
	bra.uni 	BB5_219;

BB5_216:
	.loc 2 51 1
	setp.ne.s32	%p173, %r34, 8;
	@%p173 bra 	BB5_218;

	mov.f32 	%f457, 0f447FC000;
	bra.uni 	BB5_219;

BB5_218:
	.loc 2 51 1
	setp.eq.s32	%p174, %r34, 16;
	.loc 2 51 1
	selp.f32	%f457, 0f47000000, 0f3F800000, %p174;

BB5_219:
	.loc 2 51 120
	and.b32  	%r174, %r47, 4096;
	setp.eq.s32	%p175, %r174, 0;
	ld.const.f32 	%f400, [kYCbCrOffset];
	ld.const.f32 	%f401, [kYCbCrFullRangeOffset];
	selp.f32	%f402, %f401, %f400, %p175;
	mov.f32 	%f403, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f404, %f457, %f403;
	.loc 2 51 120
	fma.rn.ftz.f32 	%f479, %f402, %f404, %f479;
	ld.const.f32 	%f405, [kYCbCrOffset+4];
	ld.const.f32 	%f406, [kYCbCrFullRangeOffset+4];
	selp.f32	%f407, %f406, %f405, %p175;
	fma.rn.ftz.f32 	%f511, %f407, %f404, %f511;
	ld.const.f32 	%f408, [kYCbCrOffset+8];
	ld.const.f32 	%f409, [kYCbCrFullRangeOffset+8];
	selp.f32	%f410, %f409, %f408, %p175;
	fma.rn.ftz.f32 	%f523, %f410, %f404, %f523;

BB5_220:
	and.b32  	%r175, %r47, 8192;
	setp.eq.s32	%p176, %r175, 0;
	selp.f32	%f525, %f523, %f458, %p176;
	selp.f32	%f526, %f511, %f479, %p176;
	selp.f32	%f527, %f479, %f511, %p176;
	selp.f32	%f528, %f458, %f523, %p176;
	and.b32  	%r176, %r47, 256;
	.loc 2 51 1
	and.b32  	%r35, %r47, 248;
	setp.ne.s32	%p177, %r176, 0;
	@%p177 bra 	BB5_227;

	add.ftz.f32 	%f163, %f525, 0f3F000000;
	add.ftz.f32 	%f164, %f526, 0f3F000000;
	add.ftz.f32 	%f165, %f527, 0f3F000000;
	add.ftz.f32 	%f166, %f528, 0f3F000000;
	.loc 2 51 1
	setp.ne.s32	%p178, %r35, 0;
	@%p178 bra 	BB5_223;

	mov.f32 	%f524, 0f437F0000;
	bra.uni 	BB5_226;

BB5_223:
	.loc 2 51 1
	setp.ne.s32	%p179, %r35, 8;
	@%p179 bra 	BB5_225;

	mov.f32 	%f524, 0f447FC000;
	bra.uni 	BB5_226;

BB5_225:
	.loc 2 51 1
	setp.eq.s32	%p180, %r35, 16;
	.loc 2 51 1
	selp.f32	%f524, 0f47000000, 0f3F800000, %p180;

BB5_226:
	mov.f32 	%f413, 0f00000000;
	.loc 3 2770 10
	max.ftz.f32 	%f414, %f163, %f413;
	.loc 3 2765 10
	min.ftz.f32 	%f525, %f414, %f524;
	.loc 3 2770 10
	max.ftz.f32 	%f415, %f164, %f413;
	.loc 3 2765 10
	min.ftz.f32 	%f526, %f415, %f524;
	.loc 3 2770 10
	max.ftz.f32 	%f416, %f165, %f413;
	.loc 3 2765 10
	min.ftz.f32 	%f527, %f416, %f524;
	.loc 3 2770 10
	max.ftz.f32 	%f417, %f166, %f413;
	.loc 3 2765 10
	min.ftz.f32 	%f528, %f417, %f524;

BB5_227:
	.loc 2 51 1
	mad.lo.s32 	%r37, %r2, %r46, %r1;
	mul.wide.s32 	%rd44, %r37, 4;
	add.s64 	%rd8, %rd10, %rd44;
	setp.eq.s32	%p181, %r35, 0;
	.loc 2 51 1
	@%p181 bra 	BB5_235;

	.loc 2 51 1
	setp.eq.s32	%p182, %r35, 8;
	@%p182 bra 	BB5_234;

	.loc 2 51 1
	setp.eq.s32	%p183, %r35, 16;
	mul.wide.s32 	%rd45, %r37, 8;
	add.s64 	%rd9, %rd10, %rd45;
	.loc 2 51 1
	@%p183 bra 	BB5_233;

	.loc 2 51 1
	setp.eq.s32	%p184, %r48, 0;
	@%p184 bra 	BB5_232;

	mul.wide.s32 	%rd46, %r37, 16;
	add.s64 	%rd47, %rd10, %rd46;
	.loc 2 51 1
	st.v4.f32 	[%rd47], {%f525, %f526, %f527, %f528};
	bra.uni 	BB5_236;

BB5_232:
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f525;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f526;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f527;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f528;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	st.v4.u16 	[%rd9], {%rs1, %rs2, %rs3, %rs4};
	bra.uni 	BB5_236;

BB5_233:
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r185, %f525;
	.loc 2 51 82
	cvt.u16.u32	%rs5, %r185;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r186, %f526;
	.loc 2 51 161
	cvt.u16.u32	%rs6, %r186;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r187, %f527;
	.loc 2 51 242
	cvt.u16.u32	%rs7, %r187;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r188, %f528;
	cvt.u16.u32	%rs8, %r188;
	.loc 2 51 1
	st.v4.u16 	[%rd9], {%rs5, %rs6, %rs7, %rs8};
	bra.uni 	BB5_236;

BB5_234:
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r189, %f525;
	.loc 2 51 65
	shl.b32 	%r190, %r189, 2;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r191, %f526;
	.loc 2 51 127
	shl.b32 	%r192, %r191, 12;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r193, %f527;
	.loc 2 51 191
	shl.b32 	%r194, %r193, 22;
	.loc 2 51 127
	add.s32 	%r195, %r192, %r190;
	.loc 2 51 191
	add.s32 	%r196, %r195, %r194;
	.loc 2 51 1
	st.u32 	[%rd8], %r196;
	bra.uni 	BB5_236;

BB5_235:
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r197, %f525;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r198, %f526;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r199, %f527;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r200, %f528;
	cvt.u16.u32	%rs9, %r200;
	.loc 2 51 239
	cvt.u16.u32	%rs10, %r199;
	.loc 2 51 159
	cvt.u16.u32	%rs11, %r198;
	.loc 2 51 81
	cvt.u16.u32	%rs12, %r197;
	.loc 2 51 1
	st.v4.u8 	[%rd8], {%rs12, %rs11, %rs10, %rs9};

BB5_236:
	.loc 1 129 2
	ret;
}

//
// PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel
//
// One thread per output pixel (x, y), with
//   x = ntid.x * ctaid.x + tid.x      y = ntid.y * ctaid.y + tid.y
// Each in-range thread:
//   1. fetches one sample from inYPlaneTexture,
//   2. fetches two chroma samples from the inUpper{U,V}PlaneTexture pair when y
//      is even, or from the inLower{U,V}PlaneTexture pair when y is odd,
//   3. scales the three samples by 255.0, subtracts the kYCbCrOffset vector and
//      multiplies by the 3x3 row-major matrix k601YCbCr_To_RGB32f,
//   4. stores {row2, row1, row0, 1.0} either as four f32 or as four f16 values.
//      (Presumably that order is B, G, R, A as the kernel name suggests - the
//      channel meaning of the matrix rows is not visible here; confirm.)
//
// Parameters actually read by this body:
//   param_0  (u32)  integer added to x for the luma texture column
//   param_6  (u32)  integer added to the chroma column for inUpperUPlaneTexture
//   param_8  (u32)  integer added to the chroma column for inUpperVPlaneTexture
//   param_10 (u32)  integer added to the chroma column for inLowerUPlaneTexture
//   param_12 (u32)  integer added to the chroma column for inLowerVPlaneTexture
//   param_14 (u64)  output buffer address (converted to the global space)
//   param_15 (u32)  multiplier for y in the linear pixel index (row pitch in pixels)
//   param_16 (u32)  output format selector: non-zero = 4 x f32 (16 bytes/pixel),
//                   zero = 4 x f16 (8 bytes/pixel)
//   param_17 (u32)  exclusive upper bound for x
//   param_18 (u32)  exclusive upper bound for y
// Parameters 1-5, 7, 9, 11 and 13 are declared but never loaded in this body.
//
// Float literals used below: 0f3F000000 = 0.5, 0f3E800000 = 0.25,
// 0f41000000 = 8.0, 0f437F0000 = 255.0, 0f3F800000 = 1.0.
//
.visible .entry PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<71>;
	.reg .f32 	%f<71>;
	.reg .s64 	%rd<16>;


	// Load the parameters this kernel uses (see the header for their roles).
	ld.param.u32 	%r11, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r12, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r13, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_8];
	ld.param.u32 	%r14, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_10];
	ld.param.u32 	%r15, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_12];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r16, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r17, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r18, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r19, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = output base address in the global state space.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x, %r2 = y (global thread coordinates).
	mov.u32 	%r20, %ntid.x;
	mov.u32 	%r21, %ctaid.x;
	mov.u32 	%r22, %tid.x;
	mad.lo.s32 	%r1, %r20, %r21, %r22;
	mov.u32 	%r23, %ntid.y;
	mov.u32 	%r24, %ctaid.y;
	mov.u32 	%r25, %tid.y;
	mad.lo.s32 	%r2, %r23, %r24, %r25;
	.loc 1 129 1
	// Bounds guard: proceed only when y < param_18 and x < param_17 (signed compares).
	setp.lt.s32	%p1, %r2, %r19;
	setp.lt.s32	%p2, %r1, %r18;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	@!%p3 bra 	BB6_7;
	bra.uni 	BB6_1;

BB6_1:
	.loc 1 129 1
	// Luma fetch at (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r30, %r1, %r11;
	cvt.rn.f32.s32	%f11, %r30;
	add.ftz.f32 	%f9, %f11, 0f3F000000;
	cvt.rn.f32.s32	%f12, %r2;
	add.ftz.f32 	%f10, %f12, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inYPlaneTexture, {%f9, %f10}];
	// inline asm
	.loc 3 3523 10
	// Reinterpret the first fetched component's bits as f32; only that component is used.
	// NOTE(review): presumably the texture returns normalized floats (the value is
	// later scaled by 255.0) - confirm against the host-side texture setup.
	mov.b32 	 %f1, %r26;
	.loc 1 129 1
	// %p4 = (y is even). Even rows take BB6_3 (inUpper* textures); odd rows fall
	// through to the inLower* textures.
	and.b32  	%r3, %r2, 1;
	setp.eq.s32	%p4, %r3, 0;
	.loc 1 129 1
	@%p4 bra 	BB6_3;

	// ---- Odd y: chroma from inLowerUPlaneTexture / inLowerVPlaneTexture ----
	.loc 1 129 1
	cvt.rn.f32.s32	%f17, %r1;
	add.ftz.f32 	%f18, %f17, 0f3F000000;
	.loc 1 129 1
	// %f19 = (x + 0.5) * 0.5 + 0.25 : chroma column before the per-plane offset.
	fma.rn.ftz.f32 	%f19, %f18, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	// %r39 = (y even) ? 1 : 0. %p4 is false on this path, so %r39 and %r40 are 0 here.
	selp.u32	%r39, 1, 0, %p4;
	shl.b32 	%r40, %r39, 1;
	// Signed divide/remainder of y by 4: %r43 is y biased by 3 when y is negative,
	// %r45 = y - (%r43 with its low two bits cleared), %r49 = %r43 >> 2.
	// For non-negative y this gives %r45 = y mod 4 and %r49 = y / 4.
	shr.s32 	%r41, %r2, 31;
	shr.u32 	%r42, %r41, 30;
	add.s32 	%r43, %r2, %r42;
	and.b32  	%r44, %r43, 2147483644;
	sub.s32 	%r45, %r2, %r44;
	add.s32 	%r46, %r45, %r40;
	// %r48 = 2 * ((y mod 4) + %r40) - 1
	shl.b32 	%r47, %r46, 1;
	add.s32 	%r48, %r47, -1;
	shr.s32 	%r49, %r43, 2;
	cvt.rn.f32.s32	%f20, %r49;
	cvt.rn.f32.s32	%f21, %r48;
	mov.f32 	%f22, 0f41000000;
	.loc 3 3606 10
	// Approximate divide by 8.0.
	div.approx.ftz.f32 	%f23, %f21, %f22;
	.loc 1 129 110
	// %f16 = chroma row coordinate = float(y / 4) + %r48 / 8.
	add.ftz.f32 	%f16, %f20, %f23;
	.loc 1 129 1
	// U column = param_10 + %f19.
	cvt.rn.f32.s32	%f24, %r14;
	add.ftz.f32 	%f13, %f24, %f19;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r31, %r32, %r33, %r34}, [inLowerUPlaneTexture, {%f13, %f16}];
	// inline asm
	.loc 1 129 1
	// V column = param_12 + %f19 (same row coordinate as U).
	cvt.rn.f32.s32	%f25, %r15;
	add.ftz.f32 	%f15, %f25, %f19;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r35, %r36, %r37, %r38}, [inLowerVPlaneTexture, {%f15, %f16}];
	// inline asm
	// Merge registers for BB6_4: %r70 = V sample bits, %r69 = U sample bits.
	mov.u32 	%r70, %r35;
	mov.u32 	%r69, %r31;
	bra.uni 	BB6_4;

BB6_3:
	// ---- Even y: chroma from inUpperUPlaneTexture / inUpperVPlaneTexture ----
	// Same coordinate arithmetic as the odd-row path; %p4 is true here, so the
	// selp/shl pair contributes +2 to the (y mod 4) term.
	.loc 1 129 1
	cvt.rn.f32.s32	%f30, %r1;
	add.ftz.f32 	%f31, %f30, 0f3F000000;
	.loc 1 129 1
	// %f32 = (x + 0.5) * 0.5 + 0.25
	fma.rn.ftz.f32 	%f32, %f31, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	selp.u32	%r58, 1, 0, %p4;
	shl.b32 	%r59, %r58, 1;
	shr.s32 	%r60, %r2, 31;
	shr.u32 	%r61, %r60, 30;
	add.s32 	%r62, %r2, %r61;
	and.b32  	%r63, %r62, 2147483644;
	sub.s32 	%r64, %r2, %r63;
	add.s32 	%r65, %r64, %r59;
	// %r67 = 2 * ((y mod 4) + 2) - 1 for non-negative y.
	shl.b32 	%r66, %r65, 1;
	add.s32 	%r67, %r66, -1;
	shr.s32 	%r68, %r62, 2;
	cvt.rn.f32.s32	%f33, %r68;
	cvt.rn.f32.s32	%f34, %r67;
	mov.f32 	%f35, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f36, %f34, %f35;
	.loc 1 129 110
	// %f29 = chroma row coordinate = float(y / 4) + %r67 / 8.
	add.ftz.f32 	%f29, %f33, %f36;
	.loc 1 129 1
	// U column = param_6 + %f32.
	cvt.rn.f32.s32	%f37, %r12;
	add.ftz.f32 	%f26, %f37, %f32;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r50, %r51, %r52, %r53}, [inUpperUPlaneTexture, {%f26, %f29}];
	// inline asm
	.loc 1 129 1
	// V column = param_8 + %f32.
	cvt.rn.f32.s32	%f38, %r13;
	add.ftz.f32 	%f28, %f38, %f32;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r54, %r55, %r56, %r57}, [inUpperVPlaneTexture, {%f28, %f29}];
	// inline asm
	// Merge registers for BB6_4: %r70 = V sample bits, %r69 = U sample bits.
	mov.u32 	%r70, %r54;
	mov.u32 	%r69, %r50;

BB6_4:
	// ---- Colour conversion ----
	.loc 1 129 1
	// %f40 = V * 255, %f43 = U * 255, %f47 = Y * 255.
	mov.b32 	 %f39, %r70;
	mul.ftz.f32 	%f40, %f39, 0f437F0000;
	mov.f32 	%f41, 0f437F0000;
	.loc 1 129 1
	mov.b32 	 %f42, %r69;
	mul.ftz.f32 	%f43, %f42, 0f437F0000;
	ld.const.f32 	%f44, [kYCbCrOffset];
	.loc 3 3606 10
	// %f45 = 255 / 255 via the approximate divide (nominally 1.0); it scales each
	// kYCbCrOffset component before the subtraction.
	div.approx.ftz.f32 	%f45, %f41, %f41;
	mul.ftz.f32 	%f46, %f44, %f45;
	.loc 1 129 1
	mul.ftz.f32 	%f47, %f1, 0f437F0000;
	// %f48 = Y*255 - kYCbCrOffset[0]*%f45
	sub.ftz.f32 	%f48, %f47, %f46;
	ld.const.f32 	%f49, [kYCbCrOffset+4];
	mul.ftz.f32 	%f50, %f49, %f45;
	// %f51 = U*255 - kYCbCrOffset[1]*%f45
	sub.ftz.f32 	%f51, %f43, %f50;
	ld.const.f32 	%f52, [kYCbCrOffset+8];
	mul.ftz.f32 	%f53, %f52, %f45;
	// %f54 = V*255 - kYCbCrOffset[2]*%f45
	sub.ftz.f32 	%f54, %f40, %f53;
	// 3x3 matrix * (%f48, %f51, %f54), matrix read as nine consecutive floats.
	// Row 0 -> %f2
	ld.const.f32 	%f55, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f56, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f57, %f51, %f56;
	fma.rn.ftz.f32 	%f58, %f48, %f55, %f57;
	ld.const.f32 	%f59, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f2, %f54, %f59, %f58;
	// Row 1 -> %f3
	ld.const.f32 	%f60, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f61, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f62, %f51, %f61;
	fma.rn.ftz.f32 	%f63, %f48, %f60, %f62;
	ld.const.f32 	%f64, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f3, %f54, %f64, %f63;
	// Row 2 -> %f4
	ld.const.f32 	%f65, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f66, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f67, %f51, %f66;
	fma.rn.ftz.f32 	%f68, %f48, %f65, %f67;
	ld.const.f32 	%f69, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f4, %f54, %f69, %f68;
	// Fourth output component is the constant 1.0.
	mov.f32 	%f8, 0f3F800000;
	.loc 2 51 1
	// %r10 = linear pixel index = y * param_15 + x.
	mad.lo.s32 	%r10, %r2, %r16, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the f16 store in BB6_6; otherwise store f32 below.
	setp.eq.s32	%p7, %r17, 0;
	@%p7 bra 	BB6_6;

	// ---- f32 output: 16 bytes per pixel ----
	mul.wide.s32 	%rd12, %r10, 16;
	add.s64 	%rd13, %rd1, %rd12;
	.loc 2 51 1
	st.global.v4.f32 	[%rd13], {%f4, %f3, %f2, %f8};
	bra.uni 	BB6_7;

BB6_6:
	// ---- f16 output: 8 bytes per pixel ----
	mul.wide.s32 	%rd14, %r10, 8;
	add.s64 	%rd15, %rd1, %rd14;
	// Each scoped block converts one f32 component to f16 (round-to-nearest,
	// flush-to-zero) and keeps the 16-bit pattern in an %rs register.
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	// Same component order as the f32 path: {row2, row1, row0, 1.0}.
	st.global.v4.u16 	[%rd15], {%rs1, %rs2, %rs3, %rs4};

BB6_7:
	// Common exit, also reached directly by out-of-range threads.
	.loc 1 129 2
	ret;
}

// Frame-picture YUV 4:2:0 planar -> 4-channel float/half conversion kernel
// (601 constants: kYCbCrOffset and k601YCbCr_To_RGB32f).
//
// Each thread computes one output pixel (x, y):
//   1. samples inYPlaneTexture, inFrameCodedUPlaneTexture and
//      inFrameCodedVPlaneTexture,
//   2. scales each sample by 255.0 and subtracts the per-channel offset,
//   3. multiplies by the 3x3 matrix k601YCbCr_To_RGB32f,
//   4. stores {row2, row1, row0, 1.0} (the BGRA order named in the kernel,
//      assuming matrix rows are R, G, B) as either 4 x f32 or 4 x f16.
//
// Parameters actually read by this body:
//   param_0  (u32) value added to x for the Y-plane fetch
//   param_2  (u32) value added to the chroma x coordinate for the U fetch
//   param_4  (u32) value added to the chroma x coordinate for the V fetch
//   param_14 (u64) output buffer address (converted to a global address)
//   param_15 (u32) multiplier applied to y when forming the output pixel index
//   param_16 (u32) output format selector: non-zero -> 16 bytes/pixel (f32x4),
//                  zero -> 8 bytes/pixel (f16x4)
//   param_17 (u32) exclusive upper bound for x
//   param_18 (u32) exclusive upper bound for y
// Parameters 1, 3 and 5-13 are declared but never loaded in this kernel.
.visible .entry PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<30>;
	.reg .f32 	%f<54>;
	.reg .s64 	%rd<13>;


	// Load the subset of kernel parameters this body uses.
	ld.param.u32 	%r4, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u32 	%r6, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r7, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r8, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r9, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r10, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = output base pointer in the global address space.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 1 129 1
	// Bounds guard (signed compares): proceed only if y < param_18 and x < param_17.
	setp.lt.s32	%p1, %r2, %r10;
	setp.lt.s32	%p2, %r1, %r9;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	@!%p3 bra 	BB7_4;
	bra.uni 	BB7_1;

BB7_1:
	.loc 1 129 1
	// %f16 = (x + 0.5) * 0.5 + 0.25 : base chroma x coordinate shared by U and V.
	cvt.rn.f32.s32	%f14, %r1;
	add.ftz.f32 	%f15, %f14, 0f3F000000;
	.loc 1 129 1
	fma.rn.ftz.f32 	%f16, %f15, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	// Y fetch coordinate: (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r29, %r1, %r4;
	cvt.rn.f32.s32	%f17, %r29;
	add.ftz.f32 	%f8, %f17, 0f3F000000;
	cvt.rn.f32.s32	%f18, %r2;
	add.ftz.f32 	%f9, %f18, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [inYPlaneTexture, {%f8, %f9}];
	// inline asm
	.loc 3 3523 10
	// Only component 0 of the fetch is used; its bits are reinterpreted as f32.
	mov.b32 	 %f19, %r17;
	.loc 1 129 1
	// U fetch coordinate: (param_2 + chroma x, (y + 0.5) * 0.5).
	cvt.rn.f32.s32	%f20, %r5;
	add.ftz.f32 	%f10, %f20, %f16;
	mul.ftz.f32 	%f13, %f9, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [inFrameCodedUPlaneTexture, {%f10, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f21, %r21;
	.loc 1 129 1
	// V fetch coordinate: (param_4 + chroma x, same chroma y as U).
	cvt.rn.f32.s32	%f22, %r6;
	add.ftz.f32 	%f12, %f22, %f16;
	// inline asm
	tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [inFrameCodedVPlaneTexture, {%f12, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f23, %r25;
	// Scale the three samples by 255.0: %f24 = V*255, %f26 = U*255, %f27 = Y*255.
	mul.ftz.f32 	%f24, %f23, 0f437F0000;
	mov.f32 	%f25, 0f437F0000;
	mul.ftz.f32 	%f26, %f21, 0f437F0000;
	.loc 1 129 1
	mul.ftz.f32 	%f27, %f19, 0f437F0000;
	ld.const.f32 	%f28, [kYCbCrOffset];
	.loc 3 3606 10
	// %f29 = 255.0 / 255.0 via approximate divide (nominally 1.0); it scales each offset below.
	div.approx.ftz.f32 	%f29, %f25, %f25;
	// Subtract the scaled offsets: %f31 = Y', %f34 = U', %f37 = V'.
	mul.ftz.f32 	%f30, %f28, %f29;
	sub.ftz.f32 	%f31, %f27, %f30;
	ld.const.f32 	%f32, [kYCbCrOffset+4];
	mul.ftz.f32 	%f33, %f32, %f29;
	sub.ftz.f32 	%f34, %f26, %f33;
	ld.const.f32 	%f35, [kYCbCrOffset+8];
	mul.ftz.f32 	%f36, %f35, %f29;
	sub.ftz.f32 	%f37, %f24, %f36;
	// Matrix row 0: %f1 = m[0]*Y' + m[1]*U' + m[2]*V'.
	ld.const.f32 	%f38, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f39, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f40, %f34, %f39;
	fma.rn.ftz.f32 	%f41, %f31, %f38, %f40;
	ld.const.f32 	%f42, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f37, %f42, %f41;
	// Matrix row 1: %f2 = m[3]*Y' + m[4]*U' + m[5]*V'.
	ld.const.f32 	%f43, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f44, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f45, %f34, %f44;
	fma.rn.ftz.f32 	%f46, %f31, %f43, %f45;
	ld.const.f32 	%f47, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f37, %f47, %f46;
	// Matrix row 2: %f3 = m[6]*Y' + m[7]*U' + m[8]*V'.
	ld.const.f32 	%f48, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f49, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f50, %f34, %f49;
	fma.rn.ftz.f32 	%f51, %f31, %f48, %f50;
	ld.const.f32 	%f52, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f37, %f52, %f51;
	// Fourth channel is the constant 1.0.
	mov.f32 	%f7, 0f3F800000;
	.loc 2 51 1
	// %r3 = output pixel index = y * param_15 + x.
	mad.lo.s32 	%r3, %r2, %r7, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the half-precision store path (BB7_3).
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB7_3;

	// f32 path: 16 bytes per pixel, stored as {row2, row1, row0, 1.0}.
	mul.wide.s32 	%rd9, %r3, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f3, %f2, %f1, %f7};
	bra.uni 	BB7_4;

BB7_3:
	// f16 path: 8 bytes per pixel; each channel is rounded to f16 (round-to-nearest, flush-to-zero).
	mul.wide.s32 	%rd11, %r3, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f7;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	// Same channel order as the f32 path: {row2, row1, row0, 1.0}.
	st.global.v4.u16 	[%rd12], {%rs1, %rs2, %rs3, %rs4};

BB7_4:
	.loc 1 129 2
	ret;
}

// Field-picture YUV 4:2:0 planar -> 4-channel float/half conversion kernel
// (601 full-range constants: kYCbCrFullRangeOffset and
// k601YCbCrFullRange_To_RGB32f).
//
// Each thread computes one output pixel (x, y). Luma always comes from
// inYPlaneTexture. Chroma comes from the "Upper" U/V textures when y is even
// and from the "Lower" U/V textures when y is odd, with a row-parity-dependent
// chroma y coordinate (formulas at each branch below). The samples are scaled
// by 255.0, offset, multiplied by the 3x3 matrix and stored as
// {row2, row1, row0, 1.0} in either 4 x f32 or 4 x f16.
//
// Parameters actually read by this body:
//   param_0  (u32) value added to x for the Y-plane fetch
//   param_6  (u32) value added to the chroma x coordinate for the upper U fetch
//   param_8  (u32) value added to the chroma x coordinate for the upper V fetch
//   param_10 (u32) value added to the chroma x coordinate for the lower U fetch
//   param_12 (u32) value added to the chroma x coordinate for the lower V fetch
//   param_14 (u64) output buffer address (converted to a global address)
//   param_15 (u32) multiplier applied to y when forming the output pixel index
//   param_16 (u32) output format selector: non-zero -> 16 bytes/pixel (f32x4),
//                  zero -> 8 bytes/pixel (f16x4)
//   param_17 (u32) exclusive upper bound for x
//   param_18 (u32) exclusive upper bound for y
// Parameters 1-5, 7, 9, 11 and 13 are declared but never loaded in this kernel.
.visible .entry PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<71>;
	.reg .f32 	%f<71>;
	.reg .s64 	%rd<16>;


	// Load the subset of kernel parameters this body uses.
	ld.param.u32 	%r11, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r12, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r13, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_8];
	ld.param.u32 	%r14, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_10];
	ld.param.u32 	%r15, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_12];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r16, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r17, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r18, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r19, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = output base pointer in the global address space.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r20, %ntid.x;
	mov.u32 	%r21, %ctaid.x;
	mov.u32 	%r22, %tid.x;
	mad.lo.s32 	%r1, %r20, %r21, %r22;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r23, %ntid.y;
	mov.u32 	%r24, %ctaid.y;
	mov.u32 	%r25, %tid.y;
	mad.lo.s32 	%r2, %r23, %r24, %r25;
	.loc 1 129 1
	// Bounds guard (signed compares): proceed only if y < param_18 and x < param_17.
	setp.lt.s32	%p1, %r2, %r19;
	setp.lt.s32	%p2, %r1, %r18;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	@!%p3 bra 	BB8_7;
	bra.uni 	BB8_1;

BB8_1:
	.loc 1 129 1
	// Y fetch coordinate: (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r30, %r1, %r11;
	cvt.rn.f32.s32	%f11, %r30;
	add.ftz.f32 	%f9, %f11, 0f3F000000;
	cvt.rn.f32.s32	%f12, %r2;
	add.ftz.f32 	%f10, %f12, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inYPlaneTexture, {%f9, %f10}];
	// inline asm
	.loc 3 3523 10
	// Only component 0 of the fetch is used; its bits are reinterpreted as f32.
	mov.b32 	 %f1, %r26;
	.loc 1 129 1
	// %p4 = (y is even). Even rows branch to BB8_3 (upper textures);
	// odd rows fall through (lower textures).
	and.b32  	%r3, %r2, 1;
	setp.eq.s32	%p4, %r3, 0;
	.loc 1 129 1
	@%p4 bra 	BB8_3;

	.loc 1 129 1
	// ---- Odd y: sample the lower U/V textures ----
	// %f19 = (x + 0.5) * 0.5 + 0.25 : base chroma x coordinate.
	cvt.rn.f32.s32	%f17, %r1;
	add.ftz.f32 	%f18, %f17, 0f3F000000;
	.loc 1 129 1
	fma.rn.ftz.f32 	%f19, %f18, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	// %p4 is false on this path, so the selp yields 0 and %r40 = 0.
	selp.u32	%r39, 1, 0, %p4;
	shl.b32 	%r40, %r39, 1;
	// Signed divide/remainder by 4 of y (bias-and-shift form):
	// for non-negative y, %r45 = y % 4 and %r49 = y / 4.
	shr.s32 	%r41, %r2, 31;
	shr.u32 	%r42, %r41, 30;
	add.s32 	%r43, %r2, %r42;
	and.b32  	%r44, %r43, 2147483644;
	sub.s32 	%r45, %r2, %r44;
	add.s32 	%r46, %r45, %r40;
	// %r48 = 2 * (y % 4 + 0) - 1
	shl.b32 	%r47, %r46, 1;
	add.s32 	%r48, %r47, -1;
	shr.s32 	%r49, %r43, 2;
	cvt.rn.f32.s32	%f20, %r49;
	cvt.rn.f32.s32	%f21, %r48;
	mov.f32 	%f22, 0f41000000;
	.loc 3 3606 10
	// Chroma y = y / 4 + (2 * (y % 4) - 1) / 8.0 (approximate divide).
	div.approx.ftz.f32 	%f23, %f21, %f22;
	.loc 1 129 110
	add.ftz.f32 	%f16, %f20, %f23;
	.loc 1 129 1
	// Lower U fetch at (param_10 + chroma x, chroma y).
	cvt.rn.f32.s32	%f24, %r14;
	add.ftz.f32 	%f13, %f24, %f19;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r31, %r32, %r33, %r34}, [inLowerUPlaneTexture, {%f13, %f16}];
	// inline asm
	.loc 1 129 1
	// Lower V fetch at (param_12 + chroma x, chroma y).
	cvt.rn.f32.s32	%f25, %r15;
	add.ftz.f32 	%f15, %f25, %f19;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r35, %r36, %r37, %r38}, [inLowerVPlaneTexture, {%f15, %f16}];
	// inline asm
	// Merge registers for the join at BB8_4: %r70 = V sample bits, %r69 = U sample bits.
	mov.u32 	%r70, %r35;
	mov.u32 	%r69, %r31;
	bra.uni 	BB8_4;

BB8_3:
	.loc 1 129 1
	// ---- Even y: sample the upper U/V textures ----
	// %f32 = (x + 0.5) * 0.5 + 0.25 : base chroma x coordinate.
	cvt.rn.f32.s32	%f30, %r1;
	add.ftz.f32 	%f31, %f30, 0f3F000000;
	.loc 1 129 1
	fma.rn.ftz.f32 	%f32, %f31, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	// %p4 is true on this path, so the selp yields 1 and %r59 = 2.
	selp.u32	%r58, 1, 0, %p4;
	shl.b32 	%r59, %r58, 1;
	// Signed divide/remainder by 4 of y (bias-and-shift form):
	// for non-negative y, %r64 = y % 4 and %r68 = y / 4.
	shr.s32 	%r60, %r2, 31;
	shr.u32 	%r61, %r60, 30;
	add.s32 	%r62, %r2, %r61;
	and.b32  	%r63, %r62, 2147483644;
	sub.s32 	%r64, %r2, %r63;
	add.s32 	%r65, %r64, %r59;
	// %r67 = 2 * (y % 4 + 2) - 1
	shl.b32 	%r66, %r65, 1;
	add.s32 	%r67, %r66, -1;
	shr.s32 	%r68, %r62, 2;
	cvt.rn.f32.s32	%f33, %r68;
	cvt.rn.f32.s32	%f34, %r67;
	mov.f32 	%f35, 0f41000000;
	.loc 3 3606 10
	// Chroma y = y / 4 + (2 * (y % 4 + 2) - 1) / 8.0 (approximate divide).
	div.approx.ftz.f32 	%f36, %f34, %f35;
	.loc 1 129 110
	add.ftz.f32 	%f29, %f33, %f36;
	.loc 1 129 1
	// Upper U fetch at (param_6 + chroma x, chroma y).
	cvt.rn.f32.s32	%f37, %r12;
	add.ftz.f32 	%f26, %f37, %f32;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r50, %r51, %r52, %r53}, [inUpperUPlaneTexture, {%f26, %f29}];
	// inline asm
	.loc 1 129 1
	// Upper V fetch at (param_8 + chroma x, chroma y).
	cvt.rn.f32.s32	%f38, %r13;
	add.ftz.f32 	%f28, %f38, %f32;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r54, %r55, %r56, %r57}, [inUpperVPlaneTexture, {%f28, %f29}];
	// inline asm
	// Merge registers for the join at BB8_4: %r70 = V sample bits, %r69 = U sample bits.
	mov.u32 	%r70, %r54;
	mov.u32 	%r69, %r50;

BB8_4:
	.loc 1 129 1
	// Scale the samples by 255.0: %f40 = V*255, %f43 = U*255, %f47 = Y*255 (below).
	mov.b32 	 %f39, %r70;
	mul.ftz.f32 	%f40, %f39, 0f437F0000;
	mov.f32 	%f41, 0f437F0000;
	.loc 1 129 1
	mov.b32 	 %f42, %r69;
	mul.ftz.f32 	%f43, %f42, 0f437F0000;
	ld.const.f32 	%f44, [kYCbCrFullRangeOffset];
	.loc 3 3606 10
	// %f45 = 255.0 / 255.0 via approximate divide (nominally 1.0); it scales each offset below.
	div.approx.ftz.f32 	%f45, %f41, %f41;
	mul.ftz.f32 	%f46, %f44, %f45;
	.loc 1 129 1
	// Subtract the scaled offsets: %f48 = Y', %f51 = U', %f54 = V'.
	mul.ftz.f32 	%f47, %f1, 0f437F0000;
	sub.ftz.f32 	%f48, %f47, %f46;
	ld.const.f32 	%f49, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f50, %f49, %f45;
	sub.ftz.f32 	%f51, %f43, %f50;
	ld.const.f32 	%f52, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f53, %f52, %f45;
	sub.ftz.f32 	%f54, %f40, %f53;
	// Matrix row 0: %f2 = m[0]*Y' + m[1]*U' + m[2]*V'.
	ld.const.f32 	%f55, [k601YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f56, [k601YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f57, %f51, %f56;
	fma.rn.ftz.f32 	%f58, %f48, %f55, %f57;
	ld.const.f32 	%f59, [k601YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f2, %f54, %f59, %f58;
	// Matrix row 1: %f3 = m[3]*Y' + m[4]*U' + m[5]*V'.
	ld.const.f32 	%f60, [k601YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f61, [k601YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f62, %f51, %f61;
	fma.rn.ftz.f32 	%f63, %f48, %f60, %f62;
	ld.const.f32 	%f64, [k601YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f3, %f54, %f64, %f63;
	// Matrix row 2: %f4 = m[6]*Y' + m[7]*U' + m[8]*V'.
	ld.const.f32 	%f65, [k601YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f66, [k601YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f67, %f51, %f66;
	fma.rn.ftz.f32 	%f68, %f48, %f65, %f67;
	ld.const.f32 	%f69, [k601YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f4, %f54, %f69, %f68;
	// Fourth channel is the constant 1.0.
	mov.f32 	%f8, 0f3F800000;
	.loc 2 51 1
	// %r10 = output pixel index = y * param_15 + x.
	mad.lo.s32 	%r10, %r2, %r16, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the half-precision store path (BB8_6).
	setp.eq.s32	%p7, %r17, 0;
	@%p7 bra 	BB8_6;

	// f32 path: 16 bytes per pixel, stored as {row2, row1, row0, 1.0}.
	mul.wide.s32 	%rd12, %r10, 16;
	add.s64 	%rd13, %rd1, %rd12;
	.loc 2 51 1
	st.global.v4.f32 	[%rd13], {%f4, %f3, %f2, %f8};
	bra.uni 	BB8_7;

BB8_6:
	// f16 path: 8 bytes per pixel; each channel is rounded to f16 (round-to-nearest, flush-to-zero).
	mul.wide.s32 	%rd14, %r10, 8;
	add.s64 	%rd15, %rd1, %rd14;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	// Same channel order as the f32 path: {row2, row1, row0, 1.0}.
	st.global.v4.u16 	[%rd15], {%rs1, %rs2, %rs3, %rs4};

BB8_7:
	.loc 1 129 2
	ret;
}

// Frame-picture YUV 4:2:0 planar -> 4-channel float/half conversion kernel
// (601 full-range constants: kYCbCrFullRangeOffset and
// k601YCbCrFullRange_To_RGB32f).
//
// Each thread computes one output pixel (x, y):
//   1. samples inYPlaneTexture, inFrameCodedUPlaneTexture and
//      inFrameCodedVPlaneTexture,
//   2. scales each sample by 255.0 and subtracts the per-channel offset,
//   3. multiplies by the 3x3 matrix k601YCbCrFullRange_To_RGB32f,
//   4. stores {row2, row1, row0, 1.0} as either 4 x f32 or 4 x f16.
//
// Parameters actually read by this body:
//   param_0  (u32) value added to x for the Y-plane fetch
//   param_2  (u32) value added to the chroma x coordinate for the U fetch
//   param_4  (u32) value added to the chroma x coordinate for the V fetch
//   param_14 (u64) output buffer address (converted to a global address)
//   param_15 (u32) multiplier applied to y when forming the output pixel index
//   param_16 (u32) output format selector: non-zero -> 16 bytes/pixel (f32x4),
//                  zero -> 8 bytes/pixel (f16x4)
//   param_17 (u32) exclusive upper bound for x
//   param_18 (u32) exclusive upper bound for y
// Parameters 1, 3 and 5-13 are declared but never loaded in this kernel.
.visible .entry PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<30>;
	.reg .f32 	%f<54>;
	.reg .s64 	%rd<13>;


	// Load the subset of kernel parameters this body uses.
	ld.param.u32 	%r4, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u32 	%r6, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r7, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r8, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r9, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r10, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = output base pointer in the global address space.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 1 129 1
	// Bounds guard (signed compares): proceed only if y < param_18 and x < param_17.
	setp.lt.s32	%p1, %r2, %r10;
	setp.lt.s32	%p2, %r1, %r9;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	@!%p3 bra 	BB9_4;
	bra.uni 	BB9_1;

BB9_1:
	.loc 1 129 1
	// %f16 = (x + 0.5) * 0.5 + 0.25 : base chroma x coordinate shared by U and V.
	cvt.rn.f32.s32	%f14, %r1;
	add.ftz.f32 	%f15, %f14, 0f3F000000;
	.loc 1 129 1
	fma.rn.ftz.f32 	%f16, %f15, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	// Y fetch coordinate: (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r29, %r1, %r4;
	cvt.rn.f32.s32	%f17, %r29;
	add.ftz.f32 	%f8, %f17, 0f3F000000;
	cvt.rn.f32.s32	%f18, %r2;
	add.ftz.f32 	%f9, %f18, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [inYPlaneTexture, {%f8, %f9}];
	// inline asm
	.loc 3 3523 10
	// Only component 0 of the fetch is used; its bits are reinterpreted as f32.
	mov.b32 	 %f19, %r17;
	.loc 1 129 1
	// U fetch coordinate: (param_2 + chroma x, (y + 0.5) * 0.5).
	cvt.rn.f32.s32	%f20, %r5;
	add.ftz.f32 	%f10, %f20, %f16;
	mul.ftz.f32 	%f13, %f9, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [inFrameCodedUPlaneTexture, {%f10, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f21, %r21;
	.loc 1 129 1
	// V fetch coordinate: (param_4 + chroma x, same chroma y as U).
	cvt.rn.f32.s32	%f22, %r6;
	add.ftz.f32 	%f12, %f22, %f16;
	// inline asm
	tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [inFrameCodedVPlaneTexture, {%f12, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f23, %r25;
	// Scale the three samples by 255.0: %f24 = V*255, %f26 = U*255, %f27 = Y*255.
	mul.ftz.f32 	%f24, %f23, 0f437F0000;
	mov.f32 	%f25, 0f437F0000;
	mul.ftz.f32 	%f26, %f21, 0f437F0000;
	.loc 1 129 1
	mul.ftz.f32 	%f27, %f19, 0f437F0000;
	ld.const.f32 	%f28, [kYCbCrFullRangeOffset];
	.loc 3 3606 10
	// %f29 = 255.0 / 255.0 via approximate divide (nominally 1.0); it scales each offset below.
	div.approx.ftz.f32 	%f29, %f25, %f25;
	// Subtract the scaled offsets: %f31 = Y', %f34 = U', %f37 = V'.
	mul.ftz.f32 	%f30, %f28, %f29;
	sub.ftz.f32 	%f31, %f27, %f30;
	ld.const.f32 	%f32, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f33, %f32, %f29;
	sub.ftz.f32 	%f34, %f26, %f33;
	ld.const.f32 	%f35, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f36, %f35, %f29;
	sub.ftz.f32 	%f37, %f24, %f36;
	// Matrix row 0: %f1 = m[0]*Y' + m[1]*U' + m[2]*V'.
	ld.const.f32 	%f38, [k601YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f39, [k601YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f40, %f34, %f39;
	fma.rn.ftz.f32 	%f41, %f31, %f38, %f40;
	ld.const.f32 	%f42, [k601YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f37, %f42, %f41;
	// Matrix row 1: %f2 = m[3]*Y' + m[4]*U' + m[5]*V'.
	ld.const.f32 	%f43, [k601YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f44, [k601YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f45, %f34, %f44;
	fma.rn.ftz.f32 	%f46, %f31, %f43, %f45;
	ld.const.f32 	%f47, [k601YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f37, %f47, %f46;
	// Matrix row 2: %f3 = m[6]*Y' + m[7]*U' + m[8]*V'.
	ld.const.f32 	%f48, [k601YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f49, [k601YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f50, %f34, %f49;
	fma.rn.ftz.f32 	%f51, %f31, %f48, %f50;
	ld.const.f32 	%f52, [k601YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f37, %f52, %f51;
	// Fourth channel is the constant 1.0.
	mov.f32 	%f7, 0f3F800000;
	.loc 2 51 1
	// %r3 = output pixel index = y * param_15 + x.
	mad.lo.s32 	%r3, %r2, %r7, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the half-precision store path (BB9_3).
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB9_3;

	// f32 path: 16 bytes per pixel, stored as {row2, row1, row0, 1.0}.
	mul.wide.s32 	%rd9, %r3, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f3, %f2, %f1, %f7};
	bra.uni 	BB9_4;

BB9_3:
	// f16 path: 8 bytes per pixel; each channel is rounded to f16 (round-to-nearest, flush-to-zero).
	mul.wide.s32 	%rd11, %r3, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f7;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	// Same channel order as the f32 path: {row2, row1, row0, 1.0}.
	st.global.v4.u16 	[%rd12], {%rs1, %rs2, %rs3, %rs4};

BB9_4:
	.loc 1 129 2
	ret;
}

// Field-picture YUV 4:2:0 planar -> 4-channel float/half conversion kernel
// (709 constants: kYCbCrOffset and k709YCbCr_To_RGB32f).
//
// Each thread computes one output pixel (x, y). Luma always comes from
// inYPlaneTexture. Chroma comes from the "Upper" U/V textures when y is even
// and from the "Lower" U/V textures when y is odd, with a row-parity-dependent
// chroma y coordinate (formulas at each branch below). The samples are scaled
// by 255.0, offset, multiplied by the 3x3 matrix and stored as
// {row2, row1, row0, 1.0} in either 4 x f32 or 4 x f16.
//
// Parameters actually read by this body:
//   param_0  (u32) value added to x for the Y-plane fetch
//   param_6  (u32) value added to the chroma x coordinate for the upper U fetch
//   param_8  (u32) value added to the chroma x coordinate for the upper V fetch
//   param_10 (u32) value added to the chroma x coordinate for the lower U fetch
//   param_12 (u32) value added to the chroma x coordinate for the lower V fetch
//   param_14 (u64) output buffer address (converted to a global address)
//   param_15 (u32) multiplier applied to y when forming the output pixel index
//   param_16 (u32) output format selector: non-zero -> 16 bytes/pixel (f32x4),
//                  zero -> 8 bytes/pixel (f16x4)
//   param_17 (u32) exclusive upper bound for x
//   param_18 (u32) exclusive upper bound for y
// Parameters 1-5, 7, 9, 11 and 13 are declared but never loaded in this kernel.
.visible .entry PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<71>;
	.reg .f32 	%f<71>;
	.reg .s64 	%rd<16>;


	// Load the subset of kernel parameters this body uses.
	ld.param.u32 	%r11, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r12, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r13, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_8];
	ld.param.u32 	%r14, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_10];
	ld.param.u32 	%r15, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_12];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r16, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r17, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r18, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r19, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = output base pointer in the global address space.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r20, %ntid.x;
	mov.u32 	%r21, %ctaid.x;
	mov.u32 	%r22, %tid.x;
	mad.lo.s32 	%r1, %r20, %r21, %r22;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r23, %ntid.y;
	mov.u32 	%r24, %ctaid.y;
	mov.u32 	%r25, %tid.y;
	mad.lo.s32 	%r2, %r23, %r24, %r25;
	.loc 1 129 1
	// Bounds guard (signed compares): proceed only if y < param_18 and x < param_17.
	setp.lt.s32	%p1, %r2, %r19;
	setp.lt.s32	%p2, %r1, %r18;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	@!%p3 bra 	BB10_7;
	bra.uni 	BB10_1;

BB10_1:
	.loc 1 129 1
	// Y fetch coordinate: (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r30, %r1, %r11;
	cvt.rn.f32.s32	%f11, %r30;
	add.ftz.f32 	%f9, %f11, 0f3F000000;
	cvt.rn.f32.s32	%f12, %r2;
	add.ftz.f32 	%f10, %f12, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inYPlaneTexture, {%f9, %f10}];
	// inline asm
	.loc 3 3523 10
	// Only component 0 of the fetch is used; its bits are reinterpreted as f32.
	mov.b32 	 %f1, %r26;
	.loc 1 129 1
	// %p4 = (y is even). Even rows branch to BB10_3 (upper textures);
	// odd rows fall through (lower textures).
	and.b32  	%r3, %r2, 1;
	setp.eq.s32	%p4, %r3, 0;
	.loc 1 129 1
	@%p4 bra 	BB10_3;

	.loc 1 129 1
	// ---- Odd y: sample the lower U/V textures ----
	// %f19 = (x + 0.5) * 0.5 + 0.25 : base chroma x coordinate.
	cvt.rn.f32.s32	%f17, %r1;
	add.ftz.f32 	%f18, %f17, 0f3F000000;
	.loc 1 129 1
	fma.rn.ftz.f32 	%f19, %f18, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	// %p4 is false on this path, so the selp yields 0 and %r40 = 0.
	selp.u32	%r39, 1, 0, %p4;
	shl.b32 	%r40, %r39, 1;
	// Signed divide/remainder by 4 of y (bias-and-shift form):
	// for non-negative y, %r45 = y % 4 and %r49 = y / 4.
	shr.s32 	%r41, %r2, 31;
	shr.u32 	%r42, %r41, 30;
	add.s32 	%r43, %r2, %r42;
	and.b32  	%r44, %r43, 2147483644;
	sub.s32 	%r45, %r2, %r44;
	add.s32 	%r46, %r45, %r40;
	// %r48 = 2 * (y % 4 + 0) - 1
	shl.b32 	%r47, %r46, 1;
	add.s32 	%r48, %r47, -1;
	shr.s32 	%r49, %r43, 2;
	cvt.rn.f32.s32	%f20, %r49;
	cvt.rn.f32.s32	%f21, %r48;
	mov.f32 	%f22, 0f41000000;
	.loc 3 3606 10
	// Chroma y = y / 4 + (2 * (y % 4) - 1) / 8.0 (approximate divide).
	div.approx.ftz.f32 	%f23, %f21, %f22;
	.loc 1 129 110
	add.ftz.f32 	%f16, %f20, %f23;
	.loc 1 129 1
	// Lower U fetch at (param_10 + chroma x, chroma y).
	cvt.rn.f32.s32	%f24, %r14;
	add.ftz.f32 	%f13, %f24, %f19;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r31, %r32, %r33, %r34}, [inLowerUPlaneTexture, {%f13, %f16}];
	// inline asm
	.loc 1 129 1
	// Lower V fetch at (param_12 + chroma x, chroma y).
	cvt.rn.f32.s32	%f25, %r15;
	add.ftz.f32 	%f15, %f25, %f19;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r35, %r36, %r37, %r38}, [inLowerVPlaneTexture, {%f15, %f16}];
	// inline asm
	// Merge registers for the join at BB10_4: %r70 = V sample bits, %r69 = U sample bits.
	mov.u32 	%r70, %r35;
	mov.u32 	%r69, %r31;
	bra.uni 	BB10_4;

BB10_3:
	.loc 1 129 1
	// ---- Even y: sample the upper U/V textures ----
	// %f32 = (x + 0.5) * 0.5 + 0.25 : base chroma x coordinate.
	cvt.rn.f32.s32	%f30, %r1;
	add.ftz.f32 	%f31, %f30, 0f3F000000;
	.loc 1 129 1
	fma.rn.ftz.f32 	%f32, %f31, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	// %p4 is true on this path, so the selp yields 1 and %r59 = 2.
	selp.u32	%r58, 1, 0, %p4;
	shl.b32 	%r59, %r58, 1;
	// Signed divide/remainder by 4 of y (bias-and-shift form):
	// for non-negative y, %r64 = y % 4 and %r68 = y / 4.
	shr.s32 	%r60, %r2, 31;
	shr.u32 	%r61, %r60, 30;
	add.s32 	%r62, %r2, %r61;
	and.b32  	%r63, %r62, 2147483644;
	sub.s32 	%r64, %r2, %r63;
	add.s32 	%r65, %r64, %r59;
	// %r67 = 2 * (y % 4 + 2) - 1
	shl.b32 	%r66, %r65, 1;
	add.s32 	%r67, %r66, -1;
	shr.s32 	%r68, %r62, 2;
	cvt.rn.f32.s32	%f33, %r68;
	cvt.rn.f32.s32	%f34, %r67;
	mov.f32 	%f35, 0f41000000;
	.loc 3 3606 10
	// Chroma y = y / 4 + (2 * (y % 4 + 2) - 1) / 8.0 (approximate divide).
	div.approx.ftz.f32 	%f36, %f34, %f35;
	.loc 1 129 110
	add.ftz.f32 	%f29, %f33, %f36;
	.loc 1 129 1
	// Upper U fetch at (param_6 + chroma x, chroma y).
	cvt.rn.f32.s32	%f37, %r12;
	add.ftz.f32 	%f26, %f37, %f32;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r50, %r51, %r52, %r53}, [inUpperUPlaneTexture, {%f26, %f29}];
	// inline asm
	.loc 1 129 1
	// Upper V fetch at (param_8 + chroma x, chroma y).
	cvt.rn.f32.s32	%f38, %r13;
	add.ftz.f32 	%f28, %f38, %f32;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r54, %r55, %r56, %r57}, [inUpperVPlaneTexture, {%f28, %f29}];
	// inline asm
	// Merge registers for the join at BB10_4: %r70 = V sample bits, %r69 = U sample bits.
	mov.u32 	%r70, %r54;
	mov.u32 	%r69, %r50;

BB10_4:
	.loc 1 129 1
	// Scale the samples by 255.0: %f40 = V*255, %f43 = U*255, %f47 = Y*255 (below).
	mov.b32 	 %f39, %r70;
	mul.ftz.f32 	%f40, %f39, 0f437F0000;
	mov.f32 	%f41, 0f437F0000;
	.loc 1 129 1
	mov.b32 	 %f42, %r69;
	mul.ftz.f32 	%f43, %f42, 0f437F0000;
	ld.const.f32 	%f44, [kYCbCrOffset];
	.loc 3 3606 10
	// %f45 = 255.0 / 255.0 via approximate divide (nominally 1.0); it scales each offset below.
	div.approx.ftz.f32 	%f45, %f41, %f41;
	mul.ftz.f32 	%f46, %f44, %f45;
	.loc 1 129 1
	// Subtract the scaled offsets: %f48 = Y', %f51 = U', %f54 = V'.
	mul.ftz.f32 	%f47, %f1, 0f437F0000;
	sub.ftz.f32 	%f48, %f47, %f46;
	ld.const.f32 	%f49, [kYCbCrOffset+4];
	mul.ftz.f32 	%f50, %f49, %f45;
	sub.ftz.f32 	%f51, %f43, %f50;
	ld.const.f32 	%f52, [kYCbCrOffset+8];
	mul.ftz.f32 	%f53, %f52, %f45;
	sub.ftz.f32 	%f54, %f40, %f53;
	// Matrix row 0: %f2 = m[0]*Y' + m[1]*U' + m[2]*V'.
	ld.const.f32 	%f55, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f56, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f57, %f51, %f56;
	fma.rn.ftz.f32 	%f58, %f48, %f55, %f57;
	ld.const.f32 	%f59, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f2, %f54, %f59, %f58;
	// Matrix row 1: %f3 = m[3]*Y' + m[4]*U' + m[5]*V'.
	ld.const.f32 	%f60, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f61, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f62, %f51, %f61;
	fma.rn.ftz.f32 	%f63, %f48, %f60, %f62;
	ld.const.f32 	%f64, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f3, %f54, %f64, %f63;
	// Matrix row 2: %f4 = m[6]*Y' + m[7]*U' + m[8]*V'.
	ld.const.f32 	%f65, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f66, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f67, %f51, %f66;
	fma.rn.ftz.f32 	%f68, %f48, %f65, %f67;
	ld.const.f32 	%f69, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f4, %f54, %f69, %f68;
	// Fourth channel is the constant 1.0.
	mov.f32 	%f8, 0f3F800000;
	.loc 2 51 1
	// %r10 = output pixel index = y * param_15 + x.
	mad.lo.s32 	%r10, %r2, %r16, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the half-precision store path (BB10_6).
	setp.eq.s32	%p7, %r17, 0;
	@%p7 bra 	BB10_6;

	// f32 path: 16 bytes per pixel, stored as {row2, row1, row0, 1.0}.
	mul.wide.s32 	%rd12, %r10, 16;
	add.s64 	%rd13, %rd1, %rd12;
	.loc 2 51 1
	st.global.v4.f32 	[%rd13], {%f4, %f3, %f2, %f8};
	bra.uni 	BB10_7;

BB10_6:
	// f16 path: 8 bytes per pixel; each channel is rounded to f16 (round-to-nearest, flush-to-zero).
	mul.wide.s32 	%rd14, %r10, 8;
	add.s64 	%rd15, %rd1, %rd14;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	// Same channel order as the f32 path: {row2, row1, row0, 1.0}.
	st.global.v4.u16 	[%rd15], {%rs1, %rs2, %rs3, %rs4};

BB10_7:
	.loc 1 129 2
	ret;
}

.visible .entry PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<30>;
	.reg .f32 	%f<54>;
	.reg .s64 	%rd<13>;


	ld.param.u32 	%r4, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u32 	%r6, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r7, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r8, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r9, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r10, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_18];
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 1 129 1
	setp.lt.s32	%p1, %r2, %r10;
	setp.lt.s32	%p2, %r1, %r9;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	@!%p3 bra 	BB11_4;
	bra.uni 	BB11_1;

BB11_1:
	.loc 1 129 1
	cvt.rn.f32.s32	%f14, %r1;
	add.ftz.f32 	%f15, %f14, 0f3F000000;
	.loc 1 129 1
	fma.rn.ftz.f32 	%f16, %f15, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	add.s32 	%r29, %r1, %r4;
	cvt.rn.f32.s32	%f17, %r29;
	add.ftz.f32 	%f8, %f17, 0f3F000000;
	cvt.rn.f32.s32	%f18, %r2;
	add.ftz.f32 	%f9, %f18, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [inYPlaneTexture, {%f8, %f9}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f19, %r17;
	.loc 1 129 1
	cvt.rn.f32.s32	%f20, %r5;
	add.ftz.f32 	%f10, %f20, %f16;
	mul.ftz.f32 	%f13, %f9, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [inFrameCodedUPlaneTexture, {%f10, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f21, %r21;
	.loc 1 129 1
	cvt.rn.f32.s32	%f22, %r6;
	add.ftz.f32 	%f12, %f22, %f16;
	// inline asm
	tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [inFrameCodedVPlaneTexture, {%f12, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f23, %r25;
	mul.ftz.f32 	%f24, %f23, 0f437F0000;
	mov.f32 	%f25, 0f437F0000;
	mul.ftz.f32 	%f26, %f21, 0f437F0000;
	.loc 1 129 1
	mul.ftz.f32 	%f27, %f19, 0f437F0000;
	ld.const.f32 	%f28, [kYCbCrOffset];
	.loc 3 3606 10
	div.approx.ftz.f32 	%f29, %f25, %f25;
	mul.ftz.f32 	%f30, %f28, %f29;
	sub.ftz.f32 	%f31, %f27, %f30;
	ld.const.f32 	%f32, [kYCbCrOffset+4];
	mul.ftz.f32 	%f33, %f32, %f29;
	sub.ftz.f32 	%f34, %f26, %f33;
	ld.const.f32 	%f35, [kYCbCrOffset+8];
	mul.ftz.f32 	%f36, %f35, %f29;
	sub.ftz.f32 	%f37, %f24, %f36;
	ld.const.f32 	%f38, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f39, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f40, %f34, %f39;
	fma.rn.ftz.f32 	%f41, %f31, %f38, %f40;
	ld.const.f32 	%f42, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f37, %f42, %f41;
	ld.const.f32 	%f43, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f44, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f45, %f34, %f44;
	fma.rn.ftz.f32 	%f46, %f31, %f43, %f45;
	ld.const.f32 	%f47, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f37, %f47, %f46;
	ld.const.f32 	%f48, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f49, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f50, %f34, %f49;
	fma.rn.ftz.f32 	%f51, %f31, %f48, %f50;
	ld.const.f32 	%f52, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f37, %f52, %f51;
	mov.f32 	%f7, 0f3F800000;
	.loc 2 51 1
	mad.lo.s32 	%r3, %r2, %r7, %r1;
	.loc 2 51 1
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB11_3;

	mul.wide.s32 	%rd9, %r3, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f3, %f2, %f1, %f7};
	bra.uni 	BB11_4;

BB11_3:
	mul.wide.s32 	%rd11, %r3, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f7;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs1, %rs2, %rs3, %rs4};

BB11_4:
	.loc 1 129 2
	ret;
}

.visible .entry PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<71>;
	.reg .f32 	%f<71>;
	.reg .s64 	%rd<16>;


	ld.param.u32 	%r11, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r12, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r13, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_8];
	ld.param.u32 	%r14, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_10];
	ld.param.u32 	%r15, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_12];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r16, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r17, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r18, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r19, [PixelFormatConvert_YUV_420_MPEG2_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_18];
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	mov.u32 	%r20, %ntid.x;
	mov.u32 	%r21, %ctaid.x;
	mov.u32 	%r22, %tid.x;
	mad.lo.s32 	%r1, %r20, %r21, %r22;
	mov.u32 	%r23, %ntid.y;
	mov.u32 	%r24, %ctaid.y;
	mov.u32 	%r25, %tid.y;
	mad.lo.s32 	%r2, %r23, %r24, %r25;
	.loc 1 129 1
	setp.lt.s32	%p1, %r2, %r19;
	setp.lt.s32	%p2, %r1, %r18;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	@!%p3 bra 	BB12_7;
	bra.uni 	BB12_1;

BB12_1:
	.loc 1 129 1
	add.s32 	%r30, %r1, %r11;
	cvt.rn.f32.s32	%f11, %r30;
	add.ftz.f32 	%f9, %f11, 0f3F000000;
	cvt.rn.f32.s32	%f12, %r2;
	add.ftz.f32 	%f10, %f12, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inYPlaneTexture, {%f9, %f10}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f1, %r26;
	.loc 1 129 1
	and.b32  	%r3, %r2, 1;
	setp.eq.s32	%p4, %r3, 0;
	.loc 1 129 1
	@%p4 bra 	BB12_3;

	.loc 1 129 1
	cvt.rn.f32.s32	%f17, %r1;
	add.ftz.f32 	%f18, %f17, 0f3F000000;
	.loc 1 129 1
	fma.rn.ftz.f32 	%f19, %f18, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	selp.u32	%r39, 1, 0, %p4;
	shl.b32 	%r40, %r39, 1;
	shr.s32 	%r41, %r2, 31;
	shr.u32 	%r42, %r41, 30;
	add.s32 	%r43, %r2, %r42;
	and.b32  	%r44, %r43, 2147483644;
	sub.s32 	%r45, %r2, %r44;
	add.s32 	%r46, %r45, %r40;
	shl.b32 	%r47, %r46, 1;
	add.s32 	%r48, %r47, -1;
	shr.s32 	%r49, %r43, 2;
	cvt.rn.f32.s32	%f20, %r49;
	cvt.rn.f32.s32	%f21, %r48;
	mov.f32 	%f22, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f23, %f21, %f22;
	.loc 1 129 110
	add.ftz.f32 	%f16, %f20, %f23;
	.loc 1 129 1
	cvt.rn.f32.s32	%f24, %r14;
	add.ftz.f32 	%f13, %f24, %f19;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r31, %r32, %r33, %r34}, [inLowerUPlaneTexture, {%f13, %f16}];
	// inline asm
	.loc 1 129 1
	cvt.rn.f32.s32	%f25, %r15;
	add.ftz.f32 	%f15, %f25, %f19;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r35, %r36, %r37, %r38}, [inLowerVPlaneTexture, {%f15, %f16}];
	// inline asm
	mov.u32 	%r70, %r35;
	mov.u32 	%r69, %r31;
	bra.uni 	BB12_4;

BB12_3:
	.loc 1 129 1
	cvt.rn.f32.s32	%f30, %r1;
	add.ftz.f32 	%f31, %f30, 0f3F000000;
	.loc 1 129 1
	fma.rn.ftz.f32 	%f32, %f31, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	selp.u32	%r58, 1, 0, %p4;
	shl.b32 	%r59, %r58, 1;
	shr.s32 	%r60, %r2, 31;
	shr.u32 	%r61, %r60, 30;
	add.s32 	%r62, %r2, %r61;
	and.b32  	%r63, %r62, 2147483644;
	sub.s32 	%r64, %r2, %r63;
	add.s32 	%r65, %r64, %r59;
	shl.b32 	%r66, %r65, 1;
	add.s32 	%r67, %r66, -1;
	shr.s32 	%r68, %r62, 2;
	cvt.rn.f32.s32	%f33, %r68;
	cvt.rn.f32.s32	%f34, %r67;
	mov.f32 	%f35, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f36, %f34, %f35;
	.loc 1 129 110
	add.ftz.f32 	%f29, %f33, %f36;
	.loc 1 129 1
	cvt.rn.f32.s32	%f37, %r12;
	add.ftz.f32 	%f26, %f37, %f32;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r50, %r51, %r52, %r53}, [inUpperUPlaneTexture, {%f26, %f29}];
	// inline asm
	.loc 1 129 1
	cvt.rn.f32.s32	%f38, %r13;
	add.ftz.f32 	%f28, %f38, %f32;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r54, %r55, %r56, %r57}, [inUpperVPlaneTexture, {%f28, %f29}];
	// inline asm
	mov.u32 	%r70, %r54;
	mov.u32 	%r69, %r50;

BB12_4:
	.loc 1 129 1
	mov.b32 	 %f39, %r70;
	mul.ftz.f32 	%f40, %f39, 0f437F0000;
	mov.f32 	%f41, 0f437F0000;
	.loc 1 129 1
	mov.b32 	 %f42, %r69;
	mul.ftz.f32 	%f43, %f42, 0f437F0000;
	ld.const.f32 	%f44, [kYCbCrFullRangeOffset];
	.loc 3 3606 10
	div.approx.ftz.f32 	%f45, %f41, %f41;
	mul.ftz.f32 	%f46, %f44, %f45;
	.loc 1 129 1
	mul.ftz.f32 	%f47, %f1, 0f437F0000;
	sub.ftz.f32 	%f48, %f47, %f46;
	ld.const.f32 	%f49, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f50, %f49, %f45;
	sub.ftz.f32 	%f51, %f43, %f50;
	ld.const.f32 	%f52, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f53, %f52, %f45;
	sub.ftz.f32 	%f54, %f40, %f53;
	ld.const.f32 	%f55, [k709YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f56, [k709YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f57, %f51, %f56;
	fma.rn.ftz.f32 	%f58, %f48, %f55, %f57;
	ld.const.f32 	%f59, [k709YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f2, %f54, %f59, %f58;
	ld.const.f32 	%f60, [k709YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f61, [k709YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f62, %f51, %f61;
	fma.rn.ftz.f32 	%f63, %f48, %f60, %f62;
	ld.const.f32 	%f64, [k709YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f3, %f54, %f64, %f63;
	ld.const.f32 	%f65, [k709YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f66, [k709YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f67, %f51, %f66;
	fma.rn.ftz.f32 	%f68, %f48, %f65, %f67;
	ld.const.f32 	%f69, [k709YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f4, %f54, %f69, %f68;
	mov.f32 	%f8, 0f3F800000;
	.loc 2 51 1
	mad.lo.s32 	%r10, %r2, %r16, %r1;
	.loc 2 51 1
	setp.eq.s32	%p7, %r17, 0;
	@%p7 bra 	BB12_6;

	mul.wide.s32 	%rd12, %r10, 16;
	add.s64 	%rd13, %rd1, %rd12;
	.loc 2 51 1
	st.global.v4.f32 	[%rd13], {%f4, %f3, %f2, %f8};
	bra.uni 	BB12_7;

BB12_6:
	mul.wide.s32 	%rd14, %r10, 8;
	add.s64 	%rd15, %rd1, %rd14;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd15], {%rs1, %rs2, %rs3, %rs4};

BB12_7:
	.loc 1 129 2
	ret;
}

.visible .entry PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<30>;
	.reg .f32 	%f<54>;
	.reg .s64 	%rd<13>;


	ld.param.u32 	%r4, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u32 	%r6, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r7, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r8, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r9, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r10, [PixelFormatConvert_YUV_420_MPEG2_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_18];
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 1 129 1
	setp.lt.s32	%p1, %r2, %r10;
	setp.lt.s32	%p2, %r1, %r9;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	@!%p3 bra 	BB13_4;
	bra.uni 	BB13_1;

BB13_1:
	.loc 1 129 1
	cvt.rn.f32.s32	%f14, %r1;
	add.ftz.f32 	%f15, %f14, 0f3F000000;
	.loc 1 129 1
	fma.rn.ftz.f32 	%f16, %f15, 0f3F000000, 0f3E800000;
	.loc 1 129 1
	add.s32 	%r29, %r1, %r4;
	cvt.rn.f32.s32	%f17, %r29;
	add.ftz.f32 	%f8, %f17, 0f3F000000;
	cvt.rn.f32.s32	%f18, %r2;
	add.ftz.f32 	%f9, %f18, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [inYPlaneTexture, {%f8, %f9}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f19, %r17;
	.loc 1 129 1
	cvt.rn.f32.s32	%f20, %r5;
	add.ftz.f32 	%f10, %f20, %f16;
	mul.ftz.f32 	%f13, %f9, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [inFrameCodedUPlaneTexture, {%f10, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f21, %r21;
	.loc 1 129 1
	cvt.rn.f32.s32	%f22, %r6;
	add.ftz.f32 	%f12, %f22, %f16;
	// inline asm
	tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [inFrameCodedVPlaneTexture, {%f12, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f23, %r25;
	mul.ftz.f32 	%f24, %f23, 0f437F0000;
	mov.f32 	%f25, 0f437F0000;
	mul.ftz.f32 	%f26, %f21, 0f437F0000;
	.loc 1 129 1
	mul.ftz.f32 	%f27, %f19, 0f437F0000;
	ld.const.f32 	%f28, [kYCbCrFullRangeOffset];
	.loc 3 3606 10
	div.approx.ftz.f32 	%f29, %f25, %f25;
	mul.ftz.f32 	%f30, %f28, %f29;
	sub.ftz.f32 	%f31, %f27, %f30;
	ld.const.f32 	%f32, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f33, %f32, %f29;
	sub.ftz.f32 	%f34, %f26, %f33;
	ld.const.f32 	%f35, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f36, %f35, %f29;
	sub.ftz.f32 	%f37, %f24, %f36;
	ld.const.f32 	%f38, [k709YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f39, [k709YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f40, %f34, %f39;
	fma.rn.ftz.f32 	%f41, %f31, %f38, %f40;
	ld.const.f32 	%f42, [k709YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f37, %f42, %f41;
	ld.const.f32 	%f43, [k709YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f44, [k709YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f45, %f34, %f44;
	fma.rn.ftz.f32 	%f46, %f31, %f43, %f45;
	ld.const.f32 	%f47, [k709YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f37, %f47, %f46;
	ld.const.f32 	%f48, [k709YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f49, [k709YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f50, %f34, %f49;
	fma.rn.ftz.f32 	%f51, %f31, %f48, %f50;
	ld.const.f32 	%f52, [k709YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f37, %f52, %f51;
	mov.f32 	%f7, 0f3F800000;
	.loc 2 51 1
	mad.lo.s32 	%r3, %r2, %r7, %r1;
	.loc 2 51 1
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB13_3;

	mul.wide.s32 	%rd9, %r3, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f3, %f2, %f1, %f7};
	bra.uni 	BB13_4;

BB13_3:
	mul.wide.s32 	%rd11, %r3, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f7;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs1, %rs2, %rs3, %rs4};

BB13_4:
	.loc 1 129 2
	ret;
}

.visible .entry PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<71>;
	.reg .f32 	%f<69>;
	.reg .s64 	%rd<16>;


	ld.param.u32 	%r11, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r12, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r13, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_8];
	ld.param.u32 	%r14, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_10];
	ld.param.u32 	%r15, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_12];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r16, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r17, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r18, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r19, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_18];
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	mov.u32 	%r20, %ntid.x;
	mov.u32 	%r21, %ctaid.x;
	mov.u32 	%r22, %tid.x;
	mad.lo.s32 	%r1, %r20, %r21, %r22;
	mov.u32 	%r23, %ntid.y;
	mov.u32 	%r24, %ctaid.y;
	mov.u32 	%r25, %tid.y;
	mad.lo.s32 	%r2, %r23, %r24, %r25;
	.loc 1 129 1
	setp.lt.s32	%p1, %r2, %r19;
	setp.lt.s32	%p2, %r1, %r18;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	@!%p3 bra 	BB14_7;
	bra.uni 	BB14_1;

BB14_1:
	.loc 1 129 1
	add.s32 	%r30, %r1, %r11;
	cvt.rn.f32.s32	%f11, %r30;
	add.ftz.f32 	%f9, %f11, 0f3F000000;
	cvt.rn.f32.s32	%f12, %r2;
	add.ftz.f32 	%f10, %f12, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inYPlaneTexture, {%f9, %f10}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f1, %r26;
	.loc 1 129 1
	and.b32  	%r3, %r2, 1;
	setp.eq.s32	%p4, %r3, 0;
	.loc 1 129 1
	@%p4 bra 	BB14_3;

	.loc 1 129 1
	cvt.rn.f32.s32	%f17, %r1;
	add.ftz.f32 	%f18, %f17, 0f3F000000;
	.loc 1 129 1
	selp.u32	%r39, 1, 0, %p4;
	shl.b32 	%r40, %r39, 1;
	shr.s32 	%r41, %r2, 31;
	shr.u32 	%r42, %r41, 30;
	add.s32 	%r43, %r2, %r42;
	and.b32  	%r44, %r43, 2147483644;
	sub.s32 	%r45, %r2, %r44;
	add.s32 	%r46, %r45, %r40;
	shl.b32 	%r47, %r46, 1;
	add.s32 	%r48, %r47, -1;
	shr.s32 	%r49, %r43, 2;
	cvt.rn.f32.s32	%f19, %r49;
	cvt.rn.f32.s32	%f20, %r48;
	mov.f32 	%f21, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f22, %f20, %f21;
	.loc 1 129 110
	add.ftz.f32 	%f16, %f19, %f22;
	.loc 1 129 1
	cvt.rn.f32.s32	%f23, %r14;
	fma.rn.ftz.f32 	%f13, %f18, 0f3F000000, %f23;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r31, %r32, %r33, %r34}, [inLowerUPlaneTexture, {%f13, %f16}];
	// inline asm
	.loc 1 129 1
	cvt.rn.f32.s32	%f24, %r15;
	fma.rn.ftz.f32 	%f15, %f18, 0f3F000000, %f24;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r35, %r36, %r37, %r38}, [inLowerVPlaneTexture, {%f15, %f16}];
	// inline asm
	mov.u32 	%r70, %r35;
	mov.u32 	%r69, %r31;
	bra.uni 	BB14_4;

BB14_3:
	.loc 1 129 1
	cvt.rn.f32.s32	%f29, %r1;
	add.ftz.f32 	%f30, %f29, 0f3F000000;
	.loc 1 129 1
	selp.u32	%r58, 1, 0, %p4;
	shl.b32 	%r59, %r58, 1;
	shr.s32 	%r60, %r2, 31;
	shr.u32 	%r61, %r60, 30;
	add.s32 	%r62, %r2, %r61;
	and.b32  	%r63, %r62, 2147483644;
	sub.s32 	%r64, %r2, %r63;
	add.s32 	%r65, %r64, %r59;
	shl.b32 	%r66, %r65, 1;
	add.s32 	%r67, %r66, -1;
	shr.s32 	%r68, %r62, 2;
	cvt.rn.f32.s32	%f31, %r68;
	cvt.rn.f32.s32	%f32, %r67;
	mov.f32 	%f33, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f34, %f32, %f33;
	.loc 1 129 110
	add.ftz.f32 	%f28, %f31, %f34;
	.loc 1 129 1
	cvt.rn.f32.s32	%f35, %r12;
	fma.rn.ftz.f32 	%f25, %f30, 0f3F000000, %f35;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r50, %r51, %r52, %r53}, [inUpperUPlaneTexture, {%f25, %f28}];
	// inline asm
	.loc 1 129 1
	cvt.rn.f32.s32	%f36, %r13;
	fma.rn.ftz.f32 	%f27, %f30, 0f3F000000, %f36;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r54, %r55, %r56, %r57}, [inUpperVPlaneTexture, {%f27, %f28}];
	// inline asm
	mov.u32 	%r70, %r54;
	mov.u32 	%r69, %r50;

BB14_4:
	.loc 1 129 1
	mov.b32 	 %f37, %r70;
	mul.ftz.f32 	%f38, %f37, 0f437F0000;
	mov.f32 	%f39, 0f437F0000;
	.loc 1 129 1
	mov.b32 	 %f40, %r69;
	mul.ftz.f32 	%f41, %f40, 0f437F0000;
	ld.const.f32 	%f42, [kYCbCrOffset];
	.loc 3 3606 10
	div.approx.ftz.f32 	%f43, %f39, %f39;
	mul.ftz.f32 	%f44, %f42, %f43;
	.loc 1 129 1
	mul.ftz.f32 	%f45, %f1, 0f437F0000;
	sub.ftz.f32 	%f46, %f45, %f44;
	ld.const.f32 	%f47, [kYCbCrOffset+4];
	mul.ftz.f32 	%f48, %f47, %f43;
	sub.ftz.f32 	%f49, %f41, %f48;
	ld.const.f32 	%f50, [kYCbCrOffset+8];
	mul.ftz.f32 	%f51, %f50, %f43;
	sub.ftz.f32 	%f52, %f38, %f51;
	ld.const.f32 	%f53, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f54, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f55, %f49, %f54;
	fma.rn.ftz.f32 	%f56, %f46, %f53, %f55;
	ld.const.f32 	%f57, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f2, %f52, %f57, %f56;
	ld.const.f32 	%f58, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f59, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f60, %f49, %f59;
	fma.rn.ftz.f32 	%f61, %f46, %f58, %f60;
	ld.const.f32 	%f62, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f3, %f52, %f62, %f61;
	ld.const.f32 	%f63, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f64, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f65, %f49, %f64;
	fma.rn.ftz.f32 	%f66, %f46, %f63, %f65;
	ld.const.f32 	%f67, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f4, %f52, %f67, %f66;
	mov.f32 	%f8, 0f3F800000;
	.loc 2 51 1
	mad.lo.s32 	%r10, %r2, %r16, %r1;
	.loc 2 51 1
	setp.eq.s32	%p7, %r17, 0;
	@%p7 bra 	BB14_6;

	mul.wide.s32 	%rd12, %r10, 16;
	add.s64 	%rd13, %rd1, %rd12;
	.loc 2 51 1
	st.global.v4.f32 	[%rd13], {%f4, %f3, %f2, %f8};
	bra.uni 	BB14_7;

BB14_6:
	mul.wide.s32 	%rd14, %r10, 8;
	add.s64 	%rd15, %rd1, %rd14;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd15], {%rs1, %rs2, %rs3, %rs4};

BB14_7:
	.loc 1 129 2
	ret;
}

// -----------------------------------------------------------------------------
// PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel
//
// Each thread handles one output pixel: it samples the Y texture at full
// resolution and the frame-coded U/V textures at half resolution in both axes,
// subtracts kYCbCrOffset, multiplies by the 3x3 matrix k601YCbCr_To_RGB32f and
// stores a 4-component pixel with the fourth component fixed at 1.0.
//
// Parameters actually read by this kernel:
//   param_0  (u32) integer offset added to x before sampling inYPlaneTexture
//   param_2  (u32) offset added to the halved x coordinate for inFrameCodedUPlaneTexture
//   param_4  (u32) offset added to the halved x coordinate for inFrameCodedVPlaneTexture
//   param_14 (u64) output base address (converted to a global-space address)
//   param_15 (u32) output row stride in pixels (element index = y * param_15 + x)
//   param_16 (u32) output format selector: non-zero -> 4 x f32 (16 bytes/pixel),
//                  zero -> 4 x f16 (8 bytes/pixel)
//   param_17 (u32) exclusive upper bound for x (signed compare)
//   param_18 (u32) exclusive upper bound for y (signed compare)
// All other parameters are declared but never loaded here.
// -----------------------------------------------------------------------------
.visible .entry PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<30>;
	.reg .f32 	%f<53>;
	.reg .s64 	%rd<13>;


	// Load the parameters this kernel uses (see header for their roles).
	ld.param.u32 	%r4, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u32 	%r6, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r7, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r8, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r9, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r10, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = output base pointer in the global address space.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 1 129 1
	// Bounds guard: proceed only when y < param_18 and x < param_17.
	setp.lt.s32	%p1, %r2, %r10;
	setp.lt.s32	%p2, %r1, %r9;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	// Out-of-range threads jump straight to the return.
	@!%p3 bra 	BB15_4;
	bra.uni 	BB15_1;

BB15_1:
	.loc 1 129 1
	// %f15 = x + 0.5 (used for the chroma x coordinate below).
	cvt.rn.f32.s32	%f14, %r1;
	add.ftz.f32 	%f15, %f14, 0f3F000000;
	// Luma coordinate: (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r29, %r1, %r4;
	cvt.rn.f32.s32	%f16, %r29;
	add.ftz.f32 	%f8, %f16, 0f3F000000;
	cvt.rn.f32.s32	%f17, %r2;
	add.ftz.f32 	%f9, %f17, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [inYPlaneTexture, {%f8, %f9}];
	// inline asm
	.loc 3 3523 10
	// Only the first fetched component is used; its bits are reinterpreted as f32.
	mov.b32 	 %f18, %r17;
	.loc 1 129 1
	// U coordinate: ((x + 0.5) * 0.5 + param_2, (y + 0.5) * 0.5).
	cvt.rn.f32.s32	%f19, %r5;
	fma.rn.ftz.f32 	%f10, %f15, 0f3F000000, %f19;
	mul.ftz.f32 	%f13, %f9, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [inFrameCodedUPlaneTexture, {%f10, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f20, %r21;
	.loc 1 129 1
	// V coordinate: ((x + 0.5) * 0.5 + param_4, same row as U).
	cvt.rn.f32.s32	%f21, %r6;
	fma.rn.ftz.f32 	%f12, %f15, 0f3F000000, %f21;
	// inline asm
	tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [inFrameCodedVPlaneTexture, {%f12, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f22, %r25;
	// Scale the three samples by 255.0 (0f437F0000): %f23 = V, %f25 = U, %f26 = Y.
	// NOTE(review): presumably the textures return normalized floats - confirm at the bind site.
	mul.ftz.f32 	%f23, %f22, 0f437F0000;
	mov.f32 	%f24, 0f437F0000;
	mul.ftz.f32 	%f25, %f20, 0f437F0000;
	.loc 1 129 1
	mul.ftz.f32 	%f26, %f18, 0f437F0000;
	ld.const.f32 	%f27, [kYCbCrOffset];
	.loc 3 3606 10
	// %f28 = 255.0 / 255.0 (approximate divide); it scales each offset before subtraction.
	div.approx.ftz.f32 	%f28, %f24, %f24;
	// %f30 / %f33 / %f36 = Y / U / V minus the scaled kYCbCrOffset[0..2].
	mul.ftz.f32 	%f29, %f27, %f28;
	sub.ftz.f32 	%f30, %f26, %f29;
	ld.const.f32 	%f31, [kYCbCrOffset+4];
	mul.ftz.f32 	%f32, %f31, %f28;
	sub.ftz.f32 	%f33, %f25, %f32;
	ld.const.f32 	%f34, [kYCbCrOffset+8];
	mul.ftz.f32 	%f35, %f34, %f28;
	sub.ftz.f32 	%f36, %f23, %f35;
	// Matrix row 0 (floats 0..2) dotted with (Y, U, V) -> %f1.
	ld.const.f32 	%f37, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f38, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f39, %f33, %f38;
	fma.rn.ftz.f32 	%f40, %f30, %f37, %f39;
	ld.const.f32 	%f41, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f36, %f41, %f40;
	// Matrix row 1 (floats 3..5) -> %f2.
	ld.const.f32 	%f42, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f43, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f44, %f33, %f43;
	fma.rn.ftz.f32 	%f45, %f30, %f42, %f44;
	ld.const.f32 	%f46, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f36, %f46, %f45;
	// Matrix row 2 (floats 6..8) -> %f3.
	ld.const.f32 	%f47, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f48, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f49, %f33, %f48;
	fma.rn.ftz.f32 	%f50, %f30, %f47, %f49;
	ld.const.f32 	%f51, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f36, %f51, %f50;
	// Fourth output component is the constant 1.0.
	mov.f32 	%f7, 0f3F800000;
	.loc 2 51 1
	// %r3 = linear pixel index = y * param_15 + x.
	mad.lo.s32 	%r3, %r2, %r7, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the half-float store path at BB15_3.
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB15_3;

	// 32-bit float path: 16 bytes per pixel, components stored as
	// {row 2, row 1, row 0, 1.0}.
	mul.wide.s32 	%rd9, %r3, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f3, %f2, %f1, %f7};
	bra.uni 	BB15_4;

BB15_3:
	// 16-bit float path: 8 bytes per pixel; each component is rounded to f16
	// (round-to-nearest, flush-to-zero) and stored in the same component order.
	mul.wide.s32 	%rd11, %r3, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f7;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs1, %rs2, %rs3, %rs4};

BB15_4:
	.loc 1 129 2
	ret;
}

// -----------------------------------------------------------------------------
// PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel
//
// Each thread handles one output pixel: it samples the Y texture, then picks
// the "upper" or "lower" U/V texture pair from the parity of y (even y -> upper,
// odd y -> lower), subtracts kYCbCrFullRangeOffset, multiplies by the 3x3 matrix
// k601YCbCrFullRange_To_RGB32f and stores a 4-component pixel whose fourth
// component is 1.0.
//
// Parameters actually read by this kernel:
//   param_0  (u32) integer offset added to x before sampling inYPlaneTexture
//   param_6  (u32) offset added to the halved x coordinate for inUpperUPlaneTexture
//   param_8  (u32) offset added to the halved x coordinate for inUpperVPlaneTexture
//   param_10 (u32) offset added to the halved x coordinate for inLowerUPlaneTexture
//   param_12 (u32) offset added to the halved x coordinate for inLowerVPlaneTexture
//   param_14 (u64) output base address (converted to a global-space address)
//   param_15 (u32) output row stride in pixels (element index = y * param_15 + x)
//   param_16 (u32) output format selector: non-zero -> 4 x f32 (16 bytes/pixel),
//                  zero -> 4 x f16 (8 bytes/pixel)
//   param_17 (u32) exclusive upper bound for x (signed compare)
//   param_18 (u32) exclusive upper bound for y (signed compare)
// All other parameters are declared but never loaded here.
// -----------------------------------------------------------------------------
.visible .entry PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<71>;
	.reg .f32 	%f<69>;
	.reg .s64 	%rd<16>;


	// Load the parameters this kernel uses (see header for their roles).
	ld.param.u32 	%r11, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r12, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r13, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_8];
	ld.param.u32 	%r14, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_10];
	ld.param.u32 	%r15, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_12];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r16, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r17, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r18, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r19, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = output base pointer in the global address space.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r20, %ntid.x;
	mov.u32 	%r21, %ctaid.x;
	mov.u32 	%r22, %tid.x;
	mad.lo.s32 	%r1, %r20, %r21, %r22;
	mov.u32 	%r23, %ntid.y;
	mov.u32 	%r24, %ctaid.y;
	mov.u32 	%r25, %tid.y;
	mad.lo.s32 	%r2, %r23, %r24, %r25;
	.loc 1 129 1
	// Bounds guard: proceed only when y < param_18 and x < param_17.
	setp.lt.s32	%p1, %r2, %r19;
	setp.lt.s32	%p2, %r1, %r18;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	// Out-of-range threads jump straight to the return.
	@!%p3 bra 	BB16_7;
	bra.uni 	BB16_1;

BB16_1:
	.loc 1 129 1
	// Luma coordinate: (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r30, %r1, %r11;
	cvt.rn.f32.s32	%f11, %r30;
	add.ftz.f32 	%f9, %f11, 0f3F000000;
	cvt.rn.f32.s32	%f12, %r2;
	add.ftz.f32 	%f10, %f12, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inYPlaneTexture, {%f9, %f10}];
	// inline asm
	.loc 3 3523 10
	// Only the first fetched component is used; its bits are reinterpreted as f32.
	mov.b32 	 %f1, %r26;
	.loc 1 129 1
	// %p4 = (y is even). Even rows branch to the upper-field path at BB16_3;
	// odd rows fall through to the lower-field path.
	and.b32  	%r3, %r2, 1;
	setp.eq.s32	%p4, %r3, 0;
	.loc 1 129 1
	@%p4 bra 	BB16_3;

	// ---- Odd y: sample the lower-field chroma textures ----
	.loc 1 129 1
	// %f18 = x + 0.5
	cvt.rn.f32.s32	%f17, %r1;
	add.ftz.f32 	%f18, %f17, 0f3F000000;
	.loc 1 129 1
	// %r40 = (%p4 ? 1 : 0) << 1; %p4 is false on this path, so this term is 0.
	selp.u32	%r39, 1, 0, %p4;
	shl.b32 	%r40, %r39, 1;
	// Compiler-emitted signed divide/remainder by 4:
	//   %r43 = y + (y < 0 ? 3 : 0), %r49 = %r43 >> 2 (quotient),
	//   %r45 = y - (%r43 & 0x7FFFFFFC)  (y mod 4 for non-negative y).
	shr.s32 	%r41, %r2, 31;
	shr.u32 	%r42, %r41, 30;
	add.s32 	%r43, %r2, %r42;
	and.b32  	%r44, %r43, 2147483644;
	sub.s32 	%r45, %r2, %r44;
	// %r48 = 2 * (remainder + %r40) - 1
	add.s32 	%r46, %r45, %r40;
	shl.b32 	%r47, %r46, 1;
	add.s32 	%r48, %r47, -1;
	shr.s32 	%r49, %r43, 2;
	cvt.rn.f32.s32	%f19, %r49;
	cvt.rn.f32.s32	%f20, %r48;
	// 0f41000000 = 8.0
	mov.f32 	%f21, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f22, %f20, %f21;
	.loc 1 129 110
	// Chroma row coordinate %f16 = quotient + %r48 / 8.
	add.ftz.f32 	%f16, %f19, %f22;
	.loc 1 129 1
	// U x coordinate: (x + 0.5) * 0.5 + param_10.
	cvt.rn.f32.s32	%f23, %r14;
	fma.rn.ftz.f32 	%f13, %f18, 0f3F000000, %f23;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r31, %r32, %r33, %r34}, [inLowerUPlaneTexture, {%f13, %f16}];
	// inline asm
	.loc 1 129 1
	// V x coordinate: (x + 0.5) * 0.5 + param_12.
	cvt.rn.f32.s32	%f24, %r15;
	fma.rn.ftz.f32 	%f15, %f18, 0f3F000000, %f24;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r35, %r36, %r37, %r38}, [inLowerVPlaneTexture, {%f15, %f16}];
	// inline asm
	// Hand the raw sample bits to the merge point: %r70 = V, %r69 = U.
	mov.u32 	%r70, %r35;
	mov.u32 	%r69, %r31;
	bra.uni 	BB16_4;

BB16_3:
	// ---- Even y: sample the upper-field chroma textures ----
	.loc 1 129 1
	// %f30 = x + 0.5
	cvt.rn.f32.s32	%f29, %r1;
	add.ftz.f32 	%f30, %f29, 0f3F000000;
	.loc 1 129 1
	// %r59 = (%p4 ? 1 : 0) << 1; %p4 is true on this path, so this term is 2.
	selp.u32	%r58, 1, 0, %p4;
	shl.b32 	%r59, %r58, 1;
	// Same signed divide/remainder by 4 as on the odd path:
	//   %r62 = y + (y < 0 ? 3 : 0), %r68 = %r62 >> 2 (quotient),
	//   %r64 = y - (%r62 & 0x7FFFFFFC)  (y mod 4 for non-negative y).
	shr.s32 	%r60, %r2, 31;
	shr.u32 	%r61, %r60, 30;
	add.s32 	%r62, %r2, %r61;
	and.b32  	%r63, %r62, 2147483644;
	sub.s32 	%r64, %r2, %r63;
	// %r67 = 2 * (remainder + %r59) - 1
	add.s32 	%r65, %r64, %r59;
	shl.b32 	%r66, %r65, 1;
	add.s32 	%r67, %r66, -1;
	shr.s32 	%r68, %r62, 2;
	cvt.rn.f32.s32	%f31, %r68;
	cvt.rn.f32.s32	%f32, %r67;
	// 0f41000000 = 8.0
	mov.f32 	%f33, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f34, %f32, %f33;
	.loc 1 129 110
	// Chroma row coordinate %f28 = quotient + %r67 / 8.
	add.ftz.f32 	%f28, %f31, %f34;
	.loc 1 129 1
	// U x coordinate: (x + 0.5) * 0.5 + param_6.
	cvt.rn.f32.s32	%f35, %r12;
	fma.rn.ftz.f32 	%f25, %f30, 0f3F000000, %f35;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r50, %r51, %r52, %r53}, [inUpperUPlaneTexture, {%f25, %f28}];
	// inline asm
	.loc 1 129 1
	// V x coordinate: (x + 0.5) * 0.5 + param_8.
	cvt.rn.f32.s32	%f36, %r13;
	fma.rn.ftz.f32 	%f27, %f30, 0f3F000000, %f36;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r54, %r55, %r56, %r57}, [inUpperVPlaneTexture, {%f27, %f28}];
	// inline asm
	// Hand the raw sample bits to the merge point: %r70 = V, %r69 = U.
	mov.u32 	%r70, %r54;
	mov.u32 	%r69, %r50;

BB16_4:
	// ---- Merge point: colour conversion ----
	.loc 1 129 1
	// Scale the samples by 255.0 (0f437F0000): %f38 = V, %f41 = U, %f45 = Y.
	// NOTE(review): presumably the textures return normalized floats - confirm at the bind site.
	mov.b32 	 %f37, %r70;
	mul.ftz.f32 	%f38, %f37, 0f437F0000;
	mov.f32 	%f39, 0f437F0000;
	.loc 1 129 1
	mov.b32 	 %f40, %r69;
	mul.ftz.f32 	%f41, %f40, 0f437F0000;
	ld.const.f32 	%f42, [kYCbCrFullRangeOffset];
	.loc 3 3606 10
	// %f43 = 255.0 / 255.0 (approximate divide); it scales each offset before subtraction.
	div.approx.ftz.f32 	%f43, %f39, %f39;
	mul.ftz.f32 	%f44, %f42, %f43;
	.loc 1 129 1
	// %f46 / %f49 / %f52 = Y / U / V minus the scaled kYCbCrFullRangeOffset[0..2].
	mul.ftz.f32 	%f45, %f1, 0f437F0000;
	sub.ftz.f32 	%f46, %f45, %f44;
	ld.const.f32 	%f47, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f48, %f47, %f43;
	sub.ftz.f32 	%f49, %f41, %f48;
	ld.const.f32 	%f50, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f51, %f50, %f43;
	sub.ftz.f32 	%f52, %f38, %f51;
	// Matrix row 0 (floats 0..2) dotted with (Y, U, V) -> %f2.
	ld.const.f32 	%f53, [k601YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f54, [k601YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f55, %f49, %f54;
	fma.rn.ftz.f32 	%f56, %f46, %f53, %f55;
	ld.const.f32 	%f57, [k601YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f2, %f52, %f57, %f56;
	// Matrix row 1 (floats 3..5) -> %f3.
	ld.const.f32 	%f58, [k601YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f59, [k601YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f60, %f49, %f59;
	fma.rn.ftz.f32 	%f61, %f46, %f58, %f60;
	ld.const.f32 	%f62, [k601YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f3, %f52, %f62, %f61;
	// Matrix row 2 (floats 6..8) -> %f4.
	ld.const.f32 	%f63, [k601YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f64, [k601YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f65, %f49, %f64;
	fma.rn.ftz.f32 	%f66, %f46, %f63, %f65;
	ld.const.f32 	%f67, [k601YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f4, %f52, %f67, %f66;
	// Fourth output component is the constant 1.0.
	mov.f32 	%f8, 0f3F800000;
	.loc 2 51 1
	// %r10 = linear pixel index = y * param_15 + x.
	mad.lo.s32 	%r10, %r2, %r16, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the half-float store path at BB16_6.
	setp.eq.s32	%p7, %r17, 0;
	@%p7 bra 	BB16_6;

	// 32-bit float path: 16 bytes per pixel, components stored as
	// {row 2, row 1, row 0, 1.0}.
	mul.wide.s32 	%rd12, %r10, 16;
	add.s64 	%rd13, %rd1, %rd12;
	.loc 2 51 1
	st.global.v4.f32 	[%rd13], {%f4, %f3, %f2, %f8};
	bra.uni 	BB16_7;

BB16_6:
	// 16-bit float path: 8 bytes per pixel; each component is rounded to f16
	// (round-to-nearest, flush-to-zero) and stored in the same component order.
	mul.wide.s32 	%rd14, %r10, 8;
	add.s64 	%rd15, %rd1, %rd14;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd15], {%rs1, %rs2, %rs3, %rs4};

BB16_7:
	.loc 1 129 2
	ret;
}

// -----------------------------------------------------------------------------
// PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel
//
// Each thread handles one output pixel: it samples the Y texture at full
// resolution and the frame-coded U/V textures at half resolution in both axes,
// subtracts kYCbCrFullRangeOffset, multiplies by the 3x3 matrix
// k601YCbCrFullRange_To_RGB32f and stores a 4-component pixel with the fourth
// component fixed at 1.0.
//
// Parameters actually read by this kernel:
//   param_0  (u32) integer offset added to x before sampling inYPlaneTexture
//   param_2  (u32) offset added to the halved x coordinate for inFrameCodedUPlaneTexture
//   param_4  (u32) offset added to the halved x coordinate for inFrameCodedVPlaneTexture
//   param_14 (u64) output base address (converted to a global-space address)
//   param_15 (u32) output row stride in pixels (element index = y * param_15 + x)
//   param_16 (u32) output format selector: non-zero -> 4 x f32 (16 bytes/pixel),
//                  zero -> 4 x f16 (8 bytes/pixel)
//   param_17 (u32) exclusive upper bound for x (signed compare)
//   param_18 (u32) exclusive upper bound for y (signed compare)
// All other parameters are declared but never loaded here.
// -----------------------------------------------------------------------------
.visible .entry PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<30>;
	.reg .f32 	%f<53>;
	.reg .s64 	%rd<13>;


	// Load the parameters this kernel uses (see header for their roles).
	ld.param.u32 	%r4, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u32 	%r6, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r7, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r8, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r9, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r10, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_601_FullRange_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = output base pointer in the global address space.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 1 129 1
	// Bounds guard: proceed only when y < param_18 and x < param_17.
	setp.lt.s32	%p1, %r2, %r10;
	setp.lt.s32	%p2, %r1, %r9;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	// Out-of-range threads jump straight to the return.
	@!%p3 bra 	BB17_4;
	bra.uni 	BB17_1;

BB17_1:
	.loc 1 129 1
	// %f15 = x + 0.5 (used for the chroma x coordinate below).
	cvt.rn.f32.s32	%f14, %r1;
	add.ftz.f32 	%f15, %f14, 0f3F000000;
	// Luma coordinate: (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r29, %r1, %r4;
	cvt.rn.f32.s32	%f16, %r29;
	add.ftz.f32 	%f8, %f16, 0f3F000000;
	cvt.rn.f32.s32	%f17, %r2;
	add.ftz.f32 	%f9, %f17, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [inYPlaneTexture, {%f8, %f9}];
	// inline asm
	.loc 3 3523 10
	// Only the first fetched component is used; its bits are reinterpreted as f32.
	mov.b32 	 %f18, %r17;
	.loc 1 129 1
	// U coordinate: ((x + 0.5) * 0.5 + param_2, (y + 0.5) * 0.5).
	cvt.rn.f32.s32	%f19, %r5;
	fma.rn.ftz.f32 	%f10, %f15, 0f3F000000, %f19;
	mul.ftz.f32 	%f13, %f9, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [inFrameCodedUPlaneTexture, {%f10, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f20, %r21;
	.loc 1 129 1
	// V coordinate: ((x + 0.5) * 0.5 + param_4, same row as U).
	cvt.rn.f32.s32	%f21, %r6;
	fma.rn.ftz.f32 	%f12, %f15, 0f3F000000, %f21;
	// inline asm
	tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [inFrameCodedVPlaneTexture, {%f12, %f13}];
	// inline asm
	.loc 3 3523 10
	mov.b32 	 %f22, %r25;
	// Scale the three samples by 255.0 (0f437F0000): %f23 = V, %f25 = U, %f26 = Y.
	// NOTE(review): presumably the textures return normalized floats - confirm at the bind site.
	mul.ftz.f32 	%f23, %f22, 0f437F0000;
	mov.f32 	%f24, 0f437F0000;
	mul.ftz.f32 	%f25, %f20, 0f437F0000;
	.loc 1 129 1
	mul.ftz.f32 	%f26, %f18, 0f437F0000;
	ld.const.f32 	%f27, [kYCbCrFullRangeOffset];
	.loc 3 3606 10
	// %f28 = 255.0 / 255.0 (approximate divide); it scales each offset before subtraction.
	div.approx.ftz.f32 	%f28, %f24, %f24;
	// %f30 / %f33 / %f36 = Y / U / V minus the scaled kYCbCrFullRangeOffset[0..2].
	mul.ftz.f32 	%f29, %f27, %f28;
	sub.ftz.f32 	%f30, %f26, %f29;
	ld.const.f32 	%f31, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f32, %f31, %f28;
	sub.ftz.f32 	%f33, %f25, %f32;
	ld.const.f32 	%f34, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f35, %f34, %f28;
	sub.ftz.f32 	%f36, %f23, %f35;
	// Matrix row 0 (floats 0..2) dotted with (Y, U, V) -> %f1.
	ld.const.f32 	%f37, [k601YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f38, [k601YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f39, %f33, %f38;
	fma.rn.ftz.f32 	%f40, %f30, %f37, %f39;
	ld.const.f32 	%f41, [k601YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f36, %f41, %f40;
	// Matrix row 1 (floats 3..5) -> %f2.
	ld.const.f32 	%f42, [k601YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f43, [k601YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f44, %f33, %f43;
	fma.rn.ftz.f32 	%f45, %f30, %f42, %f44;
	ld.const.f32 	%f46, [k601YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f36, %f46, %f45;
	// Matrix row 2 (floats 6..8) -> %f3.
	ld.const.f32 	%f47, [k601YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f48, [k601YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f49, %f33, %f48;
	fma.rn.ftz.f32 	%f50, %f30, %f47, %f49;
	ld.const.f32 	%f51, [k601YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f36, %f51, %f50;
	// Fourth output component is the constant 1.0.
	mov.f32 	%f7, 0f3F800000;
	.loc 2 51 1
	// %r3 = linear pixel index = y * param_15 + x.
	mad.lo.s32 	%r3, %r2, %r7, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the half-float store path at BB17_3.
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB17_3;

	// 32-bit float path: 16 bytes per pixel, components stored as
	// {row 2, row 1, row 0, 1.0}.
	mul.wide.s32 	%rd9, %r3, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f3, %f2, %f1, %f7};
	bra.uni 	BB17_4;

BB17_3:
	// 16-bit float path: 8 bytes per pixel; each component is rounded to f16
	// (round-to-nearest, flush-to-zero) and stored in the same component order.
	mul.wide.s32 	%rd11, %r3, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f7;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs1, %rs2, %rs3, %rs4};

BB17_4:
	.loc 1 129 2
	ret;
}

// -----------------------------------------------------------------------------
// PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel
//
// Each thread handles one output pixel: it samples the Y texture, then picks
// the "upper" or "lower" U/V texture pair from the parity of y (even y -> upper,
// odd y -> lower), subtracts kYCbCrOffset, multiplies by the 3x3 matrix
// k709YCbCr_To_RGB32f and stores a 4-component pixel whose fourth component
// is 1.0.
//
// Parameters actually read by this kernel:
//   param_0  (u32) integer offset added to x before sampling inYPlaneTexture
//   param_6  (u32) offset added to the halved x coordinate for inUpperUPlaneTexture
//   param_8  (u32) offset added to the halved x coordinate for inUpperVPlaneTexture
//   param_10 (u32) offset added to the halved x coordinate for inLowerUPlaneTexture
//   param_12 (u32) offset added to the halved x coordinate for inLowerVPlaneTexture
//   param_14 (u64) output base address (converted to a global-space address)
//   param_15 (u32) output row stride in pixels (element index = y * param_15 + x)
//   param_16 (u32) output format selector: non-zero -> 4 x f32 (16 bytes/pixel),
//                  zero -> 4 x f16 (8 bytes/pixel)
//   param_17 (u32) exclusive upper bound for x (signed compare)
//   param_18 (u32) exclusive upper bound for y (signed compare)
// All other parameters are declared but never loaded here.
// -----------------------------------------------------------------------------
.visible .entry PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<71>;
	.reg .f32 	%f<69>;
	.reg .s64 	%rd<16>;


	// Load the parameters this kernel uses (see header for their roles).
	ld.param.u32 	%r11, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r12, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r13, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_8];
	ld.param.u32 	%r14, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_10];
	ld.param.u32 	%r15, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_12];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r16, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r17, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r18, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r19, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = output base pointer in the global address space.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r20, %ntid.x;
	mov.u32 	%r21, %ctaid.x;
	mov.u32 	%r22, %tid.x;
	mad.lo.s32 	%r1, %r20, %r21, %r22;
	mov.u32 	%r23, %ntid.y;
	mov.u32 	%r24, %ctaid.y;
	mov.u32 	%r25, %tid.y;
	mad.lo.s32 	%r2, %r23, %r24, %r25;
	.loc 1 129 1
	// Bounds guard: proceed only when y < param_18 and x < param_17.
	setp.lt.s32	%p1, %r2, %r19;
	setp.lt.s32	%p2, %r1, %r18;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	// Out-of-range threads jump straight to the return.
	@!%p3 bra 	BB18_7;
	bra.uni 	BB18_1;

BB18_1:
	.loc 1 129 1
	// Luma coordinate: (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r30, %r1, %r11;
	cvt.rn.f32.s32	%f11, %r30;
	add.ftz.f32 	%f9, %f11, 0f3F000000;
	cvt.rn.f32.s32	%f12, %r2;
	add.ftz.f32 	%f10, %f12, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inYPlaneTexture, {%f9, %f10}];
	// inline asm
	.loc 3 3523 10
	// Only the first fetched component is used; its bits are reinterpreted as f32.
	mov.b32 	 %f1, %r26;
	.loc 1 129 1
	// %p4 = (y is even). Even rows branch to the upper-field path at BB18_3;
	// odd rows fall through to the lower-field path.
	and.b32  	%r3, %r2, 1;
	setp.eq.s32	%p4, %r3, 0;
	.loc 1 129 1
	@%p4 bra 	BB18_3;

	// ---- Odd y: sample the lower-field chroma textures ----
	.loc 1 129 1
	// %f18 = x + 0.5
	cvt.rn.f32.s32	%f17, %r1;
	add.ftz.f32 	%f18, %f17, 0f3F000000;
	.loc 1 129 1
	// %r40 = (%p4 ? 1 : 0) << 1; %p4 is false on this path, so this term is 0.
	selp.u32	%r39, 1, 0, %p4;
	shl.b32 	%r40, %r39, 1;
	// Compiler-emitted signed divide/remainder by 4:
	//   %r43 = y + (y < 0 ? 3 : 0), %r49 = %r43 >> 2 (quotient),
	//   %r45 = y - (%r43 & 0x7FFFFFFC)  (y mod 4 for non-negative y).
	shr.s32 	%r41, %r2, 31;
	shr.u32 	%r42, %r41, 30;
	add.s32 	%r43, %r2, %r42;
	and.b32  	%r44, %r43, 2147483644;
	sub.s32 	%r45, %r2, %r44;
	// %r48 = 2 * (remainder + %r40) - 1
	add.s32 	%r46, %r45, %r40;
	shl.b32 	%r47, %r46, 1;
	add.s32 	%r48, %r47, -1;
	shr.s32 	%r49, %r43, 2;
	cvt.rn.f32.s32	%f19, %r49;
	cvt.rn.f32.s32	%f20, %r48;
	// 0f41000000 = 8.0
	mov.f32 	%f21, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f22, %f20, %f21;
	.loc 1 129 110
	// Chroma row coordinate %f16 = quotient + %r48 / 8.
	add.ftz.f32 	%f16, %f19, %f22;
	.loc 1 129 1
	// U x coordinate: (x + 0.5) * 0.5 + param_10.
	cvt.rn.f32.s32	%f23, %r14;
	fma.rn.ftz.f32 	%f13, %f18, 0f3F000000, %f23;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r31, %r32, %r33, %r34}, [inLowerUPlaneTexture, {%f13, %f16}];
	// inline asm
	.loc 1 129 1
	// V x coordinate: (x + 0.5) * 0.5 + param_12.
	cvt.rn.f32.s32	%f24, %r15;
	fma.rn.ftz.f32 	%f15, %f18, 0f3F000000, %f24;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r35, %r36, %r37, %r38}, [inLowerVPlaneTexture, {%f15, %f16}];
	// inline asm
	// Hand the raw sample bits to the merge point: %r70 = V, %r69 = U.
	mov.u32 	%r70, %r35;
	mov.u32 	%r69, %r31;
	bra.uni 	BB18_4;

BB18_3:
	// ---- Even y: sample the upper-field chroma textures ----
	.loc 1 129 1
	// %f30 = x + 0.5
	cvt.rn.f32.s32	%f29, %r1;
	add.ftz.f32 	%f30, %f29, 0f3F000000;
	.loc 1 129 1
	// %r59 = (%p4 ? 1 : 0) << 1; %p4 is true on this path, so this term is 2.
	selp.u32	%r58, 1, 0, %p4;
	shl.b32 	%r59, %r58, 1;
	// Same signed divide/remainder by 4 as on the odd path:
	//   %r62 = y + (y < 0 ? 3 : 0), %r68 = %r62 >> 2 (quotient),
	//   %r64 = y - (%r62 & 0x7FFFFFFC)  (y mod 4 for non-negative y).
	shr.s32 	%r60, %r2, 31;
	shr.u32 	%r61, %r60, 30;
	add.s32 	%r62, %r2, %r61;
	and.b32  	%r63, %r62, 2147483644;
	sub.s32 	%r64, %r2, %r63;
	// %r67 = 2 * (remainder + %r59) - 1
	add.s32 	%r65, %r64, %r59;
	shl.b32 	%r66, %r65, 1;
	add.s32 	%r67, %r66, -1;
	shr.s32 	%r68, %r62, 2;
	cvt.rn.f32.s32	%f31, %r68;
	cvt.rn.f32.s32	%f32, %r67;
	// 0f41000000 = 8.0
	mov.f32 	%f33, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f34, %f32, %f33;
	.loc 1 129 110
	// Chroma row coordinate %f28 = quotient + %r67 / 8.
	add.ftz.f32 	%f28, %f31, %f34;
	.loc 1 129 1
	// U x coordinate: (x + 0.5) * 0.5 + param_6.
	cvt.rn.f32.s32	%f35, %r12;
	fma.rn.ftz.f32 	%f25, %f30, 0f3F000000, %f35;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r50, %r51, %r52, %r53}, [inUpperUPlaneTexture, {%f25, %f28}];
	// inline asm
	.loc 1 129 1
	// V x coordinate: (x + 0.5) * 0.5 + param_8.
	cvt.rn.f32.s32	%f36, %r13;
	fma.rn.ftz.f32 	%f27, %f30, 0f3F000000, %f36;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r54, %r55, %r56, %r57}, [inUpperVPlaneTexture, {%f27, %f28}];
	// inline asm
	// Hand the raw sample bits to the merge point: %r70 = V, %r69 = U.
	mov.u32 	%r70, %r54;
	mov.u32 	%r69, %r50;

BB18_4:
	// ---- Merge point: colour conversion ----
	.loc 1 129 1
	// Scale the samples by 255.0 (0f437F0000): %f38 = V, %f41 = U, %f45 = Y.
	// NOTE(review): presumably the textures return normalized floats - confirm at the bind site.
	mov.b32 	 %f37, %r70;
	mul.ftz.f32 	%f38, %f37, 0f437F0000;
	mov.f32 	%f39, 0f437F0000;
	.loc 1 129 1
	mov.b32 	 %f40, %r69;
	mul.ftz.f32 	%f41, %f40, 0f437F0000;
	ld.const.f32 	%f42, [kYCbCrOffset];
	.loc 3 3606 10
	// %f43 = 255.0 / 255.0 (approximate divide); it scales each offset before subtraction.
	div.approx.ftz.f32 	%f43, %f39, %f39;
	mul.ftz.f32 	%f44, %f42, %f43;
	.loc 1 129 1
	// %f46 / %f49 / %f52 = Y / U / V minus the scaled kYCbCrOffset[0..2].
	mul.ftz.f32 	%f45, %f1, 0f437F0000;
	sub.ftz.f32 	%f46, %f45, %f44;
	ld.const.f32 	%f47, [kYCbCrOffset+4];
	mul.ftz.f32 	%f48, %f47, %f43;
	sub.ftz.f32 	%f49, %f41, %f48;
	ld.const.f32 	%f50, [kYCbCrOffset+8];
	mul.ftz.f32 	%f51, %f50, %f43;
	sub.ftz.f32 	%f52, %f38, %f51;
	// Matrix row 0 (floats 0..2) dotted with (Y, U, V) -> %f2.
	ld.const.f32 	%f53, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f54, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f55, %f49, %f54;
	fma.rn.ftz.f32 	%f56, %f46, %f53, %f55;
	ld.const.f32 	%f57, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f2, %f52, %f57, %f56;
	// Matrix row 1 (floats 3..5) -> %f3.
	ld.const.f32 	%f58, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f59, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f60, %f49, %f59;
	fma.rn.ftz.f32 	%f61, %f46, %f58, %f60;
	ld.const.f32 	%f62, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f3, %f52, %f62, %f61;
	// Matrix row 2 (floats 6..8) -> %f4.
	ld.const.f32 	%f63, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f64, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f65, %f49, %f64;
	fma.rn.ftz.f32 	%f66, %f46, %f63, %f65;
	ld.const.f32 	%f67, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f4, %f52, %f67, %f66;
	// Fourth output component is the constant 1.0.
	mov.f32 	%f8, 0f3F800000;
	.loc 2 51 1
	// %r10 = linear pixel index = y * param_15 + x.
	mad.lo.s32 	%r10, %r2, %r16, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the half-float store path at BB18_6.
	setp.eq.s32	%p7, %r17, 0;
	@%p7 bra 	BB18_6;

	// 32-bit float path: 16 bytes per pixel, components stored as
	// {row 2, row 1, row 0, 1.0}.
	mul.wide.s32 	%rd12, %r10, 16;
	add.s64 	%rd13, %rd1, %rd12;
	.loc 2 51 1
	st.global.v4.f32 	[%rd13], {%f4, %f3, %f2, %f8};
	bra.uni 	BB18_7;

BB18_6:
	// 16-bit float path: 8 bytes per pixel; each component is rounded to f16
	// (round-to-nearest, flush-to-zero) and stored in the same component order.
	mul.wide.s32 	%rd14, %r10, 8;
	add.s64 	%rd15, %rd1, %rd14;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd15], {%rs1, %rs2, %rs3, %rs4};

BB18_7:
	.loc 1 129 2
	ret;
}

// Kernel: frame-coded planar Y/U/V textures -> four-channel float (or half) pixels.
//
// Each thread handles one (x, y) position derived from the .x / .y block and
// thread indices. In-bounds threads sample inYPlaneTexture,
// inFrameCodedUPlaneTexture and inFrameCodedVPlaneTexture, scale the samples by
// 255, subtract kYCbCrOffset, multiply by the 3x3 matrix k709YCbCr_To_RGB32f and
// store {row 2, row 1, row 0, 1.0} of the result to the destination buffer.
//
// Parameters loaded by this body (the remaining ones are declared but never read here):
//   param_0  : integer added to x before sampling the Y texture
//   param_2  : integer added to the halved x coordinate when sampling the U texture
//   param_4  : integer added to the halved x coordinate when sampling the V texture
//   param_14 : destination base address (converted to a global-space address)
//   param_15 : destination row pitch in pixels (pixel index = y * pitch + x)
//   param_16 : output selector: non-zero -> 4 x f32 (16 bytes per pixel),
//              zero -> 4 x f16 (8 bytes per pixel)
//   param_17 : exclusive upper bound for x
//   param_18 : exclusive upper bound for y
// NOTE(review): the row 2 / row 1 / row 0 store order presumably maps to B, G, R
// as the kernel name suggests -- confirm against the matrix definition.
.visible .entry PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<30>;
	.reg .f32 	%f<53>;
	.reg .s64 	%rd<13>;


	// Load the parameters this kernel actually uses.
	ld.param.u32 	%r4, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u32 	%r6, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r7, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r8, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r9, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r10, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = destination pointer as a global-space address.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 1 129 1
	// Bounds check: continue only when y < param_18 and x < param_17 (signed compares).
	setp.lt.s32	%p1, %r2, %r10;
	setp.lt.s32	%p2, %r1, %r9;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	// Out-of-range threads skip straight to the return.
	@!%p3 bra 	BB19_4;
	bra.uni 	BB19_1;

BB19_1:
	.loc 1 129 1
	// %f15 = x + 0.5 (reused below for the chroma x coordinates).
	cvt.rn.f32.s32	%f14, %r1;
	add.ftz.f32 	%f15, %f14, 0f3F000000;
	// Y texture coordinates: (%f8, %f9) = (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r29, %r1, %r4;
	cvt.rn.f32.s32	%f16, %r29;
	add.ftz.f32 	%f8, %f16, 0f3F000000;
	cvt.rn.f32.s32	%f17, %r2;
	add.ftz.f32 	%f9, %f17, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [inYPlaneTexture, {%f8, %f9}];
	// inline asm
	.loc 3 3523 10
	// Reinterpret the first returned component's bits as f32: %f18 = Y sample.
	mov.b32 	 %f18, %r17;
	.loc 1 129 1
	// U texture coordinates: (%f10, %f13) = ((x + 0.5) * 0.5 + param_2, (y + 0.5) * 0.5).
	cvt.rn.f32.s32	%f19, %r5;
	fma.rn.ftz.f32 	%f10, %f15, 0f3F000000, %f19;
	mul.ftz.f32 	%f13, %f9, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [inFrameCodedUPlaneTexture, {%f10, %f13}];
	// inline asm
	.loc 3 3523 10
	// %f20 = U sample (bit reinterpretation of the first component).
	mov.b32 	 %f20, %r21;
	.loc 1 129 1
	// V texture coordinates: (%f12, %f13) = ((x + 0.5) * 0.5 + param_4, same row as U).
	cvt.rn.f32.s32	%f21, %r6;
	fma.rn.ftz.f32 	%f12, %f15, 0f3F000000, %f21;
	// inline asm
	tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [inFrameCodedVPlaneTexture, {%f12, %f13}];
	// inline asm
	.loc 3 3523 10
	// %f22 = V sample; scale V and U by 255 (0f437F0000).
	// NOTE(review): the *255 scaling suggests the textures return values in [0, 1] -- confirm.
	mov.b32 	 %f22, %r25;
	mul.ftz.f32 	%f23, %f22, 0f437F0000;
	mov.f32 	%f24, 0f437F0000;
	mul.ftz.f32 	%f25, %f20, 0f437F0000;
	.loc 1 129 1
	// %f26 = Y sample * 255.
	mul.ftz.f32 	%f26, %f18, 0f437F0000;
	ld.const.f32 	%f27, [kYCbCrOffset];
	.loc 3 3606 10
	// %f28 = 255 / 255 (approximate divide); used as the scale applied to each offset.
	div.approx.ftz.f32 	%f28, %f24, %f24;
	// Subtract the scaled per-channel offsets:
	//   %f30 = Y*255 - offset[0]*%f28, %f33 = U*255 - offset[1]*%f28, %f36 = V*255 - offset[2]*%f28.
	mul.ftz.f32 	%f29, %f27, %f28;
	sub.ftz.f32 	%f30, %f26, %f29;
	ld.const.f32 	%f31, [kYCbCrOffset+4];
	mul.ftz.f32 	%f32, %f31, %f28;
	sub.ftz.f32 	%f33, %f25, %f32;
	ld.const.f32 	%f34, [kYCbCrOffset+8];
	mul.ftz.f32 	%f35, %f34, %f28;
	sub.ftz.f32 	%f36, %f23, %f35;
	// Matrix row 0: %f1 = %f30*m[0] + %f33*m[1] + %f36*m[2].
	ld.const.f32 	%f37, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f38, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f39, %f33, %f38;
	fma.rn.ftz.f32 	%f40, %f30, %f37, %f39;
	ld.const.f32 	%f41, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f36, %f41, %f40;
	// Matrix row 1: %f2 = %f30*m[3] + %f33*m[4] + %f36*m[5].
	ld.const.f32 	%f42, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f43, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f44, %f33, %f43;
	fma.rn.ftz.f32 	%f45, %f30, %f42, %f44;
	ld.const.f32 	%f46, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f36, %f46, %f45;
	// Matrix row 2: %f3 = %f30*m[6] + %f33*m[7] + %f36*m[8].
	ld.const.f32 	%f47, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f48, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f49, %f33, %f48;
	fma.rn.ftz.f32 	%f50, %f30, %f47, %f49;
	ld.const.f32 	%f51, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f36, %f51, %f50;
	// Fourth channel is the constant 1.0.
	mov.f32 	%f7, 0f3F800000;
	.loc 2 51 1
	// %r3 = destination pixel index = y * param_15 + x.
	mad.lo.s32 	%r3, %r2, %r7, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the half-precision store path (BB19_3).
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB19_3;

	// 32-bit float path: 16 bytes per pixel.
	mul.wide.s32 	%rd9, %r3, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f3, %f2, %f1, %f7};
	bra.uni 	BB19_4;

BB19_3:
	// 16-bit float path: 8 bytes per pixel; each channel is rounded to f16 first.
	mul.wide.s32 	%rd11, %r3, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f7;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	// Same channel order as the f32 path: {row 2, row 1, row 0, 1.0}.
	st.global.v4.u16 	[%rd12], {%rs1, %rs2, %rs3, %rs4};

BB19_4:
	.loc 1 129 2
	ret;
}

// Kernel: field-coded planar Y/U/V textures -> four-channel float (or half) pixels,
// using the full-range offset and matrix constants.
//
// Each thread handles one (x, y) position derived from the .x / .y block and
// thread indices. In-bounds threads sample inYPlaneTexture, then pick the chroma
// textures by row parity: even rows read inUpperUPlaneTexture / inUpperVPlaneTexture,
// odd rows read inLowerUPlaneTexture / inLowerVPlaneTexture. The samples are scaled
// by 255, kYCbCrFullRangeOffset is subtracted, the result is multiplied by the 3x3
// matrix k709YCbCrFullRange_To_RGB32f, and {row 2, row 1, row 0, 1.0} is stored.
//
// Parameters loaded by this body (the remaining ones are declared but never read here):
//   param_0  : integer added to x before sampling the Y texture
//   param_6  : integer added to the halved x coordinate for the upper U texture
//   param_8  : integer added to the halved x coordinate for the upper V texture
//   param_10 : integer added to the halved x coordinate for the lower U texture
//   param_12 : integer added to the halved x coordinate for the lower V texture
//   param_14 : destination base address (converted to a global-space address)
//   param_15 : destination row pitch in pixels (pixel index = y * pitch + x)
//   param_16 : output selector: non-zero -> 4 x f32 (16 bytes per pixel),
//              zero -> 4 x f16 (8 bytes per pixel)
//   param_17 : exclusive upper bound for x
//   param_18 : exclusive upper bound for y
// NOTE(review): the row 2 / row 1 / row 0 store order presumably maps to B, G, R
// as the kernel name suggests -- confirm against the matrix definition.
.visible .entry PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<71>;
	.reg .f32 	%f<69>;
	.reg .s64 	%rd<16>;


	// Load the parameters this kernel actually uses.
	ld.param.u32 	%r11, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r12, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r13, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_8];
	ld.param.u32 	%r14, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_10];
	ld.param.u32 	%r15, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_12];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r16, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r17, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r18, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r19, [PixelFormatConvert_YUV_420_MPEG4_FIELD_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = destination pointer as a global-space address.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r20, %ntid.x;
	mov.u32 	%r21, %ctaid.x;
	mov.u32 	%r22, %tid.x;
	mad.lo.s32 	%r1, %r20, %r21, %r22;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r23, %ntid.y;
	mov.u32 	%r24, %ctaid.y;
	mov.u32 	%r25, %tid.y;
	mad.lo.s32 	%r2, %r23, %r24, %r25;
	.loc 1 129 1
	// Bounds check: continue only when y < param_18 and x < param_17 (signed compares).
	setp.lt.s32	%p1, %r2, %r19;
	setp.lt.s32	%p2, %r1, %r18;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	// Out-of-range threads skip straight to the return.
	@!%p3 bra 	BB20_7;
	bra.uni 	BB20_1;

BB20_1:
	.loc 1 129 1
	// Y texture coordinates: (%f9, %f10) = (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r30, %r1, %r11;
	cvt.rn.f32.s32	%f11, %r30;
	add.ftz.f32 	%f9, %f11, 0f3F000000;
	cvt.rn.f32.s32	%f12, %r2;
	add.ftz.f32 	%f10, %f12, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inYPlaneTexture, {%f9, %f10}];
	// inline asm
	.loc 3 3523 10
	// Reinterpret the first returned component's bits as f32: %f1 = Y sample.
	mov.b32 	 %f1, %r26;
	.loc 1 129 1
	// %p4 is true for even rows ((y & 1) == 0).
	and.b32  	%r3, %r2, 1;
	setp.eq.s32	%p4, %r3, 0;
	.loc 1 129 1
	// Even rows take the BB20_3 path (upper textures); odd rows fall through (lower textures).
	@%p4 bra 	BB20_3;

	.loc 1 129 1
	// Odd-row path. %f18 = x + 0.5.
	cvt.rn.f32.s32	%f17, %r1;
	add.ftz.f32 	%f18, %f17, 0f3F000000;
	.loc 1 129 1
	// Chroma row coordinate:
	//   %f16 = (y >> 2) + (2 * ((y mod 4) + 2 * even) - 1) / 8
	// where 'even' is the selp result (0 on this path, since %p4 is false here).
	// The shr/add/and/sub sequence yields y mod 4 and y / 4 when y is non-negative.
	selp.u32	%r39, 1, 0, %p4;
	shl.b32 	%r40, %r39, 1;
	shr.s32 	%r41, %r2, 31;
	shr.u32 	%r42, %r41, 30;
	add.s32 	%r43, %r2, %r42;
	and.b32  	%r44, %r43, 2147483644;
	sub.s32 	%r45, %r2, %r44;
	add.s32 	%r46, %r45, %r40;
	shl.b32 	%r47, %r46, 1;
	add.s32 	%r48, %r47, -1;
	shr.s32 	%r49, %r43, 2;
	cvt.rn.f32.s32	%f19, %r49;
	cvt.rn.f32.s32	%f20, %r48;
	// 0f41000000 = 8.0 (divisor for the fractional row term).
	mov.f32 	%f21, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f22, %f20, %f21;
	.loc 1 129 110
	add.ftz.f32 	%f16, %f19, %f22;
	.loc 1 129 1
	// Lower U x coordinate: %f13 = (x + 0.5) * 0.5 + param_10.
	cvt.rn.f32.s32	%f23, %r14;
	fma.rn.ftz.f32 	%f13, %f18, 0f3F000000, %f23;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r31, %r32, %r33, %r34}, [inLowerUPlaneTexture, {%f13, %f16}];
	// inline asm
	.loc 1 129 1
	// Lower V x coordinate: %f15 = (x + 0.5) * 0.5 + param_12.
	cvt.rn.f32.s32	%f24, %r15;
	fma.rn.ftz.f32 	%f15, %f18, 0f3F000000, %f24;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r35, %r36, %r37, %r38}, [inLowerVPlaneTexture, {%f15, %f16}];
	// inline asm
	// Hand the raw samples to the merge block: %r70 = V bits, %r69 = U bits.
	mov.u32 	%r70, %r35;
	mov.u32 	%r69, %r31;
	bra.uni 	BB20_4;

BB20_3:
	.loc 1 129 1
	// Even-row path. %f30 = x + 0.5.
	cvt.rn.f32.s32	%f29, %r1;
	add.ftz.f32 	%f30, %f29, 0f3F000000;
	.loc 1 129 1
	// Same chroma row formula as the odd-row path, but the selp result is 1 here
	// (%p4 is true), which adds 2 to the (y mod 4) term:
	//   %f28 = (y >> 2) + (2 * ((y mod 4) + 2) - 1) / 8
	selp.u32	%r58, 1, 0, %p4;
	shl.b32 	%r59, %r58, 1;
	shr.s32 	%r60, %r2, 31;
	shr.u32 	%r61, %r60, 30;
	add.s32 	%r62, %r2, %r61;
	and.b32  	%r63, %r62, 2147483644;
	sub.s32 	%r64, %r2, %r63;
	add.s32 	%r65, %r64, %r59;
	shl.b32 	%r66, %r65, 1;
	add.s32 	%r67, %r66, -1;
	shr.s32 	%r68, %r62, 2;
	cvt.rn.f32.s32	%f31, %r68;
	cvt.rn.f32.s32	%f32, %r67;
	// 0f41000000 = 8.0 (divisor for the fractional row term).
	mov.f32 	%f33, 0f41000000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f34, %f32, %f33;
	.loc 1 129 110
	add.ftz.f32 	%f28, %f31, %f34;
	.loc 1 129 1
	// Upper U x coordinate: %f25 = (x + 0.5) * 0.5 + param_6.
	cvt.rn.f32.s32	%f35, %r12;
	fma.rn.ftz.f32 	%f25, %f30, 0f3F000000, %f35;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r50, %r51, %r52, %r53}, [inUpperUPlaneTexture, {%f25, %f28}];
	// inline asm
	.loc 1 129 1
	// Upper V x coordinate: %f27 = (x + 0.5) * 0.5 + param_8.
	cvt.rn.f32.s32	%f36, %r13;
	fma.rn.ftz.f32 	%f27, %f30, 0f3F000000, %f36;
	.loc 1 129 201
	// inline asm
	tex.2d.v4.u32.f32 {%r54, %r55, %r56, %r57}, [inUpperVPlaneTexture, {%f27, %f28}];
	// inline asm
	// Hand the raw samples to the merge block: %r70 = V bits, %r69 = U bits.
	mov.u32 	%r70, %r54;
	mov.u32 	%r69, %r50;

BB20_4:
	.loc 1 129 1
	// Both parity paths join here. Reinterpret V bits as f32 and scale by 255 (0f437F0000).
	// NOTE(review): the *255 scaling suggests the textures return values in [0, 1] -- confirm.
	mov.b32 	 %f37, %r70;
	mul.ftz.f32 	%f38, %f37, 0f437F0000;
	mov.f32 	%f39, 0f437F0000;
	.loc 1 129 1
	// Reinterpret U bits as f32 and scale by 255.
	mov.b32 	 %f40, %r69;
	mul.ftz.f32 	%f41, %f40, 0f437F0000;
	ld.const.f32 	%f42, [kYCbCrFullRangeOffset];
	.loc 3 3606 10
	// %f43 = 255 / 255 (approximate divide); used as the scale applied to each offset.
	div.approx.ftz.f32 	%f43, %f39, %f39;
	mul.ftz.f32 	%f44, %f42, %f43;
	.loc 1 129 1
	// Subtract the scaled per-channel offsets:
	//   %f46 = Y*255 - offset[0]*%f43, %f49 = U*255 - offset[1]*%f43, %f52 = V*255 - offset[2]*%f43.
	mul.ftz.f32 	%f45, %f1, 0f437F0000;
	sub.ftz.f32 	%f46, %f45, %f44;
	ld.const.f32 	%f47, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f48, %f47, %f43;
	sub.ftz.f32 	%f49, %f41, %f48;
	ld.const.f32 	%f50, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f51, %f50, %f43;
	sub.ftz.f32 	%f52, %f38, %f51;
	// Matrix row 0: %f2 = %f46*m[0] + %f49*m[1] + %f52*m[2].
	ld.const.f32 	%f53, [k709YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f54, [k709YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f55, %f49, %f54;
	fma.rn.ftz.f32 	%f56, %f46, %f53, %f55;
	ld.const.f32 	%f57, [k709YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f2, %f52, %f57, %f56;
	// Matrix row 1: %f3 = %f46*m[3] + %f49*m[4] + %f52*m[5].
	ld.const.f32 	%f58, [k709YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f59, [k709YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f60, %f49, %f59;
	fma.rn.ftz.f32 	%f61, %f46, %f58, %f60;
	ld.const.f32 	%f62, [k709YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f3, %f52, %f62, %f61;
	// Matrix row 2: %f4 = %f46*m[6] + %f49*m[7] + %f52*m[8].
	ld.const.f32 	%f63, [k709YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f64, [k709YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f65, %f49, %f64;
	fma.rn.ftz.f32 	%f66, %f46, %f63, %f65;
	ld.const.f32 	%f67, [k709YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f4, %f52, %f67, %f66;
	// Fourth channel is the constant 1.0.
	mov.f32 	%f8, 0f3F800000;
	.loc 2 51 1
	// %r10 = destination pixel index = y * param_15 + x.
	mad.lo.s32 	%r10, %r2, %r16, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the half-precision store path (BB20_6).
	setp.eq.s32	%p7, %r17, 0;
	@%p7 bra 	BB20_6;

	// 32-bit float path: 16 bytes per pixel.
	mul.wide.s32 	%rd12, %r10, 16;
	add.s64 	%rd13, %rd1, %rd12;
	.loc 2 51 1
	st.global.v4.f32 	[%rd13], {%f4, %f3, %f2, %f8};
	bra.uni 	BB20_7;

BB20_6:
	// 16-bit float path: 8 bytes per pixel; each channel is rounded to f16 first.
	mul.wide.s32 	%rd14, %r10, 8;
	add.s64 	%rd15, %rd1, %rd14;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	// Same channel order as the f32 path: {row 2, row 1, row 0, 1.0}.
	st.global.v4.u16 	[%rd15], {%rs1, %rs2, %rs3, %rs4};

BB20_7:
	.loc 1 129 2
	ret;
}

// Kernel: frame-coded planar Y/U/V textures -> four-channel float (or half) pixels,
// using the full-range offset and matrix constants.
//
// Each thread handles one (x, y) position derived from the .x / .y block and
// thread indices. In-bounds threads sample inYPlaneTexture,
// inFrameCodedUPlaneTexture and inFrameCodedVPlaneTexture, scale the samples by
// 255, subtract kYCbCrFullRangeOffset, multiply by the 3x3 matrix
// k709YCbCrFullRange_To_RGB32f and store {row 2, row 1, row 0, 1.0} of the result.
//
// Parameters loaded by this body (the remaining ones are declared but never read here):
//   param_0  : integer added to x before sampling the Y texture
//   param_2  : integer added to the halved x coordinate when sampling the U texture
//   param_4  : integer added to the halved x coordinate when sampling the V texture
//   param_14 : destination base address (converted to a global-space address)
//   param_15 : destination row pitch in pixels (pixel index = y * pitch + x)
//   param_16 : output selector: non-zero -> 4 x f32 (16 bytes per pixel),
//              zero -> 4 x f16 (8 bytes per pixel)
//   param_17 : exclusive upper bound for x
//   param_18 : exclusive upper bound for y
// NOTE(review): the row 2 / row 1 / row 0 store order presumably maps to B, G, R
// as the kernel name suggests -- confirm against the matrix definition.
.visible .entry PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel(
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_0,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_4,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_6,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_8,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_9,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_10,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_11,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_12,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_13,
	.param .u64 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_14,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_15,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_16,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_17,
	.param .u32 PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_18
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<5>;
	.reg .s32 	%r<30>;
	.reg .f32 	%f<53>;
	.reg .s64 	%rd<13>;


	// Load the parameters this kernel actually uses.
	ld.param.u32 	%r4, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u32 	%r6, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u64 	%rd2, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_14];
	ld.param.u32 	%r7, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_15];
	ld.param.u32 	%r8, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_16];
	ld.param.u32 	%r9, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_17];
	ld.param.u32 	%r10, [PixelFormatConvert_YUV_420_MPEG4_FRAME_PICTURE_PLANAR_8u_709_FullRange_To_BGRA_4444_32f_Kernel_param_18];
	// %rd1 = destination pointer as a global-space address.
	cvta.to.global.u64 	%rd1, %rd2;
	.loc 1 129 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 1 129 1
	// Bounds check: continue only when y < param_18 and x < param_17 (signed compares).
	setp.lt.s32	%p1, %r2, %r10;
	setp.lt.s32	%p2, %r1, %r9;
	and.pred  	%p3, %p1, %p2;
	.loc 1 129 1
	// Out-of-range threads skip straight to the return.
	@!%p3 bra 	BB21_4;
	bra.uni 	BB21_1;

BB21_1:
	.loc 1 129 1
	// %f15 = x + 0.5 (reused below for the chroma x coordinates).
	cvt.rn.f32.s32	%f14, %r1;
	add.ftz.f32 	%f15, %f14, 0f3F000000;
	// Y texture coordinates: (%f8, %f9) = (x + param_0 + 0.5, y + 0.5).
	add.s32 	%r29, %r1, %r4;
	cvt.rn.f32.s32	%f16, %r29;
	add.ftz.f32 	%f8, %f16, 0f3F000000;
	cvt.rn.f32.s32	%f17, %r2;
	add.ftz.f32 	%f9, %f17, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [inYPlaneTexture, {%f8, %f9}];
	// inline asm
	.loc 3 3523 10
	// Reinterpret the first returned component's bits as f32: %f18 = Y sample.
	mov.b32 	 %f18, %r17;
	.loc 1 129 1
	// U texture coordinates: (%f10, %f13) = ((x + 0.5) * 0.5 + param_2, (y + 0.5) * 0.5).
	cvt.rn.f32.s32	%f19, %r5;
	fma.rn.ftz.f32 	%f10, %f15, 0f3F000000, %f19;
	mul.ftz.f32 	%f13, %f9, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [inFrameCodedUPlaneTexture, {%f10, %f13}];
	// inline asm
	.loc 3 3523 10
	// %f20 = U sample (bit reinterpretation of the first component).
	mov.b32 	 %f20, %r21;
	.loc 1 129 1
	// V texture coordinates: (%f12, %f13) = ((x + 0.5) * 0.5 + param_4, same row as U).
	cvt.rn.f32.s32	%f21, %r6;
	fma.rn.ftz.f32 	%f12, %f15, 0f3F000000, %f21;
	// inline asm
	tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [inFrameCodedVPlaneTexture, {%f12, %f13}];
	// inline asm
	.loc 3 3523 10
	// %f22 = V sample; scale V and U by 255 (0f437F0000).
	// NOTE(review): the *255 scaling suggests the textures return values in [0, 1] -- confirm.
	mov.b32 	 %f22, %r25;
	mul.ftz.f32 	%f23, %f22, 0f437F0000;
	mov.f32 	%f24, 0f437F0000;
	mul.ftz.f32 	%f25, %f20, 0f437F0000;
	.loc 1 129 1
	// %f26 = Y sample * 255.
	mul.ftz.f32 	%f26, %f18, 0f437F0000;
	ld.const.f32 	%f27, [kYCbCrFullRangeOffset];
	.loc 3 3606 10
	// %f28 = 255 / 255 (approximate divide); used as the scale applied to each offset.
	div.approx.ftz.f32 	%f28, %f24, %f24;
	// Subtract the scaled per-channel offsets:
	//   %f30 = Y*255 - offset[0]*%f28, %f33 = U*255 - offset[1]*%f28, %f36 = V*255 - offset[2]*%f28.
	mul.ftz.f32 	%f29, %f27, %f28;
	sub.ftz.f32 	%f30, %f26, %f29;
	ld.const.f32 	%f31, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f32, %f31, %f28;
	sub.ftz.f32 	%f33, %f25, %f32;
	ld.const.f32 	%f34, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f35, %f34, %f28;
	sub.ftz.f32 	%f36, %f23, %f35;
	// Matrix row 0: %f1 = %f30*m[0] + %f33*m[1] + %f36*m[2].
	ld.const.f32 	%f37, [k709YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f38, [k709YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f39, %f33, %f38;
	fma.rn.ftz.f32 	%f40, %f30, %f37, %f39;
	ld.const.f32 	%f41, [k709YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f36, %f41, %f40;
	// Matrix row 1: %f2 = %f30*m[3] + %f33*m[4] + %f36*m[5].
	ld.const.f32 	%f42, [k709YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f43, [k709YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f44, %f33, %f43;
	fma.rn.ftz.f32 	%f45, %f30, %f42, %f44;
	ld.const.f32 	%f46, [k709YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f36, %f46, %f45;
	// Matrix row 2: %f3 = %f30*m[6] + %f33*m[7] + %f36*m[8].
	ld.const.f32 	%f47, [k709YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f48, [k709YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f49, %f33, %f48;
	fma.rn.ftz.f32 	%f50, %f30, %f47, %f49;
	ld.const.f32 	%f51, [k709YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f36, %f51, %f50;
	// Fourth channel is the constant 1.0.
	mov.f32 	%f7, 0f3F800000;
	.loc 2 51 1
	// %r3 = destination pixel index = y * param_15 + x.
	mad.lo.s32 	%r3, %r2, %r7, %r1;
	.loc 2 51 1
	// param_16 == 0 selects the half-precision store path (BB21_3).
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB21_3;

	// 32-bit float path: 16 bytes per pixel.
	mul.wide.s32 	%rd9, %r3, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f3, %f2, %f1, %f7};
	bra.uni 	BB21_4;

BB21_3:
	// 16-bit float path: 8 bytes per pixel; each channel is rounded to f16 first.
	mul.wide.s32 	%rd11, %r3, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs1, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs2, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs3, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f7;
	mov.b16 	%rs4, %temp;
}
	.loc 2 51 231
	// Same channel order as the f32 path: {row 2, row 1, row 0, 1.0}.
	st.global.v4.u16 	[%rd12], {%rs1, %rs2, %rs3, %rs4};

BB21_4:
	.loc 1 129 2
	ret;
}


