//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Wed Jul 10 12:41:20 2013 (1373485280)
// Cuda compilation tools, release 5.5, V5.5.0
//

.version 3.2
.target sm_30
.address_size 64

	.file	1 "D:/singlebarrel/releases/2014.03/shared/adobe/MediaCore/GPUFoundation/Src/ImageProcessing/PixelFormatConvert_444_YUV.cu", 1399785311, 2182
	.file	2 "D:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\MediaCore\\GPUFoundation\\Inc\\ImageProcessing/PixelFormatConvert_Common.h", 1399785310, 21667
	.file	3 "d:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\mediacore\\external\\3rdparty\\nvidia\\cuda\\win\\include\\device_functions.h", 1399785281, 191626
	.file	4 "D:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\MediaCore\\GPUFoundation\\API\\Inc\\GPUFoundation/KernelSupport/KernelCore.h", 1399785310, 7840
	.file	5 "D:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\MediaCore\\GPUFoundation\\Inc\\ImageProcessing/PixelFormatConvert_444.h", 1399785310, 4478
// Colour-conversion coefficient tables, stored in .const space as raw bytes.
// Each table is 36 bytes.  The code in this file consumes such a table four
// bytes at a time with ld.const.f32 (offsets +0, +4, +8, +12, ...), i.e. as
// nine little-endian IEEE-754 single-precision values.
// NOTE(review): presumably each table is a row-major 3x3 matrix named
// <source>_To_<destination> -- confirm against PixelFormatConvert_Common.h.
.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 33, 201, 44, 190, 111, 155, 169, 190, 0, 0, 0, 63, 0, 0, 0, 63, 70, 94, 214, 190, 232, 134, 166, 189};
.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 188, 116, 179, 63, 0, 0, 128, 63, 152, 50, 176, 190, 158, 209, 54, 191, 0, 0, 128, 63, 229, 208, 226, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70, 246, 130, 66, 145, 141, 0, 67, 94, 186, 199, 65, 33, 48, 23, 194, 240, 103, 148, 194, 0, 0, 224, 66, 0, 0, 224, 66, 111, 146, 187, 194, 70, 182, 145, 193};
.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 182, 23, 205, 59, 37, 160, 149, 59, 40, 15, 201, 186, 156, 239, 80, 187, 37, 160, 149, 59, 236, 155, 1, 60, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219, 121, 131, 62, 152, 14, 1, 63, 18, 131, 200, 61, 174, 199, 23, 190, 238, 252, 148, 190, 197, 224, 224, 62, 197, 224, 224, 62, 217, 78, 188, 190, 174, 71, 146, 189};
.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 160, 74, 204, 63, 127, 10, 149, 63, 254, 148, 200, 190, 184, 30, 80, 191, 127, 10, 149, 63, 78, 26, 1, 64, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 166, 27, 44, 190, 39, 241, 168, 190, 250, 254, 254, 62, 250, 254, 254, 62, 43, 135, 213, 190, 59, 223, 165, 189};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0, 0, 128, 63, 0, 0, 0, 0, 72, 193, 178, 63, 0, 0, 128, 63, 143, 130, 175, 190, 225, 26, 54, 191, 0, 0, 128, 63, 20, 238, 225, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113, 125, 152, 66, 92, 175, 21, 67, 92, 143, 232, 65, 158, 111, 43, 194, 49, 72, 168, 194, 0, 0, 254, 66, 0, 0, 254, 66, 170, 177, 212, 194, 88, 57, 165, 193};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129, 128, 128, 59, 0, 0, 0, 0, 188, 116, 179, 59, 129, 128, 128, 59, 194, 50, 176, 186, 179, 209, 54, 187, 129, 128, 128, 59, 229, 208, 226, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208, 179, 89, 62, 89, 23, 55, 63, 152, 221, 147, 61, 186, 164, 234, 189, 210, 86, 197, 190, 0, 0, 0, 63, 0, 0, 0, 63, 190, 134, 232, 190, 16, 202, 59, 189};
.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 12, 147, 201, 63, 0, 0, 128, 63, 221, 209, 63, 190, 243, 173, 239, 190, 0, 0, 128, 63, 77, 132, 237, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106, 60, 58, 66, 6, 161, 28, 67, 244, 253, 124, 65, 223, 79, 205, 193, 8, 172, 172, 194, 0, 0, 224, 66, 0, 0, 224, 66, 195, 117, 203, 194, 236, 81, 36, 193};
.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 239, 94, 230, 59, 37, 160, 149, 59, 33, 57, 91, 186, 178, 245, 8, 187, 37, 160, 149, 59, 82, 185, 7, 60, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCrFullRange_To_RGB32f[36] = {131, 128, 128, 59, 0, 0, 0, 0, 28, 147, 201, 59, 131, 128, 128, 59, 61, 210, 63, 186, 248, 173, 239, 186, 131, 128, 128, 59, 82, 132, 237, 59, 0, 0, 0, 0};
// RGB8u -> 709 YCbCr coefficients: nine little-endian IEEE-754 f32 values.
// Decoded rows (approximate):
//   Y  :  0.1826   0.6142   0.0620
//   Cb : -0.1006  -0.3386   0.4392
//   Cr :  0.4392  -0.3989  -0.0403
// The first Cb coefficient has to be negative so that the Cb row sums to zero
// (equal R, G and B must produce no chroma before the offset is added).  This
// matches the sign of the same term in kRGB32f_To_709YCbCr and is what the
// inverse table k709YCbCr_To_RGB8u inverts.  Its top byte is therefore
// 189 (0xBD), not 61 (0x3D), which would encode +0.1006.
.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207, 247, 58, 62, 53, 62, 29, 63, 231, 251, 125, 61, 147, 24, 206, 189, 23, 89, 173, 190, 197, 224, 224, 62, 197, 224, 224, 62, 12, 66, 204, 190, 195, 245, 36, 189};
// More 36-byte coefficient tables (nine little-endian f32 values each, read
// by the code in this file with ld.const.f32).
.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 147, 120, 229, 63, 127, 10, 149, 63, 53, 94, 90, 190, 205, 108, 8, 191, 127, 10, 149, 63, 154, 49, 7, 64, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0, 0, 128, 63, 23, 100, 203, 61, 1, 77, 68, 62, 0, 0, 0, 0, 18, 103, 125, 63, 10, 158, 226, 189, 0, 0, 0, 0, 61, 98, 148, 189, 249, 191, 123, 63};
.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0, 0, 128, 63, 122, 165, 236, 189, 179, 237, 84, 190, 0, 0, 0, 0, 204, 98, 130, 63, 216, 188, 234, 61, 0, 0, 0, 0, 74, 179, 153, 61, 234, 61, 131, 63};
// Per-component offsets, three f32 values each, read with ld.const.f32 at
// offsets +0, +4 and +8.  Decoded: kYCbCrOffset = {16.0, 128.0, 128.0},
// kYCbCrFullRangeOffset = {0.0, 128.0, 128.0}.  The code scales them by
// (max depth value / 255) before use.
.const .align 4 .b8 kYCbCrOffset[12] = {0, 0, 128, 65, 0, 0, 0, 67, 0, 0, 0, 67};
.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 67};
// ASCII "__CUDA_FTZ" followed by a NUL terminator.
.global .align 1 .b8 $str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};

// _Z13MaxDepthValuej(param_0: u32 format word) -> f32
// Maps the depth field (param_0 & 248) to a maximum component value:
//     0  -> 255.0    (0f437F0000)
//     8  -> 1023.0   (0f447FC000)
//     16 -> 32768.0  (0f47000000)
//     any other value -> 1.0 (0f3F800000)
// Written as a branch-free chain of selects; the earlier tests take priority.
.visible .func  (.param .b32 func_retval0) _Z13MaxDepthValuej(
	.param .b32 _Z13MaxDepthValuej_param_0
)
{
	.reg .pred 	%isDepth0, %isDepth8, %isDepth16;
	.reg .s32 	%fmt, %depth;
	.reg .f32 	%fallback, %notDepth0, %maxValue;

	ld.param.u32 	%fmt, [_Z13MaxDepthValuej_param_0];
	.loc 2 51 1
	and.b32  	%depth, %fmt, 248;

	// Evaluate all three comparisons up front.
	setp.eq.s32	%isDepth0, %depth, 0;
	setp.eq.s32	%isDepth8, %depth, 8;
	setp.eq.s32	%isDepth16, %depth, 16;

	// Innermost select handles depth 16 versus "anything else".
	selp.f32	%fallback, 0f47000000, 0f3F800000, %isDepth16;
	selp.f32	%notDepth0, 0f447FC000, %fallback, %isDepth8;
	selp.f32	%maxValue, 0f437F0000, %notDepth0, %isDepth0;

	st.param.f32	[func_retval0+0], %maxValue;
	ret;
}

// _Z23ColorSpaceConvertMatrixjj(param_0: u32, param_1: u32) -> 64-bit address
// Walks a decision tree over bit flags of the two format words and returns
// the generic address (cvta.const) of one of the 36-byte .const coefficient
// tables, or 0 when the flag combination has no table.
// Bits tested: param_0 & {256, 512, 2048, 4096} and
//              param_1 & {256, 512, 2048, 4096}.
// In the comments below S = param_0 and D = param_1.
// NOTE(review): judging only by the names of the tables returned, S is
// presumably the source format and D the destination format, with 512 marking
// YUV data, 256 the 32f/YPbPr variants, 2048 "709" versus "601" and 4096
// the non-FullRange variants -- confirm against PixelFormatConvert_Common.h.
.visible .func  (.param .b64 func_retval0) _Z23ColorSpaceConvertMatrixjj(
	.param .b32 _Z23ColorSpaceConvertMatrixjj_param_0,
	.param .b32 _Z23ColorSpaceConvertMatrixjj_param_1
)
{
	.reg .pred 	%p<33>;
	.reg .s32 	%r<16>;
	.reg .s64 	%rd<32>;


	ld.param.u32 	%r9, [_Z23ColorSpaceConvertMatrixjj_param_0];
	ld.param.u32 	%r10, [_Z23ColorSpaceConvertMatrixjj_param_1];
	.loc 2 51 1
	and.b32  	%r11, %r9, 512;
	setp.eq.s32	%p1, %r11, 0;
	.loc 2 51 1
	// %r1 = D & 256, reused throughout the tree.
	and.b32  	%r1, %r10, 256;
	.loc 2 51 1
	// S & 512 clear -> the RGB-named tables (BB1_29).
	@%p1 bra 	BB1_29;

	.loc 2 51 1
	and.b32  	%r12, %r9, 2048;
	setp.eq.s32	%p2, %r12, 0;
	.loc 2 51 1
	// %r2 = S & 4096, %r3 = D & 512.
	and.b32  	%r2, %r9, 4096;
	.loc 2 51 1
	and.b32  	%r3, %r10, 512;
	.loc 2 51 1
	// S & 2048 clear -> the "601" source tables (BB1_15).
	@%p2 bra 	BB1_15;

	setp.eq.s32	%p3, %r2, 0;
	.loc 2 51 1
	// S & 4096 clear -> "709 FullRange" source (BB1_13).
	@%p3 bra 	BB1_13;

	setp.eq.s32	%p4, %r3, 0;
	.loc 2 51 1
	// %r4 = S & 256.
	and.b32  	%r4, %r9, 256;
	.loc 2 51 1
	// D & 512 clear -> 709 source to RGB (BB1_7).
	@%p4 bra 	BB1_7;

	.loc 2 51 1
	// Both S and D have 512 set: only S & 256 == 0 has a table.
	setp.ne.s32	%p5, %r4, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p5 bra 	BB1_47;

	.loc 2 51 1
	// Return 0 unless D & 4096 is set and D & 256 is clear.
	and.b32  	%r13, %r10, 4096;
	setp.eq.s32	%p6, %r13, 0;
	.loc 2 51 1
	setp.ne.s32	%p7, %r1, 0;
	or.pred  	%p8, %p6, %p7;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p8 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k709YCbCr_To_601YCbCr;
	bra.uni 	BB1_47;

// S: 512, 2048, 4096 set.  D: 512 clear.
BB1_7:
	setp.eq.s32	%p9, %r4, 0;
	.loc 2 51 1
	@%p9 bra 	BB1_10;

	// S & 256 set: a table exists only when D & 256 is set as well.
	setp.eq.s32	%p10, %r1, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p10 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k709YPbPr_To_RGB32f;
	bra.uni 	BB1_47;

// S & 256 clear: D & 256 picks the RGB32f (set) or RGB8u (clear) table.
BB1_10:
	setp.eq.s32	%p11, %r1, 0;
	.loc 2 51 1
	@%p11 bra 	BB1_12;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k709YCbCr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_12:
	.loc 2 51 1
	cvta.const.u64 	%rd31, k709YCbCr_To_RGB8u;
	bra.uni 	BB1_47;

// S: 512, 2048 set, 4096 clear.  Return 0 unless D & 512 is clear and
// D & 256 is set.
BB1_13:
	.loc 2 51 1
	setp.ne.s32	%p12, %r3, 0;
	setp.eq.s32	%p13, %r1, 0;
	.loc 2 51 1
	or.pred  	%p14, %p13, %p12;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p14 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k709YCbCrFullRange_To_RGB32f;
	bra.uni 	BB1_47;

// S: 512 set, 2048 clear.
BB1_15:
	setp.eq.s32	%p15, %r2, 0;
	.loc 2 51 1
	// S & 4096 clear -> "601 FullRange" source (BB1_25).
	@%p15 bra 	BB1_25;

	setp.eq.s32	%p16, %r3, 0;
	.loc 2 51 1
	// %r5 = S & 256.
	and.b32  	%r5, %r9, 256;
	.loc 2 51 1
	// D & 512 clear -> 601 source to RGB (BB1_19).
	@%p16 bra 	BB1_19;

	// D & 512 set: return 0 if either S & 256 or D & 256 is set.
	or.b32  	%r14, %r5, %r1;
	.loc 2 51 1
	setp.ne.s32	%p17, %r14, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p17 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YCbCr_To_709YCbCr;
	bra.uni 	BB1_47;

// S: 512, 4096 set, 2048 clear.  D: 512 clear.
BB1_19:
	setp.eq.s32	%p18, %r5, 0;
	.loc 2 51 1
	@%p18 bra 	BB1_22;

	// S & 256 set: a table exists only when D & 256 is set as well.
	setp.eq.s32	%p19, %r1, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p19 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YPbPr_To_RGB32f;
	bra.uni 	BB1_47;

// S & 256 clear: D & 256 picks the RGB32f (set) or RGB8u (clear) table.
BB1_22:
	setp.eq.s32	%p20, %r1, 0;
	.loc 2 51 1
	@%p20 bra 	BB1_24;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YCbCr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_24:
	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YCbCr_To_RGB8u;
	bra.uni 	BB1_47;

// S: 512 set, 2048 and 4096 clear.  Return 0 if D & 512 is set; otherwise
// D & 256 picks the RGB32f (set) or RGB8u (clear) table.
BB1_25:
	.loc 2 51 1
	setp.ne.s32	%p21, %r3, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p21 bra 	BB1_47;

	setp.eq.s32	%p22, %r1, 0;
	.loc 2 51 1
	@%p22 bra 	BB1_28;

	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YCbCrFullRange_To_RGB32f;
	bra.uni 	BB1_47;

BB1_28:
	.loc 2 51 1
	cvta.const.u64 	%rd31, k601YCbCrFullRange_To_RGB8u;
	bra.uni 	BB1_47;

// S & 512 clear.  D & 512 is never tested on this side of the tree.
BB1_29:
	.loc 2 51 1
	and.b32  	%r15, %r9, 256;
	setp.eq.s32	%p23, %r15, 0;
	.loc 2 51 1
	// %r6 = D & 2048.
	and.b32  	%r6, %r10, 2048;
	.loc 2 51 1
	// S & 256 clear -> the RGB8u source tables (BB1_40).
	@%p23 bra 	BB1_40;

	setp.eq.s32	%p24, %r1, 0;
	.loc 2 51 1
	@%p24 bra 	BB1_34;

	// S & 256 and D & 256 both set: D & 2048 picks 709 (set) or 601 (clear).
	setp.eq.s32	%p25, %r6, 0;
	.loc 2 51 1
	@%p25 bra 	BB1_33;

	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB32f_To_709YPbPr;
	bra.uni 	BB1_47;

BB1_33:
	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB32f_To_601YPbPr;
	bra.uni 	BB1_47;

// S & 256 set, D & 256 clear.
BB1_34:
	setp.eq.s32	%p26, %r6, 0;
	.loc 2 51 1
	// %r7 = D & 4096.
	and.b32  	%r7, %r10, 4096;
	.loc 2 51 1
	@%p26 bra 	BB1_37;

	// D & 2048 set: a table exists only when D & 4096 is set.
	setp.eq.s32	%p27, %r7, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p27 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB32f_To_709YCbCr;
	bra.uni 	BB1_47;

// D & 2048 clear: D & 4096 picks the plain (set) or FullRange (clear) table.
BB1_37:
	setp.eq.s32	%p28, %r7, 0;
	.loc 2 51 1
	@%p28 bra 	BB1_39;

	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB32f_To_601YCbCr;
	bra.uni 	BB1_47;

BB1_39:
	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB32f_To_601YCbCrFullRange;
	bra.uni 	BB1_47;

// S & 512 and S & 256 both clear.  Return 0 if D & 256 is set.
BB1_40:
	.loc 2 51 1
	setp.ne.s32	%p29, %r1, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p29 bra 	BB1_47;

	setp.eq.s32	%p30, %r6, 0;
	.loc 2 51 1
	// %r8 = D & 4096.
	and.b32  	%r8, %r10, 4096;
	.loc 2 51 1
	@%p30 bra 	BB1_44;

	.loc 2 51 1
	// D & 2048 set: the table is returned only when D & 4096 is CLEAR.
	// NOTE(review): the RGB32f -> 709 path (BB1_34) requires D & 4096 to be
	// SET instead -- confirm that the opposite polarity here is intended.
	setp.ne.s32	%p31, %r8, 0;
	mov.u64 	%rd31, 0;
	.loc 2 51 1
	@%p31 bra 	BB1_47;

	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB8u_To_709YCbCr;
	bra.uni 	BB1_47;

// D & 2048 clear: D & 4096 picks the plain (set) or FullRange (clear) table.
BB1_44:
	setp.eq.s32	%p32, %r8, 0;
	.loc 2 51 1
	@%p32 bra 	BB1_46;

	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB8u_To_601YCbCr;
	bra.uni 	BB1_47;

BB1_46:
	.loc 2 51 1
	cvta.const.u64 	%rd31, kRGB8u_To_601YCbCrFullRange;

// Common exit: %rd31 holds the selected table address or 0.
BB1_47:
	st.param.b64	[func_retval0+0], %rd31;
	.loc 2 51 1
	ret;
}

// _Z5clampIfET_S0_S0_S0_(param_0, param_1, param_2: f32) -> f32
// Returns min(max(param_0, param_1), param_2), i.e. param_0 limited to the
// range [param_1, param_2].  Both operations use flush-to-zero (.ftz).
.visible .func  (.param .b32 func_retval0) _Z5clampIfET_S0_S0_S0_(
	.param .b32 _Z5clampIfET_S0_S0_S0__param_0,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_1,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_2
)
{
	.reg .f32 	%value, %lower, %upper, %raised, %clamped;

	ld.param.f32 	%upper, [_Z5clampIfET_S0_S0_S0__param_2];
	ld.param.f32 	%lower, [_Z5clampIfET_S0_S0_S0__param_1];
	ld.param.f32 	%value, [_Z5clampIfET_S0_S0_S0__param_0];

	// Apply the lower bound first, then the upper bound.
	.loc 3 2770 10
	max.ftz.f32 	%raised, %value, %lower;
	.loc 3 2765 10
	min.ftz.f32 	%clamped, %raised, %upper;

	.loc 4 146 39
	st.param.f32	[func_retval0+0], %clamped;
	ret;
}

// _Z23UnpremultiplyComponents6float4j(param_0: 16-byte float4, param_1: u32)
//     -> 16-byte float4
// Component 0 of param_0 is the divisor; components 1..3 are rescaled:
//   1. If (param_1 & 768) == 512, subtract offset[i] * (maxDepth / 255)
//      from components 1..3.
//   2. If (component0 - 8e-6) is not greater than 0 (ordered compare), all
//      four values become 0.  Otherwise components 1..3 are multiplied by
//      maxDepth / component0 and component 0 is left unchanged.
//   3. If (param_1 & 768) == 512, add offset[i] * (maxDepth / 255) back to
//      components 1..3 (this also applies to the zeroed values from step 2).
// maxDepth depends on (param_1 & 248): 0 -> 255, 8 -> 1023, 16 -> 32768,
// anything else -> 1.  offset is kYCbCrOffset when (param_1 & 4096) != 0 and
// kYCbCrFullRangeOffset otherwise.  Divisions use div.approx.ftz.f32.
// NOTE(review): component 0 is presumably alpha and param_1 a pixel-format
// word -- confirm against the CUDA source.
.visible .func  (.param .align 16 .b8 func_retval0[16]) _Z23UnpremultiplyComponents6float4j(
	.param .align 16 .b8 _Z23UnpremultiplyComponents6float4j_param_0[16],
	.param .b32 _Z23UnpremultiplyComponents6float4j_param_1
)
{
	.reg .pred 	%p<15>;
	.reg .s32 	%r<8>;
	.reg .f32 	%f<77>;


	// %f72 = component 0, %f68/%f69/%f70 = components 1/2/3, %r5 = param_1.
	ld.param.f32 	%f70, [_Z23UnpremultiplyComponents6float4j_param_0+12];
	ld.param.f32 	%f69, [_Z23UnpremultiplyComponents6float4j_param_0+8];
	ld.param.f32 	%f68, [_Z23UnpremultiplyComponents6float4j_param_0+4];
	ld.param.f32 	%f72, [_Z23UnpremultiplyComponents6float4j_param_0];
	ld.param.u32 	%r5, [_Z23UnpremultiplyComponents6float4j_param_1];
	.loc 2 51 1
	// %p1 = ((param_1 & 768) != 512); it is reused at BB3_15 to skip step 3.
	and.b32  	%r1, %r5, 768;
	setp.ne.s32	%p1, %r1, 512;
	@%p1 bra 	BB3_7;

	.loc 2 51 1
	// Step 1: pick maxDepth (%f67) from (param_1 & 248).
	and.b32  	%r2, %r5, 248;
	setp.ne.s32	%p2, %r2, 0;
	@%p2 bra 	BB3_3;

	mov.f32 	%f67, 0f437F0000;
	bra.uni 	BB3_6;

BB3_3:
	.loc 2 51 1
	setp.ne.s32	%p3, %r2, 8;
	@%p3 bra 	BB3_5;

	mov.f32 	%f67, 0f447FC000;
	bra.uni 	BB3_6;

BB3_5:
	.loc 2 51 1
	setp.eq.s32	%p4, %r2, 16;
	.loc 2 51 1
	selp.f32	%f67, 0f47000000, 0f3F800000, %p4;

// Subtract the scaled offsets from components 1..3.
BB3_6:
	.loc 2 51 119
	// %p5 true (bit 4096 clear) selects kYCbCrFullRangeOffset.
	and.b32  	%r6, %r5, 4096;
	setp.eq.s32	%p5, %r6, 0;
	ld.const.f32 	%f32, [kYCbCrOffset];
	ld.const.f32 	%f33, [kYCbCrFullRangeOffset];
	selp.f32	%f34, %f33, %f32, %p5;
	mov.f32 	%f35, 0f437F0000;
	.loc 3 3606 10
	// %f36 = maxDepth / 255.
	div.approx.ftz.f32 	%f36, %f67, %f35;
	.loc 2 51 119
	mul.ftz.f32 	%f37, %f34, %f36;
	sub.ftz.f32 	%f68, %f68, %f37;
	ld.const.f32 	%f38, [kYCbCrOffset+4];
	ld.const.f32 	%f39, [kYCbCrFullRangeOffset+4];
	selp.f32	%f40, %f39, %f38, %p5;
	mul.ftz.f32 	%f41, %f40, %f36;
	sub.ftz.f32 	%f69, %f69, %f41;
	ld.const.f32 	%f42, [kYCbCrOffset+8];
	ld.const.f32 	%f43, [kYCbCrFullRangeOffset+8];
	selp.f32	%f44, %f43, %f42, %p5;
	mul.ftz.f32 	%f45, %f44, %f36;
	sub.ftz.f32 	%f70, %f70, %f45;

// Step 2: guard against a (near-)zero divisor.
BB3_7:
	.loc 2 51 1
	// 0fB70637BD is approximately -8e-6.  setp.gtu is also true for NaN, so a
	// NaN divisor takes the division path.
	add.ftz.f32 	%f46, %f72, 0fB70637BD;
	setp.gtu.ftz.f32	%p6, %f46, 0f00000000;
	@%p6 bra 	BB3_9;

	// Divisor too small: zero all four output values.
	mov.f32 	%f76, 0f00000000;
	mov.f32 	%f75, %f76;
	mov.f32 	%f74, %f76;
	mov.f32 	%f72, %f76;
	bra.uni 	BB3_15;

// Pick maxDepth (%f71) again for the division.
BB3_9:
	.loc 2 51 1
	and.b32  	%r3, %r5, 248;
	setp.ne.s32	%p7, %r3, 0;
	@%p7 bra 	BB3_11;

	mov.f32 	%f71, 0f437F0000;
	bra.uni 	BB3_14;

BB3_11:
	.loc 2 51 1
	setp.ne.s32	%p8, %r3, 8;
	@%p8 bra 	BB3_13;

	mov.f32 	%f71, 0f447FC000;
	bra.uni 	BB3_14;

BB3_13:
	.loc 2 51 1
	setp.eq.s32	%p9, %r3, 16;
	.loc 2 51 1
	selp.f32	%f71, 0f47000000, 0f3F800000, %p9;

// Scale components 1..3 by maxDepth / component0.
BB3_14:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f53, %f71, %f72;
	.loc 2 51 1
	mul.ftz.f32 	%f74, %f68, %f53;
	mul.ftz.f32 	%f75, %f69, %f53;
	mul.ftz.f32 	%f76, %f70, %f53;

// Step 3: skipped unless (param_1 & 768) == 512.
BB3_15:
	.loc 2 51 1
	@%p1 bra 	BB3_22;

	.loc 2 51 1
	// Pick maxDepth (%f73) a third time.
	and.b32  	%r4, %r5, 248;
	setp.eq.s32	%p11, %r4, 8;
	@%p11 bra 	BB3_19;

	setp.ne.s32	%p12, %r4, 0;
	@%p12 bra 	BB3_20;

	mov.f32 	%f73, 0f437F0000;
	bra.uni 	BB3_21;

BB3_19:
	mov.f32 	%f73, 0f447FC000;
	bra.uni 	BB3_21;

BB3_20:
	.loc 2 51 1
	setp.eq.s32	%p13, %r4, 16;
	.loc 2 51 1
	selp.f32	%f73, 0f47000000, 0f3F800000, %p13;

// Add the scaled offsets back: component += offset * (maxDepth / 255).
BB3_21:
	.loc 2 51 120
	and.b32  	%r7, %r5, 4096;
	setp.eq.s32	%p14, %r7, 0;
	ld.const.f32 	%f56, [kYCbCrOffset];
	ld.const.f32 	%f57, [kYCbCrFullRangeOffset];
	selp.f32	%f58, %f57, %f56, %p14;
	mov.f32 	%f59, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f60, %f73, %f59;
	.loc 2 51 120
	fma.rn.ftz.f32 	%f74, %f58, %f60, %f74;
	ld.const.f32 	%f61, [kYCbCrOffset+4];
	ld.const.f32 	%f62, [kYCbCrFullRangeOffset+4];
	selp.f32	%f63, %f62, %f61, %p14;
	fma.rn.ftz.f32 	%f75, %f63, %f60, %f75;
	ld.const.f32 	%f64, [kYCbCrOffset+8];
	ld.const.f32 	%f65, [kYCbCrFullRangeOffset+8];
	selp.f32	%f66, %f65, %f64, %p14;
	fma.rn.ftz.f32 	%f76, %f66, %f60, %f76;

// Return {component0 (or 0), rescaled components 1..3}.
BB3_22:
	st.param.f32	[func_retval0+0], %f72;
	st.param.f32	[func_retval0+4], %f74;
	st.param.f32	[func_retval0+8], %f75;
	st.param.f32	[func_retval0+12], %f76;
	.loc 2 51 1
	ret;
}

// _Z19ReadPixelFormat_444...(param_0: u64 base address, param_1..param_5: 32-bit)
//     -> 16-byte float4
// Loads one element at index (param_5 * param_1 + param_4) from the buffer at
// param_0 and returns it as four f32 values.  The storage layout is chosen by
// (param_2 & 248) and, for the remaining case, by param_3:
//     0     : 4 bytes/element, four u8 values
//     8     : 4 bytes/element, one u32 holding three 10-bit fields at bit
//             positions 2, 12 and 22; the fourth result is the constant 1023.0
//     16    : 8 bytes/element, four u16 values
//     other : param_3 != 0 -> 16 bytes/element, four f32 values
//             param_3 == 0 -> 8 bytes/element, four f16 values
// All loads use generic addressing (ld without a state space).
// NOTE(review): param_1 / param_4 / param_5 are presumably row pitch in
// elements, x and y -- confirm against the caller.
.visible .func  (.param .align 16 .b8 func_retval0[16]) _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii(
	.param .b64 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_0,
	.param .b32 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_1,
	.param .b32 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_2,
	.param .b32 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_3,
	.param .b32 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_4,
	.param .b32 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_5
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<29>;
	.reg .s32 	%r<14>;
	.reg .f32 	%f<33>;
	.reg .s64 	%rd<8>;


	ld.param.u64 	%rd3, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_0];
	ld.param.u32 	%r4, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_1];
	ld.param.u32 	%r5, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_2];
	ld.param.u32 	%r2, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_3];
	ld.param.u32 	%r6, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_4];
	ld.param.u32 	%r7, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_5];
	.loc 2 51 1
	// %r3 = layout selector (param_2 & 248).
	and.b32  	%r3, %r5, 248;
	.loc 2 51 1
	// %r1 = element index; %rd1 / %rd2 = element address for 8-byte and
	// 4-byte elements respectively.
	mad.lo.s32 	%r1, %r7, %r4, %r6;
	mul.wide.s32 	%rd4, %r1, 8;
	add.s64 	%rd1, %rd3, %rd4;
	mul.wide.s32 	%rd5, %r1, 4;
	add.s64 	%rd2, %rd3, %rd5;
	.loc 2 51 1
	setp.eq.s32	%p1, %r3, 0;
	@%p1 bra 	BB4_5;

	setp.eq.s32	%p2, %r3, 8;
	@%p2 bra 	BB4_4;

	setp.ne.s32	%p3, %r3, 16;
	@%p3 bra 	BB4_6;

	.loc 2 51 1
	// Selector 16: four u16 values converted to f32.
	ld.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd1];
	cvt.rn.f32.u16	%f29, %rs1;
	cvt.rn.f32.u16	%f30, %rs2;
	cvt.rn.f32.u16	%f31, %rs3;
	cvt.rn.f32.u16	%f32, %rs4;
	bra.uni 	BB4_9;

// Selector 8: unpack three 10-bit fields from one u32.  The top field needs
// no mask because the shift by 22 leaves only 10 bits.
BB4_4:
	.loc 2 51 1
	ld.u32 	%r8, [%rd2];
	shr.u32 	%r9, %r8, 2;
	and.b32  	%r10, %r9, 1023;
	cvt.rn.f32.u32	%f29, %r10;
	shr.u32 	%r11, %r8, 12;
	and.b32  	%r12, %r11, 1023;
	cvt.rn.f32.u32	%f30, %r12;
	shr.u32 	%r13, %r8, 22;
	cvt.rn.f32.u32	%f31, %r13;
	// Fourth component is the constant 1023.0.
	mov.f32 	%f32, 0f447FC000;
	bra.uni 	BB4_9;

// Selector 0: four u8 values.  Each is masked to 8 bits before conversion.
BB4_5:
	.loc 2 51 1
	ld.v4.u8 	{%rs9, %rs10, %rs11, %rs12}, [%rd2];
	and.b16  	%rs14, %rs9, 255;
	cvt.rn.f32.u16	%f29, %rs14;
	and.b16  	%rs16, %rs10, 255;
	cvt.rn.f32.u16	%f30, %rs16;
	and.b16  	%rs18, %rs11, 255;
	cvt.rn.f32.u16	%f31, %rs18;
	and.b16  	%rs20, %rs12, 255;
	cvt.rn.f32.u16	%f32, %rs20;
	bra.uni 	BB4_9;

// Any other selector: param_3 chooses between f32 and f16 storage.
BB4_6:
	.loc 2 51 1
	setp.eq.s32	%p4, %r2, 0;
	@%p4 bra 	BB4_8;

	// param_3 != 0: 16-byte elements, loaded as four f32 values unchanged.
	mul.wide.s32 	%rd6, %r1, 16;
	add.s64 	%rd7, %rd3, %rd6;
	ld.v4.f32 	{%f25, %f26, %f27, %f28}, [%rd7];
	mov.f32 	%f32, %f28;
	mov.f32 	%f31, %f27;
	mov.f32 	%f30, %f26;
	mov.f32 	%f29, %f25;
	bra.uni 	BB4_9;

// param_3 == 0: four 16-bit values reinterpreted as f16 and widened to f32.
BB4_8:
	.loc 2 51 1
	ld.v4.u16 	{%rs21, %rs22, %rs23, %rs24}, [%rd1];
	.loc 3 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs21;
	cvt.f32.f16 	%f29, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs22;
	cvt.f32.f16 	%f30, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs23;
	cvt.f32.f16 	%f31, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs24;
	cvt.f32.f16 	%f32, %temp;
	}

// Common exit: return the four components in storage order.
BB4_9:
	st.param.f32	[func_retval0+0], %f29;
	st.param.f32	[func_retval0+4], %f30;
	st.param.f32	[func_retval0+8], %f31;
	st.param.f32	[func_retval0+12], %f32;
	.loc 2 51 1
	ret;
}

// _Z20WritePixelFormat_444...(param_0: 16-byte float4, param_1: u64 base
//     address, param_2..param_6: 32-bit); no return value.
// Stores the four f32 components of param_0 at element index
// (param_6 * param_2 + param_5) of the buffer at param_1.  The storage layout
// is chosen by (param_3 & 248) and, for the remaining case, by param_4:
//     0     : 4 bytes/element, four u8 values
//     8     : 4 bytes/element, components 0..2 packed into one u32 at bit
//             positions 2, 12 and 22; component 3 is not stored
//     16    : 8 bytes/element, four u16 values
//     other : param_4 != 0 -> 16 bytes/element, four f32 values
//             param_4 == 0 -> 8 bytes/element, four f16 values
// Integer layouts convert with cvt.rzi.ftz.u32.f32 (round toward zero).  No
// clamping to the field width is done here.
// NOTE(review): param_2 / param_5 / param_6 are presumably row pitch in
// elements, x and y, and callers presumably clamp beforehand -- confirm.
.visible .func _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii(
	.param .align 16 .b8 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0[16],
	.param .b64 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_1,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_2,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_3,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_4,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_5,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_6
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<24>;
	.reg .f32 	%f<9>;
	.reg .s64 	%rd<8>;


	// %f1..%f4 = components 0..3 of the value to store.
	ld.param.f32 	%f4, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0+12];
	ld.param.f32 	%f3, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0+8];
	ld.param.f32 	%f2, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0+4];
	ld.param.f32 	%f1, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0];
	ld.param.u64 	%rd3, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_1];
	ld.param.u32 	%r4, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_2];
	ld.param.u32 	%r5, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_3];
	ld.param.u32 	%r2, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_4];
	ld.param.u32 	%r6, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_5];
	ld.param.u32 	%r7, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_6];
	.loc 2 51 1
	// %r1 = element index; %rd1 / %rd2 = element address for 8-byte and
	// 4-byte elements respectively.
	mad.lo.s32 	%r1, %r7, %r4, %r6;
	mul.wide.s32 	%rd4, %r1, 8;
	add.s64 	%rd1, %rd3, %rd4;
	mul.wide.s32 	%rd5, %r1, 4;
	add.s64 	%rd2, %rd3, %rd5;
	.loc 2 51 1
	// %r3 = layout selector (param_3 & 248).
	and.b32  	%r3, %r5, 248;
	setp.eq.s32	%p1, %r3, 0;
	@%p1 bra 	BB5_5;

	setp.eq.s32	%p2, %r3, 8;
	@%p2 bra 	BB5_4;

	setp.ne.s32	%p3, %r3, 16;
	@%p3 bra 	BB5_6;

	// Selector 16: convert each component to u32, narrow to u16, store four.
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r8, %f1;
	.loc 2 51 82
	cvt.u16.u32	%rs1, %r8;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r9, %f2;
	.loc 2 51 161
	cvt.u16.u32	%rs2, %r9;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r10, %f3;
	.loc 2 51 242
	cvt.u16.u32	%rs3, %r10;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r11, %f4;
	cvt.u16.u32	%rs4, %r11;
	.loc 2 51 1
	st.v4.u16 	[%rd1], {%rs1, %rs2, %rs3, %rs4};
	bra.uni 	BB5_9;

// Selector 8: pack components 0..2 into one u32 (shifts 2, 12, 22, combined
// with add).  The values are not masked to 10 bits before shifting, and
// component 3 (%f4) is not written.
BB5_4:
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r12, %f1;
	.loc 2 51 65
	shl.b32 	%r13, %r12, 2;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r14, %f2;
	.loc 2 51 127
	shl.b32 	%r15, %r14, 12;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r16, %f3;
	.loc 2 51 191
	shl.b32 	%r17, %r16, 22;
	.loc 2 51 127
	add.s32 	%r18, %r15, %r13;
	.loc 2 51 191
	add.s32 	%r19, %r18, %r17;
	.loc 2 51 1
	st.u32 	[%rd2], %r19;
	bra.uni 	BB5_9;

// Selector 0: convert each component to u32 and store four bytes, in
// component order 0, 1, 2, 3 (%rs8, %rs7, %rs6, %rs5).
BB5_5:
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r20, %f1;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r21, %f2;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r22, %f3;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r23, %f4;
	cvt.u16.u32	%rs5, %r23;
	.loc 2 51 239
	cvt.u16.u32	%rs6, %r22;
	.loc 2 51 159
	cvt.u16.u32	%rs7, %r21;
	.loc 2 51 81
	cvt.u16.u32	%rs8, %r20;
	.loc 2 51 1
	st.v4.u8 	[%rd2], {%rs8, %rs7, %rs6, %rs5};
	bra.uni 	BB5_9;

// Any other selector: param_4 chooses between f32 and f16 storage.
BB5_6:
	.loc 2 51 1
	setp.eq.s32	%p4, %r2, 0;
	@%p4 bra 	BB5_8;

	// param_4 != 0: store the four f32 components unchanged (16 bytes).
	mul.wide.s32 	%rd6, %r1, 16;
	add.s64 	%rd7, %rd3, %rd6;
	.loc 2 51 1
	st.v4.f32 	[%rd7], {%f1, %f2, %f3, %f4};
	bra.uni 	BB5_9;

// param_4 == 0: round each component to f16 (round-to-nearest-even, .ftz)
// and store the four 16-bit patterns.
BB5_8:
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs9, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs10, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs12, %temp;
}
	.loc 2 51 231
	st.v4.u16 	[%rd1], {%rs9, %rs10, %rs11, %rs12};

// Common exit.
BB5_9:
	.loc 2 51 2
	ret;
}

.visible .func _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii(
	.param .b64 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_0,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_1,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_2,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_3,
	.param .b64 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_4,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_5,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_6,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_7,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_8,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_9,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_10
)
{
	.reg .pred 	%p<180>;
	.reg .s16 	%rs<41>;
	.reg .s32 	%r<157>;
	.reg .f32 	%f<519>;
	.reg .s64 	%rd<43>;


	ld.param.u64 	%rd12, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_0];
	ld.param.u32 	%r29, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_1];
	ld.param.u32 	%r30, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_2];
	ld.param.u32 	%r31, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_3];
	ld.param.u64 	%rd13, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_4];
	ld.param.u32 	%r32, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_5];
	ld.param.u32 	%r33, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_6];
	ld.param.u32 	%r34, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_7];
	ld.param.u32 	%r35, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_8];
	ld.param.u32 	%r36, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_9];
	.loc 5 92 1
	mov.u32 	%r37, %ctaid.x;
	mov.u32 	%r38, %ntid.x;
	mov.u32 	%r39, %tid.x;
	mad.lo.s32 	%r1, %r38, %r37, %r39;
	mov.u32 	%r40, %ntid.y;
	mov.u32 	%r41, %ctaid.y;
	mov.u32 	%r42, %tid.y;
	mad.lo.s32 	%r2, %r40, %r41, %r42;
	.loc 5 92 1
	setp.lt.s32	%p1, %r1, %r35;
	setp.lt.s32	%p2, %r2, %r36;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB6_239;
	bra.uni 	BB6_1;

BB6_1:
	.loc 2 51 1
	and.b32  	%r3, %r30, 248;
	setp.eq.s32	%p4, %r3, 0;
	.loc 2 51 1
	mad.lo.s32 	%r4, %r2, %r29, %r1;
	mul.wide.s32 	%rd14, %r4, 4;
	add.s64 	%rd1, %rd12, %rd14;
	.loc 2 51 1
	@%p4 bra 	BB6_9;

	.loc 2 51 1
	setp.eq.s32	%p5, %r3, 8;
	@%p5 bra 	BB6_8;

	.loc 2 51 1
	setp.eq.s32	%p6, %r3, 16;
	mul.wide.s32 	%rd15, %r4, 8;
	add.s64 	%rd2, %rd12, %rd15;
	.loc 2 51 1
	@%p6 bra 	BB6_7;

	.loc 2 51 1
	setp.eq.s32	%p7, %r31, 0;
	@%p7 bra 	BB6_6;

	mul.wide.s32 	%rd16, %r4, 16;
	add.s64 	%rd17, %rd12, %rd16;
	ld.v4.f32 	{%f202, %f203, %f204, %f205}, [%rd17];
	mov.f32 	%f417, %f205;
	mov.f32 	%f416, %f204;
	mov.f32 	%f415, %f203;
	mov.f32 	%f414, %f202;
	bra.uni 	BB6_10;

BB6_6:
	.loc 2 51 1
	ld.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd2];
	.loc 3 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f414, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f415, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f416, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f417, %temp;
	}
	bra.uni 	BB6_10;

BB6_7:
	.loc 2 51 1
	ld.v4.u16 	{%rs9, %rs10, %rs11, %rs12}, [%rd2];
	cvt.rn.f32.u16	%f414, %rs9;
	cvt.rn.f32.u16	%f415, %rs10;
	cvt.rn.f32.u16	%f416, %rs11;
	cvt.rn.f32.u16	%f417, %rs12;
	bra.uni 	BB6_10;

BB6_8:
	.loc 2 51 1
	ld.u32 	%r43, [%rd1];
	shr.u32 	%r44, %r43, 2;
	and.b32  	%r45, %r44, 1023;
	cvt.rn.f32.u32	%f414, %r45;
	shr.u32 	%r46, %r43, 12;
	and.b32  	%r47, %r46, 1023;
	cvt.rn.f32.u32	%f415, %r47;
	shr.u32 	%r48, %r43, 22;
	cvt.rn.f32.u32	%f416, %r48;
	mov.f32 	%f417, 0f447FC000;
	bra.uni 	BB6_10;

BB6_9:
	.loc 2 51 1
	ld.v4.u8 	{%rs17, %rs18, %rs19, %rs20}, [%rd1];
	and.b16  	%rs22, %rs17, 255;
	cvt.rn.f32.u16	%f414, %rs22;
	and.b16  	%rs24, %rs18, 255;
	cvt.rn.f32.u16	%f415, %rs24;
	and.b16  	%rs26, %rs19, 255;
	cvt.rn.f32.u16	%f416, %rs26;
	and.b16  	%rs28, %rs20, 255;
	cvt.rn.f32.u16	%f417, %rs28;

BB6_10:
	.loc 5 92 71
	and.b32  	%r49, %r30, 8192;
	setp.eq.s32	%p8, %r49, 0;
	selp.f32	%f448, %f417, %f414, %p8;
	selp.f32	%f26, %f416, %f415, %p8;
	selp.f32	%f27, %f415, %f416, %p8;
	selp.f32	%f513, %f414, %f417, %p8;
	and.b32  	%r50, %r30, 768;
	setp.ne.s32	%p9, %r50, 512;
	mov.f32 	%f480, %f26;
	mov.f32 	%f512, %f27;
	@%p9 bra 	BB6_18;

	and.b32  	%r51, %r33, 768;
	setp.eq.s32	%p10, %r51, 512;
	mov.f32 	%f449, %f26;
	mov.f32 	%f480, %f449;
	mov.f32 	%f481, %f27;
	mov.f32 	%f512, %f481;
	@%p10 bra 	BB6_18;

	.loc 2 51 1
	setp.ne.s32	%p11, %r3, 0;
	@%p11 bra 	BB6_14;

	mov.f32 	%f418, 0f437F0000;
	bra.uni 	BB6_17;

BB6_14:
	.loc 2 51 1
	setp.ne.s32	%p12, %r3, 8;
	@%p12 bra 	BB6_16;

	mov.f32 	%f418, 0f447FC000;
	bra.uni 	BB6_17;

BB6_16:
	.loc 2 51 1
	setp.eq.s32	%p13, %r3, 16;
	.loc 2 51 1
	selp.f32	%f418, 0f47000000, 0f3F800000, %p13;

BB6_17:
	and.b32  	%r55, %r30, 4096;
	setp.eq.s32	%p14, %r55, 0;
	ld.const.f32 	%f208, [kYCbCrOffset];
	ld.const.f32 	%f209, [kYCbCrFullRangeOffset];
	selp.f32	%f210, %f209, %f208, %p14;
	mov.f32 	%f211, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f212, %f418, %f211;
	mul.ftz.f32 	%f213, %f210, %f212;
	sub.ftz.f32 	%f480, %f26, %f213;
	ld.const.f32 	%f214, [kYCbCrOffset+4];
	ld.const.f32 	%f215, [kYCbCrFullRangeOffset+4];
	selp.f32	%f216, %f215, %f214, %p14;
	mul.ftz.f32 	%f217, %f216, %f212;
	sub.ftz.f32 	%f512, %f27, %f217;
	ld.const.f32 	%f218, [kYCbCrOffset+8];
	ld.const.f32 	%f219, [kYCbCrFullRangeOffset+8];
	selp.f32	%f220, %f219, %f218, %p14;
	mul.ftz.f32 	%f221, %f220, %f212;
	sub.ftz.f32 	%f513, %f513, %f221;

BB6_18:
	mov.f32 	%f510, %f512;
	mov.f32 	%f478, %f480;
	and.b32  	%r56, %r33, 1024;
	setp.eq.s32	%p15, %r56, 0;
	shr.u32 	%r57, %r30, 10;
	and.b32  	%r58, %r57, 1;
	setp.eq.b32	%p16, %r58, 1;
	and.pred  	%p17, %p15, %p16;
	@!%p17 bra 	BB6_28;
	bra.uni 	BB6_19;

BB6_19:
	setp.ltu.ftz.f32	%p18, %f478, 0f00000000;
	@%p18 bra 	BB6_21;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f222, %f478;
	mul.ftz.f32 	%f223, %f222, 0f3EE66666;
	ex2.approx.ftz.f32 	%f479, %f223;
	bra.uni 	BB6_22;

BB6_21:
	neg.ftz.f32 	%f224, %f478;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f225, %f224;
	mul.ftz.f32 	%f226, %f225, 0f3EE66666;
	ex2.approx.ftz.f32 	%f227, %f226;
	neg.ftz.f32 	%f479, %f227;

BB6_22:
	mov.f32 	%f478, %f479;
	setp.ltu.ftz.f32	%p19, %f510, 0f00000000;
	@%p19 bra 	BB6_24;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f228, %f510;
	mul.ftz.f32 	%f229, %f228, 0f3EE66666;
	ex2.approx.ftz.f32 	%f511, %f229;
	bra.uni 	BB6_25;

BB6_24:
	neg.ftz.f32 	%f230, %f510;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f231, %f230;
	mul.ftz.f32 	%f232, %f231, 0f3EE66666;
	ex2.approx.ftz.f32 	%f233, %f232;
	neg.ftz.f32 	%f511, %f233;

BB6_25:
	mov.f32 	%f510, %f511;
	setp.ltu.ftz.f32	%p20, %f513, 0f00000000;
	@%p20 bra 	BB6_27;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f234, %f513;
	mul.ftz.f32 	%f235, %f234, 0f3EE66666;
	ex2.approx.ftz.f32 	%f513, %f235;
	bra.uni 	BB6_28;

BB6_27:
	neg.ftz.f32 	%f236, %f513;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f237, %f236;
	mul.ftz.f32 	%f238, %f237, 0f3EE66666;
	ex2.approx.ftz.f32 	%f239, %f238;
	neg.ftz.f32 	%f513, %f239;

BB6_28:
	mov.f32 	%f46, %f510;
	mov.f32 	%f45, %f478;
	xor.b32  	%r59, %r33, %r30;
	and.b32  	%r60, %r59, 2560;
	and.b32  	%r5, %r30, 512;
	setp.ne.s32	%p21, %r60, 0;
	@%p21 bra 	BB6_30;

	setp.eq.s32	%p22, %r5, 0;
	and.b32  	%r61, %r30, 256;
	setp.eq.s32	%p23, %r61, 0;
	selp.u32	%r62, 1, 0, %p23;
	selp.b32	%r63, 0, %r62, %p22;
	and.b32  	%r64, %r33, 256;
	setp.eq.s32	%p24, %r64, 0;
	selp.u32	%r65, 1, 0, %p24;
	shr.u32 	%r66, %r33, 9;
	and.b32  	%r67, %r66, %r65;
	setp.eq.s32	%p25, %r63, %r67;
	@%p25 bra 	BB6_134;

BB6_30:
	setp.eq.s32	%p26, %r5, 0;
	.loc 2 51 1
	and.b32  	%r6, %r33, 256;
	.loc 2 51 1
	@%p26 bra 	BB6_48;

	.loc 2 51 1
	and.b32  	%r68, %r30, 2048;
	setp.eq.s32	%p27, %r68, 0;
	.loc 2 51 1
	and.b32  	%r7, %r30, 4096;
	.loc 2 51 1
	and.b32  	%r8, %r33, 512;
	.loc 2 51 1
	@%p27 bra 	BB6_40;

	setp.eq.s32	%p28, %r7, 0;
	.loc 2 51 1
	@%p28 bra 	BB6_39;

	setp.eq.s32	%p29, %r8, 0;
	.loc 2 51 1
	and.b32  	%r9, %r30, 256;
	.loc 2 51 1
	@%p29 bra 	BB6_36;

	.loc 2 51 1
	setp.ne.s32	%p30, %r9, 0;
	@%p30 bra 	BB6_57;

	.loc 2 51 1
	and.b32  	%r69, %r33, 4096;
	setp.eq.s32	%p31, %r69, 0;
	.loc 2 51 1
	setp.ne.s32	%p32, %r6, 0;
	or.pred  	%p33, %p31, %p32;
	mov.u64 	%rd42, k709YCbCr_To_601YCbCr;
	.loc 2 51 1
	@%p33 bra 	BB6_57;
	bra.uni 	BB6_59;

BB6_36:
	setp.eq.s32	%p34, %r9, 0;
	.loc 2 51 1
	@%p34 bra 	BB6_38;

	setp.eq.s32	%p35, %r6, 0;
	mov.u64 	%rd42, k709YPbPr_To_RGB32f;
	.loc 2 51 1
	@%p35 bra 	BB6_57;
	bra.uni 	BB6_59;

BB6_38:
	setp.eq.s32	%p36, %r6, 0;
	.loc 2 51 1
	mov.u64 	%rd20, k709YCbCr_To_RGB32f;
	mov.u64 	%rd21, k709YCbCr_To_RGB8u;
	selp.b64	%rd42, %rd21, %rd20, %p36;
	bra.uni 	BB6_59;

BB6_39:
	.loc 2 51 1
	setp.ne.s32	%p37, %r8, 0;
	setp.eq.s32	%p38, %r6, 0;
	.loc 2 51 1
	or.pred  	%p39, %p38, %p37;
	mov.u64 	%rd42, k709YCbCrFullRange_To_RGB32f;
	.loc 2 51 1
	@%p39 bra 	BB6_57;
	bra.uni 	BB6_59;

BB6_40:
	setp.eq.s32	%p40, %r7, 0;
	.loc 2 51 1
	@%p40 bra 	BB6_46;

	setp.eq.s32	%p41, %r8, 0;
	.loc 2 51 1
	and.b32  	%r10, %r30, 256;
	.loc 2 51 1
	@%p41 bra 	BB6_43;

	or.b32  	%r70, %r10, %r6;
	.loc 2 51 1
	setp.ne.s32	%p42, %r70, 0;
	mov.u64 	%rd42, k601YCbCr_To_709YCbCr;
	.loc 2 51 1
	@%p42 bra 	BB6_57;
	bra.uni 	BB6_59;

BB6_43:
	setp.eq.s32	%p43, %r10, 0;
	.loc 2 51 1
	@%p43 bra 	BB6_45;

	setp.eq.s32	%p44, %r6, 0;
	mov.u64 	%rd42, k601YPbPr_To_RGB32f;
	.loc 2 51 1
	@%p44 bra 	BB6_57;
	bra.uni 	BB6_59;

BB6_45:
	setp.eq.s32	%p45, %r6, 0;
	.loc 2 51 1
	mov.u64 	%rd25, k601YCbCr_To_RGB32f;
	mov.u64 	%rd26, k601YCbCr_To_RGB8u;
	selp.b64	%rd42, %rd26, %rd25, %p45;
	bra.uni 	BB6_59;

BB6_46:
	.loc 2 51 1
	setp.ne.s32	%p46, %r8, 0;
	@%p46 bra 	BB6_57;

	setp.eq.s32	%p47, %r6, 0;
	.loc 2 51 1
	mov.u64 	%rd27, k601YCbCrFullRange_To_RGB32f;
	mov.u64 	%rd28, k601YCbCrFullRange_To_RGB8u;
	selp.b64	%rd42, %rd28, %rd27, %p47;
	bra.uni 	BB6_59;

BB6_48:
	.loc 2 51 1
	and.b32  	%r71, %r30, 256;
	setp.eq.s32	%p48, %r71, 0;
	@%p48 bra 	BB6_54;

	setp.eq.s32	%p49, %r6, 0;
	.loc 2 51 1
	@%p49 bra 	BB6_51;

	.loc 2 51 1
	and.b32  	%r73, %r33, 2048;
	setp.eq.s32	%p50, %r73, 0;
	.loc 2 51 1
	mov.u64 	%rd29, kRGB32f_To_709YPbPr;
	mov.u64 	%rd30, kRGB32f_To_601YPbPr;
	selp.b64	%rd42, %rd30, %rd29, %p50;
	bra.uni 	BB6_59;

BB6_51:
	.loc 2 51 1
	and.b32  	%r74, %r33, 2048;
	setp.eq.s32	%p51, %r74, 0;
	.loc 2 51 1
	and.b32  	%r11, %r33, 4096;
	.loc 2 51 1
	@%p51 bra 	BB6_53;

	setp.eq.s32	%p52, %r11, 0;
	mov.u64 	%rd42, kRGB32f_To_709YCbCr;
	.loc 2 51 1
	@%p52 bra 	BB6_57;
	bra.uni 	BB6_59;

BB6_53:
	setp.eq.s32	%p53, %r11, 0;
	.loc 2 51 1
	mov.u64 	%rd32, kRGB32f_To_601YCbCr;
	mov.u64 	%rd33, kRGB32f_To_601YCbCrFullRange;
	selp.b64	%rd42, %rd33, %rd32, %p53;
	bra.uni 	BB6_59;

BB6_54:
	.loc 2 51 1
	setp.ne.s32	%p54, %r6, 0;
	@%p54 bra 	BB6_57;

	.loc 2 51 1
	and.b32  	%r76, %r33, 2048;
	setp.eq.s32	%p55, %r76, 0;
	.loc 2 51 1
	and.b32  	%r12, %r33, 4096;
	.loc 2 51 1
	@%p55 bra 	BB6_58;

	setp.eq.s32	%p56, %r12, 0;
	mov.u64 	%rd42, kRGB8u_To_709YCbCr;
	.loc 2 51 1
	@%p56 bra 	BB6_59;

BB6_57:
	mov.u64 	%rd42, 0;
	bra.uni 	BB6_59;

BB6_58:
	setp.eq.s32	%p57, %r12, 0;
	.loc 2 51 1
	mov.u64 	%rd36, kRGB8u_To_601YCbCr;
	mov.u64 	%rd37, kRGB8u_To_601YCbCrFullRange;
	selp.b64	%rd42, %rd37, %rd36, %p57;

BB6_59:
	ld.const.f32 	%f240, [%rd42];
	ld.const.f32 	%f241, [%rd42+4];
	mul.ftz.f32 	%f242, %f46, %f241;
	fma.rn.ftz.f32 	%f243, %f45, %f240, %f242;
	ld.const.f32 	%f244, [%rd42+8];
	fma.rn.ftz.f32 	%f48, %f513, %f244, %f243;
	ld.const.f32 	%f245, [%rd42+12];
	ld.const.f32 	%f246, [%rd42+16];
	mul.ftz.f32 	%f247, %f46, %f246;
	fma.rn.ftz.f32 	%f248, %f45, %f245, %f247;
	ld.const.f32 	%f249, [%rd42+20];
	fma.rn.ftz.f32 	%f49, %f513, %f249, %f248;
	ld.const.f32 	%f250, [%rd42+24];
	ld.const.f32 	%f251, [%rd42+28];
	mul.ftz.f32 	%f252, %f46, %f251;
	fma.rn.ftz.f32 	%f253, %f45, %f250, %f252;
	ld.const.f32 	%f254, [%rd42+32];
	fma.rn.ftz.f32 	%f513, %f513, %f254, %f253;
	and.b32  	%r78, %r59, 248;
	setp.eq.s32	%p58, %r78, 0;
	mov.f32 	%f477, %f48;
	mov.f32 	%f509, %f49;
	@%p58 bra 	BB6_146;

	and.b32  	%r13, %r30, 256;
	and.b32  	%r14, %r33, 248;
	setp.eq.s32	%p59, %r14, 0;
	shr.u32 	%r79, %r13, 8;
	and.b32  	%r80, %r79, 1;
	setp.eq.b32	%p60, %r80, 1;
	and.pred  	%p61, %p59, %p60;
	@%p61 bra 	BB6_123;

	shr.u32 	%r82, %r6, 8;
	and.b32  	%r83, %r82, 1;
	setp.eq.b32	%p63, %r83, 1;
	and.pred  	%p64, %p4, %p63;
	@%p64 bra 	BB6_123;

	setp.ne.s32	%p65, %r6, 0;
	setp.eq.s32	%p66, %r13, 0;
	and.pred  	%p67, %p66, %p65;
	@%p67 bra 	BB6_96;

	setp.ne.s32	%p68, %r13, 0;
	setp.eq.s32	%p69, %r6, 0;
	and.pred  	%p70, %p69, %p68;
	@%p70 bra 	BB6_70;

	.loc 2 51 1
	setp.ne.s32	%p71, %r3, 0;
	@%p71 bra 	BB6_66;

	mov.f32 	%f419, 0f437F0000;
	bra.uni 	BB6_69;

BB6_66:
	.loc 2 51 1
	setp.ne.s32	%p72, %r3, 8;
	@%p72 bra 	BB6_68;

	mov.f32 	%f419, 0f447FC000;
	bra.uni 	BB6_69;

BB6_68:
	.loc 2 51 1
	setp.eq.s32	%p73, %r3, 16;
	.loc 2 51 1
	selp.f32	%f419, 0f47000000, 0f3F800000, %p73;

BB6_69:
	mov.f32 	%f257, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f433, %f419, %f257;
	mov.f32 	%f434, %f433;
	mov.f32 	%f435, %f433;
	mov.f32 	%f436, %f433;
	bra.uni 	BB6_122;

BB6_70:
	.loc 2 51 1
	setp.ne.s32	%p74, %r14, 0;
	@%p74 bra 	BB6_72;

	mov.f32 	%f420, 0f437F0000;
	bra.uni 	BB6_75;

BB6_72:
	.loc 2 51 1
	setp.ne.s32	%p75, %r14, 8;
	@%p75 bra 	BB6_74;

	mov.f32 	%f420, 0f447FC000;
	bra.uni 	BB6_75;

BB6_74:
	.loc 2 51 1
	setp.eq.s32	%p76, %r14, 16;
	.loc 2 51 1
	selp.f32	%f420, 0f47000000, 0f3F800000, %p76;

BB6_75:
	.loc 2 51 1
	setp.ne.s32	%p77, %r3, 0;
	@%p77 bra 	BB6_77;

	mov.f32 	%f421, 0f437F0000;
	bra.uni 	BB6_80;

BB6_77:
	.loc 2 51 1
	setp.ne.s32	%p78, %r3, 8;
	@%p78 bra 	BB6_79;

	mov.f32 	%f421, 0f447FC000;
	bra.uni 	BB6_80;

BB6_79:
	.loc 2 51 1
	setp.eq.s32	%p79, %r3, 16;
	.loc 2 51 1
	selp.f32	%f421, 0f47000000, 0f3F800000, %p79;

BB6_80:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f436, %f420, %f421;
	.loc 2 51 1
	@%p74 bra 	BB6_82;

	mov.f32 	%f422, 0f437F0000;
	bra.uni 	BB6_85;

BB6_82:
	.loc 2 51 1
	setp.ne.s32	%p81, %r14, 8;
	@%p81 bra 	BB6_84;

	mov.f32 	%f422, 0f447FC000;
	bra.uni 	BB6_85;

BB6_84:
	.loc 2 51 1
	setp.eq.s32	%p82, %r14, 16;
	.loc 2 51 1
	selp.f32	%f422, 0f47000000, 0f3F800000, %p82;

BB6_85:
	mov.f32 	%f264, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f435, %f422, %f264;
	.loc 2 51 1
	@%p74 bra 	BB6_87;

	mov.f32 	%f423, 0f437F0000;
	bra.uni 	BB6_90;

BB6_87:
	.loc 2 51 1
	setp.ne.s32	%p84, %r14, 8;
	@%p84 bra 	BB6_89;

	mov.f32 	%f423, 0f447FC000;
	bra.uni 	BB6_90;

BB6_89:
	.loc 2 51 1
	setp.eq.s32	%p85, %r14, 16;
	.loc 2 51 1
	selp.f32	%f423, 0f47000000, 0f3F800000, %p85;

BB6_90:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f434, %f423, %f264;
	.loc 2 51 1
	@%p74 bra 	BB6_92;

	mov.f32 	%f424, %f264;
	bra.uni 	BB6_95;

BB6_92:
	.loc 2 51 1
	setp.ne.s32	%p87, %r14, 8;
	@%p87 bra 	BB6_94;

	mov.f32 	%f268, 0f447FC000;
	.loc 2 51 1
	mov.f32 	%f424, %f268;
	bra.uni 	BB6_95;

BB6_94:
	.loc 2 51 1
	setp.eq.s32	%p88, %r14, 16;
	.loc 2 51 1
	selp.f32	%f65, 0f47000000, 0f3F800000, %p88;
	mov.f32 	%f424, %f65;

BB6_95:
	.loc 2 51 1
	mov.f32 	%f66, %f424;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f433, %f66, %f264;
	bra.uni 	BB6_122;

BB6_96:
	.loc 2 51 1
	setp.ne.s32	%p89, %r14, 0;
	@%p89 bra 	BB6_98;

	mov.f32 	%f425, 0f437F0000;
	bra.uni 	BB6_101;

BB6_98:
	.loc 2 51 1
	setp.ne.s32	%p90, %r14, 8;
	@%p90 bra 	BB6_100;

	mov.f32 	%f425, 0f447FC000;
	bra.uni 	BB6_101;

BB6_100:
	.loc 2 51 1
	setp.eq.s32	%p91, %r14, 16;
	.loc 2 51 1
	selp.f32	%f425, 0f47000000, 0f3F800000, %p91;

BB6_101:
	.loc 2 51 1
	setp.ne.s32	%p92, %r3, 0;
	@%p92 bra 	BB6_103;

	mov.f32 	%f426, 0f437F0000;
	bra.uni 	BB6_106;

BB6_103:
	.loc 2 51 1
	setp.ne.s32	%p93, %r3, 8;
	@%p93 bra 	BB6_105;

	mov.f32 	%f426, 0f447FC000;
	bra.uni 	BB6_106;

BB6_105:
	.loc 2 51 1
	setp.eq.s32	%p94, %r3, 16;
	.loc 2 51 1
	selp.f32	%f426, 0f47000000, 0f3F800000, %p94;

BB6_106:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f436, %f425, %f426;
	.loc 2 51 1
	@%p92 bra 	BB6_108;

	mov.f32 	%f427, 0f437F0000;
	bra.uni 	BB6_111;

BB6_108:
	.loc 2 51 1
	setp.ne.s32	%p96, %r3, 8;
	@%p96 bra 	BB6_110;

	mov.f32 	%f427, 0f447FC000;
	bra.uni 	BB6_111;

BB6_110:
	.loc 2 51 1
	setp.eq.s32	%p97, %r3, 16;
	.loc 2 51 1
	selp.f32	%f427, 0f47000000, 0f3F800000, %p97;

BB6_111:
	mov.f32 	%f277, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f435, %f277, %f427;
	.loc 2 51 1
	@%p92 bra 	BB6_113;

	mov.f32 	%f428, 0f437F0000;
	bra.uni 	BB6_116;

BB6_113:
	.loc 2 51 1
	setp.ne.s32	%p99, %r3, 8;
	@%p99 bra 	BB6_115;

	mov.f32 	%f428, 0f447FC000;
	bra.uni 	BB6_116;

BB6_115:
	.loc 2 51 1
	setp.eq.s32	%p100, %r3, 16;
	.loc 2 51 1
	selp.f32	%f428, 0f47000000, 0f3F800000, %p100;

BB6_116:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f434, %f277, %f428;
	.loc 2 51 1
	@%p92 bra 	BB6_118;

	mov.f32 	%f429, %f277;
	bra.uni 	BB6_121;

BB6_118:
	.loc 2 51 1
	setp.ne.s32	%p102, %r3, 8;
	@%p102 bra 	BB6_120;

	mov.f32 	%f281, 0f447FC000;
	.loc 2 51 1
	mov.f32 	%f429, %f281;
	bra.uni 	BB6_121;

BB6_120:
	.loc 2 51 1
	setp.eq.s32	%p103, %r3, 16;
	.loc 2 51 1
	selp.f32	%f79, 0f47000000, 0f3F800000, %p103;
	mov.f32 	%f429, %f79;

BB6_121:
	.loc 2 51 1
	mov.f32 	%f80, %f429;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f433, %f277, %f80;

BB6_122:
	mul.ftz.f32 	%f513, %f513, %f433;
	mul.ftz.f32 	%f509, %f49, %f434;
	mul.ftz.f32 	%f477, %f48, %f435;
	mul.ftz.f32 	%f448, %f448, %f436;
	bra.uni 	BB6_146;

BB6_123:
	.loc 2 51 1
	setp.ne.s32	%p104, %r14, 0;
	@%p104 bra 	BB6_125;

	mov.f32 	%f437, 0f437F0000;
	bra.uni 	BB6_128;

BB6_125:
	.loc 2 51 1
	setp.ne.s32	%p105, %r14, 8;
	@%p105 bra 	BB6_127;

	mov.f32 	%f437, 0f447FC000;
	bra.uni 	BB6_128;

BB6_127:
	.loc 2 51 1
	setp.eq.s32	%p106, %r14, 16;
	.loc 2 51 1
	selp.f32	%f437, 0f47000000, 0f3F800000, %p106;

BB6_128:
	.loc 2 51 1
	setp.ne.s32	%p107, %r3, 0;
	@%p107 bra 	BB6_130;

	mov.f32 	%f438, 0f437F0000;
	bra.uni 	BB6_133;

BB6_130:
	.loc 2 51 1
	setp.ne.s32	%p108, %r3, 8;
	@%p108 bra 	BB6_132;

	mov.f32 	%f438, 0f447FC000;
	bra.uni 	BB6_133;

BB6_132:
	.loc 2 51 1
	setp.eq.s32	%p109, %r3, 16;
	.loc 2 51 1
	selp.f32	%f438, 0f47000000, 0f3F800000, %p109;

BB6_133:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f93, %f437, %f438;
	mul.ftz.f32 	%f448, %f448, %f93;
	mov.f32 	%f477, %f48;
	mov.f32 	%f509, %f49;
	bra.uni 	BB6_146;

BB6_134:
	and.b32  	%r108, %r59, 248;
	setp.eq.s32	%p110, %r108, 0;
	mov.f32 	%f477, %f45;
	mov.f32 	%f509, %f46;
	@%p110 bra 	BB6_146;

	.loc 2 51 1
	and.b32  	%r16, %r33, 248;
	setp.ne.s32	%p111, %r16, 0;
	@%p111 bra 	BB6_137;

	mov.f32 	%f439, 0f437F0000;
	bra.uni 	BB6_140;

BB6_137:
	.loc 2 51 1
	setp.ne.s32	%p112, %r16, 8;
	@%p112 bra 	BB6_139;

	mov.f32 	%f439, 0f447FC000;
	bra.uni 	BB6_140;

BB6_139:
	.loc 2 51 1
	setp.eq.s32	%p113, %r16, 16;
	.loc 2 51 1
	selp.f32	%f439, 0f47000000, 0f3F800000, %p113;

BB6_140:
	.loc 2 51 1
	setp.ne.s32	%p114, %r3, 0;
	@%p114 bra 	BB6_142;

	mov.f32 	%f440, 0f437F0000;
	bra.uni 	BB6_145;

BB6_142:
	.loc 2 51 1
	setp.ne.s32	%p115, %r3, 8;
	@%p115 bra 	BB6_144;

	mov.f32 	%f440, 0f447FC000;
	bra.uni 	BB6_145;

BB6_144:
	.loc 2 51 1
	setp.eq.s32	%p116, %r3, 16;
	.loc 2 51 1
	selp.f32	%f440, 0f47000000, 0f3F800000, %p116;

BB6_145:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f292, %f439, %f440;
	mul.ftz.f32 	%f448, %f448, %f292;
	mul.ftz.f32 	%f477, %f45, %f292;
	mul.ftz.f32 	%f509, %f46, %f292;
	mul.ftz.f32 	%f513, %f513, %f292;

BB6_146:
	mov.f32 	%f507, %f509;
	mov.f32 	%f475, %f477;
	and.b32  	%r112, %r30, 1024;
	setp.eq.s32	%p117, %r112, 0;
	shr.u32 	%r113, %r33, 10;
	and.b32  	%r114, %r113, 1;
	setp.eq.b32	%p118, %r114, 1;
	and.pred  	%p119, %p117, %p118;
	@!%p119 bra 	BB6_156;
	bra.uni 	BB6_147;

BB6_147:
	setp.ltu.ftz.f32	%p120, %f475, 0f00000000;
	@%p120 bra 	BB6_149;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f293, %f475;
	mul.ftz.f32 	%f294, %f293, 0f400E38E4;
	ex2.approx.ftz.f32 	%f476, %f294;
	bra.uni 	BB6_150;

BB6_149:
	neg.ftz.f32 	%f295, %f475;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f296, %f295;
	mul.ftz.f32 	%f297, %f296, 0f400E38E4;
	ex2.approx.ftz.f32 	%f298, %f297;
	neg.ftz.f32 	%f476, %f298;

BB6_150:
	mov.f32 	%f475, %f476;
	setp.ltu.ftz.f32	%p121, %f507, 0f00000000;
	@%p121 bra 	BB6_152;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f299, %f507;
	mul.ftz.f32 	%f300, %f299, 0f400E38E4;
	ex2.approx.ftz.f32 	%f508, %f300;
	bra.uni 	BB6_153;

BB6_152:
	neg.ftz.f32 	%f301, %f507;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f302, %f301;
	mul.ftz.f32 	%f303, %f302, 0f400E38E4;
	ex2.approx.ftz.f32 	%f304, %f303;
	neg.ftz.f32 	%f508, %f304;

BB6_153:
	mov.f32 	%f507, %f508;
	setp.ltu.ftz.f32	%p122, %f513, 0f00000000;
	@%p122 bra 	BB6_155;

	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f305, %f513;
	mul.ftz.f32 	%f306, %f305, 0f400E38E4;
	ex2.approx.ftz.f32 	%f513, %f306;
	bra.uni 	BB6_156;

BB6_155:
	neg.ftz.f32 	%f307, %f513;
	.loc 3 3600 10
	lg2.approx.ftz.f32 	%f308, %f307;
	mul.ftz.f32 	%f309, %f308, 0f400E38E4;
	ex2.approx.ftz.f32 	%f310, %f309;
	neg.ftz.f32 	%f513, %f310;

BB6_156:
	mov.f32 	%f506, %f507;
	mov.f32 	%f474, %f475;
	setp.eq.s32	%p123, %r50, 512;
	and.b32  	%r17, %r33, 768;
	setp.ne.s32	%p124, %r17, 512;
	or.pred  	%p125, %p123, %p124;
	@%p125 bra 	BB6_163;

	.loc 2 51 1
	and.b32  	%r18, %r33, 248;
	setp.ne.s32	%p126, %r18, 0;
	@%p126 bra 	BB6_159;

	mov.f32 	%f441, 0f437F0000;
	bra.uni 	BB6_162;

BB6_159:
	.loc 2 51 1
	setp.ne.s32	%p127, %r18, 8;
	@%p127 bra 	BB6_161;

	mov.f32 	%f441, 0f447FC000;
	bra.uni 	BB6_162;

BB6_161:
	.loc 2 51 1
	setp.eq.s32	%p128, %r18, 16;
	.loc 2 51 1
	selp.f32	%f441, 0f47000000, 0f3F800000, %p128;

BB6_162:
	and.b32  	%r116, %r33, 4096;
	setp.eq.s32	%p129, %r116, 0;
	ld.const.f32 	%f313, [kYCbCrOffset];
	ld.const.f32 	%f314, [kYCbCrFullRangeOffset];
	selp.f32	%f315, %f314, %f313, %p129;
	mov.f32 	%f316, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f317, %f441, %f316;
	fma.rn.ftz.f32 	%f474, %f315, %f317, %f474;
	ld.const.f32 	%f318, [kYCbCrOffset+4];
	ld.const.f32 	%f319, [kYCbCrFullRangeOffset+4];
	selp.f32	%f320, %f319, %f318, %p129;
	fma.rn.ftz.f32 	%f506, %f320, %f317, %f506;
	ld.const.f32 	%f321, [kYCbCrOffset+8];
	ld.const.f32 	%f322, [kYCbCrFullRangeOffset+8];
	selp.f32	%f323, %f322, %f321, %p129;
	fma.rn.ftz.f32 	%f513, %f323, %f317, %f513;

BB6_163:
	mov.f32 	%f505, %f506;
	mov.f32 	%f473, %f474;
	and.b32  	%r118, %r59, 7;
	setp.eq.s32	%p130, %r118, 0;
	mov.f32 	%f469, %f473;
	mov.f32 	%f501, %f505;
	@%p130 bra 	BB6_223;

	and.b32  	%r19, %r30, 7;
	setp.ne.s32	%p131, %r19, 2;
	@%p131 bra 	BB6_166;

	or.b32  	%r119, %r33, 4;
	and.b32  	%r120, %r119, 7;
	setp.eq.s32	%p132, %r120, 4;
	@%p132 bra 	BB6_167;

BB6_166:
	or.b32  	%r121, %r19, 4;
	setp.eq.s32	%p133, %r121, 4;
	@%p133 bra 	BB6_167;
	bra.uni 	BB6_173;

BB6_167:
	.loc 2 51 1
	and.b32  	%r20, %r33, 248;
	setp.ne.s32	%p134, %r20, 0;
	@%p134 bra 	BB6_170;

	mov.f32 	%f448, 0f437F0000;

BB6_169:
	.loc 2 51 1
	mov.f32 	%f469, %f473;
	mov.f32 	%f501, %f505;
	bra.uni 	BB6_223;

BB6_170:
	.loc 2 51 1
	setp.ne.s32	%p135, %r20, 8;
	@%p135 bra 	BB6_172;

	mov.f32 	%f448, 0f447FC000;
	bra.uni 	BB6_169;

BB6_172:
	.loc 2 51 1
	setp.eq.s32	%p136, %r20, 16;
	.loc 2 51 1
	selp.f32	%f448, 0f47000000, 0f3F800000, %p136;
	bra.uni 	BB6_169;

BB6_173:
	setp.ne.s32	%p137, %r19, 1;
	@%p137 bra 	BB6_201;

	and.b32  	%r122, %r33, 7;
	setp.gt.u32	%p138, %r122, 4;
	@%p138 bra 	BB6_201;

	mov.u32 	%r123, 1;
	shl.b32 	%r124, %r123, %r122;
	and.b32  	%r125, %r124, 21;
	setp.ne.s32	%p139, %r125, 0;
	@%p139 bra 	BB6_176;
	bra.uni 	BB6_201;

BB6_176:
	.loc 2 51 1
	and.b32  	%r22, %r33, 248;
	setp.ne.s32	%p140, %r22, 0;
	@%p140 bra 	BB6_178;

	mov.f32 	%f442, 0f437F0000;
	bra.uni 	BB6_181;

BB6_178:
	.loc 2 51 1
	setp.ne.s32	%p141, %r22, 8;
	@%p141 bra 	BB6_180;

	mov.f32 	%f442, 0f447FC000;
	bra.uni 	BB6_181;

BB6_180:
	.loc 2 51 1
	setp.eq.s32	%p142, %r22, 16;
	.loc 2 51 1
	selp.f32	%f442, 0f47000000, 0f3F800000, %p142;

BB6_181:
	rcp.approx.ftz.f32 	%f328, %f442;
	mul.ftz.f32 	%f133, %f448, %f328;
	@%p124 bra 	BB6_188;

	.loc 2 51 1
	@%p140 bra 	BB6_184;

	mov.f32 	%f443, 0f437F0000;
	bra.uni 	BB6_187;

BB6_184:
	.loc 2 51 1
	setp.ne.s32	%p145, %r22, 8;
	@%p145 bra 	BB6_186;

	mov.f32 	%f443, 0f447FC000;
	bra.uni 	BB6_187;

BB6_186:
	.loc 2 51 1
	setp.eq.s32	%p146, %r22, 16;
	.loc 2 51 1
	selp.f32	%f443, 0f47000000, 0f3F800000, %p146;

BB6_187:
	and.b32  	%r126, %r33, 4096;
	setp.eq.s32	%p147, %r126, 0;
	ld.const.f32 	%f331, [kYCbCrOffset];
	ld.const.f32 	%f332, [kYCbCrFullRangeOffset];
	selp.f32	%f333, %f332, %f331, %p147;
	mov.f32 	%f334, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f335, %f443, %f334;
	mul.ftz.f32 	%f336, %f333, %f335;
	sub.ftz.f32 	%f473, %f473, %f336;
	ld.const.f32 	%f337, [kYCbCrOffset+4];
	ld.const.f32 	%f338, [kYCbCrFullRangeOffset+4];
	selp.f32	%f339, %f338, %f337, %p147;
	mul.ftz.f32 	%f340, %f339, %f335;
	sub.ftz.f32 	%f505, %f505, %f340;
	ld.const.f32 	%f341, [kYCbCrOffset+8];
	ld.const.f32 	%f342, [kYCbCrFullRangeOffset+8];
	selp.f32	%f343, %f342, %f341, %p147;
	mul.ftz.f32 	%f344, %f343, %f335;
	sub.ftz.f32 	%f513, %f513, %f344;

BB6_188:
	mul.ftz.f32 	%f471, %f473, %f133;
	mul.ftz.f32 	%f503, %f505, %f133;
	mul.ftz.f32 	%f513, %f513, %f133;
	@%p124 bra 	BB6_195;

	.loc 2 51 1
	@%p140 bra 	BB6_191;

	mov.f32 	%f444, 0f437F0000;
	bra.uni 	BB6_194;

BB6_191:
	.loc 2 51 1
	setp.ne.s32	%p150, %r22, 8;
	@%p150 bra 	BB6_193;

	mov.f32 	%f444, 0f447FC000;
	bra.uni 	BB6_194;

BB6_193:
	.loc 2 51 1
	setp.eq.s32	%p151, %r22, 16;
	.loc 2 51 1
	selp.f32	%f444, 0f47000000, 0f3F800000, %p151;

BB6_194:
	and.b32  	%r127, %r33, 4096;
	setp.eq.s32	%p152, %r127, 0;
	ld.const.f32 	%f347, [kYCbCrOffset];
	ld.const.f32 	%f348, [kYCbCrFullRangeOffset];
	selp.f32	%f349, %f348, %f347, %p152;
	mov.f32 	%f350, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f351, %f444, %f350;
	fma.rn.ftz.f32 	%f471, %f349, %f351, %f471;
	ld.const.f32 	%f352, [kYCbCrOffset+4];
	ld.const.f32 	%f353, [kYCbCrFullRangeOffset+4];
	selp.f32	%f354, %f353, %f352, %p152;
	fma.rn.ftz.f32 	%f503, %f354, %f351, %f503;
	ld.const.f32 	%f355, [kYCbCrOffset+8];
	ld.const.f32 	%f356, [kYCbCrFullRangeOffset+8];
	selp.f32	%f357, %f356, %f355, %p152;
	fma.rn.ftz.f32 	%f513, %f357, %f351, %f513;

BB6_195:
	mov.f32 	%f501, %f503;
	mov.f32 	%f469, %f471;
	or.b32  	%r128, %r122, 4;
	setp.eq.s32	%p153, %r128, 4;
	@%p153 bra 	BB6_196;
	bra.uni 	BB6_223;

BB6_196:
	.loc 2 51 1
	@%p140 bra 	BB6_198;

	mov.f32 	%f448, 0f437F0000;
	bra.uni 	BB6_223;

BB6_198:
	.loc 2 51 1
	setp.ne.s32	%p155, %r22, 8;
	@%p155 bra 	BB6_200;

	mov.f32 	%f448, 0f447FC000;
	bra.uni 	BB6_223;

BB6_200:
	.loc 2 51 1
	setp.eq.s32	%p156, %r22, 16;
	.loc 2 51 1
	selp.f32	%f448, 0f47000000, 0f3F800000, %p156;
	bra.uni 	BB6_223;

BB6_201:
	.loc 2 51 1
	mov.f32 	%f472, %f473;
	mov.f32 	%f504, %f505;
	@%p124 bra 	BB6_208;

	.loc 2 51 1
	and.b32  	%r23, %r33, 248;
	setp.ne.s32	%p158, %r23, 0;
	@%p158 bra 	BB6_204;

	mov.f32 	%f445, 0f437F0000;
	bra.uni 	BB6_207;

BB6_204:
	.loc 2 51 1
	setp.ne.s32	%p159, %r23, 8;
	@%p159 bra 	BB6_206;

	mov.f32 	%f445, 0f447FC000;
	bra.uni 	BB6_207;

BB6_206:
	.loc 2 51 1
	setp.eq.s32	%p160, %r23, 16;
	.loc 2 51 1
	selp.f32	%f445, 0f47000000, 0f3F800000, %p160;

BB6_207:
	.loc 2 51 119
	and.b32  	%r129, %r33, 4096;
	setp.eq.s32	%p161, %r129, 0;
	ld.const.f32 	%f362, [kYCbCrOffset];
	ld.const.f32 	%f363, [kYCbCrFullRangeOffset];
	selp.f32	%f364, %f363, %f362, %p161;
	mov.f32 	%f365, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f366, %f445, %f365;
	.loc 2 51 119
	mul.ftz.f32 	%f367, %f364, %f366;
	sub.ftz.f32 	%f472, %f473, %f367;
	ld.const.f32 	%f368, [kYCbCrOffset+4];
	ld.const.f32 	%f369, [kYCbCrFullRangeOffset+4];
	selp.f32	%f370, %f369, %f368, %p161;
	mul.ftz.f32 	%f371, %f370, %f366;
	sub.ftz.f32 	%f504, %f505, %f371;
	ld.const.f32 	%f372, [kYCbCrOffset+8];
	ld.const.f32 	%f373, [kYCbCrFullRangeOffset+8];
	selp.f32	%f374, %f373, %f372, %p161;
	mul.ftz.f32 	%f375, %f374, %f366;
	sub.ftz.f32 	%f513, %f513, %f375;

BB6_208:
	.loc 2 51 1
	add.ftz.f32 	%f376, %f448, 0fB70637BD;
	setp.gtu.ftz.f32	%p162, %f376, 0f00000000;
	@%p162 bra 	BB6_210;

	mov.f32 	%f448, 0f00000000;
	mov.f32 	%f470, %f448;
	mov.f32 	%f502, %f448;
	mov.f32 	%f513, %f448;
	bra.uni 	BB6_216;

BB6_210:
	.loc 2 51 1
	and.b32  	%r24, %r33, 248;
	setp.ne.s32	%p163, %r24, 0;
	@%p163 bra 	BB6_212;

	mov.f32 	%f446, 0f437F0000;
	bra.uni 	BB6_215;

BB6_212:
	.loc 2 51 1
	setp.ne.s32	%p164, %r24, 8;
	@%p164 bra 	BB6_214;

	mov.f32 	%f446, 0f447FC000;
	bra.uni 	BB6_215;

BB6_214:
	.loc 2 51 1
	setp.eq.s32	%p165, %r24, 16;
	.loc 2 51 1
	selp.f32	%f446, 0f47000000, 0f3F800000, %p165;

BB6_215:
	.loc 3 3606 10
	div.approx.ftz.f32 	%f383, %f446, %f448;
	.loc 2 51 1
	mul.ftz.f32 	%f470, %f472, %f383;
	mul.ftz.f32 	%f502, %f504, %f383;
	mul.ftz.f32 	%f513, %f513, %f383;

BB6_216:
	.loc 2 51 1
	mov.f32 	%f501, %f502;
	mov.f32 	%f469, %f470;
	@%p124 bra 	BB6_223;

	.loc 2 51 1
	and.b32  	%r25, %r33, 248;
	setp.ne.s32	%p167, %r25, 0;
	@%p167 bra 	BB6_219;

	mov.f32 	%f447, 0f437F0000;
	bra.uni 	BB6_222;

BB6_219:
	.loc 2 51 1
	setp.ne.s32	%p168, %r25, 8;
	@%p168 bra 	BB6_221;

	mov.f32 	%f447, 0f447FC000;
	bra.uni 	BB6_222;

BB6_221:
	.loc 2 51 1
	setp.eq.s32	%p169, %r25, 16;
	.loc 2 51 1
	selp.f32	%f447, 0f47000000, 0f3F800000, %p169;

BB6_222:
	.loc 2 51 120
	and.b32  	%r130, %r33, 4096;
	setp.eq.s32	%p170, %r130, 0;
	ld.const.f32 	%f386, [kYCbCrOffset];
	ld.const.f32 	%f387, [kYCbCrFullRangeOffset];
	selp.f32	%f388, %f387, %f386, %p170;
	mov.f32 	%f389, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f390, %f447, %f389;
	.loc 2 51 120
	fma.rn.ftz.f32 	%f469, %f388, %f390, %f469;
	ld.const.f32 	%f391, [kYCbCrOffset+4];
	ld.const.f32 	%f392, [kYCbCrFullRangeOffset+4];
	selp.f32	%f393, %f392, %f391, %p170;
	fma.rn.ftz.f32 	%f501, %f393, %f390, %f501;
	ld.const.f32 	%f394, [kYCbCrOffset+8];
	ld.const.f32 	%f395, [kYCbCrFullRangeOffset+8];
	selp.f32	%f396, %f395, %f394, %p170;
	fma.rn.ftz.f32 	%f513, %f396, %f390, %f513;

BB6_223:
	and.b32  	%r131, %r33, 8192;
	setp.eq.s32	%p171, %r131, 0;
	selp.f32	%f515, %f513, %f448, %p171;
	selp.f32	%f516, %f501, %f469, %p171;
	selp.f32	%f517, %f469, %f501, %p171;
	selp.f32	%f518, %f448, %f513, %p171;
	and.b32  	%r132, %r33, 256;
	.loc 2 51 1
	and.b32  	%r26, %r33, 248;
	setp.ne.s32	%p172, %r132, 0;
	@%p172 bra 	BB6_230;

	add.ftz.f32 	%f184, %f515, 0f3F000000;
	add.ftz.f32 	%f185, %f516, 0f3F000000;
	add.ftz.f32 	%f186, %f517, 0f3F000000;
	add.ftz.f32 	%f187, %f518, 0f3F000000;
	.loc 2 51 1
	setp.ne.s32	%p173, %r26, 0;
	@%p173 bra 	BB6_226;

	mov.f32 	%f514, 0f437F0000;
	bra.uni 	BB6_229;

BB6_226:
	.loc 2 51 1
	setp.ne.s32	%p174, %r26, 8;
	@%p174 bra 	BB6_228;

	mov.f32 	%f514, 0f447FC000;
	bra.uni 	BB6_229;

BB6_228:
	.loc 2 51 1
	setp.eq.s32	%p175, %r26, 16;
	.loc 2 51 1
	selp.f32	%f514, 0f47000000, 0f3F800000, %p175;

BB6_229:
	mov.f32 	%f399, 0f00000000;
	.loc 3 2770 10
	max.ftz.f32 	%f400, %f184, %f399;
	.loc 3 2765 10
	min.ftz.f32 	%f515, %f400, %f514;
	.loc 3 2770 10
	max.ftz.f32 	%f401, %f185, %f399;
	.loc 3 2765 10
	min.ftz.f32 	%f516, %f401, %f514;
	.loc 3 2770 10
	max.ftz.f32 	%f402, %f186, %f399;
	.loc 3 2765 10
	min.ftz.f32 	%f517, %f402, %f514;
	.loc 3 2770 10
	max.ftz.f32 	%f403, %f187, %f399;
	.loc 3 2765 10
	min.ftz.f32 	%f518, %f403, %f514;

BB6_230:
	.loc 2 51 1
	mad.lo.s32 	%r28, %r2, %r32, %r1;
	mul.wide.s32 	%rd38, %r28, 4;
	add.s64 	%rd10, %rd13, %rd38;
	setp.eq.s32	%p176, %r26, 0;
	.loc 2 51 1
	@%p176 bra 	BB6_238;

	.loc 2 51 1
	setp.eq.s32	%p177, %r26, 8;
	@%p177 bra 	BB6_237;

	.loc 2 51 1
	setp.eq.s32	%p178, %r26, 16;
	mul.wide.s32 	%rd39, %r28, 8;
	add.s64 	%rd11, %rd13, %rd39;
	.loc 2 51 1
	@%p178 bra 	BB6_236;

	.loc 2 51 1
	setp.eq.s32	%p179, %r34, 0;
	@%p179 bra 	BB6_235;

	mul.wide.s32 	%rd40, %r28, 16;
	add.s64 	%rd41, %rd13, %rd40;
	.loc 2 51 1
	st.v4.f32 	[%rd41], {%f515, %f516, %f517, %f518};
	bra.uni 	BB6_239;

BB6_235:
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f515;
	mov.b16 	%rs29, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f516;
	mov.b16 	%rs30, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f517;
	mov.b16 	%rs31, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f518;
	mov.b16 	%rs32, %temp;
}
	.loc 2 51 231
	st.v4.u16 	[%rd11], {%rs29, %rs30, %rs31, %rs32};
	bra.uni 	BB6_239;

BB6_236:
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r141, %f515;
	.loc 2 51 82
	cvt.u16.u32	%rs33, %r141;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r142, %f516;
	.loc 2 51 161
	cvt.u16.u32	%rs34, %r142;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r143, %f517;
	.loc 2 51 242
	cvt.u16.u32	%rs35, %r143;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r144, %f518;
	cvt.u16.u32	%rs36, %r144;
	.loc 2 51 1
	st.v4.u16 	[%rd11], {%rs33, %rs34, %rs35, %rs36};
	bra.uni 	BB6_239;

BB6_237:
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r145, %f515;
	.loc 2 51 65
	shl.b32 	%r146, %r145, 2;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r147, %f516;
	.loc 2 51 127
	shl.b32 	%r148, %r147, 12;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r149, %f517;
	.loc 2 51 191
	shl.b32 	%r150, %r149, 22;
	.loc 2 51 127
	add.s32 	%r151, %r148, %r146;
	.loc 2 51 191
	add.s32 	%r152, %r151, %r150;
	.loc 2 51 1
	st.u32 	[%rd10], %r152;
	bra.uni 	BB6_239;

BB6_238:
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r153, %f515;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r154, %f516;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r155, %f517;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r156, %f518;
	cvt.u16.u32	%rs37, %r156;
	.loc 2 51 239
	cvt.u16.u32	%rs38, %r155;
	.loc 2 51 159
	cvt.u16.u32	%rs39, %r154;
	.loc 2 51 81
	cvt.u16.u32	%rs40, %r153;
	.loc 2 51 1
	st.v4.u8 	[%rd10], {%rs40, %rs39, %rs38, %rs37};

BB6_239:
	.loc 5 92 2
	ret;
}

//
// PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel
//
// One thread per pixel. Each in-bounds thread loads one 4-byte source pixel,
// subtracts the kYCbCrOffset vector from three of the channels, multiplies
// the result by the 3x3 k601YCbCr_To_RGB32f matrix, scales the fourth channel
// by 1/255, and stores the four results as either f32 or f16 values.
//
// Parameters, as used by the code below:
//   param_0 (u64)  source base pointer (generic address, converted to global)
//   param_1 (u32)  source row stride, counted in 4-byte pixels
//   param_2 (u32)  not read by this kernel
//   param_3 (u64)  destination base pointer (generic address, converted to global)
//   param_4 (u32)  destination row stride, counted in pixels
//   param_5 (u32)  output selector: 0 -> 4 x f16 (8 bytes per pixel),
//                  non-zero -> 4 x f32 (16 bytes per pixel)
//   param_6 (u32)  exclusive upper bound on x (presumably the image width)
//   param_7 (u32)  exclusive upper bound on y (presumably the image height)
//   param_8 (u32)  not read by this kernel
//
// NOTE(review): going by the kernel name, source bytes 0..3 are V, U, Y, A and
// the stored channels are B, G, R, A. The contents of kYCbCrOffset are declared
// elsewhere in the file - confirm against that definition.
//
.visible .entry PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<41>;
	.reg .s64 	%rd<11>;


	// Load the parameters that are actually used (param_2 and param_8 are skipped).
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global-space addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Bounds check (signed compares): proceed only when x < param_6 and y < param_7.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB7_4;
	bra.uni 	BB7_1;

BB7_1:
	.loc 2 51 1
	// Source address = src + (y * srcStride + x) * 4 bytes.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	mul.wide.s32 	%rd5, %r15, 4;
	add.s64 	%rd6, %rd2, %rd5;
	.loc 2 51 1
	// Load the four source bytes, mask each to its low 8 bits, and convert to f32:
	// %f9 = byte 0, %f10 = byte 1, %f11 = byte 2, %f12 = byte 3.
	ld.global.v4.u8 	{%rs1, %rs2, %rs3, %rs4}, [%rd6];
	and.b16  	%rs6, %rs1, 255;
	cvt.rn.f32.u16	%f9, %rs6;
	and.b16  	%rs8, %rs2, 255;
	cvt.rn.f32.u16	%f10, %rs8;
	and.b16  	%rs10, %rs3, 255;
	cvt.rn.f32.u16	%f11, %rs10;
	and.b16  	%rs12, %rs4, 255;
	cvt.rn.f32.u16	%f12, %rs12;
	// %f15 = 255 / 255 (approximate divide), used as the scale applied to the offsets.
	ld.const.f32 	%f13, [kYCbCrOffset];
	mov.f32 	%f14, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f15, %f14, %f14;
	// Remove the offsets; note the reversed channel pairing:
	// %f17 = byte2 - offset[0]*scale, %f20 = byte1 - offset[1]*scale,
	// %f23 = byte0 - offset[2]*scale.
	mul.ftz.f32 	%f16, %f13, %f15;
	sub.ftz.f32 	%f17, %f11, %f16;
	ld.const.f32 	%f18, [kYCbCrOffset+4];
	mul.ftz.f32 	%f19, %f18, %f15;
	sub.ftz.f32 	%f20, %f10, %f19;
	ld.const.f32 	%f21, [kYCbCrOffset+8];
	mul.ftz.f32 	%f22, %f21, %f15;
	sub.ftz.f32 	%f23, %f9, %f22;
	// 3x3 matrix multiply with k601YCbCr_To_RGB32f (row-major, 4-byte floats).
	// %f1 = row 0 . (%f17, %f20, %f23)
	ld.const.f32 	%f24, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f25, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f26, %f20, %f25;
	fma.rn.ftz.f32 	%f27, %f17, %f24, %f26;
	ld.const.f32 	%f28, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f23, %f28, %f27;
	// %f2 = row 1 . (%f17, %f20, %f23)
	ld.const.f32 	%f29, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f30, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f31, %f20, %f30;
	fma.rn.ftz.f32 	%f32, %f17, %f29, %f31;
	ld.const.f32 	%f33, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f23, %f33, %f32;
	// %f3 = row 2 . (%f17, %f20, %f23)
	ld.const.f32 	%f34, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f35, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f36, %f20, %f35;
	fma.rn.ftz.f32 	%f37, %f17, %f34, %f36;
	ld.const.f32 	%f38, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f23, %f38, %f37;
	// Fourth channel: %f8 = byte3 * (1.0 / 255.0), again using the approximate divide.
	mov.f32 	%f39, 0f3F800000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f40, %f39, %f14;
	mul.ftz.f32 	%f8, %f12, %f40;
	.loc 2 51 1
	// %r3 = destination pixel index = y * dstStride + x.
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	.loc 2 51 1
	// param_5 == 0 selects the f16 store path (BB7_3); otherwise store f32.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB7_3;

	// f32 path: 16 bytes per pixel, stored in the order row2, row1, row0, scaled byte3.
	mul.wide.s32 	%rd7, %r3, 16;
	add.s64 	%rd8, %rd1, %rd7;
	.loc 2 51 1
	st.global.v4.f32 	[%rd8], {%f3, %f2, %f1, %f8};
	bra.uni 	BB7_4;

BB7_3:
	// f16 path: 8 bytes per pixel; each channel is rounded to nearest-even f16
	// and stored in the same channel order as the f32 path.
	mul.wide.s32 	%rd9, %r3, 8;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs13, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs14, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs15, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs16, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd10], {%rs13, %rs14, %rs15, %rs16};

BB7_4:
	// Common exit, also taken directly by out-of-bounds threads.
	.loc 1 25 2
	ret;
}

//
// PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel
//
// One thread per pixel. Each in-bounds thread loads one four-channel source
// pixel (f32 or f16), multiplies the first three channels by the 3x3
// kRGB32f_To_601YCbCr matrix, adds the kYCbCrOffset vector, scales the fourth
// channel by 255, then adds 0.5, clamps to [0, 255], truncates to integer and
// stores four bytes.
//
// Parameters, as used by the code below:
//   param_0 (u64)  source base pointer (generic address, converted to global)
//   param_1 (u32)  source row stride, counted in pixels
//   param_2 (u32)  input selector: 0 -> 4 x f16 (8 bytes per pixel),
//                  non-zero -> 4 x f32 (16 bytes per pixel)
//   param_3 (u64)  destination base pointer (generic address, converted to global)
//   param_4 (u32)  destination row stride, counted in 4-byte pixels
//   param_5 (u32)  not read by this kernel
//   param_6 (u32)  exclusive upper bound on x (presumably the image width)
//   param_7 (u32)  exclusive upper bound on y (presumably the image height)
//   param_8 (u32)  not read by this kernel
//
// NOTE(review): going by the kernel name, source channels 0..3 are B, G, R, A
// and the stored bytes are V, U, Y, A. The contents of kYCbCrOffset are declared
// elsewhere in the file - confirm against that definition.
//
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<62>;
	.reg .s64 	%rd<11>;


	// Load the parameters that are actually used (param_5 and param_8 are skipped).
	ld.param.u64 	%rd3, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_1];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_3];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_4];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global-space addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Bounds check (signed compares): proceed only when x < param_6 and y < param_7.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB8_5;
	bra.uni 	BB8_1;

BB8_1:
	.loc 2 51 1
	// %r3 = source pixel index = y * srcStride + x.
	// param_2 == 0 selects the f16 load path (BB8_3); otherwise load f32.
	mad.lo.s32 	%r3, %r2, %r4, %r1;
	setp.eq.s32	%p4, %r5, 0;
	@%p4 bra 	BB8_3;

	// f32 path: 16 bytes per pixel; channels 0..3 land in %f58..%f61.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd6];
	mov.f32 	%f61, %f16;
	mov.f32 	%f60, %f15;
	mov.f32 	%f59, %f14;
	mov.f32 	%f58, %f13;
	bra.uni 	BB8_4;

BB8_3:
	// f16 path: 8 bytes per pixel; each half is widened to f32 into %f58..%f61.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f58, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f59, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f60, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f61, %temp;
	}

BB8_4:
	// 3x3 matrix multiply with kRGB32f_To_601YCbCr (row-major, 4-byte floats).
	// The input vector is taken in reversed channel order: (%f60, %f59, %f58).
	// %f22 = row 0 . (%f60, %f59, %f58)
	ld.const.f32 	%f17, [kRGB32f_To_601YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_601YCbCr+4];
	mul.ftz.f32 	%f19, %f59, %f18;
	fma.rn.ftz.f32 	%f20, %f60, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_601YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f58, %f21, %f20;
	// %f28 = row 1 . (%f60, %f59, %f58)
	ld.const.f32 	%f23, [kRGB32f_To_601YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_601YCbCr+16];
	mul.ftz.f32 	%f25, %f59, %f24;
	fma.rn.ftz.f32 	%f26, %f60, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_601YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f58, %f27, %f26;
	// %f34 = row 2 . (%f60, %f59, %f58)
	ld.const.f32 	%f29, [kRGB32f_To_601YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_601YCbCr+28];
	mul.ftz.f32 	%f31, %f59, %f30;
	fma.rn.ftz.f32 	%f32, %f60, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_601YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f58, %f33, %f32;
	// %f37 = 255 / 1 (approximate divide): scale applied to the fourth channel.
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f37, %f36, %f35;
	// %f39 = 255 / 255 (approximate divide): scale applied to the offsets.
	ld.const.f32 	%f38, [kYCbCrOffset];
	.loc 3 3606 10
	div.approx.ftz.f32 	%f39, %f36, %f36;
	// Add the scaled offsets: %f40 = row0 + offset[0], %f42 = row1 + offset[1],
	// %f44 = row2 + offset[2].
	fma.rn.ftz.f32 	%f40, %f38, %f39, %f22;
	ld.const.f32 	%f41, [kYCbCrOffset+4];
	fma.rn.ftz.f32 	%f42, %f41, %f39, %f28;
	ld.const.f32 	%f43, [kYCbCrOffset+8];
	fma.rn.ftz.f32 	%f44, %f43, %f39, %f34;
	// Add 0.5 so the truncating conversion below rounds half up; the fourth
	// channel is scaled and biased in one fma (%f48 = %f61 * 255 + 0.5).
	add.ftz.f32 	%f45, %f44, 0f3F000000;
	add.ftz.f32 	%f46, %f42, 0f3F000000;
	add.ftz.f32 	%f47, %f40, 0f3F000000;
	fma.rn.ftz.f32 	%f48, %f61, %f37, 0f3F000000;
	// Clamp every channel to [0, 255].
	mov.f32 	%f49, 0f00000000;
	.loc 3 2770 10
	max.ftz.f32 	%f50, %f45, %f49;
	.loc 3 2765 10
	min.ftz.f32 	%f51, %f50, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f52, %f46, %f49;
	.loc 3 2765 10
	min.ftz.f32 	%f53, %f52, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f54, %f47, %f49;
	.loc 3 2765 10
	min.ftz.f32 	%f55, %f54, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f56, %f48, %f49;
	.loc 3 2765 10
	min.ftz.f32 	%f57, %f56, %f36;
	// Convert to unsigned integers, rounding toward zero.
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r15, %f51;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r16, %f53;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r17, %f55;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r18, %f57;
	.loc 2 51 1
	// Destination address = dst + (y * dstStride + x) * 4 bytes.
	mad.lo.s32 	%r19, %r2, %r6, %r1;
	mul.wide.s32 	%rd9, %r19, 4;
	add.s64 	%rd10, %rd1, %rd9;
	cvt.u16.u32	%rs9, %r18;
	.loc 2 51 239
	cvt.u16.u32	%rs10, %r17;
	.loc 2 51 159
	cvt.u16.u32	%rs11, %r16;
	.loc 2 51 81
	cvt.u16.u32	%rs12, %r15;
	.loc 2 51 1
	// Store bytes in the order: row2 result, row1 result, row0 result, fourth channel.
	st.global.v4.u8 	[%rd10], {%rs12, %rs11, %rs10, %rs9};

BB8_5:
	// Common exit, also taken directly by out-of-bounds threads.
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel
// One thread per pixel. Loads a 4 x 8-bit source pixel, subtracts kYCbCrOffset from
// the first three bytes, applies the 3x3 matrix k601YCbCr_To_RGB32f, divides the
// three results by the normalised fourth byte (or writes all zeros when that value
// is ~0), and stores four f32 or four f16 values.
// Parameters, as used by the code below:
//   param_0  source base address (generic address, converted to global)
//   param_1  source row pitch, counted in 4-byte pixels
//   param_2  not read
//   param_3  destination base address (generic address, converted to global)
//   param_4  destination row pitch, counted in pixels
//   param_5  non-zero: 16 bytes/pixel f32x4 store; zero: 8 bytes/pixel f16x4 store
//   param_6  exclusive upper bound for x
//   param_7  exclusive upper bound for y
//   param_8  not read
// kYCbCrOffset is referenced here but declared outside the visible part of the file.
.visible .entry PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<59>;
	.reg .s64 	%rd<11>;


	ld.param.u64 	%rd3, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 (signed compare) do nothing.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB9_7;
	bra.uni 	BB9_1;

BB9_1:
	.loc 2 51 1
	// Source address = src + (y * srcPitch + x) * 4 bytes.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	mul.wide.s32 	%rd5, %r15, 4;
	add.s64 	%rd6, %rd2, %rd5;
	.loc 2 51 1
	// Load the four bytes and convert each (masked to 0..255) to f32:
	// %f16 = byte 0, %f17 = byte 1, %f18 = byte 2, %f19 = byte 3.
	ld.global.v4.u8 	{%rs1, %rs2, %rs3, %rs4}, [%rd6];
	and.b16  	%rs6, %rs1, 255;
	cvt.rn.f32.u16	%f16, %rs6;
	and.b16  	%rs8, %rs2, 255;
	cvt.rn.f32.u16	%f17, %rs8;
	and.b16  	%rs10, %rs3, 255;
	cvt.rn.f32.u16	%f18, %rs10;
	and.b16  	%rs12, %rs4, 255;
	cvt.rn.f32.u16	%f19, %rs12;
	// %f22 = 255.0 / 255.0 (approximate divide), used to scale each offset.
	// %f24 = byte2 - offset[0], %f27 = byte1 - offset[1], %f30 = byte0 - offset[2].
	ld.const.f32 	%f20, [kYCbCrOffset];
	mov.f32 	%f21, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f22, %f21, %f21;
	mul.ftz.f32 	%f23, %f20, %f22;
	sub.ftz.f32 	%f24, %f18, %f23;
	ld.const.f32 	%f25, [kYCbCrOffset+4];
	mul.ftz.f32 	%f26, %f25, %f22;
	sub.ftz.f32 	%f27, %f17, %f26;
	ld.const.f32 	%f28, [kYCbCrOffset+8];
	mul.ftz.f32 	%f29, %f28, %f22;
	sub.ftz.f32 	%f30, %f16, %f29;
	// 3x3 matrix times (%f24, %f27, %f30); the matrix is read as nine consecutive floats.
	// %f1 = row 0, %f2 = row 1, %f3 = row 2.
	ld.const.f32 	%f31, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f32, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f33, %f27, %f32;
	fma.rn.ftz.f32 	%f34, %f24, %f31, %f33;
	ld.const.f32 	%f35, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f30, %f35, %f34;
	ld.const.f32 	%f36, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f37, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f38, %f27, %f37;
	fma.rn.ftz.f32 	%f39, %f24, %f36, %f38;
	ld.const.f32 	%f40, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f30, %f40, %f39;
	ld.const.f32 	%f41, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f42, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f43, %f27, %f42;
	fma.rn.ftz.f32 	%f44, %f24, %f41, %f43;
	ld.const.f32 	%f45, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f30, %f45, %f44;
	// %f58 = byte3 * (1.0 / 255.0): the fourth component normalised to 0..1.
	mov.f32 	%f46, 0f3F800000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f47, %f46, %f21;
	mul.ftz.f32 	%f58, %f19, %f47;
	.loc 2 51 1
	// 0fB70637BD is approximately -8e-6. The branch to the divide path is taken when
	// %f58 - 8e-6 > 0 (gtu is also true for unordered operands).
	add.ftz.f32 	%f48, %f58, 0fB70637BD;
	setp.gtu.ftz.f32	%p4, %f48, 0f00000000;
	@%p4 bra 	BB9_3;

	// Fourth component is (near) zero: all four outputs become 0.0, avoiding the divide.
	mov.f32 	%f58, 0f00000000;
	mov.f32 	%f57, %f58;
	mov.f32 	%f56, %f58;
	mov.f32 	%f55, %f58;
	bra.uni 	BB9_4;

BB9_3:
	.loc 3 3606 10
	// %f54 = 1.0 / %f58; each matrix result is multiplied by it.
	div.approx.ftz.f32 	%f54, %f46, %f58;
	.loc 2 51 1
	mul.ftz.f32 	%f57, %f1, %f54;
	mul.ftz.f32 	%f56, %f2, %f54;
	mul.ftz.f32 	%f55, %f3, %f54;

BB9_4:
	.loc 2 51 1
	// %r3 = destination pixel index = y * dstPitch + x.
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	.loc 2 51 1
	// param_5 == 0 selects the f16 store at BB9_6; otherwise fall through to f32.
	setp.eq.s32	%p5, %r6, 0;
	@%p5 bra 	BB9_6;

	// f32 path: 16 bytes per pixel, stored as {row 2, row 1, row 0, fourth component}.
	mul.wide.s32 	%rd7, %r3, 16;
	add.s64 	%rd8, %rd1, %rd7;
	.loc 2 51 1
	st.global.v4.f32 	[%rd8], {%f55, %f56, %f57, %f58};
	bra.uni 	BB9_7;

BB9_6:
	// f16 path: 8 bytes per pixel; the same four values converted with round-to-nearest.
	mul.wide.s32 	%rd9, %r3, 8;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f55;
	mov.b16 	%rs13, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f56;
	mov.b16 	%rs14, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f57;
	mov.b16 	%rs15, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f58;
	mov.b16 	%rs16, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd10], {%rs13, %rs14, %rs15, %rs16};

BB9_7:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel
// One thread per pixel. Loads four floating-point components (f32 or f16), applies
// the 3x3 matrix kRGB32f_To_601YCbCr to the first three, adds kYCbCrOffset, scales
// each result's distance from its offset by component 3, then rounds, clamps to
// 0..255 and stores four bytes.
// Parameters, as used by the code below:
//   param_0  source base address (generic address, converted to global)
//   param_1  source row pitch, counted in pixels
//   param_2  non-zero: source is 16 bytes/pixel f32x4; zero: 8 bytes/pixel f16x4
//   param_3  destination base address (generic address, converted to global)
//   param_4  destination row pitch, counted in 4-byte pixels
//   param_5  not read
//   param_6  exclusive upper bound for x
//   param_7  exclusive upper bound for y
//   param_8  not read
// kYCbCrOffset is referenced here but declared outside the visible part of the file.
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<73>;
	.reg .s64 	%rd<11>;


	ld.param.u64 	%rd3, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_1];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_3];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_4];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 (signed compare) do nothing.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB10_5;
	bra.uni 	BB10_1;

BB10_1:
	.loc 2 51 1
	// %r3 = source pixel index = y * srcPitch + x. param_2 == 0 selects the f16 load.
	mad.lo.s32 	%r3, %r2, %r4, %r1;
	setp.eq.s32	%p4, %r5, 0;
	@%p4 bra 	BB10_3;

	// f32 source: 16 bytes per pixel. %f69..%f72 = components 0..3.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd6];
	mov.f32 	%f72, %f16;
	mov.f32 	%f71, %f15;
	mov.f32 	%f70, %f14;
	mov.f32 	%f69, %f13;
	bra.uni 	BB10_4;

BB10_3:
	// f16 source: 8 bytes per pixel; each half is widened to f32 into %f69..%f72.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f69, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f70, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f71, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f72, %temp;
	}

BB10_4:
	// 3x3 matrix times (component 2, component 1, component 0):
	// %f22 = row 0, %f28 = row 1, %f34 = row 2.
	ld.const.f32 	%f17, [kRGB32f_To_601YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_601YCbCr+4];
	mul.ftz.f32 	%f19, %f70, %f18;
	fma.rn.ftz.f32 	%f20, %f71, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_601YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f69, %f21, %f20;
	ld.const.f32 	%f23, [kRGB32f_To_601YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_601YCbCr+16];
	mul.ftz.f32 	%f25, %f70, %f24;
	fma.rn.ftz.f32 	%f26, %f71, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_601YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f69, %f27, %f26;
	ld.const.f32 	%f29, [kRGB32f_To_601YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_601YCbCr+28];
	mul.ftz.f32 	%f31, %f70, %f30;
	fma.rn.ftz.f32 	%f32, %f71, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_601YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f69, %f33, %f32;
	// %f37 = 255.0 / 1.0 (approximate divide); %f38 = component 3 * %f37.
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f37, %f36, %f35;
	mul.ftz.f32 	%f38, %f72, %f37;
	// %f40 = 255.0 / 255.0. Scaled offsets: %f41, %f44, %f47. Offset added to each row.
	ld.const.f32 	%f39, [kYCbCrOffset];
	.loc 3 3606 10
	div.approx.ftz.f32 	%f40, %f36, %f36;
	mul.ftz.f32 	%f41, %f39, %f40;
	add.ftz.f32 	%f42, %f22, %f41;
	ld.const.f32 	%f43, [kYCbCrOffset+4];
	mul.ftz.f32 	%f44, %f43, %f40;
	add.ftz.f32 	%f45, %f28, %f44;
	ld.const.f32 	%f46, [kYCbCrOffset+8];
	mul.ftz.f32 	%f47, %f46, %f40;
	add.ftz.f32 	%f48, %f34, %f47;
	// 0f3B808081 is approximately 1/255, so %f49 brings %f38 back to a 0..1 scale.
	// Each channel becomes offset + (value - offset) * %f49.
	mul.ftz.f32 	%f49, %f38, 0f3B808081;
	sub.ftz.f32 	%f50, %f42, %f41;
	sub.ftz.f32 	%f51, %f45, %f44;
	sub.ftz.f32 	%f52, %f48, %f47;
	fma.rn.ftz.f32 	%f53, %f50, %f49, %f41;
	fma.rn.ftz.f32 	%f54, %f51, %f49, %f44;
	fma.rn.ftz.f32 	%f55, %f52, %f49, %f47;
	// Add 0.5, clamp to [0, 255], then truncate toward zero (cvt.rzi) below.
	add.ftz.f32 	%f56, %f55, 0f3F000000;
	add.ftz.f32 	%f57, %f54, 0f3F000000;
	add.ftz.f32 	%f58, %f53, 0f3F000000;
	add.ftz.f32 	%f59, %f38, 0f3F000000;
	mov.f32 	%f60, 0f00000000;
	.loc 3 2770 10
	max.ftz.f32 	%f61, %f56, %f60;
	.loc 3 2765 10
	min.ftz.f32 	%f62, %f61, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f63, %f57, %f60;
	.loc 3 2765 10
	min.ftz.f32 	%f64, %f63, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f65, %f58, %f60;
	.loc 3 2765 10
	min.ftz.f32 	%f66, %f65, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f67, %f59, %f60;
	.loc 3 2765 10
	min.ftz.f32 	%f68, %f67, %f36;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r15, %f62;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r16, %f64;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r17, %f66;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r18, %f68;
	.loc 2 51 1
	// Destination address = dst + (y * dstPitch + x) * 4 bytes.
	mad.lo.s32 	%r19, %r2, %r6, %r1;
	mul.wide.s32 	%rd9, %r19, 4;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 239
	cvt.u16.u32	%rs9, %r17;
	.loc 2 51 159
	cvt.u16.u32	%rs10, %r16;
	.loc 2 51 81
	cvt.u16.u32	%rs11, %r15;
	cvt.u16.u32	%rs12, %r18;
	.loc 2 51 1
	// Byte order written: {row 2, row 1, row 0, component 3 scaled to 0..255}.
	st.global.v4.u8 	[%rd10], {%rs11, %rs10, %rs9, %rs12};

BB10_5:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel
// One thread per pixel. Loads a 4 x 8-bit source pixel, subtracts kYCbCrOffset from
// the first three bytes, applies the 3x3 matrix k601YCbCr_To_RGB32f, and stores the
// three results plus a constant 1.0 as four f32 or four f16 values. The fourth
// source byte is loaded but never used.
// Parameters, as used by the code below:
//   param_0  source base address (generic address, converted to global)
//   param_1  source row pitch, counted in 4-byte pixels
//   param_2  not read
//   param_3  destination base address (generic address, converted to global)
//   param_4  destination row pitch, counted in pixels
//   param_5  non-zero: 16 bytes/pixel f32x4 store; zero: 8 bytes/pixel f16x4 store
//   param_6  exclusive upper bound for x
//   param_7  exclusive upper bound for y
//   param_8  not read
// kYCbCrOffset is referenced here but declared outside the visible part of the file.
.visible .entry PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<15>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<38>;
	.reg .s64 	%rd<11>;


	ld.param.u64 	%rd3, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 (signed compare) do nothing.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB11_4;
	bra.uni 	BB11_1;

BB11_1:
	.loc 2 51 1
	// Source address = src + (y * srcPitch + x) * 4 bytes.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	mul.wide.s32 	%rd5, %r15, 4;
	add.s64 	%rd6, %rd2, %rd5;
	.loc 2 51 1
	// Bytes 0..2 are masked to 0..255 and converted to f32 (%f8, %f9, %f10).
	// Byte 3 (%rs4) is not used after the load.
	ld.global.v4.u8 	{%rs1, %rs2, %rs3, %rs4}, [%rd6];
	and.b16  	%rs6, %rs1, 255;
	cvt.rn.f32.u16	%f8, %rs6;
	and.b16  	%rs8, %rs2, 255;
	cvt.rn.f32.u16	%f9, %rs8;
	and.b16  	%rs10, %rs3, 255;
	cvt.rn.f32.u16	%f10, %rs10;
	// %f13 = 255.0 / 255.0 (approximate divide), used to scale each offset.
	// %f15 = byte2 - offset[0], %f18 = byte1 - offset[1], %f21 = byte0 - offset[2].
	ld.const.f32 	%f11, [kYCbCrOffset];
	mov.f32 	%f12, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f13, %f12, %f12;
	mul.ftz.f32 	%f14, %f11, %f13;
	sub.ftz.f32 	%f15, %f10, %f14;
	ld.const.f32 	%f16, [kYCbCrOffset+4];
	mul.ftz.f32 	%f17, %f16, %f13;
	sub.ftz.f32 	%f18, %f9, %f17;
	ld.const.f32 	%f19, [kYCbCrOffset+8];
	mul.ftz.f32 	%f20, %f19, %f13;
	sub.ftz.f32 	%f21, %f8, %f20;
	// 3x3 matrix times (%f15, %f18, %f21): %f1 = row 0, %f2 = row 1, %f3 = row 2.
	ld.const.f32 	%f22, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f23, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f24, %f18, %f23;
	fma.rn.ftz.f32 	%f25, %f15, %f22, %f24;
	ld.const.f32 	%f26, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f21, %f26, %f25;
	ld.const.f32 	%f27, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f28, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f29, %f18, %f28;
	fma.rn.ftz.f32 	%f30, %f15, %f27, %f29;
	ld.const.f32 	%f31, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f21, %f31, %f30;
	ld.const.f32 	%f32, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f33, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f34, %f18, %f33;
	fma.rn.ftz.f32 	%f35, %f15, %f32, %f34;
	ld.const.f32 	%f36, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f21, %f36, %f35;
	// Fourth output component is the constant 1.0.
	mov.f32 	%f7, 0f3F800000;
	.loc 2 51 1
	// %r3 = destination pixel index = y * dstPitch + x.
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	.loc 2 51 1
	// param_5 == 0 selects the f16 store at BB11_3; otherwise fall through to f32.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB11_3;

	// f32 path: 16 bytes per pixel, stored as {row 2, row 1, row 0, 1.0}.
	mul.wide.s32 	%rd7, %r3, 16;
	add.s64 	%rd8, %rd1, %rd7;
	.loc 2 51 1
	st.global.v4.f32 	[%rd8], {%f3, %f2, %f1, %f7};
	bra.uni 	BB11_4;

BB11_3:
	// f16 path: 8 bytes per pixel; the same four values converted with round-to-nearest.
	mul.wide.s32 	%rd9, %r3, 8;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs12, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs13, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f7;
	mov.b16 	%rs14, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd10], {%rs11, %rs12, %rs13, %rs14};

BB11_4:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel
// One thread per pixel. Loads four floating-point components (f32 or f16), applies
// the 3x3 matrix kRGB32f_To_601YCbCr to the first three, adds kYCbCrOffset, scales
// each result's distance from its offset by component 3, then rounds, clamps to
// 0..255 and stores three bytes followed by a constant 255.
// NOTE(review): component 3 still scales the colour channels although it is not
// stored in the output — confirm this is the intended behaviour for this format.
// Parameters, as used by the code below:
//   param_0  source base address (generic address, converted to global)
//   param_1  source row pitch, counted in pixels
//   param_2  non-zero: source is 16 bytes/pixel f32x4; zero: 8 bytes/pixel f16x4
//   param_3  destination base address (generic address, converted to global)
//   param_4  destination row pitch, counted in 4-byte pixels
//   param_5  not read
//   param_6  exclusive upper bound for x
//   param_7  exclusive upper bound for y
//   param_8  not read
// kYCbCrOffset is referenced here but declared outside the visible part of the file.
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<73>;
	.reg .s64 	%rd<11>;


	ld.param.u64 	%rd3, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_1];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_3];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_4];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 (signed compare) do nothing.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB12_5;
	bra.uni 	BB12_1;

BB12_1:
	.loc 2 51 1
	// %r3 = source pixel index = y * srcPitch + x. param_2 == 0 selects the f16 load.
	mad.lo.s32 	%r3, %r2, %r4, %r1;
	setp.eq.s32	%p4, %r5, 0;
	@%p4 bra 	BB12_3;

	// f32 source: 16 bytes per pixel. %f69..%f72 = components 0..3.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd6];
	mov.f32 	%f72, %f16;
	mov.f32 	%f71, %f15;
	mov.f32 	%f70, %f14;
	mov.f32 	%f69, %f13;
	bra.uni 	BB12_4;

BB12_3:
	// f16 source: 8 bytes per pixel; each half is widened to f32 into %f69..%f72.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f69, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f70, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f71, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f72, %temp;
	}

BB12_4:
	// 3x3 matrix times (component 2, component 1, component 0):
	// %f22 = row 0, %f28 = row 1, %f34 = row 2.
	ld.const.f32 	%f17, [kRGB32f_To_601YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_601YCbCr+4];
	mul.ftz.f32 	%f19, %f70, %f18;
	fma.rn.ftz.f32 	%f20, %f71, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_601YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f69, %f21, %f20;
	ld.const.f32 	%f23, [kRGB32f_To_601YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_601YCbCr+16];
	mul.ftz.f32 	%f25, %f70, %f24;
	fma.rn.ftz.f32 	%f26, %f71, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_601YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f69, %f27, %f26;
	ld.const.f32 	%f29, [kRGB32f_To_601YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_601YCbCr+28];
	mul.ftz.f32 	%f31, %f70, %f30;
	fma.rn.ftz.f32 	%f32, %f71, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_601YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f69, %f33, %f32;
	// %f37 = 255.0 / 1.0 (approximate divide); %f38 = component 3 * %f37.
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f37, %f36, %f35;
	mul.ftz.f32 	%f38, %f72, %f37;
	// %f40 = 255.0 / 255.0. Scaled offsets: %f41, %f44, %f47. Offset added to each row.
	ld.const.f32 	%f39, [kYCbCrOffset];
	.loc 3 3606 10
	div.approx.ftz.f32 	%f40, %f36, %f36;
	mul.ftz.f32 	%f41, %f39, %f40;
	add.ftz.f32 	%f42, %f22, %f41;
	ld.const.f32 	%f43, [kYCbCrOffset+4];
	mul.ftz.f32 	%f44, %f43, %f40;
	add.ftz.f32 	%f45, %f28, %f44;
	ld.const.f32 	%f46, [kYCbCrOffset+8];
	mul.ftz.f32 	%f47, %f46, %f40;
	add.ftz.f32 	%f48, %f34, %f47;
	// 0f3B808081 is approximately 1/255, so %f49 brings %f38 back to a 0..1 scale.
	// Each channel becomes offset + (value - offset) * %f49.
	mul.ftz.f32 	%f49, %f38, 0f3B808081;
	sub.ftz.f32 	%f50, %f42, %f41;
	sub.ftz.f32 	%f51, %f45, %f44;
	sub.ftz.f32 	%f52, %f48, %f47;
	fma.rn.ftz.f32 	%f53, %f50, %f49, %f41;
	fma.rn.ftz.f32 	%f54, %f51, %f49, %f44;
	fma.rn.ftz.f32 	%f55, %f52, %f49, %f47;
	// Add 0.5, clamp to [0, 255], then truncate toward zero (cvt.rzi) below.
	add.ftz.f32 	%f56, %f55, 0f3F000000;
	add.ftz.f32 	%f57, %f54, 0f3F000000;
	add.ftz.f32 	%f58, %f53, 0f3F000000;
	mov.f32 	%f59, 0f00000000;
	.loc 3 2770 10
	max.ftz.f32 	%f60, %f56, %f59;
	.loc 3 2765 10
	min.ftz.f32 	%f61, %f60, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f62, %f57, %f59;
	.loc 3 2765 10
	min.ftz.f32 	%f63, %f62, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f64, %f58, %f59;
	.loc 3 2765 10
	min.ftz.f32 	%f65, %f64, %f36;
	// Fourth byte: the constant 255.5 (0f437F8000) goes through the same clamp,
	// giving 255.0, which truncates to 255.
	mov.f32 	%f66, 0f437F8000;
	.loc 3 2770 10
	max.ftz.f32 	%f67, %f66, %f59;
	.loc 3 2765 10
	min.ftz.f32 	%f68, %f67, %f36;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r15, %f61;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r16, %f63;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r17, %f65;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r18, %f68;
	.loc 2 51 1
	// Destination address = dst + (y * dstPitch + x) * 4 bytes.
	mad.lo.s32 	%r19, %r2, %r6, %r1;
	mul.wide.s32 	%rd9, %r19, 4;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 239
	cvt.u16.u32	%rs9, %r17;
	.loc 2 51 159
	cvt.u16.u32	%rs10, %r16;
	.loc 2 51 81
	cvt.u16.u32	%rs11, %r15;
	cvt.u16.u32	%rs12, %r18;
	.loc 2 51 1
	// Byte order written: {row 2, row 1, row 0, 255}.
	st.global.v4.u8 	[%rd10], {%rs11, %rs10, %rs9, %rs12};

BB12_5:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel
// One thread per pixel. Loads a 4 x 8-bit source pixel, subtracts kYCbCrOffset from
// the first three bytes, applies the 3x3 matrix k709YCbCr_To_RGB32f, normalises the
// fourth byte by 1/255, and stores four f32 or four f16 values. The colour results
// are not divided by the fourth component in this kernel.
// Parameters, as used by the code below:
//   param_0  source base address (generic address, converted to global)
//   param_1  source row pitch, counted in 4-byte pixels
//   param_2  not read
//   param_3  destination base address (generic address, converted to global)
//   param_4  destination row pitch, counted in pixels
//   param_5  non-zero: 16 bytes/pixel f32x4 store; zero: 8 bytes/pixel f16x4 store
//   param_6  exclusive upper bound for x
//   param_7  exclusive upper bound for y
//   param_8  not read
// kYCbCrOffset and k709YCbCr_To_RGB32f are referenced here but declared outside
// the visible part of the file.
.visible .entry PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<41>;
	.reg .s64 	%rd<11>;


	ld.param.u64 	%rd3, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 (signed compare) do nothing.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB13_4;
	bra.uni 	BB13_1;

BB13_1:
	.loc 2 51 1
	// Source address = src + (y * srcPitch + x) * 4 bytes.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	mul.wide.s32 	%rd5, %r15, 4;
	add.s64 	%rd6, %rd2, %rd5;
	.loc 2 51 1
	// Load the four bytes and convert each (masked to 0..255) to f32:
	// %f9 = byte 0, %f10 = byte 1, %f11 = byte 2, %f12 = byte 3.
	ld.global.v4.u8 	{%rs1, %rs2, %rs3, %rs4}, [%rd6];
	and.b16  	%rs6, %rs1, 255;
	cvt.rn.f32.u16	%f9, %rs6;
	and.b16  	%rs8, %rs2, 255;
	cvt.rn.f32.u16	%f10, %rs8;
	and.b16  	%rs10, %rs3, 255;
	cvt.rn.f32.u16	%f11, %rs10;
	and.b16  	%rs12, %rs4, 255;
	cvt.rn.f32.u16	%f12, %rs12;
	// %f15 = 255.0 / 255.0 (approximate divide), used to scale each offset.
	// %f17 = byte2 - offset[0], %f20 = byte1 - offset[1], %f23 = byte0 - offset[2].
	ld.const.f32 	%f13, [kYCbCrOffset];
	mov.f32 	%f14, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f15, %f14, %f14;
	mul.ftz.f32 	%f16, %f13, %f15;
	sub.ftz.f32 	%f17, %f11, %f16;
	ld.const.f32 	%f18, [kYCbCrOffset+4];
	mul.ftz.f32 	%f19, %f18, %f15;
	sub.ftz.f32 	%f20, %f10, %f19;
	ld.const.f32 	%f21, [kYCbCrOffset+8];
	mul.ftz.f32 	%f22, %f21, %f15;
	sub.ftz.f32 	%f23, %f9, %f22;
	// 3x3 matrix times (%f17, %f20, %f23): %f1 = row 0, %f2 = row 1, %f3 = row 2.
	ld.const.f32 	%f24, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f25, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f26, %f20, %f25;
	fma.rn.ftz.f32 	%f27, %f17, %f24, %f26;
	ld.const.f32 	%f28, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f23, %f28, %f27;
	ld.const.f32 	%f29, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f30, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f31, %f20, %f30;
	fma.rn.ftz.f32 	%f32, %f17, %f29, %f31;
	ld.const.f32 	%f33, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f23, %f33, %f32;
	ld.const.f32 	%f34, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f35, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f36, %f20, %f35;
	fma.rn.ftz.f32 	%f37, %f17, %f34, %f36;
	ld.const.f32 	%f38, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f23, %f38, %f37;
	// %f8 = byte3 * (1.0 / 255.0): the fourth component normalised to 0..1.
	mov.f32 	%f39, 0f3F800000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f40, %f39, %f14;
	mul.ftz.f32 	%f8, %f12, %f40;
	.loc 2 51 1
	// %r3 = destination pixel index = y * dstPitch + x.
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	.loc 2 51 1
	// param_5 == 0 selects the f16 store at BB13_3; otherwise fall through to f32.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB13_3;

	// f32 path: 16 bytes per pixel, stored as {row 2, row 1, row 0, fourth component}.
	mul.wide.s32 	%rd7, %r3, 16;
	add.s64 	%rd8, %rd1, %rd7;
	.loc 2 51 1
	st.global.v4.f32 	[%rd8], {%f3, %f2, %f1, %f8};
	bra.uni 	BB13_4;

BB13_3:
	// f16 path: 8 bytes per pixel; the same four values converted with round-to-nearest.
	mul.wide.s32 	%rd9, %r3, 8;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs13, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs14, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs15, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs16, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd10], {%rs13, %rs14, %rs15, %rs16};

BB13_4:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel
// One thread per pixel. Loads four floating-point components (f32 or f16), applies
// the 3x3 matrix kRGB32f_To_709YCbCr to the first three, adds kYCbCrOffset, scales
// component 3 by 255, then rounds, clamps to 0..255 and stores four bytes. The
// colour channels are not scaled by component 3 in this kernel.
// Parameters, as used by the code below:
//   param_0  source base address (generic address, converted to global)
//   param_1  source row pitch, counted in pixels
//   param_2  non-zero: source is 16 bytes/pixel f32x4; zero: 8 bytes/pixel f16x4
//   param_3  destination base address (generic address, converted to global)
//   param_4  destination row pitch, counted in 4-byte pixels
//   param_5  not read
//   param_6  exclusive upper bound for x
//   param_7  exclusive upper bound for y
//   param_8  not read
// kYCbCrOffset and kRGB32f_To_709YCbCr are referenced here but declared outside
// the visible part of the file.
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<62>;
	.reg .s64 	%rd<11>;


	ld.param.u64 	%rd3, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_1];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_3];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_4];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 (signed compare) do nothing.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB14_5;
	bra.uni 	BB14_1;

BB14_1:
	.loc 2 51 1
	// %r3 = source pixel index = y * srcPitch + x. param_2 == 0 selects the f16 load.
	mad.lo.s32 	%r3, %r2, %r4, %r1;
	setp.eq.s32	%p4, %r5, 0;
	@%p4 bra 	BB14_3;

	// f32 source: 16 bytes per pixel. %f58..%f61 = components 0..3.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd6];
	mov.f32 	%f61, %f16;
	mov.f32 	%f60, %f15;
	mov.f32 	%f59, %f14;
	mov.f32 	%f58, %f13;
	bra.uni 	BB14_4;

BB14_3:
	// f16 source: 8 bytes per pixel; each half is widened to f32 into %f58..%f61.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f58, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f59, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f60, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f61, %temp;
	}

BB14_4:
	// 3x3 matrix times (component 2, component 1, component 0):
	// %f22 = row 0, %f28 = row 1, %f34 = row 2.
	ld.const.f32 	%f17, [kRGB32f_To_709YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_709YCbCr+4];
	mul.ftz.f32 	%f19, %f59, %f18;
	fma.rn.ftz.f32 	%f20, %f60, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_709YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f58, %f21, %f20;
	ld.const.f32 	%f23, [kRGB32f_To_709YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_709YCbCr+16];
	mul.ftz.f32 	%f25, %f59, %f24;
	fma.rn.ftz.f32 	%f26, %f60, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_709YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f58, %f27, %f26;
	ld.const.f32 	%f29, [kRGB32f_To_709YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_709YCbCr+28];
	mul.ftz.f32 	%f31, %f59, %f30;
	fma.rn.ftz.f32 	%f32, %f60, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_709YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f58, %f33, %f32;
	// %f37 = 255.0 / 1.0 and %f39 = 255.0 / 255.0 (both approximate divides).
	// Each row gets offset[i] * %f39 added through an fma.
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f37, %f36, %f35;
	ld.const.f32 	%f38, [kYCbCrOffset];
	.loc 3 3606 10
	div.approx.ftz.f32 	%f39, %f36, %f36;
	fma.rn.ftz.f32 	%f40, %f38, %f39, %f22;
	ld.const.f32 	%f41, [kYCbCrOffset+4];
	fma.rn.ftz.f32 	%f42, %f41, %f39, %f28;
	ld.const.f32 	%f43, [kYCbCrOffset+8];
	fma.rn.ftz.f32 	%f44, %f43, %f39, %f34;
	// Add 0.5 to every channel (the fourth is component 3 * %f37 + 0.5), clamp to
	// [0, 255], then truncate toward zero (cvt.rzi) below.
	add.ftz.f32 	%f45, %f44, 0f3F000000;
	add.ftz.f32 	%f46, %f42, 0f3F000000;
	add.ftz.f32 	%f47, %f40, 0f3F000000;
	fma.rn.ftz.f32 	%f48, %f61, %f37, 0f3F000000;
	mov.f32 	%f49, 0f00000000;
	.loc 3 2770 10
	max.ftz.f32 	%f50, %f45, %f49;
	.loc 3 2765 10
	min.ftz.f32 	%f51, %f50, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f52, %f46, %f49;
	.loc 3 2765 10
	min.ftz.f32 	%f53, %f52, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f54, %f47, %f49;
	.loc 3 2765 10
	min.ftz.f32 	%f55, %f54, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f56, %f48, %f49;
	.loc 3 2765 10
	min.ftz.f32 	%f57, %f56, %f36;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r15, %f51;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r16, %f53;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r17, %f55;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r18, %f57;
	.loc 2 51 1
	// Destination address = dst + (y * dstPitch + x) * 4 bytes.
	mad.lo.s32 	%r19, %r2, %r6, %r1;
	mul.wide.s32 	%rd9, %r19, 4;
	add.s64 	%rd10, %rd1, %rd9;
	cvt.u16.u32	%rs9, %r18;
	.loc 2 51 239
	cvt.u16.u32	%rs10, %r17;
	.loc 2 51 159
	cvt.u16.u32	%rs11, %r16;
	.loc 2 51 81
	cvt.u16.u32	%rs12, %r15;
	.loc 2 51 1
	// Byte order written: {row 2, row 1, row 0, component 3 scaled to 0..255}.
	st.global.v4.u8 	[%rd10], {%rs12, %rs11, %rs10, %rs9};

BB14_5:
	.loc 1 25 2
	ret;
}

//
// PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel
//
// One thread converts one pixel: it loads 4 bytes from the source, subtracts
// the kYCbCrOffset terms, multiplies by the 3x3 k709YCbCr_To_RGB32f matrix,
// divides the three results by the normalised fourth byte and stores four
// values as f32 or f16.
// Going by the kernel name the source bytes 0..3 are presumably V, U, Y and
// alpha (colour premultiplied), and the output order is B, G, R, A.
//
// Parameters as used by the code below:
//   param_0  u64  source base address (4 bytes per pixel)
//   param_1  u32  source row stride in pixels (index = y * param_1 + x)
//   param_2  u32  not read by this kernel
//   param_3  u64  destination base address
//   param_4  u32  destination row stride in pixels (index = y * param_4 + x)
//   param_5  u32  destination format: non-zero -> 4 x f32 (16 bytes/pixel),
//                 zero -> 4 x f16 (8 bytes/pixel)
//   param_6  u32  exclusive upper bound for x (presumably the image width)
//   param_7  u32  exclusive upper bound for y (presumably the image height)
//   param_8  u32  not read by this kernel
//
.visible .entry PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<59>;
	.reg .s64 	%rd<11>;


	// %rd2 = source pointer, %rd1 = destination pointer (both as global addresses).
	// %r4 = source stride, %r5 = destination stride, %r6 = destination format flag,
	// %r7 / %r8 = x / y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB15_7;
	bra.uni 	BB15_1;

BB15_1:
	.loc 2 51 1
	// Source address = base + (y * stride + x) * 4 bytes.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	mul.wide.s32 	%rd5, %r15, 4;
	add.s64 	%rd6, %rd2, %rd5;
	.loc 2 51 1
	// Load the four bytes and widen each to f32 (masking to 8 bits first):
	// %f16 = byte 0, %f17 = byte 1, %f18 = byte 2, %f19 = byte 3.
	ld.global.v4.u8 	{%rs1, %rs2, %rs3, %rs4}, [%rd6];
	and.b16  	%rs6, %rs1, 255;
	cvt.rn.f32.u16	%f16, %rs6;
	and.b16  	%rs8, %rs2, 255;
	cvt.rn.f32.u16	%f17, %rs8;
	and.b16  	%rs10, %rs3, 255;
	cvt.rn.f32.u16	%f18, %rs10;
	and.b16  	%rs12, %rs4, 255;
	cvt.rn.f32.u16	%f19, %rs12;
	// Offsets are scaled by %f22 = 255.0 / 255.0 (approximate divide) and then
	// subtracted: byte 2 uses kYCbCrOffset[0], byte 1 uses [1], byte 0 uses [2].
	ld.const.f32 	%f20, [kYCbCrOffset];
	mov.f32 	%f21, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f22, %f21, %f21;
	mul.ftz.f32 	%f23, %f20, %f22;
	sub.ftz.f32 	%f24, %f18, %f23;
	ld.const.f32 	%f25, [kYCbCrOffset+4];
	mul.ftz.f32 	%f26, %f25, %f22;
	sub.ftz.f32 	%f27, %f17, %f26;
	ld.const.f32 	%f28, [kYCbCrOffset+8];
	mul.ftz.f32 	%f29, %f28, %f22;
	sub.ftz.f32 	%f30, %f16, %f29;
	// 3x3 matrix multiply with k709YCbCr_To_RGB32f (row-major, 9 floats):
	// %f1 = row 0, %f2 = row 1, %f3 = row 2, each dotted with (%f24, %f27, %f30).
	ld.const.f32 	%f31, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f32, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f33, %f27, %f32;
	fma.rn.ftz.f32 	%f34, %f24, %f31, %f33;
	ld.const.f32 	%f35, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f30, %f35, %f34;
	ld.const.f32 	%f36, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f37, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f38, %f27, %f37;
	fma.rn.ftz.f32 	%f39, %f24, %f36, %f38;
	ld.const.f32 	%f40, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f30, %f40, %f39;
	ld.const.f32 	%f41, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f42, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f43, %f27, %f42;
	fma.rn.ftz.f32 	%f44, %f24, %f41, %f43;
	ld.const.f32 	%f45, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f30, %f45, %f44;
	// %f58 = byte 3 * (1.0 / 255.0): the fourth channel normalised to 0..1.
	mov.f32 	%f46, 0f3F800000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f47, %f46, %f21;
	mul.ftz.f32 	%f58, %f19, %f47;
	.loc 2 51 1
	// 0fB70637BD is about -8.0e-6: take the divide path only when the
	// normalised alpha exceeds that small threshold (or the compare is unordered).
	add.ftz.f32 	%f48, %f58, 0fB70637BD;
	setp.gtu.ftz.f32	%p4, %f48, 0f00000000;
	@%p4 bra 	BB15_3;

	// Alpha at or below the threshold: all four outputs, alpha included, become 0.
	mov.f32 	%f58, 0f00000000;
	mov.f32 	%f57, %f58;
	mov.f32 	%f56, %f58;
	mov.f32 	%f55, %f58;
	bra.uni 	BB15_4;

BB15_3:
	.loc 3 3606 10
	// Divide the colour by alpha: %f54 = 1.0 / alpha, then scale rows 0..2.
	div.approx.ftz.f32 	%f54, %f46, %f58;
	.loc 2 51 1
	mul.ftz.f32 	%f57, %f1, %f54;
	mul.ftz.f32 	%f56, %f2, %f54;
	mul.ftz.f32 	%f55, %f3, %f54;

BB15_4:
	.loc 2 51 1
	// %r3 = destination pixel index = y * destination stride + x.
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	.loc 2 51 1
	// param_5 == 0 selects the 16-bit float store at BB15_6.
	setp.eq.s32	%p5, %r6, 0;
	@%p5 bra 	BB15_6;

	// 32-bit float output, 16 bytes per pixel, order: row 2, row 1, row 0, alpha.
	mul.wide.s32 	%rd7, %r3, 16;
	add.s64 	%rd8, %rd1, %rd7;
	.loc 2 51 1
	st.global.v4.f32 	[%rd8], {%f55, %f56, %f57, %f58};
	bra.uni 	BB15_7;

BB15_6:
	// 16-bit float output, 8 bytes per pixel: convert each component to f16
	// (round to nearest) and store in the same order as the f32 path.
	mul.wide.s32 	%rd9, %r3, 8;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f55;
	mov.b16 	%rs13, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f56;
	mov.b16 	%rs14, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f57;
	mov.b16 	%rs15, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f58;
	mov.b16 	%rs16, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd10], {%rs13, %rs14, %rs15, %rs16};

BB15_7:
	.loc 1 25 2
	ret;
}

//
// PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel
//
// One thread converts one pixel: it loads four float components (f32 or f16),
// multiplies the first three by the 3x3 kRGB32f_To_709YCbCr matrix, scales the
// result by the fourth component, adds the kYCbCrOffset terms, rounds and
// clamps to 0..255 and stores 4 bytes.
// Going by the kernel name the source order is presumably B, G, R, A and the
// stored bytes are V, U, Y and alpha with the colour premultiplied.
//
// Parameters as used by the code below:
//   param_0  u64  source base address
//   param_1  u32  source row stride in pixels (index = y * param_1 + x)
//   param_2  u32  source format: non-zero -> 4 x f32 (16 bytes/pixel),
//                 zero -> 4 x f16 (8 bytes/pixel)
//   param_3  u64  destination base address (4 bytes per pixel)
//   param_4  u32  destination row stride in pixels (index = y * param_4 + x)
//   param_5  u32  not read by this kernel
//   param_6  u32  exclusive upper bound for x (presumably the image width)
//   param_7  u32  exclusive upper bound for y (presumably the image height)
//   param_8  u32  not read by this kernel
//
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<73>;
	.reg .s64 	%rd<11>;


	// %rd2 = source pointer, %rd1 = destination pointer (both as global addresses).
	// %r4 = source stride, %r5 = source format flag, %r6 = destination stride,
	// %r7 / %r8 = x / y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_1];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_3];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_4];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB16_5;
	bra.uni 	BB16_1;

BB16_1:
	.loc 2 51 1
	// %r3 = source pixel index; param_2 == 0 selects the 16-bit float load at BB16_3.
	mad.lo.s32 	%r3, %r2, %r4, %r1;
	setp.eq.s32	%p4, %r5, 0;
	@%p4 bra 	BB16_3;

	// 32-bit float source, 16 bytes per pixel: %f69..%f72 = components 0..3.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd6];
	mov.f32 	%f72, %f16;
	mov.f32 	%f71, %f15;
	mov.f32 	%f70, %f14;
	mov.f32 	%f69, %f13;
	bra.uni 	BB16_4;

BB16_3:
	// 16-bit float source, 8 bytes per pixel: widen each half to f32 into
	// the same %f69..%f72 registers.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f69, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f70, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f71, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f72, %temp;
	}

BB16_4:
	// 3x3 matrix multiply with kRGB32f_To_709YCbCr (row-major, 9 floats).
	// The vector is (component 2, component 1, component 0) = (%f71, %f70, %f69);
	// %f22 = row 0, %f28 = row 1, %f34 = row 2.
	ld.const.f32 	%f17, [kRGB32f_To_709YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_709YCbCr+4];
	mul.ftz.f32 	%f19, %f70, %f18;
	fma.rn.ftz.f32 	%f20, %f71, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_709YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f69, %f21, %f20;
	ld.const.f32 	%f23, [kRGB32f_To_709YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_709YCbCr+16];
	mul.ftz.f32 	%f25, %f70, %f24;
	fma.rn.ftz.f32 	%f26, %f71, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_709YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f69, %f27, %f26;
	ld.const.f32 	%f29, [kRGB32f_To_709YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_709YCbCr+28];
	mul.ftz.f32 	%f31, %f70, %f30;
	fma.rn.ftz.f32 	%f32, %f71, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_709YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f69, %f33, %f32;
	// %f38 = component 3 (alpha) * (255.0 / 1.0): alpha on the 0..255 scale.
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f37, %f36, %f35;
	mul.ftz.f32 	%f38, %f72, %f37;
	// Offsets scaled by %f40 = 255.0 / 255.0 and added to the matrix rows.
	ld.const.f32 	%f39, [kYCbCrOffset];
	.loc 3 3606 10
	div.approx.ftz.f32 	%f40, %f36, %f36;
	mul.ftz.f32 	%f41, %f39, %f40;
	add.ftz.f32 	%f42, %f22, %f41;
	ld.const.f32 	%f43, [kYCbCrOffset+4];
	mul.ftz.f32 	%f44, %f43, %f40;
	add.ftz.f32 	%f45, %f28, %f44;
	ld.const.f32 	%f46, [kYCbCrOffset+8];
	mul.ftz.f32 	%f47, %f46, %f40;
	add.ftz.f32 	%f48, %f34, %f47;
	// Scale by alpha around the offset: %f49 = alpha255 * (about 1/255), then
	// result = (value - offset) * %f49 + offset for each of the three rows.
	mul.ftz.f32 	%f49, %f38, 0f3B808081;
	sub.ftz.f32 	%f50, %f42, %f41;
	sub.ftz.f32 	%f51, %f45, %f44;
	sub.ftz.f32 	%f52, %f48, %f47;
	fma.rn.ftz.f32 	%f53, %f50, %f49, %f41;
	fma.rn.ftz.f32 	%f54, %f51, %f49, %f44;
	fma.rn.ftz.f32 	%f55, %f52, %f49, %f47;
	// Add 0.5 so that the truncating conversion below rounds to nearest.
	add.ftz.f32 	%f56, %f55, 0f3F000000;
	add.ftz.f32 	%f57, %f54, 0f3F000000;
	add.ftz.f32 	%f58, %f53, 0f3F000000;
	add.ftz.f32 	%f59, %f38, 0f3F000000;
	// Clamp all four values to [0.0, 255.0].
	mov.f32 	%f60, 0f00000000;
	.loc 3 2770 10
	max.ftz.f32 	%f61, %f56, %f60;
	.loc 3 2765 10
	min.ftz.f32 	%f62, %f61, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f63, %f57, %f60;
	.loc 3 2765 10
	min.ftz.f32 	%f64, %f63, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f65, %f58, %f60;
	.loc 3 2765 10
	min.ftz.f32 	%f66, %f65, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f67, %f59, %f60;
	.loc 3 2765 10
	min.ftz.f32 	%f68, %f67, %f36;
	// Float -> unsigned integer, rounding toward zero.
	// %r15 = row 2, %r16 = row 1, %r17 = row 0, %r18 = alpha.
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r15, %f62;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r16, %f64;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r17, %f66;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r18, %f68;
	.loc 2 51 1
	// Destination address = base + (y * stride + x) * 4 bytes.
	mad.lo.s32 	%r19, %r2, %r6, %r1;
	mul.wide.s32 	%rd9, %r19, 4;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 239
	cvt.u16.u32	%rs9, %r17;
	.loc 2 51 159
	cvt.u16.u32	%rs10, %r16;
	.loc 2 51 81
	cvt.u16.u32	%rs11, %r15;
	cvt.u16.u32	%rs12, %r18;
	.loc 2 51 1
	// Stored byte order: row 2, row 1, row 0, alpha.
	st.global.v4.u8 	[%rd10], {%rs11, %rs10, %rs9, %rs12};

BB16_5:
	.loc 1 25 2
	ret;
}

//
// PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel
//
// One thread converts one pixel: it loads 4 bytes from the source, subtracts
// the kYCbCrOffset terms from the first three, multiplies by the 3x3
// k709YCbCr_To_RGB32f matrix and stores the three results plus a constant 1.0
// as f32 or f16. The fourth source byte is loaded but never used.
// Going by the kernel name the source bytes 0..2 are presumably V, U, Y and
// the output order is B, G, R, A.
//
// Parameters as used by the code below:
//   param_0  u64  source base address (4 bytes per pixel)
//   param_1  u32  source row stride in pixels (index = y * param_1 + x)
//   param_2  u32  not read by this kernel
//   param_3  u64  destination base address
//   param_4  u32  destination row stride in pixels (index = y * param_4 + x)
//   param_5  u32  destination format: non-zero -> 4 x f32 (16 bytes/pixel),
//                 zero -> 4 x f16 (8 bytes/pixel)
//   param_6  u32  exclusive upper bound for x (presumably the image width)
//   param_7  u32  exclusive upper bound for y (presumably the image height)
//   param_8  u32  not read by this kernel
//
.visible .entry PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<15>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<38>;
	.reg .s64 	%rd<11>;


	// %rd2 = source pointer, %rd1 = destination pointer (both as global addresses).
	// %r4 = source stride, %r5 = destination stride, %r6 = destination format flag,
	// %r7 / %r8 = x / y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB17_4;
	bra.uni 	BB17_1;

BB17_1:
	.loc 2 51 1
	// Source address = base + (y * stride + x) * 4 bytes.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	mul.wide.s32 	%rd5, %r15, 4;
	add.s64 	%rd6, %rd2, %rd5;
	.loc 2 51 1
	// Load the four bytes; only bytes 0..2 are widened to f32
	// (%f8 = byte 0, %f9 = byte 1, %f10 = byte 2). %rs4 is not used afterwards.
	ld.global.v4.u8 	{%rs1, %rs2, %rs3, %rs4}, [%rd6];
	and.b16  	%rs6, %rs1, 255;
	cvt.rn.f32.u16	%f8, %rs6;
	and.b16  	%rs8, %rs2, 255;
	cvt.rn.f32.u16	%f9, %rs8;
	and.b16  	%rs10, %rs3, 255;
	cvt.rn.f32.u16	%f10, %rs10;
	// Offsets are scaled by %f13 = 255.0 / 255.0 (approximate divide) and then
	// subtracted: byte 2 uses kYCbCrOffset[0], byte 1 uses [1], byte 0 uses [2].
	ld.const.f32 	%f11, [kYCbCrOffset];
	mov.f32 	%f12, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f13, %f12, %f12;
	mul.ftz.f32 	%f14, %f11, %f13;
	sub.ftz.f32 	%f15, %f10, %f14;
	ld.const.f32 	%f16, [kYCbCrOffset+4];
	mul.ftz.f32 	%f17, %f16, %f13;
	sub.ftz.f32 	%f18, %f9, %f17;
	ld.const.f32 	%f19, [kYCbCrOffset+8];
	mul.ftz.f32 	%f20, %f19, %f13;
	sub.ftz.f32 	%f21, %f8, %f20;
	// 3x3 matrix multiply with k709YCbCr_To_RGB32f (row-major, 9 floats):
	// %f1 = row 0, %f2 = row 1, %f3 = row 2, each dotted with (%f15, %f18, %f21).
	ld.const.f32 	%f22, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f23, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f24, %f18, %f23;
	fma.rn.ftz.f32 	%f25, %f15, %f22, %f24;
	ld.const.f32 	%f26, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f21, %f26, %f25;
	ld.const.f32 	%f27, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f28, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f29, %f18, %f28;
	fma.rn.ftz.f32 	%f30, %f15, %f27, %f29;
	ld.const.f32 	%f31, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f21, %f31, %f30;
	ld.const.f32 	%f32, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f33, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f34, %f18, %f33;
	fma.rn.ftz.f32 	%f35, %f15, %f32, %f34;
	ld.const.f32 	%f36, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f21, %f36, %f35;
	// The fourth output component is the constant 1.0.
	mov.f32 	%f7, 0f3F800000;
	.loc 2 51 1
	// %r3 = destination pixel index = y * destination stride + x.
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	.loc 2 51 1
	// param_5 == 0 selects the 16-bit float store at BB17_3.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB17_3;

	// 32-bit float output, 16 bytes per pixel, order: row 2, row 1, row 0, 1.0.
	mul.wide.s32 	%rd7, %r3, 16;
	add.s64 	%rd8, %rd1, %rd7;
	.loc 2 51 1
	st.global.v4.f32 	[%rd8], {%f3, %f2, %f1, %f7};
	bra.uni 	BB17_4;

BB17_3:
	// 16-bit float output, 8 bytes per pixel: convert each component to f16
	// (round to nearest) and store in the same order as the f32 path.
	mul.wide.s32 	%rd9, %r3, 8;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs12, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs13, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f7;
	mov.b16 	%rs14, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd10], {%rs11, %rs12, %rs13, %rs14};

BB17_4:
	.loc 1 25 2
	ret;
}

//
// PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel
//
// One thread converts one pixel: it loads four float components (f32 or f16),
// multiplies the first three by the 3x3 kRGB32f_To_709YCbCr matrix, scales the
// result by the fourth component, adds the kYCbCrOffset terms, rounds and
// clamps to 0..255 and stores 4 bytes. The fourth stored byte is always 255.
// Going by the kernel name the source order is presumably B, G, R, A and the
// stored bytes are V, U, Y, X.
//
// NOTE(review): the colour channels are still multiplied by the source alpha
// (%f49) although the stored fourth byte is the constant 255 - confirm that
// this is the intended behaviour for the "X" format.
//
// Parameters as used by the code below:
//   param_0  u64  source base address
//   param_1  u32  source row stride in pixels (index = y * param_1 + x)
//   param_2  u32  source format: non-zero -> 4 x f32 (16 bytes/pixel),
//                 zero -> 4 x f16 (8 bytes/pixel)
//   param_3  u64  destination base address (4 bytes per pixel)
//   param_4  u32  destination row stride in pixels (index = y * param_4 + x)
//   param_5  u32  not read by this kernel
//   param_6  u32  exclusive upper bound for x (presumably the image width)
//   param_7  u32  exclusive upper bound for y (presumably the image height)
//   param_8  u32  not read by this kernel
//
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<73>;
	.reg .s64 	%rd<11>;


	// %rd2 = source pointer, %rd1 = destination pointer (both as global addresses).
	// %r4 = source stride, %r5 = source format flag, %r6 = destination stride,
	// %r7 / %r8 = x / y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_0];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_1];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_3];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_4];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB18_5;
	bra.uni 	BB18_1;

BB18_1:
	.loc 2 51 1
	// %r3 = source pixel index; param_2 == 0 selects the 16-bit float load at BB18_3.
	mad.lo.s32 	%r3, %r2, %r4, %r1;
	setp.eq.s32	%p4, %r5, 0;
	@%p4 bra 	BB18_3;

	// 32-bit float source, 16 bytes per pixel: %f69..%f72 = components 0..3.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd6];
	mov.f32 	%f72, %f16;
	mov.f32 	%f71, %f15;
	mov.f32 	%f70, %f14;
	mov.f32 	%f69, %f13;
	bra.uni 	BB18_4;

BB18_3:
	// 16-bit float source, 8 bytes per pixel: widen each half to f32 into
	// the same %f69..%f72 registers.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f69, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f70, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f71, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f72, %temp;
	}

BB18_4:
	// 3x3 matrix multiply with kRGB32f_To_709YCbCr (row-major, 9 floats).
	// The vector is (component 2, component 1, component 0) = (%f71, %f70, %f69);
	// %f22 = row 0, %f28 = row 1, %f34 = row 2.
	ld.const.f32 	%f17, [kRGB32f_To_709YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_709YCbCr+4];
	mul.ftz.f32 	%f19, %f70, %f18;
	fma.rn.ftz.f32 	%f20, %f71, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_709YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f69, %f21, %f20;
	ld.const.f32 	%f23, [kRGB32f_To_709YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_709YCbCr+16];
	mul.ftz.f32 	%f25, %f70, %f24;
	fma.rn.ftz.f32 	%f26, %f71, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_709YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f69, %f27, %f26;
	ld.const.f32 	%f29, [kRGB32f_To_709YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_709YCbCr+28];
	mul.ftz.f32 	%f31, %f70, %f30;
	fma.rn.ftz.f32 	%f32, %f71, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_709YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f69, %f33, %f32;
	// %f38 = component 3 (alpha) * (255.0 / 1.0): alpha on the 0..255 scale.
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f37, %f36, %f35;
	mul.ftz.f32 	%f38, %f72, %f37;
	// Offsets scaled by %f40 = 255.0 / 255.0 and added to the matrix rows.
	ld.const.f32 	%f39, [kYCbCrOffset];
	.loc 3 3606 10
	div.approx.ftz.f32 	%f40, %f36, %f36;
	mul.ftz.f32 	%f41, %f39, %f40;
	add.ftz.f32 	%f42, %f22, %f41;
	ld.const.f32 	%f43, [kYCbCrOffset+4];
	mul.ftz.f32 	%f44, %f43, %f40;
	add.ftz.f32 	%f45, %f28, %f44;
	ld.const.f32 	%f46, [kYCbCrOffset+8];
	mul.ftz.f32 	%f47, %f46, %f40;
	add.ftz.f32 	%f48, %f34, %f47;
	// Scale by alpha around the offset: %f49 = alpha255 * (about 1/255), then
	// result = (value - offset) * %f49 + offset for each of the three rows.
	mul.ftz.f32 	%f49, %f38, 0f3B808081;
	sub.ftz.f32 	%f50, %f42, %f41;
	sub.ftz.f32 	%f51, %f45, %f44;
	sub.ftz.f32 	%f52, %f48, %f47;
	fma.rn.ftz.f32 	%f53, %f50, %f49, %f41;
	fma.rn.ftz.f32 	%f54, %f51, %f49, %f44;
	fma.rn.ftz.f32 	%f55, %f52, %f49, %f47;
	// Add 0.5 so that the truncating conversion below rounds to nearest.
	add.ftz.f32 	%f56, %f55, 0f3F000000;
	add.ftz.f32 	%f57, %f54, 0f3F000000;
	add.ftz.f32 	%f58, %f53, 0f3F000000;
	// Clamp the three colour values to [0.0, 255.0].
	mov.f32 	%f59, 0f00000000;
	.loc 3 2770 10
	max.ftz.f32 	%f60, %f56, %f59;
	.loc 3 2765 10
	min.ftz.f32 	%f61, %f60, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f62, %f57, %f59;
	.loc 3 2765 10
	min.ftz.f32 	%f63, %f62, %f36;
	.loc 3 2770 10
	max.ftz.f32 	%f64, %f58, %f59;
	.loc 3 2765 10
	min.ftz.f32 	%f65, %f64, %f36;
	// Fourth channel: the constant 255.5 (0f437F8000) run through the same
	// clamp, which yields 255.0 regardless of the source alpha.
	mov.f32 	%f66, 0f437F8000;
	.loc 3 2770 10
	max.ftz.f32 	%f67, %f66, %f59;
	.loc 3 2765 10
	min.ftz.f32 	%f68, %f67, %f36;
	// Float -> unsigned integer, rounding toward zero.
	// %r15 = row 2, %r16 = row 1, %r17 = row 0, %r18 = constant channel.
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r15, %f61;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r16, %f63;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r17, %f65;
	.loc 3 3272 10
	cvt.rzi.ftz.u32.f32	%r18, %f68;
	.loc 2 51 1
	// Destination address = base + (y * stride + x) * 4 bytes.
	mad.lo.s32 	%r19, %r2, %r6, %r1;
	mul.wide.s32 	%rd9, %r19, 4;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 239
	cvt.u16.u32	%rs9, %r17;
	.loc 2 51 159
	cvt.u16.u32	%rs10, %r16;
	.loc 2 51 81
	cvt.u16.u32	%rs11, %r15;
	cvt.u16.u32	%rs12, %r18;
	.loc 2 51 1
	// Stored byte order: row 2, row 1, row 0, constant 255.
	st.global.v4.u8 	[%rd10], {%rs11, %rs10, %rs9, %rs12};

BB18_5:
	.loc 1 25 2
	ret;
}

//
// PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel
//
// One thread converts one pixel: it loads four float components (f32 or f16),
// multiplies the first three by the 3x3 k601YPbPr_To_RGB32f matrix and stores
// the three results together with the unchanged fourth component (f32 or f16).
// No offset, clamp or alpha scaling is applied.
// Going by the kernel name the source order is presumably V, U, Y, A and the
// output order is B, G, R, A.
//
// Parameters as used by the code below:
//   param_0  u64  source base address
//   param_1  u32  source row stride in pixels (index = y * param_1 + x)
//   param_2  u32  source format: non-zero -> 4 x f32 (16 bytes/pixel),
//                 zero -> 4 x f16 (8 bytes/pixel)
//   param_3  u64  destination base address
//   param_4  u32  destination row stride in pixels (index = y * param_4 + x)
//   param_5  u32  destination format, same encoding as param_2
//   param_6  u32  exclusive upper bound for x (presumably the image width)
//   param_7  u32  exclusive upper bound for y (presumably the image height)
//   param_8  u32  not read by this kernel
//
.visible .entry PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<44>;
	.reg .s64 	%rd<13>;


	// %rd2 = source pointer, %rd1 = destination pointer (both as global addresses).
	// %r5 / %r6 = source stride / format flag, %r7 / %r8 = destination stride /
	// format flag, %r9 / %r10 = x / y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r9, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r10, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r9;
	setp.lt.s32	%p2, %r2, %r10;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB19_7;
	bra.uni 	BB19_1;

BB19_1:
	.loc 2 51 1
	// %r3 = source pixel index; param_2 == 0 selects the 16-bit float load at BB19_3.
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB19_3;

	// 32-bit float source, 16 bytes per pixel: %f40..%f43 = components 0..3.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f21, %f22, %f23, %f24}, [%rd6];
	mov.f32 	%f43, %f24;
	mov.f32 	%f42, %f23;
	mov.f32 	%f41, %f22;
	mov.f32 	%f40, %f21;
	bra.uni 	BB19_4;

BB19_3:
	// 16-bit float source, 8 bytes per pixel: widen each half to f32 into
	// the same %f40..%f43 registers.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f40, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f41, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f42, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f43, %temp;
	}

BB19_4:
	// 3x3 matrix multiply with k601YPbPr_To_RGB32f (row-major, 9 floats).
	// The vector is (component 2, component 1, component 0) = (%f42, %f41, %f40);
	// %f13 = row 0, %f14 = row 1, %f15 = row 2.
	ld.const.f32 	%f25, [k601YPbPr_To_RGB32f];
	ld.const.f32 	%f26, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f27, %f41, %f26;
	fma.rn.ftz.f32 	%f28, %f42, %f25, %f27;
	ld.const.f32 	%f29, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f13, %f40, %f29, %f28;
	ld.const.f32 	%f30, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f31, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f32, %f41, %f31;
	fma.rn.ftz.f32 	%f33, %f42, %f30, %f32;
	ld.const.f32 	%f34, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f14, %f40, %f34, %f33;
	ld.const.f32 	%f35, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f36, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f37, %f41, %f36;
	fma.rn.ftz.f32 	%f38, %f42, %f35, %f37;
	ld.const.f32 	%f39, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f15, %f40, %f39, %f38;
	.loc 2 51 1
	// %r4 = destination pixel index = y * destination stride + x.
	mad.lo.s32 	%r4, %r2, %r7, %r1;
	.loc 2 51 1
	// param_5 == 0 selects the 16-bit float store at BB19_6.
	setp.eq.s32	%p5, %r8, 0;
	@%p5 bra 	BB19_6;

	// 32-bit float output, 16 bytes per pixel, order: row 2, row 1, row 0,
	// source component 3 passed through unchanged.
	mul.wide.s32 	%rd9, %r4, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f15, %f14, %f13, %f43};
	bra.uni 	BB19_7;

BB19_6:
	// 16-bit float output, 8 bytes per pixel: convert each component to f16
	// (round to nearest) and store in the same order as the f32 path.
	mul.wide.s32 	%rd11, %r4, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f15;
	mov.b16 	%rs9, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f14;
	mov.b16 	%rs10, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f43;
	mov.b16 	%rs12, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs9, %rs10, %rs11, %rs12};

BB19_7:
	.loc 1 25 2
	ret;
}

//
// PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel
//
// One thread converts one pixel: it loads four float components (f32 or f16),
// multiplies the first three by the 3x3 kRGB32f_To_601YPbPr matrix and stores
// the three results together with the unchanged fourth component (f32 or f16).
// No offset, clamp or alpha scaling is applied.
// Going by the kernel name the source order is presumably B, G, R, A and the
// output order is V, U, Y, A.
//
// Parameters as used by the code below:
//   param_0  u64  source base address
//   param_1  u32  source row stride in pixels (index = y * param_1 + x)
//   param_2  u32  source format: non-zero -> 4 x f32 (16 bytes/pixel),
//                 zero -> 4 x f16 (8 bytes/pixel)
//   param_3  u64  destination base address
//   param_4  u32  destination row stride in pixels (index = y * param_4 + x)
//   param_5  u32  destination format, same encoding as param_2
//   param_6  u32  exclusive upper bound for x (presumably the image width)
//   param_7  u32  exclusive upper bound for y (presumably the image height)
//   param_8  u32  not read by this kernel
//
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<44>;
	.reg .s64 	%rd<13>;


	// %rd2 = source pointer, %rd1 = destination pointer (both as global addresses).
	// %r5 / %r6 = source stride / format flag, %r7 / %r8 = destination stride /
	// format flag, %r9 / %r10 = x / y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_1];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r9, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r10, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r9;
	setp.lt.s32	%p2, %r2, %r10;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB20_7;
	bra.uni 	BB20_1;

BB20_1:
	.loc 2 51 1
	// %r3 = source pixel index; param_2 == 0 selects the 16-bit float load at BB20_3.
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB20_3;

	// 32-bit float source, 16 bytes per pixel: %f40..%f43 = components 0..3.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f21, %f22, %f23, %f24}, [%rd6];
	mov.f32 	%f43, %f24;
	mov.f32 	%f42, %f23;
	mov.f32 	%f41, %f22;
	mov.f32 	%f40, %f21;
	bra.uni 	BB20_4;

BB20_3:
	// 16-bit float source, 8 bytes per pixel: widen each half to f32 into
	// the same %f40..%f43 registers.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f40, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f41, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f42, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f43, %temp;
	}

BB20_4:
	// 3x3 matrix multiply with kRGB32f_To_601YPbPr (row-major, 9 floats).
	// The vector is (component 2, component 1, component 0) = (%f42, %f41, %f40);
	// %f13 = row 0, %f14 = row 1, %f15 = row 2.
	ld.const.f32 	%f25, [kRGB32f_To_601YPbPr];
	ld.const.f32 	%f26, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f27, %f41, %f26;
	fma.rn.ftz.f32 	%f28, %f42, %f25, %f27;
	ld.const.f32 	%f29, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f13, %f40, %f29, %f28;
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+12];
	ld.const.f32 	%f31, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f32, %f41, %f31;
	fma.rn.ftz.f32 	%f33, %f42, %f30, %f32;
	ld.const.f32 	%f34, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f14, %f40, %f34, %f33;
	ld.const.f32 	%f35, [kRGB32f_To_601YPbPr+24];
	ld.const.f32 	%f36, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f37, %f41, %f36;
	fma.rn.ftz.f32 	%f38, %f42, %f35, %f37;
	ld.const.f32 	%f39, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f15, %f40, %f39, %f38;
	.loc 2 51 1
	// %r4 = destination pixel index = y * destination stride + x.
	mad.lo.s32 	%r4, %r2, %r7, %r1;
	.loc 2 51 1
	// param_5 == 0 selects the 16-bit float store at BB20_6.
	setp.eq.s32	%p5, %r8, 0;
	@%p5 bra 	BB20_6;

	// 32-bit float output, 16 bytes per pixel, order: row 2, row 1, row 0,
	// source component 3 passed through unchanged.
	mul.wide.s32 	%rd9, %r4, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f15, %f14, %f13, %f43};
	bra.uni 	BB20_7;

BB20_6:
	// 16-bit float output, 8 bytes per pixel: convert each component to f16
	// (round to nearest) and store in the same order as the f32 path.
	mul.wide.s32 	%rd11, %r4, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f15;
	mov.b16 	%rs9, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f14;
	mov.b16 	%rs10, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f43;
	mov.b16 	%rs12, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs9, %rs10, %rs11, %rs12};

BB20_7:
	.loc 1 25 2
	ret;
}

//
// PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel
//
// One thread converts one pixel: it loads four float components (f32 or f16),
// multiplies the first three by the 3x3 k601YPbPr_To_RGB32f matrix, divides
// the results by the fourth component and stores four values (f32 or f16).
// When the fourth component is at or below about 8.0e-6 all four outputs
// are written as 0 instead.
// Going by the kernel name the source order is presumably V, U, Y and alpha
// (colour premultiplied), and the output order is B, G, R, A.
//
// Parameters as used by the code below:
//   param_0  u64  source base address
//   param_1  u32  source row stride in pixels (index = y * param_1 + x)
//   param_2  u32  source format: non-zero -> 4 x f32 (16 bytes/pixel),
//                 zero -> 4 x f16 (8 bytes/pixel)
//   param_3  u64  destination base address
//   param_4  u32  destination row stride in pixels (index = y * param_4 + x)
//   param_5  u32  destination format, same encoding as param_2
//   param_6  u32  exclusive upper bound for x (presumably the image width)
//   param_7  u32  exclusive upper bound for y (presumably the image height)
//   param_8  u32  not read by this kernel
//
.visible .entry PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<7>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<62>;
	.reg .s64 	%rd<13>;


	// %rd2 = source pointer, %rd1 = destination pointer (both as global addresses).
	// %r5 / %r6 = source stride / format flag, %r7 / %r8 = destination stride /
	// format flag, %r9 / %r10 = x / y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r9, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r10, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 5 92 1
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r9;
	setp.lt.s32	%p2, %r2, %r10;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	@!%p3 bra 	BB21_10;
	bra.uni 	BB21_1;

BB21_1:
	.loc 2 51 1
	// %r3 = source pixel index; param_2 == 0 selects the 16-bit float load at BB21_3.
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB21_3;

	// 32-bit float source, 16 bytes per pixel: %f54..%f57 = components 0..3.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f28, %f29, %f30, %f31}, [%rd6];
	mov.f32 	%f57, %f31;
	mov.f32 	%f56, %f30;
	mov.f32 	%f55, %f29;
	mov.f32 	%f54, %f28;
	bra.uni 	BB21_4;

BB21_3:
	// 16-bit float source, 8 bytes per pixel: widen each half to f32 into
	// the same %f54..%f57 registers.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f54, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f55, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f56, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f57, %temp;
	}

BB21_4:
	// 3x3 matrix multiply with k601YPbPr_To_RGB32f (row-major, 9 floats).
	// The vector is (component 2, component 1, component 0) = (%f56, %f55, %f54);
	// %f13 = row 0, %f14 = row 1, %f15 = row 2.
	ld.const.f32 	%f32, [k601YPbPr_To_RGB32f];
	ld.const.f32 	%f33, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f34, %f55, %f33;
	fma.rn.ftz.f32 	%f35, %f56, %f32, %f34;
	ld.const.f32 	%f36, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f13, %f54, %f36, %f35;
	ld.const.f32 	%f37, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f38, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f39, %f55, %f38;
	fma.rn.ftz.f32 	%f40, %f56, %f37, %f39;
	ld.const.f32 	%f41, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f14, %f54, %f41, %f40;
	ld.const.f32 	%f42, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f43, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f44, %f55, %f43;
	fma.rn.ftz.f32 	%f45, %f56, %f42, %f44;
	ld.const.f32 	%f46, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f15, %f54, %f46, %f45;
	.loc 2 51 1
	// 0fB70637BD is about -8.0e-6: take the divide path only when alpha (%f57)
	// exceeds that small threshold (or the compare is unordered).
	add.ftz.f32 	%f47, %f57, 0fB70637BD;
	setp.gtu.ftz.f32	%p5, %f47, 0f00000000;
	.loc 5 92 71
	// Output alpha defaults to the source alpha; the zero path overwrites it.
	mov.f32 	%f61, %f57;
	.loc 2 51 1
	@%p5 bra 	BB21_6;

	// Alpha at or below the threshold: all four outputs, alpha included, become 0.
	mov.f32 	%f61, 0f00000000;
	mov.f32 	%f60, %f61;
	mov.f32 	%f59, %f61;
	mov.f32 	%f58, %f61;
	bra.uni 	BB21_7;

BB21_6:
	// Divide the colour by alpha: %f53 = 1.0 / alpha, then scale rows 0..2.
	mov.f32 	%f52, 0f3F800000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f53, %f52, %f57;
	.loc 2 51 1
	mul.ftz.f32 	%f60, %f13, %f53;
	mul.ftz.f32 	%f59, %f14, %f53;
	mul.ftz.f32 	%f58, %f15, %f53;

BB21_7:
	.loc 2 51 1
	// %r4 = destination pixel index = y * destination stride + x.
	mad.lo.s32 	%r4, %r2, %r7, %r1;
	.loc 2 51 1
	// param_5 == 0 selects the 16-bit float store at BB21_9.
	setp.eq.s32	%p6, %r8, 0;
	@%p6 bra 	BB21_9;

	// 32-bit float output, 16 bytes per pixel, order: row 2, row 1, row 0, alpha.
	mul.wide.s32 	%rd9, %r4, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f58, %f59, %f60, %f61};
	bra.uni 	BB21_10;

BB21_9:
	// 16-bit float output, 8 bytes per pixel: convert each component to f16
	// (round to nearest) and store in the same order as the f32 path.
	mul.wide.s32 	%rd11, %r4, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f58;
	mov.b16 	%rs9, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f59;
	mov.b16 	%rs10, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f60;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f61;
	mov.b16 	%rs12, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs9, %rs10, %rs11, %rs12};

BB21_10:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel
//
// One thread per pixel (x, y). The thread loads four components c0..c3 from
// the source, multiplies (c2, c1, c0) by the 3x3 matrix kRGB32f_To_601YPbPr,
// scales the three results by c3, and stores {row2*c3, row1*c3, row0*c3, c3}.
// Row 0 of kRGB32f_To_601YPbPr decodes to (0.299, 0.587, 0.114), so c2/c1/c0
// act as R/G/B and row 0 is luma. The names B, G, R, A (in) and V, U, Y, P
// (out) used below follow the kernel name -- TODO confirm against the .cu.
//
// Parameters:
//   param_0 (u64): source buffer address (converted to a global address).
//   param_1 (u32): source row pitch in pixels (multiplied by y for indexing).
//   param_2 (u32): source format flag: nonzero = 4 x f32 per pixel (16 bytes),
//                  zero = 4 x f16 per pixel (8 bytes).
//   param_3 (u64): destination buffer address (converted to a global address).
//   param_4 (u32): destination row pitch in pixels.
//   param_5 (u32): destination format flag, same encoding as param_2.
//   param_6 (u32): exclusive upper bound on x (presumably the width).
//   param_7 (u32): exclusive upper bound on y (presumably the height).
//   param_8 (u32): declared but never loaded by this kernel.
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<47>;
	.reg .s64 	%rd<13>;


	// Load the arguments: %rd3/%rd4 = src/dst addresses, %r5/%r7 = src/dst
	// pitch, %r6/%r8 = src/dst format flags, %r9/%r10 = x/y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_1];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_3];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_4];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_5];
	ld.param.u32 	%r9, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_6];
	ld.param.u32 	%r10, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 5 92 1
	// Bounds guard: only threads with x < param_6 and y < param_7 do work.
	setp.lt.s32	%p1, %r1, %r9;
	setp.lt.s32	%p2, %r2, %r10;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	// Out-of-range threads jump straight to the ret at BB22_7.
	@!%p3 bra 	BB22_7;
	bra.uni 	BB22_1;

BB22_1:
	.loc 2 51 1
	// %r3 = source pixel index = y * srcPitch + x
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	// Source flag == 0 selects the 16-bit float load path at BB22_3.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB22_3;

	// f32 source: 16 bytes per pixel, one vector load of all four components.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f21, %f22, %f23, %f24}, [%rd6];
	// Copy into the registers shared with the f16 path: %f43..%f46 = c0..c3.
	mov.f32 	%f46, %f24;
	mov.f32 	%f45, %f23;
	mov.f32 	%f44, %f22;
	mov.f32 	%f43, %f21;
	bra.uni 	BB22_4;

BB22_3:
	// f16 source: 8 bytes per pixel, loaded as four 16-bit values.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	// Widen each half to f32: %f43..%f46 = c0..c3.
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f43, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f44, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f45, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f46, %temp;
	}

BB22_4:
	// 3x3 matrix times (c2, c1, c0): each row is m[r][1]*c1, then fused
	// multiply-adds with m[r][0]*c2 and m[r][2]*c0.
	// Row 0 -> %f30 (luma).
	ld.const.f32 	%f25, [kRGB32f_To_601YPbPr];
	ld.const.f32 	%f26, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f27, %f44, %f26;
	fma.rn.ftz.f32 	%f28, %f45, %f25, %f27;
	ld.const.f32 	%f29, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f30, %f43, %f29, %f28;
	// Row 1 -> %f36.
	ld.const.f32 	%f31, [kRGB32f_To_601YPbPr+12];
	ld.const.f32 	%f32, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f33, %f44, %f32;
	fma.rn.ftz.f32 	%f34, %f45, %f31, %f33;
	ld.const.f32 	%f35, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f36, %f43, %f35, %f34;
	// Row 2 -> %f42.
	ld.const.f32 	%f37, [kRGB32f_To_601YPbPr+24];
	ld.const.f32 	%f38, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f39, %f44, %f38;
	fma.rn.ftz.f32 	%f40, %f45, %f37, %f39;
	ld.const.f32 	%f41, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f42, %f43, %f41, %f40;
	// Scale all three results by c3 (%f46), i.e. premultiply by alpha.
	mul.ftz.f32 	%f19, %f30, %f46;
	mul.ftz.f32 	%f18, %f36, %f46;
	mul.ftz.f32 	%f17, %f42, %f46;
	.loc 2 51 1
	// %r4 = destination pixel index = y * dstPitch + x
	mad.lo.s32 	%r4, %r2, %r7, %r1;
	.loc 2 51 1
	// Destination flag == 0 selects the 16-bit float store path at BB22_6.
	setp.eq.s32	%p5, %r8, 0;
	@%p5 bra 	BB22_6;

	// f32 destination: store {row2*c3, row1*c3, row0*c3, c3}.
	mul.wide.s32 	%rd9, %r4, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f17, %f18, %f19, %f46};
	bra.uni 	BB22_7;

BB22_6:
	// f16 destination: round each component to nearest half, then store
	// the four halves (8 bytes per pixel) in the same component order.
	mul.wide.s32 	%rd11, %r4, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f17;
	mov.b16 	%rs9, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f18;
	mov.b16 	%rs10, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f19;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f46;
	mov.b16 	%rs12, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs9, %rs10, %rs11, %rs12};

BB22_7:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel
//
// One thread per pixel (x, y). The thread loads components c0..c2 from the
// source, multiplies (c2, c1, c0) by the 3x3 matrix k601YPbPr_To_RGB32f, and
// stores {row2, row1, row0, 1.0}. The source's fourth component does not
// influence the output. Row 0 of k601YPbPr_To_RGB32f decodes to
// (1.0, 0.0, 1.402), so c2/c1/c0 act as Y/Pb/Pr and row 0 is red. The names
// V, U, Y, X (in) and B, G, R, A (out) follow the kernel name -- TODO confirm.
//
// Parameters:
//   param_0 (u64): source buffer address (converted to a global address).
//   param_1 (u32): source row pitch in pixels (multiplied by y for indexing).
//   param_2 (u32): source format flag: nonzero = 4 x f32 per pixel (16 bytes),
//                  zero = 4 x f16 per pixel (8 bytes).
//   param_3 (u64): destination buffer address (converted to a global address).
//   param_4 (u32): destination row pitch in pixels.
//   param_5 (u32): destination format flag, same encoding as param_2.
//   param_6 (u32): exclusive upper bound on x (presumably the width).
//   param_7 (u32): exclusive upper bound on y (presumably the height).
//   param_8 (u32): declared but never loaded by this kernel.
.visible .entry PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<43>;
	.reg .s64 	%rd<13>;


	// Load the arguments: %rd3/%rd4 = src/dst addresses, %r5/%r7 = src/dst
	// pitch, %r6/%r8 = src/dst format flags, %r9/%r10 = x/y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r9, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r10, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 5 92 1
	// Bounds guard: only threads with x < param_6 and y < param_7 do work.
	setp.lt.s32	%p1, %r1, %r9;
	setp.lt.s32	%p2, %r2, %r10;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	// Out-of-range threads jump straight to the ret at BB23_7.
	@!%p3 bra 	BB23_7;
	bra.uni 	BB23_1;

BB23_1:
	.loc 2 51 1
	// %r3 = source pixel index = y * srcPitch + x
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	// Source flag == 0 selects the 16-bit float load path at BB23_3.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB23_3;

	// f32 source: 16 bytes per pixel, one vector load of all four components.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f20, %f21, %f22, %f23}, [%rd6];
	// %f4 receives c3 but is never read afterwards in this kernel.
	mov.f32 	%f4, %f23;
	// %f40..%f42 = c0..c2, the registers shared with the f16 path.
	mov.f32 	%f42, %f22;
	mov.f32 	%f41, %f21;
	mov.f32 	%f40, %f20;
	bra.uni 	BB23_4;

BB23_3:
	// f16 source: 8 bytes per pixel, loaded as four 16-bit values.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	// Widen only the first three halves; %rs4 (c3) is loaded but not used.
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f40, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f41, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f42, %temp;
	}

BB23_4:
	// 3x3 matrix times (c2, c1, c0): each row is m[r][1]*c1, then fused
	// multiply-adds with m[r][0]*c2 and m[r][2]*c0.
	// Row 0 -> %f13.
	ld.const.f32 	%f24, [k601YPbPr_To_RGB32f];
	ld.const.f32 	%f25, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f26, %f41, %f25;
	fma.rn.ftz.f32 	%f27, %f42, %f24, %f26;
	ld.const.f32 	%f28, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f13, %f40, %f28, %f27;
	// Row 1 -> %f14.
	ld.const.f32 	%f29, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f30, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f31, %f41, %f30;
	fma.rn.ftz.f32 	%f32, %f42, %f29, %f31;
	ld.const.f32 	%f33, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f14, %f40, %f33, %f32;
	// Row 2 -> %f15.
	ld.const.f32 	%f34, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f35, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f36, %f41, %f35;
	fma.rn.ftz.f32 	%f37, %f42, %f34, %f36;
	ld.const.f32 	%f38, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f15, %f40, %f38, %f37;
	// Fourth output component is the constant 1.0 (0f3F800000).
	mov.f32 	%f19, 0f3F800000;
	.loc 2 51 1
	// %r4 = destination pixel index = y * dstPitch + x
	mad.lo.s32 	%r4, %r2, %r7, %r1;
	.loc 2 51 1
	// Destination flag == 0 selects the 16-bit float store path at BB23_6.
	setp.eq.s32	%p5, %r8, 0;
	@%p5 bra 	BB23_6;

	// f32 destination: store {row2, row1, row0, 1.0}.
	mul.wide.s32 	%rd9, %r4, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f15, %f14, %f13, %f19};
	bra.uni 	BB23_7;

BB23_6:
	// f16 destination: round each component to nearest half, then store
	// the four halves (8 bytes per pixel) in the same component order.
	mul.wide.s32 	%rd11, %r4, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f15;
	mov.b16 	%rs9, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f14;
	mov.b16 	%rs10, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f19;
	mov.b16 	%rs12, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs9, %rs10, %rs11, %rs12};

BB23_7:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel
//
// One thread per pixel (x, y). The thread loads four components c0..c3 from
// the source, multiplies (c2, c1, c0) by the 3x3 matrix kRGB32f_To_601YPbPr,
// scales the three results by c3, and stores {row2*c3, row1*c3, row0*c3, 1.0}.
// Row 0 of kRGB32f_To_601YPbPr decodes to (0.299, 0.587, 0.114), so c2/c1/c0
// act as R/G/B and row 0 is luma. The names B, G, R, A (in) and V, U, Y, X
// (out) follow the kernel name -- TODO confirm against the .cu source.
// NOTE(review): the colour results are multiplied by the source's fourth
// component even though the output's fourth component is forced to 1.0;
// presumably intentional (alpha folded into colour) -- confirm.
//
// Parameters:
//   param_0 (u64): source buffer address (converted to a global address).
//   param_1 (u32): source row pitch in pixels (multiplied by y for indexing).
//   param_2 (u32): source format flag: nonzero = 4 x f32 per pixel (16 bytes),
//                  zero = 4 x f16 per pixel (8 bytes).
//   param_3 (u64): destination buffer address (converted to a global address).
//   param_4 (u32): destination row pitch in pixels.
//   param_5 (u32): destination format flag, same encoding as param_2.
//   param_6 (u32): exclusive upper bound on x (presumably the width).
//   param_7 (u32): exclusive upper bound on y (presumably the height).
//   param_8 (u32): declared but never loaded by this kernel.
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<47>;
	.reg .s64 	%rd<13>;


	// Load the arguments: %rd3/%rd4 = src/dst addresses, %r5/%r7 = src/dst
	// pitch, %r6/%r8 = src/dst format flags, %r9/%r10 = x/y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_1];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_3];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_4];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_5];
	ld.param.u32 	%r9, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_6];
	ld.param.u32 	%r10, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 5 92 1
	// Bounds guard: only threads with x < param_6 and y < param_7 do work.
	setp.lt.s32	%p1, %r1, %r9;
	setp.lt.s32	%p2, %r2, %r10;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	// Out-of-range threads jump straight to the ret at BB24_7.
	@!%p3 bra 	BB24_7;
	bra.uni 	BB24_1;

BB24_1:
	.loc 2 51 1
	// %r3 = source pixel index = y * srcPitch + x
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	// Source flag == 0 selects the 16-bit float load path at BB24_3.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB24_3;

	// f32 source: 16 bytes per pixel, one vector load of all four components.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f20, %f21, %f22, %f23}, [%rd6];
	// Copy into the registers shared with the f16 path: %f43..%f46 = c0..c3.
	mov.f32 	%f46, %f23;
	mov.f32 	%f45, %f22;
	mov.f32 	%f44, %f21;
	mov.f32 	%f43, %f20;
	bra.uni 	BB24_4;

BB24_3:
	// f16 source: 8 bytes per pixel, loaded as four 16-bit values.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	// Widen each half to f32: %f43..%f46 = c0..c3.
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f43, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f44, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f45, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f46, %temp;
	}

BB24_4:
	// 3x3 matrix times (c2, c1, c0): each row is m[r][1]*c1, then fused
	// multiply-adds with m[r][0]*c2 and m[r][2]*c0.
	// Row 0 -> %f29 (luma).
	ld.const.f32 	%f24, [kRGB32f_To_601YPbPr];
	ld.const.f32 	%f25, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f26, %f44, %f25;
	fma.rn.ftz.f32 	%f27, %f45, %f24, %f26;
	ld.const.f32 	%f28, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f29, %f43, %f28, %f27;
	// Row 1 -> %f35.
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+12];
	ld.const.f32 	%f31, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f32, %f44, %f31;
	fma.rn.ftz.f32 	%f33, %f45, %f30, %f32;
	ld.const.f32 	%f34, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f35, %f43, %f34, %f33;
	// Row 2 -> %f41.
	ld.const.f32 	%f36, [kRGB32f_To_601YPbPr+24];
	ld.const.f32 	%f37, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f38, %f44, %f37;
	fma.rn.ftz.f32 	%f39, %f45, %f36, %f38;
	ld.const.f32 	%f40, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f41, %f43, %f40, %f39;
	// Scale all three results by c3 (%f46).
	mul.ftz.f32 	%f18, %f29, %f46;
	mul.ftz.f32 	%f17, %f35, %f46;
	mul.ftz.f32 	%f16, %f41, %f46;
	// Fourth output component is the constant 1.0 (0f3F800000).
	mov.f32 	%f19, 0f3F800000;
	.loc 2 51 1
	// %r4 = destination pixel index = y * dstPitch + x
	mad.lo.s32 	%r4, %r2, %r7, %r1;
	.loc 2 51 1
	// Destination flag == 0 selects the 16-bit float store path at BB24_6.
	setp.eq.s32	%p5, %r8, 0;
	@%p5 bra 	BB24_6;

	// f32 destination: store {row2*c3, row1*c3, row0*c3, 1.0}.
	mul.wide.s32 	%rd9, %r4, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f16, %f17, %f18, %f19};
	bra.uni 	BB24_7;

BB24_6:
	// f16 destination: round each component to nearest half, then store
	// the four halves (8 bytes per pixel) in the same component order.
	mul.wide.s32 	%rd11, %r4, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f16;
	mov.b16 	%rs9, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f17;
	mov.b16 	%rs10, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f18;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f19;
	mov.b16 	%rs12, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs9, %rs10, %rs11, %rs12};

BB24_7:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel
//
// One thread per pixel (x, y). The thread loads four components c0..c3 from
// the source, multiplies (c2, c1, c0) by the 3x3 matrix k709YPbPr_To_RGB32f,
// and stores {row2, row1, row0, c3}; c3 passes through unchanged.
// k709YPbPr_To_RGB32f is defined outside this view; the names V, U, Y, A (in)
// and B, G, R, A (out) follow the kernel name -- TODO confirm.
//
// Parameters:
//   param_0 (u64): source buffer address (converted to a global address).
//   param_1 (u32): source row pitch in pixels (multiplied by y for indexing).
//   param_2 (u32): source format flag: nonzero = 4 x f32 per pixel (16 bytes),
//                  zero = 4 x f16 per pixel (8 bytes).
//   param_3 (u64): destination buffer address (converted to a global address).
//   param_4 (u32): destination row pitch in pixels.
//   param_5 (u32): destination format flag, same encoding as param_2.
//   param_6 (u32): exclusive upper bound on x (presumably the width).
//   param_7 (u32): exclusive upper bound on y (presumably the height).
//   param_8 (u32): declared but never loaded by this kernel.
.visible .entry PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<44>;
	.reg .s64 	%rd<13>;


	// Load the arguments: %rd3/%rd4 = src/dst addresses, %r5/%r7 = src/dst
	// pitch, %r6/%r8 = src/dst format flags, %r9/%r10 = x/y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r9, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r10, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 5 92 1
	// Bounds guard: only threads with x < param_6 and y < param_7 do work.
	setp.lt.s32	%p1, %r1, %r9;
	setp.lt.s32	%p2, %r2, %r10;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	// Out-of-range threads jump straight to the ret at BB25_7.
	@!%p3 bra 	BB25_7;
	bra.uni 	BB25_1;

BB25_1:
	.loc 2 51 1
	// %r3 = source pixel index = y * srcPitch + x
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	// Source flag == 0 selects the 16-bit float load path at BB25_3.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB25_3;

	// f32 source: 16 bytes per pixel, one vector load of all four components.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f21, %f22, %f23, %f24}, [%rd6];
	// Copy into the registers shared with the f16 path: %f40..%f43 = c0..c3.
	mov.f32 	%f43, %f24;
	mov.f32 	%f42, %f23;
	mov.f32 	%f41, %f22;
	mov.f32 	%f40, %f21;
	bra.uni 	BB25_4;

BB25_3:
	// f16 source: 8 bytes per pixel, loaded as four 16-bit values.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	// Widen each half to f32: %f40..%f43 = c0..c3.
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f40, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f41, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f42, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f43, %temp;
	}

BB25_4:
	// 3x3 matrix times (c2, c1, c0): each row is m[r][1]*c1, then fused
	// multiply-adds with m[r][0]*c2 and m[r][2]*c0.
	// Row 0 -> %f13.
	ld.const.f32 	%f25, [k709YPbPr_To_RGB32f];
	ld.const.f32 	%f26, [k709YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f27, %f41, %f26;
	fma.rn.ftz.f32 	%f28, %f42, %f25, %f27;
	ld.const.f32 	%f29, [k709YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f13, %f40, %f29, %f28;
	// Row 1 -> %f14.
	ld.const.f32 	%f30, [k709YPbPr_To_RGB32f+12];
	ld.const.f32 	%f31, [k709YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f32, %f41, %f31;
	fma.rn.ftz.f32 	%f33, %f42, %f30, %f32;
	ld.const.f32 	%f34, [k709YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f14, %f40, %f34, %f33;
	// Row 2 -> %f15.
	ld.const.f32 	%f35, [k709YPbPr_To_RGB32f+24];
	ld.const.f32 	%f36, [k709YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f37, %f41, %f36;
	fma.rn.ftz.f32 	%f38, %f42, %f35, %f37;
	ld.const.f32 	%f39, [k709YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f15, %f40, %f39, %f38;
	.loc 2 51 1
	// %r4 = destination pixel index = y * dstPitch + x
	mad.lo.s32 	%r4, %r2, %r7, %r1;
	.loc 2 51 1
	// Destination flag == 0 selects the 16-bit float store path at BB25_6.
	setp.eq.s32	%p5, %r8, 0;
	@%p5 bra 	BB25_6;

	// f32 destination: store {row2, row1, row0, c3}.
	mul.wide.s32 	%rd9, %r4, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f15, %f14, %f13, %f43};
	bra.uni 	BB25_7;

BB25_6:
	// f16 destination: round each component to nearest half, then store
	// the four halves (8 bytes per pixel) in the same component order.
	mul.wide.s32 	%rd11, %r4, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f15;
	mov.b16 	%rs9, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f14;
	mov.b16 	%rs10, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f43;
	mov.b16 	%rs12, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs9, %rs10, %rs11, %rs12};

BB25_7:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel
//
// One thread per pixel (x, y). The thread loads four components c0..c3 from
// the source, multiplies (c2, c1, c0) by the 3x3 matrix kRGB32f_To_709YPbPr,
// and stores {row2, row1, row0, c3}; c3 passes through unchanged and the
// colour results are not scaled by it.
// kRGB32f_To_709YPbPr is defined outside this view; the names B, G, R, A (in)
// and V, U, Y, A (out) follow the kernel name -- TODO confirm.
//
// Parameters:
//   param_0 (u64): source buffer address (converted to a global address).
//   param_1 (u32): source row pitch in pixels (multiplied by y for indexing).
//   param_2 (u32): source format flag: nonzero = 4 x f32 per pixel (16 bytes),
//                  zero = 4 x f16 per pixel (8 bytes).
//   param_3 (u64): destination buffer address (converted to a global address).
//   param_4 (u32): destination row pitch in pixels.
//   param_5 (u32): destination format flag, same encoding as param_2.
//   param_6 (u32): exclusive upper bound on x (presumably the width).
//   param_7 (u32): exclusive upper bound on y (presumably the height).
//   param_8 (u32): declared but never loaded by this kernel.
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<44>;
	.reg .s64 	%rd<13>;


	// Load the arguments: %rd3/%rd4 = src/dst addresses, %r5/%r7 = src/dst
	// pitch, %r6/%r8 = src/dst format flags, %r9/%r10 = x/y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_1];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_3];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_4];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_5];
	ld.param.u32 	%r9, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_6];
	ld.param.u32 	%r10, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 5 92 1
	// Bounds guard: only threads with x < param_6 and y < param_7 do work.
	setp.lt.s32	%p1, %r1, %r9;
	setp.lt.s32	%p2, %r2, %r10;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	// Out-of-range threads jump straight to the ret at BB26_7.
	@!%p3 bra 	BB26_7;
	bra.uni 	BB26_1;

BB26_1:
	.loc 2 51 1
	// %r3 = source pixel index = y * srcPitch + x
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	// Source flag == 0 selects the 16-bit float load path at BB26_3.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB26_3;

	// f32 source: 16 bytes per pixel, one vector load of all four components.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f21, %f22, %f23, %f24}, [%rd6];
	// Copy into the registers shared with the f16 path: %f40..%f43 = c0..c3.
	mov.f32 	%f43, %f24;
	mov.f32 	%f42, %f23;
	mov.f32 	%f41, %f22;
	mov.f32 	%f40, %f21;
	bra.uni 	BB26_4;

BB26_3:
	// f16 source: 8 bytes per pixel, loaded as four 16-bit values.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	// Widen each half to f32: %f40..%f43 = c0..c3.
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f40, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f41, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f42, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f43, %temp;
	}

BB26_4:
	// 3x3 matrix times (c2, c1, c0): each row is m[r][1]*c1, then fused
	// multiply-adds with m[r][0]*c2 and m[r][2]*c0.
	// Row 0 -> %f13.
	ld.const.f32 	%f25, [kRGB32f_To_709YPbPr];
	ld.const.f32 	%f26, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f27, %f41, %f26;
	fma.rn.ftz.f32 	%f28, %f42, %f25, %f27;
	ld.const.f32 	%f29, [kRGB32f_To_709YPbPr+8];
	fma.rn.ftz.f32 	%f13, %f40, %f29, %f28;
	// Row 1 -> %f14.
	ld.const.f32 	%f30, [kRGB32f_To_709YPbPr+12];
	ld.const.f32 	%f31, [kRGB32f_To_709YPbPr+16];
	mul.ftz.f32 	%f32, %f41, %f31;
	fma.rn.ftz.f32 	%f33, %f42, %f30, %f32;
	ld.const.f32 	%f34, [kRGB32f_To_709YPbPr+20];
	fma.rn.ftz.f32 	%f14, %f40, %f34, %f33;
	// Row 2 -> %f15.
	ld.const.f32 	%f35, [kRGB32f_To_709YPbPr+24];
	ld.const.f32 	%f36, [kRGB32f_To_709YPbPr+28];
	mul.ftz.f32 	%f37, %f41, %f36;
	fma.rn.ftz.f32 	%f38, %f42, %f35, %f37;
	ld.const.f32 	%f39, [kRGB32f_To_709YPbPr+32];
	fma.rn.ftz.f32 	%f15, %f40, %f39, %f38;
	.loc 2 51 1
	// %r4 = destination pixel index = y * dstPitch + x
	mad.lo.s32 	%r4, %r2, %r7, %r1;
	.loc 2 51 1
	// Destination flag == 0 selects the 16-bit float store path at BB26_6.
	setp.eq.s32	%p5, %r8, 0;
	@%p5 bra 	BB26_6;

	// f32 destination: store {row2, row1, row0, c3}.
	mul.wide.s32 	%rd9, %r4, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f15, %f14, %f13, %f43};
	bra.uni 	BB26_7;

BB26_6:
	// f16 destination: round each component to nearest half, then store
	// the four halves (8 bytes per pixel) in the same component order.
	mul.wide.s32 	%rd11, %r4, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f15;
	mov.b16 	%rs9, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f14;
	mov.b16 	%rs10, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f43;
	mov.b16 	%rs12, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs9, %rs10, %rs11, %rs12};

BB26_7:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel
//
// One thread per pixel (x, y). The thread loads four components c0..c3 from
// the source and multiplies (c2, c1, c0) by the 3x3 matrix
// k709YPbPr_To_RGB32f. It then divides the three results by c3 when
// c3 - 8e-6 > 0 (or is NaN) and stores {row2/c3, row1/c3, row0/c3, c3};
// otherwise it stores four zeros.
// k709YPbPr_To_RGB32f is defined outside this view; the names V, U, Y, P (in,
// premultiplied) and B, G, R, A (out) follow the kernel name -- TODO confirm.
//
// Parameters:
//   param_0 (u64): source buffer address (converted to a global address).
//   param_1 (u32): source row pitch in pixels (multiplied by y for indexing).
//   param_2 (u32): source format flag: nonzero = 4 x f32 per pixel (16 bytes),
//                  zero = 4 x f16 per pixel (8 bytes).
//   param_3 (u64): destination buffer address (converted to a global address).
//   param_4 (u32): destination row pitch in pixels.
//   param_5 (u32): destination format flag, same encoding as param_2.
//   param_6 (u32): exclusive upper bound on x (presumably the width).
//   param_7 (u32): exclusive upper bound on y (presumably the height).
//   param_8 (u32): declared but never loaded by this kernel.
.visible .entry PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<7>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<62>;
	.reg .s64 	%rd<13>;


	// Load the arguments: %rd3/%rd4 = src/dst addresses, %r5/%r7 = src/dst
	// pitch, %r6/%r8 = src/dst format flags, %r9/%r10 = x/y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r9, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r10, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 5 92 1
	// Bounds guard: only threads with x < param_6 and y < param_7 do work.
	setp.lt.s32	%p1, %r1, %r9;
	setp.lt.s32	%p2, %r2, %r10;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	// Out-of-range threads jump straight to the ret at BB27_10.
	@!%p3 bra 	BB27_10;
	bra.uni 	BB27_1;

BB27_1:
	.loc 2 51 1
	// %r3 = source pixel index = y * srcPitch + x
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	// Source flag == 0 selects the 16-bit float load path at BB27_3.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB27_3;

	// f32 source: 16 bytes per pixel, one vector load of all four components.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f28, %f29, %f30, %f31}, [%rd6];
	// Copy into the registers shared with the f16 path: %f54..%f57 = c0..c3.
	mov.f32 	%f57, %f31;
	mov.f32 	%f56, %f30;
	mov.f32 	%f55, %f29;
	mov.f32 	%f54, %f28;
	bra.uni 	BB27_4;

BB27_3:
	// f16 source: 8 bytes per pixel, loaded as four 16-bit values.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	// Widen each half to f32: %f54..%f57 = c0..c3.
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f54, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f55, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f56, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f57, %temp;
	}

BB27_4:
	// 3x3 matrix times (c2, c1, c0): each row is m[r][1]*c1, then fused
	// multiply-adds with m[r][0]*c2 and m[r][2]*c0.
	// Row 0 -> %f13.
	ld.const.f32 	%f32, [k709YPbPr_To_RGB32f];
	ld.const.f32 	%f33, [k709YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f34, %f55, %f33;
	fma.rn.ftz.f32 	%f35, %f56, %f32, %f34;
	ld.const.f32 	%f36, [k709YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f13, %f54, %f36, %f35;
	// Row 1 -> %f14.
	ld.const.f32 	%f37, [k709YPbPr_To_RGB32f+12];
	ld.const.f32 	%f38, [k709YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f39, %f55, %f38;
	fma.rn.ftz.f32 	%f40, %f56, %f37, %f39;
	ld.const.f32 	%f41, [k709YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f14, %f54, %f41, %f40;
	// Row 2 -> %f15.
	ld.const.f32 	%f42, [k709YPbPr_To_RGB32f+24];
	ld.const.f32 	%f43, [k709YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f44, %f55, %f43;
	fma.rn.ftz.f32 	%f45, %f56, %f42, %f44;
	ld.const.f32 	%f46, [k709YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f15, %f54, %f46, %f45;
	.loc 2 51 1
	// %f47 = c3 + 0fB70637BD, i.e. c3 minus roughly 8e-6.
	add.ftz.f32 	%f47, %f57, 0fB70637BD;
	// %p5 is true when %f47 > 0 or the comparison is unordered (NaN).
	setp.gtu.ftz.f32	%p5, %f47, 0f00000000;
	.loc 5 92 71
	// Default fourth output is c3; the fall-through path overwrites it with 0.
	mov.f32 	%f61, %f57;
	.loc 2 51 1
	@%p5 bra 	BB27_6;

	// c3 is at or below the threshold: all four output components become 0.
	mov.f32 	%f61, 0f00000000;
	mov.f32 	%f60, %f61;
	mov.f32 	%f59, %f61;
	mov.f32 	%f58, %f61;
	bra.uni 	BB27_7;

BB27_6:
	// %f53 = approximate 1.0 / c3, then scale the three matrix results by it.
	mov.f32 	%f52, 0f3F800000;
	.loc 3 3606 10
	div.approx.ftz.f32 	%f53, %f52, %f57;
	.loc 2 51 1
	mul.ftz.f32 	%f60, %f13, %f53;
	mul.ftz.f32 	%f59, %f14, %f53;
	mul.ftz.f32 	%f58, %f15, %f53;

BB27_7:
	.loc 2 51 1
	// %r4 = destination pixel index = y * dstPitch + x
	mad.lo.s32 	%r4, %r2, %r7, %r1;
	.loc 2 51 1
	// Destination flag == 0 selects the 16-bit float store path at BB27_9.
	setp.eq.s32	%p6, %r8, 0;
	@%p6 bra 	BB27_9;

	// f32 destination: store {row2, row1, row0, fourth} after the divide/zero step.
	mul.wide.s32 	%rd9, %r4, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f58, %f59, %f60, %f61};
	bra.uni 	BB27_10;

BB27_9:
	// f16 destination: round each component to nearest half, then store
	// the four halves (8 bytes per pixel) in the same component order.
	mul.wide.s32 	%rd11, %r4, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f58;
	mov.b16 	%rs9, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f59;
	mov.b16 	%rs10, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f60;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f61;
	mov.b16 	%rs12, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs9, %rs10, %rs11, %rs12};

BB27_10:
	.loc 1 25 2
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel
//
// One thread per pixel (x, y). The thread loads four components c0..c3 from
// the source, multiplies (c2, c1, c0) by the 3x3 matrix kRGB32f_To_709YPbPr,
// scales the three results by c3, and stores {row2*c3, row1*c3, row0*c3, c3}.
// kRGB32f_To_709YPbPr is defined outside this view; the names B, G, R, A (in)
// and V, U, Y, P (out, premultiplied) follow the kernel name -- TODO confirm.
//
// Parameters:
//   param_0 (u64): source buffer address (converted to a global address).
//   param_1 (u32): source row pitch in pixels (multiplied by y for indexing).
//   param_2 (u32): source format flag: nonzero = 4 x f32 per pixel (16 bytes),
//                  zero = 4 x f16 per pixel (8 bytes).
//   param_3 (u64): destination buffer address (converted to a global address).
//   param_4 (u32): destination row pitch in pixels.
//   param_5 (u32): destination format flag, same encoding as param_2.
//   param_6 (u32): exclusive upper bound on x (presumably the width).
//   param_7 (u32): exclusive upper bound on y (presumably the height).
//   param_8 (u32): declared but never loaded by this kernel.
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<47>;
	.reg .s64 	%rd<13>;


	// Load the arguments: %rd3/%rd4 = src/dst addresses, %r5/%r7 = src/dst
	// pitch, %r6/%r8 = src/dst format flags, %r9/%r10 = x/y bounds.
	ld.param.u64 	%rd3, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_1];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_3];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_4];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_5];
	ld.param.u32 	%r9, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_6];
	ld.param.u32 	%r10, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_7];
	// %rd1 = destination base, %rd2 = source base, both as global addresses.
	cvta.to.global.u64 	%rd1, %rd4;
	cvta.to.global.u64 	%rd2, %rd3;
	.loc 5 92 1
	// %r1 = x = ntid.x * ctaid.x + tid.x
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	// %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r14, %ntid.y;
	mov.u32 	%r15, %ctaid.y;
	mov.u32 	%r16, %tid.y;
	mad.lo.s32 	%r2, %r14, %r15, %r16;
	.loc 5 92 1
	// Bounds guard: only threads with x < param_6 and y < param_7 do work.
	setp.lt.s32	%p1, %r1, %r9;
	setp.lt.s32	%p2, %r2, %r10;
	and.pred  	%p3, %p1, %p2;
	.loc 5 92 1
	// Out-of-range threads jump straight to the ret at BB28_7.
	@!%p3 bra 	BB28_7;
	bra.uni 	BB28_1;

BB28_1:
	.loc 2 51 1
	// %r3 = source pixel index = y * srcPitch + x
	mad.lo.s32 	%r3, %r2, %r5, %r1;
	// Source flag == 0 selects the 16-bit float load path at BB28_3.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB28_3;

	// f32 source: 16 bytes per pixel, one vector load of all four components.
	mul.wide.s32 	%rd5, %r3, 16;
	add.s64 	%rd6, %rd2, %rd5;
	ld.global.v4.f32 	{%f21, %f22, %f23, %f24}, [%rd6];
	// Copy into the registers shared with the f16 path: %f43..%f46 = c0..c3.
	mov.f32 	%f46, %f24;
	mov.f32 	%f45, %f23;
	mov.f32 	%f44, %f22;
	mov.f32 	%f43, %f21;
	bra.uni 	BB28_4;

BB28_3:
	// f16 source: 8 bytes per pixel, loaded as four 16-bit values.
	mul.wide.s32 	%rd7, %r3, 8;
	add.s64 	%rd8, %rd2, %rd7;
	.loc 2 51 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd8];
	.loc 3 3518 10
	// Widen each half to f32: %f43..%f46 = c0..c3.
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f43, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f44, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f45, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f46, %temp;
	}

BB28_4:
	// 3x3 matrix times (c2, c1, c0): each row is m[r][1]*c1, then fused
	// multiply-adds with m[r][0]*c2 and m[r][2]*c0.
	// Row 0 -> %f30.
	ld.const.f32 	%f25, [kRGB32f_To_709YPbPr];
	ld.const.f32 	%f26, [kRGB32f_To_709YPbPr+4];
	mul.ftz.f32 	%f27, %f44, %f26;
	fma.rn.ftz.f32 	%f28, %f45, %f25, %f27;
	ld.const.f32 	%f29, [kRGB32f_To_709YPbPr+8];
	fma.rn.ftz.f32 	%f30, %f43, %f29, %f28;
	// Row 1 -> %f36.
	ld.const.f32 	%f31, [kRGB32f_To_709YPbPr+12];
	ld.const.f32 	%f32, [kRGB32f_To_709YPbPr+16];
	mul.ftz.f32 	%f33, %f44, %f32;
	fma.rn.ftz.f32 	%f34, %f45, %f31, %f33;
	ld.const.f32 	%f35, [kRGB32f_To_709YPbPr+20];
	fma.rn.ftz.f32 	%f36, %f43, %f35, %f34;
	// Row 2 -> %f42.
	ld.const.f32 	%f37, [kRGB32f_To_709YPbPr+24];
	ld.const.f32 	%f38, [kRGB32f_To_709YPbPr+28];
	mul.ftz.f32 	%f39, %f44, %f38;
	fma.rn.ftz.f32 	%f40, %f45, %f37, %f39;
	ld.const.f32 	%f41, [kRGB32f_To_709YPbPr+32];
	fma.rn.ftz.f32 	%f42, %f43, %f41, %f40;
	// Scale all three results by c3 (%f46), i.e. premultiply by alpha.
	mul.ftz.f32 	%f19, %f30, %f46;
	mul.ftz.f32 	%f18, %f36, %f46;
	mul.ftz.f32 	%f17, %f42, %f46;
	.loc 2 51 1
	// %r4 = destination pixel index = y * dstPitch + x
	mad.lo.s32 	%r4, %r2, %r7, %r1;
	.loc 2 51 1
	// Destination flag == 0 selects the 16-bit float store path at BB28_6.
	setp.eq.s32	%p5, %r8, 0;
	@%p5 bra 	BB28_6;

	// f32 destination: store {row2*c3, row1*c3, row0*c3, c3}.
	mul.wide.s32 	%rd9, %r4, 16;
	add.s64 	%rd10, %rd1, %rd9;
	.loc 2 51 1
	st.global.v4.f32 	[%rd10], {%f17, %f18, %f19, %f46};
	bra.uni 	BB28_7;

BB28_6:
	// f16 destination: round each component to nearest half, then store
	// the four halves (8 bytes per pixel) in the same component order.
	mul.wide.s32 	%rd11, %r4, 8;
	add.s64 	%rd12, %rd1, %rd11;
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f17;
	mov.b16 	%rs9, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f18;
	mov.b16 	%rs10, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f19;
	mov.b16 	%rs11, %temp;
}
	.loc 3 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f46;
	mov.b16 	%rs12, %temp;
}
	.loc 2 51 231
	st.global.v4.u16 	[%rd12], {%rs9, %rs10, %rs11, %rs12};

BB28_7:
	.loc 1 25 2
	ret;
}

//
// PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel
//
// Each thread converts one pixel: the first three source components
// (V, U, Y in the kernel's VUYX naming) are multiplied by the 3x3 matrix
// k709YPbPr_To_RGB32f and written out as {B, G, R, 1.0}.
//
// Parameter roles, as used by the code below:
//   param_0  source buffer address (converted to a global address)
//   param_1  source row pitch, counted in pixels
//   param_2  source layout flag: non-zero = four f32 per pixel (16 bytes),
//            zero = four f16 per pixel (8 bytes)
//   param_3  destination buffer address (converted to a global address)
//   param_4  destination row pitch, counted in pixels
//   param_5  destination layout flag, same encoding as param_2
//   param_6  exclusive upper bound on the x coordinate
//   param_7  exclusive upper bound on the y coordinate
//   param_8  declared but never read by this kernel
//
.visible .entry PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%xOut, %yOut, %outside, %srcIsF32, %dstIsF32;
	.reg .b16 	%hIn<4>;
	.reg .b16 	%hOut<4>;
	.reg .s32 	%srcPitch, %srcFlag, %dstPitch, %dstFlag, %xEnd, %yEnd;
	.reg .s32 	%bdx, %bix, %tix, %bdy, %biy, %tiy;
	.reg .s32 	%px, %py, %srcIdx, %dstIdx;
	.reg .f32 	%chV, %chU, %lumY, %padX;
	.reg .f32 	%m<9>;
	.reg .f32 	%red, %green, %blue, %opaque;
	.reg .s64 	%srcArg, %dstArg, %srcBase, %dstBase;
	.reg .s64 	%srcOff, %srcAddr, %dstOff, %dstAddr;

	// ---- kernel arguments ------------------------------------------------
	ld.param.u64 	%srcArg, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u64 	%dstArg, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%srcPitch, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%srcFlag, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u32 	%dstPitch, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%dstFlag, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%xEnd, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%yEnd, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%dstBase, %dstArg;
	cvta.to.global.u64 	%srcBase, %srcArg;

	// ---- pixel coordinate of this thread: ntid * ctaid + tid -------------
	.loc 5 92 1
	mov.u32 	%bdx, %ntid.x;
	mov.u32 	%bix, %ctaid.x;
	mov.u32 	%tix, %tid.x;
	mad.lo.s32 	%px, %bdx, %bix, %tix;
	mov.u32 	%bdy, %ntid.y;
	mov.u32 	%biy, %ctaid.y;
	mov.u32 	%tiy, %tid.y;
	mad.lo.s32 	%py, %bdy, %biy, %tiy;

	// Guard: leave when x >= param_6 or y >= param_7 (signed compares).
	.loc 5 92 1
	setp.ge.s32 	%xOut, %px, %xEnd;
	setp.ge.s32 	%yOut, %py, %yEnd;
	or.pred 	%outside, %xOut, %yOut;
	@%outside bra 	VUYX709_TO_BGRA_EXIT;

	// ---- fetch the source pixel ------------------------------------------
	.loc 2 51 1
	mad.lo.s32 	%srcIdx, %py, %srcPitch, %px;
	setp.ne.s32 	%srcIsF32, %srcFlag, 0;
	@%srcIsF32 bra 	VUYX709_TO_BGRA_LOAD_F32;

	// Half-float source: 8 bytes per pixel. Only the first three
	// components are widened; the fourth is loaded but not used.
	mul.wide.s32 	%srcOff, %srcIdx, 8;
	add.s64 	%srcAddr, %srcBase, %srcOff;
	.loc 2 51 1
	ld.global.v4.u16 	{%hIn0, %hIn1, %hIn2, %hIn3}, [%srcAddr];
	.loc 3 3518 10
	cvt.f32.f16 	%chV, %hIn0;
	cvt.f32.f16 	%chU, %hIn1;
	cvt.f32.f16 	%lumY, %hIn2;
	bra.uni 	VUYX709_TO_BGRA_MATRIX;

VUYX709_TO_BGRA_LOAD_F32:
	// Float source: 16 bytes per pixel. The fourth component lands in
	// %padX and is not read again.
	mul.wide.s32 	%srcOff, %srcIdx, 16;
	add.s64 	%srcAddr, %srcBase, %srcOff;
	ld.global.v4.f32 	{%chV, %chU, %lumY, %padX}, [%srcAddr];

VUYX709_TO_BGRA_MATRIX:
	// ---- 3x3 matrix, stored row-major in constant memory ------------------
	ld.const.f32 	%m0, [k709YPbPr_To_RGB32f];
	ld.const.f32 	%m1, [k709YPbPr_To_RGB32f+4];
	ld.const.f32 	%m2, [k709YPbPr_To_RGB32f+8];
	ld.const.f32 	%m3, [k709YPbPr_To_RGB32f+12];
	ld.const.f32 	%m4, [k709YPbPr_To_RGB32f+16];
	ld.const.f32 	%m5, [k709YPbPr_To_RGB32f+20];
	ld.const.f32 	%m6, [k709YPbPr_To_RGB32f+24];
	ld.const.f32 	%m7, [k709YPbPr_To_RGB32f+28];
	ld.const.f32 	%m8, [k709YPbPr_To_RGB32f+32];

	// Each row is evaluated as fma(V, c2, fma(Y, c0, U * c1)); the
	// evaluation order is kept fixed so rounding is unchanged.
	mul.ftz.f32 	%red, %chU, %m1;
	fma.rn.ftz.f32 	%red, %lumY, %m0, %red;
	fma.rn.ftz.f32 	%red, %chV, %m2, %red;

	mul.ftz.f32 	%green, %chU, %m4;
	fma.rn.ftz.f32 	%green, %lumY, %m3, %green;
	fma.rn.ftz.f32 	%green, %chV, %m5, %green;

	mul.ftz.f32 	%blue, %chU, %m7;
	fma.rn.ftz.f32 	%blue, %lumY, %m6, %blue;
	fma.rn.ftz.f32 	%blue, %chV, %m8, %blue;

	// Fourth output component is the constant 1.0f.
	mov.f32 	%opaque, 0f3F800000;

	// ---- write the destination pixel as {B, G, R, 1.0} ---------------------
	.loc 2 51 1
	mad.lo.s32 	%dstIdx, %py, %dstPitch, %px;
	.loc 2 51 1
	setp.ne.s32 	%dstIsF32, %dstFlag, 0;
	@%dstIsF32 bra 	VUYX709_TO_BGRA_STORE_F32;

	// Half-float destination: narrow with round-to-nearest, 8 bytes per pixel.
	mul.wide.s32 	%dstOff, %dstIdx, 8;
	add.s64 	%dstAddr, %dstBase, %dstOff;
	.loc 3 3513 10
	cvt.rn.ftz.f16.f32 	%hOut0, %blue;
	.loc 3 3513 10
	cvt.rn.ftz.f16.f32 	%hOut1, %green;
	.loc 3 3513 10
	cvt.rn.ftz.f16.f32 	%hOut2, %red;
	.loc 3 3513 10
	cvt.rn.ftz.f16.f32 	%hOut3, %opaque;
	.loc 2 51 231
	st.global.v4.u16 	[%dstAddr], {%hOut0, %hOut1, %hOut2, %hOut3};
	bra.uni 	VUYX709_TO_BGRA_EXIT;

VUYX709_TO_BGRA_STORE_F32:
	// Float destination: 16 bytes per pixel.
	mul.wide.s32 	%dstOff, %dstIdx, 16;
	add.s64 	%dstAddr, %dstBase, %dstOff;
	.loc 2 51 1
	st.global.v4.f32 	[%dstAddr], {%blue, %green, %red, %opaque};

VUYX709_TO_BGRA_EXIT:
	.loc 1 25 2
	ret;
}

//
// PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel
//
// Each thread converts one pixel: the first three source components
// (B, G, R in the kernel's BGRA naming) are multiplied by the 3x3 matrix
// kRGB32f_To_709YPbPr, every result is then scaled by the fourth source
// component, and the pixel is written as {row2, row1, row0, 1.0}
// (V, U, Y, X in the kernel's naming).
//
// Parameter roles, as used by the code below:
//   param_0  source buffer address (converted to a global address)
//   param_1  source row pitch, counted in pixels
//   param_2  source layout flag: non-zero = four f32 per pixel (16 bytes),
//            zero = four f16 per pixel (8 bytes)
//   param_3  destination buffer address (converted to a global address)
//   param_4  destination row pitch, counted in pixels
//   param_5  destination layout flag, same encoding as param_2
//   param_6  exclusive upper bound on the x coordinate
//   param_7  exclusive upper bound on the y coordinate
//   param_8  declared but never read by this kernel
//
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_8
)
{
	.reg .pred 	%xOut, %yOut, %outside, %srcIsF32, %dstIsF32;
	.reg .b16 	%hIn<4>;
	.reg .b16 	%hOut<4>;
	.reg .s32 	%srcPitch, %srcFlag, %dstPitch, %dstFlag, %xEnd, %yEnd;
	.reg .s32 	%bdx, %bix, %tix, %bdy, %biy, %tiy;
	.reg .s32 	%px, %py, %srcIdx, %dstIdx;
	.reg .f32 	%blue, %green, %red, %alpha;
	.reg .f32 	%m<9>;
	.reg .f32 	%lumY, %chU, %chV, %outY, %outU, %outV, %one;
	.reg .s64 	%srcArg, %dstArg, %srcBase, %dstBase;
	.reg .s64 	%srcOff, %srcAddr, %dstOff, %dstAddr;

	// ---- kernel arguments ------------------------------------------------
	ld.param.u64 	%srcArg, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_0];
	ld.param.u64 	%dstArg, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_3];
	ld.param.u32 	%srcPitch, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_1];
	ld.param.u32 	%srcFlag, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_2];
	ld.param.u32 	%dstPitch, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_4];
	ld.param.u32 	%dstFlag, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_5];
	ld.param.u32 	%xEnd, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_6];
	ld.param.u32 	%yEnd, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_7];
	cvta.to.global.u64 	%dstBase, %dstArg;
	cvta.to.global.u64 	%srcBase, %srcArg;

	// ---- pixel coordinate of this thread: ntid * ctaid + tid -------------
	.loc 5 92 1
	mov.u32 	%bdx, %ntid.x;
	mov.u32 	%bix, %ctaid.x;
	mov.u32 	%tix, %tid.x;
	mad.lo.s32 	%px, %bdx, %bix, %tix;
	mov.u32 	%bdy, %ntid.y;
	mov.u32 	%biy, %ctaid.y;
	mov.u32 	%tiy, %tid.y;
	mad.lo.s32 	%py, %bdy, %biy, %tiy;

	// Guard: leave when x >= param_6 or y >= param_7 (signed compares).
	.loc 5 92 1
	setp.ge.s32 	%xOut, %px, %xEnd;
	setp.ge.s32 	%yOut, %py, %yEnd;
	or.pred 	%outside, %xOut, %yOut;
	@%outside bra 	BGRA_TO_VUYX709_EXIT;

	// ---- fetch the source pixel ------------------------------------------
	.loc 2 51 1
	mad.lo.s32 	%srcIdx, %py, %srcPitch, %px;
	setp.ne.s32 	%srcIsF32, %srcFlag, 0;
	@%srcIsF32 bra 	BGRA_TO_VUYX709_LOAD_F32;

	// Half-float source: 8 bytes per pixel, all four components widened.
	mul.wide.s32 	%srcOff, %srcIdx, 8;
	add.s64 	%srcAddr, %srcBase, %srcOff;
	.loc 2 51 1
	ld.global.v4.u16 	{%hIn0, %hIn1, %hIn2, %hIn3}, [%srcAddr];
	.loc 3 3518 10
	cvt.f32.f16 	%blue, %hIn0;
	cvt.f32.f16 	%green, %hIn1;
	cvt.f32.f16 	%red, %hIn2;
	cvt.f32.f16 	%alpha, %hIn3;
	bra.uni 	BGRA_TO_VUYX709_MATRIX;

BGRA_TO_VUYX709_LOAD_F32:
	// Float source: 16 bytes per pixel.
	mul.wide.s32 	%srcOff, %srcIdx, 16;
	add.s64 	%srcAddr, %srcBase, %srcOff;
	ld.global.v4.f32 	{%blue, %green, %red, %alpha}, [%srcAddr];

BGRA_TO_VUYX709_MATRIX:
	// ---- 3x3 matrix, stored row-major in constant memory ------------------
	ld.const.f32 	%m0, [kRGB32f_To_709YPbPr];
	ld.const.f32 	%m1, [kRGB32f_To_709YPbPr+4];
	ld.const.f32 	%m2, [kRGB32f_To_709YPbPr+8];
	ld.const.f32 	%m3, [kRGB32f_To_709YPbPr+12];
	ld.const.f32 	%m4, [kRGB32f_To_709YPbPr+16];
	ld.const.f32 	%m5, [kRGB32f_To_709YPbPr+20];
	ld.const.f32 	%m6, [kRGB32f_To_709YPbPr+24];
	ld.const.f32 	%m7, [kRGB32f_To_709YPbPr+28];
	ld.const.f32 	%m8, [kRGB32f_To_709YPbPr+32];

	// Each row is evaluated as fma(B, c2, fma(R, c0, G * c1)); the
	// evaluation order is kept fixed so rounding is unchanged.
	mul.ftz.f32 	%lumY, %green, %m1;
	fma.rn.ftz.f32 	%lumY, %red, %m0, %lumY;
	fma.rn.ftz.f32 	%lumY, %blue, %m2, %lumY;

	mul.ftz.f32 	%chU, %green, %m4;
	fma.rn.ftz.f32 	%chU, %red, %m3, %chU;
	fma.rn.ftz.f32 	%chU, %blue, %m5, %chU;

	mul.ftz.f32 	%chV, %green, %m7;
	fma.rn.ftz.f32 	%chV, %red, %m6, %chV;
	fma.rn.ftz.f32 	%chV, %blue, %m8, %chV;

	// Scale every converted component by the fourth source component;
	// the fourth output component is the constant 1.0f instead.
	mul.ftz.f32 	%outY, %lumY, %alpha;
	mul.ftz.f32 	%outU, %chU, %alpha;
	mul.ftz.f32 	%outV, %chV, %alpha;
	mov.f32 	%one, 0f3F800000;

	// ---- write the destination pixel as {V, U, Y, 1.0} ---------------------
	.loc 2 51 1
	mad.lo.s32 	%dstIdx, %py, %dstPitch, %px;
	.loc 2 51 1
	setp.ne.s32 	%dstIsF32, %dstFlag, 0;
	@%dstIsF32 bra 	BGRA_TO_VUYX709_STORE_F32;

	// Half-float destination: narrow with round-to-nearest, 8 bytes per pixel.
	mul.wide.s32 	%dstOff, %dstIdx, 8;
	add.s64 	%dstAddr, %dstBase, %dstOff;
	.loc 3 3513 10
	cvt.rn.ftz.f16.f32 	%hOut0, %outV;
	.loc 3 3513 10
	cvt.rn.ftz.f16.f32 	%hOut1, %outU;
	.loc 3 3513 10
	cvt.rn.ftz.f16.f32 	%hOut2, %outY;
	.loc 3 3513 10
	cvt.rn.ftz.f16.f32 	%hOut3, %one;
	.loc 2 51 231
	st.global.v4.u16 	[%dstAddr], {%hOut0, %hOut1, %hOut2, %hOut3};
	bra.uni 	BGRA_TO_VUYX709_EXIT;

BGRA_TO_VUYX709_STORE_F32:
	// Float destination: 16 bytes per pixel.
	mul.wide.s32 	%dstOff, %dstIdx, 16;
	add.s64 	%dstAddr, %dstBase, %dstOff;
	.loc 2 51 1
	st.global.v4.f32 	[%dstAddr], {%outV, %outU, %outY, %one};

BGRA_TO_VUYX709_EXIT:
	.loc 1 25 2
	ret;
}


