//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//

.version 4.1
.target sm_30
.address_size 64

// ---------------------------------------------------------------------------
// Colour-conversion tables (.const state space).
//
// Every 36-byte table below is consumed by the code in this file as nine
// consecutive f32 values (ld.const.f32 at byte offsets 0, 4, ... 32).  The
// bytes are the little-endian encoding of each float, e.g. {0, 0, 128, 63}
// is 0x3F800000 = 1.0.  The nine values are applied as three rows of three:
// entries [0..2] weight the three input components to form output 0,
// entries [3..5] form output 1 and entries [6..8] form output 2.
//
// NOTE(review): "601" / "709" in the names presumably refer to ITU-R BT.601
// and BT.709 coefficients -- confirm against the original CUDA source.
// ---------------------------------------------------------------------------
.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 33, 201, 44, 190, 111, 155, 169, 190, 0, 0, 0, 63, 0, 0, 0, 63, 70, 94, 214, 190, 232, 134, 166, 189};
.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 188, 116, 179, 63, 0, 0, 128, 63, 152, 50, 176, 190, 158, 209, 54, 191, 0, 0, 128, 63, 229, 208, 226, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70, 246, 130, 66, 145, 141, 0, 67, 94, 186, 199, 65, 33, 48, 23, 194, 240, 103, 148, 194, 0, 0, 224, 66, 0, 0, 224, 66, 111, 146, 187, 194, 70, 182, 145, 193};
.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 182, 23, 205, 59, 37, 160, 149, 59, 40, 15, 201, 186, 156, 239, 80, 187, 37, 160, 149, 59, 236, 155, 1, 60, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219, 121, 131, 62, 152, 14, 1, 63, 18, 131, 200, 61, 174, 199, 23, 190, 238, 252, 148, 190, 197, 224, 224, 62, 197, 224, 224, 62, 217, 78, 188, 190, 174, 71, 146, 189};
.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 160, 74, 204, 63, 127, 10, 149, 63, 254, 148, 200, 190, 184, 30, 80, 191, 127, 10, 149, 63, 78, 26, 1, 64, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 166, 27, 44, 190, 39, 241, 168, 190, 250, 254, 254, 62, 250, 254, 254, 62, 43, 135, 213, 190, 59, 223, 165, 189};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0, 0, 128, 63, 0, 0, 0, 0, 72, 193, 178, 63, 0, 0, 128, 63, 143, 130, 175, 190, 225, 26, 54, 191, 0, 0, 128, 63, 20, 238, 225, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113, 125, 152, 66, 92, 175, 21, 67, 92, 143, 232, 65, 158, 111, 43, 194, 49, 72, 168, 194, 0, 0, 254, 66, 0, 0, 254, 66, 170, 177, 212, 194, 88, 57, 165, 193};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129, 128, 128, 59, 0, 0, 0, 0, 188, 116, 179, 59, 129, 128, 128, 59, 194, 50, 176, 186, 179, 209, 54, 187, 129, 128, 128, 59, 229, 208, 226, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208, 179, 89, 62, 89, 23, 55, 63, 152, 221, 147, 61, 186, 164, 234, 189, 210, 86, 197, 190, 0, 0, 0, 63, 0, 0, 0, 63, 190, 134, 232, 190, 16, 202, 59, 189};
.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 12, 147, 201, 63, 0, 0, 128, 63, 221, 209, 63, 190, 243, 173, 239, 190, 0, 0, 128, 63, 77, 132, 237, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106, 60, 58, 66, 6, 161, 28, 67, 244, 253, 124, 65, 223, 79, 205, 193, 8, 172, 172, 194, 0, 0, 224, 66, 0, 0, 224, 66, 195, 117, 203, 194, 236, 81, 36, 193};
.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 239, 94, 230, 59, 37, 160, 149, 59, 33, 57, 91, 186, 178, 245, 8, 187, 37, 160, 149, 59, 82, 185, 7, 60, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCrFullRange_To_RGB32f[36] = {131, 128, 128, 59, 0, 0, 0, 0, 28, 147, 201, 59, 131, 128, 128, 59, 61, 210, 63, 186, 248, 173, 239, 186, 131, 128, 128, 59, 82, 132, 237, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207, 247, 58, 62, 53, 62, 29, 63, 231, 251, 125, 61, 184, 30, 206, 189, 23, 89, 173, 190, 197, 224, 224, 62, 197, 224, 224, 62, 12, 66, 204, 190, 195, 245, 36, 189};
.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 147, 120, 229, 63, 127, 10, 149, 63, 53, 94, 90, 190, 205, 108, 8, 191, 127, 10, 149, 63, 154, 49, 7, 64, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0, 0, 128, 63, 23, 100, 203, 61, 1, 77, 68, 62, 0, 0, 0, 0, 18, 103, 125, 63, 10, 158, 226, 189, 0, 0, 0, 0, 61, 98, 148, 189, 249, 191, 123, 63};
.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0, 0, 128, 63, 122, 165, 236, 189, 179, 237, 84, 190, 0, 0, 0, 0, 204, 98, 130, 63, 216, 188, 234, 61, 0, 0, 0, 0, 74, 179, 153, 61, 234, 61, 131, 63};
// Per-component offsets, three f32 each.  The code picks kYCbCrOffset when
// format bit 0x1000 is set and kYCbCrFullRangeOffset when it is clear, and
// scales the chosen values by (max depth value / 255) before applying them.
//   kYCbCrOffset          = { 16.0, 128.0, 128.0 }
//   kYCbCrFullRangeOffset = {  0.0, 128.0, 128.0 }
.const .align 4 .b8 kYCbCrOffset[12] = {0, 0, 128, 65, 0, 0, 0, 67, 0, 0, 0, 67};
.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 67};

// float MaxDepthValue(unsigned int format)
//
// Maps the depth field of a format word (bits 0xF8) to a float scale:
//     field == 0   -> 255.0    (0f437F0000)
//     field == 8   -> 1023.0   (0f447FC000)
//     field == 16  -> 32768.0  (0f47000000)
//     otherwise    -> 1.0      (0f3F800000)
// Implemented as a branch-free select chain; the last select applied has the
// highest priority, and the three tested values are mutually exclusive.
.visible .func  (.param .b32 func_retval0) _Z13MaxDepthValuej(
	.param .b32 _Z13MaxDepthValuej_param_0
)
{
	.reg .pred 	%p<4>;
	.reg .s32 	%r<3>;
	.reg .f32 	%f<4>;


	ld.param.u32 	%r1, [_Z13MaxDepthValuej_param_0];
	and.b32  	%r2, %r1, 248;			// isolate the depth field

	setp.eq.s32	%p1, %r2, 16;
	selp.f32	%f1, 0f47000000, 0f3F800000, %p1;	// 32768.0 or fallback 1.0
	setp.eq.s32	%p2, %r2, 8;
	selp.f32	%f2, 0f447FC000, %f1, %p2;		// 1023.0 overrides
	setp.eq.s32	%p3, %r2, 0;
	selp.f32	%f3, 0f437F0000, %f2, %p3;		// 255.0 overrides

	st.param.f32	[func_retval0+0], %f3;
	ret;
}

// const float* ColorSpaceConvertMatrix(unsigned int srcFormat, unsigned int dstFormat)
//
// Returns the generic address of the 9-float conversion table in .const
// space that matches the (source, destination) format pair, or 0 when no
// table exists for that pair.
//
// Format bits examined here (meanings inferred from which table names each
// bit selects -- confirm against the original CUDA source):
//     0x0100  "32f / YPbPr" flavour versus "8u / YCbCr" flavour
//     0x0200  source/destination is a Y'CbCr-family format (clear: RGB)
//     0x0800  709 tables (clear: 601 tables)
//     0x1000  video-range tables (clear: the "FullRange" tables)
//
// Fix relative to the generated code: the RGB8u -> 709 leaf tested bit
// 0x1000 with inverted polarity.  It handed out kRGB8u_To_709YCbCr (whose
// coefficients are video-range, 0.2126 * 219/255 = 0.1826 in the first slot)
// when the destination was flagged full-range, and returned 0 when the
// destination was flagged video-range.  Every other leaf in this function
// (for example RGB32f -> 709 just above it) uses "0x1000 set => video-range
// table", so the test now agrees with them.
.visible .func  (.param .b64 func_retval0) _Z23ColorSpaceConvertMatrixjj(
	.param .b32 _Z23ColorSpaceConvertMatrixjj_param_0,
	.param .b32 _Z23ColorSpaceConvertMatrixjj_param_1
)
{
	.reg .pred 	%p<33>;
	.reg .s32 	%r<16>;
	.reg .s64 	%rd<32>;


	ld.param.u32 	%r9, [_Z23ColorSpaceConvertMatrixjj_param_0];	// source format
	ld.param.u32 	%r10, [_Z23ColorSpaceConvertMatrixjj_param_1];	// destination format
	and.b32  	%r11, %r9, 512;
	setp.eq.s32	%p1, %r11, 0;
	and.b32  	%r1, %r10, 256;			// %r1 = dst & 0x100
	@%p1 bra 	BB1_29;				// source bit 0x200 clear: RGB source

	// ---- source has bit 0x200 set ------------------------------------
	and.b32  	%r12, %r9, 2048;
	setp.eq.s32	%p2, %r12, 0;
	and.b32  	%r2, %r9, 4096;			// %r2 = src & 0x1000
	and.b32  	%r3, %r10, 512;			// %r3 = dst & 0x200
	@%p2 bra 	BB1_15;				// source bit 0x800 clear: 601 tables

	// ---- 709 source ---------------------------------------------------
	setp.eq.s32	%p3, %r2, 0;
	@%p3 bra 	BB1_13;				// source bit 0x1000 clear: full range

	setp.eq.s32	%p4, %r3, 0;
	and.b32  	%r4, %r9, 256;			// %r4 = src & 0x100
	@%p4 bra 	BB1_7;				// destination is RGB

	// 709 -> Y'CbCr destination: only 709YCbCr -> 601YCbCr is available.
	setp.ne.s32	%p5, %r4, 0;
	mov.u64 	%rd31, 0;
	@%p5 bra 	BB1_47;				// source bit 0x100 set: no table

	and.b32  	%r13, %r10, 4096;
	setp.eq.s32	%p6, %r13, 0;
	setp.ne.s32	%p7, %r1, 0;
	or.pred  	%p8, %p6, %p7;
	mov.u64 	%rd31, 0;
	@%p8 bra 	BB1_47;				// dst 0x1000 clear or dst 0x100 set: no table

	cvta.const.u64 	%rd31, k709YCbCr_To_601YCbCr;
	bra.uni 	BB1_47;

BB1_7:
	// 709 video-range source -> RGB destination.
	setp.eq.s32	%p9, %r4, 0;
	@%p9 bra 	BB1_10;

	// source bit 0x100 set: only the 32f destination has a table
	setp.eq.s32	%p10, %r1, 0;
	mov.u64 	%rd31, 0;
	@%p10 bra 	BB1_47;

	cvta.const.u64 	%rd31, k709YPbPr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_10:
	// source bit 0x100 clear: pick by destination bit 0x100
	setp.eq.s32	%p11, %r1, 0;
	@%p11 bra 	BB1_12;

	cvta.const.u64 	%rd31, k709YCbCr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_12:
	cvta.const.u64 	%rd31, k709YCbCr_To_RGB8u;
	bra.uni 	BB1_47;

BB1_13:
	// 709 full-range source: only an RGB32f destination has a table.
	setp.ne.s32	%p12, %r3, 0;
	setp.eq.s32	%p13, %r1, 0;
	or.pred  	%p14, %p13, %p12;
	mov.u64 	%rd31, 0;
	@%p14 bra 	BB1_47;

	cvta.const.u64 	%rd31, k709YCbCrFullRange_To_RGB32f;
	bra.uni 	BB1_47;

BB1_15:
	// ---- 601 source ---------------------------------------------------
	setp.eq.s32	%p15, %r2, 0;
	@%p15 bra 	BB1_25;				// source bit 0x1000 clear: full range

	setp.eq.s32	%p16, %r3, 0;
	and.b32  	%r5, %r9, 256;			// %r5 = src & 0x100
	@%p16 bra 	BB1_19;				// destination is RGB

	// 601 -> Y'CbCr destination: needs bit 0x100 clear on both sides.
	or.b32  	%r14, %r5, %r1;
	setp.ne.s32	%p17, %r14, 0;
	mov.u64 	%rd31, 0;
	@%p17 bra 	BB1_47;

	cvta.const.u64 	%rd31, k601YCbCr_To_709YCbCr;
	bra.uni 	BB1_47;

BB1_19:
	// 601 video-range source -> RGB destination.
	setp.eq.s32	%p18, %r5, 0;
	@%p18 bra 	BB1_22;

	// source bit 0x100 set: only the 32f destination has a table
	setp.eq.s32	%p19, %r1, 0;
	mov.u64 	%rd31, 0;
	@%p19 bra 	BB1_47;

	cvta.const.u64 	%rd31, k601YPbPr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_22:
	// source bit 0x100 clear: pick by destination bit 0x100
	setp.eq.s32	%p20, %r1, 0;
	@%p20 bra 	BB1_24;

	cvta.const.u64 	%rd31, k601YCbCr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_24:
	cvta.const.u64 	%rd31, k601YCbCr_To_RGB8u;
	bra.uni 	BB1_47;

BB1_25:
	// 601 full-range source: RGB destinations only.
	setp.ne.s32	%p21, %r3, 0;
	mov.u64 	%rd31, 0;
	@%p21 bra 	BB1_47;

	setp.eq.s32	%p22, %r1, 0;
	@%p22 bra 	BB1_28;

	cvta.const.u64 	%rd31, k601YCbCrFullRange_To_RGB32f;
	bra.uni 	BB1_47;

BB1_28:
	cvta.const.u64 	%rd31, k601YCbCrFullRange_To_RGB8u;
	bra.uni 	BB1_47;

BB1_29:
	// ---- RGB source (bit 0x200 clear) ----------------------------------
	// Note: the destination's bit 0x200 is not examined on this path.
	and.b32  	%r15, %r9, 256;
	setp.eq.s32	%p23, %r15, 0;
	and.b32  	%r6, %r10, 2048;		// %r6 = dst & 0x800
	@%p23 bra 	BB1_40;				// source bit 0x100 clear: RGB8u tables

	// ---- RGB32f source -------------------------------------------------
	setp.eq.s32	%p24, %r1, 0;
	@%p24 bra 	BB1_34;

	// destination bit 0x100 set: YPbPr tables, 601 or 709 by dst bit 0x800
	setp.eq.s32	%p25, %r6, 0;
	@%p25 bra 	BB1_33;

	cvta.const.u64 	%rd31, kRGB32f_To_709YPbPr;
	bra.uni 	BB1_47;

BB1_33:
	cvta.const.u64 	%rd31, kRGB32f_To_601YPbPr;
	bra.uni 	BB1_47;

BB1_34:
	// destination bit 0x100 clear: YCbCr tables
	setp.eq.s32	%p26, %r6, 0;
	and.b32  	%r7, %r10, 4096;		// %r7 = dst & 0x1000
	@%p26 bra 	BB1_37;

	// 709: only the video-range table exists
	setp.eq.s32	%p27, %r7, 0;
	mov.u64 	%rd31, 0;
	@%p27 bra 	BB1_47;

	cvta.const.u64 	%rd31, kRGB32f_To_709YCbCr;
	bra.uni 	BB1_47;

BB1_37:
	// 601: video-range or full-range table by dst bit 0x1000
	setp.eq.s32	%p28, %r7, 0;
	@%p28 bra 	BB1_39;

	cvta.const.u64 	%rd31, kRGB32f_To_601YCbCr;
	bra.uni 	BB1_47;

BB1_39:
	cvta.const.u64 	%rd31, kRGB32f_To_601YCbCrFullRange;
	bra.uni 	BB1_47;

BB1_40:
	// ---- RGB8u source: destination must also have bit 0x100 clear ------
	setp.ne.s32	%p29, %r1, 0;
	mov.u64 	%rd31, 0;
	@%p29 bra 	BB1_47;

	setp.eq.s32	%p30, %r6, 0;
	and.b32  	%r8, %r10, 4096;		// %r8 = dst & 0x1000
	@%p30 bra 	BB1_44;

	// 709: only the video-range table exists, so require dst bit 0x1000
	// to be SET (was inverted: setp.ne returned 0 for video-range).
	setp.eq.s32	%p31, %r8, 0;
	mov.u64 	%rd31, 0;
	@%p31 bra 	BB1_47;

	cvta.const.u64 	%rd31, kRGB8u_To_709YCbCr;
	bra.uni 	BB1_47;

BB1_44:
	// 601: video-range or full-range table by dst bit 0x1000
	setp.eq.s32	%p32, %r8, 0;
	@%p32 bra 	BB1_46;

	cvta.const.u64 	%rd31, kRGB8u_To_601YCbCr;
	bra.uni 	BB1_47;

BB1_46:
	cvta.const.u64 	%rd31, kRGB8u_To_601YCbCrFullRange;

BB1_47:
	st.param.b64	[func_retval0+0], %rd31;
	ret;
}

// float clamp<float>(float value, float lower, float upper)
//
// Returns min(max(value, lower), upper).  The lower bound is applied first,
// so when lower > upper the result is upper.  Both operations use .ftz.
.visible .func  (.param .b32 func_retval0) _Z5clampIfET_S0_S0_S0_(
	.param .b32 _Z5clampIfET_S0_S0_S0__param_0,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_1,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_2
)
{
	.reg .f32 	%v_in;
	.reg .f32 	%v_lo;
	.reg .f32 	%v_hi;
	.reg .f32 	%v_floor;
	.reg .f32 	%v_out;


	ld.param.f32 	%v_hi, [_Z5clampIfET_S0_S0_S0__param_2];
	ld.param.f32 	%v_lo, [_Z5clampIfET_S0_S0_S0__param_1];
	ld.param.f32 	%v_in, [_Z5clampIfET_S0_S0_S0__param_0];

	max.ftz.f32 	%v_floor, %v_in, %v_lo;		// raise to the lower bound
	min.ftz.f32 	%v_out, %v_floor, %v_hi;	// then cap at the upper bound

	st.param.f32	[func_retval0+0], %v_out;
	ret;
}

// float4 UnpremultiplyComponents(float4 pixel, unsigned int format)
//
// Component 0 of the pixel is used as the divisor (presumably alpha, given
// the function name); components 1..3 are the values being rescaled.
//
//   1. If (format & 0x300) == 0x200, subtract the scaled Y'CbCr offset from
//      components 1..3.  The offset table is kYCbCrOffset when bit 0x1000 is
//      set, kYCbCrFullRangeOffset otherwise, scaled by maxDepth / 255.
//   2. If (component0 - ~8e-6) is greater than zero or unordered, multiply
//      components 1..3 by maxDepth / component0 and keep component 0;
//      otherwise all four outputs become 0.
//   3. If step 1 applied, add the same scaled offset back (fused multiply-add).
//
// maxDepth follows the same mapping as MaxDepthValue: depth field (0xF8)
// 0 -> 255, 8 -> 1023, 16 -> 32768, otherwise 1.  It and the offsets are
// computed once up front; the conditional steps use predication and selects
// instead of branches.
.visible .func  (.param .align 16 .b8 func_retval0[16]) _Z23UnpremultiplyComponents6float4j(
	.param .align 16 .b8 _Z23UnpremultiplyComponents6float4j_param_0[16],
	.param .b32 _Z23UnpremultiplyComponents6float4j_param_1
)
{
	.reg .pred 	%p<7>;
	.reg .s32 	%r<5>;
	.reg .f32 	%f<31>;


	ld.param.f32 	%f1, [_Z23UnpremultiplyComponents6float4j_param_0];
	ld.param.f32 	%f2, [_Z23UnpremultiplyComponents6float4j_param_0+4];
	ld.param.f32 	%f3, [_Z23UnpremultiplyComponents6float4j_param_0+8];
	ld.param.f32 	%f4, [_Z23UnpremultiplyComponents6float4j_param_0+12];
	ld.param.u32 	%r1, [_Z23UnpremultiplyComponents6float4j_param_1];

	// %f7 = maxDepth for the format's depth field
	and.b32  	%r2, %r1, 248;
	setp.eq.s32	%p1, %r2, 16;
	selp.f32	%f5, 0f47000000, 0f3F800000, %p1;
	setp.eq.s32	%p2, %r2, 8;
	selp.f32	%f6, 0f447FC000, %f5, %p2;
	setp.eq.s32	%p3, %r2, 0;
	selp.f32	%f7, 0f437F0000, %f6, %p3;

	// %f10 / %f13 / %f16 = per-component offsets for the selected range
	and.b32  	%r3, %r1, 4096;
	setp.eq.s32	%p4, %r3, 0;			// true -> full-range offsets
	ld.const.f32 	%f8, [kYCbCrOffset];
	ld.const.f32 	%f9, [kYCbCrFullRangeOffset];
	selp.f32	%f10, %f9, %f8, %p4;
	ld.const.f32 	%f11, [kYCbCrOffset+4];
	ld.const.f32 	%f12, [kYCbCrFullRangeOffset+4];
	selp.f32	%f13, %f12, %f11, %p4;
	ld.const.f32 	%f14, [kYCbCrOffset+8];
	ld.const.f32 	%f15, [kYCbCrFullRangeOffset+8];
	selp.f32	%f16, %f15, %f14, %p4;

	// %f18 = maxDepth / 255, the factor applied to the offsets
	mov.f32 	%f17, 0f437F0000;
	div.approx.ftz.f32 	%f18, %f7, %f17;

	// %p5 = offsets apply to this format
	and.b32  	%r4, %r1, 768;
	setp.eq.s32	%p5, %r4, 512;

	// Step 1: remove the offset (separate multiply and subtract).
	mul.ftz.f32 	%f19, %f10, %f18;
	mul.ftz.f32 	%f20, %f13, %f18;
	mul.ftz.f32 	%f21, %f16, %f18;
	@%p5 sub.ftz.f32 	%f2, %f2, %f19;
	@%p5 sub.ftz.f32 	%f3, %f3, %f20;
	@%p5 sub.ftz.f32 	%f4, %f4, %f21;

	// Step 2: divide out component 0 unless it is at or below the threshold.
	add.ftz.f32 	%f22, %f1, 0fB70637BD;
	setp.gtu.ftz.f32	%p6, %f22, 0f00000000;
	div.approx.ftz.f32 	%f23, %f7, %f1;		// result discarded when %p6 is false
	mul.ftz.f32 	%f24, %f2, %f23;
	mul.ftz.f32 	%f25, %f3, %f23;
	mul.ftz.f32 	%f26, %f4, %f23;
	selp.f32	%f27, %f1, 0f00000000, %p6;
	selp.f32	%f28, %f24, 0f00000000, %p6;
	selp.f32	%f29, %f25, 0f00000000, %p6;
	selp.f32	%f30, %f26, 0f00000000, %p6;

	// Step 3: restore the offset with a fused multiply-add.
	@%p5 fma.rn.ftz.f32 	%f28, %f10, %f18, %f28;
	@%p5 fma.rn.ftz.f32 	%f29, %f13, %f18, %f29;
	@%p5 fma.rn.ftz.f32 	%f30, %f16, %f18, %f30;

	st.param.f32	[func_retval0+0], %f27;
	st.param.f32	[func_retval0+4], %f28;
	st.param.f32	[func_retval0+8], %f29;
	st.param.f32	[func_retval0+12], %f30;
	ret;
}

// float4 ReadPixelFormat_444(const float4* base, int pitch, unsigned int format,
//                            DevicePixelFormat flag, int x, int y)
//
// Loads the pixel at element index (y * pitch + x) from base and widens it
// to four floats.  Storage is chosen by the depth field (format & 0xF8):
//     0      four u8 at base + index*4
//     8      one u32 at base + index*4; 10-bit fields at bits 2, 12 and 22
//            become components 0..2, component 3 is the constant 1023.0
//     16     four u16 at base + index*8
//     other  flag != 0: four f32 at base + index*16
//            flag == 0: four f16 at base + index*8
// No scaling is applied; integer values are converted as-is.
.visible .func  (.param .align 16 .b8 func_retval0[16]) _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii(
	.param .b64 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_0,
	.param .b32 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_1,
	.param .b32 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_2,
	.param .b32 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_3,
	.param .b32 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_4,
	.param .b32 _Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_5
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%c<5>;
	.reg .b16 	%h<5>;
	.reg .s32 	%r<12>;
	.reg .f32 	%f<5>;
	.reg .s64 	%rd<5>;


	ld.param.u64 	%rd1, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_0];
	ld.param.u32 	%r1, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_1];
	ld.param.u32 	%r2, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_2];
	ld.param.u32 	%r3, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_3];
	ld.param.u32 	%r4, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_4];
	ld.param.u32 	%r5, [_Z19ReadPixelFormat_444PK6float4ij17DevicePixelFormatii_param_5];

	mad.lo.s32 	%r6, %r5, %r1, %r4;		// element index = y * pitch + x
	cvt.s64.s32	%rd2, %r6;			// sign-extended for address math
	and.b32  	%r7, %r2, 248;			// depth field

	setp.eq.s32	%p1, %r7, 0;
	@%p1 bra 	RPF_LOAD_U8;
	setp.eq.s32	%p2, %r7, 8;
	@%p2 bra 	RPF_LOAD_U10;
	setp.eq.s32	%p3, %r7, 16;
	@%p3 bra 	RPF_LOAD_U16;
	setp.ne.s32	%p4, %r3, 0;
	@%p4 bra 	RPF_LOAD_F32;

	// Half-float storage, 8 bytes per pixel.
	shl.b64 	%rd3, %rd2, 3;
	add.s64 	%rd4, %rd1, %rd3;
	ld.v4.b16 	{%h1, %h2, %h3, %h4}, [%rd4];
	cvt.f32.f16 	%f1, %h1;
	cvt.f32.f16 	%f2, %h2;
	cvt.f32.f16 	%f3, %h3;
	cvt.f32.f16 	%f4, %h4;
	bra.uni 	RPF_DONE;

RPF_LOAD_F32:
	// Full float storage, 16 bytes per pixel.
	shl.b64 	%rd3, %rd2, 4;
	add.s64 	%rd4, %rd1, %rd3;
	ld.v4.f32 	{%f1, %f2, %f3, %f4}, [%rd4];
	bra.uni 	RPF_DONE;

RPF_LOAD_U16:
	// Four unsigned 16-bit components, 8 bytes per pixel.
	shl.b64 	%rd3, %rd2, 3;
	add.s64 	%rd4, %rd1, %rd3;
	ld.v4.u16 	{%c1, %c2, %c3, %c4}, [%rd4];
	cvt.rn.f32.u16	%f1, %c1;
	cvt.rn.f32.u16	%f2, %c2;
	cvt.rn.f32.u16	%f3, %c3;
	cvt.rn.f32.u16	%f4, %c4;
	bra.uni 	RPF_DONE;

RPF_LOAD_U10:
	// Packed 10-bit components in one 32-bit word; the low two bits are ignored.
	shl.b64 	%rd3, %rd2, 2;
	add.s64 	%rd4, %rd1, %rd3;
	ld.u32 	%r8, [%rd4];
	bfe.u32 	%r9, %r8, 2, 10;
	bfe.u32 	%r10, %r8, 12, 10;
	shr.u32 	%r11, %r8, 22;
	cvt.rn.f32.u32	%f1, %r9;
	cvt.rn.f32.u32	%f2, %r10;
	cvt.rn.f32.u32	%f3, %r11;
	mov.f32 	%f4, 0f447FC000;		// 1023.0
	bra.uni 	RPF_DONE;

RPF_LOAD_U8:
	// Four unsigned 8-bit components, 4 bytes per pixel.
	shl.b64 	%rd3, %rd2, 2;
	add.s64 	%rd4, %rd1, %rd3;
	ld.v4.u8 	{%c1, %c2, %c3, %c4}, [%rd4];
	cvt.rn.f32.u16	%f1, %c1;
	cvt.rn.f32.u16	%f2, %c2;
	cvt.rn.f32.u16	%f3, %c3;
	cvt.rn.f32.u16	%f4, %c4;

RPF_DONE:
	st.param.f32	[func_retval0+0], %f1;
	st.param.f32	[func_retval0+4], %f2;
	st.param.f32	[func_retval0+8], %f3;
	st.param.f32	[func_retval0+12], %f4;
	ret;
}

// void WritePixelFormat_444(float4 pixel, float4* base, int pitch,
//                           unsigned int format, DevicePixelFormat flag,
//                           int x, int y)
//
// Stores one pixel at element index (y * pitch + x).  Storage is chosen by
// the depth field (format & 0xF8):
//     0      four bytes at base + index*4 (low 8 bits of each converted value)
//     8      one u32 at base + index*4 built as
//            (c0 << 2) + (c1 << 12) + (c2 << 22); component 3 is not stored
//     16     four u16 at base + index*8 (low 16 bits of each converted value)
//     other  flag != 0: four f32 at base + index*16
//            flag == 0: four f16 at base + index*8 (round-to-nearest)
// Integer paths convert with cvt.rzi (round toward zero).  Nothing here
// limits a component to its field width, so the packed 10-bit sum is only
// well-formed when each converted component fits in 10 bits.
.visible .func _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii(
	.param .align 16 .b8 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0[16],
	.param .b64 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_1,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_2,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_3,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_4,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_5,
	.param .b32 _Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_6
)
{
	.reg .pred 	%p<5>;
	.reg .b16 	%h<5>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<5>;
	.reg .s64 	%rd<5>;


	ld.param.f32 	%f1, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0];
	ld.param.f32 	%f2, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0+4];
	ld.param.f32 	%f3, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0+8];
	ld.param.f32 	%f4, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_0+12];
	ld.param.u64 	%rd1, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_1];
	ld.param.u32 	%r1, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_2];
	ld.param.u32 	%r2, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_3];
	ld.param.u32 	%r3, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_4];
	ld.param.u32 	%r4, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_5];
	ld.param.u32 	%r5, [_Z20WritePixelFormat_4446float4PS_ij17DevicePixelFormatii_param_6];

	mad.lo.s32 	%r6, %r5, %r1, %r4;		// element index = y * pitch + x
	cvt.s64.s32	%rd2, %r6;			// sign-extended for address math
	and.b32  	%r7, %r2, 248;			// depth field

	// Integer versions of the components, shared by the three integer paths.
	cvt.rzi.ftz.u32.f32	%r8, %f1;
	cvt.rzi.ftz.u32.f32	%r9, %f2;
	cvt.rzi.ftz.u32.f32	%r10, %f3;
	cvt.rzi.ftz.u32.f32	%r11, %f4;

	setp.eq.s32	%p1, %r7, 0;
	@%p1 bra 	WPF_STORE_U8;
	setp.eq.s32	%p2, %r7, 8;
	@%p2 bra 	WPF_STORE_U10;
	setp.eq.s32	%p3, %r7, 16;
	@%p3 bra 	WPF_STORE_U16;
	setp.ne.s32	%p4, %r3, 0;
	@%p4 bra 	WPF_STORE_F32;

	// Half-float storage, 8 bytes per pixel.
	cvt.rn.ftz.f16.f32 	%h1, %f1;
	cvt.rn.ftz.f16.f32 	%h2, %f2;
	cvt.rn.ftz.f16.f32 	%h3, %f3;
	cvt.rn.ftz.f16.f32 	%h4, %f4;
	shl.b64 	%rd3, %rd2, 3;
	add.s64 	%rd4, %rd1, %rd3;
	st.v4.b16 	[%rd4], {%h1, %h2, %h3, %h4};
	bra.uni 	WPF_DONE;

WPF_STORE_F32:
	// Full float storage, 16 bytes per pixel.
	shl.b64 	%rd3, %rd2, 4;
	add.s64 	%rd4, %rd1, %rd3;
	st.v4.f32 	[%rd4], {%f1, %f2, %f3, %f4};
	bra.uni 	WPF_DONE;

WPF_STORE_U16:
	// Four separate 16-bit stores, 8 bytes per pixel.
	shl.b64 	%rd3, %rd2, 3;
	add.s64 	%rd4, %rd1, %rd3;
	st.u16 	[%rd4], %r8;
	st.u16 	[%rd4+2], %r9;
	st.u16 	[%rd4+4], %r10;
	st.u16 	[%rd4+6], %r11;
	bra.uni 	WPF_DONE;

WPF_STORE_U10:
	// Pack three components into one word; sums wrap modulo 2^32.
	shl.b32 	%r12, %r10, 22;
	shl.b32 	%r13, %r9, 12;
	shl.b32 	%r14, %r8, 2;
	add.s32 	%r15, %r13, %r14;
	add.s32 	%r16, %r15, %r12;
	shl.b64 	%rd3, %rd2, 2;
	add.s64 	%rd4, %rd1, %rd3;
	st.u32 	[%rd4], %r16;
	bra.uni 	WPF_DONE;

WPF_STORE_U8:
	// Four separate byte stores, 4 bytes per pixel.
	shl.b64 	%rd3, %rd2, 2;
	add.s64 	%rd4, %rd1, %rd3;
	st.u8 	[%rd4], %r8;
	st.u8 	[%rd4+1], %r9;
	st.u8 	[%rd4+2], %r10;
	st.u8 	[%rd4+3], %r11;

WPF_DONE:
	ret;
}

.visible .func _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii(
	.param .b64 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_0,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_1,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_2,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_3,
	.param .b64 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_4,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_5,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_6,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_7,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_8,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_9,
	.param .b32 _Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_10
)
{
	.reg .pred 	%p<148>;
	.reg .s16 	%rs<25>;
	.reg .s32 	%r<148>;
	.reg .f32 	%f<451>;
	.reg .s64 	%rd<54>;


	ld.param.u64 	%rd14, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_0];
	ld.param.u32 	%r25, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_1];
	ld.param.u32 	%r26, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_2];
	ld.param.u32 	%r27, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_3];
	ld.param.u64 	%rd15, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_4];
	ld.param.u32 	%r28, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_5];
	ld.param.u32 	%r29, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_6];
	ld.param.u32 	%r30, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_7];
	ld.param.u32 	%r31, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_8];
	ld.param.u32 	%r32, [_Z29PixelFormatConvert_444_To_444PK6float4ij17DevicePixelFormatPS_ijS2_iii_param_9];
	mov.u32 	%r33, %ctaid.x;
	mov.u32 	%r34, %ntid.x;
	mov.u32 	%r35, %tid.x;
	mad.lo.s32 	%r1, %r34, %r33, %r35;
	mov.u32 	%r36, %ntid.y;
	mov.u32 	%r37, %ctaid.y;
	mov.u32 	%r38, %tid.y;
	mad.lo.s32 	%r2, %r36, %r37, %r38;
	setp.lt.s32	%p1, %r1, %r31;
	setp.lt.s32	%p2, %r2, %r32;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB6_192;
	bra.uni 	BB6_1;

BB6_1:
	and.b32  	%r3, %r26, 248;
	setp.eq.s32	%p4, %r3, 0;
	mad.lo.s32 	%r39, %r2, %r25, %r1;
	cvt.s64.s32	%rd1, %r39;
	mul.wide.s32 	%rd16, %r39, 4;
	add.s64 	%rd2, %rd14, %rd16;
	@%p4 bra 	BB6_9;

	setp.eq.s32	%p5, %r3, 8;
	@%p5 bra 	BB6_8;

	setp.eq.s32	%p6, %r3, 16;
	shl.b64 	%rd17, %rd1, 3;
	add.s64 	%rd3, %rd14, %rd17;
	@%p6 bra 	BB6_7;

	setp.eq.s32	%p7, %r27, 0;
	@%p7 bra 	BB6_6;

	shl.b64 	%rd18, %rd1, 4;
	add.s64 	%rd19, %rd14, %rd18;
	ld.v4.f32 	{%f166, %f167, %f168, %f169}, [%rd19];
	mov.f32 	%f350, %f169;
	mov.f32 	%f349, %f168;
	mov.f32 	%f348, %f167;
	mov.f32 	%f347, %f166;
	bra.uni 	BB6_10;

BB6_6:
	ld.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd3];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f347, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f348, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f349, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f350, %temp;
	}
	bra.uni 	BB6_10;

BB6_7:
	ld.v4.u16 	{%rs9, %rs10, %rs11, %rs12}, [%rd3];
	cvt.rn.f32.u16	%f347, %rs9;
	cvt.rn.f32.u16	%f348, %rs10;
	cvt.rn.f32.u16	%f349, %rs11;
	cvt.rn.f32.u16	%f350, %rs12;
	bra.uni 	BB6_10;

BB6_8:
	ld.u32 	%r40, [%rd2];
	bfe.u32 	%r41, %r40, 2, 10;
	cvt.rn.f32.u32	%f347, %r41;
	bfe.u32 	%r42, %r40, 12, 10;
	cvt.rn.f32.u32	%f348, %r42;
	shr.u32 	%r43, %r40, 22;
	cvt.rn.f32.u32	%f349, %r43;
	mov.f32 	%f350, 0f447FC000;
	bra.uni 	BB6_10;

BB6_9:
	ld.v4.u8 	{%rs17, %rs18, %rs19, %rs20}, [%rd2];
	cvt.rn.f32.u16	%f347, %rs17;
	cvt.rn.f32.u16	%f348, %rs18;
	cvt.rn.f32.u16	%f349, %rs19;
	cvt.rn.f32.u16	%f350, %rs20;

BB6_10:
	and.b32  	%r44, %r26, 8192;
	setp.eq.s32	%p8, %r44, 0;
	selp.f32	%f25, %f350, %f347, %p8;
	selp.f32	%f26, %f349, %f348, %p8;
	selp.f32	%f27, %f348, %f349, %p8;
	selp.f32	%f445, %f347, %f350, %p8;
	and.b32  	%r45, %r26, 768;
	setp.ne.s32	%p9, %r45, 512;
	mov.f32 	%f411, %f26;
	mov.f32 	%f444, %f27;
	@%p9 bra 	BB6_18;

	and.b32  	%r46, %r29, 768;
	setp.eq.s32	%p10, %r46, 512;
	mov.f32 	%f379, %f26;
	mov.f32 	%f411, %f379;
	mov.f32 	%f412, %f27;
	mov.f32 	%f444, %f412;
	@%p10 bra 	BB6_18;

	setp.ne.s32	%p11, %r3, 0;
	@%p11 bra 	BB6_14;

	mov.f32 	%f351, 0f437F0000;
	bra.uni 	BB6_17;

BB6_14:
	setp.ne.s32	%p12, %r3, 8;
	@%p12 bra 	BB6_16;

	mov.f32 	%f351, 0f447FC000;
	bra.uni 	BB6_17;

BB6_16:
	setp.eq.s32	%p13, %r3, 16;
	selp.f32	%f351, 0f47000000, 0f3F800000, %p13;

BB6_17:
	and.b32  	%r50, %r26, 4096;
	setp.eq.s32	%p14, %r50, 0;
	ld.const.f32 	%f172, [kYCbCrOffset];
	ld.const.f32 	%f173, [kYCbCrFullRangeOffset];
	selp.f32	%f174, %f173, %f172, %p14;
	mov.f32 	%f175, 0f437F0000;
	div.approx.ftz.f32 	%f176, %f351, %f175;
	mul.ftz.f32 	%f177, %f174, %f176;
	sub.ftz.f32 	%f411, %f26, %f177;
	ld.const.f32 	%f178, [kYCbCrOffset+4];
	ld.const.f32 	%f179, [kYCbCrFullRangeOffset+4];
	selp.f32	%f180, %f179, %f178, %p14;
	mul.ftz.f32 	%f181, %f180, %f176;
	sub.ftz.f32 	%f444, %f27, %f181;
	ld.const.f32 	%f182, [kYCbCrOffset+8];
	ld.const.f32 	%f183, [kYCbCrFullRangeOffset+8];
	selp.f32	%f184, %f183, %f182, %p14;
	mul.ftz.f32 	%f185, %f184, %f176;
	sub.ftz.f32 	%f445, %f445, %f185;

BB6_18:
	mov.f32 	%f442, %f444;
	mov.f32 	%f409, %f411;
	and.b32  	%r51, %r29, 1024;
	setp.eq.s32	%p15, %r51, 0;
	shr.u32 	%r52, %r26, 10;
	and.b32  	%r53, %r52, 1;
	setp.eq.b32	%p16, %r53, 1;
	and.pred  	%p17, %p15, %p16;
	@!%p17 bra 	BB6_28;
	bra.uni 	BB6_19;

BB6_19:
	setp.ltu.ftz.f32	%p18, %f409, 0f00000000;
	@%p18 bra 	BB6_21;

	lg2.approx.ftz.f32 	%f186, %f409;
	mul.ftz.f32 	%f187, %f186, 0f3EE66666;
	ex2.approx.ftz.f32 	%f410, %f187;
	bra.uni 	BB6_22;

BB6_21:
	neg.ftz.f32 	%f188, %f409;
	lg2.approx.ftz.f32 	%f189, %f188;
	mul.ftz.f32 	%f190, %f189, 0f3EE66666;
	ex2.approx.ftz.f32 	%f191, %f190;
	neg.ftz.f32 	%f410, %f191;

BB6_22:
	mov.f32 	%f409, %f410;
	setp.ltu.ftz.f32	%p19, %f442, 0f00000000;
	@%p19 bra 	BB6_24;

	lg2.approx.ftz.f32 	%f192, %f442;
	mul.ftz.f32 	%f193, %f192, 0f3EE66666;
	ex2.approx.ftz.f32 	%f443, %f193;
	bra.uni 	BB6_25;

BB6_24:
	neg.ftz.f32 	%f194, %f442;
	lg2.approx.ftz.f32 	%f195, %f194;
	mul.ftz.f32 	%f196, %f195, 0f3EE66666;
	ex2.approx.ftz.f32 	%f197, %f196;
	neg.ftz.f32 	%f443, %f197;

BB6_25:
	mov.f32 	%f442, %f443;
	setp.ltu.ftz.f32	%p20, %f445, 0f00000000;
	@%p20 bra 	BB6_27;

	lg2.approx.ftz.f32 	%f198, %f445;
	mul.ftz.f32 	%f199, %f198, 0f3EE66666;
	ex2.approx.ftz.f32 	%f445, %f199;
	bra.uni 	BB6_28;

BB6_27:
	neg.ftz.f32 	%f200, %f445;
	lg2.approx.ftz.f32 	%f201, %f200;
	mul.ftz.f32 	%f202, %f201, 0f3EE66666;
	ex2.approx.ftz.f32 	%f203, %f202;
	neg.ftz.f32 	%f445, %f203;

BB6_28:
	mov.f32 	%f46, %f442;
	mov.f32 	%f45, %f409;
	xor.b32  	%r4, %r29, %r26;
	and.b32  	%r54, %r4, 2560;
	setp.ne.s32	%p21, %r54, 0;
	@%p21 bra 	BB6_42;

	setp.eq.s32	%p22, %r45, 512;
	and.b32  	%r56, %r29, 768;
	setp.eq.s32	%p23, %r56, 512;
	xor.pred  	%p24, %p22, %p23;
	@%p24 bra 	BB6_42;

	and.b32  	%r57, %r4, 248;
	setp.eq.s32	%p25, %r57, 0;
	mov.f32 	%f377, %f25;
	mov.f32 	%f407, %f45;
	mov.f32 	%f440, %f46;
	@%p25 bra 	BB6_113;

	and.b32  	%r5, %r29, 248;
	setp.ne.s32	%p26, %r5, 0;
	@%p26 bra 	BB6_33;

	mov.f32 	%f352, 0f437F0000;
	bra.uni 	BB6_36;

BB6_33:
	setp.ne.s32	%p27, %r5, 8;
	@%p27 bra 	BB6_35;

	mov.f32 	%f352, 0f447FC000;
	bra.uni 	BB6_36;

BB6_35:
	setp.eq.s32	%p28, %r5, 16;
	selp.f32	%f352, 0f47000000, 0f3F800000, %p28;

BB6_36:
	setp.ne.s32	%p29, %r3, 0;
	@%p29 bra 	BB6_38;

	mov.f32 	%f353, 0f437F0000;
	bra.uni 	BB6_41;

BB6_38:
	setp.ne.s32	%p30, %r3, 8;
	@%p30 bra 	BB6_40;

	mov.f32 	%f353, 0f447FC000;
	bra.uni 	BB6_41;

BB6_40:
	setp.eq.s32	%p31, %r3, 16;
	selp.f32	%f353, 0f47000000, 0f3F800000, %p31;

BB6_41:
	div.approx.ftz.f32 	%f208, %f352, %f353;
	mul.ftz.f32 	%f377, %f25, %f208;
	mul.ftz.f32 	%f407, %f45, %f208;
	mul.ftz.f32 	%f440, %f46, %f208;
	mul.ftz.f32 	%f445, %f445, %f208;
	bra.uni 	BB6_113;

BB6_42:
	and.b32  	%r61, %r26, 512;
	setp.eq.s32	%p32, %r61, 0;
	@%p32 bra 	BB6_65;

	and.b32  	%r62, %r26, 2048;
	setp.eq.s32	%p33, %r62, 0;
	and.b32  	%r6, %r26, 4096;
	and.b32  	%r7, %r29, 512;
	@%p33 bra 	BB6_55;

	setp.eq.s32	%p34, %r6, 0;
	@%p34 bra 	BB6_53;

	setp.eq.s32	%p35, %r7, 0;
	and.b32  	%r8, %r26, 256;
	@%p35 bra 	BB6_49;

	setp.ne.s32	%p36, %r8, 0;
	mov.u64 	%rd53, 0;
	@%p36 bra 	BB6_77;

	and.b32  	%r63, %r29, 4096;
	setp.eq.s32	%p37, %r63, 0;
	shr.u32 	%r64, %r29, 8;
	and.b32  	%r65, %r64, 1;
	setp.eq.b32	%p38, %r65, 1;
	or.pred  	%p39, %p37, %p38;
	mov.u64 	%rd53, 0;
	@%p39 bra 	BB6_77;

	mov.u64 	%rd53, k709YCbCr_To_601YCbCr;
	bra.uni 	BB6_77;

BB6_49:
	setp.eq.s32	%p40, %r8, 0;
	@%p40 bra 	BB6_52;

	and.b32  	%r66, %r29, 256;
	setp.eq.s32	%p41, %r66, 0;
	mov.u64 	%rd53, 0;
	@%p41 bra 	BB6_77;

	mov.u64 	%rd53, k709YPbPr_To_RGB32f;
	bra.uni 	BB6_77;

BB6_52:
	and.b32  	%r67, %r29, 256;
	setp.eq.s32	%p42, %r67, 0;
	mov.u64 	%rd25, k709YCbCr_To_RGB32f;
	mov.u64 	%rd26, k709YCbCr_To_RGB8u;
	selp.b64	%rd53, %rd26, %rd25, %p42;
	bra.uni 	BB6_77;

BB6_53:
	and.b32  	%r68, %r29, 256;
	setp.eq.s32	%p43, %r68, 0;
	setp.ne.s32	%p44, %r7, 0;
	or.pred  	%p45, %p43, %p44;
	mov.u64 	%rd53, 0;
	@%p45 bra 	BB6_77;

	mov.u64 	%rd53, k709YCbCrFullRange_To_RGB32f;
	bra.uni 	BB6_77;

BB6_55:
	setp.eq.s32	%p46, %r6, 0;
	@%p46 bra 	BB6_63;

	setp.eq.s32	%p47, %r7, 0;
	and.b32  	%r9, %r26, 256;
	@%p47 bra 	BB6_59;

	setp.ne.s32	%p48, %r9, 0;
	shr.u32 	%r69, %r29, 8;
	and.b32  	%r70, %r69, 1;
	setp.eq.b32	%p49, %r70, 1;
	or.pred  	%p50, %p48, %p49;
	mov.u64 	%rd53, 0;
	@%p50 bra 	BB6_77;

	mov.u64 	%rd53, k601YCbCr_To_709YCbCr;
	bra.uni 	BB6_77;

BB6_59:
	setp.eq.s32	%p51, %r9, 0;
	@%p51 bra 	BB6_62;

	and.b32  	%r71, %r29, 256;
	setp.eq.s32	%p52, %r71, 0;
	mov.u64 	%rd53, 0;
	@%p52 bra 	BB6_77;

	mov.u64 	%rd53, k601YPbPr_To_RGB32f;
	bra.uni 	BB6_77;

BB6_62:
	and.b32  	%r72, %r29, 256;
	setp.eq.s32	%p53, %r72, 0;
	mov.u64 	%rd33, k601YCbCr_To_RGB32f;
	mov.u64 	%rd34, k601YCbCr_To_RGB8u;
	selp.b64	%rd53, %rd34, %rd33, %p53;
	bra.uni 	BB6_77;

BB6_63:
	setp.ne.s32	%p54, %r7, 0;
	mov.u64 	%rd53, 0;
	@%p54 bra 	BB6_77;

	and.b32  	%r73, %r29, 256;
	setp.eq.s32	%p55, %r73, 0;
	mov.u64 	%rd36, k601YCbCrFullRange_To_RGB32f;
	mov.u64 	%rd37, k601YCbCrFullRange_To_RGB8u;
	selp.b64	%rd53, %rd37, %rd36, %p55;
	bra.uni 	BB6_77;

BB6_65:
	and.b32  	%r74, %r26, 256;
	setp.eq.s32	%p56, %r74, 0;
	and.b32  	%r10, %r29, 2048;
	@%p56 bra 	BB6_72;

	and.b32  	%r75, %r29, 256;
	setp.eq.s32	%p57, %r75, 0;
	@%p57 bra 	BB6_68;

	setp.eq.s32	%p58, %r10, 0;
	mov.u64 	%rd38, kRGB32f_To_709YPbPr;
	mov.u64 	%rd39, kRGB32f_To_601YPbPr;
	selp.b64	%rd53, %rd39, %rd38, %p58;
	bra.uni 	BB6_77;

BB6_68:
	setp.eq.s32	%p59, %r10, 0;
	and.b32  	%r11, %r29, 4096;
	@%p59 bra 	BB6_71;

	setp.eq.s32	%p60, %r11, 0;
	mov.u64 	%rd53, 0;
	@%p60 bra 	BB6_77;

	mov.u64 	%rd53, kRGB32f_To_709YCbCr;
	bra.uni 	BB6_77;

BB6_71:
	setp.eq.s32	%p61, %r11, 0;
	mov.u64 	%rd42, kRGB32f_To_601YCbCr;
	mov.u64 	%rd43, kRGB32f_To_601YCbCrFullRange;
	selp.b64	%rd53, %rd43, %rd42, %p61;
	bra.uni 	BB6_77;

BB6_72:
	and.b32  	%r76, %r29, 256;
	mov.u64 	%rd53, 0;
	setp.ne.s32	%p62, %r76, 0;
	@%p62 bra 	BB6_77;

	setp.eq.s32	%p63, %r10, 0;
	and.b32  	%r12, %r29, 4096;
	@%p63 bra 	BB6_76;

	setp.ne.s32	%p64, %r12, 0;
	mov.u64 	%rd53, 0;
	@%p64 bra 	BB6_77;

	mov.u64 	%rd53, kRGB8u_To_709YCbCr;
	bra.uni 	BB6_77;

BB6_76:
	setp.eq.s32	%p65, %r12, 0;
	mov.u64 	%rd47, kRGB8u_To_601YCbCr;
	mov.u64 	%rd48, kRGB8u_To_601YCbCrFullRange;
	selp.b64	%rd53, %rd48, %rd47, %p65;

BB6_77:
	ld.const.f32 	%f209, [%rd53];
	ld.const.f32 	%f210, [%rd53+4];
	mul.ftz.f32 	%f211, %f46, %f210;
	fma.rn.ftz.f32 	%f212, %f45, %f209, %f211;
	ld.const.f32 	%f213, [%rd53+8];
	fma.rn.ftz.f32 	%f56, %f445, %f213, %f212;
	ld.const.f32 	%f214, [%rd53+12];
	ld.const.f32 	%f215, [%rd53+16];
	mul.ftz.f32 	%f216, %f46, %f215;
	fma.rn.ftz.f32 	%f217, %f45, %f214, %f216;
	ld.const.f32 	%f218, [%rd53+20];
	fma.rn.ftz.f32 	%f57, %f445, %f218, %f217;
	ld.const.f32 	%f219, [%rd53+24];
	ld.const.f32 	%f220, [%rd53+28];
	mul.ftz.f32 	%f221, %f46, %f220;
	fma.rn.ftz.f32 	%f222, %f45, %f219, %f221;
	ld.const.f32 	%f223, [%rd53+32];
	fma.rn.ftz.f32 	%f445, %f445, %f223, %f222;
	and.b32  	%r78, %r4, 248;
	setp.eq.s32	%p66, %r78, 0;
	mov.f32 	%f368, %f25;
	mov.f32 	%f377, %f368;
	mov.f32 	%f407, %f56;
	mov.f32 	%f440, %f57;
	@%p66 bra 	BB6_113;

	and.b32  	%r13, %r29, 248;
	setp.eq.s32	%p67, %r13, 0;
	shr.u32 	%r79, %r26, 8;
	and.b32  	%r80, %r79, 1;
	setp.eq.b32	%p68, %r80, 1;
	and.pred  	%p69, %p67, %p68;
	@%p69 bra 	BB6_101;

	shr.u32 	%r82, %r29, 8;
	and.b32  	%r83, %r82, 1;
	setp.eq.b32	%p71, %r83, 1;
	and.pred  	%p72, %p4, %p71;
	@%p72 bra 	BB6_101;

	setp.ne.s32	%p73, %r3, 0;
	@%p73 bra 	BB6_82;

	mov.f32 	%f354, 0f437F0000;
	bra.uni 	BB6_85;

BB6_82:
	setp.ne.s32	%p74, %r3, 8;
	@%p74 bra 	BB6_84;

	mov.f32 	%f354, 0f447FC000;
	bra.uni 	BB6_85;

BB6_84:
	setp.eq.s32	%p75, %r3, 16;
	selp.f32	%f354, 0f47000000, 0f3F800000, %p75;

BB6_85:
	and.b32  	%r87, %r26, 256;
	setp.eq.s32	%p76, %r87, 0;
	and.pred  	%p78, %p76, %p71;
	@%p78 bra 	BB6_94;

	and.b32  	%r92, %r29, 256;
	setp.eq.s32	%p80, %r92, 0;
	and.pred  	%p81, %p80, %p68;
	@%p81 bra 	BB6_88;

	mov.f32 	%f226, 0f437F0000;
	div.approx.ftz.f32 	%f362, %f354, %f226;
	mov.f32 	%f363, %f362;
	bra.uni 	BB6_100;

BB6_88:
	setp.ne.s32	%p82, %r13, 0;
	@%p82 bra 	BB6_90;

	mov.f32 	%f355, 0f437F0000;
	bra.uni 	BB6_93;

BB6_90:
	setp.ne.s32	%p83, %r13, 8;
	@%p83 bra 	BB6_92;

	mov.f32 	%f355, 0f447FC000;
	bra.uni 	BB6_93;

BB6_92:
	setp.eq.s32	%p84, %r13, 16;
	selp.f32	%f355, 0f47000000, 0f3F800000, %p84;

BB6_93:
	div.approx.ftz.f32 	%f363, %f355, %f354;
	mov.f32 	%f229, 0f437F0000;
	div.approx.ftz.f32 	%f362, %f355, %f229;
	bra.uni 	BB6_100;

BB6_94:
	setp.ne.s32	%p85, %r13, 0;
	@%p85 bra 	BB6_96;

	mov.f32 	%f356, 0f437F0000;
	bra.uni 	BB6_99;

BB6_96:
	setp.ne.s32	%p86, %r13, 8;
	@%p86 bra 	BB6_98;

	mov.f32 	%f356, 0f447FC000;
	bra.uni 	BB6_99;

BB6_98:
	setp.eq.s32	%p87, %r13, 16;
	selp.f32	%f356, 0f47000000, 0f3F800000, %p87;

BB6_99:
	div.approx.ftz.f32 	%f363, %f356, %f354;
	mov.f32 	%f232, 0f437F0000;
	div.approx.ftz.f32 	%f362, %f232, %f354;

BB6_100:
	mov.f32 	%f361, %f363;
	mul.ftz.f32 	%f445, %f445, %f362;
	mul.ftz.f32 	%f441, %f57, %f362;
	mul.ftz.f32 	%f408, %f56, %f362;
	bra.uni 	BB6_112;

BB6_101:
	setp.ne.s32	%p88, %r13, 0;
	@%p88 bra 	BB6_103;

	mov.f32 	%f357, 0f437F0000;
	bra.uni 	BB6_106;

BB6_103:
	setp.ne.s32	%p89, %r13, 8;
	@%p89 bra 	BB6_105;

	mov.f32 	%f357, 0f447FC000;
	bra.uni 	BB6_106;

BB6_105:
	setp.eq.s32	%p90, %r13, 16;
	selp.f32	%f357, 0f47000000, 0f3F800000, %p90;

BB6_106:
	setp.ne.s32	%p91, %r3, 0;
	@%p91 bra 	BB6_108;

	mov.f32 	%f358, 0f437F0000;
	bra.uni 	BB6_111;

BB6_108:
	setp.ne.s32	%p92, %r3, 8;
	@%p92 bra 	BB6_110;

	mov.f32 	%f358, 0f447FC000;
	bra.uni 	BB6_111;

BB6_110:
	setp.eq.s32	%p93, %r3, 16;
	selp.f32	%f358, 0f47000000, 0f3F800000, %p93;

BB6_111:
	div.approx.ftz.f32 	%f361, %f357, %f358;
	mov.f32 	%f408, %f56;
	mov.f32 	%f441, %f57;

BB6_112:
	mov.f32 	%f440, %f441;
	mov.f32 	%f407, %f408;
	mul.ftz.f32 	%f377, %f25, %f361;

BB6_113:
	mov.f32 	%f438, %f440;
	mov.f32 	%f405, %f407;
	mov.f32 	%f85, %f377;
	and.b32  	%r102, %r26, 1024;
	setp.eq.s32	%p94, %r102, 0;
	shr.u32 	%r103, %r29, 10;
	and.b32  	%r104, %r103, 1;
	setp.eq.b32	%p95, %r104, 1;
	and.pred  	%p96, %p94, %p95;
	@!%p96 bra 	BB6_123;
	bra.uni 	BB6_114;

BB6_114:
	setp.ltu.ftz.f32	%p97, %f405, 0f00000000;
	@%p97 bra 	BB6_116;

	lg2.approx.ftz.f32 	%f237, %f405;
	mul.ftz.f32 	%f238, %f237, 0f400E38E4;
	ex2.approx.ftz.f32 	%f406, %f238;
	bra.uni 	BB6_117;

BB6_116:
	neg.ftz.f32 	%f239, %f405;
	lg2.approx.ftz.f32 	%f240, %f239;
	mul.ftz.f32 	%f241, %f240, 0f400E38E4;
	ex2.approx.ftz.f32 	%f242, %f241;
	neg.ftz.f32 	%f406, %f242;

BB6_117:
	mov.f32 	%f405, %f406;
	setp.ltu.ftz.f32	%p98, %f438, 0f00000000;
	@%p98 bra 	BB6_119;

	lg2.approx.ftz.f32 	%f243, %f438;
	mul.ftz.f32 	%f244, %f243, 0f400E38E4;
	ex2.approx.ftz.f32 	%f439, %f244;
	bra.uni 	BB6_120;

BB6_119:
	neg.ftz.f32 	%f245, %f438;
	lg2.approx.ftz.f32 	%f246, %f245;
	mul.ftz.f32 	%f247, %f246, 0f400E38E4;
	ex2.approx.ftz.f32 	%f248, %f247;
	neg.ftz.f32 	%f439, %f248;

BB6_120:
	mov.f32 	%f438, %f439;
	setp.ltu.ftz.f32	%p99, %f445, 0f00000000;
	@%p99 bra 	BB6_122;

	lg2.approx.ftz.f32 	%f249, %f445;
	mul.ftz.f32 	%f250, %f249, 0f400E38E4;
	ex2.approx.ftz.f32 	%f445, %f250;
	bra.uni 	BB6_123;

BB6_122:
	neg.ftz.f32 	%f251, %f445;
	lg2.approx.ftz.f32 	%f252, %f251;
	mul.ftz.f32 	%f253, %f252, 0f400E38E4;
	ex2.approx.ftz.f32 	%f254, %f253;
	neg.ftz.f32 	%f445, %f254;

BB6_123:
	mov.f32 	%f437, %f438;
	mov.f32 	%f404, %f405;
	setp.eq.s32	%p100, %r45, 512;
	and.b32  	%r14, %r29, 768;
	setp.ne.s32	%p101, %r14, 512;
	or.pred  	%p102, %p100, %p101;
	@%p102 bra 	BB6_130;

	and.b32  	%r15, %r29, 248;
	setp.ne.s32	%p103, %r15, 0;
	@%p103 bra 	BB6_126;

	mov.f32 	%f364, 0f437F0000;
	bra.uni 	BB6_129;

BB6_126:
	setp.ne.s32	%p104, %r15, 8;
	@%p104 bra 	BB6_128;

	mov.f32 	%f364, 0f447FC000;
	bra.uni 	BB6_129;

BB6_128:
	setp.eq.s32	%p105, %r15, 16;
	selp.f32	%f364, 0f47000000, 0f3F800000, %p105;

BB6_129:
	and.b32  	%r106, %r29, 4096;
	setp.eq.s32	%p106, %r106, 0;
	ld.const.f32 	%f257, [kYCbCrOffset];
	ld.const.f32 	%f258, [kYCbCrFullRangeOffset];
	selp.f32	%f259, %f258, %f257, %p106;
	mov.f32 	%f260, 0f437F0000;
	div.approx.ftz.f32 	%f261, %f364, %f260;
	fma.rn.ftz.f32 	%f404, %f259, %f261, %f404;
	ld.const.f32 	%f262, [kYCbCrOffset+4];
	ld.const.f32 	%f263, [kYCbCrFullRangeOffset+4];
	selp.f32	%f264, %f263, %f262, %p106;
	fma.rn.ftz.f32 	%f437, %f264, %f261, %f437;
	ld.const.f32 	%f265, [kYCbCrOffset+8];
	ld.const.f32 	%f266, [kYCbCrFullRangeOffset+8];
	selp.f32	%f267, %f266, %f265, %p106;
	fma.rn.ftz.f32 	%f445, %f267, %f261, %f445;

BB6_130:
	mov.f32 	%f436, %f437;
	mov.f32 	%f403, %f404;
	and.b32  	%r108, %r4, 7;
	setp.eq.s32	%p107, %r108, 0;
	mov.f32 	%f375, %f85;
	mov.f32 	%f399, %f403;
	mov.f32 	%f432, %f436;
	@%p107 bra 	BB6_176;

	and.b32  	%r16, %r26, 7;
	setp.ne.s32	%p108, %r16, 2;
	@%p108 bra 	BB6_133;

	or.b32  	%r109, %r29, 4;
	and.b32  	%r110, %r109, 7;
	setp.eq.s32	%p109, %r110, 4;
	@%p109 bra 	BB6_134;

BB6_133:
	or.b32  	%r111, %r16, 4;
	setp.eq.s32	%p110, %r111, 4;
	@%p110 bra 	BB6_134;
	bra.uni 	BB6_140;

BB6_134:
	and.b32  	%r17, %r29, 248;
	setp.ne.s32	%p111, %r17, 0;
	@%p111 bra 	BB6_137;

	mov.f32 	%f375, 0f437F0000;

BB6_136:
	mov.f32 	%f399, %f403;
	mov.f32 	%f432, %f436;
	bra.uni 	BB6_176;

BB6_137:
	setp.ne.s32	%p112, %r17, 8;
	@%p112 bra 	BB6_139;

	mov.f32 	%f375, 0f447FC000;
	bra.uni 	BB6_136;

BB6_139:
	setp.eq.s32	%p113, %r17, 16;
	selp.f32	%f375, 0f47000000, 0f3F800000, %p113;
	bra.uni 	BB6_136;

BB6_140:
	setp.ne.s32	%p114, %r16, 1;
	@%p114 bra 	BB6_154;

	and.b32  	%r112, %r29, 7;
	setp.gt.u32	%p115, %r112, 4;
	@%p115 bra 	BB6_154;

	mov.u32 	%r113, 1;
	shl.b32 	%r114, %r113, %r112;
	and.b32  	%r115, %r114, 21;
	setp.ne.s32	%p116, %r115, 0;
	@%p116 bra 	BB6_143;
	bra.uni 	BB6_154;

BB6_143:
	and.b32  	%r19, %r29, 248;
	setp.ne.s32	%p117, %r19, 0;
	@%p117 bra 	BB6_145;

	mov.f32 	%f378, 0f437F0000;
	bra.uni 	BB6_148;

BB6_145:
	setp.ne.s32	%p118, %r19, 8;
	@%p118 bra 	BB6_147;

	mov.f32 	%f378, 0f447FC000;
	bra.uni 	BB6_148;

BB6_147:
	setp.eq.s32	%p119, %r19, 16;
	selp.f32	%f378, 0f47000000, 0f3F800000, %p119;

BB6_148:
	mov.f32 	%f110, %f378;
	@%p101 bra 	BB6_150;

	mov.f32 	%f272, 0f437F0000;
	div.approx.ftz.f32 	%f273, %f110, %f272;
	and.b32  	%r116, %r29, 4096;
	setp.eq.s32	%p121, %r116, 0;
	ld.const.f32 	%f274, [kYCbCrOffset];
	ld.const.f32 	%f275, [kYCbCrFullRangeOffset];
	selp.f32	%f276, %f275, %f274, %p121;
	mul.ftz.f32 	%f277, %f276, %f273;
	sub.ftz.f32 	%f403, %f403, %f277;
	ld.const.f32 	%f278, [kYCbCrOffset+4];
	ld.const.f32 	%f279, [kYCbCrFullRangeOffset+4];
	selp.f32	%f280, %f279, %f278, %p121;
	mul.ftz.f32 	%f281, %f280, %f273;
	sub.ftz.f32 	%f436, %f436, %f281;
	ld.const.f32 	%f282, [kYCbCrOffset+8];
	ld.const.f32 	%f283, [kYCbCrFullRangeOffset+8];
	selp.f32	%f284, %f283, %f282, %p121;
	mul.ftz.f32 	%f285, %f284, %f273;
	sub.ftz.f32 	%f445, %f445, %f285;

BB6_150:
	rcp.approx.ftz.f32 	%f286, %f110;
	mul.ftz.f32 	%f287, %f85, %f286;
	mul.ftz.f32 	%f401, %f403, %f287;
	mul.ftz.f32 	%f434, %f436, %f287;
	mul.ftz.f32 	%f445, %f445, %f287;
	@%p101 bra 	BB6_152;

	mov.f32 	%f288, 0f437F0000;
	div.approx.ftz.f32 	%f289, %f110, %f288;
	and.b32  	%r117, %r29, 4096;
	setp.eq.s32	%p123, %r117, 0;
	ld.const.f32 	%f290, [kYCbCrOffset];
	ld.const.f32 	%f291, [kYCbCrFullRangeOffset];
	selp.f32	%f292, %f291, %f290, %p123;
	fma.rn.ftz.f32 	%f401, %f292, %f289, %f401;
	ld.const.f32 	%f293, [kYCbCrOffset+4];
	ld.const.f32 	%f294, [kYCbCrFullRangeOffset+4];
	selp.f32	%f295, %f294, %f293, %p123;
	fma.rn.ftz.f32 	%f434, %f295, %f289, %f434;
	ld.const.f32 	%f296, [kYCbCrOffset+8];
	ld.const.f32 	%f297, [kYCbCrFullRangeOffset+8];
	selp.f32	%f298, %f297, %f296, %p123;
	fma.rn.ftz.f32 	%f445, %f298, %f289, %f445;

BB6_152:
	mov.f32 	%f432, %f434;
	mov.f32 	%f399, %f401;
	or.b32  	%r118, %r112, 4;
	setp.eq.s32	%p124, %r118, 4;
	mov.f32 	%f370, %f85;
	mov.f32 	%f375, %f370;
	@%p124 bra 	BB6_153;
	bra.uni 	BB6_176;

BB6_153:
	mov.f32 	%f375, %f110;
	bra.uni 	BB6_176;

BB6_154:
	mov.f32 	%f402, %f403;
	mov.f32 	%f435, %f436;
	@%p101 bra 	BB6_161;

	and.b32  	%r20, %r29, 248;
	setp.ne.s32	%p126, %r20, 0;
	@%p126 bra 	BB6_157;

	mov.f32 	%f365, 0f437F0000;
	bra.uni 	BB6_160;

BB6_157:
	setp.ne.s32	%p127, %r20, 8;
	@%p127 bra 	BB6_159;

	mov.f32 	%f365, 0f447FC000;
	bra.uni 	BB6_160;

BB6_159:
	setp.eq.s32	%p128, %r20, 16;
	selp.f32	%f365, 0f47000000, 0f3F800000, %p128;

BB6_160:
	and.b32  	%r119, %r29, 4096;
	setp.eq.s32	%p129, %r119, 0;
	ld.const.f32 	%f301, [kYCbCrOffset];
	ld.const.f32 	%f302, [kYCbCrFullRangeOffset];
	selp.f32	%f303, %f302, %f301, %p129;
	mov.f32 	%f304, 0f437F0000;
	div.approx.ftz.f32 	%f305, %f365, %f304;
	mul.ftz.f32 	%f306, %f303, %f305;
	sub.ftz.f32 	%f402, %f403, %f306;
	ld.const.f32 	%f307, [kYCbCrOffset+4];
	ld.const.f32 	%f308, [kYCbCrFullRangeOffset+4];
	selp.f32	%f309, %f308, %f307, %p129;
	mul.ftz.f32 	%f310, %f309, %f305;
	sub.ftz.f32 	%f435, %f436, %f310;
	ld.const.f32 	%f311, [kYCbCrOffset+8];
	ld.const.f32 	%f312, [kYCbCrFullRangeOffset+8];
	selp.f32	%f313, %f312, %f311, %p129;
	mul.ftz.f32 	%f314, %f313, %f305;
	sub.ftz.f32 	%f445, %f445, %f314;

BB6_161:
	add.ftz.f32 	%f315, %f85, 0fB70637BD;
	setp.gtu.ftz.f32	%p130, %f315, 0f00000000;
	@%p130 bra 	BB6_163;

	mov.f32 	%f445, 0f00000000;
	mov.f32 	%f433, %f445;
	mov.f32 	%f400, %f445;
	mov.f32 	%f376, %f445;
	bra.uni 	BB6_169;

BB6_163:
	and.b32  	%r21, %r29, 248;
	setp.ne.s32	%p131, %r21, 0;
	@%p131 bra 	BB6_165;

	mov.f32 	%f366, 0f437F0000;
	bra.uni 	BB6_168;

BB6_165:
	setp.ne.s32	%p132, %r21, 8;
	@%p132 bra 	BB6_167;

	mov.f32 	%f366, 0f447FC000;
	bra.uni 	BB6_168;

BB6_167:
	setp.eq.s32	%p133, %r21, 16;
	selp.f32	%f366, 0f47000000, 0f3F800000, %p133;

BB6_168:
	div.approx.ftz.f32 	%f322, %f366, %f85;
	mul.ftz.f32 	%f400, %f402, %f322;
	mul.ftz.f32 	%f433, %f435, %f322;
	mul.ftz.f32 	%f445, %f445, %f322;
	mov.f32 	%f376, %f85;

BB6_169:
	mov.f32 	%f432, %f433;
	mov.f32 	%f399, %f400;
	mov.f32 	%f375, %f376;
	@%p101 bra 	BB6_176;

	and.b32  	%r22, %r29, 248;
	setp.ne.s32	%p135, %r22, 0;
	@%p135 bra 	BB6_172;

	mov.f32 	%f367, 0f437F0000;
	bra.uni 	BB6_175;

BB6_172:
	setp.ne.s32	%p136, %r22, 8;
	@%p136 bra 	BB6_174;

	mov.f32 	%f367, 0f447FC000;
	bra.uni 	BB6_175;

BB6_174:
	setp.eq.s32	%p137, %r22, 16;
	selp.f32	%f367, 0f47000000, 0f3F800000, %p137;

BB6_175:
	and.b32  	%r120, %r29, 4096;
	setp.eq.s32	%p138, %r120, 0;
	ld.const.f32 	%f325, [kYCbCrOffset];
	ld.const.f32 	%f326, [kYCbCrFullRangeOffset];
	selp.f32	%f327, %f326, %f325, %p138;
	mov.f32 	%f328, 0f437F0000;
	div.approx.ftz.f32 	%f329, %f367, %f328;
	fma.rn.ftz.f32 	%f399, %f327, %f329, %f399;
	ld.const.f32 	%f330, [kYCbCrOffset+4];
	ld.const.f32 	%f331, [kYCbCrFullRangeOffset+4];
	selp.f32	%f332, %f331, %f330, %p138;
	fma.rn.ftz.f32 	%f432, %f332, %f329, %f432;
	ld.const.f32 	%f333, [kYCbCrOffset+8];
	ld.const.f32 	%f334, [kYCbCrFullRangeOffset+8];
	selp.f32	%f335, %f334, %f333, %p138;
	fma.rn.ftz.f32 	%f445, %f335, %f329, %f445;

BB6_176:
	and.b32  	%r121, %r29, 8192;
	setp.eq.s32	%p139, %r121, 0;
	selp.f32	%f447, %f445, %f375, %p139;
	selp.f32	%f448, %f432, %f399, %p139;
	selp.f32	%f449, %f399, %f432, %p139;
	selp.f32	%f450, %f375, %f445, %p139;
	and.b32  	%r122, %r29, 256;
	and.b32  	%r23, %r29, 248;
	setp.ne.s32	%p140, %r122, 0;
	@%p140 bra 	BB6_183;

	setp.ne.s32	%p141, %r23, 0;
	@%p141 bra 	BB6_179;

	mov.f32 	%f446, 0f437F0000;
	bra.uni 	BB6_182;

BB6_179:
	setp.ne.s32	%p142, %r23, 8;
	@%p142 bra 	BB6_181;

	mov.f32 	%f446, 0f447FC000;
	bra.uni 	BB6_182;

BB6_181:
	setp.eq.s32	%p143, %r23, 16;
	selp.f32	%f446, 0f47000000, 0f3F800000, %p143;

BB6_182:
	add.ftz.f32 	%f338, %f447, 0f3F000000;
	add.ftz.f32 	%f339, %f448, 0f3F000000;
	add.ftz.f32 	%f340, %f449, 0f3F000000;
	add.ftz.f32 	%f341, %f450, 0f3F000000;
	mov.f32 	%f342, 0f00000000;
	max.ftz.f32 	%f343, %f338, %f342;
	min.ftz.f32 	%f447, %f343, %f446;
	max.ftz.f32 	%f344, %f339, %f342;
	min.ftz.f32 	%f448, %f344, %f446;
	max.ftz.f32 	%f345, %f340, %f342;
	min.ftz.f32 	%f449, %f345, %f446;
	max.ftz.f32 	%f346, %f341, %f342;
	min.ftz.f32 	%f450, %f346, %f446;

BB6_183:
	mad.lo.s32 	%r131, %r2, %r28, %r1;
	cvt.s64.s32	%rd11, %r131;
	mul.wide.s32 	%rd49, %r131, 4;
	add.s64 	%rd12, %rd15, %rd49;
	setp.eq.s32	%p144, %r23, 0;
	@%p144 bra 	BB6_191;

	setp.eq.s32	%p145, %r23, 8;
	@%p145 bra 	BB6_190;

	setp.eq.s32	%p146, %r23, 16;
	shl.b64 	%rd50, %rd11, 3;
	add.s64 	%rd13, %rd15, %rd50;
	@%p146 bra 	BB6_189;

	setp.eq.s32	%p147, %r30, 0;
	@%p147 bra 	BB6_188;

	shl.b64 	%rd51, %rd11, 4;
	add.s64 	%rd52, %rd15, %rd51;
	st.v4.f32 	[%rd52], {%f447, %f448, %f449, %f450};
	bra.uni 	BB6_192;

BB6_188:
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f450;
	mov.b16 	%rs21, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f449;
	mov.b16 	%rs22, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f448;
	mov.b16 	%rs23, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f447;
	mov.b16 	%rs24, %temp;
}
	st.v4.u16 	[%rd13], {%rs24, %rs23, %rs22, %rs21};
	bra.uni 	BB6_192;

BB6_189:
	cvt.rzi.ftz.u32.f32	%r132, %f447;
	cvt.rzi.ftz.u32.f32	%r133, %f448;
	cvt.rzi.ftz.u32.f32	%r134, %f449;
	cvt.rzi.ftz.u32.f32	%r135, %f450;
	st.u16 	[%rd13], %r132;
	st.u16 	[%rd13+2], %r133;
	st.u16 	[%rd13+4], %r134;
	st.u16 	[%rd13+6], %r135;
	bra.uni 	BB6_192;

BB6_190:
	cvt.rzi.ftz.u32.f32	%r136, %f447;
	shl.b32 	%r137, %r136, 2;
	cvt.rzi.ftz.u32.f32	%r138, %f448;
	shl.b32 	%r139, %r138, 12;
	cvt.rzi.ftz.u32.f32	%r140, %f449;
	shl.b32 	%r141, %r140, 22;
	add.s32 	%r142, %r139, %r137;
	add.s32 	%r143, %r142, %r141;
	st.u32 	[%rd12], %r143;
	bra.uni 	BB6_192;

BB6_191:
	cvt.rzi.ftz.u32.f32	%r144, %f447;
	cvt.rzi.ftz.u32.f32	%r145, %f448;
	cvt.rzi.ftz.u32.f32	%r146, %f449;
	cvt.rzi.ftz.u32.f32	%r147, %f450;
	st.u8 	[%rd12], %r144;
	st.u8 	[%rd12+1], %r145;
	st.u8 	[%rd12+2], %r146;
	st.u8 	[%rd12+3], %r147;

BB6_192:
	ret;
}

//-----------------------------------------------------------------------
// PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel
//
// One thread converts one pixel: four unsigned bytes are read from the
// source, the first three are treated as Cr/Cb/Y (V, U, Y in memory
// order) and run through the 3x3 matrix k601YCbCr_To_RGB32f after
// subtracting the kYCbCrOffset vector; the fourth byte is scaled by
// 1/255 and passed through as alpha. The result is stored as B, G, R, A.
//
// Parameters as used by the code below:
//   param_0 (u64) source base address (converted to a global address)
//   param_1 (u32) source row stride, multiplied by y and added to x to
//                 form a pixel index (4 bytes per source pixel)
//   param_2 (u32) declared but never loaded
//   param_3 (u64) destination base address (converted to a global address)
//   param_4 (u32) destination row stride, in pixels
//   param_5 (u32) destination element selector: non-zero stores four
//                 f32 values (16 bytes/pixel), zero stores four f16
//                 values (8 bytes/pixel)
//   param_6 (u32) exclusive upper bound for x (signed compare)
//   param_7 (u32) exclusive upper bound for y (signed compare)
//   param_8 (u32) declared but never loaded
//
// NOTE(review): kYCbCrOffset is referenced but not declared in the part
// of the file visible here; three consecutive f32 values are read from it.
//-----------------------------------------------------------------------
.visible .entry PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<9>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<37>;
	.reg .s64 	%rd<12>;


	// Load the parameters this kernel actually uses (param_2 and param_8 are skipped).
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYA_4444_8u_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Bounds guard: threads with x >= param_6 or y >= param_7 return immediately.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB7_4;
	bra.uni 	BB7_1;

BB7_1:
	// Source pixel address = src + (y * param_1 + x) * 4; the index is formed in 32 bits.
	cvta.to.global.u64 	%rd5, %rd3;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	mul.wide.s32 	%rd6, %r14, 4;
	add.s64 	%rd7, %rd5, %rd6;
	// Read the four source bytes in one vector load, then widen each to f32.
	// Given how they are consumed below: %f5 = Cr (V), %f6 = Cb (U), %f7 = Y, %f8 = A.
	ld.global.v4.u8 	{%rs1, %rs2, %rs3, %rs4}, [%rd7];
	cvt.rn.f32.u16	%f5, %rs1;
	cvt.rn.f32.u16	%f6, %rs2;
	cvt.rn.f32.u16	%f7, %rs3;
	cvt.rn.f32.u16	%f8, %rs4;
	// %f11 = 255 / 255 (approximate divide, nominally 1.0): scale applied to the offsets.
	// Subtract the scaled kYCbCrOffset entries: [0] from Y, [1] from Cb, [2] from Cr.
	ld.const.f32 	%f9, [kYCbCrOffset];
	mov.f32 	%f10, 0f437F0000;
	div.approx.ftz.f32 	%f11, %f10, %f10;
	mul.ftz.f32 	%f12, %f9, %f11;
	sub.ftz.f32 	%f13, %f7, %f12;
	ld.const.f32 	%f14, [kYCbCrOffset+4];
	mul.ftz.f32 	%f15, %f14, %f11;
	sub.ftz.f32 	%f16, %f6, %f15;
	ld.const.f32 	%f17, [kYCbCrOffset+8];
	mul.ftz.f32 	%f18, %f17, %f11;
	sub.ftz.f32 	%f19, %f5, %f18;
	// 3x3 matrix multiply with k601YCbCr_To_RGB32f (row-major, 12 bytes per row).
	// Row 0 -> %f1 (R): Y*m00 + Cb*m01 + Cr*m02
	ld.const.f32 	%f20, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f21, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f22, %f16, %f21;
	fma.rn.ftz.f32 	%f23, %f13, %f20, %f22;
	ld.const.f32 	%f24, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f19, %f24, %f23;
	// Row 1 -> %f2 (G)
	ld.const.f32 	%f25, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f26, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f27, %f16, %f26;
	fma.rn.ftz.f32 	%f28, %f13, %f25, %f27;
	ld.const.f32 	%f29, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f19, %f29, %f28;
	// Row 2 -> %f3 (B)
	ld.const.f32 	%f30, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f31, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f32, %f16, %f31;
	fma.rn.ftz.f32 	%f33, %f13, %f30, %f32;
	ld.const.f32 	%f34, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f19, %f34, %f33;
	// Alpha: %f4 = A * (1.0 / 255.0), using an approximate divide for the reciprocal.
	mov.f32 	%f35, 0f3F800000;
	div.approx.ftz.f32 	%f36, %f35, %f10;
	mul.ftz.f32 	%f4, %f8, %f36;
	// Destination pixel index = y * param_4 + x; param_5 == 0 selects the f16 store path.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	cvt.s64.s32	%rd2, %r15;
	setp.eq.s32	%p4, %r5, 0;
	@%p4 bra 	BB7_3;

	// f32 destination: 16 bytes per pixel, stored in the order B, G, R, A.
	shl.b64 	%rd8, %rd2, 4;
	add.s64 	%rd9, %rd1, %rd8;
	st.global.v4.f32 	[%rd9], {%f3, %f2, %f1, %f4};
	bra.uni 	BB7_4;

BB7_3:
	// f16 destination: 8 bytes per pixel. Each channel is rounded to half
	// precision (round-to-nearest) and stored in the same B, G, R, A order.
	shl.b64 	%rd10, %rd2, 3;
	add.s64 	%rd11, %rd1, %rd10;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd11], {%rs8, %rs7, %rs6, %rs5};

BB7_4:
	ret;
}

//-----------------------------------------------------------------------
// PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel
//
// One thread converts one pixel: four floating-point channels (read as
// f32 or widened from f16) are taken as B, G, R, A; the colour channels
// are run through the 3x3 matrix kRGB32f_To_601YCbCr, the kYCbCrOffset
// vector is added, alpha is scaled by 255, and all four values are
// rounded (add 0.5, truncate), clamped to [0, 255] and stored as the
// bytes V, U, Y, A.
//
// Parameters as used by the code below:
//   param_0 (u64) source base address (converted to a global address)
//   param_1 (u32) source row stride, in pixels
//   param_2 (u32) source element selector: non-zero reads four f32
//                 values (16 bytes/pixel), zero reads four f16 values
//                 (8 bytes/pixel)
//   param_3 (u64) destination base address (converted to a global address)
//   param_4 (u32) destination row stride, in pixels (4 bytes per pixel)
//   param_5 (u32) declared but never loaded
//   param_6 (u32) exclusive upper bound for x (signed compare)
//   param_7 (u32) exclusive upper bound for y (signed compare)
//   param_8 (u32) declared but never loaded
//
// NOTE(review): kYCbCrOffset is referenced but not declared in the part
// of the file visible here; three consecutive f32 values are read from it.
//-----------------------------------------------------------------------
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<62>;
	.reg .s64 	%rd<12>;


	// Load the parameters this kernel actually uses (param_5 and param_8 are skipped).
	ld.param.u64 	%rd5, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd5;
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Bounds guard: threads with x >= param_6 or y >= param_7 return immediately.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB8_5;
	bra.uni 	BB8_1;

BB8_1:
	// Source pixel index = y * param_1 + x (formed in 32 bits, then sign-extended).
	// param_2 == 0 selects the f16 load path at BB8_3.
	cvta.to.global.u64 	%rd2, %rd4;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r14;
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB8_3;

	// f32 source: 16 bytes per pixel. %f58..%f61 receive the four channels
	// in memory order (used below as B, G, R, A).
	shl.b64 	%rd6, %rd3, 4;
	add.s64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd7];
	mov.f32 	%f61, %f16;
	mov.f32 	%f60, %f15;
	mov.f32 	%f59, %f14;
	mov.f32 	%f58, %f13;
	bra.uni 	BB8_4;

BB8_3:
	// f16 source: 8 bytes per pixel; each half is widened to f32 into %f58..%f61.
	shl.b64 	%rd8, %rd3, 3;
	add.s64 	%rd9, %rd1, %rd8;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd9];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f58, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f59, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f60, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f61, %temp;
	}

BB8_4:
	// 3x3 matrix multiply with kRGB32f_To_601YCbCr (row-major, 12 bytes per row).
	// Column 0 multiplies %f60 (R), column 1 %f59 (G), column 2 %f58 (B).
	// Row 0 -> %f22 (Y)
	ld.const.f32 	%f17, [kRGB32f_To_601YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_601YCbCr+4];
	mul.ftz.f32 	%f19, %f59, %f18;
	fma.rn.ftz.f32 	%f20, %f60, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_601YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f58, %f21, %f20;
	// Row 1 -> %f28 (Cb)
	ld.const.f32 	%f23, [kRGB32f_To_601YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_601YCbCr+16];
	mul.ftz.f32 	%f25, %f59, %f24;
	fma.rn.ftz.f32 	%f26, %f60, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_601YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f58, %f27, %f26;
	// Row 2 -> %f34 (Cr)
	ld.const.f32 	%f29, [kRGB32f_To_601YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_601YCbCr+28];
	mul.ftz.f32 	%f31, %f59, %f30;
	fma.rn.ftz.f32 	%f32, %f60, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_601YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f58, %f33, %f32;
	// %f37 = 255 / 1 (alpha scale); %f39 = 255 / 255 (offset scale, nominally 1.0).
	// Both are approximate divides.
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	div.approx.ftz.f32 	%f37, %f36, %f35;
	ld.const.f32 	%f38, [kYCbCrOffset];
	div.approx.ftz.f32 	%f39, %f36, %f36;
	// Add the scaled kYCbCrOffset entries: [0] to Y, [1] to Cb, [2] to Cr.
	fma.rn.ftz.f32 	%f40, %f38, %f39, %f22;
	ld.const.f32 	%f41, [kYCbCrOffset+4];
	fma.rn.ftz.f32 	%f42, %f41, %f39, %f28;
	ld.const.f32 	%f43, [kYCbCrOffset+8];
	fma.rn.ftz.f32 	%f44, %f43, %f39, %f34;
	// Rounding bias of 0.5 ahead of the truncating conversion below;
	// alpha is scaled and biased in a single fma.
	add.ftz.f32 	%f45, %f44, 0f3F000000;
	add.ftz.f32 	%f46, %f42, 0f3F000000;
	add.ftz.f32 	%f47, %f40, 0f3F000000;
	fma.rn.ftz.f32 	%f48, %f61, %f37, 0f3F000000;
	// Clamp every channel to [0.0, 255.0].
	mov.f32 	%f49, 0f00000000;
	max.ftz.f32 	%f50, %f45, %f49;
	min.ftz.f32 	%f51, %f50, %f36;
	max.ftz.f32 	%f52, %f46, %f49;
	min.ftz.f32 	%f53, %f52, %f36;
	max.ftz.f32 	%f54, %f47, %f49;
	min.ftz.f32 	%f55, %f54, %f36;
	max.ftz.f32 	%f56, %f48, %f49;
	min.ftz.f32 	%f57, %f56, %f36;
	// Truncate toward zero: %r15 = Cr (V), %r16 = Cb (U), %r17 = Y, %r18 = A.
	cvt.rzi.ftz.u32.f32	%r15, %f51;
	cvt.rzi.ftz.u32.f32	%r16, %f53;
	cvt.rzi.ftz.u32.f32	%r17, %f55;
	cvt.rzi.ftz.u32.f32	%r18, %f57;
	// Destination pixel address = dst + (y * param_4 + x) * 4.
	mad.lo.s32 	%r19, %r2, %r5, %r1;
	mul.wide.s32 	%rd10, %r19, 4;
	add.s64 	%rd11, %rd2, %rd10;
	cvt.u16.u32	%rs9, %r18;
	cvt.u16.u32	%rs10, %r17;
	cvt.u16.u32	%rs11, %r16;
	cvt.u16.u32	%rs12, %r15;
	// Store the four bytes in the order V, U, Y, A.
	st.global.v4.u8 	[%rd11], {%rs12, %rs11, %rs10, %rs9};

BB8_5:
	ret;
}

//-----------------------------------------------------------------------
// PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel
//
// One thread converts one pixel: four unsigned bytes are read from the
// source; the fourth is scaled by 1/255 to give alpha. When alpha is not
// above a small epsilon the whole output pixel is zero. Otherwise the
// first three bytes (Cr/Cb/Y, i.e. V, U, Y in memory order) have the
// kYCbCrOffset vector subtracted, are run through the 3x3 matrix
// k601YCbCr_To_RGB32f, and the resulting R, G, B are divided by alpha.
// The result is stored as B, G, R, A.
//
// Parameters as used by the code below:
//   param_0 (u64) source base address (converted to a global address)
//   param_1 (u32) source row stride, in pixels (4 bytes per pixel)
//   param_2 (u32) declared but never loaded
//   param_3 (u64) destination base address (converted to a global address)
//   param_4 (u32) destination row stride, in pixels
//   param_5 (u32) destination element selector: non-zero stores four
//                 f32 values (16 bytes/pixel), zero stores four f16
//                 values (8 bytes/pixel)
//   param_6 (u32) exclusive upper bound for x (signed compare)
//   param_7 (u32) exclusive upper bound for y (signed compare)
//   param_8 (u32) declared but never loaded
//
// NOTE(review): kYCbCrOffset is referenced but not declared in the part
// of the file visible here; three consecutive f32 values are read from it.
//-----------------------------------------------------------------------
.visible .entry PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<16>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<56>;
	.reg .s64 	%rd<12>;


	// Load the parameters this kernel actually uses (param_2 and param_8 are skipped).
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYP_4444_8u_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Bounds guard: threads with x >= param_6 or y >= param_7 return immediately.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB9_7;
	bra.uni 	BB9_1;

BB9_1:
	// Source pixel address = src + (y * param_1 + x) * 4; the index is formed in 32 bits.
	cvta.to.global.u64 	%rd5, %rd3;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	mul.wide.s32 	%rd6, %r14, 4;
	add.s64 	%rd7, %rd5, %rd6;
	ld.global.v4.u8 	{%rs5, %rs6, %rs7, %rs8}, [%rd7];
	// Alpha: %f52 = byte3 * (1.0 / 255.0), reciprocal via approximate divide.
	cvt.rn.f32.u16	%f9, %rs8;
	mov.f32 	%f10, 0f437F0000;
	mov.f32 	%f11, 0f3F800000;
	div.approx.ftz.f32 	%f12, %f11, %f10;
	mul.ftz.f32 	%f52, %f9, %f12;
	// Epsilon test: add roughly -8e-6 to alpha and branch to the conversion
	// path only when the result is greater than zero (or unordered).
	add.ftz.f32 	%f13, %f52, 0fB70637BD;
	setp.gtu.ftz.f32	%p4, %f13, 0f00000000;
	@%p4 bra 	BB9_3;

	// Alpha at or below epsilon: output all four channels as 0.0
	// (this also overwrites alpha itself with 0.0).
	mov.f32 	%f55, 0f00000000;
	mov.f32 	%f54, %f55;
	mov.f32 	%f53, %f55;
	mov.f32 	%f52, %f55;
	bra.uni 	BB9_4;

BB9_3:
	// Keep the low 8 bits of each colour byte and widen to f32:
	// %f18 = Cr (V), %f19 = Cb (U), %f20 = Y, given how they are used below.
	and.b16  	%rs9, %rs5, 255;
	cvt.rn.f32.u16	%f18, %rs9;
	and.b16  	%rs10, %rs6, 255;
	cvt.rn.f32.u16	%f19, %rs10;
	and.b16  	%rs11, %rs7, 255;
	cvt.rn.f32.u16	%f20, %rs11;
	// %f23 = 255 / 255 (approximate divide, nominally 1.0): scale applied to the offsets.
	// Subtract the scaled kYCbCrOffset entries: [0] from Y, [1] from Cb, [2] from Cr.
	ld.const.f32 	%f21, [kYCbCrOffset];
	div.approx.ftz.f32 	%f23, %f10, %f10;
	mul.ftz.f32 	%f24, %f21, %f23;
	sub.ftz.f32 	%f25, %f20, %f24;
	ld.const.f32 	%f26, [kYCbCrOffset+4];
	mul.ftz.f32 	%f27, %f26, %f23;
	sub.ftz.f32 	%f28, %f19, %f27;
	ld.const.f32 	%f29, [kYCbCrOffset+8];
	mul.ftz.f32 	%f30, %f29, %f23;
	sub.ftz.f32 	%f31, %f18, %f30;
	// 3x3 matrix multiply with k601YCbCr_To_RGB32f (row-major, 12 bytes per row).
	// Row 0 -> %f37 (R)
	ld.const.f32 	%f32, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f33, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f34, %f28, %f33;
	fma.rn.ftz.f32 	%f35, %f25, %f32, %f34;
	ld.const.f32 	%f36, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f37, %f31, %f36, %f35;
	// Row 1 -> %f43 (G)
	ld.const.f32 	%f38, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f39, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f40, %f28, %f39;
	fma.rn.ftz.f32 	%f41, %f25, %f38, %f40;
	ld.const.f32 	%f42, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f43, %f31, %f42, %f41;
	// Row 2 -> %f49 (B)
	ld.const.f32 	%f44, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f45, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f46, %f28, %f45;
	fma.rn.ftz.f32 	%f47, %f25, %f44, %f46;
	ld.const.f32 	%f48, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f49, %f31, %f48, %f47;
	// Divide out alpha: %f51 = 1 / alpha, then scale R, G, B by it.
	div.approx.ftz.f32 	%f51, %f11, %f52;
	mul.ftz.f32 	%f53, %f37, %f51;
	mul.ftz.f32 	%f54, %f43, %f51;
	mul.ftz.f32 	%f55, %f49, %f51;

BB9_4:
	// Here %f55 = B, %f54 = G, %f53 = R, %f52 = A (all zero on the low-alpha path).
	// Destination pixel index = y * param_4 + x; param_5 == 0 selects the f16 store path.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	cvt.s64.s32	%rd2, %r15;
	setp.eq.s32	%p5, %r5, 0;
	@%p5 bra 	BB9_6;

	// f32 destination: 16 bytes per pixel, stored in the order B, G, R, A.
	shl.b64 	%rd8, %rd2, 4;
	add.s64 	%rd9, %rd1, %rd8;
	st.global.v4.f32 	[%rd9], {%f55, %f54, %f53, %f52};
	bra.uni 	BB9_7;

BB9_6:
	// f16 destination: 8 bytes per pixel. Each channel is rounded to half
	// precision (round-to-nearest) and stored in the same B, G, R, A order.
	shl.b64 	%rd10, %rd2, 3;
	add.s64 	%rd11, %rd1, %rd10;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f52;
	mov.b16 	%rs12, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f53;
	mov.b16 	%rs13, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f54;
	mov.b16 	%rs14, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f55;
	mov.b16 	%rs15, %temp;
}
	st.global.v4.u16 	[%rd11], {%rs15, %rs14, %rs13, %rs12};

BB9_7:
	ret;
}

//-----------------------------------------------------------------------
// PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel
//
// One thread converts one pixel: four floating-point channels (read as
// f32 or widened from f16) are taken as B, G, R, A; the colour channels
// are run through the 3x3 matrix kRGB32f_To_601YCbCr and offset by
// kYCbCrOffset. The offset is then removed again, the remainder is
// multiplied by alpha, and the offset is re-added, so only the part
// above the offset is alpha-scaled. All four values are rounded
// (add 0.5, truncate), clamped to [0, 255] and stored as the bytes
// V, U, Y, A.
//
// Parameters as used by the code below:
//   param_0 (u64) source base address (converted to a global address)
//   param_1 (u32) source row stride, in pixels
//   param_2 (u32) source element selector: non-zero reads four f32
//                 values (16 bytes/pixel), zero reads four f16 values
//                 (8 bytes/pixel)
//   param_3 (u64) destination base address (converted to a global address)
//   param_4 (u32) destination row stride, in pixels (4 bytes per pixel)
//   param_5 (u32) declared but never loaded
//   param_6 (u32) exclusive upper bound for x (signed compare)
//   param_7 (u32) exclusive upper bound for y (signed compare)
//   param_8 (u32) declared but never loaded
//
// NOTE(review): kYCbCrOffset is referenced but not declared in the part
// of the file visible here; three consecutive f32 values are read from it.
//-----------------------------------------------------------------------
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<73>;
	.reg .s64 	%rd<12>;


	// Load the parameters this kernel actually uses (param_5 and param_8 are skipped).
	ld.param.u64 	%rd5, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd5;
	// %r1 = x = ntid.x * ctaid.x + tid.x ; %r2 = y = ntid.y * ctaid.y + tid.y
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Bounds guard: threads with x >= param_6 or y >= param_7 return immediately.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB10_5;
	bra.uni 	BB10_1;

BB10_1:
	// Source pixel index = y * param_1 + x (formed in 32 bits, then sign-extended).
	// param_2 == 0 selects the f16 load path at BB10_3.
	cvta.to.global.u64 	%rd2, %rd4;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r14;
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB10_3;

	// f32 source: 16 bytes per pixel. %f69..%f72 receive the four channels
	// in memory order (used below as B, G, R, A).
	shl.b64 	%rd6, %rd3, 4;
	add.s64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd7];
	mov.f32 	%f72, %f16;
	mov.f32 	%f71, %f15;
	mov.f32 	%f70, %f14;
	mov.f32 	%f69, %f13;
	bra.uni 	BB10_4;

BB10_3:
	// f16 source: 8 bytes per pixel; each half is widened to f32 into %f69..%f72.
	shl.b64 	%rd8, %rd3, 3;
	add.s64 	%rd9, %rd1, %rd8;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd9];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f69, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f70, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f71, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f72, %temp;
	}

BB10_4:
	// 3x3 matrix multiply with kRGB32f_To_601YCbCr (row-major, 12 bytes per row).
	// Column 0 multiplies %f71 (R), column 1 %f70 (G), column 2 %f69 (B).
	// Row 0 -> %f22 (Y)
	ld.const.f32 	%f17, [kRGB32f_To_601YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_601YCbCr+4];
	mul.ftz.f32 	%f19, %f70, %f18;
	fma.rn.ftz.f32 	%f20, %f71, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_601YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f69, %f21, %f20;
	// Row 1 -> %f28 (Cb)
	ld.const.f32 	%f23, [kRGB32f_To_601YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_601YCbCr+16];
	mul.ftz.f32 	%f25, %f70, %f24;
	fma.rn.ftz.f32 	%f26, %f71, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_601YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f69, %f27, %f26;
	// Row 2 -> %f34 (Cr)
	ld.const.f32 	%f29, [kRGB32f_To_601YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_601YCbCr+28];
	mul.ftz.f32 	%f31, %f70, %f30;
	fma.rn.ftz.f32 	%f32, %f71, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_601YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f69, %f33, %f32;
	// %f37 = 255 / 1; %f38 = alpha scaled to the 0..255 range.
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	div.approx.ftz.f32 	%f37, %f36, %f35;
	mul.ftz.f32 	%f38, %f72, %f37;
	// %f40 = 255 / 255 (approximate divide, nominally 1.0): scale applied to the offsets.
	// %f41/%f44/%f47 are the scaled offsets; %f42/%f45/%f48 are Y/Cb/Cr with offset added.
	ld.const.f32 	%f39, [kYCbCrOffset];
	div.approx.ftz.f32 	%f40, %f36, %f36;
	mul.ftz.f32 	%f41, %f39, %f40;
	add.ftz.f32 	%f42, %f22, %f41;
	ld.const.f32 	%f43, [kYCbCrOffset+4];
	mul.ftz.f32 	%f44, %f43, %f40;
	add.ftz.f32 	%f45, %f28, %f44;
	ld.const.f32 	%f46, [kYCbCrOffset+8];
	mul.ftz.f32 	%f47, %f46, %f40;
	add.ftz.f32 	%f48, %f34, %f47;
	// %f49 = scaled alpha * (1/255): the multiplier applied to the colour channels.
	mul.ftz.f32 	%f49, %f38, 0f3B808081;
	// Remove the offset, multiply by %f49, then re-add the offset via fma.
	sub.ftz.f32 	%f50, %f42, %f41;
	sub.ftz.f32 	%f51, %f45, %f44;
	sub.ftz.f32 	%f52, %f48, %f47;
	fma.rn.ftz.f32 	%f53, %f50, %f49, %f41;
	fma.rn.ftz.f32 	%f54, %f51, %f49, %f44;
	fma.rn.ftz.f32 	%f55, %f52, %f49, %f47;
	// Rounding bias of 0.5 ahead of the truncating conversion below.
	add.ftz.f32 	%f56, %f55, 0f3F000000;
	add.ftz.f32 	%f57, %f54, 0f3F000000;
	add.ftz.f32 	%f58, %f53, 0f3F000000;
	add.ftz.f32 	%f59, %f38, 0f3F000000;
	// Clamp every channel to [0.0, 255.0].
	mov.f32 	%f60, 0f00000000;
	max.ftz.f32 	%f61, %f56, %f60;
	min.ftz.f32 	%f62, %f61, %f36;
	max.ftz.f32 	%f63, %f57, %f60;
	min.ftz.f32 	%f64, %f63, %f36;
	max.ftz.f32 	%f65, %f58, %f60;
	min.ftz.f32 	%f66, %f65, %f36;
	max.ftz.f32 	%f67, %f59, %f60;
	min.ftz.f32 	%f68, %f67, %f36;
	// Truncate toward zero: %r15 = Cr (V), %r16 = Cb (U), %r17 = Y, %r18 = A.
	cvt.rzi.ftz.u32.f32	%r15, %f62;
	cvt.rzi.ftz.u32.f32	%r16, %f64;
	cvt.rzi.ftz.u32.f32	%r17, %f66;
	cvt.rzi.ftz.u32.f32	%r18, %f68;
	// Destination pixel address = dst + (y * param_4 + x) * 4.
	mad.lo.s32 	%r19, %r2, %r5, %r1;
	mul.wide.s32 	%rd10, %r19, 4;
	add.s64 	%rd11, %rd2, %rd10;
	cvt.u16.u32	%rs9, %r17;
	cvt.u16.u32	%rs10, %r16;
	cvt.u16.u32	%rs11, %r15;
	cvt.u16.u32	%rs12, %r18;
	// Store the four bytes in the order V, U, Y, A.
	st.global.v4.u8 	[%rd11], {%rs11, %rs10, %rs9, %rs12};

BB10_5:
	ret;
}

// PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel
//
// One thread per pixel. Reads a 4 x u8 source element, subtracts the
// kYCbCrOffset terms from the first three bytes, multiplies by the 3x3
// k601YCbCr_To_RGB32f matrix and stores four components, the fourth being the
// constant 1.0. The output is written either as 4 x f32 or as 4 x f16.
//
// Parameters, as used by the code below:
//   param_0  u64  source pointer (converted to a global address)
//   param_1  u32  multiplied by the y index to form the source element index
//                 (presumably the source row pitch in pixels - confirm with host code)
//   param_2  u32  not read by this kernel
//   param_3  u64  destination pointer (converted to a global address)
//   param_4  u32  multiplied by the y index to form the destination element index
//                 (presumably the destination row pitch in pixels - confirm)
//   param_5  u32  nonzero: store 4 x f32 (16 bytes per pixel);
//                 zero:    store 4 x f16 (8 bytes per pixel)
//   param_6  u32  exclusive upper bound for the x index (signed compare)
//   param_7  u32  exclusive upper bound for the y index (signed compare)
//   param_8  u32  not read by this kernel
.visible .entry PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<9>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<35>;
	.reg .s64 	%rd<12>;


	// Fetch the kernel arguments that are actually used.
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYX_4444_8u_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	// %r1 = x index = ntid.x * ctaid.x + tid.x
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	// %r2 = y index = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB11_4;
	bra.uni 	BB11_1;

BB11_1:
	// Source address = src + (y * param_1 + x) * 4 bytes.
	cvta.to.global.u64 	%rd5, %rd3;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	mul.wide.s32 	%rd6, %r14, 4;
	add.s64 	%rd7, %rd5, %rd6;
	// Load the four source bytes. Going by the kernel name they are V, U, Y, X;
	// the fourth byte (%rs4) is loaded but never used.
	ld.global.v4.u8 	{%rs1, %rs2, %rs3, %rs4}, [%rd7];
	cvt.rn.f32.u16	%f4, %rs1;
	cvt.rn.f32.u16	%f5, %rs2;
	cvt.rn.f32.u16	%f6, %rs3;
	// %f8 = 255.0; %f9 = approximate 255/255 (nominally 1.0), used to scale
	// the three kYCbCrOffset entries before they are subtracted.
	ld.const.f32 	%f7, [kYCbCrOffset];
	mov.f32 	%f8, 0f437F0000;
	div.approx.ftz.f32 	%f9, %f8, %f8;
	mul.ftz.f32 	%f10, %f7, %f9;
	// %f11 = byte 2 - offset[0]
	sub.ftz.f32 	%f11, %f6, %f10;
	ld.const.f32 	%f12, [kYCbCrOffset+4];
	mul.ftz.f32 	%f13, %f12, %f9;
	// %f14 = byte 1 - offset[1]
	sub.ftz.f32 	%f14, %f5, %f13;
	ld.const.f32 	%f15, [kYCbCrOffset+8];
	mul.ftz.f32 	%f16, %f15, %f9;
	// %f17 = byte 0 - offset[2]
	sub.ftz.f32 	%f17, %f4, %f16;
	// 3x3 matrix multiply with k601YCbCr_To_RGB32f (nine consecutive floats).
	// %f1 = row 0 . (%f11, %f14, %f17)
	ld.const.f32 	%f18, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f19, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f20, %f14, %f19;
	fma.rn.ftz.f32 	%f21, %f11, %f18, %f20;
	ld.const.f32 	%f22, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f17, %f22, %f21;
	// %f2 = row 1 . (%f11, %f14, %f17)
	ld.const.f32 	%f23, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f24, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f25, %f14, %f24;
	fma.rn.ftz.f32 	%f26, %f11, %f23, %f25;
	ld.const.f32 	%f27, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f17, %f27, %f26;
	// %f3 = row 2 . (%f11, %f14, %f17)
	ld.const.f32 	%f28, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f29, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f30, %f14, %f29;
	fma.rn.ftz.f32 	%f31, %f11, %f28, %f30;
	ld.const.f32 	%f32, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f17, %f32, %f31;
	// Destination element index = y * param_4 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	cvt.s64.s32	%rd2, %r15;
	// param_5 == 0 selects the 16-bit float output path at BB11_3.
	setp.eq.s32	%p4, %r5, 0;
	@%p4 bra 	BB11_3;

	// 32-bit float output: 16 bytes per pixel, stored as {row 2, row 1, row 0, 1.0}.
	shl.b64 	%rd8, %rd2, 4;
	add.s64 	%rd9, %rd1, %rd8;
	mov.f32 	%f33, 0f3F800000;
	st.global.v4.f32 	[%rd9], {%f3, %f2, %f1, %f33};
	bra.uni 	BB11_4;

BB11_3:
	// 16-bit float output: 8 bytes per pixel. Each component is converted to
	// f16 with round-to-nearest and stored in the same order as the f32 path.
	shl.b64 	%rd10, %rd2, 3;
	add.s64 	%rd11, %rd1, %rd10;
	mov.f32 	%f34, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f34;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd11], {%rs8, %rs7, %rs6, %rs5};

BB11_4:
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel
//
// One thread per pixel. Reads four float components (as 4 x f32 or 4 x f16),
// multiplies the first three by the 3x3 kRGB32f_To_601YCbCr matrix, blends
// each result towards its kYCbCrOffset term by the fourth input component,
// rounds and clamps to [0, 255] and stores 4 x u8. The fourth output byte is
// the constant 255.
//
// Parameters, as used by the code below:
//   param_0  u64  source pointer (converted to a global address)
//   param_1  u32  multiplied by the y index to form the source element index
//                 (presumably the source row pitch in pixels - confirm with host code)
//   param_2  u32  nonzero: source is 4 x f32 (16 bytes per pixel);
//                 zero:    source is 4 x f16 (8 bytes per pixel)
//   param_3  u64  destination pointer (converted to a global address)
//   param_4  u32  multiplied by the y index to form the destination element index
//                 (presumably the destination row pitch in pixels - confirm)
//   param_5  u32  not read by this kernel
//   param_6  u32  exclusive upper bound for the x index (signed compare)
//   param_7  u32  exclusive upper bound for the y index (signed compare)
//   param_8  u32  not read by this kernel
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<73>;
	.reg .s64 	%rd<12>;


	// Fetch the kernel arguments that are actually used.
	ld.param.u64 	%rd5, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd5;
	// %r1 = x index = ntid.x * ctaid.x + tid.x
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	// %r2 = y index = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB12_5;
	bra.uni 	BB12_1;

BB12_1:
	// Source element index = y * param_1 + x, sign-extended to 64 bits.
	cvta.to.global.u64 	%rd2, %rd4;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r14;
	// param_2 == 0 selects the 16-bit float input path at BB12_3.
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB12_3;

	// 32-bit float input: 16 bytes per pixel.
	// Components 0..3 end up in %f69..%f72 (B, G, R, A going by the kernel name).
	shl.b64 	%rd6, %rd3, 4;
	add.s64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd7];
	mov.f32 	%f72, %f16;
	mov.f32 	%f71, %f15;
	mov.f32 	%f70, %f14;
	mov.f32 	%f69, %f13;
	bra.uni 	BB12_4;

BB12_3:
	// 16-bit float input: 8 bytes per pixel, each half widened to f32
	// into the same %f69..%f72 registers.
	shl.b64 	%rd8, %rd3, 3;
	add.s64 	%rd9, %rd1, %rd8;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd9];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f69, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f70, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f71, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f72, %temp;
	}

BB12_4:
	// 3x3 matrix multiply with kRGB32f_To_601YCbCr. The input vector is
	// (component 2, component 1, component 0) = (%f71, %f70, %f69).
	// %f22 = row 0 result
	ld.const.f32 	%f17, [kRGB32f_To_601YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_601YCbCr+4];
	mul.ftz.f32 	%f19, %f70, %f18;
	fma.rn.ftz.f32 	%f20, %f71, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_601YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f69, %f21, %f20;
	// %f28 = row 1 result
	ld.const.f32 	%f23, [kRGB32f_To_601YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_601YCbCr+16];
	mul.ftz.f32 	%f25, %f70, %f24;
	fma.rn.ftz.f32 	%f26, %f71, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_601YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f69, %f27, %f26;
	// %f34 = row 2 result
	ld.const.f32 	%f29, [kRGB32f_To_601YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_601YCbCr+28];
	mul.ftz.f32 	%f31, %f70, %f30;
	fma.rn.ftz.f32 	%f32, %f71, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_601YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f69, %f33, %f32;
	// %f36 = 255.0; %f37 = approximate 255/1; %f38 = component 3 * %f37.
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	div.approx.ftz.f32 	%f37, %f36, %f35;
	mul.ftz.f32 	%f38, %f72, %f37;
	// %f40 = approximate 255/255 (nominally 1.0) scales each kYCbCrOffset entry;
	// the scaled offsets (%f41, %f44, %f47) are added to the matrix results.
	ld.const.f32 	%f39, [kYCbCrOffset];
	div.approx.ftz.f32 	%f40, %f36, %f36;
	mul.ftz.f32 	%f41, %f39, %f40;
	add.ftz.f32 	%f42, %f22, %f41;
	ld.const.f32 	%f43, [kYCbCrOffset+4];
	mul.ftz.f32 	%f44, %f43, %f40;
	add.ftz.f32 	%f45, %f28, %f44;
	ld.const.f32 	%f46, [kYCbCrOffset+8];
	mul.ftz.f32 	%f47, %f46, %f40;
	add.ftz.f32 	%f48, %f34, %f47;
	// %f49 = %f38 * (approximately 1/255), i.e. component 3 back in its input scale.
	mul.ftz.f32 	%f49, %f38, 0f3B808081;
	// result = offset + (value - offset) * %f49 for each of the three channels.
	sub.ftz.f32 	%f50, %f42, %f41;
	sub.ftz.f32 	%f51, %f45, %f44;
	sub.ftz.f32 	%f52, %f48, %f47;
	fma.rn.ftz.f32 	%f53, %f50, %f49, %f41;
	fma.rn.ftz.f32 	%f54, %f51, %f49, %f44;
	fma.rn.ftz.f32 	%f55, %f52, %f49, %f47;
	// Add 0.5 so the truncating conversion below rounds to nearest.
	add.ftz.f32 	%f56, %f55, 0f3F000000;
	add.ftz.f32 	%f57, %f54, 0f3F000000;
	add.ftz.f32 	%f58, %f53, 0f3F000000;
	// Clamp each channel to [0.0, 255.0].
	mov.f32 	%f59, 0f00000000;
	max.ftz.f32 	%f60, %f56, %f59;
	min.ftz.f32 	%f61, %f60, %f36;
	max.ftz.f32 	%f62, %f57, %f59;
	min.ftz.f32 	%f63, %f62, %f36;
	max.ftz.f32 	%f64, %f58, %f59;
	min.ftz.f32 	%f65, %f64, %f36;
	// Fourth output channel: the constant 255.5 pushed through the same clamp,
	// which yields 255.0 regardless of the input.
	mov.f32 	%f66, 0f437F8000;
	max.ftz.f32 	%f67, %f66, %f59;
	min.ftz.f32 	%f68, %f67, %f36;
	// Truncate towards zero to unsigned integers.
	cvt.rzi.ftz.u32.f32	%r15, %f61;
	cvt.rzi.ftz.u32.f32	%r16, %f63;
	cvt.rzi.ftz.u32.f32	%r17, %f65;
	cvt.rzi.ftz.u32.f32	%r18, %f68;
	// Destination address = dst + (y * param_4 + x) * 4 bytes.
	mad.lo.s32 	%r19, %r2, %r5, %r1;
	mul.wide.s32 	%rd10, %r19, 4;
	add.s64 	%rd11, %rd2, %rd10;
	cvt.u16.u32	%rs9, %r17;
	cvt.u16.u32	%rs10, %r16;
	cvt.u16.u32	%rs11, %r15;
	cvt.u16.u32	%rs12, %r18;
	// Byte order written: {row 2, row 1, row 0, 255} (V, U, Y, X going by the kernel name).
	st.global.v4.u8 	[%rd11], {%rs11, %rs10, %rs9, %rs12};

BB12_5:
	ret;
}

// PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel
//
// One thread per pixel. Reads a 4 x u8 source element, subtracts the
// kYCbCrOffset terms from the first three bytes, multiplies by the 3x3
// k709YCbCr_To_RGB32f matrix, scales the fourth byte by approximately 1/255
// and stores the four results as 4 x f32 or 4 x f16.
//
// Parameters, as used by the code below:
//   param_0  u64  source pointer (converted to a global address)
//   param_1  u32  multiplied by the y index to form the source element index
//                 (presumably the source row pitch in pixels - confirm with host code)
//   param_2  u32  not read by this kernel
//   param_3  u64  destination pointer (converted to a global address)
//   param_4  u32  multiplied by the y index to form the destination element index
//                 (presumably the destination row pitch in pixels - confirm)
//   param_5  u32  nonzero: store 4 x f32 (16 bytes per pixel);
//                 zero:    store 4 x f16 (8 bytes per pixel)
//   param_6  u32  exclusive upper bound for the x index (signed compare)
//   param_7  u32  exclusive upper bound for the y index (signed compare)
//   param_8  u32  not read by this kernel
.visible .entry PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<9>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<37>;
	.reg .s64 	%rd<12>;


	// Fetch the kernel arguments that are actually used.
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYA_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	// %r1 = x index = ntid.x * ctaid.x + tid.x
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	// %r2 = y index = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB13_4;
	bra.uni 	BB13_1;

BB13_1:
	// Source address = src + (y * param_1 + x) * 4 bytes.
	cvta.to.global.u64 	%rd5, %rd3;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	mul.wide.s32 	%rd6, %r14, 4;
	add.s64 	%rd7, %rd5, %rd6;
	// Load the four source bytes (V, U, Y, A going by the kernel name)
	// and convert each to float.
	ld.global.v4.u8 	{%rs1, %rs2, %rs3, %rs4}, [%rd7];
	cvt.rn.f32.u16	%f5, %rs1;
	cvt.rn.f32.u16	%f6, %rs2;
	cvt.rn.f32.u16	%f7, %rs3;
	cvt.rn.f32.u16	%f8, %rs4;
	// %f10 = 255.0; %f11 = approximate 255/255 (nominally 1.0), used to scale
	// the three kYCbCrOffset entries before they are subtracted.
	ld.const.f32 	%f9, [kYCbCrOffset];
	mov.f32 	%f10, 0f437F0000;
	div.approx.ftz.f32 	%f11, %f10, %f10;
	mul.ftz.f32 	%f12, %f9, %f11;
	// %f13 = byte 2 - offset[0]
	sub.ftz.f32 	%f13, %f7, %f12;
	ld.const.f32 	%f14, [kYCbCrOffset+4];
	mul.ftz.f32 	%f15, %f14, %f11;
	// %f16 = byte 1 - offset[1]
	sub.ftz.f32 	%f16, %f6, %f15;
	ld.const.f32 	%f17, [kYCbCrOffset+8];
	mul.ftz.f32 	%f18, %f17, %f11;
	// %f19 = byte 0 - offset[2]
	sub.ftz.f32 	%f19, %f5, %f18;
	// 3x3 matrix multiply with k709YCbCr_To_RGB32f (nine consecutive floats).
	// %f1 = row 0 . (%f13, %f16, %f19)
	ld.const.f32 	%f20, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f21, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f22, %f16, %f21;
	fma.rn.ftz.f32 	%f23, %f13, %f20, %f22;
	ld.const.f32 	%f24, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f19, %f24, %f23;
	// %f2 = row 1 . (%f13, %f16, %f19)
	ld.const.f32 	%f25, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f26, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f27, %f16, %f26;
	fma.rn.ftz.f32 	%f28, %f13, %f25, %f27;
	ld.const.f32 	%f29, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f19, %f29, %f28;
	// %f3 = row 2 . (%f13, %f16, %f19)
	ld.const.f32 	%f30, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f31, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f32, %f16, %f31;
	fma.rn.ftz.f32 	%f33, %f13, %f30, %f32;
	ld.const.f32 	%f34, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f19, %f34, %f33;
	// %f4 = byte 3 * (approximate 1/255): fourth channel scaled from 0..255 to 0..1.
	mov.f32 	%f35, 0f3F800000;
	div.approx.ftz.f32 	%f36, %f35, %f10;
	mul.ftz.f32 	%f4, %f8, %f36;
	// Destination element index = y * param_4 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	cvt.s64.s32	%rd2, %r15;
	// param_5 == 0 selects the 16-bit float output path at BB13_3.
	setp.eq.s32	%p4, %r5, 0;
	@%p4 bra 	BB13_3;

	// 32-bit float output: 16 bytes per pixel, stored as {row 2, row 1, row 0, %f4}.
	shl.b64 	%rd8, %rd2, 4;
	add.s64 	%rd9, %rd1, %rd8;
	st.global.v4.f32 	[%rd9], {%f3, %f2, %f1, %f4};
	bra.uni 	BB13_4;

BB13_3:
	// 16-bit float output: 8 bytes per pixel. Each component is converted to
	// f16 with round-to-nearest and stored in the same order as the f32 path.
	shl.b64 	%rd10, %rd2, 3;
	add.s64 	%rd11, %rd1, %rd10;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd11], {%rs8, %rs7, %rs6, %rs5};

BB13_4:
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel
//
// One thread per pixel. Reads four float components (as 4 x f32 or 4 x f16),
// multiplies the first three by the 3x3 kRGB32f_To_709YCbCr matrix, adds the
// kYCbCrOffset terms, scales the fourth component by 255, then rounds, clamps
// to [0, 255] and stores 4 x u8. Unlike the VUYX/VUYP variants in this file,
// the colour channels are not multiplied by the fourth component.
//
// Parameters, as used by the code below:
//   param_0  u64  source pointer (converted to a global address)
//   param_1  u32  multiplied by the y index to form the source element index
//                 (presumably the source row pitch in pixels - confirm with host code)
//   param_2  u32  nonzero: source is 4 x f32 (16 bytes per pixel);
//                 zero:    source is 4 x f16 (8 bytes per pixel)
//   param_3  u64  destination pointer (converted to a global address)
//   param_4  u32  multiplied by the y index to form the destination element index
//                 (presumably the destination row pitch in pixels - confirm)
//   param_5  u32  not read by this kernel
//   param_6  u32  exclusive upper bound for the x index (signed compare)
//   param_7  u32  exclusive upper bound for the y index (signed compare)
//   param_8  u32  not read by this kernel
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<62>;
	.reg .s64 	%rd<12>;


	// Fetch the kernel arguments that are actually used.
	ld.param.u64 	%rd5, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_8u_709_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd5;
	// %r1 = x index = ntid.x * ctaid.x + tid.x
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	// %r2 = y index = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB14_5;
	bra.uni 	BB14_1;

BB14_1:
	// Source element index = y * param_1 + x, sign-extended to 64 bits.
	cvta.to.global.u64 	%rd2, %rd4;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r14;
	// param_2 == 0 selects the 16-bit float input path at BB14_3.
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB14_3;

	// 32-bit float input: 16 bytes per pixel.
	// Components 0..3 end up in %f58..%f61 (B, G, R, A going by the kernel name).
	shl.b64 	%rd6, %rd3, 4;
	add.s64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd7];
	mov.f32 	%f61, %f16;
	mov.f32 	%f60, %f15;
	mov.f32 	%f59, %f14;
	mov.f32 	%f58, %f13;
	bra.uni 	BB14_4;

BB14_3:
	// 16-bit float input: 8 bytes per pixel, each half widened to f32
	// into the same %f58..%f61 registers.
	shl.b64 	%rd8, %rd3, 3;
	add.s64 	%rd9, %rd1, %rd8;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd9];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f58, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f59, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f60, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f61, %temp;
	}

BB14_4:
	// 3x3 matrix multiply with kRGB32f_To_709YCbCr. The input vector is
	// (component 2, component 1, component 0) = (%f60, %f59, %f58).
	// %f22 = row 0 result
	ld.const.f32 	%f17, [kRGB32f_To_709YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_709YCbCr+4];
	mul.ftz.f32 	%f19, %f59, %f18;
	fma.rn.ftz.f32 	%f20, %f60, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_709YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f58, %f21, %f20;
	// %f28 = row 1 result
	ld.const.f32 	%f23, [kRGB32f_To_709YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_709YCbCr+16];
	mul.ftz.f32 	%f25, %f59, %f24;
	fma.rn.ftz.f32 	%f26, %f60, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_709YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f58, %f27, %f26;
	// %f34 = row 2 result
	ld.const.f32 	%f29, [kRGB32f_To_709YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_709YCbCr+28];
	mul.ftz.f32 	%f31, %f59, %f30;
	fma.rn.ftz.f32 	%f32, %f60, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_709YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f58, %f33, %f32;
	// %f36 = 255.0; %f37 = approximate 255/1 (scale for the fourth component).
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	div.approx.ftz.f32 	%f37, %f36, %f35;
	// %f39 = approximate 255/255 (nominally 1.0); each kYCbCrOffset entry is
	// scaled by it and added to the matching matrix result in a single fma.
	ld.const.f32 	%f38, [kYCbCrOffset];
	div.approx.ftz.f32 	%f39, %f36, %f36;
	fma.rn.ftz.f32 	%f40, %f38, %f39, %f22;
	ld.const.f32 	%f41, [kYCbCrOffset+4];
	fma.rn.ftz.f32 	%f42, %f41, %f39, %f28;
	ld.const.f32 	%f43, [kYCbCrOffset+8];
	fma.rn.ftz.f32 	%f44, %f43, %f39, %f34;
	// Add 0.5 so the truncating conversion below rounds to nearest.
	add.ftz.f32 	%f45, %f44, 0f3F000000;
	add.ftz.f32 	%f46, %f42, 0f3F000000;
	add.ftz.f32 	%f47, %f40, 0f3F000000;
	// %f48 = component 3 * %f37 + 0.5
	fma.rn.ftz.f32 	%f48, %f61, %f37, 0f3F000000;
	// Clamp all four channels to [0.0, 255.0].
	mov.f32 	%f49, 0f00000000;
	max.ftz.f32 	%f50, %f45, %f49;
	min.ftz.f32 	%f51, %f50, %f36;
	max.ftz.f32 	%f52, %f46, %f49;
	min.ftz.f32 	%f53, %f52, %f36;
	max.ftz.f32 	%f54, %f47, %f49;
	min.ftz.f32 	%f55, %f54, %f36;
	max.ftz.f32 	%f56, %f48, %f49;
	min.ftz.f32 	%f57, %f56, %f36;
	// Truncate towards zero to unsigned integers.
	cvt.rzi.ftz.u32.f32	%r15, %f51;
	cvt.rzi.ftz.u32.f32	%r16, %f53;
	cvt.rzi.ftz.u32.f32	%r17, %f55;
	cvt.rzi.ftz.u32.f32	%r18, %f57;
	// Destination address = dst + (y * param_4 + x) * 4 bytes.
	mad.lo.s32 	%r19, %r2, %r5, %r1;
	mul.wide.s32 	%rd10, %r19, 4;
	add.s64 	%rd11, %rd2, %rd10;
	cvt.u16.u32	%rs9, %r18;
	cvt.u16.u32	%rs10, %r17;
	cvt.u16.u32	%rs11, %r16;
	cvt.u16.u32	%rs12, %r15;
	// Byte order written: {row 2, row 1, row 0, scaled component 3}
	// (V, U, Y, A going by the kernel name).
	st.global.v4.u8 	[%rd11], {%rs12, %rs11, %rs10, %rs9};

BB14_5:
	ret;
}

// PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel
//
// One thread per pixel. Reads a 4 x u8 source element and scales the fourth
// byte by approximately 1/255 to get an alpha value. If that alpha is not
// above roughly 8e-6, all four outputs are zero. Otherwise the first three
// bytes have the kYCbCrOffset terms subtracted, are multiplied by the 3x3
// k709YCbCr_To_RGB32f matrix and are then divided by alpha. The result is
// stored as 4 x f32 or 4 x f16.
//
// Parameters, as used by the code below:
//   param_0  u64  source pointer (converted to a global address)
//   param_1  u32  multiplied by the y index to form the source element index
//                 (presumably the source row pitch in pixels - confirm with host code)
//   param_2  u32  not read by this kernel
//   param_3  u64  destination pointer (converted to a global address)
//   param_4  u32  multiplied by the y index to form the destination element index
//                 (presumably the destination row pitch in pixels - confirm)
//   param_5  u32  nonzero: store 4 x f32 (16 bytes per pixel);
//                 zero:    store 4 x f16 (8 bytes per pixel)
//   param_6  u32  exclusive upper bound for the x index (signed compare)
//   param_7  u32  exclusive upper bound for the y index (signed compare)
//   param_8  u32  not read by this kernel
.visible .entry PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<16>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<56>;
	.reg .s64 	%rd<12>;


	// Fetch the kernel arguments that are actually used.
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYP_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	// %r1 = x index = ntid.x * ctaid.x + tid.x
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	// %r2 = y index = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB15_7;
	bra.uni 	BB15_1;

BB15_1:
	// Source address = src + (y * param_1 + x) * 4 bytes.
	cvta.to.global.u64 	%rd5, %rd3;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	mul.wide.s32 	%rd6, %r14, 4;
	add.s64 	%rd7, %rd5, %rd6;
	// Load the four source bytes (V, U, Y, A going by the kernel name).
	ld.global.v4.u8 	{%rs5, %rs6, %rs7, %rs8}, [%rd7];
	// %f52 = alpha = byte 3 * (approximate 1/255).
	cvt.rn.f32.u16	%f9, %rs8;
	mov.f32 	%f10, 0f437F0000;
	mov.f32 	%f11, 0f3F800000;
	div.approx.ftz.f32 	%f12, %f11, %f10;
	mul.ftz.f32 	%f52, %f9, %f12;
	// Test alpha against a small epsilon: the added constant is about -8e-6.
	// The branch is taken when the sum is greater than zero or unordered (NaN).
	add.ftz.f32 	%f13, %f52, 0fB70637BD;
	setp.gtu.ftz.f32	%p4, %f13, 0f00000000;
	@%p4 bra 	BB15_3;

	// Alpha is effectively zero: output all four components as 0.0 and skip
	// the division by alpha in BB15_3.
	mov.f32 	%f55, 0f00000000;
	mov.f32 	%f54, %f55;
	mov.f32 	%f53, %f55;
	mov.f32 	%f52, %f55;
	bra.uni 	BB15_4;

BB15_3:
	// Mask each colour byte to its low 8 bits and convert to float.
	and.b16  	%rs9, %rs5, 255;
	cvt.rn.f32.u16	%f18, %rs9;
	and.b16  	%rs10, %rs6, 255;
	cvt.rn.f32.u16	%f19, %rs10;
	and.b16  	%rs11, %rs7, 255;
	cvt.rn.f32.u16	%f20, %rs11;
	// %f23 = approximate 255/255 (nominally 1.0), used to scale the three
	// kYCbCrOffset entries before they are subtracted.
	ld.const.f32 	%f21, [kYCbCrOffset];
	div.approx.ftz.f32 	%f23, %f10, %f10;
	mul.ftz.f32 	%f24, %f21, %f23;
	// %f25 = byte 2 - offset[0]
	sub.ftz.f32 	%f25, %f20, %f24;
	ld.const.f32 	%f26, [kYCbCrOffset+4];
	mul.ftz.f32 	%f27, %f26, %f23;
	// %f28 = byte 1 - offset[1]
	sub.ftz.f32 	%f28, %f19, %f27;
	ld.const.f32 	%f29, [kYCbCrOffset+8];
	mul.ftz.f32 	%f30, %f29, %f23;
	// %f31 = byte 0 - offset[2]
	sub.ftz.f32 	%f31, %f18, %f30;
	// 3x3 matrix multiply with k709YCbCr_To_RGB32f (nine consecutive floats).
	// %f37 = row 0 . (%f25, %f28, %f31)
	ld.const.f32 	%f32, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f33, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f34, %f28, %f33;
	fma.rn.ftz.f32 	%f35, %f25, %f32, %f34;
	ld.const.f32 	%f36, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f37, %f31, %f36, %f35;
	// %f43 = row 1 . (%f25, %f28, %f31)
	ld.const.f32 	%f38, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f39, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f40, %f28, %f39;
	fma.rn.ftz.f32 	%f41, %f25, %f38, %f40;
	ld.const.f32 	%f42, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f43, %f31, %f42, %f41;
	// %f49 = row 2 . (%f25, %f28, %f31)
	ld.const.f32 	%f44, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f45, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f46, %f28, %f45;
	fma.rn.ftz.f32 	%f47, %f25, %f44, %f46;
	ld.const.f32 	%f48, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f49, %f31, %f48, %f47;
	// %f51 = approximate 1/alpha; multiply the three matrix results by it.
	div.approx.ftz.f32 	%f51, %f11, %f52;
	mul.ftz.f32 	%f53, %f37, %f51;
	mul.ftz.f32 	%f54, %f43, %f51;
	mul.ftz.f32 	%f55, %f49, %f51;

BB15_4:
	// Both paths join here with the outputs in %f55, %f54, %f53 (rows 2, 1, 0)
	// and alpha in %f52.
	// Destination element index = y * param_4 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	cvt.s64.s32	%rd2, %r15;
	// param_5 == 0 selects the 16-bit float output path at BB15_6.
	setp.eq.s32	%p5, %r5, 0;
	@%p5 bra 	BB15_6;

	// 32-bit float output: 16 bytes per pixel.
	shl.b64 	%rd8, %rd2, 4;
	add.s64 	%rd9, %rd1, %rd8;
	st.global.v4.f32 	[%rd9], {%f55, %f54, %f53, %f52};
	bra.uni 	BB15_7;

BB15_6:
	// 16-bit float output: 8 bytes per pixel. Each component is converted to
	// f16 with round-to-nearest and stored in the same order as the f32 path.
	shl.b64 	%rd10, %rd2, 3;
	add.s64 	%rd11, %rd1, %rd10;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f52;
	mov.b16 	%rs12, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f53;
	mov.b16 	%rs13, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f54;
	mov.b16 	%rs14, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f55;
	mov.b16 	%rs15, %temp;
}
	st.global.v4.u16 	[%rd11], {%rs15, %rs14, %rs13, %rs12};

BB15_7:
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel
//
// One thread per pixel. Reads four float components (as 4 x f32 or 4 x f16),
// multiplies the first three by the 3x3 kRGB32f_To_709YCbCr matrix, blends
// each result towards its kYCbCrOffset term by the fourth input component,
// scales the fourth component by 255, then rounds, clamps to [0, 255] and
// stores 4 x u8.
//
// Parameters, as used by the code below:
//   param_0  u64  source pointer (converted to a global address)
//   param_1  u32  multiplied by the y index to form the source element index
//                 (presumably the source row pitch in pixels - confirm with host code)
//   param_2  u32  nonzero: source is 4 x f32 (16 bytes per pixel);
//                 zero:    source is 4 x f16 (8 bytes per pixel)
//   param_3  u64  destination pointer (converted to a global address)
//   param_4  u32  multiplied by the y index to form the destination element index
//                 (presumably the destination row pitch in pixels - confirm)
//   param_5  u32  not read by this kernel
//   param_6  u32  exclusive upper bound for the x index (signed compare)
//   param_7  u32  exclusive upper bound for the y index (signed compare)
//   param_8  u32  not read by this kernel
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<73>;
	.reg .s64 	%rd<12>;


	// Fetch the kernel arguments that are actually used.
	ld.param.u64 	%rd5, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_8u_709_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd5;
	// %r1 = x index = ntid.x * ctaid.x + tid.x
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	// %r2 = y index = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB16_5;
	bra.uni 	BB16_1;

BB16_1:
	// Source element index = y * param_1 + x, sign-extended to 64 bits.
	cvta.to.global.u64 	%rd2, %rd4;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r14;
	// param_2 == 0 selects the 16-bit float input path at BB16_3.
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB16_3;

	// 32-bit float input: 16 bytes per pixel.
	// Components 0..3 end up in %f69..%f72 (B, G, R, A going by the kernel name).
	shl.b64 	%rd6, %rd3, 4;
	add.s64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd7];
	mov.f32 	%f72, %f16;
	mov.f32 	%f71, %f15;
	mov.f32 	%f70, %f14;
	mov.f32 	%f69, %f13;
	bra.uni 	BB16_4;

BB16_3:
	// 16-bit float input: 8 bytes per pixel, each half widened to f32
	// into the same %f69..%f72 registers.
	shl.b64 	%rd8, %rd3, 3;
	add.s64 	%rd9, %rd1, %rd8;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd9];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f69, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f70, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f71, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f72, %temp;
	}

BB16_4:
	// 3x3 matrix multiply with kRGB32f_To_709YCbCr. The input vector is
	// (component 2, component 1, component 0) = (%f71, %f70, %f69).
	// %f22 = row 0 result
	ld.const.f32 	%f17, [kRGB32f_To_709YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_709YCbCr+4];
	mul.ftz.f32 	%f19, %f70, %f18;
	fma.rn.ftz.f32 	%f20, %f71, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_709YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f69, %f21, %f20;
	// %f28 = row 1 result
	ld.const.f32 	%f23, [kRGB32f_To_709YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_709YCbCr+16];
	mul.ftz.f32 	%f25, %f70, %f24;
	fma.rn.ftz.f32 	%f26, %f71, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_709YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f69, %f27, %f26;
	// %f34 = row 2 result
	ld.const.f32 	%f29, [kRGB32f_To_709YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_709YCbCr+28];
	mul.ftz.f32 	%f31, %f70, %f30;
	fma.rn.ftz.f32 	%f32, %f71, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_709YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f69, %f33, %f32;
	// %f36 = 255.0; %f37 = approximate 255/1; %f38 = component 3 * %f37.
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	div.approx.ftz.f32 	%f37, %f36, %f35;
	mul.ftz.f32 	%f38, %f72, %f37;
	// %f40 = approximate 255/255 (nominally 1.0) scales each kYCbCrOffset entry;
	// the scaled offsets (%f41, %f44, %f47) are added to the matrix results.
	ld.const.f32 	%f39, [kYCbCrOffset];
	div.approx.ftz.f32 	%f40, %f36, %f36;
	mul.ftz.f32 	%f41, %f39, %f40;
	add.ftz.f32 	%f42, %f22, %f41;
	ld.const.f32 	%f43, [kYCbCrOffset+4];
	mul.ftz.f32 	%f44, %f43, %f40;
	add.ftz.f32 	%f45, %f28, %f44;
	ld.const.f32 	%f46, [kYCbCrOffset+8];
	mul.ftz.f32 	%f47, %f46, %f40;
	add.ftz.f32 	%f48, %f34, %f47;
	// %f49 = %f38 * (approximately 1/255), i.e. component 3 back in its input scale.
	mul.ftz.f32 	%f49, %f38, 0f3B808081;
	// result = offset + (value - offset) * %f49 for each of the three channels.
	sub.ftz.f32 	%f50, %f42, %f41;
	sub.ftz.f32 	%f51, %f45, %f44;
	sub.ftz.f32 	%f52, %f48, %f47;
	fma.rn.ftz.f32 	%f53, %f50, %f49, %f41;
	fma.rn.ftz.f32 	%f54, %f51, %f49, %f44;
	fma.rn.ftz.f32 	%f55, %f52, %f49, %f47;
	// Add 0.5 so the truncating conversion below rounds to nearest;
	// %f59 does the same for the scaled fourth component.
	add.ftz.f32 	%f56, %f55, 0f3F000000;
	add.ftz.f32 	%f57, %f54, 0f3F000000;
	add.ftz.f32 	%f58, %f53, 0f3F000000;
	add.ftz.f32 	%f59, %f38, 0f3F000000;
	// Clamp all four channels to [0.0, 255.0].
	mov.f32 	%f60, 0f00000000;
	max.ftz.f32 	%f61, %f56, %f60;
	min.ftz.f32 	%f62, %f61, %f36;
	max.ftz.f32 	%f63, %f57, %f60;
	min.ftz.f32 	%f64, %f63, %f36;
	max.ftz.f32 	%f65, %f58, %f60;
	min.ftz.f32 	%f66, %f65, %f36;
	max.ftz.f32 	%f67, %f59, %f60;
	min.ftz.f32 	%f68, %f67, %f36;
	// Truncate towards zero to unsigned integers.
	cvt.rzi.ftz.u32.f32	%r15, %f62;
	cvt.rzi.ftz.u32.f32	%r16, %f64;
	cvt.rzi.ftz.u32.f32	%r17, %f66;
	cvt.rzi.ftz.u32.f32	%r18, %f68;
	// Destination address = dst + (y * param_4 + x) * 4 bytes.
	mad.lo.s32 	%r19, %r2, %r5, %r1;
	mul.wide.s32 	%rd10, %r19, 4;
	add.s64 	%rd11, %rd2, %rd10;
	cvt.u16.u32	%rs9, %r17;
	cvt.u16.u32	%rs10, %r16;
	cvt.u16.u32	%rs11, %r15;
	cvt.u16.u32	%rs12, %r18;
	// Byte order written: {row 2, row 1, row 0, scaled component 3}
	// (V, U, Y, P going by the kernel name).
	st.global.v4.u8 	[%rd11], {%rs11, %rs10, %rs9, %rs12};

BB16_5:
	ret;
}

// PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel
//
// One thread per pixel. Reads a 4 x u8 source element, subtracts the
// kYCbCrOffset terms from the first three bytes, multiplies by the 3x3
// k709YCbCr_To_RGB32f matrix and stores four components, the fourth being the
// constant 1.0. The output is written either as 4 x f32 or as 4 x f16.
//
// Parameters, as used by the code below:
//   param_0  u64  source pointer (converted to a global address)
//   param_1  u32  multiplied by the y index to form the source element index
//                 (presumably the source row pitch in pixels - confirm with host code)
//   param_2  u32  not read by this kernel
//   param_3  u64  destination pointer (converted to a global address)
//   param_4  u32  multiplied by the y index to form the destination element index
//                 (presumably the destination row pitch in pixels - confirm)
//   param_5  u32  nonzero: store 4 x f32 (16 bytes per pixel);
//                 zero:    store 4 x f16 (8 bytes per pixel)
//   param_6  u32  exclusive upper bound for the x index (signed compare)
//   param_7  u32  exclusive upper bound for the y index (signed compare)
//   param_8  u32  not read by this kernel
.visible .entry PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<9>;
	.reg .s32 	%r<16>;
	.reg .f32 	%f<35>;
	.reg .s64 	%rd<12>;


	// Fetch the kernel arguments that are actually used.
	ld.param.u64 	%rd3, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u64 	%rd4, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYX_4444_8u_709_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd4;
	// %r1 = x index = ntid.x * ctaid.x + tid.x
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	// %r2 = y index = ntid.y * ctaid.y + tid.y
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Threads with x >= param_6 or y >= param_7 return without touching memory.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB17_4;
	bra.uni 	BB17_1;

BB17_1:
	// Source address = src + (y * param_1 + x) * 4 bytes.
	cvta.to.global.u64 	%rd5, %rd3;
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	mul.wide.s32 	%rd6, %r14, 4;
	add.s64 	%rd7, %rd5, %rd6;
	// Load the four source bytes. Going by the kernel name they are V, U, Y, X;
	// the fourth byte (%rs4) is loaded but never used.
	ld.global.v4.u8 	{%rs1, %rs2, %rs3, %rs4}, [%rd7];
	cvt.rn.f32.u16	%f4, %rs1;
	cvt.rn.f32.u16	%f5, %rs2;
	cvt.rn.f32.u16	%f6, %rs3;
	// %f8 = 255.0; %f9 = approximate 255/255 (nominally 1.0), used to scale
	// the three kYCbCrOffset entries before they are subtracted.
	ld.const.f32 	%f7, [kYCbCrOffset];
	mov.f32 	%f8, 0f437F0000;
	div.approx.ftz.f32 	%f9, %f8, %f8;
	mul.ftz.f32 	%f10, %f7, %f9;
	// %f11 = byte 2 - offset[0]
	sub.ftz.f32 	%f11, %f6, %f10;
	ld.const.f32 	%f12, [kYCbCrOffset+4];
	mul.ftz.f32 	%f13, %f12, %f9;
	// %f14 = byte 1 - offset[1]
	sub.ftz.f32 	%f14, %f5, %f13;
	ld.const.f32 	%f15, [kYCbCrOffset+8];
	mul.ftz.f32 	%f16, %f15, %f9;
	// %f17 = byte 0 - offset[2]
	sub.ftz.f32 	%f17, %f4, %f16;
	// 3x3 matrix multiply with k709YCbCr_To_RGB32f (nine consecutive floats).
	// %f1 = row 0 . (%f11, %f14, %f17)
	ld.const.f32 	%f18, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f19, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f20, %f14, %f19;
	fma.rn.ftz.f32 	%f21, %f11, %f18, %f20;
	ld.const.f32 	%f22, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f1, %f17, %f22, %f21;
	// %f2 = row 1 . (%f11, %f14, %f17)
	ld.const.f32 	%f23, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f24, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f25, %f14, %f24;
	fma.rn.ftz.f32 	%f26, %f11, %f23, %f25;
	ld.const.f32 	%f27, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f2, %f17, %f27, %f26;
	// %f3 = row 2 . (%f11, %f14, %f17)
	ld.const.f32 	%f28, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f29, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f30, %f14, %f29;
	fma.rn.ftz.f32 	%f31, %f11, %f28, %f30;
	ld.const.f32 	%f32, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f3, %f17, %f32, %f31;
	// Destination element index = y * param_4 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r15, %r2, %r4, %r1;
	cvt.s64.s32	%rd2, %r15;
	// param_5 == 0 selects the 16-bit float output path at BB17_3.
	setp.eq.s32	%p4, %r5, 0;
	@%p4 bra 	BB17_3;

	// 32-bit float output: 16 bytes per pixel, stored as {row 2, row 1, row 0, 1.0}.
	shl.b64 	%rd8, %rd2, 4;
	add.s64 	%rd9, %rd1, %rd8;
	mov.f32 	%f33, 0f3F800000;
	st.global.v4.f32 	[%rd9], {%f3, %f2, %f1, %f33};
	bra.uni 	BB17_4;

BB17_3:
	// 16-bit float output: 8 bytes per pixel. Each component is converted to
	// f16 with round-to-nearest and stored in the same order as the f32 path.
	shl.b64 	%rd10, %rd2, 3;
	add.s64 	%rd11, %rd1, %rd10;
	mov.f32 	%f34, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f34;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f1;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f2;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f3;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd11], {%rs8, %rs7, %rs6, %rs5};

BB17_4:
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel
//
// One thread per pixel: x = ntid.x*ctaid.x + tid.x, y = ntid.y*ctaid.y + tid.y.
// Each in-bounds thread loads one 4-lane source pixel, applies the 3x3 matrix
// kRGB32f_To_709YCbCr, adds kYCbCrOffset, scales the excursion from that offset
// by source lane 3, adds 0.5, clamps to [0, 255], truncates to integers and
// stores 4 bytes. The fourth output byte is always 255.
// Lane names (B,G,R,A in / V,U,Y,X out) follow the kernel name; the code itself
// only fixes lane positions.
//
// Parameters, as used by the code below:
//   param_0  source base address (converted to a global address)
//   param_1  source row stride in pixels (multiplied by y)
//   param_2  source format flag: nonzero -> 4 x f32 (16 bytes/pixel),
//            zero -> 4 x f16 (8 bytes/pixel)
//   param_3  destination base address (converted to a global address)
//   param_4  destination row stride in pixels (multiplied by y)
//   param_5  never read by this kernel
//   param_6  exclusive upper bound on x
//   param_7  exclusive upper bound on y
//   param_8  never read by this kernel
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_8
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<20>;
	.reg .f32 	%f<73>;
	.reg .s64 	%rd<12>;


	// Load the arguments that are actually used (param_5 and param_8 are skipped).
	ld.param.u64 	%rd5, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_2];
	ld.param.u64 	%rd4, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_6];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_8u_709_Kernel_param_7];
	cvta.to.global.u64 	%rd1, %rd5;
	// %r1 = x, %r2 = y for this thread.
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	// Bounds guard (signed compare): threads with x >= param_6 or y >= param_7 exit.
	setp.lt.s32	%p1, %r1, %r6;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB18_5;
	bra.uni 	BB18_1;

BB18_1:
	cvta.to.global.u64 	%rd2, %rd4;
	// Source pixel index = y * param_1 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r14, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r14;
	// param_2 == 0 selects the f16 source path at BB18_3.
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB18_3;

	// f32 source: 16 bytes per pixel; lanes 0..3 land in %f69..%f72.
	shl.b64 	%rd6, %rd3, 4;
	add.s64 	%rd7, %rd1, %rd6;
	ld.global.v4.f32 	{%f13, %f14, %f15, %f16}, [%rd7];
	mov.f32 	%f72, %f16;
	mov.f32 	%f71, %f15;
	mov.f32 	%f70, %f14;
	mov.f32 	%f69, %f13;
	bra.uni 	BB18_4;

BB18_3:
	// f16 source: 8 bytes per pixel; each half is widened to f32 into %f69..%f72.
	shl.b64 	%rd8, %rd3, 3;
	add.s64 	%rd9, %rd1, %rd8;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd9];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f69, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f70, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f71, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f72, %temp;
	}

BB18_4:
	// 3x3 matrix multiply. For each row r (byte offset 12*r):
	//   result = m[r][0]*lane2 + m[r][1]*lane1 + m[r][2]*lane0
	// Row 0 -> %f22, row 1 -> %f28, row 2 -> %f34.
	ld.const.f32 	%f17, [kRGB32f_To_709YCbCr];
	ld.const.f32 	%f18, [kRGB32f_To_709YCbCr+4];
	mul.ftz.f32 	%f19, %f70, %f18;
	fma.rn.ftz.f32 	%f20, %f71, %f17, %f19;
	ld.const.f32 	%f21, [kRGB32f_To_709YCbCr+8];
	fma.rn.ftz.f32 	%f22, %f69, %f21, %f20;
	ld.const.f32 	%f23, [kRGB32f_To_709YCbCr+12];
	ld.const.f32 	%f24, [kRGB32f_To_709YCbCr+16];
	mul.ftz.f32 	%f25, %f70, %f24;
	fma.rn.ftz.f32 	%f26, %f71, %f23, %f25;
	ld.const.f32 	%f27, [kRGB32f_To_709YCbCr+20];
	fma.rn.ftz.f32 	%f28, %f69, %f27, %f26;
	ld.const.f32 	%f29, [kRGB32f_To_709YCbCr+24];
	ld.const.f32 	%f30, [kRGB32f_To_709YCbCr+28];
	mul.ftz.f32 	%f31, %f70, %f30;
	fma.rn.ftz.f32 	%f32, %f71, %f29, %f31;
	ld.const.f32 	%f33, [kRGB32f_To_709YCbCr+32];
	fma.rn.ftz.f32 	%f34, %f69, %f33, %f32;
	// %f35 = 1.0, %f36 = 255.0; %f37 = 255/1 (approximate divide).
	mov.f32 	%f35, 0f3F800000;
	mov.f32 	%f36, 0f437F0000;
	div.approx.ftz.f32 	%f37, %f36, %f35;
	// %f38 = lane3 * 255.
	mul.ftz.f32 	%f38, %f72, %f37;
	// %f40 = 255/255 (approximate divide); it scales each kYCbCrOffset entry.
	ld.const.f32 	%f39, [kYCbCrOffset];
	div.approx.ftz.f32 	%f40, %f36, %f36;
	mul.ftz.f32 	%f41, %f39, %f40;
	add.ftz.f32 	%f42, %f22, %f41;
	ld.const.f32 	%f43, [kYCbCrOffset+4];
	mul.ftz.f32 	%f44, %f43, %f40;
	add.ftz.f32 	%f45, %f28, %f44;
	ld.const.f32 	%f46, [kYCbCrOffset+8];
	mul.ftz.f32 	%f47, %f46, %f40;
	add.ftz.f32 	%f48, %f34, %f47;
	// %f49 = (lane3 * 255) * 0f3B808081, where the constant is approximately 1/255.
	mul.ftz.f32 	%f49, %f38, 0f3B808081;
	// Blend each channel toward its offset: out = offset + (value - offset) * %f49.
	sub.ftz.f32 	%f50, %f42, %f41;
	sub.ftz.f32 	%f51, %f45, %f44;
	sub.ftz.f32 	%f52, %f48, %f47;
	fma.rn.ftz.f32 	%f53, %f50, %f49, %f41;
	fma.rn.ftz.f32 	%f54, %f51, %f49, %f44;
	fma.rn.ftz.f32 	%f55, %f52, %f49, %f47;
	// Add 0.5 so the truncating conversions below round to nearest for
	// non-negative values.
	add.ftz.f32 	%f56, %f55, 0f3F000000;
	add.ftz.f32 	%f57, %f54, 0f3F000000;
	add.ftz.f32 	%f58, %f53, 0f3F000000;
	// Clamp each channel to [0.0, 255.0].
	mov.f32 	%f59, 0f00000000;
	max.ftz.f32 	%f60, %f56, %f59;
	min.ftz.f32 	%f61, %f60, %f36;
	max.ftz.f32 	%f62, %f57, %f59;
	min.ftz.f32 	%f63, %f62, %f36;
	max.ftz.f32 	%f64, %f58, %f59;
	min.ftz.f32 	%f65, %f64, %f36;
	// Fourth output lane: constant 255.5 clamped to 255.0.
	mov.f32 	%f66, 0f437F8000;
	max.ftz.f32 	%f67, %f66, %f59;
	min.ftz.f32 	%f68, %f67, %f36;
	// Float -> unsigned integer, rounding toward zero.
	cvt.rzi.ftz.u32.f32	%r15, %f61;
	cvt.rzi.ftz.u32.f32	%r16, %f63;
	cvt.rzi.ftz.u32.f32	%r17, %f65;
	cvt.rzi.ftz.u32.f32	%r18, %f68;
	// Destination address = param_3 + (y * param_4 + x) * 4 bytes.
	mad.lo.s32 	%r19, %r2, %r5, %r1;
	mul.wide.s32 	%rd10, %r19, 4;
	add.s64 	%rd11, %rd2, %rd10;
	cvt.u16.u32	%rs9, %r17;
	cvt.u16.u32	%rs10, %r16;
	cvt.u16.u32	%rs11, %r15;
	cvt.u16.u32	%rs12, %r18;
	// Byte order written: {matrix row 2, row 1, row 0, 255}.
	st.global.v4.u8 	[%rd11], {%rs11, %rs10, %rs9, %rs12};

BB18_5:
	ret;
}

// PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel
//
// One thread per pixel: x = ntid.x*ctaid.x + tid.x, y = ntid.y*ctaid.y + tid.y.
// Each in-bounds thread loads one 4-lane source pixel, multiplies lanes 0..2 by
// the 3x3 matrix k601YPbPr_To_RGB32f and stores the three results together with
// source lane 3 unchanged. Source and destination may each be f32 or f16.
// Lane names (V,U,Y,A in / B,G,R,A out) follow the kernel name; the code itself
// only fixes lane positions.
//
// Parameters, as used by the code below:
//   param_0  source base address (converted to a global address)
//   param_1  source row stride in pixels (multiplied by y)
//   param_2  source format flag: nonzero -> 4 x f32 (16 bytes/pixel),
//            zero -> 4 x f16 (8 bytes/pixel)
//   param_3  destination base address (converted to a global address)
//   param_4  destination row stride in pixels (multiplied by y)
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound on x
//   param_7  exclusive upper bound on y
//   param_8  never read by this kernel
.visible .entry PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<40>;
	.reg .s64 	%rd<15>;


	ld.param.u64 	%rd5, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd6, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYA_4444_32f_To_BGRA_4444_32f_Kernel_param_7];
	// %rd1 = destination (param_3), %rd2 = source (param_0), both as global addresses.
	cvta.to.global.u64 	%rd1, %rd6;
	cvta.to.global.u64 	%rd2, %rd5;
	// %r1 = x, %r2 = y for this thread.
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	// Bounds guard (signed compare): threads with x >= param_6 or y >= param_7 exit.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB19_7;
	bra.uni 	BB19_1;

BB19_1:
	// Source pixel index = y * param_1 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r15, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r15;
	// param_2 == 0 selects the f16 source path at BB19_3.
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB19_3;

	// f32 source: 16 bytes per pixel; lanes 0..3 land in %f36..%f39.
	shl.b64 	%rd7, %rd3, 4;
	add.s64 	%rd8, %rd2, %rd7;
	ld.global.v4.f32 	{%f17, %f18, %f19, %f20}, [%rd8];
	mov.f32 	%f39, %f20;
	mov.f32 	%f38, %f19;
	mov.f32 	%f37, %f18;
	mov.f32 	%f36, %f17;
	bra.uni 	BB19_4;

BB19_3:
	// f16 source: 8 bytes per pixel; each half is widened to f32 into %f36..%f39.
	shl.b64 	%rd9, %rd3, 3;
	add.s64 	%rd10, %rd2, %rd9;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd10];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f36, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f37, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f38, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f39, %temp;
	}

BB19_4:
	// 3x3 matrix multiply. For each row r (byte offset 12*r):
	//   result = m[r][0]*lane2 + m[r][1]*lane1 + m[r][2]*lane0
	// Row 0 -> %f13, row 1 -> %f14, row 2 -> %f15.
	ld.const.f32 	%f21, [k601YPbPr_To_RGB32f];
	ld.const.f32 	%f22, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f23, %f37, %f22;
	fma.rn.ftz.f32 	%f24, %f38, %f21, %f23;
	ld.const.f32 	%f25, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f13, %f36, %f25, %f24;
	ld.const.f32 	%f26, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f27, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f28, %f37, %f27;
	fma.rn.ftz.f32 	%f29, %f38, %f26, %f28;
	ld.const.f32 	%f30, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f14, %f36, %f30, %f29;
	ld.const.f32 	%f31, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f32, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f33, %f37, %f32;
	fma.rn.ftz.f32 	%f34, %f38, %f31, %f33;
	ld.const.f32 	%f35, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f15, %f36, %f35, %f34;
	// Destination pixel index = y * param_4 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r16, %r2, %r5, %r1;
	cvt.s64.s32	%rd4, %r16;
	// param_5 == 0 selects the f16 destination path at BB19_6.
	setp.eq.s32	%p5, %r6, 0;
	@%p5 bra 	BB19_6;

	// f32 destination: 16 bytes per pixel, lanes = {row 2, row 1, row 0, source lane 3}.
	shl.b64 	%rd11, %rd4, 4;
	add.s64 	%rd12, %rd1, %rd11;
	st.global.v4.f32 	[%rd12], {%f15, %f14, %f13, %f39};
	bra.uni 	BB19_7;

BB19_6:
	// f16 destination: 8 bytes per pixel; same lane order, each value rounded
	// to nearest-even f16.
	shl.b64 	%rd13, %rd4, 3;
	add.s64 	%rd14, %rd1, %rd13;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f39;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f14;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f15;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd14], {%rs12, %rs11, %rs10, %rs9};

BB19_7:
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel
//
// One thread per pixel: x = ntid.x*ctaid.x + tid.x, y = ntid.y*ctaid.y + tid.y.
// Each in-bounds thread loads one 4-lane source pixel, multiplies lanes 0..2 by
// the 3x3 matrix kRGB32f_To_601YPbPr and stores the three results together with
// source lane 3 unchanged. Source and destination may each be f32 or f16.
// Lane names (B,G,R,A in / V,U,Y,A out) follow the kernel name; the code itself
// only fixes lane positions.
//
// Parameters, as used by the code below:
//   param_0  source base address (converted to a global address)
//   param_1  source row stride in pixels (multiplied by y)
//   param_2  source format flag: nonzero -> 4 x f32 (16 bytes/pixel),
//            zero -> 4 x f16 (8 bytes/pixel)
//   param_3  destination base address (converted to a global address)
//   param_4  destination row stride in pixels (multiplied by y)
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound on x
//   param_7  exclusive upper bound on y
//   param_8  never read by this kernel
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<40>;
	.reg .s64 	%rd<15>;


	ld.param.u64 	%rd5, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd6, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_Kernel_param_7];
	// %rd1 = destination (param_3), %rd2 = source (param_0), both as global addresses.
	cvta.to.global.u64 	%rd1, %rd6;
	cvta.to.global.u64 	%rd2, %rd5;
	// %r1 = x, %r2 = y for this thread.
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	// Bounds guard (signed compare): threads with x >= param_6 or y >= param_7 exit.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB20_7;
	bra.uni 	BB20_1;

BB20_1:
	// Source pixel index = y * param_1 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r15, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r15;
	// param_2 == 0 selects the f16 source path at BB20_3.
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB20_3;

	// f32 source: 16 bytes per pixel; lanes 0..3 land in %f36..%f39.
	shl.b64 	%rd7, %rd3, 4;
	add.s64 	%rd8, %rd2, %rd7;
	ld.global.v4.f32 	{%f17, %f18, %f19, %f20}, [%rd8];
	mov.f32 	%f39, %f20;
	mov.f32 	%f38, %f19;
	mov.f32 	%f37, %f18;
	mov.f32 	%f36, %f17;
	bra.uni 	BB20_4;

BB20_3:
	// f16 source: 8 bytes per pixel; each half is widened to f32 into %f36..%f39.
	shl.b64 	%rd9, %rd3, 3;
	add.s64 	%rd10, %rd2, %rd9;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd10];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f36, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f37, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f38, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f39, %temp;
	}

BB20_4:
	// 3x3 matrix multiply. For each row r (byte offset 12*r):
	//   result = m[r][0]*lane2 + m[r][1]*lane1 + m[r][2]*lane0
	// Row 0 -> %f13, row 1 -> %f14, row 2 -> %f15.
	ld.const.f32 	%f21, [kRGB32f_To_601YPbPr];
	ld.const.f32 	%f22, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f23, %f37, %f22;
	fma.rn.ftz.f32 	%f24, %f38, %f21, %f23;
	ld.const.f32 	%f25, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f13, %f36, %f25, %f24;
	ld.const.f32 	%f26, [kRGB32f_To_601YPbPr+12];
	ld.const.f32 	%f27, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f28, %f37, %f27;
	fma.rn.ftz.f32 	%f29, %f38, %f26, %f28;
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f14, %f36, %f30, %f29;
	ld.const.f32 	%f31, [kRGB32f_To_601YPbPr+24];
	ld.const.f32 	%f32, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f33, %f37, %f32;
	fma.rn.ftz.f32 	%f34, %f38, %f31, %f33;
	ld.const.f32 	%f35, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f15, %f36, %f35, %f34;
	// Destination pixel index = y * param_4 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r16, %r2, %r5, %r1;
	cvt.s64.s32	%rd4, %r16;
	// param_5 == 0 selects the f16 destination path at BB20_6.
	setp.eq.s32	%p5, %r6, 0;
	@%p5 bra 	BB20_6;

	// f32 destination: 16 bytes per pixel, lanes = {row 2, row 1, row 0, source lane 3}.
	shl.b64 	%rd11, %rd4, 4;
	add.s64 	%rd12, %rd1, %rd11;
	st.global.v4.f32 	[%rd12], {%f15, %f14, %f13, %f39};
	bra.uni 	BB20_7;

BB20_6:
	// f16 destination: 8 bytes per pixel; same lane order, each value rounded
	// to nearest-even f16.
	shl.b64 	%rd13, %rd4, 3;
	add.s64 	%rd14, %rd1, %rd13;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f39;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f14;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f15;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd14], {%rs12, %rs11, %rs10, %rs9};

BB20_7:
	ret;
}

// PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel
//
// One thread per pixel: x = ntid.x*ctaid.x + tid.x, y = ntid.y*ctaid.y + tid.y.
// Each in-bounds thread loads one 4-lane source pixel. If lane 3 is above a
// small threshold (or is NaN), lanes 0..2 are multiplied by the 3x3 matrix
// k601YPbPr_To_RGB32f and the results are divided by lane 3; lane 3 is passed
// through. Otherwise all four output lanes are written as 0.
// Lane names (V,U,Y,A in / B,G,R,A out) and the "P" = premultiplied reading
// follow the kernel name; the code itself only fixes lane positions.
//
// Parameters, as used by the code below:
//   param_0  source base address (converted to a global address)
//   param_1  source row stride in pixels (multiplied by y)
//   param_2  source format flag: nonzero -> 4 x f32 (16 bytes/pixel),
//            zero -> 4 x f16 (8 bytes/pixel)
//   param_3  destination base address (converted to a global address)
//   param_4  destination row stride in pixels (multiplied by y)
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound on x
//   param_7  exclusive upper bound on y
//   param_8  never read by this kernel
.visible .entry PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<7>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<58>;
	.reg .s64 	%rd<15>;


	ld.param.u64 	%rd5, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd6, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYP_4444_32f_To_BGRA_4444_32f_Kernel_param_7];
	// %rd1 = destination (param_3), %rd2 = source (param_0), both as global addresses.
	cvta.to.global.u64 	%rd1, %rd6;
	cvta.to.global.u64 	%rd2, %rd5;
	// %r1 = x, %r2 = y for this thread.
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	// Bounds guard (signed compare): threads with x >= param_6 or y >= param_7 exit.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB21_10;
	bra.uni 	BB21_1;

BB21_1:
	// Source pixel index = y * param_1 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r15, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r15;
	// param_2 == 0 selects the f16 source path at BB21_3.
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB21_3;

	// f32 source: 16 bytes per pixel; lanes 0..3 land in %f50..%f53.
	shl.b64 	%rd7, %rd3, 4;
	add.s64 	%rd8, %rd2, %rd7;
	ld.global.v4.f32 	{%f21, %f22, %f23, %f24}, [%rd8];
	mov.f32 	%f53, %f24;
	mov.f32 	%f52, %f23;
	mov.f32 	%f51, %f22;
	mov.f32 	%f50, %f21;
	bra.uni 	BB21_4;

BB21_3:
	// f16 source: 8 bytes per pixel; each half is widened to f32 into %f50..%f53.
	shl.b64 	%rd9, %rd3, 3;
	add.s64 	%rd10, %rd2, %rd9;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd10];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f50, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f51, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f52, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f53, %temp;
	}

BB21_4:
	// %f25 = lane3 + 0fB70637BD (a small negative constant, roughly -8e-6).
	// gtu is true when %f25 > 0 or the comparison is unordered (NaN), so NaN in
	// lane 3 also takes the divide path at BB21_6.
	add.ftz.f32 	%f25, %f53, 0fB70637BD;
	setp.gtu.ftz.f32	%p5, %f25, 0f00000000;
	// Default output lane 3 = source lane 3; overwritten with 0 on the fall-through path.
	mov.f32 	%f54, %f53;
	@%p5 bra 	BB21_6;

	// Lane 3 at or below the threshold: emit an all-zero pixel.
	mov.f32 	%f57, 0f00000000;
	mov.f32 	%f56, %f57;
	mov.f32 	%f55, %f57;
	mov.f32 	%f54, %f57;
	bra.uni 	BB21_7;

BB21_6:
	// 3x3 matrix multiply. For each row r (byte offset 12*r):
	//   result = m[r][0]*lane2 + m[r][1]*lane1 + m[r][2]*lane0
	// Row 0 -> %f35, row 1 -> %f41, row 2 -> %f47.
	ld.const.f32 	%f30, [k601YPbPr_To_RGB32f];
	ld.const.f32 	%f31, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f32, %f51, %f31;
	fma.rn.ftz.f32 	%f33, %f52, %f30, %f32;
	ld.const.f32 	%f34, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f35, %f50, %f34, %f33;
	ld.const.f32 	%f36, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f37, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f38, %f51, %f37;
	fma.rn.ftz.f32 	%f39, %f52, %f36, %f38;
	ld.const.f32 	%f40, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f41, %f50, %f40, %f39;
	ld.const.f32 	%f42, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f43, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f44, %f51, %f43;
	fma.rn.ftz.f32 	%f45, %f52, %f42, %f44;
	ld.const.f32 	%f46, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f47, %f50, %f46, %f45;
	// %f49 = 1.0 / lane3 (approximate divide); each matrix result is scaled by it.
	mov.f32 	%f48, 0f3F800000;
	div.approx.ftz.f32 	%f49, %f48, %f53;
	mul.ftz.f32 	%f55, %f35, %f49;
	mul.ftz.f32 	%f56, %f41, %f49;
	mul.ftz.f32 	%f57, %f47, %f49;

BB21_7:
	// Destination pixel index = y * param_4 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r16, %r2, %r5, %r1;
	cvt.s64.s32	%rd4, %r16;
	// param_5 == 0 selects the f16 destination path at BB21_9.
	setp.eq.s32	%p6, %r6, 0;
	@%p6 bra 	BB21_9;

	// f32 destination: 16 bytes per pixel, lanes = {row 2, row 1, row 0, lane 3 or 0}.
	shl.b64 	%rd11, %rd4, 4;
	add.s64 	%rd12, %rd1, %rd11;
	st.global.v4.f32 	[%rd12], {%f57, %f56, %f55, %f54};
	bra.uni 	BB21_10;

BB21_9:
	// f16 destination: 8 bytes per pixel; same lane order, each value rounded
	// to nearest-even f16.
	shl.b64 	%rd13, %rd4, 3;
	add.s64 	%rd14, %rd1, %rd13;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f54;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f55;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f56;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f57;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd14], {%rs12, %rs11, %rs10, %rs9};

BB21_10:
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel
//
// One thread per pixel: x = ntid.x*ctaid.x + tid.x, y = ntid.y*ctaid.y + tid.y.
// Each in-bounds thread loads one 4-lane source pixel, multiplies lanes 0..2 by
// the 3x3 matrix kRGB32f_To_601YPbPr, multiplies each result by source lane 3,
// and stores the three products together with lane 3 unchanged.
// Lane names (B,G,R,A in / V,U,Y,A out) and the "P" = premultiplied reading
// follow the kernel name; the code itself only fixes lane positions.
//
// Parameters, as used by the code below:
//   param_0  source base address (converted to a global address)
//   param_1  source row stride in pixels (multiplied by y)
//   param_2  source format flag: nonzero -> 4 x f32 (16 bytes/pixel),
//            zero -> 4 x f16 (8 bytes/pixel)
//   param_3  destination base address (converted to a global address)
//   param_4  destination row stride in pixels (multiplied by y)
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound on x
//   param_7  exclusive upper bound on y
//   param_8  never read by this kernel
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<43>;
	.reg .s64 	%rd<15>;


	ld.param.u64 	%rd5, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd6, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_Kernel_param_7];
	// %rd1 = destination (param_3), %rd2 = source (param_0), both as global addresses.
	cvta.to.global.u64 	%rd1, %rd6;
	cvta.to.global.u64 	%rd2, %rd5;
	// %r1 = x, %r2 = y for this thread.
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	// Bounds guard (signed compare): threads with x >= param_6 or y >= param_7 exit.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB22_7;
	bra.uni 	BB22_1;

BB22_1:
	// Source pixel index = y * param_1 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r15, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r15;
	// param_2 == 0 selects the f16 source path at BB22_3.
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB22_3;

	// f32 source: 16 bytes per pixel; lanes 0..3 land in %f39..%f42.
	shl.b64 	%rd7, %rd3, 4;
	add.s64 	%rd8, %rd2, %rd7;
	ld.global.v4.f32 	{%f17, %f18, %f19, %f20}, [%rd8];
	mov.f32 	%f42, %f20;
	mov.f32 	%f41, %f19;
	mov.f32 	%f40, %f18;
	mov.f32 	%f39, %f17;
	bra.uni 	BB22_4;

BB22_3:
	// f16 source: 8 bytes per pixel; each half is widened to f32 into %f39..%f42.
	shl.b64 	%rd9, %rd3, 3;
	add.s64 	%rd10, %rd2, %rd9;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd10];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f39, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f40, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f41, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f42, %temp;
	}

BB22_4:
	// 3x3 matrix multiply. For each row r (byte offset 12*r):
	//   result = m[r][0]*lane2 + m[r][1]*lane1 + m[r][2]*lane0
	// Row 0 -> %f26, row 1 -> %f32, row 2 -> %f38.
	ld.const.f32 	%f21, [kRGB32f_To_601YPbPr];
	ld.const.f32 	%f22, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f23, %f40, %f22;
	fma.rn.ftz.f32 	%f24, %f41, %f21, %f23;
	ld.const.f32 	%f25, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f26, %f39, %f25, %f24;
	ld.const.f32 	%f27, [kRGB32f_To_601YPbPr+12];
	ld.const.f32 	%f28, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f29, %f40, %f28;
	fma.rn.ftz.f32 	%f30, %f41, %f27, %f29;
	ld.const.f32 	%f31, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f32, %f39, %f31, %f30;
	ld.const.f32 	%f33, [kRGB32f_To_601YPbPr+24];
	ld.const.f32 	%f34, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f35, %f40, %f34;
	fma.rn.ftz.f32 	%f36, %f41, %f33, %f35;
	ld.const.f32 	%f37, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f38, %f39, %f37, %f36;
	// Scale every matrix result by source lane 3.
	mul.ftz.f32 	%f14, %f26, %f42;
	mul.ftz.f32 	%f15, %f32, %f42;
	mul.ftz.f32 	%f16, %f38, %f42;
	// Destination pixel index = y * param_4 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r16, %r2, %r5, %r1;
	cvt.s64.s32	%rd4, %r16;
	// param_5 == 0 selects the f16 destination path at BB22_6.
	setp.eq.s32	%p5, %r6, 0;
	@%p5 bra 	BB22_6;

	// f32 destination: 16 bytes per pixel,
	// lanes = {row 2 * lane3, row 1 * lane3, row 0 * lane3, lane3}.
	shl.b64 	%rd11, %rd4, 4;
	add.s64 	%rd12, %rd1, %rd11;
	st.global.v4.f32 	[%rd12], {%f16, %f15, %f14, %f42};
	bra.uni 	BB22_7;

BB22_6:
	// f16 destination: 8 bytes per pixel; same lane order, each value rounded
	// to nearest-even f16.
	shl.b64 	%rd13, %rd4, 3;
	add.s64 	%rd14, %rd1, %rd13;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f42;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f14;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f15;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f16;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd14], {%rs12, %rs11, %rs10, %rs9};

BB22_7:
	ret;
}

// PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel
//
// One thread per pixel: x = ntid.x*ctaid.x + tid.x, y = ntid.y*ctaid.y + tid.y.
// Each in-bounds thread loads one 4-lane source pixel, multiplies lanes 0..2 by
// the 3x3 matrix k601YPbPr_To_RGB32f and stores the three results followed by a
// constant 1.0 in lane 3. Source lane 3 is never used in the output.
// Lane names (V,U,Y,X in / B,G,R,A out) follow the kernel name; the code itself
// only fixes lane positions.
//
// Parameters, as used by the code below:
//   param_0  source base address (converted to a global address)
//   param_1  source row stride in pixels (multiplied by y)
//   param_2  source format flag: nonzero -> 4 x f32 (16 bytes/pixel),
//            zero -> 4 x f16 (8 bytes/pixel)
//   param_3  destination base address (converted to a global address)
//   param_4  destination row stride in pixels (multiplied by y)
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound on x
//   param_7  exclusive upper bound on y
//   param_8  never read by this kernel
.visible .entry PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<40>;
	.reg .s64 	%rd<15>;


	ld.param.u64 	%rd5, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd6, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_VUYX_4444_32f_To_BGRA_4444_32f_Kernel_param_7];
	// %rd1 = destination (param_3), %rd2 = source (param_0), both as global addresses.
	cvta.to.global.u64 	%rd1, %rd6;
	cvta.to.global.u64 	%rd2, %rd5;
	// %r1 = x, %r2 = y for this thread.
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	// Bounds guard (signed compare): threads with x >= param_6 or y >= param_7 exit.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB23_7;
	bra.uni 	BB23_1;

BB23_1:
	// Source pixel index = y * param_1 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r15, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r15;
	// param_2 == 0 selects the f16 source path at BB23_3.
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB23_3;

	// f32 source: 16 bytes per pixel; lanes 0..2 land in %f37..%f39.
	// Lane 3 (%f19) is fetched by the vector load but not consumed, matching
	// the f16 path below, which likewise ignores its fourth lane.
	shl.b64 	%rd7, %rd3, 4;
	add.s64 	%rd8, %rd2, %rd7;
	ld.global.v4.f32 	{%f16, %f17, %f18, %f19}, [%rd8];
	mov.f32 	%f39, %f18;
	mov.f32 	%f38, %f17;
	mov.f32 	%f37, %f16;
	bra.uni 	BB23_4;

BB23_3:
	// f16 source: 8 bytes per pixel; the first three halves are widened to f32
	// into %f37..%f39. The fourth half (%rs4) is loaded but not converted.
	shl.b64 	%rd9, %rd3, 3;
	add.s64 	%rd10, %rd2, %rd9;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd10];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f37, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f38, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f39, %temp;
	}

BB23_4:
	// 3x3 matrix multiply. For each row r (byte offset 12*r):
	//   result = m[r][0]*lane2 + m[r][1]*lane1 + m[r][2]*lane0
	// Row 0 -> %f13, row 1 -> %f14, row 2 -> %f15.
	ld.const.f32 	%f20, [k601YPbPr_To_RGB32f];
	ld.const.f32 	%f21, [k601YPbPr_To_RGB32f+4];
	mul.ftz.f32 	%f22, %f38, %f21;
	fma.rn.ftz.f32 	%f23, %f39, %f20, %f22;
	ld.const.f32 	%f24, [k601YPbPr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f13, %f37, %f24, %f23;
	ld.const.f32 	%f25, [k601YPbPr_To_RGB32f+12];
	ld.const.f32 	%f26, [k601YPbPr_To_RGB32f+16];
	mul.ftz.f32 	%f27, %f38, %f26;
	fma.rn.ftz.f32 	%f28, %f39, %f25, %f27;
	ld.const.f32 	%f29, [k601YPbPr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f14, %f37, %f29, %f28;
	ld.const.f32 	%f30, [k601YPbPr_To_RGB32f+24];
	ld.const.f32 	%f31, [k601YPbPr_To_RGB32f+28];
	mul.ftz.f32 	%f32, %f38, %f31;
	fma.rn.ftz.f32 	%f33, %f39, %f30, %f32;
	ld.const.f32 	%f34, [k601YPbPr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f15, %f37, %f34, %f33;
	// Destination pixel index = y * param_4 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r16, %r2, %r5, %r1;
	cvt.s64.s32	%rd4, %r16;
	// param_5 == 0 selects the f16 destination path at BB23_6.
	setp.eq.s32	%p5, %r6, 0;
	@%p5 bra 	BB23_6;

	// f32 destination: 16 bytes per pixel, lanes = {row 2, row 1, row 0, 1.0}.
	shl.b64 	%rd11, %rd4, 4;
	add.s64 	%rd12, %rd1, %rd11;
	mov.f32 	%f35, 0f3F800000;
	st.global.v4.f32 	[%rd12], {%f15, %f14, %f13, %f35};
	bra.uni 	BB23_7;

BB23_6:
	// f16 destination: 8 bytes per pixel; same lane order, each value rounded
	// to nearest-even f16 (lane 3 is the constant 1.0).
	shl.b64 	%rd13, %rd4, 3;
	add.s64 	%rd14, %rd1, %rd13;
	mov.f32 	%f36, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f36;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f14;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f15;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd14], {%rs12, %rs11, %rs10, %rs9};

BB23_7:
	ret;
}

// PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel
//
// One thread per pixel: x = ntid.x*ctaid.x + tid.x, y = ntid.y*ctaid.y + tid.y.
// Each in-bounds thread loads one 4-lane source pixel, multiplies lanes 0..2 by
// the 3x3 matrix kRGB32f_To_601YPbPr, multiplies each result by source lane 3,
// and stores the three products followed by a constant 1.0 in lane 3.
// Lane names (B,G,R,A in / V,U,Y,X out) follow the kernel name; the code itself
// only fixes lane positions.
//
// Parameters, as used by the code below:
//   param_0  source base address (converted to a global address)
//   param_1  source row stride in pixels (multiplied by y)
//   param_2  source format flag: nonzero -> 4 x f32 (16 bytes/pixel),
//            zero -> 4 x f16 (8 bytes/pixel)
//   param_3  destination base address (converted to a global address)
//   param_4  destination row stride in pixels (multiplied by y)
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound on x
//   param_7  exclusive upper bound on y
//   param_8  never read by this kernel
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<13>;
	.reg .s32 	%r<17>;
	.reg .f32 	%f<44>;
	.reg .s64 	%rd<15>;


	ld.param.u64 	%rd5, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_0];
	ld.param.u32 	%r3, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_1];
	ld.param.u32 	%r4, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_2];
	ld.param.u64 	%rd6, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_3];
	ld.param.u32 	%r5, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_4];
	ld.param.u32 	%r6, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_5];
	ld.param.u32 	%r7, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_6];
	ld.param.u32 	%r8, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_Kernel_param_7];
	// %rd1 = destination (param_3), %rd2 = source (param_0), both as global addresses.
	cvta.to.global.u64 	%rd1, %rd6;
	cvta.to.global.u64 	%rd2, %rd5;
	// %r1 = x, %r2 = y for this thread.
	mov.u32 	%r9, %ntid.x;
	mov.u32 	%r10, %ctaid.x;
	mov.u32 	%r11, %tid.x;
	mad.lo.s32 	%r1, %r9, %r10, %r11;
	mov.u32 	%r12, %ntid.y;
	mov.u32 	%r13, %ctaid.y;
	mov.u32 	%r14, %tid.y;
	mad.lo.s32 	%r2, %r12, %r13, %r14;
	// Bounds guard (signed compare): threads with x >= param_6 or y >= param_7 exit.
	setp.lt.s32	%p1, %r1, %r7;
	setp.lt.s32	%p2, %r2, %r8;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB24_7;
	bra.uni 	BB24_1;

BB24_1:
	// Source pixel index = y * param_1 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r15, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r15;
	// param_2 == 0 selects the f16 source path at BB24_3.
	setp.eq.s32	%p4, %r4, 0;
	@%p4 bra 	BB24_3;

	// f32 source: 16 bytes per pixel; lanes 0..3 land in %f40..%f43.
	shl.b64 	%rd7, %rd3, 4;
	add.s64 	%rd8, %rd2, %rd7;
	ld.global.v4.f32 	{%f16, %f17, %f18, %f19}, [%rd8];
	mov.f32 	%f43, %f19;
	mov.f32 	%f42, %f18;
	mov.f32 	%f41, %f17;
	mov.f32 	%f40, %f16;
	bra.uni 	BB24_4;

BB24_3:
	// f16 source: 8 bytes per pixel; each half is widened to f32 into %f40..%f43.
	shl.b64 	%rd9, %rd3, 3;
	add.s64 	%rd10, %rd2, %rd9;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd10];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f40, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f41, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f42, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f43, %temp;
	}

BB24_4:
	// 3x3 matrix multiply. For each row r (byte offset 12*r):
	//   result = m[r][0]*lane2 + m[r][1]*lane1 + m[r][2]*lane0
	// Row 0 -> %f25, row 1 -> %f31, row 2 -> %f37.
	ld.const.f32 	%f20, [kRGB32f_To_601YPbPr];
	ld.const.f32 	%f21, [kRGB32f_To_601YPbPr+4];
	mul.ftz.f32 	%f22, %f41, %f21;
	fma.rn.ftz.f32 	%f23, %f42, %f20, %f22;
	ld.const.f32 	%f24, [kRGB32f_To_601YPbPr+8];
	fma.rn.ftz.f32 	%f25, %f40, %f24, %f23;
	ld.const.f32 	%f26, [kRGB32f_To_601YPbPr+12];
	ld.const.f32 	%f27, [kRGB32f_To_601YPbPr+16];
	mul.ftz.f32 	%f28, %f41, %f27;
	fma.rn.ftz.f32 	%f29, %f42, %f26, %f28;
	ld.const.f32 	%f30, [kRGB32f_To_601YPbPr+20];
	fma.rn.ftz.f32 	%f31, %f40, %f30, %f29;
	ld.const.f32 	%f32, [kRGB32f_To_601YPbPr+24];
	ld.const.f32 	%f33, [kRGB32f_To_601YPbPr+28];
	mul.ftz.f32 	%f34, %f41, %f33;
	fma.rn.ftz.f32 	%f35, %f42, %f32, %f34;
	ld.const.f32 	%f36, [kRGB32f_To_601YPbPr+32];
	fma.rn.ftz.f32 	%f37, %f40, %f36, %f35;
	// Scale every matrix result by source lane 3.
	mul.ftz.f32 	%f13, %f25, %f43;
	mul.ftz.f32 	%f14, %f31, %f43;
	mul.ftz.f32 	%f15, %f37, %f43;
	// Destination pixel index = y * param_4 + x, sign-extended to 64 bits.
	mad.lo.s32 	%r16, %r2, %r5, %r1;
	cvt.s64.s32	%rd4, %r16;
	// param_5 == 0 selects the f16 destination path at BB24_6.
	setp.eq.s32	%p5, %r6, 0;
	@%p5 bra 	BB24_6;

	// f32 destination: 16 bytes per pixel,
	// lanes = {row 2 * lane3, row 1 * lane3, row 0 * lane3, 1.0}.
	shl.b64 	%rd11, %rd4, 4;
	add.s64 	%rd12, %rd1, %rd11;
	mov.f32 	%f38, 0f3F800000;
	st.global.v4.f32 	[%rd12], {%f15, %f14, %f13, %f38};
	bra.uni 	BB24_7;

BB24_6:
	// f16 destination: 8 bytes per pixel; same lane order, each value rounded
	// to nearest-even f16 (lane 3 is the constant 1.0).
	shl.b64 	%rd13, %rd4, 3;
	add.s64 	%rd14, %rd1, %rd13;
	mov.f32 	%f39, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f39;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f14;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f15;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd14], {%rs12, %rs11, %rs10, %rs9};

BB24_7:
	ret;
}

// VUYA -> BGRA pixel conversion (709 matrix, per the kernel name); one thread
// handles one pixel.
//
// Parameters, as they are used below:
//   param_0  source buffer address (converted to a global address)
//   param_1  source row pitch in pixels (multiplied by the row index)
//   param_2  source format flag: 0 -> 4 x f16 (8 bytes/pixel),
//            non-zero -> 4 x f32 (16 bytes/pixel)
//   param_3  destination buffer address (converted to a global address)
//   param_4  destination row pitch in pixels
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound for the column index
//   param_7  exclusive upper bound for the row index
//   param_8  never read by this kernel
//
// k709YPbPr_To_RGB32f is read as nine consecutive f32 values (three rows of
// three); it is declared outside this block.
.visible .entry PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%oob_col, %oob_row, %oob, %src_is_f32, %dst_is_f32;
	.reg .b16 	%hin<4>, %hout<4>;
	.reg .s32 	%src_pitch, %src_fmt, %dst_pitch, %dst_fmt, %width, %height;
	.reg .s32 	%bdim, %bidx, %lidx, %col, %row, %src_px, %dst_px;
	.reg .f32 	%m<9>, %cr, %cb, %luma, %alpha, %acc, %red, %grn, %blu;
	.reg .s64 	%src_arg, %dst_arg, %src, %dst, %src_idx, %dst_idx, %off, %addr;

	// ---- kernel arguments ----
	ld.param.u64 	%src_arg, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%src_pitch, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%src_fmt, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u64 	%dst_arg, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%dst_pitch, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%dst_fmt, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%width, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%height, [PixelFormatConvert_VUYA_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%dst, %dst_arg;
	cvta.to.global.u64 	%src, %src_arg;

	// ---- pixel coordinates: ntid * ctaid + tid in x and in y ----
	mov.u32 	%bdim, %ntid.x;
	mov.u32 	%bidx, %ctaid.x;
	mov.u32 	%lidx, %tid.x;
	mad.lo.s32 	%col, %bdim, %bidx, %lidx;
	mov.u32 	%bdim, %ntid.y;
	mov.u32 	%bidx, %ctaid.y;
	mov.u32 	%lidx, %tid.y;
	mad.lo.s32 	%row, %bdim, %bidx, %lidx;

	// Leave unless col < param_6 and row < param_7.
	setp.ge.s32 	%oob_col, %col, %width;
	setp.ge.s32 	%oob_row, %row, %height;
	or.pred 	%oob, %oob_col, %oob_row;
	@%oob bra 	K25_EXIT;

	// ---- load the source pixel: lanes 0..3 -> cr, cb, luma, alpha ----
	mad.lo.s32 	%src_px, %row, %src_pitch, %col;
	cvt.s64.s32 	%src_idx, %src_px;
	setp.ne.s32 	%src_is_f32, %src_fmt, 0;
	@%src_is_f32 bra 	K25_LOAD_F32;

	// Half-precision source: 8 bytes per pixel, widened to f32.
	shl.b64 	%off, %src_idx, 3;
	add.s64 	%addr, %src, %off;
	ld.global.v4.u16 	{%hin0, %hin1, %hin2, %hin3}, [%addr];
	cvt.f32.f16 	%cr, %hin0;
	cvt.f32.f16 	%cb, %hin1;
	cvt.f32.f16 	%luma, %hin2;
	cvt.f32.f16 	%alpha, %hin3;
	bra.uni 	K25_CONVERT;

K25_LOAD_F32:
	// Single-precision source: 16 bytes per pixel.
	shl.b64 	%off, %src_idx, 4;
	add.s64 	%addr, %src, %off;
	ld.global.v4.f32 	{%cr, %cb, %luma, %alpha}, [%addr];

K25_CONVERT:
	// Each output is m[3i]*luma + m[3i+1]*cb + m[3i+2]*cr, evaluated as
	// one multiply followed by two fused multiply-adds.
	ld.const.f32 	%m0, [k709YPbPr_To_RGB32f];
	ld.const.f32 	%m1, [k709YPbPr_To_RGB32f+4];
	ld.const.f32 	%m2, [k709YPbPr_To_RGB32f+8];
	mul.ftz.f32 	%acc, %cb, %m1;
	fma.rn.ftz.f32 	%acc, %luma, %m0, %acc;
	fma.rn.ftz.f32 	%red, %cr, %m2, %acc;

	ld.const.f32 	%m3, [k709YPbPr_To_RGB32f+12];
	ld.const.f32 	%m4, [k709YPbPr_To_RGB32f+16];
	ld.const.f32 	%m5, [k709YPbPr_To_RGB32f+20];
	mul.ftz.f32 	%acc, %cb, %m4;
	fma.rn.ftz.f32 	%acc, %luma, %m3, %acc;
	fma.rn.ftz.f32 	%grn, %cr, %m5, %acc;

	ld.const.f32 	%m6, [k709YPbPr_To_RGB32f+24];
	ld.const.f32 	%m7, [k709YPbPr_To_RGB32f+28];
	ld.const.f32 	%m8, [k709YPbPr_To_RGB32f+32];
	mul.ftz.f32 	%acc, %cb, %m7;
	fma.rn.ftz.f32 	%acc, %luma, %m6, %acc;
	fma.rn.ftz.f32 	%blu, %cr, %m8, %acc;

	// ---- store the destination pixel: lanes 0..3 <- blu, grn, red, alpha ----
	mad.lo.s32 	%dst_px, %row, %dst_pitch, %col;
	cvt.s64.s32 	%dst_idx, %dst_px;
	setp.ne.s32 	%dst_is_f32, %dst_fmt, 0;
	@%dst_is_f32 bra 	K25_STORE_F32;

	// Half-precision destination: round each lane to f16 (nearest even).
	shl.b64 	%off, %dst_idx, 3;
	add.s64 	%addr, %dst, %off;
	cvt.rn.ftz.f16.f32 	%hout0, %blu;
	cvt.rn.ftz.f16.f32 	%hout1, %grn;
	cvt.rn.ftz.f16.f32 	%hout2, %red;
	cvt.rn.ftz.f16.f32 	%hout3, %alpha;
	st.global.v4.u16 	[%addr], {%hout0, %hout1, %hout2, %hout3};
	bra.uni 	K25_EXIT;

K25_STORE_F32:
	shl.b64 	%off, %dst_idx, 4;
	add.s64 	%addr, %dst, %off;
	st.global.v4.f32 	[%addr], {%blu, %grn, %red, %alpha};

K25_EXIT:
	ret;
}

// BGRA -> VUYA pixel conversion (709 matrix, per the kernel name); one thread
// handles one pixel.
//
// Parameters, as they are used below:
//   param_0  source buffer address (converted to a global address)
//   param_1  source row pitch in pixels (multiplied by the row index)
//   param_2  source format flag: 0 -> 4 x f16 (8 bytes/pixel),
//            non-zero -> 4 x f32 (16 bytes/pixel)
//   param_3  destination buffer address (converted to a global address)
//   param_4  destination row pitch in pixels
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound for the column index
//   param_7  exclusive upper bound for the row index
//   param_8  never read by this kernel
//
// kRGB32f_To_709YPbPr is read as nine consecutive f32 values (three rows of
// three); it is declared outside this block.
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_8
)
{
	.reg .pred 	%oob_col, %oob_row, %oob, %src_is_f32, %dst_is_f32;
	.reg .b16 	%hin<4>, %hout<4>;
	.reg .s32 	%src_pitch, %src_fmt, %dst_pitch, %dst_fmt, %width, %height;
	.reg .s32 	%bdim, %bidx, %lidx, %col, %row, %src_px, %dst_px;
	.reg .f32 	%m<9>, %blu, %grn, %red, %alpha, %acc, %luma, %cb, %cr;
	.reg .s64 	%src_arg, %dst_arg, %src, %dst, %src_idx, %dst_idx, %off, %addr;

	// ---- kernel arguments ----
	ld.param.u64 	%src_arg, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_0];
	ld.param.u32 	%src_pitch, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_1];
	ld.param.u32 	%src_fmt, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_2];
	ld.param.u64 	%dst_arg, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_3];
	ld.param.u32 	%dst_pitch, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_4];
	ld.param.u32 	%dst_fmt, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_5];
	ld.param.u32 	%width, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_6];
	ld.param.u32 	%height, [PixelFormatConvert_BGRA_4444_32f_To_VUYA_4444_32f_709_Kernel_param_7];
	cvta.to.global.u64 	%dst, %dst_arg;
	cvta.to.global.u64 	%src, %src_arg;

	// ---- pixel coordinates: ntid * ctaid + tid in x and in y ----
	mov.u32 	%bdim, %ntid.x;
	mov.u32 	%bidx, %ctaid.x;
	mov.u32 	%lidx, %tid.x;
	mad.lo.s32 	%col, %bdim, %bidx, %lidx;
	mov.u32 	%bdim, %ntid.y;
	mov.u32 	%bidx, %ctaid.y;
	mov.u32 	%lidx, %tid.y;
	mad.lo.s32 	%row, %bdim, %bidx, %lidx;

	// Leave unless col < param_6 and row < param_7.
	setp.ge.s32 	%oob_col, %col, %width;
	setp.ge.s32 	%oob_row, %row, %height;
	or.pred 	%oob, %oob_col, %oob_row;
	@%oob bra 	K26_EXIT;

	// ---- load the source pixel: lanes 0..3 -> blu, grn, red, alpha ----
	mad.lo.s32 	%src_px, %row, %src_pitch, %col;
	cvt.s64.s32 	%src_idx, %src_px;
	setp.ne.s32 	%src_is_f32, %src_fmt, 0;
	@%src_is_f32 bra 	K26_LOAD_F32;

	// Half-precision source: 8 bytes per pixel, widened to f32.
	shl.b64 	%off, %src_idx, 3;
	add.s64 	%addr, %src, %off;
	ld.global.v4.u16 	{%hin0, %hin1, %hin2, %hin3}, [%addr];
	cvt.f32.f16 	%blu, %hin0;
	cvt.f32.f16 	%grn, %hin1;
	cvt.f32.f16 	%red, %hin2;
	cvt.f32.f16 	%alpha, %hin3;
	bra.uni 	K26_CONVERT;

K26_LOAD_F32:
	// Single-precision source: 16 bytes per pixel.
	shl.b64 	%off, %src_idx, 4;
	add.s64 	%addr, %src, %off;
	ld.global.v4.f32 	{%blu, %grn, %red, %alpha}, [%addr];

K26_CONVERT:
	// Each output is m[3i]*red + m[3i+1]*grn + m[3i+2]*blu, evaluated as
	// one multiply followed by two fused multiply-adds.
	ld.const.f32 	%m0, [kRGB32f_To_709YPbPr];
	ld.const.f32 	%m1, [kRGB32f_To_709YPbPr+4];
	ld.const.f32 	%m2, [kRGB32f_To_709YPbPr+8];
	mul.ftz.f32 	%acc, %grn, %m1;
	fma.rn.ftz.f32 	%acc, %red, %m0, %acc;
	fma.rn.ftz.f32 	%luma, %blu, %m2, %acc;

	ld.const.f32 	%m3, [kRGB32f_To_709YPbPr+12];
	ld.const.f32 	%m4, [kRGB32f_To_709YPbPr+16];
	ld.const.f32 	%m5, [kRGB32f_To_709YPbPr+20];
	mul.ftz.f32 	%acc, %grn, %m4;
	fma.rn.ftz.f32 	%acc, %red, %m3, %acc;
	fma.rn.ftz.f32 	%cb, %blu, %m5, %acc;

	ld.const.f32 	%m6, [kRGB32f_To_709YPbPr+24];
	ld.const.f32 	%m7, [kRGB32f_To_709YPbPr+28];
	ld.const.f32 	%m8, [kRGB32f_To_709YPbPr+32];
	mul.ftz.f32 	%acc, %grn, %m7;
	fma.rn.ftz.f32 	%acc, %red, %m6, %acc;
	fma.rn.ftz.f32 	%cr, %blu, %m8, %acc;

	// ---- store the destination pixel: lanes 0..3 <- cr, cb, luma, alpha ----
	mad.lo.s32 	%dst_px, %row, %dst_pitch, %col;
	cvt.s64.s32 	%dst_idx, %dst_px;
	setp.ne.s32 	%dst_is_f32, %dst_fmt, 0;
	@%dst_is_f32 bra 	K26_STORE_F32;

	// Half-precision destination: round each lane to f16 (nearest even).
	shl.b64 	%off, %dst_idx, 3;
	add.s64 	%addr, %dst, %off;
	cvt.rn.ftz.f16.f32 	%hout0, %cr;
	cvt.rn.ftz.f16.f32 	%hout1, %cb;
	cvt.rn.ftz.f16.f32 	%hout2, %luma;
	cvt.rn.ftz.f16.f32 	%hout3, %alpha;
	st.global.v4.u16 	[%addr], {%hout0, %hout1, %hout2, %hout3};
	bra.uni 	K26_EXIT;

K26_STORE_F32:
	shl.b64 	%off, %dst_idx, 4;
	add.s64 	%addr, %dst, %off;
	st.global.v4.f32 	[%addr], {%cr, %cb, %luma, %alpha};

K26_EXIT:
	ret;
}

// VUYP -> BGRA pixel conversion (709 matrix, per the kernel name); one thread
// handles one pixel. After the matrix, the colour lanes are divided by the
// source alpha (approximate reciprocal). Pixels whose alpha does not pass the
// threshold test below are written as four zeros.
//
// Parameters, as they are used below:
//   param_0  source buffer address (converted to a global address)
//   param_1  source row pitch in pixels (multiplied by the row index)
//   param_2  source format flag: 0 -> 4 x f16 (8 bytes/pixel),
//            non-zero -> 4 x f32 (16 bytes/pixel)
//   param_3  destination buffer address (converted to a global address)
//   param_4  destination row pitch in pixels
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound for the column index
//   param_7  exclusive upper bound for the row index
//   param_8  never read by this kernel
//
// k709YPbPr_To_RGB32f is read as nine consecutive f32 values (three rows of
// three); it is declared outside this block.
.visible .entry PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%oob_col, %oob_row, %oob, %src_is_f32, %dst_is_f32, %has_alpha;
	.reg .b16 	%hin<4>, %hout<4>;
	.reg .s32 	%src_pitch, %src_fmt, %dst_pitch, %dst_fmt, %width, %height;
	.reg .s32 	%bdim, %bidx, %lidx, %col, %row, %src_px, %dst_px;
	.reg .f32 	%m<9>, %cr, %cb, %luma, %alpha, %acc, %red, %grn, %blu;
	.reg .f32 	%a_gap, %one, %inv_a, %out_b, %out_g, %out_r, %out_a;
	.reg .s64 	%src_arg, %dst_arg, %src, %dst, %src_idx, %dst_idx, %off, %addr;

	// ---- kernel arguments ----
	ld.param.u64 	%src_arg, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%src_pitch, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%src_fmt, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u64 	%dst_arg, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%dst_pitch, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%dst_fmt, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%width, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%height, [PixelFormatConvert_VUYP_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%dst, %dst_arg;
	cvta.to.global.u64 	%src, %src_arg;

	// ---- pixel coordinates: ntid * ctaid + tid in x and in y ----
	mov.u32 	%bdim, %ntid.x;
	mov.u32 	%bidx, %ctaid.x;
	mov.u32 	%lidx, %tid.x;
	mad.lo.s32 	%col, %bdim, %bidx, %lidx;
	mov.u32 	%bdim, %ntid.y;
	mov.u32 	%bidx, %ctaid.y;
	mov.u32 	%lidx, %tid.y;
	mad.lo.s32 	%row, %bdim, %bidx, %lidx;

	// Leave unless col < param_6 and row < param_7.
	setp.ge.s32 	%oob_col, %col, %width;
	setp.ge.s32 	%oob_row, %row, %height;
	or.pred 	%oob, %oob_col, %oob_row;
	@%oob bra 	K27_EXIT;

	// ---- load the source pixel: lanes 0..3 -> cr, cb, luma, alpha ----
	mad.lo.s32 	%src_px, %row, %src_pitch, %col;
	cvt.s64.s32 	%src_idx, %src_px;
	setp.ne.s32 	%src_is_f32, %src_fmt, 0;
	@%src_is_f32 bra 	K27_LOAD_F32;

	// Half-precision source: 8 bytes per pixel, widened to f32.
	shl.b64 	%off, %src_idx, 3;
	add.s64 	%addr, %src, %off;
	ld.global.v4.u16 	{%hin0, %hin1, %hin2, %hin3}, [%addr];
	cvt.f32.f16 	%cr, %hin0;
	cvt.f32.f16 	%cb, %hin1;
	cvt.f32.f16 	%luma, %hin2;
	cvt.f32.f16 	%alpha, %hin3;
	bra.uni 	K27_ALPHA_TEST;

K27_LOAD_F32:
	// Single-precision source: 16 bytes per pixel.
	shl.b64 	%off, %src_idx, 4;
	add.s64 	%addr, %src, %off;
	ld.global.v4.f32 	{%cr, %cb, %luma, %alpha}, [%addr];

K27_ALPHA_TEST:
	// 0fB70637BD is about -8e-6, so the test is "alpha - 8e-6 > 0".
	// gtu is also true for an unordered compare, so a NaN alpha takes
	// the divide path as well.
	add.ftz.f32 	%a_gap, %alpha, 0fB70637BD;
	setp.gtu.ftz.f32 	%has_alpha, %a_gap, 0f00000000;

	// Default result: every output lane is zero.
	mov.f32 	%out_b, 0f00000000;
	mov.f32 	%out_g, %out_b;
	mov.f32 	%out_r, %out_b;
	mov.f32 	%out_a, %out_b;
	@!%has_alpha bra 	K27_STORE;

	// Alpha passes through unchanged when the test succeeds.
	mov.f32 	%out_a, %alpha;

	// Each row is m[3i]*luma + m[3i+1]*cb + m[3i+2]*cr, evaluated as
	// one multiply followed by two fused multiply-adds.
	ld.const.f32 	%m0, [k709YPbPr_To_RGB32f];
	ld.const.f32 	%m1, [k709YPbPr_To_RGB32f+4];
	ld.const.f32 	%m2, [k709YPbPr_To_RGB32f+8];
	mul.ftz.f32 	%acc, %cb, %m1;
	fma.rn.ftz.f32 	%acc, %luma, %m0, %acc;
	fma.rn.ftz.f32 	%red, %cr, %m2, %acc;

	ld.const.f32 	%m3, [k709YPbPr_To_RGB32f+12];
	ld.const.f32 	%m4, [k709YPbPr_To_RGB32f+16];
	ld.const.f32 	%m5, [k709YPbPr_To_RGB32f+20];
	mul.ftz.f32 	%acc, %cb, %m4;
	fma.rn.ftz.f32 	%acc, %luma, %m3, %acc;
	fma.rn.ftz.f32 	%grn, %cr, %m5, %acc;

	ld.const.f32 	%m6, [k709YPbPr_To_RGB32f+24];
	ld.const.f32 	%m7, [k709YPbPr_To_RGB32f+28];
	ld.const.f32 	%m8, [k709YPbPr_To_RGB32f+32];
	mul.ftz.f32 	%acc, %cb, %m7;
	fma.rn.ftz.f32 	%acc, %luma, %m6, %acc;
	fma.rn.ftz.f32 	%blu, %cr, %m8, %acc;

	// Scale the three colour lanes by an approximate 1/alpha.
	mov.f32 	%one, 0f3F800000;
	div.approx.ftz.f32 	%inv_a, %one, %alpha;
	mul.ftz.f32 	%out_r, %red, %inv_a;
	mul.ftz.f32 	%out_g, %grn, %inv_a;
	mul.ftz.f32 	%out_b, %blu, %inv_a;

K27_STORE:
	// ---- store the destination pixel: lanes 0..3 <- b, g, r, a ----
	mad.lo.s32 	%dst_px, %row, %dst_pitch, %col;
	cvt.s64.s32 	%dst_idx, %dst_px;
	setp.ne.s32 	%dst_is_f32, %dst_fmt, 0;
	@%dst_is_f32 bra 	K27_STORE_F32;

	// Half-precision destination: round each lane to f16 (nearest even).
	shl.b64 	%off, %dst_idx, 3;
	add.s64 	%addr, %dst, %off;
	cvt.rn.ftz.f16.f32 	%hout0, %out_b;
	cvt.rn.ftz.f16.f32 	%hout1, %out_g;
	cvt.rn.ftz.f16.f32 	%hout2, %out_r;
	cvt.rn.ftz.f16.f32 	%hout3, %out_a;
	st.global.v4.u16 	[%addr], {%hout0, %hout1, %hout2, %hout3};
	bra.uni 	K27_EXIT;

K27_STORE_F32:
	shl.b64 	%off, %dst_idx, 4;
	add.s64 	%addr, %dst, %off;
	st.global.v4.f32 	[%addr], {%out_b, %out_g, %out_r, %out_a};

K27_EXIT:
	ret;
}

// BGRA -> VUYP pixel conversion (709 matrix, per the kernel name); one thread
// handles one pixel. After the matrix, each of the three converted lanes is
// multiplied by the source alpha; alpha itself is copied through.
//
// Parameters, as they are used below:
//   param_0  source buffer address (converted to a global address)
//   param_1  source row pitch in pixels (multiplied by the row index)
//   param_2  source format flag: 0 -> 4 x f16 (8 bytes/pixel),
//            non-zero -> 4 x f32 (16 bytes/pixel)
//   param_3  destination buffer address (converted to a global address)
//   param_4  destination row pitch in pixels
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound for the column index
//   param_7  exclusive upper bound for the row index
//   param_8  never read by this kernel
//
// kRGB32f_To_709YPbPr is read as nine consecutive f32 values (three rows of
// three); it is declared outside this block.
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_8
)
{
	.reg .pred 	%oob_col, %oob_row, %oob, %src_is_f32, %dst_is_f32;
	.reg .b16 	%hin<4>, %hout<4>;
	.reg .s32 	%src_pitch, %src_fmt, %dst_pitch, %dst_fmt, %width, %height;
	.reg .s32 	%bdim, %bidx, %lidx, %col, %row, %src_px, %dst_px;
	.reg .f32 	%m<9>, %blu, %grn, %red, %alpha, %acc, %luma, %cb, %cr;
	.reg .s64 	%src_arg, %dst_arg, %src, %dst, %src_idx, %dst_idx, %off, %addr;

	// ---- kernel arguments ----
	ld.param.u64 	%src_arg, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_0];
	ld.param.u32 	%src_pitch, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_1];
	ld.param.u32 	%src_fmt, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_2];
	ld.param.u64 	%dst_arg, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_3];
	ld.param.u32 	%dst_pitch, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_4];
	ld.param.u32 	%dst_fmt, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_5];
	ld.param.u32 	%width, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_6];
	ld.param.u32 	%height, [PixelFormatConvert_BGRA_4444_32f_To_VUYP_4444_32f_709_Kernel_param_7];
	cvta.to.global.u64 	%dst, %dst_arg;
	cvta.to.global.u64 	%src, %src_arg;

	// ---- pixel coordinates: ntid * ctaid + tid in x and in y ----
	mov.u32 	%bdim, %ntid.x;
	mov.u32 	%bidx, %ctaid.x;
	mov.u32 	%lidx, %tid.x;
	mad.lo.s32 	%col, %bdim, %bidx, %lidx;
	mov.u32 	%bdim, %ntid.y;
	mov.u32 	%bidx, %ctaid.y;
	mov.u32 	%lidx, %tid.y;
	mad.lo.s32 	%row, %bdim, %bidx, %lidx;

	// Leave unless col < param_6 and row < param_7.
	setp.ge.s32 	%oob_col, %col, %width;
	setp.ge.s32 	%oob_row, %row, %height;
	or.pred 	%oob, %oob_col, %oob_row;
	@%oob bra 	K28_EXIT;

	// ---- load the source pixel: lanes 0..3 -> blu, grn, red, alpha ----
	mad.lo.s32 	%src_px, %row, %src_pitch, %col;
	cvt.s64.s32 	%src_idx, %src_px;
	setp.ne.s32 	%src_is_f32, %src_fmt, 0;
	@%src_is_f32 bra 	K28_LOAD_F32;

	// Half-precision source: 8 bytes per pixel, widened to f32.
	shl.b64 	%off, %src_idx, 3;
	add.s64 	%addr, %src, %off;
	ld.global.v4.u16 	{%hin0, %hin1, %hin2, %hin3}, [%addr];
	cvt.f32.f16 	%blu, %hin0;
	cvt.f32.f16 	%grn, %hin1;
	cvt.f32.f16 	%red, %hin2;
	cvt.f32.f16 	%alpha, %hin3;
	bra.uni 	K28_CONVERT;

K28_LOAD_F32:
	// Single-precision source: 16 bytes per pixel.
	shl.b64 	%off, %src_idx, 4;
	add.s64 	%addr, %src, %off;
	ld.global.v4.f32 	{%blu, %grn, %red, %alpha}, [%addr];

K28_CONVERT:
	// Each row is m[3i]*red + m[3i+1]*grn + m[3i+2]*blu, evaluated as
	// one multiply followed by two fused multiply-adds.
	ld.const.f32 	%m0, [kRGB32f_To_709YPbPr];
	ld.const.f32 	%m1, [kRGB32f_To_709YPbPr+4];
	ld.const.f32 	%m2, [kRGB32f_To_709YPbPr+8];
	mul.ftz.f32 	%acc, %grn, %m1;
	fma.rn.ftz.f32 	%acc, %red, %m0, %acc;
	fma.rn.ftz.f32 	%luma, %blu, %m2, %acc;

	ld.const.f32 	%m3, [kRGB32f_To_709YPbPr+12];
	ld.const.f32 	%m4, [kRGB32f_To_709YPbPr+16];
	ld.const.f32 	%m5, [kRGB32f_To_709YPbPr+20];
	mul.ftz.f32 	%acc, %grn, %m4;
	fma.rn.ftz.f32 	%acc, %red, %m3, %acc;
	fma.rn.ftz.f32 	%cb, %blu, %m5, %acc;

	ld.const.f32 	%m6, [kRGB32f_To_709YPbPr+24];
	ld.const.f32 	%m7, [kRGB32f_To_709YPbPr+28];
	ld.const.f32 	%m8, [kRGB32f_To_709YPbPr+32];
	mul.ftz.f32 	%acc, %grn, %m7;
	fma.rn.ftz.f32 	%acc, %red, %m6, %acc;
	fma.rn.ftz.f32 	%cr, %blu, %m8, %acc;

	// Multiply the three converted lanes by alpha.
	mul.ftz.f32 	%luma, %luma, %alpha;
	mul.ftz.f32 	%cb, %cb, %alpha;
	mul.ftz.f32 	%cr, %cr, %alpha;

	// ---- store the destination pixel: lanes 0..3 <- cr, cb, luma, alpha ----
	mad.lo.s32 	%dst_px, %row, %dst_pitch, %col;
	cvt.s64.s32 	%dst_idx, %dst_px;
	setp.ne.s32 	%dst_is_f32, %dst_fmt, 0;
	@%dst_is_f32 bra 	K28_STORE_F32;

	// Half-precision destination: round each lane to f16 (nearest even).
	shl.b64 	%off, %dst_idx, 3;
	add.s64 	%addr, %dst, %off;
	cvt.rn.ftz.f16.f32 	%hout0, %cr;
	cvt.rn.ftz.f16.f32 	%hout1, %cb;
	cvt.rn.ftz.f16.f32 	%hout2, %luma;
	cvt.rn.ftz.f16.f32 	%hout3, %alpha;
	st.global.v4.u16 	[%addr], {%hout0, %hout1, %hout2, %hout3};
	bra.uni 	K28_EXIT;

K28_STORE_F32:
	shl.b64 	%off, %dst_idx, 4;
	add.s64 	%addr, %dst, %off;
	st.global.v4.f32 	[%addr], {%cr, %cb, %luma, %alpha};

K28_EXIT:
	ret;
}

// VUYX -> BGRA pixel conversion (709 matrix, per the kernel name); one thread
// handles one pixel. The fourth source lane is loaded but never used, and the
// fourth destination lane is always written as 1.0.
//
// Parameters, as they are used below:
//   param_0  source buffer address (converted to a global address)
//   param_1  source row pitch in pixels (multiplied by the row index)
//   param_2  source format flag: 0 -> 4 x f16 (8 bytes/pixel),
//            non-zero -> 4 x f32 (16 bytes/pixel)
//   param_3  destination buffer address (converted to a global address)
//   param_4  destination row pitch in pixels
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound for the column index
//   param_7  exclusive upper bound for the row index
//   param_8  never read by this kernel
//
// k709YPbPr_To_RGB32f is read as nine consecutive f32 values (three rows of
// three); it is declared outside this block.
.visible .entry PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel(
	.param .u64 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2,
	.param .u64 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7,
	.param .u32 PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_8
)
{
	.reg .pred 	%oob_col, %oob_row, %oob, %src_is_f32, %dst_is_f32;
	.reg .b16 	%hin<4>, %hout<4>;
	.reg .s32 	%src_pitch, %src_fmt, %dst_pitch, %dst_fmt, %width, %height;
	.reg .s32 	%bdim, %bidx, %lidx, %col, %row, %src_px, %dst_px;
	.reg .f32 	%m<9>, %cr, %cb, %luma, %unused, %acc, %red, %grn, %blu, %one;
	.reg .s64 	%src_arg, %dst_arg, %src, %dst, %src_idx, %dst_idx, %off, %addr;

	// ---- kernel arguments ----
	ld.param.u64 	%src_arg, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_0];
	ld.param.u32 	%src_pitch, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_1];
	ld.param.u32 	%src_fmt, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_2];
	ld.param.u64 	%dst_arg, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_3];
	ld.param.u32 	%dst_pitch, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_4];
	ld.param.u32 	%dst_fmt, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_5];
	ld.param.u32 	%width, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_6];
	ld.param.u32 	%height, [PixelFormatConvert_VUYX_4444_32f_709_To_BGRA_4444_32f_Kernel_param_7];
	cvta.to.global.u64 	%dst, %dst_arg;
	cvta.to.global.u64 	%src, %src_arg;

	// ---- pixel coordinates: ntid * ctaid + tid in x and in y ----
	mov.u32 	%bdim, %ntid.x;
	mov.u32 	%bidx, %ctaid.x;
	mov.u32 	%lidx, %tid.x;
	mad.lo.s32 	%col, %bdim, %bidx, %lidx;
	mov.u32 	%bdim, %ntid.y;
	mov.u32 	%bidx, %ctaid.y;
	mov.u32 	%lidx, %tid.y;
	mad.lo.s32 	%row, %bdim, %bidx, %lidx;

	// Leave unless col < param_6 and row < param_7.
	setp.ge.s32 	%oob_col, %col, %width;
	setp.ge.s32 	%oob_row, %row, %height;
	or.pred 	%oob, %oob_col, %oob_row;
	@%oob bra 	K29_EXIT;

	// ---- load the source pixel: lanes 0..2 -> cr, cb, luma ----
	mad.lo.s32 	%src_px, %row, %src_pitch, %col;
	cvt.s64.s32 	%src_idx, %src_px;
	setp.ne.s32 	%src_is_f32, %src_fmt, 0;
	@%src_is_f32 bra 	K29_LOAD_F32;

	// Half-precision source: 8 bytes per pixel; lane 3 is not converted.
	shl.b64 	%off, %src_idx, 3;
	add.s64 	%addr, %src, %off;
	ld.global.v4.u16 	{%hin0, %hin1, %hin2, %hin3}, [%addr];
	cvt.f32.f16 	%cr, %hin0;
	cvt.f32.f16 	%cb, %hin1;
	cvt.f32.f16 	%luma, %hin2;
	bra.uni 	K29_CONVERT;

K29_LOAD_F32:
	// Single-precision source: 16 bytes per pixel; lane 3 is discarded.
	shl.b64 	%off, %src_idx, 4;
	add.s64 	%addr, %src, %off;
	ld.global.v4.f32 	{%cr, %cb, %luma, %unused}, [%addr];

K29_CONVERT:
	// Each output is m[3i]*luma + m[3i+1]*cb + m[3i+2]*cr, evaluated as
	// one multiply followed by two fused multiply-adds.
	ld.const.f32 	%m0, [k709YPbPr_To_RGB32f];
	ld.const.f32 	%m1, [k709YPbPr_To_RGB32f+4];
	ld.const.f32 	%m2, [k709YPbPr_To_RGB32f+8];
	mul.ftz.f32 	%acc, %cb, %m1;
	fma.rn.ftz.f32 	%acc, %luma, %m0, %acc;
	fma.rn.ftz.f32 	%red, %cr, %m2, %acc;

	ld.const.f32 	%m3, [k709YPbPr_To_RGB32f+12];
	ld.const.f32 	%m4, [k709YPbPr_To_RGB32f+16];
	ld.const.f32 	%m5, [k709YPbPr_To_RGB32f+20];
	mul.ftz.f32 	%acc, %cb, %m4;
	fma.rn.ftz.f32 	%acc, %luma, %m3, %acc;
	fma.rn.ftz.f32 	%grn, %cr, %m5, %acc;

	ld.const.f32 	%m6, [k709YPbPr_To_RGB32f+24];
	ld.const.f32 	%m7, [k709YPbPr_To_RGB32f+28];
	ld.const.f32 	%m8, [k709YPbPr_To_RGB32f+32];
	mul.ftz.f32 	%acc, %cb, %m7;
	fma.rn.ftz.f32 	%acc, %luma, %m6, %acc;
	fma.rn.ftz.f32 	%blu, %cr, %m8, %acc;

	// ---- store the destination pixel: lanes 0..3 <- blu, grn, red, 1.0 ----
	mov.f32 	%one, 0f3F800000;
	mad.lo.s32 	%dst_px, %row, %dst_pitch, %col;
	cvt.s64.s32 	%dst_idx, %dst_px;
	setp.ne.s32 	%dst_is_f32, %dst_fmt, 0;
	@%dst_is_f32 bra 	K29_STORE_F32;

	// Half-precision destination: round each lane to f16 (nearest even).
	shl.b64 	%off, %dst_idx, 3;
	add.s64 	%addr, %dst, %off;
	cvt.rn.ftz.f16.f32 	%hout0, %blu;
	cvt.rn.ftz.f16.f32 	%hout1, %grn;
	cvt.rn.ftz.f16.f32 	%hout2, %red;
	cvt.rn.ftz.f16.f32 	%hout3, %one;
	st.global.v4.u16 	[%addr], {%hout0, %hout1, %hout2, %hout3};
	bra.uni 	K29_EXIT;

K29_STORE_F32:
	shl.b64 	%off, %dst_idx, 4;
	add.s64 	%addr, %dst, %off;
	st.global.v4.f32 	[%addr], {%blu, %grn, %red, %one};

K29_EXIT:
	ret;
}

// BGRA -> VUYX pixel conversion (709 matrix, per the kernel name); one thread
// handles one pixel. After the matrix, each of the three converted lanes is
// multiplied by the source alpha, and the fourth destination lane is always
// written as 1.0.
//
// Parameters, as they are used below:
//   param_0  source buffer address (converted to a global address)
//   param_1  source row pitch in pixels (multiplied by the row index)
//   param_2  source format flag: 0 -> 4 x f16 (8 bytes/pixel),
//            non-zero -> 4 x f32 (16 bytes/pixel)
//   param_3  destination buffer address (converted to a global address)
//   param_4  destination row pitch in pixels
//   param_5  destination format flag, same encoding as param_2
//   param_6  exclusive upper bound for the column index
//   param_7  exclusive upper bound for the row index
//   param_8  never read by this kernel
//
// kRGB32f_To_709YPbPr is read as nine consecutive f32 values (three rows of
// three); it is declared outside this block.
.visible .entry PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel(
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_0,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_2,
	.param .u64 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_5,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_6,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_7,
	.param .u32 PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_8
)
{
	.reg .pred 	%oob_col, %oob_row, %oob, %src_is_f32, %dst_is_f32;
	.reg .b16 	%hin<4>, %hout<4>;
	.reg .s32 	%src_pitch, %src_fmt, %dst_pitch, %dst_fmt, %width, %height;
	.reg .s32 	%bdim, %bidx, %lidx, %col, %row, %src_px, %dst_px;
	.reg .f32 	%m<9>, %blu, %grn, %red, %alpha, %acc, %luma, %cb, %cr, %one;
	.reg .s64 	%src_arg, %dst_arg, %src, %dst, %src_idx, %dst_idx, %off, %addr;

	// ---- kernel arguments ----
	ld.param.u64 	%src_arg, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_0];
	ld.param.u32 	%src_pitch, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_1];
	ld.param.u32 	%src_fmt, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_2];
	ld.param.u64 	%dst_arg, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_3];
	ld.param.u32 	%dst_pitch, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_4];
	ld.param.u32 	%dst_fmt, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_5];
	ld.param.u32 	%width, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_6];
	ld.param.u32 	%height, [PixelFormatConvert_BGRA_4444_32f_To_VUYX_4444_32f_709_Kernel_param_7];
	cvta.to.global.u64 	%dst, %dst_arg;
	cvta.to.global.u64 	%src, %src_arg;

	// ---- pixel coordinates: ntid * ctaid + tid in x and in y ----
	mov.u32 	%bdim, %ntid.x;
	mov.u32 	%bidx, %ctaid.x;
	mov.u32 	%lidx, %tid.x;
	mad.lo.s32 	%col, %bdim, %bidx, %lidx;
	mov.u32 	%bdim, %ntid.y;
	mov.u32 	%bidx, %ctaid.y;
	mov.u32 	%lidx, %tid.y;
	mad.lo.s32 	%row, %bdim, %bidx, %lidx;

	// Leave unless col < param_6 and row < param_7.
	setp.ge.s32 	%oob_col, %col, %width;
	setp.ge.s32 	%oob_row, %row, %height;
	or.pred 	%oob, %oob_col, %oob_row;
	@%oob bra 	K30_EXIT;

	// ---- load the source pixel: lanes 0..3 -> blu, grn, red, alpha ----
	mad.lo.s32 	%src_px, %row, %src_pitch, %col;
	cvt.s64.s32 	%src_idx, %src_px;
	setp.ne.s32 	%src_is_f32, %src_fmt, 0;
	@%src_is_f32 bra 	K30_LOAD_F32;

	// Half-precision source: 8 bytes per pixel, widened to f32.
	shl.b64 	%off, %src_idx, 3;
	add.s64 	%addr, %src, %off;
	ld.global.v4.u16 	{%hin0, %hin1, %hin2, %hin3}, [%addr];
	cvt.f32.f16 	%blu, %hin0;
	cvt.f32.f16 	%grn, %hin1;
	cvt.f32.f16 	%red, %hin2;
	cvt.f32.f16 	%alpha, %hin3;
	bra.uni 	K30_CONVERT;

K30_LOAD_F32:
	// Single-precision source: 16 bytes per pixel.
	shl.b64 	%off, %src_idx, 4;
	add.s64 	%addr, %src, %off;
	ld.global.v4.f32 	{%blu, %grn, %red, %alpha}, [%addr];

K30_CONVERT:
	// Each row is m[3i]*red + m[3i+1]*grn + m[3i+2]*blu, evaluated as
	// one multiply followed by two fused multiply-adds.
	ld.const.f32 	%m0, [kRGB32f_To_709YPbPr];
	ld.const.f32 	%m1, [kRGB32f_To_709YPbPr+4];
	ld.const.f32 	%m2, [kRGB32f_To_709YPbPr+8];
	mul.ftz.f32 	%acc, %grn, %m1;
	fma.rn.ftz.f32 	%acc, %red, %m0, %acc;
	fma.rn.ftz.f32 	%luma, %blu, %m2, %acc;

	ld.const.f32 	%m3, [kRGB32f_To_709YPbPr+12];
	ld.const.f32 	%m4, [kRGB32f_To_709YPbPr+16];
	ld.const.f32 	%m5, [kRGB32f_To_709YPbPr+20];
	mul.ftz.f32 	%acc, %grn, %m4;
	fma.rn.ftz.f32 	%acc, %red, %m3, %acc;
	fma.rn.ftz.f32 	%cb, %blu, %m5, %acc;

	ld.const.f32 	%m6, [kRGB32f_To_709YPbPr+24];
	ld.const.f32 	%m7, [kRGB32f_To_709YPbPr+28];
	ld.const.f32 	%m8, [kRGB32f_To_709YPbPr+32];
	mul.ftz.f32 	%acc, %grn, %m7;
	fma.rn.ftz.f32 	%acc, %red, %m6, %acc;
	fma.rn.ftz.f32 	%cr, %blu, %m8, %acc;

	// Multiply the three converted lanes by the source alpha.
	mul.ftz.f32 	%luma, %luma, %alpha;
	mul.ftz.f32 	%cb, %cb, %alpha;
	mul.ftz.f32 	%cr, %cr, %alpha;

	// ---- store the destination pixel: lanes 0..3 <- cr, cb, luma, 1.0 ----
	mov.f32 	%one, 0f3F800000;
	mad.lo.s32 	%dst_px, %row, %dst_pitch, %col;
	cvt.s64.s32 	%dst_idx, %dst_px;
	setp.ne.s32 	%dst_is_f32, %dst_fmt, 0;
	@%dst_is_f32 bra 	K30_STORE_F32;

	// Half-precision destination: round each lane to f16 (nearest even).
	shl.b64 	%off, %dst_idx, 3;
	add.s64 	%addr, %dst, %off;
	cvt.rn.ftz.f16.f32 	%hout0, %cr;
	cvt.rn.ftz.f16.f32 	%hout1, %cb;
	cvt.rn.ftz.f16.f32 	%hout2, %luma;
	cvt.rn.ftz.f16.f32 	%hout3, %one;
	st.global.v4.u16 	[%addr], {%hout0, %hout1, %hout2, %hout3};
	bra.uni 	K30_EXIT;

K30_STORE_F32:
	shl.b64 	%off, %dst_idx, 4;
	add.s64 	%addr, %dst, %off;
	st.global.v4.f32 	[%addr], {%cr, %cb, %luma, %one};

K30_EXIT:
	ret;
}


