//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//

.version 4.1
.target sm_30
.address_size 64

.global .texref inTexture;
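// The .const tables below are 3x3 colorspace conversion matrices (nine f32 values,
// row-major, 36 bytes each) plus two 3-component YCbCr offset vectors. For example,
// the first row of kRGB32f_To_601YPbPr decodes to ~{0.299, 0.587, 0.114} (the BT.601
// luma weights); kYCbCrOffset decodes to {16, 128, 128} and kYCbCrFullRangeOffset
// to {0, 128, 128}.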
.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 33, 201, 44, 190, 111, 155, 169, 190, 0, 0, 0, 63, 0, 0, 0, 63, 70, 94, 214, 190, 232, 134, 166, 189};
.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 188, 116, 179, 63, 0, 0, 128, 63, 152, 50, 176, 190, 158, 209, 54, 191, 0, 0, 128, 63, 229, 208, 226, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70, 246, 130, 66, 145, 141, 0, 67, 94, 186, 199, 65, 33, 48, 23, 194, 240, 103, 148, 194, 0, 0, 224, 66, 0, 0, 224, 66, 111, 146, 187, 194, 70, 182, 145, 193};
.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 182, 23, 205, 59, 37, 160, 149, 59, 40, 15, 201, 186, 156, 239, 80, 187, 37, 160, 149, 59, 236, 155, 1, 60, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219, 121, 131, 62, 152, 14, 1, 63, 18, 131, 200, 61, 174, 199, 23, 190, 238, 252, 148, 190, 197, 224, 224, 62, 197, 224, 224, 62, 217, 78, 188, 190, 174, 71, 146, 189};
.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 160, 74, 204, 63, 127, 10, 149, 63, 254, 148, 200, 190, 184, 30, 80, 191, 127, 10, 149, 63, 78, 26, 1, 64, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 166, 27, 44, 190, 39, 241, 168, 190, 250, 254, 254, 62, 250, 254, 254, 62, 43, 135, 213, 190, 59, 223, 165, 189};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0, 0, 128, 63, 0, 0, 0, 0, 72, 193, 178, 63, 0, 0, 128, 63, 143, 130, 175, 190, 225, 26, 54, 191, 0, 0, 128, 63, 20, 238, 225, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113, 125, 152, 66, 92, 175, 21, 67, 92, 143, 232, 65, 158, 111, 43, 194, 49, 72, 168, 194, 0, 0, 254, 66, 0, 0, 254, 66, 170, 177, 212, 194, 88, 57, 165, 193};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129, 128, 128, 59, 0, 0, 0, 0, 188, 116, 179, 59, 129, 128, 128, 59, 194, 50, 176, 186, 179, 209, 54, 187, 129, 128, 128, 59, 229, 208, 226, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208, 179, 89, 62, 89, 23, 55, 63, 152, 221, 147, 61, 186, 164, 234, 189, 210, 86, 197, 190, 0, 0, 0, 63, 0, 0, 0, 63, 190, 134, 232, 190, 16, 202, 59, 189};
.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 12, 147, 201, 63, 0, 0, 128, 63, 221, 209, 63, 190, 243, 173, 239, 190, 0, 0, 128, 63, 77, 132, 237, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106, 60, 58, 66, 6, 161, 28, 67, 244, 253, 124, 65, 223, 79, 205, 193, 8, 172, 172, 194, 0, 0, 224, 66, 0, 0, 224, 66, 195, 117, 203, 194, 236, 81, 36, 193};
.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 239, 94, 230, 59, 37, 160, 149, 59, 33, 57, 91, 186, 178, 245, 8, 187, 37, 160, 149, 59, 82, 185, 7, 60, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCrFullRange_To_RGB32f[36] = {131, 128, 128, 59, 0, 0, 0, 0, 28, 147, 201, 59, 131, 128, 128, 59, 61, 210, 63, 186, 248, 173, 239, 186, 131, 128, 128, 59, 82, 132, 237, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207, 247, 58, 62, 53, 62, 29, 63, 231, 251, 125, 61, 184, 30, 206, 189, 23, 89, 173, 190, 197, 224, 224, 62, 197, 224, 224, 62, 12, 66, 204, 190, 195, 245, 36, 189};
.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 147, 120, 229, 63, 127, 10, 149, 63, 53, 94, 90, 190, 205, 108, 8, 191, 127, 10, 149, 63, 154, 49, 7, 64, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0, 0, 128, 63, 23, 100, 203, 61, 1, 77, 68, 62, 0, 0, 0, 0, 18, 103, 125, 63, 10, 158, 226, 189, 0, 0, 0, 0, 61, 98, 148, 189, 249, 191, 123, 63};
.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0, 0, 128, 63, 122, 165, 236, 189, 179, 237, 84, 190, 0, 0, 0, 0, 204, 98, 130, 63, 216, 188, 234, 61, 0, 0, 0, 0, 74, 179, 153, 61, 234, 61, 131, 63};
.const .align 4 .b8 kYCbCrOffset[12] = {0, 0, 128, 65, 0, 0, 0, 67, 0, 0, 0, 67};
.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 67};

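// _Z13MaxDepthValuej demangles to MaxDepthValue(unsigned int). It maps the bit-depth
// field (arg & 0xF8) to a maximum code value: 0 -> 255.0 (0f437F0000), 8 -> 1023.0
// (0f447FC000), 16 -> 32768.0 (0f47000000), anything else -> 1.0 (0f3F800000);
// consistent with 8-bit, 10-bit, 16-bit and floating-point encodings.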
.visible .func  (.param .b32 func_retval0) _Z13MaxDepthValuej(
	.param .b32 _Z13MaxDepthValuej_param_0
)
{
	.reg .pred 	%p<4>;
	.reg .s32 	%r<3>;
	.reg .f32 	%f<6>;


	ld.param.u32 	%r2, [_Z13MaxDepthValuej_param_0];
	and.b32  	%r1, %r2, 248;
	setp.ne.s32	%p1, %r1, 0;
	@%p1 bra 	BB0_2;

	mov.f32 	%f5, 0f437F0000;
	bra.uni 	BB0_5;

BB0_2:
	setp.ne.s32	%p2, %r1, 8;
	@%p2 bra 	BB0_4;

	mov.f32 	%f5, 0f447FC000;
	bra.uni 	BB0_5;

BB0_4:
	setp.eq.s32	%p3, %r1, 16;
	selp.f32	%f5, 0f47000000, 0f3F800000, %p3;

BB0_5:
	st.param.f32	[func_retval0+0], %f5;
	ret;
}

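// _Z23ColorSpaceConvertMatrixjj demangles to
// ColorSpaceConvertMatrix(unsigned int, unsigned int). It returns a generic-space
// pointer (cvta.const) to one of the .const matrices above, selected from flag bits
// 0x100, 0x200, 0x800 and 0x1000 of the two format words, or 0 when no matrix applies.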
.visible .func  (.param .b64 func_retval0) _Z23ColorSpaceConvertMatrixjj(
	.param .b32 _Z23ColorSpaceConvertMatrixjj_param_0,
	.param .b32 _Z23ColorSpaceConvertMatrixjj_param_1
)
{
	.reg .pred 	%p<33>;
	.reg .s32 	%r<16>;
	.reg .s64 	%rd<32>;


	ld.param.u32 	%r9, [_Z23ColorSpaceConvertMatrixjj_param_0];
	ld.param.u32 	%r10, [_Z23ColorSpaceConvertMatrixjj_param_1];
	and.b32  	%r11, %r9, 512;
	setp.eq.s32	%p1, %r11, 0;
	and.b32  	%r1, %r10, 256;
	@%p1 bra 	BB1_29;

	and.b32  	%r12, %r9, 2048;
	setp.eq.s32	%p2, %r12, 0;
	and.b32  	%r2, %r9, 4096;
	and.b32  	%r3, %r10, 512;
	@%p2 bra 	BB1_15;

	setp.eq.s32	%p3, %r2, 0;
	@%p3 bra 	BB1_13;

	setp.eq.s32	%p4, %r3, 0;
	and.b32  	%r4, %r9, 256;
	@%p4 bra 	BB1_7;

	setp.ne.s32	%p5, %r4, 0;
	mov.u64 	%rd31, 0;
	@%p5 bra 	BB1_47;

	and.b32  	%r13, %r10, 4096;
	setp.eq.s32	%p6, %r13, 0;
	setp.ne.s32	%p7, %r1, 0;
	or.pred  	%p8, %p6, %p7;
	mov.u64 	%rd31, 0;
	@%p8 bra 	BB1_47;

	cvta.const.u64 	%rd31, k709YCbCr_To_601YCbCr;
	bra.uni 	BB1_47;

BB1_7:
	setp.eq.s32	%p9, %r4, 0;
	@%p9 bra 	BB1_10;

	setp.eq.s32	%p10, %r1, 0;
	mov.u64 	%rd31, 0;
	@%p10 bra 	BB1_47;

	cvta.const.u64 	%rd31, k709YPbPr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_10:
	setp.eq.s32	%p11, %r1, 0;
	@%p11 bra 	BB1_12;

	cvta.const.u64 	%rd31, k709YCbCr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_12:
	cvta.const.u64 	%rd31, k709YCbCr_To_RGB8u;
	bra.uni 	BB1_47;

BB1_13:
	setp.ne.s32	%p12, %r3, 0;
	setp.eq.s32	%p13, %r1, 0;
	or.pred  	%p14, %p13, %p12;
	mov.u64 	%rd31, 0;
	@%p14 bra 	BB1_47;

	cvta.const.u64 	%rd31, k709YCbCrFullRange_To_RGB32f;
	bra.uni 	BB1_47;

BB1_15:
	setp.eq.s32	%p15, %r2, 0;
	@%p15 bra 	BB1_25;

	setp.eq.s32	%p16, %r3, 0;
	and.b32  	%r5, %r9, 256;
	@%p16 bra 	BB1_19;

	or.b32  	%r14, %r5, %r1;
	setp.ne.s32	%p17, %r14, 0;
	mov.u64 	%rd31, 0;
	@%p17 bra 	BB1_47;

	cvta.const.u64 	%rd31, k601YCbCr_To_709YCbCr;
	bra.uni 	BB1_47;

BB1_19:
	setp.eq.s32	%p18, %r5, 0;
	@%p18 bra 	BB1_22;

	setp.eq.s32	%p19, %r1, 0;
	mov.u64 	%rd31, 0;
	@%p19 bra 	BB1_47;

	cvta.const.u64 	%rd31, k601YPbPr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_22:
	setp.eq.s32	%p20, %r1, 0;
	@%p20 bra 	BB1_24;

	cvta.const.u64 	%rd31, k601YCbCr_To_RGB32f;
	bra.uni 	BB1_47;

BB1_24:
	cvta.const.u64 	%rd31, k601YCbCr_To_RGB8u;
	bra.uni 	BB1_47;

BB1_25:
	setp.ne.s32	%p21, %r3, 0;
	mov.u64 	%rd31, 0;
	@%p21 bra 	BB1_47;

	setp.eq.s32	%p22, %r1, 0;
	@%p22 bra 	BB1_28;

	cvta.const.u64 	%rd31, k601YCbCrFullRange_To_RGB32f;
	bra.uni 	BB1_47;

BB1_28:
	cvta.const.u64 	%rd31, k601YCbCrFullRange_To_RGB8u;
	bra.uni 	BB1_47;

BB1_29:
	and.b32  	%r15, %r9, 256;
	setp.eq.s32	%p23, %r15, 0;
	and.b32  	%r6, %r10, 2048;
	@%p23 bra 	BB1_40;

	setp.eq.s32	%p24, %r1, 0;
	@%p24 bra 	BB1_34;

	setp.eq.s32	%p25, %r6, 0;
	@%p25 bra 	BB1_33;

	cvta.const.u64 	%rd31, kRGB32f_To_709YPbPr;
	bra.uni 	BB1_47;

BB1_33:
	cvta.const.u64 	%rd31, kRGB32f_To_601YPbPr;
	bra.uni 	BB1_47;

BB1_34:
	setp.eq.s32	%p26, %r6, 0;
	and.b32  	%r7, %r10, 4096;
	@%p26 bra 	BB1_37;

	setp.eq.s32	%p27, %r7, 0;
	mov.u64 	%rd31, 0;
	@%p27 bra 	BB1_47;

	cvta.const.u64 	%rd31, kRGB32f_To_709YCbCr;
	bra.uni 	BB1_47;

BB1_37:
	setp.eq.s32	%p28, %r7, 0;
	@%p28 bra 	BB1_39;

	cvta.const.u64 	%rd31, kRGB32f_To_601YCbCr;
	bra.uni 	BB1_47;

BB1_39:
	cvta.const.u64 	%rd31, kRGB32f_To_601YCbCrFullRange;
	bra.uni 	BB1_47;

BB1_40:
	setp.ne.s32	%p29, %r1, 0;
	mov.u64 	%rd31, 0;
	@%p29 bra 	BB1_47;

	setp.eq.s32	%p30, %r6, 0;
	and.b32  	%r8, %r10, 4096;
	@%p30 bra 	BB1_44;

	setp.ne.s32	%p31, %r8, 0;
	mov.u64 	%rd31, 0;
	@%p31 bra 	BB1_47;

	cvta.const.u64 	%rd31, kRGB8u_To_709YCbCr;
	bra.uni 	BB1_47;

BB1_44:
	setp.eq.s32	%p32, %r8, 0;
	@%p32 bra 	BB1_46;

	cvta.const.u64 	%rd31, kRGB8u_To_601YCbCr;
	bra.uni 	BB1_47;

BB1_46:
	cvta.const.u64 	%rd31, kRGB8u_To_601YCbCrFullRange;

BB1_47:
	st.param.b64	[func_retval0+0], %rd31;
	ret;
}

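// _Z5clampIfET_S0_S0_S0_ demangles to float clamp<float>(float, float, float):
// it returns min(max(param_0, param_1), param_2) using the ftz max/min variants.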
.visible .func  (.param .b32 func_retval0) _Z5clampIfET_S0_S0_S0_(
	.param .b32 _Z5clampIfET_S0_S0_S0__param_0,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_1,
	.param .b32 _Z5clampIfET_S0_S0_S0__param_2
)
{
	.reg .f32 	%f<6>;


	ld.param.f32 	%f1, [_Z5clampIfET_S0_S0_S0__param_0];
	ld.param.f32 	%f2, [_Z5clampIfET_S0_S0_S0__param_1];
	ld.param.f32 	%f3, [_Z5clampIfET_S0_S0_S0__param_2];
	max.ftz.f32 	%f4, %f1, %f2;
	min.ftz.f32 	%f5, %f4, %f3;
	st.param.f32	[func_retval0+0], %f5;
	ret;
}

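// _Z23UnpremultiplyComponents6float4j demangles to
// UnpremultiplyComponents(float4, unsigned int). Reading of the code: when
// (flags & 0x300) == 0x200 the YCbCr offsets (scaled by the bit-depth maximum / 255)
// are first subtracted from the y/z/w components and added back at the end; if the x
// component (the divisor) is not greater than ~8e-6 (0fB70637BD is about -8e-6) all
// four outputs are zero, otherwise y/z/w are multiplied by (bit-depth maximum / x).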
.visible .func  (.param .align 16 .b8 func_retval0[16]) _Z23UnpremultiplyComponents6float4j(
	.param .align 16 .b8 _Z23UnpremultiplyComponents6float4j_param_0[16],
	.param .b32 _Z23UnpremultiplyComponents6float4j_param_1
)
{
	.reg .pred 	%p<15>;
	.reg .s32 	%r<8>;
	.reg .f32 	%f<81>;


	ld.param.f32 	%f74, [_Z23UnpremultiplyComponents6float4j_param_0+12];
	ld.param.f32 	%f73, [_Z23UnpremultiplyComponents6float4j_param_0+8];
	ld.param.f32 	%f72, [_Z23UnpremultiplyComponents6float4j_param_0+4];
	ld.param.f32 	%f30, [_Z23UnpremultiplyComponents6float4j_param_0];
	ld.param.u32 	%r5, [_Z23UnpremultiplyComponents6float4j_param_1];
	and.b32  	%r1, %r5, 768;
	setp.ne.s32	%p1, %r1, 512;
	@%p1 bra 	BB3_7;

	and.b32  	%r2, %r5, 248;
	setp.ne.s32	%p2, %r2, 0;
	@%p2 bra 	BB3_3;

	mov.f32 	%f71, 0f437F0000;
	bra.uni 	BB3_6;

BB3_3:
	setp.ne.s32	%p3, %r2, 8;
	@%p3 bra 	BB3_5;

	mov.f32 	%f71, 0f447FC000;
	bra.uni 	BB3_6;

BB3_5:
	setp.eq.s32	%p4, %r2, 16;
	selp.f32	%f71, 0f47000000, 0f3F800000, %p4;

BB3_6:
	and.b32  	%r6, %r5, 4096;
	setp.eq.s32	%p5, %r6, 0;
	ld.const.f32 	%f36, [kYCbCrOffset];
	ld.const.f32 	%f37, [kYCbCrFullRangeOffset];
	selp.f32	%f38, %f37, %f36, %p5;
	mov.f32 	%f39, 0f437F0000;
	div.approx.ftz.f32 	%f40, %f71, %f39;
	mul.ftz.f32 	%f41, %f38, %f40;
	sub.ftz.f32 	%f72, %f72, %f41;
	ld.const.f32 	%f42, [kYCbCrOffset+4];
	ld.const.f32 	%f43, [kYCbCrFullRangeOffset+4];
	selp.f32	%f44, %f43, %f42, %p5;
	mul.ftz.f32 	%f45, %f44, %f40;
	sub.ftz.f32 	%f73, %f73, %f45;
	ld.const.f32 	%f46, [kYCbCrOffset+8];
	ld.const.f32 	%f47, [kYCbCrFullRangeOffset+8];
	selp.f32	%f48, %f47, %f46, %p5;
	mul.ftz.f32 	%f49, %f48, %f40;
	sub.ftz.f32 	%f74, %f74, %f49;

BB3_7:
	add.ftz.f32 	%f50, %f30, 0fB70637BD;
	setp.gtu.ftz.f32	%p6, %f50, 0f00000000;
	mov.f32 	%f76, %f30;
	@%p6 bra 	BB3_9;

	mov.f32 	%f80, 0f00000000;
	mov.f32 	%f79, %f80;
	mov.f32 	%f78, %f80;
	mov.f32 	%f76, %f80;
	bra.uni 	BB3_15;

BB3_9:
	and.b32  	%r3, %r5, 248;
	setp.ne.s32	%p7, %r3, 0;
	@%p7 bra 	BB3_11;

	mov.f32 	%f75, 0f437F0000;
	bra.uni 	BB3_14;

BB3_11:
	setp.ne.s32	%p8, %r3, 8;
	@%p8 bra 	BB3_13;

	mov.f32 	%f75, 0f447FC000;
	bra.uni 	BB3_14;

BB3_13:
	setp.eq.s32	%p9, %r3, 16;
	selp.f32	%f75, 0f47000000, 0f3F800000, %p9;

BB3_14:
	div.approx.ftz.f32 	%f57, %f75, %f30;
	mul.ftz.f32 	%f78, %f72, %f57;
	mul.ftz.f32 	%f79, %f73, %f57;
	mul.ftz.f32 	%f80, %f74, %f57;

BB3_15:
	@%p1 bra 	BB3_22;

	and.b32  	%r4, %r5, 248;
	setp.eq.s32	%p11, %r4, 8;
	@%p11 bra 	BB3_19;

	setp.ne.s32	%p12, %r4, 0;
	@%p12 bra 	BB3_20;

	mov.f32 	%f77, 0f437F0000;
	bra.uni 	BB3_21;

BB3_19:
	mov.f32 	%f77, 0f447FC000;
	bra.uni 	BB3_21;

BB3_20:
	setp.eq.s32	%p13, %r4, 16;
	selp.f32	%f77, 0f47000000, 0f3F800000, %p13;

BB3_21:
	and.b32  	%r7, %r5, 4096;
	setp.eq.s32	%p14, %r7, 0;
	ld.const.f32 	%f60, [kYCbCrOffset];
	ld.const.f32 	%f61, [kYCbCrFullRangeOffset];
	selp.f32	%f62, %f61, %f60, %p14;
	mov.f32 	%f63, 0f437F0000;
	div.approx.ftz.f32 	%f64, %f77, %f63;
	fma.rn.ftz.f32 	%f78, %f62, %f64, %f78;
	ld.const.f32 	%f65, [kYCbCrOffset+4];
	ld.const.f32 	%f66, [kYCbCrFullRangeOffset+4];
	selp.f32	%f67, %f66, %f65, %p14;
	fma.rn.ftz.f32 	%f79, %f67, %f64, %f79;
	ld.const.f32 	%f68, [kYCbCrOffset+8];
	ld.const.f32 	%f69, [kYCbCrFullRangeOffset+8];
	selp.f32	%f70, %f69, %f68, %p14;
	fma.rn.ftz.f32 	%f80, %f70, %f64, %f80;

BB3_22:
	st.param.f32	[func_retval0+0], %f76;
	st.param.f32	[func_retval0+4], %f78;
	st.param.f32	[func_retval0+8], %f79;
	st.param.f32	[func_retval0+12], %f80;
	ret;
}

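// _Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj demangles to
// PixelFormatConvert_NV12_FRAME(void*, unsigned int, void*, unsigned int,
// DevicePixelFormat, unsigned int, unsigned int); param_0 is not referenced in this
// body. Reading of the code: each thread converts the two output pixels (2*x, y) and
// (2*x+1, y) inside the width/height bounds (param_5/param_6). It samples inTexture,
// scales to 0..255, picks the channel order from flag bit 0x2000, subtracts the YCbCr
// offsets when (flags & 0x300) == 0x200, applies the pow(v, 0.45) transfer when bit
// 0x400 is set, multiplies by one of the .const colorspace matrices where the flag
// bits select one, rescales using the bit-depth value (flags & 0xF8), optionally
// unpremultiplies by alpha (flags & 0x7), and stores each pixel into param_2
// (row pitch param_3, in pixels) as float4 when param_4 != 0 or as a packed half4
// when param_4 == 0.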
.visible .func _Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj(
	.param .b64 _Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_0,
	.param .b32 _Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_1,
	.param .b64 _Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_2,
	.param .b32 _Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_3,
	.param .b32 _Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_4,
	.param .b32 _Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_5,
	.param .b32 _Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_6
)
{
	.reg .pred 	%p<69>;
	.reg .s16 	%rs<9>;
	.reg .s32 	%r<81>;
	.reg .f32 	%f<346>;
	.reg .s64 	%rd<42>;


	ld.param.u32 	%r18, [_Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_1];
	ld.param.u64 	%rd9, [_Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_2];
	ld.param.u32 	%r19, [_Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_3];
	ld.param.u32 	%r20, [_Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_4];
	ld.param.u32 	%r21, [_Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_5];
	ld.param.u32 	%r22, [_Z29PixelFormatConvert_NV12_FRAMEPvjS_j17DevicePixelFormatjj_param_6];
	mov.u32 	%r23, %ctaid.x;
	mov.u32 	%r24, %ntid.x;
	mov.u32 	%r25, %tid.x;
	mad.lo.s32 	%r1, %r24, %r23, %r25;
	shl.b32 	%r26, %r1, 1;
	mov.u32 	%r27, %ntid.y;
	mov.u32 	%r28, %ctaid.y;
	mov.u32 	%r29, %tid.y;
	mad.lo.s32 	%r2, %r27, %r28, %r29;
	setp.ge.u32	%p2, %r2, %r22;
	setp.ge.u32	%p3, %r26, %r21;
	or.pred  	%p4, %p2, %p3;
	@%p4 bra 	BB4_107;

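	// Reading of the sampling below: the first two tex fetches address rows at or
	// beyond 'height' (param_6), i.e. the chroma region stacked below the luma rows
	// of inTexture, while the third fetch addresses row y of the luma region; all
	// coordinates include the +0.5 texel-center offset.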
	cvt.rn.f32.s32	%f116, %r2;
	add.ftz.f32 	%f117, %f116, 0fBE800000;
	mov.f32 	%f118, 0f00000000;
	max.ftz.f32 	%f119, %f117, %f118;
	cvt.rn.f32.u32	%f120, %r22;
	min.ftz.f32 	%f121, %f119, %f120;
	fma.rn.ftz.f32 	%f122, %f121, 0f3F000000, %f120;
	add.ftz.f32 	%f113, %f122, 0f3F000000;
	cvt.rn.f32.s32	%f123, %r1;
	add.ftz.f32 	%f114, %f123, 0f3F000000;
	cvt.rn.f32.u32	%f124, %r21;
	min.ftz.f32 	%f125, %f114, %f124;
	add.ftz.f32 	%f126, %f125, 0f00000000;
	add.ftz.f32 	%f110, %f126, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r30, %r31, %r32, %r33}, [inTexture, {%f110, %f113}];
	// inline asm
	mov.b32 	 %f127, %r30;
	mov.b32 	 %f128, %r31;
	// inline asm
	tex.2d.v4.u32.f32 {%r34, %r35, %r36, %r37}, [inTexture, {%f114, %f113}];
	// inline asm
	add.ftz.f32 	%f115, %f116, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r38, %r39, %r40, %r41}, [inTexture, {%f114, %f115}];
	// inline asm
	mov.b32 	 %f129, %r39;
	mul.ftz.f32 	%f130, %f128, 0f437F0000;
	mul.ftz.f32 	%f131, %f127, 0f437F0000;
	mul.ftz.f32 	%f132, %f129, 0f437F0000;
	and.b32  	%r42, %r18, 8192;
	setp.eq.s32	%p5, %r42, 0;
	selp.f32	%f283, 0f00000000, %f130, %p5;
	selp.f32	%f295, %f132, %f131, %p5;
	selp.f32	%f307, %f131, %f132, %p5;
	selp.f32	%f316, %f130, 0f00000000, %p5;
	and.b32  	%r43, %r18, 768;
	setp.ne.s32	%p6, %r43, 512;
	@%p6 bra 	BB4_8;

	and.b32  	%r11, %r18, 248;
	setp.ne.s32	%p7, %r11, 0;
	@%p7 bra 	BB4_4;

	mov.f32 	%f280, 0f437F0000;
	bra.uni 	BB4_7;

BB4_4:
	setp.ne.s32	%p8, %r11, 8;
	@%p8 bra 	BB4_6;

	mov.f32 	%f280, 0f447FC000;
	bra.uni 	BB4_7;

BB4_6:
	setp.eq.s32	%p9, %r11, 16;
	selp.f32	%f280, 0f47000000, 0f3F800000, %p9;

BB4_7:
	and.b32  	%r44, %r18, 4096;
	setp.eq.s32	%p10, %r44, 0;
	ld.const.f32 	%f135, [kYCbCrOffset];
	ld.const.f32 	%f136, [kYCbCrFullRangeOffset];
	selp.f32	%f137, %f136, %f135, %p10;
	mov.f32 	%f138, 0f437F0000;
	div.approx.ftz.f32 	%f139, %f280, %f138;
	mul.ftz.f32 	%f140, %f137, %f139;
	sub.ftz.f32 	%f295, %f295, %f140;
	ld.const.f32 	%f141, [kYCbCrOffset+4];
	ld.const.f32 	%f142, [kYCbCrFullRangeOffset+4];
	selp.f32	%f143, %f142, %f141, %p10;
	mul.ftz.f32 	%f144, %f143, %f139;
	sub.ftz.f32 	%f307, %f307, %f144;
	ld.const.f32 	%f145, [kYCbCrOffset+8];
	ld.const.f32 	%f146, [kYCbCrFullRangeOffset+8];
	selp.f32	%f147, %f146, %f145, %p10;
	mul.ftz.f32 	%f148, %f147, %f139;
	sub.ftz.f32 	%f316, %f316, %f148;

BB4_8:
	mov.f32 	%f315, %f316;
	mov.f32 	%f305, %f307;
	mov.f32 	%f293, %f295;
	and.b32  	%r12, %r18, 1024;
	setp.eq.s32	%p11, %r12, 0;
	@%p11 bra 	BB4_18;

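	// Flag bit 0x400: the three blocks below compute sign(v) * |v|^0.45 per component
	// with the approximate lg2/ex2 sequence (0f3EE66666 = 0.45).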
	setp.ltu.ftz.f32	%p12, %f293, 0f00000000;
	@%p12 bra 	BB4_11;

	lg2.approx.ftz.f32 	%f149, %f293;
	mul.ftz.f32 	%f150, %f149, 0f3EE66666;
	ex2.approx.ftz.f32 	%f294, %f150;
	bra.uni 	BB4_12;

BB4_11:
	neg.ftz.f32 	%f151, %f293;
	lg2.approx.ftz.f32 	%f152, %f151;
	mul.ftz.f32 	%f153, %f152, 0f3EE66666;
	ex2.approx.ftz.f32 	%f154, %f153;
	neg.ftz.f32 	%f294, %f154;

BB4_12:
	mov.f32 	%f293, %f294;
	setp.ltu.ftz.f32	%p13, %f305, 0f00000000;
	@%p13 bra 	BB4_14;

	lg2.approx.ftz.f32 	%f155, %f305;
	mul.ftz.f32 	%f156, %f155, 0f3EE66666;
	ex2.approx.ftz.f32 	%f306, %f156;
	bra.uni 	BB4_15;

BB4_14:
	neg.ftz.f32 	%f157, %f305;
	lg2.approx.ftz.f32 	%f158, %f157;
	mul.ftz.f32 	%f159, %f158, 0f3EE66666;
	ex2.approx.ftz.f32 	%f160, %f159;
	neg.ftz.f32 	%f306, %f160;

BB4_15:
	mov.f32 	%f305, %f306;
	setp.ltu.ftz.f32	%p14, %f315, 0f00000000;
	@%p14 bra 	BB4_17;

	lg2.approx.ftz.f32 	%f161, %f315;
	mul.ftz.f32 	%f162, %f161, 0f3EE66666;
	ex2.approx.ftz.f32 	%f315, %f162;
	bra.uni 	BB4_18;

BB4_17:
	neg.ftz.f32 	%f163, %f315;
	lg2.approx.ftz.f32 	%f164, %f163;
	mul.ftz.f32 	%f165, %f164, 0f3EE66666;
	ex2.approx.ftz.f32 	%f166, %f165;
	neg.ftz.f32 	%f315, %f166;

BB4_18:
	mov.f32 	%f314, %f315;
	mov.f32 	%f22, %f305;
	mov.f32 	%f21, %f293;
	mov.b32 	 %f24, %r38;
	setp.eq.s32	%p15, %r43, 512;
	and.b32  	%r46, %r18, 2560;
	setp.ne.s32	%p16, %r46, 0;
	or.pred  	%p1, %p16, %p15;
	and.b32  	%r13, %r18, 248;
	@%p1 bra 	BB4_26;

	setp.eq.s32	%p17, %r13, 128;
	mov.f32 	%f292, %f21;
	mov.f32 	%f304, %f22;
	@%p17 bra 	BB4_45;

	setp.ne.s32	%p18, %r13, 0;
	@%p18 bra 	BB4_22;

	mov.f32 	%f281, 0f437F0000;
	bra.uni 	BB4_25;

BB4_22:
	setp.ne.s32	%p19, %r13, 8;
	@%p19 bra 	BB4_24;

	mov.f32 	%f281, 0f447FC000;
	bra.uni 	BB4_25;

BB4_24:
	setp.eq.s32	%p20, %r13, 16;
	selp.f32	%f281, 0f47000000, 0f3F800000, %p20;

BB4_25:
	mov.f32 	%f169, 0f3F800000;
	div.approx.ftz.f32 	%f170, %f169, %f281;
	mul.ftz.f32 	%f283, %f283, %f170;
	mul.ftz.f32 	%f292, %f21, %f170;
	mul.ftz.f32 	%f304, %f22, %f170;
	mul.ftz.f32 	%f314, %f314, %f170;
	bra.uni 	BB4_45;

BB4_26:
	and.b32  	%r47, %r18, 512;
	setp.eq.s32	%p21, %r47, 0;
	@%p21 bra 	BB4_32;

	and.b32  	%r48, %r18, 2048;
	setp.eq.s32	%p22, %r48, 0;
	and.b32  	%r14, %r18, 4096;
	@%p22 bra 	BB4_30;

	setp.eq.s32	%p23, %r14, 0;
	mov.u64 	%rd40, k709YCbCrFullRange_To_RGB32f;
	@%p23 bra 	BB4_34;

	and.b32  	%r49, %r18, 256;
	setp.eq.s32	%p24, %r49, 0;
	mov.u64 	%rd17, k709YPbPr_To_RGB32f;
	mov.u64 	%rd18, k709YCbCr_To_RGB32f;
	selp.b64	%rd40, %rd18, %rd17, %p24;
	bra.uni 	BB4_34;

BB4_30:
	setp.eq.s32	%p25, %r14, 0;
	mov.u64 	%rd40, k601YCbCrFullRange_To_RGB32f;
	@%p25 bra 	BB4_34;

	and.b32  	%r50, %r18, 256;
	setp.eq.s32	%p26, %r50, 0;
	mov.u64 	%rd20, k601YPbPr_To_RGB32f;
	mov.u64 	%rd21, k601YCbCr_To_RGB32f;
	selp.b64	%rd40, %rd21, %rd20, %p26;
	bra.uni 	BB4_34;

BB4_32:
	and.b32  	%r51, %r18, 256;
	setp.eq.s32	%p27, %r51, 0;
	mov.u64 	%rd40, 0;
	@%p27 bra 	BB4_34;

	mov.u64 	%rd40, kRGB32f_To_601YPbPr;

BB4_34:
	ld.const.f32 	%f171, [%rd40];
	ld.const.f32 	%f172, [%rd40+4];
	mul.ftz.f32 	%f173, %f22, %f172;
	fma.rn.ftz.f32 	%f174, %f21, %f171, %f173;
	ld.const.f32 	%f175, [%rd40+8];
	fma.rn.ftz.f32 	%f31, %f314, %f175, %f174;
	ld.const.f32 	%f176, [%rd40+12];
	ld.const.f32 	%f177, [%rd40+16];
	mul.ftz.f32 	%f178, %f22, %f177;
	fma.rn.ftz.f32 	%f179, %f21, %f176, %f178;
	ld.const.f32 	%f180, [%rd40+20];
	fma.rn.ftz.f32 	%f32, %f314, %f180, %f179;
	ld.const.f32 	%f181, [%rd40+24];
	ld.const.f32 	%f182, [%rd40+28];
	mul.ftz.f32 	%f183, %f22, %f182;
	fma.rn.ftz.f32 	%f184, %f21, %f181, %f183;
	ld.const.f32 	%f185, [%rd40+32];
	fma.rn.ftz.f32 	%f314, %f314, %f185, %f184;
	setp.eq.s32	%p28, %r13, 128;
	mov.f32 	%f292, %f31;
	mov.f32 	%f304, %f32;
	@%p28 bra 	BB4_45;

	setp.ne.s32	%p29, %r13, 0;
	@%p29 bra 	BB4_37;

	mov.f32 	%f282, 0f437F0000;
	bra.uni 	BB4_40;

BB4_37:
	setp.ne.s32	%p30, %r13, 8;
	@%p30 bra 	BB4_39;

	mov.f32 	%f282, 0f447FC000;
	bra.uni 	BB4_40;

BB4_39:
	setp.eq.s32	%p31, %r13, 16;
	selp.f32	%f282, 0f47000000, 0f3F800000, %p31;

BB4_40:
	setp.eq.s32	%p32, %r13, 0;
	@%p32 bra 	BB4_44;

	and.b32  	%r52, %r18, 256;
	setp.eq.s32	%p33, %r52, 0;
	@%p33 bra 	BB4_43;

	mov.f32 	%f188, 0f437F0000;
	div.approx.ftz.f32 	%f189, %f282, %f188;
	mul.ftz.f32 	%f283, %f283, %f189;
	mul.ftz.f32 	%f292, %f31, %f189;
	mul.ftz.f32 	%f304, %f32, %f189;
	mul.ftz.f32 	%f314, %f314, %f189;
	bra.uni 	BB4_45;

BB4_43:
	mov.f32 	%f190, 0f3F800000;
	div.approx.ftz.f32 	%f191, %f190, %f282;
	mul.ftz.f32 	%f283, %f283, %f191;
	mov.f32 	%f192, 0f437F0000;
	div.approx.ftz.f32 	%f193, %f192, %f282;
	mul.ftz.f32 	%f292, %f31, %f193;
	mul.ftz.f32 	%f304, %f32, %f193;
	mul.ftz.f32 	%f314, %f314, %f193;
	bra.uni 	BB4_45;

BB4_44:
	mov.f32 	%f194, 0f3F800000;
	div.approx.ftz.f32 	%f195, %f194, %f282;
	mul.ftz.f32 	%f283, %f283, %f195;
	mov.f32 	%f292, %f31;
	mov.f32 	%f304, %f32;

BB4_45:
	mov.f32 	%f48, %f314;
	mov.f32 	%f303, %f304;
	mov.f32 	%f291, %f292;
	and.b32  	%r15, %r18, 7;
	setp.eq.s32	%p34, %r15, 1;
	mov.f32 	%f313, %f48;
	@%p34 bra 	BB4_51;

	or.b32  	%r53, %r15, 4;
	setp.eq.s32	%p35, %r53, 4;
	@%p35 bra 	BB4_47;
	bra.uni 	BB4_48;

BB4_47:
	mov.f32 	%f283, 0f3F800000;
	mov.f32 	%f313, %f48;
	bra.uni 	BB4_51;

BB4_48:
	add.ftz.f32 	%f197, %f283, 0fB70637BD;
	setp.gtu.ftz.f32	%p36, %f197, 0f00000000;
	@%p36 bra 	BB4_50;

	mov.f32 	%f303, %f118;
	mov.f32 	%f291, %f118;
	mov.f32 	%f283, %f118;
	mov.f32 	%f313, %f118;
	bra.uni 	BB4_51;

BB4_50:
	mov.f32 	%f202, 0f3F800000;
	div.approx.ftz.f32 	%f203, %f202, %f283;
	mul.ftz.f32 	%f291, %f291, %f203;
	mul.ftz.f32 	%f303, %f303, %f203;
	mul.ftz.f32 	%f313, %f48, %f203;

BB4_51:
	mul.lo.s32 	%r16, %r2, %r19;
	add.s32 	%r63, %r26, %r16;
	add.s32 	%r64, %r63, 1;
	cvt.s64.s32	%rd4, %r64;
	setp.eq.s32	%p37, %r20, 0;
	@%p37 bra 	BB4_53;

	shl.b64 	%rd24, %rd4, 4;
	add.s64 	%rd25, %rd9, %rd24;
	st.v4.f32 	[%rd25], {%f313, %f303, %f291, %f283};
	bra.uni 	BB4_54;

BB4_53:
	shl.b64 	%rd26, %rd4, 3;
	add.s64 	%rd27, %rd9, %rd26;
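	// Convert the four f32 results to f16 and store as a packed half4 (.v4.u16).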
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f283;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f291;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f303;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f313;
	mov.b16 	%rs4, %temp;
}
	st.v4.u16 	[%rd27], {%rs4, %rs3, %rs2, %rs1};

BB4_54:
	mov.b32 	 %f204, %r35;
	mul.ftz.f32 	%f205, %f204, 0f437F0000;
	mov.b32 	 %f206, %r34;
	mul.ftz.f32 	%f207, %f206, 0f437F0000;
	selp.f32	%f320, 0f00000000, %f205, %p5;
	mul.ftz.f32 	%f208, %f24, 0f437F0000;
	selp.f32	%f332, %f208, %f207, %p5;
	selp.f32	%f344, %f207, %f208, %p5;
	selp.f32	%f345, %f205, 0f00000000, %p5;
	@%p6 bra 	BB4_61;

	setp.ne.s32	%p40, %r13, 0;
	@%p40 bra 	BB4_57;

	mov.f32 	%f317, 0f437F0000;
	bra.uni 	BB4_60;

BB4_57:
	setp.ne.s32	%p41, %r13, 8;
	@%p41 bra 	BB4_59;

	mov.f32 	%f317, 0f447FC000;
	bra.uni 	BB4_60;

BB4_59:
	setp.eq.s32	%p42, %r13, 16;
	selp.f32	%f317, 0f47000000, 0f3F800000, %p42;

BB4_60:
	and.b32  	%r67, %r18, 4096;
	setp.eq.s32	%p43, %r67, 0;
	ld.const.f32 	%f211, [kYCbCrOffset];
	ld.const.f32 	%f212, [kYCbCrFullRangeOffset];
	selp.f32	%f213, %f212, %f211, %p43;
	mov.f32 	%f214, 0f437F0000;
	div.approx.ftz.f32 	%f215, %f317, %f214;
	mul.ftz.f32 	%f216, %f213, %f215;
	sub.ftz.f32 	%f332, %f332, %f216;
	ld.const.f32 	%f217, [kYCbCrOffset+4];
	ld.const.f32 	%f218, [kYCbCrFullRangeOffset+4];
	selp.f32	%f219, %f218, %f217, %p43;
	mul.ftz.f32 	%f220, %f219, %f215;
	sub.ftz.f32 	%f344, %f344, %f220;
	ld.const.f32 	%f221, [kYCbCrOffset+8];
	ld.const.f32 	%f222, [kYCbCrFullRangeOffset+8];
	selp.f32	%f223, %f222, %f221, %p43;
	mul.ftz.f32 	%f224, %f223, %f215;
	sub.ftz.f32 	%f345, %f345, %f224;

BB4_61:
	mov.f32 	%f342, %f344;
	mov.f32 	%f330, %f332;
	@%p11 bra 	BB4_71;

	setp.ltu.ftz.f32	%p45, %f330, 0f00000000;
	@%p45 bra 	BB4_64;

	lg2.approx.ftz.f32 	%f225, %f330;
	mul.ftz.f32 	%f226, %f225, 0f3EE66666;
	ex2.approx.ftz.f32 	%f331, %f226;
	bra.uni 	BB4_65;

BB4_64:
	neg.ftz.f32 	%f227, %f330;
	lg2.approx.ftz.f32 	%f228, %f227;
	mul.ftz.f32 	%f229, %f228, 0f3EE66666;
	ex2.approx.ftz.f32 	%f230, %f229;
	neg.ftz.f32 	%f331, %f230;

BB4_65:
	mov.f32 	%f330, %f331;
	setp.ltu.ftz.f32	%p46, %f342, 0f00000000;
	@%p46 bra 	BB4_67;

	lg2.approx.ftz.f32 	%f231, %f342;
	mul.ftz.f32 	%f232, %f231, 0f3EE66666;
	ex2.approx.ftz.f32 	%f343, %f232;
	bra.uni 	BB4_68;

BB4_67:
	neg.ftz.f32 	%f233, %f342;
	lg2.approx.ftz.f32 	%f234, %f233;
	mul.ftz.f32 	%f235, %f234, 0f3EE66666;
	ex2.approx.ftz.f32 	%f236, %f235;
	neg.ftz.f32 	%f343, %f236;

BB4_68:
	mov.f32 	%f342, %f343;
	setp.ltu.ftz.f32	%p47, %f345, 0f00000000;
	@%p47 bra 	BB4_70;

	lg2.approx.ftz.f32 	%f237, %f345;
	mul.ftz.f32 	%f238, %f237, 0f3EE66666;
	ex2.approx.ftz.f32 	%f345, %f238;
	bra.uni 	BB4_71;

BB4_70:
	neg.ftz.f32 	%f239, %f345;
	lg2.approx.ftz.f32 	%f240, %f239;
	mul.ftz.f32 	%f241, %f240, 0f3EE66666;
	ex2.approx.ftz.f32 	%f242, %f241;
	neg.ftz.f32 	%f345, %f242;

BB4_71:
	mov.f32 	%f77, %f342;
	mov.f32 	%f76, %f330;
	@%p1 bra 	BB4_79;

	setp.eq.s32	%p48, %r13, 128;
	mov.f32 	%f329, %f76;
	mov.f32 	%f341, %f77;
	@%p48 bra 	BB4_98;

	setp.ne.s32	%p49, %r13, 0;
	@%p49 bra 	BB4_75;

	mov.f32 	%f318, 0f437F0000;
	bra.uni 	BB4_78;

BB4_75:
	setp.ne.s32	%p50, %r13, 8;
	@%p50 bra 	BB4_77;

	mov.f32 	%f318, 0f447FC000;
	bra.uni 	BB4_78;

BB4_77:
	setp.eq.s32	%p51, %r13, 16;
	selp.f32	%f318, 0f47000000, 0f3F800000, %p51;

BB4_78:
	mov.f32 	%f245, 0f3F800000;
	div.approx.ftz.f32 	%f246, %f245, %f318;
	mul.ftz.f32 	%f320, %f320, %f246;
	mul.ftz.f32 	%f329, %f76, %f246;
	mul.ftz.f32 	%f341, %f77, %f246;
	mul.ftz.f32 	%f345, %f345, %f246;
	bra.uni 	BB4_98;

BB4_79:
	and.b32  	%r68, %r18, 512;
	setp.eq.s32	%p52, %r68, 0;
	@%p52 bra 	BB4_85;

	and.b32  	%r69, %r18, 2048;
	setp.eq.s32	%p53, %r69, 0;
	and.b32  	%r17, %r18, 4096;
	@%p53 bra 	BB4_83;

	setp.eq.s32	%p54, %r17, 0;
	mov.u64 	%rd41, k709YCbCrFullRange_To_RGB32f;
	@%p54 bra 	BB4_87;

	and.b32  	%r70, %r18, 256;
	setp.eq.s32	%p55, %r70, 0;
	mov.u64 	%rd29, k709YPbPr_To_RGB32f;
	mov.u64 	%rd30, k709YCbCr_To_RGB32f;
	selp.b64	%rd41, %rd30, %rd29, %p55;
	bra.uni 	BB4_87;

BB4_83:
	setp.eq.s32	%p56, %r17, 0;
	mov.u64 	%rd41, k601YCbCrFullRange_To_RGB32f;
	@%p56 bra 	BB4_87;

	and.b32  	%r71, %r18, 256;
	setp.eq.s32	%p57, %r71, 0;
	mov.u64 	%rd32, k601YPbPr_To_RGB32f;
	mov.u64 	%rd33, k601YCbCr_To_RGB32f;
	selp.b64	%rd41, %rd33, %rd32, %p57;
	bra.uni 	BB4_87;

BB4_85:
	and.b32  	%r72, %r18, 256;
	setp.eq.s32	%p58, %r72, 0;
	mov.u64 	%rd41, 0;
	@%p58 bra 	BB4_87;

	mov.u64 	%rd41, kRGB32f_To_601YPbPr;

BB4_87:
	ld.const.f32 	%f247, [%rd41];
	ld.const.f32 	%f248, [%rd41+4];
	mul.ftz.f32 	%f249, %f77, %f248;
	fma.rn.ftz.f32 	%f250, %f76, %f247, %f249;
	ld.const.f32 	%f251, [%rd41+8];
	fma.rn.ftz.f32 	%f85, %f345, %f251, %f250;
	ld.const.f32 	%f252, [%rd41+12];
	ld.const.f32 	%f253, [%rd41+16];
	mul.ftz.f32 	%f254, %f77, %f253;
	fma.rn.ftz.f32 	%f255, %f76, %f252, %f254;
	ld.const.f32 	%f256, [%rd41+20];
	fma.rn.ftz.f32 	%f86, %f345, %f256, %f255;
	ld.const.f32 	%f257, [%rd41+24];
	ld.const.f32 	%f258, [%rd41+28];
	mul.ftz.f32 	%f259, %f77, %f258;
	fma.rn.ftz.f32 	%f260, %f76, %f257, %f259;
	ld.const.f32 	%f261, [%rd41+32];
	fma.rn.ftz.f32 	%f345, %f345, %f261, %f260;
	setp.eq.s32	%p59, %r13, 128;
	mov.f32 	%f329, %f85;
	mov.f32 	%f341, %f86;
	@%p59 bra 	BB4_98;

	setp.ne.s32	%p60, %r13, 0;
	@%p60 bra 	BB4_90;

	mov.f32 	%f319, 0f437F0000;
	bra.uni 	BB4_93;

BB4_90:
	setp.ne.s32	%p61, %r13, 8;
	@%p61 bra 	BB4_92;

	mov.f32 	%f319, 0f447FC000;
	bra.uni 	BB4_93;

BB4_92:
	setp.eq.s32	%p62, %r13, 16;
	selp.f32	%f319, 0f47000000, 0f3F800000, %p62;

BB4_93:
	setp.eq.s32	%p63, %r13, 0;
	@%p63 bra 	BB4_97;

	and.b32  	%r73, %r18, 256;
	setp.eq.s32	%p64, %r73, 0;
	@%p64 bra 	BB4_96;

	mov.f32 	%f264, 0f437F0000;
	div.approx.ftz.f32 	%f265, %f319, %f264;
	mul.ftz.f32 	%f320, %f320, %f265;
	mul.ftz.f32 	%f329, %f85, %f265;
	mul.ftz.f32 	%f341, %f86, %f265;
	mul.ftz.f32 	%f345, %f345, %f265;
	bra.uni 	BB4_98;

BB4_96:
	mov.f32 	%f266, 0f3F800000;
	div.approx.ftz.f32 	%f267, %f266, %f319;
	mul.ftz.f32 	%f320, %f320, %f267;
	mov.f32 	%f268, 0f437F0000;
	div.approx.ftz.f32 	%f269, %f268, %f319;
	mul.ftz.f32 	%f329, %f85, %f269;
	mul.ftz.f32 	%f341, %f86, %f269;
	mul.ftz.f32 	%f345, %f345, %f269;
	bra.uni 	BB4_98;

BB4_97:
	mov.f32 	%f270, 0f3F800000;
	div.approx.ftz.f32 	%f271, %f270, %f319;
	mul.ftz.f32 	%f320, %f320, %f271;
	mov.f32 	%f329, %f85;
	mov.f32 	%f341, %f86;

BB4_98:
	mov.f32 	%f340, %f341;
	mov.f32 	%f328, %f329;
	@%p34 bra 	BB4_104;

	or.b32  	%r74, %r15, 4;
	setp.eq.s32	%p66, %r74, 4;
	@%p66 bra 	BB4_100;
	bra.uni 	BB4_101;

BB4_100:
	mov.f32 	%f320, 0f3F800000;
	bra.uni 	BB4_104;

BB4_101:
	add.ftz.f32 	%f273, %f320, 0fB70637BD;
	setp.gtu.ftz.f32	%p67, %f273, 0f00000000;
	@%p67 bra 	BB4_103;

	mov.f32 	%f345, 0f00000000;
	mov.f32 	%f340, %f345;
	mov.f32 	%f328, %f345;
	mov.f32 	%f320, %f345;
	bra.uni 	BB4_104;

BB4_103:
	mov.f32 	%f278, 0f3F800000;
	div.approx.ftz.f32 	%f279, %f278, %f320;
	mul.ftz.f32 	%f328, %f328, %f279;
	mul.ftz.f32 	%f340, %f340, %f279;
	mul.ftz.f32 	%f345, %f345, %f279;

BB4_104:
	add.s32 	%r80, %r16, %r26;
	cvt.s64.s32	%rd8, %r80;
	@%p37 bra 	BB4_106;

	shl.b64 	%rd36, %rd8, 4;
	add.s64 	%rd37, %rd9, %rd36;
	st.v4.f32 	[%rd37], {%f345, %f340, %f328, %f320};
	bra.uni 	BB4_107;

BB4_106:
	shl.b64 	%rd38, %rd8, 3;
	add.s64 	%rd39, %rd9, %rd38;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f320;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f328;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f340;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f345;
	mov.b16 	%rs8, %temp;
}
	st.v4.u16 	[%rd39], {%rs8, %rs7, %rs6, %rs5};

BB4_107:
	ret;
}

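// _Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj demangles to
// PixelFormatConvert_NV12_FIELD(void*, unsigned int, void*, unsigned int,
// DevicePixelFormat, unsigned int, unsigned int). Reading of the code: same
// conversion pipeline as the FRAME kernel above, but per field: each thread samples
// once around row y - 0.125 and once around row y + 0.125, with the second group of
// fetches shifted right by width/2, then writes the two pixel pairs to output rows
// 2*y and 2*y + 1 of the param_2 buffer (pitch param_3).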
.visible .func _Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj(
	.param .b64 _Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_0,
	.param .b32 _Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_1,
	.param .b64 _Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_2,
	.param .b32 _Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_3,
	.param .b32 _Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_4,
	.param .b32 _Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_5,
	.param .b32 _Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_6
)
{
	.reg .pred 	%p<131>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<153>;
	.reg .f32 	%f<687>;
	.reg .s64 	%rd<82>;


	ld.param.u32 	%r30, [_Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_1];
	ld.param.u64 	%rd17, [_Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_2];
	ld.param.u32 	%r33, [_Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_5];
	ld.param.u32 	%r34, [_Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_6];
	mov.u32 	%r35, %ctaid.x;
	mov.u32 	%r36, %ntid.x;
	mov.u32 	%r37, %tid.x;
	mad.lo.s32 	%r1, %r36, %r35, %r37;
	shl.b32 	%r38, %r1, 1;
	mov.u32 	%r39, %ntid.y;
	mov.u32 	%r40, %ctaid.y;
	mov.u32 	%r41, %tid.y;
	mad.lo.s32 	%r2, %r39, %r40, %r41;
	setp.ge.u32	%p2, %r2, %r34;
	setp.ge.u32	%p3, %r38, %r33;
	or.pred  	%p4, %p2, %p3;
	@%p4 bra 	BB5_213;

	cvt.rn.f32.s32	%f1, %r2;
	add.ftz.f32 	%f232, %f1, 0fBE000000;
	mov.f32 	%f233, 0f00000000;
	max.ftz.f32 	%f234, %f232, %f233;
	cvt.rn.f32.u32	%f2, %r34;
	min.ftz.f32 	%f235, %f234, %f2;
	fma.rn.ftz.f32 	%f236, %f235, 0f3F000000, %f2;
	add.ftz.f32 	%f229, %f236, 0f3F000000;
	cvt.rn.f32.s32	%f3, %r1;
	add.ftz.f32 	%f230, %f3, 0f3F000000;
	cvt.rn.f32.u32	%f4, %r33;
	min.ftz.f32 	%f5, %f230, %f4;
	add.ftz.f32 	%f237, %f5, 0f00000000;
	add.ftz.f32 	%f226, %f237, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r42, %r43, %r44, %r45}, [inTexture, {%f226, %f229}];
	// inline asm
	mov.b32 	 %f238, %r42;
	mov.b32 	 %f239, %r43;
	// inline asm
	tex.2d.v4.u32.f32 {%r46, %r47, %r48, %r49}, [inTexture, {%f230, %f229}];
	// inline asm
	add.ftz.f32 	%f231, %f1, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r50, %r51, %r52, %r53}, [inTexture, {%f230, %f231}];
	// inline asm
	mov.b32 	 %f240, %r51;
	mul.ftz.f32 	%f241, %f239, 0f437F0000;
	mul.ftz.f32 	%f242, %f238, 0f437F0000;
	mul.ftz.f32 	%f243, %f240, 0f437F0000;
	and.b32  	%r54, %r30, 8192;
	setp.eq.s32	%p5, %r54, 0;
	selp.f32	%f566, 0f00000000, %f241, %p5;
	selp.f32	%f578, %f243, %f242, %p5;
	selp.f32	%f590, %f242, %f243, %p5;
	selp.f32	%f591, %f241, 0f00000000, %p5;
	and.b32  	%r55, %r30, 768;
	setp.ne.s32	%p6, %r55, 512;
	@%p6 bra 	BB5_8;

	and.b32  	%r11, %r30, 248;
	setp.ne.s32	%p7, %r11, 0;
	@%p7 bra 	BB5_4;

	mov.f32 	%f563, 0f437F0000;
	bra.uni 	BB5_7;

BB5_4:
	setp.ne.s32	%p8, %r11, 8;
	@%p8 bra 	BB5_6;

	mov.f32 	%f563, 0f447FC000;
	bra.uni 	BB5_7;

BB5_6:
	setp.eq.s32	%p9, %r11, 16;
	selp.f32	%f563, 0f47000000, 0f3F800000, %p9;

BB5_7:
	and.b32  	%r56, %r30, 4096;
	setp.eq.s32	%p10, %r56, 0;
	ld.const.f32 	%f246, [kYCbCrOffset];
	ld.const.f32 	%f247, [kYCbCrFullRangeOffset];
	selp.f32	%f248, %f247, %f246, %p10;
	mov.f32 	%f249, 0f437F0000;
	div.approx.ftz.f32 	%f250, %f563, %f249;
	mul.ftz.f32 	%f251, %f248, %f250;
	sub.ftz.f32 	%f578, %f578, %f251;
	ld.const.f32 	%f252, [kYCbCrOffset+4];
	ld.const.f32 	%f253, [kYCbCrFullRangeOffset+4];
	selp.f32	%f254, %f253, %f252, %p10;
	mul.ftz.f32 	%f255, %f254, %f250;
	sub.ftz.f32 	%f590, %f590, %f255;
	ld.const.f32 	%f256, [kYCbCrOffset+8];
	ld.const.f32 	%f257, [kYCbCrFullRangeOffset+8];
	selp.f32	%f258, %f257, %f256, %p10;
	mul.ftz.f32 	%f259, %f258, %f250;
	sub.ftz.f32 	%f591, %f591, %f259;

BB5_8:
	mov.f32 	%f588, %f590;
	mov.f32 	%f576, %f578;
	and.b32  	%r12, %r30, 1024;
	setp.eq.s32	%p11, %r12, 0;
	@%p11 bra 	BB5_18;

	setp.ltu.ftz.f32	%p12, %f576, 0f00000000;
	@%p12 bra 	BB5_11;

	lg2.approx.ftz.f32 	%f260, %f576;
	mul.ftz.f32 	%f261, %f260, 0f3EE66666;
	ex2.approx.ftz.f32 	%f577, %f261;
	bra.uni 	BB5_12;

BB5_11:
	neg.ftz.f32 	%f262, %f576;
	lg2.approx.ftz.f32 	%f263, %f262;
	mul.ftz.f32 	%f264, %f263, 0f3EE66666;
	ex2.approx.ftz.f32 	%f265, %f264;
	neg.ftz.f32 	%f577, %f265;

BB5_12:
	mov.f32 	%f576, %f577;
	setp.ltu.ftz.f32	%p13, %f588, 0f00000000;
	@%p13 bra 	BB5_14;

	lg2.approx.ftz.f32 	%f266, %f588;
	mul.ftz.f32 	%f267, %f266, 0f3EE66666;
	ex2.approx.ftz.f32 	%f589, %f267;
	bra.uni 	BB5_15;

BB5_14:
	neg.ftz.f32 	%f268, %f588;
	lg2.approx.ftz.f32 	%f269, %f268;
	mul.ftz.f32 	%f270, %f269, 0f3EE66666;
	ex2.approx.ftz.f32 	%f271, %f270;
	neg.ftz.f32 	%f589, %f271;

BB5_15:
	mov.f32 	%f588, %f589;
	setp.ltu.ftz.f32	%p14, %f591, 0f00000000;
	@%p14 bra 	BB5_17;

	lg2.approx.ftz.f32 	%f272, %f591;
	mul.ftz.f32 	%f273, %f272, 0f3EE66666;
	ex2.approx.ftz.f32 	%f591, %f273;
	bra.uni 	BB5_18;

BB5_17:
	neg.ftz.f32 	%f274, %f591;
	lg2.approx.ftz.f32 	%f275, %f274;
	mul.ftz.f32 	%f276, %f275, 0f3EE66666;
	ex2.approx.ftz.f32 	%f277, %f276;
	neg.ftz.f32 	%f591, %f277;

BB5_18:
	mov.f32 	%f27, %f588;
	mov.f32 	%f26, %f576;
	and.b32  	%r145, %r30, 768;
	setp.eq.s32	%p15, %r145, 512;
	and.b32  	%r58, %r30, 2560;
	setp.ne.s32	%p16, %r58, 0;
	or.pred  	%p1, %p16, %p15;
	and.b32  	%r13, %r30, 248;
	@%p1 bra 	BB5_26;

	setp.eq.s32	%p17, %r13, 128;
	mov.f32 	%f575, %f26;
	mov.f32 	%f587, %f27;
	@%p17 bra 	BB5_45;

	setp.ne.s32	%p18, %r13, 0;
	@%p18 bra 	BB5_22;

	mov.f32 	%f564, 0f437F0000;
	bra.uni 	BB5_25;

BB5_22:
	setp.ne.s32	%p19, %r13, 8;
	@%p19 bra 	BB5_24;

	mov.f32 	%f564, 0f447FC000;
	bra.uni 	BB5_25;

BB5_24:
	setp.eq.s32	%p20, %r13, 16;
	selp.f32	%f564, 0f47000000, 0f3F800000, %p20;

BB5_25:
	mov.f32 	%f280, 0f3F800000;
	div.approx.ftz.f32 	%f281, %f280, %f564;
	mul.ftz.f32 	%f566, %f566, %f281;
	mul.ftz.f32 	%f575, %f26, %f281;
	mul.ftz.f32 	%f587, %f27, %f281;
	mul.ftz.f32 	%f591, %f591, %f281;
	bra.uni 	BB5_45;

BB5_26:
	and.b32  	%r59, %r30, 512;
	setp.eq.s32	%p21, %r59, 0;
	@%p21 bra 	BB5_32;

	and.b32  	%r60, %r30, 2048;
	setp.eq.s32	%p22, %r60, 0;
	and.b32  	%r14, %r30, 4096;
	@%p22 bra 	BB5_30;

	setp.eq.s32	%p23, %r14, 0;
	mov.u64 	%rd78, k709YCbCrFullRange_To_RGB32f;
	@%p23 bra 	BB5_34;

	and.b32  	%r61, %r30, 256;
	setp.eq.s32	%p24, %r61, 0;
	mov.u64 	%rd25, k709YPbPr_To_RGB32f;
	mov.u64 	%rd26, k709YCbCr_To_RGB32f;
	selp.b64	%rd78, %rd26, %rd25, %p24;
	bra.uni 	BB5_34;

BB5_30:
	setp.eq.s32	%p25, %r14, 0;
	mov.u64 	%rd78, k601YCbCrFullRange_To_RGB32f;
	@%p25 bra 	BB5_34;

	and.b32  	%r62, %r30, 256;
	setp.eq.s32	%p26, %r62, 0;
	mov.u64 	%rd28, k601YPbPr_To_RGB32f;
	mov.u64 	%rd29, k601YCbCr_To_RGB32f;
	selp.b64	%rd78, %rd29, %rd28, %p26;
	bra.uni 	BB5_34;

BB5_32:
	and.b32  	%r63, %r30, 256;
	setp.eq.s32	%p27, %r63, 0;
	mov.u64 	%rd78, 0;
	@%p27 bra 	BB5_34;

	mov.u64 	%rd78, kRGB32f_To_601YPbPr;

BB5_34:
	ld.const.f32 	%f282, [%rd78];
	ld.const.f32 	%f283, [%rd78+4];
	mul.ftz.f32 	%f284, %f27, %f283;
	fma.rn.ftz.f32 	%f285, %f26, %f282, %f284;
	ld.const.f32 	%f286, [%rd78+8];
	fma.rn.ftz.f32 	%f36, %f591, %f286, %f285;
	ld.const.f32 	%f287, [%rd78+12];
	ld.const.f32 	%f288, [%rd78+16];
	mul.ftz.f32 	%f289, %f27, %f288;
	fma.rn.ftz.f32 	%f290, %f26, %f287, %f289;
	ld.const.f32 	%f291, [%rd78+20];
	fma.rn.ftz.f32 	%f37, %f591, %f291, %f290;
	ld.const.f32 	%f292, [%rd78+24];
	ld.const.f32 	%f293, [%rd78+28];
	mul.ftz.f32 	%f294, %f27, %f293;
	fma.rn.ftz.f32 	%f295, %f26, %f292, %f294;
	ld.const.f32 	%f296, [%rd78+32];
	fma.rn.ftz.f32 	%f591, %f591, %f296, %f295;
	setp.eq.s32	%p28, %r13, 128;
	mov.f32 	%f575, %f36;
	mov.f32 	%f587, %f37;
	@%p28 bra 	BB5_45;

	setp.ne.s32	%p29, %r13, 0;
	@%p29 bra 	BB5_37;

	mov.f32 	%f565, 0f437F0000;
	bra.uni 	BB5_40;

BB5_37:
	setp.ne.s32	%p30, %r13, 8;
	@%p30 bra 	BB5_39;

	mov.f32 	%f565, 0f447FC000;
	bra.uni 	BB5_40;

BB5_39:
	setp.eq.s32	%p31, %r13, 16;
	selp.f32	%f565, 0f47000000, 0f3F800000, %p31;

BB5_40:
	setp.eq.s32	%p32, %r13, 0;
	@%p32 bra 	BB5_44;

	and.b32  	%r64, %r30, 256;
	setp.eq.s32	%p33, %r64, 0;
	@%p33 bra 	BB5_43;

	mov.f32 	%f299, 0f437F0000;
	div.approx.ftz.f32 	%f300, %f565, %f299;
	mul.ftz.f32 	%f566, %f566, %f300;
	mul.ftz.f32 	%f575, %f36, %f300;
	mul.ftz.f32 	%f587, %f37, %f300;
	mul.ftz.f32 	%f591, %f591, %f300;
	bra.uni 	BB5_45;

BB5_43:
	mov.f32 	%f301, 0f3F800000;
	div.approx.ftz.f32 	%f302, %f301, %f565;
	mul.ftz.f32 	%f566, %f566, %f302;
	mov.f32 	%f303, 0f437F0000;
	div.approx.ftz.f32 	%f304, %f303, %f565;
	mul.ftz.f32 	%f575, %f36, %f304;
	mul.ftz.f32 	%f587, %f37, %f304;
	mul.ftz.f32 	%f591, %f591, %f304;
	bra.uni 	BB5_45;

BB5_44:
	mov.f32 	%f305, 0f3F800000;
	div.approx.ftz.f32 	%f306, %f305, %f565;
	mul.ftz.f32 	%f566, %f566, %f306;
	mov.f32 	%f575, %f36;
	mov.f32 	%f587, %f37;

BB5_45:
	mov.f32 	%f586, %f587;
	mov.f32 	%f574, %f575;
	and.b32  	%r15, %r30, 7;
	setp.eq.s32	%p34, %r15, 1;
	@%p34 bra 	BB5_51;

	or.b32  	%r65, %r15, 4;
	setp.eq.s32	%p35, %r65, 4;
	@%p35 bra 	BB5_47;
	bra.uni 	BB5_48;

BB5_47:
	mov.f32 	%f566, 0f3F800000;
	bra.uni 	BB5_51;

BB5_48:
	add.ftz.f32 	%f308, %f566, 0fB70637BD;
	setp.gtu.ftz.f32	%p36, %f308, 0f00000000;
	@%p36 bra 	BB5_50;

	mov.f32 	%f591, 0f00000000;
	mov.f32 	%f586, %f591;
	mov.f32 	%f574, %f591;
	mov.f32 	%f566, %f591;
	bra.uni 	BB5_51;

BB5_50:
	mov.f32 	%f313, 0f3F800000;
	div.approx.ftz.f32 	%f314, %f313, %f566;
	mul.ftz.f32 	%f574, %f574, %f314;
	mul.ftz.f32 	%f586, %f586, %f314;
	mul.ftz.f32 	%f591, %f591, %f314;

BB5_51:
	ld.param.u32 	%r146, [_Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_3];
	ld.param.u32 	%r144, [_Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_4];
	mul.lo.s32 	%r70, %r2, %r146;
	shl.b32 	%r16, %r70, 1;
	add.s32 	%r76, %r38, %r16;
	add.s32 	%r77, %r76, 1;
	cvt.s64.s32	%rd4, %r77;
	setp.eq.s32	%p37, %r144, 0;
	@%p37 bra 	BB5_53;

	shl.b64 	%rd32, %rd4, 4;
	add.s64 	%rd33, %rd17, %rd32;
	st.v4.f32 	[%rd33], {%f591, %f586, %f574, %f566};
	bra.uni 	BB5_54;

BB5_53:
	shl.b64 	%rd34, %rd4, 3;
	add.s64 	%rd35, %rd17, %rd34;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f566;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f574;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f586;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f591;
	mov.b16 	%rs4, %temp;
}
	st.v4.u16 	[%rd35], {%rs4, %rs3, %rs2, %rs1};

BB5_54:
	mov.b32 	 %f562, %r50;
	mov.b32 	 %f315, %r47;
	mul.ftz.f32 	%f316, %f315, 0f437F0000;
	mov.b32 	 %f317, %r46;
	mul.ftz.f32 	%f318, %f317, 0f437F0000;
	selp.f32	%f595, 0f00000000, %f316, %p5;
	mul.ftz.f32 	%f319, %f562, 0f437F0000;
	selp.f32	%f607, %f319, %f318, %p5;
	selp.f32	%f619, %f318, %f319, %p5;
	selp.f32	%f620, %f316, 0f00000000, %p5;
	@%p6 bra 	BB5_61;

	setp.ne.s32	%p40, %r13, 0;
	@%p40 bra 	BB5_57;

	mov.f32 	%f592, 0f437F0000;
	bra.uni 	BB5_60;

BB5_57:
	setp.ne.s32	%p41, %r13, 8;
	@%p41 bra 	BB5_59;

	mov.f32 	%f592, 0f447FC000;
	bra.uni 	BB5_60;

BB5_59:
	setp.eq.s32	%p42, %r13, 16;
	selp.f32	%f592, 0f47000000, 0f3F800000, %p42;

BB5_60:
	and.b32  	%r80, %r30, 4096;
	setp.eq.s32	%p43, %r80, 0;
	ld.const.f32 	%f322, [kYCbCrOffset];
	ld.const.f32 	%f323, [kYCbCrFullRangeOffset];
	selp.f32	%f324, %f323, %f322, %p43;
	mov.f32 	%f325, 0f437F0000;
	div.approx.ftz.f32 	%f326, %f592, %f325;
	mul.ftz.f32 	%f327, %f324, %f326;
	sub.ftz.f32 	%f607, %f607, %f327;
	ld.const.f32 	%f328, [kYCbCrOffset+4];
	ld.const.f32 	%f329, [kYCbCrFullRangeOffset+4];
	selp.f32	%f330, %f329, %f328, %p43;
	mul.ftz.f32 	%f331, %f330, %f326;
	sub.ftz.f32 	%f619, %f619, %f331;
	ld.const.f32 	%f332, [kYCbCrOffset+8];
	ld.const.f32 	%f333, [kYCbCrFullRangeOffset+8];
	selp.f32	%f334, %f333, %f332, %p43;
	mul.ftz.f32 	%f335, %f334, %f326;
	sub.ftz.f32 	%f620, %f620, %f335;

BB5_61:
	mov.f32 	%f617, %f619;
	mov.f32 	%f605, %f607;
	@%p11 bra 	BB5_71;

	setp.ltu.ftz.f32	%p45, %f605, 0f00000000;
	@%p45 bra 	BB5_64;

	lg2.approx.ftz.f32 	%f336, %f605;
	mul.ftz.f32 	%f337, %f336, 0f3EE66666;
	ex2.approx.ftz.f32 	%f606, %f337;
	bra.uni 	BB5_65;

BB5_64:
	neg.ftz.f32 	%f338, %f605;
	lg2.approx.ftz.f32 	%f339, %f338;
	mul.ftz.f32 	%f340, %f339, 0f3EE66666;
	ex2.approx.ftz.f32 	%f341, %f340;
	neg.ftz.f32 	%f606, %f341;

BB5_65:
	mov.f32 	%f605, %f606;
	setp.ltu.ftz.f32	%p46, %f617, 0f00000000;
	@%p46 bra 	BB5_67;

	lg2.approx.ftz.f32 	%f342, %f617;
	mul.ftz.f32 	%f343, %f342, 0f3EE66666;
	ex2.approx.ftz.f32 	%f618, %f343;
	bra.uni 	BB5_68;

BB5_67:
	neg.ftz.f32 	%f344, %f617;
	lg2.approx.ftz.f32 	%f345, %f344;
	mul.ftz.f32 	%f346, %f345, 0f3EE66666;
	ex2.approx.ftz.f32 	%f347, %f346;
	neg.ftz.f32 	%f618, %f347;

BB5_68:
	mov.f32 	%f617, %f618;
	setp.ltu.ftz.f32	%p47, %f620, 0f00000000;
	@%p47 bra 	BB5_70;

	lg2.approx.ftz.f32 	%f348, %f620;
	mul.ftz.f32 	%f349, %f348, 0f3EE66666;
	ex2.approx.ftz.f32 	%f620, %f349;
	bra.uni 	BB5_71;

BB5_70:
	neg.ftz.f32 	%f350, %f620;
	lg2.approx.ftz.f32 	%f351, %f350;
	mul.ftz.f32 	%f352, %f351, 0f3EE66666;
	ex2.approx.ftz.f32 	%f353, %f352;
	neg.ftz.f32 	%f620, %f353;

BB5_71:
	mov.f32 	%f82, %f617;
	mov.f32 	%f81, %f605;
	@%p1 bra 	BB5_79;

	setp.eq.s32	%p48, %r13, 128;
	mov.f32 	%f604, %f81;
	mov.f32 	%f616, %f82;
	@%p48 bra 	BB5_98;

	setp.ne.s32	%p49, %r13, 0;
	@%p49 bra 	BB5_75;

	mov.f32 	%f593, 0f437F0000;
	bra.uni 	BB5_78;

BB5_75:
	setp.ne.s32	%p50, %r13, 8;
	@%p50 bra 	BB5_77;

	mov.f32 	%f593, 0f447FC000;
	bra.uni 	BB5_78;

BB5_77:
	setp.eq.s32	%p51, %r13, 16;
	selp.f32	%f593, 0f47000000, 0f3F800000, %p51;

BB5_78:
	mov.f32 	%f356, 0f3F800000;
	div.approx.ftz.f32 	%f357, %f356, %f593;
	mul.ftz.f32 	%f595, %f595, %f357;
	mul.ftz.f32 	%f604, %f81, %f357;
	mul.ftz.f32 	%f616, %f82, %f357;
	mul.ftz.f32 	%f620, %f620, %f357;
	bra.uni 	BB5_98;

BB5_79:
	and.b32  	%r81, %r30, 512;
	setp.eq.s32	%p52, %r81, 0;
	@%p52 bra 	BB5_85;

	and.b32  	%r82, %r30, 2048;
	setp.eq.s32	%p53, %r82, 0;
	and.b32  	%r17, %r30, 4096;
	@%p53 bra 	BB5_83;

	setp.eq.s32	%p54, %r17, 0;
	mov.u64 	%rd79, k709YCbCrFullRange_To_RGB32f;
	@%p54 bra 	BB5_87;

	and.b32  	%r83, %r30, 256;
	setp.eq.s32	%p55, %r83, 0;
	mov.u64 	%rd37, k709YPbPr_To_RGB32f;
	mov.u64 	%rd38, k709YCbCr_To_RGB32f;
	selp.b64	%rd79, %rd38, %rd37, %p55;
	bra.uni 	BB5_87;

BB5_83:
	setp.eq.s32	%p56, %r17, 0;
	mov.u64 	%rd79, k601YCbCrFullRange_To_RGB32f;
	@%p56 bra 	BB5_87;

	and.b32  	%r84, %r30, 256;
	setp.eq.s32	%p57, %r84, 0;
	mov.u64 	%rd40, k601YPbPr_To_RGB32f;
	mov.u64 	%rd41, k601YCbCr_To_RGB32f;
	selp.b64	%rd79, %rd41, %rd40, %p57;
	bra.uni 	BB5_87;

BB5_85:
	and.b32  	%r85, %r30, 256;
	setp.eq.s32	%p58, %r85, 0;
	mov.u64 	%rd79, 0;
	@%p58 bra 	BB5_87;

	mov.u64 	%rd79, kRGB32f_To_601YPbPr;

BB5_87:
	ld.const.f32 	%f358, [%rd79];
	ld.const.f32 	%f359, [%rd79+4];
	mul.ftz.f32 	%f360, %f82, %f359;
	fma.rn.ftz.f32 	%f361, %f81, %f358, %f360;
	ld.const.f32 	%f362, [%rd79+8];
	fma.rn.ftz.f32 	%f90, %f620, %f362, %f361;
	ld.const.f32 	%f363, [%rd79+12];
	ld.const.f32 	%f364, [%rd79+16];
	mul.ftz.f32 	%f365, %f82, %f364;
	fma.rn.ftz.f32 	%f366, %f81, %f363, %f365;
	ld.const.f32 	%f367, [%rd79+20];
	fma.rn.ftz.f32 	%f91, %f620, %f367, %f366;
	ld.const.f32 	%f368, [%rd79+24];
	ld.const.f32 	%f369, [%rd79+28];
	mul.ftz.f32 	%f370, %f82, %f369;
	fma.rn.ftz.f32 	%f371, %f81, %f368, %f370;
	ld.const.f32 	%f372, [%rd79+32];
	fma.rn.ftz.f32 	%f620, %f620, %f372, %f371;
	setp.eq.s32	%p59, %r13, 128;
	mov.f32 	%f604, %f90;
	mov.f32 	%f616, %f91;
	@%p59 bra 	BB5_98;

	setp.ne.s32	%p60, %r13, 0;
	@%p60 bra 	BB5_90;

	mov.f32 	%f594, 0f437F0000;
	bra.uni 	BB5_93;

BB5_90:
	setp.ne.s32	%p61, %r13, 8;
	@%p61 bra 	BB5_92;

	mov.f32 	%f594, 0f447FC000;
	bra.uni 	BB5_93;

BB5_92:
	setp.eq.s32	%p62, %r13, 16;
	selp.f32	%f594, 0f47000000, 0f3F800000, %p62;

BB5_93:
	setp.eq.s32	%p63, %r13, 0;
	@%p63 bra 	BB5_97;

	and.b32  	%r86, %r30, 256;
	setp.eq.s32	%p64, %r86, 0;
	@%p64 bra 	BB5_96;

	mov.f32 	%f375, 0f437F0000;
	div.approx.ftz.f32 	%f376, %f594, %f375;
	mul.ftz.f32 	%f595, %f595, %f376;
	mul.ftz.f32 	%f604, %f90, %f376;
	mul.ftz.f32 	%f616, %f91, %f376;
	mul.ftz.f32 	%f620, %f620, %f376;
	bra.uni 	BB5_98;

BB5_96:
	mov.f32 	%f377, 0f3F800000;
	div.approx.ftz.f32 	%f378, %f377, %f594;
	mul.ftz.f32 	%f595, %f595, %f378;
	mov.f32 	%f379, 0f437F0000;
	div.approx.ftz.f32 	%f380, %f379, %f594;
	mul.ftz.f32 	%f604, %f90, %f380;
	mul.ftz.f32 	%f616, %f91, %f380;
	mul.ftz.f32 	%f620, %f620, %f380;
	bra.uni 	BB5_98;

BB5_97:
	mov.f32 	%f381, 0f3F800000;
	div.approx.ftz.f32 	%f382, %f381, %f594;
	mul.ftz.f32 	%f595, %f595, %f382;
	mov.f32 	%f604, %f90;
	mov.f32 	%f616, %f91;

BB5_98:
	mov.f32 	%f615, %f616;
	mov.f32 	%f603, %f604;
	@%p34 bra 	BB5_104;

	or.b32  	%r87, %r15, 4;
	setp.eq.s32	%p66, %r87, 4;
	@%p66 bra 	BB5_100;
	bra.uni 	BB5_101;

BB5_100:
	mov.f32 	%f595, 0f3F800000;
	bra.uni 	BB5_104;

BB5_101:
	add.ftz.f32 	%f384, %f595, 0fB70637BD;
	setp.gtu.ftz.f32	%p67, %f384, 0f00000000;
	@%p67 bra 	BB5_103;

	mov.f32 	%f620, 0f00000000;
	mov.f32 	%f615, %f620;
	mov.f32 	%f603, %f620;
	mov.f32 	%f595, %f620;
	bra.uni 	BB5_104;

BB5_103:
	mov.f32 	%f389, 0f3F800000;
	div.approx.ftz.f32 	%f390, %f389, %f595;
	mul.ftz.f32 	%f603, %f603, %f390;
	mul.ftz.f32 	%f615, %f615, %f390;
	mul.ftz.f32 	%f620, %f620, %f390;

BB5_104:
	ld.param.u32 	%r152, [_Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_3];
	mul.lo.s32 	%r151, %r2, %r152;
	shl.b32 	%r150, %r151, 1;
	add.s32 	%r93, %r150, %r38;
	cvt.s64.s32	%rd8, %r93;
	@%p37 bra 	BB5_106;

	shl.b64 	%rd44, %rd8, 4;
	add.s64 	%rd45, %rd17, %rd44;
	st.v4.f32 	[%rd45], {%f620, %f615, %f603, %f595};
	bra.uni 	BB5_107;

BB5_106:
	shl.b64 	%rd46, %rd8, 3;
	add.s64 	%rd47, %rd17, %rd46;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f595;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f603;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f615;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f620;
	mov.b16 	%rs8, %temp;
}
	st.v4.u16 	[%rd47], {%rs8, %rs7, %rs6, %rs5};

BB5_107:
	cvt.rn.f32.s32	%f561, %r2;
	add.ftz.f32 	%f560, %f561, 0f3F000000;
	ld.param.u32 	%r148, [_Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_5];
	cvt.rn.f32.u32	%f559, %r148;
	ld.param.u32 	%r147, [_Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_6];
	cvt.rn.f32.u32	%f558, %r147;
	add.ftz.f32 	%f397, %f561, 0f3E000000;
	mov.f32 	%f398, 0f00000000;
	max.ftz.f32 	%f399, %f397, %f398;
	min.ftz.f32 	%f400, %f399, %f558;
	fma.rn.ftz.f32 	%f401, %f400, 0f3F000000, %f558;
	add.ftz.f32 	%f394, %f401, 0f3F000000;
	mov.f32 	%f402, 0f40000000;
	div.approx.ftz.f32 	%f403, %f559, %f402;
	add.ftz.f32 	%f404, %f5, %f403;
	add.ftz.f32 	%f391, %f404, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r94, %r95, %r96, %r97}, [inTexture, {%f391, %f394}];
	// inline asm
	mov.b32 	 %f405, %r94;
	mov.b32 	 %f406, %r95;
	add.ftz.f32 	%f407, %f3, %f403;
	add.ftz.f32 	%f395, %f407, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r98, %r99, %r100, %r101}, [inTexture, {%f395, %f394}];
	// inline asm
	// inline asm
	tex.2d.v4.u32.f32 {%r102, %r103, %r104, %r105}, [inTexture, {%f395, %f560}];
	// inline asm
	mov.b32 	 %f408, %r103;
	mul.ftz.f32 	%f409, %f406, 0f437F0000;
	mul.ftz.f32 	%f410, %f405, 0f437F0000;
	mul.ftz.f32 	%f411, %f408, 0f437F0000;
	selp.f32	%f624, 0f00000000, %f409, %p5;
	selp.f32	%f636, %f411, %f410, %p5;
	selp.f32	%f648, %f410, %f411, %p5;
	selp.f32	%f657, %f409, 0f00000000, %p5;
	@%p6 bra 	BB5_114;

	setp.ne.s32	%p71, %r13, 0;
	@%p71 bra 	BB5_110;

	mov.f32 	%f621, 0f437F0000;
	bra.uni 	BB5_113;

BB5_110:
	setp.ne.s32	%p72, %r13, 8;
	@%p72 bra 	BB5_112;

	mov.f32 	%f621, 0f447FC000;
	bra.uni 	BB5_113;

BB5_112:
	setp.eq.s32	%p73, %r13, 16;
	selp.f32	%f621, 0f47000000, 0f3F800000, %p73;

BB5_113:
	and.b32  	%r108, %r30, 4096;
	setp.eq.s32	%p74, %r108, 0;
	ld.const.f32 	%f414, [kYCbCrOffset];
	ld.const.f32 	%f415, [kYCbCrFullRangeOffset];
	selp.f32	%f416, %f415, %f414, %p74;
	mov.f32 	%f417, 0f437F0000;
	div.approx.ftz.f32 	%f418, %f621, %f417;
	mul.ftz.f32 	%f419, %f416, %f418;
	sub.ftz.f32 	%f636, %f636, %f419;
	ld.const.f32 	%f420, [kYCbCrOffset+4];
	ld.const.f32 	%f421, [kYCbCrFullRangeOffset+4];
	selp.f32	%f422, %f421, %f420, %p74;
	mul.ftz.f32 	%f423, %f422, %f418;
	sub.ftz.f32 	%f648, %f648, %f423;
	ld.const.f32 	%f424, [kYCbCrOffset+8];
	ld.const.f32 	%f425, [kYCbCrFullRangeOffset+8];
	selp.f32	%f426, %f425, %f424, %p74;
	mul.ftz.f32 	%f427, %f426, %f418;
	sub.ftz.f32 	%f657, %f657, %f427;

BB5_114:
	mov.f32 	%f656, %f657;
	mov.f32 	%f646, %f648;
	mov.f32 	%f634, %f636;
	@%p11 bra 	BB5_124;

	setp.ltu.ftz.f32	%p76, %f634, 0f00000000;
	@%p76 bra 	BB5_117;

	lg2.approx.ftz.f32 	%f428, %f634;
	mul.ftz.f32 	%f429, %f428, 0f3EE66666;
	ex2.approx.ftz.f32 	%f635, %f429;
	bra.uni 	BB5_118;

BB5_117:
	neg.ftz.f32 	%f430, %f634;
	lg2.approx.ftz.f32 	%f431, %f430;
	mul.ftz.f32 	%f432, %f431, 0f3EE66666;
	ex2.approx.ftz.f32 	%f433, %f432;
	neg.ftz.f32 	%f635, %f433;

BB5_118:
	mov.f32 	%f634, %f635;
	setp.ltu.ftz.f32	%p77, %f646, 0f00000000;
	@%p77 bra 	BB5_120;

	lg2.approx.ftz.f32 	%f434, %f646;
	mul.ftz.f32 	%f435, %f434, 0f3EE66666;
	ex2.approx.ftz.f32 	%f647, %f435;
	bra.uni 	BB5_121;

BB5_120:
	neg.ftz.f32 	%f436, %f646;
	lg2.approx.ftz.f32 	%f437, %f436;
	mul.ftz.f32 	%f438, %f437, 0f3EE66666;
	ex2.approx.ftz.f32 	%f439, %f438;
	neg.ftz.f32 	%f647, %f439;

BB5_121:
	mov.f32 	%f646, %f647;
	setp.ltu.ftz.f32	%p78, %f656, 0f00000000;
	@%p78 bra 	BB5_123;

	lg2.approx.ftz.f32 	%f440, %f656;
	mul.ftz.f32 	%f441, %f440, 0f3EE66666;
	ex2.approx.ftz.f32 	%f656, %f441;
	bra.uni 	BB5_124;

BB5_123:
	neg.ftz.f32 	%f442, %f656;
	lg2.approx.ftz.f32 	%f443, %f442;
	mul.ftz.f32 	%f444, %f443, 0f3EE66666;
	ex2.approx.ftz.f32 	%f445, %f444;
	neg.ftz.f32 	%f656, %f445;

BB5_124:
	mov.f32 	%f655, %f656;
	mov.f32 	%f136, %f646;
	mov.f32 	%f135, %f634;
	mov.b32 	 %f138, %r98;
	mov.b32 	 %f139, %r99;
	shl.b32 	%r113, %r2, 1;
	add.s32 	%r26, %r113, 1;
	mov.b32 	 %f140, %r102;
	@%p1 bra 	BB5_132;

	setp.eq.s32	%p79, %r13, 128;
	mov.f32 	%f633, %f135;
	mov.f32 	%f645, %f136;
	@%p79 bra 	BB5_151;

	setp.ne.s32	%p80, %r13, 0;
	@%p80 bra 	BB5_128;

	mov.f32 	%f622, 0f437F0000;
	bra.uni 	BB5_131;

BB5_128:
	setp.ne.s32	%p81, %r13, 8;
	@%p81 bra 	BB5_130;

	mov.f32 	%f622, 0f447FC000;
	bra.uni 	BB5_131;

BB5_130:
	setp.eq.s32	%p82, %r13, 16;
	selp.f32	%f622, 0f47000000, 0f3F800000, %p82;

BB5_131:
	mov.f32 	%f448, 0f3F800000;
	div.approx.ftz.f32 	%f449, %f448, %f622;
	mul.ftz.f32 	%f624, %f624, %f449;
	mul.ftz.f32 	%f633, %f135, %f449;
	mul.ftz.f32 	%f645, %f136, %f449;
	mul.ftz.f32 	%f655, %f655, %f449;
	bra.uni 	BB5_151;

BB5_132:
	and.b32  	%r114, %r30, 512;
	setp.eq.s32	%p83, %r114, 0;
	@%p83 bra 	BB5_138;

	and.b32  	%r115, %r30, 2048;
	setp.eq.s32	%p84, %r115, 0;
	and.b32  	%r27, %r30, 4096;
	@%p84 bra 	BB5_136;

	setp.eq.s32	%p85, %r27, 0;
	mov.u64 	%rd80, k709YCbCrFullRange_To_RGB32f;
	@%p85 bra 	BB5_140;

	and.b32  	%r116, %r30, 256;
	setp.eq.s32	%p86, %r116, 0;
	mov.u64 	%rd55, k709YPbPr_To_RGB32f;
	mov.u64 	%rd56, k709YCbCr_To_RGB32f;
	selp.b64	%rd80, %rd56, %rd55, %p86;
	bra.uni 	BB5_140;

BB5_136:
	setp.eq.s32	%p87, %r27, 0;
	mov.u64 	%rd80, k601YCbCrFullRange_To_RGB32f;
	@%p87 bra 	BB5_140;

	and.b32  	%r117, %r30, 256;
	setp.eq.s32	%p88, %r117, 0;
	mov.u64 	%rd58, k601YPbPr_To_RGB32f;
	mov.u64 	%rd59, k601YCbCr_To_RGB32f;
	selp.b64	%rd80, %rd59, %rd58, %p88;
	bra.uni 	BB5_140;

BB5_138:
	and.b32  	%r118, %r30, 256;
	setp.eq.s32	%p89, %r118, 0;
	mov.u64 	%rd80, 0;
	@%p89 bra 	BB5_140;

	mov.u64 	%rd80, kRGB32f_To_601YPbPr;

BB5_140:
	ld.const.f32 	%f450, [%rd80];
	ld.const.f32 	%f451, [%rd80+4];
	mul.ftz.f32 	%f452, %f136, %f451;
	fma.rn.ftz.f32 	%f453, %f135, %f450, %f452;
	ld.const.f32 	%f454, [%rd80+8];
	fma.rn.ftz.f32 	%f147, %f655, %f454, %f453;
	ld.const.f32 	%f455, [%rd80+12];
	ld.const.f32 	%f456, [%rd80+16];
	mul.ftz.f32 	%f457, %f136, %f456;
	fma.rn.ftz.f32 	%f458, %f135, %f455, %f457;
	ld.const.f32 	%f459, [%rd80+20];
	fma.rn.ftz.f32 	%f148, %f655, %f459, %f458;
	ld.const.f32 	%f460, [%rd80+24];
	ld.const.f32 	%f461, [%rd80+28];
	mul.ftz.f32 	%f462, %f136, %f461;
	fma.rn.ftz.f32 	%f463, %f135, %f460, %f462;
	ld.const.f32 	%f464, [%rd80+32];
	fma.rn.ftz.f32 	%f655, %f655, %f464, %f463;
	setp.eq.s32	%p90, %r13, 128;
	mov.f32 	%f633, %f147;
	mov.f32 	%f645, %f148;
	@%p90 bra 	BB5_151;

	setp.ne.s32	%p91, %r13, 0;
	@%p91 bra 	BB5_143;

	mov.f32 	%f623, 0f437F0000;
	bra.uni 	BB5_146;

BB5_143:
	setp.ne.s32	%p92, %r13, 8;
	@%p92 bra 	BB5_145;

	mov.f32 	%f623, 0f447FC000;
	bra.uni 	BB5_146;

BB5_145:
	setp.eq.s32	%p93, %r13, 16;
	selp.f32	%f623, 0f47000000, 0f3F800000, %p93;

BB5_146:
	setp.eq.s32	%p94, %r13, 0;
	@%p94 bra 	BB5_150;

	and.b32  	%r119, %r30, 256;
	setp.eq.s32	%p95, %r119, 0;
	@%p95 bra 	BB5_149;

	mov.f32 	%f467, 0f437F0000;
	div.approx.ftz.f32 	%f468, %f623, %f467;
	mul.ftz.f32 	%f624, %f624, %f468;
	mul.ftz.f32 	%f633, %f147, %f468;
	mul.ftz.f32 	%f645, %f148, %f468;
	mul.ftz.f32 	%f655, %f655, %f468;
	bra.uni 	BB5_151;

BB5_149:
	mov.f32 	%f469, 0f3F800000;
	div.approx.ftz.f32 	%f470, %f469, %f623;
	mul.ftz.f32 	%f624, %f624, %f470;
	mov.f32 	%f471, 0f437F0000;
	div.approx.ftz.f32 	%f472, %f471, %f623;
	mul.ftz.f32 	%f633, %f147, %f472;
	mul.ftz.f32 	%f645, %f148, %f472;
	mul.ftz.f32 	%f655, %f655, %f472;
	bra.uni 	BB5_151;

BB5_150:
	mov.f32 	%f473, 0f3F800000;
	div.approx.ftz.f32 	%f474, %f473, %f623;
	mul.ftz.f32 	%f624, %f624, %f474;
	mov.f32 	%f633, %f147;
	mov.f32 	%f645, %f148;

BB5_151:
	mov.f32 	%f164, %f655;
	mov.f32 	%f644, %f645;
	mov.f32 	%f632, %f633;
	mov.f32 	%f654, %f164;
	@%p34 bra 	BB5_157;

	or.b32  	%r120, %r15, 4;
	setp.eq.s32	%p97, %r120, 4;
	@%p97 bra 	BB5_153;
	bra.uni 	BB5_154;

BB5_153:
	mov.f32 	%f624, 0f3F800000;
	mov.f32 	%f654, %f164;
	bra.uni 	BB5_157;

BB5_154:
	add.ftz.f32 	%f476, %f624, 0fB70637BD;
	setp.gtu.ftz.f32	%p98, %f476, 0f00000000;
	@%p98 bra 	BB5_156;

	mov.f32 	%f644, %f398;
	mov.f32 	%f632, %f398;
	mov.f32 	%f624, %f398;
	mov.f32 	%f654, %f398;
	bra.uni 	BB5_157;

BB5_156:
	mov.f32 	%f481, 0f3F800000;
	div.approx.ftz.f32 	%f482, %f481, %f624;
	mul.ftz.f32 	%f632, %f632, %f482;
	mul.ftz.f32 	%f644, %f644, %f482;
	mul.ftz.f32 	%f654, %f164, %f482;

BB5_157:
	ld.param.u32 	%r149, [_Z29PixelFormatConvert_NV12_FIELDPvjS_j17DevicePixelFormatjj_param_3];
	mul.lo.s32 	%r28, %r26, %r149;
	add.s32 	%r126, %r38, %r28;
	add.s32 	%r127, %r126, 1;
	cvt.s64.s32	%rd12, %r127;
	@%p37 bra 	BB5_159;

	shl.b64 	%rd62, %rd12, 4;
	add.s64 	%rd63, %rd17, %rd62;
	st.v4.f32 	[%rd63], {%f654, %f644, %f632, %f624};
	bra.uni 	BB5_160;

BB5_159:
	shl.b64 	%rd64, %rd12, 3;
	add.s64 	%rd65, %rd17, %rd64;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f624;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f632;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f644;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f654;
	mov.b16 	%rs12, %temp;
}
	st.v4.u16 	[%rd65], {%rs12, %rs11, %rs10, %rs9};

BB5_160:
	mul.ftz.f32 	%f483, %f139, 0f437F0000;
	selp.f32	%f661, 0f00000000, %f483, %p5;
	mul.ftz.f32 	%f484, %f140, 0f437F0000;
	mul.ftz.f32 	%f485, %f138, 0f437F0000;
	selp.f32	%f673, %f484, %f485, %p5;
	selp.f32	%f685, %f485, %f484, %p5;
	selp.f32	%f686, %f483, 0f00000000, %p5;
	@%p6 bra 	BB5_167;

	setp.ne.s32	%p102, %r13, 0;
	@%p102 bra 	BB5_163;

	mov.f32 	%f658, 0f437F0000;
	bra.uni 	BB5_166;

BB5_163:
	setp.ne.s32	%p103, %r13, 8;
	@%p103 bra 	BB5_165;

	mov.f32 	%f658, 0f447FC000;
	bra.uni 	BB5_166;

BB5_165:
	setp.eq.s32	%p104, %r13, 16;
	selp.f32	%f658, 0f47000000, 0f3F800000, %p104;

BB5_166:
	and.b32  	%r130, %r30, 4096;
	setp.eq.s32	%p105, %r130, 0;
	ld.const.f32 	%f488, [kYCbCrOffset];
	ld.const.f32 	%f489, [kYCbCrFullRangeOffset];
	selp.f32	%f490, %f489, %f488, %p105;
	mov.f32 	%f491, 0f437F0000;
	div.approx.ftz.f32 	%f492, %f658, %f491;
	mul.ftz.f32 	%f493, %f490, %f492;
	sub.ftz.f32 	%f673, %f673, %f493;
	ld.const.f32 	%f494, [kYCbCrOffset+4];
	ld.const.f32 	%f495, [kYCbCrFullRangeOffset+4];
	selp.f32	%f496, %f495, %f494, %p105;
	mul.ftz.f32 	%f497, %f496, %f492;
	sub.ftz.f32 	%f685, %f685, %f497;
	ld.const.f32 	%f498, [kYCbCrOffset+8];
	ld.const.f32 	%f499, [kYCbCrFullRangeOffset+8];
	selp.f32	%f500, %f499, %f498, %p105;
	mul.ftz.f32 	%f501, %f500, %f492;
	sub.ftz.f32 	%f686, %f686, %f501;

BB5_167:
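	// Gamma-like step (skipped when %p11 is set): each component appears to be
	// raised to the 0.45 power with its sign preserved (0f3EE66666 == 0.45f),
	// using the lg2/ex2 approximations below.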
	mov.f32 	%f683, %f685;
	mov.f32 	%f671, %f673;
	@%p11 bra 	BB5_177;

	setp.ltu.ftz.f32	%p107, %f671, 0f00000000;
	@%p107 bra 	BB5_170;

	lg2.approx.ftz.f32 	%f502, %f671;
	mul.ftz.f32 	%f503, %f502, 0f3EE66666;
	ex2.approx.ftz.f32 	%f672, %f503;
	bra.uni 	BB5_171;

BB5_170:
	neg.ftz.f32 	%f504, %f671;
	lg2.approx.ftz.f32 	%f505, %f504;
	mul.ftz.f32 	%f506, %f505, 0f3EE66666;
	ex2.approx.ftz.f32 	%f507, %f506;
	neg.ftz.f32 	%f672, %f507;

BB5_171:
	mov.f32 	%f671, %f672;
	setp.ltu.ftz.f32	%p108, %f683, 0f00000000;
	@%p108 bra 	BB5_173;

	lg2.approx.ftz.f32 	%f508, %f683;
	mul.ftz.f32 	%f509, %f508, 0f3EE66666;
	ex2.approx.ftz.f32 	%f684, %f509;
	bra.uni 	BB5_174;

BB5_173:
	neg.ftz.f32 	%f510, %f683;
	lg2.approx.ftz.f32 	%f511, %f510;
	mul.ftz.f32 	%f512, %f511, 0f3EE66666;
	ex2.approx.ftz.f32 	%f513, %f512;
	neg.ftz.f32 	%f684, %f513;

BB5_174:
	mov.f32 	%f683, %f684;
	setp.ltu.ftz.f32	%p109, %f686, 0f00000000;
	@%p109 bra 	BB5_176;

	lg2.approx.ftz.f32 	%f514, %f686;
	mul.ftz.f32 	%f515, %f514, 0f3EE66666;
	ex2.approx.ftz.f32 	%f686, %f515;
	bra.uni 	BB5_177;

BB5_176:
	neg.ftz.f32 	%f516, %f686;
	lg2.approx.ftz.f32 	%f517, %f516;
	mul.ftz.f32 	%f518, %f517, 0f3EE66666;
	ex2.approx.ftz.f32 	%f519, %f518;
	neg.ftz.f32 	%f686, %f519;

BB5_177:
	mov.f32 	%f193, %f683;
	mov.f32 	%f192, %f671;
	@%p1 bra 	BB5_185;

	setp.eq.s32	%p110, %r13, 128;
	mov.f32 	%f670, %f192;
	mov.f32 	%f682, %f193;
	@%p110 bra 	BB5_204;

	setp.ne.s32	%p111, %r13, 0;
	@%p111 bra 	BB5_181;

	mov.f32 	%f659, 0f437F0000;
	bra.uni 	BB5_184;

BB5_181:
	setp.ne.s32	%p112, %r13, 8;
	@%p112 bra 	BB5_183;

	mov.f32 	%f659, 0f447FC000;
	bra.uni 	BB5_184;

BB5_183:
	setp.eq.s32	%p113, %r13, 16;
	selp.f32	%f659, 0f47000000, 0f3F800000, %p113;

BB5_184:
	mov.f32 	%f522, 0f3F800000;
	div.approx.ftz.f32 	%f523, %f522, %f659;
	mul.ftz.f32 	%f661, %f661, %f523;
	mul.ftz.f32 	%f670, %f192, %f523;
	mul.ftz.f32 	%f682, %f193, %f523;
	mul.ftz.f32 	%f686, %f686, %f523;
	bra.uni 	BB5_204;

BB5_185:
	and.b32  	%r131, %r30, 512;
	setp.eq.s32	%p114, %r131, 0;
	@%p114 bra 	BB5_191;

	and.b32  	%r132, %r30, 2048;
	setp.eq.s32	%p115, %r132, 0;
	and.b32  	%r29, %r30, 4096;
	@%p115 bra 	BB5_189;

	setp.eq.s32	%p116, %r29, 0;
	mov.u64 	%rd81, k709YCbCrFullRange_To_RGB32f;
	@%p116 bra 	BB5_193;

	and.b32  	%r133, %r30, 256;
	setp.eq.s32	%p117, %r133, 0;
	mov.u64 	%rd67, k709YPbPr_To_RGB32f;
	mov.u64 	%rd68, k709YCbCr_To_RGB32f;
	selp.b64	%rd81, %rd68, %rd67, %p117;
	bra.uni 	BB5_193;

BB5_189:
	setp.eq.s32	%p118, %r29, 0;
	mov.u64 	%rd81, k601YCbCrFullRange_To_RGB32f;
	@%p118 bra 	BB5_193;

	and.b32  	%r134, %r30, 256;
	setp.eq.s32	%p119, %r134, 0;
	mov.u64 	%rd70, k601YPbPr_To_RGB32f;
	mov.u64 	%rd71, k601YCbCr_To_RGB32f;
	selp.b64	%rd81, %rd71, %rd70, %p119;
	bra.uni 	BB5_193;

BB5_191:
	and.b32  	%r135, %r30, 256;
	setp.eq.s32	%p120, %r135, 0;
	mov.u64 	%rd81, 0;
	@%p120 bra 	BB5_193;

	mov.u64 	%rd81, kRGB32f_To_601YPbPr;

BB5_193:
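	// Same 3x3 colour-matrix multiply as BB5_140, now applied to the second
	// pixel handled by this thread (table pointer in %rd81).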
	ld.const.f32 	%f524, [%rd81];
	ld.const.f32 	%f525, [%rd81+4];
	mul.ftz.f32 	%f526, %f193, %f525;
	fma.rn.ftz.f32 	%f527, %f192, %f524, %f526;
	ld.const.f32 	%f528, [%rd81+8];
	fma.rn.ftz.f32 	%f201, %f686, %f528, %f527;
	ld.const.f32 	%f529, [%rd81+12];
	ld.const.f32 	%f530, [%rd81+16];
	mul.ftz.f32 	%f531, %f193, %f530;
	fma.rn.ftz.f32 	%f532, %f192, %f529, %f531;
	ld.const.f32 	%f533, [%rd81+20];
	fma.rn.ftz.f32 	%f202, %f686, %f533, %f532;
	ld.const.f32 	%f534, [%rd81+24];
	ld.const.f32 	%f535, [%rd81+28];
	mul.ftz.f32 	%f536, %f193, %f535;
	fma.rn.ftz.f32 	%f537, %f192, %f534, %f536;
	ld.const.f32 	%f538, [%rd81+32];
	fma.rn.ftz.f32 	%f686, %f686, %f538, %f537;
	setp.eq.s32	%p121, %r13, 128;
	mov.f32 	%f670, %f201;
	mov.f32 	%f682, %f202;
	@%p121 bra 	BB5_204;

	setp.ne.s32	%p122, %r13, 0;
	@%p122 bra 	BB5_196;

	mov.f32 	%f660, 0f437F0000;
	bra.uni 	BB5_199;

BB5_196:
	setp.ne.s32	%p123, %r13, 8;
	@%p123 bra 	BB5_198;

	mov.f32 	%f660, 0f447FC000;
	bra.uni 	BB5_199;

BB5_198:
	setp.eq.s32	%p124, %r13, 16;
	selp.f32	%f660, 0f47000000, 0f3F800000, %p124;

BB5_199:
	setp.eq.s32	%p125, %r13, 0;
	@%p125 bra 	BB5_203;

	and.b32  	%r136, %r30, 256;
	setp.eq.s32	%p126, %r136, 0;
	@%p126 bra 	BB5_202;

	mov.f32 	%f541, 0f437F0000;
	div.approx.ftz.f32 	%f542, %f660, %f541;
	mul.ftz.f32 	%f661, %f661, %f542;
	mul.ftz.f32 	%f670, %f201, %f542;
	mul.ftz.f32 	%f682, %f202, %f542;
	mul.ftz.f32 	%f686, %f686, %f542;
	bra.uni 	BB5_204;

BB5_202:
	mov.f32 	%f543, 0f3F800000;
	div.approx.ftz.f32 	%f544, %f543, %f660;
	mul.ftz.f32 	%f661, %f661, %f544;
	mov.f32 	%f545, 0f437F0000;
	div.approx.ftz.f32 	%f546, %f545, %f660;
	mul.ftz.f32 	%f670, %f201, %f546;
	mul.ftz.f32 	%f682, %f202, %f546;
	mul.ftz.f32 	%f686, %f686, %f546;
	bra.uni 	BB5_204;

BB5_203:
	mov.f32 	%f547, 0f3F800000;
	div.approx.ftz.f32 	%f548, %f547, %f660;
	mul.ftz.f32 	%f661, %f661, %f548;
	mov.f32 	%f670, %f201;
	mov.f32 	%f682, %f202;

BB5_204:
	mov.f32 	%f681, %f682;
	mov.f32 	%f669, %f670;
	@%p34 bra 	BB5_210;

	or.b32  	%r137, %r15, 4;
	setp.eq.s32	%p128, %r137, 4;
	@%p128 bra 	BB5_206;
	bra.uni 	BB5_207;

BB5_206:
	mov.f32 	%f661, 0f3F800000;
	bra.uni 	BB5_210;

BB5_207:
	add.ftz.f32 	%f550, %f661, 0fB70637BD;
	setp.gtu.ftz.f32	%p129, %f550, 0f00000000;
	@%p129 bra 	BB5_209;

	mov.f32 	%f686, 0f00000000;
	mov.f32 	%f681, %f686;
	mov.f32 	%f669, %f686;
	mov.f32 	%f661, %f686;
	bra.uni 	BB5_210;

BB5_209:
	mov.f32 	%f555, 0f3F800000;
	div.approx.ftz.f32 	%f556, %f555, %f661;
	mul.ftz.f32 	%f669, %f669, %f556;
	mul.ftz.f32 	%f681, %f681, %f556;
	mul.ftz.f32 	%f686, %f686, %f556;

BB5_210:
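	// Store the second output pixel (row * pitch + x); as in BB5_157, %p37
	// selects f32 or packed f16 stores.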
	add.s32 	%r143, %r28, %r38;
	cvt.s64.s32	%rd16, %r143;
	@%p37 bra 	BB5_212;

	shl.b64 	%rd74, %rd16, 4;
	add.s64 	%rd75, %rd17, %rd74;
	st.v4.f32 	[%rd75], {%f686, %f681, %f669, %f661};
	bra.uni 	BB5_213;

BB5_212:
	shl.b64 	%rd76, %rd16, 3;
	add.s64 	%rd77, %rd17, %rd76;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f661;
	mov.b16 	%rs13, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f669;
	mov.b16 	%rs14, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f681;
	mov.b16 	%rs15, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f686;
	mov.b16 	%rs16, %temp;
}
	st.v4.u16 	[%rd77], {%rs16, %rs15, %rs14, %rs13};

BB5_213:
	ret;
}
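// The entries below appear to be fixed-function variants of the generic
// converter above: each samples an NV12 picture through inTexture, rescales the
// texels to 0..255, subtracts the YCbCr offset vector, applies one of the
// .const conversion matrices and writes RGBA (alpha = 1.0) to the buffer in
// param_1. param_2 looks like the destination row pitch in pixels, param_3
// selects f32 (non-zero, 16 B/pixel) or f16 (zero, 8 B/pixel) stores, and
// param_4/param_5 bound the x/y range.
//
// PixelFormatConvert_NV12_FIELD_601_Kernel: BT.601 video range (kYCbCrOffset,
// k601YCbCr_To_RGB32f); each thread converts a 2x2 block of output pixels,
// sampling chroma per field.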

.visible .entry PixelFormatConvert_NV12_FIELD_601_Kernel(
	.param .u64 PixelFormatConvert_NV12_FIELD_601_Kernel_param_0,
	.param .u64 PixelFormatConvert_NV12_FIELD_601_Kernel_param_1,
	.param .u32 PixelFormatConvert_NV12_FIELD_601_Kernel_param_2,
	.param .u32 PixelFormatConvert_NV12_FIELD_601_Kernel_param_3,
	.param .u32 PixelFormatConvert_NV12_FIELD_601_Kernel_param_4,
	.param .u32 PixelFormatConvert_NV12_FIELD_601_Kernel_param_5
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<99>;
	.reg .f32 	%f<130>;
	.reg .s64 	%rd<41>;


	ld.param.u64 	%rd4, [PixelFormatConvert_NV12_FIELD_601_Kernel_param_1];
	ld.param.u32 	%r8, [PixelFormatConvert_NV12_FIELD_601_Kernel_param_2];
	ld.param.u32 	%r9, [PixelFormatConvert_NV12_FIELD_601_Kernel_param_3];
	ld.param.u32 	%r10, [PixelFormatConvert_NV12_FIELD_601_Kernel_param_4];
	ld.param.u32 	%r11, [PixelFormatConvert_NV12_FIELD_601_Kernel_param_5];
	mov.u32 	%r12, %ntid.x;
	mov.u32 	%r13, %ctaid.x;
	mov.u32 	%r14, %tid.x;
	mad.lo.s32 	%r1, %r12, %r13, %r14;
	shl.b32 	%r15, %r1, 1;
	mov.u32 	%r16, %ntid.y;
	mov.u32 	%r17, %ctaid.y;
	mov.u32 	%r18, %tid.y;
	mad.lo.s32 	%r2, %r16, %r17, %r18;
	setp.ge.u32	%p1, %r2, %r11;
	setp.ge.u32	%p2, %r15, %r10;
	or.pred  	%p3, %p1, %p2;
	@%p3 bra 	BB6_13;

	cvt.rn.f32.s32	%f1, %r2;
	add.ftz.f32 	%f40, %f1, 0fBE000000;
	mov.f32 	%f41, 0f00000000;
	max.ftz.f32 	%f42, %f40, %f41;
	cvt.rn.f32.u32	%f2, %r11;
	min.ftz.f32 	%f43, %f42, %f2;
	fma.rn.ftz.f32 	%f44, %f43, 0f3F000000, %f2;
	add.ftz.f32 	%f37, %f44, 0f3F000000;
	cvt.rn.f32.s32	%f3, %r1;
	add.ftz.f32 	%f38, %f3, 0f3F000000;
	cvt.rn.f32.u32	%f4, %r10;
	min.ftz.f32 	%f5, %f38, %f4;
	add.ftz.f32 	%f45, %f5, 0f00000000;
	add.ftz.f32 	%f34, %f45, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r19, %r20, %r21, %r22}, [inTexture, {%f34, %f37}];
	// inline asm
	mov.b32 	 %f46, %r19;
	mov.b32 	 %f47, %r20;
	// inline asm
	tex.2d.v4.u32.f32 {%r23, %r24, %r25, %r26}, [inTexture, {%f38, %f37}];
	// inline asm
	add.ftz.f32 	%f39, %f1, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r27, %r28, %r29, %r30}, [inTexture, {%f38, %f39}];
	// inline asm
	mov.b32 	 %f6, %r27;
	mov.b32 	 %f48, %r28;
	mul.ftz.f32 	%f49, %f47, 0f437F0000;
	mov.f32 	%f50, 0f437F0000;
	mul.ftz.f32 	%f51, %f46, 0f437F0000;
	mul.ftz.f32 	%f52, %f48, 0f437F0000;
	ld.const.f32 	%f53, [kYCbCrOffset];
	div.approx.ftz.f32 	%f54, %f50, %f50;
	mul.ftz.f32 	%f7, %f53, %f54;
	sub.ftz.f32 	%f55, %f52, %f7;
	ld.const.f32 	%f56, [kYCbCrOffset+4];
	mul.ftz.f32 	%f8, %f56, %f54;
	sub.ftz.f32 	%f57, %f51, %f8;
	ld.const.f32 	%f58, [kYCbCrOffset+8];
	mul.ftz.f32 	%f9, %f58, %f54;
	sub.ftz.f32 	%f59, %f49, %f9;
	ld.const.f32 	%f10, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f11, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f60, %f57, %f11;
	fma.rn.ftz.f32 	%f61, %f55, %f10, %f60;
	ld.const.f32 	%f12, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f13, %f59, %f12, %f61;
	ld.const.f32 	%f14, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f15, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f62, %f57, %f15;
	fma.rn.ftz.f32 	%f63, %f55, %f14, %f62;
	ld.const.f32 	%f16, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f17, %f59, %f16, %f63;
	ld.const.f32 	%f18, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f19, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f64, %f57, %f19;
	fma.rn.ftz.f32 	%f65, %f55, %f18, %f64;
	ld.const.f32 	%f20, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f21, %f59, %f20, %f65;
	setp.eq.s32	%p4, %r9, 0;
	@%p4 bra 	BB6_3;

	cvta.to.global.u64 	%rd11, %rd4;
	mul.lo.s32 	%r35, %r2, %r8;
	shl.b32 	%r36, %r35, 1;
	add.s32 	%r42, %r15, %r36;
	add.s32 	%r43, %r42, 1;
	mul.wide.s32 	%rd12, %r43, 16;
	add.s64 	%rd13, %rd11, %rd12;
	mov.f32 	%f66, 0f3F800000;
	st.global.v4.f32 	[%rd13], {%f21, %f17, %f13, %f66};
	bra.uni 	BB6_4;

BB6_3:
	cvta.to.global.u64 	%rd14, %rd4;
	mul.lo.s32 	%r48, %r2, %r8;
	shl.b32 	%r49, %r48, 1;
	add.s32 	%r55, %r15, %r49;
	add.s32 	%r56, %r55, 1;
	mul.wide.s32 	%rd15, %r56, 8;
	add.s64 	%rd16, %rd14, %rd15;
	mov.f32 	%f67, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f67;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f17;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f21;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd16], {%rs4, %rs3, %rs2, %rs1};

BB6_4:
	mov.b32 	 %f68, %r24;
	mul.ftz.f32 	%f69, %f68, 0f437F0000;
	mov.b32 	 %f70, %r23;
	mul.ftz.f32 	%f71, %f70, 0f437F0000;
	mul.ftz.f32 	%f72, %f6, 0f437F0000;
	sub.ftz.f32 	%f73, %f72, %f7;
	sub.ftz.f32 	%f74, %f71, %f8;
	sub.ftz.f32 	%f75, %f69, %f9;
	mul.ftz.f32 	%f76, %f74, %f11;
	fma.rn.ftz.f32 	%f77, %f73, %f10, %f76;
	fma.rn.ftz.f32 	%f22, %f75, %f12, %f77;
	mul.ftz.f32 	%f78, %f74, %f15;
	fma.rn.ftz.f32 	%f79, %f73, %f14, %f78;
	fma.rn.ftz.f32 	%f23, %f75, %f16, %f79;
	mul.ftz.f32 	%f80, %f74, %f19;
	fma.rn.ftz.f32 	%f81, %f73, %f18, %f80;
	fma.rn.ftz.f32 	%f24, %f75, %f20, %f81;
	shl.b32 	%r66, %r2, 1;
	mad.lo.s32 	%r67, %r66, %r8, %r15;
	cvt.s64.s32	%rd1, %r67;
	@%p4 bra 	BB6_6;

	cvta.to.global.u64 	%rd17, %rd4;
	shl.b64 	%rd18, %rd1, 4;
	add.s64 	%rd19, %rd17, %rd18;
	mov.f32 	%f82, 0f3F800000;
	st.global.v4.f32 	[%rd19], {%f24, %f23, %f22, %f82};
	bra.uni 	BB6_7;

BB6_6:
	cvta.to.global.u64 	%rd20, %rd4;
	shl.b64 	%rd21, %rd1, 3;
	add.s64 	%rd22, %rd20, %rd21;
	mov.f32 	%f83, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f83;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f22;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f23;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f24;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd22], {%rs8, %rs7, %rs6, %rs5};

BB6_7:
	add.ftz.f32 	%f90, %f1, 0f3E000000;
	max.ftz.f32 	%f92, %f90, %f41;
	min.ftz.f32 	%f93, %f92, %f2;
	fma.rn.ftz.f32 	%f94, %f93, 0f3F000000, %f2;
	add.ftz.f32 	%f87, %f94, 0f3F000000;
	mov.f32 	%f95, 0f40000000;
	div.approx.ftz.f32 	%f96, %f4, %f95;
	add.ftz.f32 	%f97, %f5, %f96;
	add.ftz.f32 	%f84, %f97, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r68, %r69, %r70, %r71}, [inTexture, {%f84, %f87}];
	// inline asm
	mov.b32 	 %f98, %r68;
	mov.b32 	 %f99, %r69;
	add.ftz.f32 	%f100, %f3, %f96;
	add.ftz.f32 	%f88, %f100, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r72, %r73, %r74, %r75}, [inTexture, {%f88, %f87}];
	// inline asm
	mov.b32 	 %f25, %r72;
	mov.b32 	 %f26, %r73;
	// inline asm
	tex.2d.v4.u32.f32 {%r76, %r77, %r78, %r79}, [inTexture, {%f88, %f39}];
	// inline asm
	add.s32 	%r85, %r66, 1;
	mov.b32 	 %f27, %r76;
	mov.b32 	 %f101, %r77;
	mul.ftz.f32 	%f102, %f99, 0f437F0000;
	mul.ftz.f32 	%f103, %f98, 0f437F0000;
	mul.ftz.f32 	%f104, %f101, 0f437F0000;
	sub.ftz.f32 	%f105, %f104, %f7;
	sub.ftz.f32 	%f106, %f103, %f8;
	sub.ftz.f32 	%f107, %f102, %f9;
	mul.ftz.f32 	%f108, %f106, %f11;
	fma.rn.ftz.f32 	%f109, %f105, %f10, %f108;
	fma.rn.ftz.f32 	%f28, %f107, %f12, %f109;
	mul.ftz.f32 	%f110, %f106, %f15;
	fma.rn.ftz.f32 	%f111, %f105, %f14, %f110;
	fma.rn.ftz.f32 	%f29, %f107, %f16, %f111;
	mul.ftz.f32 	%f112, %f106, %f19;
	fma.rn.ftz.f32 	%f113, %f105, %f18, %f112;
	fma.rn.ftz.f32 	%f30, %f107, %f20, %f113;
	mul.lo.s32 	%r7, %r85, %r8;
	add.s32 	%r91, %r15, %r7;
	add.s32 	%r92, %r91, 1;
	cvt.s64.s32	%rd2, %r92;
	@%p4 bra 	BB6_9;

	cvta.to.global.u64 	%rd29, %rd4;
	shl.b64 	%rd30, %rd2, 4;
	add.s64 	%rd31, %rd29, %rd30;
	mov.f32 	%f114, 0f3F800000;
	st.global.v4.f32 	[%rd31], {%f30, %f29, %f28, %f114};
	bra.uni 	BB6_10;

BB6_9:
	cvta.to.global.u64 	%rd32, %rd4;
	shl.b64 	%rd33, %rd2, 3;
	add.s64 	%rd34, %rd32, %rd33;
	mov.f32 	%f115, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f115;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f28;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f29;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f30;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd34], {%rs12, %rs11, %rs10, %rs9};

BB6_10:
	mul.ftz.f32 	%f116, %f27, 0f437F0000;
	sub.ftz.f32 	%f117, %f116, %f7;
	mul.ftz.f32 	%f118, %f25, 0f437F0000;
	sub.ftz.f32 	%f119, %f118, %f8;
	mul.ftz.f32 	%f120, %f26, 0f437F0000;
	sub.ftz.f32 	%f121, %f120, %f9;
	mul.ftz.f32 	%f122, %f119, %f11;
	fma.rn.ftz.f32 	%f123, %f117, %f10, %f122;
	fma.rn.ftz.f32 	%f31, %f121, %f12, %f123;
	mul.ftz.f32 	%f124, %f119, %f15;
	fma.rn.ftz.f32 	%f125, %f117, %f14, %f124;
	fma.rn.ftz.f32 	%f32, %f121, %f16, %f125;
	mul.ftz.f32 	%f126, %f119, %f19;
	fma.rn.ftz.f32 	%f127, %f117, %f18, %f126;
	fma.rn.ftz.f32 	%f33, %f121, %f20, %f127;
	add.s32 	%r98, %r7, %r15;
	cvt.s64.s32	%rd3, %r98;
	@%p4 bra 	BB6_12;

	cvta.to.global.u64 	%rd35, %rd4;
	shl.b64 	%rd36, %rd3, 4;
	add.s64 	%rd37, %rd35, %rd36;
	mov.f32 	%f128, 0f3F800000;
	st.global.v4.f32 	[%rd37], {%f33, %f32, %f31, %f128};
	bra.uni 	BB6_13;

BB6_12:
	cvta.to.global.u64 	%rd38, %rd4;
	shl.b64 	%rd39, %rd3, 3;
	add.s64 	%rd40, %rd38, %rd39;
	mov.f32 	%f129, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f129;
	mov.b16 	%rs13, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f31;
	mov.b16 	%rs14, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f32;
	mov.b16 	%rs15, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f33;
	mov.b16 	%rs16, %temp;
}
	st.global.v4.u16 	[%rd40], {%rs16, %rs15, %rs14, %rs13};

BB6_13:
	ret;
}
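// PixelFormatConvert_NV12_FRAME_601_Kernel: BT.601 video range, progressive
// chroma sampling; each thread converts a horizontal pair of output pixels.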

.visible .entry PixelFormatConvert_NV12_FRAME_601_Kernel(
	.param .u64 PixelFormatConvert_NV12_FRAME_601_Kernel_param_0,
	.param .u64 PixelFormatConvert_NV12_FRAME_601_Kernel_param_1,
	.param .u32 PixelFormatConvert_NV12_FRAME_601_Kernel_param_2,
	.param .u32 PixelFormatConvert_NV12_FRAME_601_Kernel_param_3,
	.param .u32 PixelFormatConvert_NV12_FRAME_601_Kernel_param_4,
	.param .u32 PixelFormatConvert_NV12_FRAME_601_Kernel_param_5
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<9>;
	.reg .s32 	%r<62>;
	.reg .f32 	%f<75>;
	.reg .s64 	%rd<21>;


	ld.param.u64 	%rd2, [PixelFormatConvert_NV12_FRAME_601_Kernel_param_1];
	ld.param.u32 	%r7, [PixelFormatConvert_NV12_FRAME_601_Kernel_param_2];
	ld.param.u32 	%r8, [PixelFormatConvert_NV12_FRAME_601_Kernel_param_3];
	ld.param.u32 	%r9, [PixelFormatConvert_NV12_FRAME_601_Kernel_param_4];
	ld.param.u32 	%r10, [PixelFormatConvert_NV12_FRAME_601_Kernel_param_5];
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	shl.b32 	%r14, %r1, 1;
	mov.u32 	%r15, %ntid.y;
	mov.u32 	%r16, %ctaid.y;
	mov.u32 	%r17, %tid.y;
	mad.lo.s32 	%r2, %r15, %r16, %r17;
	setp.ge.u32	%p1, %r2, %r10;
	setp.ge.u32	%p2, %r14, %r9;
	or.pred  	%p3, %p1, %p2;
	@%p3 bra 	BB7_7;

	cvt.rn.f32.s32	%f26, %r2;
	add.ftz.f32 	%f27, %f26, 0fBE800000;
	mov.f32 	%f28, 0f00000000;
	max.ftz.f32 	%f29, %f27, %f28;
	cvt.rn.f32.u32	%f30, %r10;
	min.ftz.f32 	%f31, %f29, %f30;
	fma.rn.ftz.f32 	%f32, %f31, 0f3F000000, %f30;
	add.ftz.f32 	%f23, %f32, 0f3F000000;
	cvt.rn.f32.s32	%f33, %r1;
	add.ftz.f32 	%f24, %f33, 0f3F000000;
	cvt.rn.f32.u32	%f34, %r9;
	min.ftz.f32 	%f35, %f24, %f34;
	add.ftz.f32 	%f36, %f35, 0f00000000;
	add.ftz.f32 	%f20, %f36, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [inTexture, {%f20, %f23}];
	// inline asm
	mov.b32 	 %f37, %r18;
	mov.b32 	 %f38, %r19;
	// inline asm
	tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [inTexture, {%f24, %f23}];
	// inline asm
	add.ftz.f32 	%f25, %f26, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inTexture, {%f24, %f25}];
	// inline asm
	mov.b32 	 %f1, %r26;
	mov.b32 	 %f39, %r27;
	mul.ftz.f32 	%f40, %f38, 0f437F0000;
	mov.f32 	%f41, 0f437F0000;
	mul.ftz.f32 	%f42, %f37, 0f437F0000;
	mul.ftz.f32 	%f43, %f39, 0f437F0000;
	ld.const.f32 	%f44, [kYCbCrOffset];
	div.approx.ftz.f32 	%f45, %f41, %f41;
	mul.ftz.f32 	%f2, %f44, %f45;
	sub.ftz.f32 	%f46, %f43, %f2;
	ld.const.f32 	%f47, [kYCbCrOffset+4];
	mul.ftz.f32 	%f3, %f47, %f45;
	sub.ftz.f32 	%f48, %f42, %f3;
	ld.const.f32 	%f49, [kYCbCrOffset+8];
	mul.ftz.f32 	%f4, %f49, %f45;
	sub.ftz.f32 	%f50, %f40, %f4;
	ld.const.f32 	%f5, [k601YCbCr_To_RGB32f];
	ld.const.f32 	%f6, [k601YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f51, %f48, %f6;
	fma.rn.ftz.f32 	%f52, %f46, %f5, %f51;
	ld.const.f32 	%f7, [k601YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f8, %f50, %f7, %f52;
	ld.const.f32 	%f9, [k601YCbCr_To_RGB32f+12];
	ld.const.f32 	%f10, [k601YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f53, %f48, %f10;
	fma.rn.ftz.f32 	%f54, %f46, %f9, %f53;
	ld.const.f32 	%f11, [k601YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f12, %f50, %f11, %f54;
	ld.const.f32 	%f13, [k601YCbCr_To_RGB32f+24];
	ld.const.f32 	%f14, [k601YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f55, %f48, %f14;
	fma.rn.ftz.f32 	%f56, %f46, %f13, %f55;
	ld.const.f32 	%f15, [k601YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f16, %f50, %f15, %f56;
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB7_3;

	cvta.to.global.u64 	%rd9, %rd2;
	mad.lo.s32 	%r39, %r2, %r7, %r14;
	add.s32 	%r40, %r39, 1;
	mul.wide.s32 	%rd10, %r40, 16;
	add.s64 	%rd11, %rd9, %rd10;
	mov.f32 	%f57, 0f3F800000;
	st.global.v4.f32 	[%rd11], {%f16, %f12, %f8, %f57};
	bra.uni 	BB7_4;

BB7_3:
	cvta.to.global.u64 	%rd12, %rd2;
	mad.lo.s32 	%r50, %r2, %r7, %r14;
	add.s32 	%r51, %r50, 1;
	mul.wide.s32 	%rd13, %r51, 8;
	add.s64 	%rd14, %rd12, %rd13;
	mov.f32 	%f58, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f58;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f12;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f16;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd14], {%rs4, %rs3, %rs2, %rs1};

BB7_4:
	mov.b32 	 %f59, %r23;
	mul.ftz.f32 	%f60, %f59, 0f437F0000;
	mov.b32 	 %f61, %r22;
	mul.ftz.f32 	%f62, %f61, 0f437F0000;
	mul.ftz.f32 	%f63, %f1, 0f437F0000;
	sub.ftz.f32 	%f64, %f63, %f2;
	sub.ftz.f32 	%f65, %f62, %f3;
	sub.ftz.f32 	%f66, %f60, %f4;
	mul.ftz.f32 	%f67, %f65, %f6;
	fma.rn.ftz.f32 	%f68, %f64, %f5, %f67;
	fma.rn.ftz.f32 	%f17, %f66, %f7, %f68;
	mul.ftz.f32 	%f69, %f65, %f10;
	fma.rn.ftz.f32 	%f70, %f64, %f9, %f69;
	fma.rn.ftz.f32 	%f18, %f66, %f11, %f70;
	mul.ftz.f32 	%f71, %f65, %f14;
	fma.rn.ftz.f32 	%f72, %f64, %f13, %f71;
	fma.rn.ftz.f32 	%f19, %f66, %f15, %f72;
	mad.lo.s32 	%r61, %r2, %r7, %r14;
	cvt.s64.s32	%rd1, %r61;
	@%p4 bra 	BB7_6;

	cvta.to.global.u64 	%rd15, %rd2;
	shl.b64 	%rd16, %rd1, 4;
	add.s64 	%rd17, %rd15, %rd16;
	mov.f32 	%f73, 0f3F800000;
	st.global.v4.f32 	[%rd17], {%f19, %f18, %f17, %f73};
	bra.uni 	BB7_7;

BB7_6:
	cvta.to.global.u64 	%rd18, %rd2;
	shl.b64 	%rd19, %rd1, 3;
	add.s64 	%rd20, %rd18, %rd19;
	mov.f32 	%f74, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f74;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f17;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f18;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f19;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd20], {%rs8, %rs7, %rs6, %rs5};

BB7_7:
	ret;
}
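// PixelFormatConvert_NV12_FIELD_709_Kernel: as the FIELD_601 variant but with
// the BT.709 video-range matrix (k709YCbCr_To_RGB32f).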

.visible .entry PixelFormatConvert_NV12_FIELD_709_Kernel(
	.param .u64 PixelFormatConvert_NV12_FIELD_709_Kernel_param_0,
	.param .u64 PixelFormatConvert_NV12_FIELD_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_NV12_FIELD_709_Kernel_param_2,
	.param .u32 PixelFormatConvert_NV12_FIELD_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_NV12_FIELD_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_NV12_FIELD_709_Kernel_param_5
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<99>;
	.reg .f32 	%f<130>;
	.reg .s64 	%rd<41>;


	ld.param.u64 	%rd4, [PixelFormatConvert_NV12_FIELD_709_Kernel_param_1];
	ld.param.u32 	%r8, [PixelFormatConvert_NV12_FIELD_709_Kernel_param_2];
	ld.param.u32 	%r9, [PixelFormatConvert_NV12_FIELD_709_Kernel_param_3];
	ld.param.u32 	%r10, [PixelFormatConvert_NV12_FIELD_709_Kernel_param_4];
	ld.param.u32 	%r11, [PixelFormatConvert_NV12_FIELD_709_Kernel_param_5];
	mov.u32 	%r12, %ntid.x;
	mov.u32 	%r13, %ctaid.x;
	mov.u32 	%r14, %tid.x;
	mad.lo.s32 	%r1, %r12, %r13, %r14;
	shl.b32 	%r15, %r1, 1;
	mov.u32 	%r16, %ntid.y;
	mov.u32 	%r17, %ctaid.y;
	mov.u32 	%r18, %tid.y;
	mad.lo.s32 	%r2, %r16, %r17, %r18;
	setp.ge.u32	%p1, %r2, %r11;
	setp.ge.u32	%p2, %r15, %r10;
	or.pred  	%p3, %p1, %p2;
	@%p3 bra 	BB8_13;

	cvt.rn.f32.s32	%f1, %r2;
	add.ftz.f32 	%f40, %f1, 0fBE000000;
	mov.f32 	%f41, 0f00000000;
	max.ftz.f32 	%f42, %f40, %f41;
	cvt.rn.f32.u32	%f2, %r11;
	min.ftz.f32 	%f43, %f42, %f2;
	fma.rn.ftz.f32 	%f44, %f43, 0f3F000000, %f2;
	add.ftz.f32 	%f37, %f44, 0f3F000000;
	cvt.rn.f32.s32	%f3, %r1;
	add.ftz.f32 	%f38, %f3, 0f3F000000;
	cvt.rn.f32.u32	%f4, %r10;
	min.ftz.f32 	%f5, %f38, %f4;
	add.ftz.f32 	%f45, %f5, 0f00000000;
	add.ftz.f32 	%f34, %f45, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r19, %r20, %r21, %r22}, [inTexture, {%f34, %f37}];
	// inline asm
	mov.b32 	 %f46, %r19;
	mov.b32 	 %f47, %r20;
	// inline asm
	tex.2d.v4.u32.f32 {%r23, %r24, %r25, %r26}, [inTexture, {%f38, %f37}];
	// inline asm
	add.ftz.f32 	%f39, %f1, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r27, %r28, %r29, %r30}, [inTexture, {%f38, %f39}];
	// inline asm
	mov.b32 	 %f6, %r27;
	mov.b32 	 %f48, %r28;
	mul.ftz.f32 	%f49, %f47, 0f437F0000;
	mov.f32 	%f50, 0f437F0000;
	mul.ftz.f32 	%f51, %f46, 0f437F0000;
	mul.ftz.f32 	%f52, %f48, 0f437F0000;
	ld.const.f32 	%f53, [kYCbCrOffset];
	div.approx.ftz.f32 	%f54, %f50, %f50;
	mul.ftz.f32 	%f7, %f53, %f54;
	sub.ftz.f32 	%f55, %f52, %f7;
	ld.const.f32 	%f56, [kYCbCrOffset+4];
	mul.ftz.f32 	%f8, %f56, %f54;
	sub.ftz.f32 	%f57, %f51, %f8;
	ld.const.f32 	%f58, [kYCbCrOffset+8];
	mul.ftz.f32 	%f9, %f58, %f54;
	sub.ftz.f32 	%f59, %f49, %f9;
	ld.const.f32 	%f10, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f11, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f60, %f57, %f11;
	fma.rn.ftz.f32 	%f61, %f55, %f10, %f60;
	ld.const.f32 	%f12, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f13, %f59, %f12, %f61;
	ld.const.f32 	%f14, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f15, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f62, %f57, %f15;
	fma.rn.ftz.f32 	%f63, %f55, %f14, %f62;
	ld.const.f32 	%f16, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f17, %f59, %f16, %f63;
	ld.const.f32 	%f18, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f19, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f64, %f57, %f19;
	fma.rn.ftz.f32 	%f65, %f55, %f18, %f64;
	ld.const.f32 	%f20, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f21, %f59, %f20, %f65;
	setp.eq.s32	%p4, %r9, 0;
	@%p4 bra 	BB8_3;

	cvta.to.global.u64 	%rd11, %rd4;
	mul.lo.s32 	%r35, %r2, %r8;
	shl.b32 	%r36, %r35, 1;
	add.s32 	%r42, %r15, %r36;
	add.s32 	%r43, %r42, 1;
	mul.wide.s32 	%rd12, %r43, 16;
	add.s64 	%rd13, %rd11, %rd12;
	mov.f32 	%f66, 0f3F800000;
	st.global.v4.f32 	[%rd13], {%f21, %f17, %f13, %f66};
	bra.uni 	BB8_4;

BB8_3:
	cvta.to.global.u64 	%rd14, %rd4;
	mul.lo.s32 	%r48, %r2, %r8;
	shl.b32 	%r49, %r48, 1;
	add.s32 	%r55, %r15, %r49;
	add.s32 	%r56, %r55, 1;
	mul.wide.s32 	%rd15, %r56, 8;
	add.s64 	%rd16, %rd14, %rd15;
	mov.f32 	%f67, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f67;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f17;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f21;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd16], {%rs4, %rs3, %rs2, %rs1};

BB8_4:
	mov.b32 	 %f68, %r24;
	mul.ftz.f32 	%f69, %f68, 0f437F0000;
	mov.b32 	 %f70, %r23;
	mul.ftz.f32 	%f71, %f70, 0f437F0000;
	mul.ftz.f32 	%f72, %f6, 0f437F0000;
	sub.ftz.f32 	%f73, %f72, %f7;
	sub.ftz.f32 	%f74, %f71, %f8;
	sub.ftz.f32 	%f75, %f69, %f9;
	mul.ftz.f32 	%f76, %f74, %f11;
	fma.rn.ftz.f32 	%f77, %f73, %f10, %f76;
	fma.rn.ftz.f32 	%f22, %f75, %f12, %f77;
	mul.ftz.f32 	%f78, %f74, %f15;
	fma.rn.ftz.f32 	%f79, %f73, %f14, %f78;
	fma.rn.ftz.f32 	%f23, %f75, %f16, %f79;
	mul.ftz.f32 	%f80, %f74, %f19;
	fma.rn.ftz.f32 	%f81, %f73, %f18, %f80;
	fma.rn.ftz.f32 	%f24, %f75, %f20, %f81;
	shl.b32 	%r66, %r2, 1;
	mad.lo.s32 	%r67, %r66, %r8, %r15;
	cvt.s64.s32	%rd1, %r67;
	@%p4 bra 	BB8_6;

	cvta.to.global.u64 	%rd17, %rd4;
	shl.b64 	%rd18, %rd1, 4;
	add.s64 	%rd19, %rd17, %rd18;
	mov.f32 	%f82, 0f3F800000;
	st.global.v4.f32 	[%rd19], {%f24, %f23, %f22, %f82};
	bra.uni 	BB8_7;

BB8_6:
	cvta.to.global.u64 	%rd20, %rd4;
	shl.b64 	%rd21, %rd1, 3;
	add.s64 	%rd22, %rd20, %rd21;
	mov.f32 	%f83, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f83;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f22;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f23;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f24;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd22], {%rs8, %rs7, %rs6, %rs5};

BB8_7:
	add.ftz.f32 	%f90, %f1, 0f3E000000;
	max.ftz.f32 	%f92, %f90, %f41;
	min.ftz.f32 	%f93, %f92, %f2;
	fma.rn.ftz.f32 	%f94, %f93, 0f3F000000, %f2;
	add.ftz.f32 	%f87, %f94, 0f3F000000;
	mov.f32 	%f95, 0f40000000;
	div.approx.ftz.f32 	%f96, %f4, %f95;
	add.ftz.f32 	%f97, %f5, %f96;
	add.ftz.f32 	%f84, %f97, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r68, %r69, %r70, %r71}, [inTexture, {%f84, %f87}];
	// inline asm
	mov.b32 	 %f98, %r68;
	mov.b32 	 %f99, %r69;
	add.ftz.f32 	%f100, %f3, %f96;
	add.ftz.f32 	%f88, %f100, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r72, %r73, %r74, %r75}, [inTexture, {%f88, %f87}];
	// inline asm
	mov.b32 	 %f25, %r72;
	mov.b32 	 %f26, %r73;
	// inline asm
	tex.2d.v4.u32.f32 {%r76, %r77, %r78, %r79}, [inTexture, {%f88, %f39}];
	// inline asm
	add.s32 	%r85, %r66, 1;
	mov.b32 	 %f27, %r76;
	mov.b32 	 %f101, %r77;
	mul.ftz.f32 	%f102, %f99, 0f437F0000;
	mul.ftz.f32 	%f103, %f98, 0f437F0000;
	mul.ftz.f32 	%f104, %f101, 0f437F0000;
	sub.ftz.f32 	%f105, %f104, %f7;
	sub.ftz.f32 	%f106, %f103, %f8;
	sub.ftz.f32 	%f107, %f102, %f9;
	mul.ftz.f32 	%f108, %f106, %f11;
	fma.rn.ftz.f32 	%f109, %f105, %f10, %f108;
	fma.rn.ftz.f32 	%f28, %f107, %f12, %f109;
	mul.ftz.f32 	%f110, %f106, %f15;
	fma.rn.ftz.f32 	%f111, %f105, %f14, %f110;
	fma.rn.ftz.f32 	%f29, %f107, %f16, %f111;
	mul.ftz.f32 	%f112, %f106, %f19;
	fma.rn.ftz.f32 	%f113, %f105, %f18, %f112;
	fma.rn.ftz.f32 	%f30, %f107, %f20, %f113;
	mul.lo.s32 	%r7, %r85, %r8;
	add.s32 	%r91, %r15, %r7;
	add.s32 	%r92, %r91, 1;
	cvt.s64.s32	%rd2, %r92;
	@%p4 bra 	BB8_9;

	cvta.to.global.u64 	%rd29, %rd4;
	shl.b64 	%rd30, %rd2, 4;
	add.s64 	%rd31, %rd29, %rd30;
	mov.f32 	%f114, 0f3F800000;
	st.global.v4.f32 	[%rd31], {%f30, %f29, %f28, %f114};
	bra.uni 	BB8_10;

BB8_9:
	cvta.to.global.u64 	%rd32, %rd4;
	shl.b64 	%rd33, %rd2, 3;
	add.s64 	%rd34, %rd32, %rd33;
	mov.f32 	%f115, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f115;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f28;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f29;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f30;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd34], {%rs12, %rs11, %rs10, %rs9};

BB8_10:
	mul.ftz.f32 	%f116, %f27, 0f437F0000;
	sub.ftz.f32 	%f117, %f116, %f7;
	mul.ftz.f32 	%f118, %f25, 0f437F0000;
	sub.ftz.f32 	%f119, %f118, %f8;
	mul.ftz.f32 	%f120, %f26, 0f437F0000;
	sub.ftz.f32 	%f121, %f120, %f9;
	mul.ftz.f32 	%f122, %f119, %f11;
	fma.rn.ftz.f32 	%f123, %f117, %f10, %f122;
	fma.rn.ftz.f32 	%f31, %f121, %f12, %f123;
	mul.ftz.f32 	%f124, %f119, %f15;
	fma.rn.ftz.f32 	%f125, %f117, %f14, %f124;
	fma.rn.ftz.f32 	%f32, %f121, %f16, %f125;
	mul.ftz.f32 	%f126, %f119, %f19;
	fma.rn.ftz.f32 	%f127, %f117, %f18, %f126;
	fma.rn.ftz.f32 	%f33, %f121, %f20, %f127;
	add.s32 	%r98, %r7, %r15;
	cvt.s64.s32	%rd3, %r98;
	@%p4 bra 	BB8_12;

	cvta.to.global.u64 	%rd35, %rd4;
	shl.b64 	%rd36, %rd3, 4;
	add.s64 	%rd37, %rd35, %rd36;
	mov.f32 	%f128, 0f3F800000;
	st.global.v4.f32 	[%rd37], {%f33, %f32, %f31, %f128};
	bra.uni 	BB8_13;

BB8_12:
	cvta.to.global.u64 	%rd38, %rd4;
	shl.b64 	%rd39, %rd3, 3;
	add.s64 	%rd40, %rd38, %rd39;
	mov.f32 	%f129, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f129;
	mov.b16 	%rs13, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f31;
	mov.b16 	%rs14, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f32;
	mov.b16 	%rs15, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f33;
	mov.b16 	%rs16, %temp;
}
	st.global.v4.u16 	[%rd40], {%rs16, %rs15, %rs14, %rs13};

BB8_13:
	ret;
}
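// PixelFormatConvert_NV12_FRAME_709_Kernel: as the FRAME_601 variant but with
// the BT.709 video-range matrix (k709YCbCr_To_RGB32f).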

.visible .entry PixelFormatConvert_NV12_FRAME_709_Kernel(
	.param .u64 PixelFormatConvert_NV12_FRAME_709_Kernel_param_0,
	.param .u64 PixelFormatConvert_NV12_FRAME_709_Kernel_param_1,
	.param .u32 PixelFormatConvert_NV12_FRAME_709_Kernel_param_2,
	.param .u32 PixelFormatConvert_NV12_FRAME_709_Kernel_param_3,
	.param .u32 PixelFormatConvert_NV12_FRAME_709_Kernel_param_4,
	.param .u32 PixelFormatConvert_NV12_FRAME_709_Kernel_param_5
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<9>;
	.reg .s32 	%r<62>;
	.reg .f32 	%f<75>;
	.reg .s64 	%rd<21>;


	ld.param.u64 	%rd2, [PixelFormatConvert_NV12_FRAME_709_Kernel_param_1];
	ld.param.u32 	%r7, [PixelFormatConvert_NV12_FRAME_709_Kernel_param_2];
	ld.param.u32 	%r8, [PixelFormatConvert_NV12_FRAME_709_Kernel_param_3];
	ld.param.u32 	%r9, [PixelFormatConvert_NV12_FRAME_709_Kernel_param_4];
	ld.param.u32 	%r10, [PixelFormatConvert_NV12_FRAME_709_Kernel_param_5];
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	shl.b32 	%r14, %r1, 1;
	mov.u32 	%r15, %ntid.y;
	mov.u32 	%r16, %ctaid.y;
	mov.u32 	%r17, %tid.y;
	mad.lo.s32 	%r2, %r15, %r16, %r17;
	setp.ge.u32	%p1, %r2, %r10;
	setp.ge.u32	%p2, %r14, %r9;
	or.pred  	%p3, %p1, %p2;
	@%p3 bra 	BB9_7;

	cvt.rn.f32.s32	%f26, %r2;
	add.ftz.f32 	%f27, %f26, 0fBE800000;
	mov.f32 	%f28, 0f00000000;
	max.ftz.f32 	%f29, %f27, %f28;
	cvt.rn.f32.u32	%f30, %r10;
	min.ftz.f32 	%f31, %f29, %f30;
	fma.rn.ftz.f32 	%f32, %f31, 0f3F000000, %f30;
	add.ftz.f32 	%f23, %f32, 0f3F000000;
	cvt.rn.f32.s32	%f33, %r1;
	add.ftz.f32 	%f24, %f33, 0f3F000000;
	cvt.rn.f32.u32	%f34, %r9;
	min.ftz.f32 	%f35, %f24, %f34;
	add.ftz.f32 	%f36, %f35, 0f00000000;
	add.ftz.f32 	%f20, %f36, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [inTexture, {%f20, %f23}];
	// inline asm
	mov.b32 	 %f37, %r18;
	mov.b32 	 %f38, %r19;
	// inline asm
	tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [inTexture, {%f24, %f23}];
	// inline asm
	add.ftz.f32 	%f25, %f26, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inTexture, {%f24, %f25}];
	// inline asm
	mov.b32 	 %f1, %r26;
	mov.b32 	 %f39, %r27;
	mul.ftz.f32 	%f40, %f38, 0f437F0000;
	mov.f32 	%f41, 0f437F0000;
	mul.ftz.f32 	%f42, %f37, 0f437F0000;
	mul.ftz.f32 	%f43, %f39, 0f437F0000;
	ld.const.f32 	%f44, [kYCbCrOffset];
	div.approx.ftz.f32 	%f45, %f41, %f41;
	mul.ftz.f32 	%f2, %f44, %f45;
	sub.ftz.f32 	%f46, %f43, %f2;
	ld.const.f32 	%f47, [kYCbCrOffset+4];
	mul.ftz.f32 	%f3, %f47, %f45;
	sub.ftz.f32 	%f48, %f42, %f3;
	ld.const.f32 	%f49, [kYCbCrOffset+8];
	mul.ftz.f32 	%f4, %f49, %f45;
	sub.ftz.f32 	%f50, %f40, %f4;
	ld.const.f32 	%f5, [k709YCbCr_To_RGB32f];
	ld.const.f32 	%f6, [k709YCbCr_To_RGB32f+4];
	mul.ftz.f32 	%f51, %f48, %f6;
	fma.rn.ftz.f32 	%f52, %f46, %f5, %f51;
	ld.const.f32 	%f7, [k709YCbCr_To_RGB32f+8];
	fma.rn.ftz.f32 	%f8, %f50, %f7, %f52;
	ld.const.f32 	%f9, [k709YCbCr_To_RGB32f+12];
	ld.const.f32 	%f10, [k709YCbCr_To_RGB32f+16];
	mul.ftz.f32 	%f53, %f48, %f10;
	fma.rn.ftz.f32 	%f54, %f46, %f9, %f53;
	ld.const.f32 	%f11, [k709YCbCr_To_RGB32f+20];
	fma.rn.ftz.f32 	%f12, %f50, %f11, %f54;
	ld.const.f32 	%f13, [k709YCbCr_To_RGB32f+24];
	ld.const.f32 	%f14, [k709YCbCr_To_RGB32f+28];
	mul.ftz.f32 	%f55, %f48, %f14;
	fma.rn.ftz.f32 	%f56, %f46, %f13, %f55;
	ld.const.f32 	%f15, [k709YCbCr_To_RGB32f+32];
	fma.rn.ftz.f32 	%f16, %f50, %f15, %f56;
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB9_3;

	cvta.to.global.u64 	%rd9, %rd2;
	mad.lo.s32 	%r39, %r2, %r7, %r14;
	add.s32 	%r40, %r39, 1;
	mul.wide.s32 	%rd10, %r40, 16;
	add.s64 	%rd11, %rd9, %rd10;
	mov.f32 	%f57, 0f3F800000;
	st.global.v4.f32 	[%rd11], {%f16, %f12, %f8, %f57};
	bra.uni 	BB9_4;

BB9_3:
	cvta.to.global.u64 	%rd12, %rd2;
	mad.lo.s32 	%r50, %r2, %r7, %r14;
	add.s32 	%r51, %r50, 1;
	mul.wide.s32 	%rd13, %r51, 8;
	add.s64 	%rd14, %rd12, %rd13;
	mov.f32 	%f58, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f58;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f12;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f16;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd14], {%rs4, %rs3, %rs2, %rs1};

BB9_4:
	mov.b32 	 %f59, %r23;
	mul.ftz.f32 	%f60, %f59, 0f437F0000;
	mov.b32 	 %f61, %r22;
	mul.ftz.f32 	%f62, %f61, 0f437F0000;
	mul.ftz.f32 	%f63, %f1, 0f437F0000;
	sub.ftz.f32 	%f64, %f63, %f2;
	sub.ftz.f32 	%f65, %f62, %f3;
	sub.ftz.f32 	%f66, %f60, %f4;
	mul.ftz.f32 	%f67, %f65, %f6;
	fma.rn.ftz.f32 	%f68, %f64, %f5, %f67;
	fma.rn.ftz.f32 	%f17, %f66, %f7, %f68;
	mul.ftz.f32 	%f69, %f65, %f10;
	fma.rn.ftz.f32 	%f70, %f64, %f9, %f69;
	fma.rn.ftz.f32 	%f18, %f66, %f11, %f70;
	mul.ftz.f32 	%f71, %f65, %f14;
	fma.rn.ftz.f32 	%f72, %f64, %f13, %f71;
	fma.rn.ftz.f32 	%f19, %f66, %f15, %f72;
	mad.lo.s32 	%r61, %r2, %r7, %r14;
	cvt.s64.s32	%rd1, %r61;
	@%p4 bra 	BB9_6;

	cvta.to.global.u64 	%rd15, %rd2;
	shl.b64 	%rd16, %rd1, 4;
	add.s64 	%rd17, %rd15, %rd16;
	mov.f32 	%f73, 0f3F800000;
	st.global.v4.f32 	[%rd17], {%f19, %f18, %f17, %f73};
	bra.uni 	BB9_7;

BB9_6:
	cvta.to.global.u64 	%rd18, %rd2;
	shl.b64 	%rd19, %rd1, 3;
	add.s64 	%rd20, %rd18, %rd19;
	mov.f32 	%f74, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f74;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f17;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f18;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f19;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd20], {%rs8, %rs7, %rs6, %rs5};

BB9_7:
	ret;
}
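// PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel: full-range BT.601
// (kYCbCrFullRangeOffset, k601YCbCrFullRange_To_RGB32f), 2x2 block per thread.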

.visible .entry PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel(
	.param .u64 PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel_param_0,
	.param .u64 PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel_param_1,
	.param .u32 PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel_param_2,
	.param .u32 PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel_param_3,
	.param .u32 PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel_param_4,
	.param .u32 PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel_param_5
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<99>;
	.reg .f32 	%f<130>;
	.reg .s64 	%rd<41>;


	ld.param.u64 	%rd4, [PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel_param_1];
	ld.param.u32 	%r8, [PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel_param_2];
	ld.param.u32 	%r9, [PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel_param_3];
	ld.param.u32 	%r10, [PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel_param_4];
	ld.param.u32 	%r11, [PixelFormatConvert_NV12_FIELD_601_FullRange_Kernel_param_5];
	mov.u32 	%r12, %ntid.x;
	mov.u32 	%r13, %ctaid.x;
	mov.u32 	%r14, %tid.x;
	mad.lo.s32 	%r1, %r12, %r13, %r14;
	shl.b32 	%r15, %r1, 1;
	mov.u32 	%r16, %ntid.y;
	mov.u32 	%r17, %ctaid.y;
	mov.u32 	%r18, %tid.y;
	mad.lo.s32 	%r2, %r16, %r17, %r18;
	setp.ge.u32	%p1, %r2, %r11;
	setp.ge.u32	%p2, %r15, %r10;
	or.pred  	%p3, %p1, %p2;
	@%p3 bra 	BB10_13;

	cvt.rn.f32.s32	%f1, %r2;
	add.ftz.f32 	%f40, %f1, 0fBE000000;
	mov.f32 	%f41, 0f00000000;
	max.ftz.f32 	%f42, %f40, %f41;
	cvt.rn.f32.u32	%f2, %r11;
	min.ftz.f32 	%f43, %f42, %f2;
	fma.rn.ftz.f32 	%f44, %f43, 0f3F000000, %f2;
	add.ftz.f32 	%f37, %f44, 0f3F000000;
	cvt.rn.f32.s32	%f3, %r1;
	add.ftz.f32 	%f38, %f3, 0f3F000000;
	cvt.rn.f32.u32	%f4, %r10;
	min.ftz.f32 	%f5, %f38, %f4;
	add.ftz.f32 	%f45, %f5, 0f00000000;
	add.ftz.f32 	%f34, %f45, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r19, %r20, %r21, %r22}, [inTexture, {%f34, %f37}];
	// inline asm
	mov.b32 	 %f46, %r19;
	mov.b32 	 %f47, %r20;
	// inline asm
	tex.2d.v4.u32.f32 {%r23, %r24, %r25, %r26}, [inTexture, {%f38, %f37}];
	// inline asm
	add.ftz.f32 	%f39, %f1, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r27, %r28, %r29, %r30}, [inTexture, {%f38, %f39}];
	// inline asm
	mov.b32 	 %f6, %r27;
	mov.b32 	 %f48, %r28;
	mul.ftz.f32 	%f49, %f47, 0f437F0000;
	mov.f32 	%f50, 0f437F0000;
	mul.ftz.f32 	%f51, %f46, 0f437F0000;
	mul.ftz.f32 	%f52, %f48, 0f437F0000;
	ld.const.f32 	%f53, [kYCbCrFullRangeOffset];
	div.approx.ftz.f32 	%f54, %f50, %f50;
	mul.ftz.f32 	%f7, %f53, %f54;
	sub.ftz.f32 	%f55, %f52, %f7;
	ld.const.f32 	%f56, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f8, %f56, %f54;
	sub.ftz.f32 	%f57, %f51, %f8;
	ld.const.f32 	%f58, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f9, %f58, %f54;
	sub.ftz.f32 	%f59, %f49, %f9;
	ld.const.f32 	%f10, [k601YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f11, [k601YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f60, %f57, %f11;
	fma.rn.ftz.f32 	%f61, %f55, %f10, %f60;
	ld.const.f32 	%f12, [k601YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f13, %f59, %f12, %f61;
	ld.const.f32 	%f14, [k601YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f15, [k601YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f62, %f57, %f15;
	fma.rn.ftz.f32 	%f63, %f55, %f14, %f62;
	ld.const.f32 	%f16, [k601YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f17, %f59, %f16, %f63;
	ld.const.f32 	%f18, [k601YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f19, [k601YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f64, %f57, %f19;
	fma.rn.ftz.f32 	%f65, %f55, %f18, %f64;
	ld.const.f32 	%f20, [k601YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f21, %f59, %f20, %f65;
	setp.eq.s32	%p4, %r9, 0;
	@%p4 bra 	BB10_3;

	cvta.to.global.u64 	%rd11, %rd4;
	mul.lo.s32 	%r35, %r2, %r8;
	shl.b32 	%r36, %r35, 1;
	add.s32 	%r42, %r15, %r36;
	add.s32 	%r43, %r42, 1;
	mul.wide.s32 	%rd12, %r43, 16;
	add.s64 	%rd13, %rd11, %rd12;
	mov.f32 	%f66, 0f3F800000;
	st.global.v4.f32 	[%rd13], {%f21, %f17, %f13, %f66};
	bra.uni 	BB10_4;

BB10_3:
	cvta.to.global.u64 	%rd14, %rd4;
	mul.lo.s32 	%r48, %r2, %r8;
	shl.b32 	%r49, %r48, 1;
	add.s32 	%r55, %r15, %r49;
	add.s32 	%r56, %r55, 1;
	mul.wide.s32 	%rd15, %r56, 8;
	add.s64 	%rd16, %rd14, %rd15;
	mov.f32 	%f67, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f67;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f17;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f21;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd16], {%rs4, %rs3, %rs2, %rs1};

BB10_4:
	mov.b32 	 %f68, %r24;
	mul.ftz.f32 	%f69, %f68, 0f437F0000;
	mov.b32 	 %f70, %r23;
	mul.ftz.f32 	%f71, %f70, 0f437F0000;
	mul.ftz.f32 	%f72, %f6, 0f437F0000;
	sub.ftz.f32 	%f73, %f72, %f7;
	sub.ftz.f32 	%f74, %f71, %f8;
	sub.ftz.f32 	%f75, %f69, %f9;
	mul.ftz.f32 	%f76, %f74, %f11;
	fma.rn.ftz.f32 	%f77, %f73, %f10, %f76;
	fma.rn.ftz.f32 	%f22, %f75, %f12, %f77;
	mul.ftz.f32 	%f78, %f74, %f15;
	fma.rn.ftz.f32 	%f79, %f73, %f14, %f78;
	fma.rn.ftz.f32 	%f23, %f75, %f16, %f79;
	mul.ftz.f32 	%f80, %f74, %f19;
	fma.rn.ftz.f32 	%f81, %f73, %f18, %f80;
	fma.rn.ftz.f32 	%f24, %f75, %f20, %f81;
	shl.b32 	%r66, %r2, 1;
	mad.lo.s32 	%r67, %r66, %r8, %r15;
	cvt.s64.s32	%rd1, %r67;
	@%p4 bra 	BB10_6;

	cvta.to.global.u64 	%rd17, %rd4;
	shl.b64 	%rd18, %rd1, 4;
	add.s64 	%rd19, %rd17, %rd18;
	mov.f32 	%f82, 0f3F800000;
	st.global.v4.f32 	[%rd19], {%f24, %f23, %f22, %f82};
	bra.uni 	BB10_7;

BB10_6:
	cvta.to.global.u64 	%rd20, %rd4;
	shl.b64 	%rd21, %rd1, 3;
	add.s64 	%rd22, %rd20, %rd21;
	mov.f32 	%f83, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f83;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f22;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f23;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f24;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd22], {%rs8, %rs7, %rs6, %rs5};

BB10_7:
	add.ftz.f32 	%f90, %f1, 0f3E000000;
	max.ftz.f32 	%f92, %f90, %f41;
	min.ftz.f32 	%f93, %f92, %f2;
	fma.rn.ftz.f32 	%f94, %f93, 0f3F000000, %f2;
	add.ftz.f32 	%f87, %f94, 0f3F000000;
	mov.f32 	%f95, 0f40000000;
	div.approx.ftz.f32 	%f96, %f4, %f95;
	add.ftz.f32 	%f97, %f5, %f96;
	add.ftz.f32 	%f84, %f97, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r68, %r69, %r70, %r71}, [inTexture, {%f84, %f87}];
	// inline asm
	mov.b32 	 %f98, %r68;
	mov.b32 	 %f99, %r69;
	add.ftz.f32 	%f100, %f3, %f96;
	add.ftz.f32 	%f88, %f100, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r72, %r73, %r74, %r75}, [inTexture, {%f88, %f87}];
	// inline asm
	mov.b32 	 %f25, %r72;
	mov.b32 	 %f26, %r73;
	// inline asm
	tex.2d.v4.u32.f32 {%r76, %r77, %r78, %r79}, [inTexture, {%f88, %f39}];
	// inline asm
	add.s32 	%r85, %r66, 1;
	mov.b32 	 %f27, %r76;
	mov.b32 	 %f101, %r77;
	mul.ftz.f32 	%f102, %f99, 0f437F0000;
	mul.ftz.f32 	%f103, %f98, 0f437F0000;
	mul.ftz.f32 	%f104, %f101, 0f437F0000;
	sub.ftz.f32 	%f105, %f104, %f7;
	sub.ftz.f32 	%f106, %f103, %f8;
	sub.ftz.f32 	%f107, %f102, %f9;
	mul.ftz.f32 	%f108, %f106, %f11;
	fma.rn.ftz.f32 	%f109, %f105, %f10, %f108;
	fma.rn.ftz.f32 	%f28, %f107, %f12, %f109;
	mul.ftz.f32 	%f110, %f106, %f15;
	fma.rn.ftz.f32 	%f111, %f105, %f14, %f110;
	fma.rn.ftz.f32 	%f29, %f107, %f16, %f111;
	mul.ftz.f32 	%f112, %f106, %f19;
	fma.rn.ftz.f32 	%f113, %f105, %f18, %f112;
	fma.rn.ftz.f32 	%f30, %f107, %f20, %f113;
	mul.lo.s32 	%r7, %r85, %r8;
	add.s32 	%r91, %r15, %r7;
	add.s32 	%r92, %r91, 1;
	cvt.s64.s32	%rd2, %r92;
	@%p4 bra 	BB10_9;

	cvta.to.global.u64 	%rd29, %rd4;
	shl.b64 	%rd30, %rd2, 4;
	add.s64 	%rd31, %rd29, %rd30;
	mov.f32 	%f114, 0f3F800000;
	st.global.v4.f32 	[%rd31], {%f30, %f29, %f28, %f114};
	bra.uni 	BB10_10;

BB10_9:
	cvta.to.global.u64 	%rd32, %rd4;
	shl.b64 	%rd33, %rd2, 3;
	add.s64 	%rd34, %rd32, %rd33;
	mov.f32 	%f115, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f115;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f28;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f29;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f30;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd34], {%rs12, %rs11, %rs10, %rs9};

BB10_10:
	mul.ftz.f32 	%f116, %f27, 0f437F0000;
	sub.ftz.f32 	%f117, %f116, %f7;
	mul.ftz.f32 	%f118, %f25, 0f437F0000;
	sub.ftz.f32 	%f119, %f118, %f8;
	mul.ftz.f32 	%f120, %f26, 0f437F0000;
	sub.ftz.f32 	%f121, %f120, %f9;
	mul.ftz.f32 	%f122, %f119, %f11;
	fma.rn.ftz.f32 	%f123, %f117, %f10, %f122;
	fma.rn.ftz.f32 	%f31, %f121, %f12, %f123;
	mul.ftz.f32 	%f124, %f119, %f15;
	fma.rn.ftz.f32 	%f125, %f117, %f14, %f124;
	fma.rn.ftz.f32 	%f32, %f121, %f16, %f125;
	mul.ftz.f32 	%f126, %f119, %f19;
	fma.rn.ftz.f32 	%f127, %f117, %f18, %f126;
	fma.rn.ftz.f32 	%f33, %f121, %f20, %f127;
	add.s32 	%r98, %r7, %r15;
	cvt.s64.s32	%rd3, %r98;
	@%p4 bra 	BB10_12;

	cvta.to.global.u64 	%rd35, %rd4;
	shl.b64 	%rd36, %rd3, 4;
	add.s64 	%rd37, %rd35, %rd36;
	mov.f32 	%f128, 0f3F800000;
	st.global.v4.f32 	[%rd37], {%f33, %f32, %f31, %f128};
	bra.uni 	BB10_13;

BB10_12:
	cvta.to.global.u64 	%rd38, %rd4;
	shl.b64 	%rd39, %rd3, 3;
	add.s64 	%rd40, %rd38, %rd39;
	mov.f32 	%f129, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f129;
	mov.b16 	%rs13, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f31;
	mov.b16 	%rs14, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f32;
	mov.b16 	%rs15, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f33;
	mov.b16 	%rs16, %temp;
}
	st.global.v4.u16 	[%rd40], {%rs16, %rs15, %rs14, %rs13};

BB10_13:
	ret;
}
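// PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel: full-range BT.601,
// horizontal pixel pair per thread.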

.visible .entry PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel(
	.param .u64 PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel_param_0,
	.param .u64 PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel_param_1,
	.param .u32 PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel_param_2,
	.param .u32 PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel_param_3,
	.param .u32 PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel_param_4,
	.param .u32 PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel_param_5
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<9>;
	.reg .s32 	%r<62>;
	.reg .f32 	%f<75>;
	.reg .s64 	%rd<21>;


	ld.param.u64 	%rd2, [PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel_param_1];
	ld.param.u32 	%r7, [PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel_param_2];
	ld.param.u32 	%r8, [PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel_param_3];
	ld.param.u32 	%r9, [PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel_param_4];
	ld.param.u32 	%r10, [PixelFormatConvert_NV12_FRAME_601_FullRange_Kernel_param_5];
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	shl.b32 	%r14, %r1, 1;
	mov.u32 	%r15, %ntid.y;
	mov.u32 	%r16, %ctaid.y;
	mov.u32 	%r17, %tid.y;
	mad.lo.s32 	%r2, %r15, %r16, %r17;
	setp.ge.u32	%p1, %r2, %r10;
	setp.ge.u32	%p2, %r14, %r9;
	or.pred  	%p3, %p1, %p2;
	@%p3 bra 	BB11_7;

	cvt.rn.f32.s32	%f26, %r2;
	add.ftz.f32 	%f27, %f26, 0fBE800000;
	mov.f32 	%f28, 0f00000000;
	max.ftz.f32 	%f29, %f27, %f28;
	cvt.rn.f32.u32	%f30, %r10;
	min.ftz.f32 	%f31, %f29, %f30;
	fma.rn.ftz.f32 	%f32, %f31, 0f3F000000, %f30;
	add.ftz.f32 	%f23, %f32, 0f3F000000;
	cvt.rn.f32.s32	%f33, %r1;
	add.ftz.f32 	%f24, %f33, 0f3F000000;
	cvt.rn.f32.u32	%f34, %r9;
	min.ftz.f32 	%f35, %f24, %f34;
	add.ftz.f32 	%f36, %f35, 0f00000000;
	add.ftz.f32 	%f20, %f36, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [inTexture, {%f20, %f23}];
	// inline asm
	mov.b32 	 %f37, %r18;
	mov.b32 	 %f38, %r19;
	// inline asm
	tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [inTexture, {%f24, %f23}];
	// inline asm
	add.ftz.f32 	%f25, %f26, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inTexture, {%f24, %f25}];
	// inline asm
	mov.b32 	 %f1, %r26;
	mov.b32 	 %f39, %r27;
	mul.ftz.f32 	%f40, %f38, 0f437F0000;
	mov.f32 	%f41, 0f437F0000;
	mul.ftz.f32 	%f42, %f37, 0f437F0000;
	mul.ftz.f32 	%f43, %f39, 0f437F0000;
	ld.const.f32 	%f44, [kYCbCrFullRangeOffset];
	div.approx.ftz.f32 	%f45, %f41, %f41;
	mul.ftz.f32 	%f2, %f44, %f45;
	sub.ftz.f32 	%f46, %f43, %f2;
	ld.const.f32 	%f47, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f3, %f47, %f45;
	sub.ftz.f32 	%f48, %f42, %f3;
	ld.const.f32 	%f49, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f4, %f49, %f45;
	sub.ftz.f32 	%f50, %f40, %f4;
	ld.const.f32 	%f5, [k601YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f6, [k601YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f51, %f48, %f6;
	fma.rn.ftz.f32 	%f52, %f46, %f5, %f51;
	ld.const.f32 	%f7, [k601YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f8, %f50, %f7, %f52;
	ld.const.f32 	%f9, [k601YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f10, [k601YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f53, %f48, %f10;
	fma.rn.ftz.f32 	%f54, %f46, %f9, %f53;
	ld.const.f32 	%f11, [k601YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f12, %f50, %f11, %f54;
	ld.const.f32 	%f13, [k601YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f14, [k601YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f55, %f48, %f14;
	fma.rn.ftz.f32 	%f56, %f46, %f13, %f55;
	ld.const.f32 	%f15, [k601YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f16, %f50, %f15, %f56;
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB11_3;

	cvta.to.global.u64 	%rd9, %rd2;
	mad.lo.s32 	%r39, %r2, %r7, %r14;
	add.s32 	%r40, %r39, 1;
	mul.wide.s32 	%rd10, %r40, 16;
	add.s64 	%rd11, %rd9, %rd10;
	mov.f32 	%f57, 0f3F800000;
	st.global.v4.f32 	[%rd11], {%f16, %f12, %f8, %f57};
	bra.uni 	BB11_4;

BB11_3:
	cvta.to.global.u64 	%rd12, %rd2;
	mad.lo.s32 	%r50, %r2, %r7, %r14;
	add.s32 	%r51, %r50, 1;
	mul.wide.s32 	%rd13, %r51, 8;
	add.s64 	%rd14, %rd12, %rd13;
	mov.f32 	%f58, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f58;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f12;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f16;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd14], {%rs4, %rs3, %rs2, %rs1};

BB11_4:
	mov.b32 	 %f59, %r23;
	mul.ftz.f32 	%f60, %f59, 0f437F0000;
	mov.b32 	 %f61, %r22;
	mul.ftz.f32 	%f62, %f61, 0f437F0000;
	mul.ftz.f32 	%f63, %f1, 0f437F0000;
	sub.ftz.f32 	%f64, %f63, %f2;
	sub.ftz.f32 	%f65, %f62, %f3;
	sub.ftz.f32 	%f66, %f60, %f4;
	mul.ftz.f32 	%f67, %f65, %f6;
	fma.rn.ftz.f32 	%f68, %f64, %f5, %f67;
	fma.rn.ftz.f32 	%f17, %f66, %f7, %f68;
	mul.ftz.f32 	%f69, %f65, %f10;
	fma.rn.ftz.f32 	%f70, %f64, %f9, %f69;
	fma.rn.ftz.f32 	%f18, %f66, %f11, %f70;
	mul.ftz.f32 	%f71, %f65, %f14;
	fma.rn.ftz.f32 	%f72, %f64, %f13, %f71;
	fma.rn.ftz.f32 	%f19, %f66, %f15, %f72;
	mad.lo.s32 	%r61, %r2, %r7, %r14;
	cvt.s64.s32	%rd1, %r61;
	@%p4 bra 	BB11_6;

	cvta.to.global.u64 	%rd15, %rd2;
	shl.b64 	%rd16, %rd1, 4;
	add.s64 	%rd17, %rd15, %rd16;
	mov.f32 	%f73, 0f3F800000;
	st.global.v4.f32 	[%rd17], {%f19, %f18, %f17, %f73};
	bra.uni 	BB11_7;

BB11_6:
	cvta.to.global.u64 	%rd18, %rd2;
	shl.b64 	%rd19, %rd1, 3;
	add.s64 	%rd20, %rd18, %rd19;
	mov.f32 	%f74, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f74;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f17;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f18;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f19;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd20], {%rs8, %rs7, %rs6, %rs5};

BB11_7:
	ret;
}
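// PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel: full-range BT.709
// (kYCbCrFullRangeOffset, k709YCbCrFullRange_To_RGB32f), 2x2 block per thread.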

.visible .entry PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel(
	.param .u64 PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel_param_0,
	.param .u64 PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel_param_1,
	.param .u32 PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel_param_2,
	.param .u32 PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel_param_3,
	.param .u32 PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel_param_4,
	.param .u32 PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel_param_5
)
{
	.reg .pred 	%p<8>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<99>;
	.reg .f32 	%f<130>;
	.reg .s64 	%rd<41>;


	ld.param.u64 	%rd4, [PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel_param_1];
	ld.param.u32 	%r8, [PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel_param_2];
	ld.param.u32 	%r9, [PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel_param_3];
	ld.param.u32 	%r10, [PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel_param_4];
	ld.param.u32 	%r11, [PixelFormatConvert_NV12_FIELD_709_FullRange_Kernel_param_5];
	mov.u32 	%r12, %ntid.x;
	mov.u32 	%r13, %ctaid.x;
	mov.u32 	%r14, %tid.x;
	mad.lo.s32 	%r1, %r12, %r13, %r14;
	shl.b32 	%r15, %r1, 1;
	mov.u32 	%r16, %ntid.y;
	mov.u32 	%r17, %ctaid.y;
	mov.u32 	%r18, %tid.y;
	mad.lo.s32 	%r2, %r16, %r17, %r18;
	setp.ge.u32	%p1, %r2, %r11;
	setp.ge.u32	%p2, %r15, %r10;
	or.pred  	%p3, %p1, %p2;
	@%p3 bra 	BB12_13;

	cvt.rn.f32.s32	%f1, %r2;
	add.ftz.f32 	%f40, %f1, 0fBE000000;
	mov.f32 	%f41, 0f00000000;
	max.ftz.f32 	%f42, %f40, %f41;
	cvt.rn.f32.u32	%f2, %r11;
	min.ftz.f32 	%f43, %f42, %f2;
	fma.rn.ftz.f32 	%f44, %f43, 0f3F000000, %f2;
	add.ftz.f32 	%f37, %f44, 0f3F000000;
	cvt.rn.f32.s32	%f3, %r1;
	add.ftz.f32 	%f38, %f3, 0f3F000000;
	cvt.rn.f32.u32	%f4, %r10;
	min.ftz.f32 	%f5, %f38, %f4;
	add.ftz.f32 	%f45, %f5, 0f00000000;
	add.ftz.f32 	%f34, %f45, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r19, %r20, %r21, %r22}, [inTexture, {%f34, %f37}];
	// inline asm
	mov.b32 	 %f46, %r19;
	mov.b32 	 %f47, %r20;
	// inline asm
	tex.2d.v4.u32.f32 {%r23, %r24, %r25, %r26}, [inTexture, {%f38, %f37}];
	// inline asm
	add.ftz.f32 	%f39, %f1, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r27, %r28, %r29, %r30}, [inTexture, {%f38, %f39}];
	// inline asm
	mov.b32 	 %f6, %r27;
	mov.b32 	 %f48, %r28;
	mul.ftz.f32 	%f49, %f47, 0f437F0000;
	mov.f32 	%f50, 0f437F0000;
	mul.ftz.f32 	%f51, %f46, 0f437F0000;
	mul.ftz.f32 	%f52, %f48, 0f437F0000;
	ld.const.f32 	%f53, [kYCbCrFullRangeOffset];
	div.approx.ftz.f32 	%f54, %f50, %f50;
	mul.ftz.f32 	%f7, %f53, %f54;
	sub.ftz.f32 	%f55, %f52, %f7;
	ld.const.f32 	%f56, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f8, %f56, %f54;
	sub.ftz.f32 	%f57, %f51, %f8;
	ld.const.f32 	%f58, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f9, %f58, %f54;
	sub.ftz.f32 	%f59, %f49, %f9;
	ld.const.f32 	%f10, [k709YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f11, [k709YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f60, %f57, %f11;
	fma.rn.ftz.f32 	%f61, %f55, %f10, %f60;
	ld.const.f32 	%f12, [k709YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f13, %f59, %f12, %f61;
	ld.const.f32 	%f14, [k709YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f15, [k709YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f62, %f57, %f15;
	fma.rn.ftz.f32 	%f63, %f55, %f14, %f62;
	ld.const.f32 	%f16, [k709YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f17, %f59, %f16, %f63;
	ld.const.f32 	%f18, [k709YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f19, [k709YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f64, %f57, %f19;
	fma.rn.ftz.f32 	%f65, %f55, %f18, %f64;
	ld.const.f32 	%f20, [k709YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f21, %f59, %f20, %f65;
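	// Annotation added in editing (inferred): param_3 selects the store path
	// below. BB12_2 writes float4 RGBA with a 16-byte stride; BB12_3 converts
	// to f16 and writes half4 RGBA with an 8-byte stride.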
	setp.eq.s32	%p4, %r9, 0;
	@%p4 bra 	BB12_3;

	cvta.to.global.u64 	%rd11, %rd4;
	mul.lo.s32 	%r35, %r2, %r8;
	shl.b32 	%r36, %r35, 1;
	add.s32 	%r42, %r15, %r36;
	add.s32 	%r43, %r42, 1;
	mul.wide.s32 	%rd12, %r43, 16;
	add.s64 	%rd13, %rd11, %rd12;
	mov.f32 	%f66, 0f3F800000;
	st.global.v4.f32 	[%rd13], {%f21, %f17, %f13, %f66};
	bra.uni 	BB12_4;

BB12_3:
	cvta.to.global.u64 	%rd14, %rd4;
	mul.lo.s32 	%r48, %r2, %r8;
	shl.b32 	%r49, %r48, 1;
	add.s32 	%r55, %r15, %r49;
	add.s32 	%r56, %r55, 1;
	mul.wide.s32 	%rd15, %r56, 8;
	add.s64 	%rd16, %rd14, %rd15;
	mov.f32 	%f67, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f67;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f13;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f17;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f21;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd16], {%rs4, %rs3, %rs2, %rs1};

BB12_4:
	mov.b32 	 %f68, %r24;
	mul.ftz.f32 	%f69, %f68, 0f437F0000;
	mov.b32 	 %f70, %r23;
	mul.ftz.f32 	%f71, %f70, 0f437F0000;
	mul.ftz.f32 	%f72, %f6, 0f437F0000;
	sub.ftz.f32 	%f73, %f72, %f7;
	sub.ftz.f32 	%f74, %f71, %f8;
	sub.ftz.f32 	%f75, %f69, %f9;
	mul.ftz.f32 	%f76, %f74, %f11;
	fma.rn.ftz.f32 	%f77, %f73, %f10, %f76;
	fma.rn.ftz.f32 	%f22, %f75, %f12, %f77;
	mul.ftz.f32 	%f78, %f74, %f15;
	fma.rn.ftz.f32 	%f79, %f73, %f14, %f78;
	fma.rn.ftz.f32 	%f23, %f75, %f16, %f79;
	mul.ftz.f32 	%f80, %f74, %f19;
	fma.rn.ftz.f32 	%f81, %f73, %f18, %f80;
	fma.rn.ftz.f32 	%f24, %f75, %f20, %f81;
	shl.b32 	%r66, %r2, 1;
	mad.lo.s32 	%r67, %r66, %r8, %r15;
	cvt.s64.s32	%rd1, %r67;
	@%p4 bra 	BB12_6;

	cvta.to.global.u64 	%rd17, %rd4;
	shl.b64 	%rd18, %rd1, 4;
	add.s64 	%rd19, %rd17, %rd18;
	mov.f32 	%f82, 0f3F800000;
	st.global.v4.f32 	[%rd19], {%f24, %f23, %f22, %f82};
	bra.uni 	BB12_7;

BB12_6:
	cvta.to.global.u64 	%rd20, %rd4;
	shl.b64 	%rd21, %rd1, 3;
	add.s64 	%rd22, %rd20, %rd21;
	mov.f32 	%f83, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f83;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f22;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f23;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f24;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd22], {%rs8, %rs7, %rs6, %rs5};

BB12_7:
	add.ftz.f32 	%f90, %f1, 0f3E000000;
	max.ftz.f32 	%f92, %f90, %f41;
	min.ftz.f32 	%f93, %f92, %f2;
	fma.rn.ftz.f32 	%f94, %f93, 0f3F000000, %f2;
	add.ftz.f32 	%f87, %f94, 0f3F000000;
	mov.f32 	%f95, 0f40000000;
	div.approx.ftz.f32 	%f96, %f4, %f95;
	add.ftz.f32 	%f97, %f5, %f96;
	add.ftz.f32 	%f84, %f97, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r68, %r69, %r70, %r71}, [inTexture, {%f84, %f87}];
	// inline asm
	mov.b32 	 %f98, %r68;
	mov.b32 	 %f99, %r69;
	add.ftz.f32 	%f100, %f3, %f96;
	add.ftz.f32 	%f88, %f100, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r72, %r73, %r74, %r75}, [inTexture, {%f88, %f87}];
	// inline asm
	mov.b32 	 %f25, %r72;
	mov.b32 	 %f26, %r73;
	// inline asm
	tex.2d.v4.u32.f32 {%r76, %r77, %r78, %r79}, [inTexture, {%f88, %f39}];
	// inline asm
	add.s32 	%r85, %r66, 1;
	mov.b32 	 %f27, %r76;
	mov.b32 	 %f101, %r77;
	mul.ftz.f32 	%f102, %f99, 0f437F0000;
	mul.ftz.f32 	%f103, %f98, 0f437F0000;
	mul.ftz.f32 	%f104, %f101, 0f437F0000;
	sub.ftz.f32 	%f105, %f104, %f7;
	sub.ftz.f32 	%f106, %f103, %f8;
	sub.ftz.f32 	%f107, %f102, %f9;
	mul.ftz.f32 	%f108, %f106, %f11;
	fma.rn.ftz.f32 	%f109, %f105, %f10, %f108;
	fma.rn.ftz.f32 	%f28, %f107, %f12, %f109;
	mul.ftz.f32 	%f110, %f106, %f15;
	fma.rn.ftz.f32 	%f111, %f105, %f14, %f110;
	fma.rn.ftz.f32 	%f29, %f107, %f16, %f111;
	mul.ftz.f32 	%f112, %f106, %f19;
	fma.rn.ftz.f32 	%f113, %f105, %f18, %f112;
	fma.rn.ftz.f32 	%f30, %f107, %f20, %f113;
	mul.lo.s32 	%r7, %r85, %r8;
	add.s32 	%r91, %r15, %r7;
	add.s32 	%r92, %r91, 1;
	cvt.s64.s32	%rd2, %r92;
	@%p4 bra 	BB12_9;

	cvta.to.global.u64 	%rd29, %rd4;
	shl.b64 	%rd30, %rd2, 4;
	add.s64 	%rd31, %rd29, %rd30;
	mov.f32 	%f114, 0f3F800000;
	st.global.v4.f32 	[%rd31], {%f30, %f29, %f28, %f114};
	bra.uni 	BB12_10;

BB12_9:
	cvta.to.global.u64 	%rd32, %rd4;
	shl.b64 	%rd33, %rd2, 3;
	add.s64 	%rd34, %rd32, %rd33;
	mov.f32 	%f115, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f115;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f28;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f29;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f30;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd34], {%rs12, %rs11, %rs10, %rs9};

BB12_10:
	mul.ftz.f32 	%f116, %f27, 0f437F0000;
	sub.ftz.f32 	%f117, %f116, %f7;
	mul.ftz.f32 	%f118, %f25, 0f437F0000;
	sub.ftz.f32 	%f119, %f118, %f8;
	mul.ftz.f32 	%f120, %f26, 0f437F0000;
	sub.ftz.f32 	%f121, %f120, %f9;
	mul.ftz.f32 	%f122, %f119, %f11;
	fma.rn.ftz.f32 	%f123, %f117, %f10, %f122;
	fma.rn.ftz.f32 	%f31, %f121, %f12, %f123;
	mul.ftz.f32 	%f124, %f119, %f15;
	fma.rn.ftz.f32 	%f125, %f117, %f14, %f124;
	fma.rn.ftz.f32 	%f32, %f121, %f16, %f125;
	mul.ftz.f32 	%f126, %f119, %f19;
	fma.rn.ftz.f32 	%f127, %f117, %f18, %f126;
	fma.rn.ftz.f32 	%f33, %f121, %f20, %f127;
	add.s32 	%r98, %r7, %r15;
	cvt.s64.s32	%rd3, %r98;
	@%p4 bra 	BB12_12;

	cvta.to.global.u64 	%rd35, %rd4;
	shl.b64 	%rd36, %rd3, 4;
	add.s64 	%rd37, %rd35, %rd36;
	mov.f32 	%f128, 0f3F800000;
	st.global.v4.f32 	[%rd37], {%f33, %f32, %f31, %f128};
	bra.uni 	BB12_13;

BB12_12:
	cvta.to.global.u64 	%rd38, %rd4;
	shl.b64 	%rd39, %rd3, 3;
	add.s64 	%rd40, %rd38, %rd39;
	mov.f32 	%f129, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f129;
	mov.b16 	%rs13, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f31;
	mov.b16 	%rs14, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f32;
	mov.b16 	%rs15, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f33;
	mov.b16 	%rs16, %temp;
}
	st.global.v4.u16 	[%rd40], {%rs16, %rs15, %rs14, %rs13};

BB12_13:
	ret;
}
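
//
// Annotation added in editing (not compiler output; roles inferred): the
// FRAME variant of the kernel above. It performs the same full-range BT.709
// YCbCr to RGB conversion for a progressive NV12 surface; each thread
// converts two horizontally adjacent output pixels, and the vertical chroma
// sampling offset differs (y - 0.25 here versus y - 0.125 in the FIELD
// kernel). Output is float4 RGBA when param_3 is nonzero, half4 RGBA
// otherwise.
//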

.visible .entry PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel(
	.param .u64 PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel_param_0,
	.param .u64 PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel_param_1,
	.param .u32 PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel_param_2,
	.param .u32 PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel_param_3,
	.param .u32 PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel_param_4,
	.param .u32 PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel_param_5
)
{
	.reg .pred 	%p<6>;
	.reg .s16 	%rs<9>;
	.reg .s32 	%r<62>;
	.reg .f32 	%f<75>;
	.reg .s64 	%rd<21>;


	ld.param.u64 	%rd2, [PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel_param_1];
	ld.param.u32 	%r7, [PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel_param_2];
	ld.param.u32 	%r8, [PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel_param_3];
	ld.param.u32 	%r9, [PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel_param_4];
	ld.param.u32 	%r10, [PixelFormatConvert_NV12_FRAME_709_FullRange_Kernel_param_5];
	mov.u32 	%r11, %ntid.x;
	mov.u32 	%r12, %ctaid.x;
	mov.u32 	%r13, %tid.x;
	mad.lo.s32 	%r1, %r11, %r12, %r13;
	shl.b32 	%r14, %r1, 1;
	mov.u32 	%r15, %ntid.y;
	mov.u32 	%r16, %ctaid.y;
	mov.u32 	%r17, %tid.y;
	mad.lo.s32 	%r2, %r15, %r16, %r17;
	setp.ge.u32	%p1, %r2, %r10;
	setp.ge.u32	%p2, %r14, %r9;
	or.pred  	%p3, %p1, %p2;
	@%p3 bra 	BB13_7;

	cvt.rn.f32.s32	%f26, %r2;
	add.ftz.f32 	%f27, %f26, 0fBE800000;
	mov.f32 	%f28, 0f00000000;
	max.ftz.f32 	%f29, %f27, %f28;
	cvt.rn.f32.u32	%f30, %r10;
	min.ftz.f32 	%f31, %f29, %f30;
	fma.rn.ftz.f32 	%f32, %f31, 0f3F000000, %f30;
	add.ftz.f32 	%f23, %f32, 0f3F000000;
	cvt.rn.f32.s32	%f33, %r1;
	add.ftz.f32 	%f24, %f33, 0f3F000000;
	cvt.rn.f32.u32	%f34, %r9;
	min.ftz.f32 	%f35, %f24, %f34;
	add.ftz.f32 	%f36, %f35, 0f00000000;
	add.ftz.f32 	%f20, %f36, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r18, %r19, %r20, %r21}, [inTexture, {%f20, %f23}];
	// inline asm
	mov.b32 	 %f37, %r18;
	mov.b32 	 %f38, %r19;
	// inline asm
	tex.2d.v4.u32.f32 {%r22, %r23, %r24, %r25}, [inTexture, {%f24, %f23}];
	// inline asm
	add.ftz.f32 	%f25, %f26, 0f3F000000;
	// inline asm
	tex.2d.v4.u32.f32 {%r26, %r27, %r28, %r29}, [inTexture, {%f24, %f25}];
	// inline asm
	mov.b32 	 %f1, %r26;
	mov.b32 	 %f39, %r27;
	mul.ftz.f32 	%f40, %f38, 0f437F0000;
	mov.f32 	%f41, 0f437F0000;
	mul.ftz.f32 	%f42, %f37, 0f437F0000;
	mul.ftz.f32 	%f43, %f39, 0f437F0000;
	ld.const.f32 	%f44, [kYCbCrFullRangeOffset];
	div.approx.ftz.f32 	%f45, %f41, %f41;
	mul.ftz.f32 	%f2, %f44, %f45;
	sub.ftz.f32 	%f46, %f43, %f2;
	ld.const.f32 	%f47, [kYCbCrFullRangeOffset+4];
	mul.ftz.f32 	%f3, %f47, %f45;
	sub.ftz.f32 	%f48, %f42, %f3;
	ld.const.f32 	%f49, [kYCbCrFullRangeOffset+8];
	mul.ftz.f32 	%f4, %f49, %f45;
	sub.ftz.f32 	%f50, %f40, %f4;
	ld.const.f32 	%f5, [k709YCbCrFullRange_To_RGB32f];
	ld.const.f32 	%f6, [k709YCbCrFullRange_To_RGB32f+4];
	mul.ftz.f32 	%f51, %f48, %f6;
	fma.rn.ftz.f32 	%f52, %f46, %f5, %f51;
	ld.const.f32 	%f7, [k709YCbCrFullRange_To_RGB32f+8];
	fma.rn.ftz.f32 	%f8, %f50, %f7, %f52;
	ld.const.f32 	%f9, [k709YCbCrFullRange_To_RGB32f+12];
	ld.const.f32 	%f10, [k709YCbCrFullRange_To_RGB32f+16];
	mul.ftz.f32 	%f53, %f48, %f10;
	fma.rn.ftz.f32 	%f54, %f46, %f9, %f53;
	ld.const.f32 	%f11, [k709YCbCrFullRange_To_RGB32f+20];
	fma.rn.ftz.f32 	%f12, %f50, %f11, %f54;
	ld.const.f32 	%f13, [k709YCbCrFullRange_To_RGB32f+24];
	ld.const.f32 	%f14, [k709YCbCrFullRange_To_RGB32f+28];
	mul.ftz.f32 	%f55, %f48, %f14;
	fma.rn.ftz.f32 	%f56, %f46, %f13, %f55;
	ld.const.f32 	%f15, [k709YCbCrFullRange_To_RGB32f+32];
	fma.rn.ftz.f32 	%f16, %f50, %f15, %f56;
	setp.eq.s32	%p4, %r8, 0;
	@%p4 bra 	BB13_3;

	cvta.to.global.u64 	%rd9, %rd2;
	mad.lo.s32 	%r39, %r2, %r7, %r14;
	add.s32 	%r40, %r39, 1;
	mul.wide.s32 	%rd10, %r40, 16;
	add.s64 	%rd11, %rd9, %rd10;
	mov.f32 	%f57, 0f3F800000;
	st.global.v4.f32 	[%rd11], {%f16, %f12, %f8, %f57};
	bra.uni 	BB13_4;

BB13_3:
	cvta.to.global.u64 	%rd12, %rd2;
	mad.lo.s32 	%r50, %r2, %r7, %r14;
	add.s32 	%r51, %r50, 1;
	mul.wide.s32 	%rd13, %r51, 8;
	add.s64 	%rd14, %rd12, %rd13;
	mov.f32 	%f58, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f58;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f12;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f16;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd14], {%rs4, %rs3, %rs2, %rs1};

BB13_4:
	mov.b32 	 %f59, %r23;
	mul.ftz.f32 	%f60, %f59, 0f437F0000;
	mov.b32 	 %f61, %r22;
	mul.ftz.f32 	%f62, %f61, 0f437F0000;
	mul.ftz.f32 	%f63, %f1, 0f437F0000;
	sub.ftz.f32 	%f64, %f63, %f2;
	sub.ftz.f32 	%f65, %f62, %f3;
	sub.ftz.f32 	%f66, %f60, %f4;
	mul.ftz.f32 	%f67, %f65, %f6;
	fma.rn.ftz.f32 	%f68, %f64, %f5, %f67;
	fma.rn.ftz.f32 	%f17, %f66, %f7, %f68;
	mul.ftz.f32 	%f69, %f65, %f10;
	fma.rn.ftz.f32 	%f70, %f64, %f9, %f69;
	fma.rn.ftz.f32 	%f18, %f66, %f11, %f70;
	mul.ftz.f32 	%f71, %f65, %f14;
	fma.rn.ftz.f32 	%f72, %f64, %f13, %f71;
	fma.rn.ftz.f32 	%f19, %f66, %f15, %f72;
	mad.lo.s32 	%r61, %r2, %r7, %r14;
	cvt.s64.s32	%rd1, %r61;
	@%p4 bra 	BB13_6;

	cvta.to.global.u64 	%rd15, %rd2;
	shl.b64 	%rd16, %rd1, 4;
	add.s64 	%rd17, %rd15, %rd16;
	mov.f32 	%f73, 0f3F800000;
	st.global.v4.f32 	[%rd17], {%f19, %f18, %f17, %f73};
	bra.uni 	BB13_7;

BB13_6:
	cvta.to.global.u64 	%rd18, %rd2;
	shl.b64 	%rd19, %rd1, 3;
	add.s64 	%rd20, %rd18, %rd19;
	mov.f32 	%f74, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f74;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f17;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f18;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f19;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd20], {%rs8, %rs7, %rs6, %rs5};

BB13_7:
	ret;
}
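
//
// Annotation added in editing (inferred, with invented parameter names):
// each thread copies one 32-bit word of an NV12 surface from param_0 to
// param_3, adding an extra row offset (param_6) for rows at or beyond
// param_2, presumably the start of the chroma plane. A hypothetical CUDA
// reconstruction of the behavior:
//
//   __global__ void CopyNV12_Kernel(const unsigned* src, int srcPitchWords,
//                                   int lumaHeight, unsigned* dst,
//                                   int dstPitchWords, int height,
//                                   int chromaRowOffset)
//   {
//       int x = blockIdx.x * blockDim.x + threadIdx.x;
//       int y = blockIdx.y * blockDim.y + threadIdx.y;
//       if (x < dstPitchWords && y < height) {
//           int srcRow = (y < lumaHeight) ? y : y + chromaRowOffset;
//           dst[y * dstPitchWords + x] = src[srcRow * srcPitchWords + x];
//       }
//   }
//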

.visible .entry CopyNV12_Kernel(
	.param .u64 CopyNV12_Kernel_param_0,
	.param .u32 CopyNV12_Kernel_param_1,
	.param .u32 CopyNV12_Kernel_param_2,
	.param .u64 CopyNV12_Kernel_param_3,
	.param .u32 CopyNV12_Kernel_param_4,
	.param .u32 CopyNV12_Kernel_param_5,
	.param .u32 CopyNV12_Kernel_param_6
)
{
	.reg .pred 	%p<5>;
	.reg .s32 	%r<19>;
	.reg .s64 	%rd<9>;


	ld.param.u64 	%rd1, [CopyNV12_Kernel_param_0];
	ld.param.u32 	%r3, [CopyNV12_Kernel_param_1];
	ld.param.u32 	%r4, [CopyNV12_Kernel_param_2];
	ld.param.u64 	%rd2, [CopyNV12_Kernel_param_3];
	ld.param.u32 	%r5, [CopyNV12_Kernel_param_4];
	ld.param.u32 	%r7, [CopyNV12_Kernel_param_5];
	ld.param.u32 	%r6, [CopyNV12_Kernel_param_6];
	mov.u32 	%r8, %ntid.x;
	mov.u32 	%r9, %ctaid.x;
	mov.u32 	%r10, %tid.x;
	mad.lo.s32 	%r1, %r8, %r9, %r10;
	setp.lt.s32	%p1, %r1, %r5;
	mov.u32 	%r11, %ntid.y;
	mov.u32 	%r12, %ctaid.y;
	mov.u32 	%r13, %tid.y;
	mad.lo.s32 	%r2, %r11, %r12, %r13;
	setp.lt.s32	%p2, %r2, %r7;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB14_2;
	bra.uni 	BB14_1;

BB14_1:
	cvta.to.global.u64 	%rd3, %rd2;
	cvta.to.global.u64 	%rd4, %rd1;
	setp.lt.s32	%p4, %r2, %r4;
	selp.b32	%r14, 0, %r6, %p4;
	add.s32 	%r15, %r14, %r2;
	mad.lo.s32 	%r16, %r15, %r3, %r1;
	mul.wide.s32 	%rd5, %r16, 4;
	add.s64 	%rd6, %rd4, %rd5;
	mad.lo.s32 	%r17, %r2, %r5, %r1;
	mul.wide.s32 	%rd7, %r17, 4;
	add.s64 	%rd8, %rd3, %rd7;
	ld.global.u32 	%r18, [%rd6];
	st.global.u32 	[%rd8], %r18;

BB14_2:
	ret;
}
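
//
// Annotation added in editing (inferred, with invented parameter names):
// each thread writes two adjacent bytes, 76/76 in rows below param_4 (the
// luma plane) and 0x80/0xFF in the remaining rows (the interleaved CbCr
// plane), filling the surface with a constant pattern. param_2 is declared
// but never read. A hypothetical CUDA reconstruction:
//
//   __global__ void NV12FillFail(unsigned char* dst, int pitch, int unused,
//                                int widthInPairs, int lumaHeight, int height)
//   {
//       int x = blockIdx.x * blockDim.x + threadIdx.x;
//       int y = blockIdx.y * blockDim.y + threadIdx.y;
//       if (x < widthInPairs && y < height) {
//           unsigned char* p = dst + y * pitch + 2 * x;
//           p[0] = (y < lumaHeight) ? 76 : 0x80;
//           p[1] = (y < lumaHeight) ? 76 : 0xFF;
//       }
//   }
//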

.visible .entry NV12FillFail(
	.param .u64 NV12FillFail_param_0,
	.param .u32 NV12FillFail_param_1,
	.param .u32 NV12FillFail_param_2,
	.param .u32 NV12FillFail_param_3,
	.param .u32 NV12FillFail_param_4,
	.param .u32 NV12FillFail_param_5
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<3>;
	.reg .s32 	%r<15>;
	.reg .s64 	%rd<5>;


	ld.param.u64 	%rd1, [NV12FillFail_param_0];
	ld.param.u32 	%r3, [NV12FillFail_param_1];
	ld.param.u32 	%r5, [NV12FillFail_param_3];
	ld.param.u32 	%r4, [NV12FillFail_param_4];
	ld.param.u32 	%r6, [NV12FillFail_param_5];
	mov.u32 	%r7, %ntid.x;
	mov.u32 	%r8, %ctaid.x;
	mov.u32 	%r9, %tid.x;
	mad.lo.s32 	%r1, %r7, %r8, %r9;
	setp.lt.s32	%p1, %r1, %r5;
	mov.u32 	%r10, %ntid.y;
	mov.u32 	%r11, %ctaid.y;
	mov.u32 	%r12, %tid.y;
	mad.lo.s32 	%r2, %r10, %r11, %r12;
	setp.lt.s32	%p2, %r2, %r6;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB15_2;
	bra.uni 	BB15_1;

BB15_1:
	cvta.to.global.u64 	%rd2, %rd1;
	shl.b32 	%r13, %r1, 1;
	mad.lo.s32 	%r14, %r2, %r3, %r13;
	cvt.s64.s32	%rd3, %r14;
	add.s64 	%rd4, %rd2, %rd3;
	setp.lt.s32	%p4, %r2, %r4;
	selp.b16	%rs1, 76, -128, %p4;
	st.global.u8 	[%rd4], %rs1;
	selp.b16	%rs2, 76, -1, %p4;
	st.global.u8 	[%rd4+1], %rs2;

BB15_2:
	ret;
}


