//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Wed Jul 10 12:41:20 2013 (1373485280)
// Cuda compilation tools, release 5.5, V5.5.0
//

.version 3.2
.target sm_30
.address_size 64

	.file	1 "D:/singlebarrel/releases/2014.03/shared/adobe/MediaCore/GPUFoundation/Src/ImageProcessing/PixelFormatConvert_Bayer.cu", 1399785311, 20545
	.file	2 "d:\\singlebarrel\\releases\\2014.03\\shared\\adobe\\mediacore\\external\\3rdparty\\nvidia\\cuda\\win\\include\\device_functions.h", 1399785281, 191626
// Colour-conversion tables in constant memory.
// Each table is 36 bytes, i.e. nine 4-byte words. Read as little-endian
// IEEE-754 singles the words are matrix coefficients; for example the first
// three words of kRGB32f_To_601YPbPr decode to 0.299, 0.587, 0.114.
// The names suggest <source>_To_<destination> 3x3 matrices for BT.601 / BT.709
// conversions -- presumably stored one output component per group of three
// floats; confirm against the originating .cu file.
// None of these tables is referenced by the code visible in this part of the file.
.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 33, 201, 44, 190, 111, 155, 169, 190, 0, 0, 0, 63, 0, 0, 0, 63, 70, 94, 214, 190, 232, 134, 166, 189};
.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 188, 116, 179, 63, 0, 0, 128, 63, 152, 50, 176, 190, 158, 209, 54, 191, 0, 0, 128, 63, 229, 208, 226, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70, 246, 130, 66, 145, 141, 0, 67, 94, 186, 199, 65, 33, 48, 23, 194, 240, 103, 148, 194, 0, 0, 224, 66, 0, 0, 224, 66, 111, 146, 187, 194, 70, 182, 145, 193};
.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 182, 23, 205, 59, 37, 160, 149, 59, 40, 15, 201, 186, 156, 239, 80, 187, 37, 160, 149, 59, 236, 155, 1, 60, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219, 121, 131, 62, 152, 14, 1, 63, 18, 131, 200, 61, 174, 199, 23, 190, 238, 252, 148, 190, 197, 224, 224, 62, 197, 224, 224, 62, 217, 78, 188, 190, 174, 71, 146, 189};
.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 160, 74, 204, 63, 127, 10, 149, 63, 254, 148, 200, 190, 184, 30, 80, 191, 127, 10, 149, 63, 78, 26, 1, 64, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 166, 27, 44, 190, 39, 241, 168, 190, 250, 254, 254, 62, 250, 254, 254, 62, 43, 135, 213, 190, 59, 223, 165, 189};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0, 0, 128, 63, 0, 0, 0, 0, 72, 193, 178, 63, 0, 0, 128, 63, 143, 130, 175, 190, 225, 26, 54, 191, 0, 0, 128, 63, 20, 238, 225, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113, 125, 152, 66, 92, 175, 21, 67, 92, 143, 232, 65, 158, 111, 43, 194, 49, 72, 168, 194, 0, 0, 254, 66, 0, 0, 254, 66, 170, 177, 212, 194, 88, 57, 165, 193};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129, 128, 128, 59, 0, 0, 0, 0, 188, 116, 179, 59, 129, 128, 128, 59, 194, 50, 176, 186, 179, 209, 54, 187, 129, 128, 128, 59, 229, 208, 226, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208, 179, 89, 62, 89, 23, 55, 63, 152, 221, 147, 61, 186, 164, 234, 189, 210, 86, 197, 190, 0, 0, 0, 63, 0, 0, 0, 63, 190, 134, 232, 190, 16, 202, 59, 189};
.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 12, 147, 201, 63, 0, 0, 128, 63, 221, 209, 63, 190, 243, 173, 239, 190, 0, 0, 128, 63, 77, 132, 237, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106, 60, 58, 66, 6, 161, 28, 67, 244, 253, 124, 65, 223, 79, 205, 193, 8, 172, 172, 194, 0, 0, 224, 66, 0, 0, 224, 66, 195, 117, 203, 194, 236, 81, 36, 193};
.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 239, 94, 230, 59, 37, 160, 149, 59, 33, 57, 91, 186, 178, 245, 8, 187, 37, 160, 149, 59, 82, 185, 7, 60, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCrFullRange_To_RGB32f[36] = {131, 128, 128, 59, 0, 0, 0, 0, 28, 147, 201, 59, 131, 128, 128, 59, 61, 210, 63, 186, 248, 173, 239, 186, 131, 128, 128, 59, 82, 132, 237, 59, 0, 0, 0, 0};
// kRGB8u_To_709YCbCr: nine little-endian IEEE-754 singles (36 bytes).
// Decoded after the fix, in groups of three:
//    0.1826,  0.6142,  0.0620
//   -0.1006, -0.3386,  0.4392
//    0.4392, -0.3989, -0.0403
// FIX: word 3 was {147, 24, 206, 61} = 0x3DCE1893 = +0.1006. Its sign bit is now
// set ({147, 24, 206, 189} = 0xBDCE1893 = -0.1006). With the positive value the
// second group summed to about 0.20 instead of about 0, unlike the third group
// and unlike every sibling RGB->YCbCr table; kRGB32f_To_709YCbCr holds -25.66
// (= -0.1006 * 255) in the same slot.
// NOTE(review): this file is compiler output -- the same sign correction must
// be made in the originating .cu source or it will reappear on the next build.
.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207, 247, 58, 62, 53, 62, 29, 63, 231, 251, 125, 61, 147, 24, 206, 189, 23, 89, 173, 190, 197, 224, 224, 62, 197, 224, 224, 62, 12, 66, 204, 190, 195, 245, 36, 189};
// Further 36-byte tables (nine little-endian f32 words each), followed by two
// 12-byte offset vectors and an integer constant.
.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 147, 120, 229, 63, 127, 10, 149, 63, 53, 94, 90, 190, 205, 108, 8, 191, 127, 10, 149, 63, 154, 49, 7, 64, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0, 0, 128, 63, 23, 100, 203, 61, 1, 77, 68, 62, 0, 0, 0, 0, 18, 103, 125, 63, 10, 158, 226, 189, 0, 0, 0, 0, 61, 98, 148, 189, 249, 191, 123, 63};
.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0, 0, 128, 63, 122, 165, 236, 189, 179, 237, 84, 190, 0, 0, 0, 0, 204, 98, 130, 63, 216, 188, 234, 61, 0, 0, 0, 0, 74, 179, 153, 61, 234, 61, 131, 63};
// Three f32 words: 16.0, 128.0, 128.0 (0x41800000, 0x43000000, 0x43000000).
.const .align 4 .b8 kYCbCrOffset[12] = {0, 0, 128, 65, 0, 0, 0, 67, 0, 0, 0, 67};
// Three f32 words: 0.0, 128.0, 128.0.
.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 67};
// 32767 = 0x7FFF; not referenced by the code visible in this part of the file.
.const .align 4 .u32 kRandMax = 32767;
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix has been demoted
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix has been demoted
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix has been demoted
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix has been demoted
// NUL-terminated ASCII string "__CUDA_FTZ" (10 characters + terminator).
.global .align 1 .b8 $str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};

// -----------------------------------------------------------------------------
// _Z16SharedLoadMatrixPfPKfb   (mangled signature: float*, const float*, bool)
//
//   param_0 : 64-bit generic address the floats are written to
//   param_1 : 64-bit generic address the floats are read from
//   param_2 : flag; only its low byte is tested (zero / non-zero)
//
// Threads with %tid.y == 0 and %tid.x < 3 each copy one group of three floats
// beginning at element index 3 * %tid.x. The middle float is copied straight
// across; when the flag is non-zero the first and third floats trade places.
// Every thread -- copying or not -- then waits on barrier 0 before returning.
// -----------------------------------------------------------------------------
.visible .func _Z16SharedLoadMatrixPfPKfb(
	.param .b64 _Z16SharedLoadMatrixPfPKfb_param_0,
	.param .b64 _Z16SharedLoadMatrixPfPKfb_param_1,
	.param .b32 _Z16SharedLoadMatrixPfPKfb_param_2
)
{
	.reg .pred 	%pRow0, %pCol, %pCopy, %pKeep;
	.reg .s16 	%hFlagRaw, %hFlag;
	.reg .s32 	%iTidX, %iTidY, %iFirst;
	.reg .f32 	%fVal;
	.reg .s64 	%qDst, %qSrc, %qRowBytes, %qOut, %qIn, %qLead, %qTail;

	ld.param.u64 	%qDst, [_Z16SharedLoadMatrixPfPKfb_param_0];
	ld.param.u64 	%qSrc, [_Z16SharedLoadMatrixPfPKfb_param_1];
	ld.param.s8 	%hFlagRaw, [_Z16SharedLoadMatrixPfPKfb_param_2];
	.loc 1 91 1
	// Only the first three threads of row 0 take part in the copy.
	mov.u32 	%iTidY, %tid.y;
	mov.u32 	%iTidX, %tid.x;
	setp.eq.s32	%pRow0, %iTidY, 0;
	setp.lt.s32	%pCol, %iTidX, 3;
	and.pred  	%pCopy, %pRow0, %pCol;
	@!%pCopy bra 	SLM_SYNC;

	// Byte offset of this thread's three-float group, applied to both pointers.
	mul.lo.s32 	%iFirst, %iTidX, 3;
	mul.wide.s32 	%qRowBytes, %iFirst, 4;
	add.s64 	%qOut, %qDst, %qRowBytes;
	add.s64 	%qIn, %qSrc, %qRowBytes;

	// Flag == 0: element 0 <- 0 and element 2 <- 2.
	// Flag != 0: element 0 <- 2 and element 2 <- 0 (byte offsets 0 / 8).
	and.b16  	%hFlag, %hFlagRaw, 255;
	setp.eq.s16	%pKeep, %hFlag, 0;
	selp.b64	%qLead, 0, 8, %pKeep;
	selp.b64	%qTail, 8, 0, %pKeep;
	add.s64 	%qLead, %qIn, %qLead;
	add.s64 	%qTail, %qIn, %qTail;

	// Load/store pairs stay interleaved in element order 0, 1, 2.
	ld.f32 	%fVal, [%qLead];
	st.f32 	[%qOut], %fVal;
	ld.f32 	%fVal, [%qIn+4];
	st.f32 	[%qOut+4], %fVal;
	ld.f32 	%fVal, [%qTail];
	st.f32 	[%qOut+8], %fVal;

SLM_SYNC:
	// Reached by every thread, so the barrier is outside the guarded copy.
	bar.sync 	0;
	.loc 1 91 2
	ret;
}

//
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel
//
// Each thread handles one 2x2 block of single-float source samples and writes
// four 4-channel output pixels. Every sample is fed into one input slot of a
// 3x3 matrix (the other two inputs are the sample multiplied by 0.0), each of
// the three results goes through a sign-preserving power function, and the
// fourth channel is the constant 1.0.
//
// Parameter roles, inferred only from how the code below uses them:
//   param_0  (u64) source base address (converted to a global address)
//   param_1  (u32) element offset added to every source sample index
//   param_2  (u32) source row pitch, in float elements
//   param_3  (u32) source flip: 0 -> rows ascend from offset 0;
//                  non-zero -> rows descend starting at row (param_10 - 1)
//   param_4        never loaded by this kernel
//   param_5  (u64) destination base address (converted to a global address)
//   param_6  (u32) destination row pitch, in pixels
//   param_7  (u32) destination flip: 0 -> row y; non-zero -> row (param_10 - 1 - y)
//   param_8  (u32) output format: 0 -> four f16 per pixel (8 bytes),
//                  non-zero -> four f32 per pixel (16 bytes)
//   param_9  (u32) exclusive bound tested against the block's x coordinate
//   param_10 (u32) exclusive bound tested against the block's y coordinate;
//                  also the row count used by both flips
//   param_11 (u32) non-zero -> elements 0 and 2 of each matrix row are swapped
//                  while the matrix is loaded
//   param_12 (u32) float-element offset from param_0 to the nine matrix floats
//
// NOTE(review): only the block's top-left coordinate (x, y) is compared with
// param_9 / param_10, yet the kernel also reads and writes x+1 and y+1. If the
// bounds can be odd this touches memory past the last column/row -- confirm
// that callers always pass even dimensions.
//
.visible .entry PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel(
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_0,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_1,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_2,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_3,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_4,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_5,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_6,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_7,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_8,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_9,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_10,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_11,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_12
)
{
	.reg .pred 	%p<33>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<156>;
	.reg .f32 	%f<197>;
	.reg .s64 	%rd<79>;
	// demoted variable
	// 36 bytes of shared memory = the nine matrix floats used by the whole block.
	.shared .align 4 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix[36];

	ld.param.u64 	%rd3, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_0];
	ld.param.u32 	%r9, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_1];
	ld.param.u32 	%r10, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_2];
	ld.param.u32 	%r11, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_3];
	ld.param.u64 	%rd4, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_5];
	ld.param.u32 	%r12, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_6];
	ld.param.u32 	%r13, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_7];
	ld.param.u32 	%r14, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_8];
	ld.param.u32 	%r15, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_9];
	ld.param.u32 	%r16, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_10];
	ld.param.u32 	%r17, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_11];
	ld.param.u32 	%r18, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_12];
	cvta.to.global.u64 	%rd1, %rd3;
	.loc 1 169 1
	cvt.s64.s32	%rd2, %r18;
	.loc 1 91 1
	// Matrix load: only threads with tid.y == 0 and tid.x < 3 copy, one
	// three-float row each.
	mov.u32 	%r1, %tid.y;
	setp.eq.s32	%p1, %r1, 0;
	mov.u32 	%r2, %tid.x;
	setp.lt.s32	%p2, %r2, 3;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB1_2;
	bra.uni 	BB1_1;

BB1_1:
	// %p4 = (param_11 != 0): selects the swapped element order below.
	setp.ne.s32	%p4, %r17, 0;
	.loc 1 91 1
	// %r19 = 3 * tid.x = first element of this thread's matrix row.
	mul.lo.s32 	%r19, %r2, 3;
	cvt.s64.s32	%rd5, %r19;
	add.s32 	%r20, %r19, %r18;
	.loc 1 91 1
	// The matrix is read from the param_0 buffer at element offset param_12.
	add.s64 	%rd6, %rd5, %rd2;
	selp.b64	%rd7, 2, 0, %p4;
	add.s64 	%rd8, %rd6, %rd7;
	shl.b64 	%rd9, %rd8, 2;
	add.s64 	%rd10, %rd1, %rd9;
	mul.wide.s32 	%rd11, %r19, 4;
	mov.u64 	%rd12, PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix;
	add.s64 	%rd13, %rd12, %rd11;
	.loc 1 91 1
	// Row element 0 <- source element 0 (or 2 when swapping).
	ld.global.f32 	%f82, [%rd10];
	st.shared.f32 	[%rd13], %f82;
	mul.wide.s32 	%rd14, %r20, 4;
	add.s64 	%rd15, %rd1, %rd14;
	.loc 1 91 1
	// Row element 1 is always copied straight across.
	ld.global.f32 	%f83, [%rd15+4];
	st.shared.f32 	[%rd13+4], %f83;
	// Row element 2 <- source element 2 (or 0 when swapping).
	selp.b64	%rd16, 0, 2, %p4;
	add.s64 	%rd17, %rd6, %rd16;
	shl.b64 	%rd18, %rd17, 2;
	add.s64 	%rd19, %rd1, %rd18;
	ld.global.f32 	%f84, [%rd19];
	st.shared.f32 	[%rd13+8], %f84;

BB1_2:
	.loc 1 91 1
	// All threads wait here, so the shared matrix is complete before it is read.
	bar.sync 	0;
	.loc 1 169 1
	// %r3 = 2 * (ntid.x * ctaid.x + tid.x): x of the block's top-left sample.
	// %r4 = 2 * (ntid.y * ctaid.y + tid.y): y of the block's top-left sample.
	mov.u32 	%r21, %ntid.x;
	mov.u32 	%r22, %ctaid.x;
	mad.lo.s32 	%r23, %r21, %r22, %r2;
	shl.b32 	%r3, %r23, 1;
	mov.u32 	%r24, %ntid.y;
	mov.u32 	%r25, %ctaid.y;
	mad.lo.s32 	%r26, %r24, %r25, %r1;
	shl.b32 	%r4, %r26, 1;
	.loc 1 169 1
	// Skip the whole block unless x < param_9 and y < param_10.
	setp.lt.s32	%p5, %r3, %r15;
	setp.lt.s32	%p6, %r4, %r16;
	and.pred  	%p7, %p5, %p6;
	.loc 1 169 1
	@!%p7 bra 	BB1_51;
	bra.uni 	BB1_3;

BB1_3:
	.loc 1 169 1
	cvt.s64.s32	%rd20, %r9;
	.loc 1 169 1
	// Source addressing: %r30 is the signed row stride (+pitch, or -pitch when
	// param_3 != 0) and %rd23 the starting element offset (param_1, plus
	// (param_10 - 1) * pitch when flipped).
	add.s32 	%r27, %r16, -1;
	mul.lo.s32 	%r28, %r27, %r10;
	cvt.s64.s32	%rd21, %r28;
	neg.s32 	%r29, %r10;
	.loc 1 169 1
	setp.eq.s32	%p8, %r11, 0;
	selp.b32	%r30, %r10, %r29, %p8;
	selp.b64	%rd22, 0, %rd21, %p8;
	add.s64 	%rd23, %rd22, %rd20;
	.loc 1 169 1
	// ---- Sample (x, y): used as matrix input 2; inputs 0 and 1 are %f2. ----
	mad.lo.s32 	%r31, %r4, %r30, %r3;
	cvt.s64.s32	%rd24, %r31;
	add.s64 	%rd25, %rd24, %rd23;
	shl.b64 	%rd26, %rd25, 2;
	add.s64 	%rd27, %rd1, %rd26;
	ld.global.f32 	%f1, [%rd27];
	// %f2 = sample * 0.0, the stand-in for the two inputs this sample does not supply.
	mul.ftz.f32 	%f2, %f1, 0f00000000;
	.loc 1 169 1
	// Matrix row 2 (byte offsets 24, 28, 32) -> first output channel.
	ld.shared.f32 	%f3, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix+24];
	ld.shared.f32 	%f4, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix+28];
	mul.ftz.f32 	%f85, %f2, %f4;
	fma.rn.ftz.f32 	%f86, %f2, %f3, %f85;
	ld.shared.f32 	%f5, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix+32];
	fma.rn.ftz.f32 	%f6, %f1, %f5, %f86;
	// Sign-preserving power: v >= 0 -> 2^(e*log2 v); v < 0 or NaN -> -(2^(e*log2(-v))).
	// e = 0f3EE8BA2E, about 0.4545 (presumably a 1/2.2 gamma -- confirm).
	// The same pattern repeats for every channel below.
	setp.ltu.ftz.f32	%p9, %f6, 0f00000000;
	@%p9 bra 	BB1_5;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f87, %f6;
	mul.ftz.f32 	%f88, %f87, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f185, %f88;
	bra.uni 	BB1_6;

BB1_5:
	neg.ftz.f32 	%f89, %f6;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f90, %f89;
	mul.ftz.f32 	%f91, %f90, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f92, %f91;
	neg.ftz.f32 	%f185, %f92;

BB1_6:
	// Matrix row 1 (byte offsets 12, 16, 20) -> second output channel.
	ld.shared.f32 	%f10, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix+12];
	ld.shared.f32 	%f11, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix+16];
	mul.ftz.f32 	%f93, %f2, %f11;
	fma.rn.ftz.f32 	%f94, %f2, %f10, %f93;
	ld.shared.f32 	%f12, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix+20];
	fma.rn.ftz.f32 	%f13, %f1, %f12, %f94;
	setp.ltu.ftz.f32	%p10, %f13, 0f00000000;
	@%p10 bra 	BB1_8;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f95, %f13;
	mul.ftz.f32 	%f96, %f95, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f186, %f96;
	bra.uni 	BB1_9;

BB1_8:
	neg.ftz.f32 	%f97, %f13;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f98, %f97;
	mul.ftz.f32 	%f99, %f98, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f100, %f99;
	neg.ftz.f32 	%f186, %f100;

BB1_9:
	// Matrix row 0 (byte offsets 0, 4, 8) -> third output channel.
	ld.shared.f32 	%f17, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix];
	ld.shared.f32 	%f18, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix+4];
	mul.ftz.f32 	%f101, %f2, %f18;
	fma.rn.ftz.f32 	%f102, %f2, %f17, %f101;
	ld.shared.f32 	%f19, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_171015_384_non_const_matrix+8];
	fma.rn.ftz.f32 	%f20, %f1, %f19, %f102;
	setp.ltu.ftz.f32	%p11, %f20, 0f00000000;
	@%p11 bra 	BB1_11;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f103, %f20;
	mul.ftz.f32 	%f104, %f103, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f187, %f104;
	bra.uni 	BB1_12;

BB1_11:
	neg.ftz.f32 	%f105, %f20;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f106, %f105;
	mul.ftz.f32 	%f107, %f106, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f108, %f107;
	neg.ftz.f32 	%f187, %f108;

BB1_12:
	// %f27 = 1.0, written as the fourth channel of every output pixel.
	mov.f32 	%f27, 0f3F800000;
	.loc 1 169 1
	// Destination row %r39: y, or (param_10 - 1 - y) when param_7 != 0.
	sub.s32 	%r38, %r27, %r4;
	setp.eq.s32	%p12, %r13, 0;
	selp.b32	%r39, %r4, %r38, %p12;
	.loc 1 169 1
	// %r5 = destination pixel index of (x, y).
	mad.lo.s32 	%r5, %r39, %r12, %r3;
	.loc 1 169 1
	// %p13 = (param_8 == 0): take the f16 store path; otherwise store f32.
	setp.eq.s32	%p13, %r14, 0;
	@%p13 bra 	BB1_14;

	// f32 path: 16 bytes per pixel.
	cvta.to.global.u64 	%rd28, %rd4;
	mul.wide.s32 	%rd29, %r5, 16;
	add.s64 	%rd30, %rd28, %rd29;
	.loc 1 169 1
	st.global.v4.f32 	[%rd30], {%f185, %f186, %f187, %f27};
	bra.uni 	BB1_15;

BB1_14:
	// f16 path: each channel is converted to half, 8 bytes per pixel.
	cvta.to.global.u64 	%rd31, %rd4;
	mul.wide.s32 	%rd32, %r5, 8;
	add.s64 	%rd33, %rd31, %rd32;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f185;
	mov.b16 	%rs1, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f186;
	mov.b16 	%rs2, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f187;
	mov.b16 	%rs3, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f27;
	mov.b16 	%rs4, %temp;
}
	.loc 1 169 231
	st.global.v4.u16 	[%rd33], {%rs1, %rs2, %rs3, %rs4};

BB1_15:
	.loc 1 169 1
	// ---- Sample (x+1, y): used as matrix input 1; inputs 0 and 2 are %f29. ----
	// Source index = y * stride + x + 1 (y * stride rebuilt as 2 * (%r26 * %r30)).
	mul.lo.s32 	%r56, %r26, %r30;
	shl.b32 	%r57, %r56, 1;
	add.s32 	%r58, %r3, %r57;
	.loc 1 169 1
	add.s32 	%r59, %r58, 1;
	cvt.s64.s32	%rd34, %r59;
	add.s64 	%rd39, %rd34, %rd23;
	shl.b64 	%rd41, %rd39, 2;
	add.s64 	%rd42, %rd1, %rd41;
	ld.global.f32 	%f28, [%rd42];
	mul.ftz.f32 	%f29, %f28, 0f00000000;
	.loc 1 169 1
	// Row 2 of the matrix, then the power function.
	mul.ftz.f32 	%f110, %f28, %f4;
	fma.rn.ftz.f32 	%f111, %f29, %f3, %f110;
	fma.rn.ftz.f32 	%f30, %f29, %f5, %f111;
	setp.ltu.ftz.f32	%p15, %f30, 0f00000000;
	@%p15 bra 	BB1_17;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f112, %f30;
	mul.ftz.f32 	%f113, %f112, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f188, %f113;
	bra.uni 	BB1_18;

BB1_17:
	neg.ftz.f32 	%f114, %f30;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f115, %f114;
	mul.ftz.f32 	%f116, %f115, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f117, %f116;
	neg.ftz.f32 	%f188, %f117;

BB1_18:
	// Row 1 of the matrix.
	mul.ftz.f32 	%f118, %f28, %f11;
	fma.rn.ftz.f32 	%f119, %f29, %f10, %f118;
	fma.rn.ftz.f32 	%f34, %f29, %f12, %f119;
	setp.ltu.ftz.f32	%p16, %f34, 0f00000000;
	@%p16 bra 	BB1_20;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f120, %f34;
	mul.ftz.f32 	%f121, %f120, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f189, %f121;
	bra.uni 	BB1_21;

BB1_20:
	neg.ftz.f32 	%f122, %f34;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f123, %f122;
	mul.ftz.f32 	%f124, %f123, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f125, %f124;
	neg.ftz.f32 	%f189, %f125;

BB1_21:
	// Row 0 of the matrix.
	mul.ftz.f32 	%f126, %f28, %f18;
	fma.rn.ftz.f32 	%f127, %f29, %f17, %f126;
	fma.rn.ftz.f32 	%f38, %f29, %f19, %f127;
	setp.ltu.ftz.f32	%p17, %f38, 0f00000000;
	@%p17 bra 	BB1_23;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f128, %f38;
	mul.ftz.f32 	%f129, %f128, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f190, %f129;
	bra.uni 	BB1_24;

BB1_23:
	neg.ftz.f32 	%f130, %f38;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f131, %f130;
	mul.ftz.f32 	%f132, %f131, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f133, %f132;
	neg.ftz.f32 	%f190, %f133;

BB1_24:
	.loc 1 169 1
	// %r6 = destination pixel index of (x+1, y).
	add.s32 	%r6, %r5, 1;
	.loc 1 169 1
	@%p13 bra 	BB1_26;

	cvta.to.global.u64 	%rd43, %rd4;
	mul.wide.s32 	%rd44, %r6, 16;
	add.s64 	%rd45, %rd43, %rd44;
	.loc 1 169 1
	st.global.v4.f32 	[%rd45], {%f188, %f189, %f190, %f27};
	bra.uni 	BB1_27;

BB1_26:
	cvta.to.global.u64 	%rd46, %rd4;
	mul.wide.s32 	%rd47, %r6, 8;
	add.s64 	%rd48, %rd46, %rd47;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f188;
	mov.b16 	%rs5, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f189;
	mov.b16 	%rs6, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f190;
	mov.b16 	%rs7, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f27;
	mov.b16 	%rs8, %temp;
}
	.loc 1 169 231
	st.global.v4.u16 	[%rd48], {%rs5, %rs6, %rs7, %rs8};

BB1_27:
	.loc 1 169 1
	// ---- Sample (x, y+1): used as matrix input 1; inputs 0 and 2 are %f47. ----
	add.s32 	%r81, %r4, 1;
	.loc 1 169 1
	// %r89 = (y + 1) * stride + x; reused for the (x+1, y+1) sample further down.
	mad.lo.s32 	%r89, %r81, %r30, %r3;
	cvt.s64.s32	%rd49, %r89;
	add.s64 	%rd54, %rd49, %rd23;
	shl.b64 	%rd56, %rd54, 2;
	add.s64 	%rd57, %rd1, %rd56;
	ld.global.f32 	%f46, [%rd57];
	mul.ftz.f32 	%f47, %f46, 0f00000000;
	.loc 1 169 1
	// Row 2 of the matrix.
	mul.ftz.f32 	%f135, %f46, %f4;
	fma.rn.ftz.f32 	%f136, %f47, %f3, %f135;
	fma.rn.ftz.f32 	%f48, %f47, %f5, %f136;
	setp.ltu.ftz.f32	%p21, %f48, 0f00000000;
	@%p21 bra 	BB1_29;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f137, %f48;
	mul.ftz.f32 	%f138, %f137, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f191, %f138;
	bra.uni 	BB1_30;

BB1_29:
	neg.ftz.f32 	%f139, %f48;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f140, %f139;
	mul.ftz.f32 	%f141, %f140, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f142, %f141;
	neg.ftz.f32 	%f191, %f142;

BB1_30:
	// Row 1 of the matrix.
	mul.ftz.f32 	%f143, %f46, %f11;
	fma.rn.ftz.f32 	%f144, %f47, %f10, %f143;
	fma.rn.ftz.f32 	%f52, %f47, %f12, %f144;
	setp.ltu.ftz.f32	%p22, %f52, 0f00000000;
	@%p22 bra 	BB1_32;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f145, %f52;
	mul.ftz.f32 	%f146, %f145, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f192, %f146;
	bra.uni 	BB1_33;

BB1_32:
	neg.ftz.f32 	%f147, %f52;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f148, %f147;
	mul.ftz.f32 	%f149, %f148, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f150, %f149;
	neg.ftz.f32 	%f192, %f150;

BB1_33:
	// Row 0 of the matrix.
	mul.ftz.f32 	%f151, %f46, %f18;
	fma.rn.ftz.f32 	%f152, %f47, %f17, %f151;
	fma.rn.ftz.f32 	%f56, %f47, %f19, %f152;
	setp.ltu.ftz.f32	%p23, %f56, 0f00000000;
	@%p23 bra 	BB1_35;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f153, %f56;
	mul.ftz.f32 	%f154, %f153, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f193, %f154;
	bra.uni 	BB1_36;

BB1_35:
	neg.ftz.f32 	%f155, %f56;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f156, %f155;
	mul.ftz.f32 	%f157, %f156, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f158, %f157;
	neg.ftz.f32 	%f193, %f158;

BB1_36:
	.loc 1 169 1
	// Second destination row: one row further (+1), or one row back (-1) when
	// the destination is flipped (param_7 != 0).
	selp.b32	%r100, 1, -1, %p12;
	add.s32 	%r101, %r39, %r100;
	.loc 1 169 1
	// %r7 = destination pixel index of (x, y+1).
	mad.lo.s32 	%r7, %r101, %r12, %r3;
	.loc 1 169 1
	@%p13 bra 	BB1_38;

	cvta.to.global.u64 	%rd58, %rd4;
	mul.wide.s32 	%rd59, %r7, 16;
	add.s64 	%rd60, %rd58, %rd59;
	.loc 1 169 1
	st.global.v4.f32 	[%rd60], {%f191, %f192, %f193, %f27};
	bra.uni 	BB1_39;

BB1_38:
	cvta.to.global.u64 	%rd61, %rd4;
	mul.wide.s32 	%rd62, %r7, 8;
	add.s64 	%rd63, %rd61, %rd62;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f191;
	mov.b16 	%rs9, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f192;
	mov.b16 	%rs10, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f193;
	mov.b16 	%rs11, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f27;
	mov.b16 	%rs12, %temp;
}
	.loc 1 169 241
	st.global.v4.u16 	[%rd63], {%rs9, %rs10, %rs11, %rs12};

BB1_39:
	.loc 1 169 1
	// ---- Sample (x+1, y+1): used as matrix input 0; inputs 1 and 2 are %f65. ----
	add.s32 	%r137, %r89, 1;
	cvt.s64.s32	%rd64, %r137;
	add.s64 	%rd69, %rd64, %rd23;
	shl.b64 	%rd71, %rd69, 2;
	add.s64 	%rd72, %rd1, %rd71;
	ld.global.f32 	%f64, [%rd72];
	mul.ftz.f32 	%f65, %f64, 0f00000000;
	.loc 1 169 1
	// Row 2 of the matrix.
	mul.ftz.f32 	%f160, %f65, %f4;
	fma.rn.ftz.f32 	%f161, %f64, %f3, %f160;
	fma.rn.ftz.f32 	%f66, %f65, %f5, %f161;
	setp.ltu.ftz.f32	%p28, %f66, 0f00000000;
	@%p28 bra 	BB1_41;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f162, %f66;
	mul.ftz.f32 	%f163, %f162, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f194, %f163;
	bra.uni 	BB1_42;

BB1_41:
	neg.ftz.f32 	%f164, %f66;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f165, %f164;
	mul.ftz.f32 	%f166, %f165, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f167, %f166;
	neg.ftz.f32 	%f194, %f167;

BB1_42:
	// Row 1 of the matrix.
	mul.ftz.f32 	%f168, %f65, %f11;
	fma.rn.ftz.f32 	%f169, %f64, %f10, %f168;
	fma.rn.ftz.f32 	%f70, %f65, %f12, %f169;
	setp.ltu.ftz.f32	%p29, %f70, 0f00000000;
	@%p29 bra 	BB1_44;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f170, %f70;
	mul.ftz.f32 	%f171, %f170, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f195, %f171;
	bra.uni 	BB1_45;

BB1_44:
	neg.ftz.f32 	%f172, %f70;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f173, %f172;
	mul.ftz.f32 	%f174, %f173, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f175, %f174;
	neg.ftz.f32 	%f195, %f175;

BB1_45:
	// Row 0 of the matrix.
	mul.ftz.f32 	%f176, %f65, %f18;
	fma.rn.ftz.f32 	%f177, %f64, %f17, %f176;
	fma.rn.ftz.f32 	%f74, %f65, %f19, %f177;
	setp.ltu.ftz.f32	%p30, %f74, 0f00000000;
	@%p30 bra 	BB1_47;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f178, %f74;
	mul.ftz.f32 	%f179, %f178, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f196, %f179;
	bra.uni 	BB1_48;

BB1_47:
	neg.ftz.f32 	%f180, %f74;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f181, %f180;
	mul.ftz.f32 	%f182, %f181, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f183, %f182;
	neg.ftz.f32 	%f196, %f183;

BB1_48:
	.loc 1 169 1
	// %r8 = destination pixel index of (x+1, y+1).
	add.s32 	%r8, %r7, 1;
	.loc 1 169 1
	@%p13 bra 	BB1_50;

	cvta.to.global.u64 	%rd73, %rd4;
	mul.wide.s32 	%rd74, %r8, 16;
	add.s64 	%rd75, %rd73, %rd74;
	.loc 1 169 1
	st.global.v4.f32 	[%rd75], {%f194, %f195, %f196, %f27};
	bra.uni 	BB1_51;

BB1_50:
	cvta.to.global.u64 	%rd76, %rd4;
	mul.wide.s32 	%rd77, %r8, 8;
	add.s64 	%rd78, %rd76, %rd77;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f194;
	mov.b16 	%rs13, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f195;
	mov.b16 	%rs14, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f196;
	mov.b16 	%rs15, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f27;
	mov.b16 	%rs16, %temp;
}
	.loc 1 169 241
	st.global.v4.u16 	[%rd78], {%rs13, %rs14, %rs15, %rs16};

BB1_51:
	.loc 1 169 2
	ret;
}

.visible .entry PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel(
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_0,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_1,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_2,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_3,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_4,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_5,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_6,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_7,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_8,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_9,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_10,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_11,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_12
)
{
	.reg .pred 	%p<29>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<91>;
	.reg .f32 	%f<175>;
	.reg .s64 	%rd<56>;
	// demoted variable
	.shared .align 4 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix[36];

	ld.param.u64 	%rd4, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_0];
	ld.param.u32 	%r11, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_1];
	ld.param.u32 	%r12, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_2];
	ld.param.u32 	%r13, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_3];
	ld.param.u64 	%rd3, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_5];
	ld.param.u32 	%r14, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_6];
	ld.param.u32 	%r15, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_7];
	ld.param.u32 	%r16, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_8];
	ld.param.u32 	%r17, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_9];
	ld.param.u32 	%r18, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_10];
	ld.param.u32 	%r19, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_11];
	ld.param.u32 	%r20, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_12];
	cvta.to.global.u64 	%rd1, %rd4;
	.loc 1 217 1
	cvt.s64.s32	%rd2, %r20;
	.loc 1 91 1
	mov.u32 	%r1, %tid.y;
	setp.eq.s32	%p1, %r1, 0;
	mov.u32 	%r2, %tid.x;
	setp.lt.s32	%p2, %r2, 3;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB2_2;
	bra.uni 	BB2_1;

BB2_1:
	setp.ne.s32	%p4, %r19, 0;
	.loc 1 91 1
	mul.lo.s32 	%r21, %r2, 3;
	cvt.s64.s32	%rd5, %r21;
	add.s32 	%r22, %r21, %r20;
	.loc 1 91 1
	add.s64 	%rd6, %rd5, %rd2;
	selp.b64	%rd7, 2, 0, %p4;
	add.s64 	%rd8, %rd6, %rd7;
	shl.b64 	%rd9, %rd8, 2;
	add.s64 	%rd10, %rd1, %rd9;
	mul.wide.s32 	%rd11, %r21, 4;
	mov.u64 	%rd12, PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix;
	add.s64 	%rd13, %rd12, %rd11;
	.loc 1 91 1
	ld.global.f32 	%f72, [%rd10];
	st.shared.f32 	[%rd13], %f72;
	mul.wide.s32 	%rd14, %r22, 4;
	add.s64 	%rd15, %rd1, %rd14;
	.loc 1 91 1
	ld.global.f32 	%f73, [%rd15+4];
	st.shared.f32 	[%rd13+4], %f73;
	selp.b64	%rd16, 0, 2, %p4;
	add.s64 	%rd17, %rd6, %rd16;
	shl.b64 	%rd18, %rd17, 2;
	add.s64 	%rd19, %rd1, %rd18;
	ld.global.f32 	%f74, [%rd19];
	st.shared.f32 	[%rd13+8], %f74;

BB2_2:
	.loc 1 91 1
	bar.sync 	0;
	.loc 1 217 1
	mov.u32 	%r23, %ntid.x;
	mov.u32 	%r24, %ctaid.x;
	mad.lo.s32 	%r25, %r23, %r24, %r2;
	shl.b32 	%r3, %r25, 1;
	mov.u32 	%r26, %ntid.y;
	mov.u32 	%r27, %ctaid.y;
	mad.lo.s32 	%r28, %r26, %r27, %r1;
	shl.b32 	%r4, %r28, 1;
	.loc 1 217 1
	setp.lt.s32	%p5, %r3, %r17;
	setp.lt.s32	%p6, %r4, %r18;
	and.pred  	%p7, %p5, %p6;
	.loc 1 217 1
	@!%p7 bra 	BB2_51;
	bra.uni 	BB2_3;

BB2_3:
	.loc 1 217 1
	cvt.s64.s32	%rd20, %r11;
	.loc 1 217 1
	add.s32 	%r29, %r18, -1;
	mul.lo.s32 	%r30, %r29, %r12;
	cvt.s64.s32	%rd21, %r30;
	neg.s32 	%r31, %r12;
	.loc 1 217 1
	setp.eq.s32	%p8, %r13, 0;
	selp.b32	%r32, %r12, %r31, %p8;
	selp.b64	%rd22, 0, %rd21, %p8;
	add.s64 	%rd23, %rd22, %rd20;
	.loc 1 217 1
	sub.s32 	%r33, %r29, %r4;
	setp.eq.s32	%p9, %r15, 0;
	selp.b32	%r5, %r4, %r33, %p9;
	mad.lo.s32 	%r34, %r4, %r32, %r3;
	cvt.s64.s32	%rd24, %r34;
	add.s64 	%rd25, %rd24, %rd23;
	shl.b64 	%rd26, %rd25, 2;
	add.s64 	%rd27, %rd1, %rd26;
	add.s32 	%r35, %r4, 1;
	mad.lo.s32 	%r36, %r35, %r32, %r3;
	cvt.s64.s32	%rd28, %r36;
	add.s64 	%rd29, %rd28, %rd23;
	shl.b64 	%rd30, %rd29, 2;
	add.s64 	%rd31, %rd1, %rd30;
	ld.global.f32 	%f1, [%rd31];
	.loc 1 217 1
	ld.shared.f32 	%f75, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix+24];
	.loc 1 217 1
	ld.global.f32 	%f2, [%rd27];
	.loc 1 217 1
	mul.ftz.f32 	%f3, %f2, %f75;
	ld.shared.f32 	%f4, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix+28];
	.loc 1 217 1
	ld.global.f32 	%f5, [%rd27+4];
	.loc 1 217 1
	fma.rn.ftz.f32 	%f76, %f5, %f4, %f3;
	ld.shared.f32 	%f77, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix+32];
	.loc 1 217 1
	ld.global.f32 	%f6, [%rd31+4];
	.loc 1 217 1
	mul.ftz.f32 	%f7, %f6, %f77;
	add.ftz.f32 	%f8, %f76, %f7;
	setp.ltu.ftz.f32	%p10, %f8, 0f00000000;
	@%p10 bra 	BB2_5;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f78, %f8;
	mul.ftz.f32 	%f79, %f78, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f163, %f79;
	bra.uni 	BB2_6;

BB2_5:
	neg.ftz.f32 	%f80, %f8;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f81, %f80;
	mul.ftz.f32 	%f82, %f81, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f83, %f82;
	neg.ftz.f32 	%f163, %f83;

BB2_6:
	ld.shared.f32 	%f84, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix+12];
	mul.ftz.f32 	%f12, %f2, %f84;
	ld.shared.f32 	%f13, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix+16];
	fma.rn.ftz.f32 	%f85, %f5, %f13, %f12;
	ld.shared.f32 	%f86, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix+20];
	mul.ftz.f32 	%f14, %f6, %f86;
	add.ftz.f32 	%f15, %f85, %f14;
	setp.ltu.ftz.f32	%p11, %f15, 0f00000000;
	@%p11 bra 	BB2_8;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f87, %f15;
	mul.ftz.f32 	%f88, %f87, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f164, %f88;
	bra.uni 	BB2_9;

BB2_8:
	neg.ftz.f32 	%f89, %f15;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f90, %f89;
	mul.ftz.f32 	%f91, %f90, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f92, %f91;
	neg.ftz.f32 	%f164, %f92;

BB2_9:
	ld.shared.f32 	%f93, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix];
	mul.ftz.f32 	%f19, %f2, %f93;
	ld.shared.f32 	%f20, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix+4];
	fma.rn.ftz.f32 	%f94, %f5, %f20, %f19;
	ld.shared.f32 	%f95, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_171022_384_non_const_matrix+8];
	mul.ftz.f32 	%f21, %f6, %f95;
	add.ftz.f32 	%f22, %f94, %f21;
	setp.ltu.ftz.f32	%p12, %f22, 0f00000000;
	@%p12 bra 	BB2_11;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f96, %f22;
	mul.ftz.f32 	%f97, %f96, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f165, %f97;
	bra.uni 	BB2_12;

BB2_11:
	neg.ftz.f32 	%f98, %f22;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f99, %f98;
	mul.ftz.f32 	%f100, %f99, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f101, %f100;
	neg.ftz.f32 	%f165, %f101;

BB2_12:
	mov.f32 	%f29, 0f3F800000;
	.loc 1 217 1
	mad.lo.s32 	%r6, %r5, %r14, %r3;
	.loc 1 217 1
	setp.eq.s32	%p13, %r16, 0;
	@%p13 bra 	BB2_14;

	cvta.to.global.u64 	%rd32, %rd3;
	mul.wide.s32 	%rd33, %r6, 16;
	add.s64 	%rd34, %rd32, %rd33;
	.loc 1 217 1
	st.global.v4.f32 	[%rd34], {%f163, %f164, %f165, %f29};
	bra.uni 	BB2_15;

BB2_14:
	cvta.to.global.u64 	%rd35, %rd3;
	mul.wide.s32 	%rd36, %r6, 8;
	add.s64 	%rd37, %rd35, %rd36;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f163;
	mov.b16 	%rs1, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f164;
	mov.b16 	%rs2, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f165;
	mov.b16 	%rs3, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f29;
	mov.b16 	%rs4, %temp;
}
	.loc 1 217 241
	st.global.v4.u16 	[%rd37], {%rs1, %rs2, %rs3, %rs4};

BB2_15:
	.loc 1 217 1
	@%p10 bra 	BB2_17;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f103, %f8;
	mul.ftz.f32 	%f104, %f103, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f166, %f104;
	bra.uni 	BB2_18;

BB2_17:
	neg.ftz.f32 	%f105, %f8;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f106, %f105;
	mul.ftz.f32 	%f107, %f106, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f108, %f107;
	neg.ftz.f32 	%f166, %f108;

BB2_18:
	@%p11 bra 	BB2_20;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f109, %f15;
	mul.ftz.f32 	%f110, %f109, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f167, %f110;
	bra.uni 	BB2_21;

BB2_20:
	neg.ftz.f32 	%f111, %f15;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f112, %f111;
	mul.ftz.f32 	%f113, %f112, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f114, %f113;
	neg.ftz.f32 	%f167, %f114;

BB2_21:
	@%p12 bra 	BB2_23;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f115, %f22;
	mul.ftz.f32 	%f116, %f115, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f168, %f116;
	bra.uni 	BB2_24;

BB2_23:
	neg.ftz.f32 	%f117, %f22;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f118, %f117;
	mul.ftz.f32 	%f119, %f118, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f120, %f119;
	neg.ftz.f32 	%f168, %f120;

BB2_24:
	.loc 1 217 1
	add.s32 	%r7, %r6, 1;
	.loc 1 217 1
	@%p13 bra 	BB2_26;

	cvta.to.global.u64 	%rd38, %rd3;
	mul.wide.s32 	%rd39, %r7, 16;
	add.s64 	%rd40, %rd38, %rd39;
	.loc 1 217 1
	st.global.v4.f32 	[%rd40], {%f166, %f167, %f168, %f29};
	bra.uni 	BB2_27;

BB2_26:
	cvta.to.global.u64 	%rd41, %rd3;
	mul.wide.s32 	%rd42, %r7, 8;
	add.s64 	%rd43, %rd41, %rd42;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f166;
	mov.b16 	%rs5, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f167;
	mov.b16 	%rs6, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f168;
	mov.b16 	%rs7, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f29;
	mov.b16 	%rs8, %temp;
}
	.loc 1 217 241
	st.global.v4.u16 	[%rd43], {%rs5, %rs6, %rs7, %rs8};

BB2_27:
	.loc 1 217 1
	fma.rn.ftz.f32 	%f122, %f1, %f4, %f3;
	add.ftz.f32 	%f43, %f122, %f7;
	setp.ltu.ftz.f32	%p20, %f43, 0f00000000;
	@%p20 bra 	BB2_29;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f123, %f43;
	mul.ftz.f32 	%f124, %f123, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f169, %f124;
	bra.uni 	BB2_30;

BB2_29:
	neg.ftz.f32 	%f125, %f43;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f126, %f125;
	mul.ftz.f32 	%f127, %f126, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f128, %f127;
	neg.ftz.f32 	%f169, %f128;

BB2_30:
	fma.rn.ftz.f32 	%f129, %f1, %f13, %f12;
	add.ftz.f32 	%f47, %f129, %f14;
	setp.ltu.ftz.f32	%p21, %f47, 0f00000000;
	@%p21 bra 	BB2_32;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f130, %f47;
	mul.ftz.f32 	%f131, %f130, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f170, %f131;
	bra.uni 	BB2_33;

BB2_32:
	neg.ftz.f32 	%f132, %f47;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f133, %f132;
	mul.ftz.f32 	%f134, %f133, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f135, %f134;
	neg.ftz.f32 	%f170, %f135;

BB2_33:
	fma.rn.ftz.f32 	%f136, %f1, %f20, %f19;
	add.ftz.f32 	%f51, %f136, %f21;
	setp.ltu.ftz.f32	%p22, %f51, 0f00000000;
	@%p22 bra 	BB2_35;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f137, %f51;
	mul.ftz.f32 	%f138, %f137, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f171, %f138;
	bra.uni 	BB2_36;

BB2_35:
	neg.ftz.f32 	%f139, %f51;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f140, %f139;
	mul.ftz.f32 	%f141, %f140, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f142, %f141;
	neg.ftz.f32 	%f171, %f142;

BB2_36:
	.loc 1 217 1
	selp.b32	%r78, 1, -1, %p9;
	add.s32 	%r79, %r5, %r78;
	.loc 1 217 1
	mul.lo.s32 	%r8, %r79, %r14;
	add.s32 	%r9, %r8, %r3;
	.loc 1 217 1
	@%p13 bra 	BB2_38;

	cvta.to.global.u64 	%rd44, %rd3;
	mul.wide.s32 	%rd45, %r9, 16;
	add.s64 	%rd46, %rd44, %rd45;
	.loc 1 217 1
	st.global.v4.f32 	[%rd46], {%f169, %f170, %f171, %f29};
	bra.uni 	BB2_39;

BB2_38:
	cvta.to.global.u64 	%rd47, %rd3;
	mul.wide.s32 	%rd48, %r9, 8;
	add.s64 	%rd49, %rd47, %rd48;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f169;
	mov.b16 	%rs9, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f170;
	mov.b16 	%rs10, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f171;
	mov.b16 	%rs11, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f29;
	mov.b16 	%rs12, %temp;
}
	.loc 1 217 241
	st.global.v4.u16 	[%rd49], {%rs9, %rs10, %rs11, %rs12};

BB2_39:
	.loc 1 217 1
	@%p20 bra 	BB2_41;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f144, %f43;
	mul.ftz.f32 	%f145, %f144, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f172, %f145;
	bra.uni 	BB2_42;

BB2_41:
	neg.ftz.f32 	%f146, %f43;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f147, %f146;
	mul.ftz.f32 	%f148, %f147, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f149, %f148;
	neg.ftz.f32 	%f172, %f149;

BB2_42:
	@%p21 bra 	BB2_44;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f150, %f47;
	mul.ftz.f32 	%f151, %f150, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f173, %f151;
	bra.uni 	BB2_45;

BB2_44:
	neg.ftz.f32 	%f152, %f47;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f153, %f152;
	mul.ftz.f32 	%f154, %f153, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f155, %f154;
	neg.ftz.f32 	%f173, %f155;

BB2_45:
	@%p22 bra 	BB2_47;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f156, %f51;
	mul.ftz.f32 	%f157, %f156, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f174, %f157;
	bra.uni 	BB2_48;

BB2_47:
	neg.ftz.f32 	%f158, %f51;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f159, %f158;
	mul.ftz.f32 	%f160, %f159, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f161, %f160;
	neg.ftz.f32 	%f174, %f161;

BB2_48:
	.loc 1 217 1
	add.s32 	%r90, %r3, %r8;
	.loc 1 217 1
	add.s32 	%r10, %r90, 1;
	.loc 1 217 1
	@%p13 bra 	BB2_50;

	cvta.to.global.u64 	%rd50, %rd3;
	mul.wide.s32 	%rd51, %r10, 16;
	add.s64 	%rd52, %rd50, %rd51;
	.loc 1 217 1
	st.global.v4.f32 	[%rd52], {%f172, %f173, %f174, %f29};
	bra.uni 	BB2_51;

BB2_50:
	cvta.to.global.u64 	%rd53, %rd3;
	mul.wide.s32 	%rd54, %r10, 8;
	add.s64 	%rd55, %rd53, %rd54;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f172;
	mov.b16 	%rs13, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f173;
	mov.b16 	%rs14, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f174;
	mov.b16 	%rs15, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f29;
	mov.b16 	%rs16, %temp;
}
	.loc 1 217 241
	st.global.v4.u16 	[%rd55], {%rs13, %rs14, %rs15, %rs16};

BB2_51:
	.loc 1 217 2
	ret;
}

// ---------------------------------------------------------------------------
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel
//
// Each thread processes one 2x2 quad of a single-channel f32 source plane.
// The quad's top-left sample is (x, y) with
//     x = 2 * (ntid.x * ctaid.x + tid.x),  y = 2 * (ntid.y * ctaid.y + tid.y)
// and the thread writes four 4-component output pixels: (x,y), (x+1,y),
// (x,y+1), (x+1,y+1).
//
// For every output pixel three values are formed: the sample itself plus two
// averages of neighbouring samples. They are multiplied by a 3x3 f32 matrix
// staged in shared memory, and each result is passed through a
// sign-preserving power curve with exponent 0f3EE8BA2E (~0.4545, i.e. ~1/2.2).
// Output component order is {matrix row 2, row 1, row 0, 1.0}.
// (Going by the kernel name these are presumably B, G, R, A, with the quad
// laid out as an RGGB-style Bayer cell - confirm against the .cu source.)
//
// Parameters, as used by the code below:
//   param_0   u64  generic address of the f32 source buffer; the 3x3 matrix
//                  is read from this same buffer (see param_12)
//   param_1   u32  element offset added to every source sample index
//   param_2   u32  source row pitch, in f32 elements
//   param_3   u32  0: source row r is at +r*pitch; nonzero: row step is
//                  -pitch and indexing starts at row (param_10 - 1)
//   param_4   u32  never loaded by this kernel
//   param_5   u64  generic address of the destination buffer
//   param_6   u32  destination row pitch, in pixels
//   param_7   u32  0: quad row y goes to output row y (next row y+1);
//                  nonzero: to output row (param_10-1-y) (next row one less)
//   param_8   u32  nonzero: store 4 x f32 (16 bytes/pixel);
//                  0: convert to f16 and store 4 x 16-bit (8 bytes/pixel)
//   param_9   u32  bound for x (threads with x >= param_9 exit)
//   param_10  u32  bound for y (threads with y >= param_10 exit)
//   param_11  u32  nonzero: swap matrix columns 0 and 2 while staging
//   param_12  u32  element offset of the 3x3 matrix inside the param_0 buffer
//
// NOTE(review): only (x, y) is bounds-checked. Samples at x+1 and on row y+1
// are read, and pixels (x+1,y), (x,y+1), (x+1,y+1) are written, without their
// own checks - presumably width and height are always even; confirm with the
// host-side launcher.
// ---------------------------------------------------------------------------
.visible .entry PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel(
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_0,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_1,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_2,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_3,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_4,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_5,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_6,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_7,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_8,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_9,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_10,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_11,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_12
)
{
	.reg .pred 	%p<43>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<107>;
	.reg .f32 	%f<252>;
	.reg .s64 	%rd<72>;
	// demoted variable
	// 36 bytes of shared memory = 9 x f32: the per-CTA copy of the 3x3 matrix,
	// stored row-major (row i at byte offset 12*i).
	.shared .align 4 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix[36];

	// Load kernel arguments (param_4 is intentionally not read).
	ld.param.u64 	%rd9, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_0];
	ld.param.u32 	%r13, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_1];
	ld.param.u32 	%r14, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_2];
	ld.param.u32 	%r15, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_3];
	ld.param.u64 	%rd8, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_5];
	ld.param.u32 	%r16, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_6];
	ld.param.u32 	%r17, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_7];
	ld.param.u32 	%r18, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_8];
	ld.param.u32 	%r19, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_9];
	ld.param.u32 	%r20, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_10];
	ld.param.u32 	%r21, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_11];
	ld.param.u32 	%r22, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_12];
	// %rd1 = source buffer as a global-space address.
	cvta.to.global.u64 	%rd1, %rd9;
	.loc 1 268 1
	// %rd2 = matrix element offset (param_12), sign-extended.
	cvt.s64.s32	%rd2, %r22;
	.loc 1 91 1
	// Only threads with tid.y == 0 and tid.x < 3 stage the matrix; all other
	// threads skip straight to the barrier at BB3_2.
	mov.u32 	%r1, %tid.y;
	setp.eq.s32	%p1, %r1, 0;
	mov.u32 	%r2, %tid.x;
	setp.lt.s32	%p2, %r2, 3;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB3_2;
	bra.uni 	BB3_1;

BB3_1:
	// Thread t = tid.x copies matrix row t (elements 3t .. 3t+2) from global
	// to shared memory. %p4 (param_11 != 0) chooses the source column for the
	// outer two entries:
	//   shared[3t+0] = src[3t + (p4 ? 2 : 0)]
	//   shared[3t+1] = src[3t + 1]
	//   shared[3t+2] = src[3t + (p4 ? 0 : 2)]
	setp.ne.s32	%p4, %r21, 0;
	.loc 1 91 1
	mul.lo.s32 	%r23, %r2, 3;
	cvt.s64.s32	%rd10, %r23;
	add.s32 	%r24, %r23, %r22;
	.loc 1 91 1
	add.s64 	%rd11, %rd10, %rd2;
	selp.b64	%rd12, 2, 0, %p4;
	add.s64 	%rd13, %rd11, %rd12;
	shl.b64 	%rd14, %rd13, 2;
	add.s64 	%rd15, %rd1, %rd14;
	// %rd18 = shared address of row t (byte offset 12*t).
	mul.wide.s32 	%rd16, %r23, 4;
	mov.u64 	%rd17, PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix;
	add.s64 	%rd18, %rd17, %rd16;
	.loc 1 91 1
	ld.global.f32 	%f107, [%rd15];
	st.shared.f32 	[%rd18], %f107;
	mul.wide.s32 	%rd19, %r24, 4;
	add.s64 	%rd20, %rd1, %rd19;
	.loc 1 91 1
	// Middle column is copied unchanged.
	ld.global.f32 	%f108, [%rd20+4];
	st.shared.f32 	[%rd18+4], %f108;
	selp.b64	%rd21, 0, 2, %p4;
	add.s64 	%rd22, %rd11, %rd21;
	shl.b64 	%rd23, %rd22, 2;
	add.s64 	%rd24, %rd1, %rd23;
	ld.global.f32 	%f109, [%rd24];
	st.shared.f32 	[%rd18+8], %f109;

BB3_2:
	.loc 1 91 1
	// Barrier is reached by every thread of the CTA (it precedes the
	// bounds-check exit), so the shared matrix is complete before any read.
	bar.sync 	0;
	.loc 1 268 1
	// %r3 = x = 2 * (ntid.x * ctaid.x + tid.x)
	mov.u32 	%r25, %ntid.x;
	mov.u32 	%r26, %ctaid.x;
	mad.lo.s32 	%r27, %r25, %r26, %r2;
	shl.b32 	%r3, %r27, 1;
	// %r4 = y = 2 * (ntid.y * ctaid.y + tid.y)
	mov.u32 	%r28, %ntid.y;
	mov.u32 	%r29, %ctaid.y;
	mad.lo.s32 	%r30, %r28, %r29, %r1;
	shl.b32 	%r4, %r30, 1;
	.loc 1 268 1
	// Exit unless x < param_9 and y < param_10.
	setp.lt.s32	%p5, %r3, %r19;
	setp.lt.s32	%p6, %r4, %r20;
	and.pred  	%p7, %p5, %p6;
	.loc 1 268 1
	@!%p7 bra 	BB3_71;
	bra.uni 	BB3_3;

BB3_3:
	.loc 1 268 1
	// Source addressing setup.
	//   %r5  = signed row step: +pitch if param_3 == 0, else -pitch
	//   %rd3 = param_1 + (param_3 == 0 ? 0 : (param_10 - 1) * pitch)
	// so a sample at (col, row) lives at %rd1 + 4 * (row * %r5 + col + %rd3).
	cvt.s64.s32	%rd25, %r13;
	.loc 1 268 1
	add.s32 	%r31, %r20, -1;
	mul.lo.s32 	%r32, %r31, %r14;
	cvt.s64.s32	%rd26, %r32;
	neg.s32 	%r33, %r14;
	.loc 1 268 1
	setp.eq.s32	%p8, %r15, 0;
	selp.b32	%r5, %r14, %r33, %p8;
	selp.b64	%rd27, 0, %rd26, %p8;
	add.s64 	%rd3, %rd27, %rd25;
	.loc 1 268 1
	// %r6 = param_9 - 2 and %r7 = param_10 - 2: limits for the "+2" neighbours.
	add.s32 	%r6, %r19, -2;
	add.s32 	%r7, %r20, -2;
	// %rd4 -> sample (x, y).  %f1 = S(x, y), %f2 = S(x+1, y).
	mad.lo.s32 	%r34, %r4, %r5, %r3;
	cvt.s64.s32	%rd28, %r34;
	add.s64 	%rd29, %rd28, %rd3;
	shl.b64 	%rd30, %rd29, 2;
	add.s64 	%rd4, %rd1, %rd30;
	ld.global.f32 	%f1, [%rd4];
	ld.global.f32 	%f2, [%rd4+4];
	// %rd5 -> sample (x, y+1).  %f3 = S(x, y+1), %f4 = S(x+1, y+1).
	add.s32 	%r35, %r4, 1;
	mad.lo.s32 	%r36, %r35, %r5, %r3;
	cvt.s64.s32	%rd31, %r36;
	add.s64 	%rd32, %rd31, %rd3;
	shl.b64 	%rd33, %rd32, 2;
	add.s64 	%rd5, %rd1, %rd33;
	ld.global.f32 	%f3, [%rd5];
	ld.global.f32 	%f4, [%rd5+4];
	// Neighbour loads. Each one has an in-quad fallback that is used when the
	// neighbour would lie outside the image.
	// %f6 = S(x-1, y+1), or %f4 when x == 0 (%p9).
	setp.eq.s32	%p9, %r3, 0;
	mov.f32 	%f231, %f4;
	@%p9 bra 	BB3_5;

	ld.global.f32 	%f5, [%rd5+-4];
	mov.f32 	%f231, %f5;

BB3_5:
	.loc 1 268 1
	mov.f32 	%f6, %f231;
	// %p10 = (x >= param_9 - 2): no column x+2 available.
	setp.ge.s32	%p10, %r3, %r6;
	.loc 1 268 1
	// %f8 = S(x+2, y+1), or %f3 at the right edge.
	mov.f32 	%f226, %f3;
	@%p10 bra 	BB3_7;

	ld.global.f32 	%f7, [%rd5+8];
	mov.f32 	%f226, %f7;

BB3_7:
	.loc 1 268 1
	mov.f32 	%f8, %f226;
	// %f10 = S(x-1, y), or %f2 when x == 0.
	mov.f32 	%f234, %f2;
	@%p9 bra 	BB3_9;

	ld.global.f32 	%f9, [%rd4+-4];
	mov.f32 	%f234, %f9;

BB3_9:
	.loc 1 268 1
	mov.f32 	%f10, %f234;
	// %f12 = S(x+2, y), or %f1 at the right edge.
	mov.f32 	%f239, %f1;
	@%p10 bra 	BB3_11;

	ld.global.f32 	%f11, [%rd4+8];
	mov.f32 	%f239, %f11;

BB3_11:
	.loc 1 268 1
	mov.f32 	%f12, %f239;
	// %p14 = (y == 0); %p15 = (y == 0 || x == 0).
	setp.eq.s32	%p14, %r4, 0;
	.loc 1 268 1
	or.pred  	%p15, %p14, %p9;
	// %rd6 -> sample (x, y-1). Computed unconditionally, dereferenced only
	// under the guards below.
	add.s32 	%r37, %r4, -1;
	mad.lo.s32 	%r38, %r37, %r5, %r3;
	cvt.s64.s32	%rd34, %r38;
	add.s64 	%rd35, %rd34, %rd3;
	shl.b64 	%rd36, %rd35, 2;
	add.s64 	%rd6, %rd1, %rd36;
	.loc 1 268 1
	// %f230 = S(x-1, y-1), or %f4 on the top or left edge.
	mov.f32 	%f230, %f4;
	@%p15 bra 	BB3_13;

	ld.global.f32 	%f230, [%rd6+-4];

BB3_13:
	// %f225 = S(x, y-1), or %f3 on the top edge.
	mov.f32 	%f225, %f3;
	@%p14 bra 	BB3_15;

	ld.global.f32 	%f225, [%rd6];

BB3_15:
	// %f229 = S(x+1, y-1), or %f4 on the top edge.
	mov.f32 	%f229, %f4;
	@%p14 bra 	BB3_17;

	ld.global.f32 	%f229, [%rd6+4];

BB3_17:
	// %p18 = (y >= param_10 - 2): no row y+2 available.
	setp.ge.s32	%p18, %r4, %r7;
	.loc 1 268 1
	// %f238 = S(x, y+2), or %f1 on the bottom edge.
	mov.f32 	%f238, %f1;
	@%p18 bra 	BB3_19;

	add.s32 	%r41, %r4, 2;
	mad.lo.s32 	%r42, %r41, %r5, %r3;
	cvt.s64.s32	%rd41, %r42;
	add.s64 	%rd42, %rd41, %rd3;
	shl.b64 	%rd43, %rd42, 2;
	add.s64 	%rd44, %rd1, %rd43;
	ld.global.f32 	%f238, [%rd44];

BB3_19:
	// %rd7 -> sample (x, y+2); again only dereferenced under a guard.
	add.s32 	%r43, %r4, 2;
	mad.lo.s32 	%r44, %r43, %r5, %r3;
	cvt.s64.s32	%rd45, %r44;
	add.s64 	%rd46, %rd45, %rd3;
	shl.b64 	%rd47, %rd46, 2;
	add.s64 	%rd7, %rd1, %rd47;
	.loc 1 268 1
	// %f233 = S(x+1, y+2), or %f2 on the bottom edge.
	mov.f32 	%f233, %f2;
	@%p18 bra 	BB3_21;

	ld.global.f32 	%f233, [%rd7+4];

BB3_21:
	// %f237 = S(x+2, y+2), or %f1 on the bottom or right edge.
	or.pred  	%p22, %p18, %p10;
	mov.f32 	%f237, %f1;
	@%p22 bra 	BB3_23;

	ld.global.f32 	%f237, [%rd7+8];

BB3_23:
	.loc 1 268 1
	// ---- Output pixel (x, y) -------------------------------------------
	// %f25 = 0.25 * (S(x-1,y-1) + S(x+1,y-1) + S(x-1,y+1) + S(x+1,y+1))
	//        (the four diagonal neighbours; 0f3E800000 = 0.25)
	add.ftz.f32 	%f110, %f230, %f229;
	add.ftz.f32 	%f111, %f110, %f6;
	add.ftz.f32 	%f112, %f111, %f4;
	mul.ftz.f32 	%f25, %f112, 0f3E800000;
	// %f26 = 0.25 * (S(x-1,y) + S(x+1,y) + S(x,y-1) + S(x,y+1))
	//        (the four edge-adjacent neighbours)
	add.ftz.f32 	%f113, %f10, %f2;
	add.ftz.f32 	%f114, %f113, %f225;
	add.ftz.f32 	%f115, %f114, %f3;
	mul.ftz.f32 	%f26, %f115, 0f3E800000;
	// Matrix row 2 (%f27, %f28, %f29), kept live for the other three pixels.
	// %f30 = S(x,y)*m[2][0] + %f26*m[2][1] + %f25*m[2][2]
	ld.shared.f32 	%f27, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix+24];
	ld.shared.f32 	%f28, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix+28];
	mul.ftz.f32 	%f116, %f26, %f28;
	fma.rn.ftz.f32 	%f117, %f1, %f27, %f116;
	ld.shared.f32 	%f29, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix+32];
	fma.rn.ftz.f32 	%f30, %f25, %f29, %f117;
	// Sign-preserving power curve: out = sign(v) * 2^(log2(|v|) * 0.4545...).
	// setp.ltu is also true for unordered operands, so a NaN takes the
	// "negative" branch.
	setp.ltu.ftz.f32	%p23, %f30, 0f00000000;
	@%p23 bra 	BB3_25;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f118, %f30;
	mul.ftz.f32 	%f119, %f118, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f240, %f119;
	bra.uni 	BB3_26;

BB3_25:
	neg.ftz.f32 	%f120, %f30;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f121, %f120;
	mul.ftz.f32 	%f122, %f121, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f123, %f122;
	neg.ftz.f32 	%f240, %f123;

BB3_26:
	// Matrix row 1 (%f34, %f35, %f36): same inputs, same power curve -> %f241.
	ld.shared.f32 	%f34, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix+12];
	ld.shared.f32 	%f35, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix+16];
	mul.ftz.f32 	%f124, %f26, %f35;
	fma.rn.ftz.f32 	%f125, %f1, %f34, %f124;
	ld.shared.f32 	%f36, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix+20];
	fma.rn.ftz.f32 	%f37, %f25, %f36, %f125;
	setp.ltu.ftz.f32	%p24, %f37, 0f00000000;
	@%p24 bra 	BB3_28;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f126, %f37;
	mul.ftz.f32 	%f127, %f126, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f241, %f127;
	bra.uni 	BB3_29;

BB3_28:
	neg.ftz.f32 	%f128, %f37;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f129, %f128;
	mul.ftz.f32 	%f130, %f129, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f131, %f130;
	neg.ftz.f32 	%f241, %f131;

BB3_29:
	// Matrix row 0 (%f41, %f42, %f43) -> %f242.
	ld.shared.f32 	%f41, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix];
	ld.shared.f32 	%f42, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix+4];
	mul.ftz.f32 	%f132, %f26, %f42;
	fma.rn.ftz.f32 	%f133, %f1, %f41, %f132;
	ld.shared.f32 	%f43, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_171026_383_non_const_matrix+8];
	fma.rn.ftz.f32 	%f44, %f25, %f43, %f133;
	setp.ltu.ftz.f32	%p25, %f44, 0f00000000;
	@%p25 bra 	BB3_31;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f134, %f44;
	mul.ftz.f32 	%f135, %f134, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f242, %f135;
	bra.uni 	BB3_32;

BB3_31:
	neg.ftz.f32 	%f136, %f44;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f137, %f136;
	mul.ftz.f32 	%f138, %f137, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f139, %f138;
	neg.ftz.f32 	%f242, %f139;

BB3_32:
	// %f51 = 1.0, written as the fourth component of every output pixel.
	mov.f32 	%f51, 0f3F800000;
	.loc 1 268 1
	// Destination row %r52 = (param_7 == 0) ? y : (param_10 - 1 - y).
	sub.s32 	%r51, %r31, %r4;
	setp.eq.s32	%p26, %r17, 0;
	selp.b32	%r52, %r4, %r51, %p26;
	.loc 1 268 1
	// %r8 = destination pixel index = row * param_6 + x.
	mad.lo.s32 	%r8, %r52, %r16, %r3;
	.loc 1 268 1
	// %p27 = (param_8 == 0) selects the f16 store path; otherwise store f32.
	setp.eq.s32	%p27, %r18, 0;
	@%p27 bra 	BB3_34;

	// f32 path: 16 bytes per pixel, one vector store.
	cvta.to.global.u64 	%rd48, %rd8;
	mul.wide.s32 	%rd49, %r8, 16;
	add.s64 	%rd50, %rd48, %rd49;
	.loc 1 268 1
	st.global.v4.f32 	[%rd50], {%f240, %f241, %f242, %f51};
	bra.uni 	BB3_35;

BB3_34:
	// f16 path: 8 bytes per pixel; each component is rounded to nearest-even
	// f16 and the four 16-bit values are stored with one vector store.
	cvta.to.global.u64 	%rd51, %rd8;
	mul.wide.s32 	%rd52, %r8, 8;
	add.s64 	%rd53, %rd51, %rd52;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f240;
	mov.b16 	%rs1, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f241;
	mov.b16 	%rs2, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f242;
	mov.b16 	%rs3, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f51;
	mov.b16 	%rs4, %temp;
}
	.loc 1 268 241
	st.global.v4.u16 	[%rd53], {%rs1, %rs2, %rs3, %rs4};

BB3_35:
	.loc 1 268 1
	// ---- Output pixel (x+1, y) -----------------------------------------
	// %f52 = 0.5 * (S(x+1,y-1) + S(x+1,y+1))   vertical neighbours
	// %f54 = 0.5 * (S(x,y)   + S(x+2,y))       horizontal neighbours
	// (0f3F000000 = 0.5; %f53 = S(x,y) + S(x+2,y) is reused at BB3_59)
	// result_r = %f54*m[r][0] + S(x+1,y)*m[r][1] + %f52*m[r][2]
	add.ftz.f32 	%f141, %f229, %f4;
	mul.ftz.f32 	%f52, %f141, 0f3F000000;
	add.ftz.f32 	%f53, %f1, %f12;
	mul.ftz.f32 	%f54, %f53, 0f3F000000;
	// Matrix row 2 -> %f243.
	mul.ftz.f32 	%f142, %f2, %f28;
	fma.rn.ftz.f32 	%f143, %f54, %f27, %f142;
	fma.rn.ftz.f32 	%f55, %f52, %f29, %f143;
	setp.ltu.ftz.f32	%p29, %f55, 0f00000000;
	@%p29 bra 	BB3_37;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f144, %f55;
	mul.ftz.f32 	%f145, %f144, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f243, %f145;
	bra.uni 	BB3_38;

BB3_37:
	neg.ftz.f32 	%f146, %f55;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f147, %f146;
	mul.ftz.f32 	%f148, %f147, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f149, %f148;
	neg.ftz.f32 	%f243, %f149;

BB3_38:
	// Matrix row 1 -> %f244.
	mul.ftz.f32 	%f150, %f2, %f35;
	fma.rn.ftz.f32 	%f151, %f54, %f34, %f150;
	fma.rn.ftz.f32 	%f59, %f52, %f36, %f151;
	setp.ltu.ftz.f32	%p30, %f59, 0f00000000;
	@%p30 bra 	BB3_40;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f152, %f59;
	mul.ftz.f32 	%f153, %f152, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f244, %f153;
	bra.uni 	BB3_41;

BB3_40:
	neg.ftz.f32 	%f154, %f59;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f155, %f154;
	mul.ftz.f32 	%f156, %f155, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f157, %f156;
	neg.ftz.f32 	%f244, %f157;

BB3_41:
	// Matrix row 0 -> %f245.
	mul.ftz.f32 	%f158, %f2, %f42;
	fma.rn.ftz.f32 	%f159, %f54, %f41, %f158;
	fma.rn.ftz.f32 	%f63, %f52, %f43, %f159;
	setp.ltu.ftz.f32	%p31, %f63, 0f00000000;
	@%p31 bra 	BB3_43;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f160, %f63;
	mul.ftz.f32 	%f161, %f160, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f245, %f161;
	bra.uni 	BB3_44;

BB3_43:
	neg.ftz.f32 	%f162, %f63;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f163, %f162;
	mul.ftz.f32 	%f164, %f163, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f165, %f164;
	neg.ftz.f32 	%f245, %f165;

BB3_44:
	.loc 1 268 1
	// %r9 = destination index of (x+1, y): one pixel right of %r8.
	add.s32 	%r9, %r8, 1;
	.loc 1 268 1
	@%p27 bra 	BB3_46;

	// f32 store.
	cvta.to.global.u64 	%rd54, %rd8;
	mul.wide.s32 	%rd55, %r9, 16;
	add.s64 	%rd56, %rd54, %rd55;
	.loc 1 268 1
	st.global.v4.f32 	[%rd56], {%f243, %f244, %f245, %f51};
	bra.uni 	BB3_47;

BB3_46:
	// f16 store.
	cvta.to.global.u64 	%rd57, %rd8;
	mul.wide.s32 	%rd58, %r9, 8;
	add.s64 	%rd59, %rd57, %rd58;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f243;
	mov.b16 	%rs5, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f244;
	mov.b16 	%rs6, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f245;
	mov.b16 	%rs7, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f51;
	mov.b16 	%rs8, %temp;
}
	.loc 1 268 241
	st.global.v4.u16 	[%rd59], {%rs5, %rs6, %rs7, %rs8};

BB3_47:
	.loc 1 268 1
	// ---- Output pixel (x, y+1) -----------------------------------------
	// %f71 = 0.5 * (S(x-1,y+1) + S(x+1,y+1))   horizontal neighbours
	// %f72 = 0.5 * (S(x,y)     + S(x,y+2))     vertical neighbours
	// result_r = %f72*m[r][0] + S(x,y+1)*m[r][1] + %f71*m[r][2]
	add.ftz.f32 	%f167, %f6, %f4;
	mul.ftz.f32 	%f71, %f167, 0f3F000000;
	add.ftz.f32 	%f168, %f1, %f238;
	mul.ftz.f32 	%f72, %f168, 0f3F000000;
	// Matrix row 2 -> %f246.
	mul.ftz.f32 	%f169, %f3, %f28;
	fma.rn.ftz.f32 	%f170, %f72, %f27, %f169;
	fma.rn.ftz.f32 	%f73, %f71, %f29, %f170;
	setp.ltu.ftz.f32	%p34, %f73, 0f00000000;
	@%p34 bra 	BB3_49;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f171, %f73;
	mul.ftz.f32 	%f172, %f171, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f246, %f172;
	bra.uni 	BB3_50;

BB3_49:
	neg.ftz.f32 	%f173, %f73;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f174, %f173;
	mul.ftz.f32 	%f175, %f174, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f176, %f175;
	neg.ftz.f32 	%f246, %f176;

BB3_50:
	// Matrix row 1 -> %f247.
	mul.ftz.f32 	%f177, %f3, %f35;
	fma.rn.ftz.f32 	%f178, %f72, %f34, %f177;
	fma.rn.ftz.f32 	%f77, %f71, %f36, %f178;
	setp.ltu.ftz.f32	%p35, %f77, 0f00000000;
	@%p35 bra 	BB3_52;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f179, %f77;
	mul.ftz.f32 	%f180, %f179, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f247, %f180;
	bra.uni 	BB3_53;

BB3_52:
	neg.ftz.f32 	%f181, %f77;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f182, %f181;
	mul.ftz.f32 	%f183, %f182, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f184, %f183;
	neg.ftz.f32 	%f247, %f184;

BB3_53:
	// Matrix row 0 -> %f248.
	mul.ftz.f32 	%f185, %f3, %f42;
	fma.rn.ftz.f32 	%f186, %f72, %f41, %f185;
	fma.rn.ftz.f32 	%f81, %f71, %f43, %f186;
	setp.ltu.ftz.f32	%p36, %f81, 0f00000000;
	@%p36 bra 	BB3_55;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f187, %f81;
	mul.ftz.f32 	%f188, %f187, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f248, %f188;
	bra.uni 	BB3_56;

BB3_55:
	neg.ftz.f32 	%f189, %f81;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f190, %f189;
	mul.ftz.f32 	%f191, %f190, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f192, %f191;
	neg.ftz.f32 	%f248, %f192;

BB3_56:
	.loc 1 268 1
	// Destination row for the quad's second row: %r52 + 1 when param_7 == 0,
	// %r52 - 1 otherwise.  %r10 = that row * param_6; %r11 = %r10 + x.
	selp.b32	%r94, 1, -1, %p26;
	add.s32 	%r95, %r52, %r94;
	.loc 1 268 1
	mul.lo.s32 	%r10, %r95, %r16;
	add.s32 	%r11, %r10, %r3;
	.loc 1 268 1
	@%p27 bra 	BB3_58;

	// f32 store.
	cvta.to.global.u64 	%rd60, %rd8;
	mul.wide.s32 	%rd61, %r11, 16;
	add.s64 	%rd62, %rd60, %rd61;
	.loc 1 268 1
	st.global.v4.f32 	[%rd62], {%f246, %f247, %f248, %f51};
	bra.uni 	BB3_59;

BB3_58:
	// f16 store.
	cvta.to.global.u64 	%rd63, %rd8;
	mul.wide.s32 	%rd64, %r11, 8;
	add.s64 	%rd65, %rd63, %rd64;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f246;
	mov.b16 	%rs9, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f247;
	mov.b16 	%rs10, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f248;
	mov.b16 	%rs11, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f51;
	mov.b16 	%rs12, %temp;
}
	.loc 1 268 241
	st.global.v4.u16 	[%rd65], {%rs9, %rs10, %rs11, %rs12};

BB3_59:
	.loc 1 268 1
	// ---- Output pixel (x+1, y+1) ---------------------------------------
	// %f89 = 0.25 * (S(x+1,y) + S(x,y+1) + S(x+2,y+1) + S(x+1,y+2))
	//        (the four edge-adjacent neighbours)
	// %f90 = 0.25 * (S(x,y) + S(x+2,y) + S(x,y+2) + S(x+2,y+2))
	//        (the four diagonal neighbours; %f53 already holds the first two)
	// result_r = %f90*m[r][0] + %f89*m[r][1] + S(x+1,y+1)*m[r][2]
	add.ftz.f32 	%f194, %f2, %f3;
	add.ftz.f32 	%f195, %f194, %f8;
	add.ftz.f32 	%f196, %f195, %f233;
	mul.ftz.f32 	%f89, %f196, 0f3E800000;
	add.ftz.f32 	%f197, %f53, %f238;
	add.ftz.f32 	%f198, %f197, %f237;
	mul.ftz.f32 	%f90, %f198, 0f3E800000;
	// Matrix row 2 -> %f249.
	mul.ftz.f32 	%f199, %f89, %f28;
	fma.rn.ftz.f32 	%f200, %f90, %f27, %f199;
	fma.rn.ftz.f32 	%f91, %f4, %f29, %f200;
	setp.ltu.ftz.f32	%p39, %f91, 0f00000000;
	@%p39 bra 	BB3_61;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f201, %f91;
	mul.ftz.f32 	%f202, %f201, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f249, %f202;
	bra.uni 	BB3_62;

BB3_61:
	neg.ftz.f32 	%f203, %f91;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f204, %f203;
	mul.ftz.f32 	%f205, %f204, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f206, %f205;
	neg.ftz.f32 	%f249, %f206;

BB3_62:
	// Matrix row 1 -> %f250.
	mul.ftz.f32 	%f207, %f89, %f35;
	fma.rn.ftz.f32 	%f208, %f90, %f34, %f207;
	fma.rn.ftz.f32 	%f95, %f4, %f36, %f208;
	setp.ltu.ftz.f32	%p40, %f95, 0f00000000;
	@%p40 bra 	BB3_64;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f209, %f95;
	mul.ftz.f32 	%f210, %f209, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f250, %f210;
	bra.uni 	BB3_65;

BB3_64:
	neg.ftz.f32 	%f211, %f95;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f212, %f211;
	mul.ftz.f32 	%f213, %f212, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f214, %f213;
	neg.ftz.f32 	%f250, %f214;

BB3_65:
	// Matrix row 0 -> %f251.
	mul.ftz.f32 	%f215, %f89, %f42;
	fma.rn.ftz.f32 	%f216, %f90, %f41, %f215;
	fma.rn.ftz.f32 	%f99, %f4, %f43, %f216;
	setp.ltu.ftz.f32	%p41, %f99, 0f00000000;
	@%p41 bra 	BB3_67;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f217, %f99;
	mul.ftz.f32 	%f218, %f217, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f251, %f218;
	bra.uni 	BB3_68;

BB3_67:
	neg.ftz.f32 	%f219, %f99;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f220, %f219;
	mul.ftz.f32 	%f221, %f220, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f222, %f221;
	neg.ftz.f32 	%f251, %f222;

BB3_68:
	.loc 1 268 1
	// %r12 = destination index of (x+1, y+1) = %r10 + x + 1.
	add.s32 	%r106, %r3, %r10;
	.loc 1 268 1
	add.s32 	%r12, %r106, 1;
	.loc 1 268 1
	@%p27 bra 	BB3_70;

	// f32 store.
	cvta.to.global.u64 	%rd66, %rd8;
	mul.wide.s32 	%rd67, %r12, 16;
	add.s64 	%rd68, %rd66, %rd67;
	.loc 1 268 1
	st.global.v4.f32 	[%rd68], {%f249, %f250, %f251, %f51};
	bra.uni 	BB3_71;

BB3_70:
	// f16 store.
	cvta.to.global.u64 	%rd69, %rd8;
	mul.wide.s32 	%rd70, %r12, 8;
	add.s64 	%rd71, %rd69, %rd70;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f249;
	mov.b16 	%rs13, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f250;
	mov.b16 	%rs14, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f251;
	mov.b16 	%rs15, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f51;
	mov.b16 	%rs16, %temp;
}
	.loc 1 268 241
	st.global.v4.u16 	[%rd71], {%rs13, %rs14, %rs15, %rs16};

BB3_71:
	.loc 1 268 2
	// Common exit: reached after the last store and by out-of-range threads.
	ret;
}

.visible .entry PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel(
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_0,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_1,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_2,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_3,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_4,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_5,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_6,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_7,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_8,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_9,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_10
)
{
	.reg .pred 	%p<58>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<283>;
	.reg .f32 	%f<316>;
	.reg .s64 	%rd<135>;
	.reg .f64 	%fd<13>;


	ld.param.u64 	%rd7, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_0];
	ld.param.u32 	%r13, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_1];
	ld.param.u32 	%r14, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_2];
	ld.param.u32 	%r15, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_3];
	ld.param.u64 	%rd8, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_5];
	ld.param.u32 	%r16, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_6];
	ld.param.u32 	%r17, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_7];
	ld.param.u32 	%r18, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_8];
	ld.param.u32 	%r19, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_9];
	cvta.to.global.u64 	%rd1, %rd7;
	.loc 1 398 1
	mov.u32 	%r20, %ntid.x;
	mov.u32 	%r21, %ctaid.x;
	mov.u32 	%r22, %tid.x;
	mad.lo.s32 	%r23, %r20, %r21, %r22;
	shl.b32 	%r1, %r23, 1;
	mov.u32 	%r24, %ntid.y;
	mov.u32 	%r25, %ctaid.y;
	mov.u32 	%r26, %tid.y;
	mad.lo.s32 	%r27, %r24, %r25, %r26;
	shl.b32 	%r2, %r27, 1;
	.loc 1 398 1
	setp.lt.s32	%p1, %r1, %r18;
	setp.lt.s32	%p2, %r2, %r19;
	and.pred  	%p3, %p1, %p2;
	.loc 1 398 1
	@!%p3 bra 	BB4_81;
	bra.uni 	BB4_1;

BB4_1:
	.loc 1 398 1
	cvt.s64.s32	%rd9, %r13;
	.loc 1 398 1
	add.s32 	%r28, %r19, -1;
	mul.lo.s32 	%r29, %r28, %r14;
	cvt.s64.s32	%rd10, %r29;
	neg.s32 	%r30, %r14;
	.loc 1 398 1
	setp.eq.s32	%p4, %r15, 0;
	selp.b32	%r3, %r14, %r30, %p4;
	selp.b64	%rd11, 0, %rd10, %p4;
	add.s64 	%rd2, %rd11, %rd9;
	.loc 1 398 1
	add.s32 	%r4, %r18, -3;
	mul.lo.s32 	%r5, %r2, %r3;
	add.s32 	%r31, %r5, %r1;
	cvt.s64.s32	%rd12, %r31;
	add.s64 	%rd13, %rd12, %rd2;
	shl.b64 	%rd14, %rd13, 2;
	add.s64 	%rd3, %rd1, %rd14;
	ld.global.f32 	%f1, [%rd3];
	setp.ltu.ftz.f32	%p5, %f1, 0f00000000;
	@%p5 bra 	BB4_3;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f97, %f1;
	mul.ftz.f32 	%f98, %f97, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f304, %f98;
	bra.uni 	BB4_4;

BB4_3:
	.loc 1 398 231
	neg.ftz.f32 	%f99, %f1;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f100, %f99;
	mul.ftz.f32 	%f101, %f100, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f102, %f101;
	neg.ftz.f32 	%f304, %f102;

BB4_4:
	.loc 1 398 1
	mov.f32 	%f4, %f304;
	add.s32 	%r32, %r1, %r5;
	.loc 1 398 1
	add.s32 	%r33, %r32, 1;
	cvt.s64.s32	%rd15, %r32;
	.loc 1 398 1
	cvt.s64.s32	%rd16, %r33;
	add.s64 	%rd17, %rd15, %rd2;
	.loc 1 398 1
	add.s64 	%rd18, %rd16, %rd2;
	shl.b64 	%rd19, %rd17, 2;
	add.s64 	%rd4, %rd1, %rd19;
	.loc 1 398 1
	shl.b64 	%rd20, %rd18, 2;
	add.s64 	%rd5, %rd1, %rd20;
	ld.global.f32 	%f5, [%rd4+4];
	setp.ltu.ftz.f32	%p6, %f5, 0f00000000;
	@%p6 bra 	BB4_6;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f103, %f5;
	mul.ftz.f32 	%f104, %f103, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f293, %f104;
	bra.uni 	BB4_7;

BB4_6:
	neg.ftz.f32 	%f105, %f5;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f106, %f105;
	mul.ftz.f32 	%f107, %f106, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f108, %f107;
	neg.ftz.f32 	%f293, %f108;

BB4_7:
	.loc 1 398 1
	mov.f32 	%f8, %f293;
	add.s32 	%r34, %r2, 1;
	mul.lo.s32 	%r6, %r34, %r3;
	shl.b32 	%r35, %r3, 2;
	cvt.s64.s32	%rd21, %r35;
	add.s64 	%rd22, %rd3, %rd21;
	.loc 1 398 1
	ld.global.f32 	%f9, [%rd22];
	setp.ltu.ftz.f32	%p7, %f9, 0f00000000;
	@%p7 bra 	BB4_9;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f109, %f9;
	mul.ftz.f32 	%f110, %f109, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f288, %f110;
	bra.uni 	BB4_10;

BB4_9:
	neg.ftz.f32 	%f111, %f9;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f112, %f111;
	mul.ftz.f32 	%f113, %f112, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f114, %f113;
	neg.ftz.f32 	%f288, %f114;

BB4_10:
	.loc 1 398 1
	mov.f32 	%f12, %f288;
	add.s64 	%rd24, %rd5, %rd21;
	.loc 1 398 1
	ld.global.f32 	%f13, [%rd24];
	setp.ltu.ftz.f32	%p8, %f13, 0f00000000;
	@%p8 bra 	BB4_12;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f115, %f13;
	mul.ftz.f32 	%f116, %f115, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f315, %f116;
	bra.uni 	BB4_13;

BB4_12:
	neg.ftz.f32 	%f117, %f13;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f118, %f117;
	mul.ftz.f32 	%f119, %f118, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f120, %f119;
	neg.ftz.f32 	%f315, %f120;

BB4_13:
	.loc 1 398 1
	mov.f32 	%f16, %f315;
	add.s32 	%r37, %r1, %r6;
	cvt.s64.s32	%rd25, %r37;
	add.s64 	%rd30, %rd25, %rd2;
	shl.b64 	%rd32, %rd30, 2;
	add.s64 	%rd6, %rd1, %rd32;
	setp.lt.s32	%p10, %r1, 1;
	.loc 1 398 1
	mov.f32 	%f314, %f16;
	@%p10 bra 	BB4_17;

	ld.global.f32 	%f17, [%rd6+-4];
	setp.ltu.ftz.f32	%p11, %f17, 0f00000000;
	@%p11 bra 	BB4_16;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f121, %f17;
	mul.ftz.f32 	%f122, %f121, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f18, %f122;
	mov.f32 	%f314, %f18;
	bra.uni 	BB4_17;

BB4_16:
	neg.ftz.f32 	%f123, %f17;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f124, %f123;
	mul.ftz.f32 	%f125, %f124, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f126, %f125;
	neg.ftz.f32 	%f19, %f126;
	mov.f32 	%f314, %f19;

BB4_17:
	.loc 1 398 1
	mov.f32 	%f20, %f314;
	add.s32 	%r40, %r18, -2;
	setp.ge.s32	%p12, %r1, %r40;
	.loc 1 398 1
	mov.f32 	%f287, %f12;
	@%p12 bra 	BB4_21;

	ld.global.f32 	%f21, [%rd6+8];
	setp.ltu.ftz.f32	%p13, %f21, 0f00000000;
	@%p13 bra 	BB4_20;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f127, %f21;
	mul.ftz.f32 	%f128, %f127, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f22, %f128;
	mov.f32 	%f287, %f22;
	bra.uni 	BB4_21;

BB4_20:
	neg.ftz.f32 	%f129, %f21;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f130, %f129;
	mul.ftz.f32 	%f131, %f130, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f132, %f131;
	neg.ftz.f32 	%f23, %f132;
	mov.f32 	%f287, %f23;

BB4_21:
	.loc 1 398 1
	mov.f32 	%f24, %f287;
	setp.ge.s32	%p14, %r1, %r4;
	mov.f32 	%f313, %f16;
	@%p14 bra 	BB4_25;

	ld.global.f32 	%f25, [%rd6+12];
	setp.ltu.ftz.f32	%p15, %f25, 0f00000000;
	@%p15 bra 	BB4_24;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f133, %f25;
	mul.ftz.f32 	%f134, %f133, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f313, %f134;
	bra.uni 	BB4_25;

BB4_24:
	neg.ftz.f32 	%f135, %f25;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f136, %f135;
	mul.ftz.f32 	%f137, %f136, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f138, %f137;
	neg.ftz.f32 	%f313, %f138;

BB4_25:
	setp.lt.s32	%p16, %r1, 2;
	.loc 1 398 1
	mov.f32 	%f303, %f4;
	@%p16 bra 	BB4_29;

	ld.global.f32 	%f29, [%rd4+-8];
	setp.ltu.ftz.f32	%p17, %f29, 0f00000000;
	@%p17 bra 	BB4_28;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f139, %f29;
	mul.ftz.f32 	%f140, %f139, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f30, %f140;
	mov.f32 	%f303, %f30;
	bra.uni 	BB4_29;

BB4_28:
	neg.ftz.f32 	%f141, %f29;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f142, %f141;
	mul.ftz.f32 	%f143, %f142, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f144, %f143;
	neg.ftz.f32 	%f31, %f144;
	mov.f32 	%f303, %f31;

BB4_29:
	.loc 1 398 1
	mov.f32 	%f32, %f303;
	mov.f32 	%f292, %f8;
	@%p10 bra 	BB4_33;

	ld.global.f32 	%f33, [%rd4+-4];
	setp.ltu.ftz.f32	%p19, %f33, 0f00000000;
	@%p19 bra 	BB4_32;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f145, %f33;
	mul.ftz.f32 	%f146, %f145, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f34, %f146;
	mov.f32 	%f292, %f34;
	bra.uni 	BB4_33;

BB4_32:
	neg.ftz.f32 	%f147, %f33;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f148, %f147;
	mul.ftz.f32 	%f149, %f148, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f150, %f149;
	neg.ftz.f32 	%f35, %f150;
	mov.f32 	%f292, %f35;

BB4_33:
	.loc 1 398 1
	mov.f32 	%f36, %f292;
	mov.f32 	%f302, %f4;
	@%p12 bra 	BB4_37;

	ld.global.f32 	%f37, [%rd4+8];
	setp.ltu.ftz.f32	%p21, %f37, 0f00000000;
	@%p21 bra 	BB4_36;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f151, %f37;
	mul.ftz.f32 	%f152, %f151, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f302, %f152;
	bra.uni 	BB4_37;

BB4_36:
	neg.ftz.f32 	%f153, %f37;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f154, %f153;
	mul.ftz.f32 	%f155, %f154, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f156, %f155;
	neg.ftz.f32 	%f302, %f156;

BB4_37:
	setp.lt.s32	%p22, %r2, 1;
	.loc 1 398 1
	or.pred  	%p24, %p22, %p10;
	mov.f32 	%f312, %f16;
	@%p24 bra 	BB4_41;

	add.s32 	%r77, %r2, -1;
	mad.lo.s32 	%r85, %r77, %r3, %r1;
	cvt.s64.s32	%rd33, %r85;
	add.s64 	%rd38, %rd33, %rd2;
	shl.b64 	%rd40, %rd38, 2;
	add.s64 	%rd41, %rd1, %rd40;
	.loc 1 398 1
	ld.global.f32 	%f41, [%rd41+-4];
	setp.ltu.ftz.f32	%p26, %f41, 0f00000000;
	@%p26 bra 	BB4_40;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f157, %f41;
	mul.ftz.f32 	%f158, %f157, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f312, %f158;
	bra.uni 	BB4_41;

BB4_40:
	neg.ftz.f32 	%f159, %f41;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f160, %f159;
	mul.ftz.f32 	%f161, %f160, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f162, %f161;
	neg.ftz.f32 	%f312, %f162;

BB4_41:
	.loc 1 398 1
	mov.f32 	%f286, %f12;
	@%p22 bra 	BB4_45;

	add.s32 	%r98, %r2, -1;
	mad.lo.s32 	%r106, %r98, %r3, %r1;
	cvt.s64.s32	%rd42, %r106;
	add.s64 	%rd47, %rd42, %rd2;
	shl.b64 	%rd49, %rd47, 2;
	add.s64 	%rd50, %rd1, %rd49;
	ld.global.f32 	%f45, [%rd50];
	setp.ltu.ftz.f32	%p29, %f45, 0f00000000;
	@%p29 bra 	BB4_44;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f163, %f45;
	mul.ftz.f32 	%f164, %f163, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f286, %f164;
	bra.uni 	BB4_45;

BB4_44:
	neg.ftz.f32 	%f165, %f45;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f166, %f165;
	mul.ftz.f32 	%f167, %f166, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f168, %f167;
	neg.ftz.f32 	%f286, %f168;

BB4_45:
	.loc 1 398 1
	mov.f32 	%f311, %f16;
	@%p22 bra 	BB4_49;

	add.s32 	%r124, %r2, -1;
	mad.lo.s32 	%r127, %r124, %r3, %r1;
	cvt.s64.s32	%rd52, %r127;
	add.s64 	%rd57, %rd52, %rd2;
	shl.b64 	%rd58, %rd57, 2;
	add.s64 	%rd59, %rd1, %rd58;
	.loc 1 398 1
	ld.global.f32 	%f49, [%rd59+4];
	setp.ltu.ftz.f32	%p32, %f49, 0f00000000;
	@%p32 bra 	BB4_48;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f169, %f49;
	mul.ftz.f32 	%f170, %f169, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f311, %f170;
	bra.uni 	BB4_49;

BB4_48:
	neg.ftz.f32 	%f171, %f49;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f172, %f171;
	mul.ftz.f32 	%f173, %f172, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f174, %f173;
	neg.ftz.f32 	%f311, %f174;

BB4_49:
	setp.lt.s32	%p33, %r2, 2;
	.loc 1 398 1
	mov.f32 	%f301, %f4;
	@%p33 bra 	BB4_53;

	add.s32 	%r140, %r2, -2;
	mad.lo.s32 	%r148, %r140, %r3, %r1;
	cvt.s64.s32	%rd60, %r148;
	add.s64 	%rd65, %rd60, %rd2;
	shl.b64 	%rd67, %rd65, 2;
	add.s64 	%rd68, %rd1, %rd67;
	ld.global.f32 	%f53, [%rd68];
	setp.ltu.ftz.f32	%p35, %f53, 0f00000000;
	@%p35 bra 	BB4_52;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f175, %f53;
	mul.ftz.f32 	%f176, %f175, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f301, %f176;
	bra.uni 	BB4_53;

BB4_52:
	neg.ftz.f32 	%f177, %f53;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f178, %f177;
	mul.ftz.f32 	%f179, %f178, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f180, %f179;
	neg.ftz.f32 	%f301, %f180;

BB4_53:
	.loc 1 398 1
	add.s32 	%r156, %r19, -2;
	setp.ge.s32	%p36, %r2, %r156;
	.loc 1 398 1
	mov.f32 	%f300, %f4;
	@%p36 bra 	BB4_57;

	mad.lo.s32 	%r169, %r2, %r3, %r1;
	cvt.s64.s32	%rd70, %r169;
	add.s64 	%rd75, %rd70, %rd2;
	shl.b64 	%rd76, %rd75, 2;
	add.s64 	%rd77, %rd1, %rd76;
	shl.b32 	%r172, %r3, 3;
	cvt.s64.s32	%rd78, %r172;
	add.s64 	%rd79, %rd77, %rd78;
	.loc 1 398 1
	ld.global.f32 	%f57, [%rd79];
	setp.ltu.ftz.f32	%p38, %f57, 0f00000000;
	@%p38 bra 	BB4_56;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f181, %f57;
	mul.ftz.f32 	%f182, %f181, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f300, %f182;
	bra.uni 	BB4_57;

BB4_56:
	neg.ftz.f32 	%f183, %f57;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f184, %f183;
	mul.ftz.f32 	%f185, %f184, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f186, %f185;
	neg.ftz.f32 	%f300, %f186;

BB4_57:
	.loc 1 398 1
	mov.f32 	%f291, %f8;
	@%p36 bra 	BB4_61;

	add.s32 	%r182, %r35, 4;
	.loc 1 398 1
	mul.lo.s32 	%r192, %r27, %r3;
	shl.b32 	%r193, %r192, 1;
	add.s32 	%r194, %r1, %r193;
	add.s32 	%r195, %r194, 1;
	cvt.s64.s32	%rd80, %r195;
	add.s64 	%rd85, %rd80, %rd2;
	shl.b64 	%rd87, %rd85, 2;
	add.s64 	%rd88, %rd1, %rd87;
	cvt.s64.s32	%rd90, %r182;
	add.s64 	%rd91, %rd88, %rd21;
	add.s64 	%rd92, %rd91, %rd90;
	.loc 1 398 1
	ld.global.f32 	%f61, [%rd92+-4];
	setp.ltu.ftz.f32	%p41, %f61, 0f00000000;
	@%p41 bra 	BB4_60;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f187, %f61;
	mul.ftz.f32 	%f188, %f187, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f291, %f188;
	bra.uni 	BB4_61;

BB4_60:
	neg.ftz.f32 	%f189, %f61;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f190, %f189;
	mul.ftz.f32 	%f191, %f190, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f192, %f191;
	neg.ftz.f32 	%f291, %f192;

BB4_61:
	.loc 1 398 1
	or.pred  	%p44, %p36, %p12;
	mov.f32 	%f299, %f4;
	@%p44 bra 	BB4_65;

	add.s32 	%r215, %r2, 2;
	mad.lo.s32 	%r223, %r215, %r3, %r1;
	add.s32 	%r224, %r223, 2;
	cvt.s64.s32	%rd93, %r224;
	add.s64 	%rd98, %rd93, %rd2;
	shl.b64 	%rd100, %rd98, 2;
	add.s64 	%rd101, %rd1, %rd100;
	ld.global.f32 	%f65, [%rd101];
	setp.ltu.ftz.f32	%p46, %f65, 0f00000000;
	@%p46 bra 	BB4_64;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f193, %f65;
	mul.ftz.f32 	%f194, %f193, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f299, %f194;
	bra.uni 	BB4_65;

BB4_64:
	neg.ftz.f32 	%f195, %f65;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f196, %f195;
	mul.ftz.f32 	%f197, %f196, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f198, %f197;
	neg.ftz.f32 	%f299, %f198;

BB4_65:
	.loc 1 398 1
	add.s32 	%r232, %r19, -3;
	setp.ge.s32	%p47, %r2, %r232;
	mov.f32 	%f310, %f16;
	@%p47 bra 	BB4_69;

	add.s32 	%r238, %r2, 3;
	mad.lo.s32 	%r246, %r238, %r3, %r1;
	add.s32 	%r247, %r246, 1;
	cvt.s64.s32	%rd102, %r247;
	add.s64 	%rd107, %rd102, %rd2;
	shl.b64 	%rd109, %rd107, 2;
	add.s64 	%rd110, %rd1, %rd109;
	ld.global.f32 	%f69, [%rd110];
	setp.ltu.ftz.f32	%p49, %f69, 0f00000000;
	@%p49 bra 	BB4_68;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f199, %f69;
	mul.ftz.f32 	%f200, %f199, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f310, %f200;
	bra.uni 	BB4_69;

BB4_68:
	neg.ftz.f32 	%f201, %f69;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f202, %f201;
	mul.ftz.f32 	%f203, %f202, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f204, %f203;
	neg.ftz.f32 	%f310, %f204;

BB4_69:
	.loc 1 398 1
	sub.ftz.f32 	%f205, %f4, %f300;
	sub.ftz.f32 	%f206, %f4, %f301;
	add.ftz.f32 	%f207, %f206, %f205;
	sub.ftz.f32 	%f208, %f4, %f32;
	sub.ftz.f32 	%f209, %f4, %f302;
	add.ftz.f32 	%f210, %f209, %f208;
	add.ftz.f32 	%f211, %f12, %f286;
	mul.ftz.f32 	%f212, %f211, 0f3F000000;
	add.ftz.f32 	%f213, %f8, %f36;
	mul.ftz.f32 	%f214, %f213, 0f3F000000;
	cvt.ftz.f64.f32	%fd1, %f212;
	cvt.ftz.f64.f32	%fd2, %f207;
	fma.rn.f64 	%fd3, %fd2, 0d3FD0000000000000, %fd1;
	cvt.rn.ftz.f32.f64	%f215, %fd3;
	cvt.ftz.f64.f32	%fd4, %f214;
	cvt.ftz.f64.f32	%fd5, %f210;
	fma.rn.f64 	%fd6, %fd5, 0d3FD0000000000000, %fd4;
	cvt.rn.ftz.f32.f64	%f216, %fd6;
	add.ftz.f32 	%f217, %f215, %f216;
	mul.ftz.f32 	%f218, %f217, 0f3F000000;
	add.ftz.f32 	%f219, %f12, %f36;
	add.ftz.f32 	%f220, %f219, %f286;
	add.ftz.f32 	%f221, %f220, %f8;
	add.ftz.f32 	%f222, %f221, %f300;
	add.ftz.f32 	%f223, %f222, %f32;
	add.ftz.f32 	%f224, %f223, %f301;
	add.ftz.f32 	%f225, %f224, %f302;
	mul.ftz.f32 	%f226, %f225, 0f3C4CCCCD;
	sub.ftz.f32 	%f227, %f12, %f286;
	.loc 2 2750 10
	abs.ftz.f32 	%f228, %f227;
	abs.ftz.f32 	%f229, %f207;
	add.ftz.f32 	%f230, %f228, %f229;
	.loc 1 398 1
	sub.ftz.f32 	%f231, %f36, %f8;
	.loc 2 2750 10
	abs.ftz.f32 	%f232, %f231;
	abs.ftz.f32 	%f233, %f210;
	add.ftz.f32 	%f234, %f232, %f233;
	sub.ftz.f32 	%f235, %f230, %f234;
	setp.lt.ftz.f32	%p50, %f235, 0f00000000;
	selp.f32	%f236, %f215, %f216, %p50;
	.loc 2 2750 10
	abs.ftz.f32 	%f237, %f235;
	setp.lt.ftz.f32	%p51, %f237, %f226;
	selp.f32	%f82, %f218, %f236, %p51;
	.loc 1 398 1
	sub.ftz.f32 	%f238, %f16, %f310;
	sub.ftz.f32 	%f239, %f16, %f311;
	add.ftz.f32 	%f240, %f239, %f238;
	sub.ftz.f32 	%f241, %f16, %f20;
	sub.ftz.f32 	%f242, %f16, %f313;
	add.ftz.f32 	%f243, %f242, %f241;
	add.ftz.f32 	%f244, %f291, %f8;
	mul.ftz.f32 	%f245, %f244, 0f3F000000;
	add.ftz.f32 	%f246, %f24, %f12;
	mul.ftz.f32 	%f247, %f246, 0f3F000000;
	cvt.ftz.f64.f32	%fd7, %f245;
	cvt.ftz.f64.f32	%fd8, %f240;
	fma.rn.f64 	%fd9, %fd8, 0d3FD0000000000000, %fd7;
	cvt.rn.ftz.f32.f64	%f248, %fd9;
	cvt.ftz.f64.f32	%fd10, %f247;
	cvt.ftz.f64.f32	%fd11, %f243;
	fma.rn.f64 	%fd12, %fd11, 0d3FD0000000000000, %fd10;
	cvt.rn.ftz.f32.f64	%f249, %fd12;
	add.ftz.f32 	%f250, %f248, %f249;
	mul.ftz.f32 	%f251, %f250, 0f3F000000;
	add.ftz.f32 	%f252, %f291, %f12;
	add.ftz.f32 	%f253, %f252, %f8;
	add.ftz.f32 	%f254, %f253, %f24;
	add.ftz.f32 	%f255, %f254, %f310;
	add.ftz.f32 	%f256, %f255, %f20;
	add.ftz.f32 	%f257, %f256, %f311;
	add.ftz.f32 	%f258, %f257, %f313;
	mul.ftz.f32 	%f259, %f258, 0f3C4CCCCD;
	sub.ftz.f32 	%f260, %f291, %f8;
	.loc 2 2750 10
	abs.ftz.f32 	%f261, %f260;
	abs.ftz.f32 	%f262, %f240;
	add.ftz.f32 	%f263, %f261, %f262;
	.loc 1 398 1
	sub.ftz.f32 	%f264, %f12, %f24;
	.loc 2 2750 10
	abs.ftz.f32 	%f265, %f264;
	abs.ftz.f32 	%f266, %f243;
	add.ftz.f32 	%f267, %f265, %f266;
	sub.ftz.f32 	%f268, %f263, %f267;
	setp.lt.ftz.f32	%p52, %f268, 0f00000000;
	selp.f32	%f269, %f248, %f249, %p52;
	.loc 2 2750 10
	abs.ftz.f32 	%f270, %f268;
	setp.lt.ftz.f32	%p53, %f270, %f259;
	selp.f32	%f74, %f251, %f269, %p53;
	.loc 1 398 1
	add.ftz.f32 	%f271, %f312, %f311;
	add.ftz.f32 	%f272, %f271, %f20;
	add.ftz.f32 	%f273, %f272, %f16;
	mul.ftz.f32 	%f81, %f273, 0f3E800000;
	add.ftz.f32 	%f274, %f311, %f16;
	mul.ftz.f32 	%f76, %f274, 0f3F000000;
	add.ftz.f32 	%f275, %f4, %f302;
	mul.ftz.f32 	%f77, %f275, 0f3F000000;
	add.ftz.f32 	%f276, %f20, %f16;
	mul.ftz.f32 	%f78, %f276, 0f3F000000;
	add.ftz.f32 	%f277, %f4, %f300;
	mul.ftz.f32 	%f79, %f277, 0f3F000000;
	add.ftz.f32 	%f278, %f275, %f300;
	add.ftz.f32 	%f279, %f278, %f299;
	mul.ftz.f32 	%f80, %f279, 0f3E800000;
	mov.f32 	%f84, 0f3F800000;
	.loc 1 398 1
	mul.lo.s32 	%r254, %r27, %r16;
	shl.b32 	%r7, %r254, 1;
	add.s32 	%r8, %r7, %r1;
	.loc 1 398 1
	setp.eq.s32	%p54, %r17, 0;
	@%p54 bra 	BB4_71;

	cvta.to.global.u64 	%rd111, %rd8;
	mul.wide.s32 	%rd112, %r8, 16;
	add.s64 	%rd113, %rd111, %rd112;
	.loc 1 398 1
	st.global.v4.f32 	[%rd113], {%f81, %f82, %f4, %f84};
	bra.uni 	BB4_72;

BB4_71:
	cvta.to.global.u64 	%rd114, %rd8;
	mul.wide.s32 	%rd115, %r8, 8;
	add.s64 	%rd116, %rd114, %rd115;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f81;
	mov.b16 	%rs1, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f82;
	mov.b16 	%rs2, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs3, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f84;
	mov.b16 	%rs4, %temp;
}
	.loc 1 398 241
	st.global.v4.u16 	[%rd116], {%rs1, %rs2, %rs3, %rs4};

BB4_72:
	.loc 1 398 1
	add.s32 	%r265, %r1, %r7;
	.loc 1 398 1
	add.s32 	%r9, %r265, 1;
	.loc 1 398 1
	@%p54 bra 	BB4_74;

	cvta.to.global.u64 	%rd117, %rd8;
	mul.wide.s32 	%rd118, %r9, 16;
	add.s64 	%rd119, %rd117, %rd118;
	.loc 1 398 1
	st.global.v4.f32 	[%rd119], {%f76, %f8, %f77, %f84};
	bra.uni 	BB4_75;

BB4_74:
	cvta.to.global.u64 	%rd120, %rd8;
	mul.wide.s32 	%rd121, %r9, 8;
	add.s64 	%rd122, %rd120, %rd121;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f76;
	mov.b16 	%rs5, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs6, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f77;
	mov.b16 	%rs7, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f84;
	mov.b16 	%rs8, %temp;
}
	.loc 1 398 241
	st.global.v4.u16 	[%rd122], {%rs5, %rs6, %rs7, %rs8};

BB4_75:
	.loc 1 398 1
	mul.lo.s32 	%r10, %r34, %r16;
	add.s32 	%r11, %r10, %r1;
	.loc 1 398 1
	@%p54 bra 	BB4_77;

	cvta.to.global.u64 	%rd123, %rd8;
	mul.wide.s32 	%rd124, %r11, 16;
	add.s64 	%rd125, %rd123, %rd124;
	.loc 1 398 1
	st.global.v4.f32 	[%rd125], {%f78, %f12, %f79, %f84};
	bra.uni 	BB4_78;

BB4_77:
	cvta.to.global.u64 	%rd126, %rd8;
	mul.wide.s32 	%rd127, %r11, 8;
	add.s64 	%rd128, %rd126, %rd127;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f78;
	mov.b16 	%rs9, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f12;
	mov.b16 	%rs10, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f79;
	mov.b16 	%rs11, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f84;
	mov.b16 	%rs12, %temp;
}
	.loc 1 398 241
	st.global.v4.u16 	[%rd128], {%rs9, %rs10, %rs11, %rs12};

BB4_78:
	.loc 1 398 1
	add.s32 	%r282, %r1, %r10;
	.loc 1 398 1
	add.s32 	%r12, %r282, 1;
	.loc 1 398 1
	@%p54 bra 	BB4_80;

	cvta.to.global.u64 	%rd129, %rd8;
	mul.wide.s32 	%rd130, %r12, 16;
	add.s64 	%rd131, %rd129, %rd130;
	.loc 1 398 1
	st.global.v4.f32 	[%rd131], {%f16, %f74, %f80, %f84};
	bra.uni 	BB4_81;

BB4_80:
	cvta.to.global.u64 	%rd132, %rd8;
	mul.wide.s32 	%rd133, %r12, 8;
	add.s64 	%rd134, %rd132, %rd133;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f16;
	mov.b16 	%rs13, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f74;
	mov.b16 	%rs14, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f80;
	mov.b16 	%rs15, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f84;
	mov.b16 	%rs16, %temp;
}
	.loc 1 398 241
	st.global.v4.u16 	[%rd134], {%rs13, %rs14, %rs15, %rs16};

BB4_81:
	.loc 1 398 2
	ret;
}

.visible .entry PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel(
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_0,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_1,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_2,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_3,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_4,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_5,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_6,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_7,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_8,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_9
)
{
	.reg .pred 	%p<58>;
	.reg .s16 	%rs<97>;
	.reg .s32 	%r<82>;
	.reg .f32 	%f<546>;
	.reg .s64 	%rd<74>;
	// demoted variable
	.shared .align 4 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix[36];

	ld.param.u64 	%rd11, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_0];
	ld.param.u64 	%rd10, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_1];
	ld.param.u32 	%r19, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_2];
	ld.param.u32 	%r20, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_3];
	ld.param.u32 	%r21, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_4];
	ld.param.u32 	%r22, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_5];
	ld.param.u32 	%r23, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_6];
	ld.param.u32 	%r24, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_7];
	ld.param.u64 	%rd12, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_8];
	ld.param.u32 	%r25, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_9];
	cvta.to.global.u64 	%rd1, %rd11;
	cvta.to.global.u64 	%rd2, %rd12;
	.loc 1 486 1
	cvt.s64.s32	%rd3, %r25;
	.loc 1 91 1
	mov.u32 	%r1, %tid.y;
	setp.eq.s32	%p1, %r1, 0;
	mov.u32 	%r2, %tid.x;
	setp.lt.s32	%p2, %r2, 3;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB5_2;
	bra.uni 	BB5_1;

BB5_1:
	setp.ne.s32	%p4, %r24, 0;
	.loc 1 91 1
	mul.lo.s32 	%r26, %r2, 3;
	cvt.s64.s32	%rd13, %r26;
	add.s32 	%r27, %r26, %r25;
	.loc 1 91 1
	add.s64 	%rd14, %rd13, %rd3;
	selp.b64	%rd15, 2, 0, %p4;
	add.s64 	%rd16, %rd14, %rd15;
	shl.b64 	%rd17, %rd16, 2;
	add.s64 	%rd18, %rd2, %rd17;
	mul.wide.s32 	%rd19, %r26, 4;
	mov.u64 	%rd20, PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix;
	add.s64 	%rd21, %rd20, %rd19;
	.loc 1 91 1
	ld.global.f32 	%f261, [%rd18];
	st.shared.f32 	[%rd21], %f261;
	mul.wide.s32 	%rd22, %r27, 4;
	add.s64 	%rd23, %rd2, %rd22;
	.loc 1 91 1
	ld.global.f32 	%f262, [%rd23+4];
	st.shared.f32 	[%rd21+4], %f262;
	selp.b64	%rd24, 0, 2, %p4;
	add.s64 	%rd25, %rd14, %rd24;
	shl.b64 	%rd26, %rd25, 2;
	add.s64 	%rd27, %rd2, %rd26;
	ld.global.f32 	%f263, [%rd27];
	st.shared.f32 	[%rd21+8], %f263;

BB5_2:
	.loc 1 91 1
	bar.sync 	0;
	.loc 1 486 1
	mov.u32 	%r28, %ntid.x;
	mov.u32 	%r29, %ctaid.x;
	mad.lo.s32 	%r30, %r28, %r29, %r2;
	shl.b32 	%r3, %r30, 1;
	mov.u32 	%r31, %ntid.y;
	mov.u32 	%r32, %ctaid.y;
	mad.lo.s32 	%r33, %r31, %r32, %r1;
	shl.b32 	%r4, %r33, 1;
	.loc 1 486 1
	setp.lt.s32	%p5, %r3, %r22;
	setp.lt.s32	%p6, %r4, %r23;
	and.pred  	%p7, %p5, %p6;
	.loc 1 486 1
	@!%p7 bra 	BB5_129;
	bra.uni 	BB5_3;

BB5_3:
	.loc 1 486 1
	add.s32 	%r5, %r22, -2;
	add.s32 	%r6, %r23, -2;
	mul.lo.s32 	%r7, %r4, %r19;
	add.s32 	%r8, %r7, %r3;
	setp.eq.s32	%p8, %r21, 0;
	@%p8 bra 	BB5_5;

	mul.wide.s32 	%rd28, %r8, 16;
	add.s64 	%rd29, %rd1, %rd28;
	ld.global.v4.f32 	{%f264, %f265, %f266, %f267}, [%rd29];
	mov.f32 	%f494, %f267;
	mov.f32 	%f493, %f266;
	mov.f32 	%f492, %f265;
	mov.f32 	%f491, %f264;
	bra.uni 	BB5_6;

BB5_5:
	mul.wide.s32 	%rd30, %r8, 8;
	add.s64 	%rd31, %rd1, %rd30;
	.loc 1 486 1
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd31];
	.loc 2 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f491, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f492, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f493, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f494, %temp;
	}

BB5_6:
	add.s32 	%r34, %r3, %r7;
	mul.wide.s32 	%rd32, %r34, 16;
	add.s64 	%rd4, %rd1, %rd32;
	mul.wide.s32 	%rd33, %r34, 8;
	add.s64 	%rd5, %rd1, %rd33;
	.loc 1 486 1
	@%p8 bra 	BB5_8;

	ld.global.v4.f32 	{%f268, %f269, %f270, %f271}, [%rd4+16];
	mov.f32 	%f498, %f271;
	mov.f32 	%f497, %f270;
	mov.f32 	%f496, %f269;
	mov.f32 	%f495, %f268;
	bra.uni 	BB5_9;

BB5_8:
	.loc 1 486 1
	ld.global.v4.u16 	{%rs9, %rs10, %rs11, %rs12}, [%rd5+8];
	.loc 2 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs9;
	cvt.f32.f16 	%f495, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs10;
	cvt.f32.f16 	%f496, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs11;
	cvt.f32.f16 	%f497, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs12;
	cvt.f32.f16 	%f498, %temp;
	}

BB5_9:
	.loc 1 486 1
	add.s32 	%r35, %r4, 1;
	mul.lo.s32 	%r9, %r35, %r19;
	add.s32 	%r10, %r9, %r3;
	mov.f32 	%f30, %f496;
	.loc 1 486 1
	@%p8 bra 	BB5_11;

	mul.wide.s32 	%rd34, %r10, 16;
	add.s64 	%rd35, %rd1, %rd34;
	ld.global.v4.f32 	{%f272, %f273, %f274, %f275}, [%rd35];
	mov.f32 	%f502, %f275;
	mov.f32 	%f501, %f274;
	mov.f32 	%f500, %f273;
	mov.f32 	%f499, %f272;
	bra.uni 	BB5_12;

BB5_11:
	mul.wide.s32 	%rd36, %r10, 8;
	add.s64 	%rd37, %rd1, %rd36;
	.loc 1 486 1
	ld.global.v4.u16 	{%rs17, %rs18, %rs19, %rs20}, [%rd37];
	.loc 2 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs17;
	cvt.f32.f16 	%f499, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs18;
	cvt.f32.f16 	%f500, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs19;
	cvt.f32.f16 	%f501, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs20;
	cvt.f32.f16 	%f502, %temp;
	}

BB5_12:
	add.s32 	%r36, %r3, %r9;
	mul.wide.s32 	%rd38, %r36, 16;
	add.s64 	%rd6, %rd1, %rd38;
	mul.wide.s32 	%rd39, %r36, 8;
	add.s64 	%rd7, %rd1, %rd39;
	mov.f32 	%f46, %f500;
	.loc 1 486 1
	@%p8 bra 	BB5_14;

	ld.global.v4.f32 	{%f276, %f277, %f278, %f279}, [%rd6+16];
	mov.f32 	%f506, %f279;
	mov.f32 	%f505, %f278;
	mov.f32 	%f504, %f277;
	mov.f32 	%f503, %f276;
	bra.uni 	BB5_15;

BB5_14:
	.loc 1 486 1
	ld.global.v4.u16 	{%rs25, %rs26, %rs27, %rs28}, [%rd7+8];
	.loc 2 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs25;
	cvt.f32.f16 	%f503, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs26;
	cvt.f32.f16 	%f504, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs27;
	cvt.f32.f16 	%f505, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs28;
	cvt.f32.f16 	%f506, %temp;
	}

BB5_15:
	setp.ge.s32	%p12, %r3, %r5;
	mov.f32 	%f60, %f504;
	.loc 1 486 1
	mov.f32 	%f521, %f46;
	@%p12 bra 	BB5_20;

	@%p8 bra 	BB5_18;

	ld.global.v4.f32 	{%f280, %f281, %f282, %f283}, [%rd4+32];
	mov.f32 	%f65, %f283;
	mov.f32 	%f64, %f282;
	mov.f32 	%f507, %f281;
	mov.f32 	%f62, %f280;
	bra.uni 	BB5_19;

BB5_18:
	.loc 1 486 1
	ld.global.v4.u16 	{%rs33, %rs34, %rs35, %rs36}, [%rd5+16];
	.loc 2 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs34;
	cvt.f32.f16 	%f507, %temp;
	}

BB5_19:
	mov.f32 	%f74, %f507;
	mov.f32 	%f521, %f74;

BB5_20:
	.loc 1 486 1
	mov.f32 	%f75, %f521;
	setp.ge.s32	%p14, %r4, %r6;
	.loc 1 486 1
	mov.f32 	%f517, %f30;
	@%p14 bra 	BB5_25;

	add.s32 	%r37, %r4, 2;
	mad.lo.s32 	%r11, %r37, %r19, %r3;
	@%p8 bra 	BB5_23;

	mul.wide.s32 	%rd40, %r11, 16;
	add.s64 	%rd41, %rd1, %rd40;
	ld.global.v4.f32 	{%f284, %f285, %f286, %f287}, [%rd41];
	mov.f32 	%f79, %f287;
	mov.f32 	%f78, %f286;
	mov.f32 	%f508, %f285;
	mov.f32 	%f76, %f284;
	bra.uni 	BB5_24;

BB5_23:
	mul.wide.s32 	%rd42, %r11, 8;
	add.s64 	%rd43, %rd1, %rd42;
	.loc 1 486 1
	ld.global.v4.u16 	{%rs41, %rs42, %rs43, %rs44}, [%rd43];
	.loc 2 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs42;
	cvt.f32.f16 	%f508, %temp;
	}

BB5_24:
	mov.f32 	%f88, %f508;
	mov.f32 	%f517, %f88;

BB5_25:
	.loc 1 486 1
	mov.f32 	%f89, %f517;
	or.pred  	%p18, %p12, %p14;
	mov.f32 	%f513, %f60;
	@%p18 bra 	BB5_30;

	add.s32 	%r38, %r4, 2;
	mad.lo.s32 	%r39, %r38, %r19, %r3;
	add.s32 	%r12, %r39, 2;
	@%p8 bra 	BB5_28;

	mul.wide.s32 	%rd44, %r12, 16;
	add.s64 	%rd45, %rd1, %rd44;
	ld.global.v4.f32 	{%f288, %f289, %f290, %f291}, [%rd45];
	mov.f32 	%f93, %f291;
	mov.f32 	%f92, %f290;
	mov.f32 	%f509, %f289;
	mov.f32 	%f90, %f288;
	bra.uni 	BB5_29;

BB5_28:
	mul.wide.s32 	%rd46, %r12, 8;
	add.s64 	%rd47, %rd1, %rd46;
	.loc 1 486 1
	ld.global.v4.u16 	{%rs49, %rs50, %rs51, %rs52}, [%rd47];
	.loc 2 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs50;
	cvt.f32.f16 	%f509, %temp;
	}

BB5_29:
	mov.f32 	%f102, %f509;
	mov.f32 	%f513, %f102;

BB5_30:
	.loc 1 486 1
	mov.f32 	%f103, %f513;
	add.ftz.f32 	%f292, %f492, %f75;
	mul.ftz.f32 	%f293, %f292, 0f3F000000;
	sub.ftz.f32 	%f294, %f496, %f293;
	add.ftz.f32 	%f104, %f497, %f294;
	add.ftz.f32 	%f295, %f492, %f89;
	mul.ftz.f32 	%f296, %f295, 0f3F000000;
	sub.ftz.f32 	%f297, %f500, %f296;
	add.ftz.f32 	%f105, %f501, %f297;
	add.ftz.f32 	%f298, %f292, %f89;
	add.ftz.f32 	%f299, %f298, %f103;
	fma.rn.ftz.f32 	%f300, %f299, 0fBE800000, %f504;
	add.ftz.f32 	%f106, %f505, %f300;
	setp.lt.s32	%p20, %r3, 1;
	setp.lt.s32	%p21, %r4, 1;
	.loc 1 486 1
	or.pred  	%p22, %p20, %p21;
	add.s32 	%r40, %r4, -1;
	mad.lo.s32 	%r41, %r40, %r19, %r3;
	mul.wide.s32 	%rd48, %r41, 16;
	add.s64 	%rd8, %rd1, %rd48;
	mul.wide.s32 	%rd49, %r41, 8;
	add.s64 	%rd9, %rd1, %rd49;
	.loc 1 486 1
	mov.f32 	%f512, %f60;
	@%p22 bra 	BB5_35;

	@%p8 bra 	BB5_33;

	ld.global.v4.f32 	{%f301, %f302, %f303, %f304}, [%rd8+-16];
	mov.f32 	%f110, %f304;
	mov.f32 	%f109, %f303;
	mov.f32 	%f510, %f302;
	mov.f32 	%f107, %f301;
	bra.uni 	BB5_34;

BB5_33:
	.loc 1 486 1
	ld.global.v4.u16 	{%rs57, %rs58, %rs59, %rs60}, [%rd9+-8];
	.loc 2 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs58;
	cvt.f32.f16 	%f510, %temp;
	}

BB5_34:
	mov.f32 	%f512, %f510;

BB5_35:
	.loc 1 486 1
	mov.f32 	%f516, %f30;
	@%p21 bra 	BB5_40;

	@%p8 bra 	BB5_38;

	ld.global.v4.f32 	{%f305, %f306, %f307, %f308}, [%rd8+16];
	mov.f32 	%f124, %f308;
	mov.f32 	%f123, %f307;
	mov.f32 	%f514, %f306;
	mov.f32 	%f121, %f305;
	bra.uni 	BB5_39;

BB5_38:
	.loc 1 486 1
	ld.global.v4.u16 	{%rs65, %rs66, %rs67, %rs68}, [%rd9+8];
	.loc 2 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs66;
	cvt.f32.f16 	%f514, %temp;
	}

BB5_39:
	mov.f32 	%f516, %f514;

BB5_40:
	.loc 1 486 1
	mov.f32 	%f520, %f46;
	@%p20 bra 	BB5_45;

	@%p8 bra 	BB5_43;

	ld.global.v4.f32 	{%f309, %f310, %f311, %f312}, [%rd6+-16];
	mov.f32 	%f138, %f312;
	mov.f32 	%f137, %f311;
	mov.f32 	%f518, %f310;
	mov.f32 	%f135, %f309;
	bra.uni 	BB5_44;

BB5_43:
	.loc 1 486 1
	ld.global.v4.u16 	{%rs73, %rs74, %rs75, %rs76}, [%rd7+-8];
	.loc 2 3518 10
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs74;
	cvt.f32.f16 	%f518, %temp;
	}

BB5_44:
	mov.f32 	%f520, %f518;

BB5_45:
	.loc 1 486 1
	add.ftz.f32 	%f313, %f512, %f516;
	add.ftz.f32 	%f314, %f313, %f520;
	add.ftz.f32 	%f315, %f314, %f504;
	fma.rn.ftz.f32 	%f316, %f315, 0fBE800000, %f492;
	add.ftz.f32 	%f149, %f491, %f316;
	add.ftz.f32 	%f317, %f516, %f504;
	mul.ftz.f32 	%f318, %f317, 0f3F000000;
	sub.ftz.f32 	%f319, %f496, %f318;
	add.ftz.f32 	%f150, %f495, %f319;
	add.ftz.f32 	%f320, %f520, %f504;
	mul.ftz.f32 	%f321, %f320, 0f3F000000;
	sub.ftz.f32 	%f322, %f500, %f321;
	add.ftz.f32 	%f151, %f499, %f322;
	setp.ltu.ftz.f32	%p28, %f493, 0f00000000;
	@%p28 bra 	BB5_47;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f323, %f493;
	mul.ftz.f32 	%f324, %f323, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f522, %f324;
	bra.uni 	BB5_48;

BB5_47:
	.loc 1 486 171
	neg.ftz.f32 	%f325, %f493;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f326, %f325;
	mul.ftz.f32 	%f327, %f326, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f328, %f327;
	.loc 1 486 209
	neg.ftz.f32 	%f522, %f328;

BB5_48:
	setp.ltu.ftz.f32	%p29, %f492, 0f00000000;
	@%p29 bra 	BB5_50;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f329, %f492;
	mul.ftz.f32 	%f330, %f329, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f523, %f330;
	bra.uni 	BB5_51;

BB5_50:
	neg.ftz.f32 	%f331, %f492;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f332, %f331;
	mul.ftz.f32 	%f333, %f332, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f334, %f333;
	neg.ftz.f32 	%f523, %f334;

BB5_51:
	setp.ltu.ftz.f32	%p30, %f149, 0f00000000;
	@%p30 bra 	BB5_53;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f335, %f149;
	mul.ftz.f32 	%f336, %f335, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f524, %f336;
	bra.uni 	BB5_54;

BB5_53:
	neg.ftz.f32 	%f337, %f149;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f338, %f337;
	mul.ftz.f32 	%f339, %f338, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f340, %f339;
	neg.ftz.f32 	%f524, %f340;

BB5_54:
	ld.shared.f32 	%f161, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix+24];
	ld.shared.f32 	%f162, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix+28];
	mul.ftz.f32 	%f341, %f523, %f162;
	fma.rn.ftz.f32 	%f342, %f522, %f161, %f341;
	ld.shared.f32 	%f163, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix+32];
	fma.rn.ftz.f32 	%f164, %f524, %f163, %f342;
	setp.ltu.ftz.f32	%p31, %f164, 0f00000000;
	@%p31 bra 	BB5_56;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f343, %f164;
	mul.ftz.f32 	%f344, %f343, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f525, %f344;
	bra.uni 	BB5_57;

BB5_56:
	neg.ftz.f32 	%f345, %f164;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f346, %f345;
	mul.ftz.f32 	%f347, %f346, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f348, %f347;
	neg.ftz.f32 	%f525, %f348;

BB5_57:
	ld.shared.f32 	%f168, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix+12];
	ld.shared.f32 	%f169, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix+16];
	mul.ftz.f32 	%f349, %f523, %f169;
	fma.rn.ftz.f32 	%f350, %f522, %f168, %f349;
	ld.shared.f32 	%f170, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix+20];
	fma.rn.ftz.f32 	%f171, %f524, %f170, %f350;
	setp.ltu.ftz.f32	%p32, %f171, 0f00000000;
	@%p32 bra 	BB5_59;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f351, %f171;
	mul.ftz.f32 	%f352, %f351, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f526, %f352;
	bra.uni 	BB5_60;

BB5_59:
	neg.ftz.f32 	%f353, %f171;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f354, %f353;
	mul.ftz.f32 	%f355, %f354, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f356, %f355;
	neg.ftz.f32 	%f526, %f356;

BB5_60:
	ld.shared.f32 	%f175, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix];
	ld.shared.f32 	%f176, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix+4];
	mul.ftz.f32 	%f357, %f523, %f176;
	fma.rn.ftz.f32 	%f358, %f522, %f175, %f357;
	ld.shared.f32 	%f177, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_171050_343_non_const_matrix+8];
	fma.rn.ftz.f32 	%f178, %f524, %f177, %f358;
	setp.ltu.ftz.f32	%p33, %f178, 0f00000000;
	@%p33 bra 	BB5_62;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f359, %f178;
	mul.ftz.f32 	%f360, %f359, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f527, %f360;
	bra.uni 	BB5_63;

BB5_62:
	neg.ftz.f32 	%f361, %f178;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f362, %f361;
	mul.ftz.f32 	%f363, %f362, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f364, %f363;
	neg.ftz.f32 	%f527, %f364;

BB5_63:
	.loc 1 486 1
	setp.ltu.ftz.f32	%p34, %f104, 0f00000000;
	@%p34 bra 	BB5_65;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f365, %f104;
	mul.ftz.f32 	%f366, %f365, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f528, %f366;
	bra.uni 	BB5_66;

BB5_65:
	.loc 1 486 171
	neg.ftz.f32 	%f367, %f104;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f368, %f367;
	mul.ftz.f32 	%f369, %f368, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f370, %f369;
	.loc 1 486 209
	neg.ftz.f32 	%f528, %f370;

BB5_66:
	setp.ltu.ftz.f32	%p35, %f496, 0f00000000;
	@%p35 bra 	BB5_68;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f371, %f496;
	mul.ftz.f32 	%f372, %f371, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f529, %f372;
	bra.uni 	BB5_69;

BB5_68:
	neg.ftz.f32 	%f373, %f496;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f374, %f373;
	mul.ftz.f32 	%f375, %f374, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f376, %f375;
	neg.ftz.f32 	%f529, %f376;

BB5_69:
	setp.ltu.ftz.f32	%p36, %f150, 0f00000000;
	@%p36 bra 	BB5_71;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f377, %f150;
	mul.ftz.f32 	%f378, %f377, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f530, %f378;
	bra.uni 	BB5_72;

BB5_71:
	neg.ftz.f32 	%f379, %f150;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f380, %f379;
	mul.ftz.f32 	%f381, %f380, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f382, %f381;
	neg.ftz.f32 	%f530, %f382;

BB5_72:
	mul.ftz.f32 	%f383, %f529, %f162;
	fma.rn.ftz.f32 	%f384, %f528, %f161, %f383;
	fma.rn.ftz.f32 	%f191, %f530, %f163, %f384;
	setp.ltu.ftz.f32	%p37, %f191, 0f00000000;
	@%p37 bra 	BB5_74;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f385, %f191;
	mul.ftz.f32 	%f386, %f385, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f531, %f386;
	bra.uni 	BB5_75;

BB5_74:
	neg.ftz.f32 	%f387, %f191;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f388, %f387;
	mul.ftz.f32 	%f389, %f388, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f390, %f389;
	neg.ftz.f32 	%f531, %f390;

BB5_75:
	mul.ftz.f32 	%f391, %f529, %f169;
	fma.rn.ftz.f32 	%f392, %f528, %f168, %f391;
	fma.rn.ftz.f32 	%f195, %f530, %f170, %f392;
	setp.ltu.ftz.f32	%p38, %f195, 0f00000000;
	@%p38 bra 	BB5_77;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f393, %f195;
	mul.ftz.f32 	%f394, %f393, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f532, %f394;
	bra.uni 	BB5_78;

BB5_77:
	neg.ftz.f32 	%f395, %f195;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f396, %f395;
	mul.ftz.f32 	%f397, %f396, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f398, %f397;
	neg.ftz.f32 	%f532, %f398;

BB5_78:
	mul.ftz.f32 	%f399, %f529, %f176;
	fma.rn.ftz.f32 	%f400, %f528, %f175, %f399;
	fma.rn.ftz.f32 	%f199, %f530, %f177, %f400;
	setp.ltu.ftz.f32	%p39, %f199, 0f00000000;
	@%p39 bra 	BB5_80;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f401, %f199;
	mul.ftz.f32 	%f402, %f401, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f533, %f402;
	bra.uni 	BB5_81;

BB5_80:
	neg.ftz.f32 	%f403, %f199;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f404, %f403;
	mul.ftz.f32 	%f405, %f404, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f406, %f405;
	neg.ftz.f32 	%f533, %f406;

BB5_81:
	.loc 1 486 1
	setp.ltu.ftz.f32	%p40, %f105, 0f00000000;
	@%p40 bra 	BB5_83;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f407, %f105;
	mul.ftz.f32 	%f408, %f407, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f534, %f408;
	bra.uni 	BB5_84;

BB5_83:
	.loc 1 486 171
	neg.ftz.f32 	%f409, %f105;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f410, %f409;
	mul.ftz.f32 	%f411, %f410, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f412, %f411;
	.loc 1 486 209
	neg.ftz.f32 	%f534, %f412;

BB5_84:
	setp.ltu.ftz.f32	%p41, %f500, 0f00000000;
	@%p41 bra 	BB5_86;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f413, %f500;
	mul.ftz.f32 	%f414, %f413, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f535, %f414;
	bra.uni 	BB5_87;

BB5_86:
	neg.ftz.f32 	%f415, %f500;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f416, %f415;
	mul.ftz.f32 	%f417, %f416, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f418, %f417;
	neg.ftz.f32 	%f535, %f418;

BB5_87:
	setp.ltu.ftz.f32	%p42, %f151, 0f00000000;
	@%p42 bra 	BB5_89;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f419, %f151;
	mul.ftz.f32 	%f420, %f419, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f536, %f420;
	bra.uni 	BB5_90;

BB5_89:
	neg.ftz.f32 	%f421, %f151;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f422, %f421;
	mul.ftz.f32 	%f423, %f422, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f424, %f423;
	neg.ftz.f32 	%f536, %f424;

BB5_90:
	mul.ftz.f32 	%f425, %f535, %f162;
	fma.rn.ftz.f32 	%f426, %f534, %f161, %f425;
	fma.rn.ftz.f32 	%f212, %f536, %f163, %f426;
	setp.ltu.ftz.f32	%p43, %f212, 0f00000000;
	@%p43 bra 	BB5_92;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f427, %f212;
	mul.ftz.f32 	%f428, %f427, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f537, %f428;
	bra.uni 	BB5_93;

BB5_92:
	neg.ftz.f32 	%f429, %f212;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f430, %f429;
	mul.ftz.f32 	%f431, %f430, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f432, %f431;
	neg.ftz.f32 	%f537, %f432;

BB5_93:
	mul.ftz.f32 	%f433, %f535, %f169;
	fma.rn.ftz.f32 	%f434, %f534, %f168, %f433;
	fma.rn.ftz.f32 	%f216, %f536, %f170, %f434;
	setp.ltu.ftz.f32	%p44, %f216, 0f00000000;
	@%p44 bra 	BB5_95;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f435, %f216;
	mul.ftz.f32 	%f436, %f435, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f538, %f436;
	bra.uni 	BB5_96;

BB5_95:
	neg.ftz.f32 	%f437, %f216;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f438, %f437;
	mul.ftz.f32 	%f439, %f438, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f440, %f439;
	neg.ftz.f32 	%f538, %f440;

BB5_96:
	mul.ftz.f32 	%f441, %f535, %f176;
	fma.rn.ftz.f32 	%f442, %f534, %f175, %f441;
	fma.rn.ftz.f32 	%f220, %f536, %f177, %f442;
	setp.ltu.ftz.f32	%p45, %f220, 0f00000000;
	@%p45 bra 	BB5_98;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f443, %f220;
	mul.ftz.f32 	%f444, %f443, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f539, %f444;
	bra.uni 	BB5_99;

BB5_98:
	neg.ftz.f32 	%f445, %f220;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f446, %f445;
	mul.ftz.f32 	%f447, %f446, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f448, %f447;
	neg.ftz.f32 	%f539, %f448;

BB5_99:
	.loc 1 486 1
	setp.ltu.ftz.f32	%p46, %f106, 0f00000000;
	@%p46 bra 	BB5_101;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f449, %f106;
	mul.ftz.f32 	%f450, %f449, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f540, %f450;
	bra.uni 	BB5_102;

BB5_101:
	.loc 1 486 171
	neg.ftz.f32 	%f451, %f106;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f452, %f451;
	mul.ftz.f32 	%f453, %f452, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f454, %f453;
	.loc 1 486 209
	neg.ftz.f32 	%f540, %f454;

BB5_102:
	setp.ltu.ftz.f32	%p47, %f504, 0f00000000;
	@%p47 bra 	BB5_104;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f455, %f504;
	mul.ftz.f32 	%f456, %f455, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f541, %f456;
	bra.uni 	BB5_105;

BB5_104:
	neg.ftz.f32 	%f457, %f504;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f458, %f457;
	mul.ftz.f32 	%f459, %f458, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f460, %f459;
	neg.ftz.f32 	%f541, %f460;

BB5_105:
	setp.ltu.ftz.f32	%p48, %f503, 0f00000000;
	@%p48 bra 	BB5_107;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f461, %f503;
	mul.ftz.f32 	%f462, %f461, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f542, %f462;
	bra.uni 	BB5_108;

BB5_107:
	neg.ftz.f32 	%f463, %f503;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f464, %f463;
	mul.ftz.f32 	%f465, %f464, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f466, %f465;
	neg.ftz.f32 	%f542, %f466;

BB5_108:
	mul.ftz.f32 	%f467, %f541, %f162;
	fma.rn.ftz.f32 	%f468, %f540, %f161, %f467;
	fma.rn.ftz.f32 	%f233, %f542, %f163, %f468;
	setp.ltu.ftz.f32	%p49, %f233, 0f00000000;
	@%p49 bra 	BB5_110;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f469, %f233;
	mul.ftz.f32 	%f470, %f469, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f543, %f470;
	bra.uni 	BB5_111;

BB5_110:
	neg.ftz.f32 	%f471, %f233;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f472, %f471;
	mul.ftz.f32 	%f473, %f472, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f474, %f473;
	neg.ftz.f32 	%f543, %f474;

BB5_111:
	mul.ftz.f32 	%f475, %f541, %f169;
	fma.rn.ftz.f32 	%f476, %f540, %f168, %f475;
	fma.rn.ftz.f32 	%f237, %f542, %f170, %f476;
	setp.ltu.ftz.f32	%p50, %f237, 0f00000000;
	@%p50 bra 	BB5_113;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f477, %f237;
	mul.ftz.f32 	%f478, %f477, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f544, %f478;
	bra.uni 	BB5_114;

BB5_113:
	neg.ftz.f32 	%f479, %f237;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f480, %f479;
	mul.ftz.f32 	%f481, %f480, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f482, %f481;
	neg.ftz.f32 	%f544, %f482;

BB5_114:
	mul.ftz.f32 	%f483, %f541, %f176;
	fma.rn.ftz.f32 	%f484, %f540, %f175, %f483;
	fma.rn.ftz.f32 	%f241, %f542, %f177, %f484;
	setp.ltu.ftz.f32	%p51, %f241, 0f00000000;
	@%p51 bra 	BB5_116;

	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f485, %f241;
	mul.ftz.f32 	%f486, %f485, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f545, %f486;
	bra.uni 	BB5_117;

BB5_116:
	neg.ftz.f32 	%f487, %f241;
	.loc 2 3600 10
	lg2.approx.ftz.f32 	%f488, %f487;
	mul.ftz.f32 	%f489, %f488, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f490, %f489;
	neg.ftz.f32 	%f545, %f490;

BB5_117:
	.loc 1 486 1
	add.s32 	%r47, %r23, -1;
	sub.s32 	%r48, %r47, %r4;
	setp.eq.s32	%p52, %r20, 0;
	selp.b32	%r49, %r4, %r48, %p52;
	.loc 1 486 1
	mul.lo.s32 	%r13, %r49, %r19;
	add.s32 	%r14, %r13, %r3;
	.loc 1 486 1
	@%p8 bra 	BB5_119;

	cvta.to.global.u64 	%rd50, %rd10;
	mul.wide.s32 	%rd51, %r14, 16;
	add.s64 	%rd52, %rd50, %rd51;
	.loc 1 486 1
	st.global.v4.f32 	[%rd52], {%f525, %f526, %f527, %f494};
	bra.uni 	BB5_120;

BB5_119:
	cvta.to.global.u64 	%rd53, %rd10;
	mul.wide.s32 	%rd54, %r14, 8;
	add.s64 	%rd55, %rd53, %rd54;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f525;
	mov.b16 	%rs81, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f526;
	mov.b16 	%rs82, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f527;
	mov.b16 	%rs83, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f494;
	mov.b16 	%rs84, %temp;
}
	.loc 1 486 241
	st.global.v4.u16 	[%rd55], {%rs81, %rs82, %rs83, %rs84};

BB5_120:
	.loc 1 486 1
	add.s32 	%r60, %r3, %r13;
	.loc 1 486 1
	add.s32 	%r15, %r60, 1;
	.loc 1 486 1
	@%p8 bra 	BB5_122;

	cvta.to.global.u64 	%rd56, %rd10;
	mul.wide.s32 	%rd57, %r15, 16;
	add.s64 	%rd58, %rd56, %rd57;
	.loc 1 486 1
	st.global.v4.f32 	[%rd58], {%f531, %f532, %f533, %f498};
	bra.uni 	BB5_123;

BB5_122:
	cvta.to.global.u64 	%rd59, %rd10;
	mul.wide.s32 	%rd60, %r15, 8;
	add.s64 	%rd61, %rd59, %rd60;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f531;
	mov.b16 	%rs85, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f532;
	mov.b16 	%rs86, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f533;
	mov.b16 	%rs87, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f498;
	mov.b16 	%rs88, %temp;
}
	.loc 1 486 241
	st.global.v4.u16 	[%rd61], {%rs85, %rs86, %rs87, %rs88};

BB5_123:
	.loc 1 486 1
	selp.b32	%r69, 1, -1, %p52;
	add.s32 	%r70, %r49, %r69;
	.loc 1 486 1
	mul.lo.s32 	%r16, %r70, %r19;
	add.s32 	%r17, %r16, %r3;
	.loc 1 486 1
	@%p8 bra 	BB5_125;

	cvta.to.global.u64 	%rd62, %rd10;
	mul.wide.s32 	%rd63, %r17, 16;
	add.s64 	%rd64, %rd62, %rd63;
	.loc 1 486 1
	st.global.v4.f32 	[%rd64], {%f537, %f538, %f539, %f502};
	bra.uni 	BB5_126;

BB5_125:
	cvta.to.global.u64 	%rd65, %rd10;
	mul.wide.s32 	%rd66, %r17, 8;
	add.s64 	%rd67, %rd65, %rd66;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f502;
	mov.b16 	%rs89, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f537;
	mov.b16 	%rs90, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f538;
	mov.b16 	%rs91, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f539;
	mov.b16 	%rs92, %temp;
}
	.loc 1 486 241
	st.global.v4.u16 	[%rd67], {%rs90, %rs91, %rs92, %rs89};

BB5_126:
	.loc 1 486 1
	add.s32 	%r81, %r3, %r16;
	.loc 1 486 1
	add.s32 	%r18, %r81, 1;
	.loc 1 486 1
	@%p8 bra 	BB5_128;

	cvta.to.global.u64 	%rd68, %rd10;
	mul.wide.s32 	%rd69, %r18, 16;
	add.s64 	%rd70, %rd68, %rd69;
	.loc 1 486 1
	st.global.v4.f32 	[%rd70], {%f543, %f544, %f545, %f506};
	bra.uni 	BB5_129;

BB5_128:
	cvta.to.global.u64 	%rd71, %rd10;
	mul.wide.s32 	%rd72, %r18, 8;
	add.s64 	%rd73, %rd71, %rd72;
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f506;
	mov.b16 	%rs93, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f543;
	mov.b16 	%rs94, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f544;
	mov.b16 	%rs95, %temp;
}
	.loc 2 3513 10
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f545;
	mov.b16 	%rs96, %temp;
}
	.loc 1 486 241
	st.global.v4.u16 	[%rd73], {%rs94, %rs95, %rs96, %rs93};

BB5_129:
	.loc 1 486 2
	ret;
}


