//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//

.version 4.1
.target sm_30
.address_size 64

// Colour-conversion coefficient tables in constant memory.
// Each table is declared as 36 raw bytes, i.e. nine 4-byte words. Read as
// little-endian IEEE-754 float32 they form nine coefficients (for example the
// leading bytes 0, 0, 128, 63 of k601YPbPr_To_RGB32f are 0x3F800000 = 1.0f).
// NOTE(review): presumably each table is a 3x3 matrix as the names suggest; the
// row/column layout and channel order are not visible in this chunk — confirm
// against the CUDA source before editing any byte.
.const .align 4 .b8 kRGB32f_To_601YPbPr[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 33, 201, 44, 190, 111, 155, 169, 190, 0, 0, 0, 63, 0, 0, 0, 63, 70, 94, 214, 190, 232, 134, 166, 189};
.const .align 4 .b8 k601YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 188, 116, 179, 63, 0, 0, 128, 63, 152, 50, 176, 190, 158, 209, 54, 191, 0, 0, 128, 63, 229, 208, 226, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCr[36] = {70, 246, 130, 66, 145, 141, 0, 67, 94, 186, 199, 65, 33, 48, 23, 194, 240, 103, 148, 194, 0, 0, 224, 66, 0, 0, 224, 66, 111, 146, 187, 194, 70, 182, 145, 193};
.const .align 4 .b8 k601YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 182, 23, 205, 59, 37, 160, 149, 59, 40, 15, 201, 186, 156, 239, 80, 187, 37, 160, 149, 59, 236, 155, 1, 60, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCr[36] = {219, 121, 131, 62, 152, 14, 1, 63, 18, 131, 200, 61, 174, 199, 23, 190, 238, 252, 148, 190, 197, 224, 224, 62, 197, 224, 224, 62, 217, 78, 188, 190, 174, 71, 146, 189};
.const .align 4 .b8 k601YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 160, 74, 204, 63, 127, 10, 149, 63, 254, 148, 200, 190, 184, 30, 80, 191, 127, 10, 149, 63, 78, 26, 1, 64, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_601YCbCrFullRange[36] = {135, 22, 153, 62, 162, 69, 22, 63, 213, 120, 233, 61, 166, 27, 44, 190, 39, 241, 168, 190, 250, 254, 254, 62, 250, 254, 254, 62, 43, 135, 213, 190, 59, 223, 165, 189};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB8u[36] = {0, 0, 128, 63, 0, 0, 0, 0, 72, 193, 178, 63, 0, 0, 128, 63, 143, 130, 175, 190, 225, 26, 54, 191, 0, 0, 128, 63, 20, 238, 225, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_601YCbCrFullRange[36] = {113, 125, 152, 66, 92, 175, 21, 67, 92, 143, 232, 65, 158, 111, 43, 194, 49, 72, 168, 194, 0, 0, 254, 66, 0, 0, 254, 66, 170, 177, 212, 194, 88, 57, 165, 193};
.const .align 4 .b8 k601YCbCrFullRange_To_RGB32f[36] = {129, 128, 128, 59, 0, 0, 0, 0, 188, 116, 179, 59, 129, 128, 128, 59, 194, 50, 176, 186, 179, 209, 54, 187, 129, 128, 128, 59, 229, 208, 226, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YPbPr[36] = {208, 179, 89, 62, 89, 23, 55, 63, 152, 221, 147, 61, 186, 164, 234, 189, 210, 86, 197, 190, 0, 0, 0, 63, 0, 0, 0, 63, 190, 134, 232, 190, 16, 202, 59, 189};
.const .align 4 .b8 k709YPbPr_To_RGB32f[36] = {0, 0, 128, 63, 0, 0, 0, 0, 12, 147, 201, 63, 0, 0, 128, 63, 221, 209, 63, 190, 243, 173, 239, 190, 0, 0, 128, 63, 77, 132, 237, 63, 0, 0, 0, 0};
.const .align 4 .b8 kRGB32f_To_709YCbCr[36] = {106, 60, 58, 66, 6, 161, 28, 67, 244, 253, 124, 65, 223, 79, 205, 193, 8, 172, 172, 194, 0, 0, 224, 66, 0, 0, 224, 66, 195, 117, 203, 194, 236, 81, 36, 193};
.const .align 4 .b8 k709YCbCr_To_RGB32f[36] = {37, 160, 149, 59, 0, 0, 0, 0, 239, 94, 230, 59, 37, 160, 149, 59, 33, 57, 91, 186, 178, 245, 8, 187, 37, 160, 149, 59, 82, 185, 7, 60, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCrFullRange_To_RGB32f[36] = {131, 128, 128, 59, 0, 0, 0, 0, 28, 147, 201, 59, 131, 128, 128, 59, 61, 210, 63, 186, 248, 173, 239, 186, 131, 128, 128, 59, 82, 132, 237, 59, 0, 0, 0, 0};
.const .align 4 .b8 kRGB8u_To_709YCbCr[36] = {207, 247, 58, 62, 53, 62, 29, 63, 231, 251, 125, 61, 184, 30, 206, 189, 23, 89, 173, 190, 197, 224, 224, 62, 197, 224, 224, 62, 12, 66, 204, 190, 195, 245, 36, 189};
.const .align 4 .b8 k709YCbCr_To_RGB8u[36] = {127, 10, 149, 63, 0, 0, 0, 0, 147, 120, 229, 63, 127, 10, 149, 63, 53, 94, 90, 190, 205, 108, 8, 191, 127, 10, 149, 63, 154, 49, 7, 64, 0, 0, 0, 0};
.const .align 4 .b8 k709YCbCr_To_601YCbCr[36] = {0, 0, 128, 63, 23, 100, 203, 61, 1, 77, 68, 62, 0, 0, 0, 0, 18, 103, 125, 63, 10, 158, 226, 189, 0, 0, 0, 0, 61, 98, 148, 189, 249, 191, 123, 63};
.const .align 4 .b8 k601YCbCr_To_709YCbCr[36] = {0, 0, 128, 63, 122, 165, 236, 189, 179, 237, 84, 190, 0, 0, 0, 0, 204, 98, 130, 63, 216, 188, 234, 61, 0, 0, 0, 0, 74, 179, 153, 61, 234, 61, 131, 63};
// Three little-endian float32 words: 0x41800000, 0x43000000, 0x43000000
// = {16.0f, 128.0f, 128.0f}.
.const .align 4 .b8 kYCbCrOffset[12] = {0, 0, 128, 65, 0, 0, 0, 67, 0, 0, 0, 67};
// Three little-endian float32 words: 0x00000000, 0x43000000, 0x43000000
// = {0.0f, 128.0f, 128.0f}.
.const .align 4 .b8 kYCbCrFullRangeOffset[12] = {0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 67};
// 32767 = 0x7FFF. Not referenced by any code visible in this chunk.
.const .align 4 .u32 kRandMax = 32767;
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix has been demoted
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix has been demoted
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix has been demoted
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix has been demoted

//
// Recolor(PixelRGB px, float4 limit, float* m, int applyPower)
//
//   param_0 : 16-byte pixel, read as four f32 at +0, +4, +8, +12
//   param_1 : 16-byte limit, f32 at +0, +4, +8 are read (+12 is ignored)
//   param_2 : generic pointer to nine f32 coefficients m[0..8]
//   param_3 : when non-zero, every colour output goes through a sign-preserving
//             power with exponent 0f3EE8BA2E (~0.4545)
//
//   c0 = min(px[+8], limit[+0])
//   c1 = min(px[+4], limit[+4])
//   c2 = min(px[+0], limit[+8])
//
//   retval[+0]  = c0*m[6] + c1*m[7] + c2*m[8]
//   retval[+4]  = c0*m[3] + c1*m[4] + c2*m[5]
//   retval[+8]  = c0*m[0] + c1*m[1] + c2*m[2]
//   retval[+12] = px[+12]                      (copied through untouched)
//
.visible .func  (.param .align 16 .b8 func_retval0[16]) _Z7Recolor8PixelRGB6float4Pfi(
	.param .align 16 .b8 _Z7Recolor8PixelRGB6float4Pfi_param_0[16],
	.param .align 16 .b8 _Z7Recolor8PixelRGB6float4Pfi_param_1[16],
	.param .b64 _Z7Recolor8PixelRGB6float4Pfi_param_2,
	.param .b32 _Z7Recolor8PixelRGB6float4Pfi_param_3
)
{
	.reg .pred 	%p<5>;
	.reg .s32 	%r<2>;
	.reg .f32 	%f<31>;
	.reg .s64 	%rd<2>;

	// ---- fetch arguments ------------------------------------------------
	ld.param.u64 	%rd1, [_Z7Recolor8PixelRGB6float4Pfi_param_2];
	ld.param.u32 	%r1, [_Z7Recolor8PixelRGB6float4Pfi_param_3];
	ld.param.f32 	%f1, [_Z7Recolor8PixelRGB6float4Pfi_param_0];
	ld.param.f32 	%f2, [_Z7Recolor8PixelRGB6float4Pfi_param_0+4];
	ld.param.f32 	%f3, [_Z7Recolor8PixelRGB6float4Pfi_param_0+8];
	ld.param.f32 	%f4, [_Z7Recolor8PixelRGB6float4Pfi_param_0+12];
	ld.param.f32 	%f5, [_Z7Recolor8PixelRGB6float4Pfi_param_1];
	ld.param.f32 	%f6, [_Z7Recolor8PixelRGB6float4Pfi_param_1+4];
	ld.param.f32 	%f7, [_Z7Recolor8PixelRGB6float4Pfi_param_1+8];

	// ---- clamp each colour channel from above ---------------------------
	min.ftz.f32 	%f8, %f3, %f5;			// c0
	min.ftz.f32 	%f9, %f2, %f6;			// c1
	min.ftz.f32 	%f10, %f1, %f7;			// c2

	// ---- coefficients m[6..8] -> first output component -----------------
	ld.f32 	%f11, [%rd1+24];
	ld.f32 	%f12, [%rd1+28];
	ld.f32 	%f13, [%rd1+32];
	mul.ftz.f32 	%f14, %f9, %f12;
	fma.rn.ftz.f32 	%f15, %f8, %f11, %f14;
	fma.rn.ftz.f32 	%f16, %f10, %f13, %f15;

	// ---- coefficients m[3..5] -> second output component ----------------
	ld.f32 	%f17, [%rd1+12];
	ld.f32 	%f18, [%rd1+16];
	ld.f32 	%f19, [%rd1+20];
	mul.ftz.f32 	%f20, %f9, %f18;
	fma.rn.ftz.f32 	%f21, %f8, %f17, %f20;
	fma.rn.ftz.f32 	%f22, %f10, %f19, %f21;

	// ---- coefficients m[0..2] -> third output component -----------------
	ld.f32 	%f23, [%rd1];
	ld.f32 	%f24, [%rd1+4];
	ld.f32 	%f25, [%rd1+8];
	mul.ftz.f32 	%f26, %f9, %f24;
	fma.rn.ftz.f32 	%f27, %f8, %f23, %f26;
	fma.rn.ftz.f32 	%f28, %f10, %f25, %f27;

	// Linear results are returned as-is when the power flag is clear.
	setp.eq.s32	%p1, %r1, 0;
	@%p1 bra 	BB0_store;

	// ---- sign-preserving power ------------------------------------------
	// For each value v:  neg = (v < 0 or v is NaN)
	//                    e   = ex2(lg2(neg ? -v : v) * 0f3EE8BA2E)
	//                    v   = neg ? -e : e

	// first component
	setp.ltu.ftz.f32	%p2, %f16, 0f00000000;
	neg.ftz.f32 	%f29, %f16;
	selp.f32 	%f29, %f29, %f16, %p2;
	lg2.approx.ftz.f32 	%f29, %f29;
	mul.ftz.f32 	%f29, %f29, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f29, %f29;
	neg.ftz.f32 	%f30, %f29;
	selp.f32 	%f16, %f30, %f29, %p2;

	// second component
	setp.ltu.ftz.f32	%p3, %f22, 0f00000000;
	neg.ftz.f32 	%f29, %f22;
	selp.f32 	%f29, %f29, %f22, %p3;
	lg2.approx.ftz.f32 	%f29, %f29;
	mul.ftz.f32 	%f29, %f29, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f29, %f29;
	neg.ftz.f32 	%f30, %f29;
	selp.f32 	%f22, %f30, %f29, %p3;

	// third component
	setp.ltu.ftz.f32	%p4, %f28, 0f00000000;
	neg.ftz.f32 	%f29, %f28;
	selp.f32 	%f29, %f29, %f28, %p4;
	lg2.approx.ftz.f32 	%f29, %f29;
	mul.ftz.f32 	%f29, %f29, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f29, %f29;
	neg.ftz.f32 	%f30, %f29;
	selp.f32 	%f28, %f30, %f29, %p4;

BB0_store:
	// ---- write the 16-byte return value ---------------------------------
	st.param.f32	[func_retval0+0], %f16;
	st.param.f32	[func_retval0+4], %f22;
	st.param.f32	[func_retval0+8], %f28;
	st.param.f32	[func_retval0+12], %f4;
	ret;
}

//
// SharedLoadMatrix(float* dst, const float* src, bool swap)
//
//   param_0 : generic pointer, destination of nine f32 values
//   param_1 : generic pointer, source of nine f32 values
//   param_2 : flag; only its low 8 bits are examined
//
// Threads with tid.y == 0 and tid.x in {0,1,2} each copy three consecutive
// floats starting at element 3*tid.x. When the flag is non-zero the first and
// third element of that triple are exchanged during the copy. Every thread
// that calls the function then reaches `bar.sync 0`.
//
.visible .func _Z16SharedLoadMatrixPfPKfb(
	.param .b64 _Z16SharedLoadMatrixPfPKfb_param_0,
	.param .b64 _Z16SharedLoadMatrixPfPKfb_param_1,
	.param .b32 _Z16SharedLoadMatrixPfPKfb_param_2
)
{
	.reg .pred 	%p<5>;
	.reg .s16 	%rs<3>;
	.reg .s32 	%r<4>;
	.reg .f32 	%f<4>;
	.reg .s64 	%rd<10>;

	ld.param.u64 	%rd1, [_Z16SharedLoadMatrixPfPKfb_param_0];	// dst
	ld.param.u64 	%rd2, [_Z16SharedLoadMatrixPfPKfb_param_1];	// src
	ld.param.s8 	%rs1, [_Z16SharedLoadMatrixPfPKfb_param_2];	// swap flag

	// Threads outside the 3-thread copy group go straight to the barrier.
	mov.u32 	%r1, %tid.x;
	mov.u32 	%r2, %tid.y;
	setp.ne.s32	%p1, %r2, 0;
	setp.ge.s32	%p2, %r1, 3;
	or.pred  	%p3, %p1, %p2;
	@%p3 bra 	BB1_sync;

	// Byte offset of this thread's triple, applied to both pointers.
	mul.lo.s32 	%r3, %r1, 3;
	mul.wide.s32 	%rd3, %r3, 4;
	add.s64 	%rd4, %rd1, %rd3;		// &dst[3*tid.x]
	add.s64 	%rd5, %rd2, %rd3;		// &src[3*tid.x]

	// Source byte offsets for the outer two elements: (8, 0) when the flag
	// is set, (0, 8) otherwise.
	and.b16  	%rs2, %rs1, 255;
	setp.ne.s16	%p4, %rs2, 0;
	selp.b64	%rd6, 8, 0, %p4;
	selp.b64	%rd7, 0, 8, %p4;
	add.s64 	%rd8, %rd5, %rd6;
	add.s64 	%rd9, %rd5, %rd7;

	// Copy element by element, load immediately followed by its store.
	ld.f32 	%f1, [%rd8];
	st.f32 	[%rd4], %f1;
	ld.f32 	%f2, [%rd5+4];
	st.f32 	[%rd4+4], %f2;
	ld.f32 	%f3, [%rd9];
	st.f32 	[%rd4+8], %f3;

BB1_sync:
	bar.sync 	0;
	ret;
}

//
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel
//
// Each thread reads a 2x2 group of f32 input samples and writes four 4-component
// output pixels. Every output pixel is built from a single input sample placed
// in one channel (the other two channels are the sample multiplied by 0.0),
// clamped from above by param_12, multiplied by a 3x3 coefficient table held in
// shared memory, optionally passed through a sign-preserving power, and stored
// with a fourth component of 1.0.
//
// Parameters as used by the code below:
//   param_0  : input buffer; also the source of the nine coefficients
//   param_1  : element offset added to every input sample index
//   param_2  : input row stride, in f32 elements
//   param_3  : non-zero -> input rows are addressed from row (param_10-1) with
//              a negated stride
//   param_4  : never read in this kernel
//   param_5  : output buffer
//   param_6  : output row stride, in pixels
//   param_7  : non-zero -> output row index is (param_10-1) - y and the second
//              output row is the one before it instead of the one after it
//   param_8  : non-zero -> pixels stored as 4 x f32 (16 bytes);
//              zero     -> pixels stored as 4 x f16 (8 bytes)
//   param_9  : exclusive upper bound for x = 2*(ntid.x*ctaid.x + tid.x)
//   param_10 : exclusive upper bound for y = 2*(ntid.y*ctaid.y + tid.y)
//   param_11 : non-zero -> first and third element of each coefficient triple
//              are exchanged while loading
//   param_12 : 16-byte struct; the f32 at +0, +4, +8 are the channel limits
//   param_13 : element index inside param_0 where the coefficients start
//   param_14 : non-zero -> apply the power with exponent 0f3EE8BA2E (~0.4545)
//
// NOTE(review): only x and y are range-checked; the x+1 and y+1 samples and
// pixels are accessed unconditionally. Presumably callers guarantee even
// dimensions — confirm.
//
.visible .entry PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel(
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_0,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_1,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_2,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_3,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_4,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_5,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_6,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_7,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_8,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_9,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_10,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_11,
	.param .align 16 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_12[16],
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_13,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_14
)
{
	.reg .pred 	%p<36>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<140>;
	.reg .f32 	%f<307>;
	.reg .s64 	%rd<83>;
	// demoted variable
	// Nine f32 coefficients m[0..8] shared by the whole thread block.
	.shared .align 4 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix[36];

	ld.param.u64 	%rd6, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_0];
	ld.param.u32 	%r5, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_1];
	ld.param.u32 	%r6, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_2];
	ld.param.u32 	%r7, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_3];
	ld.param.u64 	%rd7, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_5];
	ld.param.u32 	%r8, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_6];
	ld.param.u32 	%r9, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_7];
	ld.param.u32 	%r10, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_8];
	ld.param.u32 	%r11, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_9];
	ld.param.u32 	%r12, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_10];
	ld.param.u32 	%r13, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_11];
	ld.param.f32 	%f144, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_12+8];
	ld.param.f32 	%f143, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_12+4];
	ld.param.f32 	%f142, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_12];
	ld.param.u32 	%r14, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_13];
	ld.param.u32 	%r15, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel_param_14];
	cvta.to.global.u64 	%rd1, %rd6;
	// Only threads with tid.y == 0 and tid.x < 3 load coefficients.
	mov.u32 	%r1, %tid.y;
	setp.eq.s32	%p1, %r1, 0;
	mov.u32 	%r2, %tid.x;
	setp.lt.s32	%p2, %r2, 3;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB2_2;
	bra.uni 	BB2_1;

BB2_1:
	// Copy in[param_13 + 3*tid.x + {0,1,2}] into m[3*tid.x + {0,1,2}].
	// When param_11 != 0 the +0 and +2 source elements are exchanged.
	cvt.s64.s32	%rd8, %r14;
	setp.ne.s32	%p4, %r13, 0;
	mul.lo.s32 	%r16, %r2, 3;
	cvt.s64.s32	%rd9, %r16;
	add.s64 	%rd10, %rd9, %rd8;
	selp.b64	%rd11, 2, 0, %p4;
	add.s64 	%rd12, %rd10, %rd11;
	shl.b64 	%rd13, %rd12, 2;
	add.s64 	%rd14, %rd1, %rd13;
	mul.wide.s32 	%rd15, %r16, 4;
	mov.u64 	%rd16, PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix;
	add.s64 	%rd17, %rd16, %rd15;
	ld.global.f32 	%f146, [%rd14];
	st.shared.f32 	[%rd17], %f146;
	shl.b64 	%rd18, %rd10, 2;
	add.s64 	%rd19, %rd1, %rd18;
	ld.global.f32 	%f147, [%rd19+4];
	st.shared.f32 	[%rd17+4], %f147;
	selp.b64	%rd20, 0, 2, %p4;
	add.s64 	%rd21, %rd10, %rd20;
	shl.b64 	%rd22, %rd21, 2;
	add.s64 	%rd23, %rd1, %rd22;
	ld.global.f32 	%f148, [%rd23];
	st.shared.f32 	[%rd17+8], %f148;

BB2_2:
	// All threads wait here so the shared coefficients are complete.
	bar.sync 	0;
	// %r3 = x = 2*(ntid.x*ctaid.x + tid.x), %r4 = y = 2*(ntid.y*ctaid.y + tid.y)
	mov.u32 	%r17, %ntid.x;
	mov.u32 	%r18, %ctaid.x;
	mad.lo.s32 	%r19, %r17, %r18, %r2;
	shl.b32 	%r3, %r19, 1;
	mov.u32 	%r20, %ntid.y;
	mov.u32 	%r21, %ctaid.y;
	mad.lo.s32 	%r22, %r20, %r21, %r1;
	shl.b32 	%r4, %r22, 1;
	// Threads with x >= param_9 or y >= param_10 return immediately.
	setp.lt.s32	%p5, %r3, %r11;
	setp.lt.s32	%p6, %r4, %r12;
	and.pred  	%p7, %p5, %p6;
	@!%p7 bra 	BB2_63;
	bra.uni 	BB2_3;

BB2_3:
	// ---- sample (x, y) ----
	// %r26 = signed input row stride, %rd27 = base element offset
	// (param_1, plus (param_10-1)*param_2 when param_3 != 0).
	cvt.s64.s32	%rd24, %r5;
	add.s32 	%r23, %r12, -1;
	mul.lo.s32 	%r24, %r23, %r6;
	cvt.s64.s32	%rd25, %r24;
	neg.s32 	%r25, %r6;
	setp.eq.s32	%p8, %r7, 0;
	selp.b32	%r26, %r6, %r25, %p8;
	selp.b64	%rd26, 0, %rd25, %p8;
	add.s64 	%rd27, %rd26, %rd24;
	mad.lo.s32 	%r27, %r4, %r26, %r3;
	cvt.s64.s32	%rd28, %r27;
	add.s64 	%rd29, %rd28, %rd27;
	shl.b64 	%rd30, %rd29, 2;
	add.s64 	%rd31, %rd1, %rd30;
	ld.global.f32 	%f149, [%rd31];
	// The sample feeds the third channel; the other two receive sample*0.0.
	mul.ftz.f32 	%f150, %f149, 0f00000000;
	min.ftz.f32 	%f1, %f150, %f142;
	min.ftz.f32 	%f2, %f150, %f143;
	min.ftz.f32 	%f3, %f149, %f144;
	// %f4..%f6 = m[6..8]; they stay live for all four pixels.
	ld.shared.f32 	%f4, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+24];
	ld.shared.f32 	%f5, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+28];
	mul.ftz.f32 	%f151, %f2, %f5;
	fma.rn.ftz.f32 	%f152, %f1, %f4, %f151;
	ld.shared.f32 	%f6, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+32];
	fma.rn.ftz.f32 	%f7, %f3, %f6, %f152;
	// %p9 (param_14 == 0) selects the linear path for all four pixels.
	setp.eq.s32	%p9, %r15, 0;
	// Fourth output component is the constant 1.0.
	mov.f32 	%f285, 0f3F800000;
	@%p9 bra 	BB2_14;

	// Power path: v -> sign(v) * ex2(lg2(|v|) * 0f3EE8BA2E), per component.
	setp.ltu.ftz.f32	%p10, %f7, 0f00000000;
	@%p10 bra 	BB2_6;

	lg2.approx.ftz.f32 	%f153, %f7;
	mul.ftz.f32 	%f154, %f153, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f273, %f154;
	bra.uni 	BB2_7;

BB2_6:
	neg.ftz.f32 	%f155, %f7;
	lg2.approx.ftz.f32 	%f156, %f155;
	mul.ftz.f32 	%f157, %f156, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f158, %f157;
	neg.ftz.f32 	%f273, %f158;

BB2_7:
	// %f281, %f280, %f279 = m[3], m[4], m[5]; reused by the later pixels.
	ld.shared.f32 	%f281, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+12];
	ld.shared.f32 	%f280, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+16];
	mul.ftz.f32 	%f159, %f2, %f280;
	fma.rn.ftz.f32 	%f160, %f1, %f281, %f159;
	ld.shared.f32 	%f279, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+20];
	fma.rn.ftz.f32 	%f14, %f3, %f279, %f160;
	setp.ltu.ftz.f32	%p11, %f14, 0f00000000;
	@%p11 bra 	BB2_9;

	lg2.approx.ftz.f32 	%f161, %f14;
	mul.ftz.f32 	%f162, %f161, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f274, %f162;
	bra.uni 	BB2_10;

BB2_9:
	neg.ftz.f32 	%f163, %f14;
	lg2.approx.ftz.f32 	%f164, %f163;
	mul.ftz.f32 	%f165, %f164, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f166, %f165;
	neg.ftz.f32 	%f274, %f166;

BB2_10:
	// %f278, %f277, %f276 = m[0], m[1], m[2]; reused by the later pixels.
	ld.shared.f32 	%f278, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix];
	ld.shared.f32 	%f277, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+4];
	mul.ftz.f32 	%f167, %f2, %f277;
	fma.rn.ftz.f32 	%f168, %f1, %f278, %f167;
	ld.shared.f32 	%f276, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+8];
	fma.rn.ftz.f32 	%f21, %f3, %f276, %f168;
	setp.ltu.ftz.f32	%p12, %f21, 0f00000000;
	@%p12 bra 	BB2_12;

	lg2.approx.ftz.f32 	%f169, %f21;
	mul.ftz.f32 	%f170, %f169, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f275, %f170;
	bra.uni 	BB2_13;

BB2_12:
	neg.ftz.f32 	%f171, %f21;
	lg2.approx.ftz.f32 	%f172, %f171;
	mul.ftz.f32 	%f173, %f172, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f174, %f173;
	neg.ftz.f32 	%f275, %f174;

BB2_13:
	mov.f32 	%f284, %f275;
	mov.f32 	%f283, %f274;
	mov.f32 	%f282, %f273;
	bra.uni 	BB2_15;

BB2_14:
	// Linear path: same three dot products, no power applied.
	ld.shared.f32 	%f281, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+12];
	ld.shared.f32 	%f280, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+16];
	mul.ftz.f32 	%f175, %f2, %f280;
	fma.rn.ftz.f32 	%f176, %f1, %f281, %f175;
	ld.shared.f32 	%f279, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+20];
	fma.rn.ftz.f32 	%f283, %f3, %f279, %f176;
	ld.shared.f32 	%f278, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix];
	ld.shared.f32 	%f277, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+4];
	mul.ftz.f32 	%f177, %f2, %f277;
	fma.rn.ftz.f32 	%f178, %f1, %f278, %f177;
	ld.shared.f32 	%f276, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Bayered_Kernel$__cuda_local_var_180705_417_non_const_matrix+8];
	fma.rn.ftz.f32 	%f284, %f3, %f276, %f178;
	mov.f32 	%f282, %f7;

BB2_15:
	// Output pixel index %r41 = row*param_6 + x, where row is y or
	// (param_10-1)-y depending on param_7. %p13 and %p14 are reused below.
	sub.s32 	%r34, %r23, %r4;
	setp.eq.s32	%p13, %r9, 0;
	selp.b32	%r35, %r4, %r34, %p13;
	mad.lo.s32 	%r41, %r35, %r8, %r3;
	cvt.s64.s32	%rd2, %r41;
	setp.eq.s32	%p14, %r10, 0;
	@%p14 bra 	BB2_17;

	// 16-byte pixel: four f32 components.
	cvta.to.global.u64 	%rd32, %rd7;
	shl.b64 	%rd33, %rd2, 4;
	add.s64 	%rd34, %rd32, %rd33;
	st.global.v4.f32 	[%rd34], {%f282, %f283, %f284, %f285};
	bra.uni 	BB2_18;

BB2_17:
	// 8-byte pixel: the same four components converted to f16.
	cvta.to.global.u64 	%rd35, %rd7;
	shl.b64 	%rd36, %rd2, 3;
	add.s64 	%rd37, %rd35, %rd36;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f285;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f284;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f283;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f282;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd37], {%rs4, %rs3, %rs2, %rs1};

BB2_18:
	// ---- sample (x+1, y): feeds the middle channel ----
	mul.lo.s32 	%r53, %r22, %r26;
	shl.b32 	%r54, %r53, 1;
	add.s32 	%r55, %r3, %r54;
	add.s32 	%r56, %r55, 1;
	cvt.s64.s32	%rd38, %r56;
	add.s64 	%rd43, %rd38, %rd27;
	shl.b64 	%rd45, %rd43, 2;
	add.s64 	%rd46, %rd1, %rd45;
	ld.global.f32 	%f179, [%rd46];
	mul.ftz.f32 	%f180, %f179, 0f00000000;
	min.ftz.f32 	%f53, %f180, %f142;
	min.ftz.f32 	%f54, %f179, %f143;
	min.ftz.f32 	%f55, %f180, %f144;
	mul.ftz.f32 	%f181, %f54, %f5;
	fma.rn.ftz.f32 	%f182, %f53, %f4, %f181;
	fma.rn.ftz.f32 	%f56, %f55, %f6, %f182;
	mov.f32 	%f292, 0f3F800000;
	@%p9 bra 	BB2_29;

	setp.ltu.ftz.f32	%p17, %f56, 0f00000000;
	@%p17 bra 	BB2_21;

	lg2.approx.ftz.f32 	%f183, %f56;
	mul.ftz.f32 	%f184, %f183, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f286, %f184;
	bra.uni 	BB2_22;

BB2_21:
	neg.ftz.f32 	%f185, %f56;
	lg2.approx.ftz.f32 	%f186, %f185;
	mul.ftz.f32 	%f187, %f186, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f188, %f187;
	neg.ftz.f32 	%f286, %f188;

BB2_22:
	mul.ftz.f32 	%f189, %f54, %f280;
	fma.rn.ftz.f32 	%f190, %f53, %f281, %f189;
	fma.rn.ftz.f32 	%f60, %f55, %f279, %f190;
	setp.ltu.ftz.f32	%p18, %f60, 0f00000000;
	@%p18 bra 	BB2_24;

	lg2.approx.ftz.f32 	%f191, %f60;
	mul.ftz.f32 	%f192, %f191, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f287, %f192;
	bra.uni 	BB2_25;

BB2_24:
	neg.ftz.f32 	%f193, %f60;
	lg2.approx.ftz.f32 	%f194, %f193;
	mul.ftz.f32 	%f195, %f194, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f196, %f195;
	neg.ftz.f32 	%f287, %f196;

BB2_25:
	mul.ftz.f32 	%f197, %f54, %f277;
	fma.rn.ftz.f32 	%f198, %f53, %f278, %f197;
	fma.rn.ftz.f32 	%f64, %f55, %f276, %f198;
	setp.ltu.ftz.f32	%p19, %f64, 0f00000000;
	@%p19 bra 	BB2_27;

	lg2.approx.ftz.f32 	%f199, %f64;
	mul.ftz.f32 	%f200, %f199, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f288, %f200;
	bra.uni 	BB2_28;

BB2_27:
	neg.ftz.f32 	%f201, %f64;
	lg2.approx.ftz.f32 	%f202, %f201;
	mul.ftz.f32 	%f203, %f202, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f204, %f203;
	neg.ftz.f32 	%f288, %f204;

BB2_28:
	mov.f32 	%f291, %f288;
	mov.f32 	%f290, %f287;
	mov.f32 	%f289, %f286;
	bra.uni 	BB2_30;

BB2_29:
	mul.ftz.f32 	%f205, %f54, %f280;
	fma.rn.ftz.f32 	%f206, %f53, %f281, %f205;
	fma.rn.ftz.f32 	%f290, %f55, %f279, %f206;
	mul.ftz.f32 	%f207, %f54, %f277;
	fma.rn.ftz.f32 	%f208, %f53, %f278, %f207;
	fma.rn.ftz.f32 	%f291, %f55, %f276, %f208;
	mov.f32 	%f289, %f56;

BB2_30:
	// Output pixel directly after the first one in the same row.
	add.s32 	%r73, %r41, 1;
	cvt.s64.s32	%rd3, %r73;
	@%p14 bra 	BB2_32;

	cvta.to.global.u64 	%rd47, %rd7;
	shl.b64 	%rd48, %rd3, 4;
	add.s64 	%rd49, %rd47, %rd48;
	st.global.v4.f32 	[%rd49], {%f289, %f290, %f291, %f292};
	bra.uni 	BB2_33;

BB2_32:
	cvta.to.global.u64 	%rd50, %rd7;
	shl.b64 	%rd51, %rd3, 3;
	add.s64 	%rd52, %rd50, %rd51;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f292;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f291;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f290;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f289;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd52], {%rs8, %rs7, %rs6, %rs5};

BB2_33:
	// ---- sample (x, y+1): feeds the middle channel ----
	add.s32 	%r79, %r4, 1;
	mad.lo.s32 	%r87, %r79, %r26, %r3;
	cvt.s64.s32	%rd53, %r87;
	add.s64 	%rd58, %rd53, %rd27;
	shl.b64 	%rd60, %rd58, 2;
	add.s64 	%rd61, %rd1, %rd60;
	ld.global.f32 	%f209, [%rd61];
	mul.ftz.f32 	%f210, %f209, 0f00000000;
	min.ftz.f32 	%f84, %f210, %f142;
	min.ftz.f32 	%f85, %f209, %f143;
	min.ftz.f32 	%f86, %f210, %f144;
	mul.ftz.f32 	%f211, %f85, %f5;
	fma.rn.ftz.f32 	%f212, %f84, %f4, %f211;
	fma.rn.ftz.f32 	%f87, %f86, %f6, %f212;
	mov.f32 	%f299, 0f3F800000;
	@%p9 bra 	BB2_44;

	setp.ltu.ftz.f32	%p24, %f87, 0f00000000;
	@%p24 bra 	BB2_36;

	lg2.approx.ftz.f32 	%f213, %f87;
	mul.ftz.f32 	%f214, %f213, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f293, %f214;
	bra.uni 	BB2_37;

BB2_36:
	neg.ftz.f32 	%f215, %f87;
	lg2.approx.ftz.f32 	%f216, %f215;
	mul.ftz.f32 	%f217, %f216, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f218, %f217;
	neg.ftz.f32 	%f293, %f218;

BB2_37:
	mul.ftz.f32 	%f219, %f85, %f280;
	fma.rn.ftz.f32 	%f220, %f84, %f281, %f219;
	fma.rn.ftz.f32 	%f91, %f86, %f279, %f220;
	setp.ltu.ftz.f32	%p25, %f91, 0f00000000;
	@%p25 bra 	BB2_39;

	lg2.approx.ftz.f32 	%f221, %f91;
	mul.ftz.f32 	%f222, %f221, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f294, %f222;
	bra.uni 	BB2_40;

BB2_39:
	neg.ftz.f32 	%f223, %f91;
	lg2.approx.ftz.f32 	%f224, %f223;
	mul.ftz.f32 	%f225, %f224, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f226, %f225;
	neg.ftz.f32 	%f294, %f226;

BB2_40:
	mul.ftz.f32 	%f227, %f85, %f277;
	fma.rn.ftz.f32 	%f228, %f84, %f278, %f227;
	fma.rn.ftz.f32 	%f95, %f86, %f276, %f228;
	setp.ltu.ftz.f32	%p26, %f95, 0f00000000;
	@%p26 bra 	BB2_42;

	lg2.approx.ftz.f32 	%f229, %f95;
	mul.ftz.f32 	%f230, %f229, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f295, %f230;
	bra.uni 	BB2_43;

BB2_42:
	neg.ftz.f32 	%f231, %f95;
	lg2.approx.ftz.f32 	%f232, %f231;
	mul.ftz.f32 	%f233, %f232, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f234, %f233;
	neg.ftz.f32 	%f295, %f234;

BB2_43:
	mov.f32 	%f298, %f295;
	mov.f32 	%f297, %f294;
	mov.f32 	%f296, %f293;
	bra.uni 	BB2_45;

BB2_44:
	mul.ftz.f32 	%f235, %f85, %f280;
	fma.rn.ftz.f32 	%f236, %f84, %f281, %f235;
	fma.rn.ftz.f32 	%f297, %f86, %f279, %f236;
	mul.ftz.f32 	%f237, %f85, %f277;
	fma.rn.ftz.f32 	%f238, %f84, %f278, %f237;
	fma.rn.ftz.f32 	%f298, %f86, %f276, %f238;
	mov.f32 	%f296, %f87;

BB2_45:
	// Second output row: +1 when param_7 == 0, otherwise -1.
	selp.b32	%r98, 1, -1, %p13;
	add.s32 	%r99, %r35, %r98;
	mad.lo.s32 	%r105, %r99, %r8, %r3;
	cvt.s64.s32	%rd4, %r105;
	@%p14 bra 	BB2_47;

	cvta.to.global.u64 	%rd62, %rd7;
	shl.b64 	%rd63, %rd4, 4;
	add.s64 	%rd64, %rd62, %rd63;
	st.global.v4.f32 	[%rd64], {%f296, %f297, %f298, %f299};
	bra.uni 	BB2_48;

BB2_47:
	cvta.to.global.u64 	%rd65, %rd7;
	shl.b64 	%rd66, %rd4, 3;
	add.s64 	%rd67, %rd65, %rd66;
	// The %rs registers are numbered in the opposite direction from the other
	// three f16 stores, but the stored component order is the same.
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f296;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f297;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f298;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f299;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd67], {%rs9, %rs10, %rs11, %rs12};

BB2_48:
	// ---- sample (x+1, y+1): feeds the first channel ----
	add.s32 	%r120, %r87, 1;
	cvt.s64.s32	%rd68, %r120;
	add.s64 	%rd73, %rd68, %rd27;
	shl.b64 	%rd75, %rd73, 2;
	add.s64 	%rd76, %rd1, %rd75;
	ld.global.f32 	%f239, [%rd76];
	mul.ftz.f32 	%f240, %f239, 0f00000000;
	min.ftz.f32 	%f111, %f239, %f142;
	min.ftz.f32 	%f112, %f240, %f143;
	min.ftz.f32 	%f113, %f240, %f144;
	mul.ftz.f32 	%f241, %f112, %f5;
	fma.rn.ftz.f32 	%f242, %f111, %f4, %f241;
	fma.rn.ftz.f32 	%f114, %f113, %f6, %f242;
	mov.f32 	%f306, 0f3F800000;
	@%p9 bra 	BB2_59;

	setp.ltu.ftz.f32	%p31, %f114, 0f00000000;
	@%p31 bra 	BB2_51;

	lg2.approx.ftz.f32 	%f243, %f114;
	mul.ftz.f32 	%f244, %f243, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f300, %f244;
	bra.uni 	BB2_52;

BB2_51:
	neg.ftz.f32 	%f245, %f114;
	lg2.approx.ftz.f32 	%f246, %f245;
	mul.ftz.f32 	%f247, %f246, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f248, %f247;
	neg.ftz.f32 	%f300, %f248;

BB2_52:
	mul.ftz.f32 	%f249, %f112, %f280;
	fma.rn.ftz.f32 	%f250, %f111, %f281, %f249;
	fma.rn.ftz.f32 	%f118, %f113, %f279, %f250;
	setp.ltu.ftz.f32	%p32, %f118, 0f00000000;
	@%p32 bra 	BB2_54;

	lg2.approx.ftz.f32 	%f251, %f118;
	mul.ftz.f32 	%f252, %f251, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f301, %f252;
	bra.uni 	BB2_55;

BB2_54:
	neg.ftz.f32 	%f253, %f118;
	lg2.approx.ftz.f32 	%f254, %f253;
	mul.ftz.f32 	%f255, %f254, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f256, %f255;
	neg.ftz.f32 	%f301, %f256;

BB2_55:
	mul.ftz.f32 	%f257, %f112, %f277;
	fma.rn.ftz.f32 	%f258, %f111, %f278, %f257;
	fma.rn.ftz.f32 	%f122, %f113, %f276, %f258;
	setp.ltu.ftz.f32	%p33, %f122, 0f00000000;
	@%p33 bra 	BB2_57;

	lg2.approx.ftz.f32 	%f259, %f122;
	mul.ftz.f32 	%f260, %f259, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f302, %f260;
	bra.uni 	BB2_58;

BB2_57:
	neg.ftz.f32 	%f261, %f122;
	lg2.approx.ftz.f32 	%f262, %f261;
	mul.ftz.f32 	%f263, %f262, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f264, %f263;
	neg.ftz.f32 	%f302, %f264;

BB2_58:
	mov.f32 	%f305, %f302;
	mov.f32 	%f304, %f301;
	mov.f32 	%f303, %f300;
	bra.uni 	BB2_60;

BB2_59:
	mul.ftz.f32 	%f265, %f112, %f280;
	fma.rn.ftz.f32 	%f266, %f111, %f281, %f265;
	fma.rn.ftz.f32 	%f304, %f113, %f279, %f266;
	mul.ftz.f32 	%f267, %f112, %f277;
	fma.rn.ftz.f32 	%f268, %f111, %f278, %f267;
	fma.rn.ftz.f32 	%f305, %f113, %f276, %f268;
	mov.f32 	%f303, %f114;

BB2_60:
	// Output pixel directly after the third one in the same row.
	add.s32 	%r139, %r105, 1;
	cvt.s64.s32	%rd5, %r139;
	@%p14 bra 	BB2_62;

	cvta.to.global.u64 	%rd77, %rd7;
	shl.b64 	%rd78, %rd5, 4;
	add.s64 	%rd79, %rd77, %rd78;
	st.global.v4.f32 	[%rd79], {%f303, %f304, %f305, %f306};
	bra.uni 	BB2_63;

BB2_62:
	cvta.to.global.u64 	%rd80, %rd7;
	shl.b64 	%rd81, %rd5, 3;
	add.s64 	%rd82, %rd80, %rd81;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f306;
	mov.b16 	%rs13, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f305;
	mov.b16 	%rs14, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f304;
	mov.b16 	%rs15, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f303;
	mov.b16 	%rs16, %temp;
}
	st.global.v4.u16 	[%rd82], {%rs16, %rs15, %rs14, %rs13};

BB2_63:
	ret;
}

.visible .entry PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel(
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_0,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_1,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_2,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_3,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_4,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_5,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_6,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_7,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_8,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_9,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_10,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_11,
	.param .align 16 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_12[16],
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_13,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_14
)
{
	.reg .pred 	%p<33>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<102>;
	.reg .f32 	%f<289>;
	.reg .s64 	%rd<67>;
	// demoted variable
	.shared .align 4 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix[36];

	ld.param.u64 	%rd6, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_0];
	ld.param.u32 	%r6, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_1];
	ld.param.u32 	%r7, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_2];
	ld.param.u32 	%r8, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_3];
	ld.param.u64 	%rd5, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_5];
	ld.param.u32 	%r9, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_6];
	ld.param.u32 	%r10, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_7];
	ld.param.u32 	%r11, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_8];
	ld.param.u32 	%r12, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_9];
	ld.param.u32 	%r13, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_10];
	ld.param.u32 	%r14, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_11];
	ld.param.f32 	%f135, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_12+8];
	ld.param.f32 	%f134, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_12+4];
	ld.param.f32 	%f133, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_12];
	ld.param.u32 	%r15, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_13];
	ld.param.u32 	%r16, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel_param_14];
	cvta.to.global.u64 	%rd1, %rd6;
	mov.u32 	%r1, %tid.y;
	setp.eq.s32	%p1, %r1, 0;
	mov.u32 	%r2, %tid.x;
	setp.lt.s32	%p2, %r2, 3;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB3_2;
	bra.uni 	BB3_1;

BB3_1:
	cvt.s64.s32	%rd7, %r15;
	setp.ne.s32	%p4, %r14, 0;
	mul.lo.s32 	%r17, %r2, 3;
	cvt.s64.s32	%rd8, %r17;
	add.s64 	%rd9, %rd8, %rd7;
	selp.b64	%rd10, 2, 0, %p4;
	add.s64 	%rd11, %rd9, %rd10;
	shl.b64 	%rd12, %rd11, 2;
	add.s64 	%rd13, %rd1, %rd12;
	mul.wide.s32 	%rd14, %r17, 4;
	mov.u64 	%rd15, PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix;
	add.s64 	%rd16, %rd15, %rd14;
	ld.global.f32 	%f137, [%rd13];
	st.shared.f32 	[%rd16], %f137;
	shl.b64 	%rd17, %rd9, 2;
	add.s64 	%rd18, %rd1, %rd17;
	ld.global.f32 	%f138, [%rd18+4];
	st.shared.f32 	[%rd16+4], %f138;
	selp.b64	%rd19, 0, 2, %p4;
	add.s64 	%rd20, %rd9, %rd19;
	shl.b64 	%rd21, %rd20, 2;
	add.s64 	%rd22, %rd1, %rd21;
	ld.global.f32 	%f139, [%rd22];
	st.shared.f32 	[%rd16+8], %f139;

BB3_2:
	bar.sync 	0;
	mov.u32 	%r18, %ntid.x;
	mov.u32 	%r19, %ctaid.x;
	mad.lo.s32 	%r20, %r18, %r19, %r2;
	shl.b32 	%r3, %r20, 1;
	mov.u32 	%r21, %ntid.y;
	mov.u32 	%r22, %ctaid.y;
	mad.lo.s32 	%r23, %r21, %r22, %r1;
	shl.b32 	%r4, %r23, 1;
	setp.lt.s32	%p5, %r3, %r12;
	setp.lt.s32	%p6, %r4, %r13;
	and.pred  	%p7, %p5, %p6;
	@!%p7 bra 	BB3_63;
	bra.uni 	BB3_3;

BB3_3:
	cvt.s64.s32	%rd23, %r6;
	add.s32 	%r24, %r13, -1;
	mul.lo.s32 	%r25, %r24, %r7;
	cvt.s64.s32	%rd24, %r25;
	neg.s32 	%r26, %r7;
	setp.eq.s32	%p8, %r8, 0;
	selp.b32	%r27, %r7, %r26, %p8;
	selp.b64	%rd25, 0, %rd24, %p8;
	add.s64 	%rd26, %rd25, %rd23;
	mul.lo.s32 	%r28, %r4, %r27;
	add.s32 	%r29, %r28, %r3;
	cvt.s64.s32	%rd27, %r29;
	add.s64 	%rd28, %rd27, %rd26;
	shl.b64 	%rd29, %rd28, 2;
	add.s64 	%rd30, %rd1, %rd29;
	add.s32 	%r30, %r3, 1;
	add.s32 	%r31, %r28, %r30;
	cvt.s64.s32	%rd31, %r31;
	add.s64 	%rd32, %rd31, %rd26;
	shl.b64 	%rd33, %rd32, 2;
	add.s64 	%rd34, %rd1, %rd33;
	add.s32 	%r32, %r4, 1;
	mul.lo.s32 	%r33, %r32, %r27;
	add.s32 	%r34, %r33, %r3;
	cvt.s64.s32	%rd35, %r34;
	add.s64 	%rd36, %rd35, %rd26;
	shl.b64 	%rd37, %rd36, 2;
	add.s64 	%rd38, %rd1, %rd37;
	ld.global.f32 	%f1, [%rd38];
	add.s32 	%r35, %r33, %r30;
	cvt.s64.s32	%rd39, %r35;
	add.s64 	%rd40, %rd39, %rd26;
	shl.b64 	%rd41, %rd40, 2;
	add.s64 	%rd42, %rd1, %rd41;
	ld.global.f32 	%f140, [%rd30];
	min.ftz.f32 	%f2, %f140, %f133;
	ld.global.f32 	%f141, [%rd34];
	min.ftz.f32 	%f3, %f141, %f134;
	ld.global.f32 	%f142, [%rd42];
	min.ftz.f32 	%f4, %f142, %f135;
	ld.shared.f32 	%f143, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+24];
	mul.ftz.f32 	%f5, %f2, %f143;
	ld.shared.f32 	%f6, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+28];
	fma.rn.ftz.f32 	%f144, %f3, %f6, %f5;
	ld.shared.f32 	%f145, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+32];
	mul.ftz.f32 	%f7, %f4, %f145;
	add.ftz.f32 	%f8, %f144, %f7;
	setp.eq.s32	%p9, %r16, 0;
	mov.f32 	%f267, 0f3F800000;
	@%p9 bra 	BB3_14;

	setp.ltu.ftz.f32	%p10, %f8, 0f00000000;
	@%p10 bra 	BB3_6;

	lg2.approx.ftz.f32 	%f146, %f8;
	mul.ftz.f32 	%f147, %f146, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f255, %f147;
	bra.uni 	BB3_7;

BB3_6:
	neg.ftz.f32 	%f148, %f8;
	lg2.approx.ftz.f32 	%f149, %f148;
	mul.ftz.f32 	%f150, %f149, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f151, %f150;
	neg.ftz.f32 	%f255, %f151;

BB3_7:
	ld.shared.f32 	%f263, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+12];
	ld.shared.f32 	%f262, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+16];
	mul.ftz.f32 	%f152, %f3, %f262;
	fma.rn.ftz.f32 	%f153, %f2, %f263, %f152;
	ld.shared.f32 	%f261, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+20];
	fma.rn.ftz.f32 	%f15, %f4, %f261, %f153;
	setp.ltu.ftz.f32	%p11, %f15, 0f00000000;
	@%p11 bra 	BB3_9;

	lg2.approx.ftz.f32 	%f154, %f15;
	mul.ftz.f32 	%f155, %f154, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f256, %f155;
	bra.uni 	BB3_10;

BB3_9:
	neg.ftz.f32 	%f156, %f15;
	lg2.approx.ftz.f32 	%f157, %f156;
	mul.ftz.f32 	%f158, %f157, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f159, %f158;
	neg.ftz.f32 	%f256, %f159;

BB3_10:
	ld.shared.f32 	%f260, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix];
	ld.shared.f32 	%f259, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+4];
	mul.ftz.f32 	%f160, %f3, %f259;
	fma.rn.ftz.f32 	%f161, %f2, %f260, %f160;
	ld.shared.f32 	%f258, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+8];
	fma.rn.ftz.f32 	%f22, %f4, %f258, %f161;
	setp.ltu.ftz.f32	%p12, %f22, 0f00000000;
	@%p12 bra 	BB3_12;

	lg2.approx.ftz.f32 	%f162, %f22;
	mul.ftz.f32 	%f163, %f162, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f257, %f163;
	bra.uni 	BB3_13;

BB3_12:
	neg.ftz.f32 	%f164, %f22;
	lg2.approx.ftz.f32 	%f165, %f164;
	mul.ftz.f32 	%f166, %f165, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f167, %f166;
	neg.ftz.f32 	%f257, %f167;

BB3_13:
	mov.f32 	%f266, %f257;
	mov.f32 	%f265, %f256;
	mov.f32 	%f264, %f255;
	bra.uni 	BB3_15;

BB3_14:
	ld.shared.f32 	%f263, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+12];
	ld.shared.f32 	%f262, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+16];
	mul.ftz.f32 	%f168, %f3, %f262;
	fma.rn.ftz.f32 	%f169, %f2, %f263, %f168;
	ld.shared.f32 	%f261, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+20];
	fma.rn.ftz.f32 	%f265, %f4, %f261, %f169;
	ld.shared.f32 	%f260, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix];
	ld.shared.f32 	%f259, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+4];
	mul.ftz.f32 	%f170, %f3, %f259;
	fma.rn.ftz.f32 	%f171, %f2, %f260, %f170;
	ld.shared.f32 	%f258, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Nearest_Kernel$__cuda_local_var_180712_417_non_const_matrix+8];
	fma.rn.ftz.f32 	%f266, %f4, %f258, %f171;
	mov.f32 	%f264, %f8;

BB3_15:
	setp.eq.s32	%p13, %r11, 0;
	@%p13 bra 	BB3_17;

	sub.s32 	%r42, %r24, %r4;
	setp.eq.s32	%p14, %r10, 0;
	selp.b32	%r43, %r4, %r42, %p14;
	mad.lo.s32 	%r49, %r43, %r9, %r3;
	cvta.to.global.u64 	%rd43, %rd5;
	mul.wide.s32 	%rd44, %r49, 16;
	add.s64 	%rd45, %rd43, %rd44;
	st.global.v4.f32 	[%rd45], {%f264, %f265, %f266, %f267};
	bra.uni 	BB3_18;

BB3_17:
	cvta.to.global.u64 	%rd46, %rd5;
	sub.s32 	%r56, %r24, %r4;
	setp.eq.s32	%p15, %r10, 0;
	selp.b32	%r57, %r4, %r56, %p15;
	mad.lo.s32 	%r63, %r57, %r9, %r3;
	mul.wide.s32 	%rd47, %r63, 8;
	add.s64 	%rd48, %rd46, %rd47;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f264;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f265;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f266;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f267;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd48], {%rs1, %rs2, %rs3, %rs4};

BB3_18:
	mov.f32 	%f274, 0f3F800000;
	@%p9 bra 	BB3_29;

	setp.ltu.ftz.f32	%p17, %f8, 0f00000000;
	@%p17 bra 	BB3_21;

	lg2.approx.ftz.f32 	%f172, %f8;
	mul.ftz.f32 	%f173, %f172, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f268, %f173;
	bra.uni 	BB3_22;

BB3_21:
	neg.ftz.f32 	%f174, %f8;
	lg2.approx.ftz.f32 	%f175, %f174;
	mul.ftz.f32 	%f176, %f175, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f177, %f176;
	neg.ftz.f32 	%f268, %f177;

BB3_22:
	mul.ftz.f32 	%f178, %f3, %f262;
	fma.rn.ftz.f32 	%f179, %f2, %f263, %f178;
	fma.rn.ftz.f32 	%f53, %f4, %f261, %f179;
	setp.ltu.ftz.f32	%p18, %f53, 0f00000000;
	@%p18 bra 	BB3_24;

	lg2.approx.ftz.f32 	%f180, %f53;
	mul.ftz.f32 	%f181, %f180, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f269, %f181;
	bra.uni 	BB3_25;

BB3_24:
	neg.ftz.f32 	%f182, %f53;
	lg2.approx.ftz.f32 	%f183, %f182;
	mul.ftz.f32 	%f184, %f183, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f185, %f184;
	neg.ftz.f32 	%f269, %f185;

BB3_25:
	mul.ftz.f32 	%f186, %f3, %f259;
	fma.rn.ftz.f32 	%f187, %f2, %f260, %f186;
	fma.rn.ftz.f32 	%f57, %f4, %f258, %f187;
	setp.ltu.ftz.f32	%p19, %f57, 0f00000000;
	@%p19 bra 	BB3_27;

	lg2.approx.ftz.f32 	%f188, %f57;
	mul.ftz.f32 	%f189, %f188, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f270, %f189;
	bra.uni 	BB3_28;

BB3_27:
	neg.ftz.f32 	%f190, %f57;
	lg2.approx.ftz.f32 	%f191, %f190;
	mul.ftz.f32 	%f192, %f191, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f193, %f192;
	neg.ftz.f32 	%f270, %f193;

BB3_28:
	mov.f32 	%f273, %f270;
	mov.f32 	%f272, %f269;
	mov.f32 	%f271, %f268;
	bra.uni 	BB3_30;

BB3_29:
	mul.ftz.f32 	%f194, %f3, %f262;
	fma.rn.ftz.f32 	%f195, %f2, %f263, %f194;
	fma.rn.ftz.f32 	%f272, %f4, %f261, %f195;
	mul.ftz.f32 	%f196, %f3, %f259;
	fma.rn.ftz.f32 	%f197, %f2, %f260, %f196;
	fma.rn.ftz.f32 	%f273, %f4, %f258, %f197;
	mov.f32 	%f271, %f8;

BB3_30:
	sub.s32 	%r70, %r24, %r4;
	setp.eq.s32	%p20, %r10, 0;
	selp.b32	%r71, %r4, %r70, %p20;
	mad.lo.s32 	%r77, %r71, %r9, %r3;
	add.s32 	%r78, %r77, 1;
	cvt.s64.s32	%rd2, %r78;
	@%p13 bra 	BB3_32;

	cvta.to.global.u64 	%rd49, %rd5;
	shl.b64 	%rd50, %rd2, 4;
	add.s64 	%rd51, %rd49, %rd50;
	st.global.v4.f32 	[%rd51], {%f271, %f272, %f273, %f274};
	bra.uni 	BB3_33;

BB3_32:
	cvta.to.global.u64 	%rd52, %rd5;
	shl.b64 	%rd53, %rd2, 3;
	add.s64 	%rd54, %rd52, %rd53;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f274;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f273;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f272;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f271;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd54], {%rs8, %rs7, %rs6, %rs5};

BB3_33:
	min.ftz.f32 	%f77, %f1, %f134;
	fma.rn.ftz.f32 	%f198, %f77, %f6, %f5;
	add.ftz.f32 	%f78, %f198, %f7;
	mov.f32 	%f281, 0f3F800000;
	@%p9 bra 	BB3_44;

	setp.ltu.ftz.f32	%p23, %f78, 0f00000000;
	@%p23 bra 	BB3_36;

	lg2.approx.ftz.f32 	%f199, %f78;
	mul.ftz.f32 	%f200, %f199, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f275, %f200;
	bra.uni 	BB3_37;

BB3_36:
	neg.ftz.f32 	%f201, %f78;
	lg2.approx.ftz.f32 	%f202, %f201;
	mul.ftz.f32 	%f203, %f202, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f204, %f203;
	neg.ftz.f32 	%f275, %f204;

BB3_37:
	mul.ftz.f32 	%f205, %f77, %f262;
	fma.rn.ftz.f32 	%f206, %f2, %f263, %f205;
	fma.rn.ftz.f32 	%f82, %f4, %f261, %f206;
	setp.ltu.ftz.f32	%p24, %f82, 0f00000000;
	@%p24 bra 	BB3_39;

	lg2.approx.ftz.f32 	%f207, %f82;
	mul.ftz.f32 	%f208, %f207, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f276, %f208;
	bra.uni 	BB3_40;

BB3_39:
	neg.ftz.f32 	%f209, %f82;
	lg2.approx.ftz.f32 	%f210, %f209;
	mul.ftz.f32 	%f211, %f210, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f212, %f211;
	neg.ftz.f32 	%f276, %f212;

BB3_40:
	mul.ftz.f32 	%f213, %f77, %f259;
	fma.rn.ftz.f32 	%f214, %f2, %f260, %f213;
	fma.rn.ftz.f32 	%f86, %f4, %f258, %f214;
	setp.ltu.ftz.f32	%p25, %f86, 0f00000000;
	@%p25 bra 	BB3_42;

	lg2.approx.ftz.f32 	%f215, %f86;
	mul.ftz.f32 	%f216, %f215, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f277, %f216;
	bra.uni 	BB3_43;

BB3_42:
	neg.ftz.f32 	%f217, %f86;
	lg2.approx.ftz.f32 	%f218, %f217;
	mul.ftz.f32 	%f219, %f218, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f220, %f219;
	neg.ftz.f32 	%f277, %f220;

BB3_43:
	mov.f32 	%f280, %f277;
	mov.f32 	%f279, %f276;
	mov.f32 	%f278, %f275;
	bra.uni 	BB3_45;

BB3_44:
	mul.ftz.f32 	%f221, %f77, %f262;
	fma.rn.ftz.f32 	%f222, %f2, %f263, %f221;
	fma.rn.ftz.f32 	%f279, %f4, %f261, %f222;
	mul.ftz.f32 	%f223, %f77, %f259;
	fma.rn.ftz.f32 	%f224, %f2, %f260, %f223;
	fma.rn.ftz.f32 	%f280, %f4, %f258, %f224;
	mov.f32 	%f278, %f78;

BB3_45:
	selp.b32	%r87, 1, -1, %p20;
	add.s32 	%r88, %r71, %r87;
	mul.lo.s32 	%r5, %r88, %r9;
	add.s32 	%r94, %r5, %r3;
	cvt.s64.s32	%rd3, %r94;
	@%p13 bra 	BB3_47;

	cvta.to.global.u64 	%rd55, %rd5;
	shl.b64 	%rd56, %rd3, 4;
	add.s64 	%rd57, %rd55, %rd56;
	st.global.v4.f32 	[%rd57], {%f278, %f279, %f280, %f281};
	bra.uni 	BB3_48;

BB3_47:
	cvta.to.global.u64 	%rd58, %rd5;
	shl.b64 	%rd59, %rd3, 3;
	add.s64 	%rd60, %rd58, %rd59;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f281;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f280;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f279;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f278;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd60], {%rs12, %rs11, %rs10, %rs9};

BB3_48:
	mov.f32 	%f288, 0f3F800000;
	@%p9 bra 	BB3_59;

	setp.ltu.ftz.f32	%p29, %f78, 0f00000000;
	@%p29 bra 	BB3_51;

	lg2.approx.ftz.f32 	%f225, %f78;
	mul.ftz.f32 	%f226, %f225, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f282, %f226;
	bra.uni 	BB3_52;

BB3_51:
	neg.ftz.f32 	%f227, %f78;
	lg2.approx.ftz.f32 	%f228, %f227;
	mul.ftz.f32 	%f229, %f228, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f230, %f229;
	neg.ftz.f32 	%f282, %f230;

BB3_52:
	mul.ftz.f32 	%f231, %f77, %f262;
	fma.rn.ftz.f32 	%f232, %f2, %f263, %f231;
	fma.rn.ftz.f32 	%f109, %f4, %f261, %f232;
	setp.ltu.ftz.f32	%p30, %f109, 0f00000000;
	@%p30 bra 	BB3_54;

	lg2.approx.ftz.f32 	%f233, %f109;
	mul.ftz.f32 	%f234, %f233, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f283, %f234;
	bra.uni 	BB3_55;

BB3_54:
	neg.ftz.f32 	%f235, %f109;
	lg2.approx.ftz.f32 	%f236, %f235;
	mul.ftz.f32 	%f237, %f236, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f238, %f237;
	neg.ftz.f32 	%f283, %f238;

BB3_55:
	mul.ftz.f32 	%f239, %f77, %f259;
	fma.rn.ftz.f32 	%f240, %f2, %f260, %f239;
	fma.rn.ftz.f32 	%f113, %f4, %f258, %f240;
	setp.ltu.ftz.f32	%p31, %f113, 0f00000000;
	@%p31 bra 	BB3_57;

	lg2.approx.ftz.f32 	%f241, %f113;
	mul.ftz.f32 	%f242, %f241, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f284, %f242;
	bra.uni 	BB3_58;

BB3_57:
	neg.ftz.f32 	%f243, %f113;
	lg2.approx.ftz.f32 	%f244, %f243;
	mul.ftz.f32 	%f245, %f244, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f246, %f245;
	neg.ftz.f32 	%f284, %f246;

BB3_58:
	mov.f32 	%f287, %f284;
	mov.f32 	%f286, %f283;
	mov.f32 	%f285, %f282;
	bra.uni 	BB3_60;

BB3_59:
	mul.ftz.f32 	%f247, %f77, %f262;
	fma.rn.ftz.f32 	%f248, %f2, %f263, %f247;
	fma.rn.ftz.f32 	%f286, %f4, %f261, %f248;
	mul.ftz.f32 	%f249, %f77, %f259;
	fma.rn.ftz.f32 	%f250, %f2, %f260, %f249;
	fma.rn.ftz.f32 	%f287, %f4, %f258, %f250;
	mov.f32 	%f285, %f78;

BB3_60:
	add.s32 	%r100, %r3, %r5;
	add.s32 	%r101, %r100, 1;
	cvt.s64.s32	%rd4, %r101;
	@%p13 bra 	BB3_62;

	cvta.to.global.u64 	%rd61, %rd5;
	shl.b64 	%rd62, %rd4, 4;
	add.s64 	%rd63, %rd61, %rd62;
	st.global.v4.f32 	[%rd63], {%f285, %f286, %f287, %f288};
	bra.uni 	BB3_63;

BB3_62:
	cvta.to.global.u64 	%rd64, %rd5;
	shl.b64 	%rd65, %rd4, 3;
	add.s64 	%rd66, %rd64, %rd65;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f288;
	mov.b16 	%rs13, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f287;
	mov.b16 	%rs14, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f286;
	mov.b16 	%rs15, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f285;
	mov.b16 	%rs16, %temp;
}
	st.global.v4.u16 	[%rd66], {%rs16, %rs15, %rs14, %rs13};

BB3_63:
	ret;
}

.visible .entry PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel(
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_0,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_1,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_2,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_3,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_4,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_5,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_6,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_7,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_8,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_9,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_10,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_11,
	.param .align 16 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_12[16],
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_13,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_14
)
{
	.reg .pred 	%p<47>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<111>;
	.reg .f32 	%f<362>;
	.reg .s64 	%rd<79>;
	// demoted variable
	.shared .align 4 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix[36];

	ld.param.u64 	%rd11, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_0];
	ld.param.u32 	%r10, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_1];
	ld.param.u32 	%r11, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_2];
	ld.param.u32 	%r12, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_3];
	ld.param.u64 	%rd10, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_5];
	ld.param.u32 	%r13, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_6];
	ld.param.u32 	%r14, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_7];
	ld.param.u32 	%r15, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_8];
	ld.param.u32 	%r16, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_9];
	ld.param.u32 	%r17, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_10];
	ld.param.u32 	%r18, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_11];
	ld.param.f32 	%f169, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_12+8];
	ld.param.f32 	%f168, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_12+4];
	ld.param.f32 	%f167, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_12];
	ld.param.u32 	%r19, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_13];
	ld.param.u32 	%r20, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel_param_14];
	cvta.to.global.u64 	%rd1, %rd11;
	mov.u32 	%r1, %tid.y;
	setp.eq.s32	%p1, %r1, 0;
	mov.u32 	%r2, %tid.x;
	setp.lt.s32	%p2, %r2, 3;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB4_2;
	bra.uni 	BB4_1;

BB4_1:
	cvt.s64.s32	%rd12, %r19;
	setp.ne.s32	%p4, %r18, 0;
	mul.lo.s32 	%r21, %r2, 3;
	cvt.s64.s32	%rd13, %r21;
	add.s64 	%rd14, %rd13, %rd12;
	selp.b64	%rd15, 2, 0, %p4;
	add.s64 	%rd16, %rd14, %rd15;
	shl.b64 	%rd17, %rd16, 2;
	add.s64 	%rd18, %rd1, %rd17;
	mul.wide.s32 	%rd19, %r21, 4;
	mov.u64 	%rd20, PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix;
	add.s64 	%rd21, %rd20, %rd19;
	ld.global.f32 	%f171, [%rd18];
	st.shared.f32 	[%rd21], %f171;
	shl.b64 	%rd22, %rd14, 2;
	add.s64 	%rd23, %rd1, %rd22;
	ld.global.f32 	%f172, [%rd23+4];
	st.shared.f32 	[%rd21+4], %f172;
	selp.b64	%rd24, 0, 2, %p4;
	add.s64 	%rd25, %rd14, %rd24;
	shl.b64 	%rd26, %rd25, 2;
	add.s64 	%rd27, %rd1, %rd26;
	ld.global.f32 	%f173, [%rd27];
	st.shared.f32 	[%rd21+8], %f173;

BB4_2:
	bar.sync 	0;
	mov.u32 	%r22, %ntid.x;
	mov.u32 	%r23, %ctaid.x;
	mad.lo.s32 	%r24, %r22, %r23, %r2;
	shl.b32 	%r3, %r24, 1;
	mov.u32 	%r25, %ntid.y;
	mov.u32 	%r26, %ctaid.y;
	mad.lo.s32 	%r27, %r25, %r26, %r1;
	shl.b32 	%r4, %r27, 1;
	setp.lt.s32	%p5, %r3, %r16;
	setp.lt.s32	%p6, %r4, %r17;
	and.pred  	%p7, %p5, %p6;
	@!%p7 bra 	BB4_83;
	bra.uni 	BB4_3;

BB4_3:
	cvt.s64.s32	%rd28, %r10;
	add.s32 	%r28, %r17, -1;
	mul.lo.s32 	%r29, %r28, %r11;
	cvt.s64.s32	%rd29, %r29;
	neg.s32 	%r30, %r11;
	setp.eq.s32	%p8, %r12, 0;
	selp.b32	%r5, %r11, %r30, %p8;
	selp.b64	%rd30, 0, %rd29, %p8;
	add.s64 	%rd2, %rd30, %rd28;
	add.s32 	%r6, %r16, -2;
	add.s32 	%r7, %r17, -2;
	mul.lo.s32 	%r8, %r4, %r5;
	add.s32 	%r31, %r8, %r3;
	cvt.s64.s32	%rd31, %r31;
	add.s64 	%rd32, %rd31, %rd2;
	shl.b64 	%rd33, %rd32, 2;
	add.s64 	%rd34, %rd1, %rd33;
	ld.global.f32 	%f1, [%rd34];
	ld.global.f32 	%f2, [%rd34+4];
	add.s32 	%r32, %r4, 1;
	mad.lo.s32 	%r33, %r32, %r5, %r3;
	cvt.s64.s32	%rd35, %r33;
	add.s64 	%rd36, %rd35, %rd2;
	shl.b64 	%rd37, %rd36, 2;
	add.s64 	%rd3, %rd1, %rd37;
	ld.global.f32 	%f3, [%rd3];
	ld.global.f32 	%f4, [%rd3+4];
	setp.eq.s32	%p9, %r3, 0;
	mov.f32 	%f319, %f4;
	@%p9 bra 	BB4_5;

	ld.global.f32 	%f5, [%rd3+-4];
	mov.f32 	%f319, %f5;

BB4_5:
	mov.f32 	%f6, %f319;
	setp.ge.s32	%p10, %r3, %r6;
	mov.f32 	%f314, %f3;
	@%p10 bra 	BB4_7;

	ld.global.f32 	%f7, [%rd3+8];
	mov.f32 	%f314, %f7;

BB4_7:
	mov.f32 	%f8, %f314;
	mov.f32 	%f322, %f2;
	@%p9 bra 	BB4_9;

	ld.global.f32 	%f9, [%rd34+-4];
	mov.f32 	%f322, %f9;

BB4_9:
	mov.f32 	%f10, %f322;
	mov.f32 	%f327, %f1;
	@%p10 bra 	BB4_11;

	ld.global.f32 	%f11, [%rd34+8];
	mov.f32 	%f327, %f11;

BB4_11:
	mov.f32 	%f12, %f327;
	setp.eq.s32	%p14, %r4, 0;
	or.pred  	%p15, %p14, %p9;
	add.s32 	%r35, %r4, -1;
	mad.lo.s32 	%r36, %r35, %r5, %r3;
	add.s32 	%r37, %r36, -1;
	cvt.s64.s32	%rd41, %r37;
	add.s64 	%rd42, %rd41, %rd2;
	shl.b64 	%rd43, %rd42, 2;
	add.s64 	%rd5, %rd1, %rd43;
	mov.f32 	%f318, %f4;
	@%p15 bra 	BB4_13;

	ld.global.f32 	%f318, [%rd5];

BB4_13:
	mov.f32 	%f313, %f3;
	@%p14 bra 	BB4_15;

	cvt.s64.s32	%rd44, %r36;
	add.s64 	%rd45, %rd44, %rd2;
	shl.b64 	%rd46, %rd45, 2;
	add.s64 	%rd47, %rd1, %rd46;
	ld.global.f32 	%f313, [%rd47];

BB4_15:
	mov.f32 	%f317, %f4;
	@%p14 bra 	BB4_17;

	ld.global.f32 	%f317, [%rd5+8];

BB4_17:
	setp.ge.s32	%p18, %r4, %r7;
	mov.f32 	%f326, %f1;
	@%p18 bra 	BB4_19;

	add.s32 	%r40, %r4, 2;
	mad.lo.s32 	%r41, %r40, %r5, %r3;
	cvt.s64.s32	%rd48, %r41;
	add.s64 	%rd49, %rd48, %rd2;
	shl.b64 	%rd50, %rd49, 2;
	add.s64 	%rd51, %rd1, %rd50;
	ld.global.f32 	%f326, [%rd51];

BB4_19:
	add.s32 	%r42, %r4, 2;
	mad.lo.s32 	%r43, %r42, %r5, %r3;
	add.s32 	%r44, %r43, 1;
	cvt.s64.s32	%rd52, %r44;
	add.s64 	%rd53, %rd52, %rd2;
	shl.b64 	%rd54, %rd53, 2;
	add.s64 	%rd6, %rd1, %rd54;
	mov.f32 	%f321, %f2;
	@%p18 bra 	BB4_21;

	ld.global.f32 	%f321, [%rd6];

BB4_21:
	or.pred  	%p22, %p18, %p10;
	mov.f32 	%f325, %f1;
	@%p22 bra 	BB4_23;

	ld.global.f32 	%f325, [%rd6+4];

BB4_23:
	add.ftz.f32 	%f174, %f318, %f317;
	add.ftz.f32 	%f175, %f174, %f6;
	add.ftz.f32 	%f176, %f175, %f4;
	mul.ftz.f32 	%f177, %f176, 0f3E800000;
	add.ftz.f32 	%f178, %f10, %f2;
	add.ftz.f32 	%f179, %f178, %f313;
	add.ftz.f32 	%f180, %f179, %f3;
	mul.ftz.f32 	%f181, %f180, 0f3E800000;
	min.ftz.f32 	%f25, %f1, %f167;
	min.ftz.f32 	%f26, %f181, %f168;
	min.ftz.f32 	%f27, %f177, %f169;
	ld.shared.f32 	%f28, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+24];
	ld.shared.f32 	%f29, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+28];
	mul.ftz.f32 	%f182, %f26, %f29;
	fma.rn.ftz.f32 	%f183, %f25, %f28, %f182;
	ld.shared.f32 	%f30, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+32];
	fma.rn.ftz.f32 	%f31, %f27, %f30, %f183;
	setp.eq.s32	%p23, %r20, 0;
	mov.f32 	%f340, 0f3F800000;
	@%p23 bra 	BB4_34;

	setp.ltu.ftz.f32	%p24, %f31, 0f00000000;
	@%p24 bra 	BB4_26;

	lg2.approx.ftz.f32 	%f184, %f31;
	mul.ftz.f32 	%f185, %f184, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f328, %f185;
	bra.uni 	BB4_27;

BB4_26:
	neg.ftz.f32 	%f186, %f31;
	lg2.approx.ftz.f32 	%f187, %f186;
	mul.ftz.f32 	%f188, %f187, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f189, %f188;
	neg.ftz.f32 	%f328, %f189;

BB4_27:
	ld.shared.f32 	%f336, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+12];
	ld.shared.f32 	%f335, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+16];
	mul.ftz.f32 	%f190, %f26, %f335;
	fma.rn.ftz.f32 	%f191, %f25, %f336, %f190;
	ld.shared.f32 	%f334, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+20];
	fma.rn.ftz.f32 	%f38, %f27, %f334, %f191;
	setp.ltu.ftz.f32	%p25, %f38, 0f00000000;
	@%p25 bra 	BB4_29;

	lg2.approx.ftz.f32 	%f192, %f38;
	mul.ftz.f32 	%f193, %f192, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f329, %f193;
	bra.uni 	BB4_30;

BB4_29:
	neg.ftz.f32 	%f194, %f38;
	lg2.approx.ftz.f32 	%f195, %f194;
	mul.ftz.f32 	%f196, %f195, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f197, %f196;
	neg.ftz.f32 	%f329, %f197;

BB4_30:
	ld.shared.f32 	%f333, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix];
	ld.shared.f32 	%f332, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+4];
	mul.ftz.f32 	%f198, %f26, %f332;
	fma.rn.ftz.f32 	%f199, %f25, %f333, %f198;
	ld.shared.f32 	%f331, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+8];
	fma.rn.ftz.f32 	%f45, %f27, %f331, %f199;
	setp.ltu.ftz.f32	%p26, %f45, 0f00000000;
	@%p26 bra 	BB4_32;

	lg2.approx.ftz.f32 	%f200, %f45;
	mul.ftz.f32 	%f201, %f200, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f330, %f201;
	bra.uni 	BB4_33;

BB4_32:
	neg.ftz.f32 	%f202, %f45;
	lg2.approx.ftz.f32 	%f203, %f202;
	mul.ftz.f32 	%f204, %f203, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f205, %f204;
	neg.ftz.f32 	%f330, %f205;

BB4_33:
	mov.f32 	%f339, %f330;
	mov.f32 	%f338, %f329;
	mov.f32 	%f337, %f328;
	bra.uni 	BB4_35;

BB4_34:
	ld.shared.f32 	%f336, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+12];
	ld.shared.f32 	%f335, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+16];
	mul.ftz.f32 	%f206, %f26, %f335;
	fma.rn.ftz.f32 	%f207, %f25, %f336, %f206;
	ld.shared.f32 	%f334, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+20];
	fma.rn.ftz.f32 	%f338, %f27, %f334, %f207;
	ld.shared.f32 	%f333, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix];
	ld.shared.f32 	%f332, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+4];
	mul.ftz.f32 	%f208, %f26, %f332;
	fma.rn.ftz.f32 	%f209, %f25, %f333, %f208;
	ld.shared.f32 	%f331, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Linear_Kernel$__cuda_local_var_180716_416_non_const_matrix+8];
	fma.rn.ftz.f32 	%f339, %f27, %f331, %f209;
	mov.f32 	%f337, %f31;

BB4_35:
	setp.eq.s32	%p27, %r15, 0;
	@%p27 bra 	BB4_37;

	sub.s32 	%r51, %r28, %r4;
	setp.eq.s32	%p28, %r14, 0;
	selp.b32	%r52, %r4, %r51, %p28;
	mad.lo.s32 	%r58, %r52, %r13, %r3;
	cvta.to.global.u64 	%rd55, %rd10;
	mul.wide.s32 	%rd56, %r58, 16;
	add.s64 	%rd57, %rd55, %rd56;
	st.global.v4.f32 	[%rd57], {%f337, %f338, %f339, %f340};
	bra.uni 	BB4_38;

BB4_37:
	cvta.to.global.u64 	%rd58, %rd10;
	sub.s32 	%r65, %r28, %r4;
	setp.eq.s32	%p29, %r14, 0;
	selp.b32	%r66, %r4, %r65, %p29;
	mad.lo.s32 	%r72, %r66, %r13, %r3;
	mul.wide.s32 	%rd59, %r72, 8;
	add.s64 	%rd60, %rd58, %rd59;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f337;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f338;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f339;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f340;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd60], {%rs1, %rs2, %rs3, %rs4};

BB4_38:
	add.ftz.f32 	%f210, %f317, %f4;
	mul.ftz.f32 	%f211, %f210, 0f3F000000;
	add.ftz.f32 	%f73, %f1, %f12;
	mul.ftz.f32 	%f212, %f73, 0f3F000000;
	min.ftz.f32 	%f74, %f212, %f167;
	min.ftz.f32 	%f75, %f2, %f168;
	min.ftz.f32 	%f76, %f211, %f169;
	mul.ftz.f32 	%f213, %f75, %f29;
	fma.rn.ftz.f32 	%f214, %f74, %f28, %f213;
	fma.rn.ftz.f32 	%f77, %f76, %f30, %f214;
	mov.f32 	%f347, 0f3F800000;
	@%p23 bra 	BB4_49;

	setp.ltu.ftz.f32	%p31, %f77, 0f00000000;
	@%p31 bra 	BB4_41;

	lg2.approx.ftz.f32 	%f215, %f77;
	mul.ftz.f32 	%f216, %f215, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f341, %f216;
	bra.uni 	BB4_42;

BB4_41:
	neg.ftz.f32 	%f217, %f77;
	lg2.approx.ftz.f32 	%f218, %f217;
	mul.ftz.f32 	%f219, %f218, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f220, %f219;
	neg.ftz.f32 	%f341, %f220;

BB4_42:
	mul.ftz.f32 	%f221, %f75, %f335;
	fma.rn.ftz.f32 	%f222, %f74, %f336, %f221;
	fma.rn.ftz.f32 	%f81, %f76, %f334, %f222;
	setp.ltu.ftz.f32	%p32, %f81, 0f00000000;
	@%p32 bra 	BB4_44;

	lg2.approx.ftz.f32 	%f223, %f81;
	mul.ftz.f32 	%f224, %f223, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f342, %f224;
	bra.uni 	BB4_45;

BB4_44:
	neg.ftz.f32 	%f225, %f81;
	lg2.approx.ftz.f32 	%f226, %f225;
	mul.ftz.f32 	%f227, %f226, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f228, %f227;
	neg.ftz.f32 	%f342, %f228;

BB4_45:
	mul.ftz.f32 	%f229, %f75, %f332;
	fma.rn.ftz.f32 	%f230, %f74, %f333, %f229;
	fma.rn.ftz.f32 	%f85, %f76, %f331, %f230;
	setp.ltu.ftz.f32	%p33, %f85, 0f00000000;
	@%p33 bra 	BB4_47;

	lg2.approx.ftz.f32 	%f231, %f85;
	mul.ftz.f32 	%f232, %f231, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f343, %f232;
	bra.uni 	BB4_48;

BB4_47:
	neg.ftz.f32 	%f233, %f85;
	lg2.approx.ftz.f32 	%f234, %f233;
	mul.ftz.f32 	%f235, %f234, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f236, %f235;
	neg.ftz.f32 	%f343, %f236;

BB4_48:
	mov.f32 	%f346, %f343;
	mov.f32 	%f345, %f342;
	mov.f32 	%f344, %f341;
	bra.uni 	BB4_50;

BB4_49:
	mul.ftz.f32 	%f237, %f75, %f335;
	fma.rn.ftz.f32 	%f238, %f74, %f336, %f237;
	fma.rn.ftz.f32 	%f345, %f76, %f334, %f238;
	mul.ftz.f32 	%f239, %f75, %f332;
	fma.rn.ftz.f32 	%f240, %f74, %f333, %f239;
	fma.rn.ftz.f32 	%f346, %f76, %f331, %f240;
	mov.f32 	%f344, %f77;

BB4_50:
	sub.s32 	%r84, %r28, %r4;
	setp.eq.s32	%p34, %r14, 0;
	selp.b32	%r85, %r4, %r84, %p34;
	mad.lo.s32 	%r86, %r85, %r13, %r3;
	add.s32 	%r87, %r86, 1;
	cvt.s64.s32	%rd7, %r87;
	@%p27 bra 	BB4_52;

	cvta.to.global.u64 	%rd61, %rd10;
	shl.b64 	%rd62, %rd7, 4;
	add.s64 	%rd63, %rd61, %rd62;
	st.global.v4.f32 	[%rd63], {%f344, %f345, %f346, %f347};
	bra.uni 	BB4_53;

BB4_52:
	cvta.to.global.u64 	%rd64, %rd10;
	shl.b64 	%rd65, %rd7, 3;
	add.s64 	%rd66, %rd64, %rd65;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f347;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f346;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f345;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f344;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd66], {%rs8, %rs7, %rs6, %rs5};

BB4_53:
	add.ftz.f32 	%f241, %f6, %f4;
	mul.ftz.f32 	%f242, %f241, 0f3F000000;
	add.ftz.f32 	%f243, %f1, %f326;
	mul.ftz.f32 	%f244, %f243, 0f3F000000;
	min.ftz.f32 	%f105, %f244, %f167;
	min.ftz.f32 	%f106, %f3, %f168;
	min.ftz.f32 	%f107, %f242, %f169;
	mul.ftz.f32 	%f245, %f106, %f29;
	fma.rn.ftz.f32 	%f246, %f105, %f28, %f245;
	fma.rn.ftz.f32 	%f108, %f107, %f30, %f246;
	mov.f32 	%f354, 0f3F800000;
	@%p23 bra 	BB4_64;

	setp.ltu.ftz.f32	%p37, %f108, 0f00000000;
	@%p37 bra 	BB4_56;

	lg2.approx.ftz.f32 	%f247, %f108;
	mul.ftz.f32 	%f248, %f247, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f348, %f248;
	bra.uni 	BB4_57;

BB4_56:
	neg.ftz.f32 	%f249, %f108;
	lg2.approx.ftz.f32 	%f250, %f249;
	mul.ftz.f32 	%f251, %f250, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f252, %f251;
	neg.ftz.f32 	%f348, %f252;

BB4_57:
	mul.ftz.f32 	%f253, %f106, %f335;
	fma.rn.ftz.f32 	%f254, %f105, %f336, %f253;
	fma.rn.ftz.f32 	%f112, %f107, %f334, %f254;
	setp.ltu.ftz.f32	%p38, %f112, 0f00000000;
	@%p38 bra 	BB4_59;

	lg2.approx.ftz.f32 	%f255, %f112;
	mul.ftz.f32 	%f256, %f255, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f349, %f256;
	bra.uni 	BB4_60;

BB4_59:
	neg.ftz.f32 	%f257, %f112;
	lg2.approx.ftz.f32 	%f258, %f257;
	mul.ftz.f32 	%f259, %f258, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f260, %f259;
	neg.ftz.f32 	%f349, %f260;

BB4_60:
	mul.ftz.f32 	%f261, %f106, %f332;
	fma.rn.ftz.f32 	%f262, %f105, %f333, %f261;
	fma.rn.ftz.f32 	%f116, %f107, %f331, %f262;
	setp.ltu.ftz.f32	%p39, %f116, 0f00000000;
	@%p39 bra 	BB4_62;

	lg2.approx.ftz.f32 	%f263, %f116;
	mul.ftz.f32 	%f264, %f263, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f350, %f264;
	bra.uni 	BB4_63;

BB4_62:
	neg.ftz.f32 	%f265, %f116;
	lg2.approx.ftz.f32 	%f266, %f265;
	mul.ftz.f32 	%f267, %f266, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f268, %f267;
	neg.ftz.f32 	%f350, %f268;

BB4_63:
	mov.f32 	%f353, %f350;
	mov.f32 	%f352, %f349;
	mov.f32 	%f351, %f348;
	bra.uni 	BB4_65;

BB4_64:
	mul.ftz.f32 	%f269, %f106, %f335;
	fma.rn.ftz.f32 	%f270, %f105, %f336, %f269;
	fma.rn.ftz.f32 	%f352, %f107, %f334, %f270;
	mul.ftz.f32 	%f271, %f106, %f332;
	fma.rn.ftz.f32 	%f272, %f105, %f333, %f271;
	fma.rn.ftz.f32 	%f353, %f107, %f331, %f272;
	mov.f32 	%f351, %f108;

BB4_65:
	selp.b32	%r96, 1, -1, %p34;
	add.s32 	%r97, %r85, %r96;
	mul.lo.s32 	%r9, %r97, %r13;
	add.s32 	%r103, %r9, %r3;
	cvt.s64.s32	%rd8, %r103;
	@%p27 bra 	BB4_67;

	cvta.to.global.u64 	%rd67, %rd10;
	shl.b64 	%rd68, %rd8, 4;
	add.s64 	%rd69, %rd67, %rd68;
	st.global.v4.f32 	[%rd69], {%f351, %f352, %f353, %f354};
	bra.uni 	BB4_68;

BB4_67:
	cvta.to.global.u64 	%rd70, %rd10;
	shl.b64 	%rd71, %rd8, 3;
	add.s64 	%rd72, %rd70, %rd71;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f354;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f353;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f352;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f351;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd72], {%rs12, %rs11, %rs10, %rs9};

BB4_68:
	add.ftz.f32 	%f273, %f2, %f3;
	add.ftz.f32 	%f274, %f273, %f8;
	add.ftz.f32 	%f275, %f274, %f321;
	mul.ftz.f32 	%f276, %f275, 0f3E800000;
	add.ftz.f32 	%f277, %f73, %f326;
	add.ftz.f32 	%f278, %f277, %f325;
	mul.ftz.f32 	%f279, %f278, 0f3E800000;
	min.ftz.f32 	%f136, %f279, %f167;
	min.ftz.f32 	%f137, %f276, %f168;
	min.ftz.f32 	%f138, %f4, %f169;
	mul.ftz.f32 	%f280, %f137, %f29;
	fma.rn.ftz.f32 	%f281, %f136, %f28, %f280;
	fma.rn.ftz.f32 	%f139, %f138, %f30, %f281;
	mov.f32 	%f361, 0f3F800000;
	@%p23 bra 	BB4_79;

	setp.ltu.ftz.f32	%p43, %f139, 0f00000000;
	@%p43 bra 	BB4_71;

	lg2.approx.ftz.f32 	%f282, %f139;
	mul.ftz.f32 	%f283, %f282, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f355, %f283;
	bra.uni 	BB4_72;

BB4_71:
	neg.ftz.f32 	%f284, %f139;
	lg2.approx.ftz.f32 	%f285, %f284;
	mul.ftz.f32 	%f286, %f285, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f287, %f286;
	neg.ftz.f32 	%f355, %f287;

BB4_72:
	mul.ftz.f32 	%f288, %f137, %f335;
	fma.rn.ftz.f32 	%f289, %f136, %f336, %f288;
	fma.rn.ftz.f32 	%f143, %f138, %f334, %f289;
	setp.ltu.ftz.f32	%p44, %f143, 0f00000000;
	@%p44 bra 	BB4_74;

	lg2.approx.ftz.f32 	%f290, %f143;
	mul.ftz.f32 	%f291, %f290, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f356, %f291;
	bra.uni 	BB4_75;

BB4_74:
	neg.ftz.f32 	%f292, %f143;
	lg2.approx.ftz.f32 	%f293, %f292;
	mul.ftz.f32 	%f294, %f293, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f295, %f294;
	neg.ftz.f32 	%f356, %f295;

BB4_75:
	mul.ftz.f32 	%f296, %f137, %f332;
	fma.rn.ftz.f32 	%f297, %f136, %f333, %f296;
	fma.rn.ftz.f32 	%f147, %f138, %f331, %f297;
	setp.ltu.ftz.f32	%p45, %f147, 0f00000000;
	@%p45 bra 	BB4_77;

	lg2.approx.ftz.f32 	%f298, %f147;
	mul.ftz.f32 	%f299, %f298, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f357, %f299;
	bra.uni 	BB4_78;

BB4_77:
	neg.ftz.f32 	%f300, %f147;
	lg2.approx.ftz.f32 	%f301, %f300;
	mul.ftz.f32 	%f302, %f301, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f303, %f302;
	neg.ftz.f32 	%f357, %f303;

BB4_78:
	mov.f32 	%f360, %f357;
	mov.f32 	%f359, %f356;
	mov.f32 	%f358, %f355;
	bra.uni 	BB4_80;

BB4_79:
	mul.ftz.f32 	%f304, %f137, %f335;
	fma.rn.ftz.f32 	%f305, %f136, %f336, %f304;
	fma.rn.ftz.f32 	%f359, %f138, %f334, %f305;
	mul.ftz.f32 	%f306, %f137, %f332;
	fma.rn.ftz.f32 	%f307, %f136, %f333, %f306;
	fma.rn.ftz.f32 	%f360, %f138, %f331, %f307;
	mov.f32 	%f358, %f139;

BB4_80:
	add.s32 	%r109, %r3, %r9;
	add.s32 	%r110, %r109, 1;
	cvt.s64.s32	%rd9, %r110;
	@%p27 bra 	BB4_82;

	cvta.to.global.u64 	%rd73, %rd10;
	shl.b64 	%rd74, %rd9, 4;
	add.s64 	%rd75, %rd73, %rd74;
	st.global.v4.f32 	[%rd75], {%f358, %f359, %f360, %f361};
	bra.uni 	BB4_83;

BB4_82:
	cvta.to.global.u64 	%rd76, %rd10;
	shl.b64 	%rd77, %rd9, 3;
	add.s64 	%rd78, %rd76, %rd77;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f361;
	mov.b16 	%rs13, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f360;
	mov.b16 	%rs14, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f359;
	mov.b16 	%rs15, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f358;
	mov.b16 	%rs16, %temp;
}
	st.global.v4.u16 	[%rd78], {%rs16, %rs15, %rs14, %rs13};

BB4_83:
	ret;
}

// ---------------------------------------------------------------------------
// PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel
//
// Each thread processes one 2x2 cell of a single-channel f32 mosaic whose
// top-left sample is (x, y) = (2*global_thread_x, 2*global_thread_y), and
// writes four 4-component pixels to the destination.
//
// Steps:
//   1. Every source sample is passed through a sign-preserving power curve
//      g(v) = sign(v) * |v|^k with k = 0f3EE8BA2E (~0.4545, i.e. 1/2.2),
//      evaluated as ex2.approx(lg2.approx(|v|) * k).
//   2. Component 1 is interpolated at (x, y) and (x+1, y+1) by comparing a
//      vertical and a horizontal gradient and taking the estimate along the
//      smaller one, or the mean of both when the gradients are close.
//   3. Components 0 and 2 that are not native at a site are plain 2- or
//      4-neighbour averages.
//   4. Component 3 is always written as 1.0.
//
// Native sample placement, as implied by the stores: component 2 at (x, y),
// component 1 at (x+1, y) and (x, y+1), component 0 at (x+1, y+1).
// NOTE(review): with the B,G,R,A order suggested by the kernel name this is
// an RGGB mosaic - confirm against the host-side caller.
//
// Parameters (meaning derived from how each value is used below):
//   param_0  u64  source base pointer; samples are read as f32
//   param_1  u32  element offset added to every source index
//   param_2  u32  source row pitch, in f32 elements
//   param_3  u32  0: source rows addressed top-down;
//                 nonzero: addressing starts at row (param_9 - 1) with a
//                 negated pitch (vertically flipped source)
//   param_4  u32  not read by this kernel
//   param_5  u64  destination base pointer
//   param_6  u32  destination row pitch, in pixels
//   param_7  u32  nonzero: store v4.f32 (16 bytes per pixel);
//                 0: store four f16 values (8 bytes per pixel)
//   param_8  u32  width bound  (thread exits unless x < param_8)
//   param_9  u32  height bound (thread exits unless y < param_9)
//   param_10 u32  not read by this kernel
//   param_11 u32  not read by this kernel
//
// NOTE(review): the loads of (x+1, y), (x, y+1) and (x+1, y+1) are not
// guarded, so the kernel appears to assume an even width and height -
// confirm with the caller.
// ---------------------------------------------------------------------------
.visible .entry PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel(
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_0,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_1,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_2,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_3,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_4,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_5,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_6,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_7,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_8,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_9,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_10,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_11
)
{
	.reg .pred 	%p<59>;
	.reg .s16 	%rs<17>;
	.reg .s32 	%r<295>;
	.reg .f32 	%f<304>;
	.reg .s64 	%rd<134>;


	// Load the parameters that are actually used (4, 10 and 11 are never read).
	ld.param.u64 	%rd11, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_0];
	ld.param.u32 	%r8, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_1];
	ld.param.u32 	%r9, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_2];
	ld.param.u32 	%r10, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_3];
	ld.param.u64 	%rd12, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_5];
	ld.param.u32 	%r11, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_6];
	ld.param.u32 	%r12, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_7];
	ld.param.u32 	%r13, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_8];
	ld.param.u32 	%r14, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Green_Kernel_param_9];
	cvta.to.global.u64 	%rd1, %rd11;
	// %r1 = x = 2 * (ntid.x * ctaid.x + tid.x): left column of this thread's 2x2 cell.
	mov.u32 	%r15, %ntid.x;
	mov.u32 	%r16, %ctaid.x;
	mov.u32 	%r17, %tid.x;
	mad.lo.s32 	%r18, %r15, %r16, %r17;
	shl.b32 	%r1, %r18, 1;
	// %r2 = y = 2 * (ntid.y * ctaid.y + tid.y): top row of this thread's 2x2 cell.
	// %r22 (the un-doubled row index) is reused for destination addressing.
	mov.u32 	%r19, %ntid.y;
	mov.u32 	%r20, %ctaid.y;
	mov.u32 	%r21, %tid.y;
	mad.lo.s32 	%r22, %r19, %r20, %r21;
	shl.b32 	%r2, %r22, 1;
	// Exit unless x < param_8 and y < param_9.
	setp.lt.s32	%p1, %r1, %r13;
	setp.lt.s32	%p2, %r2, %r14;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB5_81;
	bra.uni 	BB5_1;

BB5_1:
	// Source addressing setup.
	//   %r3  = signed row pitch: +param_2 when param_3 == 0, otherwise -param_2
	//   %rd2 = base element offset: param_1, plus (param_9 - 1) * param_2 when flipped
	// Every source address below is %rd1 + 4 * (row * %r3 + column + %rd2).
	cvt.s64.s32	%rd13, %r8;
	add.s32 	%r23, %r14, -1;
	mul.lo.s32 	%r24, %r23, %r9;
	cvt.s64.s32	%rd14, %r24;
	neg.s32 	%r25, %r9;
	setp.eq.s32	%p4, %r10, 0;
	selp.b32	%r3, %r9, %r25, %p4;
	selp.b64	%rd15, 0, %rd14, %p4;
	add.s64 	%rd2, %rd15, %rd13;
	// %rd3 -> sample (x, y); %r4 = y * pitch is reused for the (x-2, y) address.
	mul.lo.s32 	%r4, %r2, %r3;
	add.s32 	%r26, %r4, %r1;
	cvt.s64.s32	%rd16, %r26;
	add.s64 	%rd17, %rd16, %rd2;
	shl.b64 	%rd18, %rd17, 2;
	add.s64 	%rd3, %rd1, %rd18;
	// %f4 = g(src(x, y)).
	// The power curve pattern below repeats for every sample loaded:
	// setp.ltu is true for v < 0 or v unordered (NaN); such inputs take the
	// second branch, which applies the curve to -v and negates the result.
	ld.global.f32 	%f1, [%rd3];
	setp.ltu.ftz.f32	%p5, %f1, 0f00000000;
	@%p5 bra 	BB5_3;

	lg2.approx.ftz.f32 	%f81, %f1;
	mul.ftz.f32 	%f82, %f81, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f292, %f82;
	bra.uni 	BB5_4;

BB5_3:
	neg.ftz.f32 	%f83, %f1;
	lg2.approx.ftz.f32 	%f84, %f83;
	mul.ftz.f32 	%f85, %f84, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f86, %f85;
	neg.ftz.f32 	%f292, %f86;

BB5_4:
	// %f8 = g(src(x+1, y)); loaded without a bounds check on x+1.
	mov.f32 	%f4, %f292;
	ld.global.f32 	%f5, [%rd3+4];
	setp.ltu.ftz.f32	%p6, %f5, 0f00000000;
	@%p6 bra 	BB5_6;

	lg2.approx.ftz.f32 	%f87, %f5;
	mul.ftz.f32 	%f88, %f87, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f281, %f88;
	bra.uni 	BB5_7;

BB5_6:
	neg.ftz.f32 	%f89, %f5;
	lg2.approx.ftz.f32 	%f90, %f89;
	mul.ftz.f32 	%f91, %f90, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f92, %f91;
	neg.ftz.f32 	%f281, %f92;

BB5_7:
	// %rd4 -> sample (x, y+1); %r27 = y+1 and %r5 = (y+1) * pitch are reused later.
	// %f12 = g(src(x, y+1)); loaded without a bounds check on y+1.
	mov.f32 	%f8, %f281;
	add.s32 	%r27, %r2, 1;
	mul.lo.s32 	%r5, %r27, %r3;
	add.s32 	%r28, %r5, %r1;
	cvt.s64.s32	%rd19, %r28;
	add.s64 	%rd20, %rd19, %rd2;
	shl.b64 	%rd21, %rd20, 2;
	add.s64 	%rd4, %rd1, %rd21;
	ld.global.f32 	%f9, [%rd4];
	setp.ltu.ftz.f32	%p7, %f9, 0f00000000;
	@%p7 bra 	BB5_9;

	lg2.approx.ftz.f32 	%f93, %f9;
	mul.ftz.f32 	%f94, %f93, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f276, %f94;
	bra.uni 	BB5_10;

BB5_9:
	neg.ftz.f32 	%f95, %f9;
	lg2.approx.ftz.f32 	%f96, %f95;
	mul.ftz.f32 	%f97, %f96, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f98, %f97;
	neg.ftz.f32 	%f276, %f98;

BB5_10:
	// %f16 = g(src(x+1, y+1)); loaded without a bounds check.
	mov.f32 	%f12, %f276;
	ld.global.f32 	%f13, [%rd4+4];
	setp.ltu.ftz.f32	%p8, %f13, 0f00000000;
	@%p8 bra 	BB5_12;

	lg2.approx.ftz.f32 	%f99, %f13;
	mul.ftz.f32 	%f100, %f99, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f303, %f100;
	bra.uni 	BB5_13;

BB5_12:
	neg.ftz.f32 	%f101, %f13;
	lg2.approx.ftz.f32 	%f102, %f101;
	mul.ftz.f32 	%f103, %f102, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f104, %f103;
	neg.ftz.f32 	%f303, %f104;

BB5_13:
	// From here on, neighbours outside the 2x2 cell are loaded only when they
	// pass an edge test; otherwise a sample from the cell is substituted.
	// %f20 = g(src(x-1, y+1)) when x >= 1, else %f16.
	// %p9 (x < 1) is reused below; %rd5 -> (x-1, y+1) is formed before the
	// guard but dereferenced only behind it.
	mov.f32 	%f16, %f303;
	setp.lt.s32	%p9, %r1, 1;
	add.s32 	%r34, %r1, %r5;
	add.s32 	%r35, %r34, -1;
	cvt.s64.s32	%rd22, %r35;
	add.s64 	%rd27, %rd22, %rd2;
	shl.b64 	%rd29, %rd27, 2;
	add.s64 	%rd5, %rd1, %rd29;
	mov.f32 	%f302, %f16;
	@%p9 bra 	BB5_17;

	ld.global.f32 	%f17, [%rd5];
	setp.ltu.ftz.f32	%p11, %f17, 0f00000000;
	@%p11 bra 	BB5_16;

	lg2.approx.ftz.f32 	%f105, %f17;
	mul.ftz.f32 	%f106, %f105, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f18, %f106;
	mov.f32 	%f302, %f18;
	bra.uni 	BB5_17;

BB5_16:
	neg.ftz.f32 	%f107, %f17;
	lg2.approx.ftz.f32 	%f108, %f107;
	mul.ftz.f32 	%f109, %f108, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f110, %f109;
	neg.ftz.f32 	%f19, %f110;
	mov.f32 	%f302, %f19;

BB5_17:
	// %f24 = g(src(x+2, y+1)) when x < width-2, else %f12.
	// %p12 (x >= width-2) is reused below.
	mov.f32 	%f20, %f302;
	add.s32 	%r43, %r13, -2;
	setp.ge.s32	%p12, %r1, %r43;
	mov.f32 	%f275, %f12;
	@%p12 bra 	BB5_21;

	ld.global.f32 	%f21, [%rd4+8];
	setp.ltu.ftz.f32	%p13, %f21, 0f00000000;
	@%p13 bra 	BB5_20;

	lg2.approx.ftz.f32 	%f111, %f21;
	mul.ftz.f32 	%f112, %f111, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f22, %f112;
	mov.f32 	%f275, %f22;
	bra.uni 	BB5_21;

BB5_20:
	neg.ftz.f32 	%f113, %f21;
	lg2.approx.ftz.f32 	%f114, %f113;
	mul.ftz.f32 	%f115, %f114, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f116, %f115;
	neg.ftz.f32 	%f23, %f116;
	mov.f32 	%f275, %f23;

BB5_21:
	// %f301 = g(src(x+3, y+1)) when x < width-3, else %f16.
	// (%rd5 points at column x-1, so +16 bytes is column x+3.)
	mov.f32 	%f24, %f275;
	add.s32 	%r49, %r13, -3;
	setp.ge.s32	%p14, %r1, %r49;
	mov.f32 	%f301, %f16;
	@%p14 bra 	BB5_25;

	ld.global.f32 	%f25, [%rd5+16];
	setp.ltu.ftz.f32	%p15, %f25, 0f00000000;
	@%p15 bra 	BB5_24;

	lg2.approx.ftz.f32 	%f117, %f25;
	mul.ftz.f32 	%f118, %f117, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f301, %f118;
	bra.uni 	BB5_25;

BB5_24:
	neg.ftz.f32 	%f119, %f25;
	lg2.approx.ftz.f32 	%f120, %f119;
	mul.ftz.f32 	%f121, %f120, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f122, %f121;
	neg.ftz.f32 	%f301, %f122;

BB5_25:
	// %f32 = g(src(x-2, y)) when x >= 2, else %f4.  %rd6 -> (x-2, y).
	setp.lt.s32	%p16, %r1, 2;
	add.s32 	%r55, %r1, %r4;
	add.s32 	%r56, %r55, -2;
	cvt.s64.s32	%rd30, %r56;
	add.s64 	%rd35, %rd30, %rd2;
	shl.b64 	%rd37, %rd35, 2;
	add.s64 	%rd6, %rd1, %rd37;
	mov.f32 	%f291, %f4;
	@%p16 bra 	BB5_29;

	ld.global.f32 	%f29, [%rd6];
	setp.ltu.ftz.f32	%p18, %f29, 0f00000000;
	@%p18 bra 	BB5_28;

	lg2.approx.ftz.f32 	%f123, %f29;
	mul.ftz.f32 	%f124, %f123, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f30, %f124;
	mov.f32 	%f291, %f30;
	bra.uni 	BB5_29;

BB5_28:
	neg.ftz.f32 	%f125, %f29;
	lg2.approx.ftz.f32 	%f126, %f125;
	mul.ftz.f32 	%f127, %f126, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f128, %f127;
	neg.ftz.f32 	%f31, %f128;
	mov.f32 	%f291, %f31;

BB5_29:
	// %f36 = g(src(x-1, y)) when x >= 1, else %f8.
	mov.f32 	%f32, %f291;
	mov.f32 	%f280, %f8;
	@%p9 bra 	BB5_33;

	ld.global.f32 	%f33, [%rd6+4];
	setp.ltu.ftz.f32	%p20, %f33, 0f00000000;
	@%p20 bra 	BB5_32;

	lg2.approx.ftz.f32 	%f129, %f33;
	mul.ftz.f32 	%f130, %f129, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f34, %f130;
	mov.f32 	%f280, %f34;
	bra.uni 	BB5_33;

BB5_32:
	neg.ftz.f32 	%f131, %f33;
	lg2.approx.ftz.f32 	%f132, %f131;
	mul.ftz.f32 	%f133, %f132, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f134, %f133;
	neg.ftz.f32 	%f35, %f134;
	mov.f32 	%f280, %f35;

BB5_33:
	// %f290 = g(src(x+2, y)) when x < width-2, else %f4.
	mov.f32 	%f36, %f280;
	mov.f32 	%f290, %f4;
	@%p12 bra 	BB5_37;

	ld.global.f32 	%f37, [%rd3+8];
	setp.ltu.ftz.f32	%p22, %f37, 0f00000000;
	@%p22 bra 	BB5_36;

	lg2.approx.ftz.f32 	%f135, %f37;
	mul.ftz.f32 	%f136, %f135, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f290, %f136;
	bra.uni 	BB5_37;

BB5_36:
	neg.ftz.f32 	%f137, %f37;
	lg2.approx.ftz.f32 	%f138, %f137;
	mul.ftz.f32 	%f139, %f138, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f140, %f139;
	neg.ftz.f32 	%f290, %f140;

BB5_37:
	// %f300 = g(src(x-1, y-1)) when x >= 1 and y >= 1, else %f16.
	// %p23 (y < 1) is reused by the next two loads.
	setp.lt.s32	%p23, %r2, 1;
	or.pred  	%p25, %p23, %p9;
	mov.f32 	%f300, %f16;
	@%p25 bra 	BB5_41;

	add.s32 	%r90, %r2, -1;
	mad.lo.s32 	%r93, %r90, %r3, %r1;
	add.s32 	%r94, %r93, -1;
	cvt.s64.s32	%rd38, %r94;
	add.s64 	%rd43, %rd38, %rd2;
	shl.b64 	%rd45, %rd43, 2;
	add.s64 	%rd46, %rd1, %rd45;
	ld.global.f32 	%f41, [%rd46];
	setp.ltu.ftz.f32	%p27, %f41, 0f00000000;
	@%p27 bra 	BB5_40;

	lg2.approx.ftz.f32 	%f141, %f41;
	mul.ftz.f32 	%f142, %f141, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f300, %f142;
	bra.uni 	BB5_41;

BB5_40:
	neg.ftz.f32 	%f143, %f41;
	lg2.approx.ftz.f32 	%f144, %f143;
	mul.ftz.f32 	%f145, %f144, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f146, %f145;
	neg.ftz.f32 	%f300, %f146;

BB5_41:
	// %f274 = g(src(x, y-1)) when y >= 1, else %f12.
	mov.f32 	%f274, %f12;
	@%p23 bra 	BB5_45;

	add.s32 	%r107, %r2, -1;
	mad.lo.s32 	%r115, %r107, %r3, %r1;
	cvt.s64.s32	%rd47, %r115;
	add.s64 	%rd52, %rd47, %rd2;
	shl.b64 	%rd54, %rd52, 2;
	add.s64 	%rd55, %rd1, %rd54;
	ld.global.f32 	%f45, [%rd55];
	setp.ltu.ftz.f32	%p30, %f45, 0f00000000;
	@%p30 bra 	BB5_44;

	lg2.approx.ftz.f32 	%f147, %f45;
	mul.ftz.f32 	%f148, %f147, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f274, %f148;
	bra.uni 	BB5_45;

BB5_44:
	neg.ftz.f32 	%f149, %f45;
	lg2.approx.ftz.f32 	%f150, %f149;
	mul.ftz.f32 	%f151, %f150, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f152, %f151;
	neg.ftz.f32 	%f274, %f152;

BB5_45:
	// %f299 = g(src(x+1, y-1)) when y >= 1, else %f16.
	// The address is formed for column x-1 and read at +8 bytes (column x+1).
	mov.f32 	%f299, %f16;
	@%p23 bra 	BB5_49;

	add.s32 	%r133, %r2, -1;
	mad.lo.s32 	%r136, %r133, %r3, %r1;
	add.s32 	%r137, %r136, -1;
	cvt.s64.s32	%rd57, %r137;
	add.s64 	%rd62, %rd57, %rd2;
	shl.b64 	%rd63, %rd62, 2;
	add.s64 	%rd64, %rd1, %rd63;
	ld.global.f32 	%f49, [%rd64+8];
	setp.ltu.ftz.f32	%p33, %f49, 0f00000000;
	@%p33 bra 	BB5_48;

	lg2.approx.ftz.f32 	%f153, %f49;
	mul.ftz.f32 	%f154, %f153, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f299, %f154;
	bra.uni 	BB5_49;

BB5_48:
	neg.ftz.f32 	%f155, %f49;
	lg2.approx.ftz.f32 	%f156, %f155;
	mul.ftz.f32 	%f157, %f156, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f158, %f157;
	neg.ftz.f32 	%f299, %f158;

BB5_49:
	// %f289 = g(src(x, y-2)) when y >= 2, else %f4.
	setp.lt.s32	%p34, %r2, 2;
	mov.f32 	%f289, %f4;
	@%p34 bra 	BB5_53;

	add.s32 	%r150, %r2, -2;
	mad.lo.s32 	%r158, %r150, %r3, %r1;
	cvt.s64.s32	%rd65, %r158;
	add.s64 	%rd70, %rd65, %rd2;
	shl.b64 	%rd72, %rd70, 2;
	add.s64 	%rd73, %rd1, %rd72;
	ld.global.f32 	%f53, [%rd73];
	setp.ltu.ftz.f32	%p36, %f53, 0f00000000;
	@%p36 bra 	BB5_52;

	lg2.approx.ftz.f32 	%f159, %f53;
	mul.ftz.f32 	%f160, %f159, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f289, %f160;
	bra.uni 	BB5_53;

BB5_52:
	neg.ftz.f32 	%f161, %f53;
	lg2.approx.ftz.f32 	%f162, %f161;
	mul.ftz.f32 	%f163, %f162, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f164, %f163;
	neg.ftz.f32 	%f289, %f164;

BB5_53:
	// %f288 = g(src(x, y+2)) when y < height-2, else %f4.
	// %p37 (y >= height-2) is reused by the next two loads.
	add.s32 	%r166, %r14, -2;
	setp.ge.s32	%p37, %r2, %r166;
	mov.f32 	%f288, %f4;
	@%p37 bra 	BB5_57;

	add.s32 	%r172, %r2, 2;
	mad.lo.s32 	%r180, %r172, %r3, %r1;
	cvt.s64.s32	%rd74, %r180;
	add.s64 	%rd79, %rd74, %rd2;
	shl.b64 	%rd81, %rd79, 2;
	add.s64 	%rd82, %rd1, %rd81;
	ld.global.f32 	%f57, [%rd82];
	setp.ltu.ftz.f32	%p39, %f57, 0f00000000;
	@%p39 bra 	BB5_56;

	lg2.approx.ftz.f32 	%f165, %f57;
	mul.ftz.f32 	%f166, %f165, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f288, %f166;
	bra.uni 	BB5_57;

BB5_56:
	neg.ftz.f32 	%f167, %f57;
	lg2.approx.ftz.f32 	%f168, %f167;
	mul.ftz.f32 	%f169, %f168, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f170, %f169;
	neg.ftz.f32 	%f288, %f170;

BB5_57:
	// %f279 = g(src(x+1, y+2)) when y < height-2, else %f8.
	mov.f32 	%f279, %f8;
	@%p37 bra 	BB5_61;

	add.s32 	%r194, %r2, 2;
	mad.lo.s32 	%r202, %r194, %r3, %r1;
	add.s32 	%r203, %r202, 1;
	cvt.s64.s32	%rd83, %r203;
	add.s64 	%rd88, %rd83, %rd2;
	shl.b64 	%rd90, %rd88, 2;
	add.s64 	%rd91, %rd1, %rd90;
	ld.global.f32 	%f61, [%rd91];
	setp.ltu.ftz.f32	%p42, %f61, 0f00000000;
	@%p42 bra 	BB5_60;

	lg2.approx.ftz.f32 	%f171, %f61;
	mul.ftz.f32 	%f172, %f171, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f279, %f172;
	bra.uni 	BB5_61;

BB5_60:
	neg.ftz.f32 	%f173, %f61;
	lg2.approx.ftz.f32 	%f174, %f173;
	mul.ftz.f32 	%f175, %f174, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f176, %f175;
	neg.ftz.f32 	%f279, %f176;

BB5_61:
	// %f287 = g(src(x+2, y+2)) when y < height-2 and x < width-2, else %f4.
	// The address is formed for column x+1 and read at +4 bytes (column x+2).
	or.pred  	%p45, %p37, %p12;
	mov.f32 	%f287, %f4;
	@%p45 bra 	BB5_65;

	add.s32 	%r223, %r2, 2;
	mad.lo.s32 	%r231, %r223, %r3, %r1;
	add.s32 	%r232, %r231, 1;
	cvt.s64.s32	%rd93, %r232;
	add.s64 	%rd98, %rd93, %rd2;
	shl.b64 	%rd99, %rd98, 2;
	add.s64 	%rd100, %rd1, %rd99;
	ld.global.f32 	%f65, [%rd100+4];
	setp.ltu.ftz.f32	%p47, %f65, 0f00000000;
	@%p47 bra 	BB5_64;

	lg2.approx.ftz.f32 	%f177, %f65;
	mul.ftz.f32 	%f178, %f177, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f287, %f178;
	bra.uni 	BB5_65;

BB5_64:
	neg.ftz.f32 	%f179, %f65;
	lg2.approx.ftz.f32 	%f180, %f179;
	mul.ftz.f32 	%f181, %f180, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f182, %f181;
	neg.ftz.f32 	%f287, %f182;

BB5_65:
	// %f298 = g(src(x+1, y+3)) when y < height-3, else %f16.
	add.s32 	%r240, %r14, -3;
	setp.ge.s32	%p48, %r2, %r240;
	mov.f32 	%f298, %f16;
	@%p48 bra 	BB5_69;

	add.s32 	%r246, %r2, 3;
	mad.lo.s32 	%r254, %r246, %r3, %r1;
	add.s32 	%r255, %r254, 1;
	cvt.s64.s32	%rd101, %r255;
	add.s64 	%rd106, %rd101, %rd2;
	shl.b64 	%rd108, %rd106, 2;
	add.s64 	%rd109, %rd1, %rd108;
	ld.global.f32 	%f69, [%rd109];
	setp.ltu.ftz.f32	%p50, %f69, 0f00000000;
	@%p50 bra 	BB5_68;

	lg2.approx.ftz.f32 	%f183, %f69;
	mul.ftz.f32 	%f184, %f183, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f298, %f184;
	bra.uni 	BB5_69;

BB5_68:
	neg.ftz.f32 	%f185, %f69;
	lg2.approx.ftz.f32 	%f186, %f185;
	mul.ftz.f32 	%f187, %f186, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f188, %f187;
	neg.ftz.f32 	%f298, %f188;

BB5_69:
	// ---- Component 1 at (x, y): result in %f73 ----
	// Constants: 0f3E800000 = 0.25, 0f3F000000 = 0.5, 0f3C4CCCCD ~= 0.0125.
	//   %f191 = 2*c - up2 - down2      vertical second difference of the centre
	//   %f194 = 2*c - left2 - right2   horizontal second difference of the centre
	//   %f198 = 0.5*(below + above) + 0.25*%f191   vertical estimate
	//   %f200 = 0.5*(right + left)  + 0.25*%f194   horizontal estimate
	//   %f202 = 0.5*(%f198 + %f200)                blended estimate
	sub.ftz.f32 	%f189, %f4, %f288;
	sub.ftz.f32 	%f190, %f4, %f289;
	add.ftz.f32 	%f191, %f190, %f189;
	sub.ftz.f32 	%f192, %f4, %f32;
	sub.ftz.f32 	%f193, %f4, %f290;
	add.ftz.f32 	%f194, %f193, %f192;
	add.ftz.f32 	%f195, %f12, %f274;
	add.ftz.f32 	%f196, %f8, %f36;
	mul.ftz.f32 	%f197, %f191, 0f3E800000;
	fma.rn.ftz.f32 	%f198, %f195, 0f3F000000, %f197;
	mul.ftz.f32 	%f199, %f194, 0f3E800000;
	fma.rn.ftz.f32 	%f200, %f196, 0f3F000000, %f199;
	add.ftz.f32 	%f201, %f198, %f200;
	mul.ftz.f32 	%f202, %f201, 0f3F000000;
	// %f210 = 0.0125 * (sum of the eight neighbours used above): the tolerance
	// within which the two gradients are treated as equal.
	add.ftz.f32 	%f203, %f12, %f36;
	add.ftz.f32 	%f204, %f203, %f274;
	add.ftz.f32 	%f205, %f204, %f8;
	add.ftz.f32 	%f206, %f205, %f288;
	add.ftz.f32 	%f207, %f206, %f32;
	add.ftz.f32 	%f208, %f207, %f289;
	add.ftz.f32 	%f209, %f208, %f290;
	mul.ftz.f32 	%f210, %f209, 0f3C4CCCCD;
	// %f214 = |below - above| + |%f191|  (vertical gradient)
	// %f218 = |left - right|  + |%f194|  (horizontal gradient)
	sub.ftz.f32 	%f211, %f12, %f274;
	abs.ftz.f32 	%f212, %f211;
	abs.ftz.f32 	%f213, %f191;
	add.ftz.f32 	%f214, %f212, %f213;
	sub.ftz.f32 	%f215, %f36, %f8;
	abs.ftz.f32 	%f216, %f215;
	abs.ftz.f32 	%f217, %f194;
	add.ftz.f32 	%f218, %f216, %f217;
	// Pick the estimate along the smaller gradient; if the gradients differ by
	// less than the tolerance, use the blended estimate instead.
	sub.ftz.f32 	%f219, %f214, %f218;
	setp.lt.ftz.f32	%p51, %f219, 0f00000000;
	selp.f32	%f220, %f198, %f200, %p51;
	abs.ftz.f32 	%f221, %f219;
	setp.lt.ftz.f32	%p52, %f221, %f210;
	selp.f32	%f73, %f202, %f220, %p52;
	// ---- Component 1 at (x+1, y+1): result in %f74 ----
	// Same scheme centred on %f16: same-channel samples two steps away are
	// (x+1, y-1), (x+1, y+3), (x-1, y+1), (x+3, y+1); the adjacent component-1
	// samples are (x+1, y), (x+1, y+2), (x, y+1), (x+2, y+1).
	sub.ftz.f32 	%f222, %f16, %f298;
	sub.ftz.f32 	%f223, %f16, %f299;
	add.ftz.f32 	%f224, %f223, %f222;
	sub.ftz.f32 	%f225, %f16, %f20;
	sub.ftz.f32 	%f226, %f16, %f301;
	add.ftz.f32 	%f227, %f226, %f225;
	add.ftz.f32 	%f228, %f279, %f8;
	add.ftz.f32 	%f229, %f24, %f12;
	mul.ftz.f32 	%f230, %f224, 0f3E800000;
	fma.rn.ftz.f32 	%f231, %f228, 0f3F000000, %f230;
	mul.ftz.f32 	%f232, %f227, 0f3E800000;
	fma.rn.ftz.f32 	%f233, %f229, 0f3F000000, %f232;
	add.ftz.f32 	%f234, %f231, %f233;
	mul.ftz.f32 	%f235, %f234, 0f3F000000;
	add.ftz.f32 	%f236, %f279, %f12;
	add.ftz.f32 	%f237, %f236, %f8;
	add.ftz.f32 	%f238, %f237, %f24;
	add.ftz.f32 	%f239, %f238, %f298;
	add.ftz.f32 	%f240, %f239, %f20;
	add.ftz.f32 	%f241, %f240, %f299;
	add.ftz.f32 	%f242, %f241, %f301;
	mul.ftz.f32 	%f243, %f242, 0f3C4CCCCD;
	sub.ftz.f32 	%f244, %f279, %f8;
	abs.ftz.f32 	%f245, %f244;
	abs.ftz.f32 	%f246, %f224;
	add.ftz.f32 	%f247, %f245, %f246;
	sub.ftz.f32 	%f248, %f12, %f24;
	abs.ftz.f32 	%f249, %f248;
	abs.ftz.f32 	%f250, %f227;
	add.ftz.f32 	%f251, %f249, %f250;
	sub.ftz.f32 	%f252, %f247, %f251;
	setp.lt.ftz.f32	%p53, %f252, 0f00000000;
	selp.f32	%f253, %f231, %f233, %p53;
	abs.ftz.f32 	%f254, %f252;
	setp.lt.ftz.f32	%p54, %f254, %f243;
	selp.f32	%f74, %f235, %f253, %p54;
	// ---- Components 0 and 2 by plain averaging ----
	// %f75: component 0 at (x, y)     = mean of its four diagonal neighbours
	// %f76: component 0 at (x+1, y)   = mean of (x+1, y-1) and (x+1, y+1)
	// %f77: component 2 at (x+1, y)   = mean of (x, y) and (x+2, y)
	// %f78: component 0 at (x, y+1)   = mean of (x-1, y+1) and (x+1, y+1)
	// %f79: component 2 at (x, y+1)   = mean of (x, y) and (x, y+2)
	// %f80: component 2 at (x+1, y+1) = mean of (x, y), (x+2, y), (x, y+2), (x+2, y+2)
	add.ftz.f32 	%f255, %f300, %f299;
	add.ftz.f32 	%f256, %f255, %f20;
	add.ftz.f32 	%f257, %f256, %f16;
	mul.ftz.f32 	%f75, %f257, 0f3E800000;
	add.ftz.f32 	%f258, %f299, %f16;
	mul.ftz.f32 	%f76, %f258, 0f3F000000;
	add.ftz.f32 	%f259, %f4, %f290;
	mul.ftz.f32 	%f77, %f259, 0f3F000000;
	add.ftz.f32 	%f260, %f20, %f16;
	mul.ftz.f32 	%f78, %f260, 0f3F000000;
	add.ftz.f32 	%f261, %f4, %f288;
	mul.ftz.f32 	%f79, %f261, 0f3F000000;
	add.ftz.f32 	%f262, %f259, %f288;
	add.ftz.f32 	%f263, %f262, %f287;
	mul.ftz.f32 	%f80, %f263, 0f3E800000;
	// ---- Destination pixel (x, y) ----
	// %r6 = 2 * (row_index * param_6) = y * destination pitch; pixel index = %r6 + x.
	// %p55 (param_7 == 0) selects the f16 store path for all four pixels.
	// The destination index does not use the param_3 flip.
	mul.lo.s32 	%r262, %r22, %r11;
	shl.b32 	%r6, %r262, 1;
	add.s32 	%r268, %r6, %r1;
	cvt.s64.s32	%rd7, %r268;
	setp.eq.s32	%p55, %r12, 0;
	@%p55 bra 	BB5_71;

	// f32 path: 16 bytes per pixel.
	cvta.to.global.u64 	%rd110, %rd12;
	shl.b64 	%rd111, %rd7, 4;
	add.s64 	%rd112, %rd110, %rd111;
	mov.f32 	%f264, 0f3F800000;
	st.global.v4.f32 	[%rd112], {%f75, %f73, %f4, %f264};
	bra.uni 	BB5_72;

BB5_71:
	// f16 path: 8 bytes per pixel; each component is rounded to nearest f16.
	cvta.to.global.u64 	%rd113, %rd12;
	shl.b64 	%rd114, %rd7, 3;
	add.s64 	%rd115, %rd113, %rd114;
	mov.f32 	%f265, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f265;
	mov.b16 	%rs1, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f4;
	mov.b16 	%rs2, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f73;
	mov.b16 	%rs3, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f75;
	mov.b16 	%rs4, %temp;
}
	st.global.v4.u16 	[%rd115], {%rs4, %rs3, %rs2, %rs1};

BB5_72:
	// ---- Destination pixel (x+1, y): {%f76, native %f8, %f77, 1.0} ----
	add.s32 	%r274, %r1, %r6;
	add.s32 	%r275, %r274, 1;
	cvt.s64.s32	%rd8, %r275;
	@%p55 bra 	BB5_74;

	cvta.to.global.u64 	%rd116, %rd12;
	shl.b64 	%rd117, %rd8, 4;
	add.s64 	%rd118, %rd116, %rd117;
	mov.f32 	%f266, 0f3F800000;
	st.global.v4.f32 	[%rd118], {%f76, %f8, %f77, %f266};
	bra.uni 	BB5_75;

BB5_74:
	cvta.to.global.u64 	%rd119, %rd12;
	shl.b64 	%rd120, %rd8, 3;
	add.s64 	%rd121, %rd119, %rd120;
	mov.f32 	%f267, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f267;
	mov.b16 	%rs5, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f77;
	mov.b16 	%rs6, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f8;
	mov.b16 	%rs7, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f76;
	mov.b16 	%rs8, %temp;
}
	st.global.v4.u16 	[%rd121], {%rs8, %rs7, %rs6, %rs5};

BB5_75:
	// ---- Destination pixel (x, y+1): {%f78, native %f12, %f79, 1.0} ----
	// %r7 = (y+1) * destination pitch.
	mul.lo.s32 	%r7, %r27, %r11;
	add.s32 	%r287, %r7, %r1;
	cvt.s64.s32	%rd9, %r287;
	@%p55 bra 	BB5_77;

	cvta.to.global.u64 	%rd122, %rd12;
	shl.b64 	%rd123, %rd9, 4;
	add.s64 	%rd124, %rd122, %rd123;
	mov.f32 	%f268, 0f3F800000;
	st.global.v4.f32 	[%rd124], {%f78, %f12, %f79, %f268};
	bra.uni 	BB5_78;

BB5_77:
	cvta.to.global.u64 	%rd125, %rd12;
	shl.b64 	%rd126, %rd9, 3;
	add.s64 	%rd127, %rd125, %rd126;
	mov.f32 	%f269, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f269;
	mov.b16 	%rs9, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f79;
	mov.b16 	%rs10, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f12;
	mov.b16 	%rs11, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f78;
	mov.b16 	%rs12, %temp;
}
	st.global.v4.u16 	[%rd127], {%rs12, %rs11, %rs10, %rs9};

BB5_78:
	// ---- Destination pixel (x+1, y+1): {native %f16, %f74, %f80, 1.0} ----
	add.s32 	%r293, %r1, %r7;
	add.s32 	%r294, %r293, 1;
	cvt.s64.s32	%rd10, %r294;
	@%p55 bra 	BB5_80;

	cvta.to.global.u64 	%rd128, %rd12;
	shl.b64 	%rd129, %rd10, 4;
	add.s64 	%rd130, %rd128, %rd129;
	mov.f32 	%f270, 0f3F800000;
	st.global.v4.f32 	[%rd130], {%f16, %f74, %f80, %f270};
	bra.uni 	BB5_81;

BB5_80:
	cvta.to.global.u64 	%rd131, %rd12;
	shl.b64 	%rd132, %rd10, 3;
	add.s64 	%rd133, %rd131, %rd132;
	mov.f32 	%f271, 0f3F800000;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f271;
	mov.b16 	%rs13, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f80;
	mov.b16 	%rs14, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f74;
	mov.b16 	%rs15, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f16;
	mov.b16 	%rs16, %temp;
}
	st.global.v4.u16 	[%rd133], {%rs16, %rs15, %rs14, %rs13};

BB5_81:
	// Common exit, also reached directly by threads that fail the bounds test.
	ret;
}

.visible .entry PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel(
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_0,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_1,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_2,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_3,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_4,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_5,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_6,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_7,
	.param .u64 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_8,
	.param .align 16 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_9[16],
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_10,
	.param .u32 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_11
)
{
	.reg .pred 	%p<62>;
	.reg .s16 	%rs<97>;
	.reg .s32 	%r<86>;
	.reg .f32 	%f<642>;
	.reg .s64 	%rd<82>;
	// demoted variable
	.shared .align 4 .b8 PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix[36];

	ld.param.u64 	%rd18, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_0];
	ld.param.u64 	%rd16, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_1];
	ld.param.u32 	%r10, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_2];
	ld.param.u32 	%r11, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_3];
	ld.param.u32 	%r12, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_4];
	ld.param.u32 	%r13, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_5];
	ld.param.u32 	%r14, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_6];
	ld.param.u32 	%r15, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_7];
	ld.param.u64 	%rd17, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_8];
	ld.param.f32 	%f320, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_9+8];
	ld.param.f32 	%f319, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_9+4];
	ld.param.f32 	%f318, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_9];
	ld.param.u32 	%r16, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_10];
	ld.param.u32 	%r17, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel_param_11];
	cvta.to.global.u64 	%rd1, %rd18;
	mov.u32 	%r1, %tid.y;
	setp.eq.s32	%p1, %r1, 0;
	mov.u32 	%r2, %tid.x;
	setp.lt.s32	%p2, %r2, 3;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB6_2;
	bra.uni 	BB6_1;

BB6_1:
	cvta.to.global.u64 	%rd19, %rd17;
	cvt.s64.s32	%rd20, %r16;
	setp.ne.s32	%p4, %r15, 0;
	mul.lo.s32 	%r18, %r2, 3;
	cvt.s64.s32	%rd21, %r18;
	add.s64 	%rd22, %rd21, %rd20;
	selp.b64	%rd23, 2, 0, %p4;
	add.s64 	%rd24, %rd22, %rd23;
	shl.b64 	%rd25, %rd24, 2;
	add.s64 	%rd26, %rd19, %rd25;
	mul.wide.s32 	%rd27, %r18, 4;
	mov.u64 	%rd28, PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix;
	add.s64 	%rd29, %rd28, %rd27;
	ld.global.f32 	%f322, [%rd26];
	st.shared.f32 	[%rd29], %f322;
	shl.b64 	%rd30, %rd22, 2;
	add.s64 	%rd31, %rd19, %rd30;
	ld.global.f32 	%f323, [%rd31+4];
	st.shared.f32 	[%rd29+4], %f323;
	selp.b64	%rd32, 0, 2, %p4;
	add.s64 	%rd33, %rd22, %rd32;
	shl.b64 	%rd34, %rd33, 2;
	add.s64 	%rd35, %rd19, %rd34;
	ld.global.f32 	%f324, [%rd35];
	st.shared.f32 	[%rd29+8], %f324;

BB6_2:
	bar.sync 	0;
	mov.u32 	%r19, %ntid.x;
	mov.u32 	%r20, %ctaid.x;
	mad.lo.s32 	%r21, %r19, %r20, %r2;
	shl.b32 	%r3, %r21, 1;
	mov.u32 	%r22, %ntid.y;
	mov.u32 	%r23, %ctaid.y;
	mad.lo.s32 	%r24, %r22, %r23, %r1;
	shl.b32 	%r4, %r24, 1;
	setp.lt.s32	%p5, %r3, %r13;
	setp.lt.s32	%p6, %r4, %r14;
	and.pred  	%p7, %p5, %p6;
	@!%p7 bra 	BB6_141;
	bra.uni 	BB6_3;

BB6_3:
	add.s32 	%r5, %r13, -2;
	mul.lo.s32 	%r6, %r4, %r10;
	add.s32 	%r25, %r6, %r3;
	cvt.s64.s32	%rd2, %r25;
	setp.eq.s32	%p8, %r12, 0;
	@%p8 bra 	BB6_5;

	shl.b64 	%rd36, %rd2, 4;
	add.s64 	%rd37, %rd1, %rd36;
	ld.global.v4.f32 	{%f325, %f326, %f327, %f328}, [%rd37];
	mov.f32 	%f571, %f328;
	mov.f32 	%f570, %f327;
	mov.f32 	%f569, %f326;
	mov.f32 	%f568, %f325;
	bra.uni 	BB6_6;

BB6_5:
	shl.b64 	%rd38, %rd2, 3;
	add.s64 	%rd39, %rd1, %rd38;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd39];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f568, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f569, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f570, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f571, %temp;
	}

BB6_6:
	mul.wide.s32 	%rd40, %r25, 16;
	add.s64 	%rd3, %rd1, %rd40;
	mul.wide.s32 	%rd41, %r25, 8;
	add.s64 	%rd4, %rd1, %rd41;
	@%p8 bra 	BB6_8;

	ld.global.v4.f32 	{%f329, %f330, %f331, %f332}, [%rd3+16];
	mov.f32 	%f575, %f332;
	mov.f32 	%f574, %f331;
	mov.f32 	%f573, %f330;
	mov.f32 	%f572, %f329;
	bra.uni 	BB6_9;

BB6_8:
	ld.global.v4.u16 	{%rs9, %rs10, %rs11, %rs12}, [%rd4+8];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs9;
	cvt.f32.f16 	%f572, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs10;
	cvt.f32.f16 	%f573, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs11;
	cvt.f32.f16 	%f574, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs12;
	cvt.f32.f16 	%f575, %temp;
	}

BB6_9:
	add.s32 	%r27, %r4, 1;
	mul.lo.s32 	%r7, %r27, %r10;
	add.s32 	%r28, %r7, %r3;
	cvt.s64.s32	%rd5, %r28;
	@%p8 bra 	BB6_11;

	shl.b64 	%rd42, %rd5, 4;
	add.s64 	%rd43, %rd1, %rd42;
	ld.global.v4.f32 	{%f333, %f334, %f335, %f336}, [%rd43];
	mov.f32 	%f579, %f336;
	mov.f32 	%f578, %f335;
	mov.f32 	%f577, %f334;
	mov.f32 	%f576, %f333;
	bra.uni 	BB6_12;

BB6_11:
	shl.b64 	%rd44, %rd5, 3;
	add.s64 	%rd45, %rd1, %rd44;
	ld.global.v4.u16 	{%rs17, %rs18, %rs19, %rs20}, [%rd45];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs17;
	cvt.f32.f16 	%f576, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs18;
	cvt.f32.f16 	%f577, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs19;
	cvt.f32.f16 	%f578, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs20;
	cvt.f32.f16 	%f579, %temp;
	}

BB6_12:
	add.s32 	%r29, %r3, %r7;
	add.s32 	%r30, %r29, -1;
	mul.wide.s32 	%rd46, %r30, 16;
	add.s64 	%rd6, %rd1, %rd46;
	mul.wide.s32 	%rd47, %r30, 8;
	add.s64 	%rd7, %rd1, %rd47;
	mov.f32 	%f585, %f577;
	@%p8 bra 	BB6_14;

	ld.global.v4.f32 	{%f337, %f338, %f339, %f340}, [%rd6+32];
	mov.f32 	%f583, %f340;
	mov.f32 	%f582, %f339;
	mov.f32 	%f581, %f338;
	mov.f32 	%f580, %f337;
	bra.uni 	BB6_15;

BB6_14:
	ld.global.v4.u16 	{%rs25, %rs26, %rs27, %rs28}, [%rd7+16];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs25;
	cvt.f32.f16 	%f580, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs26;
	cvt.f32.f16 	%f581, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs27;
	cvt.f32.f16 	%f582, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs28;
	cvt.f32.f16 	%f583, %temp;
	}

BB6_15:
	setp.ge.s32	%p12, %r3, %r5;
	@%p12 bra 	BB6_20;

	@%p8 bra 	BB6_18;

	ld.global.v4.f32 	{%f341, %f342, %f343, %f344}, [%rd3+32];
	mov.f32 	%f53, %f344;
	mov.f32 	%f52, %f343;
	mov.f32 	%f584, %f342;
	mov.f32 	%f50, %f341;
	bra.uni 	BB6_19;

BB6_18:
	ld.global.v4.u16 	{%rs33, %rs34, %rs35, %rs36}, [%rd4+16];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs34;
	cvt.f32.f16 	%f584, %temp;
	}

BB6_19:
	mov.f32 	%f585, %f584;

BB6_20:
	add.s32 	%r31, %r14, -2;
	setp.ge.s32	%p14, %r4, %r31;
	mov.f32 	%f587, %f573;
	@%p14 bra 	BB6_25;

	add.s32 	%r32, %r4, 2;
	mad.lo.s32 	%r33, %r32, %r10, %r3;
	cvt.s64.s32	%rd8, %r33;
	@%p8 bra 	BB6_23;

	shl.b64 	%rd48, %rd8, 4;
	add.s64 	%rd49, %rd1, %rd48;
	ld.global.v4.f32 	{%f345, %f346, %f347, %f348}, [%rd49];
	mov.f32 	%f68, %f348;
	mov.f32 	%f67, %f347;
	mov.f32 	%f586, %f346;
	mov.f32 	%f65, %f345;
	bra.uni 	BB6_24;

BB6_23:
	shl.b64 	%rd50, %rd8, 3;
	add.s64 	%rd51, %rd1, %rd50;
	ld.global.v4.u16 	{%rs41, %rs42, %rs43, %rs44}, [%rd51];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs42;
	cvt.f32.f16 	%f586, %temp;
	}

BB6_24:
	mov.f32 	%f587, %f586;

BB6_25:
	or.pred  	%p18, %p12, %p14;
	mov.f32 	%f589, %f581;
	@%p18 bra 	BB6_30;

	add.s32 	%r36, %r4, 2;
	mad.lo.s32 	%r37, %r36, %r10, %r3;
	add.s32 	%r38, %r37, 2;
	cvt.s64.s32	%rd9, %r38;
	@%p8 bra 	BB6_28;

	shl.b64 	%rd52, %rd9, 4;
	add.s64 	%rd53, %rd1, %rd52;
	ld.global.v4.f32 	{%f349, %f350, %f351, %f352}, [%rd53];
	mov.f32 	%f83, %f352;
	mov.f32 	%f82, %f351;
	mov.f32 	%f588, %f350;
	mov.f32 	%f80, %f349;
	bra.uni 	BB6_29;

BB6_28:
	shl.b64 	%rd54, %rd9, 3;
	add.s64 	%rd55, %rd1, %rd54;
	ld.global.v4.u16 	{%rs49, %rs50, %rs51, %rs52}, [%rd55];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs50;
	cvt.f32.f16 	%f588, %temp;
	}

BB6_29:
	mov.f32 	%f589, %f588;

BB6_30:
	add.ftz.f32 	%f353, %f569, %f585;
	mul.ftz.f32 	%f354, %f353, 0f3F000000;
	sub.ftz.f32 	%f355, %f573, %f354;
	add.ftz.f32 	%f94, %f574, %f355;
	add.ftz.f32 	%f356, %f569, %f587;
	mul.ftz.f32 	%f357, %f356, 0f3F000000;
	sub.ftz.f32 	%f358, %f577, %f357;
	add.ftz.f32 	%f95, %f578, %f358;
	add.ftz.f32 	%f359, %f353, %f587;
	add.ftz.f32 	%f360, %f359, %f589;
	fma.rn.ftz.f32 	%f361, %f360, 0fBE800000, %f581;
	add.ftz.f32 	%f97, %f582, %f361;
	setp.lt.s32	%p20, %r3, 1;
	setp.lt.s32	%p21, %r4, 1;
	or.pred  	%p22, %p20, %p21;
	add.s32 	%r39, %r4, -1;
	mad.lo.s32 	%r40, %r39, %r10, %r3;
	add.s32 	%r41, %r40, -1;
	mul.wide.s32 	%rd56, %r41, 16;
	add.s64 	%rd10, %rd1, %rd56;
	mul.wide.s32 	%rd57, %r41, 8;
	add.s64 	%rd11, %rd1, %rd57;
	mov.f32 	%f591, %f581;
	@%p22 bra 	BB6_35;

	@%p8 bra 	BB6_33;

	ld.global.v4.f32 	{%f362, %f363, %f364, %f365}, [%rd10];
	mov.f32 	%f101, %f365;
	mov.f32 	%f100, %f364;
	mov.f32 	%f590, %f363;
	mov.f32 	%f98, %f362;
	bra.uni 	BB6_34;

BB6_33:
	ld.global.v4.u16 	{%rs57, %rs58, %rs59, %rs60}, [%rd11];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs58;
	cvt.f32.f16 	%f590, %temp;
	}

BB6_34:
	mov.f32 	%f591, %f590;

BB6_35:
	mov.f32 	%f593, %f573;
	@%p21 bra 	BB6_40;

	@%p8 bra 	BB6_38;

	ld.global.v4.f32 	{%f366, %f367, %f368, %f369}, [%rd10+32];
	mov.f32 	%f116, %f369;
	mov.f32 	%f115, %f368;
	mov.f32 	%f592, %f367;
	mov.f32 	%f113, %f366;
	bra.uni 	BB6_39;

BB6_38:
	ld.global.v4.u16 	{%rs65, %rs66, %rs67, %rs68}, [%rd11+16];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs66;
	cvt.f32.f16 	%f592, %temp;
	}

BB6_39:
	mov.f32 	%f593, %f592;

BB6_40:
	mov.f32 	%f595, %f577;
	@%p20 bra 	BB6_45;

	@%p8 bra 	BB6_43;

	ld.global.v4.f32 	{%f370, %f371, %f372, %f373}, [%rd6];
	mov.f32 	%f131, %f373;
	mov.f32 	%f130, %f372;
	mov.f32 	%f594, %f371;
	mov.f32 	%f128, %f370;
	bra.uni 	BB6_44;

BB6_43:
	ld.global.v4.u16 	{%rs73, %rs74, %rs75, %rs76}, [%rd7];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs74;
	cvt.f32.f16 	%f594, %temp;
	}

BB6_44:
	mov.f32 	%f595, %f594;

BB6_45:
	add.ftz.f32 	%f374, %f591, %f593;
	add.ftz.f32 	%f375, %f374, %f595;
	add.ftz.f32 	%f376, %f375, %f581;
	fma.rn.ftz.f32 	%f377, %f376, 0fBE800000, %f569;
	add.ftz.f32 	%f142, %f568, %f377;
	add.ftz.f32 	%f378, %f593, %f581;
	mul.ftz.f32 	%f379, %f378, 0f3F000000;
	sub.ftz.f32 	%f380, %f573, %f379;
	add.ftz.f32 	%f143, %f572, %f380;
	add.ftz.f32 	%f381, %f595, %f581;
	mul.ftz.f32 	%f382, %f381, 0f3F000000;
	sub.ftz.f32 	%f383, %f577, %f382;
	add.ftz.f32 	%f144, %f576, %f383;
	setp.ltu.ftz.f32	%p28, %f570, 0f00000000;
	@%p28 bra 	BB6_47;

	lg2.approx.ftz.f32 	%f384, %f570;
	mul.ftz.f32 	%f385, %f384, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f596, %f385;
	bra.uni 	BB6_48;

BB6_47:
	neg.ftz.f32 	%f386, %f570;
	lg2.approx.ftz.f32 	%f387, %f386;
	mul.ftz.f32 	%f388, %f387, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f389, %f388;
	neg.ftz.f32 	%f596, %f389;

BB6_48:
	setp.ltu.ftz.f32	%p29, %f569, 0f00000000;
	@%p29 bra 	BB6_50;

	lg2.approx.ftz.f32 	%f390, %f569;
	mul.ftz.f32 	%f391, %f390, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f597, %f391;
	bra.uni 	BB6_51;

BB6_50:
	neg.ftz.f32 	%f392, %f569;
	lg2.approx.ftz.f32 	%f393, %f392;
	mul.ftz.f32 	%f394, %f393, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f395, %f394;
	neg.ftz.f32 	%f597, %f395;

BB6_51:
	setp.ltu.ftz.f32	%p30, %f142, 0f00000000;
	@%p30 bra 	BB6_53;

	lg2.approx.ftz.f32 	%f396, %f142;
	mul.ftz.f32 	%f397, %f396, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f598, %f397;
	bra.uni 	BB6_54;

BB6_53:
	neg.ftz.f32 	%f398, %f142;
	lg2.approx.ftz.f32 	%f399, %f398;
	mul.ftz.f32 	%f400, %f399, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f401, %f400;
	neg.ftz.f32 	%f598, %f401;

BB6_54:
	min.ftz.f32 	%f154, %f596, %f318;
	min.ftz.f32 	%f155, %f597, %f319;
	min.ftz.f32 	%f156, %f598, %f320;
	ld.shared.f32 	%f157, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+24];
	ld.shared.f32 	%f158, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+28];
	mul.ftz.f32 	%f402, %f155, %f158;
	fma.rn.ftz.f32 	%f403, %f154, %f157, %f402;
	ld.shared.f32 	%f159, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+32];
	fma.rn.ftz.f32 	%f160, %f156, %f159, %f403;
	setp.eq.s32	%p31, %r17, 0;
	@%p31 bra 	BB6_65;

	setp.ltu.ftz.f32	%p32, %f160, 0f00000000;
	@%p32 bra 	BB6_57;

	lg2.approx.ftz.f32 	%f404, %f160;
	mul.ftz.f32 	%f405, %f404, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f599, %f405;
	bra.uni 	BB6_58;

BB6_57:
	neg.ftz.f32 	%f406, %f160;
	lg2.approx.ftz.f32 	%f407, %f406;
	mul.ftz.f32 	%f408, %f407, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f409, %f408;
	neg.ftz.f32 	%f599, %f409;

BB6_58:
	ld.shared.f32 	%f607, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+12];
	ld.shared.f32 	%f606, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+16];
	mul.ftz.f32 	%f410, %f155, %f606;
	fma.rn.ftz.f32 	%f411, %f154, %f607, %f410;
	ld.shared.f32 	%f605, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+20];
	fma.rn.ftz.f32 	%f167, %f156, %f605, %f411;
	setp.ltu.ftz.f32	%p33, %f167, 0f00000000;
	@%p33 bra 	BB6_60;

	lg2.approx.ftz.f32 	%f412, %f167;
	mul.ftz.f32 	%f413, %f412, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f600, %f413;
	bra.uni 	BB6_61;

BB6_60:
	neg.ftz.f32 	%f414, %f167;
	lg2.approx.ftz.f32 	%f415, %f414;
	mul.ftz.f32 	%f416, %f415, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f417, %f416;
	neg.ftz.f32 	%f600, %f417;

BB6_61:
	ld.shared.f32 	%f604, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix];
	ld.shared.f32 	%f603, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+4];
	mul.ftz.f32 	%f418, %f155, %f603;
	fma.rn.ftz.f32 	%f419, %f154, %f604, %f418;
	ld.shared.f32 	%f602, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+8];
	fma.rn.ftz.f32 	%f174, %f156, %f602, %f419;
	setp.ltu.ftz.f32	%p34, %f174, 0f00000000;
	@%p34 bra 	BB6_63;

	lg2.approx.ftz.f32 	%f420, %f174;
	mul.ftz.f32 	%f421, %f420, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f601, %f421;
	bra.uni 	BB6_64;

BB6_63:
	neg.ftz.f32 	%f422, %f174;
	lg2.approx.ftz.f32 	%f423, %f422;
	mul.ftz.f32 	%f424, %f423, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f425, %f424;
	neg.ftz.f32 	%f601, %f425;

BB6_64:
	mov.f32 	%f610, %f601;
	mov.f32 	%f609, %f600;
	mov.f32 	%f608, %f599;
	bra.uni 	BB6_66;

BB6_65:
	ld.shared.f32 	%f607, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+12];
	ld.shared.f32 	%f606, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+16];
	mul.ftz.f32 	%f426, %f155, %f606;
	fma.rn.ftz.f32 	%f427, %f154, %f607, %f426;
	ld.shared.f32 	%f605, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+20];
	fma.rn.ftz.f32 	%f609, %f156, %f605, %f427;
	ld.shared.f32 	%f604, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix];
	ld.shared.f32 	%f603, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+4];
	mul.ftz.f32 	%f428, %f155, %f603;
	fma.rn.ftz.f32 	%f429, %f154, %f604, %f428;
	ld.shared.f32 	%f602, [PixelFormatConvert_Bayer_To_BGRA_4444_32f_Lumetri_Chroma_Tweak_Kernel$__cuda_local_var_180740_376_non_const_matrix+8];
	fma.rn.ftz.f32 	%f610, %f156, %f602, %f429;
	mov.f32 	%f608, %f160;

BB6_66:
	mov.f32 	%f611, %f571;
	setp.ltu.ftz.f32	%p35, %f94, 0f00000000;
	@%p35 bra 	BB6_68;

	lg2.approx.ftz.f32 	%f430, %f94;
	mul.ftz.f32 	%f431, %f430, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f612, %f431;
	bra.uni 	BB6_69;

BB6_68:
	neg.ftz.f32 	%f432, %f94;
	lg2.approx.ftz.f32 	%f433, %f432;
	mul.ftz.f32 	%f434, %f433, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f435, %f434;
	neg.ftz.f32 	%f612, %f435;

BB6_69:
	setp.ltu.ftz.f32	%p36, %f573, 0f00000000;
	@%p36 bra 	BB6_71;

	lg2.approx.ftz.f32 	%f436, %f573;
	mul.ftz.f32 	%f437, %f436, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f613, %f437;
	bra.uni 	BB6_72;

BB6_71:
	neg.ftz.f32 	%f438, %f573;
	lg2.approx.ftz.f32 	%f439, %f438;
	mul.ftz.f32 	%f440, %f439, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f441, %f440;
	neg.ftz.f32 	%f613, %f441;

BB6_72:
	setp.ltu.ftz.f32	%p37, %f143, 0f00000000;
	@%p37 bra 	BB6_74;

	lg2.approx.ftz.f32 	%f442, %f143;
	mul.ftz.f32 	%f443, %f442, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f614, %f443;
	bra.uni 	BB6_75;

BB6_74:
	neg.ftz.f32 	%f444, %f143;
	lg2.approx.ftz.f32 	%f445, %f444;
	mul.ftz.f32 	%f446, %f445, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f447, %f446;
	neg.ftz.f32 	%f614, %f447;

BB6_75:
	min.ftz.f32 	%f211, %f612, %f318;
	min.ftz.f32 	%f212, %f613, %f319;
	min.ftz.f32 	%f213, %f614, %f320;
	mul.ftz.f32 	%f448, %f212, %f158;
	fma.rn.ftz.f32 	%f449, %f211, %f157, %f448;
	fma.rn.ftz.f32 	%f214, %f213, %f159, %f449;
	@%p31 bra 	BB6_86;

	setp.ltu.ftz.f32	%p39, %f214, 0f00000000;
	@%p39 bra 	BB6_78;

	lg2.approx.ftz.f32 	%f450, %f214;
	mul.ftz.f32 	%f451, %f450, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f615, %f451;
	bra.uni 	BB6_79;

BB6_78:
	neg.ftz.f32 	%f452, %f214;
	lg2.approx.ftz.f32 	%f453, %f452;
	mul.ftz.f32 	%f454, %f453, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f455, %f454;
	neg.ftz.f32 	%f615, %f455;

BB6_79:
	mul.ftz.f32 	%f456, %f212, %f606;
	fma.rn.ftz.f32 	%f457, %f211, %f607, %f456;
	fma.rn.ftz.f32 	%f218, %f213, %f605, %f457;
	setp.ltu.ftz.f32	%p40, %f218, 0f00000000;
	@%p40 bra 	BB6_81;

	lg2.approx.ftz.f32 	%f458, %f218;
	mul.ftz.f32 	%f459, %f458, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f616, %f459;
	bra.uni 	BB6_82;

BB6_81:
	neg.ftz.f32 	%f460, %f218;
	lg2.approx.ftz.f32 	%f461, %f460;
	mul.ftz.f32 	%f462, %f461, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f463, %f462;
	neg.ftz.f32 	%f616, %f463;

BB6_82:
	mul.ftz.f32 	%f464, %f212, %f603;
	fma.rn.ftz.f32 	%f465, %f211, %f604, %f464;
	fma.rn.ftz.f32 	%f222, %f213, %f602, %f465;
	setp.ltu.ftz.f32	%p41, %f222, 0f00000000;
	@%p41 bra 	BB6_84;

	lg2.approx.ftz.f32 	%f466, %f222;
	mul.ftz.f32 	%f467, %f466, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f617, %f467;
	bra.uni 	BB6_85;

BB6_84:
	neg.ftz.f32 	%f468, %f222;
	lg2.approx.ftz.f32 	%f469, %f468;
	mul.ftz.f32 	%f470, %f469, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f471, %f470;
	neg.ftz.f32 	%f617, %f471;

BB6_85:
	mov.f32 	%f620, %f617;
	mov.f32 	%f619, %f616;
	mov.f32 	%f618, %f615;
	bra.uni 	BB6_87;

BB6_86:
	mul.ftz.f32 	%f472, %f212, %f606;
	fma.rn.ftz.f32 	%f473, %f211, %f607, %f472;
	fma.rn.ftz.f32 	%f619, %f213, %f605, %f473;
	mul.ftz.f32 	%f474, %f212, %f603;
	fma.rn.ftz.f32 	%f475, %f211, %f604, %f474;
	fma.rn.ftz.f32 	%f620, %f213, %f602, %f475;
	mov.f32 	%f618, %f214;

BB6_87:
	mov.f32 	%f621, %f575;
	setp.ltu.ftz.f32	%p42, %f95, 0f00000000;
	@%p42 bra 	BB6_89;

	lg2.approx.ftz.f32 	%f476, %f95;
	mul.ftz.f32 	%f477, %f476, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f622, %f477;
	bra.uni 	BB6_90;

BB6_89:
	neg.ftz.f32 	%f478, %f95;
	lg2.approx.ftz.f32 	%f479, %f478;
	mul.ftz.f32 	%f480, %f479, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f481, %f480;
	neg.ftz.f32 	%f622, %f481;

BB6_90:
	setp.ltu.ftz.f32	%p43, %f577, 0f00000000;
	@%p43 bra 	BB6_92;

	lg2.approx.ftz.f32 	%f482, %f577;
	mul.ftz.f32 	%f483, %f482, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f623, %f483;
	bra.uni 	BB6_93;

BB6_92:
	neg.ftz.f32 	%f484, %f577;
	lg2.approx.ftz.f32 	%f485, %f484;
	mul.ftz.f32 	%f486, %f485, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f487, %f486;
	neg.ftz.f32 	%f623, %f487;

BB6_93:
	setp.ltu.ftz.f32	%p44, %f144, 0f00000000;
	@%p44 bra 	BB6_95;

	lg2.approx.ftz.f32 	%f488, %f144;
	mul.ftz.f32 	%f489, %f488, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f624, %f489;
	bra.uni 	BB6_96;

BB6_95:
	neg.ftz.f32 	%f490, %f144;
	lg2.approx.ftz.f32 	%f491, %f490;
	mul.ftz.f32 	%f492, %f491, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f493, %f492;
	neg.ftz.f32 	%f624, %f493;

BB6_96:
	min.ftz.f32 	%f247, %f622, %f318;
	min.ftz.f32 	%f248, %f623, %f319;
	min.ftz.f32 	%f249, %f624, %f320;
	mul.ftz.f32 	%f494, %f248, %f158;
	fma.rn.ftz.f32 	%f495, %f247, %f157, %f494;
	fma.rn.ftz.f32 	%f250, %f249, %f159, %f495;
	@%p31 bra 	BB6_107;

	setp.ltu.ftz.f32	%p46, %f250, 0f00000000;
	@%p46 bra 	BB6_99;

	lg2.approx.ftz.f32 	%f496, %f250;
	mul.ftz.f32 	%f497, %f496, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f625, %f497;
	bra.uni 	BB6_100;

BB6_99:
	neg.ftz.f32 	%f498, %f250;
	lg2.approx.ftz.f32 	%f499, %f498;
	mul.ftz.f32 	%f500, %f499, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f501, %f500;
	neg.ftz.f32 	%f625, %f501;

BB6_100:
	mul.ftz.f32 	%f502, %f248, %f606;
	fma.rn.ftz.f32 	%f503, %f247, %f607, %f502;
	fma.rn.ftz.f32 	%f254, %f249, %f605, %f503;
	setp.ltu.ftz.f32	%p47, %f254, 0f00000000;
	@%p47 bra 	BB6_102;

	lg2.approx.ftz.f32 	%f504, %f254;
	mul.ftz.f32 	%f505, %f504, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f626, %f505;
	bra.uni 	BB6_103;

BB6_102:
	neg.ftz.f32 	%f506, %f254;
	lg2.approx.ftz.f32 	%f507, %f506;
	mul.ftz.f32 	%f508, %f507, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f509, %f508;
	neg.ftz.f32 	%f626, %f509;

BB6_103:
	mul.ftz.f32 	%f510, %f248, %f603;
	fma.rn.ftz.f32 	%f511, %f247, %f604, %f510;
	fma.rn.ftz.f32 	%f258, %f249, %f602, %f511;
	setp.ltu.ftz.f32	%p48, %f258, 0f00000000;
	@%p48 bra 	BB6_105;

	lg2.approx.ftz.f32 	%f512, %f258;
	mul.ftz.f32 	%f513, %f512, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f627, %f513;
	bra.uni 	BB6_106;

BB6_105:
	neg.ftz.f32 	%f514, %f258;
	lg2.approx.ftz.f32 	%f515, %f514;
	mul.ftz.f32 	%f516, %f515, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f517, %f516;
	neg.ftz.f32 	%f627, %f517;

BB6_106:
	mov.f32 	%f630, %f627;
	mov.f32 	%f629, %f626;
	mov.f32 	%f628, %f625;
	bra.uni 	BB6_108;

BB6_107:
	mul.ftz.f32 	%f518, %f248, %f606;
	fma.rn.ftz.f32 	%f519, %f247, %f607, %f518;
	fma.rn.ftz.f32 	%f629, %f249, %f605, %f519;
	mul.ftz.f32 	%f520, %f248, %f603;
	fma.rn.ftz.f32 	%f521, %f247, %f604, %f520;
	fma.rn.ftz.f32 	%f630, %f249, %f602, %f521;
	mov.f32 	%f628, %f250;

BB6_108:
	mov.f32 	%f631, %f579;
	setp.ltu.ftz.f32	%p49, %f97, 0f00000000;
	@%p49 bra 	BB6_110;

	lg2.approx.ftz.f32 	%f522, %f97;
	mul.ftz.f32 	%f523, %f522, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f632, %f523;
	bra.uni 	BB6_111;

BB6_110:
	neg.ftz.f32 	%f524, %f97;
	lg2.approx.ftz.f32 	%f525, %f524;
	mul.ftz.f32 	%f526, %f525, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f527, %f526;
	neg.ftz.f32 	%f632, %f527;

BB6_111:
	setp.ltu.ftz.f32	%p50, %f581, 0f00000000;
	@%p50 bra 	BB6_113;

	lg2.approx.ftz.f32 	%f528, %f581;
	mul.ftz.f32 	%f529, %f528, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f633, %f529;
	bra.uni 	BB6_114;

BB6_113:
	neg.ftz.f32 	%f530, %f581;
	lg2.approx.ftz.f32 	%f531, %f530;
	mul.ftz.f32 	%f532, %f531, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f533, %f532;
	neg.ftz.f32 	%f633, %f533;

BB6_114:
	setp.ltu.ftz.f32	%p51, %f580, 0f00000000;
	@%p51 bra 	BB6_116;

	lg2.approx.ftz.f32 	%f534, %f580;
	mul.ftz.f32 	%f535, %f534, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f634, %f535;
	bra.uni 	BB6_117;

BB6_116:
	neg.ftz.f32 	%f536, %f580;
	lg2.approx.ftz.f32 	%f537, %f536;
	mul.ftz.f32 	%f538, %f537, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f539, %f538;
	neg.ftz.f32 	%f634, %f539;

BB6_117:
	min.ftz.f32 	%f287, %f632, %f318;
	min.ftz.f32 	%f288, %f633, %f319;
	min.ftz.f32 	%f289, %f634, %f320;
	mul.ftz.f32 	%f540, %f288, %f158;
	fma.rn.ftz.f32 	%f541, %f287, %f157, %f540;
	fma.rn.ftz.f32 	%f290, %f289, %f159, %f541;
	@%p31 bra 	BB6_128;

	setp.ltu.ftz.f32	%p53, %f290, 0f00000000;
	@%p53 bra 	BB6_120;

	lg2.approx.ftz.f32 	%f542, %f290;
	mul.ftz.f32 	%f543, %f542, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f635, %f543;
	bra.uni 	BB6_121;

BB6_120:
	neg.ftz.f32 	%f544, %f290;
	lg2.approx.ftz.f32 	%f545, %f544;
	mul.ftz.f32 	%f546, %f545, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f547, %f546;
	neg.ftz.f32 	%f635, %f547;

BB6_121:
	mul.ftz.f32 	%f548, %f288, %f606;
	fma.rn.ftz.f32 	%f549, %f287, %f607, %f548;
	fma.rn.ftz.f32 	%f294, %f289, %f605, %f549;
	setp.ltu.ftz.f32	%p54, %f294, 0f00000000;
	@%p54 bra 	BB6_123;

	lg2.approx.ftz.f32 	%f550, %f294;
	mul.ftz.f32 	%f551, %f550, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f636, %f551;
	bra.uni 	BB6_124;

BB6_123:
	neg.ftz.f32 	%f552, %f294;
	lg2.approx.ftz.f32 	%f553, %f552;
	mul.ftz.f32 	%f554, %f553, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f555, %f554;
	neg.ftz.f32 	%f636, %f555;

BB6_124:
	mul.ftz.f32 	%f556, %f288, %f603;
	fma.rn.ftz.f32 	%f557, %f287, %f604, %f556;
	fma.rn.ftz.f32 	%f298, %f289, %f602, %f557;
	setp.ltu.ftz.f32	%p55, %f298, 0f00000000;
	@%p55 bra 	BB6_126;

	lg2.approx.ftz.f32 	%f558, %f298;
	mul.ftz.f32 	%f559, %f558, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f637, %f559;
	bra.uni 	BB6_127;

BB6_126:
	neg.ftz.f32 	%f560, %f298;
	lg2.approx.ftz.f32 	%f561, %f560;
	mul.ftz.f32 	%f562, %f561, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f563, %f562;
	neg.ftz.f32 	%f637, %f563;

BB6_127:
	mov.f32 	%f640, %f637;
	mov.f32 	%f639, %f636;
	mov.f32 	%f638, %f635;
	bra.uni 	BB6_129;

BB6_128:
	mul.ftz.f32 	%f564, %f288, %f606;
	fma.rn.ftz.f32 	%f565, %f287, %f607, %f564;
	fma.rn.ftz.f32 	%f639, %f289, %f605, %f565;
	mul.ftz.f32 	%f566, %f288, %f603;
	fma.rn.ftz.f32 	%f567, %f287, %f604, %f566;
	fma.rn.ftz.f32 	%f640, %f289, %f602, %f567;
	mov.f32 	%f638, %f290;

BB6_129:
	mov.f32 	%f641, %f583;
	add.s32 	%r47, %r14, -1;
	sub.s32 	%r48, %r47, %r4;
	setp.eq.s32	%p56, %r11, 0;
	selp.b32	%r49, %r4, %r48, %p56;
	mul.lo.s32 	%r8, %r49, %r10;
	add.s32 	%r55, %r8, %r3;
	cvt.s64.s32	%rd12, %r55;
	@%p8 bra 	BB6_131;

	cvta.to.global.u64 	%rd58, %rd16;
	shl.b64 	%rd59, %rd12, 4;
	add.s64 	%rd60, %rd58, %rd59;
	st.global.v4.f32 	[%rd60], {%f608, %f609, %f610, %f611};
	bra.uni 	BB6_132;

BB6_131:
	cvta.to.global.u64 	%rd61, %rd16;
	shl.b64 	%rd62, %rd12, 3;
	add.s64 	%rd63, %rd61, %rd62;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f608;
	mov.b16 	%rs81, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f609;
	mov.b16 	%rs82, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f610;
	mov.b16 	%rs83, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f611;
	mov.b16 	%rs84, %temp;
}
	st.global.v4.u16 	[%rd63], {%rs81, %rs82, %rs83, %rs84};

BB6_132:
	add.s32 	%r61, %r3, %r8;
	add.s32 	%r62, %r61, 1;
	cvt.s64.s32	%rd13, %r62;
	@%p8 bra 	BB6_134;

	cvta.to.global.u64 	%rd64, %rd16;
	shl.b64 	%rd65, %rd13, 4;
	add.s64 	%rd66, %rd64, %rd65;
	st.global.v4.f32 	[%rd66], {%f618, %f619, %f620, %f621};
	bra.uni 	BB6_135;

BB6_134:
	cvta.to.global.u64 	%rd67, %rd16;
	shl.b64 	%rd68, %rd13, 3;
	add.s64 	%rd69, %rd67, %rd68;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f618;
	mov.b16 	%rs85, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f619;
	mov.b16 	%rs86, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f620;
	mov.b16 	%rs87, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f621;
	mov.b16 	%rs88, %temp;
}
	st.global.v4.u16 	[%rd69], {%rs85, %rs86, %rs87, %rs88};

BB6_135:
	selp.b32	%r71, 1, -1, %p56;
	add.s32 	%r72, %r49, %r71;
	mul.lo.s32 	%r9, %r72, %r10;
	add.s32 	%r78, %r9, %r3;
	cvt.s64.s32	%rd14, %r78;
	@%p8 bra 	BB6_137;

	cvta.to.global.u64 	%rd70, %rd16;
	shl.b64 	%rd71, %rd14, 4;
	add.s64 	%rd72, %rd70, %rd71;
	st.global.v4.f32 	[%rd72], {%f628, %f629, %f630, %f631};
	bra.uni 	BB6_138;

BB6_137:
	cvta.to.global.u64 	%rd73, %rd16;
	shl.b64 	%rd74, %rd14, 3;
	add.s64 	%rd75, %rd73, %rd74;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f631;
	mov.b16 	%rs89, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f630;
	mov.b16 	%rs90, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f629;
	mov.b16 	%rs91, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f628;
	mov.b16 	%rs92, %temp;
}
	st.global.v4.u16 	[%rd75], {%rs92, %rs91, %rs90, %rs89};

BB6_138:
	add.s32 	%r84, %r3, %r9;
	add.s32 	%r85, %r84, 1;
	cvt.s64.s32	%rd15, %r85;
	@%p8 bra 	BB6_140;

	cvta.to.global.u64 	%rd76, %rd16;
	shl.b64 	%rd77, %rd15, 4;
	add.s64 	%rd78, %rd76, %rd77;
	st.global.v4.f32 	[%rd78], {%f638, %f639, %f640, %f641};
	bra.uni 	BB6_141;

BB6_140:
	cvta.to.global.u64 	%rd79, %rd16;
	shl.b64 	%rd80, %rd15, 3;
	add.s64 	%rd81, %rd79, %rd80;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f641;
	mov.b16 	%rs93, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f640;
	mov.b16 	%rs94, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f639;
	mov.b16 	%rs95, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f638;
	mov.b16 	%rs96, %temp;
}
	st.global.v4.u16 	[%rd81], {%rs96, %rs95, %rs94, %rs93};

BB6_141:
	ret;
}


