//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Fri Jul 25 04:36:16 2014 (1406288176)
// Cuda compilation tools, release 6.5, V6.5.13
//

// Module-level directives: PTX ISA version used by this file, the minimum
// target architecture it is compiled for, and the width of addresses (in bits).
.version 4.1
.target sm_30
.address_size 64


// CrossDissolveKernel: blends one pixel from each of two 4-component input
// buffers into a destination buffer, one pixel per thread.
//
// Thread -> pixel mapping:
//   x = ntid.x * ctaid.x + tid.x
//   y = ntid.y * ctaid.y + tid.y
// Threads with x >= param_7 or y >= param_8 return without touching memory.
//
// Parameters, as used by the code below:
//   param_0  (u64) address of the first input buffer
//   param_1  (u32) row stride of the first input, in pixels (index = y * stride + x)
//   param_2  (u64) address of the second input buffer
//   param_3  (u32) row stride of the second input, in pixels
//   param_4  (u64) address of the destination buffer
//   param_5  (u32) row stride of the destination, in pixels
//   param_6  (u32) format flag: nonzero -> a pixel is 4 x f32 (16 bytes),
//                  zero -> a pixel is 4 x f16 (8 bytes, widened to f32 for the math)
//   param_7  (u32) exclusive upper bound on x
//   param_8  (u32) exclusive upper bound on y
//   param_9  (f32) blend weight t: the first input is weighted by t, the second by (1 - t)
//   param_10 (u32) curve flag: nonzero -> components 0..2 of both inputs are mapped
//                  through sign(v) * |v|^2.2 before blending, and the blended
//                  components 0..2 through sign(v) * |v|^(1/2.2) afterwards
//                  (presumably a gamma encode/decode pair -- confirm against the CUDA source)
//
// Component 3 of each pixel is used as a per-pixel weight in the blend
// (presumably alpha -- confirm against the CUDA source).
//
// Float constants that appear as raw bit patterns:
//   0f400CCCCD = 2.2            0f3EE8BA2E ~= 0.4545 (1 / 2.2)
//   0f370637BD ~= 8e-6          0fB70637BD ~= -8e-6
//   0f3F800000 = 1.0            0f00000000 = 0.0
.visible .entry CrossDissolveKernel(
	.param .u64 CrossDissolveKernel_param_0,
	.param .u32 CrossDissolveKernel_param_1,
	.param .u64 CrossDissolveKernel_param_2,
	.param .u32 CrossDissolveKernel_param_3,
	.param .u64 CrossDissolveKernel_param_4,
	.param .u32 CrossDissolveKernel_param_5,
	.param .u32 CrossDissolveKernel_param_6,
	.param .u32 CrossDissolveKernel_param_7,
	.param .u32 CrossDissolveKernel_param_8,
	.param .f32 CrossDissolveKernel_param_9,
	.param .u32 CrossDissolveKernel_param_10
)
{
	.reg .pred 	%p<21>;
	.reg .s16 	%rs<21>;
	.reg .s32 	%r<27>;
	.reg .f32 	%f<216>;
	.reg .s64 	%rd<23>;


	// Load all kernel parameters into registers.
	ld.param.u64 	%rd7, [CrossDissolveKernel_param_0];
	ld.param.u32 	%r3, [CrossDissolveKernel_param_1];
	ld.param.u64 	%rd8, [CrossDissolveKernel_param_2];
	ld.param.u32 	%r4, [CrossDissolveKernel_param_3];
	ld.param.u64 	%rd6, [CrossDissolveKernel_param_4];
	ld.param.u32 	%r5, [CrossDissolveKernel_param_5];
	ld.param.u32 	%r6, [CrossDissolveKernel_param_6];
	ld.param.u32 	%r8, [CrossDissolveKernel_param_7];
	ld.param.u32 	%r9, [CrossDissolveKernel_param_8];
	ld.param.f32 	%f80, [CrossDissolveKernel_param_9];
	ld.param.u32 	%r7, [CrossDissolveKernel_param_10];
	// %rd1 = second input, %rd2 = first input, both as global-space addresses.
	cvta.to.global.u64 	%rd1, %rd8;
	cvta.to.global.u64 	%rd2, %rd7;
	// %r1 = x, %r2 = y (global thread coordinates).
	mov.u32 	%r10, %ntid.x;
	mov.u32 	%r11, %ctaid.x;
	mov.u32 	%r12, %tid.x;
	mad.lo.s32 	%r1, %r10, %r11, %r12;
	mov.u32 	%r13, %ntid.y;
	mov.u32 	%r14, %ctaid.y;
	mov.u32 	%r15, %tid.y;
	mad.lo.s32 	%r2, %r13, %r14, %r15;
	// Bounds check (signed compares): proceed only if x < param_7 and y < param_8.
	setp.lt.s32	%p1, %r1, %r8;
	setp.lt.s32	%p2, %r2, %r9;
	and.pred  	%p3, %p1, %p2;
	@!%p3 bra 	BB0_46;
	bra.uni 	BB0_1;

	// ---- Load the first input pixel into %f165..%f168 (components 0..3) ----
BB0_1:
	// Pixel index = y * param_1 + x, computed in 32 bits and then sign-extended.
	mad.lo.s32 	%r16, %r2, %r3, %r1;
	cvt.s64.s32	%rd3, %r16;
	// %p4 = (param_6 == 0) selects the f16 path; it is reused for the second
	// input load and for the final store.
	setp.eq.s32	%p4, %r6, 0;
	@%p4 bra 	BB0_3;

	// f32 path: 16 bytes per pixel (index << 4), one vector load of four floats.
	shl.b64 	%rd9, %rd3, 4;
	add.s64 	%rd10, %rd2, %rd9;
	ld.global.v4.f32 	{%f81, %f82, %f83, %f84}, [%rd10];
	mov.f32 	%f168, %f84;
	mov.f32 	%f167, %f83;
	mov.f32 	%f166, %f82;
	mov.f32 	%f165, %f81;
	bra.uni 	BB0_4;

BB0_3:
	// f16 path: 8 bytes per pixel (index << 3); load four 16-bit values and
	// widen each one to f32.
	shl.b64 	%rd11, %rd3, 3;
	add.s64 	%rd12, %rd2, %rd11;
	ld.global.v4.u16 	{%rs1, %rs2, %rs3, %rs4}, [%rd12];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs1;
	cvt.f32.f16 	%f165, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs2;
	cvt.f32.f16 	%f166, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs3;
	cvt.f32.f16 	%f167, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs4;
	cvt.f32.f16 	%f168, %temp;
	}

	// ---- Load the second input pixel into %f169..%f172 (components 0..3) ----
BB0_4:
	// Pixel index = y * param_3 + x.
	mad.lo.s32 	%r17, %r2, %r4, %r1;
	cvt.s64.s32	%rd4, %r17;
	// Defaults that survive when later branches skip their overwrites:
	// %f186/%f202/%f213 = first pixel's components 0..2 (used unchanged when
	// param_10 == 0); %f173 = first pixel's component 3 (stays the output
	// component 3 in the "nearly equal weights" blend case, BB0_32).
	mov.f32 	%f186, %f165;
	mov.f32 	%f202, %f166;
	mov.f32 	%f213, %f167;
	mov.f32 	%f173, %f168;
	@%p4 bra 	BB0_6;

	// f32 path for the second input.
	shl.b64 	%rd13, %rd4, 4;
	add.s64 	%rd14, %rd1, %rd13;
	ld.global.v4.f32 	{%f85, %f86, %f87, %f88}, [%rd14];
	mov.f32 	%f172, %f88;
	mov.f32 	%f171, %f87;
	mov.f32 	%f170, %f86;
	mov.f32 	%f169, %f85;
	bra.uni 	BB0_7;

BB0_6:
	// f16 path for the second input.
	shl.b64 	%rd15, %rd4, 3;
	add.s64 	%rd16, %rd1, %rd15;
	ld.global.v4.u16 	{%rs9, %rs10, %rs11, %rs12}, [%rd16];
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs9;
	cvt.f32.f16 	%f169, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs10;
	cvt.f32.f16 	%f170, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs11;
	cvt.f32.f16 	%f171, %temp;
	}
	{
	.reg .b16 %temp;
	mov.b16 	%temp, %rs12;
	cvt.f32.f16 	%f172, %temp;
	}

	// ---- Optional input curve: v -> sign(v) * |v|^2.2 on components 0..2 ----
BB0_7:
	// %p6 = (param_10 == 0): skip the curve entirely. It is reused after the
	// blend to skip the inverse curve as well.
	setp.eq.s32	%p6, %r7, 0;
	// Defaults for the second pixel's components 0..2 when the curve is skipped.
	mov.f32 	%f188, %f169;
	mov.f32 	%f204, %f170;
	mov.f32 	%f215, %f171;
	@%p6 bra 	BB0_26;

	// Each component below follows the same pattern: the power is evaluated as
	// ex2(lg2(v) * 2.2) using the approximate, flush-to-zero instructions.
	// setp.ltu is true for v < 0 and for NaN; those values take the branch that
	// negates the input, applies the power, and negates the result.

	// First pixel, component 0 -> %f187.
	setp.ltu.ftz.f32	%p7, %f165, 0f00000000;
	@%p7 bra 	BB0_10;

	lg2.approx.ftz.f32 	%f89, %f165;
	mul.ftz.f32 	%f90, %f89, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f187, %f90;
	bra.uni 	BB0_11;

BB0_10:
	neg.ftz.f32 	%f91, %f165;
	lg2.approx.ftz.f32 	%f92, %f91;
	mul.ftz.f32 	%f93, %f92, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f94, %f93;
	neg.ftz.f32 	%f187, %f94;

BB0_11:
	mov.f32 	%f186, %f187;
	// First pixel, component 1 -> %f203.
	setp.ltu.ftz.f32	%p8, %f166, 0f00000000;
	@%p8 bra 	BB0_13;

	lg2.approx.ftz.f32 	%f95, %f166;
	mul.ftz.f32 	%f96, %f95, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f203, %f96;
	bra.uni 	BB0_14;

BB0_13:
	neg.ftz.f32 	%f97, %f166;
	lg2.approx.ftz.f32 	%f98, %f97;
	mul.ftz.f32 	%f99, %f98, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f100, %f99;
	neg.ftz.f32 	%f203, %f100;

BB0_14:
	mov.f32 	%f202, %f203;
	// First pixel, component 2 -> %f214.
	setp.ltu.ftz.f32	%p9, %f167, 0f00000000;
	@%p9 bra 	BB0_16;

	lg2.approx.ftz.f32 	%f101, %f167;
	mul.ftz.f32 	%f102, %f101, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f214, %f102;
	bra.uni 	BB0_17;

BB0_16:
	neg.ftz.f32 	%f103, %f167;
	lg2.approx.ftz.f32 	%f104, %f103;
	mul.ftz.f32 	%f105, %f104, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f106, %f105;
	neg.ftz.f32 	%f214, %f106;

BB0_17:
	mov.f32 	%f213, %f214;
	// Second pixel, component 0 -> %f189.
	setp.ltu.ftz.f32	%p10, %f169, 0f00000000;
	@%p10 bra 	BB0_19;

	lg2.approx.ftz.f32 	%f107, %f169;
	mul.ftz.f32 	%f108, %f107, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f189, %f108;
	bra.uni 	BB0_20;

BB0_19:
	neg.ftz.f32 	%f109, %f169;
	lg2.approx.ftz.f32 	%f110, %f109;
	mul.ftz.f32 	%f111, %f110, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f112, %f111;
	neg.ftz.f32 	%f189, %f112;

BB0_20:
	mov.f32 	%f188, %f189;
	// Second pixel, component 1 -> %f205.
	setp.ltu.ftz.f32	%p11, %f170, 0f00000000;
	@%p11 bra 	BB0_22;

	lg2.approx.ftz.f32 	%f113, %f170;
	mul.ftz.f32 	%f114, %f113, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f205, %f114;
	bra.uni 	BB0_23;

BB0_22:
	neg.ftz.f32 	%f115, %f170;
	lg2.approx.ftz.f32 	%f116, %f115;
	mul.ftz.f32 	%f117, %f116, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f118, %f117;
	neg.ftz.f32 	%f205, %f118;

BB0_23:
	mov.f32 	%f204, %f205;
	// Second pixel, component 2 -> %f215.
	setp.ltu.ftz.f32	%p12, %f171, 0f00000000;
	@%p12 bra 	BB0_25;

	lg2.approx.ftz.f32 	%f119, %f171;
	mul.ftz.f32 	%f120, %f119, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f215, %f120;
	bra.uni 	BB0_26;

BB0_25:
	neg.ftz.f32 	%f121, %f171;
	lg2.approx.ftz.f32 	%f122, %f121;
	mul.ftz.f32 	%f123, %f122, 0f400CCCCD;
	ex2.approx.ftz.f32 	%f124, %f123;
	neg.ftz.f32 	%f215, %f124;

	// ---- Blend ----
	// Working values from here on:
	//   c1 = (%f49, %f50, %f51)  first pixel, components 0..2 (after optional curve)
	//   c2 = (%f52, %f53, %f54)  second pixel, components 0..2 (after optional curve)
	//   a1 = %f168, a2 = %f172   component 3 of the first / second pixel (unmodified)
	//   t  = %f80                blend weight (param_9)
	// Results are written to (%f185, %f201, %f212) for components 0..2 and
	// %f173 for component 3.
BB0_26:
	mov.f32 	%f54, %f215;
	mov.f32 	%f51, %f213;
	mov.f32 	%f53, %f204;
	mov.f32 	%f50, %f202;
	mov.f32 	%f52, %f188;
	mov.f32 	%f49, %f186;
	// Case 1: a1 - ~8e-6 <= 0 (first pixel's weight is effectively zero).
	// setp.gtu is also true for NaN, so a NaN a1 does not take this case.
	add.ftz.f32 	%f125, %f168, 0fB70637BD;
	setp.gtu.ftz.f32	%p13, %f125, 0f00000000;
	@%p13 bra 	BB0_28;

	// Output component 3 = a2 * (1 - t); components 0..2 are copied from c2.
	mov.f32 	%f126, 0f3F800000;
	sub.ftz.f32 	%f127, %f126, %f80;
	mul.ftz.f32 	%f173, %f172, %f127;
	mov.f32 	%f185, %f52;
	mov.f32 	%f201, %f53;
	mov.f32 	%f212, %f54;
	bra.uni 	BB0_33;

BB0_28:
	// Case 2: a2 - ~8e-6 <= 0 (second pixel's weight is effectively zero).
	add.ftz.f32 	%f128, %f172, 0fB70637BD;
	setp.gtu.ftz.f32	%p14, %f128, 0f00000000;
	@%p14 bra 	BB0_30;

	// Output component 3 = a1 * t; components 0..2 are copied from c1.
	mul.ftz.f32 	%f173, %f168, %f80;
	mov.f32 	%f185, %f49;
	mov.f32 	%f201, %f50;
	mov.f32 	%f212, %f51;
	bra.uni 	BB0_33;

BB0_30:
	// Case 3 is chosen when |a1 - a2| < ~8e-6; otherwise fall through to case 4.
	sub.ftz.f32 	%f129, %f168, %f172;
	abs.ftz.f32 	%f130, %f129;
	setp.lt.ftz.f32	%p15, %f130, 0f370637BD;
	@%p15 bra 	BB0_32;

	// Case 4 (general): weighted blend normalized by the combined weight.
	//   w1 = a1 * t              (%f131)
	//   w2 = a2 * (1 - t)        (%f134)
	//   output component 3 = w1 + w2
	//   output components 0..2 = (c1 * w1 + c2 * w2) * (1 / (w1 + w2))
	// The reciprocal uses the approximate divide.
	mul.ftz.f32 	%f131, %f168, %f80;
	mov.f32 	%f132, 0f3F800000;
	sub.ftz.f32 	%f133, %f132, %f80;
	mul.ftz.f32 	%f134, %f172, %f133;
	add.ftz.f32 	%f173, %f131, %f134;
	div.approx.ftz.f32 	%f135, %f132, %f173;
	mul.ftz.f32 	%f136, %f52, %f134;
	fma.rn.ftz.f32 	%f137, %f49, %f131, %f136;
	mul.ftz.f32 	%f185, %f137, %f135;
	mul.ftz.f32 	%f138, %f53, %f134;
	fma.rn.ftz.f32 	%f139, %f50, %f131, %f138;
	mul.ftz.f32 	%f201, %f139, %f135;
	mul.ftz.f32 	%f140, %f54, %f134;
	fma.rn.ftz.f32 	%f141, %f51, %f131, %f140;
	mul.ftz.f32 	%f212, %f141, %f135;
	bra.uni 	BB0_33;

BB0_32:
	// Case 3: plain linear interpolation, components 0..2 = c1 * t + c2 * (1 - t).
	// %f173 is not written here, so output component 3 keeps the value a1
	// assigned in BB0_4.
	mov.f32 	%f142, 0f3F800000;
	sub.ftz.f32 	%f143, %f142, %f80;
	mul.ftz.f32 	%f144, %f52, %f143;
	fma.rn.ftz.f32 	%f185, %f49, %f80, %f144;
	mul.ftz.f32 	%f145, %f53, %f143;
	fma.rn.ftz.f32 	%f201, %f50, %f80, %f145;
	mul.ftz.f32 	%f146, %f54, %f143;
	fma.rn.ftz.f32 	%f212, %f51, %f80, %f146;

	// ---- Optional output curve: v -> sign(v) * |v|^(1/2.2) on components 0..2 ----
BB0_33:
	// Output components 0..2 now live in %f183, %f199, %f211.
	mov.f32 	%f211, %f212;
	mov.f32 	%f199, %f201;
	mov.f32 	%f183, %f185;
	// Skipped when param_10 == 0, mirroring the input curve above.
	@%p6 bra 	BB0_43;

	// Same sign-preserving lg2 / mul / ex2 pattern as the input curve, with
	// exponent 0f3EE8BA2E (~0.4545).

	// Component 0 -> %f184.
	setp.ltu.ftz.f32	%p17, %f183, 0f00000000;
	@%p17 bra 	BB0_36;

	lg2.approx.ftz.f32 	%f147, %f183;
	mul.ftz.f32 	%f148, %f147, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f184, %f148;
	bra.uni 	BB0_37;

BB0_36:
	neg.ftz.f32 	%f149, %f183;
	lg2.approx.ftz.f32 	%f150, %f149;
	mul.ftz.f32 	%f151, %f150, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f152, %f151;
	neg.ftz.f32 	%f184, %f152;

BB0_37:
	mov.f32 	%f183, %f184;
	// Component 1 -> %f200.
	setp.ltu.ftz.f32	%p18, %f199, 0f00000000;
	@%p18 bra 	BB0_39;

	lg2.approx.ftz.f32 	%f153, %f199;
	mul.ftz.f32 	%f154, %f153, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f200, %f154;
	bra.uni 	BB0_40;

BB0_39:
	neg.ftz.f32 	%f155, %f199;
	lg2.approx.ftz.f32 	%f156, %f155;
	mul.ftz.f32 	%f157, %f156, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f158, %f157;
	neg.ftz.f32 	%f200, %f158;

BB0_40:
	mov.f32 	%f199, %f200;
	// Component 2 is updated in place in %f211.
	setp.ltu.ftz.f32	%p19, %f211, 0f00000000;
	@%p19 bra 	BB0_42;

	lg2.approx.ftz.f32 	%f159, %f211;
	mul.ftz.f32 	%f160, %f159, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f211, %f160;
	bra.uni 	BB0_43;

BB0_42:
	neg.ftz.f32 	%f161, %f211;
	lg2.approx.ftz.f32 	%f162, %f161;
	mul.ftz.f32 	%f163, %f162, 0f3EE8BA2E;
	ex2.approx.ftz.f32 	%f164, %f163;
	neg.ftz.f32 	%f211, %f164;

	// ---- Store the result pixel (%f183, %f199, %f211, %f173) ----
BB0_43:
	// Pixel index = y * param_5 + x.
	mad.lo.s32 	%r26, %r2, %r5, %r1;
	cvt.s64.s32	%rd5, %r26;
	// Same format selection as the loads: %p4 = (param_6 == 0) -> f16 path.
	@%p4 bra 	BB0_45;

	// f32 path: 16 bytes per pixel, one vector store of four floats.
	cvta.to.global.u64 	%rd17, %rd6;
	shl.b64 	%rd18, %rd5, 4;
	add.s64 	%rd19, %rd17, %rd18;
	st.global.v4.f32 	[%rd19], {%f183, %f199, %f211, %f173};
	bra.uni 	BB0_46;

BB0_45:
	// f16 path: 8 bytes per pixel. Each component is narrowed to f16 with
	// round-to-nearest; the conversions run in the order 3, 2, 1, 0, but the
	// vector store below writes them in component order 0..3.
	cvta.to.global.u64 	%rd20, %rd6;
	shl.b64 	%rd21, %rd5, 3;
	add.s64 	%rd22, %rd20, %rd21;
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f173;
	mov.b16 	%rs17, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f211;
	mov.b16 	%rs18, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f199;
	mov.b16 	%rs19, %temp;
}
	{
	.reg .b16 %temp;
	cvt.rn.ftz.f16.f32 	%temp, %f183;
	mov.b16 	%rs20, %temp;
}
	st.global.v4.u16 	[%rd22], {%rs20, %rs19, %rs18, %rs17};

	// Common exit, also reached directly by out-of-bounds threads.
BB0_46:
	ret;
}


