
        ;; Function __udiv64 calculates the quotient and
        ;; the remainder of two unsigned long long integers.
        ;; Function __div64 does the same for signed operands by
        ;; making the dividend and divisor positive, calling
        ;; __udiv64, and then fixing up the signs of the results.
        
        .section        .text.libc

        ;; __div64 - signed 64-bit division with remainder.
        ;;
        ;; In:   r3:r4 = dividend (msw:lsw)
        ;;       r5:r6 = divisor  (msw:lsw)
        ;; Out:  r3:r4 = quotient
        ;;       r5:r6 = remainder
        ;;
        ;; Both operands are made non-negative, __udiv64 performs the
        ;; unsigned division, and the signs are fixed up afterwards:
        ;; the remainder takes the sign of the dividend, and the
        ;; quotient is negated when the operand signs differ.
        ;;
        ;; Sign flags are held in CR fields across the call:
        ;;   cr1[lt] = dividend was negative
        ;;   cr5[lt] = dividend sign ^ divisor sign (quotient negative)
        ;; This relies on __udiv64 (in this file) modifying no CR field
        ;; other than cr0.  cr5 is used instead of cr4 because cr2-cr4
        ;; are non-volatile under the PowerPC SysV/EABI calling
        ;; convention and would have to be saved and restored.
        ;;
        ;; Clobbers: r0, cr0, cr1, cr5, XER[CA], plus whatever __udiv64
        ;;           clobbers.  LR is saved and restored.
        ;; Stack:    8-byte frame; LR is stored 4 bytes above the
        ;;           caller's stack pointer.
        .global __div64
__div64:
        stwu    %r1,-8(%r1)     ; allocate an 8-byte frame
        mflr    %r0
        stw     %r0,12(%r1)     ; save LR at (caller's sp + 4)
        cmpwi   %cr1,%r3,0      ; cr1[lt] = (dividend < 0)
        bge     %cr1,dvd_pos
        subfic  %r4,%r4,0       ; dividend = -dividend (lsw, sets CA)
        subfze  %r3,%r3         ; msw = ~msw + CA

dvd_pos:
        cmpwi   %cr0,%r5,0      ; cr0[lt] = (divisor < 0)
        crxor   20,4,0          ; cr5[lt] = cr1[lt] ^ cr0[lt]
        bge     %cr0,dvs_pos
        subfic  %r6,%r6,0       ; divisor = -divisor (lsw, sets CA)
        subfze  %r5,%r5         ; msw = ~msw + CA

dvs_pos:
        bl      __udiv64        ; r3:r4 = quotient, r5:r6 = remainder

        bge     %cr1,rem_pos    ; remainder takes the dividend's sign
        subfic  %r6,%r6,0
        subfze  %r5,%r5

rem_pos:
        bge     %cr5,quo_pos    ; negate quotient if operand signs differed
        subfic  %r4,%r4,0
        subfze  %r3,%r3

quo_pos:
        lwz     %r0,12(%r1)     ; reload saved LR
        addi    %r1,%r1,8       ; release the frame
        mtlr    %r0
        blr
        
        .global __udiv64
__udiv64:       
        ;; Unsigned 64-bit division with remainder (restoring
        ;; shift-and-subtract division).
        ;; From the Compiler Writer's Guide
        ;;
        ;;   (r3:r4) = (r3:r4) / (r5:r6)        (64b) = (64b / 64b)
        ;;     quo       dvd       dvs
        ;;
        ;; Remainder is returned in r5:r6
        ;;
        ;; Code comment notation:
        ;; msw = most-significant (high-order) word, ie bits 0..31
        ;; lsw = least-significant (low-order) word, ie bits 32-63
        ;; LZ = Leading Zeroes
        ;; SD = Significant Digits
        ;;
        ;; r3:r4 = dvd (input dividend); quo (output quotient)
        ;; r5:r6 = dvs (input divisor); rem (output remainder)
        ;;
        ;; r7:r8 = tmp
        ;;
        ;; Registers written: r0, r3-r10, CTR, XER[CA], cr0.
        ;; The stack and LR are not touched.
        ;; No check for a zero divisor is performed.

        ;; count the number of leading 0s in the dividend
        cmpwi   %cr0,%r3,0      ; dvd.msw = 0?
        cntlzw  %r0,%r3         ; r0 = dvd.msw.LZ
        cntlzw  %r9,%r4         ; r9 = dvd.lsw.LZ
        bne     %cr0,lab1       ; if (dvd.msw != 0) dvd.LZ = dvd.msw.LZ
        addi    %r0,%r9,32      ; else dvd.LZ = dvd.lsw.LZ + 32

lab1:
        ;; count the number of leading 0s in the divisor
        cmpwi   %cr0,%r5,0      ; dvs.msw = 0?
        cntlzw  %r9,%r5         ; r9 = dvs.msw.LZ
        cntlzw  %r10,%r6        ; r10 = dvs.lsw.LZ
        bne     %cr0,lab2       ; if (dvs.msw != 0) dvs.LZ = dvs.msw.LZ
        addi    %r9,%r10,32     ; else dvs.LZ = dvs.lsw.LZ + 32

lab2:
        ;; determine shift amount to minimize the number of iterations
        ;; (r0 + r9 = 64 after this block, so tmp:dvd together hold
        ;; the dividend shifted left by r0 as one 128-bit value)
        cmpw    %cr0,%r0,%r9    ; compare dvd.LZ to dvs.LZ
        subfic  %r10,%r0,64     ; r10 = dvd.SD
        bgt     %cr0,lab9       ; if (dvd.LZ > dvs.LZ) dvd < dvs, quotient = 0
        addi    %r9,%r9,1       ; ++dvs.LZ (or --dvs.SD)
        subfic  %r9,%r9,64      ; r9 = dvs.SD - 1
        add     %r0,%r0,%r9     ; r0 = dvd.LZ + (dvs.SD - 1) = left shift of dvd for initial dvd
        subf    %r9,%r9,%r10    ; r9 = dvd.SD - (dvs.SD - 1) = right shift of dvd for initial tmp
        mtctr   %r9             ; number of iterations = r9

        ;; r7:r8 = r3:r4 >> r9
        cmpwi   %cr0,%r9,32     ; compare r9 to 32
        addi    %r7,%r9,-32     ; r7 = r9 - 32 (used only when r9 >= 32)
        blt     %cr0,lab3       ; if (r9 < 32) jump to lab3
        srw     %r8,%r3,%r7     ; tmp.lsw = dvd.msw >> (r9 - 32)
        li      %r7,0           ; tmp.msw = 0
        b       lab4
lab3:
        srw     %r8,%r4,%r9     ; r8 = dvd.lsw >> r9
        subfic  %r7,%r9,32      ; r7 = 32 - r9
        slw     %r7,%r3,%r7     ; r7 = dvd.msw << (32 - r9)
        or      %r8,%r8,%r7     ; tmp.lsw = r8 | r7
        srw     %r7,%r3,%r9     ; tmp.msw = dvd.msw >> r9

lab4:
        ;; r3:r4 = r3:r4 << r0
        cmpwi   %cr0,%r0,32     ; compare r0 to 32
        addic   %r9,%r0,-32     ; r9 = r0 - 32 (used only when r0 >= 32)
        blt     %cr0,lab5       ; if (r0 < 32) jump to lab5
        slw     %r3,%r4,%r9     ; dvd.msw = dvd.lsw << (r0 - 32)
        li      %r4,0           ; dvd.lsw = 0
        b       lab6
lab5:
        slw     %r3,%r3,%r0     ; r3 = dvd.msw << r0
        subfic  %r9,%r0,32      ; r9 = 32 - r0
        srw     %r9,%r4,%r9     ; r9 = dvd.lsw >> (32 - r0)
        or      %r3,%r3,%r9     ; dvd.msw = r3 | r9
        slw     %r4,%r4,%r0     ; dvd.lsw = dvd.lsw << r0

lab6:
        ;; restoring division shift and subtract loop
        li      %r10,-1         ; r10 = -1 (addic r10+1 below sets the carry bit)
        addic   %r7,%r7,0       ; clear carry bit before loop starts
lab7:
        ;; tmp:dvd is considered one large register
        ;; each portion is shifted left 1 bit by adding it to itself
        ;; adde sums the carry from the previous and creates a new carry
        ;; the carry entering the first adde is the quotient bit
        ;; produced by the previous iteration (0 on the first pass)
        adde    %r4,%r4,%r4     ; shift dvd.lsw to left 1 bit
        adde    %r3,%r3,%r3     ; shift dvd.msw to left 1 bit
        adde    %r8,%r8,%r8     ; shift tmp.lsw to left 1 bit
        adde    %r7,%r7,%r7     ; shift tmp.msw to left 1 bit
        subfc   %r0,%r6,%r8     ; r0 = tmp.lsw - dvs.lsw
        subfe.  %r9,%r5,%r7     ; r9 = tmp.msw - dvs.msw (with borrow), sets cr0
        blt     %cr0,lab8       ; if (result < 0) keep tmp, skip setting carry
        mr      %r8,%r0         ; tmp.lsw = difference lsw
        mr      %r7,%r9         ; tmp.msw = difference msw
        addic   %r0,%r10,1      ; set carry bit (-1 + 1 carries; r0 is scratch)
lab8:
        bdnz    lab7            ; decrement CTR, loop while CTR != 0

        ;; write quotient and remainder
        adde    %r4,%r4,%r4     ; quo.lsw (lsb = CA)
        adde    %r3,%r3,%r3     ; quo.msw (lsb from lsw)
        mr      %r6,%r8         ; rem.lsw
        mr      %r5,%r7         ; rem.msw
        blr                     ; return

lab9:
        ;; quotient is 0 (dvs > dvd)
        mr      %r6,%r4         ; rem.lsw = dvd.lsw
        mr      %r5,%r3         ; rem.msw = dvd.msw
        li      %r4,0           ; quo.lsw = 0
        li      %r3,0           ; quo.msw = 0
        blr                     ; return

        .endsec

        .end
