divfree_cvt.py

import math
import mpmath
from mpmath import mp
from array import array

mast = [
"divfree_cvt.py",
"Proof of Concept Implementation in Python 3.x. Version 1.0",
"Based on 'Division-Free Binary-to-Decimal Conversion' by Bouvier & Zimmerman",
"Implementation by Karl Butz https://www.EndlessSkye.com. It:",
" -is intended only as proof of concept;",
" -has not been rigorously tested or optimized for speed;",
" -is free software, though an acknowledgement to me would be appreciated."
]

"""
 *
 *  divfree_cvt.py
 *
 *  Division free binary to decimal conversion.
 *
    Known limitations:
       -It is recursive;
       -It relies upon expensive log() functions;
       -It is written in Python. c would be ideal.
       -Currently the main subquadratic routine
        cvt_rec() consistently drops the least
        sign1ficant digit. cvt_rec_ex(), using the
        so-called naive approach works but it's
        not subquadratic.
       
    Comments
       (1) The pseudo-code in the source paper
       uses needless floating point in places. 
       This version uses only integer arithmetic
       except for (2).
       (2)The log() functions should be replaced
       with accurate approximations using integer
       arithmetic.
       (3) All multiply, divide and mod operations
       by powers of two can be done quickly using
       bit shifts and a bitwise and operation. As
       I see it this is the primary benefit of this
       technique.
       (4) The term 'divsion free' in the title is
       somewhat misleading. The fine print tells you
       that divisions are eliminated only in 
       computationally intensive areas.
       (5) Although I haven't yet done any real
       benchmarking of this code, it's doubtful
       the so-called naive method referenced in 
       the paper is slower except for very, very
       large integers or on machines without a native
       divide instruction.
       (6) I am not a Python programmer. I chose Python
       for its multiple precision and math support. I
       tend to write Python like c anywhere I can.
       (7) I wrote this in a modern Linux environment 
       using Python 3.9.7, math, mpmath and array 
       packages.
       (8) I have an earlier version written in c that
       I abandoned for this implementation. The c version
       mostly works but needs work. It uses integer
       arithmetic to approximate the log() functions and
       an old & obscure library for bignum support.

"""

"""
/******************************************************************************/
/*                            G L O B A L S                                   */
/******************************************************************************/
"""
s_b = 10    # only tested for base 10
s_kt = 8    # threshold to call the cvt_trunc()
s_dbg_out = -1
s_log2file = -1
s_pszDigits = ["0","1","2","3","4","5","6","7","8","9"]
s_last_digit_hack = "last_digit_hack"
s_acc = 0
"""
/******************************************************************************/
/*                            F U N C T I O N S                               */
/******************************************************************************/
"""
"""
    Called by the main subquadratic function cvt_rec()
    It is called when k <= kt, the threshold.
    This function escapes from the recursion of cvt_rec()
"""
def cvt_trunc(y,k,n):
    dbg_out("cvt_trunc()>",y,k,n)
    ni = array('i',[32,29,26,23,19,16,13,9,6])
    
    pszDigitStr = ""
    
    for i in range(1,k):
        t = s_b * y
        v = ni[i-1]
        s = t >> v
        z = t % (1<<v)
        y = z >> (v-ni[i])
        pszDigitStr += s_pszDigits[s]           
    dbg_out(pszDigitStr)
    dbg_out("cvt_trunc()<")
    return pszDigitStr

"""
    Called by the main subquadratic function cvt_rec()
    It is called when k <= kt, the threshold.
    This function escapes from the recursion of cvt_rec()
    This version is an ugly hack that attempts to correct 
    the missing least significant digit of cvt_rec(). It 
    works for some cases but not all.
"""
def cvt_trunc_hack(y,k,n):
    global s_last_digit_hack
    dbg_out("cvt_trunc_hack()>",y,k,n)
    ni = array('i',[32,29,26,23,19,16,13,9,6])
    
    pszDigitStr = ""
    
    for i in range(1,k):
        t = s_b * y
        v = ni[i-1]
        s = t >> v
        z = t % (1<<v)
        y = z >> (v-ni[i])
        pszDigitStr += s_pszDigits[s]
    print ("xxx:",i)
    t = s_b * y
    v = ni[i-1]
    s = t >> v

    s_last_digit_hack = s_pszDigits[s]
    dbg_out(pszDigitStr)
    dbg_out("cvt_trunc_hack()<")
    return pszDigitStr
"""
    Workhorse used to compute n.
    I round up to the nearest multiple
    of 32, though thisn't is mandatory.
    Any n greater than the left hand side
    should theoretically work.
"""
def cpt_n(g,b,k):
    dbg_out("cpt_n()>")
    p = 4 * g * (b**k)
    pwr_of_2 = math.ceil(mp.log(p,2))
    #dbg_out( "max(4gb^k)=", pwr_of_2)
    mask = ~31
    n = pwr_of_2 + 31
    n = n & mask
    dbg_out( "n =", n)
    dbg_out("cpt_n()<")
    return n
"""
    Workhorse used to compute the
    approximation of y.
"""
def cpt_y(a,n,b,k):
    dbg_out("cpt_y()>")
    # y = (((a+1)*2^n)/(b^k)) - 1
    y = (((a+1)<<n)//(b**k))-1
    dbg_out("y =")
    dbg_out(y)
    return y
"""
    Special handling for two cases.
"""
def handle_digits(sh,sl):
    hi_len = len(sh)
    lo_len = len(sl)
    #dbg_out("xxx:",sh,sl)
    sh_new = sh
    sl_new = sl
    """
        In c this is more straightforward
        but it works. I do math on ascii
        values otherwise I would have to
        do an expensive mod operation in 
        base 10 for case 1. Case 2 is a
        slamdunk.
    """
    #
    #   case 1
    #
    if sh[hi_len-1] == "9":
        if sl[0] == "0":
          dbg_out("xxx:case 1")
          arsh = array('B',[0x30]*hi_len)
          arsh = [x for x in sh]
          #dbg_out("xxx:",hi_len)
          carry = 1
          arsh[hi_len-1] = 0x30
          for o in range(hi_len-2,-1,-1):
              acc = int(arsh[o]) + carry
              if acc == 10:
                  carry = 1
                  arsh[o] = 0x30
              else:
                  carry = 0
                  arsh[o] = acc + 0x30
          sh_new = ""
          for o in range(0,hi_len):
              sh_new+=s_pszDigits[(int(arsh[o]) - 0x30)]
          print("xxx:",sh_new)
              
    #
    #   case 2
    #
    if sh[hi_len-1] == "0":
        if sl[0] == "9":
          dbg_out("zzz:case 1")
          sl_new = ""
          for o in range(0,lo_len):
              sl_new += "0"
              
    return sh_new,sl_new
"""
    The heart of the subquadratic code.
    Called recursively. cvt_trunc() is
    called after k falls below or equal
    to kt. Used to escape the recursion.
"""
def cvt_rec(k,y,n,g):
    dbg_out("cvt_rec() k=",k,"n=",n,"g=",g);
    dbg_out(y)

    sh=""
    sl=""
     
    if k <= s_kt:
        return cvt_trunc_hack(y,k,n)
    kh = (k + 1) >> 1
    kl = (k - kh) + 1
    dbg_out("kh =",kh,"kl =",kl)
    nh = cpt_n(g,s_b,kh)
    nl = cpt_n(g,s_b,kl)
    dbg_out("nh =",nh,"nl =",nl)
    ##dbg_out(hex(y))
    
    yh = y >> (n-nh)
    dbg_out("yh =",yh)
    ##tz=y*(s_b**(k-kl))
    ##dbg_out(hex(tz))
    yl = ((y*(s_b**(k-kl))) % (1<<n) ) >> (n-nl)
    dbg_out("yl =",yl)
    sh0 = cvt_rec(kh,yh,nh,g)
    sl0 = cvt_rec(kl,yl,nl,g)
    shx,slx = handle_digits(sh0,sl0)
    sh += shx
    sl += slx
    return sh+sl
"""
    Computes the initial values used
    to make the first call to the subquadratic
    function cvt_rec()
    On return from cvt_rec() it does some
    housekeeping and checking.
"""
def cvt_t0(a):
    global s_last_digit_hack
    dbg_out("cvt_t0()>")
    s_last_digit_hack = "last_digit_hack"
    s = ""
    k = math.ceil(mp.log(a,s_b))
    g = max( math.ceil(mp.log(k,2)),s_kt)
    #k += 1
    #dbg_out( "a=",a)
    #dbg_out( "k=",k)
    #dbg_out( "g=",int(g))
    n = cpt_n(g,s_b,k)
    #k = 129
    #g = 9
    #n = 432
    y = cpt_y(a,n,s_b,k)
    s += cvt_rec(k,y,n,g)
    s += s_last_digit_hack
    
    len_diff = 0
    sza = str(a)
    la = len(sza)
    ls = len(s)
    strlen = la
    if ls != la:
      dbg_out("zzz:len(src)!=len(result)",la,ls)
      len_diff = -1
      if ls < strlen:
          strlen = ls
    dstr=""
    diffs = 0
    for q in range(strlen):
        delta = int(sza[q]) - int(s[q])
        if delta != 0:
            dstr += "."
            diffs += 1
        else:
            dstr += "x"
    ##dbg_out(dstr)
    if diffs == 0:
        if len_diff == 0:
          dbg_out("zzz:strings match!")
        else:
          dbg_out("zzz:len mismatch but match otherwise!")
    else: 
        dbg_out(a)
        dbg_out("zzzz;strings don't match!")
        
    dbg_out(s)
    dbg_out("cvt_t0()<")    
"""
    This function performs the "naive" method
    on any a, regardless of its size. It is
    not called by the subquadratic code.
    I originally wrote this only to verify functionality. 
"""
def cvt_trunc_ex(y,k,n):
    dbg_out("cvt_trunc_ex():",y,k,n)

    pszDigits = ["0","1","2","3","4","5","6","7","8","9"]
    pszDigitStr = ""
    bits_per_digit = 1.0/mp.log(2,10)
    acc = 0.0
    
    for i in range(1,k):
        t = s_b * y
        shft_amt_last = n - int(math.floor(acc))
        acc += bits_per_digit
        shft_amt = n - int(math.floor(acc))
        s = t >> shft_amt_last
        z = t % (1<<shft_amt_last)
        y = z >> (shft_amt_last-shft_amt)
        pszDigitStr += pszDigits[s]
    t = s_b * y
    shft_amt_last = n - int(math.floor(acc))
    s = t >> shft_amt_last
    pszDigitStr += pszDigits[s]
    dbg_out(pszDigitStr)
    return pszDigitStr
"""
    Called with a to cvt_trunc_ex()
    Functional but doesn't use the
    subquadratic code. For verification
    only.
"""
def cvt_t0_ex(a): 
    dbg_out("cvt_t0_ex()>")
    s = ""
    k = math.ceil(mp.log(a,s_b))
    g = max( math.ceil(mp.log(k,2)),s_kt)
    n = cpt_n(g,s_b,k)
    y = cpt_y(a,n,s_b,k)
    s += cvt_trunc_ex(y,k,n)
    
    sza = str(a)
    la = len(sza)
    ls = len(s)
    strlen = la
    if ls != la:
      dbg_out("zzz:len(src)!=len(result)",la,ls)
      if ls < strlen:
          strlen = ls
    dstr=""
    diffs = 0
    for q in range(strlen):
        delta = int(sza[q]) - int(s[q])
        if delta != 0:
            dstr += "."
            diffs += 1
        else:
            dstr += "x"
    ##dbg_out(dstr)
    if diffs == 0:
        dbg_out("zzz:strings match!")
    else: 
        dbg_out("zzz;strings don't match!")
      
    dbg_out("cvt_t0_ex()<")

#
#  Create an 'a'
#  No 0s or 9s which requires
#  special handling.
#
def cr8_a_no_0sOr9s():
    #"87651234","43125678","12348765","56784321"
    n = 1
    a=87651234 
    a*=(10**8)
    a+=43125678
    a*=(10**8)
    a+=12348765
    a*=(10**8)
    a+=56784321
    u = a
    a*=(10**32)
    n = 31
    for i in range(32):
        r = u % 10
        r *= (10**n)
        a += r
        u //= 10
        n -= 1
    dbg_out(a)
    return a
#
#  Create an 'a'. This
#  creates a value that tests
#  case 1 in handle_digits()
#
def cr8_a():
    k = 128
    a = s_b**k
    a -= 1
    a *= a
    a <<= 1
    dbg_out(a)
    return a
#
#  Create an 'a' using n# method
#  This is a factorial except it
#  skips multiples of 5. Eliminates
#  terminal zeroes. Naive Implementation,
#
def cr8_sharp_factorial(n):
    fact = 1
    for i in range(2,n):
      notfive = i % 5
      if( notfive != 0 ):
        fact *= i  
    return fact
"""
    Optionally echo debug messages
    to stdout or log.txt, or both.
"""
def dbg_out(*args):
    if s_dbg_out == 0:
        return
    if s_log2file == -1:
      f = open("log.txt","a")
    dbgstr = ""
    for num in args:
        dbgstr += str(num) + " "
        #print(num, end=" "
    #print("")
    print(dbgstr)
    if s_log2file == -1:
        f.write(dbgstr+"\n")
        f.close()
"""
    E N T R Y   P O I N T
"""
#
#   Show the mast.
#
for txt in mast:
    dbg_out(txt)

"""
    Convert some #s
"""
a = cr8_a()
cvt_t0(a)
cvt_t0_ex(a)

a = cr8_a_no_0sOr9s()
cvt_t0(a)
cvt_t0_ex(a)

a = cr8_a()
cvt_t0(a)
cvt_t0_ex(a)

for z in range(8):
  a //= 10
  cvt_t0(a)
  cvt_t0_ex(a)

a = cr8_sharp_factorial(100)
cvt_t0(a)
cvt_t0_ex(a)