import math import mpmath from mpmath import mp from array import array mast = [ "divfree_cvt.py", "Proof of Concept Implementation in Python 3.x. Version 1.0", "Based on 'Division-Free Binary-to-Decimal Conversion' by Bouvier & Zimmerman", "Implementation by Karl Butz https://www.EndlessSkye.com. It:", " -is intended only as proof of concept;", " -has not been rigorously tested or optimized for speed;", " -is free software, though an acknowledgement to me would be appreciated." ] """ * * divfree_cvt.py * * Division free binary to decimal conversion. * Known limitations: -It is recursive; -It relies upon expensive log() functions; -It is written in Python. c would be ideal. -Currently the main subquadratic routine cvt_rec() consistently drops the least sign1ficant digit. cvt_rec_ex(), using the so-called naive approach works but it's not subquadratic. Comments (1) The pseudo-code in the source paper uses needless floating point in places. This version uses only integer arithmetic except for (2). (2)The log() functions should be replaced with accurate approximations using integer arithmetic. (3) All multiply, divide and mod operations by powers of two can be done quickly using bit shifts and a bitwise and operation. As I see it this is the primary benefit of this technique. (4) The term 'divsion free' in the title is somewhat misleading. The fine print tells you that divisions are eliminated only in computationally intensive areas. (5) Although I haven't yet done any real benchmarking of this code, it's doubtful the so-called naive method referenced in the paper is slower except for very, very large integers or on machines without a native divide instruction. (6) I am not a Python programmer. I chose Python for its multiple precision and math support. I tend to write Python like c anywhere I can. (7) I wrote this in a modern Linux environment using Python 3.9.7, math, mpmath and array packages. (8) I have an earlier version written in c that I abandoned for this implementation. The c version mostly works but needs work. It uses integer arithmetic to approximate the log() functions and an old & obscure library for bignum support. """ """ /******************************************************************************/ /* G L O B A L S */ /******************************************************************************/ """ s_b = 10 # only tested for base 10 s_kt = 8 # threshold to call the cvt_trunc() s_dbg_out = -1 s_log2file = -1 s_pszDigits = ["0","1","2","3","4","5","6","7","8","9"] s_last_digit_hack = "last_digit_hack" s_acc = 0 """ /******************************************************************************/ /* F U N C T I O N S */ /******************************************************************************/ """ """ Called by the main subquadratic function cvt_rec() It is called when k <= kt, the threshold. This function escapes from the recursion of cvt_rec() """ def cvt_trunc(y,k,n): dbg_out("cvt_trunc()>",y,k,n) ni = array('i',[32,29,26,23,19,16,13,9,6]) pszDigitStr = "" for i in range(1,k): t = s_b * y v = ni[i-1] s = t >> v z = t % (1<<v) y = z >> (v-ni[i]) pszDigitStr += s_pszDigits[s] dbg_out(pszDigitStr) dbg_out("cvt_trunc()<") return pszDigitStr """ Called by the main subquadratic function cvt_rec() It is called when k <= kt, the threshold. This function escapes from the recursion of cvt_rec() This version is an ugly hack that attempts to correct the missing least significant digit of cvt_rec(). It works for some cases but not all. """ def cvt_trunc_hack(y,k,n): global s_last_digit_hack dbg_out("cvt_trunc_hack()>",y,k,n) ni = array('i',[32,29,26,23,19,16,13,9,6]) pszDigitStr = "" for i in range(1,k): t = s_b * y v = ni[i-1] s = t >> v z = t % (1<<v) y = z >> (v-ni[i]) pszDigitStr += s_pszDigits[s] print ("xxx:",i) t = s_b * y v = ni[i-1] s = t >> v s_last_digit_hack = s_pszDigits[s] dbg_out(pszDigitStr) dbg_out("cvt_trunc_hack()<") return pszDigitStr """ Workhorse used to compute n. I round up to the nearest multiple of 32, though thisn't is mandatory. Any n greater than the left hand side should theoretically work. """ def cpt_n(g,b,k): dbg_out("cpt_n()>") p = 4 * g * (b**k) pwr_of_2 = math.ceil(mp.log(p,2)) #dbg_out( "max(4gb^k)=", pwr_of_2) mask = ~31 n = pwr_of_2 + 31 n = n & mask dbg_out( "n =", n) dbg_out("cpt_n()<") return n """ Workhorse used to compute the approximation of y. """ def cpt_y(a,n,b,k): dbg_out("cpt_y()>") # y = (((a+1)*2^n)/(b^k)) - 1 y = (((a+1)<<n)//(b**k))-1 dbg_out("y =") dbg_out(y) return y """ Special handling for two cases. """ def handle_digits(sh,sl): hi_len = len(sh) lo_len = len(sl) #dbg_out("xxx:",sh,sl) sh_new = sh sl_new = sl """ In c this is more straightforward but it works. I do math on ascii values otherwise I would have to do an expensive mod operation in base 10 for case 1. Case 2 is a slamdunk. """ # # case 1 # if sh[hi_len-1] == "9": if sl[0] == "0": dbg_out("xxx:case 1") arsh = array('B',[0x30]*hi_len) arsh = [x for x in sh] #dbg_out("xxx:",hi_len) carry = 1 arsh[hi_len-1] = 0x30 for o in range(hi_len-2,-1,-1): acc = int(arsh[o]) + carry if acc == 10: carry = 1 arsh[o] = 0x30 else: carry = 0 arsh[o] = acc + 0x30 sh_new = "" for o in range(0,hi_len): sh_new+=s_pszDigits[(int(arsh[o]) - 0x30)] print("xxx:",sh_new) # # case 2 # if sh[hi_len-1] == "0": if sl[0] == "9": dbg_out("zzz:case 1") sl_new = "" for o in range(0,lo_len): sl_new += "0" return sh_new,sl_new """ The heart of the subquadratic code. Called recursively. cvt_trunc() is called after k falls below or equal to kt. Used to escape the recursion. """ def cvt_rec(k,y,n,g): dbg_out("cvt_rec() k=",k,"n=",n,"g=",g); dbg_out(y) sh="" sl="" if k <= s_kt: return cvt_trunc_hack(y,k,n) kh = (k + 1) >> 1 kl = (k - kh) + 1 dbg_out("kh =",kh,"kl =",kl) nh = cpt_n(g,s_b,kh) nl = cpt_n(g,s_b,kl) dbg_out("nh =",nh,"nl =",nl) ##dbg_out(hex(y)) yh = y >> (n-nh) dbg_out("yh =",yh) ##tz=y*(s_b**(k-kl)) ##dbg_out(hex(tz)) yl = ((y*(s_b**(k-kl))) % (1<<n) ) >> (n-nl) dbg_out("yl =",yl) sh0 = cvt_rec(kh,yh,nh,g) sl0 = cvt_rec(kl,yl,nl,g) shx,slx = handle_digits(sh0,sl0) sh += shx sl += slx return sh+sl """ Computes the initial values used to make the first call to the subquadratic function cvt_rec() On return from cvt_rec() it does some housekeeping and checking. """ def cvt_t0(a): global s_last_digit_hack dbg_out("cvt_t0()>") s_last_digit_hack = "last_digit_hack" s = "" k = math.ceil(mp.log(a,s_b)) g = max( math.ceil(mp.log(k,2)),s_kt) #k += 1 #dbg_out( "a=",a) #dbg_out( "k=",k) #dbg_out( "g=",int(g)) n = cpt_n(g,s_b,k) #k = 129 #g = 9 #n = 432 y = cpt_y(a,n,s_b,k) s += cvt_rec(k,y,n,g) s += s_last_digit_hack len_diff = 0 sza = str(a) la = len(sza) ls = len(s) strlen = la if ls != la: dbg_out("zzz:len(src)!=len(result)",la,ls) len_diff = -1 if ls < strlen: strlen = ls dstr="" diffs = 0 for q in range(strlen): delta = int(sza[q]) - int(s[q]) if delta != 0: dstr += "." diffs += 1 else: dstr += "x" ##dbg_out(dstr) if diffs == 0: if len_diff == 0: dbg_out("zzz:strings match!") else: dbg_out("zzz:len mismatch but match otherwise!") else: dbg_out(a) dbg_out("zzzz;strings don't match!") dbg_out(s) dbg_out("cvt_t0()<") """ This function performs the "naive" method on any a, regardless of its size. It is not called by the subquadratic code. I originally wrote this only to verify functionality. """ def cvt_trunc_ex(y,k,n): dbg_out("cvt_trunc_ex():",y,k,n) pszDigits = ["0","1","2","3","4","5","6","7","8","9"] pszDigitStr = "" bits_per_digit = 1.0/mp.log(2,10) acc = 0.0 for i in range(1,k): t = s_b * y shft_amt_last = n - int(math.floor(acc)) acc += bits_per_digit shft_amt = n - int(math.floor(acc)) s = t >> shft_amt_last z = t % (1<<shft_amt_last) y = z >> (shft_amt_last-shft_amt) pszDigitStr += pszDigits[s] t = s_b * y shft_amt_last = n - int(math.floor(acc)) s = t >> shft_amt_last pszDigitStr += pszDigits[s] dbg_out(pszDigitStr) return pszDigitStr """ Called with a to cvt_trunc_ex() Functional but doesn't use the subquadratic code. For verification only. """ def cvt_t0_ex(a): dbg_out("cvt_t0_ex()>") s = "" k = math.ceil(mp.log(a,s_b)) g = max( math.ceil(mp.log(k,2)),s_kt) n = cpt_n(g,s_b,k) y = cpt_y(a,n,s_b,k) s += cvt_trunc_ex(y,k,n) sza = str(a) la = len(sza) ls = len(s) strlen = la if ls != la: dbg_out("zzz:len(src)!=len(result)",la,ls) if ls < strlen: strlen = ls dstr="" diffs = 0 for q in range(strlen): delta = int(sza[q]) - int(s[q]) if delta != 0: dstr += "." diffs += 1 else: dstr += "x" ##dbg_out(dstr) if diffs == 0: dbg_out("zzz:strings match!") else: dbg_out("zzz;strings don't match!") dbg_out("cvt_t0_ex()<") # # Create an 'a' # No 0s or 9s which requires # special handling. # def cr8_a_no_0sOr9s(): #"87651234","43125678","12348765","56784321" n = 1 a=87651234 a*=(10**8) a+=43125678 a*=(10**8) a+=12348765 a*=(10**8) a+=56784321 u = a a*=(10**32) n = 31 for i in range(32): r = u % 10 r *= (10**n) a += r u //= 10 n -= 1 dbg_out(a) return a # # Create an 'a'. This # creates a value that tests # case 1 in handle_digits() # def cr8_a(): k = 128 a = s_b**k a -= 1 a *= a a <<= 1 dbg_out(a) return a # # Create an 'a' using n# method # This is a factorial except it # skips multiples of 5. Eliminates # terminal zeroes. Naive Implementation, # def cr8_sharp_factorial(n): fact = 1 for i in range(2,n): notfive = i % 5 if( notfive != 0 ): fact *= i return fact """ Optionally echo debug messages to stdout or log.txt, or both. """ def dbg_out(*args): if s_dbg_out == 0: return if s_log2file == -1: f = open("log.txt","a") dbgstr = "" for num in args: dbgstr += str(num) + " " #print(num, end=" " #print("") print(dbgstr) if s_log2file == -1: f.write(dbgstr+"\n") f.close() """ E N T R Y P O I N T """ # # Show the mast. # for txt in mast: dbg_out(txt) """ Convert some #s """ a = cr8_a() cvt_t0(a) cvt_t0_ex(a) a = cr8_a_no_0sOr9s() cvt_t0(a) cvt_t0_ex(a) a = cr8_a() cvt_t0(a) cvt_t0_ex(a) for z in range(8): a //= 10 cvt_t0(a) cvt_t0_ex(a) a = cr8_sharp_factorial(100) cvt_t0(a) cvt_t0_ex(a)