"""Code to perform `radix 050' encoding and decoding.

This encoding recognises forty characters: space, a through z, dollar,
full-stop, question-mark, 0 through 9.  These are numbered 0 to 39 in the order
given and a string is then read as a number in base forty (aka octal 050).
Letters are handled case-insensitively.  This provides for strings of up to six
characters (and a few seven-character ones) to be represented as 32-bit integers
(strings earlier than t84qkg don't use the top bit).

This module exports two functions:

 encode(string) -- yields a number.

 decode(number) -- yields a string.

Bill has found that this is the wrong way to approach the decoding, though: it
is actually done by cutting the int, seen as a sequence of bits, into chunks,
then decoding each chunk separately.

$Id: radix050.py,v 1.2 2007/03/24 22:02:59 eddy Exp $
""" # ' deconfuse font-lock

# The forty digits of the encoding, indexed by their numeric value.
_charset = " abcdefghijklmnopqrstuvwxyz$.?0123456789"

# Probe the width of the plain (non-long) int: keep doubling _rootmaxint, with
# _bigint as its square, until the square no longer fits in a plain int.
_rootmaxint = 1
_bigint = 1
try:
	# The type test alone never fails where int is unbounded (e.g. Python 3),
	# which would spin here forever; so also stop at pow(2, 32), the value a
	# build with 64-bit plain ints reaches anyway.
	while type(_bigint) is type(1) and _rootmaxint < 65536 * 65536:
		_rootmaxint = _rootmaxint * 2
		_bigint = _bigint * 4

except OverflowError: pass	# ancient interpreters raise instead of promoting to long.
# Where plain ints are bounded, any x representable by a (non-long) int
# satisfies abs(x / _rootmaxint) < _rootmaxint.

def decode(number):
	"""Decodes a string from `radix 050' form.

	Argument, number, should be a non-negative integer.  A negative integer
	greater than -pow(2,32) is taken to be the signed reading of an unsigned
	32-bit value: pow(2,32) is added to it before decoding.  The resulting
	number is expressed in base 40 (aka octal 050) and its digits are read
	as characters, most significant first.

	Returns the decoded string.  Letters are emitted lower-case.  Leading
	spaces never appear (space is the zero digit), so zero decodes to the
	empty string.

	Raises ValueError, carrying the offending number, when number is
	-pow(2,32) or lower.
	"""

	if number < 0:
		# coerce signed int to unsigned ...
		# The bound is fixed at 32 bits rather than derived from the native
		# int width: a wider bound lets through values that are still
		# negative after the adjustment, and the digit loop below never
		# terminates on a negative number (floor division sticks at -1).
		wrap = 2 ** 32
		if number <= -wrap:
			raise ValueError(number)	# huge, negative: not a radix050 string.

		number = number + wrap

	# Peel off base-40 digits, least significant first.
	row = []
	while number:
		number, here = divmod(number, 40)
		row.append(_charset[int(here)])

	# Digits were collected backwards; flip them to reading order.
	row.reverse()
	return ''.join(row)

def encode(text):
	"""Encodes a string in `radix 050' form.

	Argument, text, is a string.  It should only use the characters known to
	the radix 050 encoding (alphanumeric, space, $, . and ?) - all others
	will be treated as spaces (but I reserve the right to treat them as some
	other character instead - probably `?').  The string is read as a radix
	050 (i.e. forty) number using these characters as digits (with space as
	0, letters as 1 through 26, $, . and ? as 27, 28 and 29, each digit as
	its value plus 30).  Letters are handled case-insensitively.  Note that
	leading spaces are ignored (just like leading zeros on a number).

	Returns the number as an integer; the empty string encodes as 0.
	"""

	number = 0
	for ch in text:
		# find() reports -1 for a character outside the alphabet; such
		# characters count as space, i.e. digit zero.
		here = _charset.find(ch.lower())
		if here < 0: here = 0	# perhaps use ? in place of ch.

		# Integer arithmetic widens as needed, so no overflow handling.
		number = number * 40 + here

	return number
