import os
import sys
import string
import re
import time

# Matches one "key = value" line of the configuration file: group 1 is the
# key, group 2 the value; whitespace on either side of the "=" is dropped.
cfg_assign = re.compile(r'^(.*?)\s*=\s*(.*)$')

# Default settings, in effect unless a configuration file is read.
configuration = {
    'hometitle': "Eric's Home Page",
    'indextitle': "Map of Eric's Home Web",
    'fullname': "Eric S. Raymond",
    'mailaddr': "esr@thyrsus.com",
    'homepage': "http://www.tuxedo.org/~esr/",
    # Files whose path contains any of these substrings are left out.
    'exclude': [
        "test", "netbuilder", "jargon", "java",
        "worlds", "keeper", "mersenne", "netscape",
    ],
    'language': "english",
    # Image URLs for directory / page icons; empty means "no icon".
    'icondirs': "",
    'icontext': "",
    # Extra attributes placed inside the <BODY> tag.
    'body': "",
}


def makelist(xtra,dir,files):
    """Record the described HTML pages found in one directory.

    Has the visitor signature expected by os.path.walk().

    xtra  -- two-item sequence: xtra[0] holds substrings that exclude any
             path containing them, xtra[1] is the list results are
             appended to.
    dir   -- path of the directory being visited.
    files -- names of the entries inside dir.

    For every name ending in .html, .shtml or .htm that is not excluded
    and that contains a DESCRIPTION meta tag, a tuple
    (path, title, description) is appended to xtra[1].  The first two
    characters of the joined path are dropped from the stored path (the
    "./" prefix when the walk starts at '.').  Pages without a <TITLE>
    are recorded as "No title"; pages without a description are skipped.
    """
    desc_re = re.compile(r'<META\s*NAME\s?=\s?"DESCRIPTION"\s*CONTENT\s?=\s?"([^"]*)"',
                         re.IGNORECASE)
    title_re = re.compile(r'<TITLE>([^<]*)</TITLE>', re.IGNORECASE)
    suffixes = ('.html', '.shtml', '.htm')
    excluded = xtra[0]
    pages = xtra[1]

    for name in files:
        fullpath = os.path.join(dir, name)

        is_html = 0
        for suffix in suffixes:
            if fullpath.endswith(suffix):
                is_html = 1
                break
        if not is_html:
            continue

        skip = 0
        for entry in excluded:
            if fullpath.find(entry) > -1:
                skip = 1
                break
        if skip:
            continue

        in_file = open(fullpath, 'r')
        try:
            text = in_file.read()
        finally:
            # Close the handle even when the read fails.
            in_file.close()

        # Strings are immutable, so the result must be kept: the original
        # discarded it and line breaks leaked into titles and descriptions.
        text = text.replace('\012', ' ')

        desc_mo = desc_re.search(text)
        if desc_mo is None:
            # Only pages that describe themselves are listed.
            continue

        title_mo = title_re.search(text)
        if title_mo is not None:
            title = title_mo.group(1)
        else:
            title = "No title"
        pages.append((fullpath[2:], title, desc_mo.group(1)))

def indsort(x,y):
    """Comparison function that orders page entries by depth, then by path.

    x, y -- entries whose first item is the page path.

    A path ending in 'index.html' stands for its directory: the file name
    and the separator in front of it are removed before comparing.  Paths
    with fewer os.sep separators sort first; ties are broken by comparing
    the paths themselves.  Returns -1, 0 or 1, as sort() expects from a
    comparison function.

    The depth is compared as a number.  The original prepended it to the
    path as a decimal string, which placed pages nested ten or more levels
    deep ahead of pages at depth two.
    """
    def sort_key(entry):
        path = entry[0]
        if path[-10:] == 'index.html':
            path = path[:-11]
        return (path.count(os.sep), path)

    first = sort_key(x)
    second = sort_key(y)
    # Same result as cmp(first, second), without relying on cmp().
    return (first > second) - (first < second)
   
# Home directory, used only to locate the default configuration file.
# expanduser() uses $HOME when it is set, so the usual case is unchanged,
# but unlike os.environ['HOME'] it does not raise KeyError when HOME is
# missing (for instance when the path is supplied on the command line).
home = os.path.expanduser('~')

# The configuration file is the first command-line argument when given,
# otherwise ".sitemaprc" in the home directory.
if len(sys.argv) > 1:
    config = sys.argv[1]
else:
    config = os.path.join(home, ".sitemaprc")

# HTML credit inserted into the page footer.  The link needs the "mailto:"
# scheme to be an e-mail link, and the literal angle brackets around it are
# written as entities so they are not parsed as markup.
sitemappy_author = """Tom Bryan
                     &lt;<a href="mailto:tbryan@python.net">tbryan@python.net</a>&gt;"""

# Read the configuration file, if there is one.  Each useful line has the
# form "key = value"; blank lines and lines starting with "#" are ignored.
# Keys are case-insensitive.  "exclude" may appear several times and every
# occurrence adds one entry; any other key overrides the setting of that
# name.  A file that cannot be opened or read is silently ignored.
try:
    config_file = open(config)
    try:
        # Start from the defaults so that settings the file leaves out
        # (language, icondirs, ...) still exist later on; only the exclude
        # list is replaced wholesale by the entries found in the file.
        configuration = configuration.copy()
        configuration['exclude'] = []
        for line in config_file.readlines():
            line = line.strip()
            if not line or line[0] == '#':
                continue
            m_obj = cfg_assign.search(line)
            if not m_obj:
                sys.stderr.write("Unrecognized config line %s\n" % line)
                continue
            # group() is a method: the original subscripted it
            # (m_obj.group[1]), which raised TypeError on the first
            # assignment line of any configuration file.
            key = m_obj.group(1).lower()
            value = m_obj.group(2)
            if key == 'exclude':
                configuration['exclude'].append(value)
            else:
                configuration[key] = value
    finally:
        config_file.close()
except IOError:
    pass

# Generation time, taken from the local clock.
(year, month, mday, hour, minute, sec, wday, day, isdst) = time.localtime(time.time())

# Any language without a branch below, or a missing setting, gets English.
language = configuration.get('language', 'english')

# Month abbreviations used by the date formats that spell the month out.
if language == 'norwegian':
    months = ('Jan','Feb','Mar','Apr','Mai','Jun','Jul','Aug','Sep','Okt','Nov','Des')
else:
    months = ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec')

# Localized page texts and the formatted date.
#
# Accented letters are written as HTML character entities: the strings end
# up in the generated HTML, and entities keep this source file pure ASCII.
# The French, Swedish and Norwegian texts had lost their accented letters
# (e.g. "a t gnr" for "a ete genere"), and the German text contained a raw
# non-ASCII character plus a typo ("Zuruick"); the wording is restored here.
#
# A backslash-newline inside a string literal keeps the next line's leading
# whitespace in the string; that is harmless in HTML text.
message = {}
if language == 'french':
    message['sitemap'] = 'Carte du site'
    message['back_to'] = 'Retour &agrave;'
    message['autogen'] = 'Cet index a &eacute;t&eacute; g&eacute;n&eacute;r&eacute; automatiquement &agrave; partir de \
                          balises META pr&eacute;sentes dans chaque page. Les pages \
                          de plus haut niveau sont disponibles au d&eacute;but de cette \
                          carte du site.'
    message['toolgen'] = 'L\'utilitaire de g&eacute;n&eacute;ration automatique est "sitemap.py", \
                          &eacute;crit par %s' % sitemappy_author
    date = '%d %s %04d,  %d h %d mn' % (mday, months[month-1], year, hour, minute)
elif language == 'swedish':
    message['sitemap'] = 'Sajtkarta'
    message['back_to'] = 'Tillbaka till'
    message['autogen'] = 'Denna indexsida &auml;r automatiskt genererad fr&aring;n \
                          meta-taggar p&aring; varje sida. Toppniv&aring;sidor &auml;r listade f&ouml;rst.'
    message['toolgen'] = 'Den h&auml;r sidan &auml;r genererad av "sitemap.py", \
                          skapad av %s.' % sitemappy_author
    date = '%04d-%02d-%02d %02d:%02d' % (year,month,mday,hour,minute)
elif language == 'german':
    message['sitemap'] = 'Site Map'
    message['back_to'] = 'Zur&uuml;ck zu'
    message['autogen'] = 'Dieser Index wurde automatisch generiert aus Meta Tags\
	                  aller Seiten. Top-Level-Seiten werden zuerst gelistet.'
    message['toolgen'] = 'Diese Seite wurde generiert von "sitemap.py", \
	                  geschrieben von %s.' % sitemappy_author
    date = '%d %s %04d %02d:%02d' % (mday, months[month-1], year,hour,minute)
elif language == 'norwegian':
    message['sitemap'] = 'Nettkart'
    message['back_to'] = 'Tilbake til'
    message['autogen'] = 'Dette er en indeks generert automatisk ut fra \
	                  meta-tagger p&aring; hver side. Toppniv&aring;-sider er listet f&oslash;rst.'
    message['toolgen'] = 'Denne siden er generert av "sitemap.py", \
                          skrevet av %s.' % sitemappy_author
    date = '%d %s %04d, at %d:%02d' % (mday, months[month-1], year, hour, minute)
else:
    message['sitemap'] = 'Site Map'
    message['back_to'] = 'Back to'
    message['autogen'] = 'This is an index automatically generated from meta tags \
                          present in each of the pages.  Top-level pages are listed first.'
    message['toolgen'] = 'This page generated by "sitemap.py", written by %s.<BR> \
                          "sitemap.py" is based on "sitemap" by Eric S. Raymond.' % sitemappy_author
    date = '%d %s %04d, at %d:%02d' % (mday, months[month-1], year, hour, minute)
# Optional icon markup.  An empty (or missing) setting means "no icon" and
# leaves the corresponding variable as an empty string.
#
# The original assigned the directory icon to a misspelled name
# ("incondirs"), so "icondirs" stayed undefined whenever an icon was
# configured and the page loop failed with NameError.
icondirs = configuration.get('icondirs', '')
if icondirs != '':
    icondirs = '<img src="%s" alt="Dir">' % icondirs

icontext = configuration.get('icontext', '')
if icontext != '':
    icontext = '<img src="%s" alt="Text">' % icontext

# Opening BODY tag carrying whatever extra attributes were configured.
body_attr = configuration.get('body','')
body = '<BODY %s>' % body_attr

print '''
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
<HTML>
<HEAD>
  <TITLE>%s</TITLE>
  <META NAME="KEYWORDS" CONTENT="%s"> 
  <LINK REV=MADE HREF="mailto:%s">
</HEAD>
%s
<TABLE WIDTH="100%%" CELLPADDING=0><TR>
<TD WIDTH="50%%">%s <A HREF="%s">%s</A>
<TD WIDTH="50%%" ALIGN=RIGHT>%s
</TR></TABLE>
<HR><P>
<H1 ALIGN=CENTER>%s</H1>

<P>%s

<DL>
''' % (configuration['indextitle'],message['sitemap'],configuration['mailaddr'],
       body, message['back_to'], configuration['homepage'], configuration['hometitle'],
       date, message['sitemap'], message['autogen'])

pages = []
os.path.walk('.',makelist,(configuration['exclude'],pages))

pages.sort(indsort)

oldstem = None
for item in pages:
    (file, title, desc) = item 
    newstem = os.path.split(file)[0]
    if oldstem != newstem:
	print '<DT><P ALIGN=RIGHT><HR WIDTH="80%%">\n%s<BR>' % icondirs
    print '<DT>%s\n<a href="%s">%s</a>: <B>%s</B><DD>\n\t%s\n' % \
	  (icontext, file, file, title, desc)
    oldstem = newstem


print '''
</DL>
<P>
<HR>
%s
<HR>
<TABLE WIDTH="100%%" CELLPADDING=0><TR>
<TD WIDTH="50%%">%s <A HREF="%s">%s</A>
<TD WIDTH="50%%" ALIGN=RIGHT>%s
</TR><TR>
<TD COLSPAN=2><ADDRESS>%s <A HREF="mailto:%s"><%s></A></ADDRESS>
</TR></TABLE>

</BODY>
</HTML>
''' % (message['toolgen'],message['back_to'],configuration['homepage'],
       configuration['hometitle'],configuration['fullname'],date,
       configuration['mailaddr'],configuration['mailaddr'])