- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - (updated on Sat Dec 30 18:35:30 2000)

###################
### 1-LINE DIFF ###
###################
% rcsdiff crawl.py
4c4
< from os import makedirs, unlink
---
> from os import makedirs, unlink, sep
24c24
<             if newpath[-1] == '/':
---
>             if path[-1] == '/':
28a29,30
>         if sep != '/': # os-indep. path separator
>             dir = replace(dir, '/', sep)
36c38
<             retval = urllib.urlretrieve(self.url, self.file)
---
>             retval = urlretrieve(self.url, self.file)
47c49
<         return self.parse.anchorlist
---
>         return self.parser.anchorlist
24c24
<             if newpath[-1] == '/':
---
>             if path[-1] == '/':
36c36
<             retval = urllib.urlretrieve(self.url, self.file)
---
>             retval = urlretrieve(self.url, self.file)
47c47
<         return self.parse.anchorlist
---
>         return self.parser.anchorlist

###################
### 3-LINE DIFF ###
###################
% rcsdiff -C3 crawl.py
*** /tmp/T0AVaiWf Tue Oct 17 18:00:54 2000
--- crawl.py Sat Dec 30 18:32:13 2000
***************
*** 1,7 ****
  #!/usr/bin/env python

  from sys import argv
! from os import makedirs, unlink
  from os.path import isdir, exists, dirname, splitext
  from string import replace, find, lower
  from htmllib import HTMLParser
--- 1,7 ----
  #!/usr/bin/env python

  from sys import argv
! from os import makedirs, unlink, sep
  from os.path import isdir, exists, dirname, splitext
  from string import replace, find, lower
  from htmllib import HTMLParser
***************
*** 21,31 ****
          path = parsedurl[1] + parsedurl[2]
          ext = splitext(path)
          if ext[1] == '':
!             if newpath[-1] == '/':
                  path = path + deffile
              else:
                  path = path + '/' + deffile
          dir = dirname(path)
          if not isdir(dir): # create archive dir if nec.
              if exists(dir): unlink(dir)
              makedirs(dir)
--- 21,33 ----
          path = parsedurl[1] + parsedurl[2]
          ext = splitext(path)
          if ext[1] == '':
!             if path[-1] == '/':
                  path = path + deffile
              else:
                  path = path + '/' + deffile
          dir = dirname(path)
+         if sep != '/': # os-indep. path separator
+             dir = replace(dir, '/', sep)
          if not isdir(dir): # create archive dir if nec.
              if exists(dir): unlink(dir)
              makedirs(dir)
***************
*** 33,39 ****

      def download(self): # download Web page
          try:
!             retval = urllib.urlretrieve(self.url, self.file)
          except IOError:
              retval = ('*** ERROR: invalid URL "%s"' % \
                  self.url, )
--- 35,41 ----

      def download(self): # download Web page
          try:
!             retval = urlretrieve(self.url, self.file)
          except IOError:
              retval = ('*** ERROR: invalid URL "%s"' % \
                  self.url, )
***************
*** 44,50 ****
              DumbWriter(StringIO())))
          self.parser.feed(open(self.file).read())
          self.parser.close()
!         return self.parse.anchorlist

  class Crawler: # manage entire crawling process

--- 46,52 ----
              DumbWriter(StringIO())))
          self.parser.feed(open(self.file).read())
          self.parser.close()
!         return self.parser.anchorlist

  class Crawler: # manage entire crawling process
