92 lines
1.7 KiB
Python
92 lines
1.7 KiB
Python
|
import urllib2
|
||
|
import sys
|
||
|
import unicodedata
|
||
|
|
||
|
def _extract_rst(body):
    """Return the reST payload embedded in a downloaded wiki text body.

    The payload is narrowed in three steps, each applied only when its
    marker is present:

    1. drop everything up to and including the first ``{{{`` plus the one
       character that follows it (presumably the newline ending that line);
    2. drop everything from the first ``}}}`` onwards;
    3. drop everything up to and including ``#!rst`` plus the one character
       that follows it.

    A body containing none of the markers is returned unchanged.
    """
    start = body.find("{{{")
    if start >= 0:
        # 3 characters for the braces + 1 for the character after them.
        body = body[start + 4:]

    end = body.find("}}}")
    if end >= 0:
        body = body[:end]

    marker = body.find("#!rst")
    if marker >= 0:
        # 5 characters for the marker + 1 for the character after it.
        body = body[marker + 6:]

    return body


def _rst_filename(url):
    """Derive the local output file name for *url*.

    The name is the text after the last ``/`` of the URL (the whole URL when
    it has no slash), cut at the first ``?``, with ``.rst`` appended.
    """
    # rfind() returns -1 when there is no slash, so the slice keeps the
    # whole URL in that case.
    name = url[url.rfind("/") + 1:]

    query = name.find('?')
    if query >= 0:
        name = name[:query]

    return name + ".rst"


def fetch_rst(url):
    """Download *url* and save its embedded reST text to a local file.

    The response body has its CRLF line endings converted to LF, is decoded
    as UTF-8 (undecodable bytes are ignored) and reduced to ASCII (other
    characters are dropped).  The reST payload is then extracted and written
    to ``<page>.rst`` in the current directory, where ``<page>`` is the last
    path segment of the URL without its query string.  An existing file with
    that name is overwritten.

    Parameters:
        url: address of the page to download.

    Returns:
        None.

    Raises:
        Whatever ``urllib2.urlopen`` raises when the request fails or does
        not complete within the 30 second timeout, and whatever ``open`` /
        ``write`` raise when the output file cannot be written.
    """
    print('Fetching %s..' % url)
    req = urllib2.Request(url)

    fd = urllib2.urlopen(req, timeout=30)
    try:
        body = fd.read()
    finally:
        # Release the connection even when reading fails part-way.
        fd.close()

    body = body.replace("\r\n", "\n")

    # Drop anything that is not plain ASCII.
    body = body.decode('utf8', 'ignore').encode('ascii', 'ignore')

    body = _extract_rst(body)

    # The context manager closes the file even if the write raises.
    with open(_rst_filename(url), 'w') as f:
        f.write(body)
|
||
|
|
||
|
|
||
|
def process_index(index):
    """Return the entries listed under the first ``toctree::`` of an index.

    The file ``<index>.rst`` is scanned for the first line containing
    ``toctree::``.  The lines directly after it that start with ``:`` (the
    directive's options) are skipped, then any blank lines, and every
    following line up to the next blank line or the end of the file is
    collected with surrounding whitespace stripped.

    Parameters:
        index: path of the index file without its ``.rst`` extension.

    Returns:
        A list of entry names in file order.  The list is empty when the
        file has no ``toctree::`` line or the directive has no entries.

    Raises:
        Whatever ``open`` raises when ``<index>.rst`` cannot be read.
    """
    # Read the whole file up front: the context manager closes the handle on
    # every return path, and working on a list makes end-of-file an explicit
    # bounds check rather than an endless stream of empty reads.
    with open(index + '.rst', 'r') as f:
        lines = [raw.strip() for raw in f]

    total = len(lines)

    # Locate the first toctree directive.
    pos = 0
    while pos < total and 'toctree::' not in lines[pos]:
        pos += 1
    if pos >= total:
        return []
    pos += 1

    # Skip the directive's option lines (":maxdepth: 2" and the like).
    while pos < total and lines[pos].startswith(':'):
        pos += 1

    # Skip the blank line(s) separating the options from the entries.
    while pos < total and not lines[pos]:
        pos += 1

    # Collect entries until the next blank line or the end of the file.
    pages = []
    while pos < total and lines[pos]:
        pages.append(lines[pos])
        pos += 1

    return pages
|
||
|
|
||
|
|
||
|
if __name__ == '__main__':
    # Ask for confirmation first: every fetched page overwrites the .rst
    # file of the same name in the current directory.
    sys.stdout.write("** Warning: This will overwrite ALL RST files in current directory. Continue? [n] ")
    # The prompt has no trailing newline, so flush it explicitly to make
    # sure it is visible before blocking on the answer.
    sys.stdout.flush()
    if sys.stdin.readline().strip() != 'y':
        sys.exit(0)

    url_format = 'http://trac.pjsip.org/repos/wiki/pjsip-doc/%s?format=txt'

    # Download the index page, which is saved as index.rst ...
    fetch_rst(url_format % ('index',))

    # ... then download every page listed in its toctree.
    pages = process_index('index')
    for page in pages:
        fetch_rst(url_format % (page,))

    print('Done.')
|
||
|
|