#!/usr/bin/env python
"""Convert PSEPs to (X)HTML - courtesy of /F

Usage: %(PROGRAM)s [options] [<pseps> ...]

Options:

-u, --user
    python.org username

-b, --browse
    After generating the HTML, direct your web browser to view it
    (using the Python webbrowser module).  If both -i and -b are
    given, this will browse the on-line HTML; otherwise it will
    browse the local HTML.  If no psep arguments are given, this
    will browse PSEP 0.

-i, --install
    After generating the HTML, install it and the plaintext source file
    (.txt) on python.org.  In that case the user's name is used in the scp
    and ssh commands, unless "-u username" is given (in which case, it is
    used instead).  Without -i, -u is ignored.

-l, --local
    Same as -i/--install, except install on the local machine.  Use this
    when logged in to the python.org machine (dinsdale).

-q, --quiet
    Turn off verbose messages.

-h, --help
    Print this help message and exit.

The optional arguments ``pseps`` are either psep numbers or .txt files.
"""
36
37
import sys
38
import os
39
import re
40
import cgi
41
import glob
42
import getopt
43
import errno
44
import random
45
import time
46
47
# Minimum versions quoted in the messages built by check_requirements().
REQUIRES = {'python': '2.2',
            'docutils': '0.2.7'}
# Substituted into the module usage text via %(PROGRAM)s.
PROGRAM = sys.argv[0]
# URL templates; each takes an integer RFC/PSEP number.
RFCURL = 'http://www.faqs.org/rfcs/rfc%d.html'
PSEPURL = 'psep-%04d.html'
PSEPCVSURL = ('http://qt.gitorious.org/pyside/pseps/blobs/history/master/psep-%04d.txt')
# NOTE: the name is spelled "RUL" (not "URL"); browse_remote() refers to it
# by this exact name, so it is kept as-is.
PSEPDIRRUL = 'http://www.pyside.org/docs/pseps/'


HOST = "dinsdale.python.org"                    # host for update
HDIR = "/data/ftp.python.org/pub/www.python.org/pseps" # target host directory
# A body line equal to this marker ends the conversion of a plaintext PSEP.
LOCALVARS = "Local Variables:"

# HTML comment written near the top of every generated plaintext-PSEP page.
COMMENT = """<!--
This HTML is auto-generated.  DO NOT EDIT THIS FILE!  If you are writing a new
PSEP, see http://www.pyside.org/docs/pseps/psep-0001.html for instructions and links
to templates.  DO NOT USE THIS HTML FILE AS YOUR TEMPLATE!
-->"""

# The generated HTML doesn't validate -- you cannot use <hr> and <h3> inside
# <pre> tags.  But if I change that, the result doesn't look very nice...
DTD = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">')
69
70
# Tokenizer used on body text: each match is either something worth
# hyperlinking (a URL, a "psep-NNNN[.txt]" file name, an RFC reference, a
# "PSEP n" reference) or, failing that, a single character.  Raw strings keep
# the backslash escapes intact, and the dot of ".txt" is escaped so that it
# only matches a literal ".".
fixpat = re.compile(r"((https?|ftp):[-_a-zA-Z0-9/.+~:?#$=&,]+)|(psep-\d+(\.txt)?)|"
                    r"(RFC[- ]?(?P<rfcnum>\d+))|"
                    r"(PSEP\s+(?P<psepnum>\d+))|"
                    r".")

# Named string constants used as join separators elsewhere in the module.
EMPTYSTRING = ''
SPACE = ' '
COMMASPACE = ', '
78
79
80
81
def usage(code, msg=''):
    """Print usage message and exit.  Uses stderr if code != 0.

    The usage text is the module docstring with ``%(NAME)s`` placeholders
    filled in from the module globals.  If *msg* is non-empty it is printed
    on its own line afterwards.  Always ends by calling ``sys.exit(code)``.
    """
    if code == 0:
        out = sys.stdout
    else:
        out = sys.stderr
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # an empty usage text rather than failing with a TypeError.
    out.write((__doc__ or '') % globals() + '\n')
    if msg:
        # msg may be an exception object (e.g. getopt.error), so format it.
        out.write('%s\n' % (msg,))
    sys.exit(code)
91
92
93
94
def fixanchor(current, match):
    """Return the HTML replacement for one ``fixpat`` match.

    *current* is the name of the PSEP being converted; a "psep-..." token
    equal to it is not turned into a link.  *match* is a match object
    produced by ``fixpat``.

    URLs, other "psep-NNNN[.txt]" names, "PSEP n" and "RFC n" references are
    wrapped in an ``<a href=...>`` element; anything else is returned
    HTML-escaped.
    """
    text = match.group(0)
    link = None
    if text.startswith(('http:', 'https:', 'ftp:')):
        # Strip off trailing punctuation.  Pattern taken from faqwiz.
        # Only the href is trimmed; the visible link text keeps it.
        link = text.rstrip('();:,.?\'"<>')
    elif text.startswith('psep-') and text != current:
        link = os.path.splitext(text)[0] + ".html"
    elif text.startswith('PSEP'):
        link = PSEPURL % int(match.group('psepnum'))
    elif text.startswith('RFC'):
        link = RFCURL % int(match.group('rfcnum'))
    if link:
        return '<a href="%s">%s</a>' % (cgi.escape(link), cgi.escape(text))
    return cgi.escape(text) # really slow, but it works...
118
119
120
121
# Addresses that fixemail() turns into mailto: links instead of masking.
# Compared against the lower-cased address, so entries must be lower case.
NON_MASKED_EMAILS = [
    'pyside@lists.pyside.org',
    ]
124
125
def fixemail(address, psepno):
    """Return an HTML rendering of the e-mail *address*.

    Addresses listed in NON_MASKED_EMAILS (case-insensitively) are returned
    as a mailto: hyperlink via linkemail(); *psepno* is passed through for
    the link's subject.  Any other address is masked by replacing the "@"
    with character references spelling " at ".  A string without "@" is
    returned unchanged instead of raising IndexError.
    """
    if address.lower() in NON_MASKED_EMAILS:
        # return hyperlinked version of email address
        return linkemail(address, psepno)
    # return masked version of email address
    user, at, domain = address.partition('@')
    if not at:
        return address
    return '%s&#32;&#97;t&#32;%s' % (user, domain)
133
134
135
def linkemail(address, psepno):
    """Return a mailto: hyperlink for *address*.

    The link's subject is "PSEP <psepno>".  Both the href and the visible
    text use character references in place of a literal "@".  The address is
    split at its first "@"; a string without "@" is returned unchanged
    instead of raising IndexError.
    """
    user, at, domain = address.partition('@')
    if not at:
        return address
    return ('<a href="mailto:%s&#64;%s?subject=PSEP%%20%s">'
            '%s&#32;&#97;t&#32;%s</a>'
            % (user, domain, psepno, user, domain))
140
141
142
def _read_plain_header(lines):
    """Consume the leading header block from the line iterator *lines*.

    Reading stops at (and consumes) the first blank line, or the first
    non-indented line that contains no colon.  Indented lines are treated as
    continuations and appended verbatim to the previous field's value; an
    indented line that precedes every field is ignored.

    Returns ``(header, psep, title)``: *header* is a list of ``(key, value)``
    pairs in file order, *psep* and *title* are the values of the "PSEP" and
    "Title" fields ("" when absent).
    """
    header = []
    psep = ""
    title = ""
    for line in lines:
        if not line.strip():
            break
        if line[0].strip():
            if ":" not in line:
                break
            key, value = line.split(":", 1)
            value = value.strip()
            header.append((key, value))
        elif header:
            # continuation line
            key, value = header[-1]
            value = value + line
            header[-1] = key, value
        else:
            # Nothing to continue yet.
            continue
        field = key.lower()
        if field == "title":
            title = value
        elif field == "psep":
            psep = value
    return header, psep, title


def _format_header_value(key, value, psep, inpath):
    """Return the HTML for the body cell of one header field."""
    field = key.lower()
    if field in ('author', 'discussions-to'):
        try:
            from email.utils import parseaddr
        except ImportError:
            # Older Python 2 releases only provide the capitalized module.
            from email.Utils import parseaddr
        mailtos = []
        for part in re.split(r',\s*', value):
            if '@' in part:
                realname, addr = parseaddr(part)
                if '@' not in addr:
                    # parseaddr() could not extract an address; show the
                    # raw text rather than failing inside the e-mail helpers.
                    mailtos.append(cgi.escape(part))
                    continue
                if field == 'discussions-to':
                    m = linkemail(addr, psep)
                else:
                    m = fixemail(addr, psep)
                mailtos.append('%s &lt;%s&gt;' % (cgi.escape(realname), m))
            elif part.startswith(('http:', 'https:')):
                mailtos.append(
                    '<a href="%s">%s</a>' % (cgi.escape(part, 1),
                                             cgi.escape(part)))
            else:
                mailtos.append(cgi.escape(part))
        return COMMASPACE.join(mailtos)
    if field in ('replaces', 'replaced-by', 'requires'):
        links = []
        for token in re.split(r'[,\s]+', value):
            if not token:
                # Empty field, or leading/trailing separator.
                continue
            number = int(token)
            links.append('<a href="%s">%i</a> ' % (PSEPURL % number, number))
        return EMPTYSTRING.join(links)
    if field == 'last-modified':
        # The date shown is the source file's modification time, not the
        # text found in the header.
        date = time.strftime('%d-%b-%Y',
                             time.localtime(os.stat(inpath).st_mtime))
        try:
            number = int(psep)
        except ValueError:
            # No usable PSEP number: there is no history URL to link to.
            number = 0
        if number != 0:
            return '<a href="%s">%s</a> ' % (PSEPCVSURL % number,
                                             cgi.escape(date))
        return date
    if field == 'content-type':
        # Content types are linked to PSEP 9.
        return '<a href="%s">%s</a> ' % (PSEPURL % 9,
                                         cgi.escape(value or 'text/plain'))
    return cgi.escape(value)


def fixfile(inpath, input_lines, outfile):
    """Convert a plaintext PSEP to minimal XHTML markup.

    *inpath* is the path of the source file (used for its basename and its
    modification time), *input_lines* the file's lines with line endings
    kept, and *outfile* a writable file object that receives the HTML.

    The header block becomes a table.  In the body, non-indented lines become
    ``<h3>`` headings and indented text is copied into ``<pre>`` blocks with
    URLs and PSEP/RFC references hyperlinked.  Form-feed lines are dropped
    and conversion stops at the "Local Variables:" marker.
    """
    basename = os.path.basename(inpath)
    infile = iter(input_lines)

    def emit(text):
        outfile.write(text + '\n')

    emit(DTD)
    emit('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">')
    emit(COMMENT)
    emit('<head>')
    # head
    header, psep, title = _read_plain_header(infile)
    if psep:
        title = "PSEP " + psep + " -- " + title
    if title:
        emit('  <title>%s</title>' % cgi.escape(title))
    emit(
        '  <link rel="STYLESHEET" href="psep.css" type="text/css" />\n'
        '</head>\n'
        '<body>\n'
        '<div id="header">\n'
        '<a href="http://www.pyside.org/" title="Python Home Page">\n'
        '<img src="http://www.pyside.org/wp-content/themes/openbossa/images/logo.png" alt="[PySide]"\n'
        'border="0" width="199" height="102" /></a>\n'
        '[<b><a href="http://www.pyside.org/">PySide Home</a></b>]\n'
        '</div>\n'
        '<div id="content">\n')
    if basename != 'psep-0000.txt':
        emit('[<b><a href=".">PSEP Index</a></b>]')
    emit('<table border="0">')
    for k, v in header:
        emit('  <tr class="field"><th class="field-name">%s:&nbsp;</th><td class="field-body">%s</td></tr>'
             % (cgi.escape(k), _format_header_value(k, v, psep, inpath)))
    emit('</table>')
    emit('<hr />')

    def link_references(text):
        # The basename is what a "psep-NNNN.txt" token in the text can
        # actually equal, so use it for the "don't link to myself" check.
        return fixpat.sub(lambda m: fixanchor(basename, m), text)

    in_pre = False
    for line in infile:
        if line[0] == '\f':
            continue
        if line.strip() == LOCALVARS:
            break
        if line[0].strip():
            # Non-indented line: a section heading.
            if in_pre:
                emit('</pre>')
                in_pre = False
            emit('<h3>%s</h3>' % cgi.escape(line.strip()))
            continue
        if not line.strip() and not in_pre:
            # Skip blank lines between a heading and its text.
            continue
        if basename == 'psep-0000.txt':
            # PSEP 0 has some special treatment
            parts = line.split()
            if len(parts) > 1 and re.match(r'\d+$', parts[1]):
                # This is a PSEP summary line, which we need to hyperlink
                url = PSEPURL % int(parts[1])
                line = line.replace(
                    parts[1], '<a href="%s">%s</a>' % (url, parts[1]), 1)
            elif parts and '@' in parts[-1]:
                # This is a psep email address line, so filter it.
                # Plain replace: the address is text, not a regex.
                line = line.replace(parts[-1], fixemail(parts[-1], psep), 1)
            else:
                line = link_references(line)
        else:
            line = link_references(line)
        if not in_pre:
            emit('<pre>')
            in_pre = True
        outfile.write(line)
    if in_pre:
        emit('</pre>')
    emit('</div>')
    emit('</body>')
    emit('</html>')
281
282
283
# Runtime settings object used by Docutils.  Can be set by the client
# application when this module is imported.
docutils_settings = None
286
287
def fix_rst_psep(inpath, input_lines, outfile):
    """Convert a reStructuredText PSEP to HTML using Docutils.

    *input_lines* are joined and published with the 'psep' reader and the
    'psep_html' writer, using the module-level ``docutils_settings``.  Every
    "$Date$" in the result is replaced by the modification date of *inpath*
    before the result is written to *outfile*.
    """
    from docutils import core
    output = core.publish_string(
        source=''.join(input_lines),
        source_path=inpath,
        destination_path=outfile.name,
        reader_name='psep',
        parser_name='restructuredtext',
        writer_name='psep_html',
        settings=docutils_settings,
        # Allow Docutils traceback if there's an exception:
        settings_overrides={'traceback': 1})

    date = time.strftime('%d-%b-%Y', time.localtime(os.stat(inpath).st_mtime))
    output = output.replace('$Date$', date)
    outfile.write(output)
303
304
305
def get_psep_type(input_lines):
    """
    Return the Content-Type of the input.  "text/plain" is the default.
    Return ``None`` if the input is not a PSEP.

    Only the header (everything before the first blank line) is examined,
    case-insensitively.  A Content-Type header decides the result at once;
    parameters after a ";" (such as a charset) are dropped and an empty
    value means "text/plain".  Otherwise a "PSEP: " header marks the input
    as a text/plain PSEP.
    """
    psep_type = None
    prefix = 'content-type:'
    for line in input_lines:
        line = line.rstrip().lower()
        if not line:
            # End of the RFC 2822 header (first blank line).
            break
        if line.startswith(prefix):
            media_type = line[len(prefix):].split(';', 1)[0].strip()
            psep_type = media_type or 'text/plain'
            break
        if line.startswith('psep: '):
            # Default PSEP type, used if no explicit content-type specified:
            psep_type = 'text/plain'
    return psep_type
323
324
325
def get_input_lines(inpath):
    """Return the lines of the file *inpath*, line endings included.

    If the file does not exist, an error message is written to stderr and
    ``None`` is returned.  Any other IOError from opening the file is
    re-raised.
    """
    try:
        infile = open(inpath)
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
        sys.stderr.write('Error: Skipping missing PSEP file: %s\n'
                         % (e.filename,))
        sys.stderr.flush()
        return None
    try:
        # handles x-platform line endings
        return infile.read().splitlines(True)
    finally:
        # Close the file even if reading fails.
        infile.close()
336
337
338
def find_psep(psep_str):
    """Find the .txt file indicated by a cmd line argument.

    An argument naming an existing path is returned unchanged.  Otherwise it
    must be a PSEP number, which is mapped to "psep-NNNN.txt" (the file is
    not checked for existence).

    Raises ValueError if the argument is neither.
    """
    if os.path.exists(psep_str):
        return psep_str
    try:
        num = int(psep_str)
    except ValueError:
        raise ValueError('%r is neither an existing file nor a PSEP number'
                         % (psep_str,))
    return "psep-%04d.txt" % num
344
345
def make_html(inpath, verbose=0):
    """Convert the PSEP source file *inpath* to HTML next to it.

    The output path is *inpath* with its extension replaced by ".html"; the
    file's mode is set to 0664 afterwards.  When *verbose* is true a progress
    line is written to stdout.

    Returns the output path, or ``None`` (after reporting on stderr) when the
    input is missing, is not a PSEP, or has a content type that is unknown or
    currently unavailable.
    """
    input_lines = get_input_lines(inpath)
    if input_lines is None:
        return None
    psep_type = get_psep_type(input_lines)
    if psep_type is None:
        sys.stderr.write('Error: Input file %s is not a PSEP.\n' % (inpath,))
        sys.stderr.flush()
        return None
    if psep_type not in PSEP_TYPE_DISPATCH:
        sys.stderr.write('Error: Unknown PSEP type for input file %s: %s\n'
                         % (inpath, psep_type))
        sys.stderr.flush()
        return None
    converter = PSEP_TYPE_DISPATCH[psep_type]
    if converter is None:
        # Known type whose converter was disabled by check_requirements().
        psep_type_error(inpath, psep_type)
        return None
    outpath = os.path.splitext(inpath)[0] + ".html"
    if verbose:
        sys.stdout.write('%s (%s) -> %s\n' % (inpath, psep_type, outpath))
        sys.stdout.flush()
    outfile = open(outpath, "w")
    try:
        converter(inpath, input_lines, outfile)
    finally:
        # Do not leak the handle when the converter raises.
        outfile.close()
    os.chmod(outpath, 0o664)
    return outpath
371
372
def push_psep(htmlfiles, txtfiles, username, verbose, local=0):
    """Copy generated HTML, PSEP sources and style sheets to the web directory.

    With *local* true the files are copied with ``cp`` into HDIR (``-v`` when
    *verbose*); otherwise they are sent with ``scp`` to HDIR on HOST (``-q``
    unless *verbose*), as *username* when one is given.  "style.css" and
    "psep.css" are always included.

    The copy command is run without a shell, so file names are passed
    through verbatim.  If it fails, or cannot be started, the program exits
    with a non-zero status.
    """
    import subprocess

    if local:
        command = ["cp"]
        if verbose:
            command.append("-v")
        target = HDIR
    else:
        command = ["scp"]
        if not verbose:
            command.append("-q")
        if username:
            username = username + "@"
        target = username + HOST + ":" + HDIR
    command.extend(htmlfiles)
    command.extend(txtfiles)
    command.append("style.css")
    command.append("psep.css")
    command.append(target)
    try:
        # subprocess.call() returns the command's exit code itself, so it can
        # be handed to sys.exit() directly.
        rc = subprocess.call(command)
    except OSError as e:
        sys.exit('Error: cannot run %s: %s' % (command[0], e))
    if rc:
        sys.exit(rc)
    # NOTE: the original source carried a commented-out "chmod 664" pass over
    # HDIR after the copy; it was disabled there and is not performed here.
399
400
401
# Maps a PSEP content type to the function that converts it.
# check_requirements() replaces an entry with None when that converter's
# prerequisites are missing and stores the explanation in PSEP_TYPE_MESSAGES.
PSEP_TYPE_DISPATCH = {'text/plain': fixfile,
                      'text/x-rst': fix_rst_psep}
PSEP_TYPE_MESSAGES = {}
404
405
def _version_key(version):
    """Return the leading numeric components of a dotted version as ints."""
    key = []
    for part in version.split('.'):
        digits = re.match(r'\d+', part)
        if digits is None:
            break
        key.append(int(digits.group()))
    return key


def check_requirements():
    """Disable converters whose prerequisites are not available.

    For each content type that cannot be processed, the entry in
    PSEP_TYPE_DISPATCH is set to None and a message template (with
    %(psep_type)s and %(inpath)s placeholders) is stored in
    PSEP_TYPE_MESSAGES.
    """
    # Check Python:
    try:
        try:
            from email.utils import parseaddr
        except ImportError:
            # Older Python 2 releases only provide the capitalized module.
            from email.Utils import parseaddr
    except ImportError:
        PSEP_TYPE_DISPATCH['text/plain'] = None
        PSEP_TYPE_MESSAGES['text/plain'] = (
            'Python %s or better required for "%%(psep_type)s" PSEP '
            'processing; %s present (%%(inpath)s).'
            % (REQUIRES['python'], sys.version.split()[0]))
    # Check Docutils:
    try:
        import docutils
    except ImportError:
        PSEP_TYPE_DISPATCH['text/x-rst'] = None
        PSEP_TYPE_MESSAGES['text/x-rst'] = (
            'Docutils not present for "%(psep_type)s" PSEP file %(inpath)s.  '
            'See README.txt for installation.')
    else:
        # Compare numeric components only, so that version strings with a
        # non-numeric suffix (e.g. "0.21rc1") do not raise ValueError.
        installed = _version_key(docutils.__version__)
        required = _version_key(REQUIRES['docutils'])
        if installed < required:
            PSEP_TYPE_DISPATCH['text/x-rst'] = None
            PSEP_TYPE_MESSAGES['text/x-rst'] = (
                'Docutils must be reinstalled for "%%(psep_type)s" PSEP '
                'processing (%%(inpath)s).  Version %s or better required; '
                '%s present.  See README.txt for installation.'
                % (REQUIRES['docutils'], docutils.__version__))
433
434
def psep_type_error(inpath, psep_type):
    """Report on stderr why PSEPs of *psep_type* cannot be processed.

    The text comes from PSEP_TYPE_MESSAGES[psep_type], with its %(inpath)s
    and %(psep_type)s placeholders filled in from the arguments.
    """
    details = {'inpath': inpath, 'psep_type': psep_type}
    sys.stderr.write('Error: ' + PSEP_TYPE_MESSAGES[psep_type] % details
                     + '\n')
    # Flush the stream that was actually written to.
    sys.stderr.flush()
437
438
439
def browse_file(psep):
    """Open the locally generated HTML for *psep* in the web browser.

    *psep* is resolved with find_psep(); a ".txt" name is mapped to the
    matching ".html" name, which is opened through a "file:" URL built from
    its absolute path.
    """
    import webbrowser
    path = find_psep(psep)
    if path.endswith(".txt"):
        path = path[:-3] + "html"
    url = "file:" + os.path.abspath(path)
    webbrowser.open(url)
447
448
def browse_remote(psep):
    """Open the on-line HTML for *psep* in the web browser.

    *psep* is resolved with find_psep(); a ".txt" name is mapped to the
    matching ".html" name, which is appended to the PSEPDIRRUL base URL.
    """
    import webbrowser
    name = find_psep(psep)
    if name.endswith(".txt"):
        name = name[:-3] + "html"
    url = PSEPDIRRUL + name
    webbrowser.open(url)
455
456
457
def main(argv=None):
    """Command-line entry point.

    *argv* is the list of command-line arguments without the program name;
    it defaults to ``sys.argv[1:]``.  Each named PSEP (or, with no
    arguments, every "psep-*.txt" in the current directory) is converted
    with make_html().  With -i/-l the results are then installed with
    push_psep(), and with -b they are opened in the web browser.
    """
    # defaults
    update = 0
    local = 0
    username = ''
    verbose = 1
    browse = 0

    check_requirements()

    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, args = getopt.getopt(
            argv, 'bilhqu:',
            ['browse', 'install', 'local', 'help', 'quiet', 'user='])
    except getopt.error as msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-i', '--install'):
            update = 1
        elif opt in ('-l', '--local'):
            update = 1
            local = 1
        elif opt in ('-u', '--user'):
            username = arg
        elif opt in ('-q', '--quiet'):
            verbose = 0
        elif opt in ('-b', '--browse'):
            browse = 1

    pseptxt = []
    html = []
    if args:
        for psep in args:
            try:
                path = find_psep(psep)
            except ValueError as err:
                # Neither an existing file nor a number: report it as a
                # usage error instead of a traceback.
                usage(1, 'Error: %s' % (err,))
            pseptxt.append(path)
            newfile = make_html(path, verbose=verbose)
            if newfile:
                html.append(newfile)
                if browse and not update:
                    browse_file(psep)
    else:
        # do them all
        paths = glob.glob("psep-*.txt")
        paths.sort()
        for path in paths:
            pseptxt.append(path)
            newfile = make_html(path, verbose=verbose)
            if newfile:
                html.append(newfile)
        if browse and not update:
            browse_file("0")

    if update:
        push_psep(html, pseptxt, username, verbose, local=local)
        if browse:
            # With no arguments, browse PSEP 0.
            for psep in (args or ["0"]):
                browse_remote(psep)
525
526
527
528
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()