1
"""PyRSS2Gen - A Python library for generating RSS 2.0 feeds."""
2
3
# Package metadata.  Note that this deliberately rebinds the module's
# __name__ so the generator string below always reads "PyRSS2Gen".
__name__ = "PyRSS2Gen"
__version__ = (1, 0, 0)
__author__ = "Andrew Dalke <dalke@dalkescientific.com>"

# Default text for a channel's <generator> element: the library name
# followed by the dotted version, e.g. "PyRSS2Gen-1.0.0".
_generator_name = "%s-%s" % (
    __name__, ".".join([str(part) for part in __version__]))
8
9
import datetime
10
11
# Could make this the base class; will need to add 'publish'
12
class WriteXmlMixin:
    """Mixin that serialises an object to XML through its 'publish' method.

    The class using this mixin must define ``publish(handler)``, which
    emits the object's content as SAX events on the given handler.
    """

    def write_xml(self, outfile, encoding = "utf-8"):
        """Write the object as a complete XML document to 'outfile'.

        'outfile' is a writable file-like object and 'encoding' is the
        encoding name handed to the XML generator (it also appears in
        the XML declaration).
        """
        from xml.sax import saxutils
        handler = saxutils.XMLGenerator(outfile, encoding)
        handler.startDocument()
        self.publish(handler)
        handler.endDocument()

    def to_xml(self, encoding = "utf-8"):
        """Return the object as a complete XML document in a string.

        The document is produced by write_xml() into an in-memory buffer.
        """
        # cStringIO and StringIO only exist on Python 2.  Fall back to
        # io.StringIO so the method also works where they are missing
        # (Python 3) instead of failing with ImportError.
        try:
            from cStringIO import StringIO
        except ImportError:
            try:
                from StringIO import StringIO
            except ImportError:
                from io import StringIO
        f = StringIO()
        self.write_xml(f, encoding)
        return f.getvalue()
28
29
30
def _element(handler, name, obj, d = None):
    """Emit one XML element for 'obj' on the SAX 'handler'.

    If 'obj' is a string or None, an element called 'name' is written
    with the attributes in 'd' (no attributes when 'd' is omitted); a
    string becomes the element's text and None leaves the element empty.

    Any other object is expected to provide ``publish(handler)`` and is
    asked to emit itself; 'name' and 'd' are not used in that case.
    """
    # A fresh dict per call: a shared default dict would be visible to
    # (and mutable by) every handler it is passed to.
    if d is None:
        d = {}

    # 'basestring' only exists on Python 2; use 'str' everywhere else.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if obj is None or isinstance(obj, string_types):
        # special-case handling to make the API easier
        # to use for the common case.
        handler.startElement(name, d)
        if obj is not None:
            handler.characters(obj)
        handler.endElement(name)
    else:
        # It better know how to emit the correct XML.
        obj.publish(handler)
41
42
def _opt_element(handler, name, obj):
    """Emit element 'name' for 'obj' via _element, skipping None.

    Nothing at all is written when 'obj' is None.
    """
    if obj is not None:
        _element(handler, name, obj)
46
47
48
def _format_date(dt):
    """convert a datetime into an RFC 822 formatted date

    A naive datetime must already be in GMT; it is formatted as given.
    A timezone-aware datetime is converted to GMT first, so the "GMT"
    label in the result is always correct.

    Returns a string such as "Sat, 07 Sep 2002 00:00:01 GMT".
    """
    # An aware datetime in another zone would otherwise be printed with
    # its local wall-clock fields but labelled GMT.
    offset = dt.utcoffset()
    if offset is not None:
        dt = (dt - offset).replace(tzinfo=None)

    # Can't use strftime because that's locale dependent, so the day and
    # month names are spelled out here (based on the rfc822 module).
    day_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
    month_names = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
            day_names[dt.weekday()],
            dt.day,
            month_names[dt.month-1],
            dt.year, dt.hour, dt.minute, dt.second)
66
67
        
68
##
69
# A couple of simple wrapper objects for the fields which
# take a simple value other than a string.
71
class IntElement:
    """implements the 'publish' API for integers

    Built from the tag name and the integer value to publish.  The value
    is turned into text with str(), so any object with a suitable str()
    form can be published the same way.
    """
    # Attributes written on the element's start tag.
    element_attrs = {}

    def __init__(self, name, val):
        self.name, self.val = name, val

    def publish(self, handler):
        """Emit <name>str(val)</name> on 'handler'."""
        tag = self.name
        handler.startElement(tag, self.element_attrs)
        text = str(self.val)
        handler.characters(text)
        handler.endElement(tag)
87
88
class DateElement:
    """implements the 'publish' API for a datetime.datetime

    Built from the tag name and the datetime to publish.

    The datetime is written as an RFC 2822 timestamp (4-digit year).
    """

    def __init__(self, name, dt):
        self.name, self.dt = name, dt

    def publish(self, handler):
        """Emit the formatted date as the text of element 'name'."""
        stamp = _format_date(self.dt)
        _element(handler, self.name, stamp)
100
####
101
102
class Category:
    """Publish a category element

    'category' is the element's content; 'domain', when given, becomes
    the element's "domain" attribute.
    """

    def __init__(self, category, domain=None):
        self.category, self.domain = category, domain

    def publish(self, handler):
        """Emit the <category> element on 'handler'."""
        attrs = {}
        domain = self.domain
        if domain is not None:
            attrs["domain"] = domain
        _element(handler, "category", self.category, attrs)
112
113
class Cloud:
    """Publish a cloud

    All five constructor values are written as attributes of an empty
    <cloud> element; 'port' is converted with str().
    """

    def __init__(self, domain, port, path,
                 registerProcedure, protocol):
        self.domain, self.port, self.path = domain, port, path
        self.registerProcedure = registerProcedure
        self.protocol = protocol

    def publish(self, handler):
        """Emit the attribute-only <cloud> element on 'handler'."""
        attrs = {}
        attrs["domain"] = self.domain
        attrs["port"] = str(self.port)
        attrs["path"] = self.path
        attrs["registerProcedure"] = self.registerProcedure
        attrs["protocol"] = self.protocol
        _element(handler, "cloud", None, attrs)
129
130
class Image:
    """Publish a channel Image

    'url', 'title' and 'link' are always emitted; 'width', 'height' and
    'description' are emitted only when they are not None.
    """
    # Attributes written on the <image> start tag.
    element_attrs = {}

    def __init__(self, url, title, link,
                 width=None, height=None, description=None):
        self.url, self.title, self.link = url, title, link
        self.width, self.height = width, height
        self.description = description

    def publish(self, handler):
        """Emit the <image> element and its children on 'handler'."""
        handler.startElement("image", self.element_attrs)

        for tag in ("url", "title", "link"):
            _element(handler, tag, getattr(self, tag))

        # Plain ints are wrapped so they are published through str();
        # any other value is handed to _opt_element unchanged.
        for tag in ("width", "height"):
            size = getattr(self, tag)
            if isinstance(size, int):
                size = IntElement(tag, size)
            _opt_element(handler, tag, size)

        _opt_element(handler, "description", self.description)

        handler.endElement("image")
162
163
class Guid:
    """Publish a guid

    A guid is a permalink unless told otherwise, which is also what is
    assumed when the attribute is omitted.  Hence strings are always
    permalinks.
    """

    def __init__(self, guid, isPermaLink=1):
        self.guid, self.isPermaLink = guid, isPermaLink

    def publish(self, handler):
        """Emit <guid> with an explicit isPermaLink="true"/"false"."""
        flag = "false"
        if self.isPermaLink:
            flag = "true"
        _element(handler, "guid", self.guid, {"isPermaLink": flag})
179
180
class TextInput:
    """Publish a textInput

    Apparently this is rarely used.
    """
    # Attributes written on the <textInput> start tag.
    element_attrs = {}

    def __init__(self, title, description, name, link):
        self.title, self.description = title, description
        self.name, self.link = name, link

    def publish(self, handler):
        """Emit <textInput> with its four child elements on 'handler'."""
        handler.startElement("textInput", self.element_attrs)
        for tag in ("title", "description", "name", "link"):
            _element(handler, tag, getattr(self, tag))
        handler.endElement("textInput")
199
        
200
201
class Enclosure:
    """Publish an enclosure

    The three constructor values become attributes of an empty
    <enclosure> element; 'length' is converted with str().
    """

    def __init__(self, url, length, type):
        self.url, self.length, self.type = url, length, type

    def publish(self, handler):
        """Emit the attribute-only <enclosure> element on 'handler'."""
        attrs = {}
        attrs["url"] = self.url
        attrs["length"] = str(self.length)
        attrs["type"] = self.type
        _element(handler, "enclosure", None, attrs)
213
214
class Source:
    """Publish the item's original source, used by aggregators

    'name' is the element's content and 'url' its "url" attribute.
    """

    def __init__(self, name, url):
        self.name, self.url = name, url

    def publish(self, handler):
        """Emit the <source> element on 'handler'."""
        attrs = {"url": self.url}
        _element(handler, "source", self.name, attrs)
221
222
class SkipHours:
    """Publish the skipHours

    This takes a list of hours, as integers.
    """
    # Attributes written on the <skipHours> start tag.
    element_attrs = {}

    def __init__(self, hours):
        self.hours = hours

    def publish(self, handler):
        """Emit <skipHours> with one <hour> child per entry.

        Nothing is written when the list of hours is empty.
        """
        if not self.hours:
            return
        handler.startElement("skipHours", self.element_attrs)
        for value in self.hours:
            _element(handler, "hour", str(value))
        handler.endElement("skipHours")
236
237
class SkipDays:
    """Publish the skipDays

    This takes a list of days as strings.
    """
    # Attributes written on the <skipDays> start tag.
    element_attrs = {}

    def __init__(self, days):
        self.days = days

    def publish(self, handler):
        """Emit <skipDays> with one <day> child per entry.

        Nothing is written when the list of days is empty.
        """
        if not self.days:
            return
        handler.startElement("skipDays", self.element_attrs)
        for name in self.days:
            _element(handler, "day", name)
        handler.endElement("skipDays")
251
252
class RSS2(WriteXmlMixin):
    """The main RSS class.

    Stores the channel attributes, with the "category" elements under
    ".categories" and the RSS items under ".items".

    publish() emits an <rss> element holding a single <channel>; the
    write_xml() and to_xml() methods come from WriteXmlMixin.
    """

    # Attributes written on the <rss> and <channel> start tags.
    rss_attrs = {"version": "2.0"}
    element_attrs = {}

    def __init__(self,
                 title,
                 link,
                 description,

                 language = None,
                 copyright = None,
                 managingEditor = None,
                 webMaster = None,
                 pubDate = None,  # a datetime, *in* *GMT*
                 lastBuildDate = None, # a datetime

                 categories = None, # list of strings or Category
                 generator = _generator_name,
                 docs = "http://blogs.law.harvard.edu/tech/rss",
                 cloud = None,    # a Cloud
                 ttl = None,      # integer number of minutes

                 image = None,     # an Image
                 rating = None,    # a string; I don't know how it's used
                 textInput = None, # a TextInput
                 skipHours = None, # a SkipHours with a list of integers
                 skipDays = None,  # a SkipDays with a list of strings

                 items = None,     # list of RSSItems
                 ):
        """Store the channel fields.

        'title', 'link' and 'description' are required; every other
        field is optional and is left out of the output when it is None.
        'categories' and 'items' default to fresh empty lists.
        """
        self.title = title
        self.link = link
        self.description = description
        self.language = language
        self.copyright = copyright
        self.managingEditor = managingEditor

        self.webMaster = webMaster
        self.pubDate = pubDate
        self.lastBuildDate = lastBuildDate

        if categories is None:
            categories = []
        self.categories = categories
        self.generator = generator
        self.docs = docs
        self.cloud = cloud
        self.ttl = ttl
        self.image = image
        self.rating = rating
        self.textInput = textInput
        self.skipHours = skipHours
        self.skipDays = skipDays

        if items is None:
            items = []
        self.items = items

    def publish(self, handler):
        """Emit the whole feed as SAX events on 'handler'.

        The three required channel fields come first, then whatever
        publish_extensions() writes, then the optional fields, and
        finally every item in ".items".
        """
        # 'basestring' only exists on Python 2; use 'str' everywhere else.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        handler.startElement("rss", self.rss_attrs)
        handler.startElement("channel", self.element_attrs)
        _element(handler, "title", self.title)
        _element(handler, "link", self.link)
        _element(handler, "description", self.description)

        self.publish_extensions(handler)

        _opt_element(handler, "language", self.language)
        _opt_element(handler, "copyright", self.copyright)
        _opt_element(handler, "managingEditor", self.managingEditor)
        _opt_element(handler, "webMaster", self.webMaster)

        # datetimes are wrapped so they are formatted as RFC 822 dates;
        # any other non-None value is published as given.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        lastBuildDate = self.lastBuildDate
        if isinstance(lastBuildDate, datetime.datetime):
            lastBuildDate = DateElement("lastBuildDate", lastBuildDate)
        _opt_element(handler, "lastBuildDate", lastBuildDate)

        # Plain strings are shorthand for a Category without a domain.
        for category in self.categories:
            if isinstance(category, string_types):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, "generator", self.generator)
        _opt_element(handler, "docs", self.docs)

        if self.cloud is not None:
            self.cloud.publish(handler)

        ttl = self.ttl
        if isinstance(ttl, int):
            ttl = IntElement("ttl", ttl)
        # The element is named "ttl".  The tag passed here is the one used
        # when ttl is given as a string, so it must not be misspelled.
        _opt_element(handler, "ttl", ttl)

        if self.image is not None:
            self.image.publish(handler)

        _opt_element(handler, "rating", self.rating)
        if self.textInput is not None:
            self.textInput.publish(handler)
        if self.skipHours is not None:
            self.skipHours.publish(handler)
        if self.skipDays is not None:
            self.skipDays.publish(handler)

        for item in self.items:
            item.publish(handler)

        handler.endElement("channel")
        handler.endElement("rss")

    def publish_extensions(self, handler):
        """Hook for subclasses; the default writes nothing."""
        # Derived classes can hook into this to insert
        # output after the three required fields.
        pass
376
377
    
378
    
379
class RSSItem(WriteXmlMixin):
    """Publish an RSS Item

    publish() emits one <item> element; the write_xml() and to_xml()
    methods come from WriteXmlMixin.
    """
    # Attributes written on the <item> start tag.
    element_attrs = {}

    def __init__(self,
                 title = None,  # string
                 link = None,   # url as string
                 description = None, # string
                 author = None,      # email address as string
                 categories = None,  # list of string or Category
                 comments = None,  # url as string
                 enclosure = None, # an Enclosure
                 guid = None,    # a unique string
                 pubDate = None, # a datetime
                 source = None,  # a Source
                 ):
        """Store the item fields.

        Every field is optional, but at least one of 'title' and
        'description' must be given.  'categories' defaults to a fresh
        empty list.

        Raises TypeError when both 'title' and 'description' are None.
        """
        if title is None and description is None:
            raise TypeError(
                "must define at least one of 'title' or 'description'")
        self.title = title
        self.link = link
        self.description = description
        self.author = author
        if categories is None:
            categories = []
        self.categories = categories
        self.comments = comments
        self.enclosure = enclosure
        self.guid = guid
        self.pubDate = pubDate
        self.source = source

    def publish(self, handler):
        """Emit the <item> element as SAX events on 'handler'.

        Fields that are None are left out.  Whatever
        publish_extensions() writes goes right after title and link.
        """
        # 'basestring' only exists on Python 2; use 'str' everywhere else.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        handler.startElement("item", self.element_attrs)
        _opt_element(handler, "title", self.title)
        _opt_element(handler, "link", self.link)
        self.publish_extensions(handler)
        _opt_element(handler, "description", self.description)
        _opt_element(handler, "author", self.author)

        # Plain strings are shorthand for a Category without a domain.
        for category in self.categories:
            if isinstance(category, string_types):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, "comments", self.comments)
        if self.enclosure is not None:
            self.enclosure.publish(handler)
        _opt_element(handler, "guid", self.guid)

        # A datetime is wrapped so it is formatted as an RFC 822 date;
        # any other non-None value is published as given.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        if self.source is not None:
            self.source.publish(handler)

        handler.endElement("item")

    def publish_extensions(self, handler):
        """Hook for subclasses; the default writes nothing."""
        # Derived classes can hook into this to insert
        # output after the title and link elements
        pass