root/eridanus/util.py

Revision 190, 11.4 kB (checked in by Jonathan Jacobs <korpse@…>, 16 months ago)

Support years in humanReadableTimeDelta.
Ignore-this: 2c5be88564921275d50200c5524b5e6

Line 
1import re, math, fnmatch, itertools, warnings, htmlentitydefs
2
3from twisted.internet import reactor, task, error as ineterror
4from twisted.web import client, http, error as weberror
5from twisted.python import log
6
7from nevow.url import URL
8from nevow.rend import Page, Fragment
9
10from xmantissa import website
11from xmantissa.webtheme import _ThemedMixin, SiteTemplateResolver
12
13from eridanus import const, errors
14
15
16# XXX: do we need this crap? all of it?
class _PublicThemedMixin(_ThemedMixin):
    """
    Themed mixin whose document factories are looked up through a
    C{SiteTemplateResolver} built from C{self.store}.
    """
    def getDocFactory(self, fragmentName, default=None):
        """
        Resolve the document factory for C{fragmentName}.

        @param fragmentName: Name of the template fragment to look up

        @param default: Passed through to the resolver as its default value

        @return: The result of C{SiteTemplateResolver.getDocFactory}
        """
        return SiteTemplateResolver(self.store).getDocFactory(
            fragmentName, default)
21
22
class ThemedPage(_PublicThemedMixin, Page):
    """
    A C{Page} that fills in its document factory from L{fragmentName} at
    render time when none has been assigned.

    @cvar fragmentName: Name of the template fragment used to look up the
        document factory
    """
    fragmentName = 'page-no-fragment-name-specified'

    def renderHTTP(self, ctx):
        """
        Ensure C{self.docFactory} is set, then render via the superclass.
        """
        factory = self.docFactory
        if factory is None:
            factory = self.getDocFactory(self.fragmentName)
            self.docFactory = factory
        return super(ThemedPage, self).renderHTTP(ctx)
30
31
class ThemedFragment(_PublicThemedMixin, Fragment):
    """
    A C{Fragment} bound to a store, from which themed templates are resolved.

    @ivar store: The store given at construction time

    @cvar fragmentName: Name of the template fragment for this fragment
    """
    fragmentName = 'fragment-no-fragment-name-specified'

    def __init__(self, store, **kw):
        """
        @param store: The store to remember as C{self.store}

        @param kw: Keyword arguments forwarded to the superclass initializer
        """
        # Assigned before the superclass initializer runs, as in the
        # original ordering.
        self.store = store
        super(ThemedFragment, self).__init__(**kw)
38
39
class PerseverantDownloader(object):
    """
    Perseverantly attempt to download a URL.

    The first retry is delayed by L{initialDelay}; each subsequent delay is
    multiplied by L{factor}, up to a maximum of L{maxDelay}.

    @type url: C{nevow.url.URL}
    @ivar url: The HTTP URL to attempt to download

    @type maxDelay: C{float}
    @cvar maxDelay: Maximum delay, in seconds, between retry attempts

    @type initialDelay: C{float}
    @cvar initialDelay: The delay before the first retry attempt

    @type factor: C{float}
    @cvar factor: The factor to increase the delay by after each attempt

    @type retryableHTTPCodes: C{list}
    @cvar retryableHTTPCodes: HTTP error codes that suggest the error is
        intermittent and that a retry should be attempted

    @type defaultTimeout: C{float}
    @cvar defaultTimeout: Default fetch timeout value
    """
    maxDelay = 3600
    initialDelay = 1.0
    factor = 1.6180339887498948

    retryableHTTPCodes = [408, 500, 502, 503, 504]

    defaultTimeout = 300.0

    def __init__(self, url, tries=10, timeout=defaultTimeout, *a, **kw):
        """
        Prepare the download information.

        Any additional positional or keyword arguments are passed on to
        L{getPage} for every attempt.

        @type url: C{nevow.url.URL} or C{unicode} or C{str}
        @param url: The HTTP URL to attempt to download

        @type tries: C{int}
        @param tries: The maximum number of retry attempts before giving up

        @type timeout: C{float}
        @param timeout: Timeout value, in seconds, for the page fetch;
            defaults to L{defaultTimeout}
        """
        if isinstance(url, unicode):
            url = url.encode('utf-8')
        if isinstance(url, str):
            url = URL.fromString(url)

        # The fragment is never sent to the server, so drop it.
        self.url = url.anchor(None)
        self.args = a
        self.kwargs = kw
        self.delay = self.initialDelay
        self.tries = tries
        self.timeout = timeout

    def __repr__(self):
        return '<%s %s>' % (type(self).__name__, self.url)

    def go(self):
        """
        Attempt to download L{self.url}.

        @rtype: C{Deferred}
        @return: A deferred that fires with C{(data, responseHeaders)} or
            fails once the error is not retryable or the attempts run out
        """
        d, f = getPage(
            str(self.url), timeout=self.timeout, *self.args, **self.kwargs)
        # Pair the body with the headers *before* installing the retry
        # errback. A successful retry already yields a (data, headers) tuple
        # from the nested go(); were the wrapping callback to run after the
        # errback it would wrap that tuple again with this attempt's headers.
        d.addCallback(lambda data: (data, f.response_headers))
        d.addErrback(self.retryWeb)
        return d

    def retryWeb(self, f):
        """
        Retry failed downloads in the case of "web errors."

        A dropped connection is always retried.  HTTP errors are retried only
        when the status code is one of those in L{self.retryableHTTPCodes}.

        Other errors are not trapped.

        @param f: The failure from the page fetch

        @return: The result of L{retry}, or C{f} when the HTTP status is not
            retryable
        """
        # trap() takes the error types as separate arguments; handing it a
        # single tuple never matches anything.
        f.trap(weberror.Error, ineterror.ConnectionDone)
        if f.check(ineterror.ConnectionDone):
            # ConnectionDone carries no HTTP status to inspect.
            return self.retry(f)

        try:
            status = int(f.value.status)
        except (TypeError, ValueError):
            # A status that is not numeric cannot be judged retryable.
            return f

        if status in self.retryableHTTPCodes:
            return self.retry(f)

        return f

    def retry(self, f):
        """
        The retry machinery.

        If attempts remain after this one, a retry is scheduled C{self.delay}
        seconds in the future and the delay is scaled up for the next time;
        otherwise the failure is handed back to the caller.

        @param f: The failure that triggered the retry

        @return: A deferred for the next attempt, or C{f} when giving up
        """
        self.tries -= 1
        # "<=" so that a non-positive initial C{tries} cannot retry forever.
        if self.tries <= 0:
            log.msg('PerseverantDownloader is giving up, no attempts left.')
            return f

        log.msg('PerseverantDownloader is retrying, %d attempts left.' % (self.tries,))
        log.err(f)
        # Use the current delay for this retry and only then scale it, so
        # that the first retry really waits C{initialDelay} seconds.
        delay = self.delay
        self.delay = min(delay * self.factor, self.maxDelay)
        return task.deferLater(reactor, delay, self.go)
146
147
def encode(s):
    """
    Encode C{s} with C{const.ENCODING}, using the C{'replace'} error handler.
    """
    encoded = s.encode(const.ENCODING, 'replace')
    return encoded
150
151
def decode(s):
    """
    Decode C{s} with C{const.ENCODING}, using the C{'replace'} error handler.
    """
    decoded = s.decode(const.ENCODING, 'replace')
    return decoded
154
155
def handle206(f):
    """
    Errback that turns an HTTP 206 "error" into its partial content.

    Only C{twisted.web.error.Error} failures are trapped.

    @param f: The failure to inspect

    @return: The response body when the status is 206, otherwise C{f}
    """
    f.trap(weberror.Error)
    webError = f.value
    try:
        status = int(webError.status)
    except ValueError:
        # Non-numeric status: leave the failure as it is.
        return f

    if status == http.PARTIAL_CONTENT:
        return webError.response
    return f
169
170
# XXX: a copy from twisted.web.client because we need the useful stuff
def getPage(url, contextFactory=None, *args, **kwargs):
    """
    Start fetching C{url} and expose the client factory alongside the result.

    The C{agent} keyword defaults to C{'Eridanus Page Fetcher'} when the
    caller does not supply one.  Remaining arguments are forwarded to
    C{client._makeGetterFactory} / C{client.HTTPClientFactory}.

    @return: A C{(deferred, factory)} pair; the deferred has L{handle206}
        attached as an errback
    """
    kwargs.setdefault('agent', 'Eridanus Page Fetcher')

    getterFactory = client._makeGetterFactory(
        url, client.HTTPClientFactory, contextFactory=contextFactory,
        *args, **kwargs)

    deferred = getterFactory.deferred.addErrback(handle206)
    return deferred, getterFactory
183
184
def truncate(s, limit):
    """
    Shorten C{s} to C{limit} characters and append an ellipsis.

    C{s} is returned untouched unless it is at least three characters longer
    than C{limit}, so the ellipsis never replaces fewer characters than it
    adds.
    """
    if len(s) >= limit + 3:
        return s[:limit] + '...'
    return s
193
194
def humanReadableTimeDelta(delta):
    """
    Convert a C{datetime.timedelta} instance into a human readable string.

    A year is counted as 365 days.  Units whose value is zero are omitted and
    microseconds are ignored; a delta with nothing left to show is rendered
    as C{u'0 seconds'}.  A negative delta is rendered as the magnitude
    prefixed with C{u'-'}.

    @type delta: C{datetime.timedelta}
    @param delta: The time difference to describe

    @rtype: C{unicode}
    @return: For example C{u'1 year 2 days 3 hours'}
    """
    # A timedelta is normalized so that only C{days} can be negative;
    # formatting the raw fields of a negative delta gives nonsense such as
    # "-1 years 364 days 23 hours", so work on the magnitude instead.
    negative = delta.days < 0
    if negative:
        delta = -delta

    years, days = divmod(delta.days, 365)
    hours, seconds = divmod(delta.seconds, 3600)
    minutes, seconds = divmod(seconds, 60)

    units = [
        (u'year', years),
        (u'day', days),
        (u'hour', hours),
        (u'minute', minutes),
        (u'second', seconds)]

    parts = []
    for name, value in units:
        if value:
            if value != 1:
                name += u's'
            parts.append(u'%d %s' % (value, name))

    if not parts:
        return u'0 seconds'

    text = u' '.join(parts)
    if negative:
        text = u'-' + text
    return text
234
235
sizePrefixes = (u'bytes', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB')

def humanReadableFileSize(size):
    """
    Convert a size in bytes to a human readable string.

    Sizes below 1024 are shown as a whole number of bytes; larger sizes are
    shown with two decimals in the largest unit from L{sizePrefixes} that
    keeps the value at or above 1 (capped at the last unit).

    @param size: The size, in bytes, to convert
    @type size: C{int} or C{float}

    @returns: A human readable string
    @rtype: C{unicode}
    """
    # Repeated division instead of math.log: the logarithm is undefined for
    # zero (and negatives), and dividing by a power of two is exact.
    scaled = float(size)
    index = 0
    lastIndex = len(sizePrefixes) - 1
    while abs(scaled) >= 1024.0 and index < lastIndex:
        scaled /= 1024.0
        index += 1

    if index == 0:
        return u'%s %s' % (int(scaled), sizePrefixes[index])
    return u'%0.2f %s' % (scaled, sizePrefixes[index])
257
258
def hostMatches(host, mask):
    """
    Test a host string against a shell-style wildcard mask.

    @param host: Something of the form C{nick!user@host}
    @type host: C{str} or C{unicode}

    @param mask: A wildcard-enabled mask to attempt to match C{host} with
    @type mask: C{str} or C{unicode}

    @returns: Whether C{mask} matched C{host}
    @rtype: C{bool}
    """
    pattern = re.compile(fnmatch.translate(mask))
    matched = pattern.match(host)
    return matched is not None
273
274
def padIterable(iterable, length, padding=None):
    """
    Produce exactly C{length} items from C{iterable}.

    Missing items are filled in with C{padding}; items beyond C{length} are
    dropped.

    @param padding: The object to pad C{iterable} with in the case where it is
        less than C{length}

    @rtype: C{iterable}
    """
    filler = itertools.repeat(padding)
    padded = itertools.chain(iterable, filler)
    return itertools.islice(padded, length)
285
286
def normalizeMask(mask):
    """
    Create the canonical IRC mask for the given input.

    For example::

        joe => joe!*@*

        joe!black => joe!black@*

        joe!black@hell => joe!black@hell

    It is an error to specify an C{@} without or before a C{!}.

    @param mask: A complete or partial C{nick!user@host} mask

    @raise errors.InvalidMaskError: If the mask is empty or malformed

    @return: The mask in C{nick!user@host} form
    """
    def splitMask():
        if not mask:
            return None

        bangPos = mask.find('!')
        atPos = mask.find('@')

        if bangPos < 0:
            # No "!" at all: only a bare nick is acceptable.
            if atPos >= 0:
                return None
            return mask, '*', '*'

        # find() reports "absent" as -1, so position 0 is a real hit and must
        # be rejected like any other "@" preceding the "!".
        if 0 <= atPos < bangPos:
            return None

        nick, rest = padIterable(mask.split('!', 1), 2, padding='*')
        user, host = padIterable(rest.split('@', 1), 2, padding='*')
        return nick, user, host

    parts = splitMask()
    if parts is None:
        raise errors.InvalidMaskError(u'"%s" is not a valid complete or partial mask' % (mask,))

    return '%s!%s@%s' % parts
326
327
def tabulate(headers, data, joiner='  '):
    """
    Lay out C{data} in left-aligned columns beneath C{headers}.

    Each column is as wide as its longest cell (header included) and
    trailing whitespace is stripped from every row.

    @param headers: The table headers
    @type headers: C{iterable}

    @param data: The table data to be split into columns under each respective
                 heading
    @type data: C{iterable} of C{iterable}s

    @param joiner: The string used to join columns together
    @type joiner: C{str}

    @returns: The tabulated data in rows
    @rtype: C{iterable} of C{str}
    """
    rows = [headers]
    rows.extend(data)

    widths = []
    for column in zip(*rows):
        widths.append(max(len(cell) for cell in column))

    for row in rows:
        cells = [cell.ljust(widths[pos]) for pos, cell in enumerate(row)]
        yield joiner.join(cells).rstrip()
350
351
def deprecation(msg):
    """
    Emit C{msg} as a C{DeprecationWarning} attributed to this function's
    caller.
    """
    warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
354
355
def collate(it):
    """
    Group the values of C{(key, value)} pairs from C{it} by key.

    Values keep the order in which they were encountered.  For example::

        >>> collate([(1, 'foo'), (2, 'bar'), (1, 'baz')])
        {1: ['foo', 'baz'], 2: ['bar']}

    @type it: C{iterable}

    @rtype: C{dict}
    """
    grouped = {}
    for key, value in it:
        if key in grouped:
            grouped[key].append(value)
        else:
            grouped[key] = [value]

    return grouped
374
375
def getSiteStore(store):
    """
    Follow C{parent} links upward from C{store} and return the topmost store.

    The walk stops at the first store whose C{parent} is false.
    """
    current = store
    parent = current.parent
    while parent:
        current = parent
        parent = current.parent

    return current
385
386
def getAPIKey(store, apiName, **kw):
    """
    Look up the API key stored for C{apiName} in the site store.

    @param store: Any store; its site store is located with L{getSiteStore}

    @param apiName: The name of the API to fetch the key for

    @param kw: May contain C{default}, a value to return instead of raising
        when no key is stored

    @raise errors.MissingAPIKey: If there is no key stored for C{apiName}
        and no C{default} was given

    @rtype: C{unicode}
    @return: The API key for C{apiName}
    """
    key = website.APIKey.getKeyForAPI(getSiteStore(store), apiName)
    if key is not None:
        return key.apiKey

    if 'default' in kw:
        return kw['default']

    raise errors.MissingAPIKey(u'No API key available for "%s"' % (apiName,))
406
407
def setAPIKey(store, apiName, key):
    """
    Store C{key} as the API key for C{apiName} in the site store.

    @return: The result of C{website.APIKey.setKeyForAPI}
    """
    return website.APIKey.setKeyForAPI(getSiteStore(store), apiName, key)
414
415
416_entityPattern = re.compile(ur'&#?\w+;')
417htmlentitydefs.name2codepoint['apos'] = ord(u"'")
418
419def unescapeEntities(text):
420    """
421    Replace HTML or XML character references and entities in a string.
422    """
423    def fixup(m):
424        text = m.group(0)
425        if text[:2] == '&#':
426            try:
427                if text[:3] == '&#x':
428                    return unichr(int(text[3:-1], 16))
429                else:
430                    return unichr(int(text[2:-1]))
431            except ValueError:
432                pass
433        else:
434            try:
435                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
436            except KeyError:
437                pass
438        return text
439
440    return _entityPattern.sub(fixup, text)
Note: See TracBrowser for help on using the browser.