| 1 | import re, math, fnmatch, itertools, warnings, htmlentitydefs |
|---|
| 2 | |
|---|
| 3 | from twisted.internet import reactor, task, error as ineterror |
|---|
| 4 | from twisted.web import client, http, error as weberror |
|---|
| 5 | from twisted.python import log |
|---|
| 6 | |
|---|
| 7 | from nevow.url import URL |
|---|
| 8 | from nevow.rend import Page, Fragment |
|---|
| 9 | |
|---|
| 10 | from xmantissa import website |
|---|
| 11 | from xmantissa.webtheme import _ThemedMixin, SiteTemplateResolver |
|---|
| 12 | |
|---|
| 13 | from eridanus import const, errors |
|---|
| 14 | |
|---|
| 15 | |
|---|
| 16 | # XXX: do we need this crap? all of it? |
|---|
class _PublicThemedMixin(_ThemedMixin):
    """
    Themed mixin that resolves document factories through a
    C{SiteTemplateResolver} built from C{self.store}.
    """
    def getDocFactory(self, fragmentName, default=None):
        """
        Look up the document factory for C{fragmentName}.

        @param fragmentName: Name of the template fragment to resolve

        @param default: Fallback value handed through to the resolver

        @return: Whatever C{SiteTemplateResolver.getDocFactory} returns
        """
        return SiteTemplateResolver(self.store).getDocFactory(
            fragmentName, default)
|---|
| 21 | |
|---|
| 22 | |
|---|
class ThemedPage(_PublicThemedMixin, Page):
    """
    A C{Page} that fills in its document factory from L{fragmentName} at
    render time when none has been set.

    @cvar fragmentName: Name of the template fragment used to find the
        document factory
    """
    fragmentName = 'page-no-fragment-name-specified'

    def renderHTTP(self, ctx):
        """
        Resolve C{self.docFactory} if it is still C{None}, then hand over to
        the parent class implementation of C{renderHTTP}.
        """
        needsDocFactory = self.docFactory is None
        if needsDocFactory:
            self.docFactory = self.getDocFactory(self.fragmentName)

        return super(ThemedPage, self).renderHTTP(ctx)
|---|
| 30 | |
|---|
| 31 | |
|---|
class ThemedFragment(_PublicThemedMixin, Fragment):
    """
    A C{Fragment} that remembers the store used for template resolution.

    @cvar fragmentName: Name of the template fragment for this fragment

    @ivar store: The store passed to the initializer
    """
    fragmentName = 'fragment-no-fragment-name-specified'

    def __init__(self, store, **kwargs):
        """
        Record C{store} and pass every keyword argument on to the parent
        initializer.
        """
        self.store = store
        super(ThemedFragment, self).__init__(**kwargs)
|---|
| 38 | |
|---|
| 39 | |
|---|
class PerseverantDownloader(object):
    """
    Perseverantly attempt to download a URL.

    The first retry is scheduled L{initialDelay} seconds after a failure;
    the delay is then multiplied by L{factor} after every retry, up to a
    maximum of L{maxDelay}.

    @type url: C{nevow.url.URL}
    @ivar url: The HTTP URL to attempt to download (without its anchor)

    @type tries: C{int}
    @ivar tries: Number of attempts remaining

    @type delay: C{float}
    @ivar delay: Delay, in seconds, that the next retry will wait

    @type maxDelay: C{float}
    @cvar maxDelay: Maximum delay, in seconds, between retry attempts

    @type initialDelay: C{float}
    @cvar initialDelay: The delay before the first retry attempt

    @type factor: C{float}
    @cvar factor: The factor to increase the delay by after each attempt

    @type retryableHTTPCodes: C{list}
    @cvar retryableHTTPCodes: HTTP error codes that suggest the error is
        intermittent and that a retry should be attempted

    @type defaultTimeout: C{float}
    @cvar defaultTimeout: Default fetch timeout value
    """
    maxDelay = 3600
    initialDelay = 1.0
    factor = 1.6180339887498948

    retryableHTTPCodes = [408, 500, 502, 503, 504]

    defaultTimeout = 300.0

    def __init__(self, url, tries=10, timeout=defaultTimeout, *a, **kw):
        """
        Prepare the download information.

        Any additional positional or keyword arguments are passed on to
        L{getPage} for every attempt.

        @type url: C{nevow.url.URL} or C{unicode} or C{str}
        @param url: The HTTP URL to attempt to download

        @type tries: C{int}
        @param tries: The maximum number of attempts before giving up

        @type timeout: C{float}
        @param timeout: Timeout value, in seconds, for the page fetch;
            defaults to L{defaultTimeout}
        """
        if isinstance(url, unicode):
            url = url.encode('utf-8')
        if isinstance(url, str):
            url = URL.fromString(url)

        self.url = url.anchor(None)
        self.args = a
        self.kwargs = kw
        self.delay = self.initialDelay
        self.tries = tries
        self.timeout = timeout

    def __repr__(self):
        return '<%s %s>' % (type(self).__name__, self.url)

    def go(self):
        """
        Attempt to download L{self.url}.

        @rtype: C{Deferred}
        @return: A deferred firing with C{(data, responseHeaders)} for the
            attempt that eventually succeeded
        """
        d, factory = getPage(
            str(self.url), timeout=self.timeout, *self.args, **self.kwargs)

        def attachHeaders(data):
            return data, factory.response_headers

        # The callback and errback are registered as a pair on purpose: a
        # retried download already yields (data, headers) from its own call
        # to go(), so it must not pass through attachHeaders a second time.
        return d.addCallbacks(attachHeaders, self.retryWeb)

    def retryWeb(self, f):
        """
        Retry failed downloads in the case of "web errors."

        A closed connection is always retried.  HTTP errors are retried only
        when the status code is one of those in L{self.retryableHTTPCodes}.

        Other errors are not trapped.
        """
        # Failure.trap takes the error types as separate arguments; a single
        # tuple argument never matches anything.
        f.trap(weberror.Error, ineterror.ConnectionDone)

        # ConnectionDone carries no HTTP status, so it cannot go through the
        # status code check below.
        if f.check(ineterror.ConnectionDone):
            return self.retry(f)

        try:
            status = int(f.value.status)
        except (TypeError, ValueError):
            return f

        if status in self.retryableHTTPCodes:
            return self.retry(f)

        return f

    def retry(self, f):
        """
        The retry machinery.

        If attempts remain after this failure, a retry is scheduled
        C{self.delay} seconds in the future and the delay is increased for
        the attempt after that; otherwise the failure is returned.
        """
        self.tries -= 1
        log.msg('PerseverantDownloader is retrying, %d attempts left.' % (
            max(self.tries, 0),))
        log.err(f)

        # "<=" rather than "==" so that a non-positive initial value of
        # tries cannot lead to endless retrying.
        if self.tries <= 0:
            return f

        delay = self.delay
        self.delay = min(self.delay * self.factor, self.maxDelay)
        return task.deferLater(reactor, delay, self.go)
|---|
| 146 | |
|---|
| 147 | |
|---|
def encode(s):
    """
    Encode C{s} with C{const.ENCODING}, using the C{'replace'} error
    handler for anything that cannot be encoded.
    """
    encoded = s.encode(const.ENCODING, 'replace')
    return encoded
|---|
| 150 | |
|---|
| 151 | |
|---|
def decode(s):
    """
    Decode C{s} with C{const.ENCODING}, using the C{'replace'} error
    handler for anything that cannot be decoded.
    """
    decoded = s.decode(const.ENCODING, 'replace')
    return decoded
|---|
| 154 | |
|---|
| 155 | |
|---|
def handle206(f):
    """
    Turn an HTTP 206 failure into a successful result.

    Only C{weberror.Error} failures are trapped.  When the status of the
    error is C{http.PARTIAL_CONTENT} the response attached to the error is
    returned; in every other case, including a status that is not a number,
    the failure is passed along untouched.
    """
    f.trap(weberror.Error)
    webError = f.value

    try:
        status = int(webError.status)
    except ValueError:
        return f

    if status == http.PARTIAL_CONTENT:
        return webError.response

    return f
|---|
| 169 | |
|---|
| 170 | |
|---|
| 171 | # XXX: a copy from twisted.web.client because we need the useful stuff |
|---|
def getPage(url, contextFactory=None, *args, **kwargs):
    """
    Start fetching C{url} with a C{client.HTTPClientFactory}.

    An C{agent} keyword argument of C{'Eridanus Page Fetcher'} is supplied
    unless the caller provides one.  Partial content failures are handled
    by L{handle206}.

    @rtype: C{tuple}
    @return: C{(deferred, factory)}, the factory's deferred and the factory
        itself
    """
    kwargs.setdefault('agent', 'Eridanus Page Fetcher')

    factory = client._makeGetterFactory(
        url,
        client.HTTPClientFactory,
        contextFactory=contextFactory,
        *args, **kwargs)

    deferred = factory.deferred.addErrback(handle206)
    return deferred, factory
|---|
| 183 | |
|---|
| 184 | |
|---|
def truncate(s, limit):
    """
    Cut C{s} down to its first C{limit} characters followed by an ellipsis.

    C{s} is returned unchanged when it is shorter than C{limit} plus the
    three characters of the ellipsis.
    """
    ellipsis = '...'
    if len(s) >= limit + len(ellipsis):
        return s[:limit] + ellipsis

    return s
|---|
| 193 | |
|---|
| 194 | |
|---|
def humanReadableTimeDelta(delta):
    """
    Describe a C{datetime.timedelta} as years, days, hours, minutes and
    seconds.

    A year is counted as 365 days.  Units whose amount is zero are left out,
    a unit with an amount of one is written in the singular, and an empty
    delta is described as C{u'0 seconds'}.

    @rtype: C{unicode}
    """
    years, days = divmod(delta.days, 365)
    hours, remainder = divmod(delta.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    units = [
        (u'%d years', years),
        (u'%d days', days),
        (u'%d hours', hours),
        (u'%d minutes', minutes),
        (u'%d seconds', seconds)]

    parts = []
    for template, amount in units:
        if not amount:
            continue
        if amount == 1:
            # Drop the trailing "s" to get the singular form.
            template = template[:-1]
        parts.append(template % (amount,))

    if not parts:
        return u'0 seconds'

    return u' '.join(parts)
|---|
| 234 | |
|---|
| 235 | |
|---|
sizePrefixes = (u'bytes', u'KB', u'MB', u'GB', u'TB', u'PB', u'EB', u'ZB', u'YB')

def humanReadableFileSize(size):
    """
    Convert a size in bytes to a human readable string.

    Sizes below 1024 are reported as a whole number of bytes; larger sizes
    are scaled by powers of 1024 and reported with two decimal places, using
    the largest unit in L{sizePrefixes} when the size exceeds them all.

    @param size: The size, in bytes, to convert
    @type size: C{int} or C{float}

    @returns: A human readable string
    @rtype: C{unicode}
    """
    # Scale by repeated division instead of math.log: this is well defined
    # for a size of zero and is not subject to rounding error at exact
    # powers of 1024.
    index = 0
    lastIndex = len(sizePrefixes) - 1
    while abs(size) >= 1024 and index < lastIndex:
        size = size / 1024.0
        index += 1

    prefix = sizePrefixes[index]
    if index == 0:
        return u'%s %s' % (int(size), prefix)

    return u'%0.2f %s' % (size, prefix)
|---|
| 257 | |
|---|
| 258 | |
|---|
def hostMatches(host, mask):
    """
    Check a host against a wildcard mask.

    The mask is translated to a regular expression with
    C{fnmatch.translate} and matched against C{host}.

    @param host: Something of the form C{nick!user@host}
    @type host: C{str} or C{unicode}

    @param mask: A wildcard-enabled mask to attempt to match C{host} with
    @type mask: C{str} or C{unicode}

    @returns: Whether C{mask} matched C{host}
    @rtype: C{bool}
    """
    pattern = fnmatch.translate(mask)
    matched = re.match(pattern, host)
    if matched is None:
        return False

    return True
|---|
| 273 | |
|---|
| 274 | |
|---|
def padIterable(iterable, length, padding=None):
    """
    Produce exactly C{length} items from C{iterable}.

    Items beyond C{length} are dropped; when C{iterable} runs out early the
    remaining positions are filled with C{padding}.

    @param padding: The object to pad C{iterable} with in the case where it is
        less than C{length}

    @rtype: C{iterable}
    """
    padded = itertools.chain(iterable, itertools.repeat(padding))
    return itertools.islice(padded, length)
|---|
| 285 | |
|---|
| 286 | |
|---|
def normalizeMask(mask):
    """
    Create the canonical IRC mask for the given input.

    For example::

        joe            => joe!*@*

        joe!black      => joe!black@*

        joe!black@hell => joe!black@hell

    It is an error to specify an C{@} without or before a C{!}, and it is an
    error to specify an empty mask.

    @raise errors.InvalidMaskError: If the mask is malformed

    @return: The mask in C{nick!user@host} form
    """
    def invalidMask():
        return errors.InvalidMaskError(
            u'"%s" is not a valid complete or partial mask' % (mask,))

    if not mask:
        raise invalidMask()

    bangPos = mask.find('!')
    atPos = mask.find('@')

    # find() reports "absent" as -1, so position 0 is a real hit: a mask
    # that starts with "@" has to be rejected like any other early "@".
    if atPos != -1 and (bangPos == -1 or atPos < bangPos):
        raise invalidMask()

    if bangPos == -1:
        nick, user, host = mask, '*', '*'
    else:
        nick, _, rest = mask.partition('!')
        user, separator, host = rest.partition('@')
        if not separator:
            # No host part was given at all.
            host = '*'

    return '%s!%s@%s' % (nick, user, host)
|---|
| 326 | |
|---|
| 327 | |
|---|
def tabulate(headers, data, joiner=' '):
    """
    Tabulate data, attaching the specified headers.

    Every column is padded to the width of its widest value.  Rows do not
    need to be the same length as each other or as C{headers}; a column's
    width is taken from the rows that actually have a value in it.

    @param headers: The table headers
    @type headers: C{iterable}

    @param data: The table data to be split into columns under each respective
        heading
    @type data: C{iterable} of C{iterable}s

    @param joiner: The string used to join columns together
    @type joiner: C{str}

    @returns: The tabulated data in rows
    @rtype: C{iterable} of C{str}
    """
    # Materialize everything first: each row is walked twice (once to
    # measure, once to format), which one-shot iterables would not survive.
    rows = [list(headers)] + [list(row) for row in data]

    columnWidths = []
    for row in rows:
        for i, value in enumerate(row):
            width = len(value)
            if i >= len(columnWidths):
                columnWidths.append(width)
            elif width > columnWidths[i]:
                columnWidths[i] = width

    for row in rows:
        cells = [value.ljust(columnWidths[i]) for i, value in enumerate(row)]
        yield joiner.join(cells).rstrip()
|---|
| 350 | |
|---|
| 351 | |
|---|
def deprecation(msg):
    """
    Issue a C{DeprecationWarning} carrying C{msg}, attributed to the caller
    of the function that invoked this helper.
    """
    warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
|---|
| 354 | |
|---|
| 355 | |
|---|
def collate(it):
    """
    Group the values of C{(key, value)} pairs by key.

    Values keep the order in which they were encountered.  For example::

        >>> collate([(1, 'foo'), (2, 'bar'), (1, 'baz')])
        {1: ['foo', 'baz'], 2: ['bar']}

    @type it: C{iterable}

    @rtype: C{dict}
    """
    collated = {}
    for key, value in it:
        if key in collated:
            collated[key].append(value)
        else:
            collated[key] = [value]

    return collated
|---|
| 374 | |
|---|
| 375 | |
|---|
def getSiteStore(store):
    """
    Walk up the C{parent} chain from C{store} and return the topmost store,
    i.e. the first one whose C{parent} is false.
    """
    current = store
    while True:
        parent = current.parent
        if not parent:
            return current
        current = parent
|---|
| 385 | |
|---|
| 386 | |
|---|
def getAPIKey(store, apiName, **kw):
    """
    Get the API key for C{apiName} from the site store of C{store}.

    @param default: Optional keyword argument; when given, it is returned
        instead of raising if no key is stored for C{apiName}

    @raise errors.MissingAPIKey: If there is no key stored for C{apiName}
        and no C{default} was given

    @rtype: C{unicode}
    @return: The API key for C{apiName}
    """
    key = website.APIKey.getKeyForAPI(getSiteStore(store), apiName)
    if key is not None:
        return key.apiKey

    if 'default' in kw:
        return kw['default']

    raise errors.MissingAPIKey(u'No API key available for "%s"' % (apiName,))
|---|
| 406 | |
|---|
| 407 | |
|---|
def setAPIKey(store, apiName, key):
    """
    Store C{key} as the API key for C{apiName} in the site store of
    C{store}.

    @return: Whatever C{website.APIKey.setKeyForAPI} returns
    """
    return website.APIKey.setKeyForAPI(getSiteStore(store), apiName, key)
|---|
| 414 | |
|---|
| 415 | |
|---|
| 416 | _entityPattern = re.compile(ur'&#?\w+;') |
|---|
| 417 | htmlentitydefs.name2codepoint['apos'] = ord(u"'") |
|---|
| 418 | |
|---|
| 419 | def unescapeEntities(text): |
|---|
| 420 | """ |
|---|
| 421 | Replace HTML or XML character references and entities in a string. |
|---|
| 422 | """ |
|---|
| 423 | def fixup(m): |
|---|
| 424 | text = m.group(0) |
|---|
| 425 | if text[:2] == '&#': |
|---|
| 426 | try: |
|---|
| 427 | if text[:3] == '&#x': |
|---|
| 428 | return unichr(int(text[3:-1], 16)) |
|---|
| 429 | else: |
|---|
| 430 | return unichr(int(text[2:-1])) |
|---|
| 431 | except ValueError: |
|---|
| 432 | pass |
|---|
| 433 | else: |
|---|
| 434 | try: |
|---|
| 435 | text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) |
|---|
| 436 | except KeyError: |
|---|
| 437 | pass |
|---|
| 438 | return text |
|---|
| 439 | |
|---|
| 440 | return _entityPattern.sub(fixup, text) |
|---|