Enter 2004 April

home *** CD-ROM | disk | FTP | other *** search

/ Enter 2004 April / enter-2004-04.iso / files / EVE_1424_100181.exe / _HeadersUtil.py < prev next >

Wrap

Python Source | 2004-04-20 | 6.2 KB | 178 lines

"""Header value parsing utility functions. from ClientCookie._HeadersUtil import split_header_words values = split_header_words(h.headers["Content-Type"]) This module provides a few functions that helps parsing and construction of valid HTTP header values. Copyright 1997-1998, Gisle Aas Copyright 2002, John J. Lee This code is free software; you can redistribute it and/or modify it under the terms of the MIT License (see the file COPYING included with the distribution). """ # from Gisle Aas's CVS revision 1.9, libwww-perl 5.64 import re, string from types import StringType try: from types import UnicodeType STRING_TYPES = StringType, UnicodeType except: STRING_TYPES = StringType, from _Util import startswith def pair_up(l): """Return list of pairs, given a list. pair_up([1,2,3,4]) => [(1,2), (3,4)] """ assert len(l)%2 == 0 result = [] pair = [None, None] for i in xrange(len(l)): pair[i%2] = l[i] if i%2 == 1: result.append(tuple(pair)) return result def unmatched(match): """Return unmatched part of re.Match object.""" start, end = match.span(0) return match.string[:start]+match.string[end:] token_re = re.compile(r"^\s*(=*[^\s=;,]+)") quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") escape_re = re.compile(r"\\(.)") value_re = re.compile(r"^\s*=\s*([^;,\s]*)") def split_header_words(header_values): r"""Parse header values into a list of lists containing key,value pairs. The function knows how to deal with ",", ";" and "=" as well as quoted values after "=". A list of space separated tokens are parsed as if they were separated by ";". If the header_values passed as argument contains multiple values, then they are treated as if they were a single value separated by comma ",". This means that this function is useful for parsing header fields that follow this syntax (BNF as from the HTTP/1.1 specification, but we relax the requirement for tokens). headers = #header header = (token | parameter) *( [";"] (token | parameter)) token = 1*<any CHAR except CTLs or separators> separators = "(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\" | <"> | "/" | "[" | "]" | "?" | "=" | "{" | "}" | SP | HT quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) qdtext = <any TEXT except <">> quoted-pair = "\" CHAR parameter = attribute "=" value attribute = token value = token | quoted-string Each header is represented by an anonymous array of key/value pairs. The value for a simple token (not part of a parameter) is undef. Syntactically incorrect headers will not necessary be parsed as you would want. This is easier to describe with some examples: >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] >>> split_header_words(['text/html; charset="iso-8859-1"']) [[('text/html', None), ('charset', 'iso-8859-1')]] >>> split_header_words([r'Basic realm="\"foo\bar\""']) [[('Basic', None), ('realm', '"foobar"')]] """ # XXX yuck assert type(header_values) not in STRING_TYPES res = [] for thing in header_values: cur = [] while len(thing) != 0: matched = 0 m = token_re.search(thing) if m: # 'token' or parameter 'attribute' matched = 1 thing = unmatched(m) cur.append(m.group(1)) matched_inner = 0 m = quoted_value_re.search(thing) if m: # a quoted value matched_inner = 1 thing = unmatched(m) val = m.group(1) val = escape_re.sub(r"\1", val) cur.append(val) if not matched_inner: m = value_re.search(thing) if m: # some unquoted value matched_inner = 1 thing = unmatched(m) val = m.group(1) val = string.rstrip(val) cur.append(val) if not matched_inner: # no value, a lone token cur.append(None) if not matched: if startswith(string.lstrip(thing), ","): matched = 1 thing = string.lstrip(thing)[1:] if cur: res.append(pair_up(cur)) cur = [] if not matched: assert startswith(thing, " ") or startswith(thing, ";"), ( "This should not happen: '%s'\n cur: %s" % (thing, cur)) thing = string.lstrip(thing) if startswith(thing, ";"): thing = thing[1:] if cur: res.append(pair_up(cur)) return res join_escape_re = re.compile(r"([\"\\])") def join_header_words(lists): """Do the inverse of the conversion done by split_header_words. Takes a list of lists of (key, value) pairs and produces a single header value. Attribute values are quoted if needed. >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) 'text/plain; charset="iso-8859/1"' >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) 'text/plain, charset="iso-8859/1"' """ res = [] for pairs in lists: attr = [] for k, v in pairs: if v is not None: if re.search(r"^\w+$", v): k = k + ("=%s" % (v,)) else: v = join_escape_re.sub(r"\\\1", v) # escape " and \ k = k + ('="%s"' % (v,)) attr.append(k) if attr: res.append(string.join(attr, "; ")) return string.join(res, ", ") def _test(): import doctest, _HeadersUtil return doctest.testmod(_HeadersUtil) if __name__ == "__main__": _test()