home *** CD-ROM | disk | FTP | other *** search
# From: http://www.w3.org/TR/REC-xml#NT-CombiningChar
#
# * Name start characters must have one of the categories Ll, Lu, Lo,
#   Lt, Nl.
#
# * Name characters other than Name-start characters must have one of
#   the categories Mc, Me, Mn, Lm, or Nd.
#
# * Characters in the compatibility area (i.e. with character code
#   greater than #xF900 and less than #xFFFE) are not allowed in XML
#   names.
#
# * Characters which have a font or compatibility decomposition
#   (i.e. those with a "compatibility formatting tag" in field 5 of the
#   database -- marked by field 5 beginning with a "<") are not allowed.
#
# * The following characters are treated as name-start characters rather
#   than name characters, because the property file classifies them as
#   Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.
#
# * Characters #x20DD-#x20E0 are excluded (in accordance with Unicode
#   2.0, section 5.14).
#
# * Character #x00B7 is classified as an extender, because the property
#   list so identifies it.
#
# * Character #x0387 is added as a name character, because #x00B7 is its
#   canonical equivalent.
#
# * Characters ':' and '_' are allowed as name-start characters.
#
# * Characters '-' and '.' are allowed as name characters.
-
from unicodedata import category, decomposition
-
# Unicode general categories permitted for the first character of a name.
NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"]

# Categories permitted for any later character: the start categories plus
# combining marks (Mc, Me, Mn), modifier letters (Lm) and decimal digits (Nd).
NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"]

# Individual characters accepted inside a name regardless of their category:
# the extender U+00B7, its canonical equivalent U+0387, and '-', '.', '_'.
ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_"]
-
# http://www.w3.org/TR/REC-xml-names/#NT-NCName
#  [4] NCName     ::= (Letter | '_') (NCNameChar)*  /* An XML Name, minus
#                                                      the ":" */
#  [5] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | CombiningChar
#                     | Extender
-
def is_ncname(name):
    """Return 1 if *name* looks like an XML NCName, otherwise 0.

    The first character must be "_" or have a Unicode category listed in
    NAME_START_CATEGORIES.  Every following character must either have a
    category listed in NAME_CATEGORIES or be one of ALLOWED_NAME_CHARS.
    An empty (or otherwise falsy) name is not an NCName and yields 0.

    The result is an int (0 or 1) rather than a bool.
    """
    if not name:
        # An NCName needs at least one character; bailing out here also
        # avoids indexing into an empty string.
        return 0

    first = name[0]
    if first != "_" and category(first) not in NAME_START_CATEGORIES:
        return 0

    for c in name[1:]:
        if category(c) not in NAME_CATEGORIES and c not in ALLOWED_NAME_CHARS:
            return 0
        # NOTE: the XML 1.0 exclusions for compatibility-area characters
        # and for characters with a compatibility decomposition are not
        # enforced here.

    return 1
-
# Namespace URI reserved for the "xml" prefix; split_uri() never splits
# inside it.
XMLNS = "http://www.w3.org/XML/1998/namespace"
-
def split_uri(uri):
    """Split *uri* into a ``(namespace, local_name)`` tuple.

    A URI that starts with XMLNS is split directly after that prefix.
    Otherwise the URI is scanned from its end for the last character that
    cannot appear in a name; the local name then starts at the first
    name-start character ("_" or a NAME_START_CATEGORIES character) found
    after that point, and everything before it is the namespace.

    Raises:
        Exception: if no split point exists (for example the URI is empty,
            consists only of name characters, or its tail contains no
            name-start character).
    """
    if uri.startswith(XMLNS):
        # Slice rather than split so that a second occurrence of XMLNS
        # further along the URI stays part of the local name.
        return (XMLNS, uri[len(XMLNS):])

    length = len(uri)
    for i, c in enumerate(reversed(uri)):
        if category(c) in NAME_CATEGORIES or c in ALLOWED_NAME_CHARS:
            continue
        # `c` sits at index length - 1 - i and is the last character that
        # cannot be part of a local name.  Only the characters after it are
        # candidates for the start of the local name; the scan must not
        # wrap round to the beginning of the URI.
        for j in range(length - i, length):
            if uri[j] == "_" or category(uri[j]) in NAME_START_CATEGORIES:
                return (uri[:j], uri[j:])
        break
    raise Exception("Can't split '%s'" % uri)
-
-
-
-