home *** CD-ROM | disk | FTP | other *** search
- /* ------------------------------------------------------------------------
-
- Python Codec Registry and support functions
-
- Written by Marc-Andre Lemburg (mal@lemburg.com).
-
- Copyright (c) Corporation for National Research Initiatives.
-
- ------------------------------------------------------------------------ */
-
- #include "Python.h"
- #include <ctype.h>
-
- /* --- Globals ------------------------------------------------------------ */
-
- static PyObject *_PyCodec_SearchPath;
- static PyObject *_PyCodec_SearchCache;
-
- /* Flag used for lazy import of the standard encodings package */
- static int import_encodings_called = 0;
-
- /* --- Codec Registry ----------------------------------------------------- */
-
- /* Import the standard encodings package which will register the first
- codec search function.
-
- This is done in a lazy way so that the Unicode implementation does
- not downgrade startup time of scripts not needing it.
-
- ImportErrors are silently ignored by this function. Only one try is
- made.
-
- */
-
- static
- int import_encodings(void)
- {
- PyObject *mod;
-
- import_encodings_called = 1;
- mod = PyImport_ImportModule("encodings");
- if (mod == NULL) {
- if (PyErr_ExceptionMatches(PyExc_ImportError)) {
- /* Ignore ImportErrors... this is done so that
- distributions can disable the encodings package. Note
- that other errors are not masked, e.g. SystemErrors
- raised to inform the user of an error in the Python
- configuration are still reported back to the user. */
- PyErr_Clear();
- return 0;
- }
- return -1;
- }
- Py_DECREF(mod);
- return 0;
- }
-
- int PyCodec_Register(PyObject *search_function)
- {
- if (!import_encodings_called) {
- if (import_encodings())
- goto onError;
- }
- if (search_function == NULL) {
- PyErr_BadArgument();
- goto onError;
- }
- if (!PyCallable_Check(search_function)) {
- PyErr_SetString(PyExc_TypeError,
- "argument must be callable");
- goto onError;
- }
- return PyList_Append(_PyCodec_SearchPath, search_function);
-
- onError:
- return -1;
- }
-
- /* Convert a string to a normalized Python string: all characters are
- converted to lower case, spaces are replaced with underscores. */
-
- static
- PyObject *normalizestring(const char *string)
- {
- register size_t i;
- size_t len = strlen(string);
- char *p;
- PyObject *v;
-
- if (len > INT_MAX) {
- PyErr_SetString(PyExc_OverflowError, "string is too large");
- return NULL;
- }
-
- v = PyString_FromStringAndSize(NULL, (int)len);
- if (v == NULL)
- return NULL;
- p = PyString_AS_STRING(v);
- for (i = 0; i < len; i++) {
- register char ch = string[i];
- if (ch == ' ')
- ch = '-';
- else
- ch = tolower(ch);
- p[i] = ch;
- }
- return v;
- }
-
- /* Lookup the given encoding and return a tuple providing the codec
- facilities.
-
- The encoding string is looked up converted to all lower-case
- characters. This makes encodings looked up through this mechanism
- effectively case-insensitive.
-
- If no codec is found, a LookupError is set and NULL returned.
-
- As side effect, this tries to load the encodings package, if not
- yet done. This is part of the lazy load strategy for the encodings
- package.
-
- */
-
- PyObject *_PyCodec_Lookup(const char *encoding)
- {
- PyObject *result, *args = NULL, *v;
- int i, len;
-
- if (encoding == NULL) {
- PyErr_BadArgument();
- goto onError;
- }
- if (_PyCodec_SearchCache == NULL ||
- _PyCodec_SearchPath == NULL) {
- PyErr_SetString(PyExc_SystemError,
- "codec module not properly initialized");
- goto onError;
- }
- if (!import_encodings_called) {
- if (import_encodings())
- goto onError;
- }
-
- /* Convert the encoding to a normalized Python string: all
- characters are converted to lower case, spaces and hyphens are
- replaced with underscores. */
- v = normalizestring(encoding);
- if (v == NULL)
- goto onError;
- PyString_InternInPlace(&v);
-
- /* First, try to lookup the name in the registry dictionary */
- result = PyDict_GetItem(_PyCodec_SearchCache, v);
- if (result != NULL) {
- Py_INCREF(result);
- Py_DECREF(v);
- return result;
- }
-
- /* Next, scan the search functions in order of registration */
- args = PyTuple_New(1);
- if (args == NULL)
- goto onError;
- PyTuple_SET_ITEM(args,0,v);
-
- len = PyList_Size(_PyCodec_SearchPath);
- if (len < 0)
- goto onError;
- if (len == 0) {
- PyErr_SetString(PyExc_LookupError,
- "no codec search functions registered: "
- "can't find encoding");
- goto onError;
- }
-
- for (i = 0; i < len; i++) {
- PyObject *func;
-
- func = PyList_GetItem(_PyCodec_SearchPath, i);
- if (func == NULL)
- goto onError;
- result = PyEval_CallObject(func, args);
- if (result == NULL)
- goto onError;
- if (result == Py_None) {
- Py_DECREF(result);
- continue;
- }
- if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
- PyErr_SetString(PyExc_TypeError,
- "codec search functions must return 4-tuples");
- Py_DECREF(result);
- goto onError;
- }
- break;
- }
- if (i == len) {
- /* XXX Perhaps we should cache misses too ? */
- PyErr_SetString(PyExc_LookupError,
- "unknown encoding");
- goto onError;
- }
-
- /* Cache and return the result */
- PyDict_SetItem(_PyCodec_SearchCache, v, result);
- Py_DECREF(args);
- return result;
-
- onError:
- Py_XDECREF(args);
- return NULL;
- }
-
- static
- PyObject *args_tuple(PyObject *object,
- const char *errors)
- {
- PyObject *args;
-
- args = PyTuple_New(1 + (errors != NULL));
- if (args == NULL)
- return NULL;
- Py_INCREF(object);
- PyTuple_SET_ITEM(args,0,object);
- if (errors) {
- PyObject *v;
-
- v = PyString_FromString(errors);
- if (v == NULL) {
- Py_DECREF(args);
- return NULL;
- }
- PyTuple_SET_ITEM(args, 1, v);
- }
- return args;
- }
-
- /* Build a codec by calling factory(stream[,errors]) or just
- factory(errors) depending on whether the given parameters are
- non-NULL. */
-
- static
- PyObject *build_stream_codec(PyObject *factory,
- PyObject *stream,
- const char *errors)
- {
- PyObject *args, *codec;
-
- args = args_tuple(stream, errors);
- if (args == NULL)
- return NULL;
-
- codec = PyEval_CallObject(factory, args);
- Py_DECREF(args);
- return codec;
- }
-
- /* Convenience APIs to query the Codec registry.
-
- All APIs return a codec object with incremented refcount.
-
- */
-
- PyObject *PyCodec_Encoder(const char *encoding)
- {
- PyObject *codecs;
- PyObject *v;
-
- codecs = _PyCodec_Lookup(encoding);
- if (codecs == NULL)
- goto onError;
- v = PyTuple_GET_ITEM(codecs,0);
- Py_INCREF(v);
- return v;
-
- onError:
- return NULL;
- }
-
- PyObject *PyCodec_Decoder(const char *encoding)
- {
- PyObject *codecs;
- PyObject *v;
-
- codecs = _PyCodec_Lookup(encoding);
- if (codecs == NULL)
- goto onError;
- v = PyTuple_GET_ITEM(codecs,1);
- Py_INCREF(v);
- return v;
-
- onError:
- return NULL;
- }
-
- PyObject *PyCodec_StreamReader(const char *encoding,
- PyObject *stream,
- const char *errors)
- {
- PyObject *codecs;
-
- codecs = _PyCodec_Lookup(encoding);
- if (codecs == NULL)
- goto onError;
- return build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
-
- onError:
- return NULL;
- }
-
- PyObject *PyCodec_StreamWriter(const char *encoding,
- PyObject *stream,
- const char *errors)
- {
- PyObject *codecs;
-
- codecs = _PyCodec_Lookup(encoding);
- if (codecs == NULL)
- goto onError;
- return build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
-
- onError:
- return NULL;
- }
-
- /* Encode an object (e.g. an Unicode object) using the given encoding
- and return the resulting encoded object (usually a Python string).
-
- errors is passed to the encoder factory as argument if non-NULL. */
-
- PyObject *PyCodec_Encode(PyObject *object,
- const char *encoding,
- const char *errors)
- {
- PyObject *encoder = NULL;
- PyObject *args = NULL, *result;
- PyObject *v;
-
- encoder = PyCodec_Encoder(encoding);
- if (encoder == NULL)
- goto onError;
-
- args = args_tuple(object, errors);
- if (args == NULL)
- goto onError;
-
- result = PyEval_CallObject(encoder,args);
- if (result == NULL)
- goto onError;
-
- if (!PyTuple_Check(result) ||
- PyTuple_GET_SIZE(result) != 2) {
- PyErr_SetString(PyExc_TypeError,
- "encoder must return a tuple (object,integer)");
- goto onError;
- }
- v = PyTuple_GET_ITEM(result,0);
- Py_INCREF(v);
- /* We don't check or use the second (integer) entry. */
-
- Py_DECREF(args);
- Py_DECREF(encoder);
- Py_DECREF(result);
- return v;
-
- onError:
- Py_XDECREF(args);
- Py_XDECREF(encoder);
- return NULL;
- }
-
- /* Decode an object (usually a Python string) using the given encoding
- and return an equivalent object (e.g. an Unicode object).
-
- errors is passed to the decoder factory as argument if non-NULL. */
-
- PyObject *PyCodec_Decode(PyObject *object,
- const char *encoding,
- const char *errors)
- {
- PyObject *decoder = NULL;
- PyObject *args = NULL, *result = NULL;
- PyObject *v;
-
- decoder = PyCodec_Decoder(encoding);
- if (decoder == NULL)
- goto onError;
-
- args = args_tuple(object, errors);
- if (args == NULL)
- goto onError;
-
- result = PyEval_CallObject(decoder,args);
- if (result == NULL)
- goto onError;
- if (!PyTuple_Check(result) ||
- PyTuple_GET_SIZE(result) != 2) {
- PyErr_SetString(PyExc_TypeError,
- "decoder must return a tuple (object,integer)");
- goto onError;
- }
- v = PyTuple_GET_ITEM(result,0);
- Py_INCREF(v);
- /* We don't check or use the second (integer) entry. */
-
- Py_DECREF(args);
- Py_DECREF(decoder);
- Py_DECREF(result);
- return v;
-
- onError:
- Py_XDECREF(args);
- Py_XDECREF(decoder);
- Py_XDECREF(result);
- return NULL;
- }
-
- void _PyCodecRegistry_Init(void)
- {
- if (_PyCodec_SearchPath == NULL)
- _PyCodec_SearchPath = PyList_New(0);
- if (_PyCodec_SearchCache == NULL)
- _PyCodec_SearchCache = PyDict_New();
- if (_PyCodec_SearchPath == NULL ||
- _PyCodec_SearchCache == NULL)
- Py_FatalError("can't initialize codec registry");
- }
-
- void _PyCodecRegistry_Fini(void)
- {
- Py_XDECREF(_PyCodec_SearchPath);
- _PyCodec_SearchPath = NULL;
- Py_XDECREF(_PyCodec_SearchCache);
- _PyCodec_SearchCache = NULL;
- }
-