/*
 * Sourcecode: python-biopython
 *
 * triemodule.c
 */

#include <Python.h>
#include <marshal.h>
#include "trie.h"

staticforward PyTypeObject Trie_Type;

/* Python-level trie object: the standard object header followed by the
   handle of the underlying trie.  trie_dealloc releases the handle with
   Trie_del(). */
typedef struct {
    PyObject_HEAD
    Trie trie;   /* handle obtained from Trie_new() or Trie_deserialize() */
} trieobject;

/* trie() -> new, empty trie object.
 *
 * Accepts no arguments.  Returns a new reference to a trie object, or
 * NULL with an exception set when argument parsing or an allocation
 * fails.
 */
static PyObject*
trie_trie(PyObject* self, PyObject* args)
{
    trieobject* trieobj;
    Trie trie;

    if (!PyArg_ParseTuple(args,":trie")) 
        return NULL;
    if(!(trie = Trie_new()))
      return PyErr_NoMemory();
    if(!(trieobj = PyObject_New(trieobject, &Trie_Type))) {
      /* No Python object owns the fresh trie yet, so free it here;
         otherwise it would leak (trie_load does the same). */
      Trie_del(trie);
      return NULL;
    }
    trieobj->trie = trie;
    return (PyObject*)trieobj;
}

static void 
_decref_objects(const unsigned char *key, const void *value, void *data) 
{
    Py_DECREF((PyObject *)value);
}

/* tp_dealloc slot: release every stored value, then the trie itself,
   then the Python object's own memory. */
static void
trie_dealloc(PyObject* self)
{
    Trie owned = ((trieobject *)self)->trie;

    /* The values must be released before the trie that references them. */
    Trie_iterate(owned, _decref_objects, NULL);
    Trie_del(owned);
    PyObject_Del(self);
}

/* mp_length slot: report whatever Trie_len gives for the wrapped trie. */
static int
trie_length(trieobject *mp)
{
    int n_items = Trie_len(mp->trie);

    return n_items;
}

/* mp_subscript slot, i.e. D[key].
 *
 * Returns a new reference to the stored value.  Raises TypeError when
 * the key is not a string and KeyError when the trie has no value for
 * it.
 */
static PyObject *
trie_subscript(trieobject *mp, PyObject *py_key)
{
    unsigned char *ckey;
    PyObject *found;

    /* Only string keys are accepted. */
    if(!PyString_Check(py_key)) {
      PyErr_SetString(PyExc_TypeError, "key must be a string");
      return NULL;
    }

    ckey = (unsigned char *)PyString_AS_STRING(py_key);
    found = (PyObject *)Trie_get(mp->trie, ckey);
    if(!found) {
      PyErr_SetString(PyExc_KeyError, (char *)ckey);
      return NULL;
    }

    /* The trie keeps its own reference; hand the caller a new one. */
    Py_INCREF(found);
    return found;
}

/* mp_ass_subscript slot: D[key] = value, or del D[key] when py_value
 * is NULL.
 *
 * Returns 0 on success and -1 with an exception set on failure:
 * TypeError for a non-string key, KeyError when deleting a key that has
 * no value, AssertionError when Trie_set reports an error.
 *
 * The reference held on the previous value is released only after the
 * trie has been updated successfully, so a failed Trie_set never leaves
 * the trie pointing at an object whose reference was already dropped.
 */
static int
trie_ass_sub(trieobject *mp, PyObject *py_key, PyObject *py_value)
{
    unsigned char *key;
    PyObject *py_prev;

    /* Make sure key is a string. */
    if(!PyString_Check(py_key)) {
      PyErr_SetString(PyExc_TypeError, "key must be a string");
      return -1;
    }
    key = (unsigned char *)PyString_AS_STRING(py_key);

    /* Remember what is currently stored at that key (may be NULL); its
       reference is given up at the very end. */
    py_prev = (PyObject *)Trie_get(mp->trie, key);

    /* The client wants to delete a key from a dictionary.  The Trie
       API doesn't support this, so I will just overwrite it with
       NULL. */
    if(!py_value) {
      /* If the key doesn't exist, raise a KeyError. */
      if(!py_prev) {
          PyErr_SetString(PyExc_KeyError, (char *)key);
          return -1;
      }
      if(Trie_set(mp->trie, key, NULL)) {
          /* NOTE(review): assumes a failed Trie_set leaves the old
             value in place, so py_prev keeps its reference. */
          PyErr_SetString(PyExc_AssertionError, "error setting trie");
          return -1;
      }
    }
    /* The client wants to set a key in the dictionary. */
    else {
      Py_INCREF(py_value);
      if(Trie_set(mp->trie, key, py_value)) {
          /* The trie did not take the value: undo the INCREF. */
          Py_DECREF(py_value);
          PyErr_SetString(PyExc_AssertionError, "error setting trie");
          return -1;
      }
    }

    /* The trie no longer refers to the previous value. */
    Py_XDECREF(py_prev);
    return 0;
}

static char has_key__doc__[] =
"D.has_key(k) -> 1 if D has a key k, else 0";

/* Returns a Python int holding the result of Trie_has_key for the given
   string key; raises TypeError when py_key is not a string. */
static PyObject *
trie_has_key(trieobject *mp, PyObject *py_key)
{
    if(PyString_Check(py_key)) {
      unsigned char *ckey = (unsigned char *)PyString_AS_STRING(py_key);
      int present = Trie_has_key(mp->trie, ckey);

      return PyInt_FromLong((long)present);
    }

    PyErr_SetString(PyExc_TypeError, "key must be a string");
    return NULL;
}

/* METH_VARARGS adapter: unpack the single argument and forward it to
   trie_has_key. */
static PyObject *
trie_has_key_onearg(trieobject *mp, PyObject *py_args)
{
    PyObject *py_key = NULL;

    if(PyArg_ParseTuple(py_args, "O", &py_key))
      return trie_has_key(mp, py_key);
    return NULL;
}



static char has_prefix__doc__[] =
"D.has_prefix(k) -> 1 if D has a prefix k, else 0";

/* Returns a Python int holding the result of Trie_has_prefix for the
   given string; raises TypeError when py_prefix is not a string. */
static PyObject *
trie_has_prefix(trieobject *mp, PyObject *py_prefix)
{
    if(PyString_Check(py_prefix)) {
      unsigned char *cprefix =
          (unsigned char *)PyString_AS_STRING(py_prefix);
      int present = Trie_has_prefix(mp->trie, cprefix);

      return PyInt_FromLong((long)present);
    }

    PyErr_SetString(PyExc_TypeError, "k must be a string");
    return NULL;
}

/* METH_VARARGS adapter: unpack the single argument and forward it to
   trie_has_prefix. */
static PyObject *
trie_has_prefix_onearg(trieobject *mp, PyObject *py_args)
{
    PyObject *py_prefix = NULL;

    if(PyArg_ParseTuple(py_args, "O", &py_prefix))
      return trie_has_prefix(mp, py_prefix);
    return NULL;
}

static char with_prefix__doc__[] =
"D.with_prefix(prefix) -> list of D's keys that begins with prefix";

static void 
_trie_with_prefix_helper(const unsigned char *key, const void *value, 
                   void *data) 
{
    PyObject *py_list = (PyObject *)data;
    PyObject *py_key;

    if(PyErr_Occurred())
      return;

    if(!(py_key = PyString_FromString((const char *)key)))
      return;
    PyList_Append(py_list, py_key);
    Py_DECREF(py_key);
}

/* Builds a new list of the keys reported by Trie_with_prefix for the
   given string prefix.  Returns NULL with an exception set when the
   argument is not a string or when building the list fails. */
static PyObject *
trie_with_prefix(trieobject *mp, PyObject *py_prefix)
{
    PyObject *matches;

    if(!PyString_Check(py_prefix)) {
      PyErr_SetString(PyExc_TypeError, "k must be a string");
      return NULL;
    }

    matches = PyList_New(0);
    if(matches == NULL)
      return NULL;

    Trie_with_prefix(mp->trie,
                 (unsigned char *)PyString_AS_STRING(py_prefix),
                 _trie_with_prefix_helper, (void *)matches);

    /* The callback cannot return a status, so failures are detected
       through the pending-exception indicator. */
    if(!PyErr_Occurred())
      return matches;
    Py_DECREF(matches);
    return NULL;
}

/* METH_VARARGS adapter: unpack the single argument and forward it to
   trie_with_prefix. */
static PyObject *
trie_with_prefix_onearg(trieobject *mp, PyObject *py_args)
{
    PyObject *py_prefix = NULL;

    if(PyArg_ParseTuple(py_args, "O", &py_prefix))
      return trie_with_prefix(mp, py_prefix);
    return NULL;
}


static char keys__doc__[] =
"D.keys() -> list of D's keys";

static void 
_trie_keys_helper(const unsigned char *key, const void *value, void *data) 
{
    PyObject *py_list = (PyObject *)data;
    PyObject *py_key;

    if(PyErr_Occurred())
      return;

    if(!(py_key = PyString_FromString((char *)key)))
      return;
    PyList_Append(py_list, py_key);
    Py_DECREF(py_key);
}

/* Builds a new list containing every key visited by Trie_iterate.
   Returns NULL with an exception set on failure. */
static PyObject *
trie_keys(trieobject *mp)
{
    PyObject *all_keys = PyList_New(0);

    if(all_keys == NULL)
      return NULL;

    Trie_iterate(mp->trie, _trie_keys_helper, (void *)all_keys);
    if(!PyErr_Occurred())
      return all_keys;

    /* Something failed inside the callback: discard the partial list. */
    Py_DECREF(all_keys);
    return NULL;
}

/* METH_VARARGS adapter for keys(): insist on an empty argument tuple
   (ValueError otherwise), then delegate to trie_keys. */
static PyObject *
trie_keys_noargs(trieobject *mp, PyObject *py_args)
{
    if(PyTuple_Size(py_args) == 0)
      return trie_keys(mp);

    PyErr_SetString(PyExc_ValueError, "no args expected");
    return NULL;
}

static char values__doc__[] =
"D.values() -> list of D's values";

/* Callback for Trie_iterate: append one stored value to the list passed
   through `data`, unless a Python exception is already pending. */
static void 
_trie_values_helper(const unsigned char *key, const void *value, void *data) 
{
    if(!PyErr_Occurred())
      PyList_Append((PyObject *)data, (PyObject *)value);
}

/* Builds a new list containing every value visited by Trie_iterate.
   Returns NULL with an exception set on failure. */
static PyObject *
trie_values(trieobject *mp)
{
    PyObject *all_values = PyList_New(0);

    if(all_values == NULL)
      return NULL;

    Trie_iterate(mp->trie, _trie_values_helper, (void *)all_values);
    if(!PyErr_Occurred())
      return all_values;

    /* Something failed inside the callback: discard the partial list. */
    Py_DECREF(all_values);
    return NULL;
}

/* METH_VARARGS adapter for values(): insist on an empty argument tuple
   (ValueError otherwise), then delegate to trie_values. */
static PyObject *
trie_values_noargs(trieobject *mp, PyObject *py_args)
{
    if(PyTuple_Size(py_args) == 0)
      return trie_values(mp);

    PyErr_SetString(PyExc_ValueError, "no args expected");
    return NULL;
}

static char get__doc__[] =
"D.get(k[,d]) -> D[k] if D.has_key(k), else d.  d defaults to None.";

/* D.get(k[, d]): returns a new reference to the value stored for k, or
   to d (None when omitted) if Trie_get finds nothing. */
static PyObject *
trie_get(trieobject *mp, PyObject *args)
{
    unsigned char *key;
    PyObject *py_failobj = Py_None;
    PyObject *py_stored;
    PyObject *py_result;

    if (!PyArg_ParseTuple(args, "s|O:get", &key, &py_failobj))
      return NULL;

    py_stored = (PyObject *)Trie_get(mp->trie, key);
    py_result = py_stored ? py_stored : py_failobj;

    /* Either way the caller receives its own reference. */
    Py_INCREF(py_result);
    return py_result;
}

static char get_approximate__doc__[] =
"D.get_approximate(key, k) -> List of (key, value, mismatches) in D, allowing up to k mismatches in key.";

void 
_trie_get_approximate_helper(const unsigned char *key, const void *value, 
                       const int mismatches, void *data)
{
    /* Append a tuple of (key, value) to data, which is a PyList. */
    PyObject *py_list = (PyObject *)data,
      *py_value = (PyObject *)value,
      *py_key,
      *py_tuple,
      *py_mismatches;

    if(PyErr_Occurred())
      return;

    if(!(py_key = PyString_FromString((const char *)key)))
      return;
    if(!(py_mismatches = PyInt_FromLong(mismatches))) {
      Py_DECREF(py_key);
      return;
    }
    Py_INCREF(py_value);

    if(!(py_tuple = PyTuple_New(3))) {
      Py_DECREF(py_key);
      Py_DECREF(py_mismatches);
      Py_DECREF(py_value);
      return;
    }
    PyTuple_SetItem(py_tuple, 0, py_key);
    PyTuple_SetItem(py_tuple, 1, py_value);
    PyTuple_SetItem(py_tuple, 2, py_mismatches);
    PyList_Append(py_list, py_tuple);
    Py_DECREF(py_tuple);
}

/* D.get_approximate(key, k): returns a new list of the tuples collected
   by _trie_get_approximate_helper, or NULL with an exception set. */
static PyObject *
trie_get_approximate(trieobject *mp, PyObject *args)
{
    unsigned char *key;
    int k;
    PyObject *hits;

    if (!PyArg_ParseTuple(args, "si:get_approximate", &key, &k))
      return NULL;

    hits = PyList_New(0);
    if(hits == NULL)
      return NULL;

    Trie_get_approximate(mp->trie, key, k, 
                   _trie_get_approximate_helper, (void *)hits);
    if(!PyErr_Occurred())
      return hits;

    /* The callback failed part-way: discard the partial list. */
    Py_DECREF(hits);
    return NULL;
}

/* tp_hash slot: trie objects refuse to be hashed; always raises
   TypeError and returns -1. */
static long
trie_nohash(PyObject *self)
{
    PyErr_SetString(PyExc_TypeError, "trie objects are unhashable");
    return -1L;
}

/* Mapping protocol slots for trie objects: length, item lookup, and
   item assignment/deletion. */
static PyMappingMethods trie_as_mapping = {
    (inquiry)trie_length,        /*mp_length*/
    (binaryfunc)trie_subscript,  /*mp_subscript*/
    (objobjargproc)trie_ass_sub  /*mp_ass_subscript*/
};

/* Method table for trie objects; trie_getattr looks names up here. */
static PyMethodDef trieobj_methods[] = {
    /*  METH_O and METH_NOARGS require Python 2.2, so the entries below
        are disabled and the METH_VARARGS adapters that follow are
        registered instead.
    {"has_key", (PyCFunction)trie_has_key,  METH_O,
     has_key__doc__},
    {"has_prefix", (PyCFunction)trie_has_prefix,  METH_O,
     has_prefix__doc__},
    {"with_prefix", (PyCFunction)trie_with_prefix,  METH_O,
     with_prefix__doc__},
    {"keys",    (PyCFunction)trie_keys,     METH_NOARGS,
     keys__doc__},
    {"values",  (PyCFunction)trie_values,   METH_NOARGS,
     values__doc__},
    */

    /* Adapters that unpack the argument tuple themselves. */
    {"has_key", (PyCFunction)trie_has_key_onearg,  METH_VARARGS,
     has_key__doc__},
    {"has_prefix", (PyCFunction)trie_has_prefix_onearg,  METH_VARARGS,
     has_prefix__doc__},
    {"with_prefix", (PyCFunction)trie_with_prefix_onearg,  METH_VARARGS,
     with_prefix__doc__},
    {"keys",    (PyCFunction)trie_keys_noargs,     METH_VARARGS,
     keys__doc__},
    {"values",  (PyCFunction)trie_values_noargs,   METH_VARARGS,
     values__doc__},

    /* These parse their own arguments with PyArg_ParseTuple. */
    {"get",     (PyCFunction)trie_get,      METH_VARARGS,
     get__doc__},
    {"get_approximate",  (PyCFunction)trie_get_approximate,  METH_VARARGS,
     get_approximate__doc__},
    {NULL, NULL}   /* sentinel */
};

/* tp_getattr slot: resolve attribute names against trieobj_methods. */
static PyObject *trie_getattr(PyObject *obj, char *name)
{
    PyObject *attr = Py_FindMethod(trieobj_methods, obj, name);

    return attr;
}

/* Type object for trie instances.  The initializer is positional; the
   type pointer in the header is left NULL here and filled in by
   inittrie. */
static PyTypeObject Trie_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                  /*ob_size*/
    "trie",             /*tp_name*/
    sizeof(trieobject), /*tp_basicsize*/
    0,                  /*tp_itemsize*/
    trie_dealloc,       /*tp_dealloc*/
    0,                  /*tp_print*/
    trie_getattr,                  /*tp_getattr*/
    0,                  /*tp_setattr*/
    0,                  /*tp_compare*/
    0,                  /*tp_repr*/
    0,                  /*tp_as_number*/
    0,                  /*tp_as_sequence*/
    &trie_as_mapping,   /*tp_as_mapping*/
    trie_nohash,        /*tp_hash */
};

static int
_write_to_handle(const void *towrite, const int length, void *handle)
{
    PyObject *py_handle = (PyObject *)handle,
      *py_retval = NULL;
    int success = 0;

    if(!length)
      return 1;

    if(!(py_retval = PyObject_CallMethod(py_handle, "write", "s#", 
                               towrite, length)))
      goto _write_to_handle_cleanup;
    success = 1;

 _write_to_handle_cleanup:
    if(py_retval) {
      Py_DECREF(py_retval);
    }
    return success;
}

int _write_value_to_handle(const void *value, void *handle)
{
    PyObject *py_value = (PyObject *)value,
      *py_marshalled = NULL;
    char *marshalled;
    int length;
    int success = 0;

#ifdef Py_MARSHAL_VERSION  
    if(!(py_marshalled =   
       PyMarshal_WriteObjectToString(py_value, Py_MARSHAL_VERSION)))  
        goto _write_value_to_handle_cleanup;  
#else  
    if(!(py_marshalled = PyMarshal_WriteObjectToString(py_value)))  
        goto _write_value_to_handle_cleanup;  
#endif  
    if(PyString_AsStringAndSize(py_marshalled, &marshalled, &length) == -1)
      goto _write_value_to_handle_cleanup;
    if(!_write_to_handle(&length, sizeof(length), handle))
      goto _write_value_to_handle_cleanup;
    if(!_write_to_handle(marshalled, length, handle))
      goto _write_value_to_handle_cleanup;
    success = 1;

 _write_value_to_handle_cleanup:
    if(py_marshalled) {
      Py_DECREF(py_marshalled);
    }

    return success;
}

/* save(handle, trie): serialize a trie object through handle.write().
 *
 * Returns None on success.  Returns NULL with an exception set when the
 * arguments are wrong (TypeError if the second argument is not a trie
 * object) or when serialization fails (RuntimeError if no more specific
 * exception is pending).
 */
static PyObject *
trie_save(PyObject *self, PyObject *args)
{
    PyObject *py_handle,
      *py_trie;
    trieobject *mp;

    if(!PyArg_ParseTuple(args, "OO:save", &py_handle, &py_trie))
        return NULL;

    /* Casting an arbitrary object to trieobject and reading its trie
       member would access memory that is not a trie; check first. */
    if(py_trie->ob_type != &Trie_Type) {
      PyErr_SetString(PyExc_TypeError,
                  "save() argument 2 must be a trie object");
      return NULL;
    }
    mp = (trieobject *)py_trie;

    if(!Trie_serialize(mp->trie, _write_to_handle, _write_value_to_handle, 
                   (void *)py_handle)) {
      if(!PyErr_Occurred())
          PyErr_SetString(PyExc_RuntimeError,
                      "saving failed for some reason");
      return NULL;
    }
    Py_INCREF(Py_None);
    return Py_None;
}

/* Read callback used by trie_load: call handle.read(length) and copy
 * the returned bytes into `wasread`.
 *
 * Returns 1 on success (a zero length succeeds without calling read)
 * and 0 on failure.  The object returned by read() must support the
 * old-style read buffer interface; its segments are copied one after
 * another until `length` bytes have been received.  A segment that
 * would not fit into the space still expected is rejected, so `wasread`
 * is never written beyond `length` bytes.
 */
static int 
_read_from_handle(void *wasread, const int length, void *handle)
{
    PyObject *py_handle = (PyObject *)handle,
      *py_retval = NULL;
    void *retval;
    int success = 0;
    PyBufferProcs *buffer;
    int segment;
    int bytes_read, bytes_left;
    
    if(!length)
      return 1;

    if(!(py_retval = PyObject_CallMethod(py_handle, "read", "i", length)))
      goto _read_from_handle_cleanup;
    if(!py_retval->ob_type->tp_as_buffer) {
      PyErr_SetString(PyExc_ValueError, "read method should return buffer");
      goto _read_from_handle_cleanup;
    }
    /* NOTE(review): this tests for any of the bits in Py_TPFLAGS_DEFAULT,
       not specifically for the char-buffer flag the message mentions --
       confirm the intent before tightening it. */
    if(!(py_retval->ob_type->tp_flags & Py_TPFLAGS_DEFAULT)) {
      PyErr_SetString(PyExc_ValueError, "no bf_getcharbuffer slot");
      goto _read_from_handle_cleanup;
    }
    buffer = py_retval->ob_type->tp_as_buffer;
    if(!buffer->bf_getreadbuffer) {
      PyErr_SetString(PyExc_ValueError, "no bf_getreadbuffer");
      goto _read_from_handle_cleanup;
    }

    bytes_left = length;
    segment = 0;
    while(bytes_left > 0) {
      /* Any negative result is a failure, not just -1. */
      if((bytes_read = buffer->bf_getreadbuffer(py_retval, 
                                      segment, &retval)) < 0)
          goto _read_from_handle_cleanup; 
      /* read() may hand back more than was asked for; copying it all
         would run past the end of the caller's buffer. */
      if(bytes_read > bytes_left) {
          PyErr_SetString(PyExc_ValueError,
                      "read method returned more data than requested");
          goto _read_from_handle_cleanup;
      }
      memcpy(wasread, retval, bytes_read);
      wasread = (void *)((char *)wasread + bytes_read);
      bytes_left -= bytes_read;
      segment += 1;
    }

    success = 1;
    
 _read_from_handle_cleanup:
    if(py_retval) {
      Py_DECREF(py_retval);
    }
    return success;
}

#define MAX_KEY_LENGTH 2000
static void *
_read_value_from_handle(void *handle)
{
    int length;
    char KEY[MAX_KEY_LENGTH];

    if(!_read_from_handle((void *)&length, sizeof(length), (void *)handle))
      return NULL;
    if(length < 0 || length >= MAX_KEY_LENGTH)
      return NULL;
    if(!_read_from_handle((void *)KEY, length, (void *)handle))
      return NULL;
    return PyMarshal_ReadObjectFromString(KEY, length);
}


/* load(handle) -> trie object read back through handle.read().
 *
 * Returns a new reference to a trie object, or NULL with an exception
 * set (RuntimeError when deserialization fails without a more specific
 * exception pending).
 */
static PyObject *
trie_load(PyObject *self, PyObject *args)
{
    PyObject *py_handle;
    Trie trie;
    trieobject *trieobj;

    if(!PyArg_ParseTuple(args, "O:load", &py_handle))
      return NULL;

    if(!(trie = Trie_deserialize(_read_from_handle, _read_value_from_handle, 
                         py_handle))) {
      if(!PyErr_Occurred())
          PyErr_SetString(PyExc_RuntimeError, 
                      "loading failed for some reason");
      return NULL;
    }
      
    if(!(trieobj = PyObject_New(trieobject, &Trie_Type))) {
      /* The loaded values are Python objects whose references belong to
         the trie; release them before freeing it, as trie_dealloc does,
         so they are not leaked. */
      Trie_iterate(trie, _decref_objects, NULL);
      Trie_del(trie);
      return NULL;
    }
    trieobj->trie = trie;
    return (PyObject *)trieobj;
}

/* Module-level functions, registered by inittrie. */
static PyMethodDef trie_methods[] = {
    {"trie", trie_trie, METH_VARARGS, 
     "trie() -> new Trie object."},
    {"load", trie_load, METH_VARARGS, 
     "load(handle) -> trie object"},
    {"save", trie_save, METH_VARARGS, 
     "save(handle, trie), save a trie object to a handle"},
    {NULL, NULL, 0, NULL}   /* sentinel */
};

/* Module docstring, passed to Py_InitModule3 in inittrie. */
static char trie__doc__[] =
"\
This module implements a trie data structure.  This allows an O(M)\n\
lookup of a string in a dictionary, where M is the length of the\n\
string.  It also supports approximate matches.\n\
\n\
Functions:\n\
trie    Create a new trie object.\n\
save    Save a trie to a handle.\n\
load    Load a trie from a handle.\n\
\n\
";

/* Module initialization entry point: finish setting up Trie_Type and
   register the module-level functions and docstring under the name
   "trie". */
DL_EXPORT(void)
inittrie(void) 
{
    PyObject *module;

    /* The static initializer left the type pointer NULL; set it here. */
    Trie_Type.ob_type = &PyType_Type;

    module = Py_InitModule3("trie", trie_methods, trie__doc__);
    (void) module;   /* result deliberately unused */
}

/* Generated by Doxygen 1.6.0 */