Logo Search packages:      
Sourcecode: python-biopython version File versions  Download package

Std.py

# Standard Bioformats definitions

import Martel
# Shorthand for the Martel group constructor used by the builders in
# this module.
Group = Martel.Group

# Namespace name and the "bioformat:" prefix put in front of tag names.
namespace = "bioformat"
NS = namespace + ":"
# Namespace URI; record() attaches it as the "xmlns:bioformat" attribute.
XMLNS = "http://biopython.org/bioformat"

def _set_if_given(attrs, field, d, valid = None, convert = None):
    """Copy attrs[field] into d when it is present and not None.

    attrs   -- mapping to read from (only its .get() is used)
    field   -- key to look up in attrs and to store under in d
    d       -- dictionary that receives the value (modified in place)
    valid   -- optional container of allowed values; a value outside it
               raises TypeError
    convert -- optional callable applied to the value before storing

    The membership check happens before the conversion.
    """
    value = attrs.get(field)
    if value is None:
        # Missing and explicitly-None fields are both treated as "not given".
        return
    if valid is not None and value not in valid:
        raise TypeError("%s (%r) must be one of %s" % \
                        (field, value, valid))
    if convert is not None:
        value = convert(value)
    d[field] = value

def _complain_if_given(attrs, name):
    """Raise NotImplementedError if attrs carries a non-None value for name.

    attrs -- mapping of attribute names to values
    name  -- the attribute that is recognised but not handled yet

    A missing key and a key mapped to None are both accepted silently.
    """
    # .get() replaces the deprecated has_key() lookup; for a dictionary the
    # two forms give the same answer.
    if attrs.get(name) is not None:
        raise NotImplementedError("Don't yet handle %r" % (name,))

def _must_have(expr, f):
    """Require that the group tagged f.tag occurs in expr.

    expr -- object providing group_names()
    f    -- term whose .tag attribute names the required group

    Raises TypeError when the tag is not among expr.group_names().
    """
    required = f.tag
    if required in expr.group_names():
        return
    raise TypeError(
        "group %r not present in the expression but is required" % \
        (required,))

def _must_have_set(expr, sets):
    """Require that expr contains every tag of at least one of the sets.

    expr -- object providing group_names()
    sets -- sequence of sequences of terms; each term has a .tag attribute

    Returns None as soon as one set is fully present.  Otherwise raises
    TypeError listing the tags that would have been acceptable.
    """
    names = expr.group_names()
    for tag_set in sets:
        for f in tag_set:
            if f.tag not in names:
                break
        else:
            # Every tag of this set was found.
            return
    if len(sets) == 1:
        raise TypeError("missing required tags (need %s) in expression" %
                        ([f.tag for f in sets[0]],))
    lines = ["missing required tags in expression; must have one set from:"]
    for tag_set in sets:
        # The original referenced an undefined name here, which turned the
        # intended TypeError into a NameError.
        lines.append(str([f.tag for f in tag_set]))
    raise TypeError("\n".join(lines))

def _must_not_have(expr, f):
    """Require that the group tagged f.tag does not occur in expr.

    expr -- object providing group_names()
    f    -- term whose .tag attribute names the forbidden group

    Raises TypeError when the tag is among expr.group_names().
    """
    # The original evaluated f.tag without binding it, so the check below
    # failed with NameError on every call.
    tag = f.tag
    if tag in expr.group_names():
        raise TypeError(
            "group %r present in the expression but is not allowed" % \
            (tag,))


# pre- Python 2.2 functions didn't allow attributes
# Probe: try to set an attribute on a throwaway function.  _use_hack ends
# up 1 when the interpreter refuses (AttributeError) and 0 otherwise.
def _f():
    pass
try:
    _f.x = 1
except AttributeError:
    _use_hack = 1
else:
    _use_hack = 0
del _f

def _check_name(f, text):
    """Assert that text is the namespaced name of the function f.

    f    -- function being tagged
    text -- tag about to be attached to it

    The tag "record" is exempt from the check.  For anything else the tag
    must equal NS + the function's name; otherwise AssertionError is raised
    with the (expected, actual) pair as its argument.
    """
    if text == "record": # XXX FIXME
        return
    # __name__ is the portable spelling of func_name.  The message now shows
    # the value that is actually compared (the old one added an extra ":").
    expected = NS + f.__name__
    assert expected == text, (expected, text)

def _check_attrs(attrs, names):
    """Validate attribute names and return a normalised copy of attrs.

    attrs -- dictionary of attribute names to values (not modified)
    names -- sequence of the attribute names allowed here

    Raises TypeError for any key of attrs that is not in names.  The
    returned copy has an entry for every allowed name; names that were not
    supplied map to None.
    """
    for name in attrs.keys():
        if name not in names:
            raise TypeError("attr %r is not allowed here (valid terms: %s)" % \
                            (name, names))
    d = attrs.copy()
    for name in names:
        # setdefault() replaces the deprecated has_key() test: existing
        # entries are kept, missing ones become None.
        d.setdefault(name, None)
    return d

if _use_hack:
    # Functions cannot carry attributes here, so each tagged function is
    # replaced in the module namespace by a callable object that can.
    class StdTerm:
        def __init__(self, func):
            self._func = func
        def __call__(self, *args, **kwargs):
            return self._func( *args, **kwargs)

    def _settag(f, tag):
        # Verify the tag, then rebind the module-level name to the wrapper
        # and store the tag on the wrapper.
        _check_name(f, tag)
        wrapper = StdTerm(f)
        globals()[f.func_name] = wrapper
        wrapper.tag = tag
else:
    def _settag(f, tag):
        # Verify the tag, then store it directly on the function.
        _check_name(f, tag)
        f.tag = tag

################ identifier, description, and cross-references
def record(expr, attrs = {}):
    """Wrap expr in the "record" group.

    The only attribute accepted is "format"; any other name raises
    TypeError.  The group always carries the "xmlns:bioformat"
    declaration, plus "format" when one is given.
    """
    checked = _check_attrs(attrs, ("format",))
    group_attrs = {"xmlns:bioformat": XMLNS}
    _set_if_given(checked, "format", group_attrs)
    return Group("record", expr, group_attrs) # XXX FIXME
_settag(record, "record") # XXX AND FIXME


def dbid(expr, attrs = {}):
    """Wrap expr in a "bioformat:dbid" group.

    Accepted attributes are "type" (one of "primary", "accession",
    "secondary"), "style" and "dbname".  Only "type" and "dbname" are
    copied onto the group; "style" is accepted but not forwarded.
    Raises TypeError for an unknown attribute or an invalid "type".
    """
    id_types = ("primary", "accession", "secondary")
    checked = _check_attrs(attrs, ("type", "style", "dbname"))
    group_attrs = {}
    _set_if_given(checked, "type", group_attrs, id_types)
    _set_if_given(checked, "dbname", group_attrs)
    return Group(NS + "dbid", expr, group_attrs)
_settag(dbid, NS + "dbid")

def description_block(expr, attrs = {}):
    """Wrap expr in a "bioformat:description_block" group.

    expr must contain a description group.  The optional "join" attribute
    must be one of "english", "concat", "space" or "newline".  Raises
    TypeError when any of these checks fails.
    """
    join_modes = ("english", "concat", "space", "newline")
    checked = _check_attrs(attrs, ("join",))
    _must_have(expr, description)
    group_attrs = {}
    _set_if_given(checked, "join", group_attrs, join_modes)
    return Group(NS + "description_block", expr, group_attrs)
_settag(description_block, NS + "description_block")

def description(expr, attrs = {}):
    """Wrap expr in a "bioformat:description" group.

    No attributes are accepted; supplying any raises TypeError.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    tag = NS + "description"
    return Group(tag, expr)
_settag(description, NS + "description")

def description_line(expr, attrs = {}):
    """Build a description_block holding one description made from expr.

    NOTE(review): attrs is handed to description(), which accepts no
    attribute names, so any non-empty attrs raises TypeError -- confirm
    whether it was meant for description_block() instead.
    """
    single = description(expr, attrs)
    return description_block(single)

def fast_dbxref(expr, attrs = {}):
    """Wrap expr in a "bioformat:fast_dbxref" group.

    The optional "style" attribute must be one of "sp-general",
    "sp-prosite" or "sp-embl".  Raises TypeError for an unknown attribute
    or an invalid style.
    """
    styles = ("sp-general", "sp-prosite", "sp-embl")
    checked = _check_attrs(attrs, ("style",))
    group_attrs = {}
    _set_if_given(checked, "style", group_attrs, styles)
    return Group(NS + "fast_dbxref", expr, group_attrs)

def dbxref(expr, attrs = {}):
    """Wrap expr in a "bioformat:dbxref" group.

    expr must contain a dbxref_dbid group (TypeError otherwise).  "style"
    is the only recognised attribute, but giving it a value raises
    NotImplementedError.  The group is created with an empty attribute
    dictionary.
    """
    checked = _check_attrs(attrs, ("style",))
    _must_have(expr, dbxref_dbid)
    group_attrs = {}
    _complain_if_given(checked, "style")
    return Group(NS + "dbxref", expr, group_attrs)
_settag(dbxref, NS + "dbxref")

def dbxref_dbname(expr, attrs = {}):
    """Wrap expr in a "bioformat:dbxref_dbname" group.

    The optional "style" attribute is copied onto the group unchecked.
    Any other attribute name raises TypeError.
    """
    checked = _check_attrs(attrs, ("style",))
    group_attrs = {}
    _set_if_given(checked, "style", group_attrs)
    return Group(NS + "dbxref_dbname", expr, group_attrs)
_settag(dbxref_dbname, NS + "dbxref_dbname")

def dbxref_dbid(expr, attrs = {}):
    """Wrap expr in a "bioformat:dbxref_dbid" group.

    Recognised attributes:
      dbname -- copied as given
      type   -- one of "primary", "accession", "secondary"
      style  -- not handled yet; a value raises NotImplementedError
      negate -- 0 or 1, stored as its str() form
    Raises TypeError for an unknown attribute or an invalid value.
    """
    id_types = ("primary", "accession", "secondary")
    checked = _check_attrs(attrs, ("dbname", "type", "style", "negate"))
    group_attrs = {}
    _set_if_given(checked, "dbname", group_attrs)
    _set_if_given(checked, "type", group_attrs, id_types)
    _complain_if_given(checked, "style")
    _set_if_given(checked, "negate", group_attrs, (0, 1), str)

    return Group(NS + "dbxref_dbid", expr, group_attrs)
_settag(dbxref_dbid, NS + "dbxref_dbid")

def dbxref_negate(expr, attrs = {}):
    """Wrap expr in a "bioformat:dbxref_negate" group.

    No attributes are accepted; supplying any raises TypeError.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    tag = NS + "dbxref_negate"
    return Group(tag, expr)
_settag(dbxref_negate, NS + "dbxref_negate")

##################### sequences

def _check_gapchar(s):
    """Return s if it is a single character with code from " " to 126.

    Raises TypeError for a character outside that range.  ord() itself
    raises TypeError when s is not exactly one character long.
    """
    code = ord(s)
    if code < ord(" ") or code > 126:
        raise TypeError("%r not allowed as a gap character" % (s,))
    return s

# What about three letter codes?
def sequence_block(expr, attrs = {}):
    """Wrap expr in a "bioformat:sequence_block" group.

    expr must contain a sequence group.  Recognised attributes:
      alphabet      -- one of the alphabet names listed below
      gapchar       -- validated by _check_gapchar
      remove_spaces -- 0 or 1, stored as its str() form
    Raises TypeError for an unknown attribute or an invalid value.
    """
    alphabets = ("iupac-protein", "iupac-dna", "iupac-rna",
                 "iupac-ambiguous-protein",
                 "iupac-ambiguous-dna",
                 "iupac-ambiguous-rna",
                 "protein", "dna", "rna", "unknown")
    checked = _check_attrs(attrs, ("alphabet", "gapchar", "remove_spaces"))
    _must_have(expr, sequence)
    group_attrs = {}
    _set_if_given(checked, "alphabet", group_attrs, alphabets)
    _set_if_given(checked, "gapchar", group_attrs, convert = _check_gapchar)
    _set_if_given(checked, "remove_spaces", group_attrs, (0, 1), str)
    return Group(NS + "sequence_block", expr, group_attrs)
_settag(sequence_block, NS + "sequence_block")

def sequence(expr, attrs = {}):
    """Wrap expr in a "bioformat:sequence" group.

    No attributes are accepted; supplying any raises TypeError.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    tag = NS + "sequence"
    return Group(tag, expr)
_settag(sequence, NS + "sequence")

def alphabet(expr, attrs = {}):
    """Wrap expr in a "bioformat:alphabet" group.

    The optional "alphabet" attribute must be one of the names listed
    below.  Raises TypeError for an unknown attribute or an invalid name.
    """
    alphabets = ("iupac-protein", "iupac-dna", "iupac-rna",
                 "iupac-ambiguous-protein",
                 "iupac-ambiguous-dna",
                 "iupac-ambiguous-rna",
                 "protein", "dna", "rna", "nucleotide", "unknown")
    checked = _check_attrs(attrs, ("alphabet",))
    group_attrs = {}
    _set_if_given(checked, "alphabet", group_attrs, alphabets)
    return Group(NS + "alphabet", expr, group_attrs)
_settag(alphabet, NS + "alphabet")

    

############################## features

# In PIR

# FEATURE
#    1-25                #domain signal sequence #status predicted #label SIG\
#    26-737              #product procollagen-lysine 5-dioxygenase 2 #status
#                        predicted #label MAT\
#    63,209,297,365,522,
#    725                 #binding_site carbohydrate (Asn) (covalent) #status
#                        predicted

# The whole thing is a 'feature_block'

# One 'feature' is
#    26-737              #product procollagen-lysine 5-dioxygenase 2 #status
#                        predicted #label MAT\

# One 'feature_name' is "binding_site".

# An example of the feature_location_block and feature_location, which I
# will abbreviate as 'flb' and 'fl', is:
# <flb>   <fl>63,209,297,365,522,</fl>
#    <fl>725</fl>                 #binding_site carbohydrate ...

# PIR doesn't have a 'feature_description'

# Let:
#   fq = feature_qualifier
#   fqb = feature_qualifier_block
#   fqn = feature_qualifier_name
#   fqd = feature_qualifier_description
# then the text
#   
#    26-737              #product procollagen-lysine 5-dioxygenase 2 #status
#                        predicted #label MAT\
# 
# can be represented as (the rather tedious)
# 
#    26-737              <fqb><fq>#<fqn>product</fqn> <fqd>procollagen-\
# lysine 5-dioxygenase 2</fqd></fq> <fq>#<fqn>status</fqn>
#                        <fqd>predicted</fqd></fq> <fq>#<fqn>label\
# </fqn> <fqd>MAT</fqd></fq></fqb>
#

# 'style' determines the namespace for the feature name
def feature_block(expr, attrs = {}):
    """Wrap expr in a "bioformat:feature_block" group.

    The optional "style" and "location-style" attributes are copied onto
    the group unchecked.  expr must contain a feature group.  Raises
    TypeError for an unknown attribute or a missing feature group.
    """
    checked = _check_attrs(attrs, ("style", "location-style"))
    group_attrs = {}
    for key in ("style", "location-style"):
        _set_if_given(checked, key, group_attrs)
    _must_have(expr, feature)
    return Group(NS + "feature_block", expr, group_attrs)
_settag(feature_block, NS + "feature_block")

def feature(expr, attrs = {}):
    """Wrap expr in a "bioformat:feature" group.

    The optional "location-style" attribute is copied onto the group.
    expr must contain a feature_name group and either a feature_location
    group or both feature_location_start and feature_location_end.
    """
    checked = _check_attrs(attrs, ("location-style",))
    group_attrs = {}
    _set_if_given(checked, "location-style", group_attrs)
    _must_have(expr, feature_name)
    # Either one combined location or an explicit start/end pair.
    location_choices = [[feature_location],
                        [feature_location_start, feature_location_end]]
    _must_have_set(expr, location_choices)
    return Group(NS + "feature", expr, group_attrs)
_settag(feature, NS + "feature")

def feature_name(expr, attrs = {}):
    """Wrap expr in a "bioformat:feature_name" group.

    No attributes are accepted; supplying any raises TypeError.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    tag = NS + "feature_name"
    return Group(tag, expr)
_settag(feature_name, NS + "feature_name")

def feature_location(expr, attrs = {}):
    """Wrap expr in a "bioformat:feature_location" group.

    No attributes are accepted; supplying any raises TypeError.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    tag = NS + "feature_location"
    return Group(tag, expr)
_settag(feature_location, NS + "feature_location")

def feature_location_start(expr, attrs = {}):
    """Wrap expr in a "bioformat:feature_location_start" group.

    No attributes are accepted; supplying any raises TypeError.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    tag = NS + "feature_location_start"
    return Group(tag, expr)
_settag(feature_location_start, NS + "feature_location_start")

def feature_location_end(expr, attrs = {}):
    """Wrap expr in a "bioformat:feature_location_end" group.

    No attributes are accepted; supplying any raises TypeError.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    tag = NS + "feature_location_end"
    return Group(tag, expr)
_settag(feature_location_end, NS + "feature_location_end")

def feature_description(expr, attrs = {}):
    """Wrap expr in a "bioformat:feature_description" group.

    No attributes are accepted; supplying any raises TypeError.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    tag = NS + "feature_description"
    return Group(tag, expr)
_settag(feature_description, NS + "feature_description")


##def feature_qualifier_block(expr, attrs = {}):
##    attrs = _check_attrs(attrs, ())
##    _must_have(expr, feature_qualifier)
##    return Group(NS + "feature_qualifier_block", expr)
##_settag(feature_qualifier_block, NS + "feature_qualifier_block")

def feature_qualifier(expr, attrs = {}):
    """Wrap expr in a "bioformat:feature_qualifier" group.

    No attributes are accepted, and expr must contain a
    feature_qualifier_name group; TypeError is raised otherwise.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    _must_have(expr, feature_qualifier_name)
    tag = NS + "feature_qualifier"
    return Group(tag, expr)
_settag(feature_qualifier, NS + "feature_qualifier")

def feature_qualifier_name(expr, attrs = {}):
    """Wrap expr in a "bioformat:feature_qualifier_name" group.

    No attributes are accepted; supplying any raises TypeError.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    tag = NS + "feature_qualifier_name"
    return Group(tag, expr)
_settag(feature_qualifier_name, NS + "feature_qualifier_name")

def feature_qualifier_description(expr, attrs = {}):
    """Wrap expr in a "bioformat:feature_qualifier_description" group.

    No attributes are accepted; supplying any raises TypeError.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    tag = NS + "feature_qualifier_description"
    return Group(tag, expr)
_settag(feature_qualifier_description, NS + "feature_qualifier_description")


############ For homology searches

# "BLASTN", "BLASTP"
def application_name(expr, attrs = {}):
    """Wrap expr in a "bioformat:application_name" group.

    "app" is the only attribute accepted (TypeError otherwise).  The
    checked attribute dictionary is passed to Group as-is, so "app" is
    present with value None when it was not supplied.
    """
    checked = _check_attrs(attrs, ("app",))
    return Group("bioformat:application_name", expr, checked)

# "2.0.11", "2.0a19MP-WashU"
def application_version(expr, attrs = {}):
    """Wrap expr in a "bioformat:application_version" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:application_version", expr, checked)

def search_header(expr, attrs = {}):
    """Wrap expr in a "bioformat:search_header" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:search_header", expr, checked)

def search_table(expr, attrs = {}):
    """Wrap expr in a "bioformat:search_table" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:search_table", expr, checked)

def search_table_description(expr, attrs = {}):
    """Wrap expr in a "bioformat:search_table_description" group.

    "bioformat:decode" is the only attribute accepted (TypeError
    otherwise).  It defaults to "strip" and is replaced by the supplied
    value when one is given.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    group_attrs = {"bioformat:decode": "strip"}
    _set_if_given(checked, "bioformat:decode", group_attrs)
    return Group("bioformat:search_table_description", expr, group_attrs)

def search_table_value(expr, attrs = {}):
    """Wrap expr in a "bioformat:search_table_value" group.

    Accepts "name" and "bioformat:decode" (TypeError otherwise).  The
    checked dictionary is passed to Group as-is, so both keys are present
    and map to None when not supplied.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    return Group("bioformat:search_table_value", expr, checked)

def search_table_entry(expr, attrs = {}):
    """Wrap expr in a "bioformat:search_table_entry" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:search_table_entry", expr, checked)

def query_description_block(expr, attrs = {}):
    """Wrap expr in a "bioformat:query_description_block" group.

    "join-query" is the only attribute accepted (TypeError otherwise).
    It defaults to "join|fixspaces" and is replaced by the supplied value
    when one is given.
    """
    checked = _check_attrs(attrs, ("join-query",))
    group_attrs = {"join-query": "join|fixspaces"}
    _set_if_given(checked, "join-query", group_attrs)
    return Group("bioformat:query_description_block", expr, group_attrs)

def query_description(expr, attrs = {}):
    """Wrap expr in a "bioformat:query_description" group.

    "bioformat:decode" is the only attribute accepted; it is copied onto
    the group when given.  Any other attribute name raises TypeError.
    """
    # The trailing comma matters: without it the allowed names were a bare
    # string, so _check_attrs matched substrings (e.g. "decode") and added
    # one key per character to the checked dictionary.
    attrs = _check_attrs(attrs, ("bioformat:decode",))
    d = {}
    _set_if_given(attrs, "bioformat:decode", d)
    return Group("bioformat:query_description", expr, d)

def query_size(expr, attrs = {}):
    """Wrap expr in a "bioformat:query_size" group.

    No attributes are accepted (TypeError otherwise), and none are passed
    to Group.
    """
    # Called for validation only; the checked copy is not needed.
    _check_attrs(attrs, ())
    tag = "bioformat:query_size"
    return Group(tag, expr)

def database_name(expr, attrs = {}):
    """Wrap expr in a "bioformat:database_name" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:database_name", expr, checked)

def database_num_sequences(expr, attrs = {}):
    """Wrap expr in a "bioformat:database_num_sequences" group.

    "bioformat:decode" is the only attribute accepted (TypeError
    otherwise).  The checked dictionary is passed to Group as-is, so the
    key is present with value None when it was not supplied.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    return Group("bioformat:database_num_sequences", expr, checked)

def database_num_letters(expr, attrs = {}):
    """Wrap expr in a "bioformat:database_num_letters" group.

    "bioformat:decode" is the only attribute accepted (TypeError
    otherwise).  The checked dictionary is passed to Group as-is, so the
    key is present with value None when it was not supplied.
    """
    checked = _check_attrs(attrs, ("bioformat:decode",))
    return Group("bioformat:database_num_letters", expr, checked)

def hit(expr, attrs = {}):
    """Wrap expr in a "bioformat:hit" group.

    "join-description" is the only attribute accepted (TypeError
    otherwise).  It defaults to "join|fixspaces" and is replaced by the
    supplied value when one is given.
    """
    checked = _check_attrs(attrs, ("join-description",))
    group_attrs = {"join-description": "join|fixspaces"}
    _set_if_given(checked, "join-description", group_attrs)
    return Group("bioformat:hit", expr, group_attrs)

def hit_length(expr, attrs = {}):
    """Wrap expr in a "bioformat:hit_length" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hit_length", expr, checked)

def hit_description(expr, attrs = {}):
    """Wrap expr in a "bioformat:hit_description" group.

    "bioformat:decode" is the only attribute accepted; it is copied onto
    the group when given.  Any other attribute name raises TypeError.
    """
    # The trailing comma matters: without it the allowed names were a bare
    # string, so _check_attrs matched substrings (e.g. "decode") and added
    # one key per character to the checked dictionary.
    attrs = _check_attrs(attrs, ("bioformat:decode",))
    d = {}
    _set_if_given(attrs, "bioformat:decode", d)
    return Group("bioformat:hit_description", expr, d)

def hsp(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp", expr, checked)

def hsp_value(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_value" group.

    Accepts "name" and "bioformat:decode" (TypeError otherwise).  The
    checked dictionary is passed to Group as-is, so both keys are present
    and map to None when not supplied.
    """
    checked = _check_attrs(attrs, ("name", "bioformat:decode"))
    return Group("bioformat:hsp_value", expr, checked)

def hsp_frame(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_frame" group.

    The optional "which" attribute must be "query", "homology" or
    "subject".  Raises TypeError for an unknown attribute or an invalid
    value.
    """
    sides = ("query", "homology", "subject")
    checked = _check_attrs(attrs, ("which",))
    group_attrs = {}
    _set_if_given(checked, "which", group_attrs, sides)
    return Group("bioformat:hsp_frame", expr, group_attrs)

def hsp_strand(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_strand" group.

    Recognised attributes:
      which  -- "query", "homology" or "subject"
      strand -- "+1", "0", "-1" or ""
    Raises TypeError for an unknown attribute or an invalid value.
    """
    sides = ("query", "homology", "subject")
    strands = ("+1", "0", "-1", "")
    checked = _check_attrs(attrs, ("strand", "which"))
    group_attrs = {}
    _set_if_given(checked, "which", group_attrs, sides)
    _set_if_given(checked, "strand", group_attrs, strands)
    return Group("bioformat:hsp_strand", expr, group_attrs)

def hsp_seqalign_query_seq(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_seqalign_query_seq" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_seq", expr, checked)

def hsp_seqalign_homology_seq(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_seqalign_homology_seq" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_homology_seq", expr, checked)

def hsp_seqalign_subject_seq(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_seqalign_subject_seq" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_seq", expr, checked)

def hsp_seqalign_query_leader(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_seqalign_query_leader" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_leader", expr, checked)
    

def hsp_seqalign_query_name(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_seqalign_query_name" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_name", expr, checked)

def hsp_seqalign_subject_name(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_seqalign_subject_name" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_name", expr, checked)

def hsp_seqalign(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_seqalign" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign", expr, checked)

def hsp_seqalign_query_start(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_seqalign_query_start" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_start", expr, checked)

def hsp_seqalign_query_end(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_seqalign_query_end" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_query_end", expr, checked)

def hsp_seqalign_subject_start(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_seqalign_subject_start" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_start", expr, checked)

def hsp_seqalign_subject_end(expr, attrs = {}):
    """Wrap expr in a "bioformat:hsp_seqalign_subject_end" group.

    No attributes are accepted (TypeError otherwise); a copy of the
    validated attrs dictionary is passed to Group.
    """
    checked = _check_attrs(attrs, ())
    return Group("bioformat:hsp_seqalign_subject_end", expr, checked)

def search_parameter(expr, attrs = {}):
    """Wrap expr in a "bioformat:search_parameter" group.

    Accepts "name" and "bioformat:decode" (TypeError otherwise); each is
    copied onto the group only when a value is given.
    """
    allowed = ("name", "bioformat:decode")
    checked = _check_attrs(attrs, allowed)
    group_attrs = {}
    for key in allowed:
        _set_if_given(checked, key, group_attrs)
    return Group("bioformat:search_parameter", expr, group_attrs)

def search_statistic(expr, attrs = {}):
    """Wrap expr in a "bioformat:search_statistic" group.

    Accepts "name" and "bioformat:decode" (TypeError otherwise); each is
    copied onto the group only when a value is given.
    """
    allowed = ("name", "bioformat:decode")
    checked = _check_attrs(attrs, allowed)
    group_attrs = {}
    for key in allowed:
        _set_if_given(checked, key, group_attrs)
    return Group("bioformat:search_statistic", expr, group_attrs)


Generated by  Doxygen 1.6.0   Back to index