# KeyWList.py (from the python-biopython source package)

# Copyright 1999 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""
This module provides code to work with the keywlist.txt file from
SwissProt.
http://www.expasy.ch/sprot/sprot-top.html


Classes:
Record            Stores the information about one keyword or one category
                  in the keywlist.txt file.

Functions:
parse             Parses the keywlist.txt file and returns an iterator to
                  the records it contains.
"""


class Record(dict):
    """
    This record stores the information of one keyword or category in the
    keywlist.txt as a Python dictionary. The keys in this dictionary are
    the line codes that can appear in the keywlist.txt file:

    ---------  ---------------------------     ----------------------
    Line code  Content                         Occurrence in an entry
    ---------  ---------------------------     ----------------------
    ID         Identifier (keyword)            Once; starts a keyword entry
    IC         Identifier (category)           Once; starts a category entry
    AC         Accession (KW-xxxx)             Once
    DE         Definition                      Once or more
    SY         Synonyms                        Optional; once or more
    GO         Gene ontology (GO) mapping      Optional; once or more
    HI         Hierarchy                       Optional; once or more
    WW         Relevant WWW site               Optional; once or more
    CA         Category                        Once per keyword entry; absent
                                               in category entries

    A newly created record contains only the keys for the line codes that
    may occur more than once (DE, SY, GO, HI and WW), each mapped to its own
    empty list.  The single-occurrence keys (ID, IC, AC and CA) are absent
    until a value is stored for them.  Note that parse() replaces the DE and
    SY lists by single space-joined strings when it completes a record.
    """
    def __init__(self):
        dict.__init__(self)
        # Give every repeatable line code its own fresh list so that values
        # can be appended without first checking whether the key exists.
        for keyword in ("DE", "SY", "GO", "HI", "WW"):
            self[keyword] = []
    
def parse(handle):
    """
    Parse the contents of a keywlist.txt file and iterate over its records.

    Arguments:
    handle            Iterable yielding the lines of the file as strings,
                      typically an open file handle. Any iterable of lines
                      (for example a list) is accepted.

    This is a generator function; it yields one Record for every entry that
    is terminated by a "//" line.

    Everything up to and including the first line that starts with a row of
    underscores is treated as the header and skipped. Parsing stops at the
    first subsequent line that starts with a row of dashes (the footer).
    Within an entry, the first two characters of each line are the line code
    and the text from the sixth character onwards, stripped of surrounding
    whitespace, is the value:

    ID, IC, AC, CA          stored as a string (a repeated code overwrites
                            the previous value)
    DE, SY                  collected per line, then joined with single
                            spaces into one string when the entry ends
    GO, HI, WW              collected per line and kept as a list of strings

    Lines with any other line code are ignored. An entry that is not
    terminated by "//" before the footer or the end of the input is not
    yielded.
    """
    # Use one shared iterator so that each of the three loops below resumes
    # where the previous one stopped.  File handles already behave like this,
    # but a re-iterable such as a list would otherwise restart from its first
    # line in every loop.
    lines = iter(handle)
    # First, skip the header
    for line in lines:
        if line.startswith("______________________________________"):
            break
    # Now parse the records
    record = Record()
    for line in lines:
        if line.startswith("-------------------------------------"):
            # We have reached the footer
            break
        key = line[:2]
        if key == "//":
            # End of entry: collapse the multi-line free-text fields into
            # single strings before handing the record to the caller.
            record["DE"] = " ".join(record["DE"])
            record["SY"] = " ".join(record["SY"])
            yield record
            record = Record()
        else:
            # The value starts after the two-letter code and three spaces.
            value = line[5:].strip()
            if key in ("ID", "IC", "AC", "CA"):
                record[key] = value
            elif key in ("DE", "SY", "GO", "HI", "WW"):
                record[key].append(value)
    # Read the footer and throw it away
    for line in lines:
        pass

# Generated by Doxygen 1.6.0