# Logo Search packages:
# Sourcecode: python-biopython version File versions  Download package

# Copyright 2000 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""
This module provides code to work with the prosite.doc file from
Prosite.

Tested with:
Release 15.0, July 1998
Release 16.0, July 1999
Release 20.22, 13 November 2007
Release 20.43, 10 February 2009

Functions:
read               Read a Prodoc file containing exactly one Prodoc entry.
parse              Iterates over entries in a Prodoc file.

Classes:
Record             Holds Prodoc data.
Reference          Holds data from a Prodoc reference.
"""

def read(handle):
    """Read a Prodoc file that contains exactly one Prodoc entry.

    handle must support both iteration over lines and readline().
    Returns whatever the private record reader produces for the first
    entry.  Raises ValueError if readline() still yields anything after
    that entry has been consumed.
    """
    parsed = __read(handle)
    # Nothing may follow the single record we were asked to read.
    leftover = handle.readline()
    if not leftover:
        return parsed
    raise ValueError("More than one Prodoc record found")

def parse(handle):
    """Iterate over the entries in a Prodoc file.

    Generator: repeatedly asks the private record reader for the next
    record from handle and yields it.  Iteration ends as soon as the
    reader returns a false value (no further record available).
    """
    while True:
        record = __read(handle)
        if not record:
            # No more records in the stream: finish the generator.
            break
        yield record
class Record(object):
    """Holds information from a Prodoc record.

    Members:
    accession      Accession number of the record.
    prosite_refs   List of tuples (prosite accession, prosite name).
    text           Free format text.
    references     List of reference objects.

    """
    def __init__(self):
        """Create an empty record; every member starts out blank."""
        self.accession = ''
        self.prosite_refs = []
        self.text = ''
        self.references = []

class Reference(object):
    """Holds information from a Prodoc citation.

    Members:
    number     Number of the reference. (string)
    authors    Names of the authors.
    citation   Describes the citation.

    """
    def __init__(self):
        """Create an empty reference; every member starts as ''."""
        self.number = ''
        self.authors = ''
        self.citation = ''

# Below are private functions

def __read_prosite_reference_line(record, line):
    """Parse one '{accession; name}' line into record.prosite_refs.

    record   Object with a prosite_refs list; an (accession, name)
             tuple is appended to it.
    line     The raw line, e.g. '{PS00001; ASN_GLYCOSYLATION}'.

    Raises ValueError if the line does not end with '}' (this includes
    a blank line) or does not hold exactly two '; '-separated fields.
    """
    line = line.rstrip()
    # endswith() is also safe on an empty string, unlike line[-1].
    if not line.endswith('}'):
        raise ValueError("I don't understand the Prosite reference on line\n%s" % line)
    # Drop the surrounding braces before splitting the two fields.
    fields = line[1:-1].split('; ')
    if len(fields) != 2:
        raise ValueError("I don't understand the Prosite reference on line\n%s" % line)
    acc, name = fields
    record.prosite_refs.append((acc, name))

def __read_text_line(record, line):
    """Add one raw line to the record's free-format text.

    The line is concatenated unchanged onto record.text.  Always
    returns True.
    """
    record.text = record.text + line
    return True

def __read_reference_start(record, line):
    """Begin a new reference from its first line and attach it to record.

    record   Object with a references list; the new Reference is
             appended to it.
    line     First line of the reference.  Characters 1-2 hold the
             reference number and the content starts at column 4.
    """
    # Read the references
    reference = Reference()
    reference.number = line[1:3].strip()
    if line[1] == 'E':
        # If it's an electronic reference, then the URL is on the
        # line, instead of the author.
        reference.citation = line[4:].strip()
    else:
        reference.authors = line[4:].strip()
    # Continuation lines are applied to record.references[-1], so the
    # new reference has to be stored on the record here.
    record.references.append(reference)

def __read_reference_line(record, line):
    """Consume one continuation line of the most recent reference.

    record   Object whose references list ends with the reference
             currently being read.
    line     The raw line.

    Returns False for a blank line and True after a continuation line
    (one starting with five spaces) has been added to the reference.
    Raises Exception for any other non-blank line.
    """
    if not line.strip():
        return False
    reference = record.references[-1]
    if line.startswith('     '):
        # A trailing comma means the author list runs on to this line;
        # otherwise the line belongs to the citation.  endswith() is
        # also safe when authors is empty (electronic references).
        if reference.authors.endswith(','):
            reference.authors += line[4:].rstrip()
        else:
            reference.citation += line[5:]
        return True
    raise Exception("I don't understand the reference line\n%s" % line)

def __read_copyright_line(record, line):
    """Tell the caller whether the copyright statement is still running.

    Lines of the copyright statement are skipped, so record is left
    untouched.  Returns False for a line starting with '+----' and
    True for every other line.
    """
    return not line.startswith('+----')

def __read(handle):
    """Read the next Prodoc record from handle.

    handle   Iterable yielding the lines of a Prodoc file.

    Returns a Record, or None if only blank lines and '//' lines
    remain in the stream.

    Raises ValueError if the first content line is not a well-formed
    '{PDOC...}' accession line, if the Prosite reference lines are not
    followed by a '{BEGIN' line, or if the stream ends before '{END}'.
    """
    # Skip blank lines between records
    for line in handle:
        line = line.rstrip()
        if line and not line.startswith("//"):
            break
    else:
        # Stream exhausted without finding the start of a record.
        return None
    record = Record()
    # Read the accession number
    if not line.startswith("{PDOC"):
        raise ValueError("Line does not start with '{PDOC':\n%s" % line)
    if not line.endswith('}'):
        raise ValueError("I don't understand accession line\n%s" % line)
    record.accession = line[1:-1]
    # Read the Prosite references
    for line in handle:
        if line.startswith('{PS'):
            __read_prosite_reference_line(record, line)
        else:
            # First line that is not a Prosite reference; checked below.
            break
    else:
        raise ValueError("Unexpected end of stream.")
    # Read the actual text
    if not line.startswith('{BEGIN'):
        raise ValueError("Line does not start with '{BEGIN':\n%s" % line)
    # read_line is the handler for ordinary lines in the current section;
    # None means lines are ignored until the next section marker.
    read_line = __read_text_line
    for line in handle:
        if line.startswith('{END}'):
            # Clean up the record and return
            for reference in record.references:
                reference.citation = reference.citation.rstrip()
                reference.authors = reference.authors.rstrip()
            return record
        elif line[:1] == '[' and line[3:5] == '] ':
            # Start of a reference such as '[ 1] ...'.  Slices are used
            # so that lines shorter than five characters cannot raise
            # IndexError.
            __read_reference_start(record, line)
            read_line = __read_reference_line
        elif line.startswith('+----'):
            read_line = __read_copyright_line
        elif read_line:
            # A handler returning False marks the end of its section.
            if not read_line(record, line):
                read_line = None
    raise ValueError("Unexpected end of stream.")

# Generated by  Doxygen 1.6.0   Back to index