# Logo Search packages:
# Sourcecode: python-biopython version File versions  Download package
#
# Prodoc.py

# Copyright 2000 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""
This module provides code to work with the prosite.doc file from
Prosite.
http://www.expasy.ch/prosite/

Tested with:
Release 15.0, July 1998
Release 16.0, July 1999
Release 20.22, 13 November 2007
Release 20.43, 10 February 2009


Functions:
read               Read a Prodoc file containing exactly one Prodoc entry.
parse              Iterates over entries in a Prodoc file.

Classes:
Record             Holds Prodoc data.
Reference          Holds data from a Prodoc reference.


DEPRECATED functions:
index_file         Index a Prodoc file for a Dictionary.
_extract_record    Extract Prodoc data from a web page.

DEPRECATED classes:
Dictionary         Accesses a Prodoc file using a dictionary interface.
RecordParser       Parses a Prodoc record into a Record object.
_Scanner           Scans Prodoc-formatted data.
_RecordConsumer    Consumes Prodoc data to a Record object.
Iterator           Iterates over entries in a Prodoc file.
"""


def read(handle):
    """Read a Prodoc file containing exactly one Prodoc entry.

    Arguments:
    handle     Iterable of text lines (e.g. an open file) holding the
               Prodoc data.

    Returns the parsed record, or None if the handle holds no record at
    all (only blank lines and "//" lines).

    Raises ValueError if any further content follows the first record.
    Blank lines and "//" lines after the record are tolerated, since the
    record reader skips those same lines between records.
    """
    record = __read(handle)
    # We should have reached the end of the data by now; only padding
    # (blank lines or "//" lines) may remain.
    for line in handle:
        line = line.rstrip()
        if line and not line.startswith("//"):
            raise ValueError("More than one Prodoc record found")
    return record

def parse(handle):
    """Iterate over the entries in a Prodoc file.

    Yields records one at a time, reading lazily from handle, and stops
    as soon as the reader reports that no further record is available.
    """
    entry = __read(handle)
    while entry:
        yield entry
        entry = __read(handle)
 
class Record:
    """Holds information from a Prodoc record.

    Members:
    accession      Accession number of the record.
    prosite_refs   List of tuples (prosite accession, prosite name).
    text           Free format text.
    references     List of reference objects.

    """

    def __init__(self):
        """Create an empty record; every member starts out blank."""
        self.accession = ''
        # Fresh lists per instance, so records never share state.
        self.prosite_refs = []
        self.text = ''
        self.references = []


class Reference:
    """Holds information from a Prodoc citation.

    Members:
    number     Number of the reference. (string)
    authors    Names of the authors.
    citation   Describes the citation.

    """

    def __init__(self):
        """Create an empty reference; every member starts as ''."""
        self.number = ''
        self.authors = ''
        self.citation = ''

# Below are private functions

def __read_prosite_reference_line(record, line):
    """Parse one "{accession; name}" line and store it on the record.

    Arguments:
    record     Object whose prosite_refs list receives the new
               (accession, name) tuple.
    line       The raw line; trailing whitespace is ignored.

    Raises ValueError if the line is not closed by "}" or does not hold
    exactly one "; "-separated accession/name pair.
    """
    line = line.rstrip()
    # endswith() instead of line[-1] so that a blank line is reported as
    # a malformed reference rather than escaping as an IndexError.
    if not line.endswith('}'):
        raise ValueError("I don't understand the Prosite reference on line\n%s" % line)
    # Drop the surrounding braces, then split accession from name.
    fields = line[1:-1].split('; ')
    if len(fields) != 2:
        raise ValueError("I don't understand the Prosite reference on line\n%s" % line)
    acc, name = fields
    record.prosite_refs.append((acc, name))

def __read_text_line(record, line):
    """Append one line of free-format text to the record.

    The line is added verbatim (including its line ending) to
    record.text.  Always returns True.
    """
    record.text = record.text + line
    return True

def __read_reference_start(record, line):
    """Begin a new reference from its first line and add it to the record.

    Characters 1-2 of the line give the reference number; everything
    from character 4 onwards is the payload.  When the number starts
    with 'E' (an electronic reference) the payload is stored as the
    citation, because such lines carry a URL instead of authors;
    otherwise it is stored as the authors.
    """
    entry = Reference()
    entry.number = line[1:3].strip()
    payload = line[4:].strip()
    if line[1] != 'E':
        entry.authors = payload
    else:
        # Electronic reference: the URL sits where the authors would be.
        entry.citation = payload
    record.references.append(entry)

def __read_reference_line(record, line):
    """Handle a continuation line of the most recent reference.

    Arguments:
    record     Object whose references list ends with the reference
               being continued.
    line       The raw line.

    Returns False for a blank line and True once an indented
    continuation line has been absorbed.  The line extends the author
    list while that list still ends in a comma; otherwise it extends
    the citation.

    Raises ValueError for a non-blank line that is not indented by at
    least five spaces.
    """
    if not line.strip():
        return False
    reference = record.references[-1]
    if line.startswith('     '):
        # endswith() rather than authors[-1]: electronic references have
        # an empty author string, which must not raise IndexError here.
        if reference.authors.endswith(','):
            reference.authors += line[4:].rstrip()
        else:
            reference.citation += line[5:]
        return True
    # ValueError matches the other parse errors in this module and is
    # still caught by callers that catch Exception.
    raise ValueError("I don't understand the reference line\n%s" % line)

def __read_copyright_line(record, line):
    """Consume one line of the copyright statement without storing it.

    Returns False when the line starts with '+----' and True for any
    other line.  The record is left untouched.
    """
    return not line.startswith('+----')

def __read(handle):
    """Read the next Prodoc record from handle.

    Arguments:
    handle     Iterable of text lines positioned at or before the start
               of a record.

    Returns a Record, or None if only blank lines and "//" lines remain.

    Raises ValueError if the accession line, the "{BEGIN" marker or a
    Prosite reference line is malformed, or if the data ends before the
    "{END}" marker.
    """
    # Skip blank lines between records
    for line in handle:
        line = line.rstrip()
        if line and not line.startswith("//"):
            break
    else:
        return None
    record = Record()
    # Read the accession number
    if not line.startswith("{PDOC"):
        raise ValueError("Line does not start with '{PDOC':\n%s" % line)
    if line[-1] != '}':
        raise ValueError("I don't understand accession line\n%s" % line)
    record.accession = line[1:-1]
    # Read the Prosite references
    for line in handle:
        if line.startswith('{PS'):
            __read_prosite_reference_line(record, line)
        else:
            break
    else:
        raise ValueError("Unexpected end of stream.")
    # Read the actual text
    if not line.startswith('{BEGIN'):
        raise ValueError("Line does not start with '{BEGIN':\n%s" % line)
    # read_line is the handler for the current section of the body; it is
    # set to None once a handler returns false, after which lines are
    # ignored until a new section marker appears.
    read_line = __read_text_line
    for line in handle:
        if line.startswith('{END}'):
            # Clean up the record and return
            for reference in record.references:
                reference.citation = reference.citation.rstrip()
                reference.authors = reference.authors.rstrip()
            return record
        elif line[:1] == '[' and line[3:5] == '] ':
            # A "[nn] " prefix opens a new reference.  Slices are used
            # instead of single indices so that a short line starting
            # with '[' cannot raise IndexError.
            __read_reference_start(record, line)
            read_line = __read_reference_line
        elif line.startswith('+----'):
            read_line = __read_copyright_line
        elif read_line:
            if not read_line(record, line):
                read_line = None
    raise ValueError("Unexpected end of stream.")

# Generated by  Doxygen 1.6.0   Back to index