Logo Search packages:      
Sourcecode: python-biopython version File versions  Download package

test_SwissProt.py

#!/usr/bin/env python
"""Test for the SwissProt parser on SwissProt files.
"""
import os
import unittest

from Bio import SeqIO
from Bio import SwissProt
from Bio.SeqRecord import SeqRecord



class TestSwissProt(unittest.TestCase):

    def test_sp001(self):
        """Parsing SwissProt file sp001

        Reads SwissProt/sp001 (path relative to the current working
        directory) through SeqIO's "swiss" format and through Bio.SwissProt,
        checks selected fields of each result, and verifies that the
        single-record readers and the iterating parsers agree.
        """
        datafile = os.path.join('SwissProt', 'sp001')

        # Single-record read via SeqIO. try/finally guarantees the handle is
        # closed even when the parser raises.
        handle = open(datafile)
        try:
            seq_record = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "Q13454")
        self.assertEqual(seq_record.name, "N33_HUMAN")
        self.assertEqual(seq_record.description, "N33 PROTEIN.")
        self.assertEqual(repr(seq_record.seq), "Seq('MGARGAPSRRRQAGRRLRYLPTGSFPFLLLLLLLCIQLGGGQKKKENLLAEKVE...DFE', ProteinAlphabet())")

        # Single-record read via Bio.SwissProt.
        handle = open(datafile)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "N33_HUMAN")
        self.assertEqual(record.accessions, ['Q13454', 'Q14911', 'Q14912'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
        self.assertEqual(record.seqinfo, (348, 39676, '75818910'))

        self.assertEqual(len(record.features), 6)
        self.assertEqual(record.features[0], ('TRANSMEM', 20, 40, 'POTENTIAL.', ''))
        self.assertEqual(record.features[1], ('TRANSMEM', 197, 217, 'POTENTIAL.', ''))
        self.assertEqual(record.features[2], ('TRANSMEM', 222, 242, 'POTENTIAL.', ''))
        self.assertEqual(record.features[3], ('TRANSMEM', 277, 297, 'POTENTIAL.', ''))
        self.assertEqual(record.features[4], ('TRANSMEM', 313, 333, 'POTENTIAL.', ''))
        self.assertEqual(record.features[5], ('VARSPLIC', 344, 348, 'DLDFE -> FLIK (IN FORM 2).', ''))

        self.assertEqual(len(record.references), 1)
        self.assertEqual(record.references[0].authors, "MACGROGAN D., LEVY A., BOVA G.S., ISAACS W.B., BOOKSTEIN R.;")
        self.assertEqual(record.references[0].title, '"Structure and methylation-associated silencing of a gene within a homozygously deleted region of human chromosome band 8p22.";')
        self.assertEqual(len(record.references[0].references), 1)
        self.assertEqual(record.references[0].references[0], ('MEDLINE', '96299740'))

        # Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        # Now try using the iterator - note that all these
        # test cases have only one record.

        # With the SequenceParser
        handle = open(datafile)
        try:
            records = list(SeqIO.parse(handle, "swiss"))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        handle = open(datafile)
        try:
            records = list(SwissProt.parse(handle))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)


    def test_sp002(self):
        """Parsing SwissProt file sp002

        Reads SwissProt/sp002 (path relative to the current working
        directory) through SeqIO's "swiss" format and through Bio.SwissProt,
        checks selected fields of each result, and verifies that the
        single-record readers and the iterating parsers agree.
        """
        datafile = os.path.join('SwissProt', 'sp002')

        # Single-record read via SeqIO. try/finally guarantees the handle is
        # closed even when the parser raises.
        handle = open(datafile)
        try:
            seq_record = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "P54101")
        self.assertEqual(seq_record.name, "CSP_MOUSE")
        self.assertEqual(seq_record.description, "CYSTEINE STRING PROTEIN (CSP).")
        self.assertEqual(repr(seq_record.seq), "Seq('MADQRQRSLSTSGESLYHVLGLDKNATSDDIKKSYRKLALKYHPDKNPDNPEAA...GFN', ProteinAlphabet())")

        # Single-record read via Bio.SwissProt.
        handle = open(datafile)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "CSP_MOUSE")
        self.assertEqual(record.accessions, ['P54101'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Rodentia', 'Sciurognathi', 'Muridae', 'Murinae', 'Mus'])
        self.assertEqual(record.seqinfo, (198, 22100, '9DF0142B'))

        self.assertEqual(len(record.features), 2)
        self.assertEqual(record.features[0], ('DOMAIN', 13, 82, 'DNAJ-LIKE.', ''))
        self.assertEqual(record.features[1], ('DOMAIN', 118, 128, 'POLY-CYS.', ''))

        self.assertEqual(len(record.references), 3)
        self.assertEqual(record.references[0].authors, "QIN N., LIN T., BIRNBAUMER L.;")
        self.assertEqual(record.references[0].title, "")
        self.assertEqual(len(record.references[0].references), 0)
        self.assertEqual(record.references[1].authors, "MASTROGIACOMO A., GUNDERSEN C.B.;")
        self.assertEqual(record.references[1].title, '"The nucleotide and deduced amino acid sequence of a rat cysteine string protein.";')
        self.assertEqual(len(record.references[1].references), 1)
        self.assertEqual(record.references[1].references[0], ('MEDLINE', '95223109'))
        self.assertEqual(record.references[2].authors, "BRAUN J.E., SCHELLER R.H.;")
        self.assertEqual(record.references[2].title, '"Cysteine string protein, a DnaJ family member, is present on diverse secretory vesicles.";')
        self.assertEqual(len(record.references[2].references), 1)
        self.assertEqual(record.references[2].references[0], ('MEDLINE', '96188189'))

        # Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        # Now try using the iterator - note that all these
        # test cases have only one record.

        # With the SequenceParser
        handle = open(datafile)
        try:
            records = list(SeqIO.parse(handle, "swiss"))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        handle = open(datafile)
        try:
            records = list(SwissProt.parse(handle))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)

    def test_sp003(self):
        """Parsing SwissProt file sp003

        Reads SwissProt/sp003 (path relative to the current working
        directory) through SeqIO's "swiss" format and through Bio.SwissProt,
        checks selected fields of each result, and verifies that the
        single-record readers and the iterating parsers agree.
        """
        datafile = os.path.join('SwissProt', 'sp003')

        # Single-record read via SeqIO. try/finally guarantees the handle is
        # closed even when the parser raises.
        handle = open(datafile)
        try:
            seq_record = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "P42655")
        self.assertEqual(seq_record.name, "143E_HUMAN")
        self.assertEqual(seq_record.description, "14-3-3 PROTEIN EPSILON (MITOCHONDRIAL IMPORT STIMULATION FACTOR L SUBUNIT) (PROTEIN KINASE C INHIBITOR PROTEIN-1) (KCIP-1) (14-3-3E).")
        self.assertEqual(repr(seq_record.seq), "Seq('MDDREDLVYQAKLAEQAERYDEMVESMKKVAGMDVELTVEERNLLSVAYKNVIG...ENQ', ProteinAlphabet())")

        # Single-record read via Bio.SwissProt.
        handle = open(datafile)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "143E_HUMAN")
        self.assertEqual(record.accessions, ['P42655', 'P29360', 'Q63631'])
        self.assertEqual(record.organism_classification, ['EUKARYOTA', 'METAZOA', 'CHORDATA', 'VERTEBRATA', 'MAMMALIA', 'EUTHERIA', 'PRIMATES', 'CATARRHINI', 'HOMINIDAE', 'HOMO'])
        self.assertEqual(record.seqinfo, (255, 29174, '40A43E62'))

        self.assertEqual(len(record.features), 5)
        self.assertEqual(record.features[0], ('MOD_RES', 1, 1, 'ACETYLATION.', ''))
        self.assertEqual(record.features[1], ('CONFLICT', 73, 73, 'K -> T (IN REF. 8).', ''))
        self.assertEqual(record.features[2], ('CONFLICT', 120, 120, 'F -> S (IN REF. 8).', ''))
        self.assertEqual(record.features[3], ('CONFLICT', 123, 123, 'K -> Y (IN REF. 8).', ''))
        self.assertEqual(record.features[4], ('CONFLICT', 129, 129, 'H -> Y (IN REF. 13).', ''))

        self.assertEqual(len(record.references), 13)
        self.assertEqual(record.references[0].authors, "CONKLIN D.S., GALAKTIONOV K., BEACH D.;")
        self.assertEqual(record.references[0].title, '"14-3-3 proteins associate with cdc25 phosphatases.";')
        self.assertEqual(len(record.references[0].references), 1)
        self.assertEqual(record.references[0].references[0], ('MEDLINE', '95372385'))
        self.assertEqual(record.references[1].authors, "LUK S.C.W., LEE C.Y., WAYE M.M.Y.;")
        self.assertEqual(record.references[1].title, "")
        self.assertEqual(len(record.references[1].references), 0)
        self.assertEqual(record.references[2].authors, "JIN D.Y., LYU M.S., KOZAK C.A., JEANG K.T.;")
        self.assertEqual(record.references[2].title, '"Function of 14-3-3 proteins.";')
        self.assertEqual(len(record.references[2].references), 1)
        self.assertEqual(record.references[2].references[0], ('MEDLINE', '96300316'))
        self.assertEqual(record.references[3].authors, "CHONG S.S., TANIGAMI A., ROSCHKE A.V., LEDBETTER D.H.;")
        self.assertEqual(record.references[3].title, '"14-3-3 epsilon has no homology to LIS1 and lies telomeric to it on chromosome 17p13.3 outside the Miller-Dieker syndrome chromosome region.";')
        self.assertEqual(len(record.references[3].references), 1)
        self.assertEqual(record.references[3].references[0], ('MEDLINE', '97011338'))
        self.assertEqual(record.references[4].authors, "TANIGAMI A., CHONG S.S., LEDBETTER D.H.;")
        self.assertEqual(record.references[4].title, '"14-3-3 epsilon genomic sequence.";')
        self.assertEqual(len(record.references[4].references), 0)
        self.assertEqual(record.references[5].authors, "ROSEBOOM P.H., WELLER J.L., BABILA T., AITKEN A., SELLERS L.A., MOFFET J.R., NAMBOODIRI M.A., KLEIN D.C.;")
        self.assertEqual(record.references[5].title, '"Cloning and characterization of the epsilon and zeta isoforms of the 14-3-3 proteins.";')
        self.assertEqual(len(record.references[5].references), 1)
        self.assertEqual(record.references[5].references[0], ('MEDLINE', '94296566'))
        self.assertEqual(record.references[6].authors, "ALAM R., HACHIYA N., SAKAGUCHI M., SHUN-ICHIRO K., IWANAGA S., KITAJIMA M., MIHARA K., OMURA T.;")
        self.assertEqual(record.references[6].title, '"cDNA cloning and characterization of mitochondrial import stimulation factor (MSF) purified from rat liver cytosol.";')
        self.assertEqual(len(record.references[6].references), 1)
        self.assertEqual(record.references[6].references[0], ('MEDLINE', '95122474'))
        self.assertEqual(record.references[7].authors, "GAO L., GU X.B., YU D.S., YU R.K., ZENG G.;")
        self.assertEqual(record.references[7].title, '"Association of a 14-3-3 protein with CMP-NeuAc:GM1 alpha 2,3- sialyltransferase.";')
        self.assertEqual(len(record.references[7].references), 1)
        self.assertEqual(record.references[7].references[0], ('MEDLINE', '96280718'))
        self.assertEqual(record.references[8].authors, "MCCONNELL J.E., ARMSTRONG J.F., BARD J.B.;")
        self.assertEqual(record.references[8].title, '"The mouse 14-3-3 epsilon isoform, a kinase regulator whose expression pattern is modulated in mesenchyme and neuronal differentiation.";')
        self.assertEqual(len(record.references[8].references), 1)
        self.assertEqual(record.references[8].references[0], ('MEDLINE', '95269876'))
        self.assertEqual(record.references[9].authors, "TAKIHARA Y., IRIE K., NOMURA M., MOTALEB M., MATSUMOTO K., SHIMADA K.;")
        self.assertEqual(record.references[9].title, "")
        self.assertEqual(len(record.references[9].references), 0)
        self.assertEqual(record.references[10].authors, "JONES J.M., NIIKURA T., PINKE R.M., GUO W., MOLDAY L., LEYKAM J., MCCONNELL D.G.;")
        self.assertEqual(record.references[10].title, '"Expression of 14-3-3 proteins in bovine retinal photoreceptors.";')
        self.assertEqual(len(record.references[10].references), 0)
        self.assertEqual(record.references[11].authors, "TOKER A., SELLERS L.A., AMESS B., PATEL Y., HARRIS A., AITKEN A.;")
        self.assertEqual(record.references[11].title, '"Multiple isoforms of a protein kinase C inhibitor (KCIP-1/14-3-3) from sheep brain. Amino acid sequence of phosphorylated forms.";')
        self.assertEqual(len(record.references[11].references), 1)
        self.assertEqual(record.references[11].references[0], ('MEDLINE', '92283271'))
        self.assertEqual(record.references[12].authors, "TOKER A., ELLIS C.A., SELLERS L.A., AITKEN A.;")
        self.assertEqual(record.references[12].title, '"Protein kinase C inhibitor proteins. Purification from sheep brain and sequence similarity to lipocortins and 14-3-3 protein.";')
        self.assertEqual(len(record.references[12].references), 1)
        self.assertEqual(record.references[12].references[0], ('MEDLINE', '90345949'))

        # Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        # Now try using the iterator - note that all these
        # test cases have only one record.

        # With the SequenceParser
        handle = open(datafile)
        try:
            records = list(SeqIO.parse(handle, "swiss"))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        handle = open(datafile)
        try:
            records = list(SwissProt.parse(handle))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)


    def test_sp004(self):
        """Parsing SwissProt file sp004

        Reads SwissProt/sp004 (path relative to the current working
        directory) through SeqIO's "swiss" format and through Bio.SwissProt,
        checks selected fields of each result, and verifies that the
        single-record readers and the iterating parsers agree.
        """
        datafile = os.path.join('SwissProt', 'sp004')

        # Single-record read via SeqIO. try/finally guarantees the handle is
        # closed even when the parser raises.
        handle = open(datafile)
        try:
            seq_record = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "P23082")
        self.assertEqual(seq_record.name, "NDOA_PSEPU")
        self.assertEqual(seq_record.description, "NAPHTHALENE 1,2-DIOXYGENASE SYSTEM FERREDOXIN COMPONENT.")
        self.assertEqual(repr(seq_record.seq), "Seq('TVKWIEAVALSDILEGDVLGVTVEGKELALYEVEGEIYATDNLCTHGSARMSDG...DLS', ProteinAlphabet())")

        # Single-record read via Bio.SwissProt.
        handle = open(datafile)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "NDOA_PSEPU")
        self.assertEqual(record.accessions, ['P23082', 'Q52123', 'O07829'])
        self.assertEqual(record.organism_classification, ['Bacteria', 'Proteobacteria', 'gamma subdivision', 'Pseudomonas group', 'Pseudomonas'])
        self.assertEqual(record.seqinfo, (103, 11315, '9F91B3C8'))

        self.assertEqual(len(record.features), 12)
        self.assertEqual(record.features[0], ('INIT_MET', 0, 0, '', ''))
        self.assertEqual(record.features[1], ('METAL', 44, 44, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
        self.assertEqual(record.features[2], ('METAL', 46, 46, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
        self.assertEqual(record.features[3], ('METAL', 63, 63, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
        self.assertEqual(record.features[4], ('METAL', 66, 66, 'IRON-SULFUR (2FE-2S) (POTENTIAL).', ''))
        self.assertEqual(record.features[5], ('VARIANT', 2, 2, 'V -> E (IN STRAIN G7).', ''))
        self.assertEqual(record.features[6], ('VARIANT', 14, 14, 'L -> P (IN STRAIN G7).', ''))
        self.assertEqual(record.features[7], ('VARIANT', 48, 48, 'S -> A (IN STRAIN G7).', ''))
        self.assertEqual(record.features[8], ('VARIANT', 76, 76, 'K -> R (IN STRAIN G7).', ''))
        self.assertEqual(record.features[9], ('VARIANT', 84, 84, 'Q -> E (IN STRAIN G7).', ''))
        self.assertEqual(record.features[10], ('VARIANT', 90, 90, 'P -> A (IN STRAIN G7).', ''))
        self.assertEqual(record.features[11], ('VARIANT', 103, 103, 'S -> GEF (IN STRAIN G7).', ''))

        self.assertEqual(len(record.references), 4)
        self.assertEqual(record.references[0].authors, "KURKELA S., LEHVAESLAIHO H., PALVA E.T., TEERI T.H.;")
        self.assertEqual(record.references[0].title, '"Cloning, nucleotide sequence and characterization of genes encoding naphthalene dioxygenase of Pseudomonas putida strain NCIB9816.";')
        self.assertEqual(len(record.references[0].references), 1)
        self.assertEqual(record.references[0].references[0], ('MEDLINE', '89211973'))
        self.assertEqual(record.references[1].authors, "SIMON M.J., OSSLUND T.D., SAUNDERS R., ENSLEY B.D., SUGGS S., HARCOURT A.A., SUEN W.-C., CRUDEN D.L., GIBSON D.T., ZYLSTRA G.J.;")
        self.assertEqual(record.references[1].title, '"Sequences of genes encoding naphthalene dioxygenase in Pseudomonas putida strains G7 and NCIB 9816-4.";')
        self.assertEqual(len(record.references[1].references), 1)
        self.assertEqual(record.references[1].references[0], ('MEDLINE', '93252277'))
        self.assertEqual(record.references[2].authors, "DENOME S.A., STANLEY D.C., OLSON E.S., YOUNG K.D.;")
        self.assertEqual(record.references[2].title, '"Metabolism of dibenzothiophene and naphthalene in Pseudomonas strains: complete DNA sequence of an upper naphthalene catabolic pathway.";')
        self.assertEqual(len(record.references[2].references), 1)
        self.assertEqual(record.references[2].references[0], ('MEDLINE', '94042852'))
        self.assertEqual(record.references[3].authors, "HAMANN C.;")
        self.assertEqual(record.references[3].title, "")
        self.assertEqual(len(record.references[3].references), 0)

        # Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        # Now try using the iterator - note that all these
        # test cases have only one record.

        # With the SequenceParser
        handle = open(datafile)
        try:
            records = list(SeqIO.parse(handle, "swiss"))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        handle = open(datafile)
        try:
            records = list(SwissProt.parse(handle))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)

    def test_sp005(self):
        """Parsing SwissProt file sp005

        Reads SwissProt/sp005 (path relative to the current working
        directory) through SeqIO's "swiss" format and through Bio.SwissProt,
        checks selected fields of each result, and verifies that the
        single-record readers and the iterating parsers agree.
        """
        datafile = os.path.join('SwissProt', 'sp005')

        # Single-record read via SeqIO. try/finally guarantees the handle is
        # closed even when the parser raises.
        handle = open(datafile)
        try:
            seq_record = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "P24973")
        self.assertEqual(seq_record.name, "NU3M_BALPH")
        self.assertEqual(seq_record.description, "NADH-UBIQUINONE OXIDOREDUCTASE CHAIN 3 (EC 1.6.5.3).")
        self.assertEqual(repr(seq_record.seq), "Seq('MNLLLTLLTNTTLALLLVFIAFWLPQLNVYAEKTSPYECGFDPMGSARLPFSMK...WAE', ProteinAlphabet())")

        # Single-record read via Bio.SwissProt.
        handle = open(datafile)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "NU3M_BALPH")
        self.assertEqual(record.accessions, ['P24973'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Cetartiodactyla', 'Cetacea', 'Mysticeti', 'Balaenopteridae', 'Balaenoptera'])
        self.assertEqual(record.seqinfo, (115, 13022, 'ACF02965'))

        self.assertEqual(len(record.features), 0)

        self.assertEqual(len(record.references), 2)
        self.assertEqual(record.references[0].authors, "ARNASON U., GULLBERG A., WIDEGREN B.;")
        self.assertEqual(record.references[0].title, '"The complete nucleotide sequence of the mitochondrial DNA of the fin whale, Balaenoptera physalus.";')
        self.assertEqual(len(record.references[0].references), 1)
        self.assertEqual(record.references[0].references[0], ('MEDLINE', '92139449'))
        self.assertEqual(record.references[1].authors, "ARNASON U., GULLBERG A.;")
        self.assertEqual(record.references[1].title, '"Comparison between the complete mtDNA sequences of the blue and the fin whale, two species that can hybridize in nature.";')
        self.assertEqual(len(record.references[1].references), 1)
        self.assertEqual(record.references[1].references[0], ('MEDLINE', '94141932'))

        # Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        # Now try using the iterator - note that all these
        # test cases have only one record.

        # With the SequenceParser
        handle = open(datafile)
        try:
            records = list(SeqIO.parse(handle, "swiss"))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        handle = open(datafile)
        try:
            records = list(SwissProt.parse(handle))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)


    def test_sp006(self):
        """Parsing SwissProt file sp006

        Reads SwissProt/sp006 (path relative to the current working
        directory) through SeqIO's "swiss" format and through Bio.SwissProt,
        checks selected fields of each result, and verifies that the
        single-record readers and the iterating parsers agree.
        """
        datafile = os.path.join('SwissProt', 'sp006')

        # Single-record read via SeqIO. try/finally guarantees the handle is
        # closed even when the parser raises.
        handle = open(datafile)
        try:
            seq_record = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "P39896")
        self.assertEqual(seq_record.name, "TCMO_STRGA")
        self.assertEqual(seq_record.description, "TETRACENOMYCIN POLYKETIDE SYNTHESIS 8-O-METHYL TRANSFERASE TCMO (EC 2.1.1.-).")
        self.assertEqual(repr(seq_record.seq), "Seq('MTPHTHVRGPGDILQLTMAFYGSRALISAVELDLFTLLAGKPLPLGELCERAGI...KPR', ProteinAlphabet())")

        # Single-record read via Bio.SwissProt.
        handle = open(datafile)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "TCMO_STRGA")
        self.assertEqual(record.accessions, ['P39896'])
        self.assertEqual(record.organism_classification, ['BACTERIA', 'FIRMICUTES', 'ACTINOBACTERIA', 'ACTINOBACTERIDAE', 'ACTINOMYCETALES', 'STREPTOMYCINEAE', 'STREPTOMYCETACEAE', 'STREPTOMYCES'])
        self.assertEqual(record.seqinfo, (339, 37035, '848B7337'))

        self.assertEqual(len(record.features), 0)

        self.assertEqual(len(record.references), 1)
        self.assertEqual(record.references[0].authors, "SUMMERS R.G., WENDT-PIENKOWSKI E., MOTAMEDI H., HUTCHINSON C.R.;")
        self.assertEqual(record.references[0].title, '"Nucleotide sequence of the tcmII-tcmIV region of the tetracenomycin C biosynthetic gene cluster of Streptomyces glaucescens and evidence that the tcmN gene encodes a multifunctional cyclase-dehydratase-O-methyl transferase.";')
        self.assertEqual(len(record.references[0].references), 1)
        self.assertEqual(record.references[0].references[0], ('MEDLINE', '92193265'))

        # Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        # Now try using the iterator - note that all these
        # test cases have only one record.

        # With the SequenceParser
        handle = open(datafile)
        try:
            records = list(SeqIO.parse(handle, "swiss"))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        handle = open(datafile)
        try:
            records = list(SwissProt.parse(handle))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)


    def test_sp007(self):
        """Parsing SwissProt file sp007

        Reads SwissProt/sp007 (path relative to the current working
        directory) through SeqIO's "swiss" format and through Bio.SwissProt,
        checks selected fields of each result, and verifies that the
        single-record readers and the iterating parsers agree.
        """
        datafile = os.path.join('SwissProt', 'sp007')

        # Single-record read via SeqIO. try/finally guarantees the handle is
        # closed even when the parser raises.
        handle = open(datafile)
        try:
            seq_record = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "O95832")
        self.assertEqual(seq_record.name, "CLD1_HUMAN")
        self.assertEqual(seq_record.description, "CLAUDIN-1 (SENESCENCE-ASSOCIATED EPITHELIAL MEMBRANE PROTEIN).")
        self.assertEqual(repr(seq_record.seq), "Seq('MANAGLQLLGFILAFLGWIGAIVSTALPQWRIYSYAGDNIVTAQAMYEGLWMSC...DYV', ProteinAlphabet())")

        # Single-record read via Bio.SwissProt.
        handle = open(datafile)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "CLD1_HUMAN")
        self.assertEqual(record.accessions, ['O95832'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
        self.assertEqual(record.seqinfo, (211, 22744, '07269000E6C214F0'))

        self.assertEqual(len(record.features), 6)
        self.assertEqual(record.features[0], ('TRANSMEM', 8, 28, 'POTENTIAL.', ''))
        self.assertEqual(record.features[1], ('TRANSMEM', 82, 102, 'POTENTIAL.', ''))
        self.assertEqual(record.features[2], ('TRANSMEM', 116, 136, 'POTENTIAL.', ''))
        self.assertEqual(record.features[3], ('TRANSMEM', 164, 184, 'POTENTIAL.', ''))
        self.assertEqual(record.features[4], ('CONFLICT', 62, 62, 'I -> V (IN REF. 2).', ''))
        self.assertEqual(record.features[5], ('CONFLICT', 135, 135, 'V -> A (IN REF. 2).', ''))

        self.assertEqual(len(record.references), 2)
        self.assertEqual(record.references[0].authors, "Swisshelm K.L., Machl A., Planitzer S., Robertson R., Kubbies M., Hosier S.;")
        self.assertEqual(record.references[0].title, '"SEMP1, a senescence-associated cDNA isolated from human mammary epithelial cells, is a member of an epithelial membrane protein superfamily.";')
        self.assertEqual(len(record.references[0].references), 1)
        self.assertEqual(record.references[0].references[0], ('MEDLINE', '99132301'))
        self.assertEqual(record.references[1].authors, "Mitic L.M., Anderson J.M.;")
        self.assertEqual(record.references[1].title, '"Human claudin-1 isolated from Caco-2 mRNA.";')
        self.assertEqual(len(record.references[1].references), 0)

        # Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        # Now try using the iterator - note that all these
        # test cases have only one record.

        # With the SequenceParser
        handle = open(datafile)
        try:
            records = list(SeqIO.parse(handle, "swiss"))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        handle = open(datafile)
        try:
            records = list(SwissProt.parse(handle))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        # Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)


    def test_sp008(self):
        """Parsing SwissProt file sp008

        Reads the file ``SwissProt/sp008`` (expected to hold one record)
        through ``SeqIO`` (format "swiss") and through ``Bio.SwissProt``,
        using both the one-shot ``read`` functions and the ``parse``
        iterators, and checks the results against the expected values and
        against each other.
        """
        filename = 'sp008'
        datafile = os.path.join('SwissProt', filename)

        def load(parser):
            # Close the handle even when the parser raises, so a failing
            # parse does not leak an open file.
            handle = open(datafile)
            try:
                return parser(handle)
            finally:
                handle.close()

        # test the record parser
        seq_record = load(lambda handle: SeqIO.read(handle, "swiss"))

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "P01892")
        self.assertEqual(seq_record.name, "1A02_HUMAN")
        self.assertEqual(seq_record.description, "HLA CLASS I HISTOCOMPATIBILITY ANTIGEN, A-2 ALPHA CHAIN PRECURSOR.")
        self.assertEqual(repr(seq_record.seq), "Seq('MAVMAPRTLVLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDD...CKV', ProteinAlphabet())")

        record = load(SwissProt.read)

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "1A02_HUMAN")
        self.assertEqual(record.accessions, ['P01892', 'P06338', 'P30514', 'P30444', 'P30445', 'P30446', 'Q29680', 'Q29899', 'Q95352', 'Q29837', 'Q95380'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
        self.assertEqual(record.seqinfo, (365, 40922, 'B54A97B24B337C08'))

        # Expected feature table entries, in file order.
        expected_features = [
            ('SIGNAL', 1, 24, '', ''),
            ('CHAIN', 25, 365, 'HLA CLASS I HISTOCOMPATIBILITY ANTIGEN, A-2 ALPHA CHAIN.', ''),
            ('DOMAIN', 25, 114, 'EXTRACELLULAR ALPHA-1.', ''),
            ('DOMAIN', 115, 206, 'EXTRACELLULAR ALPHA-2.', ''),
            ('DOMAIN', 207, 298, 'EXTRACELLULAR ALPHA-3.', ''),
            ('DOMAIN', 299, 308, 'CONNECTING PEPTIDE.', ''),
            ('TRANSMEM', 309, 332, '', ''),
            ('DOMAIN', 333, 365, 'CYTOPLASMIC TAIL.', ''),
            ('CARBOHYD', 110, 110, '', ''),
            ('DISULFID', 125, 188, '', ''),
            ('DISULFID', 227, 283, '', ''),
            ('STRAND', 27, 36, '', ''),
            ('STRAND', 45, 52, '', ''),
            ('TURN', 53, 54, '', ''),
            ('STRAND', 55, 61, '', ''),
            ('TURN', 62, 63, '', ''),
            ('STRAND', 70, 71, '', ''),
            ('HELIX', 74, 76, '', ''),
            ('TURN', 77, 78, '', ''),
            ('HELIX', 81, 108, '', ''),
            ('TURN', 109, 110, '', ''),
            ('TURN', 113, 114, '', ''),
            ('STRAND', 118, 127, '', ''),
            ('TURN', 129, 130, '', ''),
            ('STRAND', 133, 142, '', ''),
            ('TURN', 143, 144, '', ''),
            ('STRAND', 145, 150, '', ''),
            ('TURN', 152, 153, '', ''),
            ('STRAND', 157, 159, '', ''),
            ('TURN', 163, 163, '', ''),
            ('HELIX', 164, 173, '', ''),
            ('TURN', 174, 175, '', ''),
            ('HELIX', 176, 185, '', ''),
            ('TURN', 186, 186, '', ''),
            ('HELIX', 187, 198, '', ''),
            ('TURN', 199, 199, '', ''),
            ('HELIX', 200, 203, '', ''),
            ('TURN', 204, 204, '', ''),
            ('STRAND', 207, 207, '', ''),
            ('STRAND', 210, 219, '', ''),
            ('TURN', 220, 221, '', ''),
            ('STRAND', 222, 233, '', ''),
            ('STRAND', 238, 243, '', ''),
            ('TURN', 244, 245, '', ''),
            ('STRAND', 246, 247, '', ''),
            ('HELIX', 249, 251, '', ''),
            ('STRAND', 253, 254, '', ''),
            ('STRAND', 258, 259, '', ''),
            ('STRAND', 265, 274, '', ''),
            ('TURN', 275, 276, '', ''),
            ('HELIX', 278, 280, '', ''),
            ('STRAND', 281, 286, '', ''),
            ('TURN', 288, 289, '', ''),
            ('STRAND', 294, 297, '', ''),
            ('VARIANT', 33, 33, 'F -> Y (IN A*0205, A*0206, A*0208, A*0210 AND A*0221). /FTId=VAR_004334.', 'VAR_004334'),
            ('VARIANT', 54, 54, 'D -> N (IN A*0221). /FTId=VAR_004335.', 'VAR_004335'),
            ('VARIANT', 67, 67, 'Q -> R (IN A*0202, A*0205, AND A*0208). /FTId=VAR_004336.', 'VAR_004336'),
            ('VARIANT', 90, 90, 'K -> N (IN A*0208 AND A*0220). /FTId=VAR_004337.', 'VAR_004337'),
            ('VARIANT', 97, 98, 'TH -> ID (IN A*0211). /FTId=VAR_004338.', 'VAR_004338'),
            ('VARIANT', 119, 119, 'V -> L (IN A*0202, A*0205, A*0208 AND A*0217). /FTId=VAR_004339.', 'VAR_004339'),
            ('VARIANT', 121, 121, 'R -> M (IN A*0204 AND A*0217). /FTId=VAR_004340.', 'VAR_004340'),
            ('VARIANT', 123, 123, 'Y -> C (IN A*0207 AND A*0218). /FTId=VAR_004341.', 'VAR_004341'),
            ('VARIANT', 123, 123, 'Y -> F (IN A*0210 AND A*0217). /FTId=VAR_004342.', 'VAR_004342'),
            ('VARIANT', 131, 131, 'W -> G (IN A*0210). /FTId=VAR_004343.', 'VAR_004343'),
            ('VARIANT', 162, 162, 'M -> K (IN A*0218). /FTId=VAR_004344.', 'VAR_004344'),
            ('VARIANT', 173, 173, 'A -> T (IN A*0203). /FTId=VAR_004345.', 'VAR_004345'),
            ('VARIANT', 176, 176, 'V -> E (IN A*0203 AND A*0213). /FTId=VAR_004346.', 'VAR_004346'),
            ('VARIANT', 180, 180, 'L -> W (IN A*0202, A*0203, A*0205 AND A*0208). /FTId=VAR_004347.', 'VAR_004347'),
            ('VARIANT', 180, 180, 'L -> Q (IN A*0212 AND A*0213). /FTId=VAR_004348.', 'VAR_004348'),
            ('VARIANT', 187, 187, 'T -> E (IN A*0216). /FTId=VAR_004349.', 'VAR_004349'),
            ('VARIANT', 260, 260, 'A -> E (IN A*0209). /FTId=VAR_004350.', 'VAR_004350'),
        ]

        self.assertEqual(len(record.features), 71)
        # Compare element by element so a failure points at one feature.
        for index, expected_feature in enumerate(expected_features):
            self.assertEqual(record.features[index], expected_feature)

        # Expected (authors, title, cross-references) per reference,
        # in file order.
        expected_references = [
            ("Koller B.H., Orr H.T.;",
             '"Cloning and complete sequence of an HLA-A2 gene: analysis of two HLA-A alleles at the nucleotide level.";',
             [('MEDLINE', '85132727')]),
            ("Cianetti L., Testa U., Scotto L., la Valle R., Simeone A., Boccoli G., Giannella G., Peschle C., Boncinelli E.;",
             '"Three new class I HLA alleles: structure of mRNAs and alternative mechanisms of processing.";',
             [('MEDLINE', '89122144')]),
            ("Ennis P.D., Zemmour J., Salter R.D., Parham P.;",
             '"Rapid cloning of HLA-A,B cDNA by using the polymerase chain reaction: frequency and nature of errors produced in amplification.";',
             [('MEDLINE', '90207291')]),
            ("Belich M.P., Madrigal J.A., Hildebrand W.H., Zemmour J., Williams R.C., Luz R., Petzl-Erler M.L., Parham P.;",
             '"Unusual HLA-B alleles in two tribes of Brazilian Indians.";',
             [('MEDLINE', '92269955')]),
            ("Krangel M.S.;",
             '"Unusual RNA splicing generates a secreted form of HLA-A2 in a mutagenized B lymphoblastoid cell line.";',
             [('MEDLINE', '85230571')]),
            ("Orr H.T., Lopez de Castro J.A., Parham P., Ploegh H.L., Strominger J.L.;",
             '"Comparison of amino acid sequences of two human histocompatibility antigens, HLA-A2 and HLA-B7: location of putative alloantigenic sites.";',
             [('MEDLINE', '80056745')]),
            ("Lopez de Castro J.A., Strominger J.L., Strong D.M., Orr H.T.;",
             '"Structure of crossreactive human histocompatibility antigens HLA-A28 and HLA-A2: possible implications for the generation of HLA polymorphism.";',
             [('MEDLINE', '82247941')]),
            ("Mattson D.H., Handy D.E., Bradley D.A., Coligan J.E., Cowan E.P., Biddison W.E.;",
             '"DNA sequences of the genes that encode the CTL-defined HLA-A2 variants M7 and DK1.";',
             [('MEDLINE', '87306734')]),
            ("Holmes N., Ennis P., Wan A.M., Denney D.W., Parham P.;",
             '"Multiple genetic mechanisms have contributed to the generation of the HLA-A2/A28 family of class I MHC molecules.";',
             [('MEDLINE', '87252273')]),
            ("Domena J.D.;",
             "",
             []),
            ("Castano A.R., Lopez de Castro J.A.;",
             '"Structure of the HLA-A*0204 antigen, found in South American Indians. Spatial clustering of HLA-A2 subtype polymorphism.";',
             [('MEDLINE', '92039809')]),
            ("Watkins D.I., McAdam S.N., Liu X., Stang C.R., Milford E.L., Levine C.G., Garber T.L., Dogon A.L., Lord C.I., Ghim S.H., Troup G.M., Hughes A.L., Letvin N.L.;",
             '"New recombinant HLA-B alleles in a tribe of South American Amerindians indicate rapid evolution of MHC class I loci.";',
             [('MEDLINE', '92269956')]),
            ("Parham P., Lawlor D.A., Lomen C.E., Ennis P.D.;",
             '"Diversity and diversification of HLA-A,B,C alleles.";',
             [('MEDLINE', '89235215')]),
            ("Ezquerra A., Domenech N., van der Poel J., Strominger J.L., Vega M.A., Lopez de Castro J.A.;",
             '"Molecular analysis of an HLA-A2 functional variant CLA defined by cytolytic T lymphocytes.";',
             [('MEDLINE', '86305811')]),
            ("Domenech N., Ezquerra A., Castano R., Lopez de Castro J.A.;",
             '"Structural analysis of HLA-A2.4 functional variant KNE. Implications for the mapping of HLA-A2-specific T-cell epitopes.";',
             [('MEDLINE', '88113844')]),
            ("Domenech N., Castano R., Goulmy E., Lopez de Castro J.A.;",
             '"Molecular analysis of HLA-A2.4 functional variant KLO: close structural and evolutionary relatedness to the HLA-A2.2 subtype.";',
             [('MEDLINE', '88314183')]),
            ("Castano R., Ezquerra A., Domenech N., Lopez de Castro J.A.;",
             '"An HLA-A2 population variant with structural polymorphism in the alpha 3 region.";',
             [('MEDLINE', '88186100')]),
            ("Epstein H., Kennedy L., Holmes N.;",
             '"An Oriental HLA-A2 subtype is closely related to a subset of Caucasoid HLA-A2 alleles.";',
             [('MEDLINE', '89122133')]),
            ("Castano A.R., Lopez de Castro J.A.;",
             '"Structure of the HLA-A*0211 (A2.5) subtype: further evidence for selection-driven diversification of HLA-A2 antigens.";',
             [('MEDLINE', '92218010')]),
            ("Barber D.F., Fernandez J.M., Lopez de Castro J.A.;",
             '"Primary structure of a new HLA-A2 subtype: HLA-A*0213.";',
             [('MEDLINE', '94222455')]),
            ("Barouch D., Krausa P., Bodmer J., Browning M.J., McMichael A.J.;",
             '"Identification of a novel HLA-A2 subtype, HLA-A*0216.";',
             [('MEDLINE', '95278976')]),
            ("Selvakumar A., Granja C.B., Salazar M., Alosco S.M., Yunis E.J., Dupont B.;",
             '"A novel subtype of A2 (A*0217) isolated from the South American Indian B-cell line AMALA.";',
             [('MEDLINE', '95381236')]),
            ("Kashiwase K., Tokunaga K., Ishikawa Y., Oohashi H., Hashimoto M., Akaza T., Tadokoro K., Juji T.;",
             '"A new A2 sequence HLA-A2K from Japanese.";',
             []),
            ("Fleischhauer K., Zino E., Mazzi B., Severini G.M., Benazzi E., Bordignon C.;",
             '"HLA-A*02 subtype distribution in Caucasians from northern Italy: identification of A*0220.";',
             [('MEDLINE', '97161038')]),
            ("Szmania S., Baxter-Lowe L.A.;",
             '"Nucleotide sequence of a novel HLA-A2 gene.";',
             []),
            ("Bjorkman P.J., Saper M.A., Samraoui B., Bennett W.S., Strominger J.L., Wiley D.C.;",
             '"Structure of the human class I histocompatibility antigen, HLA-A2.";',
             [('MEDLINE', '88014204')]),
            ("Saper M.A., Bjorkman P.J., Wiley D.C.;",
             '"Refined structure of the human histocompatibility antigen HLA-A2 at 2.6-A resolution.";',
             [('MEDLINE', '91245570')]),
        ]

        self.assertEqual(len(record.references), 27)
        for index, (authors, title, cross_refs) in enumerate(expected_references):
            reference = record.references[index]
            self.assertEqual(reference.authors, authors)
            self.assertEqual(reference.title, title)
            self.assertEqual(len(reference.references), len(cross_refs))
            for position, cross_ref in enumerate(cross_refs):
                self.assertEqual(reference.references[position], cross_ref)

        #Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        #Now try using the iterator - note that all these
        #test cases have only one record.

        # With the SequenceParser
        # The iterator is drained into a list inside load(), i.e. before
        # the handle is closed.
        records = load(lambda handle: list(SeqIO.parse(handle, "swiss")))

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        records = load(lambda handle: list(SwissProt.parse(handle)))

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)

    def test_sp009(self):
        """Parsing SwissProt file sp009

        Reads the file ``SwissProt/sp009`` (expected to hold one record)
        through ``SeqIO`` (format "swiss") and through ``Bio.SwissProt``,
        using both the one-shot ``read`` functions and the ``parse``
        iterators, and checks the results against the expected values and
        against each other.
        """
        filename = 'sp009'
        datafile = os.path.join('SwissProt', filename)

        def load(parser):
            # Close the handle even when the parser raises, so a failing
            # parse does not leak an open file.
            handle = open(datafile)
            try:
                return parser(handle)
            finally:
                handle.close()

        # test the record parser
        seq_record = load(lambda handle: SeqIO.read(handle, "swiss"))

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "O23729")
        self.assertEqual(seq_record.name, "CHS3_BROFI")
        self.assertEqual(seq_record.description, "CHALCONE SYNTHASE 3 (EC 2.3.1.74) (NARINGENIN-CHALCONE SYNTHASE 3).")
        self.assertEqual(repr(seq_record.seq), "Seq('MAPAMEEIRQAQRAEGPAAVLAIGTSTPPNALYQADYPDYYFRITKSEHLTELK...GAE', ProteinAlphabet())")

        record = load(SwissProt.read)

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "CHS3_BROFI")
        self.assertEqual(record.accessions, ['O23729'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Viridiplantae', 'Embryophyta', 'Tracheophyta', 'Spermatophyta', 'Magnoliophyta', 'Liliopsida', 'Asparagales', 'Orchidaceae', 'Bromheadia'])
        self.assertEqual(record.seqinfo, (394, 42941, '2F8D14AF4870BBB2'))

        self.assertEqual(len(record.features), 1)
        self.assertEqual(record.features[0], ('ACT_SITE', 165, 165, 'BY SIMILARITY.', ''))

        self.assertEqual(len(record.references), 1)
        reference = record.references[0]
        self.assertEqual(reference.authors, "Liew C.F., Lim S.H., Loh C.S., Goh C.J.;")
        self.assertEqual(reference.title, '"Molecular cloning and sequence analysis of chalcone synthase cDNAs of Bromheadia finlaysoniana.";')
        self.assertEqual(len(reference.references), 0)

        #Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        #Now try using the iterator - note that all these
        #test cases have only one record.

        # With the SequenceParser
        # The iterator is drained into a list inside load(), i.e. before
        # the handle is closed.
        records = load(lambda handle: list(SeqIO.parse(handle, "swiss")))

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        records = load(lambda handle: list(SwissProt.parse(handle)))

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)

    def test_sp010(self):
        "Parsing SwissProt file sp010"

        filename = 'sp010'
        # test the record parser

        datafile = os.path.join('SwissProt', filename)

        test_handle = open(datafile)
        seq_record = SeqIO.read(test_handle, "swiss")
        test_handle.close()

        self.assert_(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "Q13639")
        self.assertEqual(seq_record.name, "5H4_HUMAN")
        self.assertEqual(seq_record.description, "5-HYDROXYTRYPTAMINE 4 RECEPTOR (5-HT-4) (SEROTONIN RECEPTOR) (5-HT4).")
        self.assertEqual(repr(seq_record.seq), "Seq('MDKLDANVSSEEGFGSVEKVVLLTFLSTVILMAILGNLLVMVAVCWDRQLRKIK...SDT', ProteinAlphabet())")

        test_handle = open(datafile)
        record = SwissProt.read(test_handle)
        test_handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "5H4_HUMAN")
        self.assertEqual(record.accessions, ['Q13639', 'Q9UBM6', 'Q9UQR6', 'Q9UE22', 'Q9UE23', 'Q9UBT4', 'Q9NY73'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
        self.assertEqual(record.seqinfo, (388, 43761, '7FCFEC60E7BDF560'))
    
        self.assertEqual(len(record.features), 23)
        self.assertEqual(record.features[0], ('DOMAIN', 1, 19, 'EXTRACELLULAR (POTENTIAL).', ''))
        self.assertEqual(record.features[1], ('TRANSMEM', 20, 40, '1 (POTENTIAL).', ''))
        self.assertEqual(record.features[2], ('DOMAIN', 41, 58, 'CYTOPLASMIC (POTENTIAL).', ''))
        self.assertEqual(record.features[3], ('TRANSMEM', 59, 79, '2 (POTENTIAL).', ''))
        self.assertEqual(record.features[4], ('DOMAIN', 80, 93, 'EXTRACELLULAR (POTENTIAL).', ''))
        self.assertEqual(record.features[5], ('TRANSMEM', 94, 116, '3 (POTENTIAL).', ''))
        self.assertEqual(record.features[6], ('DOMAIN', 117, 137, 'CYTOPLASMIC (POTENTIAL).', ''))
        self.assertEqual(record.features[7], ('TRANSMEM', 138, 158, '4 (POTENTIAL).', ''))
        self.assertEqual(record.features[8], ('DOMAIN', 159, 192, 'EXTRACELLULAR (POTENTIAL).', ''))
        self.assertEqual(record.features[9], ('TRANSMEM', 193, 213, '5 (POTENTIAL).', ''))
        self.assertEqual(record.features[10], ('DOMAIN', 214, 260, 'CYTOPLASMIC (POTENTIAL).', ''))
        self.assertEqual(record.features[11], ('TRANSMEM', 261, 281, '6 (POTENTIAL).', ''))
        self.assertEqual(record.features[12], ('DOMAIN', 282, 294, 'EXTRACELLULAR (POTENTIAL).', ''))
        self.assertEqual(record.features[13], ('TRANSMEM', 295, 315, '7 (POTENTIAL).', ''))
        self.assertEqual(record.features[14], ('DOMAIN', 316, 388, 'CYTOPLASMIC (POTENTIAL).', ''))
        self.assertEqual(record.features[15], ('CARBOHYD', 7, 7, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''))
        self.assertEqual(record.features[16], ('DISULFID', 93, 184, 'BY SIMILARITY.', ''))
        self.assertEqual(record.features[17], ('LIPID', 329, 329, 'PALMITATE (BY SIMILARITY).', ''))
        self.assertEqual(record.features[18], ('VARSPLIC', 169, 169, 'L -> LERSLNQGLGQDFHA (IN ISOFORM 5- HT4(F)).', ''))
        self.assertEqual(record.features[19], ('VARSPLIC', 359, 388, 'RDAVECGGQWESQCHPPATSPLVAAQPSDT -> SGCSPVSSFLLLFCNRPVPV (IN ISOFORM 5-HT4(E)).', ''))
        self.assertEqual(record.features[20], ('VARSPLIC', 359, 388, 'RDAVECGGQWESQCHPPATSPLVAAQPSDT -> SSGTETDRRNFGIRKRRLTKPS (IN ISOFORM 5-HT4(D)).', ''))
        self.assertEqual(record.features[21], ('VARSPLIC', 360, 388, 'DAVECGGQWESQCHPPATSPLVAAQPSDT -> F (IN ISOFORM 5-HT4(C)).', ''))
        self.assertEqual(record.features[22], ('VARSPLIC', 360, 388, 'DAVECGGQWESQCHPPATSPLVAAQPSDT -> YTVLHRGHHQELEKLPIHNDPESLESCF (IN ISOFORM 5- HT4(A)).', ''))
        self.assertEqual(len(record.references), 6)

        self.assertEqual(record.references[0].authors, "Blondel O., Gastineau M., Dahmoune Y., Langlois M., Fischmeister R.;")
        self.assertEqual(record.references[0].title, '"Cloning, expression, and pharmacology of four human 5- hydroxytryptamine receptor isoforms produced by alternative splicing in the carboxyl terminus.";')
        self.assertEqual(len(record.references[0].references), 1)
        self.assertEqual(record.references[0].references[0], ('PubMed', '9603189'))
        self.assertEqual(record.references[1].authors, "Van den Wyngaert I., Gommeren W., Jurzak M., Verhasselt P., Gordon R., Leysen J., Luyten W., Bender E.;")
        self.assertEqual(record.references[1].title, '"Cloning and expression of 5-HT4 receptor species and splice variants.";')
        self.assertEqual(len(record.references[1].references), 0)
        self.assertEqual(record.references[2].authors, "Claeysen S., Faye P., Sebben M., Lemaire S., Bockaert J., Dumuis A.;")
        self.assertEqual(record.references[2].title, '"Cloning and expression of human 5-HT4S receptors. Effect of receptor density on their coupling to adenylyl cyclase.";')
        self.assertEqual(len(record.references[2].references), 1)
        self.assertEqual(record.references[2].references[0], ('PubMed', '9351641'))
        self.assertEqual(record.references[3].authors, "Claeysen S., Sebben M., Becamel C., Bockaert J., Dumuis A.;")
        self.assertEqual(record.references[3].title, '"Novel brain-specific 5-HT4 receptors splice variants show marked constitutive activity: role of the c-terminal intracellular domain.";')
        self.assertEqual(len(record.references[3].references), 0)
        self.assertEqual(record.references[4].authors, "Bender E., Pindon A., van Oers I., Zhang Y.B., Gommeren W., Verhasselt P., Jurzak M., Leysen J., Luyten W.;")
        self.assertEqual(record.references[4].title, '"Structure of the human serotonin 5-HT4 receptor gene and cloning of a novel 5-HT4 splice variant.";')
        self.assertEqual(len(record.references[4].references), 1)
        self.assertEqual(record.references[4].references[0], ('PubMed', '10646498'))
        self.assertEqual(record.references[5].authors, "Ullmer C., Schmuck K., Kalkman H.O., Lubbert H.;")
        self.assertEqual(record.references[5].title, '"Expression of serotonin receptor mRNAs in blood vessels.";')
        self.assertEqual(len(record.references[5].references), 2)
        self.assertEqual(record.references[5].references[0], ('MEDLINE', '95385798'))
        self.assertEqual(record.references[5].references[1], ('PubMed', '7656980'))

        #Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assert_(seq_record.id in record.accessions)

        #Now try using the iterator - note that all these
        #test cases have only one record.

        # With the SequenceParser
        test_handle = open(datafile)
        records = list(SeqIO.parse(test_handle, "swiss"))
        test_handle.close()

        self.assertEqual(len(records), 1)
        self.assert_(isinstance(records[0], SeqRecord))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        test_handle = open(datafile)
        records = list(SwissProt.parse(test_handle))
        test_handle.close()

        self.assertEqual(len(records), 1)
        self.assert_(isinstance(records[0], SwissProt.Record))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)

    def test_sp011(self):
        """Parsing SwissProt file sp011

        Reads SwissProt/sp011 through both SeqIO (format "swiss") and
        Bio.SwissProt, checks selected fields of the parsed record, and
        confirms that the iterator interfaces yield the same single record.
        """
        datafile = os.path.join('SwissProt', 'sp011')

        def check(label, actual, expected):
            # The table-driven checks below run inside loops, so name the
            # failing item explicitly in the assertion message.
            self.assertEqual(actual, expected,
                             "%s: %r != %r" % (label, actual, expected))

        # Parse as a SeqRecord.  try/finally guarantees the handle is closed
        # even when the parser raises.
        handle = open(datafile)
        try:
            seq_record = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "P16235")
        self.assertEqual(seq_record.name, "LSHR_RAT")
        self.assertEqual(seq_record.description, "LUTROPIN-CHORIOGONADOTROPIC HORMONE RECEPTOR PRECURSOR (LH/CG-R) (LSH-R) (LUTEINIZING HORMONE RECEPTOR).")
        self.assertEqual(repr(seq_record.seq), "Seq('MGRRVPALRQLLVLAVLLLKPSQLQSRELSGSRCPEPCDCAPDGALRCPGPRAG...LTH', ProteinAlphabet())")

        # Parse the same file as a SwissProt record.
        handle = open(datafile)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "LSHR_RAT")
        self.assertEqual(record.accessions, ['P16235', 'P70646', 'Q63807', 'Q63808', 'Q63809'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Rodentia', 'Sciurognathi', 'Muridae', 'Murinae', 'Rattus'])
        self.assertEqual(record.seqinfo, (700, 78035, '31807E73BAC94F1F'))

        # Expected feature table, in file order.
        expected_features = [
            ('SIGNAL', 1, 26, '', ''),
            ('CHAIN', 27, 700, 'LUTROPIN-CHORIOGONADOTROPIC HORMONE RECEPTOR.', ''),
            ('DOMAIN', 27, 362, 'EXTRACELLULAR (POTENTIAL).', ''),
            ('TRANSMEM', 363, 390, '1 (POTENTIAL).', ''),
            ('DOMAIN', 391, 399, 'CYTOPLASMIC (POTENTIAL).', ''),
            ('TRANSMEM', 400, 422, '2 (POTENTIAL).', ''),
            ('DOMAIN', 423, 443, 'EXTRACELLULAR (POTENTIAL).', ''),
            ('TRANSMEM', 444, 466, '3 (POTENTIAL).', ''),
            ('DOMAIN', 467, 486, 'CYTOPLASMIC (POTENTIAL).', ''),
            ('TRANSMEM', 487, 509, '4 (POTENTIAL).', ''),
            ('DOMAIN', 510, 529, 'EXTRACELLULAR (POTENTIAL).', ''),
            ('TRANSMEM', 530, 551, '5 (POTENTIAL).', ''),
            ('DOMAIN', 552, 574, 'CYTOPLASMIC (POTENTIAL).', ''),
            ('TRANSMEM', 575, 598, '6 (POTENTIAL).', ''),
            ('DOMAIN', 599, 609, 'EXTRACELLULAR (POTENTIAL).', ''),
            ('TRANSMEM', 610, 631, '7 (POTENTIAL).', ''),
            ('DOMAIN', 632, 700, 'CYTOPLASMIC (POTENTIAL).', ''),
            ('REPEAT', 52, 75, 'LRR 1.', ''),
            ('REPEAT', 126, 150, 'LRR 2.', ''),
            ('REPEAT', 152, 175, 'LRR 3.', ''),
            ('REPEAT', 176, 200, 'LRR 4.', ''),
            ('REPEAT', 202, 224, 'LRR 5.', ''),
            ('REPEAT', 225, 248, 'LRR 6.', ''),
            ('REPEAT', 250, 271, 'LRR 7.', ''),
            ('DISULFID', 443, 518, 'BY SIMILARITY.', ''),
            ('CARBOHYD', 103, 103, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''),
            ('CARBOHYD', 178, 178, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''),
            ('CARBOHYD', 199, 199, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''),
            ('CARBOHYD', 295, 295, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''),
            ('CARBOHYD', 303, 303, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''),
            ('CARBOHYD', 317, 317, 'N-LINKED (GLCNAC...) (POTENTIAL).', ''),
            ('VARSPLIC', 83, 132, 'MISSING (IN ISOFORM 1950).', ''),
            ('VARSPLIC', 133, 157, 'MISSING (IN ISOFORM 1759).', ''),
            ('VARSPLIC', 184, 700, 'MISSING (IN ISOFORM C2).', ''),
            ('VARSPLIC', 232, 251, 'DISSTKLQALPSHGLESIQT -> PCRATGWSPFRRSSPCLPTH (IN ISOFORM 2075).', ''),
            ('VARSPLIC', 232, 293, 'MISSING (IN ISOFORM E/A2, ISOFORM EB AND ISOFORM B1).', ''),
            ('VARSPLIC', 252, 700, 'MISSING (IN ISOFORM 2075).', ''),
            ('VARSPLIC', 294, 367, 'QNFSFSIFENFSKQCESTVRKADNETLYSAIFEENELSGWDYDYGFCSPKTLQCAPEPDAFNPCEDIMGYAFLR -> IFHFPFLKTSPNNAKAQLEKQITRRFIPPSLRRMNSVAGIMIMASVHPRHSNVLQNQMLSTPVKILWAMPSLGS (IN ISOFORM B1 AND ISOFORM B3).', ''),
            ('VARSPLIC', 294, 294, 'Q -> P (IN ISOFORM C1).', ''),
            ('VARSPLIC', 295, 700, 'MISSING (IN ISOFORM C1).', ''),
            ('VARSPLIC', 321, 342, 'YSAIFEENELSGWDYDYGFCSP -> LHGALPAAHCLRGLPNKRPVL (IN ISOFORM 1834, ISOFORM 1759 AND ISOFORM EB).', ''),
            ('VARSPLIC', 343, 700, 'MISSING (IN ISOFORMS 1834, ISOFORM 1759 AND ISOFORM EB).', ''),
            ('VARSPLIC', 368, 700, 'MISSING (IN ISOFORM B1 AND ISOFORM B3).', ''),
            ('VARIANT', 82, 82, 'I -> M (IN ISOFORM 1950).', ''),
            ('VARIANT', 179, 179, 'E -> G (IN ISOFORM 1759).', ''),
            ('VARIANT', 233, 233, 'I -> T (IN ISOFORM 1950).', ''),
            ('VARIANT', 646, 646, 'G -> S (IN ISOFORM 1950).', ''),
            ('MUTAGEN', 409, 409, 'D->N: SIGNIFICANT REDUCTION OF BINDING.', ''),
            ('MUTAGEN', 436, 436, 'D->N: NO CHANGE IN BINDING OR CAMP PROD.', ''),
            ('MUTAGEN', 455, 455, 'E->Q: NO CHANGE IN BINDING OR CAMP PROD.', ''),
            ('MUTAGEN', 582, 582, 'D->N: NO CHANGE IN BINDING OR CAMP PROD.', ''),
            ('CONFLICT', 33, 33, 'R -> L (IN REF. 7).', ''),
        ]
        self.assertEqual(len(record.features), len(expected_features))
        for index, expected in enumerate(expected_features):
            check("features[%d]" % index, record.features[index], expected)

        # Expected literature references: (authors, title, cross-references).
        expected_references = [
            ("McFarland K.C., Sprengel R., Phillips H.S., Koehler M., Rosemblit N., Nikolics K., Segaloff D.L., Seeburg P.H.;",
             '"Lutropin-choriogonadotropin receptor: an unusual member of the G protein-coupled receptor family.";',
             [('MEDLINE', '89332512'), ('PubMed', '2502842')]),
            ("Aatsinki J.T., Pietila E.M., Lakkakorpi J.T., Rajaniemi H.J.;",
             '"Expression of the LH/CG receptor gene in rat ovarian tissue is regulated by an extensive alternative splicing of the primary transcript.";',
             [('MEDLINE', '92347604'), ('PubMed', '1353463')]),
            ("Koo Y.B., Slaughter R.G., Ji T.H.;",
             '"Structure of the luteinizing hormone receptor gene and multiple exons of the coding sequence.";',
             [('MEDLINE', '91209270'), ('PubMed', '2019252')]),
            ("Bernard M.P., Myers R.V., Moyle W.R.;",
             '"Cloning of rat lutropin (LH) receptor analogs lacking the soybean lectin domain.";',
             [('MEDLINE', '91006819'), ('PubMed', '1976554')]),
            ("Segaloff D.L., Sprengel R., Nikolics K., Ascoli M.;",
             '"Structure of the lutropin/choriogonadotropin receptor.";',
             [('MEDLINE', '91126285'), ('PubMed', '2281186')]),
            ("Tsai-Morris C.H., Buczko E., Wang W., Dufau M.L.;",
             '"Intronic nature of the rat luteinizing hormone receptor gene defines a soluble receptor subspecies with hormone binding activity.";',
             [('MEDLINE', '91060531'), ('PubMed', '2174034')]),
            ("Roche P.C., Ryan R.J.;",
             '"Purification, characterization, and amino-terminal sequence of rat ovarian receptor for luteinizing hormone/human choriogonadotropin.";',
             [('MEDLINE', '89174723'), ('PubMed', '2925659')]),
            ("Ji I., Ji T.H.;",
             '"Asp383 in the second transmembrane domain of the lutropin receptor is important for high affinity hormone binding and cAMP production.";',
             [('MEDLINE', '91332007'), ('PubMed', '1714448')]),
        ]
        self.assertEqual(len(record.references), len(expected_references))
        for index, (authors, title, xrefs) in enumerate(expected_references):
            reference = record.references[index]
            label = "references[%d]" % index
            check(label + ".authors", reference.authors, authors)
            check(label + ".title", reference.title, title)
            check("len(%s.references)" % label,
                  len(reference.references), len(xrefs))
            for position, xref in enumerate(xrefs):
                check("%s.references[%d]" % (label, position),
                      reference.references[position], xref)

        #Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        #Now try using the iterator - note that all these
        #test cases have only one record.

        # With the SequenceParser
        handle = open(datafile)
        try:
            records = list(SeqIO.parse(handle, "swiss"))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        handle = open(datafile)
        try:
            records = list(SwissProt.parse(handle))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)


    def test_sp012(self):
        """Parsing SwissProt file sp012

        Reads SwissProt/sp012 through both SeqIO (format "swiss") and
        Bio.SwissProt, checks selected fields of the parsed record, and
        confirms that the iterator interfaces yield the same single record.
        """
        datafile = os.path.join('SwissProt', 'sp012')

        # Parse as a SeqRecord.  try/finally guarantees the handle is closed
        # even when the parser raises.
        handle = open(datafile)
        try:
            seq_record = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "Q9Y736")
        self.assertEqual(seq_record.name, "Q9Y736")
        self.assertEqual(seq_record.description, "UBIQUITIN.")
        self.assertEqual(repr(seq_record.seq), "Seq('MQIFVKTLTGKTITLEVESSDTIDNVKTKIQDKEGIPPDQQRLIFAGKQLEDGR...GGN', ProteinAlphabet())")

        # Parse the same file as a SwissProt record.
        handle = open(datafile)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "Q9Y736")
        self.assertEqual(record.accessions, ['Q9Y736'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Fungi', 'Ascomycota', 'Pezizomycotina', 'Eurotiomycetes', 'Onygenales', 'Arthrodermataceae', 'mitosporic Arthrodermataceae', 'Trichophyton'])
        self.assertEqual(record.seqinfo, (153, 17238, '01153CF30C2DEDFF'))

        # This entry is expected to carry no feature table lines.
        self.assertEqual(len(record.features), 0)

        # Two references, neither with cross-references.
        self.assertEqual(len(record.references), 2)
        first_ref = record.references[0]
        self.assertEqual(first_ref.authors, "Kano R., Nakamura Y., Watanabe S., Hasegawa A.;")
        self.assertEqual(first_ref.title, '"Trichophyton mentagrophytes mRNA for ubiquitin.";')
        self.assertEqual(len(first_ref.references), 0)
        second_ref = record.references[1]
        self.assertEqual(second_ref.authors, "Kano R.;")
        self.assertEqual(second_ref.title, '"Microsporum canis mRNA for ubiquitin, complete cds.";')
        self.assertEqual(len(second_ref.references), 0)

        #Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        #Now try using the iterator - note that all these
        #test cases have only one record.

        # With the SequenceParser
        handle = open(datafile)
        try:
            records = list(SeqIO.parse(handle, "swiss"))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        handle = open(datafile)
        try:
            records = list(SwissProt.parse(handle))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)


    def test_sp013(self):
        """Parsing SwissProt file sp013

        Reads SwissProt/sp013 through both SeqIO (format "swiss") and
        Bio.SwissProt, checks selected fields of the parsed record, and
        confirms that the iterator interfaces yield the same single record.
        """
        datafile = os.path.join('SwissProt', 'sp013')

        # Parse as a SeqRecord.  try/finally guarantees the handle is closed
        # even when the parser raises.
        handle = open(datafile)
        try:
            seq_record = SeqIO.read(handle, "swiss")
        finally:
            handle.close()

        self.assertTrue(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "P82909")
        self.assertEqual(seq_record.name, "P82909")
        self.assertEqual(seq_record.description, "MITOCHONDRIAL 28S RIBOSOMAL PROTEIN S36 (MRP-S36).")
        self.assertEqual(repr(seq_record.seq), "Seq('MGSKMASASRVVQVVKPHTPLIRFPDRRDNPKPNVSEALRSAGLPSHSSVISQH...GPE', ProteinAlphabet())")

        # Parse the same file as a SwissProt record.
        handle = open(datafile)
        try:
            record = SwissProt.read(handle)
        finally:
            handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "P82909")
        self.assertEqual(record.accessions, ['P82909'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
        self.assertEqual(record.seqinfo, (102, 11335, '83EF107B42E2FCFD'))

        # This entry is expected to carry no feature table lines.
        self.assertEqual(len(record.features), 0)

        # Two references; the first is expected to have an empty title and
        # neither has cross-references.
        self.assertEqual(len(record.references), 2)
        first_ref = record.references[0]
        self.assertEqual(first_ref.authors, "Strausberg R.;")
        self.assertEqual(first_ref.title, "")
        self.assertEqual(len(first_ref.references), 0)
        second_ref = record.references[1]
        self.assertEqual(second_ref.authors, "Koc E.C., Burkhart W., Blackburn K., Moseley A., Spremulli L.L.;")
        self.assertEqual(second_ref.title, '"The small subunit of the mammalian mitochondrial ribosome. Identification of the full complement ribosomal proteins present.";')
        self.assertEqual(len(second_ref.references), 0)

        #Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assertTrue(seq_record.id in record.accessions)

        #Now try using the iterator - note that all these
        #test cases have only one record.

        # With the SequenceParser
        handle = open(datafile)
        try:
            records = list(SeqIO.parse(handle, "swiss"))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SeqRecord))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        handle = open(datafile)
        try:
            records = list(SwissProt.parse(handle))
        finally:
            handle.close()

        self.assertEqual(len(records), 1)
        self.assertTrue(isinstance(records[0], SwissProt.Record))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)


    def test_sp014(self):
        "Parsing SwissProt file sp014"

        filename = 'sp014'
        # test the record parser

        datafile = os.path.join('SwissProt', filename)

        test_handle = open(datafile)
        seq_record = SeqIO.read(test_handle, "swiss")
        test_handle.close()

        self.assert_(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "P12166")
        self.assertEqual(seq_record.name, "PSBL_ORYSA")
        self.assertEqual(seq_record.description, "PHOTOSYSTEM II REACTION CENTER L PROTEIN (PSII 5 KDA PROTEIN).")
        self.assertEqual(repr(seq_record.seq), "Seq('TQSNPNEQNVELNRTSLYWGLLLIFVLAVLFSNYFFN', ProteinAlphabet())")

        test_handle = open(datafile)
        record = SwissProt.read(test_handle)
        test_handle.close()

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "PSBL_ORYSA")
        self.assertEqual(record.accessions, ['P12166', 'P12167', 'Q34007'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Viridiplantae', 'Embryophyta', 'Tracheophyta', 'Spermatophyta', 'Magnoliophyta', 'Liliopsida', 'Poales', 'Poaceae', 'Ehrhartoideae', 'Oryzeae', 'Oryza'])
        self.assertEqual(record.seqinfo, (37, 4366, 'CC537AEC50B2C784'))
    
        self.assertEqual(len(record.features), 1)
        self.assertEqual(record.features[0], ('INIT_MET', 0, 0, '', ''))

        self.assertEqual(len(record.references), 22)
        self.assertEqual(record.references[0].authors, "Sugiura M.;")
        self.assertEqual(record.references[0].title, "")
        self.assertEqual(len(record.references[0].references), 0)
        self.assertEqual(record.references[1].authors, "Hiratsuka J., Shimada H., Whittier R., Ishibashi T., Sakamoto M., Mori M., Kondo C., Honji Y., Sun C.-R., Meng B.-Y., Li Y.-Q., Kanno A., Nishizawa Y., Hirai A., Shinozaki K., Sugiura M.;")
        self.assertEqual(record.references[1].title, '"The complete sequence of the rice (Oryza sativa) chloroplast genome: intermolecular recombination between distinct tRNA genes accounts for a major plastid DNA inversion during the evolution of the cereals.";')
        self.assertEqual(len(record.references[1].references), 2)
        self.assertEqual(record.references[1].references[0], ('MEDLINE', '89364698'))
        self.assertEqual(record.references[1].references[1], ('PubMed', '2770692'))
        self.assertEqual(record.references[2].authors, "Sugiura M.;")
        self.assertEqual(record.references[2].title, "")
        self.assertEqual(len(record.references[2].references), 0)
        self.assertEqual(record.references[3].authors, "Shinozaki K., Ohme M., Tanaka M., Wakasugi T., Hayashida N., Matsubayashi T., Zaita N., Chunwongse J., Obokata J., Yamaguchi-Shinozaki K., Ohto C., Torazawa K., Meng B.Y., Sugita M., Deno H., Kamogashira T., Yamada K., Kusuda J., Takaiwa F., Kato A., Tohdoh N., Shimada H., Sugiura M.;")
        self.assertEqual(record.references[3].title, '"The complete nucleotide sequence of the tobacco chloroplast genome: its gene organization and expression.";')
        self.assertEqual(len(record.references[3].references), 0)
        self.assertEqual(record.references[4].authors, "Chaudhuri S., Maliga P.;")
        self.assertEqual(record.references[4].title, '"Sequences directing C to U editing of the plastid psbL mRNA are located within a 22 nucleotide segment spanning the editing site.";')
        self.assertEqual(len(record.references[4].references), 2)
        self.assertEqual(record.references[4].references[0], ('MEDLINE', '97076156'))
        self.assertEqual(record.references[4].references[1], ('PubMed', '8918473'))
        self.assertEqual(record.references[5].authors, "Chakhmakhcheva O.G., Andreeva A.V., Buryakova A.A., Reverdatto S.V., Efimov V.A.;")
        self.assertEqual(record.references[5].title, '"Nucleotide sequence of the barley chloroplast psbE, psbF genes and flanking regions.";')
        self.assertEqual(len(record.references[5].references), 2)
        self.assertEqual(record.references[5].references[0], ('MEDLINE', '89240046'))
        self.assertEqual(record.references[5].references[1], ('PubMed', '2654886'))
        self.assertEqual(record.references[6].authors, "Efimov V.A., Andreeva A.V., Reverdatto S.V., Chakhmakhcheva O.G.;")
        self.assertEqual(record.references[6].title, '"Photosystem II of rye. Nucleotide sequence of the psbB, psbC, psbE, psbF, psbH genes of rye and chloroplast DNA regions adjacent to them.";')
        self.assertEqual(len(record.references[6].references), 2)
        self.assertEqual(record.references[6].references[0], ('MEDLINE', '92207253'))
        self.assertEqual(record.references[6].references[1], ('PubMed', '1804121'))
        self.assertEqual(record.references[7].authors, "Webber A.N., Hird S.M., Packman L.C., Dyer T.A., Gray J.C.;")
        self.assertEqual(record.references[7].title, '"A photosystem II polypeptide is encoded by an open reading frame co-transcribed with genes for cytochrome b-559 in wheat chloroplast DNA.";')
        self.assertEqual(len(record.references[7].references), 0)
        self.assertEqual(record.references[8].authors, "Kudla J., Igloi G.L., Metzlaff M., Hagemann R., Koessel H.;")
        self.assertEqual(record.references[8].title, '"RNA editing in tobacco chloroplasts leads to the formation of a translatable psbL mRNA by a C to U substitution within the initiation codon.";')
        self.assertEqual(len(record.references[8].references), 2)
        self.assertEqual(record.references[8].references[0], ('MEDLINE', '92191997'))
        self.assertEqual(record.references[8].references[1], ('PubMed', '1547774'))
        self.assertEqual(record.references[9].authors, "Zolotarev A.S., Kolosov V.L.;")
        self.assertEqual(record.references[9].title, '"Nucleotide sequence of the rye chloroplast DNA fragment, comprising psbE and psbF genes.";')
        self.assertEqual(len(record.references[9].references), 2)
        self.assertEqual(record.references[9].references[0], ('MEDLINE', '89160331'))
        self.assertEqual(record.references[9].references[1], ('PubMed', '2646599'))
        self.assertEqual(record.references[10].authors, "Kolosov V.L., Klezovich O.N., Abdulaev N.G., Zolotarev A.S.;")
        self.assertEqual(record.references[10].title, '"Photosystem II of rye. Nucleotide sequence of genes psbE, psbF, psbL and OPC40 of chloroplast DNA.";')
        self.assertEqual(len(record.references[10].references), 2)
        self.assertEqual(record.references[10].references[0], ('MEDLINE', '90073796'))
        self.assertEqual(record.references[10].references[1], ('PubMed', '2686655'))
        self.assertEqual(record.references[11].authors, "Haley J., Bogorad L.;")
        self.assertEqual(record.references[11].title, "")
        self.assertEqual(len(record.references[11].references), 0)
        self.assertEqual(record.references[12].authors, "Maier R.M., Neckermann K., Igloi G.L., Koessel H.;")
        self.assertEqual(record.references[12].title, '"Complete sequence of the maize chloroplast genome: gene content, hotspots of divergence and fine tuning of genetic information by transcript editing.";')
        self.assertEqual(len(record.references[12].references), 2)
        self.assertEqual(record.references[12].references[0], ('MEDLINE', '95395841'))
        self.assertEqual(record.references[12].references[1], ('PubMed', '7666415'))
        self.assertEqual(record.references[13].authors, "Willey D.L., Gray J.C.;")
        self.assertEqual(record.references[13].title, '"Two small open reading frames are co-transcribed with the pea chloroplast genes for the polypeptides of cytochrome b-559.";')
        self.assertEqual(len(record.references[13].references), 2)
        self.assertEqual(record.references[13].references[0], ('MEDLINE', '89354671'))
        self.assertEqual(record.references[13].references[1], ('PubMed', '2766383'))
        self.assertEqual(record.references[14].authors, "Bock R., Hagemann R., Koessel H., Kudla J.;")
        self.assertEqual(record.references[14].title, '"Tissue- and stage-specific modulation of RNA editing of the psbF and psbL transcript from spinach plastids -- a new regulatory mechanism?";')
        self.assertEqual(len(record.references[14].references), 2)
        self.assertEqual(record.references[14].references[0], ('MEDLINE', '93360903'))
        self.assertEqual(record.references[14].references[1], ('PubMed', '8355656'))
        self.assertEqual(record.references[15].authors, "Hermann R.G., Alt J., Schiller B., Widger W.R., Cramer W.A.;")
        self.assertEqual(record.references[15].title, '"Nucleotide sequence of the gene for apocytochrome b-559 on the spinach plastid chromosome: implications for the structure of the membrane protein.";')
        self.assertEqual(len(record.references[15].references), 0)
        self.assertEqual(record.references[16].authors, "Kuntz M., Camara B., Weil J.-H., Schantz R.;")
        self.assertEqual(record.references[16].title, '"The psbL gene from bell pepper (Capsicum annuum): plastid RNA editing also occurs in non-photosynthetic chromoplasts.";')
        self.assertEqual(len(record.references[16].references), 2)
        self.assertEqual(record.references[16].references[0], ('MEDLINE', '93099270'))
        self.assertEqual(record.references[16].references[1], ('PubMed', '1463853'))
        self.assertEqual(record.references[17].authors, "Forsthoefel N.R., Cushman J.C.;")
        self.assertEqual(record.references[17].title, '"Characterization and expression of photosystem II genes (psbE, psbF, and psbL) from the facultative crassulacean acid metabolism plant Mesembryanthemum crystallinum.";')
        self.assertEqual(len(record.references[17].references), 2)
        self.assertEqual(record.references[17].references[0], ('MEDLINE', '94345017'))
        self.assertEqual(record.references[17].references[1], ('PubMed', '8066140'))
        self.assertEqual(record.references[18].authors, "Kubo T., Yanai Y., Kinoshita T., Mikami T.;")
        self.assertEqual(record.references[18].title, '"The chloroplast trnP-trnW-petG gene cluster in the mitochondrial genomes of Beta vulgaris, B. trigyna and B. webbiana: evolutionary aspects.";')
        self.assertEqual(len(record.references[18].references), 2)
        self.assertEqual(record.references[18].references[0], ('MEDLINE', '95254673'))
        self.assertEqual(record.references[18].references[1], ('PubMed', '7736615'))
        self.assertEqual(record.references[19].authors, "Naithani S.;")
        self.assertEqual(record.references[19].title, "")
        self.assertEqual(len(record.references[19].references), 0)
        self.assertEqual(record.references[20].authors, "Ikeuchi M., Takio K., Inoue Y.;")
        self.assertEqual(record.references[20].title, '"N-terminal sequencing of photosystem II low-molecular-mass proteins. 5 and 4.1 kDa components of the O2-evolving core complex from higher plants.";')
        self.assertEqual(len(record.references[20].references), 2)
        self.assertEqual(record.references[20].references[0], ('MEDLINE', '89121082'))
        self.assertEqual(record.references[20].references[1], ('PubMed', '2644131'))
        self.assertEqual(record.references[21].authors, "Zheleva D., Sharma J., Panico M., Morris H.R., Barber J.;")
        self.assertEqual(record.references[21].title, '"Isolation and characterization of monomeric and dimeric CP47-reaction center photosystem II complexes.";')
        self.assertEqual(len(record.references[21].references), 2)
        self.assertEqual(record.references[21].references[0], ('MEDLINE', '98298118'))
        self.assertEqual(record.references[21].references[1], ('PubMed', '9632665'))

        #Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assert_(seq_record.id in record.accessions)

        #Now try using the iterator - note that all these
        #test cases have only one record.

        # With the SequenceParser
        test_handle = open(datafile)
        records = list(SeqIO.parse(test_handle, "swiss"))
        test_handle.close()

        self.assertEqual(len(records), 1)
        self.assert_(isinstance(records[0], SeqRecord))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser
        test_handle = open(datafile)
        records = list(SwissProt.parse(test_handle))
        test_handle.close()

        self.assertEqual(len(records), 1)
        self.assert_(isinstance(records[0], SwissProt.Record))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)


    def test_sp015(self):
        """Parsing SwissProt file sp015

        Reads the single-record file 'SwissProt/sp015' (path relative to the
        current working directory) with both Bio.SeqIO (format "swiss") and
        Bio.SwissProt, checks a handful of fields on each result, and then
        verifies that the read() and parse() entry points of both parsers
        agree with one another.
        """
        filename = 'sp015'
        datafile = os.path.join('SwissProt', filename)

        def load(parser):
            # Open the data file, hand the handle to ``parser`` and return
            # whatever it produces.  The handle is closed in a ``finally``
            # clause so a parser failure cannot leak the open file.
            handle = open(datafile)
            try:
                return parser(handle)
            finally:
                handle.close()

        # test the SeqIO parser
        seq_record = load(lambda handle: SeqIO.read(handle, "swiss"))

        self.assert_(isinstance(seq_record, SeqRecord))

        self.assertEqual(seq_record.id, "IPI00383150")
        self.assertEqual(seq_record.name, "IPI00383150.2")
        self.assertEqual(seq_record.description, "")
        self.assertEqual(repr(seq_record.seq), "Seq('MSFQAPRRLLELAGQSLLRDQALAISVLDELPRELFPRLFVEAFTSRRCEVLKV...TPC', ProteinAlphabet())")

        # test the record parser
        record = load(SwissProt.read)

        # test a couple of things on the record -- this is not exhaustive
        self.assertEqual(record.entry_name, "IPI00383150.2")
        self.assertEqual(record.accessions, ['IPI00383150'])
        self.assertEqual(record.organism_classification, ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Primates', 'Catarrhini', 'Hominidae', 'Homo'])
        self.assertEqual(record.seqinfo, (457, 52856, '5C3151AAADBDE232'))

        # This entry is expected to carry no features and no references.
        self.assertEqual(len(record.features), 0)
        self.assertEqual(len(record.references), 0)

        #Check the two parsers agree on the essentials
        self.assertEqual(seq_record.seq.tostring(), record.sequence)
        self.assertEqual(seq_record.description, record.description)
        self.assertEqual(seq_record.name, record.entry_name)
        self.assert_(seq_record.id in record.accessions)

        #Now try using the iterator - note that all these
        #test cases have only one record.

        # With the SequenceParser.  The iterator is drained into a list
        # inside the helper, i.e. before the handle is closed.
        records = load(lambda handle: list(SeqIO.parse(handle, "swiss")))

        self.assertEqual(len(records), 1)
        self.assert_(isinstance(records[0], SeqRecord))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].seq.tostring(), seq_record.seq.tostring())
        self.assertEqual(records[0].description, seq_record.description)
        self.assertEqual(records[0].name, seq_record.name)
        self.assertEqual(records[0].id, seq_record.id)

        # With the RecordParser (again drained before the handle is closed)
        records = load(lambda handle: list(SwissProt.parse(handle)))

        self.assertEqual(len(records), 1)
        self.assert_(isinstance(records[0], SwissProt.Record))

        #Check matches what we got earlier without the iterator:
        self.assertEqual(records[0].sequence, record.sequence)
        self.assertEqual(records[0].description, record.description)
        self.assertEqual(records[0].entry_name, record.entry_name)
        self.assertEqual(records[0].accessions, record.accessions)



if __name__ == "__main__":
    # Run every test in this module with verbose (one line per test) output.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))

# Generated by Doxygen 1.6.0 -- Back to index