# Logo Search packages:
# Sourcecode: python-biopython version File versions  Download package
#
# test_SeqIO_index.py

# Copyright 2009-2010 by Peter Cock.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Additional unit tests for Bio.SeqIO.convert(...) function."""
import os
import unittest
from StringIO import StringIO
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
from Bio.SeqIO._index import _FormatToIndexedDict
from Bio.Alphabet import generic_protein, generic_nucleotide, generic_dna

from seq_tests_common import compare_record

class IndexDictTests(unittest.TestCase):
    """Cunning unit test where methods are added at run time.

    No ``test_*`` methods are written out in the class body.  The module
    level code further down the file attaches them with setattr, and each
    one simply calls simple_check or get_raw_check for one example file.
    """

    def simple_check(self, filename, format, alphabet):
        """Check the dictionary returned by SeqIO.index for one file.

        Arguments:
         - filename - path of the sequence file to index
         - format - SeqIO format name, passed to SeqIO.parse and SeqIO.index
         - alphabet - alphabet passed to SeqIO.parse and SeqIO.index
           (may be None)

        The record identifiers found by SeqIO.parse are used as the
        expected keys.  The test then exercises keys, len, truth value,
        membership, item access, get, iteritems, and finally confirms
        that the methods which would modify or copy the dictionary raise
        NotImplementedError.
        """
        if format in SeqIO._BinaryFormats:
            mode = "rb"
        else:
            mode = "r"
        #Close the handle ourselves rather than leaving it for the
        #garbage collector (try/finally keeps this Python 2 friendly).
        handle = open(filename, mode)
        try:
            id_list = [rec.id for rec in
                       SeqIO.parse(handle, format, alphabet)]
        finally:
            handle.close()
        rec_dict = SeqIO.index(filename, format, alphabet)
        self.assertEqual(set(id_list), set(rec_dict.keys()))
        #This is redundant, I just want to make sure len works:
        self.assertEqual(len(id_list), len(rec_dict))
        #Make sure boolean evaluation works
        self.assertEqual(bool(id_list), bool(rec_dict))
        for key in id_list:
            self.assertTrue(key in rec_dict)
            self.assertEqual(key, rec_dict[key].id)
            self.assertEqual(key, rec_dict.get(key).id)
        #Check non-existent keys (reported as a test failure, not an
        #error, if the lookup unexpectedly succeeds):
        self.assertRaises(KeyError, rec_dict.__getitem__, chr(0))
        self.assertEqual(rec_dict.get(chr(0)), None)
        self.assertEqual(rec_dict.get(chr(0), chr(1)), chr(1))
        #Now check iteritems...
        for key, rec in rec_dict.iteritems():
            self.assertTrue(key in id_list)
            self.assertTrue(isinstance(rec, SeqRecord))
            self.assertEqual(rec.id, key)
        #Now check non-defined methods...
        self.assertRaises(NotImplementedError, rec_dict.values)
        self.assertRaises(NotImplementedError, rec_dict.popitem)
        self.assertRaises(NotImplementedError, rec_dict.pop, chr(0))
        self.assertRaises(NotImplementedError, rec_dict.pop, chr(0), chr(1))
        self.assertRaises(NotImplementedError, rec_dict.clear)
        self.assertRaises(NotImplementedError, rec_dict.__setitem__, "X", None)
        self.assertRaises(NotImplementedError, rec_dict.copy)
        self.assertRaises(NotImplementedError, rec_dict.fromkeys, [])
        #Done

    def get_raw_check(self, filename, format, alphabet):
        """Check get_raw and the key_function argument for one file.

        Arguments:
         - filename - path of the sequence file to index
         - format - SeqIO format name, passed to SeqIO.parse and SeqIO.index
         - alphabet - alphabet passed to SeqIO.parse, SeqIO.index and
           SeqIO.read (may be None)

        The file is indexed with a key_function which lower-cases the
        keys.  For every key the raw text from get_raw must be non-blank
        and must appear in the file contents.  Except for the "ig"
        format, the raw text is also parsed on its own with SeqIO.read
        and compared against the indexed record using compare_record.

        Binary formats are skipped (the method returns without checking
        anything).
        """
        if format in SeqIO._BinaryFormats:
            #This means SFF at the moment, which does not
            #implement the get_raw method
            return
        handle = open(filename, "rU")
        try:
            raw_file = handle.read()
        finally:
            handle.close()
        #Also checking the key_function here
        id_list = [rec.id.lower() for rec in
                   SeqIO.parse(filename, format, alphabet)]
        rec_dict = SeqIO.index(filename, format, alphabet,
                               key_function = lambda x : x.lower())
        self.assertEqual(set(id_list), set(rec_dict.keys()))
        self.assertEqual(len(id_list), len(rec_dict))
        for key in id_list:
            self.assertTrue(key in rec_dict)
            self.assertEqual(key, rec_dict[key].id.lower())
            self.assertEqual(key, rec_dict.get(key).id.lower())
            raw = rec_dict.get_raw(key)
            self.assertTrue(raw.strip())
            self.assertTrue(raw in raw_file)
            if format in ["ig"]:
                #These have a header structure and can't be parsed
                #individually (at least, not right now).
                continue
            rec1 = rec_dict[key]
            rec2 = SeqIO.read(StringIO(raw), format, alphabet)
            self.assertEqual(True, compare_record(rec1, rec2))
        #Done
            
#Example files to index, one (filename, format name, alphabet) tuple each.
#An alphabet of None means no alphabet is passed on to SeqIO.
tests = [
    ("Ace/contig1.ace", "ace", generic_dna),
    ("Ace/consed_sample.ace", "ace", None),
    ("Ace/seq.cap.ace", "ace", generic_dna),
    ("Quality/wrapping_original_sanger.fastq", "fastq", None),
    ("Quality/example.fastq", "fastq", None),
    ("Quality/example.fastq", "fastq-sanger", generic_dna),
    ("Quality/tricky.fastq", "fastq", generic_nucleotide),
    ("Quality/sanger_faked.fastq", "fastq-sanger", generic_dna),
    ("Quality/solexa_faked.fastq", "fastq-solexa", generic_dna),
    ("Quality/illumina_faked.fastq", "fastq-illumina", generic_dna),
    ("EMBL/U87107.embl", "embl", None),
    ("EMBL/TRBG361.embl", "embl", None),
    ("EMBL/A04195.imgt", "embl", None), #Not a proper EMBL file, an IMGT file
    ("GenBank/NC_000932.faa", "fasta", generic_protein),
    ("GenBank/NC_005816.faa", "fasta", generic_protein),
    ("GenBank/NC_005816.tsv", "tab", generic_protein),
    ("GenBank/NC_005816.ffn", "fasta", generic_dna),
    ("GenBank/NC_005816.fna", "fasta", generic_dna),
    ("GenBank/NC_005816.gb", "gb", None),
    ("GenBank/cor6_6.gb", "genbank", None),
    ("IntelliGenetics/vpu_nucaligned.txt", "ig", generic_nucleotide),
    ("IntelliGenetics/TAT_mase_nuc.txt", "ig", None),
    ("IntelliGenetics/VIF_mase-pro.txt", "ig", generic_protein),
    ("Phd/phd1", "phd", generic_dna),
    ("Phd/phd2", "phd", None),
    ("Phd/phd_solexa", "phd", generic_dna),
    ("Phd/phd_454", "phd", generic_dna),
    ("NBRF/B_nuc.pir", "pir", generic_nucleotide),
    ("NBRF/Cw_prot.pir", "pir", generic_protein),
    ("NBRF/clustalw.pir", "pir", None),
    ("SwissProt/sp001", "swiss", None),
    ("SwissProt/sp010", "swiss", None),
    ("SwissProt/sp016", "swiss", None),
    ("Roche/E3MFGYR02_random_10_reads.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_random_10_reads.sff", "sff-trim", generic_dna),
    ("Roche/E3MFGYR02_index_at_start.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_index_in_middle.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_alt_index_at_start.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_alt_index_in_middle.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_alt_index_at_end.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_no_manifest.sff", "sff", generic_dna),
    ("Roche/greek.sff", "sff", generic_nucleotide),
    ("Roche/greek.sff", "sff-trim", generic_nucleotide),
    ("Roche/paired.sff", "sff", None),
    ("Roche/paired.sff", "sff-trim", None),
    ]


def funct(name_template, doc_template, check_name, fn, fmt, alpha):
    """Attach one generated test method to IndexDictTests.

    The arguments are captured by this call, so every generated method
    keeps the file, format and alphabet it was created for.
    """
    f = lambda x : getattr(x, check_name)(fn, fmt, alpha)
    f.__doc__ = doc_template % (fmt, fn)
    #Slashes and dots in the path become underscores in the method name
    setattr(IndexDictTests,
            name_template % (fn.replace("/","_").replace(".","_"), fmt),
            f)

for filename, format, alphabet in tests:
    assert format in _FormatToIndexedDict
    funct("test_%s_%s", "Index %s file %s", "simple_check",
          filename, format, alphabet)
    #Only the non-binary formats get a get_raw test as well
    if format not in SeqIO._BinaryFormats:
        funct("test_%s_%s_get_raw", "Index %s file %s get_raw",
              "get_raw_check", filename, format, alphabet)
#The helper is only needed while building the class
del funct

if __name__ == "__main__":
    #When run as a script, use a text runner at verbosity level 2
    unittest.main(testRunner=unittest.TextTestRunner(verbosity = 2))

# Generated by  Doxygen 1.6.0   Back to index