Logo Search packages:      
Sourcecode: python-biopython version File versions  Download package

def Bio::AlignIO::FastaIO::FastaM10Iterator::_parse_query_header (   self,
  line 
) [private]
Helper function for the main parsing code.

Skips over the free-format query header, extracting the tagged parameters.

If there are no hits for the current query, it is skipped entirely.
  2>>>gi|10955264|ref|NP_052605.1| hypothetical protein pOSAK1_02 [Escherichia coli O157:H7 s 126 aa - 126 aa
Library: NC_009649.faa   45119 residues in   180 sequences

  45119 residues in   180 sequences
Statistics: (shuffled [500]) Expectation_n fit: rho(ln(x))= 5.0398+/-0.00968; mu= 2.8364+/- 0.508
 mean_var=44.7937+/-10.479, 0's: 0 Z-trim: 0  B-trim: 0 in 0/32
 Lambda= 0.191631
Algorithm: FASTA (3.5 Sept 2006) [optimized]
Parameters: BL50 matrix (15:-5) ktup: 2
 join: 36, opt: 24, open/ext: -10/-2, width:  16
 Scan time:  0.040

The best scores are:                                      opt bits E(180)
gi|152973462|ref|YP_001338513.1| hypothetical prot ( 101)   58 23.3    0.22
gi|152973501|ref|YP_001338552.1| hypothetical prot ( 245)   55 22.5    0.93
  2>>>gi|152973838|ref|YP_001338875.1| hypothetical protein KPN_pKPN7p10263 [Klebsiella pneumoniae subsp. pneumonia 76 aa - 76 aa
 vs  NC_002127.faa library

    579 residues in     3 sequences
 Altschul/Gish params: n0: 76 Lambda: 0.158 K: 0.019 H: 0.100

FASTA (3.5 Sept 2006) function [optimized, BL50 matrix (15:-5)] ktup: 2
 join: 36, opt: 24, open/ext: -10/-2, width:  16
 Scan time:  0.000
!! No library sequences with E() < 0.5

Definition at line 311 of file FastaIO.py.

                                        :
        """Helper function for the main parsing code.

        Skips over the free format query header, extracting the tagged parameters.

        If there are no hits for the current query, it is skipped entirely."""
        #e.g. this region (where there is often a histogram too):
        """
          2>>>gi|10955264|ref|NP_052605.1| hypothetical protein pOSAK1_02 [Escherichia coli O157:H7 s 126 aa - 126 aa
        Library: NC_009649.faa   45119 residues in   180 sequences

          45119 residues in   180 sequences
        Statistics: (shuffled [500]) Expectation_n fit: rho(ln(x))= 5.0398+/-0.00968; mu= 2.8364+/- 0.508
         mean_var=44.7937+/-10.479, 0's: 0 Z-trim: 0  B-trim: 0 in 0/32
         Lambda= 0.191631
        Algorithm: FASTA (3.5 Sept 2006) [optimized]
        Parameters: BL50 matrix (15:-5) ktup: 2
         join: 36, opt: 24, open/ext: -10/-2, width:  16
         Scan time:  0.040

        The best scores are:                                      opt bits E(180)
        gi|152973462|ref|YP_001338513.1| hypothetical prot ( 101)   58 23.3    0.22
        gi|152973501|ref|YP_001338552.1| hypothetical prot ( 245)   55 22.5    0.93
        """
        #Sometimes have queries with no matches, in which case we continue to the
        #next query block:
        """
          2>>>gi|152973838|ref|YP_001338875.1| hypothetical protein KPN_pKPN7p10263 [Klebsiella pneumoniae subsp. pneumonia 76 aa - 76 aa
         vs  NC_002127.faa library

            579 residues in     3 sequences
         Altschul/Gish params: n0: 76 Lambda: 0.158 K: 0.019 H: 0.100

        FASTA (3.5 Sept 2006) function [optimized, BL50 matrix (15:-5)] ktup: 2
         join: 36, opt: 24, open/ext: -10/-2, width:  16
         Scan time:  0.000
        !! No library sequences with E() < 0.5
        """

        self._query_header_annotation = {}
        self._query_descr = ""

        assert ">>>" in line and not line[0:3] == ">>>"
        #There is nothing useful in this line, the query description is truncated.
        
        line = self.handle.readline()
        #We ignore the free form text...
        while not line[0:3] == ">>>" :
            #print "Ignoring %s" % line.strip()
            line = self.handle.readline()
            if not line :
                raise ValueError("Premature end of file!")
            if ">>><<<" in line :
                #End of alignments, looks like the last query
                #or queries had no hits.
                return line

        #Now want to parse this section:
        """
        >>>gi|10955264|ref|NP_052605.1|, 126 aa vs NC_009649.faa library
        ; pg_name: /home/pjcock/Downloads/FASTA/fasta-35.3.6/fasta35
        ; pg_ver: 35.03
        ; pg_argv: /home/pjcock/Downloads/FASTA/fasta-35.3.6/fasta35 -Q -H -E 1 -m 10 -X -5 -5 NC_002127.faa NC_009649.faa
        ; pg_name: FASTA
        ; pg_ver: 3.5 Sept 2006
        ; pg_matrix: BL50 (15:-5)
        ; pg_open-ext: -10 -2
        ; pg_ktup: 2
        ; pg_optcut: 24
        ; pg_cgap: 36
        ; mp_extrap: 60000 500
        ; mp_stats: (shuffled [500]) Expectation_n fit: rho(ln(x))= 5.0398+/-0.00968; mu= 2.8364+/- 0.508  mean_var=44.7937+/-10.479, 0's: 0 Z-trim: 0  B-trim: 0 in 0/32  Lambda= 0.191631
        ; mp_KS: -0.0000 (N=1066338402) at  20
        ; mp_Algorithm: FASTA (3.5 Sept 2006) [optimized]
        ; mp_Parameters: BL50 matrix (15:-5) ktup: 2  join: 36, opt: 24, open/ext: -10/-2, width:  16
        """

        assert line[0:3] == ">>>", line
        self._query_descr = line[3:].strip()

        #Handle the following "program" tagged data,
        line = self.handle.readline()
        line = self._parse_tag_section(line, self._query_header_annotation)
        assert not line[0:2] == "; ", line
        assert line[0:2] == ">>" or ">>>" in line, line
        return line



Generated by  Doxygen 1.6.0   Back to index