Logo Search packages:      
Sourcecode: python-biopython version File versions


#!/usr/bin/env python

from __future__ import division

__version__ = "$Revision: 1.10 $"

import commands
import itertools
import os
import re

from Bio import Wise


_CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"]

def _build_dnal_cmdline(match, mismatch, gap, extension):
    res = _CMDLINE_DNAL[:]
    res.extend(["-match", str(match)])
    res.extend(["-mis", str(mismatch)])
    res.extend(["-gap", str(-gap)]) # negative: convert score to penalty
    res.extend(["-ext", str(-extension)])  # negative: convert score to penalty

    return res

_CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s"
def _fgrep_count(pattern, file):
    return int(commands.getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))

_re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):")
def _alb_line2coords(line):
    return tuple([int(coord)+1 # one-based -> zero-based
                  for coord
                  in _re_alb_line2coords.match(line).groups()])

def _get_coords(filename):
    alb = file(filename)

    start_line = None
    end_line = None

    for line in alb:
        if line.startswith("["):
            if not start_line:
                start_line = line # rstrip not needed
                end_line = line

    if end_line is None: # sequence is too short
        return [(0, 0), (0, 0)]
    return zip(*map(_alb_line2coords, [start_line, end_line])) # returns [(start0, end0), (start1, end1)]

def _any(seq, pred=bool):
    "Returns True if pred(x) is True at least one element in the iterable"
    return True in itertools.imap(pred, seq)

00062 class Statistics(object):
    Calculate statistics from an ALB report
    def __init__(self, filename, match, mismatch, gap, extension):
        self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename)
        self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename)
        self.gaps = _fgrep_count('"INSERT" %s' % gap, filename)

        if gap == extension:
            self.extensions = 0
            self.extensions = _fgrep_count('"INSERT" %s' % extension, filename)
        self.score = (match*self.matches +
                      mismatch*self.mismatches +
                      gap*self.gaps +

        if _any([self.matches, self.mismatches, self.gaps, self.extensions]):
            self.coords = _get_coords(filename)
            self.coords = [(0, 0), (0,0)]

    def identity_fraction(self):
        return self.matches/(self.matches+self.mismatches)

    header = "identity_fraction\tmatches\tmismatches\tgaps\textensions"

    def __str__(self):
        return "\t".join([str(x) for x in (self.identity_fraction(), self.matches, self.mismatches, self.gaps, self.extensions)])

def align(pair, match=_SCORE_MATCH, mismatch=_SCORE_MISMATCH, gap=_SCORE_GAP_START, extension=_SCORE_GAP_EXTENSION, **keywds):
    cmdline = _build_dnal_cmdline(match, mismatch, gap, extension)
    temp_file = Wise.align(cmdline, pair, **keywds)
        return Statistics(temp_file.name, match, mismatch, gap, extension)
    except AttributeError:
            return None
        except KeyError:

def main():
    import sys
    stats = align(sys.argv[1:3])
    print "\n".join(["%s: %s" % (attr, getattr(stats, attr))
                     for attr in
                     ("matches", "mismatches", "gaps", "extensions")])
    print "identity_fraction: %s" % stats.identity_fraction()
    print "coords: %s" % stats.coords

def _test(*args, **keywds):
    import doctest, sys
    doctest.testmod(sys.modules[__name__], *args, **keywds)

if __name__ == "__main__":
    if __debug__:

Generated by  Doxygen 1.6.0   Back to index