Logo Search packages:      
Sourcecode: python-biopython version File versions

__init__.py

#!/usr/bin/env python
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
#
# Bio.Wise contains modules for running and processing the output of
# some of the models in the Wise2 package by Ewan Birney available from:
# ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/
# http://www.ebi.ac.uk/Wise2/
# 
# Bio.Wise.psw is for protein Smith-Waterman alignments
# Bio.Wise.dnal is for Smith-Waterman DNA alignments

__version__ = "$Revision: 1.17 $"

import os
import sys
import tempfile

from Bio import SeqIO


def _build_align_cmdline(cmdline, pair, output_filename, kbyte=None, force_type=None, quiet=False):
    """
    >>> os.environ["WISE_KBYTE"]="300000"
    >>> _build_align_cmdline(["dnal"], ("seq1.fna", "seq2.fna"), "/tmp/output", kbyte=100000)
    'dnal -kbyte 100000 seq1.fna seq2.fna > /tmp/output'
    >>> _build_align_cmdline(["psw"], ("seq1.faa", "seq2.faa"), "/tmp/output_aa")
    'psw -kbyte 300000 seq1.faa seq2.faa > /tmp/output_aa'

    """
    cmdline = cmdline[:]

    ### XXX: force_type ignored

    if kbyte is None:
        try:
            cmdline.extend(("-kbyte", os.environ["WISE_KBYTE"]))
        except KeyError:
            pass
    else:
        cmdline.extend(("-kbyte", str(kbyte)))

    if not os.isatty(sys.stderr.fileno()):
        cmdline.append("-quiet")

    cmdline.extend(pair)
    cmdline.extend((">", output_filename))
    if quiet:
        cmdline.extend(("2>", "/dev/null"))
    cmdline_str = ' '.join(cmdline)

    return cmdline_str

def align(cmdline, pair, kbyte=None, force_type=None, dry_run=False, quiet=False, debug=False):
    """
    Returns a filehandle
    """
    assert len(pair) == 2
    
    output_file = tempfile.NamedTemporaryFile(mode='r')
    input_files = tempfile.NamedTemporaryFile(mode="w"), tempfile.NamedTemporaryFile(mode="w")

    if dry_run:
        print _build_align_cmdline(cmdline,
                                   pair,
                                   output_file.name,
                                   kbyte,
                                   force_type,
                                   quiet)
        return

    for filename, input_file in zip(pair, input_files):
        # Pipe the file through Biopython's Fasta parser/writer
        # to make sure it conforms to the Fasta standard (in particular,
        # Wise2 may choke on long lines in the Fasta file)
        records = SeqIO.parse(open(filename), 'fasta')
        SeqIO.write(records, input_file, 'fasta')
        input_file.flush()

    input_file_names = [input_file.name for input_file in input_files]

    cmdline_str = _build_align_cmdline(cmdline,
                                       input_file_names,
                                       output_file.name,
                                       kbyte,
                                       force_type,
                                       quiet)

    if debug:
        print >>sys.stderr, cmdline_str

    status = os.system(cmdline_str) >> 8

    if status > 1:
        if kbyte != 0: # possible memory problem; could be None
            print >>sys.stderr, "INFO trying again with the linear model"
            return align(cmdline, pair, 0, force_type, dry_run, quiet, debug)
        else:
            raise OSError("%s returned %s" % (" ".join(cmdline), status))
    
    return output_file

def all_pairs(singles):
    """
    Generate pairs list for all-against-all alignments

    >>> all_pairs(range(4))
    [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
    """
    pairs = []

    singles = list(singles)
    while singles:
        suitor = singles.pop(0) # if sorted, stay sorted
        pairs.extend([(suitor, single) for single in singles])

    return pairs

def main():
    pass

def _test(*args, **keywds):
    import doctest, sys
    doctest.testmod(sys.modules[__name__], *args, **keywds)

if __name__ == "__main__":
    if __debug__:
        _test()
    main()

Generated by  Doxygen 1.6.0   Back to index