Logo Search packages:      
Sourcecode: python-biopython version File versions  Download package

NewickIO.py

# Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com)
# Based on Bio.Nexus, copyright 2005-2008 by Frank Kauff & Cymon J. Cox.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""I/O function wrappers for the Newick file format.

See: U{ http://evolution.genetics.washington.edu/phylip/newick_doc.html }
"""
__docformat__ = "epytext en"

from cStringIO import StringIO

from Bio.Phylo import Newick

# Delimiters of a bracketed node comment inside a Newick node tag: the parser
# looks for NODECOMMENT_START and treats the text up to NODECOMMENT_END as the
# clade's comment.
# Definitions retrieved from Bio.Nexus.Trees
NODECOMMENT_START = '[&'
NODECOMMENT_END = ']'


class NewickError(Exception):
    """Exception raised when Newick object construction cannot continue.

    Raised by the parser in this module when the tree text is malformed,
    e.g. unbalanced parentheses or an unterminated node comment.
    """


# ---------------------------------------------------------
# Public API

def parse(handle):
    """Lazily read every tree found in a Newick file handle.

    @param handle: an iterable of text lines (typically an open file); it is
        handed to L{Parser} unchanged.
    @return: a generator of Bio.Phylo.Newick.Tree objects.
    """
    newick_parser = Parser(handle)
    return newick_parser.parse()

def write(trees, handle, plain=False, **kwargs):
    """Write trees in Newick format to the given file handle.

    @param trees: iterable of trees to serialize; passed to L{Writer}.
    @param handle: object whose C{write} method receives each tree line.
    @param plain: forwarded to L{Writer.write} as the C{plain} option.
    @param kwargs: any further formatting options, forwarded unchanged.
    @return: number of trees written.
    """
    newick_writer = Writer(trees)
    return newick_writer.write(handle, plain=plain, **kwargs)


# ---------------------------------------------------------
# Input

class Parser(object):
    """Parse Newick trees given a file handle.

    Based on the parser in Bio.Nexus.Trees.

    The handle only has to be iterable and yield lines of text.
    """

    def __init__(self, handle):
        """Remember the line-iterable handle the tree text is read from."""
        self.handle = handle

    @classmethod
    def from_string(cls, treetext):
        """Create a parser that reads from a string instead of a file.

        @param treetext: Newick text, possibly holding several trees.
        @return: a new instance of this class wrapping the text.
        """
        return cls(StringIO(treetext))

    def parse(self, values_are_support=False, rooted=False):
        """Parse the text stream this object was initialized with.

        Lines are stripped of trailing whitespace and accumulated until one
        ends with ';', which terminates a tree. Leftover text after the last
        ';' is parsed as a final tree.

        @param values_are_support: if true, a single numeric value in a node
            tag is stored as the clade's confidence instead of its branch
            length.
        @param rooted: stored as C{self.rooted}; the parsing code in this
            class does not otherwise consult it.
        @return: a generator yielding one tree per record.
        """
        self.values_are_support = values_are_support
        self.rooted = rooted
        # Collect the pieces of the current tree and join once per tree
        # rather than growing a string with repeated concatenation.
        pieces = []
        for line in self.handle:
            piece = line.rstrip()
            if not piece:
                # Blank lines contribute nothing to the tree text
                continue
            pieces.append(piece)
            if piece.endswith(';'):
                yield self._parse_tree(''.join(pieces))
                pieces = []
        if pieces:
            # Last tree is missing a terminal ';' character -- that's OK
            yield self._parse_tree(''.join(pieces))

    def _parse_tree(self, text):
        """Parse the text representation of one tree into a Tree object.

        @param text: the complete Newick string of a single tree.
        @return: Newick.Tree whose root is the parsed top-level clade.
        """
        # XXX what global info do we have here? Any? Use **kwargs?
        return Newick.Tree(root=self._parse_subtree(text))

    def _parse_subtree(self, text):
        """Parse (a,b,c...)[[[xx]:]yy] into subcomponents, recursively.

        @param text: Newick text of one (sub)tree; surrounding whitespace
            and trailing ';' characters are ignored.
        @return: Clade for this node with its sub-clades attached.
        @raise NewickError: if the parentheses in the text are unbalanced.
        """
        text = text.strip().rstrip(';')
        if text.count('(') != text.count(')'):
            raise NewickError("Parentheses do not match in (sub)tree: " + text)
        # Text is now "(...)..." (balanced parens) or "..." (leaf node)
        if '(' not in text:
            # Leaf/terminal node -- recursion stops here
            return self._parse_tag(text)
        # Handle one layer of the nested subtree
        # XXX what if there's a paren in a comment or other string?
        close_posn = text.rfind(')')
        subtrees = []
        # Locate subtrees by counting nesting levels of parens; only commas
        # at nesting level 0 separate the direct children of this node.
        plevel = 0
        prev = 1
        for posn in range(1, close_posn):
            char = text[posn]
            if char == '(':
                plevel += 1
            elif char == ')':
                plevel -= 1
            elif char == ',' and plevel == 0:
                subtrees.append(text[prev:posn])
                prev = posn + 1
        subtrees.append(text[prev:close_posn])
        # Construct a new clade from trailing text, then attach subclades
        clade = self._parse_tag(text[close_posn+1:])
        clade.clades = [self._parse_subtree(st) for st in subtrees]
        return clade

    def _parse_tag(self, text):
        """Extract the data for a node from text.

        The tag may hold a bracketed comment, a name, and up to two numeric
        values separated by ':'.

        @param text: the node tag, i.e. the text of a leaf or the text
            following the closing parenthesis of an internal node.
        @return: Clade instance containing any available data
        @raise NewickError: if a comment is not terminated, if the tag holds
            more than one non-numeric field, or more than two numeric ones.
        """
        # Extract the comment
        comment_start = text.find(NODECOMMENT_START)
        if comment_start != -1:
            # Look for the terminator only after the opening marker, so a
            # stray terminator earlier in the tag cannot be mistaken for it.
            comment_end = text.find(NODECOMMENT_END,
                                    comment_start + len(NODECOMMENT_START))
            if comment_end == -1:
                raise NewickError('Error in tree description: '
                                  'Found %s without matching %s'
                                  % (NODECOMMENT_START, NODECOMMENT_END))
            comment = text[comment_start+len(NODECOMMENT_START):comment_end]
            text = text[:comment_start] + text[comment_end+len(NODECOMMENT_END):]
        else:
            comment = None
        clade = Newick.Clade(comment=comment)
        # Extract name (taxon), and optionally support, branch length
        # Float values are support and branch length, the string is name/taxon
        values = []
        for part in (t.strip() for t in text.split(':')):
            if not part:
                continue
            try:
                values.append(float(part))
            except ValueError:
                # Malformed input must be reported even when assertions are
                # disabled (python -O), so raise instead of asserting.
                if clade.name is not None:
                    raise NewickError("Two string taxonomies in tag: " + text)
                clade.name = part
        if len(values) == 1:
            # Real branch length, or support as branch length
            if self.values_are_support:
                clade.confidence = values[0]
            else:
                clade.branch_length = values[0]
        elif len(values) == 2:
            # Two non-taxon values: support comes first. (Is that always so?)
            clade.confidence, clade.branch_length = values
        elif len(values) > 2:
            raise NewickError("Too many colons in tag: " + text)
        return clade


# ---------------------------------------------------------
# Output

class Writer(object):
    """Serialize trees as Newick text.

    Based on the writer in Bio.Nexus.Trees (str, to_string).
    """

    def __init__(self, trees):
        """Remember the iterable of trees to be written."""
        self.trees = trees

    def write(self, handle, **kwargs):
        """Write this instance's trees to a file handle, one per line.

        @param handle: object whose C{write} method accepts text.
        @param kwargs: formatting options, passed on to L{to_strings}.
        @return: number of trees written.
        """
        count = 0
        for treestr in self.to_strings(**kwargs):
            handle.write(treestr + '\n')
            count += 1
        return count

    def to_strings(self, support_as_branchlengths=False,
            branchlengths_only=False, plain=False,
            plain_newick=True, ladderize=None,
            max_support=1.0):
        """Return an iterable of PAUP-compatible tree lines.

        @param support_as_branchlengths: write each internal node's
            confidence where the branch length would go; terminal nodes get
            C{max_support}.
        @param branchlengths_only: write branch lengths and no support.
        @param plain: write names and topology only. Ignored when either of
            the two options above is set.
        @param plain_newick: if true, yield just the Newick string; otherwise
            prefix it with 'tree <name> =' and weight/rooting tags.
        @param ladderize: 'left'/'LEFT' or 'right'/'RIGHT' to call each
            tree's C{ladderize} method before writing (this modifies the
            tree); any other value leaves the trees untouched.
        @param max_support: value written for terminal nodes when
            C{support_as_branchlengths} is set.
        @return: a generator of strings, one per tree, each ending in ';'.
        """
        # If there's a conflict in the arguments, we override plain=True
        if support_as_branchlengths or branchlengths_only:
            plain = False
        make_info_string = self._info_factory(plain, support_as_branchlengths,
                                              branchlengths_only, max_support)

        def newickize(clade):
            """Convert a node tree to a Newick tree string, recursively."""
            if clade.is_terminal():
                return ((clade.name or '')
                        + make_info_string(clade, terminal=True))
            subtrees = (newickize(sub) for sub in clade)
            return '(%s)%s' % (','.join(subtrees),
                               make_info_string(clade))

        # Convert each tree to a string
        for tree in self.trees:
            if ladderize in ('left', 'LEFT', 'right', 'RIGHT'):
                # Nexus compatibility shim, kind of
                tree.ladderize(reverse=(ladderize in ('right', 'RIGHT')))
            rawtree = newickize(tree.root) + ';'
            if plain_newick:
                yield rawtree
                continue
            # Nexus-style (?) notation before the raw Newick tree
            treeline = ['tree', (tree.name or 'a_tree'), '=']
            if tree.weight != 1:
                treeline.append('[&W%s]' % round(float(tree.weight), 3))
            if tree.rooted:
                treeline.append('[&R]')
            treeline.append(rawtree)
            yield ' '.join(treeline)

    def _info_factory(self, plain, support_as_branchlengths,
            branchlengths_only, max_support):
        """Return a function that creates a nicely formatted node tag.

        The returned function takes C{(clade, terminal=False)} and returns
        the text to append after the node's name or closing parenthesis.
        Missing (None) values are written as zero, except for a terminal
        node's missing branch length in the default mode, which is written
        as 1.0.

        @param plain: if true, node tags are always empty.
        @param support_as_branchlengths: write confidence as ':<support>'.
        @param branchlengths_only: write ':<branch length>' only.
        @param max_support: support value used for terminal nodes.
        """
        if plain:
            # Plain tree only. That's easy.
            def make_info_string(clade, terminal=False):
                return ''

        elif support_as_branchlengths:
            # Support as branchlengths (eg. PAUP), ignore actual branchlengths
            def make_info_string(clade, terminal=False):
                if terminal:
                    # terminal branches have 100% support
                    return ':%1.2f' % max_support
                support = getattr(clade, 'confidence', None)
                # A node without support (e.g. the root) must not crash the
                # writer; write zero like the default mode does.
                return ':%1.2f' % (0.0 if support is None else support)

        elif branchlengths_only:
            # write only branchlengths, ignore support
            def make_info_string(clade, terminal=False):
                blen = clade.branch_length
                # A node without a branch length (e.g. the root) must not
                # crash the writer; write zero like the default mode does.
                return ':%1.5f' % (0.0 if blen is None else blen)

        else:
            # write support and branchlengths (e.g. .con tree of mrbayes)
            def make_info_string(clade, terminal=False):
                blen = clade.branch_length
                if terminal:
                    # Only a *missing* length falls back to 1.0; a genuine
                    # zero-length branch must be written as zero.
                    return ':%1.5f' % (1.0 if blen is None else blen)
                support = getattr(clade, 'confidence', None)
                if blen is not None and support is not None:
                    # we have blen and support
                    return '%1.2f:%1.5f' % (support, blen)
                if blen is not None:
                    # we have only blen
                    return '0.00000:%1.5f' % blen
                if support is not None:
                    # we have only support
                    return '%1.2f:0.00000' % support
                return '0.00:0.00000'

        return make_info_string


Generated by  Doxygen 1.6.0   Back to index