Logo Search packages:      
Sourcecode: python-biopython version File versions

clustal_format.py

"""
A Martel grammar to read information from a Clustal-formatted file (*.aln).

This uses Andrew Dalke's Martel to do the parsing dirty work for me,
so all we need to do here is set up a big ol' regular expression to
let Martel know what the file looks like.
"""
# standard library
import sys

import Martel
    

# define everything we will parse as a ton of regular expressions with
# specific callbacks
# Clustal version number: one digit, a literal dot, then one or two digits
# (for example "1.8" or "1.81").  The pattern is a raw string so the regex
# escapes reach Martel untouched, and the dot is escaped so that it no longer
# matches an arbitrary character.
# NOTE(review): no other expression in the visible part of this module
# references this group.
version = Martel.Group("version",
                       Martel.Re(r"\d\.\d\d?"))

# First line of the file: the literal text "CLUSTAL ", then one or more
# further characters (the ".+" regex), then between zero and three
# end-of-line markers.
header = Martel.Group(
    "header",
    Martel.Str("CLUSTAL ")
    + Martel.Re(".+")
    + Martel.MaxRepeat(Martel.AnyEol(), 0, 3),
)

# Sequence identifier at the start of an alignment row: one or more letters,
# digits, or the punctuation listed in the character class.  Written as a
# triple-quoted raw string so that both quote characters can appear literally
# and no backslash is interpreted by Python; the pattern text handed to
# Martel is unchanged.
seq_id = Martel.Group("seq_id",
                      Martel.Re(r"""[-a-zA-Z:;^_'",\+\#\|\[\]\(\)\/\.\d\?]+"""))

# padding that separates the identifier from the sequence data:
# one or more space characters
seq_space = Martel.Group(
    "seq_space",
    Martel.Re("[ ]+"),
)

# sequence data of a single alignment row: one or more letters,
# "-" characters, or "." characters
seq_info = Martel.Group(
    "seq_info",
    Martel.Re("[-a-zA-Z.]+"),
)

# Clustal can optionally append a number to each row telling you where you
# are in the sequence; swallow it (one or more spaces followed by one or
# more digits) when it is present.  The digit pattern is a raw string so
# that "\d" is passed to Martel verbatim instead of relying on Python
# leaving an unrecognised escape sequence alone.
seq_num = Martel.Group("seq_num",
                       Martel.Re("[ ]+") +
                       Martel.Re(r"[\d]+"))

# One alignment row: identifier, padding, sequence data, an optional
# trailing position number, and the end of the line.  The line ending uses
# Martel.AnyEol() -- the same expression the header, match line and block
# separator use -- rather than a hard-coded "\n", so rows are not the only
# part of the grammar tied to a single newline convention.
seq_line = Martel.Group("seq_line", seq_id + seq_space + seq_info +
                        Martel.Opt(seq_num) +
                        Martel.AnyEol())

# Line made up of one or more spaces, ":", "." and "*" characters,
# optionally followed by an end-of-line marker.  The pattern is a raw
# string so the backslash escapes are passed to Martel verbatim.
match_stars = Martel.Group("match_stars",
                           Martel.Re(r"[ :\.\*]+") +
                           Martel.Opt(Martel.AnyEol()))

# a single end-of-line marker acting as the separator between blocks
new_block = Martel.Group(
    "new_block",
    Martel.AnyEol(),
)

# One alignment block: at least one sequence row, an optional match line,
# and any number of separator lines after it.
block_info = Martel.Group(
    "block_info",
    Martel.Rep1(seq_line)
    + Martel.Opt(match_stars)
    + Martel.Rep(new_block),
)


# The complete expression to import for parsing clustal files: one header
# followed by one or more alignment blocks.
# NOTE: the name shadows the builtin format(); it is kept because it is the
# name this module exposes.
format = Martel.Group(
    "clustalx",
    header + Martel.Rep1(block_info),
)

Generated by  Doxygen 1.6.0   Back to index