Logo Search packages:      
Sourcecode: python-biopython version File versions  Download package

Reduced.py

from Bio import Alphabet
"""
Reduced (redundant or simplified) alphabets represent protein sequences using an
alternative alphabet which lumps several amino acids together into one letter, based
on physico-chemical traits. For example, the aliphatic residues (I, L, V) are usually
quite interchangeable, so many sequence studies group them into one letter.

Examples of reduced alphabets are available in:

http://viscose.ifg.uni-muenster.de/html/alphabets.html

Bio.utils.reduce_sequence is used to take a Protein alphabet, and reduce it using one of
the tables here, or a user-defined table.
"""
# The Murphy tables are from here:
#     Murphy L.R., Wallqvist A, Levy RM. (2000) Simplified amino acid alphabets for protein
#     fold recognition and implications for folding. Protein Eng. 13(3):149-152

# Murphy 15-letter reduction table: every one-letter amino acid code (key) is
# mapped to the letter that stands for its group (value).
murphy_15_tab = {
    # L, V, I and M share the letter L
    "L": "L", "V": "L", "I": "L", "M": "L",
    # C, A, G, S, T and P each keep their own letter
    "C": "C", "A": "A", "G": "G", "S": "S", "T": "T", "P": "P",
    # F and Y share the letter F; W keeps its own letter
    "F": "F", "Y": "F", "W": "W",
    # E, D, N and Q each keep their own letter
    "E": "E", "D": "D", "N": "N", "Q": "Q",
    # K and R share the letter K; H keeps its own letter
    "K": "K", "R": "K", "H": "H",
}

class Murphy15(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet made of the 15 letters used as values in murphy_15_tab."""

    letters = "LCAGSTPFWEDNQKH"
    # Number of letters in this alphabet (15).
    size = len(letters)


# Ready-made instance of the 15-letter alphabet.
murphy_15 = Murphy15()

# Murphy 10-letter reduction table: every one-letter amino acid code (key) is
# mapped to the letter that stands for its group (value).
murphy_10_tab = {
    # L, V, I and M share the letter L
    "L": "L", "V": "L", "I": "L", "M": "L",
    # C, A and G each keep their own letter
    "C": "C", "A": "A", "G": "G",
    # S and T share the letter S; P keeps its own letter
    "S": "S", "T": "S", "P": "P",
    # F, Y and W share the letter F
    "F": "F", "Y": "F", "W": "F",
    # E, D, N and Q share the letter E
    "E": "E", "D": "E", "N": "E", "Q": "E",
    # K and R share the letter K; H keeps its own letter
    "K": "K", "R": "K", "H": "H",
}
class Murphy10(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet made of the 10 letters used as values in murphy_10_tab."""

    letters = "LCAGSPFEKH"
    # Number of letters in this alphabet (10).
    size = len(letters)


# Ready-made instance of the 10-letter alphabet.
murphy_10 = Murphy10()

# Murphy 8-letter reduction table: every one-letter amino acid code (key) is
# mapped to the letter that stands for its group (value).
murphy_8_tab = {
    # L, V, I, M and C share the letter L
    "L": "L", "V": "L", "I": "L", "M": "L", "C": "L",
    # A and G share the letter A
    "A": "A", "G": "A",
    # S and T share the letter S; P keeps its own letter
    "S": "S", "T": "S", "P": "P",
    # F, Y and W share the letter F
    "F": "F", "Y": "F", "W": "F",
    # E, D, N and Q share the letter E
    "E": "E", "D": "E", "N": "E", "Q": "E",
    # K and R share the letter K; H keeps its own letter
    "K": "K", "R": "K", "H": "H",
}

class Murphy8(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet made of the 8 letters used as values in murphy_8_tab."""

    letters = "LASPFEKH"
    # Number of letters in this alphabet (8).
    size = len(letters)


# Ready-made instance of the 8-letter alphabet.
murphy_8 = Murphy8()

# Murphy 4-letter reduction table: every one-letter amino acid code (key) is
# mapped to the letter that stands for its group (value).
murphy_4_tab = {
    # L, V, I, M and C share the letter L
    "L": "L", "V": "L", "I": "L", "M": "L", "C": "L",
    # A, G, S, T and P share the letter A
    "A": "A", "G": "A", "S": "A", "T": "A", "P": "A",
    # F, Y and W share the letter F
    "F": "F", "Y": "F", "W": "F",
    # E, D, N, Q, K, R and H share the letter E
    "E": "E", "D": "E", "N": "E", "Q": "E", "K": "E", "R": "E", "H": "E",
}

class Murphy4(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet made of the 4 letters used as values in murphy_4_tab."""

    letters = "LAFE"
    # Number of letters in this alphabet (4).
    size = len(letters)


# Ready-made instance of the 4-letter alphabet.
murphy_4 = Murphy4()

# Two-letter HP reduction table: every one-letter amino acid code (key) is
# mapped to either "P" or "H" (value).
hp_model_tab = {
    # Hydrophilic residues -> "P"
    "A": "P", "G": "P", "T": "P", "S": "P",
    "N": "P", "Q": "P", "D": "P", "E": "P",
    "H": "P", "R": "P", "K": "P", "P": "P",
    # Hydrophobic residues -> "H"
    "C": "H", "M": "H", "F": "H", "I": "H",
    "L": "H", "V": "H", "W": "H", "Y": "H",
}

class HPModel(Alphabet.ProteinAlphabet):
    """Two-letter reduced protein alphabet made of the values used in hp_model_tab."""

    letters = "HP"
    # Number of letters in this alphabet (2).
    size = len(letters)


# Ready-made instance of the two-letter HP alphabet.
hp_model = HPModel()

# Five-letter reduction table: every one-letter amino acid code (key) is
# mapped to the letter of its group (value).
pc_5_table = {
    # Aliphatic -> "A"
    "I": "A", "V": "A", "L": "A",
    # Aromatic -> "R"
    "F": "R", "Y": "R", "W": "R", "H": "R",
    # Charged -> "C"
    "K": "C", "R": "C", "D": "C", "E": "C",
    # Tiny -> "T"
    "G": "T", "A": "T", "C": "T", "S": "T",
    # Diverse -> "D"
    "T": "D", "M": "D", "Q": "D", "N": "D", "P": "D",
}

class PC5(Alphabet.ProteinAlphabet):
    """Five-letter reduced protein alphabet made of the values used in pc_5_table.

    Letters, as labelled in pc_5_table: A (aliphatic), R (aromatic),
    C (charged), T (tiny) and D (diverse).
    """

    letters = "ARCTD"
    size = 5


# Ready-made instance of the five-letter alphabet. The statement here used to
# re-create hp_model (a copy-paste slip), which left PC5 without any instance;
# hp_model itself is still defined right after the HPModel class.
pc_5 = PC5()

Generated by  Doxygen 1.6.0   Back to index