# Copyright 2007 by Tiago Antao.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""
This module provides code to work with GenePop.

See http://wbiomed.curtin.edu.au/genepop/ , the format is documented
here: http://wbiomed.curtin.edu.au/genepop/help_input.html .

Classes:
Record           Holds GenePop data.
RecordParser     Parses a GenePop record (file) into a Record object.

_Scanner         Scans a GenePop record.
_RecordConsumer  Consumes GenePop data to a Record object.

Partially inspired by the MedLine code.
"""

from copy import deepcopy
from types import *

from Bio import File
from Bio.ParserSupport import *

class Record:
    """Holds information from a GenePop record.

    Members:
    marker_len         The marker length (2 or 3 digit code per allele).
    comment_line       Comment line.
    loci_list          List of loci names.
    pop_list           List of population names.
    populations        List of population data.

    In most genepop files, the population name is not trustable.
    It is strongly recommended that populations are referred by index.

    populations has one element per population. Each element is itself
    a list of individuals, each individual is a pair composed by individual
    name and a list of alleles (2 per marker): Example
    [
        [
            ('Ind1', [(1,2),    (3,3), (200,201)]),
            ('Ind2', [(2,None), (3,3), (None,None)]),
        ],
        [
            ('Other1', [(1,1),  (4,3), (200,200)]),
        ]
    ]
    """

    def __init__(self):
        self.marker_len = 0
        self.comment_line = ""
        self.loci_list = []
        self.pop_list = []
        self.populations = []

    def __str__(self):
        """Returns (reconstructs) a GenePop textual representation.

        The output is the comment line, one locus name per line, and then
        for every population a 'Pop' line followed by one line per
        individual ("name, AAABBB AAABBB ...").  Missing alleles (None)
        are written as zero, and every allele is left-padded with zeros
        to marker_len digits.
        """
        rep = [self.comment_line + '\n']
        rep.append('\n'.join(self.loci_list) + '\n')
        for pop in self.populations:
            rep.append('Pop\n')
            for name, markers in pop:
                rep.append(name)
                rep.append(',')
                for marker in markers:
                    rep.append(' ')
                    for al in marker:
                        if al is None:
                            # Missing data is encoded as allele 0.
                            al = 0
                        rep.append(str(al).zfill(self.marker_len))
                rep.append('\n')
        return "".join(rep)

    def split_in_pops(self, pop_names):
        """Splits a GP record in a dictionary with 1 pop per entry.

            Given a record with n pops and m loci returns a dictionary
            of records (key pop_name) where each item is a record
            with a single pop and m loci.

            Parameters:
            pop_names - Population names, indexed like self.populations.
                        An IndexError is raised if it has fewer entries
                        than there are populations.
        """
        gp_pops = {}
        for i, pop in enumerate(self.populations):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            # Deep copies keep the new records independent from this one.
            gp_pop.loci_list = deepcopy(self.loci_list)
            gp_pop.populations = [deepcopy(pop)]
            gp_pops[pop_names[i]] = gp_pop
        return gp_pops

    def split_in_loci(self, gp=None):
        """Splits a GP record in a dictionary with 1 locus per entry.

            Given a record with n pops and m loci returns a dictionary
            of records (key locus name) where each item is a record
            with a single locus and n pops.

            Parameters:
            gp - Unused; accepted only so existing callers keep working.
        """
        gp_loci = {}
        for i, locus in enumerate(self.loci_list):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = [locus]
            gp_pop.populations = []
            for pop in self.populations:
                my_pop = []
                for indiv in pop:
                    # Keep the individual name, but only the i-th marker.
                    my_pop.append((indiv[0], [indiv[1][i]]))
                gp_pop.populations.append(my_pop)
            gp_loci[locus] = gp_pop
        return gp_loci

    def remove_population(self, pos):
        """Removes a population (by position)."""
        del self.populations[pos]

    def remove_locus_by_position(self, pos):
        """Removes a locus by position.

        The locus name is dropped from loci_list and the marker at that
        position is dropped from every individual of every population.
        """
        del self.loci_list[pos]
        for pop in self.populations:
            for indiv in pop:
                name, loci = indiv
                del loci[pos]

    def remove_locus_by_name(self, name):
        """Removes a locus by name.

        Only the first locus with that name is removed.  If no locus has
        that name the record is left untouched and nothing is raised.
        """
        try:
            pos = self.loci_list.index(name)
        except ValueError:
            #If here than locus not existent... Maybe raise exception?
            #   Although it should be Ok... Just a boolean return, maybe?
            return
        self.remove_locus_by_position(pos)

class RecordParser(AbstractParser):
    """Parses GenePop data into a Record object.

    The work is split between a _Scanner, which reads the handle, and a
    _RecordConsumer, which receives the scanner's events and holds the
    result in its data attribute.
    """

    def __init__(self):
        self._scanner = _Scanner()
        self._consumer = _RecordConsumer()

    def parse(self, handle):
        """Scans handle and returns the consumer's data attribute.

        handle is handed unchanged to the scanner's feed method.
        """
        self._scanner.feed(handle, self._consumer)
        return self._consumer.data

def parse(handle):
    """Parses a handle containing a GenePop file.

    Convenience wrapper: builds a fresh RecordParser and returns whatever
    its parse method returns for handle.
    """
    parser = RecordParser()
    return parser.parse(handle)

class _Scanner:
    """Scans a GenePop record.

    There is only one record per file.
    """

    def feed(self, handle, consumer):
        """feed(self, handle, consumer)

        Feed in a GenePop unit record for scanning.  handle is a file-like
        object that contains a Genepop record.  consumer is a
        Consumer object that will receive events as the report is scanned.

        Events sent to the consumer, in order: start_record, comment,
        loci_name (once per locus), then for each population start_pop
        followed by individual (once per individual line), and finally
        end_record.  marker_len is sent once, when the first individual
        is read.

        Raises ValueError if an empty line (or the end of the file) is
        reached before the first 'POP' line.
        """
        if isinstance(handle, File.UndoHandle):
            uhandle = handle
        else:
            uhandle = File.UndoHandle(handle)

        consumer.start_record()

        comment_line = uhandle.readline().rstrip()
        consumer.comment(comment_line)

        #We can now have one loci per line or all loci in a single line
        #seperated by either space or comma+space...
        #We will remove all commas on loci... that should not be a problem
        # NOTE(review): loci separated by a comma with no following space
        # would be fused into one name here -- confirm against real files.
        sample_loci_line = uhandle.readline().rstrip().replace(',', '')
        all_loci = sample_loci_line.split(' ')
        if len(all_loci) > 1:  # This is all loci in one line
            for locus in all_loci:
                consumer.loci_name(locus)
        else:
            consumer.loci_name(sample_loci_line)

        # Any further lines before the first POP are one locus name each.
        next_line = uhandle.readline().rstrip()
        while next_line.upper() != 'POP':
            if next_line == '':
                raise ValueError('No population data found, file probably not GenePop related')
            consumer.loci_name(next_line)
            next_line = uhandle.readline().rstrip()

        consumer.start_pop()
        first_individual = True
        line = uhandle.readline().rstrip()
        # An empty line (which is also what readline gives at end of
        # file) ends the record.
        while line != '':
            if line.upper() == 'POP':
                consumer.start_pop()
            else:
                (indiv_name, marker_line) = line.split(',')
                # Markers are separated by runs of spaces and/or tabs.
                markers = [marker for marker
                           in marker_line.replace('\t', ' ').split(' ')
                           if marker != '']
                if first_individual:
                    first_individual = False
                    # The width of the first marker fixes the allele
                    # width for the whole record.
                    if len(markers[0]) == 4:  # 2 digits per allele
                        marker_len = 2
                    else:
                        marker_len = 3
                    consumer.marker_len(marker_len)
                allele_list = []
                for marker in markers:
                    # Each marker is two alleles of marker_len digits.
                    allele_list.append((int(marker[:marker_len]),
                                        int(marker[marker_len:])))
                consumer.individual(indiv_name, allele_list)
            line = uhandle.readline().rstrip()

        consumer.end_record()

class _RecordConsumer(AbstractConsumer):
    """Consumer that converts a GenePop record to a Record object.

    Members:
    data    Record with GenePop data.
    """

    def __init__(self):
        self.data = None

    def start_record(self):
        """Starts a new, empty Record in data."""
        self.data = Record()

    def end_record(self):
        """Finishes the record.

        Fills pop_list and rewrites every marker as a tuple in which
        allele 0 (missing data) is replaced by None.
        """
        pops = self.data.populations
        loci = self.data.loci_list
        for pop in pops:
            # NOTE(review): naming a population after its last individual
            # is the GenePop convention -- confirm this is what callers
            # of pop_list expect.  Empty populations get an empty name so
            # that pop_list stays aligned with populations.
            if pop:
                self.data.pop_list.append(pop[-1][0])
            else:
                self.data.pop_list.append('')
            for indiv in pop:
                markers = indiv[1]
                for mk_i in range(len(loci)):
                    mk_real = []
                    for al in markers[mk_i]:
                        if al == 0:
                            mk_real.append(None)
                        else:
                            mk_real.append(al)
                    markers[mk_i] = tuple(mk_real)

    def comment(self, comment_line):
        """Stores the comment line."""
        self.data.comment_line = comment_line

    def loci_name(self, locus):
        """Adds a locus name to the end of the loci list."""
        self.data.loci_list.append(locus)

    def marker_len(self, marker_len):
        """Stores the number of digits per allele."""
        self.data.marker_len = marker_len

    def start_pop(self):
        """Opens a new population; later individuals are added to it."""
        self.current_pop = []
        # The same list object is stored in the record, so individuals
        # appended to current_pop show up in data.populations.
        self.data.populations.append(self.current_pop)

    def individual(self, indiv_name, allele_list):
        """Adds an individual (name, allele list) to the current population."""
        self.current_pop.append((indiv_name, allele_list))

