# test_Cluster.py -- source listing from the python-biopython package.

# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

import unittest

# Importing Bio.Cluster can fail with ImportError; translate that into
# Biopython's MissingPythonDependencyError carrying installation advice.
try:
    from Bio import Cluster
except ImportError:
    from Bio import MissingPythonDependencyError
    raise MissingPythonDependencyError("If you want to use Bio.Cluster, "
                                       "install NumPy first and then "
                                       "reinstall Biopython")

# The tests below build their input matrices with NumPy directly, so a
# missing NumPy is reported the same way.
try:
    import numpy
except ImportError:
    from Bio import MissingPythonDependencyError
    raise MissingPythonDependencyError(\
        "Install NumPy if you want to use Bio.Cluster")

00022 class TestCluster(unittest.TestCase):

    module = 'Bio.Cluster'

    def test_median_mean(self):
        if TestCluster.module=='Bio.Cluster':
            from Bio.Cluster import mean, median
        elif TestCluster.module=='Pycluster':
            from Pycluster import mean, median

        data = numpy.array([ 34.3, 3, 2 ])
        self.assertAlmostEqual(mean(data), 13.1, 3)
        self.assertAlmostEqual(median(data), 3.0, 3)

        data = [ 5, 10, 15, 20]
        self.assertAlmostEqual(mean(data), 12.5, 3)
        self.assertAlmostEqual(median(data), 12.5, 3)

        data = [ 1, 2, 3, 5, 7, 11, 13, 17]
        self.assertAlmostEqual(mean(data), 7.375, 3)
        self.assertAlmostEqual(median(data), 6.0, 3)

        data = [ 100, 19, 3, 1.5, 1.4, 1, 1, 1]
        self.assertAlmostEqual(mean(data), 15.988, 3)
        self.assertAlmostEqual(median(data), 1.45, 3)
      

    def test_matrix_parse(self):
        if TestCluster.module=='Bio.Cluster':
            from Bio.Cluster import treecluster
        elif TestCluster.module=='Pycluster':
            from Pycluster import treecluster

        # Normal matrix, no errors
        data1 = numpy.array([[ 1.1, 1.2 ],
                             [ 1.4, 1.3 ],
                             [ 1.1, 1.5 ],
                             [ 2.0, 1.5 ],
                             [ 1.7, 1.9 ],
                             [ 1.7, 1.9 ],
                             [ 5.7, 5.9 ],
                             [ 5.7, 5.9 ],
                             [ 3.1, 3.3 ],
                             [ 5.4, 5.3 ],
                             [ 5.1, 5.5 ],
                             [ 5.0, 5.5 ],
                             [ 5.1, 5.2 ]])
      
        # Another normal matrix, no errors; written as a list
        data2 =  [[  1.1, 2.2, 3.3, 4.4, 5.5 ], 
                  [  3.1, 3.2, 1.3, 2.4, 1.5 ], 
                  [  4.1, 2.2, 0.3, 5.4, 0.5 ], 
                  [ 12.1, 2.0, 0.0, 5.0, 0.0 ]]
      
        # Ragged matrix
        data3 =  [[ 91.1, 92.2, 93.3, 94.4, 95.5], 
                  [ 93.1, 93.2, 91.3, 92.4 ], 
                  [ 94.1, 92.2, 90.3 ], 
                  [ 12.1, 92.0, 90.0, 95.0, 90.0 ]]
      
        # Matrix with bad cells
        data4 =  [ [ 7.1, 7.2, 7.3, 7.4, 7.5, ],
                   [ 7.1, 7.2, 7.3, 7.4, 'snoopy' ], 
                   [ 7.1, 7.2, 7.3, None, None]] 

        # Matrix with a bad row
        data5 =  [ [ 23.1, 23.2, 23.3, 23.4, 23.5], 
                   None,
                   [ 23.1, 23.0, 23.0, 23.0, 23.0]]

        # Various references that don't point to matrices at all
        data6 = "snoopy"
        data7 = {'a': [[2.3,1.2],[3.3,5.6]]}
        data8 = []
        data9 = [None]
      
        try:
            treecluster(data1)
        except:
            self.fail("treecluster failed to accept matrix data1")

        try:
            treecluster(data2)
        except:
            self.fail("treecluster failed to accept matrix data2")

        self.assertRaises(TypeError, lambda : treecluster(data3))
        self.assertRaises(TypeError, lambda : treecluster(data4))
        self.assertRaises(TypeError, lambda : treecluster(data5))
        self.assertRaises(TypeError, lambda : treecluster(data6))
        self.assertRaises(TypeError, lambda : treecluster(data7))
        self.assertRaises(TypeError, lambda : treecluster(data8))
        self.assertRaises(TypeError, lambda : treecluster(data9))

    def test_kcluster(self):
        if TestCluster.module=='Bio.Cluster':
            from Bio.Cluster import kcluster
        elif TestCluster.module=='Pycluster':
            from Pycluster import kcluster

        nclusters = 3
        # First data set
        weight = numpy.array([1,1,1,1,1])
        data   = numpy.array([[ 1.1, 2.2, 3.3, 4.4, 5.5],
                              [ 3.1, 3.2, 1.3, 2.4, 1.5], 
                              [ 4.1, 2.2, 0.3, 5.4, 0.5], 
                              [12.1, 2.0, 0.0, 5.0, 0.0]]) 
        mask =  numpy.array([[ 1, 1, 1, 1, 1], 
                             [ 1, 1, 1, 1, 1], 
                             [ 1, 1, 1, 1, 1], 
                             [ 1, 1, 1, 1, 1]], int) 
      
        clusterid, error, nfound = kcluster(data, nclusters=nclusters,
                                            mask=mask, weight=weight,
                                            transpose=0, npass=100,
                                            method='a', dist='e')
        self.assertEqual(len(clusterid), len(data))

        correct = [0,1,1,2]
        mapping = [clusterid[correct.index(i)] for i in range(nclusters)]
        for i in range(len(clusterid)):
            self.assertEqual(clusterid[i], mapping[correct[i]])
      
        # Second data set
        weight = numpy.array([1,1])
        data = numpy.array([[ 1.1, 1.2 ],
                      [ 1.4, 1.3 ],
                      [ 1.1, 1.5 ],
                      [ 2.0, 1.5 ],
                      [ 1.7, 1.9 ],
                      [ 1.7, 1.9 ],
                      [ 5.7, 5.9 ],
                      [ 5.7, 5.9 ],
                      [ 3.1, 3.3 ],
                      [ 5.4, 5.3 ],
                      [ 5.1, 5.5 ],
                      [ 5.0, 5.5 ],
                      [ 5.1, 5.2 ]])
        mask = numpy.array([[ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ]], int)

        clusterid, error, nfound = kcluster(data, nclusters=3, mask=mask,
                                            weight=weight, transpose=0,
                                            npass=100, method='a', dist='e')
        self.assertEqual(len(clusterid), len(data))

        correct = [0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1]
        mapping = [clusterid[correct.index(i)] for i in range(nclusters)]
        for i in range(len(clusterid)):
            self.assertEqual(clusterid[i], mapping[correct[i]])

    def test_clusterdistance(self):
        if TestCluster.module=='Bio.Cluster':
            from Bio.Cluster import clusterdistance
        elif TestCluster.module=='Pycluster':
            from Pycluster import clusterdistance

        # First data set
        weight = numpy.array([ 1,1,1,1,1 ])
        data   = numpy.array([[  1.1, 2.2, 3.3, 4.4, 5.5, ], 
                              [  3.1, 3.2, 1.3, 2.4, 1.5, ], 
                              [  4.1, 2.2, 0.3, 5.4, 0.5, ], 
                              [ 12.1, 2.0, 0.0, 5.0, 0.0, ]])
        mask   = numpy.array([[ 1, 1, 1, 1, 1], 
                              [ 1, 1, 1, 1, 1], 
                              [ 1, 1, 1, 1, 1], 
                              [ 1, 1, 1, 1, 1]], int)

        # Cluster assignments
        c1 = [0]
        c2 = [1,2]
        c3 = [3]

        distance = clusterdistance(data, mask=mask, weight=weight,
                                   index1=c1, index2=c2, dist='e',
                                   method='a', transpose=0);
        self.assertAlmostEqual(distance, 6.650, 3)
        distance = clusterdistance(data, mask=mask, weight=weight,
                                   index1=c1, index2=c3, dist='e',
                                   method='a', transpose=0);
        self.assertAlmostEqual(distance, 32.508, 3)
        distance = clusterdistance(data, mask=mask, weight=weight,
                                   index1=c2, index2=c3, dist='e',
                                   method='a', transpose=0);
        self.assertAlmostEqual(distance, 15.118, 3)

        # Second data set
        weight =  numpy.array([ 1,1 ])
        data   =  numpy.array([[ 1.1, 1.2 ],
                         [ 1.4, 1.3 ],
                         [ 1.1, 1.5 ],
                         [ 2.0, 1.5 ],
                         [ 1.7, 1.9 ],
                         [ 1.7, 1.9 ],
                         [ 5.7, 5.9 ],
                         [ 5.7, 5.9 ],
                         [ 3.1, 3.3 ],
                         [ 5.4, 5.3 ],
                         [ 5.1, 5.5 ],
                         [ 5.0, 5.5 ],
                         [ 5.1, 5.2 ]])
        mask = numpy.array([[ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ]], int)

        # Cluster assignments
        c1 = [ 0, 1, 2, 3 ]
        c2 = [ 4, 5, 6, 7 ]
        c3 = [ 8 ]

        distance = clusterdistance(data, mask=mask, weight=weight,
                                   index1=c1, index2=c2, dist='e',
                                   method='a', transpose=0);
        self.assertAlmostEqual(distance, 5.833, 3)
        distance = clusterdistance(data, mask=mask, weight=weight,
                                   index1=c1, index2=c3, dist='e',
                                   method='a', transpose=0);
        self.assertAlmostEqual(distance, 3.298, 3)
        distance = clusterdistance(data, mask=mask, weight=weight,
                                   index1=c2, index2=c3, dist='e',
                                   method='a', transpose=0);
        self.assertAlmostEqual(distance, 0.360, 3)


    def test_treecluster(self):
        if TestCluster.module=='Bio.Cluster':
            from Bio.Cluster import treecluster
        elif TestCluster.module=='Pycluster':
            from Pycluster import treecluster

        # First data set
        weight1 =  [ 1,1,1,1,1 ]
        data1   =  numpy.array([[  1.1, 2.2, 3.3, 4.4, 5.5], 
                                [  3.1, 3.2, 1.3, 2.4, 1.5], 
                                [  4.1, 2.2, 0.3, 5.4, 0.5], 
                                [ 12.1, 2.0, 0.0, 5.0, 0.0]])
        mask1 = numpy.array([[ 1, 1, 1, 1, 1], 
                             [ 1, 1, 1, 1, 1], 
                             [ 1, 1, 1, 1, 1], 
                             [ 1, 1, 1, 1, 1]], int)
      
        # test first data set
        # Pairwise average-linkage clustering"
        tree = treecluster(data=data1, mask=mask1, weight=weight1,
                           transpose=0, method='a', dist='e')
        self.assertEqual(len(tree), len(data1)-1)
        self.assertEqual(tree[0].left, 2)
        self.assertEqual(tree[0].right, 1)
        self.assertAlmostEqual(tree[0].distance, 2.600, 3)
        self.assertEqual(tree[1].left, -1)
        self.assertEqual(tree[1].right, 0)
        self.assertAlmostEqual(tree[1].distance, 7.300, 3)
        self.assertEqual(tree[2].left, 3)
        self.assertEqual(tree[2].right, -2)
        self.assertAlmostEqual(tree[2].distance, 21.348, 3)

        # Pairwise single-linkage clustering
        tree = treecluster(data=data1, mask=mask1, weight=weight1,
                           transpose=0, method='s', dist='e')
        self.assertEqual(len(tree), len(data1)-1)
        self.assertEqual(tree[0].left, 1)
        self.assertEqual(tree[0].right, 2)
        self.assertAlmostEqual(tree[0].distance, 2.600, 3)
        self.assertEqual(tree[1].left, 0)
        self.assertEqual(tree[1].right, -1)
        self.assertAlmostEqual(tree[1].distance, 5.800, 3)
        self.assertEqual(tree[2].left, -2)
        self.assertEqual(tree[2].right, 3)
        self.assertAlmostEqual(tree[2].distance, 12.908, 3)

        # Pairwise centroid-linkage clustering
        tree = treecluster(data=data1, mask=mask1, weight=weight1,
                           transpose=0, method='c', dist='e')
        self.assertEqual(len(tree), len(data1)-1)
        self.assertEqual(tree[0].left, 1)
        self.assertEqual(tree[0].right, 2)
        self.assertAlmostEqual(tree[0].distance, 2.600, 3)
        self.assertEqual(tree[1].left, 0)
        self.assertEqual(tree[1].right, -1)
        self.assertAlmostEqual(tree[1].distance, 6.650, 3)
        self.assertEqual(tree[2].left, -2)
        self.assertEqual(tree[2].right, 3)
        self.assertAlmostEqual(tree[2].distance, 19.437, 3)

        # Pairwise maximum-linkage clustering
        tree = treecluster(data=data1, mask=mask1, weight=weight1,
                           transpose=0, method='m', dist='e')
        self.assertEqual(len(tree), len(data1)-1)
        self.assertEqual(tree[0].left, 2)
        self.assertEqual(tree[0].right, 1)
        self.assertAlmostEqual(tree[0].distance, 2.600, 3)
        self.assertEqual(tree[1].left, -1)
        self.assertEqual(tree[1].right, 0)
        self.assertAlmostEqual(tree[1].distance, 8.800, 3)
        self.assertEqual(tree[2].left, 3)
        self.assertEqual(tree[2].right, -2)
        self.assertAlmostEqual(tree[2].distance, 32.508, 3)
      
        # Second data set
        weight2 =  [ 1,1 ]
        data2 = numpy.array([[ 0.8223, 0.9295 ],
                             [ 1.4365, 1.3223 ],
                             [ 1.1623, 1.5364 ],
                             [ 2.1826, 1.1934 ],
                             [ 1.7763, 1.9352 ],
                             [ 1.7215, 1.9912 ],
                             [ 2.1812, 5.9935 ],
                             [ 5.3290, 5.9452 ],
                             [ 3.1491, 3.3454 ],
                             [ 5.1923, 5.3156 ],
                             [ 4.7735, 5.4012 ],
                             [ 5.1297, 5.5645 ],
                             [ 5.3934, 5.1823 ]])
        mask2 = numpy.array([[ 1, 1 ],
                             [ 1, 1 ],
                             [ 1, 1 ],
                             [ 1, 1 ],
                             [ 1, 1 ],
                             [ 1, 1 ],
                             [ 1, 1 ],
                             [ 1, 1 ],
                             [ 1, 1 ],
                             [ 1, 1 ],
                             [ 1, 1 ],
                             [ 1, 1 ],
                             [ 1, 1 ]], int)
      
        # Test second data set
        # Pairwise average-linkage clustering
        tree = treecluster(data=data2, mask=mask2, weight=weight2,
                           transpose=0, method='a', dist='e')
        self.assertEqual(len(tree), len(data2)-1)
        self.assertEqual(tree[0].left, 5)
        self.assertEqual(tree[0].right, 4)
        self.assertAlmostEqual(tree[0].distance, 0.003, 3)
        self.assertEqual(tree[1].left, 9)
        self.assertEqual(tree[1].right, 12)
        self.assertAlmostEqual(tree[1].distance, 0.029, 3)
        self.assertEqual(tree[2].left, 2)
        self.assertEqual(tree[2].right, 1)
        self.assertAlmostEqual(tree[2].distance, 0.061, 3)
        self.assertEqual(tree[3].left, 11)
        self.assertEqual(tree[3].right, -2)
        self.assertAlmostEqual(tree[3].distance, 0.070, 3)
        self.assertEqual(tree[4].left, -4)
        self.assertEqual(tree[4].right, 10)
        self.assertAlmostEqual(tree[4].distance, 0.128, 3)
        self.assertEqual(tree[5].left, 7)
        self.assertEqual(tree[5].right, -5)
        self.assertAlmostEqual(tree[5].distance, 0.224, 3)
        self.assertEqual(tree[6].left, -3)
        self.assertEqual(tree[6].right, 0)
        self.assertAlmostEqual(tree[6].distance, 0.254, 3)
        self.assertEqual(tree[7].left, -1)
        self.assertEqual(tree[7].right, 3)
        self.assertAlmostEqual(tree[7].distance, 0.391, 3)
        self.assertEqual(tree[8].left, -8)
        self.assertEqual(tree[8].right, -7)
        self.assertAlmostEqual(tree[8].distance, 0.532, 3)
        self.assertEqual(tree[9].left, 8)
        self.assertEqual(tree[9].right, -9)
        self.assertAlmostEqual(tree[9].distance, 3.234, 3)
        self.assertEqual(tree[10].left, -6)
        self.assertEqual(tree[10].right, 6)
        self.assertAlmostEqual(tree[10].distance, 4.636, 3)
        self.assertEqual(tree[11].left, -11)
        self.assertEqual(tree[11].right, -10)
        self.assertAlmostEqual(tree[11].distance, 12.741, 3)
      
        # Pairwise single-linkage clustering
        tree = treecluster(data=data2, mask=mask2, weight=weight2,
                           transpose=0, method='s', dist='e')
        self.assertEqual(len(tree), len(data2)-1)
        self.assertEqual(tree[0].left, 4)
        self.assertEqual(tree[0].right, 5)
        self.assertAlmostEqual(tree[0].distance, 0.003, 3)
        self.assertEqual(tree[1].left, 9)
        self.assertEqual(tree[1].right, 12)
        self.assertAlmostEqual(tree[1].distance, 0.029, 3)
        self.assertEqual(tree[2].left, 11)
        self.assertEqual(tree[2].right, -2)
        self.assertAlmostEqual(tree[2].distance, 0.033, 3)
        self.assertEqual(tree[3].left, 1)
        self.assertEqual(tree[3].right, 2)
        self.assertAlmostEqual(tree[3].distance, 0.061, 3)
        self.assertEqual(tree[4].left, 10)
        self.assertEqual(tree[4].right, -3)
        self.assertAlmostEqual(tree[4].distance, 0.077, 3)
        self.assertEqual(tree[5].left, 7)
        self.assertEqual(tree[5].right, -5)
        self.assertAlmostEqual(tree[5].distance, 0.092, 3)
        self.assertEqual(tree[6].left, 0)
        self.assertEqual(tree[6].right, -4)
        self.assertAlmostEqual(tree[6].distance, 0.242, 3)
        self.assertEqual(tree[7].left, -7)
        self.assertEqual(tree[7].right, -1)
        self.assertAlmostEqual(tree[7].distance, 0.246, 3)
        self.assertEqual(tree[8].left, 3)
        self.assertEqual(tree[8].right, -8)
        self.assertAlmostEqual(tree[8].distance, 0.287, 3)
        self.assertEqual(tree[9].left, -9)
        self.assertEqual(tree[9].right, 8)
        self.assertAlmostEqual(tree[9].distance, 1.936, 3)
        self.assertEqual(tree[10].left, -10)
        self.assertEqual(tree[10].right, -6)
        self.assertAlmostEqual(tree[10].distance, 3.432, 3)
        self.assertEqual(tree[11].left, 6)
        self.assertEqual(tree[11].right, -11)
        self.assertAlmostEqual(tree[11].distance, 3.535, 3)
      
        # Pairwise centroid-linkage clustering
        tree = treecluster(data=data2, mask=mask2, weight=weight2,
                           transpose=0, method='c', dist='e')
        self.assertEqual(len(tree), len(data2)-1)
        self.assertEqual(tree[0].left, 4)
        self.assertEqual(tree[0].right, 5)
        self.assertAlmostEqual(tree[0].distance, 0.003, 3)
        self.assertEqual(tree[1].left, 12)
        self.assertEqual(tree[1].right, 9)
        self.assertAlmostEqual(tree[1].distance, 0.029, 3)
        self.assertEqual(tree[2].left, 1)
        self.assertEqual(tree[2].right, 2)
        self.assertAlmostEqual(tree[2].distance, 0.061, 3)
        self.assertEqual(tree[3].left, -2)
        self.assertEqual(tree[3].right, 11)
        self.assertAlmostEqual(tree[3].distance, 0.063, 3)
        self.assertEqual(tree[4].left, 10)
        self.assertEqual(tree[4].right, -4)
        self.assertAlmostEqual(tree[4].distance, 0.109, 3)
        self.assertEqual(tree[5].left, -5)
        self.assertEqual(tree[5].right, 7)
        self.assertAlmostEqual(tree[5].distance, 0.189, 3)
        self.assertEqual(tree[6].left, 0)
        self.assertEqual(tree[6].right, -3)
        self.assertAlmostEqual(tree[6].distance, 0.239, 3)
        self.assertEqual(tree[7].left, 3)
        self.assertEqual(tree[7].right, -1)
        self.assertAlmostEqual(tree[7].distance, 0.390, 3)
        self.assertEqual(tree[8].left, -7)
        self.assertEqual(tree[8].right, -8)
        self.assertAlmostEqual(tree[8].distance, 0.382, 3)
        self.assertEqual(tree[9].left, -9)
        self.assertEqual(tree[9].right, 8)
        self.assertAlmostEqual(tree[9].distance, 3.063, 3)
        self.assertEqual(tree[10].left, 6)
        self.assertEqual(tree[10].right, -6)
        self.assertAlmostEqual(tree[10].distance, 4.578, 3)
        self.assertEqual(tree[11].left, -10)
        self.assertEqual(tree[11].right, -11)
        self.assertAlmostEqual(tree[11].distance, 11.536, 3)
      
        # Pairwise maximum-linkage clustering
        tree = treecluster(data=data2, mask=mask2, weight=weight2,
                           transpose=0, method='m', dist='e')
        self.assertEqual(len(tree), len(data2)-1)
        self.assertEqual(tree[0].left, 5)
        self.assertEqual(tree[0].right, 4)
        self.assertAlmostEqual(tree[0].distance, 0.003, 3)
        self.assertEqual(tree[1].left, 9)
        self.assertEqual(tree[1].right, 12)
        self.assertAlmostEqual(tree[1].distance, 0.029, 3)
        self.assertEqual(tree[2].left, 2)
        self.assertEqual(tree[2].right, 1)
        self.assertAlmostEqual(tree[2].distance, 0.061, 3)
        self.assertEqual(tree[3].left, 11)
        self.assertEqual(tree[3].right, 10)
        self.assertAlmostEqual(tree[3].distance, 0.077, 3)
        self.assertEqual(tree[4].left, -2)
        self.assertEqual(tree[4].right, -4)
        self.assertAlmostEqual(tree[4].distance, 0.216, 3)
        self.assertEqual(tree[5].left, -3)
        self.assertEqual(tree[5].right, 0)
        self.assertAlmostEqual(tree[5].distance, 0.266, 3)
        self.assertEqual(tree[6].left, -5)
        self.assertEqual(tree[6].right, 7)
        self.assertAlmostEqual(tree[6].distance, 0.302, 3)
        self.assertEqual(tree[7].left, -1)
        self.assertEqual(tree[7].right, 3)
        self.assertAlmostEqual(tree[7].distance, 0.425, 3)
        self.assertEqual(tree[8].left, -8)
        self.assertEqual(tree[8].right, -6)
        self.assertAlmostEqual(tree[8].distance, 0.968, 3)
        self.assertEqual(tree[9].left, 8)
        self.assertEqual(tree[9].right, 6)
        self.assertAlmostEqual(tree[9].distance, 3.975, 3)
        self.assertEqual(tree[10].left, -10)
        self.assertEqual(tree[10].right, -7)
        self.assertAlmostEqual(tree[10].distance, 5.755, 3)
        self.assertEqual(tree[11].left, -11)
        self.assertEqual(tree[11].right, -9)
        self.assertAlmostEqual(tree[11].distance, 22.734, 3)

    def test_somcluster(self):
        if TestCluster.module=='Bio.Cluster':
            from Bio.Cluster import somcluster
        elif TestCluster.module=='Pycluster':
            from Pycluster import somcluster

        # First data set
        weight = [ 1,1,1,1,1 ]
        data = numpy.array([[  1.1, 2.2, 3.3, 4.4, 5.5], 
                            [  3.1, 3.2, 1.3, 2.4, 1.5], 
                            [  4.1, 2.2, 0.3, 5.4, 0.5], 
                            [ 12.1, 2.0, 0.0, 5.0, 0.0]])
        mask = numpy.array([[ 1, 1, 1, 1, 1], 
                            [ 1, 1, 1, 1, 1], 
                            [ 1, 1, 1, 1, 1], 
                            [ 1, 1, 1, 1, 1]], int)

        clusterid, celldata = somcluster(data=data, mask=mask, weight=weight,
                                         transpose=0, nxgrid=10, nygrid=10,
                                         inittau=0.02, niter=100, dist='e')
        self.assertEqual(len(clusterid), len(data))
        self.assertEqual(len(clusterid[0]), 2)

        # Second data set
        weight =  [ 1,1 ]
        data = numpy.array([[ 1.1, 1.2 ],
                            [ 1.4, 1.3 ],
                            [ 1.1, 1.5 ],
                            [ 2.0, 1.5 ],
                            [ 1.7, 1.9 ],
                            [ 1.7, 1.9 ],
                            [ 5.7, 5.9 ],
                            [ 5.7, 5.9 ],
                            [ 3.1, 3.3 ],
                            [ 5.4, 5.3 ],
                            [ 5.1, 5.5 ],
                            [ 5.0, 5.5 ],
                            [ 5.1, 5.2 ]])
        mask = numpy.array([[ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ],
                            [ 1, 1 ]], int)

        clusterid, celldata = somcluster(data=data, mask=mask, weight=weight,
                                         transpose=0, nxgrid=10, nygrid=10,
                                         inittau=0.02, niter=100, dist='e')
        self.assertEqual(len(clusterid), len(data))
        self.assertEqual(len(clusterid[0]), 2)

    def test_distancematrix_kmedoids(self):
        if TestCluster.module=='Bio.Cluster':
            from Bio.Cluster import distancematrix, kmedoids
        elif TestCluster.module=='Pycluster':
            from Pycluster import distancematrix, kmedoids

        data = numpy.array([[2.2, 3.3, 4.4],
                            [2.1, 1.4, 5.6],
                            [7.8, 9.0, 1.2],
                            [4.5, 2.3, 1.5],
                            [4.2, 2.4, 1.9],
                            [3.6, 3.1, 9.3],
                            [2.3, 1.2, 3.9],
                            [4.2, 9.6, 9.3],
                            [1.7, 8.9, 1.1]])
        mask = numpy.array([[1, 1, 1],
                            [1, 1, 1],
                            [0, 1, 1],
                            [1, 1, 1],
                            [1, 1, 1],
                            [0, 1, 0],
                            [1, 1, 1],
                            [1, 0, 1],
                            [1, 1, 1]], int)
        weight = numpy.array([2.0, 1.0, 0.5])
        matrix = distancematrix(data, mask=mask, weight=weight)

        self.assertAlmostEqual(matrix[1][0], 1.243, 3)

        self.assertAlmostEqual(matrix[2][0], 25.073, 3)
        self.assertAlmostEqual(matrix[2][1], 44.960, 3)

        self.assertAlmostEqual(matrix[3][0], 4.510, 3)
        self.assertAlmostEqual(matrix[3][1], 5.924, 3)
        self.assertAlmostEqual(matrix[3][2], 29.957, 3)

        self.assertAlmostEqual(matrix[4][0], 3.410, 3)
        self.assertAlmostEqual(matrix[4][1], 4.761, 3)
        self.assertAlmostEqual(matrix[4][2], 29.203, 3)
        self.assertAlmostEqual(matrix[4][3], 0.077, 3)

        self.assertAlmostEqual(matrix[5][0], 0.040, 3)
        self.assertAlmostEqual(matrix[5][1], 2.890, 3)
        self.assertAlmostEqual(matrix[5][2], 34.810, 3)
        self.assertAlmostEqual(matrix[5][3], 0.640, 3)
        self.assertAlmostEqual(matrix[5][4], 0.490, 3)

        self.assertAlmostEqual(matrix[6][0], 1.301, 3)
        self.assertAlmostEqual(matrix[6][1], 0.447, 3)
        self.assertAlmostEqual(matrix[6][2], 42.990, 3)
        self.assertAlmostEqual(matrix[6][3], 3.934, 3)
        self.assertAlmostEqual(matrix[6][4], 3.046, 3)
        self.assertAlmostEqual(matrix[6][5], 3.610, 3)

        self.assertAlmostEqual(matrix[7][0], 8.002, 3)
        self.assertAlmostEqual(matrix[7][1], 6.266, 3)
        self.assertAlmostEqual(matrix[7][2], 65.610, 3)
        self.assertAlmostEqual(matrix[7][3], 12.240, 3)
        self.assertAlmostEqual(matrix[7][4], 10.952, 3)
        self.assertAlmostEqual(matrix[7][5], 0.000, 3)
        self.assertAlmostEqual(matrix[7][6], 8.720, 3)

        self.assertAlmostEqual(matrix[8][0], 10.659, 3)
        self.assertAlmostEqual(matrix[8][1], 19.056, 3)
        self.assertAlmostEqual(matrix[8][2], 0.010, 3)
        self.assertAlmostEqual(matrix[8][3], 16.949, 3)
        self.assertAlmostEqual(matrix[8][4], 15.734, 3)
        self.assertAlmostEqual(matrix[8][5], 33.640, 3)
        self.assertAlmostEqual(matrix[8][6], 18.266, 3)
        self.assertAlmostEqual(matrix[8][7], 18.448, 3)
        clusterid, error, nfound = kmedoids(matrix, npass=1000)
        self.assertEqual(clusterid[0], 5)
        self.assertEqual(clusterid[1], 5)
        self.assertEqual(clusterid[2], 2)
        self.assertEqual(clusterid[3], 5)
        self.assertEqual(clusterid[4], 5)
        self.assertEqual(clusterid[5], 5)
        self.assertEqual(clusterid[6], 5)
        self.assertEqual(clusterid[7], 5)
        self.assertEqual(clusterid[8], 2)
        self.assertAlmostEqual(error, 7.680, 3)

    def test_pca(self):
        if TestCluster.module=='Bio.Cluster':
            from Bio.Cluster import pca
        elif TestCluster.module=='Pycluster':
            from Pycluster import pca

        data = numpy.array([[ 3.1, 1.2 ],
                            [ 1.4, 1.3 ],
                            [ 1.1, 1.5 ],
                            [ 2.0, 1.5 ],
                            [ 1.7, 1.9 ],
                            [ 1.7, 1.9 ],
                            [ 5.7, 5.9 ],
                            [ 5.7, 5.9 ],
                            [ 3.1, 3.3 ],
                            [ 5.4, 5.3 ],
                            [ 5.1, 5.5 ],
                            [ 5.0, 5.5 ],
                            [ 5.1, 5.2 ],
                           ])

        mean, coordinates, pc, eigenvalues =  pca(data)
        self.assertAlmostEqual(mean[0], 3.5461538461538464)
        self.assertAlmostEqual(mean[1], 3.5307692307692311)
        self.assertAlmostEqual(coordinates[0,0],  2.0323189722653883)
        self.assertAlmostEqual(coordinates[0,1],  1.2252420399694917)
        self.assertAlmostEqual(coordinates[1,0],  3.0936985166252251)
        self.assertAlmostEqual(coordinates[1,1], -0.10647619705157851)
        self.assertAlmostEqual(coordinates[2,0],  3.1453186907749426)
        self.assertAlmostEqual(coordinates[2,1], -0.46331699855941139)
        self.assertAlmostEqual(coordinates[3,0],  2.5440202962223761)
        self.assertAlmostEqual(coordinates[3,1],  0.20633980959571077)
        self.assertAlmostEqual(coordinates[4,0],  2.4468278463376221)
        self.assertAlmostEqual(coordinates[4,1], -0.28412285736824866)
        self.assertAlmostEqual(coordinates[5,0],  2.4468278463376221)
        self.assertAlmostEqual(coordinates[5,1], -0.28412285736824866)
        self.assertAlmostEqual(coordinates[6,0], -3.2018619434743254)
        self.assertAlmostEqual(coordinates[6,1],  0.019692314198662915)
        self.assertAlmostEqual(coordinates[7,0], -3.2018619434743254)
        self.assertAlmostEqual(coordinates[7,1],  0.019692314198662915)
        self.assertAlmostEqual(coordinates[8,0],  0.46978641990344067)
        self.assertAlmostEqual(coordinates[8,1], -0.17778754731982949)
        self.assertAlmostEqual(coordinates[9,0], -2.5549912731867215)
        self.assertAlmostEqual(coordinates[9,1],  0.19733897451533403)
        self.assertAlmostEqual(coordinates[10,0], -2.5033710990370044)
        self.assertAlmostEqual(coordinates[10,1], -0.15950182699250004)
        self.assertAlmostEqual(coordinates[11,0], -2.4365601663089413)
        self.assertAlmostEqual(coordinates[11,1], -0.23390813900973562)
        self.assertAlmostEqual(coordinates[12,0], -2.2801521629852974)
        self.assertAlmostEqual(coordinates[12,1],  0.0409309711916888)
        self.assertAlmostEqual(pc[0,0], -0.66810932728062988)
        self.assertAlmostEqual(pc[0,1], -0.74406312017235743)
        self.assertAlmostEqual(pc[1,0],  0.74406312017235743)
        self.assertAlmostEqual(pc[1,1], -0.66810932728062988)
        self.assertAlmostEqual(eigenvalues[0], 9.3110471246032844)
        self.assertAlmostEqual(eigenvalues[1], 1.4437456297481428)

        data = numpy.array([[ 2.3, 4.5, 1.2, 6.7, 5.3, 7.1],
                            [ 1.3, 6.5, 2.2, 5.7, 6.2, 9.1],
                            [ 3.2, 7.2, 3.2, 7.4, 7.3, 8.9],
                            [ 4.2, 5.2, 9.2, 4.4, 6.3, 7.2]])
        mean, coordinates, pc, eigenvalues =  pca(data)
        self.assertAlmostEqual(mean[0], 2.7500)
        self.assertAlmostEqual(mean[1], 5.8500)
        self.assertAlmostEqual(mean[2], 3.9500)
        self.assertAlmostEqual(mean[3], 6.0500)
        self.assertAlmostEqual(mean[4], 6.2750)
        self.assertAlmostEqual(mean[5], 8.0750)
        self.assertAlmostEqual(coordinates[0,0],  2.6460846688406905)
        self.assertAlmostEqual(coordinates[0,1], -2.1421701432732418)
        self.assertAlmostEqual(coordinates[0,2], -0.56620932754145858)
        self.assertAlmostEqual(coordinates[0,3],  0.0)
        self.assertAlmostEqual(coordinates[1,0],  2.0644120899917544)
        self.assertAlmostEqual(coordinates[1,1],  0.55542108669180323)
        self.assertAlmostEqual(coordinates[1,2],  1.4818772348457117)
        self.assertAlmostEqual(coordinates[1,3],  0.0)
        self.assertAlmostEqual(coordinates[2,0],  1.0686641862092987)
        self.assertAlmostEqual(coordinates[2,1],  1.9994412069101073)
        self.assertAlmostEqual(coordinates[2,2], -1.000720598980291)
        self.assertAlmostEqual(coordinates[2,3],  0.0)
        self.assertAlmostEqual(coordinates[3,0], -5.77916094504174)
        self.assertAlmostEqual(coordinates[3,1], -0.41269215032867046)
        self.assertAlmostEqual(coordinates[3,2],  0.085052691676038017)
        self.assertAlmostEqual(coordinates[3,3],  0.0)
        self.assertAlmostEqual(pc[0,0], -0.26379660005997291)
        self.assertAlmostEqual(pc[0,1],  0.064814972617134495)
        self.assertAlmostEqual(pc[0,2], -0.91763310094893846)
        self.assertAlmostEqual(pc[0,3],  0.26145408875373249)
        self.assertAlmostEqual(pc[1,0],  0.05073770520434398)
        self.assertAlmostEqual(pc[1,1],  0.68616983388698793)
        self.assertAlmostEqual(pc[1,2],  0.13819106187213354)
        self.assertAlmostEqual(pc[1,3],  0.19782544121828985)
        self.assertAlmostEqual(pc[2,0], -0.63000893660095947)
        self.assertAlmostEqual(pc[2,1],  0.091155993862151397)
        self.assertAlmostEqual(pc[2,2],  0.045630391256086845)
        self.assertAlmostEqual(pc[2,3], -0.67456694780914772)
        # As the last eigenvalue is zero, the corresponding eigenvector is
        # strongly affected by roundoff error, and is not being tested here.
        # For PCA, this doesn't matter since all data have a zero coefficient
        # along this eigenvector.
        self.assertAlmostEqual(eigenvalues[0], 6.7678878332578778)
        self.assertAlmostEqual(eigenvalues[1], 3.0108911400291856)
        self.assertAlmostEqual(eigenvalues[2], 1.8775592718563467)
        self.assertAlmostEqual(eigenvalues[3], 0.0)

if __name__ == "__main__":
    # Run the suite against Bio.Cluster, reporting each test verbosely.
    TestCluster.module = 'Bio.Cluster'
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))

# Listing generated by Doxygen 1.6.0.