Logo Search packages:      
Sourcecode: python-biopython version File versions  Download package


#!/usr/bin/env python

This module provides code for doing k-nearest-neighbors classification.

k Nearest Neighbors is a supervised learning algorithm that classifies
a new observation based the classes in its surrounding neighborhood.

distance   The distance between two points in the feature space.
weight     The importance given to each point for classification. 

kNN           Holds information for a nearest neighbors classifier.

train        Train a new kNN classifier.
calculate    Calculate the probabilities of each class, given an observation.
classify     Classify an observation into a class.

    Weighting Functions:
equal_weight    Every example is given a weight of 1.

    from Numeric import *
except ImportError, x:
    raise ImportError, "This module requires Numeric (precursor to NumPy)"

from Bio import listfns
from Bio import distance

00035 class kNN:
    """Holds information necessary to do nearest neighbors classification.

    classes  List of the possible classes.
    xs       List of the neighbors.
    ys       List of the classes that the neighbors belong to.
    k        Number of neighbors to look at.

00045     def __init__(self):
        self.classes = []
        self.xs = []
        self.ys = []
        self.k = None

def equal_weight(x, y):
    """equal_weight(x, y) -> 1"""
    # everything gets 1 vote
    return 1

def train(xs, ys, k, typecode=None):
    """train(xs, ys, k) -> kNN
    Train a k nearest neighbors classifier on a training set.  xs is a
    list of observations and ys is a list of the class assignments.
    Thus, xs and ys should contain the same number of elements.  k is
    the number of neighbors that should be examined when doing the
    knn = kNN()
    knn.classes = listfns.items(ys)
    knn.xs = asarray(xs, typecode)
    knn.ys = ys
    knn.k = k
    return knn

def calculate(knn, x, weight_fn=equal_weight, distance_fn=distance.euclidean):
    """calculate(knn, x[, weight_fn][, distance_fn]) -> weight dict

    Calculate the probability for each class.  knn is a kNN object.  x
    is the observed data.  weight_fn is an optional function that
    takes x and a training example, and returns a weight.  distance_fn
    is an optional function that takes two points and returns the
    distance between them.  Returns a dictionary of the class to the
    weight given to the class.
    x = asarray(x)

    order = []  # list of (distance, index)
    for i in range(len(knn.xs)):
        dist = distance_fn(x, knn.xs[i])
        order.append((dist, i))

    # first 'k' are the ones I want.
    weights = {}  # class -> number of votes
    for k in knn.classes:
        weights[k] = 0.0
    for dist, i in order[:knn.k]:
        klass = knn.ys[i]
        weights[klass] = weights[klass] + weight_fn(x, knn.xs[i])

    return weights

def classify(knn, x, weight_fn=equal_weight, distance_fn=distance.euclidean):
    """classify(knn, x[, weight_fn][, distance_fn]) -> class

    Classify an observation into a class.  If not specified, weight_fn will
    give all neighbors equal weight and distance_fn will be the euclidean

    weights = calculate(
        knn, x, weight_fn=weight_fn, distance_fn=distance_fn)

    most_class = None
    most_weight = None
    for klass, weight in weights.items():
        if most_class is None or weight > most_weight:
            most_class = klass
            most_weight = weight
    return most_class

Generated by  Doxygen 1.6.0   Back to index