Using the DecisionBoundary() class of the Bayes classifier with Cha's Dichotomy Model

Transform the 3-class problem into a 2-class problem (within-class vs. out-of-class distances), then show the decision boundary between the two classes.

In [1]:
# Notebook author metadata (module-level dunder attributes).
__author__ = "A.Aziz Altowayan"
__email__ = "aa10212w@pace.edu"

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import math
from random import randrange

In [2]:
def dist_point(p0, p1):
    """Component-wise absolute difference between two 2-D points.

    Note: this is the per-axis |dx|, |dy| pair, not a scalar distance.

    Parameters
    ----------
    p0, p1 : indexable 2-D points (x, y)

    Returns
    -------
    tuple (|x0 - x1|, |y0 - y1|)
    """
    dx = abs(p0[0] - p1[0])
    dy = abs(p0[1] - p1[1])
    return dx, dy

In [3]:
# get distances of inside-class points
def get_in_distances(c):
    """Component-wise |dx|, |dy| for every unordered pair of points in one class.

    Replaces the original recursive implementation (which re-sliced the list at
    every level and risked RecursionError for large classes) with plain nested
    loops; pair order is identical: (0,1), (0,2), ..., (1,2), ...

    Parameters
    ----------
    c : sequence of 2-D points (indexable as p[0], p[1])

    Returns
    -------
    np.ndarray of shape (k*(k-1)/2, 2); empty array when fewer than 2 points.
    """
    distances = []
    n = len(c)
    for i in range(n):
        for j in range(i + 1, n):
            p, q = c[i], c[j]
            # inlined dist_point(p, q): per-axis absolute differences
            distances.append([abs(p[0] - q[0]), abs(p[1] - q[1])])
    return np.array(distances)

In [4]:
# get distances of outside-class points
def get_out_distances(c1, c2):
    """Component-wise |dx|, |dy| for every cross-class pair (one point from each class).

    Replaces the original recursive implementation (list re-slicing, recursion
    depth proportional to len(c1)) with nested loops; pair order is identical:
    c1[0] against all of c2, then c1[1] against all of c2, etc.

    Parameters
    ----------
    c1, c2 : sequences of 2-D points (indexable as p[0], p[1])

    Returns
    -------
    np.ndarray of shape (len(c1)*len(c2), 2), or None if either class is
    empty (preserves the original contract).
    """
    if len(c1) < 1 or len(c2) < 1:
        return None
    distances = []
    for p in c1:
        for q in c2:
            # inlined dist_point(p, q): per-axis absolute differences
            distances.append([abs(p[0] - q[0]), abs(p[1] - q[1])])
    return np.array(distances)

In [11]:
# generate random datasets (by group choice)
def get_data(x1=[0,20], x2=[0,20], y1=[30,50], y2=[30,50], z1=[60,80], z2=[60,80], n=5, group=0):
    """Generate three random 2-D classes of n integer points each.

    Each class k draws its x coordinate from the half-open range given by the
    first list and its y coordinate from the second (via randrange, upper
    bound exclusive). The defaults give three well-separated classes.

    group="messedup" : overlapping ("scattered") class ranges
    group="negative" : ranges that include negative coordinates

    Returns
    -------
    (d1, d2, d3) : three np.ndarray of shape (n, 2)
    """
    if group == "messedup":  # overlapping classes
        x1 = [0, 20];  x2 = [0, 20]
        y1 = [0, 30];  y2 = [0, 30]
        z1 = [20, 40]; z2 = [20, 40]
    elif group == "negative":  # classes with negative coordinates
        x1 = [-20, 0]; x2 = [-20, 0]
        y1 = [-20, 0]; y2 = [40, 60]
        z1 = [60, 80]; z2 = [60, 80]

    def sample(xr, yr):
        # n points with x ~ randrange(*xr) and y ~ randrange(*yr)
        return np.array([[randrange(xr[0], xr[1]), randrange(yr[0], yr[1])]
                         for _ in range(n)])

    return sample(x1, x2), sample(y1, y2), sample(z1, z2)

In [12]:
""" dataset group choices:
group="messedup"  scattered points
group="negative"
"""
c1, c2, c3 = get_data(group="messedup")

In [13]:
# original points
# Transpose each (n, 2) class array to (2, n) so row 0 is x and row 1 is y.
w1, w2, w3 = c1.T, c2.T, c3.T

print("Original 3-classes datasets")
plt.scatter(w1[0],w1[1], c='r', label="c1")
plt.scatter(w2[0],w2[1], c='g', label="c2")
plt.scatter(w3[0],w3[1], c='b', label="c3")
plt.grid();plt.legend(bbox_to_anchor = (1.5, 1));plt.show()

# in-class distances
# Each d* is (2, k) after .T: row 0 = |dx| values, row 1 = |dy| values.
d1 = get_in_distances(c1).T
d2 = get_in_distances(c2).T
d3 = get_in_distances(c3).T

# NOTE(review): the grid/legend/show line below these scatters is commented
# out, so the in-class markers render on the same figure as the
# outside-class scatters further down.
plt.scatter(d1[0],d1[1], c='r', marker='x', label="dist: in-class c1")
plt.scatter(d2[0],d2[1], c='g', marker='x', label="dist: in-class c2")
plt.scatter(d3[0],d3[1], c='b', marker='x', label="dist: in-class c3")
# plt.grid();plt.legend(bbox_to_anchor = (1.5, 1));plt.show()

# outside-class distances
d12 = get_out_distances(c1,c2).T
d23 = get_out_distances(c2,c3).T
d13 = get_out_distances(c1,c3).T

print("Transformed 2-classes datasets")
# NOTE(review): d13 reuses c='r', the same color as the in-class c1 markers above.
plt.scatter(d12[0],d12[1], c='c', label="dist: c1-c2")
plt.scatter(d23[0],d23[1], c='k', label="dist: c2-c3")
plt.scatter(d13[0],d13[1], c='r', label="dist: c1-c3")
plt.grid()
plt.legend(bbox_to_anchor = (1.5, 1))
plt.show()

Original 3-classes datasets


Transformed 2-classes datasets


In [14]:
import sys
# NOTE(review): hardcoded absolute local path — breaks on any other machine;
# consider a configurable path or installing the module properly.
sys.path.append("/Users/Aziz/Desktop/")
import bayesclassifier  # local module providing the decision-boundary classifier

In [16]:
# Build the two transformed classes: "inside" = within-class distances,
# "outside" = between-class distances (each is (2, k): row 0 = x, row 1 = y).
inside = np.concatenate((d1,d2,d3),axis=1)
outside = np.concatenate((d12,d23,d13),axis=1)
plt.scatter(inside[0], inside[1], c='g', label="in-class")
plt.scatter(outside[0], outside[1], c='r', label="out-class")
plt.title("in-class and out-class sample distances");plt.legend();plt.show()

## get classifier
# NOTE(review): spelled "DescisionBoundary" here but "DecisionBoundary" in the
# notebook title — presumably this matches the actual class name in
# bayesclassifier; verify against that module.
decision = bayesclassifier.DescisionBoundary(inside, outside)
gx1 = decision.disc_function(c=1)  # presumably the discriminant for class 1 (in-class) — verify in bayesclassifier
gx2 = decision.disc_function(c=2)  # presumably the discriminant for class 2 (out-class)
g = gx1 - gx2  # difference of discriminants fed to the model fit
model = decision.get_model(g, poly=1)  # poly=1 — presumably a linear boundary; confirm
x2 = np.arange(-20,50,0.1)  # x-range over which the boundary is evaluated
bound = decision.get_decision(x2, model)
# Plot the boundary twice, toggling the density option of plot_boundary.
decision.plot_boundary(bound, x2, density=0, lab1="in-class", lab2="out-class", color1='k')
decision.plot_boundary(bound, x2, density=1, lab1="in-class", lab2="out-class", color1='k')