In [1]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

Classification

Get some data to play with

In [2]:
# Load scikit-learn's digits dataset and keep it in `digits` for the rest of the notebook.
from sklearn.datasets import load_digits
digits = load_digits()
# Last expression of the cell, so the available fields are shown as the cell output.
digits.keys()
Out[2]:
dict_keys(['target', 'data', 'target_names', 'DESCR', 'images'])
In [3]:
# `images` is a 3-D array: one 2-D pixel grid per sample (see the shape in the output).
digits.images.shape
Out[3]:
(1797, 8, 8)
In [4]:
# Print the raw numeric values of the first image array.
print(digits.images[0])
[[  0.   0.   5.  13.   9.   1.   0.   0.]
 [  0.   0.  13.  15.  10.  15.   5.   0.]
 [  0.   3.  15.   2.   0.  11.   8.   0.]
 [  0.   4.  12.   0.   0.   8.   8.   0.]
 [  0.   5.   8.   0.   0.   9.   8.   0.]
 [  0.   4.  11.   0.   1.  12.   7.   0.]
 [  0.   2.  14.   5.  10.  12.   0.   0.]
 [  0.   0.   6.  13.  10.   0.   0.   0.]]
In [5]:
# Draw the same first image as a picture, using the Greys colormap.
plt.matshow(digits.images[0], cmap=plt.cm.Greys)
Out[5]:
<matplotlib.image.AxesImage at 0x7f9e86984748>
In [6]:
# `data` is the 2-D matrix that is split and fed to the estimators below;
# presumably each 8x8 image flattened into a row of 64 values.
digits.data.shape
Out[6]:
(1797, 64)
In [7]:
# `target` is 1-D with one label per sample.
digits.target.shape
Out[7]:
(1797,)
In [8]:
# Peek at the label values themselves.
digits.target
Out[8]:
array([0, 1, 2, ..., 8, 9, 8])

scikit-learn expects the data as a NumPy array (or SciPy sparse matrix) of shape (n_samples, n_features): one row per sample, one column per feature.

Split the data to get going

In [9]:
# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.cross_validation module was removed in 0.20. Try the current
# location first and fall back only for very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Hold out part of the samples for evaluation (library-default split sizes).
# A fixed random_state makes the split itself identical on every run, so the
# notebook survives Restart & Run All with the same train/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, random_state=0)

Really Simple API

0) Import your model class

In [10]:
from sklearn.svm import LinearSVC

1) Instantiate an object and set the parameters

In [11]:
# Hyperparameters are passed to the constructor; C is LinearSVC's regularization
# parameter (smaller C means stronger regularization).
svm = LinearSVC(C=0.1)

2) Fit the model

In [12]:
# Train on the training split only; the test split is kept back for evaluation below.
svm.fit(X_train, y_train)
Out[12]:
LinearSVC(C=0.1, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='l2', multi_class='ovr', penalty='l2',
     random_state=None, tol=0.0001, verbose=0)

3) Apply / evaluate

In [13]:
# Show the model's predictions on the training data directly above the true
# labels so the two (truncated) arrays can be compared by eye.
print(svm.predict(X_train))
print(y_train)
[2 3 3 ..., 9 7 0]
[2 3 3 ..., 9 7 0]
In [14]:
# Mean accuracy on the training split, i.e. on data the model has already seen.
svm.score(X_train, y_train)
Out[14]:
0.99628804751299183
In [15]:
# Same metric on the held-out test split, which was not used for fitting.
svm.score(X_test, y_test)
Out[15]:
0.95333333333333337

And again

In [16]:
from sklearn.ensemble import RandomForestClassifier
In [17]:
# A second estimator, built the same way: a random forest with 50 trees.
rf = RandomForestClassifier(n_estimators=50)
In [18]:
# Same fit(X, y) call as for the LinearSVC earlier in the notebook.
rf.fit(X_train, y_train)
Out[18]:
RandomForestClassifier(bootstrap=True, compute_importances=None,
            criterion='gini', max_depth=None, max_features='auto',
            max_leaf_nodes=None, min_density=None, min_samples_leaf=1,
            min_samples_split=2, n_estimators=50, n_jobs=1,
            oob_score=False, random_state=None, verbose=0)
In [19]:
# Predicted label for every sample in the test split (the full array is displayed).
rf.predict(X_test)
Out[19]:
array([6, 8, 8, 9, 6, 2, 3, 3, 9, 9, 5, 6, 2, 8, 1, 6, 1, 5, 8, 1, 1, 1, 3,
       0, 0, 6, 7, 0, 2, 5, 8, 0, 8, 6, 8, 0, 4, 6, 6, 7, 5, 3, 1, 7, 9, 6,
       7, 8, 9, 6, 1, 5, 3, 1, 6, 7, 2, 3, 2, 7, 8, 0, 9, 0, 2, 3, 9, 2, 2,
       0, 0, 2, 6, 6, 5, 4, 9, 0, 3, 5, 7, 2, 6, 2, 3, 0, 5, 4, 8, 6, 6, 6,
       0, 2, 3, 6, 7, 3, 7, 7, 2, 1, 5, 2, 7, 4, 4, 2, 7, 0, 6, 9, 9, 3, 7,
       3, 7, 2, 4, 8, 0, 4, 7, 8, 1, 3, 9, 6, 3, 7, 7, 8, 8, 6, 9, 3, 1, 2,
       3, 9, 4, 4, 0, 9, 2, 1, 0, 1, 3, 6, 6, 9, 0, 4, 8, 5, 2, 0, 2, 2, 1,
       2, 6, 8, 4, 5, 8, 5, 9, 4, 5, 6, 6, 4, 9, 0, 9, 7, 0, 3, 3, 5, 6, 3,
       5, 6, 3, 1, 0, 9, 6, 4, 4, 3, 8, 5, 5, 4, 1, 5, 4, 4, 1, 3, 7, 2, 3,
       3, 8, 1, 4, 6, 1, 6, 1, 4, 9, 7, 0, 5, 7, 9, 5, 3, 0, 3, 3, 4, 5, 8,
       4, 4, 0, 7, 4, 0, 6, 8, 6, 0, 2, 8, 7, 6, 1, 4, 3, 9, 0, 7, 9, 2, 2,
       5, 4, 1, 6, 0, 1, 9, 0, 2, 3, 1, 7, 3, 7, 7, 4, 9, 3, 5, 4, 7, 7, 7,
       1, 3, 1, 2, 5, 8, 3, 3, 3, 0, 7, 3, 8, 9, 7, 6, 0, 7, 1, 5, 4, 9, 2,
       4, 4, 9, 9, 8, 2, 2, 7, 5, 9, 7, 4, 9, 8, 2, 8, 2, 9, 5, 1, 2, 8, 6,
       5, 8, 0, 5, 5, 9, 2, 1, 4, 9, 3, 8, 1, 5, 4, 6, 7, 3, 6, 0, 9, 8, 2,
       2, 1, 1, 0, 7, 9, 3, 1, 2, 7, 2, 8, 6, 4, 5, 8, 2, 3, 4, 8, 2, 9, 2,
       1, 5, 6, 3, 2, 7, 4, 6, 3, 8, 9, 8, 1, 7, 8, 1, 7, 3, 2, 9, 8, 7, 9,
       6, 5, 3, 0, 8, 0, 5, 6, 6, 2, 1, 4, 3, 0, 3, 6, 9, 5, 8, 3, 0, 0, 8,
       4, 0, 6, 5, 9, 1, 8, 4, 2, 0, 1, 8, 1, 2, 2, 5, 7, 0, 6, 5, 4, 4, 0,
       8, 5, 3, 5, 5, 5, 9, 1, 2, 0, 4, 1, 5])
In [20]:
# Mean accuracy of the forest on the held-out test split.
rf.score(X_test, y_test)
Out[20]:
0.97777777777777775

Classifier Comparison

<img src="classifier_comparison.png" width=100%>

Labels Can Be Anything

In [21]:
# Spelled-out class names, ordered so that position i holds the name of digit i.
numbers = np.asarray([
    "zero", "one", "two", "three", "four",
    "five", "six", "seven", "eight", "nine",
])
In [22]:
# Index the name array with the integer labels to get one string label per training sample.
y_train_string = numbers[y_train]
# Refit the existing `svm` object on the string labels; this replaces the
# earlier fit on integer labels.
svm.fit(X_train, y_train_string)
Out[22]:
LinearSVC(C=0.1, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='l2', multi_class='ovr', penalty='l2',
     random_state=None, tol=0.0001, verbose=0)
In [23]:
# Predictions now come back as the string labels the model was refit on.
svm.predict(X_test)
Out[23]:
array(['six', 'eight', 'eight', 'nine', 'six', 'two', 'three', 'three',
       'nine', 'nine', 'five', 'six', 'two', 'eight', 'one', 'six', 'one',
       'five', 'eight', 'one', 'one', 'one', 'three', 'zero', 'zero',
       'six', 'seven', 'zero', 'two', 'five', 'eight', 'zero', 'eight',
       'six', 'eight', 'zero', 'four', 'six', 'six', 'seven', 'five',
       'three', 'one', 'seven', 'nine', 'six', 'seven', 'eight', 'nine',
       'six', 'one', 'five', 'three', 'one', 'six', 'seven', 'two',
       'three', 'two', 'seven', 'eight', 'zero', 'nine', 'zero', 'two',
       'three', 'nine', 'two', 'two', 'zero', 'zero', 'two', 'six', 'six',
       'five', 'four', 'nine', 'zero', 'three', 'five', 'seven', 'two',
       'six', 'two', 'three', 'zero', 'five', 'four', 'eight', 'six',
       'six', 'six', 'zero', 'two', 'three', 'six', 'seven', 'three',
       'eight', 'seven', 'eight', 'one', 'five', 'two', 'seven', 'four',
       'four', 'two', 'seven', 'zero', 'six', 'nine', 'nine', 'three',
       'seven', 'three', 'seven', 'two', 'four', 'eight', 'zero', 'four',
       'seven', 'eight', 'one', 'three', 'nine', 'six', 'three', 'five',
       'seven', 'eight', 'eight', 'six', 'nine', 'three', 'one', 'two',
       'three', 'nine', 'four', 'four', 'zero', 'nine', 'two', 'one',
       'zero', 'one', 'three', 'six', 'six', 'nine', 'zero', 'four',
       'eight', 'five', 'two', 'zero', 'two', 'two', 'one', 'two', 'six',
       'eight', 'four', 'five', 'eight', 'five', 'nine', 'four', 'five',
       'six', 'six', 'four', 'nine', 'zero', 'nine', 'seven', 'zero',
       'three', 'three', 'five', 'six', 'eight', 'five', 'six', 'three',
       'one', 'zero', 'nine', 'six', 'four', 'four', 'three', 'eight',
       'five', 'five', 'four', 'one', 'five', 'four', 'four', 'eight',
       'three', 'seven', 'two', 'three', 'three', 'eight', 'one', 'four',
       'six', 'one', 'six', 'one', 'four', 'nine', 'seven', 'zero', 'five',
       'seven', 'nine', 'five', 'three', 'zero', 'three', 'three', 'four',
       'five', 'eight', 'four', 'four', 'zero', 'seven', 'four', 'zero',
       'six', 'eight', 'six', 'zero', 'two', 'eight', 'seven', 'six',
       'one', 'four', 'three', 'nine', 'zero', 'seven', 'nine', 'two',
       'two', 'five', 'four', 'one', 'six', 'zero', 'one', 'nine', 'zero',
       'two', 'three', 'one', 'seven', 'three', 'seven', 'seven', 'four',
       'nine', 'eight', 'five', 'four', 'seven', 'seven', 'seven', 'eight',
       'three', 'one', 'two', 'two', 'eight', 'three', 'three', 'three',
       'zero', 'seven', 'three', 'eight', 'nine', 'seven', 'six', 'zero',
       'eight', 'one', 'five', 'four', 'nine', 'two', 'four', 'four',
       'nine', 'nine', 'eight', 'two', 'two', 'five', 'five', 'nine',
       'seven', 'four', 'nine', 'eight', 'two', 'eight', 'two', 'nine',
       'five', 'one', 'two', 'eight', 'six', 'five', 'eight', 'zero',
       'five', 'five', 'nine', 'two', 'one', 'four', 'nine', 'three',
       'eight', 'one', 'five', 'four', 'six', 'one', 'three', 'six',
       'zero', 'nine', 'eight', 'two', 'two', 'one', 'one', 'zero',
       'seven', 'nine', 'three', 'one', 'two', 'seven', 'two', 'eight',
       'six', 'four', 'five', 'eight', 'two', 'three', 'four', 'eight',
       'two', 'nine', 'two', 'three', 'five', 'six', 'three', 'two',
       'seven', 'four', 'six', 'three', 'eight', 'nine', 'eight', 'three',
       'seven', 'eight', 'one', 'seven', 'three', 'two', 'nine', 'eight',
       'seven', 'nine', 'six', 'five', 'three', 'zero', 'eight', 'zero',
       'five', 'six', 'six', 'two', 'one', 'one', 'three', 'zero', 'three',
       'eight', 'nine', 'five', 'eight', 'eight', 'zero', 'zero', 'eight',
       'four', 'zero', 'six', 'five', 'nine', 'one', 'eight', 'four',
       'two', 'zero', 'one', 'eight', 'one', 'two', 'two', 'five', 'three',
       'zero', 'six', 'five', 'four', 'one', 'zero', 'eight', 'five',
       'three', 'five', 'five', 'five', 'nine', 'one', 'two', 'zero',
       'four', 'one', 'five'], 
      dtype='<U5')
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: