カーネルPCAは手動で実装することもできます。まずは多項式カーネルから始めましょう。
from sklearn.datasets import make_circles
from scipy.spatial.distance import pdist, squareform
from scipy.linalg import eigh
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Generate the circles data set with a fixed seed and plot it, one colour per class.
X_c, y_c = make_circles(n_samples=100, random_state=654)

plt.figure(figsize=(8,6))
for label, colour in ((0, 'red'), (1, 'blue')):
    mask = y_c == label
    plt.scatter(X_c[mask, 0], X_c[mask, 1], color=colour)
plt.ylabel('y coordinate')
plt.xlabel('x coordinate')
plt.show()
データ:
def degree_pca(X, gamma, degree, n_components):
    """
    Kernel PCA with a polynomial kernel.

    X: A MxN dataset as NumPy array where the samples are stored as rows (M), features as columns (N).
    gamma: scaling coefficient applied to the polynomial kernel.
    degree: exponent of the polynomial kernel.
    n_components: number of components to be returned.

    Returns an M x n_components array whose i-th column is the eigenvector
    of the kernel matrix belonging to the i-th largest eigenvalue.
    """
    # Calculating the MxM kernel matrix: gamma * (<x_i, x_j> + 1) ** degree.
    # NOTE: the kernel matrix is used as-is, without centering.
    K = gamma * (X @ X.T + 1) ** degree
    # eigh returns the eigenvalues of the symmetric matrix in ascending
    # order, with the matching eigenvectors as columns.
    _, eigvecs = eigh(K)
    # Walking the columns from the end picks the eigenvectors of the
    # n_components highest eigenvalues, largest first. A list (not a
    # generator) is required by np.column_stack in current NumPy.
    X_pc = np.column_stack([eigvecs[:, -i] for i in range(1, n_components + 1)])
    return X_pc
それでは、データを変換してみましょう。
# Project the circles data with the polynomial kernel and plot the two
# returned components, one colour per class.
X_c1 = degree_pca(X_c, gamma=5, degree=2, n_components=2)

plt.figure(figsize=(8,6))
for label, colour in ((0, 'red'), (1, 'blue')):
    mask = y_c == label
    plt.scatter(X_c1[mask, 0], X_c1[mask, 1], color=colour)
plt.ylabel('y coordinate')
plt.xlabel('x coordinate')
plt.show()
線形分離の結果をプロットしたものです。
これで、点は直線で分離できるようになりました。
次にRBFカーネルを実装しましょう。デモンストレーションには、月形(make_moons)のデータセットを使います。
from sklearn.datasets import make_moons
# Generate the moons data set with a fixed seed and plot it, one colour per class.
X, y = make_moons(n_samples=100, random_state=654)

plt.figure(figsize=(8,6))
for label, colour in ((0, 'red'), (1, 'blue')):
    mask = y == label
    plt.scatter(X[mask, 0], X[mask, 1], color=colour)
plt.ylabel('y coordinate')
plt.xlabel('x coordinate')
plt.show()
月形データ:
カーネルPCA変換:
def stepwise_kpca(X, gamma, n_components):
    """
    Kernel PCA with an RBF kernel.

    X: A MxN dataset as NumPy array where the samples are stored as rows (M), features as columns (N).
    gamma: coefficient for the RBF kernel.
    n_components: number of components to be returned.

    Returns an M x n_components array whose i-th column is the eigenvector
    of the centered kernel matrix belonging to the i-th largest eigenvalue.
    """
    # Calculating the squared Euclidean distances for every pair of points
    # in the MxN dimensional dataset.
    sq_dists = pdist(X, 'sqeuclidean')
    # Converting the pairwise distances into a symmetric MxM matrix.
    mat_sq_dists = squareform(sq_dists)
    # RBF kernel: exp(-gamma * ||x_i - x_j||^2).
    K = np.exp(-gamma * mat_sq_dists)
    # Centering the symmetric MxM kernel matrix (N below is the number of
    # samples, i.e. the side length of K).
    N = K.shape[0]
    one_n = np.ones((N, N)) / N
    K = K - one_n.dot(K) - K.dot(one_n) + one_n.dot(K).dot(one_n)
    # eigh returns the eigenvalues of the symmetric matrix in ascending
    # order, with the matching eigenvectors as columns.
    _, eigvecs = eigh(K)
    # Walking the columns from the end picks the eigenvectors of the
    # n_components highest eigenvalues, largest first. A list (not a
    # generator) is required by np.column_stack in current NumPy.
    X_pc = np.column_stack([eigvecs[:, -i] for i in range(1, n_components + 1)])
    return X_pc
結果をプロットしてみましょう。
# Project the moons data with the RBF kernel and plot the two returned
# components, one colour per class.
X_4 = stepwise_kpca(X, gamma=15, n_components=2)

for label, colour in ((0, 'red'), (1, 'blue')):
    mask = y == label
    plt.scatter(X_4[mask, 0], X_4[mask, 1], color=colour)
plt.ylabel('y coordinate')
plt.xlabel('x coordinate')
plt.show()
結果: