Building distance matrices.
Sys.setenv(RETICULATE_PYTHON = "C:/Users/brent/anaconda3/envs/STOCK_MASTER/python.exe")
reticulate::use_condaenv(condaenv = 'STOCK_MASTER', required = TRUE)
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
library('reticulate')
import numpy as np
np.set_printoptions(precision=2, linewidth=150)
from scipy.spatial import distance
X1 = np.array([[6,1,7],[10,9,4],[13,9,3],[10,8,15],[14,4,1]])
X2 = np.array([6,1,7])
print(X1, "\n\n", X2)
## [[ 6 1 7]
## [10 9 4]
## [13 9 3]
## [10 8 15]
## [14 4 1]]
##
## [6 1 7]
def ed_sc(x):
x = x[:,None] if x.ndim == 1 else x
D_sp = distance.pdist(x, metric='euclidean')
Dmat = distance.squareform(D_sp, force='no', checks=True)
return Dmat
a = ed_sc(X1)
b = ed_sc(X2)
print(a, "\n\n", b)
## [[ 0. 9.43 11.36 11.36 10.44]
## [ 9.43 0. 3.16 11.05 7.07]
## [11.36 3.16 0. 12.41 5.48]
## [11.36 11.05 12.41 0. 15.1 ]
## [10.44 7.07 5.48 15.1 0. ]]
##
## [[0. 5. 1.]
## [5. 0. 6.]
## [1. 6. 0.]]
SO reference
def ed_np1(x):
x = x[:,None] if x.ndim == 1 else x
a = np.sum(x * x, 1)
b = np.repeat(a[:, None], x.shape[0], axis=1)
D = b + b.T -2 * np.dot(x, x.T)
Dmat = np.sqrt(D)
return Dmat
a = ed_np1(X1)
b = ed_np1(X2)
print(a, "\n\n", b)
## [[ 0. 9.43 11.36 11.36 10.44]
## [ 9.43 0. 3.16 11.05 7.07]
## [11.36 3.16 0. 12.41 5.48]
## [11.36 11.05 12.41 0. 15.1 ]
## [10.44 7.07 5.48 15.1 0. ]]
##
## [[0. 5. 1.]
## [5. 0. 6.]
## [1. 6. 0.]]
Code adapted from here
def ed_np2(x):
x = x[:,None] if x.ndim == 1 else x
D = np.sum(x**2, 1).reshape(-1, 1) \
+ np.sum(x**2, 1) \
- 2 * np.dot(x, x.T)
Dmat = np.sqrt(D)
return Dmat
a = ed_np2(X1)
b = ed_np2(X2)
print(a, "\n\n", b)
## [[ 0. 9.43 11.36 11.36 10.44]
## [ 9.43 0. 3.16 11.05 7.07]
## [11.36 3.16 0. 12.41 5.48]
## [11.36 11.05 12.41 0. 15.1 ]
## [10.44 7.07 5.48 15.1 0. ]]
##
## [[0. 5. 1.]
## [5. 0. 6.]
## [1. 6. 0.]]
SO reference
def ed_np3(x):
x = x[:,None] if x.ndim == 1 else x
Dmat = np.sqrt(((x[:, :, None] - x[:, :, None].T) ** 2).sum(1))
return Dmat
a = ed_np3(X1)
b = ed_np3(X2)
print(a, "\n\n", b)
## [[ 0. 9.43 11.36 11.36 10.44]
## [ 9.43 0. 3.16 11.05 7.07]
## [11.36 3.16 0. 12.41 5.48]
## [11.36 11.05 12.41 0. 15.1 ]
## [10.44 7.07 5.48 15.1 0. ]]
##
## [[0. 5. 1.]
## [5. 0. 6.]
## [1. 6. 0.]]
def md_sp(x):
x = x[:,None] if x.ndim == 1 else x
D_sp = distance.pdist(x, metric='mahalanobis')
Dmat = distance.squareform(D_sp, force='no', checks=True)
return Dmat
a = md_sp(X1)
b = md_sp(X2)
print(a, "\n\n", b)
## [[0. 2.39 2.63 2.76 2.6 ]
## [2.39 0. 1.22 2.68 2.8 ]
## [2.63 1.22 0. 2.3 1.85]
## [2.76 2.68 2.3 0. 2.78]
## [2.6 2.8 1.85 2.78 0. ]]
##
## [[0. 1.56 0.31]
## [1.56 0. 1.87]
## [0.31 1.87 0. ]]
def md_np(x):
dec = np.linalg.cholesky(np.cov(x, rowvar=False))
tmp = np.linalg.solve(dec, x.T)
tmp1 = tmp.T
return ed_np3(tmp1)
print(md_np(X1))
## [[0. 2.39 2.63 2.76 2.6 ]
## [2.39 0. 1.22 2.68 2.8 ]
## [2.63 1.22 0. 2.3 1.85]
## [2.76 2.68 2.3 0. 2.78]
## [2.6 2.8 1.85 2.78 0. ]]
md_r <- function(x) {
dec <- chol( cov(x) )
tmp0 <- forwardsolve(t(dec), t(x) )
tmp1 <- t(tmp0)
lower_tri <- dist(tmp1)
m <- matrix(NA, nrow(x), nrow(x))
m[lower.tri(m)] <- lower_tri
m[upper.tri(m)] <- t(m)[upper.tri(m)]
diag(m) <- 0
m
}
d <- matrix(c(6,1,7,10,9,4,13,9,3,10,8,15,14,4,1), ncol = 3, byrow= TRUE)
md_r(d)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.000000 2.388123 2.631810 2.762811 2.603338
## [2,] 2.388123 0.000000 1.220222 2.678094 2.798580
## [3,] 2.631810 1.220222 0.000000 2.303122 1.847275
## [4,] 2.762811 2.678094 2.303122 0.000000 2.783878
## [5,] 2.603338 2.798580 1.847275 2.783878 0.000000