%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
Given $A$ a $m\times n$ non-negative matrix ($\forall i,j; \; A_{ij}\geq 0$) find:
Diagonal square matrices $D_1$ ($m\times m$) and $D_2$ ($n\times n$)
So $P=D_1 A D_2$ is doubly stochastic (i.e., $\sum_{i}P_{i,.} = \sum_{j}P_{.,j} = 1$)
We want to satisfy:
Let $D_1 = Diag(r)$ and $D_2 = Diag(c)$
Then:
This can be solved iteratively:
def SinkhornKnopp(A, iterations):
A = np.asarray(A)
m,n = A.shape
c = 1 / A.T.dot(np.ones(m))
r = 1 / A.dot(np.ones(n))
for i in range(iterations):
D1 = np.diag(r)
D2 = np.diag(c)
#print(np.mean(D1.dot(A).dot(D2).dot(np.ones(n))))
#print(np.mean(D2.dot(A.T).dot(D1).dot(np.ones(m))))
c = 1 / A.T.dot(r)
r = 1 / A.dot(c)
P = D1.dot(A).dot(D2)
return(P,D1,D2)
m = 10
n = 1000
A = np.random.randn(m,n)
A *= np.abs(np.random.randn(n))*2
A = (A.T*np.abs(np.random.randn(m))*3).T
A += np.abs(np.random.randn(n)*10)
A = (A.T + np.abs(np.random.randn(m))*10).T
#A = A/A.std(axis=0)
#A = (A.T/A.std(axis=1)).T
plt.plot(A.mean(axis=0))
plt.plot(A.mean(axis=1))
A-=A.mean(axis=0)
A = (A.T-A.mean(axis=1)).T
plt.plot(A.mean(axis=0))
plt.plot(A.mean(axis=1))
plt.plot(A.std(axis=0))
plt.plot(A.std(axis=1))
P,D1,D2 = SinkhornKnopp(A**2/n,100)
A_ = np.sqrt(D1).dot(A).dot(np.sqrt(D2))
plt.plot(A_.std(axis=0))
plt.plot(A_.std(axis=1))
plt.plot(A_.mean(axis=0))
plt.plot(A_.mean(axis=1))
Given $A$ a $m\times n$ non-negative matrix ($\forall i,j; \; A_{ij}\geq 0$) find:
So $P=D_1 A D_2$ is s.t. $\sum_{i}P_{i,.} = r_i$ and $\sum_{j}P_{.,j} = c_jk$
Repeat:
def RAS(A,r,c,nb_iterations):
for i in range(nb_iterations):
A = (A.T*r/A.sum(axis=1)).T
A = A*c/A.sum(axis=0)
return(A)
m = 10
n = 1000
A = np.random.randn(m,n)
A *= np.abs(np.random.randn(n))*2
A = (A.T*np.abs(np.random.randn(m))*3).T
A += np.abs(np.random.randn(n)*10)
A = (A.T + np.abs(np.random.randn(m))*10).T
plt.plot(A.mean(axis=0))
plt.plot(A.mean(axis=1))
A-=A.mean(axis=0)
A = (A.T-A.mean(axis=1)).T
plt.plot(A.mean(axis=0))
plt.plot(A.mean(axis=1))
r = np.ones(m)*n
c = np.ones(n)*m
A_2 = RAS(A**2,r,c,10)
A_ = np.sqrt(A_2)*np.sign(A)
plt.plot(A_.std(axis=0))
plt.plot(A_.std(axis=1))
plt.plot(A_.mean(axis=0))
plt.plot(A_.mean(axis=1))
def mean_std_polishing(A, nb_iterations):
for i in range(nb_iterations):
# mean polish the column
A = A-A.mean(axis=0)
# std polish the column
A = A/A.std(axis=0)
# mean polish the row
A = (A.T-A.T.mean(axis=0)).T
# std polish the row
A = (A.T/A.T.std(axis=0)).T
return(A)
m = 10
n = 1000
A = np.random.randn(m,n)
A *= np.abs(np.random.randn(n))*2
A = (A.T*np.abs(np.random.randn(m))*3).T
A += np.abs(np.random.randn(n)*10)
A = (A.T + np.abs(np.random.randn(m))*10).T
A_ = mean_std_polishing(A,100)
plt.plot(A_.std(axis=0))
plt.plot(A_.std(axis=1))
plt.plot(A_.mean(axis=0))
plt.plot(A_.mean(axis=1))