Title: | Multi-Modal Similarity Matrix Factorization for Integrative Multi-Omics Data Analysis |
---|---|
Description: | A new method for clustering samples from multi-modality data. The function M2SMF() jointly factorizes multiple similarity matrices into a shared sub-matrix and several modality-private sub-matrices, the result of which is further used for clustering. Along with this method, we also provide functions to calculate the similarity matrix and to evaluate the best cluster number from the original data. |
Authors: | Xiaoyao Yin [aut, cre] |
Maintainer: | Xiaoyao Yin <[email protected]> |
License: | GPL (>= 2) |
Version: | 2.0 |
Built: | 2025-02-22 03:24:11 UTC |
Source: | https://github.com/cran/M2SMF |
Calculate the affinity matrix from the diff matrix using the K nearest neighbors (20 by default)
affinityMatrix(Diff, K = 20, sigma = 0.5)
affinityMatrix(Diff, K = 20, sigma = 0.5)
Diff |
A diff matrix |
K |
The number of neighbors in consideration |
sigma |
A parameter to determine the scale |
W The similarity matrix
Xiaoyao Yin
data_list <- simu_data_gen() Diff <- dist2eu(Standard_Normalization(data_list[[1]]),Standard_Normalization(data_list[[1]])) simi <- affinityMatrix(Diff,20,0.5)
data_list <- simu_data_gen() Diff <- dist2eu(Standard_Normalization(data_list[[1]]),Standard_Normalization(data_list[[1]])) simi <- affinityMatrix(Diff,20,0.5)
calculate the normalized mutual information of two vectors x and y.
Cal_NMI(x, y)
Cal_NMI(x, y)
x |
A vector |
y |
A vector as long as x |
A number between 0 and 1 indicating the normalized mutual information
Xiaoyao Yin
x <- c(0.1,0.2,0.3,0.4) y <- c(0.1,0.2,0.3,0.4) NMI <- Cal_NMI(x,y)
x <- c(0.1,0.2,0.3,0.4) y <- c(0.1,0.2,0.3,0.4) NMI <- Cal_NMI(x,y)
A function to calculate the cost of the objective function
cost(new_WL_list, init_list, lambda)
cost(new_WL_list, init_list, lambda)
new_WL_list |
A list of matrices factorized from the similarity matrices list WL |
init_list |
A list containing the updated result in this iteration |
lambda |
A parameter to set the relative weight of the group sparsity constraints |
A number indicating the total cost of the objective function
Xiaoyao Yin
WL <- simu_data_gen() WL[[1]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[1]]),Standard_Normalization(WL[[1]]))) WL[[2]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[2]]),Standard_Normalization(WL[[2]]))) new_WL_list <- initialize_WL(WL) k <- 5 lambda <- 0.25 init_list <- initialization(new_WL_list,k) update_L_list <- update_L(new_WL_list,init_list) update_alpha_list <- update_alpha(new_WL_list,update_L_list,lambda) init_list <- update_alpha_list new_loss <- cost(new_WL_list,init_list,lambda)
WL <- simu_data_gen() WL[[1]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[1]]),Standard_Normalization(WL[[1]]))) WL[[2]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[2]]),Standard_Normalization(WL[[2]]))) new_WL_list <- initialize_WL(WL) k <- 5 lambda <- 0.25 init_list <- initialization(new_WL_list,k) update_L_list <- update_L(new_WL_list,init_list) update_alpha_list <- update_alpha(new_WL_list,update_L_list,lambda) init_list <- update_alpha_list new_loss <- cost(new_WL_list,init_list,lambda)
Calculate the agreement-based measurement between any two samples x_i and x_j for binary variables
dist2bin(X, C)
dist2bin(X, C)
X |
A sample-feature matrix with rows as samples and columns as features |
C |
The same as X |
A matrix whose element at (i,j) is the agreement-based measurement between samples x_i and x_j
Xiaoyao Yin
data_list <- simu_data_gen() X <- data_list[[1]] C <- X Diff <- dist2bin(X,C)
data_list <- simu_data_gen() X <- data_list[[1]] C <- X Diff <- dist2bin(X,C)
Calculate the chi-squared distance between any two samples x_i and x_j for discrete variables
dist2chi(X, C)
dist2chi(X, C)
X |
A sample-feature matrix with rows as samples and columns as features |
C |
The same as X |
A matrix whose element at (i,j) is the chi-squared distance between samples x_i and x_j
Xiaoyao Yin
data_list <- simu_data_gen() X <- data_list[[1]] C <- X Diff <- dist2chi(X,C)
data_list <- simu_data_gen() X <- data_list[[1]] C <- X Diff <- dist2chi(X,C)
Calculate the Euclidean distance between any two samples x_i and x_j for continuous variables
dist2eu(X, C)
dist2eu(X, C)
X |
A sample-feature matrix with rows as samples and columns as features |
C |
The same as X |
A matrix whose element at (i,j) is the Euclidean distance between samples x_i and x_j
Xiaoyao Yin
data_list <- simu_data_gen() X <- data_list[[1]] C <- X Diff <- dist2eu(X,C)
data_list <- simu_data_gen() X <- data_list[[1]] C <- X Diff <- dist2eu(X,C)
Li takes the first k columns of matrix d in SVD, while alpha is the mean of all the u of SVD result in each modality
initialization(WL, k)
initialization(WL, k)
WL |
A list of similarity matrices |
k |
A parameter to specify the cluster number |
A list with N+2 elements: the first N are the modality-private sub-matrices, the (N+1)th is the shared sub-matrix, and the last one is 1
Xiaoyao Yin
WL <- simu_data_gen() new_WL_list <- initialize_WL(WL) k <- 5 init_list <- initialization(new_WL_list,k)
WL <- simu_data_gen() new_WL_list <- initialize_WL(WL) k <- 5 init_list <- initialization(new_WL_list,k)
Factorize each similarity matrix Si into Ci*t(Ci) by SVD
initialize_WL(WL)
initialize_WL(WL)
WL |
A list of similarity matrices |
A list as long as WL, with elements satisfying res[[i]] %*% t(res[[i]]) = WL[[i]]
Xiaoyao Yin
WL <- simu_data_gen() new_WL_list <- initialize_WL(WL)
WL <- simu_data_gen() new_WL_list <- initialize_WL(WL)
jointly factorize multiple matrices into a shared sub-matrix and multiple private sub-matrices
M2SMF(WL, lambda = 0.25, theta = 10^-4, k)
M2SMF(WL, lambda = 0.25, theta = 10^-4, k)
WL |
A list of similarity matrices |
lambda |
A parameter to set the relative weight of the group sparsity constraints |
theta |
A parameter to determine the convergence |
k |
A parameter to specify the cluster number |
A list containing the clustering result
sub_matrices |
a list containing all the sub-matrices |
cluster_res |
the clustering result which is as long as the number of samples |
Xiaoyao Yin
WL <- simu_data_gen() res <- M2SMF(WL,0.25,10^-4,5)
WL <- simu_data_gen() res <- M2SMF(WL,0.25,10^-4,5)
A function to calculate the modularity for weighted graph
new_modularity(init_list, WL)
new_modularity(init_list, WL)
init_list |
A list with N+2 elements: the first N are the modality-private sub-matrices, the (N+1)th is the shared sub-matrix, and the last one is the current loss |
WL |
A list of similarity matrices |
A single value indicating the modularity of the current factorization and clustering
Xiaoyao Yin
WL <- simu_data_gen() WL[[1]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[1]]),Standard_Normalization(WL[[1]]))) WL[[2]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[2]]),Standard_Normalization(WL[[2]]))) new_WL_list <- initialize_WL(WL) init_list <- initialization(new_WL_list,5) res <- M2SMF(WL,0.25,10^-4,5) init_list <- res[[1]] modularity <- new_modularity(init_list,WL)
WL <- simu_data_gen() WL[[1]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[1]]),Standard_Normalization(WL[[1]]))) WL[[2]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[2]]),Standard_Normalization(WL[[2]]))) new_WL_list <- initialize_WL(WL) init_list <- initialization(new_WL_list,5) res <- M2SMF(WL,0.25,10^-4,5) init_list <- res[[1]] modularity <- new_modularity(init_list,WL)
A function to generate simulated data with two modalities and five clusters
simu_data_gen()
simu_data_gen()
A list with two elements, which are the sample-feature matrices from different modality
Xiaoyao Yin
data_list <- simu_data_gen()
data_list <- simu_data_gen()
Normalize each column of x to have mean 0 and standard deviation 1.
Standard_Normalization(x)
Standard_Normalization(x)
x |
A sample-feature matrix with rows as samples and columns as features |
A sample-feature matrix with rows as samples and columns as features, in which each column has mean 0 and standard deviation 1
Xiaoyao Yin
data_list <- simu_data_gen() x <- data_list[[1]] data_matrix <- Standard_Normalization(x)
data_list <- simu_data_gen() x <- data_list[[1]] data_matrix <- Standard_Normalization(x)
Update the sub-matrix alpha so that it gradually converges to its local minimum
update_alpha(WL, update_L_list, lambda)
update_alpha(WL, update_L_list, lambda)
WL |
A list of similarity matrices |
update_L_list |
A list with N+2 elements: the first N are the modality-private sub-matrices, the (N+1)th is the shared sub-matrix, and the last one is the current loss |
lambda |
A parameter to set the relative weight of the group sparsity constraints |
A list containing the updated result in this iteration
Xiaoyao Yin
WL <- simu_data_gen() WL[[1]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[1]]),Standard_Normalization(WL[[1]]))) WL[[2]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[2]]),Standard_Normalization(WL[[2]]))) new_WL_list <- initialize_WL(WL) k <- 5 lambda <- 0.25 init_list <- initialization(new_WL_list,k) update_L_list <- update_L(new_WL_list,init_list) update_alpha_list <- update_alpha(WL,update_L_list,lambda)
WL <- simu_data_gen() WL[[1]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[1]]),Standard_Normalization(WL[[1]]))) WL[[2]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[2]]),Standard_Normalization(WL[[2]]))) new_WL_list <- initialize_WL(WL) k <- 5 lambda <- 0.25 init_list <- initialization(new_WL_list,k) update_L_list <- update_L(new_WL_list,init_list) update_alpha_list <- update_alpha(WL,update_L_list,lambda)
Update the sub-matrices Li, for i=1,2,...,N, so that each gradually converges to its local minimum
update_L(WL, init_list)
update_L(WL, init_list)
WL |
A list of similarity matrices |
init_list |
A list with N+2 elements: the first N are the modality-private sub-matrices, the (N+1)th is the shared sub-matrix, and the last one is 1 |
A list containing the updated result in this iteration
Xiaoyao Yin
WL <- simu_data_gen() WL[[1]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[1]]),Standard_Normalization(WL[[1]]))) WL[[2]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[2]]),Standard_Normalization(WL[[2]]))) new_WL_list <- initialize_WL(WL) k <- 5 init_list <- initialization(new_WL_list,k) update_L_list <- update_L(WL,init_list)
WL <- simu_data_gen() WL[[1]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[1]]),Standard_Normalization(WL[[1]]))) WL[[2]] <- affinityMatrix(dist2eu(Standard_Normalization(WL[[2]]),Standard_Normalization(WL[[2]]))) new_WL_list <- initialize_WL(WL) k <- 5 init_list <- initialization(new_WL_list,k) update_L_list <- update_L(WL,init_list)