Title: | Orthogonal Sparse Non-Negative Matrix Tri-Factorization |
---|---|
Description: | A novel method for cancer subtyping and subtype-specific drug target identification via non-negative matrix tri-factorization. To improve interpretability, we impose an orthogonality constraint on the row coefficient matrix and the column coefficient matrix. To reflect the prior knowledge that each subtype should be strongly associated with only a few gene sets, we impose a sparsity constraint on the association sub-matrix. The average residue is used to evaluate the row and column cluster numbers. This is part of the work "Liver Cancer Analysis via Orthogonal Sparse Non-Negative Matrix Tri-Factorization", which will be submitted to BBRC. |
Authors: | Xiaoyao Yin |
Maintainer: | Xiaoyao Yin <[email protected]> |
License: | GPL (>= 2) |
Version: | 0.1.0 |
Built: | 2025-03-11 05:31:37 UTC |
Source: | https://github.com/cran/OSNMTF |
To calculate the similarity matrix using the same method as in the package M2SMF, for the asymmetric case
affinityMatrix(Diff, K = 20, sigma = 0.5)
affinityMatrix(Diff, K = 20, sigma = 0.5)
Diff |
The distance matrix used to calculate the similarity |
K |
The number of neighbours used to calculate the similarity |
sigma |
A hyper-parameter used to calculate the similarity |
The similarity matrix
Xiaoyao Yin
data1 <- matrix(0,100,100) data2 <- matrix(0,80,100) for (i in 1:20) { data1[i,] <- rnorm(100,10,1) } for (i in 21:40) { data1[i,] <- rnorm(100,20,1) } for (i in 41:60) { data1[i,] <- rnorm(100,30,1) } for (i in 61:80) { data1[i,] <- rnorm(100,40,1) } for (i in 81:100) { data1[i,] <- rnorm(100,50,1) } for (i in 1:20) { data2[i,] <- rnorm(100,5,1) } for (i in 21:40) { data2[i,] <- rnorm(100,10,1) } for (i in 41:60) { data2[i,] <- rnorm(100,15,1) } for (i in 61:80) { data2[i,] <- rnorm(100,20,1) } new_data1 <- Standard_Normalization(data1) new_data2 <- Standard_Normalization(data2) Diff <- dist2eu(new_data1,new_data2) simi_matr1 <- affinityMatrix(Diff, K = 20, sigma = 0.5)
data1 <- matrix(0,100,100) data2 <- matrix(0,80,100) for (i in 1:20) { data1[i,] <- rnorm(100,10,1) } for (i in 21:40) { data1[i,] <- rnorm(100,20,1) } for (i in 41:60) { data1[i,] <- rnorm(100,30,1) } for (i in 61:80) { data1[i,] <- rnorm(100,40,1) } for (i in 81:100) { data1[i,] <- rnorm(100,50,1) } for (i in 1:20) { data2[i,] <- rnorm(100,5,1) } for (i in 21:40) { data2[i,] <- rnorm(100,10,1) } for (i in 41:60) { data2[i,] <- rnorm(100,15,1) } for (i in 61:80) { data2[i,] <- rnorm(100,20,1) } new_data1 <- Standard_Normalization(data1) new_data2 <- Standard_Normalization(data2) Diff <- dist2eu(new_data1,new_data2) simi_matr1 <- affinityMatrix(Diff, K = 20, sigma = 0.5)
To calculate average residues of the bi-clustering results
ASR(row_cluster,col_cluster,W)
ASR(row_cluster,col_cluster,W)
row_cluster |
The cluster results of the rows of W; this value should be a vector whose length is the same as the number of rows in W |
col_cluster |
The cluster results of the columns of W; this value should be a vector whose length is the same as the number of columns in W |
W |
The matrix to be factorized |
The average residues of the bi-clustering results
Xiaoyao Yin
W <- simu_data_generation() OSNMTF_res <- OSNMTF(W,k=5,l=4) row_cluster <- OSNMTF_res[[2]][[1]] column_cluster <- OSNMTF_res[[2]][[2]] ASR_value <- ASR(row_cluster,column_cluster,W)
W <- simu_data_generation() OSNMTF_res <- OSNMTF(W,k=5,l=4) row_cluster <- OSNMTF_res[[2]][[1]] column_cluster <- OSNMTF_res[[2]][[2]] ASR_value <- ASR(row_cluster,column_cluster,W)
A function to calculate the cost of the objective function
cost(W,init_list,lambda=0.2)
cost(W,init_list,lambda=0.2)
W |
The matrix to be factorized |
init_list |
A list containing the updated results in this iteration |
lambda |
A parameter to set the relative weight of the sparsity constraint |
A number indicating the total cost of the objective function
Xiaoyao Yin
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4) update_L_list <- update_L(W,init_list) update_B_list <- update_B(W,update_L_list) update_R_list <- update_R(W,update_B_list) update_C_list <- update_C(W,update_R_list,lambda=0.2,rho=1.1) temp_cost <- cost(W,init_list,lambda=0.2)
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4) update_L_list <- update_L(W,init_list) update_B_list <- update_B(W,update_L_list) update_R_list <- update_R(W,update_B_list) update_C_list <- update_C(W,update_R_list,lambda=0.2,rho=1.1) temp_cost <- cost(W,init_list,lambda=0.2)
The distance matrix between the two groups of samples
dist2eu(X,C)
dist2eu(X,C)
X |
The first samples matrix |
C |
The second samples matrix |
The distance matrix
Xiaoyao Yin
data1 <- matrix(0,100,100) data2 <- matrix(0,80,100) for (i in 1:20) { data1[i,] <- rnorm(100,10,1) } for (i in 21:40) { data1[i,] <- rnorm(100,20,1) } for (i in 41:60) { data1[i,] <- rnorm(100,30,1) } for (i in 61:80) { data1[i,] <- rnorm(100,40,1) } for (i in 81:100) { data1[i,] <- rnorm(100,50,1) } for (i in 1:20) { data2[i,] <- rnorm(100,5,1) } for (i in 21:40) { data2[i,] <- rnorm(100,10,1) } for (i in 41:60) { data2[i,] <- rnorm(100,15,1) } for (i in 61:80) { data2[i,] <- rnorm(100,20,1) } new_data1 <- Standard_Normalization(data1) new_data2 <- Standard_Normalization(data2) dist1 <- dist2eu(new_data1,new_data2)
data1 <- matrix(0,100,100) data2 <- matrix(0,80,100) for (i in 1:20) { data1[i,] <- rnorm(100,10,1) } for (i in 21:40) { data1[i,] <- rnorm(100,20,1) } for (i in 41:60) { data1[i,] <- rnorm(100,30,1) } for (i in 61:80) { data1[i,] <- rnorm(100,40,1) } for (i in 81:100) { data1[i,] <- rnorm(100,50,1) } for (i in 1:20) { data2[i,] <- rnorm(100,5,1) } for (i in 21:40) { data2[i,] <- rnorm(100,10,1) } for (i in 41:60) { data2[i,] <- rnorm(100,15,1) } for (i in 61:80) { data2[i,] <- rnorm(100,20,1) } new_data1 <- Standard_Normalization(data1) new_data2 <- Standard_Normalization(data2) dist1 <- dist2eu(new_data1,new_data2)
Initialize the values which will be updated in OSNMTF
initialization(W,k,l)
initialization(W,k,l)
W |
The matrix to be factorized |
k |
A parameter to specify the row cluster number |
l |
A parameter to specify the column cluster number |
A list with 6 elements, corresponding to the matrices L,C,R,B,Y and the penalty parameter miu
Xiaoyao Yin
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4)
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4)
To calculate mean residue of a sub-matrix block of W, indexed by a row cluster and a column cluster
MSR(Block)
MSR(Block)
Block |
The sub-matrix block of W, indexed by a row cluster and a column cluster |
The mean residue of the block
Xiaoyao Yin
W <- simu_data_generation() OSNMTF_res <- OSNMTF(W,k=5,l=4) row_cluster <- OSNMTF_res[[2]][[1]] column_cluster <- OSNMTF_res[[2]][[2]] temp_rows <- which(row_cluster==1,TRUE) temp_cols <- which(column_cluster==1,TRUE) MSR_value <- MSR(W[temp_rows,temp_cols])
W <- simu_data_generation() OSNMTF_res <- OSNMTF(W,k=5,l=4) row_cluster <- OSNMTF_res[[2]][[1]] column_cluster <- OSNMTF_res[[2]][[2]] temp_rows <- which(row_cluster==1,TRUE) temp_cols <- which(column_cluster==1,TRUE) MSR_value <- MSR(W[temp_rows,temp_cols])
Factorize matrix W into the multiplication of L, C and R, with L and R being orthogonal and C being sparse. Then the row cluster results and column cluster results are obtained from L and R.
OSNMTF(W,lambda=0.2,theta=10^-4,k,l)
OSNMTF(W,lambda=0.2,theta=10^-4,k,l)
W |
The matrix to be factorized |
lambda |
A parameter to set the relative weight of the sparsity constraints |
theta |
A parameter to determine the convergence |
k |
A parameter to specify the row cluster number |
l |
A parameter to specify the column cluster number |
A list containing the clustering result
sub_matrices |
a list containing the matrix L, C, R |
cluster_results |
a list containing the row cluster results and the column cluster results |
Xiaoyao Yin
W <- simu_data_generation() OSNMTF_res <- OSNMTF(W,k=5,l=4)
W <- simu_data_generation() OSNMTF_res <- OSNMTF(W,k=5,l=4)
To generate the simulation data matrix
simu_data_generation()
simu_data_generation()
The simulated data matrix
Xiaoyao Yin
simu_data <- simu_data_generation()
simu_data <- simu_data_generation()
To normalize the data matrix by column
Standard_Normalization(x)
Standard_Normalization(x)
x |
The data matrix to be normalized |
The normalized matrix
Xiaoyao Yin
data1 <- matrix(0,100,100) data2 <- matrix(0,80,100) for (i in 1:20) { data1[i,] <- rnorm(100,10,1) } for (i in 21:40) { data1[i,] <- rnorm(100,20,1) } for (i in 41:60) { data1[i,] <- rnorm(100,30,1) } for (i in 61:80) { data1[i,] <- rnorm(100,40,1) } for (i in 81:100) { data1[i,] <- rnorm(100,50,1) } new_data1 <- Standard_Normalization(data1)
data1 <- matrix(0,100,100) data2 <- matrix(0,80,100) for (i in 1:20) { data1[i,] <- rnorm(100,10,1) } for (i in 21:40) { data1[i,] <- rnorm(100,20,1) } for (i in 41:60) { data1[i,] <- rnorm(100,30,1) } for (i in 61:80) { data1[i,] <- rnorm(100,40,1) } for (i in 81:100) { data1[i,] <- rnorm(100,50,1) } new_data1 <- Standard_Normalization(data1)
Update sub-matrix B
update_B(W,update_L_list)
update_B(W,update_L_list)
W |
The matrix to be factorized |
update_L_list |
A list containing the updated results in this iteration after running the function update_L |
A list the same as update_L_list with the matrix B updated
Xiaoyao Yin
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4) update_L_list <- update_L(W,init_list) update_B_list <- update_B(W,update_L_list)
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4) update_L_list <- update_L(W,init_list) update_B_list <- update_B(W,update_L_list)
Update sub-matrix C
update_C(W,update_R_list,lambda=0.2,rho=1.1)
update_C(W,update_R_list,lambda=0.2,rho=1.1)
W |
The matrix to be factorized |
update_R_list |
A list containing the updated results in this iteration after running the function update_R |
lambda |
A parameter to set the relative weight of the sparsity constraints |
rho |
A parameter used in the augmented Lagrange multiplier method |
A list the same as update_R_list with the matrix C updated
Xiaoyao Yin
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4) update_L_list <- update_L(W,init_list) update_B_list <- update_B(W,update_L_list) update_R_list <- update_R(W,update_B_list) update_C_list <- update_C(W,update_R_list,lambda=0.2,rho=1.1)
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4) update_L_list <- update_L(W,init_list) update_B_list <- update_B(W,update_L_list) update_R_list <- update_R(W,update_B_list) update_C_list <- update_C(W,update_R_list,lambda=0.2,rho=1.1)
Update sub-matrix L
update_L(W,init_list)
update_L(W,init_list)
W |
The matrix to be factorized |
init_list |
A list containing the updated results in this iteration |
A list the same as init_list with the matrix L updated
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4) update_L_list <- update_L(W,init_list)
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4) update_L_list <- update_L(W,init_list)
Update sub-matrix R
update_R(W,update_B_list)
update_R(W,update_B_list)
W |
The matrix to be factorized |
update_B_list |
A list containing the updated results in this iteration after running the function update_B |
A list the same as update_B_list with the matrix R updated
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4) update_L_list <- update_L(W,init_list) update_B_list <- update_B(W,update_L_list) update_R_list <- update_R(W,update_B_list)
W <- simu_data_generation() init_list <- initialization(W,k=5,l=4) update_L_list <- update_L(W,init_list) update_B_list <- update_B(W,update_L_list) update_R_list <- update_R(W,update_B_list)