Commit 825cf697 authored by Gosset Simon's avatar Gosset Simon
Browse files

Update R/methods.R

parent 1b68d8fc
......@@ -4,76 +4,76 @@
#############################################################
construct <- function(part1, part2, e, mat, inputID, inputSGD) {
ind1 <- grep(part1, e)
ind2 <- grep(part2, e)
if ((length(ind1) > 0) && (length(ind2) > 0) && (ind1 < ind2)) {
ind2 <- ind2-1
}
if (length(ind1) > 0) {
part1 <- e[ind1]
e <- e[-ind1]
}
if (length(ind2) > 0) {
part2 <- e[ind2]
e <- e[-ind2]
}
if (length(part1) == length(setdiff(inputID, inputID[inputSGD == part2]))) {
d1 <- round(branch_length(mat, part1, setdiff(inputID, inputID[inputSGD == part1])), 3) / 2.0
d2 <- round(branch_length(mat, part2, setdiff(inputID, inputID[inputSGD == part2])), 3) / 2.0
}
else {
d1 <- round(branch_length(mat, part1, setdiff(inputID, inputID[inputSGD == part1])), 3)
d2 <- round(branch_length(mat, part2, setdiff(inputID, inputID[inputSGD == part2])), 3)
}
e <- c(e, paste("(", part1, ":", d1, ",", part2, ":", d2, ")", sep = ""))
return(e)
ind1 <- grep(part1, e)
ind2 <- grep(part2, e)
if ((length(ind1) > 0) && (length(ind2) > 0) && (ind1 < ind2)) {
ind2 <- ind2-1
}
if (length(ind1) > 0) {
part1 <- e[ind1]
e <- e[-ind1]
}
if (length(ind2) > 0) {
part2 <- e[ind2]
e <- e[-ind2]
}
if (length(part1) == length(setdiff(inputID, inputID[inputSGD == part2]))) {
d1 <- round(branch_length(mat, part1, setdiff(inputID, inputID[inputSGD == part1])), 3) / 2.0
d2 <- round(branch_length(mat, part2, setdiff(inputID, inputID[inputSGD == part2])), 3) / 2.0
}
else {
d1 <- round(branch_length(mat, part1, setdiff(inputID, inputID[inputSGD == part1])), 3)
d2 <- round(branch_length(mat, part2, setdiff(inputID, inputID[inputSGD == part2])), 3)
}
e <- c(e, paste("(", part1, ":", d1, ",", part2, ":", d2, ")", sep = ""))
return(e)
}
#############################################################
branch_length <- function(matrix, set1, set2) {
goodTriples <- 0
badTriples <- 0
# computing triplets with one element in set2 and 2 elements in set1
if (length(set1) > 1) {
for (i in 1:length(set1)) {
for (j in i:length(set1)) {
for (k in 1:length(set2)) {
if (k != i && k != j && i != j) {
if (matrix[i,j] < max(matrix[i,k], matrix[j,k])) {
badTriples <- badTriples + 1
}
else {
goodTriples <- goodTriples + 1
}
}
}
}
}
}
# computing triplets with one element in set1 and 2 elements in set2
if (length(set2) > 1) {
for (i in 1:length(set2)) {
for (j in i:length(set2)) {
for (k in 1:length(set1)) {
if (k != i && k != j && i != j) {
if (matrix[i,j] < max(matrix[i,k], matrix[j,k])) {
badTriples <- badTriples + 1
}
else {
goodTriples <- goodTriples + 1
}
}
}
}
}
}
return (goodTriples / (goodTriples + badTriples));
goodTriples <- 0
badTriples <- 0
# computing triplets with one element in set2 and 2 elements in set1
if (length(set1) > 1) {
for (i in 1:length(set1)) {
for (j in i:length(set1)) {
for (k in 1:length(set2)) {
if (k != i && k != j && i != j) {
if (matrix[i,j] < max(matrix[i,k], matrix[j,k])) {
badTriples <- badTriples + 1
}
else {
goodTriples <- goodTriples + 1
}
}
}
}
}
}
# computing triplets with one element in set1 and 2 elements in set2
if (length(set2) > 1) {
for (i in 1:length(set2)) {
for (j in i:length(set2)) {
for (k in 1:length(set1)) {
if (k != i && k != j && i != j) {
if (matrix[i,j] < max(matrix[i,k], matrix[j,k])) {
badTriples <- badTriples + 1
}
else {
goodTriples <- goodTriples + 1
}
}
}
}
}
}
return (goodTriples / (goodTriples + badTriples));
}
# Search all the databases having the same PPI coming from one or several publications
......@@ -83,7 +83,7 @@ DataBases<-function(Final.List.Redondant){
Inter.red<-as.matrix(Final.List.Redondant)
indices<-rep("NA",dim(Inter.red)[1])
for(i in 1:dim(Inter.red)[1])
{
indi<-intersect(c(grep(Inter.red[i,12],Inter.red[,11]),grep(Inter.red[i,12],Inter.red[,12])),c(grep(Inter.red[i,11],Inter.red[,11]),grep(Inter.red[i,11],Inter.red[,12])))
......@@ -157,23 +157,23 @@ search_id <- function(cible, thesaurus) {
}
}
}
return (resultat)
}
return (resultat)
}
############################################################################################
recup_ppi <- function(inputListFile, Base) {
recup_ppi <- function(inputListFile, Base, remove_degree = 0) {
# Recuperation des colonnes des bases qui sont utilisees dans le reseau
Base.f <- as.matrix(Base[,c(1:5,7:11,14:15)])
ind <- unique(c(as.vector(grep(inputListFile[1], Base.f[,1])), as.vector(grep(inputListFile[1], Base.f[,2]))))
cat('\n>Searching interactions...')
......@@ -182,9 +182,23 @@ recup_ppi <- function(inputListFile, Base) {
input_name = paste("^",inputListFile[i],"$", sep = "")
ind <- unique(c(ind, c(as.vector(grep(input_name, Base.f[,1])), as.vector(grep(input_name, Base.f[,2])))))
ind_temp = unique(c(as.vector(grep(input_name, Base.f[,2])), as.vector(grep(input_name, Base.f[,1]))))
if (remove_degree != 0) {
if(length(ind_temp) < remove_degree) {
ind = unique(c(ind, ind_temp))
}
} else {
ind <- unique(c(ind, c(as.vector(grep(input_name, Base.f[,1])), as.vector(grep(input_name, Base.f[,2])))))
}
# ind <- unique(c(ind, c(as.vector(grep(input_name, Base.f[,1])), as.vector(grep(input_name, Base.f[,2])))))
}
......@@ -248,7 +262,7 @@ pubmed_id <- function(Final.List.Redondant, Name, run) {
}
}
pmids <- unique(as.vector(pmids2))
# Rassemblement des different pubmed-ID de chaque interaction
pmids3 <- c()
b <- length (pmids)
......@@ -565,15 +579,16 @@ remove_unique_links <- function(inputtable) {
prot_id = unique(c(tab_PPI[,4], tab_PPI[,5]))
for (i in prot_id) {
ind = c(grep(x = tab_PPI[,4], pattern = i),grep(x = tab_PPI[,5], pattern = i))
# Eliminating self loop from the count of unique link in case the user choosed to remove the unique links
# but not to remove the self loop
tab_ind = table(ind)
tab_ind = tab_ind[tab_ind == 2]
......@@ -586,16 +601,17 @@ remove_unique_links <- function(inputtable) {
for (j in tab_ind) {
ind2 = ind2[!(ind2 == j)]
}
ind2 = unique(ind2)
if(length(ind2) == 1) {
tab_PPI = tab_PPI[-ind,]
}
# if (length(which(tab_PPI[i,4] == tab_PPI[,4:5])) == 1 || length(which(tab_PPI[i,5] == tab_PPI[,4:5])) == 1 ) {
# tab_PPI <- tab_PPI[-i,]
# }
}
cat('OK')
......@@ -698,17 +714,17 @@ saving_window <- function(network2, network.path, Os, r.i, r.s.i, r.u.l, not_fou
not_founds <- "All IDs are found in thesaurus"
}
cat('\n>Saving network ... ')
# Sauvegarde du reseau
out.put.name <- gfile('Save the network', type = "save", initial.dir = network.path, initial.filename = paste(ex.type, Cor, OS, inter.type, 'interactions.txt', sep = '_'))
network2<-remove_redundants(network2)
network2[is.na(network2)] = ""
for(i in 1:length(network2[,1])) {
......@@ -926,19 +942,19 @@ update_db <- function(db, resume, os, Os, organism.path) {
cat(paste(updated.path, out.put.name, sep = '/'))
}
else {
for (s in 1:length(sources)) {
for (s in 1:length(sources)) {
This.base <- database[sources[s] == database[,11],]
if((is.null(dim(This.base))==FALSE)){
cat(paste("\nTaille de la base : "))
cat(paste(dim(This.base)))
out.put.name <- paste(OS, sources[s], "updated-database.txt", sep = "_")
write.table(This.base[,-16], file = paste(organism.path, "Updated_databases", out.put.name, sep = "/"), append = F, row.names = F, quote = F, col.names = T, sep = "\t")
cat('\n- Updated database ')
cat(s)
cat(' : ')
cat(paste(organism.path, out.put.name, sep = '/'))
if((is.null(dim(This.base))==FALSE)){
cat(paste("\nTaille de la base : "))
cat(paste(dim(This.base)))
out.put.name <- paste(OS, sources[s], "updated-database.txt", sep = "_")
write.table(This.base[,-16], file = paste(organism.path, "Updated_databases", out.put.name, sep = "/"), append = F, row.names = F, quote = F, col.names = T, sep = "\t")
cat('\n- Updated database ')
cat(s)
cat(' : ')
cat(paste(organism.path, out.put.name, sep = '/'))
}
}
}
......@@ -966,7 +982,7 @@ cherche_uniprotID<-function(data_i,data,thesaurus){
v2<-c(grep(paste("^",data_i[11],"$",sep = ""),data[,12]))
if((length(unique(c(data[v1,4],data[v2,5])))>1)||(length(resultatA) != 3))
{
# On recupere les identifiants que le thesaurus ne sait pas remplacer tout seul
not_found <- data_i[c(4,1,11)]
nf<<-rbind(nf, not_found)
......@@ -987,7 +1003,7 @@ cherche_uniprotID<-function(data_i,data,thesaurus){
v4<-c(grep(paste("^",data_i[12],"$",sep = ""),data[,12]))
if((length(unique(c(data[v3,4],data[v4,5])))>1)||(length(resultatB) != 3))
{
# On recupere les identifiants que le thesaurus ne sait pas remplacer tout seul
not_found <- data_i[c(5,3,12)]
nf <<- rbind(nf, not_found)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment