Skip to content
Snippets Groups Projects
Commit 27fee2d9 authored by Sebastien Theil's avatar Sebastien Theil
Browse files

Merge branch 'master' of forgemia.inra.fr:umrf/anomaly

parents 016528ce d802a821
No related branches found
No related tags found
No related merge requests found
......@@ -37,7 +37,6 @@ if (is.null(opt$path) & opt$amplicon=="16S"){
stop("You must provide the reads files folder path.", call.=FALSE)
}
if(opt$verbose == 3){
invisible(flog.threshold(DEBUG))
} else {
......@@ -75,6 +74,8 @@ if(opt$compress==TRUE){
fnFs <- sort(list.files(path, pattern = "_R1.fastq$", full.names = TRUE))
fnRs <- sort(list.files(path, pattern = "_R2.fastq$", full.names = TRUE))
}
flog.debug("File list...")
flog.debug(fnFs)
flog.debug(fnRs)
flog.info('Done.')
......
......@@ -35,6 +35,8 @@ option_list = list(
help="Unassigned kingdom or phylum fitering. [default= %default]", metavar="character"),
make_option(c("-k", "--skip"), type="logical", default=FALSE,
help="Skip decontam step. [default= %default]", metavar="character"),
make_option(c("-y", "--manual_cont_rank"), type="character", default="Genus",
help="Rank of taxa to remove, inform 'ASV' to remove ASV. [default= %default]", metavar="character"),
make_option(c("-z", "--manual_cont"), type="character", default=NULL,
help="List of Genus to remove comma separated (eg. g__Enterococcus,g__Cellulosimicrobium,g__Serratia). [default= %default]", metavar="STR")
);
......@@ -309,7 +311,11 @@ if(!is.null(opt$manual_cont)){
ttable = data@tax_table@.Data
taxToKeep5=NULL
for(i in cont_list){
taxToKeep5 = c(taxToKeep5,row.names(ttable)[ttable[,"Genus"]!=i])
if(opt$manual_cont_rank == "ASV"){
taxToKeep5 = c(taxToKeep5,row.names(ttable)[row.names(ttable) !=i ] )
}else{
taxToKeep5 = c(taxToKeep5,row.names(ttable)[ttable[,opt$manual_cont_rank]!=i])
}
}
data <- prune_taxa(taxToKeep5, data)
}
......
......@@ -71,7 +71,7 @@ as.data.frame(as.matrix(data@tax_table))
# Composition
## Rarefaction
```{r rare, results='hide', eval=FALSE}
```{r rare, results='hide', eval=TRUE}
plot_rare <- ggrare(data, step = 100, color = opt$column1, plot = FALSE)
plot_rare <- plot_rare + facet_wrap(opt$column1, nrow = 4) + theme_bw()
......@@ -80,7 +80,7 @@ p0 <- ggplotly(plot_rare)
# htmlwidgets::saveWidget(as_widget(p0), "test.html")
```
```{r plotrare, eval=FALSE}
```{r plotrare, eval=TRUE}
htmltools::tagList(list(p0))
```
......@@ -260,7 +260,7 @@ for (m in metrics){
# Beta diversity
## Beta diversity plots
```{r beta, results='hide', eval=FALSE}
```{r beta, results='hide', eval=TRUE}
b1 <- ggplotly( plot_samples(spdata, ordinate(spdata, "MDS", "bray"), color = opt$column1 )
+ theme_bw() + ggtitle("MDS braycurtis") + stat_ellipse() )
b2 <- ggplotly( plot_samples(spdata, ordinate(spdata, "NMDS", "bray"), color = opt$column1 )
......@@ -277,17 +277,17 @@ b6 <- ggplotly( plot_samples(data, ordinate(data, "NMDS","wunifrac"), color = op
## Beta diversity plots {.tabset}
### BrayCurtis
```{r betaplot1, eval=FALSE}
```{r betaplot1, eval=TRUE}
htmltools::tagList(list(b1,b2))
```
### Unifrac
```{r betaplot2, eval=FALSE}
```{r betaplot2, eval=TRUE}
htmltools::tagList(list(b3,b4))
```
### Weighted Unifrac
```{r betaplot3, eval=FALSE}
```{r betaplot3, eval=TRUE}
htmltools::tagList(list(b5,b6))
```
......
......@@ -13,8 +13,12 @@ option_list = list(
help="Factor to test. [default= %default]", metavar="STR"),
make_option(c("-c", "--subset"), type="character", default="",
help="Subset sample according to 'FACTOR,LEVEL'. [default= %default]", metavar="STR"),
make_option(c("-d", "--lvls"), type="character", default="",
help="Comma separated list levels of factor to print in venn diagram (max. 5). [default= %default]", metavar="STR"),
make_option(c("-k", "--krona"), type="character", default="",
help="Krona of shared ASV between informed level and others. Must be among levels of -b options. [default= %default]", metavar="STR")
help="Krona of exclusive ASV or shared with informed level and others. Must be among levels of -b options. [default= %default]", metavar="STR"),
make_option(c("-l", "--shared"), type="character", default="TRUE",
help="shared [TRUE] or exclusive [FALSE] mode. [default= %default]", metavar="STR")
);
opt_parser = OptionParser(option_list=option_list)
......@@ -30,6 +34,7 @@ if(!dir.exists(opt$out)){
}
flog.info('Loading librairies ...')
suppressMessages(library('glue'))
suppressMessages(library('phyloseq'))
suppressMessages(library('VennDiagram'));
invisible(flog.threshold(futile.logger::ERROR, name = "VennDiagramLogger"))
......@@ -58,7 +63,6 @@ if (is.null(opt$column1)){
quit()
}
# save.image("debug.rdata")
#Subset data
if(opt$subset!=""){
flog.info('Subset phyloseq object ...')
......@@ -73,11 +77,12 @@ if(opt$subset!=""){
#Nombre d'espèce par matrice
flog.info('Parsing factor ...')
# save.image("debug.rdata")
level1 <- na.omit(levels(as.factor(sample_data(data)[,opt$column1]@.Data[[1]])) )
TFdata <- list()
TFtax <- list()
if(!is.null(refseq(data, errorIfNULL=FALSE))){
refseq1 <- as.data.frame(refseq(data)); names(refseq1)="seq"
}else{flog.info('No Tree ...')}
databak <- data
for(i in 1:length(level1)){
databak -> data
......@@ -117,65 +122,130 @@ alltax <- do.call(rbind, TFtax)
alltax <- alltax[!duplicated(alltax[,1]),]
row.names(alltax)=alltax[,1]
flog.info('Plotting ...')
TF <- sapply(TFtax, row.names)
names(TF) = level1
venn.plot <- venn.diagram(TF, filename = NULL, col = "black",
fill = rainbow(length(TF)), alpha = 0.50,
cex = 1.5, cat.col = 1, lty = "blank",
cat.cex = 1.8, cat.fontface = "bold",
margin = 0.1, main=TITRE, main.cex=2.5,
fontfamily ="Arial",main.fontfamily="Arial",cat.fontfamily="Arial") #cat.dist = 0.09,
flog.info('Plotting ...')
VENNFUN <- function(mode = 1){
if(mode==1){
venn.plot <- venn.diagram(TF, filename = NULL, col = "black",
fill = rainbow(length(TF)), alpha = 0.50,
cex = 1.5, cat.col = 1, lty = "blank",
cat.cex = 1.8, cat.fontface = "bold",
margin = 0.1, main=TITRE, main.cex=2.5,
fontfamily ="Arial",main.fontfamily="Arial",cat.fontfamily="Arial") #cat.dist = 0.09,
png(paste(opt$out,'/',TITRE,'_venndiag.png',sep=''), width=20, height=20, units="cm", res=200)
grid.draw(venn.plot)
invisible(dev.off())
ov <- calculate.overlap(TF)
print(sapply(ov, length))
flog.info('Calculating lists ...')
uniqTax = TABf = unique(do.call(c,TF))
for (j in 1:length(TF)){
TABtest = TF[[j]]
TABtest_filt=rep(0, length(uniqTax))
for (i in 1:length(uniqTax)) {
featureI = uniqTax[i]
res=grep( paste('^',featureI,'$', sep="" ) , TABtest)
if(length(res)>0){TABtest_filt[i]=length(res)
}
ov <- calculate.overlap(TF)
print(sapply(ov, length))
flog.info('Calculating lists ...')
uniqTax = TABf = unique(do.call(c,TF))
for (j in 1:length(TF)){
TABtest = TF[[j]]
TABtest_filt=rep(0, length(uniqTax))
for (i in 1:length(uniqTax)) {
featureI = uniqTax[i]
res=grep( paste('^',featureI,'$', sep="" ) , TABtest)
if(length(res)>0){TABtest_filt[i]=length(res)
}
}
TABf=cbind.data.frame( TABtest_filt, TABf )
names(TABf)[1] = names(TF)[j]
}
if(exists("refseq1")){
TABf <- cbind(TABf,alltax[as.character(TABf$TABf),2], refseq1[as.character(TABf$TABf),])
names(TABf) <- c(rev(names(TF)), "ASV", "taxonomy", "seq")
}else{
TABf <- cbind(TABf,alltax[as.character(TABf$TABf),2])
names(TABf) <- c(rev(names(TF)), "ASV", "taxonomy")
}
write.table(TABf, paste(opt$out,"/",TITRE,"_venn_table.csv",sep=""), sep="\t", quote=FALSE, row.names=FALSE)
} else if(mode == 2){
png(paste(opt$out,'/',TITRE,'_venndiag.png',sep=''), width=20, height=20, units="cm", res=200)
venn.plot <- venn::venn(TF , zcol=rainbow(length(TF)))
#venn::venn(TF[c("LI","LA","EA","FI","TR","AI")] , zcol=rainbow(length(TF[c("LI","LA","EA","FI","TR","AI")])))
#venn::venn(TF[c("FI", "TR")] , zcol=rainbow(length(TF[c("FI", "TR")])))
dev.off()
ENVS = names(TF)
Tabf <- NULL; Tab1 <- NULL
for(i in ENVS){
tt = c(i, ENVS[ENVS != i])
print(tt)
yy = Reduce(setdiff, TF[tt])
print(length(yy))
Tab1 <- cbind(rep(i, length(yy)), alltax[yy,])
Tabf <- rbind(Tabf, Tab1)
}
yy <- Reduce(intersect, TF)
Core <- cbind(rep("core", length(yy)), alltax[yy,])
Tabf <- rbind(Core, Tabf)
write.table(Tabf, paste(opt$out,"/",TITRE,"_venn_table.csv",sep=""), sep="\t", quote=FALSE, row.names=FALSE)
}
TABf=cbind.data.frame( TABtest_filt, TABf )
names(TABf)[1] = names(TF)[j]
}
TABf <- cbind(TABf,alltax[as.character(TABf$TABf),2], refseq1[as.character(TABf$TABf),])
names(TABf) <- c(rev(level1), "ASV", "taxonomy", "seq")
# save.image("debug.rdata")
write.table(TABf, paste(opt$out,"/",TITRE,"_venn_table.csv",sep=""), sep="\t", quote=FALSE, row.names=FALSE)
save.image("debug.rdata")
if(opt$krona != ""){
flog.info('Krona ...')
L1=NULL
for(i in level1[level1!=opt$krona]){
l1 = intersect(TF[[i]], TF[[opt$krona]])
L1 = c(L1,l1)
# Specific use to screen taxonomic composition of shared taxa...
if(opt$krona != ""){
TF <- TFbak
flog.info('Krona ...')
env1 <- TF[[opt$krona]]
others1 <- unique( unlist( TF[level1[level1!=opt$krona]] ) )
TF2 <- list(env1, others1)
names(TF2) <- c(opt$krona, "others")
#Venn 2
venn.plot <- venn.diagram(TF2, filename = NULL, col = "black",
fill = rainbow(length(TF2)), alpha = 0.50,
cex = 1.5, cat.col = 1, lty = "blank",
cat.cex = 1.8, cat.fontface = "bold",
margin = 0.1, main=TITRE, main.cex=2.5,
fontfamily ="Arial",main.fontfamily="Arial",cat.fontfamily="Arial") #cat.dist = 0.09,
venn_tab=paste(opt$out,"/",TITRE,"_",opt$krona, "_kronaVenn.png", sep="")
png(venn_tab, width=20, height=20, units="cm", res=200)
grid.draw(venn.plot)
invisible(dev.off())
#Krona
if(opt$shared==TRUE){
L1 = intersect(env1, others1)
}else{
L1 = setdiff(env1, others1)
}
subtaxdata=prune_taxa(L1,data)
ttable <- as.data.frame(subtaxdata@tax_table@.Data)
fttable = cbind.data.frame(rep(1,nrow(ttable)),ttable)
fttable$ASV = row.names(fttable)
dir.create("00test")
flog.info('Write Krona table ...')
krona_tab=paste(opt$out,"/",TITRE,"_",opt$krona, "_krona.txt", sep="")
write.table(fttable, krona_tab, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t")
flog.info('Generate Krona html ...')
output <- paste(opt$out,"/",TITRE,"_",opt$krona, "_krona.html", sep = "")
system(paste("ktImportText", krona_tab, "-o", output, sep = " "))
# browseURL(output)
}
subtaxdata=prune_taxa(L1,data)
ttable <- as.data.frame(subtaxdata@tax_table@.Data)
fttable = cbind.data.frame(rep(1,nrow(ttable)),ttable)
dir.create("00test")
flog.info('Write Krona table ...')
krona_tab=paste(opt$out,"/",TITRE,"_",opt$krona, "_krona.txt", sep="")
write.table(fttable, krona_tab, col.names=FALSE, row.names=FALSE, quote=FALSE, sep="\t")
flog.info('Generate Krona html ...')
output <- paste(opt$out,"/",TITRE,"_",opt$krona, "_krona.html", sep = "")
system(paste("ktImportText", krona_tab, "-o", output, sep = " "))
# browseURL(output)
TFbak <- TF <- sapply(TFtax, row.names)
names(TFbak) = names(TF) = level1
if(length(level1)>5){
flog.info('Too much levels (max. 5) ...')
if(opt$lvls == ""){
flog.info('Selecting 5 first levels ...')
# TF <- TF[c(1:5)]
VENNFUN(mode=2)
}else{
flog.info(glue('Selecting {opt$lvls} ...'))
LVLs <- unlist(strsplit(opt$lvls,","))
TF <- TF[LVLs]
}
} else {
VENNFUN()
}
flog.info('Done ...')
# phy2cyto.R: use this script to generate link file in SIF format and node file for metadatas (used as input in cytoscape)
library(futile.logger)
suppressMessages(library("optparse"))
options(warn=-1)
option_list = list(
make_option(c("-r", "--rdata"), type="character", default=NULL,
help="Rdata file path.", metavar="path"),
make_option(c("-o", "--out"), type="character", default="./cytoscape/",
help="output .Rdata file name. [default= %default]", metavar="path"),
# make_option(c("-a", "--rank"), type="character", default="ASV",
# help="Rank to glom. [default= %default]", metavar="STR"),
make_option(c("-a", "--column1"), type="character", default=NULL,
help="Factor to test. [default= %default]", metavar="STR"),
make_option(c("-b", "--repl"), type="character", default=NULL,
help="Replicates column. [default= %default]", metavar="STR"),
# make_option(c("-c", "--subset"), type="character", default="",
# help="Subset sample according to 'FACTOR,LEVEL'. [default= %default]", metavar="STR"),
make_option(c("-v", "--verbose"), type="integer", default=1,
help="Verbose level. [default= %default]", metavar="character")
);
opt_parser = OptionParser(option_list=option_list)
opt = parse_args(opt_parser)
if (is.null(opt$rdata)){
print_help(opt_parser)
stop("You must provide the Rdata file path.", call.=FALSE)
}
if(!dir.exists(opt$out)){
dir.create(opt$out, recursive=TRUE)
}
if(opt$verbose == 3){
invisible(flog.threshold(DEBUG))
} else {
invisible(flog.threshold(INFO))
}
flog.info('Loading librairies ...')
suppressMessages(library('glue'))
suppressMessages(library('phyloseq'))
load(opt$rdata)
if(is.null(opt$column1) & is.null(opt$repl)){
print(data)
flog.info('Sample variables ...')
print(sample_variables(data))
stop("Choose variables for options -a & -b.", call.=FALSE)
}
data1 <- data
otab = as.data.frame(otu_table(data1))
sdat <- sdata <- as.data.frame(as.matrix(sample_data(data1)))
fact <- levels(as.factor(sdata[,opt$column1]))
# opt$repl <- "temps"
if(is.null(opt$repl)){
flog.info('No replicates, performing links ...')
#Test1 VUE globale sans tenir compte des réplicats
# Interaction table
sif_tab=NULL
for(env in fact ){
flog.debug(print(env))
eval(parse(text=glue("tt <- subset_samples(data1, {opt$column1} %in% '{env}')") )) ###
flog.debug(print(nsamples(tt)))
#ASV present dans au moins 3 échantillons, autre filtre sur abondance?
tt2 = prune_taxa( apply(otu_table(tt), 1, function(x){length(which(x>0))>3}), tt)
flog.debug(print(ntaxa(tt2)))
#Effectif des asv dans la table < 20
otab2 <- as.data.frame(otu_table(tt2))
otab2[otab2<20] = 0
otu_table(tt2) = otu_table(otab2, taxa_are_rows=TRUE)
tt2 <- prune_taxa(taxa_sums(tt2) > 0, tt2)
tabf = cbind(taxa_names(tt2), rep(glue("type_{env}"), ntaxa(tt2)), rep(env, ntaxa(tt2)))
sif_tab = rbind(sif_tab, tabf)
}
flog.info('Output ...')
write.table(as.data.frame(sif_tab), glue("{opt$out}/sif_tab1.tsv"), sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
# Node table
src = unique(sif_tab[,3])
trgt = sif_tab[,1]
freq = taxa_sums(data1) / sum(taxa_sums(data1))
node_table=cbind.data.frame(c(src, trgt), c(rep("source", length(src)), rep("cible", length(trgt))), c(rep(NA, length(src)), freq[trgt]) )
names(node_table)=c("names","attribute", "freq")
node_table[node_table$attribute=="source","freq"] = 0.75*max(na.omit(node_table$freq))
write.table(as.data.frame(node_table), glue("{opt$out}/node_tab1.tsv"), sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
}else{
flog.info('Replicates, performing links ...')
sif_tab = NULL
for(asv in taxa_names(data1)){
# print(asv)
notu = otab[asv,]
nsamples = names(notu)[notu>20] #nombre d'occurence de l'otu
if(length(nsamples) == 1){flog.debug(print(asv))}
sdat2 = sdat[nsamples,]
rep1 = table(as.character(sdat[nsamples,opt$repl]))
reps = names(rep1) #[which(rep1>1)]
# if(max(rep1[which(rep1>1)]) == 1){print(asv)}
# si aucun partage 1 seul environnement
if(length(rep1)==1){
if(rep1==1){
flog.debug(glue("{asv} exclusif 1 seul réplicat 1 env"))
src <- as.character(unique(sdat2[,opt$column1]))
LINKS = c(asv, glue("type_{src}"), src)
}else{
rep=names(rep1)
sdat3 = sdat2[sdat2$exploit_saison==rep,]
srcs = as.character(unique(sdat3[,opt$column1]))
LINKS = cbind(rep(asv, length(srcs)), glue("type_{srcs}"), srcs)
}
} else{
#sinon partage et création d'un lien pour chaque environnement source.
LINKS = NULL
for(rep in reps){
sdat3 = sdat2[sdat2[,opt$repl]==rep,]
srcs = as.character(unique(sdat3[,opt$column1]))
# Lien seulement si l'asv présent dans même sample à 2 environnements ou plus
if(length(srcs)>1){
link = cbind(rep(asv, length(srcs)), glue("type_{srcs}"), srcs)
# print(glue("{rep} {srcs} envs"))
# print(link)
LINKS = rbind(LINKS, link)
}
}
}
sif_tab = rbind(sif_tab, LINKS)
}
save.image("debug.rdata")
flog.info('Output ...')
# LINK table output
#Compte le nombre d'occurence de chaque lien (environnement - ASV)
tt=apply(sif_tab, 1, paste, collapse="-")
# Unique + counts de chaque lien
sif_tabF = cbind(unique(sif_tab), table(tt), names(table(tt)) )
length(unique(sif_tabF[,1]))
length(taxa_names(data1))
write.table(as.data.frame(sif_tabF), glue("{opt$out}/sif_tab2.tsv"), sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
# Node table output
src = unique(sif_tabF[,3])
trgt = sif_tabF[,1]
freq = taxa_sums(data1) / sum(taxa_sums(data1))
# Taille des noeud en fonction de la frequence, noeud "groupe" 0.75 de la freq max.
node_table=cbind.data.frame(c(src, trgt), c(rep("source", length(src)), rep("cible", length(trgt))), c(rep(NA, length(src)), freq[trgt]) )
names(node_table)=c("names","attribute", "freq")
node_table[node_table$attribute=="source","freq"] = 0.75*max(na.omit(node_table$freq))
node_table$tax = "source"
node_table[node_table$attribute == "cible","tax"] = tax_table(data1)[as.character(node_table[node_table$attribute == "cible",1]),"Phylum"]
write.table(as.data.frame(node_table), glue("{opt$out}/node_tab2.tsv"), sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment