# TOOL mothur-classifyseqs.R: "Classify sequences to taxonomic units" (Classifies 16S rRNA sequences to taxonomic units using the Wang method with Silva reference set and taxonomy. As inputs you need a fasta file of aligned sequences, and a count_table file generated by Mothur. This tool is based on the Mothur tools classify.seqs and remove.lineage.)
# INPUT a.fasta: "Aligned reads in FASTA file" TYPE FASTA
# INPUT OPTIONAL a.count_table: "Count table" TYPE MOTHUR_COUNT
# OUTPUT OPTIONAL sequences-taxonomy-assignment.txt
# OUTPUT OPTIONAL classification-summary.tsv
# OUTPUT OPTIONAL picked.fasta 
# OUTPUT OPTIONAL picked.count_table
# PARAMETER OPTIONAL reference: "Reference" TYPE [bacterial, full] DEFAULT bacterial (Silva reference set to use.)
# PARAMETER OPTIONAL iters: "Number of iterations" TYPE INTEGER FROM 10 TO 1000 DEFAULT 100 (How many iterations to do when calculating the bootstrap confidence score for your taxonomy.)
# PARAMETER OPTIONAL toremove: "Remove lineages" TYPE STRING DEFAULT empty (List of lineages to remove. You must wrap your taxon in quotes so mothur knows to ignore the semicolon characters. For example try Chloroplast-mitochondria-Archaea-Eukaryota-unknown.)


# OUTPUT OPTIONAL log.txt
# OUTPUT counttable.tsv: counttable.tsv


# EK 18.06.2013
# JTT 28.8.2013 count table and phenodata added
# ML 21.12.2016 update (new Silva version)
# ML 14.3.2017 reference option (bacterial vs whole)
# ML 23.3.2017 detach the last steps to another tool (mothur-classify-counttable.R), add iters-parameter and remove.lineage option

# If the FASTA input is gzip-compressed, unpack it before mothur reads it
source(file.path(chipster.common.path, "zip-utils.R"))
unzipIfGZipFile("a.fasta")

# Location of the mothur executable
binary <- file.path(chipster.tools.path, "mothur", "mothur")

# Select the Silva template alignment and the matching taxonomy file
# according to the "reference" parameter (bacterial or full).
if (reference == "bacterial") {
	# bacteria-only reference set
	data.path <- file.path(chipster.tools.path, "mothur-silva-reference", "silva.bacteria")
	template.path <- file.path(data.path, "silva.bacteria.fasta")
	taxonomy.path <- file.path(data.path, "silva.bacteria.silva.tax")
} else if (reference == "full") {
	# whole (non-redundant, v123) reference set
	data.path <- file.path(chipster.tools.path, "mothur-silva-reference", "mothur-silva-reference-whole")
	template.path <- file.path(data.path, "silva.nr_v123.align")
	taxonomy.path <- file.path(data.path, "silva.nr_v123.tax")
}

# Build the mothur batch file for the first classify.seqs run.
# The count table is passed to mothur only when the optional input was given.
classify.count.arg <- if (file.exists("a.count_table")) "count=a.count_table, " else ""
classify.call <- paste0(
	"classify.seqs(fasta=a.fasta, ", classify.count.arg,
	"iters=", iters,
	", template=", template.path,
	", taxonomy=", taxonomy.path, ")"
)
write(classify.call, "batch.mth", append = FALSE)

# Run mothur on the batch file; this starts a fresh log.txt (stdout and stderr)
command <- paste(binary, "batch.mth", "> log.txt 2>&1")
system(command)

# mothur names its result files after the reference used, e.g.
# a.silva.wang.taxonomy
# a.silva.wang.tax.summary

## Diagnostic step (marked as a test in the original):
## run mothur's get.current() and append its output to the log
write("get.current()", "batch2.mth", append = FALSE)
system(paste(binary, "batch2.mth", ">> log.txt 2>&1"))

# Rename the reference-specific result files to the fixed tool output names
if (reference == "full") {
	system("mv a.nr_v123.wang.taxonomy sequences-taxonomy-assignment.txt")
	system("mv a.nr_v123.wang.tax.summary classification-summary.tsv")
} else if (reference == "bacterial") {
	system("mv a.silva.wang.taxonomy sequences-taxonomy-assignment.txt")
	system("mv a.silva.wang.tax.summary classification-summary.tsv")
}

# Optional second pass, run only when the user listed lineages to remove
# ("empty" is the default value of the toremove parameter):
#   1. remove.lineage drops the listed taxa from the FASTA (and count table),
#   2. classify.seqs is run again on the remaining sequences,
#   3. the new results replace sequences-taxonomy-assignment.txt and
#      classification-summary.tsv from the first classification.
# All mothur output of this pass is appended to log.txt, so the log of the
# first classification is kept.

if (toremove != "empty") {

	# batch file 2: remove lineage.
	# The count table is passed only when the optional input was given.
	lineage.count.arg <- if (file.exists("a.count_table")) "count=a.count_table, " else ""
	write(
		paste0(
			"remove.lineage(fasta=a.fasta, ", lineage.count.arg,
			"taxonomy=sequences-taxonomy-assignment.txt, taxon=", toremove, ")"
		),
		"batch.mth",
		append = FALSE
	)

	# command
	command <- paste(binary, "batch.mth", ">> log.txt 2>&1")

	# run
	system(command)

	# Output files: a.pick.fasta   a.pick.count_table
	system("mv a.pick.fasta picked.fasta")
	if (file.exists("a.pick.count_table")) {
		system("mv a.pick.count_table picked.count_table")
	}

	# batch file 3: classify.seqs again, now on the filtered sequences.
	# Use the filtered count table when remove.lineage produced one.
	picked.count.arg <- if (file.exists("picked.count_table")) "count=picked.count_table, " else ""
	write(
		paste0(
			"classify.seqs(fasta=picked.fasta, ", picked.count.arg,
			"iters=", iters,
			", template=", template.path,
			", taxonomy=", taxonomy.path, ")"
		),
		"batch.mth",
		append = FALSE
	)

	# command
	# Append (">>") rather than overwrite (">"): overwriting here would discard
	# the log of the first classification and of remove.lineage.
	command <- paste(binary, "batch.mth", ">> log.txt 2>&1")
	# run
	system(command)

	## Diagnostic step (marked as a test in the original):
	## run mothur's get.current() and append its output to the log
	write("get.current()", "batch2.mth", append = FALSE)
	system(paste(binary, "batch2.mth", ">> log.txt 2>&1"))


	# Postprocess output: replace the first-pass results with the ones
	# computed from the filtered sequences
	if (reference == "full") {
		system("mv picked.nr_v123.wang.taxonomy sequences-taxonomy-assignment.txt")
		system("mv picked.nr_v123.wang.tax.summary classification-summary.tsv")
	}
	if (reference == "bacterial") {
		system("mv picked.silva.wang.taxonomy sequences-taxonomy-assignment.txt")
		system("mv picked.silva.wang.tax.summary classification-summary.tsv")
	}

#	# Disabled: summary of the picked sequences
#	# batch file 3 -summary 
#	write("summary.seqs(fasta=picked.fasta, count=picked.count_table)", "summary.mth", append=F)
#	
#	# command 3
#	command3 <- paste(binary, "summary.mth", "> log_raw.txt")
#	# run
#	system(command3)
#	
#	# Post process output
#	system("grep -A 10 Start log_raw.txt > picked-summary.tsv")
#	# Remove one tab to get the column naming look nice:
#	system("sed 's/^		/	/' picked-summary2.tsv > picked-summary.tsv")

}
	

