Commit 702933b8 authored by Julien GUGLIELMINI

Initial commit

parents
#!/bin/bash
# AWK program that lists the synapomorphies of a clade in an alignment.
#
# Expected invocation (the same table is read twice):
#   awk -v clade="taxon1,taxon2" -v out=<output_file> "$AWKCMD" table table
# where every line of table is ">name<TAB>sequence".
#
# Variables:
#   clade - comma-separated names of the sequences forming the clade
#   out   - file receiving the Site/State/Score table (tab-separated)
#
# A site is reported when every clade sequence carries the same state as the
# first clade sequence and no sequence outside the clade carries that state.
# Its score is (distinct states at the site - 1) divided by
# (distinct states in the whole alignment - 1), or 0 when the alignment holds
# a single state. A one-line summary is printed on standard output.
#
# NOTE: the program is wrapped in single quotes, so it must never contain one.
AWKCMD='
# Register state c at site i: scores[i] counts the distinct states seen at
# site i and nchars counts the distinct states seen in the whole alignment.
function tally(i, c) {
  if (!states[i, c]++) {
    scores[i]++
  }
  if (!(c in alphabet)) {
    alphabet[c] = 1
    nchars++
  }
}

BEGIN {
  split(clade, tax, ",")
  FS = OFS = "\t"
  for (i in tax) {
    taxa[tax[i]]++
  }
  first = 0
  nsites = 0
  nchars = 0
}

# Pass 1 (first read of the table): sequences that belong to the clade.
NR == FNR && substr($1, 2) in taxa {
  if (first) {
    # Any disagreement with the reference clade sequence disqualifies a site.
    n = split($2, seq, "")
    for (i = 1; i <= n; i++) {
      if (seq[i] != clade_seq[i]) {
        synapomorphies[i] = 0
      }
      tally(i, seq[i])
    }
  } else {
    # The first clade sequence is the reference: every site starts as a
    # candidate and its own states are tallied like those of any sequence.
    nsites = split($2, clade_seq, "")
    for (i = 1; i <= nsites; i++) {
      synapomorphies[i] = 1
      tally(i, clade_seq[i])
    }
    first = 1
  }
}

# Pass 2 (second read of the table): sequences outside the clade.
NR > FNR && !(substr($1, 2) in taxa) {
  n = split($2, seq, "")
  for (i = 1; i <= n; i++) {
    # A state shared with an outside sequence is not a synapomorphy.
    if (seq[i] == clade_seq[i]) {
      synapomorphies[i] = 0
    }
    tally(i, seq[i])
  }
}

END {
  # Largest value scores[i] - 1 can reach; keeps every score within [0, 1].
  span = nchars - 1
  nsyna = 0
  outsyna = ""
  # Walk every site of the reference sequence, last column included.
  for (i = 1; i <= nsites; i++) {
    if (synapomorphies[i]) {
      nsyna++
      # Guard against a one-state alignment, where span is 0.
      score = (span > 0) ? (scores[i] - 1) / span : 0
      outsyna = outsyna i "\t" clade_seq[i] "\t" score "\n"
    }
  }
  if (!nsyna) {
    print "No synapomorphy found for taxa " clade
  } else {
    print "Found " nsyna " synapomorphies."
    print "Site", "State", "Score" > out
    printf "%s", outsyna > out
  }
}
'
#######################################
# Collapse a FASTA file into a two-column table, one record per line:
# the header line (leading ">" kept), a tab, then the sequence with all of
# its lines joined. Blank lines are skipped, and so is any text found before
# the first header.
# Arguments: $1 - path to the FASTA file
# Outputs:   the table on stdout
#######################################
fasta_to_table() {
  awk '
    NF == 0 { next }
    /^>/ {
      if (seen) {
        print header "\t" seq
      }
      header = $0
      seq = ""
      seen = 1
      next
    }
    seen { seq = seq $0 }
    END {
      if (seen) {
        print header "\t" seq
      }
    }
  ' "$1"
}

#######################################
# List the requested taxa that have no record in the table.
# Arguments: $1 - comma-separated taxon names
#            $2 - table produced by fasta_to_table
# Outputs:   one missing name per line on stdout (nothing if all are found)
#######################################
missing_taxa() {
  awk -v clade="$1" '
    BEGIN {
      split(clade, tax, ",")
      FS = OFS = "\t"
      for (i in tax) {
        taxa[tax[i]] = 0
      }
    }
    substr($1, 2) in taxa { taxa[substr($1, 2)]++ }
    END {
      for (name in taxa) {
        if (!taxa[name]) {
          print name
        }
      }
    }
  ' "$2"
}

#######################################
# Entry point: check the arguments, build the temporary table, make sure
# every requested taxon exists, then run the AWK program held in AWKCMD.
# Globals:   AWKCMD (read)
# Arguments: $1 - comma-separated names of the clade sequences
#            $2 - input FASTA file
#            $3 - output file
# Outputs:   summary on stdout, diagnostics on stderr
# Returns:   0 on success, 2 on usage error, non-zero on any other failure
#######################################
main() {
  if [[ "$#" -le 2 ]]; then
    {
      echo "Error : wrong number of argument."
      echo "Usage :"
      echo " ./get_synapomorphies.sh \"sequence1,sequence2\" <input_file> <output_file>"
    } >&2
    return 2
  fi

  local clade=$1 input=$2 output=$3
  local table
  # Unpredictable name outside the working directory, so no user file can be
  # overwritten by the intermediate table.
  table=$(mktemp "${TMPDIR:-/tmp}/get_synapomorphies.XXXXXX") || {
    echo "Error : cannot create temporary file." >&2
    return 1
  }

  # The subshell scopes the EXIT trap: the table is removed however the
  # processing ends, and the subshell status becomes the status of main.
  (
    trap 'rm -f -- "$table"' EXIT
    fasta_to_table "$input" > "$table" || exit 1
    missing=$(missing_taxa "$clade" "$table")
    if [[ -n "$missing" ]]; then
      echo "Error : Taxa not found." >&2
      printf '%s\n' "$missing" >&2
      exit 1
    fi
    awk -v clade="$clade" -v out="$output" "$AWKCMD" "$table" "$table"
  )
}

main "$@"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment