diff --git a/annotate/sqlAnnotToTxt.py b/annotate/sqlAnnotToTxt.py
index 146b0db869ef902568be5e0ae599ac60ff02d45c..0525509b410a1159a500487e2811b5d5f9fd55b6 100644
--- a/annotate/sqlAnnotToTxt.py
+++ b/annotate/sqlAnnotToTxt.py
@@ -24,6 +24,7 @@ ccf = conn.cursor()
 ckw = conn.cursor()
 
 for currFamR in ccf.execute('SELECT * FROM family'):
+    print("b")
     currF,currSize,currNbseq,currLCA = currFamR
     o = open("{dir}/{prefix}{id}.txt".format(dir=args.txtDir,prefix=args.prefix,id=currF),"w")
     lineage = tdb_taxid.lineage_name(int(currLCA), reverse=True)
@@ -35,6 +36,8 @@ for currFamR in ccf.execute('SELECT * FROM family'):
         for line in f:
             if line.startswith(">"):
                 o.write(line[1:])
+    o.close()
+    print("e")
     
     
 
diff --git a/build/makeFastaFromCluster.py b/build/makeFastaFromCluster.py
index 0077063422c43609beaeae1d83d6d8d4ff21da36..5a691af5d08fb9c4573deb24b0e33c808a6b6694 100644
--- a/build/makeFastaFromCluster.py
+++ b/build/makeFastaFromCluster.py
@@ -7,22 +7,24 @@ parser = argparse.ArgumentParser()
 parser.add_argument("master", help="fasta containing all sequences")
 parser.add_argument("silix", help="result of Silix computation")
 parser.add_argument("destination", help="directory in which create new fasta files (must exist, should be empty)")
+parser.add_argument("--minSize", help="minimum number of sequences to be considered as a cluster",type=int,default=2)
 args = parser.parse_args()
 
 
 # load clusters in memory
-clusters = open(args.silix)
-seqToFam = {}
-for seq in clusters:
-    ids = seq.strip().split()
-    seqToFam[ids[1]] = ids[0]
-clusters.close()    
+clusters = map(lambda l: l.strip().split(),open(args.silix))
+seqToFam = {l[1]: l[0] for l in clusters}
 
 
+from collections import Counter
+famSize = Counter(seqToFam.values())
+
 
 def addSeqToFamilyFile(name,sequence):
     if len(name) == 0:
         return
+    elif famSize[seqToFam[name.split()[0]]] < args.minSize:
+        return
     family = seqToFam[name.split()[0]]
     ofasta = open(args.destination + "/" + family + ".fasta","a")
     ofasta.write(">" + name + "\n" + "".join(sequence))
diff --git a/pipeline/Snakefile b/pipeline/Snakefile
index b9ec267095c5a3a738c753137ba6fcc5dbf18182..75ffcece390d8d171bcb7d996da763d4fc4492b2 100644
--- a/pipeline/Snakefile
+++ b/pipeline/Snakefile
@@ -1,38 +1,39 @@
-localrules: ConvProt_splitInNames, ConvProt_mergeFasta, Profiles_splitProtFasta, Profiles_makeBlastDB, Profiles_blastToClusters, Profiles_clustersToFamilies, Profiles_filterNonUnique, Pofiles_mergeBlast, Profiles_genAnnot, Profiles_mergeHmm, origNucl,Profiles_genTxtAnnot
+localrules: ConvProt_splitInNames, ConvProt_mergeFasta, Profiles_splitProtFasta, Profiles_makeBlastDB, Profiles_blastToClusters, Profiles_clustersToFamilies, Pofiles_mergeBlast, Profiles_genAnnot, Profiles_mergeHmm, origNucl,Profiles_genTxtAnnot
 
 workdir: "/pasteur/scratch/tbigot/rvdb/profiles/"
 jobname="rvdb"
 
 shell("mkdir -p logs/cluster")
 
+sampleName = "rvdb"
+
 rule all:
     input:
         "prot.hmm",
-        "prot-hmm.sqlite3",
-        dynamic("annot/{nuId}.txt")
+        "prot-hmm.sqlite",
+        dynamic("annot/{famId2}.txt")
         
 rule Profiles_genTxtAnnot:
     input:
-        sqlite = "prot-hmm.sqlite3",
-        txtDir = "annot/",
+        sqlite = "prot-hmm.sqlite",
         taxadb = "/local/databases/rel/taxadb/current/db/taxadb_full.sqlite",
     params:
-        prefix = ""
+        txtDir = "annot/",
     output:
-        dynamic("annot/{nuId}.txt")
+        dynamic("annot/{famId2}.txt")
     shell:
-        "module load Python/3.6.0 ; python3 {workflow.basedir}/../annotate/sqlAnnotToTxt.py {input.sqlite} {input.txtDir} {input.taxadb} --prefix {params.prefix} "
+        "module load Python/3.6.0 ; python3 {workflow.basedir}/../annotate/sqlAnnotToTxt.py {input.sqlite} {params.txtDir} {input.taxadb}"
         
 rule Profiles_genAnnot:
     input:
-        dynamic("hmm/{nuId}.hmm")
+        dynamic("hmm/{famId}.hmm")
     output:
-        "prot-hmm.sqlite3"
+        "prot-hmm.sqlite"
     params:
         hmmDir = "hmm",
         alignedDir = "aligned",
         taxaDB = "/local/databases/rel/taxadb/current/db/taxadb_full.sqlite",
-        schema = "{workflow.basedir}/../annotate/annotations_schema.sql"
+        schema = "{}/../annotate/annotations_schema.sql".format(workflow.basedir)
     shell:
         "module load Python/3.6.0 ; python3 {workflow.basedir}/../annotate/makeAnnotationsIntoSQL.py {params.hmmDir} {params.alignedDir} {params.taxaDB} {params.schema} {output}"
     
@@ -40,7 +41,7 @@ rule Profiles_genAnnot:
     
 rule Profiles_mergeHmm:
     input:
-        dynamic("hmm/{nuId}.hmm")
+        dynamic("hmm/{famId}.hmm")
     output:
         "prot.hmm"
     run:
@@ -54,8 +55,7 @@ rule Profiles_hmmbuild:
         hmm="hmm/{part}.hmm",
         log="hmm/{part}.log"
     run:
-        famName = wildcards["part"]
-        shell("module load hmmer/3.1b2 && hmmbuild --informat afa -n {famName} -o {output.log} {output.hmm} {input}")
+        shell("module load hmmer/3.1b2 ; hmmbuild --informat afa -n {wildcards.part} -o {output.log} {output.hmm} {input}")
 
 rule Profiles_align:
     input:
@@ -63,35 +63,21 @@ rule Profiles_align:
     output:
         "aligned/{part}.fasta"
     threads:
-        28
+        1
     shell:
         "module load fasta mafft && mafft --thread {threads} --anysymbol --auto {input} > {output}"
 
-rule Profiles_filterNonUnique:
-    input:
-        dynamic("familiesRaw/{famId}.fasta")
-    output:
-        dynamic("families/{nuId}.fasta")
-    run:
-        for raw in input:
-            cr = open(raw)
-            nbSeq = 0
-            for l in cr:
-                if l.startswith(">"):
-                    nbSeq+=1
-                if nbSeq > 1:
-                    break
-            if nbSeq > 1:
-                shell("cd families && ln -s ../{raw} .")
 
 rule Profiles_clustersToFamilies:
     input:
         clusters="prot.fnodes",
         fasta="prot_collapsed.fasta"
     output:
-        dynamic("familiesRaw/{famId}.fasta")
+        dynamic("families/{famId}.fasta")
+    params:
+        minSize=4
     shell:
-        "python3 {workflow.basedir}/../build/makeFastaFromCluster.py {input.fasta} {input.clusters} familiesRaw/"
+        "python3 {workflow.basedir}/../build/makeFastaFromCluster.py --minSize {params.minSize} {input.fasta} {input.clusters} familiesRaw/"
 
 
 rule Profiles_blastToClusters: