diff --git a/README.md b/README.md
index 0f73f1a4b60a30d0268812a1eec979ef4f4b1df3..40c43449b8cf3a407ec2b552c879a1a91ed444c5 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,25 @@
 # contig_info
 
 _contig_info_ is a command line program written in [Bash](https://www.gnu.org/software/bash/) for quickly estimating several standard descriptive statistics from FASTA-formatted contig files inferred by _de novo_ genome assembly methods.
-Estimated statistics are sequence number, residue counts, AT- and GC-content, sequence lengths, [auN](https://lh3.github.io/2020/04/08/a-new-metric-on-assembly-contiguity) (also called E-size, Salzberg et al. 2012), [N50](https://en.wikipedia.org/wiki/N50,_L50,_and_related_statistics) (Lander et al. 2001), [NG50](https://en.wikipedia.org/wiki/N50,_L50,_and_related_statistics) (Earl et al. 2011), and the related N(G)75, N(G)G90, L(G)50, L(G)75, L(G)90.
-_contig_info_ can also estimates nucleotide content statistics for each contig sequence.
+Estimated statistics are:
+
+  ▹   sequence number, 
+
+  ▹   nucleotide residue counts, 
+
+  ▹   AT- and GC-content, 
+
+  ▹   sequence lengths, 
+
+  ▹   [auN](https://lh3.github.io/2020/04/08/a-new-metric-on-assembly-contiguity) (also called E-size, Salzberg et al. 2012) or auNG,
+
+  ▹   [N50](https://en.wikipedia.org/wiki/N50,_L50,_and_related_statistics) (Lander et al. 2001) and the related N75 and N90 (e.g. Reinhardt et al. 2009, Craig Venter et al. 2001),
+
+  ▹   [L50](https://en.wikipedia.org/wiki/N50,_L50,_and_related_statistics) and the related L75 and L90,
+
+  ▹   [NG50](https://en.wikipedia.org/wiki/N50,_L50,_and_related_statistics) (Earl et al. 2011) and the related NG75, NG90, LG50, LG75, LG90.
+
+_contig_info_ can also compute nucleotide content statistics for each contig sequence.
 
 ## Installation and execution
 
@@ -37,7 +54,7 @@ Run _contig_info_ without option to read the following documentation:
 
 ## Examples
 
-The following [Bash](https://www.gnu.org/software/bash/) command lines allows the genome sequences of the 5 _Mucor circinelloides_ strains 1006PhL, CBS 277.49, WJ11, B8987 and JCM 22480 to be downloaded from the [NCBI genome repository](https://www.ncbi.nlm.nih.gov/genome):
+The following [Bash](https://www.gnu.org/software/bash/) command lines download the genome sequences of the 5 _Mucor circinelloides_ strains 1006PhL, CBS 277.49, WJ11, B8987 and JCM 22480 from the [NCBI genome repository](https://www.ncbi.nlm.nih.gov/genome):
 
 ```bash
 NCBIFTP="wget -q -O- https://ftp.ncbi.nlm.nih.gov/sra/wgs_aux/"; Z=".1.fsa_nt.gz";
@@ -63,7 +80,7 @@ Residue counts:
   Number of C's                6747611  19.76 %
   Number of G's                6731530  19.72 %
   Number of T's                10335465  30.27 %
-  Number of N's                0  0 %
+  Number of N's                0  0.00 %
   Total                        34134616
 
   %AT                          60.52 %
@@ -94,8 +111,8 @@ The same results can be outputted in tab-delimited format using option `-t`:
 ```
 
 ```
-#File               Nseq   Nres     A        C       G       T        N    %A     %C     %G     %T     %N   %AT    %GC     Min   Q25   Med   Q75   Max    Avg       auN    N50   N75   N90   L50 L75 L90
-Mucor.1006PhL.fasta 1459   34134616 10320010 6747611 6731530 10335465 0    30.23% 19.76% 19.72% 30.27% 0%   60.52% 39.48%  410   1660  6176  37608 213712 23395.89  65329  58982 36291 18584 194 376 562
+#File               Nseq   Nres     A        C       G       T        N    %A     %C     %G     %T     %N     %AT    %GC     Min   Q25   Med   Q75   Max    Avg       auN    N50   N75   N90   L50 L75 L90
+Mucor.1006PhL.fasta 1459   34134616 10320010 6747611 6731530 10335465 0    30.23% 19.76% 19.72% 30.27% 0.00%  60.52% 39.48%  410   1660  6176  37608 213712 23395.89  65329  58982 36291 18584 194 376 562
 ```
 
 Of note, the five downloaded FASTA files can be analyzed with a single command line:
@@ -106,11 +123,11 @@ Of note, the five downloaded FASTA files can be analyzed with a single command l
 
 ```
 #File                 Nseq   Nres      A        C       G       T        N       %A     %C     %G     %T     %N    %AT    %GC     Min   Q25   Med    Q75     Max     Avg         auN     N50      N75     N90      L50 L75 L90
-Mucor.1006PhL.fasta   1459   34134616  10320010 6747611 6731530 10335465 0       30.23% 19.76% 19.72% 30.27% 0%    60.52% 39.48%  410   1660  6176   37608   213712  23395.89    65329   58982    36291   18584    194 376 562
-Mucor.B8987.fasta     2210   36700617  11096810 7247117 7233795 11122895 0       30.23% 19.74% 19.71% 30.30% 0%    60.55% 39.45%  206   839   2482   20727   258792  16606.61    69144   58460    30025   13274    193 416 674
-Mucor.CBS277.49.fasta 21     36567582  10571030 7715901 7705901 10574750 0       28.90% 21.10% 21.07% 28.91% 0%    57.83% 42.17%  4155  41542 934259 3187354 6050249 1741313.42  3912950 4318338  3096690 1074709  4   7   9
+Mucor.1006PhL.fasta   1459   34134616  10320010 6747611 6731530 10335465 0       30.23% 19.76% 19.72% 30.27% 0.00% 60.52% 39.48%  410   1660  6176   37608   213712  23395.89    65329   58982    36291   18584    194 376 562
+Mucor.B8987.fasta     2210   36700617  11096810 7247117 7233795 11122895 0       30.23% 19.74% 19.71% 30.30% 0.00% 60.55% 39.45%  206   839   2482   20727   258792  16606.61    69144   58460    30025   13274    193 416 674
+Mucor.CBS277.49.fasta 21     36567582  10571030 7715901 7705901 10574750 0       28.90% 21.10% 21.07% 28.91% 0.00% 57.83% 42.17%  4155  41542 934259 3187354 6050249 1741313.42  3912950 4318338  3096690 1074709  4   7   9
 Mucor.JCM22480.fasta  401    36616466  10586281 6882218 6899109 10581984 1659222 28.91% 18.79% 18.84% 28.89% 4.53% 60.57% 39.43%  1038  4814  50332  135940  659822  91312.88    229712  197059   109360  63107    61  121 183
-Mucor.WJ11.fasta      2519   33065171  9974064  6559358 6556539 9975210  0       30.16% 19.83% 19.82% 30.16% 0%    60.34% 39.66%  430   3275  7692   18010   118704  13126.30    28368   24148    12884   5672     429 898 1455
+Mucor.WJ11.fasta      2519   33065171  9974064  6559358 6556539 9975210  0       30.16% 19.83% 19.82% 30.16% 0.00% 60.34% 39.66%  430   3275  7692   18010   118704  13126.30    28368   24148    12884   5672     429 898 1455
 ```
 
 The tab-delimited output format can be useful for focusing on specific fields like, e.g. the seven contiguity statistics:
@@ -153,45 +170,51 @@ Option `-r` can be used together with option `-t` to obtain a global view of the
 ```
 
 ```
-#File                  Seq                                                                                           Nres    A       C       G       T       N  %A    %C    %G    %T    %N  %AT   %GC    Pval
-Mucor.CBS277.49.fasta  AMYB01000001.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_01, whole genome shotgun sequence   6050249 1750309 1276313 1271843 1751784 0  28.92 21.09 21.02 28.95 0   57.89 42.11  1.0000
-Mucor.CBS277.49.fasta  AMYB01000002.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_02, whole genome shotgun sequence   5009828 1445835 1059454 1055351 1449188 0  28.85 21.14 21.06 28.92 0   57.79 42.21  0.000
-Mucor.CBS277.49.fasta  AMYB01000003.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_03, whole genome shotgun sequence   4868387 1404688 1027031 1026257 1410411 0  28.85 21.09 21.08 28.97 0   57.83 42.17  0.020
-Mucor.CBS277.49.fasta  AMYB01000004.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_04, whole genome shotgun sequence   4318338 1250156 914240  913373  1240569 0  28.94 21.17 21.15 28.72 0   57.68 42.32  0.000
-Mucor.CBS277.49.fasta  AMYB01000005.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_05, whole genome shotgun sequence   3239665 934794  681540  688359  934972  0  28.85 21.03 21.24 28.86 0   57.72 42.28  0.222
-Mucor.CBS277.49.fasta  AMYB01000006.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_06, whole genome shotgun sequence   3187354 921853  671173  669308  925020  0  28.92 21.05 20.99 29.02 0   57.95 42.05  0.626
-Mucor.CBS277.49.fasta  AMYB01000007.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_07, whole genome shotgun sequence   3096690 894782  653220  654088  894600  0  28.89 21.09 21.12 28.88 0   57.79 42.21  0.444
-Mucor.CBS277.49.fasta  AMYB01000008.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_08, whole genome shotgun sequence   2213752 637973  467830  470692  637257  0  28.81 21.13 21.26 28.78 0   57.61 42.39  0.526
-Mucor.CBS277.49.fasta  AMYB01000009.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_09, whole genome shotgun sequence   1074709 310418  227365  225234  311692  0  28.88 21.15 20.95 29.00 0   57.89 42.11  0.808
-Mucor.CBS277.49.fasta  AMYB01000010.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_10, whole genome shotgun sequence   976311  285831  206369  203542  280569  0  29.27 21.13 20.84 28.73 0   58.02 41.98  0.792
-Mucor.CBS277.49.fasta  AMYB01000011.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_11, whole genome shotgun sequence   934259  273240  195919  194769  270331  0  29.24 20.97 20.84 28.93 0   58.19 41.81  0.378
-Mucor.CBS277.49.fasta  AMYB01000012.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_12, whole genome shotgun sequence   832466  240427  175344  173907  242788  0  28.88 21.06 20.89 29.16 0   58.05 41.95  0.720
-Mucor.CBS277.49.fasta  AMYB01000013.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_13, whole genome shotgun sequence   423239  121227  88619   87831   125562  0  28.64 20.93 20.75 29.66 0   58.31 41.69  0.180
-Mucor.CBS277.49.fasta  AMYB01000014.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_14, whole genome shotgun sequence   155282  45402   31673   31509   46698   0  29.23 20.39 20.29 30.07 0   59.32 40.68  0.000
-Mucor.CBS277.49.fasta  AMYB01000015.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_15, whole genome shotgun sequence   97977   28739   20928   20035   28275   0  29.33 21.36 20.44 28.85 0   58.20 41.80  0.702
-Mucor.CBS277.49.fasta  AMYB01000016.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_16, whole genome shotgun sequence   41542   11324   9228    9406    11584   0  27.25 22.21 22.64 27.88 0   55.15 44.85  0.000
-Mucor.CBS277.49.fasta  AMYB01000017.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_17, whole genome shotgun sequence   17493   5063    3195    3919    5316    0  28.94 18.26 22.40 30.38 0   59.34 40.66  0.504
-Mucor.CBS277.49.fasta  AMYB01000018.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_18, whole genome shotgun sequence   11355   3216    2365    2256    3518    0  28.32 20.82 19.86 30.98 0   59.31 40.69  0.486
-Mucor.CBS277.49.fasta  AMYB01000019.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_19, whole genome shotgun sequence   9869    2972    2278    2160    2459    0  30.11 23.08 21.88 24.91 0   55.04 44.96  0.144
-Mucor.CBS277.49.fasta  AMYB01000020.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_21, whole genome shotgun sequence   4662    1534    815     946     1367    0  32.90 17.48 20.29 29.32 0   62.23 37.77  0.216
-Mucor.CBS277.49.fasta  AMYB01000021.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_22, whole genome shotgun sequence   4155    1247    1002    1116    790     0  30.01 24.11 26.85 19.01 0   49.03 50.97  0.036
+#File                  Seq                                                                                           Nres    A       C       G       T       N  %A     %C     %G     %T     %N     %AT    %GC     Pval
+Mucor.CBS277.49.fasta  AMYB01000001.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_01, whole genome shotgun sequence   6050249 1750309 1276313 1271843 1751784 0  28.92% 21.09% 21.02% 28.95% 0.00%  57.89% 42.11%  0.2610
+Mucor.CBS277.49.fasta  AMYB01000002.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_02, whole genome shotgun sequence   5009828 1445835 1059454 1055351 1449188 0  28.85% 21.14% 21.06% 28.92% 0.00%  57.79% 42.21%  0.2910
+Mucor.CBS277.49.fasta  AMYB01000003.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_03, whole genome shotgun sequence   4868387 1404688 1027031 1026257 1410411 0  28.85% 21.09% 21.08% 28.97% 0.00%  57.83% 42.17%  0.1907
+Mucor.CBS277.49.fasta  AMYB01000004.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_04, whole genome shotgun sequence   4318338 1250156 914240  913373  1240569 0  28.94% 21.17% 21.15% 28.72% 0.00%  57.68% 42.32%  0.8201
+Mucor.CBS277.49.fasta  AMYB01000005.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_05, whole genome shotgun sequence   3239665 934794  681540  688359  934972  0  28.85% 21.03% 21.24% 28.86% 0.00%  57.72% 42.28%  0.1120
+Mucor.CBS277.49.fasta  AMYB01000006.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_06, whole genome shotgun sequence   3187354 921853  671173  669308  925020  0  28.92% 21.05% 20.99% 29.02% 0.00%  57.95% 42.05%  0.3346
+Mucor.CBS277.49.fasta  AMYB01000007.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_07, whole genome shotgun sequence   3096690 894782  653220  654088  894600  0  28.89% 21.09% 21.12% 28.88% 0.00%  57.79% 42.21%  0.2955
+Mucor.CBS277.49.fasta  AMYB01000008.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_08, whole genome shotgun sequence   2213752 637973  467830  470692  637257  0  28.81% 21.13% 21.26% 28.78% 0.00%  57.61% 42.39%  0.6272
+Mucor.CBS277.49.fasta  AMYB01000009.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_09, whole genome shotgun sequence   1074709 310418  227365  225234  311692  0  28.88% 21.15% 20.95% 29.00% 0.00%  57.89% 42.11%  0.8349
+Mucor.CBS277.49.fasta  AMYB01000010.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_10, whole genome shotgun sequence   976311  285831  206369  203542  280569  0  29.27% 21.13% 20.84% 28.73% 0.00%  58.02% 41.98%  0.0381
+Mucor.CBS277.49.fasta  AMYB01000011.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_11, whole genome shotgun sequence   934259  273240  195919  194769  270331  0  29.24% 20.97% 20.84% 28.93% 0.00%  58.19% 41.81%  0.1318
+Mucor.CBS277.49.fasta  AMYB01000012.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_12, whole genome shotgun sequence   832466  240427  175344  173907  242788  0  28.88% 21.06% 20.89% 29.16% 0.00%  58.05% 41.95%  0.7506
+Mucor.CBS277.49.fasta  AMYB01000013.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_13, whole genome shotgun sequence   423239  121227  88619   87831   125562  0  28.64% 20.93% 20.75% 29.66% 0.00%  58.31% 41.69%  0.0489
+Mucor.CBS277.49.fasta  AMYB01000014.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_14, whole genome shotgun sequence   155282  45402   31673   31509   46698   0  29.23% 20.39% 20.29% 30.07% 0.00%  59.32% 40.68%  0.0000
+Mucor.CBS277.49.fasta  AMYB01000015.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_15, whole genome shotgun sequence   97977   28739   20928   20035   28275   0  29.33% 21.36% 20.44% 28.85% 0.00%  58.20% 41.80%  0.2347
+Mucor.CBS277.49.fasta  AMYB01000016.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_16, whole genome shotgun sequence   41542   11324   9228    9406    11584   0  27.25% 22.21% 22.64% 27.88% 0.00%  55.15% 44.85%  0.0000
+Mucor.CBS277.49.fasta  AMYB01000017.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_17, whole genome shotgun sequence   17493   5063    3195    3919    5316    0  28.94% 18.26% 22.40% 30.38% 0.00%  59.34% 40.66%  0.0239
+Mucor.CBS277.49.fasta  AMYB01000018.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_18, whole genome shotgun sequence   11355   3216    2365    2256    3518    0  28.32% 20.82% 19.86% 30.98% 0.00%  59.31% 40.69%  0.5719
+Mucor.CBS277.49.fasta  AMYB01000019.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_19, whole genome shotgun sequence   9869    2972    2278    2160    2459    0  30.11% 23.08% 21.88% 24.91% 0.00%  55.04% 44.96%  0.0099
+Mucor.CBS277.49.fasta  AMYB01000020.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_21, whole genome shotgun sequence   4662    1534    815     946     1367    0  32.90% 17.48% 20.29% 29.32% 0.00%  62.23% 37.77%  0.0002
+Mucor.CBS277.49.fasta  AMYB01000021.1 Mucor lusitanicus CBS 277.49 MUCCIscaffold_22, whole genome shotgun sequence   4155    1247    1002    1116    790     0  30.01% 24.11% 26.85% 19.01% 0.00%  49.03% 50.97%  0.0000
 ```
 
-Note that the last column `Pval` assesses the GC-content adequation between each contig and the longest one.
-When _Pval_ is close to 0, then the %GC of the corresponding contig is significantly different to the %GC of the longest one.
-This _p_-value can be used as an indicator when searching for particular replicons (e.g. plasmids, mitochondrion) or artefactual contigs, as such sequences often induce specific nucleotide compositions.
-Indeed, when considering a FASTA file outputted by a _de novo_ assembly program, the longest contig generally does not correspond to such replicon(s), therefore giving a good approximation of the expected GC-content within the hole chromosome.
+Note that the last column `Pval` assesses the agreement between the GC-content of each contig and that of the overall file content.
+Briefly, (up to) 5,000 nucleotide segments (non-overlapping, of length 200 bases) are first sampled from all the contig sequences, each being used to estimate the %GC, therefore leading to (up to) 5,000 %GC values (i.e. the set GC<sub>all</sub>) representative of the GC-content variation within the whole genome assembly.
+Next, for each contig, (up to) 500 nucleotide segments (non-overlapping, of length 200 bases) are sampled, leading to (up to) 500 %GC values (i.e. the set GC<sub>seq</sub>) representative of the GC-content variation within the contig.
+For each contig sequence, the agreement between GC<sub>seq</sub> and GC<sub>all</sub> is assessed using a Mann-Whitney (1947) _U_ test.
+When _Pval_ is close to 0, the GC-content of the corresponding contig is significantly different from the overall %GC.
+These _U_ test _p_-values can be used to identify artefactual or particular (e.g. plasmid, mitochondrion) contigs, as such sequences often exhibit specific nucleotide compositions.
 
 
 
 
 ## References
 
-Earl D, Bradnam K, St John J, Darling A, Lin D, Fass J, Yu HO, Buffalo V, Zerbino DR, Diekhans M, Nguyen N, Ariyaratne PN, Sung WK, Ning Z, Haimel M, Simpson JT, Fonseca NA, Birol Ä°, Docking TR, Ho IY, Rokhsar DS, Chikhi R, Lavenier D, Chapuis G, Naquin D, Maillet N, Schatz MC, Kelley DR, Phillippy AM, Koren S, Yang SP, Wu W, Chou WC, Srivastava A, Shaw TI, Ruby JG, Skewes-Cox P, Betegon M, Dimon MT, Solovyev V, Seledtsov I, Kosarev P, Vorobyev D, Ramirez-Gonzalez R, Leggett R, MacLean D, Xia F, Luo R, Li Z, Xie Y, Liu B, Gnerre S, MacCallum I, Przybylski D, Ribeiro FJ, Yin S, Sharpe T, Hall G, Kersey PJ, Durbin R, Jackman SD, Chapman JA, Huang X, DeRisi JL, Caccamo M, Li Y, Jaffe DB, Green RE, Haussler D, Korf I, Paten B (2011) _Assemblathon 1: a competitive assessment of de novo short read assembly methods_. **Genome Research**, 21(12):2224-2241. [doi:10.1101/gr.126599.111](https://genome.cshlp.org/content/21/12/2224).
+Craig Venter J, Adams MD, Myers EW, Li PW, Mural RJ, Sutton GG, Smith HO, Yandell M, Evans CA, Holt RA, Gocayne JD, Amanatides P, Ballew RM, Huson DH, Russo Wortman J, Zhang Q, Kodira CD, Zheng XH, Chen L, Skupski M, Subramanian G, Thomas PD, Zhang J, Gabor Miklos GL, Nelson C, Broder S, Clark AG, Nadeau J, McKusick VA, Zinder N, Levine AJ, Roberts RJ, Simon M, Slayman C, Hunkapiller M, Bolanos R, Delcher A, Dew I, Fasulo D, Flanigan M, Florea M, Halpern A, Hannenhalli S, Kravitz S, Levy S, Mobarry C, Reinert K, Remington K, Abu-Threideh J, Beasley E, Biddick K, Bonazzi V, Brandon R, Cargill M, Chandramouliswaran I, Charlab, Chaturvedi K, Deng Z, Di Francesco V, Dunn P, Eilbeck K, Evangelista C, Gabrielian AE, Gan W, Ge W, Gong F, Gu Z, Guan P, Heiman TJ, Higgins ME, Ji M-R, Ke Z, Ketchum KA, Lai Z, Lei Y, Li Z, Li J, Liang Y, Lin X, Lu F, Merkulov GV, Milshina N, Moore HM, Naik HK, Narayan VA, Neelam B, Nusskern D, Rusch DB, Salzberg S, Shao W, Shue B, Sun J, Yuan Wang Z, Wang A, Wang X, Wang J, Wei M-H, Wides R, Xiao C, Yan C, Yao A, Ye J, Zhan M, Zhang W, Zhang H, Zhao Q, Zheng L, Zhong F, Zhong W, Zhu SC, Zhao S, Gilbert D, Baumhueter S, Spier G, Carter C, Cravchik A, Woodage T, Ali F, An H, Awe A, Baldwin D, Baden H, Barnstead M, Barrow I, Beeson K, Busam D, Carver A, Center A, Lai Cheng M, Curry L, Danaher S, Davenport L, Desilets R, Dietz S, Dodson K, Doup L, Ferriera S, Garg N, Gluecksmann A, Hart B, Haynes J, Haynes C, Heiner C, Hladun S, Hostin D, Houck J, Howland T, Ibegwam C, Johnson J, Kalush F, Kline L, Koduru S, Love A, Mann F, May D, McCawley S, McIntosh T, McMullen I, Moy M, Moy L, Murphy B, Nelson K, Pfannkoch C, Pratts E, Puri V, Qureshi V, Reardon M, Rodriguez R, Rogers Y-H, Romblad D, Ruhfel B, Scott R, Sitter C, Smallwood M, Stewart E, Strong R, Suh E, Thomas R, Tint NN, Tse S, Vech C, Wang G, Wetter J, Williams S, Williams M, Windsor S, Winn-Deen E, Wolfe K, Zaveri J, Zaveri K, Abril JF, Guigó R, Campbell MJ, Sjolander KV, Karlak B, 
Kejariwal A, Mi H, Lazareva B, Hatton T, Narechania A, Diemer K, Muruganujan A, Guo N, Sato S, Bafna V, Istrail S, Lippert R, Schwartz R, Walenz B, Yooseph R, Allen D, Basu A, Baxendale J, Blick L, Caminha M, Carnes-Stine J, Caulk P, Chiang Y-H, Coyne M, Dahlke C, Deslattes Mays A, Dombroski M, Donnelly M, Ely D, Esparham S, Fosler C, Gire H, Glanowski S, Glasser K, Glodek A, Gorokhov M, Graham K, Gropman B, Harris M, Heil J, Henderson S, Hoover J, Jennings D, Jordan C, Jordan J, Kasha J, Kagan L, Kraft C, Levitsky A, Lewis M, Liu X, Lopez J, Ma D, Majoros W, McDaniel J, Murphy S, Newman M, Nguyen T, Nguyen N, Nodell M, Pan S, Peck D, Peterson M, Rowe W, Sanders R, Scott J, Simpson M, Smith T, Sprague A, Stockwell T, Turner R, Venter E, Wang M, Wen M, Wu D, Wu M, Xia A, Zandieh A, Zhu X (2001) _The Sequence of the Human Genome_. **Science**, 291(5507):1304-1351. [doi:10.1126/science.1058040](https://science.sciencemag.org/content/291/5507/1304).
 
+Earl D, Bradnam K, St John J, Darling A, Lin D, Fass J, Yu HO, Buffalo V, Zerbino DR, Diekhans M, Nguyen N, Ariyaratne PN, Sung WK, Ning Z, Haimel M, Simpson JT, Fonseca NA, Birol İ, Docking TR, Ho IY, Rokhsar DS, Chikhi R, Lavenier D, Chapuis G, Naquin D, Maillet N, Schatz MC, Kelley DR, Phillippy AM, Koren S, Yang SP, Wu W, Chou WC, Srivastava A, Shaw TI, Ruby JG, Skewes-Cox P, Betegon M, Dimon MT, Solovyev V, Seledtsov I, Kosarev P, Vorobyev D, Ramirez-Gonzalez R, Leggett R, MacLean D, Xia F, Luo R, Li Z, Xie Y, Liu B, Gnerre S, MacCallum I, Przybylski D, Ribeiro FJ, Yin S, Sharpe T, Hall G, Kersey PJ, Durbin R, Jackman SD, Chapman JA, Huang X, DeRisi JL, Caccamo M, Li Y, Jaffe DB, Green RE, Haussler D, Korf I, Paten B (2011) _Assemblathon 1: a competitive assessment of de novo short read assembly methods_. **Genome Research**, 21(12):2224-2241. [doi:10.1101/gr.126599.111](https://genome.cshlp.org/content/21/12/2224).
 
 Lander ES, Linton LM, Birren B, Nusbaum C, Zody MC, Baldwin J, Devon K, Dewar K, Doyle M, FitzHugh W, Funke R, Gage D, Harris K, Heaford A, Howland J, Kann L, Lehoczky J, LeVine R, McEwan P, McKernan K, Meldrim J, Mesirov JP, Miranda C, Morris W, Naylor J, Raymond C, Rosetti M, Santos R, Sheridan A, Sougnez C, Stange-Thomann Y, Stojanovic N, Subramanian A, Wyman D, Rogers J, Sulston J, Ainscough R, Beck S, Bentley D, Burton J, Clee C, Carter N, Coulson A, Deadman R, Deloukas P, Dunham A, Dunham I, Durbin R, French L, Grafham D, Gregory S, Hubbard T, Humphray S, Hunt A, Jones M, Lloyd C, McMurray A, Matthews L, Mercer S, Milne S, Mullikin JC, Mungall A, Plumb R, Ross M, Shownkeen R, Sims S, Waterston RH, Wilson RK, Hillier LW, McPherson JD, Marra MA, Mardis ER, Fulton LA, Chinwalla AT, Pepin KH, Gish WR, Chissoe SL, Wendl MC, Delehaunty KD, Miner TL, Delehaunty A, Kramer JB, Cook LL, Fulton RS, Johnson DL, Minx PJ, Clifton SW, Hawkins T, Branscomb E, Predki P, Richardson P, Wenning S, Slezak T, Doggett N, Cheng JF, Olsen A, Lucas S, Elkin C, Uberbacher E, Frazier M, Gibbs RA, Muzny DM, Scherer SE, Bouck JB, Sodergren EJ, Worley KC, Rives CM, Gorrell JH, Metzker ML, Naylor SL, Kucherlapati RS, Nelson DL, Weinstock GM, Sakaki Y, Fujiyama A, Hattori M, Yada T, Toyoda A, Itoh T, Kawagoe C, Watanabe H, Totoki Y, Taylor T, Weissenbach J, Heilig R, Saurin W, Artiguenave F, Brottier P, Bruls T, Pelletier E, Robert C, Wincker P, Smith DR, Doucette-Stamm L, Rubenfield M, Weinstock K, Lee HM, Dubois J, Rosenthal A, Platzer M, Nyakatura G, Taudien S, Rump A, Yang H, Yu J, Wang J, Huang G, Gu J, Hood L, Rowen L, Madan A, Qin S, Davis RW, Federspiel NA, Abola AP, Proctor MJ, Myers RM, Schmutz J, Dickson M, Grimwood J, Cox DR, Olson MV, Kaul R, Raymond C, Shimizu N, Kawasaki K, Minoshima S, Evans GA, Athanasiou M, Schultz R, Roe BA, Chen F, Pan H, Ramser J, Lehrach H, Reinhardt R, McCombie WR, de la Bastide M, Dedhia N, Blöcker H, Hornischer K, Nordsiek G, Agarwala R, Aravind L, 
Bailey JA, Bateman A, Batzoglou S, Birney E, Bork P, Brown DG, Burge CB, Cerutti L, Chen HC, Church D, Clamp M, Copley RR, Doerks T, Eddy SR, Eichler EE, Furey TS, Galagan J, Gilbert JG, Harmon C, Hayashizaki Y, Haussler D, Hermjakob H, Hokamp K, Jang W, Johnson LS, Jones TA, Kasif S, Kaspryzk A, Kennedy S, Kent WJ, Kitts P, Koonin EV, Korf I, Kulp D, Lancet D, Lowe TM, McLysaght A, Mikkelsen T, Moran JV, Mulder N, Pollara VJ, Ponting CP, Schuler G, Schultz J, Slater G, Smit AF, Stupka E, Szustakowki J, Thierry-Mieg D, Thierry-Mieg J, Wagner L, Wallis J, Wheeler R, Williams A, Wolf YI, Wolfe KH, Yang SP, Yeh RF, Collins F, Guyer MS, Peterson J, Felsenfeld A, Wetterstrand KA, Patrinos A, Morgan MJ, de Jong P, Catanese JJ, Osoegawa K, Shizuya H, Choi S, Chen YJ, Szustakowki J; International Human Genome Sequencing Consortium (2001) _Initial sequencing and analysis of the human genome_. **Nature**, 409(6822):860-921. [doi:10.1038/35057062](https://www.nature.com/articles/35057062).
 
+Mann HB, Whitney DR (1947) _On a Test of Whether one of Two Random Variables is Stochastically Larger than the Other_. **Annals of Mathematical Statistics**, 18(1):50-60. [doi:10.1214/aoms/1177730491](https://doi.org/10.1214/aoms/1177730491).
+
+Reinhardt JA, Baltrus DA, Nishimura MT, Jeck WR, Jones CD, Dangl JL (2009) _De novo assembly using low-coverage short read sequence data from the rice pathogen Pseudomonas syringae pv. oryzae_. **Genome Research**, 19:294-305. [doi:10.1101/gr.083311.108](https://dx.doi.org/10.1101%2Fgr.083311.108).
 
 Salzberg SL, Phillippy AM, Zimin A, Puiu D, Magoc T, Koren S, Treangen TJ, Schatz MC, Delcher AL, Roberts M, Marçais G, Pop M, Yorke JA (2012) _GAGE: A critical evaluation of genome assemblies and assembly algorithms_. **Genome Research**, 22(3):557-567. [doi:10.1101/gr.131383.111](https://genome.cshlp.org/content/22/3/557.long). 
 
diff --git a/contig_info.sh b/contig_info.sh
index d279e13a5e60195bce8c09560916db015e8fac9e..cd020ae84730aa299416038271d95de2cf92ea66 100755
--- a/contig_info.sh
+++ b/contig_info.sh
@@ -33,7 +33,7 @@
 # = VERSIONS =                                                                                               #
 # ============                                                                                               #
 #                                                                                                            #
-  VERSION=2.0.210312ac                                                                                       #
+  VERSION=2.0.210315ac                                                                                       #
 # + adding option -r                                                                                         #
 # + adding trap when interrupting script                                                                     #
 #                                                                                                            #
@@ -157,7 +157,7 @@ then
 fi
 
 SEQS=$(randomfile); ## txt file with one sequence per line
-trap "echo -n interrupting ; rm -f $SEQS &>/dev/null ; echo ; exit " SIGINT
+trap "echo -n interrupting ... ; rm -f $SEQS &>/dev/null ; echo ; exit " SIGINT
 
 for INFILE in "$@"
 do
@@ -165,26 +165,35 @@ do
     
   tr -d '\15\32' < $INFILE | awk -v thr=$MIN_CONTIG_LGT '/^>/{if(length(s)>=thr)print s;s="";next}{s=s$0}END{if(length(s)>=thr)print s}' | tr '[:lower:]' '[:upper:]' > $SEQS ;
   
+  A=$(tr -cd A < $SEQS | wc -c); # number of A characters in $SEQS (tr keeps only 'A', wc counts them)
+  C=$(tr -cd C < $SEQS | wc -c); # number of C characters in $SEQS
+  G=$(tr -cd G < $SEQS | wc -c); # number of G characters in $SEQS
+  T=$(tr -cd T < $SEQS | wc -c); # number of T characters in $SEQS
+  ACGT=$(( $A + $C + $G + $T )); # total count of A, C, G and T characters
+
   if ! $RES_CONTENT
   then
-    R=$(tr -d [:cntrl:] < $SEQS | wc -c); S=$(wc -l < $SEQS); AVG=$(bc -l<<<"scale=2;$R/$S" | sed 's/^\./0./');
-    A=$(tr -cd A < $SEQS | wc -c); fA=$(bc -l <<<"scale=2;100*$A/$R" | sed 's/^\./0./');
-    C=$(tr -cd C < $SEQS | wc -c); fC=$(bc -l <<<"scale=2;100*$C/$R" | sed 's/^\./0./');
-    G=$(tr -cd G < $SEQS | wc -c); fG=$(bc -l <<<"scale=2;100*$G/$R" | sed 's/^\./0./');
-    T=$(tr -cd T < $SEQS | wc -c); fT=$(bc -l <<<"scale=2;100*$T/$R" | sed 's/^\./0./');
-    N=$(tr -cd N < $SEQS | wc -c); fN=$(bc -l <<<"scale=2;100*$N/$R" | sed 's/^\./0./');
-    ACGT=$(( $A + $C + $G + $T ));
-    fGC=$(bc -l <<<"scale=2;100*($C+$G)/$ACGT" | sed 's/^\./0./'); fAT=$(bc -l <<<"scale=2;100-$fGC" | sed 's/^\./0./');
+    R=$(tr -d [:cntrl:] < $SEQS | wc -c); # total residue count: every character of $SEQS except control characters (e.g. newlines)
+    S=$(wc -l < $SEQS); # number of lines in $SEQS, i.e. number of sequences (one sequence per line)
+    AVG=$(bc -l<<<"scale=2;$R/$S" | sed 's/^\./0./'); # average sequence length with 2 decimals; sed prepends the leading 0 when the value starts with '.'
+    fA=$(bc -l <<<"scale=2;100*$A/$R" | sed 's/^0$/0.00/;s/^\./0./'); # %A among all residues; a bare "0" is rewritten as "0.00"
+    fC=$(bc -l <<<"scale=2;100*$C/$R" | sed 's/^0$/0.00/;s/^\./0./'); # %C among all residues
+    fG=$(bc -l <<<"scale=2;100*$G/$R" | sed 's/^0$/0.00/;s/^\./0./'); # %G among all residues
+    fT=$(bc -l <<<"scale=2;100*$T/$R" | sed 's/^0$/0.00/;s/^\./0./'); # %T among all residues
+    N=0; [ $R -ne $ACGT ] && N=$(tr -cd N < $SEQS | wc -c); # N characters are counted only when some residue is not A, C, G or T
+    fN=$(bc -l <<<"scale=2;100*$N/$R" | sed 's/^0$/0.00/;s/^\./0./'); # %N among all residues
+    fGC=$(bc -l <<<"scale=2;100*($C+$G)/$ACGT" | sed 's/^0$/0.00/;s/^\./0./'); # %GC relative to the A+C+G+T count (not to $R)
+    fAT=$(bc -l <<<"scale=2;100-$fGC" | sed 's/^0$/0.00/;s/^\./0./'); # %AT is the complement of %GC
     ER=$R; [ $GENOME_SIZE != 0 ] && ER=$GENOME_SIZE;
     STATS=$(awk '{print length}' $SEQS | sort -rn | awk -v g=$ER '{l[++n]=$0;aun+=$0*$0}
                                                                   END{OFMT="%f";g50=g/2;g75=3*g/4;g90=9*g/10;i=s=n50=n75=n90=0;
                                                                       while(++i<=n&&n90==0){s+=l[i];n50==0&&s>=g50&&n50=l[i]+(l50=i);n75==0&&s>=g75&&n75=l[i]+(l75=i);n90==0&&s>=g90&&n90=l[i]+(l90=i)}
                                                                       n90==0&&n90=l[n]+(l90=n);n75==0&&n75=l[n]+(l75=n);n50==0&&n50=l[n]+(l50=n);
-                                                                      iq1=int(n/4+1);iq2=int(n/2+1);iq3=int(3*n/4+1);
-                                                                      print(n50-l50)"\t"(n75-l75)"\t"(n90-l90)"\t"l50"\t"l75"\t"l90"\t"l[1]"\t"l[iq1]"\t"l[iq2]"\t"l[iq3]"\t"l[n]"\t"int(0.5+aun/g)}');
-    N50=$(cut -f1 <<<"$STATS");  N75=$(cut -f2 <<<"$STATS");  N90=$(cut -f3 <<<"$STATS"); 
-    L50=$(cut -f4 <<<"$STATS");  L75=$(cut -f5 <<<"$STATS");  L90=$(cut -f6 <<<"$STATS"); 
-    Q75=$(cut -f8 <<<"$STATS");  Q50=$(cut -f9 <<<"$STATS");  Q25=$(cut -f10 <<<"$STATS"); 
+                                                                      iq3=int(n/4+1);iq2=int(n/2+1);iq1=int(3*n/4+1);
+                                                                      print(n50-l50)"\t"(n75-l75)"\t"(n90-l90)"\t"l50"\t"l75"\t"l90"\t"l[1]"\t"l[iq3]"\t"l[iq2]"\t"l[iq1]"\t"l[n]"\t"int(0.5+aun/g)}');
+    N50=$(cut -f1  <<<"$STATS"); N75=$(cut -f2 <<<"$STATS");  N90=$(cut -f3  <<<"$STATS"); 
+    L50=$(cut -f4  <<<"$STATS"); L75=$(cut -f5 <<<"$STATS");  L90=$(cut -f6  <<<"$STATS"); 
+    Q75=$(cut -f8  <<<"$STATS"); Q50=$(cut -f9 <<<"$STATS");  Q25=$(cut -f10 <<<"$STATS"); 
     MIN=$(cut -f11 <<<"$STATS"); MAX=$(cut -f7 <<<"$STATS");  AUN=$(cut -f12 <<<"$STATS");
 
     if ! $TSVOUT
@@ -235,12 +244,16 @@ do
   
   ## residue details
 
-  lseq="$(awk '(length()>max){s=$0;max=length()}END{print s}' $SEQS)"; # lseq = the longest contig
-  wslseq="$(tr -cd ACGT <<<"$lseq" | tr ACGT WSSW)"; # the longest contig with only AT/CG replaced by W/S
-  tr -d '\15\32' < $INFILE | awk '!/^>/{s=s$0;next}(s!=""){print toupper(s);s=""}{print}END{print toupper(s)}' | paste - - > $SEQS ; ## tsv file: FASTA header \t contig
+  WS=200;              # window size, i.e. length (in residues) of every sampled segment
+  NS=5000;             # target no. segments sampled over all the contigs (sets the sampling step below)
+  GCALL=$(randomfile); # file storing the GC fractions (C and G recoded as S; one value in [0,1] per line) of the length-$WS segments regularly sampled from all the contigs
+  tr ACGT WSSW < $SEQS | fold -w $WS | tr -cd WS'\n' | awk -v ws=$WS -v step=$(( 1 + $ACGT / ($WS*$NS) )) '(length()!=ws){next}(++n==step){printf("%.6f\n",gsub("S","S")/ws);n=0}' > $GCALL ;
+  NS=500;              # target no. segments sampled from each contig (NS is reused here with a smaller value)
+  GCSEQ=$(randomfile); # file storing the GC fractions of the length-$WS segments regularly sampled from the current contig
+  
+  trap "echo -n interrupting ... ; rm -f $SEQS $GCALL $GCSEQ &>/dev/null ; echo ; exit " SIGINT  # on SIGINT: delete the temporary files, then exit
 
-  FTMP=$(randomfile); ## $REP GC% observed from substrings of length $R of the longest contig
-  trap "echo -n interrupting ; rm -f $SEQS $FTMP &>/dev/null ; echo ; exit " SIGINT
+  tr -d '\15\32' < $INFILE | awk '!/^>/{s=s$0;next}(s!=""){print toupper(s);s=""}{print}END{print toupper(s)}' | paste - - > $SEQS ; ## $SEQS is rewritten as a two-column tsv file: FASTA header \t upper-case contig sequence
 
   while IFS=$'\t' read -r fh seq
   do
@@ -248,22 +261,34 @@ do
     if [ $R -lt $MIN_CONTIG_LGT ]; then continue; fi
 
     NAME=$(tr -d '>' <<<"$fh");
-    A=$(tr -cd A <<<"$seq" | wc -c); fA=$(bc -l <<<"scale=2;100*$A/$R" | sed 's/^\./0./');
-    C=$(tr -cd C <<<"$seq" | wc -c); fC=$(bc -l <<<"scale=2;100*$C/$R" | sed 's/^\./0./');
-    G=$(tr -cd G <<<"$seq" | wc -c); fG=$(bc -l <<<"scale=2;100*$G/$R" | sed 's/^\./0./');
-    T=$(tr -cd T <<<"$seq" | wc -c); fT=$(bc -l <<<"scale=2;100*$T/$R" | sed 's/^\./0./');
-    N=$(tr -cd N <<<"$seq" | wc -c); fN=$(bc -l <<<"scale=2;100*$N/$R" | sed 's/^\./0./');
+    A=$(tr -cd A <<<"$seq" | wc -c); fA=$(bc -l <<<"scale=2;100*$A/$R" | sed 's/^0$/0.00/;s/^\./0./');
+    C=$(tr -cd C <<<"$seq" | wc -c); fC=$(bc -l <<<"scale=2;100*$C/$R" | sed 's/^0$/0.00/;s/^\./0./');
+    G=$(tr -cd G <<<"$seq" | wc -c); fG=$(bc -l <<<"scale=2;100*$G/$R" | sed 's/^0$/0.00/;s/^\./0./');
+    T=$(tr -cd T <<<"$seq" | wc -c); fT=$(bc -l <<<"scale=2;100*$T/$R" | sed 's/^0$/0.00/;s/^\./0./');
     ACGT=$(( $A + $C + $G + $T ));
-    fGC=$(bc -l <<<"scale=2;100*($C+$G)/$ACGT" | sed 's/^\./0./'); fAT=$(bc -l <<<"scale=2;100-$fGC" | sed 's/^\./0./');
+    N=0; [ $R -ne $ACGT ] && N=$(tr -cd N <<<"$seq" | wc -c);  # N is counted only when $R differs from $ACGT (presumably: $seq holds non-ACGT residues)
+    fN=$(bc -l <<<"scale=2;100*$N/$R" | sed 's/^0$/0.00/;s/^\./0./');
+    fGC=$(bc -l <<<"scale=2;100*($C+$G)/$ACGT" | sed 's/^0$/0.00/;s/^\./0./');
+    fAT=$(bc -l <<<"scale=2;100-$fGC" | sed 's/^0$/0.00/;s/^\./0./');
 
-    REP=111; [ $R -gt 1000000 ] && REP=99; [ $R -gt 10000000 ] && REP=77;
-    [ "$seq" != "$lseq" ] && awk -v l=$R -v r=$REP '{srand(l-r);x=length()-l;while(--r>=0){s=substr($0,1+int(x*rand()),l);printf("%.6f\n",gsub("S","S",s)/length(s))}}' <<<"$wslseq" > $FTMP ;
-    gc=$(awk -v gc=$(( $C + $G )) -v acgt=$ACGT 'BEGIN{printf("%.6f\n",gc/acgt)}');
-    #echo "$gc   $(tr '\n' ' ' <$FTMP)" ;
-    [ -e $FTMP ] && pv=$(awk -v x=$gc '{++n;(x<=$0)&&++p}END{printf("%.3f",p/n)}' $FTMP) || pv=0.500;
-    pv=$(awk '{p=(($0<0.5)?0.5-$0:$0-0.5);printf("%.3f",1-2*p)}' <<<"$pv");
-    [ ! -e $FTMP ] && pv=$pv"0";
-    rm -f $FTMP ;
+    ### 1. sampling (up to) $NS segments of length $WS at regular intervals along $seq, and writing the GC fraction of each segment into $GCSEQ
+    tr ACGT WSSW <<<"$seq" | fold -w $WS | tr -cd WS'\n' | awk -v ws=$WS -v step=$(( 1 + ($R / ($WS*$NS)) )) '(length()!=ws){next}(++n==step){printf("%.6f\n",gsub("S","S")/ws);n=0}' > $GCSEQ ; 
+    ### 2. comparing the GC fractions sampled from $seq (file $GCSEQ) to those sampled from all the contigs (file $GCALL) with a two-sided Mann-Whitney U test (normal approximation; ties are not counted)
+    ###    pv is n/a when $GCSEQ is empty; the second awk call, guarded by false &&, is a disabled debugging variant that prints the intermediate values
+    pv=$(awk 'function abs(x){return (x<0)?-x:x}
+              function erf(x){q=t=1.0/(1+0.47047*x);sum=0.3480242*t;sum-=0.0958798*(t*=q);sum+=0.7478556*(t*=q);return 1.0-(sum*exp(-x*x))}
+              (FNR==NR){a1[++n1]=$0;next}{a2[++n2]=$0}
+              END{while(++i2<=n2){v2=a2[i2];i1=0;while(++i1<=n1)(v2>a1[i1])&&++u}
+                  if(n2==0){print"n/a";exit}
+                  if(n2==1){p=u/n1;p=abs(0.5-p);printf("%.4f",1-2*p);exit}
+                  mean=n1*n2/2;(u>mean)&&u=2*mean-u;sd=sqrt(mean*(n1+n2+1)/6);z=abs(u-mean)/sd;p=(1+erf(z/sqrt(2)))/2;p=2*(1-p); printf("%.4f",p)}' $GCALL $GCSEQ);
+    false && awk 'function abs(x){return (x<0)?-x:x}
+                  function erf(x){q=t=1.0/(1+0.47047*x);sum=0.3480242*t;sum-=0.0958798*(t*=q);sum+=0.7478556*(t*=q);return 1.0-(sum*exp(-x*x))}
+                  (FNR==NR){a1[++n1]=$0;next}{a2[++n2]=$0}
+                  END{while(++i2<=n2){v2=a2[i2];i1=0;while(++i1<=n1)(v2>a1[i1])&&++u}
+                      if(n2==0){print"n/a";exit}
+                      if(n2==1){p=u/n1;p=abs(0.5-p);print n1" "n2" "u" "(u/n1)" "(1-2*p);exit}
+                      mean=n1*n2/2;(u>mean)&&u=2*mean-u;sd=sqrt(mean*(n1+n2+1)/6);z=abs(u-mean)/sd;p=(1+erf(z/sqrt(2)))/2;p=2*(1-p); print n1" "n2" "u" "mean" "sd" "z" "p}' $GCALL $GCSEQ
     
     if ! $TSVOUT
     then
@@ -286,14 +311,14 @@ do
       echo "Composition test p-value:      $pv" ;
       echo ;
     else
-      CSVLINE="$(basename $INFILE)\t$NAME\t$R\t$A\t$C\t$G\t$T\t$N\t$fA\t$fC\t$fG\t$fT\t$fN\t$fAT\t$fGC\t$pv";
+      CSVLINE="$(basename $INFILE)\t$NAME\t$R\t$A\t$C\t$G\t$T\t$N\t$fA%\t$fC%\t$fG%\t$fT%\t$fN%\t$fAT%\t$fGC%\t$pv";
       echo -e "$CSVLINE" ;
     fi
   done < $SEQS
   
 done
 
-rm -f $SEQS $FTMP ;
+rm -f $SEQS $GCALL $GCSEQ ;
 
 exit ;