From 61926e2fdb7a7d6615c8d24055a36109fd38be48 Mon Sep 17 00:00:00 2001
From: hanna julienne <hanna.julienne@pasteur.fr>
Date: Wed, 22 Aug 2018 18:19:05 +0200
Subject: [PATCH] enriched doc string

---
 impute_jass/doc/source/index.rst              |  6 ++++
 .../impute_jass/imputation_launcher.py        | 35 +++++++++++--------
 impute_jass/impute_jass/ld_matrix.py          | 31 ++++++++++++----
 impute_jass/impute_jass/stat_models.py        |  1 +
 4 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/impute_jass/doc/source/index.rst b/impute_jass/doc/source/index.rst
index e6546bb..e275b27 100644
--- a/impute_jass/doc/source/index.rst
+++ b/impute_jass/doc/source/index.rst
@@ -13,6 +13,12 @@ Welcome to Peppa-PIG's documentation!
 What is Peppa-PIG ?
 ===================
 
+Dependencies
+============
+
+Peppa-PIG requires plink (exact version to be specified).
+
+
 Installation
 ============
 
diff --git a/impute_jass/impute_jass/imputation_launcher.py b/impute_jass/impute_jass/imputation_launcher.py
index 85a250b..2d70452 100644
--- a/impute_jass/impute_jass/imputation_launcher.py
+++ b/impute_jass/impute_jass/imputation_launcher.py
@@ -7,14 +7,12 @@ import pandas as pd
 from .windows import ld_region_centered_window_imputation, prepare_zscore_for_imputation, impg_like_imputation, realigned_zfiles_on_panel
 
 
-
 class ImputationLauncher(object):
     """
-    Class perform imputation of snp from summary statistic
-
+    Class to perform imputation of SNPs from summary statistics
     """
     def __init__(self, window_size=10000, buf=2500,
-                 lamb= 0.01, pinv_rcond = 0.01 ):
+                 lamb= 0.01, pinv_rcond = 0.01):
         """
         Initialise the imputation object. Fix the windows size, the buffer size
         and the king of imputation employed
@@ -22,12 +20,11 @@ class ImputationLauncher(object):
         Args:
             window_size (int): size of the imputation window in bp
             buffer (int): the size of the padding around the windows of
-                            imputation (relevant only for batch imputation)
+                imputation (relevant only for batch imputation)
             lamb (float): size of the increment added to snp correlation
-                        matrices to make it less singular
+                matrices to make it less singular
             pinv_rcond (float): the rcond scipy.linalg.pinv function argument.
-            The scipy.linalg.pinv is used to invert
-             the correlation matrices
+                The scipy.linalg.pinv is used to invert the correlation matrices
         """
 
         self.window_size = window_size
@@ -42,12 +39,13 @@ class ImputationLauncher(object):
         parameters
 
         Args:
-            chrom : str specifying chromosome
-            zscore : known zscore
-            ref_panel : location of the folder of reference chromosome
-            ld_folder: location of linkage desiquilibrium matrices
-        Returns
-            Imputed zscore dataframe
+            chrom (str): chromosome "chr*"
+            zscore (pandas dataframe): known zscore
+            ref_panel (str): path of the folder of reference panel
+            ld_folder (str): path of the folder containing linkage disequilibrium matrices
+
+        Returns:
+            pandas dataframe: Imputed zscore dataframe
         """
         pattern = "{0}/{1}_*.ld".format(ld_folder, chrom)
         zscore = prepare_zscore_for_imputation(ref_panel, zscore)
@@ -70,8 +68,15 @@ class ImputationLauncher(object):
 
     def genome_imputation(self, gwas_tag, ref_panel_folder, ld_folder, zscore_folder, folder_output):
         """
-        Launch imputation on all chromosome for one trait
+        Launch imputation on all chromosomes for one trait by calling
+        chromosome_imputation for each chromosome
 
+        Args:
+            gwas_tag (str): a short string to annotate imputed GWAS files
+            ref_panel_folder (str): path of the folder of reference panel
+            ld_folder (str): path of the folder containing linkage disequilibrium matrices
+            zscore_folder (str): path of the folder for input GWAS files
+            folder_output (str): path of the folder for imputed GWAS files
         """
 
         for i in range(1, 23):
diff --git a/impute_jass/impute_jass/ld_matrix.py b/impute_jass/impute_jass/ld_matrix.py
index 95a1626..5b17f41 100644
--- a/impute_jass/impute_jass/ld_matrix.py
+++ b/impute_jass/impute_jass/ld_matrix.py
@@ -3,7 +3,7 @@
     Function set to compute LD correlation from a reference panel
     in predefined Region
 
-    LD matrix are then stored to the scipy sparse matrix format
+    LD matrices are then transformed to the pandas sparse format
 """
 
 import scipy as sc
@@ -16,7 +16,15 @@ import re
 
 def launch_plink_ld(startpos, endpos, chr, reffile, folder):
     """
-    launch plink ld
+    launch plink linkage disequilibrium correlation and save
+    the output
+
+    Args:
+        startpos (int): position of the start of the window
+        endpos (int): position of the end of the window
+        chr (str): chromosome identifier (passed to plink --chr)
+        reffile (str): reference panel file
+        folder (str): output folder
     """
     bimref = reffile + ".bim"
     ref_panel = pd.read_csv(bimref, sep="\t", names=['chr', "nothing", 'pos', 'Ref_all', 'alt_all'], index_col = 1)
@@ -26,16 +34,23 @@ def launch_plink_ld(startpos, endpos, chr, reffile, folder):
 
     fo = "{0}/chr{1}_{2}_{3}".format(folder, chr, startpos, endpos)
 
-    cmd = "p-link --noweb --bfile {0} --r --ld-snp-list ./snp_list.txt --ld-window 50 --ld-window-kb 3000 --ld-window-r2 0.4 --chr {1} --out {2}".format(reffile, chr, fo)
-
-
+    cmd = "plink --bfile {0} --r --ld-snp-list ./snp_list.txt --ld-window 50 --ld-window-kb 3000 --ld-window-r2 0.4 --chr {1} --out {2}".format(reffile, chr, fo)
     sub.check_output(cmd, shell=True)
 
 
 def generate_sparse_matrix(plink_ld, ref_chr_df):
     """
+    Extract correlation matrix from the plink correlation
+    file generated by ld_matrix.launch_plink_ld
     read plink results create a sparse dataframe LD-matrix
     then save it to a zipped pickle
+
+    Args:
+        plink_ld (str): path to the plink correlation matrix file
+        ref_chr_df (str): TODO describe; the name suggests reference panel data for the chromosome (confirm type)
+
+    Returns:
+        pandas.SparseDataFrame : Linkage disequilibrium matrix
     """
 
     plink_ld = pd.read_csv(plink_ld, sep = "\s+")
@@ -51,12 +66,16 @@ def generate_sparse_matrix(plink_ld, ref_chr_df):
     mat_ld = mat_ld.loc[re_index, re_index]
     mat_ld = mat_ld.to_sparse()
     return mat_ld
-    #mat_ld.to_pickle(path_ld_mat,, compression='gzip')
 
 def generate_genome_matrices(region_files, reffolder, folder_output):
     """
     go through region files and compute LD matrix for each transform and
     save the results in a pandas sparse dataframe
+
+    Args:
+        region_files (str) : region file containing beginning and end position
+        reffolder (str) : folder of reference panel
+        folder_output (str): folder to save plink LD correlation result files
     """
     regions = pd.read_csv(region_files)
     for reg in regions.iterrows():
diff --git a/impute_jass/impute_jass/stat_models.py b/impute_jass/impute_jass/stat_models.py
index 16cca0c..79f73a0 100644
--- a/impute_jass/impute_jass/stat_models.py
+++ b/impute_jass/impute_jass/stat_models.py
@@ -21,6 +21,7 @@ import scipy.linalg
 def compute_mu(sig_i_t, sig_t_inv, zt):
     """
     Compute the estimation of z-score from neighborring snp
+    
     Args:
         sig_i_t (matrix?) : correlation matrix with line corresponding to
         unknown Snp (snp to impute) and column to known SNPs
-- 
GitLab