Commit b3cb6a19 authored by Lucie TROUBAT
Browse files

Generalisation generate_genome_matrices

parent f6f688c3
......@@ -109,7 +109,7 @@ def load_sparse_matrix(path_sparse_LD, ref_chr_df):
return(mat_ld.loc[valid_id, valid_id])
def generate_genome_matrices(region_files, reffolder, folder_output, suffix = ""):
def generate_genome_matrices(region_files, reffolder, folder_output, prefix, suffix = "", separa=","):
"""
go through region files and compute LD matrix for each transform and
save the results in a pandas sparse dataframe
......@@ -118,13 +118,17 @@ def generate_genome_matrices(region_files, reffolder, folder_output, suffix = ""
region_files (str) : region file containing beginning and end position
reffolder (str) : folder of reference panel
folder_output (str): folder to save plink LD correlation result files
separa (str): separator used in region_files
prefix (str): part of file name in reffolder before the chromosome number
suffix (str): part of file name in reffolder after the chromosome number (without extension)
"""
regions = pd.read_csv(region_files)
regions = pd.read_csv(region_files,sep=separa)
for reg in regions.iterrows():
# input reference panel file
fi_ref = "{0}/{1}.{2}".format(reffolder, reg[1]['chr'], suffix)
chr_int = re.search('([0-9]{1,2})', str(reg[1]['chr'])).group()
fi_ref = "{0}/{1}{2}{3}".format(reffolder,prefix, reg[1]['chr'], suffix)
chr_int = re.search('([0-9X]{1,2})', str(reg[1]['chr'])).group()
# Compute the LD correlation with LD
launch_plink_ld(reg[1]['start'], reg[1]['stop'], chr_int, fi_ref, folder_output)
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment