Commit a3d2b269 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files
parents 9130f5a4 cd96b94a
......@@ -32,7 +32,7 @@ def launch_plink_ld(startpos, endpos, chr, reffile, folder):
ref_panel = ref_panel.loc[(ref_panel.pos > startpos) & (ref_panel.pos < endpos)]
ref_panel.index.to_series().to_csv("./snp_list.txt", index=False)
fo = "{0}/chr{1}_{2}_{3}".format(folder, chr, startpos, endpos)
fo = "{0}/chr{1}_{2}_{3}".format(folder, chr, int(startpos), int(endpos))
cmd = "plink --bfile {0} --r --ld-snp-list ./snp_list.txt --ld-window 50 --ld-window-kb 3000 --chr {1} --out {2}".format(reffile, chr, fo)
print(cmd)
......@@ -109,7 +109,7 @@ def load_sparse_matrix(path_sparse_LD, ref_chr_df):
return(mat_ld.loc[valid_id, valid_id])
def generate_genome_matrices(region_files, reffolder, folder_output, suffix = ""):
def generate_genome_matrices(region_files, reffolder, folder_output, prefix, suffix = "", separa=","):
"""
Go through the region file and compute the LD matrix for each region, then transform and
save the results in a pandas sparse dataframe
......@@ -118,13 +118,18 @@ def generate_genome_matrices(region_files, reffolder, folder_output, suffix = ""
region_files (str) : region file containing beginning and end position
reffolder (str) : folder of reference panel
folder_output (str): folder to save plink LD correlation result files
separa (str): separator used in region_files
prefix (str): part of file name in reffolder before the chromosome number
suffix (str): part of file name in reffolder after the chromosome number (without extension)
"""
regions = pd.read_csv(region_files)
regions = pd.read_csv(region_files,sep=separa)
for reg in regions.iterrows():
# input reference panel file
fi_ref = "{0}/{1}.{2}".format(reffolder, reg[1]['chr'], suffix)
chr_int = re.search('([0-9]{1,2})', str(reg[1]['chr'])).group()
fi_ref = "{0}/{1}{2}{3}".format(reffolder,prefix, reg[1]['chr'], suffix)
#print(fi_ref)
chr_int = re.search('([0-9X]{1,2})', str(reg[1]['chr'])).group()
# Compute the LD correlation with LD
launch_plink_ld(reg[1]['start'], reg[1]['stop'], chr_int, fi_ref, folder_output)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment