Skip to content
Snippets Groups Projects
Commit c76aa8f6 authored by Vincent  LAVILLE's avatar Vincent LAVILLE
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
j2s.py 0 → 100644
#!/usr/bin/env python3
import math
import numpy as np
import pandas as pd
from scipy.stats import chi2
import sys
import time
stt = time.time()
# Checking number of arguments (4 expected)
if len(sys.argv) != 4 :
print("Incorrect call: wrong number of arguments\n")
sys.exit(1)
infile = sys.argv[1]
samplesize = int(sys.argv[2])
Nexpo = int(sys.argv[3])
outfile = sys.argv[4]
print("Arguments passed:")
print("\tSample size = " + str(samplesize))
print("\tN exposed = " + str(Nexpo))
print("Output file = " + outfile + "\n")
print("\n")
meanE = Nexpo / samplesize
sdE = math.sqrt(meanE * (1 - meanE))
df = pd.read_csv(infile, header = 0, sep = "\t")
if 'TotalSampleSize' in list(df) :
df = df.rename(columns = {'TotalSampleSize' : 'N'})
# Filtering out SNPs with low sample size compared to the sample size distribution
n_min = np.percentile(df['N'], 90) / 1.5
df = df.loc[df['N'] > n_min,:]
df['propSample'] = df['N'] / samplesize
# Computation of summary statistics in the exposed, unexposed and total sample
df['Unexp_eff'] = df['Effect']
df['Unexp_eff_sd'] = df['StdErr']
df['Unexp_p'] = chi2.sf((df['Unexp_eff'] / df['Unexp_eff_sd']) ** 2, 1)
df['Unexp_N'] = np.floor(df['propSample'] * (samplesize - Nexpo))
df['Exp_eff'] = df['Effect'] + df['IntEffect']
df['Exp_eff_sd'] = (df['StdErr'] ** 2 + df['IntStdErr'] ** 2 + 2 * df['IntCov']) ** 0.5
df['Exp_p'] = chi2.sf((df['Exp_eff'] / df['Exp_eff_sd']) ** 2, 1)
df['Exp_N'] = np.floor(df['propSample'] * Nexpo)
df['Marg_eff'] = df['Effect'] + df['IntEffect'] * meanE
df['Marg_eff_sd'] = (df['StdErr'] ** 2 + (meanE * df['IntStdErr']) ** 2 + 2 * meanE * df['IntCov']) ** 0.5
df['Marg_p'] = chi2.sf((df['Marg_eff'] / df['Marg_eff_sd']) ** 2, 1)
df['Marg_N'] = df['N']
# Writing output file
df.loc[:,['rsID', 'Chr', 'BP', 'MarkerName', 'Allele1', 'Allele2', 'Freq1', 'Exp_eff', 'Exp_eff_sd', 'Exp_p', 'Exp_N', 'Unexp_eff', 'Unexp_eff_sd', 'Unexp_p', 'Unexp_N', 'Marg_eff', 'Marg_eff_sd', 'Marg_p' , 'Marg_N']].to_csv(outfile, sep = '\t', index = False, header = True, compression = 'gzip')
print("Analysis finished ")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment