diff --git a/CreateTables/.idea/CreateTables.iml b/CreateTables/.idea/CreateTables.iml deleted file mode 100644 index 5e800d67a08a2d7b6a45ee7ab62ac54fcc19d94f..0000000000000000000000000000000000000000 --- a/CreateTables/.idea/CreateTables.iml +++ /dev/null @@ -1,11 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<module type="PYTHON_MODULE" version="4"> - <component name="NewModuleRootManager"> - <content url="file://$MODULE_DIR$" /> - <orderEntry type="jdk" jdkName="Python 3.6.0 virtualenv at ~/dev" jdkType="Python SDK" /> - <orderEntry type="sourceFolder" forTests="false" /> - </component> - <component name="TestRunnerService"> - <option name="PROJECT_TEST_RUNNER" value="Unittests" /> - </component> -</module> \ No newline at end of file diff --git a/CreateTables/.idea/misc.xml b/CreateTables/.idea/misc.xml deleted file mode 100644 index 0ce33c2b277c4c599395a4f913098dabd7169822..0000000000000000000000000000000000000000 --- a/CreateTables/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.0 virtualenv at ~/dev" project-jdk-type="Python SDK" /> -</project> \ No newline at end of file diff --git a/CreateTables/.idea/modules.xml b/CreateTables/.idea/modules.xml deleted file mode 100644 index 60a4a253484eb36920387caba4937eeaefd5112e..0000000000000000000000000000000000000000 --- a/CreateTables/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ProjectModuleManager"> - <modules> - <module fileurl="file://$PROJECT_DIR$/.idea/CreateTables.iml" filepath="$PROJECT_DIR$/.idea/CreateTables.iml" /> - </modules> - </component> -</project> \ No newline at end of file diff --git a/CreateTables/.idea/workspace.xml b/CreateTables/.idea/workspace.xml deleted file mode 100644 index 9cbe9749b0d41d080397dc2e7e0c0376903fbe4c..0000000000000000000000000000000000000000 --- a/CreateTables/.idea/workspace.xml +++ /dev/null @@ -1,348 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<project version="4"> - <component name="ChangeListManager"> - <list default="true" id="fb790f1c-e4a1-4fda-b93e-a7b1253a7aed" name="Default" comment="" /> - <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" /> - <option name="TRACKING_ENABLED" value="true" /> - <option name="SHOW_DIALOG" value="false" /> - <option name="HIGHLIGHT_CONFLICTS" value="true" /> - <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> - <option name="LAST_RESOLUTION" value="IGNORE" /> - </component> - <component name="CreatePatchCommitExecutor"> - <option name="PATCH_PATH" value="" /> - </component> - <component name="ExecutionTargetManager" SELECTED_TARGET="default_target" /> - <component name="FileEditorManager"> - <leaf SIDE_TABS_SIZE_LIMIT_KEY="300"> - <file leaf-file-name="CreateInitTable.py" pinned="false" current-in-tab="true"> - <entry file="file://$PROJECT_DIR$/CreateInitTable.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="307"> - <caret line="60" column="0" lean-forward="true" selection-start-line="60" selection-start-column="0" selection-end-line="60" selection-end-column="0" /> - <folding> - <element signature="e#229#256#0" expanded="true" /> - </folding> - </state> - </provider> - </entry> - </file> - <file leaf-file-name="CreateWorkTable.py" pinned="false" current-in-tab="false"> - <entry file="file://$PROJECT_DIR$/CreateWorkTable.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="570"> - <caret line="39" column="36" lean-forward="false" selection-start-line="39" selection-start-column="36" selection-end-line="39" selection-end-column="36" /> - <folding> - <element signature="e#229#256#0" expanded="true" /> - </folding> - </state> - </provider> - </entry> - </file> - </leaf> - </component> - <component name="IdeDocumentHistory"> - <option name="CHANGED_PATHS"> - <list> - <option value="$PROJECT_DIR$/CreateWorkTable.py" /> - <option value="$PROJECT_DIR$/CreateInitTable.py" /> - </list> - </option> - </component> - <component name="ProjectFrameBounds"> - <option name="x" value="24" /> - <option name="y" value="35" /> - <option name="width" value="1400" /> - <option name="height" value="753" /> - </component> - <component name="ProjectView"> - <navigator currentView="ProjectPane" proportions="" version="1"> - <flattenPackages /> - <showMembers /> - <showModules /> - <showLibraryContents /> - <hideEmptyPackages /> - <abbreviatePackageNames /> - <autoscrollToSource /> - <autoscrollFromSource /> - <sortByType /> - <manualOrder /> - <foldersAlwaysOnTop value="true" /> - </navigator> - <panes> - <pane id="ProjectPane"> - <subPane> - <PATH> - <PATH_ELEMENT> - <option name="myItemId" value="CreateTables" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" /> - </PATH_ELEMENT> - <PATH_ELEMENT> - <option name="myItemId" value="CreateTables" /> - <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" /> - </PATH_ELEMENT> - </PATH> - </subPane> - </pane> - <pane id="Scope" /> - <pane id="Scratches" /> - </panes> - </component> - <component name="PropertiesComponent"> - <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" /> - </component> - <component name="RunManager"> - <configuration default="true" type="CompoundRunConfigurationType" factoryName="Compound Run Configuration"> - <method /> - </configuration> - <configuration default="true" type="PythonConfigurationType" factoryName="Python"> - <option name="INTERPRETER_OPTIONS" value="" /> - <option name="PARENT_ENVS" value="true" /> - <envs> - <env name="PYTHONUNBUFFERED" value="1" /> - </envs> - <option name="SDK_HOME" value="" /> - <option name="WORKING_DIRECTORY" value="" /> - <option name="IS_MODULE_SDK" value="false" /> - <option name="ADD_CONTENT_ROOTS" value="true" /> - <option name="ADD_SOURCE_ROOTS" value="true" /> - <module name="CreateTables" /> - <option name="SCRIPT_NAME" value="" /> - <option name="PARAMETERS" value="" /> - <option name="SHOW_COMMAND_LINE" value="false" /> - <method /> - </configuration> - <configuration default="true" type="Tox" factoryName="Tox"> - <option name="INTERPRETER_OPTIONS" value="" /> - <option name="PARENT_ENVS" value="true" /> - <envs /> - <option name="SDK_HOME" value="" /> - <option name="WORKING_DIRECTORY" value="" /> - <option name="IS_MODULE_SDK" value="false" /> - <option name="ADD_CONTENT_ROOTS" value="true" /> - <option name="ADD_SOURCE_ROOTS" value="true" /> - <module name="CreateTables" /> - <method /> - </configuration> - <configuration default="true" type="tests" factoryName="Attests"> - <option name="INTERPRETER_OPTIONS" value="" /> - <option name="PARENT_ENVS" value="true" /> - <envs /> - <option name="SDK_HOME" value="" /> - <option name="WORKING_DIRECTORY" value="" /> - <option name="IS_MODULE_SDK" value="false" /> - <option name="ADD_CONTENT_ROOTS" value="true" /> - <option name="ADD_SOURCE_ROOTS" value="true" /> - <module name="CreateTables" /> - <option name="SCRIPT_NAME" value="" /> - <option name="CLASS_NAME" value="" /> - <option name="METHOD_NAME" value="" /> - <option name="FOLDER_NAME" value="" /> - <option name="TEST_TYPE" value="TEST_SCRIPT" /> - <option name="PATTERN" value="" /> - <option name="USE_PATTERN" value="false" /> - <method /> - </configuration> - <configuration default="true" type="tests" factoryName="Doctests"> - <option name="INTERPRETER_OPTIONS" value="" /> - <option name="PARENT_ENVS" value="true" /> - <envs /> - <option name="SDK_HOME" value="" /> - <option name="WORKING_DIRECTORY" value="" /> - <option name="IS_MODULE_SDK" value="false" /> - <option name="ADD_CONTENT_ROOTS" value="true" /> - <option name="ADD_SOURCE_ROOTS" value="true" /> - <module name="CreateTables" /> - <option name="SCRIPT_NAME" value="" /> - <option name="CLASS_NAME" value="" /> - <option name="METHOD_NAME" value="" /> - <option name="FOLDER_NAME" value="" /> - <option name="TEST_TYPE" value="TEST_SCRIPT" /> - <option name="PATTERN" value="" /> - <option name="USE_PATTERN" value="false" /> - <method /> - </configuration> - <configuration default="true" type="tests" factoryName="Nosetests"> - <option name="INTERPRETER_OPTIONS" value="" /> - <option name="PARENT_ENVS" value="true" /> - <envs /> - <option name="SDK_HOME" value="" /> - <option name="WORKING_DIRECTORY" value="" /> - <option name="IS_MODULE_SDK" value="false" /> - <option name="ADD_CONTENT_ROOTS" value="true" /> - <option name="ADD_SOURCE_ROOTS" value="true" /> - <module name="CreateTables" /> - <option name="SCRIPT_NAME" value="" /> - <option name="CLASS_NAME" value="" /> - <option name="METHOD_NAME" value="" /> - <option name="FOLDER_NAME" value="" /> - <option name="TEST_TYPE" value="TEST_SCRIPT" /> - <option name="PATTERN" value="" /> - <option name="USE_PATTERN" value="false" /> - <option name="PARAMS" value="" /> - <option name="USE_PARAM" value="false" /> - <method /> - </configuration> - <configuration default="true" type="tests" factoryName="Unittests"> - <option name="INTERPRETER_OPTIONS" value="" /> - <option name="PARENT_ENVS" value="true" /> - <envs /> - <option name="SDK_HOME" value="" /> - <option name="WORKING_DIRECTORY" value="" /> - <option name="IS_MODULE_SDK" value="false" /> - <option name="ADD_CONTENT_ROOTS" value="true" /> - <option name="ADD_SOURCE_ROOTS" value="true" /> - <module name="CreateTables" /> - <option name="SCRIPT_NAME" value="" /> - <option name="CLASS_NAME" value="" /> - <option name="METHOD_NAME" value="" /> - <option name="FOLDER_NAME" value="" /> - <option name="TEST_TYPE" value="TEST_SCRIPT" /> - <option name="PATTERN" value="" /> - <option name="USE_PATTERN" value="false" /> - <option name="PUREUNITTEST" value="true" /> - <option name="PARAMS" value="" /> - <option name="USE_PARAM" value="false" /> - <method /> - </configuration> - <configuration default="true" type="tests" factoryName="py.test"> - <option name="INTERPRETER_OPTIONS" value="" /> - <option name="PARENT_ENVS" value="true" /> - <envs /> - <option name="SDK_HOME" value="" /> - <option name="WORKING_DIRECTORY" value="" /> - <option name="IS_MODULE_SDK" value="false" /> - <option name="ADD_CONTENT_ROOTS" value="true" /> - <option name="ADD_SOURCE_ROOTS" value="true" /> - <module name="CreateTables" /> - <option name="SCRIPT_NAME" value="" /> - <option name="CLASS_NAME" value="" /> - <option name="METHOD_NAME" value="" /> - <option name="FOLDER_NAME" value="" /> - <option name="TEST_TYPE" value="TEST_SCRIPT" /> - <option name="PATTERN" value="" /> - <option name="USE_PATTERN" value="false" /> - <option name="testToRun" value="" /> - <option name="keywords" value="" /> - <option name="params" value="" /> - <option name="USE_PARAM" value="false" /> - <option name="USE_KEYWORD" value="false" /> - <method /> - </configuration> - </component> - <component name="ShelveChangesManager" show_recycled="false"> - <option name="remove_strategy" value="false" /> - </component> - <component name="TaskManager"> - <task active="true" id="Default" summary="Default task"> - <changelist id="fb790f1c-e4a1-4fda-b93e-a7b1253a7aed" name="Default" comment="" /> - <created>1494320531025</created> - <option name="number" value="Default" /> - <option name="presentableId" value="Default" /> - <updated>1494320531025</updated> - </task> - <servers /> - </component> - <component name="TodoView"> - <todo-panel id="selected-file"> - <is-autoscroll-to-source value="true" /> - </todo-panel> - <todo-panel id="all"> - <are-packages-shown value="true" /> - <is-autoscroll-to-source value="true" /> - </todo-panel> - </component> - <component name="ToolWindowManager"> - <frame x="24" y="35" width="1400" height="753" extended-state="0" /> - <editor active="true" /> - <layout> - <window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.24945612" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" /> - <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32913387" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" /> - <window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.42519686" sideWeight="0.5025381" order="7" side_tool="true" content_ui="tabs" /> - <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" /> - <window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.42519686" sideWeight="0.49746192" order="7" side_tool="false" content_ui="tabs" /> - <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" /> - <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.24945612" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> - <window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32913387" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" /> - <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" /> - <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" /> - <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" /> - <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" /> - <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" /> - <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" /> - <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" /> - <window_info id="Inspection Results" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32913387" sideWeight="0.5" order="8" side_tool="false" content_ui="tabs" /> - <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> - <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" /> - </layout> - </component> - <component name="VcsContentAnnotationSettings"> - <option name="myLimit" value="2678400000" /> - </component> - <component name="XDebuggerManager"> - <breakpoint-manager /> - <watches-manager /> - </component> - <component name="editorHistoryManager"> - <entry file="file://$PROJECT_DIR$/CreateInitTable.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="0"> - <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> - <folding> - <element signature="e#229#256#0" expanded="true" /> - </folding> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/CreateWorkTable.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="570"> - <caret line="39" column="36" lean-forward="false" selection-start-line="39" selection-start-column="36" selection-end-line="39" selection-end-column="36" /> - <folding> - <element signature="e#229#256#0" expanded="true" /> - </folding> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/CreateInitTable.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="210"> - <caret line="14" column="11" lean-forward="false" selection-start-line="14" selection-start-column="11" selection-end-line="14" selection-end-column="11" /> - <folding> - <element signature="e#229#256#0" expanded="true" /> - </folding> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/CreateWorkTable.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="0"> - <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" /> - <folding> - <element signature="e#229#256#0" expanded="true" /> - </folding> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/CreateWorkTable.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="570"> - <caret line="39" column="36" lean-forward="false" selection-start-line="39" selection-start-column="36" selection-end-line="39" selection-end-column="36" /> - <folding> - <element signature="e#229#256#0" expanded="true" /> - </folding> - </state> - </provider> - </entry> - <entry file="file://$PROJECT_DIR$/CreateInitTable.py"> - <provider selected="true" editor-type-id="text-editor"> - <state relative-caret-position="307"> - <caret line="60" column="0" lean-forward="true" selection-start-line="60" selection-start-column="0" selection-end-line="60" selection-end-column="0" /> - <folding> - <element signature="e#229#256#0" expanded="true" /> - </folding> - </state> - </provider> - </entry> - </component> -</project> \ No newline at end of file diff --git a/CreateTables/CreateInitTable.py b/CreateTables/CreateInitTable.py deleted file mode 100644 index b660688e7c1c44d7aae4a7242a85c50e750fd8f4..0000000000000000000000000000000000000000 --- a/CreateTables/CreateInitTable.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- -""" -Created 2017-05-03 - -Create the hdf5 file InitTable containing: - - all the Z scores - - the covariance matrix - - the table describing all the phenotypes - -@author: vguillem -""" - -from pandas import HDFStore -import pandas - -# Base Path : to remove by something useful -PATH0 = '/Users/vguillem' - -# Input File PATHs -PATH_f = PATH0 + '/git/jass/data/finalData_2017-05-18.csv' -PATH_COV = PATH0 + "/git/jass/data/covariance.txt" -PATH_PhenoList = PATH0 + '/git/jass/data/sumtab_170523.csv' -# Ouput File PATHs -PATH_InitTable = PATH0 + '/git/jass/data/initTable.hdf5' -PATH_WorkTable = PATH0 + '/git/jass/data/workTable.hdf5' - -# Read summary statistics -f = pandas.read_csv(PATH_f) -summary_phe = pandas.read_csv(PATH_PhenoList) -covariance = pandas.read_csv(PATH_COV, sep='\t', index_col=0) -covariance.rename(columns={x: y for x, y in zip(covariance.columns, 'z_' + covariance.columns)}, inplace=True) -covariance.rename(index={x: y for x, y in zip(covariance.index, 'z_' + covariance.index)}, inplace=True) - -# pheno_select = ['z_MAGIC_HOMA-B', 'z_GIANT_HIP', 'z_GEFOS_BMD-SPINE', 'z_CARDIOGRAM_CHD'] -pheno_select = covariance.columns & f.columns - -COV = covariance.loc[pheno_select, pheno_select] -Zsel = f.loc[:, pheno_select] -# Remove Infinite and NaN values -# Zsel = Zsel.replace([-np.inf, np.inf], value=np.NaN).dropna(axis=0) - - -datapheno = summary_phe -datapheno['ID'] = 'z_' + summary_phe.consortia + '_' + summary_phe.outcome -datapheno = datapheno[['ID', 'consortia', 'outcome', 'fullName', 'type', 'reference', 'linkRef','dataLink','internalDataLink']] -datapheno.columns = ['ID', 'Consortium', 'Outcome', 'FullName', 'Type', 'Reference', 'ReferenceLink','dataLink','internalDataLink'] -datapheno.index = datapheno['ID'] -PhenoList = datapheno.loc[pheno_select,:] - -whichCols = ['Region', 'CHR', 'position', 'snp_ids', 'MiddlePosition'] -whichCols.extend(list(pheno_select)) -SumStatTab = f[whichCols] - -#df.to_csv('/Users/vguillem/Desktop/STATGEN/PCMA/imputation/chrtot.csv') -hdf_init = HDFStore(PATH_InitTable) -hdf_init.put('PhenoList', PhenoList, format='table', data_columns=True) -hdf_init.put('SumStatTab', SumStatTab, format='table', data_columns=True) -hdf_init.put('COV', COV, format='table', data_columns=True) -hdf_init.close() diff --git a/CreateTables/CreateWorkTable.py b/CreateTables/CreateWorkTable.py deleted file mode 100644 index 2ffd5bdd2fb280ca221036d91ad6ab38d13f741c..0000000000000000000000000000000000000000 --- a/CreateTables/CreateWorkTable.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- -""" -Created 2017-05-03 - -Create the hdf5 file InitTable containing: - - all the Z scores - - the covariance matrix - - the table describing all the phenotypes - -@author: vguillem -""" - -from pandas import HDFStore -import pandas - -# Base Path : to remove by something useful -PATH0 = '/Users/vguillem' - -# Input File PATHs -PATH_f = PATH0 + '/git/jass/data/finalData_2017-05-09.csv' -PATH_COV = PATH0 + "/git/jass/data/covariance.txt" -PATH_PhenoList = PATH0 + '/git/jass/data/summary_phe.csv' -# Ouput File PATHs -PATH_InitTable = PATH0 + '/git/jass/data/initTable.hdf5' -PATH_WorkTable = PATH0 + '/git/jass/data/workTable.hdf5' - - -SumStatTab = pandas.read_hdf(PATH_InitTable, 'SumStatTab') - -chromosome = 'chr6' -region = 'Region642' - -dataframe = SumStatTab -dataframe = dataframe[(dataframe["Region"] == region) & (dataframe["CHR"] == chromosome)] -dataframe = dataframe.sort_values('position') # - - -# dataframe = dataframe[(dataframe["CHR"]==chromosome)] -dataframe.drop(["Region","CHR","position", "MiddlePosition"], axis=1, inplace=True) -dataframe.rename(columns = {'snp_ids':'ID'}, inplace=True) -column_order = list(dataframe.ID) -pivoted_dataframe = dataframe.pivot_table(columns='ID') -pivoted_dataframe = pivoted_dataframe.reindex_axis(column_order, axis=1) -pivoted_dataframe - diff --git a/PrepareData_2017-04-04.R b/PrepareData_2017-04-04.R deleted file mode 100755 index 389df704b7eda4fbcd7be4d6bb34394d2fe15eaf..0000000000000000000000000000000000000000 --- a/PrepareData_2017-04-04.R +++ /dev/null @@ -1,151 +0,0 @@ -rm(list=ls()) ; graphics.off() ; gc() - -# Librairies R non nécessaires -library(rhdf5) -library(ggplot2) -library(reshape2) -library(pheatmap) -library(RColorBrewer) - -# Chargement du dataset sous forme de dataframe R, se trouve sous une forme équivalente dans initTable -load("DATA_2017-03-28.RData") -# Chargement de toutes les covariances disponibles : il y en a plus que de Z-scores -CC <- read.table("~/Desktop/STATGEN/PCMA/imputation/covariance.txt", header =TRUE,row.names = 1, check.names = F) -# Chargement de la matrice décrivant les régions -fourier <- read.table("fourier_ls-all.bed", header=TRUE) - -# La matrice des Z-scores = DATA moins les colones 1 (position), 2 (snp id) et dernière (CHR) -Z <- DATA[,-c(1:2, ncol(DATA))] -# i = indices dans la matrice C qui correspond aux Z-scores de la matrice Z -i <- na.omit(match(gsub("z_","",colnames(Z)), rownames(CC))) -# Restriction de C aux phénotypes communs à Z -C <- CC[i, i] -# Ajout d'un prefixe "z_" aux noms des phénotypes dans C pour correspondre à la nomenclature utilisée dans Z -dimnames(C) <- lapply(dimnames(C), function(x) paste0("z_", x)) - -# Transtypage des Z scores en valeurs numériques -for (j in 1:ncol(Z)) Z[,j] <- as.numeric(gsub(" ", "", Z[,j])) -# Transformation en matrice pour les calculs : la dataframe en R utilise des types différents par colonne, il faut donc transformer en matrice pour pouvoir faire des calculs -Z <- as.matrix(Z) -# Phenotypes communs à Z et C + ordonner les lignes et colonnes de C et les colonnes de Z pour s'assurer que les phénotypes soient les mêmes ET dans le même ordre -commonPheno <- intersect(colnames(C), colnames(Z)) -Z <- Z[, commonPheno] -C <- C[commonPheno,commonPheno] -# Nombre de lignes de Z -n <- nrow(Z) -# Nombre de colonnes de Z -p <- ncol(Z) -# P-valeurs associées au Z-scores -PVAL <- 2*(1-pnorm(abs(Z))) -# Calcul de l'inverse de la matrice de covariance -invcov <- solve(C) -# Statistique jointe -chi <- rowSums(Z * (Z %*% invcov)) -# P-valeur jointe -pj <- 1-pchisq(chi, df = p) -# Remplacement des valeurs non finies par une p-valeur très petite -pj[is.nan(pj)] <- 1e-32 - -# res = deux colonnes : -# - MinZ = par région, minimum des P-valeurs associées aux phénotypes -# - PJ = par région, minimum de la p-valeur jointe -res <- matrix(NA, nrow=nrow(fourier), ncol=2) -colnames(res) <- c("MinZ", "PJ") -regions <- rep("NoRegion", nrow(Z)) -for (r in 1:nrow(fourier)) { - print(sprintf("Region %i / %i",r,nrow(fourier))) - chr <- fourier[r, "chr"] - left <- fourier[r, "start"] - right <- fourier[r, "stop"] - R <- DATA$CHR==chr & DATA$position >= left & DATA$position <= right - regions[R] <- sprintf("Region%i",r) - region.pval <- PVAL[R,] - region.pj <- pj[R] - if (sum(R)!=0) - res[r,] <- c( min(region.pval), min(region.pj) ) -} - -# Transformation en -log10 -datres <- as.data.frame(-log10(res)) -# Comparaison entre MinZ et PJ -datres$Q0 <- sprintf("Quadrant %i", ((datres$MinZ >= 8)+1 )* ((datres$PJ >= 8)*2+1 )) - -# Fonction qui transforme Q0 en texte pour les graphes -foo <- function(x) { - xx <- x[1] - q <- switch(xx, - "1" = "MinZ > 1e-8 and PJ > 1e-8", - "2" = "MinZ < 1e-8 and PJ > 1e-8", - "3" = "MinZ > 1e-8 and PJ < 1e-8", - "4" = "MinZ < 1e-8 and PJ < 1e-8", - "5" = "Both are -Inf.") - if (is.null(q)) stop("x Not found!") - sprintf("%s (%i)", q, length(x)) -} - -# Application de foo de manière vectorielle -datres$Quadrants <- ave(as.numeric(factor(datres$Q0)), datres$Q0, FUN=foo) - -### Create files for initTable -cohort <- sapply(strsplit(commonPheno, "_"), "[", 2) -pheno <- sapply(strsplit(commonPheno, "_"), "[", 3) -PhenoList <- data.frame(ID = commonPheno, - Cohort = cohort, - Phenotype = pheno, - Reference = NA) - -SumStatTab <- data.frame(Region = regions, - CHR = DATA$CHR, - position = DATA$position, - snp_ids = DATA$snp_ids, - Z) - -COV <- C - -write.csv(PhenoList, file="IT/PhenoList.csv") -write.csv(SumStatTab, file="IT/SumStatTab.csv") -write.csv(COV, file="IT/COV.csv") - -### Create files for workTable -SummaryTable <- table("MinZ(<t)"=res[,"MinZ"] <= 1e-8, - "PJ(<t)"=res[,"PJ"] <= 1e-8) -dimnames(SummaryTable) <- list(c("MinZ>1e-8","MinZ<1e-8"), c("JOST>1e-8","JOST<1e-8")) - -SumStatJostTab <- data.frame(Region = regions, - CHR = DATA$CHR, - position = DATA$position, - snp_ids = DATA$snp_ids, - PVALJOST = pj, - Z) -RegionSubTable <- data.frame( - Region = sprintf("Region%i", 1:nrow(fourier)), - MiddlePosition = rowMeans(fourier[,c("start","stop")]), - CHR = fourier$chr, - JOSTmin = res[,"PJ"]) -SubCOV <- C - -write.csv(SummaryTable, file="WT/SummaryTable.csv") -write.csv(SumStatJostTab, file="WT/SumStatJostTab.csv") -write.csv(RegionSubTable, file="WT/RegionSubTable.csv") -write.csv(SubCOV, file="WT/SubCOV.csv") - -## Sub Sample 4 Pierre -set.seed(123) -# j <- sort(sample(nrow(SumStatJostTab), 5000)) -j0 <- sample(nrow(SumStatJostTab), 1) -j <- (j0-2500):(j0+2499) - -FileHeatmap <- data.frame(ID=colnames(SumStatJostTab)[-c(1:5)], - t(SumStatJostTab[j,-c(1:5)])) -colnames(FileHeatmap)[-1] <- as.character(SumStatJostTab$snp_ids[j]) -FileJOSTmin <- RegionSubTable -FileJOST <- SumStatJostTab[j,1:5] - -write.csv(FileHeatmap, file="/Volumes/PCMA-2/2._TEST/FileHeatMap.csv", row.names = FALSE) -write.csv(FileJOSTmin, file="/Volumes/PCMA-2/2._TEST/FileJOSTmin.csv", row.names = FALSE) -write.csv(FileJOST, file="/Volumes/PCMA-2/2._TEST/FileJOST.csv", row.names = FALSE) - -## -write.csv(PhenoList, file="/Volumes/PCMA-2/2._TEST/PhenoList.csv") -write.csv(SummaryTable, file="/Volumes/PCMA-2/2._TEST/SummaryTable.csv") - diff --git a/generateWorkTable_2016-03-30.py b/generateWorkTable_2016-03-30.py deleted file mode 100755 index 173ed17944bd34b4ff64778c7b769e4a19682e32..0000000000000000000000000000000000000000 --- a/generateWorkTable_2016-03-30.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- -""" -Created on Tue Mar 28 09:57:33 2017 - -@author: vguillem -""" - -from pandas import HDFStore, DataFrame # create (or open) an hdf5 file and opens in append mode -import pandas -import numpy as np -import scipy.stats as spst - -# Input File PATHs -PATH_InitTable = '/Volumes/PCMA/1._DATA/initTable.hdf5' -# Ouput File PATHs -PATH_WorkTable = '/Volumes/PCMA/1._DATA/workTable_tmp.hdf5' - -## -SumStatTab = pandas.read_hdf(PATH_InitTable, 'SumStatTab') -COV = pandas.read_hdf(PATH_InitTable,'COV') -## -Z = SumStatTab.iloc[:,3:] - -################################## -## Simulate Phenotype selection ## -################################## - -phenoSel = ['z_GLG_HDL', 'z_GLG_LDL', 'z_GLG_TC', 'z_GLG_TG', - 'z_ICBP_DBP', 'z_ICBP_MAP', 'z_ICBP_PP', 'z_ICBP_SBP'] -Zsel = Z[phenoSel] -subCOV = COV.loc[phenoSel, phenoSel] - -n = Zsel.shape[0] -p = Zsel.shape[1] - -invcov = np.linalg.inv(subCOV) - -chi = np.sum(np.multiply(Zsel, Zsel.dot(invcov)), axis=1) -JOST = 1 - spst.chi2.cdf(chi, df=p) - -SumStatJostTab = pandas.concat([SumStatTab[['Region', 'CHR', 'snp_ids']], DataFrame({"JOST":JOST}), Zsel]) - -SumStatJostTab_g = SumStatJostTab.groupby(by="Region") -JOSTmin = SumStatJostTab_g.JOST.min() -Zmin = SumStatJostTab_g.JOST.min() - -RegionSubTable = DataFrame(data={"Region":SumStatJostTab_g.Region.first(), - "CHR":SumStatJostTab_g.CHR.first(), - "JOSTmin":JOSTmin}) -thresh = 1e-8 - -summaryTable = DataFrame(np.array([[ sum((JOSTmin < thresh) & (Zmin < thresh)) , - sum((JOSTmin < thresh) & (Zmin > thresh))], - [ sum((JOSTmin > thresh) & (Zmin < thresh)) , - sum((JOSTmin > thresh) & (Zmin > thresh))]])) - -summaryTable.columns = ['PhenoSignif','NoPhenoSignif'] -summaryTable.index = ['JOSTSignif','NoJOSTSignif'] - -hdf_work = HDFStore(PATH_WorkTable) -hdf_work.put('summaryTable', summaryTable, format='table', data_columns=True) # Summary Table (contigency table) -hdf_work.put('RegionSubTable', RegionSubTable, format='table', data_columns=True) # Min JoSt per region -hdf_work.put('SumStatJostTab', SumStatJostTab, format='table', data_columns=True) # JoSt + z_scores on all the positions -hdf_work.put('subCOV', subCOV, format='table', data_columns=True) # Covariance matrix -hdf_work.close() -