Skip to content
Snippets Groups Projects
Commit 08b99873 authored by Hanna  JULIENNE's avatar Hanna JULIENNE
Browse files

integrate new tests

Merge branch 'rework-tests' into predict_gain
parents d18f40cc 8084f94e
No related branches found
No related tags found
2 merge requests!97Newmain gain,!92Predict gain
Showing
with 0 additions and 156 deletions
No preview for this file type
No preview for this file type
No preview for this file type
ID z_DISNEY_RATATOUY z_DISNEY_POCAHONT
z_DISNEY_RATATOUY 2.05403006060606 0.394332909090909
z_DISNEY_POCAHONT 0.394332909090909 1.17729254545455
rsid position chr region Z_ratatouy Z_pocahont
rs1 1 1 1 0.812 -1.06
rs2 2 1 1 2.197 0.937
rs3 3 1 2 2.049 0.854
rs4 1 2 3 1.632 1.461
rs5 2 2 3 0.254 -1.413
rs6 3 2 4 0.491 0.567
rs7 4 2 4 -0.324 0.583
rs8 5 2 4 -1.662 -1.307
rs9 1 3 5 1.768 -0.54
rs10 2 3 5 0.026 1.948
rs11 3 3 6 1.129 0.054
rs12 4 3 7 -2.38 0.352
File deleted
File deleted
information content
title Mock dataset with disney
description "lorem ipsum"
ancestry DIS
assembly dSNY
chr start stop
chr1 1 2
chr1 3 4
chr2 1 2
chr2 3 5
chr3 1 2
chr3 3 3
chr3 4 4
Outcome FullName Consortium Type Reference ReferenceLink dataLink internalDataLink Nsample Ncase Ncontrol
RATATOUY Ratatouille ou la mort de l'hygiène en cuisine DISNEY BrainWashing Courgette et al., 1754 http://www.marmiton.org/recettes/recette_ratatouille_23223.asp pouet pouet 1000000
POCAHONT Pocahontas mange des tapas DISNEY BrainWashing Rolfe et al., 1614 https://fr.wikipedia.org/wiki/Pocahontas Gargar Gargar 1000000
rsid position refAllele altAllele Z
rs1 1 C T -1.06
rs2 2 G T 0.937
rs3 3 C T 0.854
rsid position refAllele altAllele Z
rs4 1 G T 1.461
rs5 2 A T -1.413
rs6 3 A C 0.567
rs7 4 A G 0.583
rs8 5 C G -1.307
rsid position refAllele altAllele Z
rs9 1 C T -0.54
rs10 2 G T 1.948
rs11 3 A C 0.054
rs12 4 A C 0.352
rsid position refAllele altAllele Z
rs1 1 C T 0.812
rs2 2 G T 2.197
rs3 3 C T 2.049
rsid position refAllele altAllele Z
rs4 1 G T 1.632
rs5 2 A T 0.254
rs6 3 A C 0.491
rs7 4 A G -0.324
rs8 5 C G -1.662
rsid position refAllele altAllele Z
rs9 1 C T 1.768
rs10 2 G T 0.026
rs11 3 A C 1.129
rs12 4 A C -2.38
ID z_BMW_ISETTA z_BMW_MINI z_FIAT_CINQCENT z_FIAT_CINQUECENTO z_MERCO_SMART z_TATA_TATANANO
z_BMW_ISETTA 2 1 1 1 1 1
z_BMW_MINI 1 2 1 1 1 1
z_FIAT_CINQCENT 1 1 2 1 1 1
z_FIAT_CINQUECENTO 1 1 1 2 1 1
z_MERCO_SMART 1 1 1 1 2 1
z_TATA_TATANANO 1 1 1 1 1 2
File deleted
## There are 6 phenotypes, described one per row in the summary table
sumtab <- read.table(
  "summary.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)
## Z-score ID: z_CONSORTIUM_PHENOTYPE
## (the per-chromosome file names append "_chr#.txt" to this ID)
ids <- with(sumtab, sprintf("z_%s_%s", Consortium, Outcome))
## Covariance matrix: every variance (diagonal) is 2, every covariance
## (off-diagonal) is 1
COV <- matrix(1, nrow = 6, ncol = 6)
diag(COV) <- 2
dimnames(COV) <- list(ids, ids)
## Structure:
# - 5 chromosomes, 6 SNPs per chromosome
# - 10 regions, 2 regions per chromosome
# - 30 SNPs, 3 SNPs per region
## Structure of missing values, region by region:
## ".." means no missing values
## "XX" means the whole region is missing
#
# Z1 Z2 Z3 Z4 Z5 Z6
# R1 .. .. .. .. .. ..
# R2 .. .. .. .. .. ..
# R3 XX .. .. .. .. ..
# R4 .. XX .. .. .. ..
# R5 .. .. XX .. .. ..
# R6 .. .. .. XX .. ..
# R7 .. .. .. .. XX ..
# R8 .. .. .. .. .. XX
# R9 XX XX XX .. .. ..
# R10 .. .. .. XX XX XX
## One output file per (phenotype, chromosome) pair, ordered phenotype-major:
## all 5 chromosomes of the first phenotype, then those of the second, etc.
filenames <- paste0(
  rep(ids, each = 5),
  "_chr",
  rep(1:5, times = 6),
  ".txt"
)
## Per-SNP annotations (30 SNPs in total)
chr <- rep(1:5, each = 6)    # chromosome number of each SNP
reg <- rep(1:10, each = 3)   # region number of each SNP
pos <- rep(1:6, times = 5)   # position of each SNP within its chromosome
## rsid format: rs_chr<chr>_reg<region, 2 digits>_<snp, 2 digits>
rsid <- sprintf("rs_chr%i_reg%02i_%02i", chr, reg, 1:30)
## Every SNP carries the same reference / alternative alleles
ref <- "A"
alt <- "G"
## Z scores: 30 SNPs (rows) x 6 phenotypes (columns), filled column by column
## with the integers 1..180
BIGZ <- matrix(
  seq_len(30 * 6),
  nrow = 30,
  ncol = 6,
  dimnames = list(rsid, NULL)
)
## Blank out whole regions following the missing-value layout:
## regions 3 to 8 each lose one phenotype, region 9 loses phenotypes 1 to 3
## and region 10 loses phenotypes 4 to 6
BIGZ[reg == 3, 1] <- NA
BIGZ[reg == 4, 2] <- NA
BIGZ[reg == 5, 3] <- NA
BIGZ[reg == 6, 4] <- NA
BIGZ[reg == 7, 5] <- NA
BIGZ[reg == 8, 6] <- NA
BIGZ[reg == 9, 1:3] <- NA
BIGZ[reg == 10, 4:6] <- NA
# What does it look like?
# Draw the Z-score matrix (missing regions included) as a heatmap in
# "zscores.png". Rows and columns are left unclustered so the picture keeps the
# SNP order of BIGZ and the phenotype order of its columns.
#
# library() is used instead of require(): when pheatmap is not installed,
# require() only warns and returns FALSE, and the script would then fail at the
# pheatmap() call with the PNG device already open and an empty file on disk.
# library() stops right here with a clear error, before any device is opened.
library(pheatmap)
png("zscores.png", res = 100)
pheatmap(
  BIGZ,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  cellwidth = 10,
  cellheight = 10
)
dev.off()
# Write the covariance matrix: first column "ID" holds the row labels, followed
# by one column per phenotype. The file is tab-separated despite its ".csv"
# extension.
write.table(
  data.frame(ID = ids, COV),
  file = "COV.csv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
# Write the region file: each of the 5 chromosomes is split into two regions
# covering positions 1-3 and 4-6
regions <- data.frame(
  chr = paste0("chr", rep(1:5, each = 2)),
  start = rep(c(1, 4), times = 5),
  stop = rep(c(3, 6), times = 5)
)
write.table(
  regions,
  file = "regions.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)
# Write all the Z files: one file per (phenotype, chromosome) pair.
# `filenames` is ordered phenotype-major, which is exactly the order in which
# the nested loops below advance the counter `k`.
k <- 1
for (j in 1:6) {
  for (chrnum in 1:5) {
    # Select the SNPs of this chromosome from the numeric `chr` vector rather
    # than by pattern-matching the rsid, so that a pattern such as "chr1" can
    # never pick up another chromosome whose label merely starts with it.
    ind <- which(chr == chrnum)
    tmp <- data.frame(
      rsid = rsid[ind],
      pos = pos[ind],
      ref = ref,
      alt = alt,
      Zscore = unname(BIGZ[ind, j])
    )
    # SNPs whose Z score is missing for this phenotype are left out of the file.
    # row.names = FALSE: the row names left after na.omit() are just the
    # surviving row indices; writing them would add an unnamed leading column
    # and leave the header one field shorter than the data rows.
    # sep = "\t": same tab-separated layout as the other files written by this
    # script.
    write.table(
      na.omit(tmp),
      file = filenames[k],
      sep = "\t",
      quote = FALSE,
      row.names = FALSE
    )
    k <- k + 1
  }
}
# Compute the summary statistic
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment