Commit 48dad002 authored by Marie Bourdon's avatar Marie Bourdon
Browse files

modif names mark_match mark_poly

parent d5572d89
{
"cursorPosition" : "9,51",
"scrollLine" : "0"
}
\ No newline at end of file
{
"cursorPosition" : "20,46",
"scrollLine" : "0"
}
\ No newline at end of file
{
"cursorPosition" : "89,0",
"cursorPosition" : "106,18",
"last_setup_crc32" : "39B546A65bfca283",
"scrollLine" : "74"
"scrollLine" : "90"
}
\ No newline at end of file
{
"collab_server" : "",
"contents" : "",
"created" : 1622641680399.000,
"dirty" : false,
"encoding" : "",
"folds" : "",
"hash" : "0",
"id" : "17AE345",
"lastKnownWriteTime" : 140481884136632,
"last_content_update" : 1622641680399,
"path" : null,
"project_path" : null,
"properties" : {
"cacheKey" : "E58FACDB",
"caption" : "stuart_tab",
"contentUrl" : "grid_resource/gridviewer.html?env=&obj=stuart_tab&cache_key=E58FACDB",
"displayedObservations" : 11125,
"environment" : "",
"expression" : "stuart_tab",
"object" : "stuart_tab",
"preview" : 0,
"totalObservations" : 11125,
"variables" : 7
},
"read_only" : false,
"read_only_alternatives" : [
],
"relative_order" : 2,
"source_on_save" : false,
"source_window" : "",
"type" : "r_dataframe"
}
\ No newline at end of file
......@@ -5,13 +5,15 @@
"dirty" : false,
"encoding" : "UTF-8",
"folds" : "",
"hash" : "897610086",
"hash" : "0",
"id" : "76AC3EC",
"lastKnownWriteTime" : 1622462353,
"last_content_update" : 1622462353,
"lastKnownWriteTime" : 1622644628,
"last_content_update" : 1622644628349,
"path" : "~/stuart_package/stuart/R/mark_match.R",
"project_path" : "R/mark_match.R",
"properties" : {
"cursorPosition" : "20,46",
"scrollLine" : "0"
},
"read_only" : false,
"read_only_alternatives" : [
......
......@@ -13,12 +13,12 @@ mark_match <- function(tab, #tab_mark df
#finds SNPs that are in both files:
snp_strains <- as.character(ref$marker) #extracts SNPs in strains ref geno file
snp_genfile <- as.character(tab$SNP.Name) #extracts SNPs in cross geno file
snp_genfile <- as.character(tab$marker) #extracts SNPs in cross geno file
snp <- intersect(snp_strains,snp_genfile) #take intercept
#add results in exclude column
return(tab %>% mutate(exclude_match=ifelse(!SNP.Name %in% snp,
return(tab %>% mutate(exclude_match=ifelse(!marker %in% snp,
1,
0)))
......
{
"collab_server" : "",
"contents" : "",
"created" : 1622642648310.000,
"dirty" : false,
"encoding" : "",
"folds" : "",
"hash" : "0",
"id" : "8DB3123E",
"lastKnownWriteTime" : 140647294042113,
"last_content_update" : 1622642648310,
"path" : null,
"project_path" : null,
"properties" : {
"cacheKey" : "B33E01E3",
"caption" : "tab",
"contentUrl" : "grid_resource/gridviewer.html?env=&obj=tab&cache_key=B33E01E3",
"displayedObservations" : 2000,
"environment" : "",
"expression" : "tab",
"object" : "tab",
"preview" : 0,
"totalObservations" : 2000,
"variables" : 7
},
"read_only" : false,
"read_only_alternatives" : [
],
"relative_order" : 14,
"source_on_save" : false,
"source_window" : "",
"type" : "r_dataframe"
}
\ No newline at end of file
......@@ -5,16 +5,16 @@
"dirty" : false,
"encoding" : "UTF-8",
"folds" : "",
"hash" : "3133824271",
"hash" : "0",
"id" : "96AB3736",
"lastKnownWriteTime" : 1622642682,
"last_content_update" : 1622642682378,
"lastKnownWriteTime" : 1622645218,
"last_content_update" : 1622645218034,
"path" : "~/stuart_package/stuart/vignettes/stuaRt.Rmd",
"project_path" : "vignettes/stuaRt.Rmd",
"properties" : {
"cursorPosition" : "89,0",
"cursorPosition" : "106,18",
"last_setup_crc32" : "39B546A65bfca283",
"scrollLine" : "74"
"scrollLine" : "90"
},
"read_only" : false,
"read_only_alternatives" : [
......
......@@ -91,9 +91,9 @@ data(stuart_tab)
summary(stuart_tab)
```
Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change.
Then we will use the different `mark_*` functions in order to filter the markers. First, we can use the `mark_match()` function. Here, the parental strains were genotyped with the F2 individuals, but it can happen that you use previous genotyping results for the parental strains. The `mark_match()` function excludes markers that are in your genotype file but not in the reference genotype dataset. We recommend using this function as the chip used for genotyping may change.
```{r mark_match,eval=F}
```{r mark_match}
tab2 <- mark_match(stuart_tab,ref=strains)
......@@ -102,28 +102,28 @@ tab2 %>% filter(exclude_match==1)
Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded.
Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic.
Then, we can use the `mark_poly()` function, which will exclude the markers that are not polymorphic.
```{r mark_poly ex,eval=F}
```{r mark_poly ex}
tab2 <- mark_poly(tab2)
head(tab2)
```
The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped.
The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have an F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotypes. If we have an N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the "hetero" argument. Moreover, this function allows filtering markers depending on the proportion of non-genotyped animals. By default, markers for which more than 50% of individuals were not genotyped are excluded.
```{r mark_prop ex,eval=F}
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
head(tab2)
```
Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded.
Last, we can use the `mark_allele()` function. This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded.
```{r mark_allele,eval=F}
tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2")
tab2 %>% arrange(desc(exclude_allele)) %>% head()
```
Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains.
Indeed, we can see that the markers excluded with `mark_allele()` have different alleles in the parental strains.
```{r mark_allele-strains,eval=F}
strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2)
......@@ -131,7 +131,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g
# Creation of the R/qtl file
After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with qtl::read.cross.
After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to achieve this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can load with `qtl::read.cross`.
```{r write_qtl,eval=F}
rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox")
......
{
"collab_server" : "",
"contents" : "",
"created" : 1622638761182.000,
"dirty" : false,
"encoding" : "",
"folds" : "",
"hash" : "0",
"id" : "C2CE7FEA",
"lastKnownWriteTime" : 1622638687,
"last_content_update" : 1622638761182,
"path" : null,
"project_path" : null,
"properties" : {
"cacheKey" : "862FB1D9",
"caption" : "annot_mini",
"contentUrl" : "grid_resource/gridviewer.html?env=&obj=annot_mini&cache_key=862FB1D9",
"displayedObservations" : 11125,
"environment" : "",
"expression" : "annot_mini",
"object" : "annot_mini",
"preview" : 0,
"totalObservations" : 11125,
"variables" : 12
},
"read_only" : false,
"read_only_alternatives" : [
],
"relative_order" : 6,
"source_on_save" : false,
"source_window" : "",
"type" : "r_dataframe"
}
\ No newline at end of file
......@@ -5,13 +5,15 @@
"dirty" : false,
"encoding" : "UTF-8",
"folds" : "",
"hash" : "1539229739",
"hash" : "0",
"id" : "C5228C18",
"lastKnownWriteTime" : 1622462353,
"last_content_update" : 1622462353,
"lastKnownWriteTime" : 1622645200,
"last_content_update" : 1622645200474,
"path" : "~/stuart_package/stuart/R/mark_poly.R",
"project_path" : "R/mark_poly.R",
"properties" : {
"cursorPosition" : "9,51",
"scrollLine" : "0"
},
"read_only" : false,
"read_only_alternatives" : [
......
......@@ -7,7 +7,7 @@
#'
#' @export
mark_poly <- function(tab){
return(tab %>% mutate(exclude_poly=ifelse(is.na(Allele_2)==TRUE,
return(tab %>% mutate(exclude_poly=ifelse(is.na(allele_2)==TRUE,
1,
0)))
}
{
"collab_server" : "",
"contents" : "",
"created" : 1622642417308.000,
"dirty" : false,
"encoding" : "",
"folds" : "",
"hash" : "0",
"id" : "E7584E4F",
"lastKnownWriteTime" : -1152921504606846976,
"last_content_update" : 1622642417308,
"path" : null,
"project_path" : null,
"properties" : {
"cacheKey" : "6BE6FDA8",
"caption" : "genos",
"contentUrl" : "grid_resource/gridviewer.html?env=&obj=genos&cache_key=6BE6FDA8",
"displayedObservations" : 1957993,
"environment" : "",
"expression" : "genos",
"object" : "genos",
"preview" : 0,
"totalObservations" : 1957993,
"variables" : 11
},
"read_only" : false,
"read_only_alternatives" : [
],
"relative_order" : 13,
"source_on_save" : false,
"source_window" : "",
"type" : "r_dataframe"
}
\ No newline at end of file
......@@ -13,12 +13,12 @@ mark_match <- function(tab, #tab_mark df
#finds SNPs that are in both files:
snp_strains <- as.character(ref$marker) #extracts SNPs in strains ref geno file
snp_genfile <- as.character(tab$SNP.Name) #extracts SNPs in cross geno file
snp_genfile <- as.character(tab$marker) #extracts SNPs in cross geno file
snp <- intersect(snp_strains,snp_genfile) #take intercept
#add results in exclude column
return(tab %>% mutate(exclude_match=ifelse(!SNP.Name %in% snp,
return(tab %>% mutate(exclude_match=ifelse(!marker %in% snp,
1,
0)))
......
......@@ -7,7 +7,7 @@
#'
#' @export
mark_poly <- function(tab){
return(tab %>% mutate(exclude_poly=ifelse(is.na(Allele_2)==TRUE,
return(tab %>% mutate(exclude_poly=ifelse(is.na(allele_2)==TRUE,
1,
0)))
}
No preview for this file type
......@@ -91,9 +91,9 @@ data(stuart_tab)
summary(stuart_tab)
```
Then we will use the different mark_* functions in order to filter the markers. First, we can use mark_match() function. This function excludes markers that are in your genotype file but not in the reference genotype dataset. We recomend using this function as the chip used for genotyping may change.
Then we will use the different `mark_*` functions in order to filter the markers. First, we can use the `mark_match()` function. Here, the parental strains were genotyped with the F2 individuals, but it can happen that you use previous genotyping results for the parental strains. The `mark_match()` function excludes markers that are in your genotype file but not in the reference genotype dataset. We recommend using this function as the chip used for genotyping may change.
```{r mark_match,eval=F}
```{r mark_match}
tab2 <- mark_match(stuart_tab,ref=strains)
......@@ -102,28 +102,28 @@ tab2 %>% filter(exclude_match==1)
Here the reference strains were genotyped with the same version of the chip as the F2 individuals so no marker was excluded.
Then, we can use the mark_poly() function, which will exclude the markers that are not polymorphic.
Then, we can use the `mark_poly()` function, which will exclude the markers that are not polymorphic.
```{r mark_poly ex,eval=F}
```{r mark_poly ex}
tab2 <- mark_poly(tab2)
head(tab2)
```
The mark_prop() function can be used to filter markers depending on the proportion of each genotype. Here, we have a F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotype. If we have a N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the hetero" argument. Moreover, this function allows to filter marker depending on the proportion on non genotyped animals. By defaults, markers for which more than 50% of individuals were not genotyped.
The `mark_prop()` function can be used to filter markers depending on the proportion of each genotype. Here, we have an F2 so we can use the "homo" argument in order to filter depending on the proportion of both homozygous genotypes. If we have an N2, we can filter with the proportion of homozygous individuals with the "homo" argument and of heterozygous individuals with the "hetero" argument. Moreover, this function allows filtering markers depending on the proportion of non-genotyped animals. By default, markers for which more than 50% of individuals were not genotyped are excluded.
```{r mark_prop ex,eval=F}
tab2 <- mark_prop(tab2,cross="F2",homo=0.1,hetero=0.1)
head(tab2)
```
Last, we can use the mark_allele(). This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if for a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded.
Last, we can use the `mark_allele()` function. This very helpful function excludes markers for which the alleles found in the F2/N2 individuals do not correspond to the alleles found in the parental strains. For example, if a marker is not polymorphic in the parental strains but we found two alleles in the F2/N2 individuals, it will be excluded.
```{r mark_allele,eval=F}
tab2 <- mark_allele(tab=tab2,ref=strains,par1="parent1",par2="parent2")
tab2 %>% arrange(desc(exclude_allele)) %>% head()
```
Indeed, we can see that the markers excluded with mark_allele() have different alleles in the parental strains.
Indeed, we can see that the markers excluded with `mark_allele()` have different alleles in the parental strains.
```{r mark_allele-strains,eval=F}
strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","gUNC15530876","gUNC21555204","gUNC21596600")) %>% arrange(marker) %>% select(marker,parent1,parent2)
......@@ -131,7 +131,7 @@ strains %>% filter(marker %in% c("gJAX00038569","gJAX00425031","gUNC12245354","g
# Creation of the R/qtl file
After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to acheive this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can laod with qtl::read.cross.
After excluding the problematic markers, we can create the R/qtl file. The individuals must have the same ID in the geno and in the pheno file. If there is a prefix in the geno file that must be removed in order to achieve this, you can use the "prefix" argument. The "path" argument can be used in order to create a CSV file that you can load with `qtl::read.cross`.
```{r write_qtl,eval=F}
rqtl_file <- write_rqtl(geno=genos,pheno=phenos,tab=tab2,ref=strains,par1="parent1",par2="parent2",prefix="ind_",pos="cM_cox")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment