@@ -4,8 +4,8 @@ sheetName <- "mouse_1" # sheet number
# ---- Column settings for the circos representation ----
# Name of the column of the sequence names.
Name <- "Name"
# Names of the first and second columns.
col1 <- "VH_VJ"
col2 <- "VL_VJ"
# Values to select inside col1 and col2 for the circos representation:
# a single character vector each. Write NULL if no selection is required.
# Each variable is assigned exactly once. A second assignment placed right
# after the first one silently discards the first value, so alternative
# settings are kept commented out rather than as live code:
# col1.selection <- "IGHV1-55_IGHJ2"
# col2.selection <- "IGKV4-68_IGKJ5"
col1.selection <- c("IGHV1-7_IGHJ2", "IGHV1-55_IGHJ2", "IGHV1-81_IGHJ2")
col2.selection <- c("IGKV4-68_IGKJ5", "IGKV2-112_IGKJ2")
# Names of the columns with metadata (single character vector).
metadata <- c("mAb_name", "Kd_nM")
# Write TRUE to erase all the existing objects in R before starting the
# algorithm, and FALSE otherwise. Beginners should use TRUE.
erase.objects <- TRUE
if (isTRUE(erase.objects)) {
  # Removes every object listed by ls(), hidden ones (names starting with a
  # dot) included. erase.objects itself is removed too, hence its
  # re-creation on the next line.
  rm(list = ls(all.names = TRUE))
  erase.objects <- TRUE
}
# Write TRUE to erase all the graphic windows in R before starting the
# algorithm, and FALSE otherwise.
erase.graphs <- TRUE
# Name of this script.
script <- "circos_data_prep"

# Input files. VH.path must be the VH file and VL.path must be the VL file.
VH.path <- "X:/ROCURONIUM PROJECT/01 Primary data/04.Repertoire analysis/SORT1/SORT1 Seq-original/RESULT/repertoire_profiler_1708127835/all_passed_seq.tsv"
VL.path <- "X:/ROCURONIUM PROJECT/01 Primary data/04.Repertoire analysis/SORT1/SORT1 Seq-original/RESULT/repertoire_profiler_1708128192/all_passed_seq.tsv"

# Names of the columns used to merge the 2 files (single character vector).
# Of note, if _VH or _VL is present in the column names, it is removed.
merging_colums_names <- "initial_sequence_id"

# Name of the metadata column in the two files (single character string).
# Write NULL if no metadata.
metadata_colums_names <- "sequence_id"

# Names of the columns of the VH.path file that correspond to V (first
# position) and J. Of note, the names must be the same for VH and VL.
kept_colums_names <- c("germline_v_call", "germline_j_call")

# Described in the original comment as the names of the VH.path file columns
# to keep, beyond metadata. The names must be the same for VH and VL.
# NOTE(review): judging by the variable name, these are presumably the new
# names given to the kept_colums_names columns -- confirm against the code
# that uses this variable.
renamed_colums_names <- c("V_allele", "J_allele")
cat(paste0("\n\nWarning: SOME SEQUENCES FROM VH ARE REMOVED BECAUSE NOT IN THE OTHER VL FILE:\n",paste(df1[,merging_colums_names][tempo.log1],collapse="\n"),"\n\n"))
}
if(any(tempo.log2)){
cat(paste0("\n\nWarning: SOME SEQUENCES FROM VL ARE REMOVED BECAUSE NOT IN THE OTHER VH FILE:\n",paste(df2[,merging_colums_names][tempo.log2],collapse="\n"),"\n\n"))
# Merge the VH table (df1) with the VL table (df2) on the merging column(s)
# declared in merging_colums_names, instead of a hard-coded column name, so
# that the merge key always matches the one used to report the removed
# sequences. With the default all = FALSE, only the rows whose key is present
# in both tables are kept. sort = FALSE: the result is not sorted on the key.
output_all_seq <- merge(df1, df2, by = merging_colums_names, sort = FALSE)
# Number of merged rows. This is a bare expression: the value is only
# displayed when top-level auto-printing is active.
nrow(output_all_seq)
if(nrow(output_all_seq)!=nrow(df1)){
# Message of the internal error raised when the number of rows of the merged
# data frame differs from the number of rows of df1.
tempo.cat <- "INTERNAL CODE ERROR. THE merge() FUNCTION DID NOT RETURN A CORRECT output DATA FRAME. CODE HAS TO BE MODIFIED"
# call. = FALSE is an argument of stop(), not of paste0(): placed inside
# paste0(), it would be pasted as the text "FALSE" at the end of the message
# and the call would still be part of the error message.
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==