# $same.names.pos1: positions, in data1, of the element names identical in data2. NULL if no identical names
# $same.names.pos2: positions, in data2, of the elements names identical in data1. NULL if no identical names
# $same.names.match1: positions, in data2, of the names that match the names in data1, as given by match(data1, data2) (NULL otherwise)
# $same.names.match2: positions, in data1, of the names that match the names in data2, as given by match(data1, data2) (NULL otherwise or NULL if data1 and data2 are not factors)
# $same.names.match2: positions, in data1, of the names that match the names in data2, as given by match(data1, data2) (NULL otherwise)
# $common.names: common element names between data1 and data2 (can be a subset of $name or not). NULL if no common element names
# $any.id.element: logical. is there any identical elements ?
# $same.elements.pos1: positions, in data1, of the elements identical in data2. NULL if no identical elements
# $same.elements.pos2: positions, in data2, of the elements identical in data1. NULL if no identical elements
# $same.elements.match1: positions, in data2, of the elements that match the elements in data1, as given by match(data1, data2) (NULL otherwise)
# $same.elements.match2: positions, in data1, of the elements that match the elements in data2, as given by match(data1, data2) (NULL otherwise or NULL if data1 and data2 are not factors)
# $same.elements.match2: positions, in data1, of the elements that match the elements in data2, as given by match(data1, data2) (NULL otherwise)
# $common.elements: common elements between data1 and data2. NULL if no common elements
# $same.order: logical. Are all elements in the same order? TRUE or FALSE if elements of data1 and data2 are identical but not necessary in the same order. NULL otherwise (different length for instance)
# $order1: order of all elements of data1. NULL if $same.order is FALSE
...
...
@@ -1353,7 +1353,7 @@ return(output)
######## fun_comp_2d() #### comparison of two 2D datasets (row & col names, dimensions, etc.)
# add match as in fun_comp_1d
fun_comp_2d <- function(data1, data2){
# AIM
# compare two 2D datasets of the same class or not. Check and report in a list if the 2 datasets have:
# $any.id.row.name: logical. Is there any row names identical ? NULL if no row names in the two 2D datasets
# $same.row.name.pos1: positions, in data1, of the row names identical in data2
# $same.row.name.pos2: positions, in data2, of the row names identical in data1
# $same.row.names.match1: positions, in data2, of the row names that match the row names in data1, as given by match(data1, data2) (NULL otherwise)
# $same.row.names.match2: positions, in data1, of the row names that match the row names in data2, as given by match(data1, data2) (NULL otherwise)
# $common.row.names: common row names between data1 and data2 (can be a subset of $name or not). NULL if no common row names
# $same.col.name: logical. Are column names identical ? NULL if no col names in the two 2D datasets
# $col.name: name of columns of the 2 datasets if identical (NULL otherwise)
# $any.id.col.name: logical. Is there any column names identical ? NULL if no col names in the two 2D datasets
# $same.col.name.pos1: positions, in data1, of the column names identical in data2
# $same.col.name.pos2: positions, in data2, of the column names identical in data1
# $same.col.names.match1: positions, in data2, of the column names that match the column names in data1, as given by match(data1, data2) (NULL otherwise)
# $same.col.names.match2: positions, in data1, of the column names that match the column names in data2, as given by match(data1, data2) (NULL otherwise)
# $common.col.names: common column names between data1 and data2 (can be a subset of $name or not). NULL if no common column names
# $any.id.row: logical. is there identical rows (not considering row names)? NULL if nrow(data1) * nrow(data2) > 1e10
# $same.row.pos1: positions, in data1, of the rows identical in data2 (not considering row names). Return "TOO BIG FOR EVALUATION" if nrow(data1) * nrow(data2) > 1e10
# $same.row.pos2: positions, in data2, of the rows identical in data1 (not considering row names). Return "TOO BIG FOR EVALUATION" if nrow(data1) * nrow(data2) > 1e10
# $same.row.match1: positions, in data2, of the rows that match the rows in data1, as given by match(data1, data2) (NULL otherwise)
# $same.row.match2: positions, in data1, of the rows that match the rows in data2, as given by match(data1, data2) (NULL otherwise)
# $any.id.col: logical. is there identical columns (not considering column names)? NULL if ncol(data1) * ncol(data2) > 1e10
# $same.col.pos1: position in data1 of the cols identical in data2 (not considering column names). Return "TOO BIG FOR EVALUATION" if ncol(data1) * ncol(data2) > 1e10
# $same.col.pos2: position in data2 of the cols identical in data1 (not considering column names). Return "TOO BIG FOR EVALUATION" if ncol(data1) * ncol(data2) > 1e10
# $same.col.match1: positions, in data2, of the columns that match the columns in data1, as given by match(data1, data2) (NULL otherwise)
# $same.row.match2: positions, in data1, of the columns that match the columns in data2, as given by match(data1, data2) (NULL otherwise)
if(same.col.nb == TRUE){ # because if not the same col nb, the row cannot be identical
if(all(sapply(data1, FUN = typeof) == "integer") & all(sapply(data2, FUN = typeof) == "integer") & as.double(nrow(data1)) * nrow(data2) <= 1e10){ # as.double(nrow(data1)) to prevent integer overflow because R is 32 bits for integers
same.row.pos1 <- which(c(as.data.frame(t(data1), stringsAsFactors = FALSE)) %in% c(as.data.frame(t(data2), stringsAsFactors = FALSE))) # this work fast with only integers (because 32 bits)
}else if(as.double(nrow(data1)) * nrow(data2) <= 1e6){ # as.double(nrow(data1)) to prevent integer overflow because R is 32 bits for integers
if(col.nb <= 10){ # if ncol is not to big, the t() should not be that long
same.row.pos1 <- which(c(as.data.frame(t(data1), stringsAsFactors = FALSE)) %in% c(as.data.frame(t(data2), stringsAsFactors = FALSE))) # this work fast with only integers (because 32 bits)
if(same.row.nb == TRUE){ # because if not the same row nb, the col cannot be identical
if(all(sapply(data1, FUN = typeof) == "integer") & all(sapply(data2, FUN = typeof) == "integer") & as.double(ncol(data1)) * ncol(data2) <= 1e10){ # as.double(ncol(data1)) to prevent integer overflow because R is 32 bits for integers
same.col.pos1 <- which(c(data1) %in% c(data2))
same.col.pos2 <- which(c(data2) %in% c(data1))
tempo1 <- c(data1)
tempo2 <- c(data2)
same.col.pos1 <- which(tempo1 %in% tempo2)
same.col.pos2 <- which(tempo2 %in% tempo1)
same.col.match1 <- match(tempo1, tempo2)
same.col.match2 <- match(tempo2, tempo1)
}else if(as.double(ncol(data1)) * ncol(data2) <= 1e6){ # as.double(ncol(data1)) to prevent integer overflow because R is 32 bits for integers
same.col.pos1 <- logical(length = ncol(data1)) # FALSE by default
same.col.pos1[] <- FALSE # security
same.col.pos2 <- logical(length = ncol(data2)) # FALSE by default