| #### Regression Tests that need "much" memory |
| #### (and / or are slow even with enough GBytes of memory) |
| |
| print(si <- sessionInfo(), locale=FALSE) |
| Sys.info() |
| ## Run (currently _only_) when inside tests/ by |
| ' |
| time make test-large |
| ' # giving ~ 35 min [R-devel 2019-01] |
| |
| ## From CRAN package 'sfsmisc': |
| Sys.memGB <- function (kind = "MemTotal") |
| { |
| mm <- drop(read.dcf("/proc/meminfo", fields = kind)) |
| if (any(is.na(mm))) { |
| warning("Non-existing 'kind': ", names(mm)[is.na(mm)][1]) |
| 0 |
| } else if (!all(grepl(" kB$", mm))) { |
| warning("Memory info ", dQuote(kind), |
| " is not returned in 'kB' aka kiloBytes") |
| 0 |
| } else |
| as.numeric(sub(" kB$", "", mm))/(1000 * 1024) |
| } |
| |
| availableGB <- |
| if(file.exists("/proc/meminfo")) { # e.g. on Linux |
| Sys.memGB("MemAvailable") |
| } else { |
| 0 # unless we add something better here |
| } |
| cat("Available (processor aka CPU) memory: ", round(availableGB, 1), |
| "GB (Giga Bytes)\n") |
| |
| if(.Machine$sizeof.pointer < 8) { |
| cat(".Machine :\n"); str(.Machine) |
| cat("not a 64-bit system -- forget about these tests!\n") |
| q("no") |
| } |
| |
| ### Testing readLines() *large* file with embedded nul aka `\0' |
| ## |
| ## takes close to one minute and ~ 10 GB RAM |
| if(availableGB > 11) local(withAutoprint({ |
| ## File construction originally by Bill Dunlap, Cc: R-help, |
| ## Subject: Re: [R] readLines without skipNul=TRUE causes crash |
| ## Date: Mon, 17 Jul 2017 08:36:55 -0700 |
| tf <- tempfile(); file <- file(tf, "wb") |
| txtLine <- c(rep(as.raw(32:127), 2^5), charToRaw("\n")) # <- want many lines |
| system.time({ |
| for(i in 1:(2^15-1)) writeBin(rep_len(txtLine, 2^16), file) |
| for(i in 1:(2^15-1)) writeBin(rep_len(as.raw(0L), 2^16), file) |
| }) |
| close(file) |
| log2(file.size(tf)) ## 31.99996 |
| ## now, this gave a segmentation fault, PR#17311 : |
| "FIXME: on 32-bit Linux (F 24), still see |
| Program received signal SIGSEGV, Segmentation fault. |
| ... in do_readLines (call=0x8.., op=0x8.., ....) |
| at ../../../R/src/main/connections.c:3852 |
| 3852 if(c != '\n') buf[nbuf++] = (char) c; else break; |
| " |
| if(.Machine$sizeof.pointer > 4) withAutoprint({ |
| system.time( x <- readLines(tf) ) # depending on disk,.. takes 15-50 seconds |
| ## --------- |
| str(ncx <- nchar(x, "bytes")) |
| ## int [1:688108] 3072 3072 3072 3072 3072 3072 3072 3072 ... |
| tail(ncx) # ... 3072 3072 3072 1003 |
| table(ncx) # mostly 3072, then some 4075 and the last one |
| head(iL <- which(ncx == 4075)) |
| stopifnot(diff(iL) == 21) |
| }) else cat("32-bit: still seg.faulting - FIXME\n") |
| })) |
| ## + 2 warnings |
| |
| |
| ### Testing PR#17992 c() / unlist() name creation for large vectors |
| ## Part 1 |
| if(availableGB > 21) system.time({ |
| res <- c(a=raw(2), raw(2^31-1)) |
| }) ## 36--44 sec elapsed (ada-16, ~ 120 GB available) after fix |
| ## In R <= 3.4.1, took 51 sec elapsed, and gave Error .. : |
| ## attempt to set index 18446744071562067968/2147483649 in SET_STRING_ELT |
| ## |
| if(FALSE) { # object.size() itself is taking a lot of time! |
| os <- object.size(res) |
| } else { |
| os <- structure(19327353184, class = "object_size") |
| print(os, units = "GB") # 18 |
| } |
| if(exists("res")) rm(res) |
| gc(reset = TRUE) # for the next step |
| |
| ### Testing PR#17992 c() / unlist() name creation for large vectors |
| ## Part 2 (https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17292#c4): |
| if(availableGB > 37) system.time({ |
| res <- c(a = list(rep(c(b=raw(1)), 2^31-2), raw(2)), recursive=TRUE) |
| }) |
| ## 437 sec elapsed (ada-16, ~ 120 GB available) after fix; then ada-20: 566 sec |
| ## In R <= 3.4.1, took 475 sec elapsed, and gave Error .. : |
| ## could not allocate memory (2048 Mb) in C function 'R_AllocStringBuffer' |
| ## ((and that error msg is incorrect because of int overflow)) |
| if(exists("res")) withAutoprint({ |
| str(res) # is fast! |
| ## Named raw [1:2147483648] 00 00 00 00 ... |
| ## - attr(*, "names")= chr [1:2147483648] "a.b" "a.b" "a.b" "a.b" ... |
| gc() # back to ~ 18.4 GB |
| rm(res) |
| }) |
| gc(reset = TRUE) # for the next step |
| |
| ## Large string's encodeString() -- PR#15885 |
| if(availableGB > 4) system.time(local(withAutoprint({ |
| txt <- strrep("test me:", 53687091); object.size(txt) # 429'496'824 bytes |
| nc <- nchar(txt) ## NB this is larger than maximal integer: |
| nc*5L+8L # NA + Warning 'NAs produced by integer overflow' |
| en <- encodeString(txt) |
| ## encodeString() seg.faulted in R <= 3.4.1 |
| stopifnot(identical(txt,en)) # encoding did not change simple ASCII |
| }))) |
| ## 52 sec elapsed [nb-mm4, 8 GB]; then 66.7 [ada-20; much more GB] |
| |
| |
| ## pretty(x, n) for n = <large> or large diff(range(x) gave overflow in C code |
| if(availableGB > 6) system.time(withAutoprint({ |
| r <- pretty(c(-1,1)*1e300, n = 449423288, min.n = 1) |
| head(r) ; length(r) # was only 21 in R < 3.5.0 |
| stopifnot(all.equal(length(r), 400000001, tol = 0.1)) |
| })) ## 4.8--5.5 sec. |
| rm(r) |
| gc() |
| |
| n <- 4e4 # << for quick testing, comment next line |
| n <- 2.2e9 |
| |
| if(availableGB > 60) withAutoprint({ |
| n/.Machine$integer.max # 1.024 ==> need long vectors! |
| ii <- seq_len(n) # user system elapsed [seq_len() fast: ALTREP "compact"] |
| system.time(ii <- ii + 0) # 6.726 17.558 24.450 (slow!, seen faster) |
| system.time(i2 <- ii[-n]) # 14.267 23.532 37.918 (slow!, seen slower: el.= 51) |
| ## |
| ## NB: keep n, i, i2 for "below" |
| }) |
| ## In R <= 3.4.1 : |
| ## Program received signal SIGSEGV, Segmentation fault. |
| ## 0x00000000005a0daf in realSubscript (call=0x3f01408, stretch=<optimized out>, |
| ## nx=2200000000, ns=1, s=0x426db18) at ../../../R/src/main/subscript.c:691 |
| ## 691 LOGICAL(indx)[ix] = 0; |
| |
| if(availableGB > 99) withAutoprint({ |
| system.time( x <- ii/n ) # 5.45 user; 11.5--14.36 elapsed |
| system.time( y <- sin(pi*x) ) # 42 user; 48.9--.. elapsed |
| system.time(sorted <- !is.unsorted(x)) # ~ 4 elapsed |
| stopifnot(sorted) |
| ## default n (= "nout") = 50: |
| system.time(ap1 <- approx(x,y, ties = "ordered"))# 15 user; 25 elapsed |
| stopifnot(exprs = { |
| is.list(ap1) |
| names(ap1) == c("x","y") |
| length(ap1$x) == 50 |
| all.equal(ap1$y, sin(pi*ap1$x), tol= 1e-9) |
| }) |
| rm(ap1) # keep x,y,n,i2 |
| gc() # --> max used: 92322 Mb |
| }) |
| |
| ## which() and ifelse() working for long vectors |
| if(availableGB > 165) withAutoprint({ |
| system.time(iis <- which(isMl <- ii < 9999)) # 5.8 user, 8.8 elapsed |
| gc() # 59 GB max used |
| system.time(r <- ifelse(isMl, ii, ii*1.125)) # user system elapsed |
| stopifnot(exprs = { # in R 3.5.2 : 124.989 174.726 300.656 |
| ## GB's ifelse() + using which(<long>) 3.6.0 : 71.815 81.823 154.124 |
| length(r) == n |
| iis == seq_len(9998) |
| }) |
| rm(isMl, iis, r) |
| }) |
| gc() # 159 GB max used |
| |
| if(availableGB > 211) withAutoprint({ ## continuing from above |
| ## both large (x,y) *and* large output (x,y): |
| system.time(xo <- x + 1/(2*n)) # ~ 9 elapsed |
| system.time(ap <- approx(x,y, ties = "ordered", xout = xo)) |
| # 194 user, 214--500 elapsed |
| gc(reset = TRUE) # showing max.used ~ 1..... Mb |
| stopifnot(exprs = { |
| is.list(ap) |
| names(ap) == c("x","y") |
| length(ap$x) == n |
| is.na(ap$y[n]) # because ap$x[n] > 1, i.e., outside of [0,1] |
| all.equal(ap$y[i2], sin(pi*xo[i2]), tol= if(n < 1e7) 1e-8 else 1e-15) |
| }) |
| rm(ap); gc() # showing used 83930 Mb | max.used 210356.6 Mb |
| ## only large x,y : |
| system.time(apf <- approxfun(x,y, ties="ordered", rule = 2))# elapsed: ~26s |
| xi <- seq(0, 1, by = 2^-12) ## linear interpol. is less accurate than spline: |
| stopifnot(all.equal(apf(xi), sin(pi*xi), tol= if(n < 1e7) 1e-7 else 1e-11)) |
| rm(apf); gc() # (~ unchanged) |
| system.time(ssf <- splinefun(x,y, ties = "ordered")) |
| # elapsed 120 s; using ~ 158 GB |
| system.time(ss <- spline (x,y, ties = "ordered", xout = xi)) |
| # elapsed 126--265 s; using ~ 207 GB |
| gc() |
| stopifnot(exprs = { |
| is.list(ss) |
| names(ss) == c("x","y") |
| length(ss$y) == length(xi) |
| all.equal(ss$y , sin(pi*xi), tol= 1e-15) |
| all.equal(ssf(xi), ss$y, tol= 1e-15) |
| }) |
| rm(x, y, xo, ss, ssf) # remove long vector objects |
| gc(reset=TRUE) |
| }) |
| |
| ## sum(<Integer|Logical>) -- should no longer overflow: ---------------------------------------- |
| ## 1) sum(<long logical>) == counting |
| if(availableGB > 24) withAutoprint({ |
| system.time(L <- rep.int((0:15) %% 7 == 2, 2^28))# -> length 2^32; ~ 22 sec |
| print(object.size(L), unit="GB") # 16 GB |
| system.time(sL <- sum(L)) # 8.4 sec |
| stopifnot(exprs = { |
| is.logical(L) |
| length(L) == 2^32 |
| !is.integer(length(L)) |
| is.integer(sL) |
| identical(sL, as.integer(2^29)) |
| }) |
| }) ## sL would be NA with an "integer overflow" warning in R <= 3.4.x |
| gc(reset=TRUE) |
| |
| ## 2) many (and relatively long and large) integers |
| L <- as.integer(2^31 - 1)## = 2147483647L = .Machine$integer.max ("everywhere") |
| ## a "small" example with this is in ./reg-tests-1d.R (see 'x24') |
| if(availableGB > 12) withAutoprint({ |
| system.time(x31 <- rep.int(L, 2^31+1)) # sum = 2^62 - 1 =.= 2^62 // ~ 5.5 sec |
| print(object.size(x31), unit = "GB") # 8 G |
| system.time(S <- sum(x31)) # ~ 2 sec |
| system.time(S.4 <- sum(x31, x31, x31, x31)) # 8 sec |
| stopifnot(is.integer(x31), |
| identical(S, 2^62), |
| identical(S.4, 2^64)) |
| system.time(x32 <- c(x31, x31)) # 13 user | 20.8 elapsed (and 16 GB) |
| rm(x31)# now, sum vvv will switch to use irsum() [double accumulator] |
| system.time(S.2 <- sum(x32)) # 8 sec |
| stopifnot(S.2 == 2^63) |
| rm(x32) |
| }) |
| |
| |
| ## seq() remaining integer: (PR 17497, comment #9) |
| if(availableGB > 16) withAutoprint({ |
| i <- as.integer(2^30) |
| system.time(i2.31 <- seq(-i, by=1L, length=2*i+1)) # 11.1 user | 19.2 elapsed |
| object.size(i2.31) # 8'589'934'648 bytes [ was 17.17 GB in R <= 3.5.x ] |
| stopifnot(is.integer(i2.31), i2.31[1] == -i, i2.31[length(i2.31)] == i) |
| |
| ## pmax(), pmin() with long vectors, PR 17533 |
| if(availableGB > 24) withAutoprint({ |
| system.time(i2.31 <- pmin(i2.31, 0L)) # 7.2 sec user | 11.2 elapsed |
| str(i2.31) |
| system.time(stopifnot(i2.31[(i+1):length(i2.31)] == 0)) # 16.7 user | 28.0 elapsed |
| }) |
| }) |
| |
| |
| ## match(<long character>, *) PR#17552 |
| if(availableGB > 44) withAutoprint({ ## seen 40 G ('RES') |
| system.time(m <- match(rep("a", 2^31), "a")) # 34.7 sec user (55 elapsed) |
| stopifnot(all(m == 1L)) |
| rm(m) |
| system.time({x <- character(2^31); x[26:1] <- letters }) # 1.6 user | 9.4 elapsed |
| system.time(m <- match(x, "a"))# 18.2 user | 51.6 elapsed |
| head(m, 30) |
| system.time(stopifnot(m[26] == 1L, is.na(m[-26]))) |
| rm(x, m) |
| }) |
| |
| |
| ## readBin() and writeBin() for long rawConnection s, PR#17665 |
| ## ------- -------- ------------- |
| if(availableGB > 14) withAutoprint({ ## seen 11.6 G |
| vec <- rep(0, 3e8) # object.size(vec) > 2^31 |
| raw_con <- rawConnection(serialize(vec, NULL)) # ~ 5 sec. |
| ## Stepping through this connection gives an error after the 2^31st element: |
| repeat { |
| x <- readBin(raw_con, "raw", n = 1e+06) |
| if(length(x) == 0) |
| break |
| cat(".") |
| }; cat("\n") |
| ## Error in readBin(raw_con, "raw", n = 1e+06) : too large a block specified |
| }) |
| |
| ## writeBin() for long vectors |
| if(availableGB > 20) withAutoprint({ ## seen 20.9 G |
| x <- raw(2^31) |
| writeBin(x, con = nullfile()) |
| |
| con <- rawConnection(raw(0L), "w") |
| writeBin(x, con = con) |
| stopifnot(identical(x, rawConnectionValue(con))) |
| |
| system.time(x <- pi*seq_len(2.1*2^30)) # 25 sec |
| zzfil <- tempfile("test-large-bin") |
| zz <- file(zzfil, "wb") ## file size will be 2.5 GB !!! |
| system.time(z <- writeBin(x, zz)) # 32 sec |
| stopifnot(is.null(z)) |
| close(zz); zz <- file(zzfil, "rb") |
| system.time(r <- readBin(zz, double(), n = length(x) + 999)) # 32 sec |
| system.time(stopifnot(identical(x, r))) # 24 sec |
| close(zz); rm(r, zz) |
| }) |
| |
| |
| ## predict(loess(.), se=TRUE) for "large" sample size -- PR#17121 |
| ## No need for very much memory, but is slow and should do several ex. |
| mkDat <- function(n) { |
| x <- 5*(1:n)/(n+1) |
| data.frame(x = x, y = sin(pi*x^2) * exp(-x/2) + rnorm(n)/8) |
| } |
| set.seed(1); dat <- mkDat(n = 42000) |
| system.time( # 14.5 sec (on lynne ~ 2019) |
| fit <- loess(y~x, data=dat) |
| ) |
| r <- tools::assertError( |
| predict(fit, newdata=data.frame(x=.5), se=TRUE) |
| , verbose=TRUE) # |
| ## typically would not seg.fault but give Calloc(..) error (with *wrong* size) |
| stopifnot(grepl("^workspace .* is too large .* 'se = TRUE'", r[[1]]$message)) |
| |
| |
| |
| gc() # NB the "max used" |
| proc.time() # total [ ~ 40 minutes in full case, 2019-04-12] |