blob: b375734af41c885c844378e755eb76fc37742feb [file] [log] [blame]
# File src/library/utils/R/summRprof.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2017 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
# The profile file always starts with a single header line followed by stack lines
# If the header contains "memory profiling", the stack lines have memory info
# The memory info is a fixed width prefix on each line of the form :[0-9]+:[0-9]+:[0-9]+:[0-9]+:
# If the header contains "line profiling", there will be filename lines and stack lines will contain
# line number info of the form [0-9]+#[0-9]+
# The filename lines will start #File [0-9]+:
summaryRprof <-
function(filename = "Rprof.out", chunksize = 5000,
memory = c("none", "both", "tseries", "stats"),
lines = c("hide", "show", "both"),
index = 2, diff = TRUE, exclude = NULL, basenames = 1)
{
con <- file(filename, "rt")
on.exit(close(con))
firstline <- readLines(con, n = 1L)
if(!length(firstline))
stop(gettextf("no lines found in %s", sQuote(filename)), domain = NA)
sample.interval <- as.numeric(strsplit(firstline, "=")[[1L]][2L])/1e6
memory.profiling <- substr(firstline, 1L, 6L) == "memory"
line.profiling <- grepl("line profiling", firstline)
if (line.profiling)
filenames <- character(0)
memory <- match.arg(memory)
if(memory != "none" && !memory.profiling)
stop("profile does not contain memory information")
if (memory == "tseries")
return(Rprof_memory_summary(filename = con, chunksize = chunksize,
label = index, diff = diff, exclude = exclude,
sample.interval = sample.interval))
else if (memory == "stats")
return(Rprof_memory_summary(filename = con, chunksize = chunksize,
aggregate = index, diff = diff, exclude = exclude,
sample.interval = sample.interval))
lines <- match.arg(lines)
if (lines != "hide" && !line.profiling)
stop("profile does not contain line information")
fnames <- NULL
ucounts <- NULL
fcounts <- NULL
memcounts <- NULL
umem <- NULL
repeat({
chunk <- readLines(con, n = chunksize)
if (line.profiling) {
filenamelines <- grep("^#File [0-9]+: ", chunk)
if (length(filenamelines)) {
fnum <- as.integer(sub("^#File ([0-9]+): .*", "\\1", chunk[filenamelines]))
filenames[fnum] <- sub("^#File [0-9]+: ", "", chunk[filenamelines])
if (basenames) {
dirnames <- dirname(filenames[fnum])
filenames[fnum] <- basename(filenames[fnum])
for (i in seq_len(basenames - 1)) {
tail <- basename(dirnames)
filenames[fnum] <- ifelse(tail == ".", filenames[fnum],
paste0(tail, "/", filenames[fnum]))
# May have Windows-style names here where dirname("c:/") == "c:/"
parent <- dirname(dirnames)
dirnames <- ifelse(dirnames == parent, ".", parent)
}
}
chunk <- chunk[-filenamelines]
}
}
if (length(chunk) == 0L)
break
if (memory.profiling) {
memprefix <- attr(regexpr(":[0-9]+:[0-9]+:[0-9]+:[0-9]+:", chunk), "match.length")
if (memory == "both") {
memstuff <- substr(chunk, 2L, memprefix-1L)
memcounts <- pmax(apply(sapply(strsplit(memstuff, ":"), as.numeric), 1, diff), 0)
if (!is.matrix(memcounts)) # Need a matrix result (PR#16395)
memcounts <- matrix(memcounts, nrow = 1)
## memcounts <- c(0, rowSums(memcounts[, 1L:3L]))
## convert to bytes.
memcounts <- c(0, rowSums(cbind(memcounts[, 1L:2L, drop = FALSE] * 8, memcounts[, 3L, drop = FALSE])))
rm(memstuff)
}
chunk <- substr(chunk, memprefix+1L, nchar(chunk, "c"))
if(any((nc <- nchar(chunk, "c")) == 0L)) {
chunk <- chunk[nc > 0L]
memcounts <- memcounts[nc > 0L]
}
}
chunk <- strsplit(chunk, " ")
if (line.profiling)
chunk <- lapply(chunk, function(x) {
locations <- !startsWith(x, '"')
if (lines != "hide") {
fnum <- sub("#.*", "", x[locations])
lnum <- sub(".*#", "", x[locations])
x[locations] <- paste0(filenames[as.integer(fnum)], "#", lnum)
}
switch(lines,
hide = x <- x[!locations],
show = x <- x[locations]
)
if (length(x)) x else "<no location>"
})
newfirsts <- sapply(chunk, "[[", 1L)
newuniques <- lapply(chunk, unique)
ulen <- lengths(newuniques)
newuniques <- unlist(newuniques)
new.utable <- table(newuniques)
new.ftable <- table(factor(newfirsts, levels = names(new.utable)))
if (memory == "both")
new.umem <- rowsum(memcounts[rep.int(seq_along(memcounts), ulen)], newuniques)
fcounts <- rowsum( c(as.vector(new.ftable), fcounts),
c(names(new.ftable), fnames) )
ucounts <- rowsum( c(as.vector(new.utable), ucounts),
c(names(new.utable), fnames) )
if(memory == "both")
umem <- rowsum(c(new.umem, umem), c(names(new.utable), fnames))
fnames <- sort(unique(c(fnames, names(new.utable))))
})
firstnum <- fcounts*sample.interval
uniquenum <- ucounts*sample.interval
## sort and form % on unrounded numbers
index1 <- order(-firstnum, -uniquenum)
index2 <- order(-uniquenum, -firstnum)
if (lines == "show") {
filename <- sub("#.*$", "", fnames)
linenum <- rep.int(0, length(filename))
hasline <- filename != fnames
linenum[hasline] <- as.numeric(sub("^.*#", "", fnames[hasline]))
index3 <- order(filename, linenum)
}
firstpct <- round(100*firstnum/sum(firstnum), 2)
uniquepct <- round(100*uniquenum/sum(firstnum), 2)
digits <- ifelse(sample.interval < 0.01, 3L, 2L)
firstnum <- round(firstnum, digits)
uniquenum <- round(uniquenum, digits)
if (memory == "both") memtotal <- round(umem/1048576, 1) ## 0.1MB
rval <- data.frame(firstnum, firstpct, uniquenum, uniquepct)
names(rval) <- c("self.time", "self.pct", "total.time", "total.pct")
rownames(rval) <- fnames
if (memory == "both") rval$mem.total <- memtotal
by.self <- rval[index1, ]
by.self <- by.self[by.self[,1L] > 0, ]
by.total <- rval[index2, c(3L, 4L, if(memory == "both") 5L, 1L, 2L)]
result <- list(by.self = by.self, by.total = by.total)
if (lines == "show")
result <- c(result, list(by.line = rval[index3,]))
c(result,
sample.interval = sample.interval,
sampling.time = sum(fcounts)*sample.interval)
}
Rprof_memory_summary <- function(filename, chunksize = 5000,
label = c(1, -1), aggregate = 0, diff = FALSE,
exclude = NULL, sample.interval)
{
memcounts <- NULL
firsts <- NULL
labels <- vector("list", length(label))
index <- NULL
repeat({
chunk <- readLines(filename, n = chunksize)
if (length(chunk) == 0L)
break
memprefix <- attr(regexpr(":[0-9]+:[0-9]+:[0-9]+:[0-9]+:", chunk),
"match.length")
memstuff <- substr(chunk, 2L, memprefix-1L)
memcounts <- rbind(t(sapply(strsplit(memstuff, ":"), as.numeric)))
## convert to bytes
memcounts <- cbind(memcounts[, 1L:2L, drop = FALSE] * 8, memcounts[, 3L:4L, drop = FALSE])
chunk <- substr(chunk, memprefix+1, nchar(chunk, "c"))
if(any((nc <- nchar(chunk, "c")) == 0L)) {
memcounts <- memcounts[nc > 0L, ]
chunk <- chunk[nc > 0L]
}
chunk <- strsplit(chunk, " ")
if (length(exclude))
chunk <- lapply(chunk, function(l) l[!(l %in% exclude)])
newfirsts <- sapply(chunk, "[[", 1L)
firsts <- c(firsts, newfirsts)
if (!aggregate && length(label)){
for(i in seq_along(label)){
if (label[i] == 1)
labels[[i]] <- c(labels[[i]], newfirsts)
else if (label[i]>1) {
labels[[i]] <- c(labels[[i]], sapply(chunk,
function(line)
paste(rev(line)[1L:min(label[i], length(line))],
collapse = ":")))
} else {
labels[[i]] <- c(labels[[i]], sapply(chunk,
function(line)
paste(line[1L:min(-label[i], length(line))],
collapse = ":")))
}
}
} else if (aggregate) {
if (aggregate > 0) {
index <- c(index, sapply(chunk,
function(line)
paste(rev(line)[1L:min(aggregate, length(line))],
collapse = ":")))
} else {
index <- c(index, sapply(chunk,
function(line)
paste(line[1L:min(-aggregate, length(line))],
collapse = ":")))
}
}
if (length(chunk) < chunksize)
break
})
if (length(memcounts) == 0L) stop("no events were recorded")
memcounts <- as.data.frame(memcounts)
names(memcounts) <- c("vsize.small", "vsize.large", "nodes", "duplications")
if (!aggregate) {
rownames(memcounts) <- (1L:nrow(memcounts))*sample.interval
names(labels) <- paste0("stack:", label)
memcounts <- cbind(memcounts, labels)
}
if (diff)
memcounts[-1L, 1L:3L] <- pmax(0L, apply(memcounts[, 1L:3L], 2L, diff))
if (aggregate)
memcounts <- by(memcounts, index,
function(these) with(these,
round(c(vsize.small = mean(vsize.small),
max.vsize.small = max(vsize.small),
vsize.large = mean(vsize.large),
max.vsize.large = max(vsize.large),
nodes = mean(nodes),
max.nodes = max(nodes),
duplications = mean(duplications),
tot.duplications = sum(duplications),
samples = nrow(these)
))
)
)
return(memcounts)
}