blob: d5d93eeee48445dd82703d480ecfc1c75c7a1007 [file] [log] [blame]
## File src/library/tools/R/doitools.R
## Part of the R package, https://www.R-project.org
##
## Copyright (C) 2015-2016 The R Core Team
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## A copy of the GNU General Public License is available at
## https://www.R-project.org/Licenses/
doi_db <-
function(dois, parents)
{
db <- data.frame(DOI = trimws(as.character(dois)),
Parent = as.character(parents),
stringsAsFactors = FALSE)
class(db) <- c("doi_db", "data.frame")
db
}
doi_db_from_package_metadata <-
function(meta)
{
dois <- character()
pattern <- "<(DOI|doi):([^>]*)>"
if(!is.na(v <- meta["Description"])) {
m <- gregexpr(pattern, v)
dois <- c(dois, .gregexec_at_pos(pattern, v, m, 3L))
}
doi_db(dois, rep.int("DESCRIPTION", length(dois)))
}
doi_db_from_package_citation <-
function(dir, meta, installed = FALSE)
{
dois <- character()
path <- if(installed) "CITATION" else file.path("inst", "CITATION")
cfile <- file.path(dir, path)
if(file.exists(cfile)) {
cinfo <- .read_citation_quietly(cfile, meta)
if(!inherits(cinfo, "error"))
dois <- trimws(unique(unlist(cinfo$doi, use.names = FALSE)))
}
doi_db(dois, rep.int(path, length(dois)))
}
## \doi a user-defined macro (from system.Rd) which gets expanded by
## parse_Rd(). To extract programmatically, we try to find the user
## macros with the (current) expansion.
## Alternative, we could call .build_Rd_db() on the package Rd sources
## with e.g. macros = c("\\newcommand{\\doi}{<DOI:#1>}" and look for
## TEXT nodes matching the expansion. However, we cannot necessarily
## safely process build-time Sexprs ...
doi_db_from_package_Rd_db <-
function(db)
{
dois <- Filter(length, lapply(db, .get_dois_from_Rd))
doi_db(.canonicalize_doi(unlist(dois, use.names = FALSE)),
rep.int(file.path("man", names(dois)),
lengths(dois)))
}
.get_dois_from_Rd <-
function(x)
{
dois <- character()
recurse <- function(e) {
if(identical(attr(e, "Rd_tag"), "USERMACRO") &&
identical(attr(e, "macro"), "\\doi"))
dois <<- c(dois, e[2L])
else if(is.list(e))
lapply(e, recurse)
}
if(getDynamicFlags(x)["\\Sexpr"])
lapply(x, recurse)
dois
}
doi_db_from_package_sources <-
function(dir, add = FALSE, Rd = FALSE)
{
meta <- .read_description(file.path(dir, "DESCRIPTION"))
db <- rbind(doi_db_from_package_metadata(meta),
doi_db_from_package_citation(dir, meta),
if(Rd) {
rddb <- Rd_db(dir = dir)
doi_db_from_package_Rd_db(rddb)
})
if(add)
db$Parent <- file.path(basename(dir), db$Parent)
db
}
doi_db_from_installed_packages <-
function(packages, lib.loc = NULL, verbose = FALSE, Rd = FALSE)
{
if(!length(packages)) return()
one <- function(p) {
if(verbose)
message(sprintf("processing %s", p))
dir <- system.file(package = p, lib.loc = lib.loc)
if(dir == "") return()
meta <- .read_description(file.path(dir, "DESCRIPTION"))
db <- rbind(doi_db_from_package_metadata(meta),
doi_db_from_package_citation(dir, meta,
installed = TRUE),
if(Rd) {
rddb <- Rd_db(p, lib.loc = dirname(dir))
doi_db_from_package_Rd_db(rddb)
})
db$Parent <- file.path(p, db$Parent)
db
}
do.call(rbind,
c(lapply(packages, one),
list(make.row.names = FALSE)))
}
check_doi_db <-
function(db, verbose = FALSE)
{
use_curl <-
config_val_to_logical(Sys.getenv("_R_CHECK_URLS_USE_CURL_",
"TRUE")) &&
requireNamespace("curl", quietly = TRUE)
.gather <- function(d = character(),
p = list(),
s = rep.int("", length(d)),
m = rep.int("", length(d))) {
y <- data.frame(DOI = d, From = I(p), Status = s, Message = m,
stringsAsFactors = FALSE)
y$From <- p
class(y) <- c("check_doi_db", "data.frame")
y
}
.fetch <- function(u, d) {
if(verbose) message(sprintf("processing %s", d))
tryCatch(curlGetHeaders(u), error = identity)
}
.check <- function(d) {
u <- paste0("https://doi.org/", d)
## Do we need to percent encode parts of the DOI name?
h <- .fetch(u, d)
if(inherits(h, "error")) {
s <- "-1"
msg <- sub("[[:space:]]*$", "", conditionMessage(h))
} else {
s <- as.character(attr(h, "status"))
msg <- table_of_HTTP_status_codes[s]
}
## Similar to URLs, see e.g.
## curl -I -L https://doi.org/10.1016/j.csda.2009.12.005
## (As of 2016-12, this actually gives 400 Bad Request.)
if(any(grepl("301 Moved Permanently", h, useBytes = TRUE))) {
ind <- grep("^[Ll]ocation: ", h, useBytes = TRUE)
new <- sub("^[Ll]ocation: ([^\r]*)\r\n", "\\1", h[max(ind)])
if((s == "503") && grepl("www.sciencedirect.com", new))
s <- "405"
}
if((s != "200") && use_curl) {
g <- .curl_GET_status(u)
if(g == "200") {
s <- g
msg <- "OK"
}
}
c(s, msg)
}
bad <- .gather()
if(!NROW(db)) return(bad)
if(inherits(db, "check_doi_db")) {
## Allow re-checking check results.
parents <- db$From
dois <- db$DOI
} else {
parents <- split(db$Parent, db$DOI)
dois <- names(parents)
}
## See <https://www.doi.org/doi_handbook/2_Numbering.html#2.2>:
## The DOI prefix shall be composed of a directory indicator
## followed by a registrant code. These two components shall be
## separated by a full stop (period).
## The directory indicator shall be "10".
ind <- !startsWith(dois, "10")
if(any(ind)) {
len <- sum(ind)
bad <- rbind(bad,
.gather(dois[ind],
parents[ind],
m = rep.int("Invalid DOI", len)))
}
pos <- which(!ind)
if(length(pos)) {
results <- do.call(rbind, lapply(dois[pos], .check))
status <- as.numeric(results[, 1L])
ind <- (status %notin% c(200L, 405L))
if(any(ind)) {
pos <- pos[ind]
s <- as.character(status[ind])
s[s == "-1"] <- "Error"
m <- results[ind, 2L]
m[is.na(m)] <- ""
bad <- rbind(bad,
.gather(dois[pos],
parents[pos],
m,
s))
}
}
bad
}
format.check_doi_db <-
function(x, ...)
{
if(!NROW(x)) return(character())
paste0(sprintf("DOI: %s", x$DOI),
sprintf("\nFrom: %s",
vapply(x$From, paste, "", collapse = "\n ")),
ifelse((s <- x$Status) == "",
"",
sprintf("\nStatus: %s", s)),
ifelse((m <- x$Message) == "",
"",
sprintf("\nMessage: %s", m)))
}
print.check_doi_db <-
function(x, ...)
{
if(NROW(x))
writeLines(paste(format(x), collapse = "\n\n"))
invisible(x)
}