blob: e785b0f28a90058f0dbe5d8c12d2a8e42c0105c9 [file] [log] [blame]
# File src/library/tools/R/update_packages.R
# Part of the R package, https://www.R-project.org
#
# Copyright (C) 1995-2017 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# https://www.R-project.org/Licenses/
## Code in this file adapted by Gabriel Becker from
## code distributed as part of the switchr R package.
## The modifications in this file and copyright thereof are
## donated without restriction to the R project.
##
## Original code and the switchr R package are
## Copyright 2018 Genentech Inc. All Rights Reserved.
## Author: Gabriel Becker <gabembecker@gmail.com>
## Distributed under the Artistic 2.0 License
## (re-licensed here to GPL 2+)
## canonical field order, from calling available.packages
## on CRAN repository (same result for Bioconductor)
fieldorder = c("Package", "Version", "Priority", "Depends",
"Imports", "LinkingTo", "Suggests", "Enhances",
"License", "License_is_FOSS", "License_restricts_use",
"OS_type", "Archs", "MD5sum", "NeedsCompilation",
"File", "Repository")
update_PACKAGES <- function(dir = ".", fields = NULL,
type = c("source", "mac.binary",
"win.binary"),
verbose.level = as.integer(dryrun),
latestOnly = TRUE,
addFiles = FALSE,
rds_compress = "xz",
strict = TRUE,
dryrun = FALSE)
{
if(!is.integer(verbose.level))
verbose.level = as.integer(verbose.level)
type <- match.arg(type)
stopifnot(verbose.level >= 0L && verbose.level <= 2L)
PKGSfile <- file.path(dir, "PACKAGES")
## whether we will call write_PACKAGES directly/immediately
calldown <- FALSE
retdat <- NULL
if(type == "win.binary" && strict) {
warning("PACKAGES files do not include MD5 sums in the win.binary case",
", so strict checking is impossible. Calling down to write_PACKAGES ",
"directly.")
calldown <- TRUE
} else if (!file.exists(PKGSfile)) {
## no PACKAGES file to update
warning("No existing PACKAGES file found at ", PKGSfile)
calldown <- TRUE
} else if (!all(dim(retdat <- as.data.frame(read.dcf(PKGSfile),
stringsAsFactors = FALSE)) > 0L)) {
## retdat is populated in the if condition here.
## read without fields restriction, because reducing number
## of fields is ok, adding fields means we need reprocessing
##0 rows and/or 0 columns
warning("Existing PACKAGES file contained no rows and/or no columns")
calldown <- TRUE
}
okfields <- names(retdat)
## can't update PACKAGES file if existing entries don't have all
## the required fields
if(!calldown && !is.null(fields) && !all(fields %in% okfields)) {
warning("Specified fields no present in existing PACKAGES file: ",
paste(setdiff(fields, okfields), collapse = " "))
calldown <- TRUE
}
## call straight down to write_PACKAGES if:
## 1. type is win.binary and strict is TRUE (no MD5 sums to check against,
## only way to get full strictness is write_PACKAGES
## 2. no PACKAGES file already exists or it's empty
## 3. 1+ specified field not present in existing PACKAGES file
if(calldown) {
if(verbose.level > 0L)
message("Unable to update existing PACKAGES file. Calling write_PACKAGES directly.")
return(write_PACKAGES(dir = dir, fields = fields, type = type,
verbose = verbose.level == 2,
latestOnly = latestOnly,
addFiles = addFiles, rds_compress = rds_compress))
}
## we know file exists by this point
pmtime <- file.info(PKGSfile)$mtime
if(verbose.level > 0L) {
message("Updating existing repository [strict mode: ",
if(strict) "ON" else "OFF",
"]\nDetected PACKAGES file with ", nrow(retdat),
" entries at ", PKGSfile)
}
if(!is.null(fields))
retdat <- retdat[, fields]
pkgfiles <- list.files(dir, pattern = .get_pkg_file_pattern(type),
full.names = TRUE)
if(length(pkgfiles) == 0L)
stop("unable to find any package tarballs in ", dir)
if(is.null(retdat$File)) {
tbmatches <- match(paste(retdat$Package,
retdat$Version,
sep = "_"),
## above doesn't have the extensions
gsub(.get_pkg_file_pattern(type, ext.only = TRUE),
"",
basename(pkgfiles)))
## this gets NAs for entries that don't have tarballs
## taken care of via keeprows below.
retdat$tarball <- pkgfiles[tbmatches]
} else
retdat$tarball <- retdat$File
## for accounting purposes, removed before final write
retdat$IsNew = FALSE
## detect and remove entries whose files have been deleted
## file.exists(NA_character_) returns FALSE, so this
## is ok without an explicit NA check
keeprows <- file.exists(retdat$tarball)
if(verbose.level > 0L) {
msg <- paste("Tarballs found for", sum(keeprows), " of ",
nrow(retdat), "existing PACKAGES entries.")
message(msg)
}
retdat <- retdat[keeprows,]
## check for tarballs that are too new
## remove entries which might appear to match them
## because the new tarball takes precedence.
tbmtimes <- file.info(retdat$tarball)$mtime
toonew <- which(tbmtimes > pmtime)
if(length(toonew) > 0L) {
if(verbose.level > 0L){
msg <- paste(length(toonew), " tarball(s) matching existing entries are ",
"newer than PACKAGES file and must be reprocessed.")
message(msg)
}
retdat <- retdat[-toonew, ]
}
## If in strict mode we confirm that the MD5 sums match for
## tarballs which match pre-existing PACKAGES entries.
##
## Otherwise we skip this check for speed, assuming that
## any tarball we find is the one used to create the entry.
##
## Note: skipping the check can lead to a 'bad' repo in rare
## cases, but the installation machinery would still protect
## against non-malicious cases of this by failing out when the
##
## Note: MD5 sum didn't match what PACKAGES said it should be.
## In the win.binary case the existing PACKAGES file has no MD5
## sums, but we caught that above, so if strict is TRUE, we know
## type != win.binary.
if(strict && NROW(retdat) > 0L) {
if(verbose.level > 0L) {
msg <- paste("[strict mode] Checking if MD5sums match ",
"for existing tarballs")
message(msg)
}
curMD5sums <- md5sum(normalizePath(retdat$tarball))
## There are no NAs in retdat$MD5sum here, as the only data in
## there now is from the existing PACKAGES file.
notokinds <- which(retdat$MD5sum != curMD5sums)
if(length(notokinds) > 0L) {
msg <- paste0("Detected ", length(notokinds), " MD5sum mismatches",
" between existing PACKAGES file and tarballs")
warning(msg)
} else if(verbose.level > 0L) {
message("All existing entry MD5sums match tarballs.")
}
## tarballs that don't already ahve an entry
## OR that mismatched their existing entry
## possibly needing to be added
if(length(notokinds) > 0L) {
retdat <- retdat[-notokinds,]
}
}
newpkgfiles <- setdiff(normalizePath(pkgfiles),
normalizePath(retdat$tarball))
## If we're willing to assume the filenames are honest and
## accurate, we can skip non-newest package versions without
## ever untaring them and reading their DESCRIPTION files.
##
## this is not the default because it is technically speaking
## less safe than what write_PACKAGES(,latestOnly=TRUE) does
## which is always process everything then prune.
if(!strict &&
latestOnly &&
length(newpkgfiles) > 0L) {
##strip extension, left with pkgname_version
newpkgtmp <- gsub(.get_pkg_file_pattern(type, ext.only = TRUE),
"",
basename(newpkgfiles))
newpkgspl <- strsplit(basename(newpkgtmp), "_")
newpkgdf <- do.call(rbind.data.frame,
c(newpkgspl, stringsAsFactors = FALSE))
## We create a dummy new repository db with only
## Package and Version columns, then fill them
## out with NAs so we can hit .remove_stale_dups
## before ever reading in the DESCRIPTION files
##
## These dummy db rows will all be replaced by the
## real data later in the process before the
## new PACKAGES files are written.
newpkgdf <- newpkgdf[,1:2]
names(newpkgdf) <- c("Package", "Version")
newpkgdf <- .filldfcols(newpkgdf, retdat)
## for accounting purposes, taken back off later
newpkgdf$IsNew <- TRUE
newpkgdf$tarball <- newpkgfiles
retdat <- rbind(retdat, newpkgdf)
## remove non-latest ones now to avoid the expensive stuff
## this is non-strict because it assumes the package name and
## version in the filename are accurate. Technically, not
## guaranteed.
retdat <- .remove_stale_dups(retdat)
newpkgfiles <- retdat$tarball[retdat$IsNew]
}
## Do any packages/package versions need to be added?
numnew <- length(newpkgfiles)
if(numnew > 0L) {
if(verbose.level > 0L) {
message("Found ", numnew, " package versions to process.")
}
## returns a list of character vectors suitable for construction
## into a read.dcf output-style character matrix
newpkgdat <- .process_package_files_for_repository_db(newpkgfiles,
type,
fields,
verbose.level > 1)
newpkgdat <- .process_repository_package_db_to_matrix(newpkgdat,
path = "", #unused here
addFiles,
addPaths = FALSE,
latestOnly)
newpkgdf <- as.data.frame(newpkgdat, stringsAsFactors = FALSE)
if(!identical(names(newpkgdf), names(retdat))) {
## make sure we catch columns only present in one or
## the other in both directions.
##
## the order of columns that comes out of this is columns
## in retdat (ie the original PACKAGES) in the order
## they appear there, THEN fields unique to the new tarballs
## appended in the order they appear there.
newpkgdf <- .filldfcols(newpkgdf, retdat)
retdat <- .filldfcols(retdat, newpkgdf)
}
if(verbose.level > 0L) {
msg <- paste("Processed", nrow(newpkgdf), "entries from ",
"package tarballs.")
message(msg)
}
## just for accounting purposes
## taken back off later
newpkgdf$IsNew <- TRUE
retdat <- rbind(retdat[!retdat$IsNew,],
newpkgdf)
if(latestOnly) {
retdat <- .remove_stale_dups(retdat)
}
if(verbose.level > 0L) {
msg <- paste(sum(retdat$IsNew), "entries added or updated, ",
sum(!retdat$IsNew), " entries retained unchanged.")
message(msg)
}
} else if (verbose.level > 0L) {
message("No new packages or updated package versions detected")
}
if(verbose.level > 0L) {
msg <- paste("Final updated PACKAGES db contains ",
nrow(retdat), " entries.")
message(msg)
}
## write_PACKAGES docs don't define an order of entries, but I
## think it should(?) be sort order of files it processes. We
## reorder our db to give the same order.
retdat <- retdat[order(paste0(retdat$Package, retdat$Version)),]
## clean up temp columns (note this works even if they aren't
## there so we don't need to worry about ones that are only
## defined within if blocks
retdat$IsNew <- NULL
retdat$tarball <- NULL
## guarantee canonical field order, with non-canonical fields
## appearing after
noncanonfs <- setdiff(names(retdat), fieldorder)
canonfs <- fieldorder[fieldorder %in% names(retdat)]
retdat <- retdat[,c(canonfs, noncanonfs)]
if(dryrun) {
if(verbose.level > 0L)
message("[dryrun mode] Dryrun complete.")
} else {
if(verbose.level > 0L)
message("Writing final updated PACKAGES files.")
## crucial that db is written as a matrix
## otherwise available.packages, etc will fail
db <- as.matrix(retdat)
np <- .write_repository_package_db(db, dir, rds_compress)
if(verbose.level > 0L)
message("update_PACKAGES complete.")
}
}
## pad df with columns from srcdf that it is missing,
## populated with NAs of the type appropriate for the
## column in srcdf. Must work for 0 row df or 0 row srcdf
##
## final col order: names(srcdf) followed by
## any columns unique to df
.filldfcols <- function(df, srcdf) {
srcnames <- names(srcdf)
dfnames <- names(df)
newcols <- setdiff(srcnames, dfnames)
df[,newcols] <- srcdf[integer(), newcols]
df <- df[,unique(c(srcnames, dfnames))]
df
}