| % File src/library/base/man/order.Rd |
| % Part of the R package, https://www.R-project.org |
| % Copyright 1995-2017 R Core Team |
| % Distributed under GPL 2 or later |
| |
| \name{order} |
| \title{Ordering Permutation} |
| \alias{order} |
| \alias{sort.list} |
| \concept{sort data frame} |
| \description{ |
| \code{order} returns a permutation which rearranges its first |
| argument into ascending or descending order, breaking ties by further |
| arguments. \code{sort.list} is the same, using only one argument.\cr |
| See the examples for how to use these functions to sort data frames, |
| etc. |
| } |
| \usage{ |
| order(\dots, na.last = TRUE, decreasing = FALSE, |
| method = c("auto", "shell", "radix")) |
| |
| sort.list(x, partial = NULL, na.last = TRUE, decreasing = FALSE, |
| method = c("auto", "shell", "quick", "radix")) |
| } |
| \arguments{ |
| \item{\dots}{a sequence of numeric, complex, character or logical |
| vectors, all of the same length, or a classed \R object.} |
| \item{x}{an atomic vector.} |
| \item{partial}{vector of indices for partial sorting. |
| (Non-\code{NULL} values are not implemented.)} |
| \item{decreasing}{logical. Should the sort order be increasing or |
| decreasing? For the \code{"radix"} method, this can be a vector of |
| length equal to the number of arguments in \code{\dots}. For the |
| other methods, it must be length one.} |
| \item{na.last}{for controlling the treatment of \code{NA}s. |
| If \code{TRUE}, missing values in the data are put last; if |
| \code{FALSE}, they are put first; if \code{NA}, they are removed |
| (see \sQuote{Note}.)} |
| \item{method}{the method to be used: partial matches are allowed. The |
| default (\code{"auto"}) implies \code{"radix"} for short numeric vectors, |
| integer vectors, logical vectors and factors. |
| Otherwise, it implies \code{"shell"}. |
| For details of methods \code{"shell"}, |
| \code{"quick"}, and \code{"radix"}, |
| see the help for \code{\link{sort}}.} |
| } |
| \details{ |
| In the case of ties in the first vector, values in the second are used |
| to break the ties. If the values are still tied, values in the later |
| arguments are used to break the tie (see the first example). |
| The sort used is \emph{stable} (except for \code{method = "quick"}), |
| so any unresolved ties will be left in their original ordering. |
| |
| Complex values are sorted first by the real part, then the imaginary |
| part. |
| |
| Except for method \code{"radix"}, the sort order for character vectors |
| will depend on the collating sequence of the locale in use: see |
| \code{\link{Comparison}}. |
| |
| The \code{"shell"} method is generally the safest bet and is the |
| default method, except for short factors, numeric vectors, |
| integer vectors and logical vectors, where \code{"radix"} is assumed. |
| Method \code{"radix"} stably sorts logical, |
| numeric and character vectors in linear time. It outperforms the other |
| methods, although there are caveats (see \code{\link{sort}}). Method |
| \code{"quick"} for \code{sort.list} is only supported for numeric |
| \code{x} with \code{na.last = NA}, is not stable, and is slower than |
| \code{"radix"}. |
| |
| \code{partial = NULL} is supported for compatibility with other |
| implementations of S, but no other values are accepted and ordering is |
| always complete. |
| |
| For a classed \R object, the sort order is taken from |
| \code{\link{xtfrm}}: as its help page notes, this can be slow unless a |
| suitable method has been defined or \code{\link{is.numeric}(x)} is |
| true. For factors, this sorts on the internal codes, which is |
| particularly appropriate for ordered factors. |
| } |
| |
| \value{ |
| An integer vector unless any of the inputs has \eqn{2^{31}}{2^31} or |
| more elements, when it is a double vector. |
| } |
| |
| \section{Warning}{ |
| In programmatic use it is unsafe to name the \code{\dots} arguments, |
| as the names could match current or future control |
| arguments such as \code{decreasing}. A sometimes-encountered unsafe |
| practice is to call \code{do.call('order', df_obj)} where |
| \code{df_obj} might be a data frame: copy \code{df_obj} and |
| remove any names, for example using \code{\link{unname}}. |
| } |
| |
| \note{ |
| \code{sort.list} can get called by mistake as a method for |
| \code{\link{sort}} with a list argument: it gives a suitable error |
| message for list \code{x}. |
| |
| There is a historical difference in behaviour for \code{na.last = NA}: |
| \code{sort.list} removes the \code{NA}s and then computes the order |
| amongst the remaining elements: \code{order} computes the order |
| amongst the non-\code{NA} elements of the original vector. Thus |
| \preformatted{ x[order(x, na.last = NA)] |
| zz <- x[!is.na(x)]; zz[sort.list(x, na.last = NA)] |
| } |
| both sort the non-\code{NA} values of \code{x}. |
| |
| Prior to \R 3.3.0 \code{method = "radix"} was only supported for |
| integers of range less than 100,000. |
| } |
| |
| \references{ |
| Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) |
| \emph{The New S Language}. |
| Wadsworth & Brooks/Cole. |
| |
| Knuth, D. E. (1998) |
| \emph{The Art of Computer Programming, Volume 3: Sorting and |
| Searching.} 2nd ed. Addison-Wesley. |
| } |
| \seealso{ |
| \code{\link{sort}}, \code{\link{rank}}, \code{\link{xtfrm}}. |
| } |
| \examples{ |
| require(stats) |
| |
| (ii <- order(x <- c(1,1,3:1,1:4,3), y <- c(9,9:1), z <- c(2,1:9))) |
| ## 6 5 2 1 7 4 10 8 3 9 |
| rbind(x, y, z)[,ii] # shows the reordering (ties via 2nd & 3rd arg) |
| |
| ## Suppose we wanted descending order on y. |
| ## A simple solution for numeric 'y' is |
| rbind(x, y, z)[, order(x, -y, z)] |
| ## More generally we can make use of xtfrm |
| cy <- as.character(y) |
| rbind(x, y, z)[, order(x, -xtfrm(cy), z)] |
| ## The radix sort supports multiple 'decreasing' values: |
| rbind(x, y, z)[, order(x, cy, z, decreasing = c(FALSE, TRUE, FALSE), |
| method="radix")] |
| |
| ## Sorting data frames: |
| dd <- transform(data.frame(x, y, z), |
| z = factor(z, labels = LETTERS[9:1])) |
| ## Either as above {for factor 'z' : using internal coding}: |
| dd[ order(x, -y, z), ] |
| ## or along 1st column, ties along 2nd, ... *arbitrary* no.{columns}: |
| dd[ do.call(order, dd), ] |
| |
| set.seed(1) # reproducible example: |
| d4 <- data.frame(x = round( rnorm(100)), y = round(10*runif(100)), |
| z = round( 8*rnorm(100)), u = round(50*runif(100))) |
| (d4s <- d4[ do.call(order, d4), ]) |
| (i <- which(diff(d4s[, 3]) == 0)) |
| # in 2 places, needed 3 cols to break ties: |
| d4s[ rbind(i, i+1), ] |
| |
| ## rearrange matched vectors so that the first is in ascending order |
| x <- c(5:1, 6:8, 12:9) |
| y <- (x - 5)^2 |
| o <- order(x) |
| rbind(x[o], y[o]) |
| |
| ## tests of na.last |
| a <- c(4, 3, 2, NA, 1) |
| b <- c(4, NA, 2, 7, 1) |
| z <- cbind(a, b) |
| (o <- order(a, b)); z[o, ] |
| (o <- order(a, b, na.last = FALSE)); z[o, ] |
| (o <- order(a, b, na.last = NA)); z[o, ] |
| |
| \donttest{ |
| ## speed examples on an average laptop for long vectors: |
| ## factor/small-valued integers: |
| x <- factor(sample(letters, 1e7, replace = TRUE)) |
| system.time(o <- sort.list(x, method = "quick", na.last = NA)) # 0.1 sec |
| stopifnot(!is.unsorted(x[o])) |
| system.time(o <- sort.list(x, method = "radix")) # 0.05 sec, 2X faster |
| stopifnot(!is.unsorted(x[o])) |
| ## large-valued integers: |
| xx <- sample(1:200000, 1e7, replace = TRUE) |
| system.time(o <- sort.list(xx, method = "quick", na.last = NA)) # 0.3 sec |
| system.time(o <- sort.list(xx, method = "radix")) # 0.2 sec |
| ## character vectors: |
| xx <- sample(state.name, 1e6, replace = TRUE) |
| system.time(o <- sort.list(xx, method = "shell")) # 2 sec |
| system.time(o <- sort.list(xx, method = "radix")) # 0.007 sec, 300X faster |
| ## double vectors: |
| xx <- rnorm(1e6) |
| system.time(o <- sort.list(xx, method = "shell")) # 0.4 sec |
| system.time(o <- sort.list(xx, method = "quick", na.last = NA)) # 0.1 sec |
| system.time(o <- sort.list(xx, method = "radix")) # 0.05 sec, 2X faster |
| }} |
| \keyword{univar} |
| \keyword{manip} |