| % File src/library/stats/man/dendrogram.Rd |
| % Part of the R package, https://www.R-project.org |
| % Copyright 1995-2020 R Core Team |
| % Copyright 2002-2015 The R Foundation |
| % Distributed under GPL 2 or later |
| |
| \name{dendrogram} |
| \title{General Tree Structures} |
| \alias{dendrogram}% the class |
| \alias{as.dendrogram} |
| \alias{as.dendrogram.dendrogram} |
| \alias{as.dendrogram.hclust} |
| \alias{as.hclust.dendrogram} |
| \alias{cut.dendrogram} |
| \alias{[[.dendrogram} |
| \alias{merge.dendrogram} |
| \alias{nobs.dendrogram} |
| \alias{plot.dendrogram} |
| \alias{print.dendrogram} |
| \alias{rev.dendrogram} |
| \alias{str.dendrogram} |
| \alias{is.leaf} |
| \description{ |
| Class \code{"dendrogram"} provides general functions for handling |
| tree-like structures. It is intended as a replacement for similar |
| functions in hierarchical clustering and classification/regression |
| trees, such that all of these can use the same engine for plotting or |
| cutting trees. |
| } |
| \usage{ |
| as.dendrogram(object, \dots) |
| \method{as.dendrogram}{hclust}(object, hang = -1, check = TRUE, \dots) |
| |
| \method{as.hclust}{dendrogram}(x, \dots) |
| |
| \method{plot}{dendrogram}(x, type = c("rectangle", "triangle"), |
| center = FALSE, |
| edge.root = is.leaf(x) || !is.null(attr(x,"edgetext")), |
| nodePar = NULL, edgePar = list(), |
| leaflab = c("perpendicular", "textlike", "none"), |
| dLeaf = NULL, xlab = "", ylab = "", xaxt = "n", yaxt = "s", |
| horiz = FALSE, frame.plot = FALSE, xlim, ylim, \dots) |
| |
| \method{cut}{dendrogram}(x, h, \dots) |
| |
| \method{merge}{dendrogram}(x, y, \dots, height, |
| adjust = c("auto", "add.max", "none")) |
| |
| \method{nobs}{dendrogram}(object, \dots) |
| |
| \method{print}{dendrogram}(x, digits, \dots) |
| |
| \method{rev}{dendrogram}(x) |
| |
| \method{str}{dendrogram}(object, max.level = NA, digits.d = 3, |
| give.attr = FALSE, wid = getOption("width"), |
| nest.lev = 0, indent.str = "", |
| last.str = getOption("str.dendrogram.last"), stem = "--", |
| \dots) |
| |
| is.leaf(object) |
| } |
| \arguments{ |
| \item{object}{any \R object that can be made into one of class |
| \code{"dendrogram"}.} |
| \item{x, y}{object(s) of class \code{"dendrogram"}.} |
| \item{hang}{numeric scalar indicating how the \emph{height} of leaves |
| should be computed from the heights of their parents; see |
| \code{\link{plot.hclust}}.} |
| \item{check}{logical indicating if \code{object} should be checked for |
| validity. This check is not necessary when \code{object} is known to be |
| valid such as when it is the direct result of \code{hclust()}. The |
| default is \code{check=TRUE}, e.g.\sspace{}for protecting against memory |
| explosion with invalid inputs.} |
| \item{type}{type of plot.} |
| \item{center}{logical; if \code{TRUE}, nodes are plotted centered with |
| respect to the leaves in the branch. Otherwise (default), plot them |
| in the middle of all direct child nodes.} |
| \item{edge.root}{logical; if true, draw an edge to the root node.} |
| \item{nodePar}{a \code{list} of plotting parameters to use for the |
| nodes (see \code{\link{points}}) or \code{NULL} by default which |
| does not draw symbols at the nodes. The list may contain components |
| named \code{pch}, \code{cex}, \code{col}, \code{xpd}, |
| and/or \code{bg} each of |
| which can have length two for specifying separate attributes for |
| \emph{inner} nodes and \emph{leaves}. Note that the default of |
| \code{pch} is \code{1:2}, so you may want to use \code{pch = NA} if |
| you specify \code{nodePar}.} |
| \item{edgePar}{a \code{list} of plotting parameters to use for the |
| edge \code{\link{segments}} and labels (if there's an |
| \code{edgetext}). The list may contain components |
| named \code{col}, \code{lty} and \code{lwd} (for the segments), |
| \code{p.col}, \code{p.lwd}, and \code{p.lty} (for the |
| \code{\link{polygon}} around the text) and \code{t.col} for the text |
| color. As with \code{nodePar}, each can have length two for |
| differentiating leaves and inner nodes. |
| } |
| \item{leaflab}{a string specifying how leaves are labeled. The |
| default, \code{"perpendicular"}, writes text vertically (by default);\cr |
| \code{"textlike"} writes text horizontally (in a rectangle), and \cr |
| \code{"none"} suppresses leaf labels.} |
| \item{dLeaf}{a number specifying the \bold{d}istance in user |
| coordinates between the tip of a leaf and its label. If \code{NULL} |
| as per default, 3/4 of a letter width or height is used.} |
| \item{horiz}{logical indicating if the dendrogram should be drawn |
| \emph{horizontally} or not.} |
| \item{frame.plot}{logical indicating if a box around the plot should |
| be drawn, see \code{\link{plot.default}}.} |
| \item{h}{height at which the tree is cut.} |
| \item{height}{height at which the two dendrograms should be merged. If not |
| specified (or \code{NULL}), the default is ten percent larger than |
| the (larger of the) two component heights.} |
| \item{adjust}{a string determining if the leaf values should be |
| adjusted. The default, \code{"auto"}, checks if the (first) two |
| dendrograms both start at \code{1}; if they do, \code{"add.max"} is |
| chosen, which adds the maximum of the previous dendrogram leaf |
| values to each leaf of the \dQuote{next} dendrogram. Setting |
| \code{adjust} to another value skips the check and hence is a tad |
| more efficient.} |
| \item{xlim, ylim}{optional x- and y-limits of the plot, passed to |
| \code{\link{plot.default}}. The defaults for these show the full |
| dendrogram.} |
| \item{\dots, xlab, ylab, xaxt, yaxt}{graphical parameters, or arguments for |
| other methods.} |
| \item{digits}{integer specifying the precision for printing, see |
| \code{\link{print.default}}.} |
| \item{max.level, digits.d, give.attr, wid, nest.lev, indent.str}{arguments |
| to \code{str}, see \code{\link{str.default}()}. Note that |
| \code{give.attr = FALSE} still shows \code{height} and \code{members} |
| attributes for each node.} |
| \item{last.str, stem}{strings used for \code{str()} specifying how the |
| last branch (at each level) should start and the \emph{stem} |
| to use for each dendrogram branch. In some environments, using |
| \code{last.str = "'"} will provide much nicer looking output than |
| the historical default \code{last.str = "`"}.} |
| } |
| \details{ |
| The dendrogram is directly represented as a nested list where each |
| component corresponds to a branch of the tree. Hence, the first |
| branch of tree \code{z} is \code{z[[1]]}, the second branch of the |
| corresponding subtree is \code{z[[1]][[2]]}, or shorter |
| \code{z[[c(1,2)]]}, etc. Each node of the tree |
| carries some information needed for efficient plotting or cutting as |
| attributes, of which only \code{members}, \code{height} and |
| \code{leaf} for leaves are compulsory: |
| \describe{ |
| \item{\code{members}}{total number of leaves in the branch} |
| \item{\code{height}}{numeric non-negative height at which the node |
| is plotted.} |
| \item{\code{midpoint}}{numeric horizontal distance of the node from |
| the left border (the leftmost leaf) of the branch (unit 1 between |
| all leaves). This is used for \code{plot(*, center = FALSE)}.} |
| \item{\code{label}}{character; the label of the node} |
| \item{\code{x.member}}{for \code{cut()$upper}, |
| the number of \emph{former} members; more generally a substitute |
| for the \code{members} component used for \sQuote{horizontal} |
| (when \code{horiz = FALSE}, else \sQuote{vertical}) alignment.} |
| \item{\code{edgetext}}{character; the label for the edge leading to |
| the node} |
| \item{\code{nodePar}}{a named list (of length-1 components) |
| specifying node-specific attributes for \code{\link{points}} |
| plotting, see the \code{nodePar} argument above.} |
| \item{\code{edgePar}}{a named list (of length-1 components) |
| specifying attributes for \code{\link{segments}} plotting of the |
| edge leading to the node, and drawing of the \code{edgetext} if |
| available, see the \code{edgePar} argument above.} |
| \item{\code{leaf}}{logical, if \code{TRUE}, the node is a leaf of |
| the tree.}% This will often be a \code{\link{character}} which can |
| % be used for plotting instead of the \code{text} attribute.} |
| } |
| |
| \code{cut.dendrogram()} returns a list with components \code{$upper} |
| and \code{$lower}: the former is a truncated version of the original |
| tree, also of class \code{dendrogram}; the latter is a list with the |
| branches obtained from cutting the tree, each a \code{dendrogram}. |
| |
| There are \code{\link{[[}}, \code{\link{print}}, and \code{\link{str}} |
| methods for \code{"dendrogram"} objects where the first one |
| (extraction) ensures that selecting sub-branches keeps the class, |
| i.e., returns a dendrogram even if only a leaf. |
| On the other hand, \code{\link{[}} (\emph{single} bracket) extraction |
| returns the underlying list structure.%, useful, e.g., for inspection. |
| |
| Objects of class \code{"hclust"} can be converted to class |
| \code{"dendrogram"} using method \code{as.dendrogram()}, and since \R |
| 2.13.0, there is also an \code{\link{as.hclust}()} method as an inverse. |
| |
| \code{rev.dendrogram} simply returns the dendrogram \code{x} with |
| reversed nodes, see also \code{\link{reorder.dendrogram}}. |
| |
| The \code{\link{merge}(x, y, ...)} method merges two or more |
| dendrograms into a new one which has \code{x} and \code{y} (and |
| optional further arguments) as branches. Note that before \R 3.1.2, |
| \code{adjust = "none"} was used implicitly, which is invalid when, |
| e.g., the dendrograms are from \code{\link{as.dendrogram}(hclust(..))}. |
| |
| \code{\link{nobs}(object)} returns the total number of leaves (the |
| \code{members} attribute, see above). |
| |
| \code{is.leaf(object)} returns a logical indicating if \code{object} is a |
| leaf (the simplest dendrogram). |
| |
| \code{plotNode()} and \code{plotNodeLimit()} are helper functions. |
| } |
| \note{ |
| \describe{ |
| \item{\code{plot()}:}{When using \code{type = "triangle"}, |
| \code{center = TRUE} often looks better.} |
| \item{\code{str(d)}:}{If you really want to see the \emph{internal} |
| structure, use \code{str(unclass(d))} instead.} |
| } |
| } |
| \section{Warning}{ |
| Some operations on dendrograms such as \code{merge()} make use of |
| recursion. For deep trees it may be necessary to increase |
| \code{\link{options}("expressions")}: if you do, you are likely to need |
| to set the C stack size (\code{\link{Cstack_info}()[["size"]]}) larger |
| than the default where possible. |
| } |
| \seealso{ |
| \code{\link{dendrapply}} for applying a function to \emph{each} node. |
| \code{\link{order.dendrogram}} and \code{\link{reorder.dendrogram}}; |
| further, the \code{\link{labels}} method. |
| } |
| \examples{ |
| require(graphics); require(utils) |
| |
| hc <- hclust(dist(USArrests), "ave") |
| (dend1 <- as.dendrogram(hc)) # "print()" method |
| str(dend1) # "str()" method |
| str(dend1, max.level = 2, last.str = "'") # only the first two sub-levels |
| oo <- options(str.dendrogram.last = "\\\\") # yet another possibility |
| str(dend1, max.level = 2) # only the first two sub-levels |
| options(oo) # .. resetting them |
| |
| op <- par(mfrow = c(2,2), mar = c(5,2,1,4)) |
| plot(dend1) |
| ## "triangle" type and show inner nodes: |
| plot(dend1, nodePar = list(pch = c(1,NA), cex = 0.8, lab.cex = 0.8), |
| type = "t", center = TRUE) |
| plot(dend1, edgePar = list(col = 1:2, lty = 2:3), |
| dLeaf = 1, edge.root = TRUE) |
| plot(dend1, nodePar = list(pch = 2:1, cex = .4*2:1, col = 2:3), |
| horiz = TRUE) |
| |
| ## simple test for as.hclust() as the inverse of as.dendrogram(): |
| stopifnot(identical(as.hclust(dend1)[1:4], hc[1:4])) |
| |
| dend2 <- cut(dend1, h = 70) |
| ## leaves are wrong horizontally in R 4.0 and earlier: |
| plot(dend2$upper) |
| plot(dend2$upper, nodePar = list(pch = c(1,7), col = 2:1)) |
| ## dend2$lower is *NOT* a dendrogram, but a list of .. : |
| plot(dend2$lower[[3]], nodePar = list(col = 4), horiz = TRUE, type = "tr") |
| ## "inner" and "leaf" edges in different type & color : |
| plot(dend2$lower[[2]], nodePar = list(col = 1), # non empty list |
| edgePar = list(lty = 1:2, col = 2:1), edge.root = TRUE) |
| par(op) |
| d3 <- dend2$lower[[2]][[2]][[1]] |
| stopifnot(identical(d3, dend2$lower[[2]][[c(2,1)]])) |
| str(d3, last.str = "'") |
| |
| ## to peek at the inner structure "if you must", use '[..]' indexing : |
| str(d3[2][[1]]) ## or the full |
| str(d3[]) |
| |
| ## merge() to join dendrograms: |
| (d13 <- merge(dend2$lower[[1]], dend2$lower[[3]])) |
| ## merge() all parts back (using default 'height' instead of original one): |
| den.1 <- Reduce(merge, dend2$lower) |
| ## or merge() all four parts at same height --> 4 branches (!) |
| d. <- merge(dend2$lower[[1]], dend2$lower[[2]], dend2$lower[[3]], |
| dend2$lower[[4]]) |
| ## (with a warning) or the same using do.call : |
| stopifnot(identical(d., do.call(merge, dend2$lower))) |
| plot(d., main = "merge(d1, d2, d3, d4) |-> dendrogram with a 4-split") |
| |
| ## "Zoom" in to the first dendrogram : |
| plot(dend1, xlim = c(1,20), ylim = c(1,50)) |
| |
| nP <- list(col = 3:2, cex = c(2.0, 0.75), pch = 21:22, |
| bg = c("light blue", "pink"), |
| lab.cex = 0.75, lab.col = "tomato") |
| plot(d3, nodePar= nP, edgePar = list(col = "gray", lwd = 2), horiz = TRUE) |
| %% now add some "edgetext" : |
| addE <- function(n) { |
| if(!is.leaf(n)) { |
| attr(n, "edgePar") <- list(p.col = "plum") |
| attr(n, "edgetext") <- paste(attr(n,"members"),"members") |
| } |
| n |
| } |
| d3e <- dendrapply(d3, addE) |
| plot(d3e, nodePar = nP) |
| plot(d3e, nodePar = nP, leaflab = "textlike") |
| |
| %% BUG: edge labeling *and* leaflab = "textlike" both fail with horiz = TRUE: |
| %% BUG plot(d3e, nodePar = nP, leaflab = "textlike", horiz = TRUE) |
| } |
| \keyword{multivariate} |
| \keyword{tree}% FIXME: want as.dendrogram.tree() etc! |
| \keyword{hplot}% only for plot.() |