デンドログラムのデータから、すべてのノードについて、ノードの情報(下にあるすべての リーフの数、高さ)を集めたい。
dendrapplyでは、返されるデータ構造が目的に沿っていなかった。
dend2nodelist <- function(hc) { n <- length(hc$labels) denMat <- matrix(rep(0,n*(n-1)),ncol <- (n-1)) nodeData <- c() for(i in 1:(n-1)){ j <- hc$merge[i,1] k <- hc$merge[i,2] if(j < 0){ denMat[i,-j] <- 1 }else{ denMat[i, which(denMat[j,] == 1)] <- 1 } if(k < 0){ denMat[i,-k] <- 1 }else{ denMat[i, which(denMat[k,] == 1)] <- 1 } leafs <- hc$labels[which(denMat[i,] == 1)]#リーフの一覧 size <- length(leafs)#リーフの数 nodeData <- rbind( nodeData, list( leafs = leafs, size = size, height = hc$height[i] )) } nodeData }
> hc <- hclust(dist(USArrests[1:10,])) > dend2nodelist(hc) leafs size height [1,] Character,2 2 16.80625 [2,] Character,2 2 23.19418 [3,] Character,2 2 25.09303 [4,] Character,3 3 36.73486 [5,] Character,3 3 45.18296 [6,] Character,3 3 60.98073 [7,] Character,6 6 77.19741 [8,] Character,9 9 148.7357 [9,] Character,10 10 226.303 > dend2nodelist(hc)[1,] $leafs [1] "Alabama" "Delaware" $size [1] 2 $height [1] 16.80625
f<-function(X){ ret<-NULL f<-function(x){ if(is.leaf(x)){ attr(x,"label") }else{ r<-c(sapply(x,f),recursive=T) ret<<-rbind(ret,list(leafs=r,size=attr(x,"members"),height=attr(x,"height"))) r } } f(X) ret } r<-f(as.dendrogram(hclust(dist(USArrests), "ave")))
dend2nodelist <- function(hc) { GetElement <- function(i, j) { if (i < 0) { if (j < 0) return(c(hc$labels[-i], hc$labels[-j])) else return(c(hc$labels[-i], GetElement(hc$merge[j,1], hc$merge[j,2]))) } else { if (j < 0) return(c(GetElement(hc$merge[i,1], hc$merge[i,2]), hc$labels[-j])) else return(c(GetElement(hc$merge[i,1], hc$merge[i,2]), GetElement(hc$merge[j,1], hc$merge[j,2]))) } } n <- length(hc$height) leafs <- NULL size <- integer(n) for (i in 1:n) { labels <- GetElement(hc$merge[i,1], hc$merge[i,2]) size[i] <- length(labels) leafs[i] <- list(labels) } return(cbind(leafs=leafs, size=size, height=hc$height)) }
dend2nodelist <- function(hc) { temp <- function(hc, j) GetElement(hc$merge[j,1], hc$merge[j,2]) temp2 <- function(hc, j) if (j < 0) hc$labels[-j] else temp(hc, j) GetElement <- function(i, j) if (i < 0) c(hc$labels[-i], temp2(hc, j)) else c(temp(hc,i), temp2(hc, j)) leafs <- size <- integer(n <- length(hc$height)) for (i in 1:n) {size[i] <- length(labels <- temp(hc, i)); leafs[i] <- list(labels) } return(cbind(leafs=leafs, size=size, height=hc$height)) }
> hc <- hclust(dist(USArrests[1:10,])) > result <- dend2nodelist(hc)