使用大型資料集制作嵌套串列的最佳方法是什么？R-有解無憂

使用大型資料集制作嵌套串列的最佳方法是什么？目前我正在使用 for 迴圈逐一添加元素，但我不知道這是否是最有效的方法。

在下面的示例中，我想用分類級別 Domain> Kingdom> Phylum> Class> Order> Family> Genus> Species 填充嵌套串列。

所以代碼目前會遍歷各個分類級別收集資訊并填寫串列，但是由于資料量大，這個程序需要的時間太長，我想知道是否有辦法優化這個程序。

代碼附在下面。我感謝任何建議、意見等。

謝謝


# Example taxonomy table: one row per species, one column per rank, with the
# columns ordered from the broadest rank (Reino) to the narrowest (Especie).
data <- data.frame(
  Reino   = c("reinoa", "reinoa", "reinob", "reinoc"),
  Filo    = c("Filoa1", "Filoa2", "Filob", "Filoc"),
  Clase   = c("Clasea1", "Clase2", "Claseb", "Clasec"),
  Orden   = c("Ordena1", "Ordena2", "Ordenb", "Ordenc"),
  Familia = c("Familiaa1", "Familiaa2", "Familiab", "Familiac"),
  Genero  = c("Generoa1", "Generoa2", "Generob", "Generoc"),
  Especie = c("Especiea1", "Especiea2", "Especieb", "Especiec")
)

# Build a nested named list from the columns of a data frame.
#
# The first column supplies the names of the outermost level, the second
# column the names one level down, and so on. Each distinct value of the last
# column becomes a leaf holding an empty list.
#
# x: a data frame with at least one column; values are used as list names.
#
# Returns a named list. Names at every level appear in order of first
# occurrence in `x`. Rows whose key is NA at some level are dropped from that
# level downwards.
build_taxonomy <- function(x) {
  stopifnot(is.data.frame(x), ncol(x) >= 1L)

  keys <- as.character(x[[1L]])
  labels <- unique(keys[!is.na(keys)])

  # Last rank: one empty-list leaf per distinct label.
  if (ncol(x) == 1L) {
    return(setNames(rep(list(list()), length(labels)), labels))
  }

  # Split the remaining columns once per level, so each recursive call only
  # sees the rows of its own branch instead of re-filtering the whole table.
  # Explicit factor levels keep first-occurrence order (split() would
  # otherwise sort the group names).
  groups <- split(x[-1L], factor(keys, levels = labels))
  lapply(groups, build_taxonomy)
}

# Nested list Reino > Filo > Clase > Orden > Familia > Genero > Especie.
dftaxonomica <- build_taxonomy(data)

uj5u.com熱心網友回復：

遞回函式可能會有所幫助：

# Turn a data frame into a nested named list, one nesting level per column.
#
# x: a data frame; columns are consumed left to right.
#
# With more than one column, the rows are grouped by the first column and the
# remaining columns are processed recursively for each group. With a single
# column, the result is a named list with one NULL entry per distinct value.
recurse <- function(x) {
    cols <- names(x)
    if (length(cols) <= 1L) {
        # Base case: the distinct values of the only column become the leaves.
        leaves <- unique(x[[1L]])
        return(setNames(vector("list", length(leaves)), leaves))
    }
    first <- cols[1L]
    remaining <- cols[-1L]
    groups <- split(x[remaining], x[[first]])
    lapply(groups, recurse)
}

recurse(data)

$reinoa
$reinoa$Filoa1
$reinoa$Filoa1$Clasea1
$reinoa$Filoa1$Clasea1$Ordena1
$reinoa$Filoa1$Clasea1$Ordena1$Familiaa1
$reinoa$Filoa1$Clasea1$Ordena1$Familiaa1$Generoa1
$reinoa$Filoa1$Clasea1$Ordena1$Familiaa1$Generoa1$Especiea1
NULL

$reinoa$Filoa2
$reinoa$Filoa2$Clase2
$reinoa$Filoa2$Clase2$Ordena2
$reinoa$Filoa2$Clase2$Ordena2$Familiaa2
$reinoa$Filoa2$Clase2$Ordena2$Familiaa2$Generoa2
$reinoa$Filoa2$Clase2$Ordena2$Familiaa2$Generoa2$Especiea2
NULL


$reinob
$reinob$Filob
$reinob$Filob$Claseb
$reinob$Filob$Claseb$Ordenb
$reinob$Filob$Claseb$Ordenb$Familiab
$reinob$Filob$Claseb$Ordenb$Familiab$Generob
$reinob$Filob$Claseb$Ordenb$Familiab$Generob$Especieb
NULL


$reinoc
$reinoc$Filoc
$reinoc$Filoc$Clasec
$reinoc$Filoc$Clasec$Ordenc
$reinoc$Filoc$Clasec$Ordenc$Familiac
$reinoc$Filoc$Clasec$Ordenc$Familiac$Generoc
$reinoc$Filoc$Clasec$Ordenc$Familiac$Generoc$Especiec
NULL

uj5u.com熱心網友回復：

rrapply 套件中的 rrapply() 函式有一個（高效的）選項 how = "unmelt"，正是為此目的而設計：

# Attach the rrapply package, which provides rrapply().
library(rrapply)

## Append a list-column "Value" holding one empty list per row.
## NOTE(review): presumably rrapply's "unmelt" mode treats the last column as
## the leaf value and the preceding columns as nested names -- confirm against
## the rrapply documentation.
data[["Value"]] <- replicate(nrow(data), list())

## Convert the melted data.frame into a nested list.
out <- rrapply(data, how = "unmelt")

# Show the structure of the result.
str(out)
#> List of 3
#>  $ reinoa:List of 2
#>   ..$ Filoa1:List of 1
#>   .. ..$ Clasea1:List of 1
#>   .. .. ..$ Ordena1:List of 1
#>   .. .. .. ..$ Familiaa1:List of 1
#>   .. .. .. .. ..$ Generoa1:List of 1
#>   .. .. .. .. .. ..$ Especiea1: list()
#>   ..$ Filoa2:List of 1
#>   .. ..$ Clase2:List of 1
#>   .. .. ..$ Ordena2:List of 1
#>   .. .. .. ..$ Familiaa2:List of 1
#>   .. .. .. .. ..$ Generoa2:List of 1
#>   .. .. .. .. .. ..$ Especiea2: list()
#>  $ reinob:List of 1
#>   ..$ Filob:List of 1
#>   .. ..$ Claseb:List of 1
#>   .. .. ..$ Ordenb:List of 1
#>   .. .. .. ..$ Familiab:List of 1
#>   .. .. .. .. ..$ Generob:List of 1
#>   .. .. .. .. .. ..$ Especieb: list()
#>  $ reinoc:List of 1
#>   ..$ Filoc:List of 1
#>   .. ..$ Clasec:List of 1
#>   .. .. ..$ Ordenc:List of 1
#>   .. .. .. ..$ Familiac:List of 1
#>   .. .. .. .. ..$ Generoc:List of 1
#>   .. .. .. .. .. ..$ Especiec: list()

參考鏈接

https://jorischau.github.io/rrapply/articles/articles/2-efficient-melting-unnesting.html#efficient-unmelting-of-melted-data-frames

轉載請註明出處，本文鏈接：https://www.uj5u.com/net/447092.html

標籤：r for循环

上一篇：for回圈未在零引數上執行

下一篇：在React中回圈一個組件