如何從嵌套資料框創建直方圖并將它們作為物件存盤在r的串列中？-有解無憂

我是新手編碼員。我正在嘗試為作業創建一個閃亮的應用程式，該應用程式需要大量的漁業資料，計算一些指標，然后在 rMarkdown 檔案中吐出所有需要的圖和指標。這些資料集充滿了對多個不同湖泊中多個不同物種的大量觀察。我們想為每個湖泊的每個物種創建地塊。

為了獲得所需的輸出，我相信我需要嵌套資料框，為每個 lake_species 組合創建 geom_histograms（下面我的示例中的 cyl_gear 組合），然后將它們作為物件存盤在主資料框中的串列/列中，以便我可以將物件傳遞到 rMarkdown 進行列印。

這是我要問的一個例子：

library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)

nested <- mtcars %>%
  mutate(uniqueID=paste(mtcars$cyl, sep = "_", mtcars$gear),
         gear2=gear) %>%
  group_by(uniqueID, gear) %>%
    nest()

histyfun <- function(x){   ## I know this set of case_when code does not work, but this 
                           ## is my most recent attempt at it.
  
  case_when(x$gear=="3" ~ 
              
              ggplot(data=x$data, aes(x=wt, fill=hp))   
              geom_histogram(binwidth = 0.2, color="black", 
              position = position_stack(reverse=TRUE),
                             breaks=seq(min(data$wt)-0.2, max(data$wt) 0.2, 0.2))  
              scale_fill_continuous(type = "gradient")  
              scale_x_continuous(name="Weight", 
              breaks = seq(min(data$wt)-0.2, max(data$wt) 0.2, 0.2))   
              aes(y=stat(count)/sum(stat(count)))  
              scale_y_continuous(name="Percent Frequency", labels = scales::percent, 
                                 breaks = seq(0, 1, 0.02))  
              labs(fill="")  
              theme(panel.grid.major = element_blank(), 
               panel.grid.minor = element_blank(),
               panel.background = element_blank(), 
               axis.line = element_line(colour = "black")),
            
            
            x$gear=="4" ~ 
              
              ggplot(data=x$data, aes(x=wt, fill=hp))   
              geom_histogram(binwidth = 0.1, color="black", 
              position = position_stack(reverse=TRUE),
                             breaks=seq(min(data$wt)-0.2, max(data$wt) 0.2, 0.2))  
              scale_fill_continuous(type = "gradient")  
              scale_x_continuous(name="Weight", 
              breaks = seq(min(data$wt)-0.2, max(data$wt) 0.2, 0.2))   
              aes(y=stat(count)/sum(stat(count)))  
              scale_y_continuous(name="Percent Frequency", labels = scales::percent, 
                                 breaks = seq(0, 1, 0.02))  
              labs(fill="")  
              theme(panel.grid.major = element_blank(), 
               panel.grid.minor = element_blank(),
               panel.background = element_blank(), 
               axis.line = element_line(colour = "black")),
              
            x$gear=="5" ~ 
              
              ggplot(data=x$data, aes(x=wt, fill=hp))   
              geom_histogram(binwidth = 0.3, color="black", 
              position = position_stack(reverse=TRUE),
                             breaks=seq(min(data$wt)-0.2, max(data$wt) 0.2, 0.2))  
              scale_fill_continuous(type = "gradient")  
              scale_x_continuous(name="Weight", 
              breaks = seq(min(data$wt)-0.2, max(data$wt) 0.2, 0.2))   
              aes(y=stat(count)/sum(stat(count)))  
              scale_y_continuous(name="Percent Frequency", labels = scales::percent, 
                                 breaks = seq(0, 1, 0.02))  
              labs(fill="")  
              theme(panel.grid.major = element_blank(), 
               panel.grid.minor = element_blank(),
               panel.background = element_blank(), 
               axis.line = element_line(colour = "black")),

            TRUE ~ 0
  )
}

mutate(nested, histogram = nested %>% map(histyfun))

我知道上面的代碼不起作用，但它應該有望說明我正在嘗試創建的內容。

I am struggling with how to: A) create my geom_histograms by calling the appropriate column (wt in the example here) inside the nested dataframe and then B) how to store those histograms as objects in the new column/list. I have no idea what I am doing and appreciate any pointers/tips you can give me. Thanks!

uj5u.com熱心網友回復：

tidyverse 包對于大多數資料操作非常有用，但它們并不是真正為實作功能而設計的。雖然這種方法是公認的不優雅和老派，但我認為它會給你你所追求的。我修改了你的函式以在串列中呼叫。case_when()我沒有使用用于更改 tibble 或資料框中的值的函式，而是使用if()和else()陳述句。另外，你的函式沒有return()呼叫，所以我添加了它。看看它，希望它是你所追求的。

library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)

nested <- mtcars %>%
  mutate(uniqueID=paste(mtcars$cyl, sep = "_", mtcars$gear),
         gear2=gear) %>%
  group_by(uniqueID, gear) %>%
  nest()

histyfun <- function(x){   ## I know this set of case_when code does not work, but this is my most
  ## recent attempt at it.
  
  if(unique(x$gear2)==3){ 
              
              Y<-ggplot(data=x, aes(x=wt, fill=hp))   
              geom_histogram(binwidth = 0.2, color="black", position = position_stack(reverse=TRUE),
                             breaks=seq(min(x$wt)-0.2, max(x$wt) 0.2, 0.2))  
              scale_fill_continuous(type = "gradient")  
              scale_x_continuous(name="Weight", breaks = seq(min(x$wt)-0.2, max(x$wt) 0.2, 0.2))   
              aes(y=stat(count)/sum(stat(count)))  
              scale_y_continuous(name="Percent Frequency", labels = scales::percent, 
                                 breaks = seq(0, 1, 0.02))  
              labs(fill="")  
              theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                    panel.background = element_blank(), axis.line = element_line(colour = "black"))
            
  }else{
    if(unique(x$gear2)==4){ 
              
              Y<-ggplot(data=x, aes(x=wt, fill=hp))   
              geom_histogram(binwidth = 0.1, color="black", position = position_stack(reverse=TRUE),
                             breaks=seq(min(x$wt)-0.2, max(x$wt) 0.2, 0.2))  
              scale_fill_continuous(type = "gradient")  
              scale_x_continuous(name="Weight", breaks = seq(min(x$wt)-0.2, max(x$wt) 0.2, 0.2))   
              aes(y=stat(count)/sum(stat(count)))  
              scale_y_continuous(name="Percent Frequency", labels = scales::percent, 
                                 breaks = seq(0, 1, 0.02))  
              labs(fill="")  
              theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                    panel.background = element_blank(), axis.line = element_line(colour = "black"))
    }
    else{
      if(unique(x$gear2)==5){ 
              
              Y<-ggplot(data=x, aes(x=wt, fill=hp))   
              geom_histogram(binwidth = 0.3, color="black", position = position_stack(reverse=TRUE),
                             breaks=seq(min(x$wt)-0.2, max(x$wt) 0.2, 0.2))  
              scale_fill_continuous(type = "gradient")  
              scale_x_continuous(name="Weight", breaks = seq(min(x$wt)-0.2, max(x$wt) 0.2, 0.2))   
              aes(y=stat(count)/sum(stat(count)))  
              scale_y_continuous(name="Percent Frequency", labels = scales::percent, 
                                 breaks = seq(0, 1, 0.02))  
              labs(fill="")  
              theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                    panel.background = element_blank(), axis.line = element_line(colour = "black"))
      }
    }
  }
  return(Y)
}
nest_list<-as.list(nested$data)
tmp<-lapply(nest_list, as.data.frame)
par(mfrow=c(2,4))
lapply(tmp, histyfun)

uj5u.com熱心網友回復：

一種tidyverse方法可能看起來像這樣。

使您的函式成為兩個（或...）引數的函式，例如gear和一個資料集x
而不是purrr::map您可以使用purrr::pmap(or map2) 來遍歷嵌套資料集的 thegear和datacolumn
您可能還可以大大簡化您的功能。不要復制繪圖代碼，而是使用iforswitch有條件地設定根據齒輪數量而變化的引數，例如，如果您使用 reprexbinwidth引數。

ungroup順便說一句：在 group_by 之后（尤其是嵌套）總是一個好主意。

library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)

histyfun <- function(gear, x) { ## I know this set of case_when code does not work, but this
  binwidth <- switch(as.character(gear), "3" = .2, "4" = 0.1, .3)
  breaks_x <- seq(min(x$wt) - 0.2, max(x$wt)   0.2, 0.2)

  ggplot(data = x, aes(x = wt, fill = hp))  
    geom_histogram(
      binwidth = binwidth, color = "black",
      position = position_stack(reverse = TRUE)
    )  
    scale_fill_continuous(type = "gradient")  
    scale_x_continuous(
      name = "Weight",
      breaks = breaks_x
    )  
    aes(y = stat(count) / sum(stat(count)))  
    scale_y_continuous(
      name = "Percent Frequency", labels = scales::percent,
      breaks = seq(0, 1, 0.02)
    )  
    labs(fill = "")  
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      axis.line = element_line(colour = "black")
    )
}

nested <- mtcars %>%
  mutate(
    uniqueID = paste(mtcars$cyl, sep = "_", mtcars$gear),
    gear2 = gear
  ) %>%
  group_by(uniqueID, gear) %>%
  nest() %>%
  ungroup()

mutate(nested, histogram = pmap(list(gear = gear, x = data), histyfun))
#> # A tibble: 8 × 4
#>    gear uniqueID data               histogram
#>   <dbl> <chr>    <list>             <list>   
#> 1     4 6_4      <tibble [4 × 11]>  <gg>     
#> 2     4 4_4      <tibble [8 × 11]>  <gg>     
#> 3     3 6_3      <tibble [2 × 11]>  <gg>     
#> 4     3 8_3      <tibble [12 × 11]> <gg>     
#> 5     3 4_3      <tibble [1 × 11]>  <gg>     
#> 6     5 4_5      <tibble [2 × 11]>  <gg>     
#> 7     5 8_5      <tibble [2 × 11]>  <gg>     
#> 8     5 6_5      <tibble [1 × 11]>  <gg>

轉載請註明出處，本文鏈接：https://www.uj5u.com/ruanti/444198.html

標籤：r ggplot2 nested tidyr purrr

上一篇：將€符號添加到氣泡圖中的資料標簽-ggplot？

下一篇：如何計算非線性回歸“y=2/(1 exp(a*x))-1”中的系數“a”？還要畫圖？