合并三個資料集后,我一團糟。有一個唯一的id欄位,然后可以有一個或多個與每個相關聯的樣本id。到目前為止我有
samples <- structure(list(id = c(1029459, 1029459, 1029459, 1029459, 1030272,
1030272, 1030272, 1032157, 1032157, 1032178, 1032178, 1032219,
1032219, 1032229, 1032229, 1032494, 1032494, 1032780, 1032780
), sample1 = c(853401, 853401, 853401, 853401, 852769, 852769,
852769, 850161, 850161, 852711, 852711, 852597, 852597, 850363,
850363, 850717, 850717, 848763, 848763), sample2 = c(853401,
853693, 853667, 853667, 852769, 853597, 853597, NA, NA, 852711,
853419, 852597, 852597, 850363, 852741, 850717, 851811, 848763,
848763), sample3 = c(NA, NA, NA, NA, NA, NA, NA, 853621, 852621,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -19L))
print(samples)
#> # A tibble: 19 × 4
#> id sample1 sample2 sample3
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1029459 853401 853401 NA
#> 2 1029459 853401 853693 NA
#> 3 1029459 853401 853667 NA
#> 4 1029459 853401 853667 NA
#> 5 1030272 852769 852769 NA
#> 6 1030272 852769 853597 NA
#> 7 1030272 852769 853597 NA
#> 8 1032157 850161 NA 853621
#> 9 1032157 850161 NA 852621
#> 10 1032178 852711 852711 NA
#> 11 1032178 852711 853419 NA
#> 12 1032219 852597 852597 NA
#> 13 1032219 852597 852597 NA
#> 14 1032229 850363 850363 NA
#> 15 1032229 850363 852741 NA
#> 16 1032494 850717 850717 NA
#> 17 1032494 850717 851811 NA
#> 18 1032780 848763 848763 NA
#> 19 1032780 848763 848763 NA
我想得到它,以便將每個 id 的所有唯一樣本組合成一個sample具有長資料框的列。例如
id sample
1029459 853401
1029459 853693
1030272 852769
1030272 853597
1032157 850161
1032157 853621
有任何想法嗎?
uj5u.com熱心網友回復:
這是你想要的?例如: id1029459具有唯一的樣本853401 , 853693 , 853667。
samples %>%
pivot_longer(
c(sample1, sample2, sample3)
,names_to = "sample") %>%
count(id, sample, value) %>%
drop_na() %>%
distinct(id, value) %>%
rename(sample = value)
# A tibble: 16 × 2
id sample
<dbl> <dbl>
1 1029459 853401
2 1029459 853667
3 1029459 853693
4 1030272 852769
5 1030272 853597
6 1032157 850161
7 1032157 852621
8 1032157 853621
9 1032178 852711
10 1032178 853419
11 1032219 852597
12 1032229 850363
13 1032229 852741
14 1032494 850717
15 1032494 851811
16 1032780 848763
uj5u.com熱心網友回復:
你可以這樣做:
library(data.table)
unique(melt(setDT(samples), "id",value.name = "sample")[!is.na(sample),c(1,3)])
輸出:
id sample
1: 1029459 853401
2: 1030272 852769
3: 1032157 850161
4: 1032178 852711
5: 1032219 852597
6: 1032229 850363
7: 1032494 850717
8: 1032780 848763
9: 1029459 853693
10: 1029459 853667
11: 1030272 853597
12: 1032178 853419
13: 1032229 852741
14: 1032494 851811
15: 1032157 853621
16: 1032157 852621
轉載請註明出處,本文鏈接:https://www.uj5u.com/ruanti/532164.html
標籤:rdplyr咕噜咕噜
上一篇:java表單不顯示
