DF1:
variant ID1 ID2 ID3 ID4 .... ID80000
123 0 1 2 1 0
321 1 2 1 1 1
543 1 1 2 1 1
6542 1 0 0 1 0
243 1 0 2 1 1
654 0 1 1 2 1
342 1 2 1 2 1
present 0 1 0 1 0
DF2:
ID sex yob disease
ID1 M 10/10/1910 cancer
ID2 F 05/02/2000 CML
ID3 F 01/01/1983 gout
我想將 DF2 中的列作為行添加到 DF1 上,通過匹配 ID 將列名放入 DF1 的變體列中
期望的結果
variant ID1 ID2 ID3 ID4 .... ID80000
123 0 1 2 1 0
321 1 2 1 1 1
543 1 1 2 1 1
6542 1 0 0 1 0
243 1 0 2 1 1
654 0 1 1 2 1
342 1 2 1 2 1
present 0 1 0 1 0
sex M F F NA NA
yob 10/10/1910 05/02/2000 01/01/1983 NA NA
disease cancer CML gout NA NA
我試過了:
df1["sex",] <- df2$sex[match(df2$ID, colnames(df1),]
但這不起作用。
下面這個是可以運作的:
df1["sex",] <- ifelse(colnames(df1) %in% df2$ID, df2$sex, NA)
我什至不知道如何一次處理多于一列。
任何幫助將非常感激
uj5u.com熱心網友回復:
使用data.table:
盡管這適用于本示例,但您不能將其原樣用于“任何”其他資料集。它需要一些資料知識,在遵循準備步驟時可以輕松調整(參見說明)。
library(data.table)
# Reshape df2 so it has the same layout as df1, then stack it underneath:
#   * t(df2[-1]) turns every attribute column (all but the first, ID, column)
#     into a row; df2[-1] keeps a data.frame even with a single attribute.
#   * setnames(..., df2[[1]]) names the transposed columns after the IDs.
#   * cbind(variant = ...) adds the former column names as the `variant`
#     column that df1 already has.
# fill = TRUE lets rbindlist() pad columns that exist only in df1 (IDs with
# no row in df2) with NA.
rbindlist(
  list(
    df1,
    cbind(
      variant = names(df2)[-1],
      setnames(data.frame(t(df2[-1])), df2[[1]])
    )
  ),
  fill = TRUE
)
variant ID1 ID2 ID3 ID4
1: 123 0 1 2 1
2: 321 1 2 1 1
3: 543 1 1 2 1
4: 6542 1 0 0 1
5: 243 1 0 2 1
6: 654 0 1 1 2
7: 342 1 2 1 2
8: present 0 1 0 1
9: sex M F F NA
10: yob 10/10/1910 05/02/2000 01/01/1983 NA
11: disease cancer CML gout NA
解釋
df1很好,但df2需要小心,因為我們沒有變體列。
# Step 1: transpose the non-ID columns of df2 (columns 2..end) so each
# attribute becomes a row, then name the resulting columns after the IDs
# stored in df2's first column
setnames( data.frame( t(df2[,2:ncol(df2)]) ), df2[,1] )
# ID1 ID2 ID3
#sex M F F
#yob 10/10/1910 05/02/2000 01/01/1983
#disease cancer CML gout
# Step 2: the former column names of df2 (everything except the first
# column) will become the values of the new `variant` column
names(df2)[2:ncol(df2)]
#[1] "sex" "yob" "disease"
# Step 3: bind both parts together, naming the new first column `variant`
# so it lines up with df1's `variant` column
cbind( variant=names(df2)[2:ncol(df2)],
setnames( data.frame( t(df2[,2:ncol(df2)]) ), df2[,1] ))
# variant ID1 ID2 ID3
#sex sex M F F
#yob yob 10/10/1910 05/02/2000 01/01/1983
#disease disease cancer CML gout
# The reshaped df2 now has the same column names as df1 (for the IDs it
# covers) and can be stacked under df1 with rbindlist(), as shown above
資料
# Example genotype table: one row per variant, one integer column per ID.
df1 <- data.frame(
  variant = c("123", "321", "543", "6542", "243", "654", "342", "present"),
  ID1 = c(0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L),
  ID2 = c(1L, 2L, 1L, 0L, 0L, 1L, 2L, 1L),
  ID3 = c(2L, 1L, 2L, 0L, 2L, 1L, 1L, 0L),
  ID4 = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L),
  stringsAsFactors = FALSE
)
# Example metadata table: one row per ID, all columns character.
df2 <- data.frame(
  ID = c("ID1", "ID2", "ID3"),
  sex = c("M", "F", "F"),
  yob = c("10/10/1910", "05/02/2000", "01/01/1983"),
  disease = c("cancer", "CML", "gout"),
  stringsAsFactors = FALSE
)
uj5u.com熱心網友回復:
另一種方法,使用 dplyr 來調整 df2,使用 magrittr 來調整管道運算子,使用 data.table 來連接兩個 df
library(dplyr)
library(magrittr)
# Reshape df2 into df1's layout without overwriting df2 itself, so the
# snippet can be re-run safely:
#   * t(df2[-1]) transposes the attribute columns once (rows = attributes);
#     df2[-1] keeps a data.frame even when there is only one attribute.
#   * the columns are named after the IDs *before* the tibble conversion, so
#     as_tibble() receives a matrix that already has column names.
#   * rownames = "variant" stores the attribute names in a leading `variant`
#     column, matching df1's first column.
df2_wide <- t(df2[-1]) %>%
  `colnames<-`(df2[["ID"]]) %>%
  as_tibble(rownames = "variant")
library(data.table)
# fill = TRUE pads IDs that have no row in df2 with NA.
rbindlist(list(df1, df2_wide), fill = TRUE)
轉載請註明出處,本文鏈接:https://www.uj5u.com/caozuo/360308.html
