如何使用ifelse將列與3個或更多選項進行比較？-有解無憂

我有2個資料框：

# Sample calls (columns S01..S06) for each position `pos`; some calls are the
# dash "-". The header row has one field fewer than the data rows, so
# read.table() takes the first field of every data row as the row name.
# stringsAsFactors = FALSE keeps the calls as plain character vectors on every
# R version, so they can be compared with `==` without factor-level errors.
df1 <- read.table(
  text = "
pos S01 S02 S03 S04 S05 S06
1    1   G   G   -   A   A   A
2    2   T   T   -   T   T   T
3    3   G   G   -   G   G   G
4    4   C   C   A   C   C   C
5    5   A   A   T   A   A   A
6    6   A   A   G   A   A   A
7    7   T   T   A   T   T   T
8    9   C   C   T   C   C   C
9   12   C   C   C   C   C   C
10  15   T   T   T   T   T   T",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Reference table: for each position `pos`, the reference value `ref` and the
# variant value `var`. The header row has one field fewer than the data rows,
# so read.table() takes the first field of every data row as the row name.
# stringsAsFactors = FALSE keeps `ref`/`var` as plain character vectors on
# every R version, so they can be compared with `==` without factor-level
# errors.
df2 <- read.table(
  text = "
pos ref var
1    1   G   -
2    2   T   -
3    3   G   -
4    4   C   A
5    5   A   T
6    6   A   G
7    7   T   A
8    9   C   T
9   12   C   T
10  15   T   C",
  header = TRUE,
  stringsAsFactors = FALSE
)

所以我想使用 ref 列將 df1 的每一列（S01 到 S06）與 df2 進行比較，我做了一個 for 回圈：

# Score every sample column of df1 against df2$ref: 1 when the call equals the
# reference, 0 otherwise. Sample columns are selected by name (everything
# except `pos`), so the loop is simply skipped when there are none instead of
# counting backwards the way `2:ncol(df1)` does.
# Rows are compared in order; `pos` is not used to line df1 up with df2.
df3 <- df1
for (sample_col in setdiff(names(df1), "pos")) {
  df3[[sample_col]] <- ifelse(df1[[sample_col]] == df2[["ref"]], 1, 0)
}

df3
   pos S01 S02 S03 S04 S05 S06
1    1   1   1   0   0   0   0
2    2   1   1   0   1   1   1
3    3   1   1   0   1   1   1
4    4   1   1   0   1   1   1
5    5   1   1   0   1   1   1
6    6   1   1   0   1   1   1
7    7   1   1   0   1   1   1
8    9   1   1   0   1   1   1
9   12   1   1   1   1   1   1
10  15   1   1   1   1   1   1

我有 2 個問題。第一，我需要進行相同的比較，但要以兩個 data.frame 中的 pos 列作為對應依據；也就是說，先檢查 df1$pos 是否等于 df2$pos，然后再用回圈把 df2$ref 與 S01 到 S0n 的每一列進行比較。第二，我想添加另一個選項：如果存在破折號（-），則在回圈中填入數字 2，最后的結果類似于：

   pos S01 S02 S03 S04 S05 S06
1    1   1   1   2   0   0   0
2    2   1   1   2   1   1   1
3    3   1   1   2   1   1   1
4    4   1   1   0   1   1   1
5    5   1   1   0   1   1   1
6    6   1   1   0   1   1   1
7    7   1   1   0   1   1   1
8    9   1   1   0   1   1   1
9   12   1   1   1   1   1   1
10  15   1   1   1   1   1   1

或者保留“-”不替換：

   pos S01 S02 S03 S04 S05 S06
1    1   1   1   -   0   0   0
2    2   1   1   -   1   1   1
3    3   1   1   -   1   1   1
4    4   1   1   0   1   1   1
5    5   1   1   0   1   1   1
6    6   1   1   0   1   1   1
7    7   1   1   0   1   1   1
8    9   1   1   0   1   1   1
9   12   1   1   1   1   1   1
10  15   1   1   1   1   1   1

uj5u.com熱心網友回復：

首先依變異位點的識別碼（即 pos 列）合并兩個資料框。然后對每個樣本（列）檢查基因型是否為“-”；若不是，再檢查基因型是否與參考（ref）相匹配：

# Line the two tables up on `pos` (explicit key), keeping only `ref` from df2.
out <- merge(df1, df2[c("pos", "ref")], by = "pos")
# Sample columns are everything except `pos`; selecting by name avoids the
# backwards sequence `2:ncol(df1)` produces when there are no sample columns.
samples <- setdiff(colnames(df1), "pos")

# Work on a matrix so a single sample column keeps its name and shape.
calls <- as.matrix(out[samples])
# `ref` has one value per row and is recycled down each column:
# 1 = matches the reference, 0 = differs.
coded <- (calls == as.character(out[["ref"]])) * 1
# A dash always wins over the 0/1 coding.
coded[calls == "-"] <- 2

out <- cbind(out["pos"], coded)

out
#    pos S01 S02 S03 S04 S05 S06
# 1    1   1   1   2   0   0   0
# 2    2   1   1   2   1   1   1
# 3    3   1   1   2   1   1   1
# 4    4   1   1   0   1   1   1
# 5    5   1   1   0   1   1   1
# 6    6   1   1   0   1   1   1
# 7    7   1   1   0   1   1   1
# 8    9   1   1   0   1   1   1
# 9   12   1   1   1   1   1   1
# 10  15   1   1   1   1   1   1

uj5u.com熱心網友回復：

tidyverse

library(tidyverse)
# Same sample table as in the question, built column by column; each call
# string is split into one single-letter value per position.
df1 <- data.frame(
  pos = c(1:7, 9L, 12L, 15L),
  S01 = strsplit("GTGCAATCCT", "")[[1]],
  S02 = strsplit("GTGCAATCCT", "")[[1]],
  S03 = strsplit("---ATGATCT", "")[[1]],
  S04 = strsplit("ATGCAATCCT", "")[[1]],
  S05 = strsplit("ATGCAATCCT", "")[[1]],
  S06 = strsplit("ATGCAATCCT", "")[[1]],
  stringsAsFactors = FALSE
)

# Same reference table as in the question; `ref` and `var` are split from
# compact strings into one single-letter value per position.
df2 <- data.frame(
  pos = c(1:7, 9L, 12L, 15L),
  ref = strsplit("GTGCAATCCT", "")[[1]],
  var = strsplit("---ATGATTC", "")[[1]],
  stringsAsFactors = FALSE
)


# Turn dashes into NA so they can be recoded separately from real calls.
df1[df1 == "-"] <- NA
# Compare only the sample columns with the reference, leaving `pos` untouched
# (comparing `pos` with `ref` would overwrite every position with 0).
# Rows are compared in order, so df1 and df2 must list the same positions in
# the same order. Unary `+` turns TRUE/FALSE into 1/0; the NA left behind by
# a dash then becomes 2.
df1 %>%
  as_tibble() %>%
  mutate(across(-pos, ~ replace_na(+(.x == df2$ref), 2L)))
#> # A tibble: 10 x 7
#>      pos   S01   S02   S03   S04   S05   S06
#>    <int> <int> <int> <int> <int> <int> <int>
#>  1     1     1     1     2     0     0     0
#>  2     2     1     1     2     1     1     1
#>  3     3     1     1     2     1     1     1
#>  4     4     1     1     0     1     1     1
#>  5     5     1     1     0     1     1     1
#>  6     6     1     1     0     1     1     1
#>  7     7     1     1     0     1     1     1
#>  8     9     1     1     0     1     1     1
#>  9    12     1     1     1     1     1     1
#> 10    15     1     1     1     1     1     1

base R

# Turn dashes into NA so they can be recoded separately from real calls.
df1[df1 == "-"] <- NA
# Only the sample columns are compared with the reference; `pos` is left as
# it is (comparing it with `ref` would overwrite every position with 0).
samples <- setdiff(names(df1), "pos")
# Unary `+` turns TRUE/FALSE into 1/0. Rows are compared in order, so df1 and
# df2 must list the same positions in the same order.
df1[samples] <- lapply(df1[samples], function(x) +(x == df2$ref))
# Whatever is still NA was a dash: code it as 2.
df1[is.na(df1)] <- 2
df1
#>    pos S01 S02 S03 S04 S05 S06
#> 1    1   1   1   2   0   0   0
#> 2    2   1   1   2   1   1   1
#> 3    3   1   1   2   1   1   1
#> 4    4   1   1   0   1   1   1
#> 5    5   1   1   0   1   1   1
#> 6    6   1   1   0   1   1   1
#> 7    7   1   1   0   1   1   1
#> 8    9   1   1   0   1   1   1
#> 9   12   1   1   1   1   1   1
#> 10  15   1   1   1   1   1   1

（使用 reprex v2.0.2 創建于 2022-10-07）

uj5u.com熱心網友回復：

一種不使用 ifelse 的方式，因此可能更快。它也適用于矩陣（matrix）。

# Recode every non-`pos` column of `x` against the reference values in `y`.
#
# Rows are paired by the value of `pos`, not by row order: each row of `x` is
# compared with the row of `y` that has the same `pos`. Rows of `x` whose
# `pos` does not occur in `y` are dropped from the result.
#
# Args:
#   x: data frame or matrix with a `pos` column plus the columns to recode.
#   y: data frame or matrix with a `pos` column and a `ref` column.
#
# Returns:
#   The matched rows of `x` (in the order of `x`), where each recoded cell is
#   1 if it equals the reference, 0 if it differs, and "-" if the original
#   cell was "-". `pos` is returned unchanged.
f <- function(x, y) {
  # For every row of x, the row of y that holds the same pos (NA if none).
  ref_row <- match(x[, "pos"], y[, "pos"])
  # Rows of x that actually have a reference row in y.
  keep <- which(!is.na(ref_row))
  if (length(keep) == 0L) {
    # Nothing to compare; an empty row index cannot be used for assignment.
    return(x[0L, , drop = FALSE])
  }
  cc <- setdiff(colnames(x), "pos")  # only compare non-pos columns
  out <- x  # copy x for output
  # Index x by its own rows and y by the matched rows, so the comparison
  # stays aligned when the two tables are ordered differently. The reference
  # vector is recycled down each column.
  out[keep, cc] <- as.numeric(x[keep, cc] == y[ref_row[keep], "ref"])
  out[x == "-"] <- "-"  # an original dash overrides the 0/1 coding
  out[keep, , drop = FALSE]
}

f(df1, df2)
#    pos S01 S02 S03 S04 S05 S06
# 1    1   1   1   -   0   0   0
# 2    2   1   1   -   1   1   1
# 3    3   1   1   -   1   1   1
# 4    4   1   1   0   1   1   1
# 5    5   1   1   0   1   1   1
# 6    6   1   1   0   1   1   1
# 7    7   1   1   0   1   1   1
# 8    9   1   1   0   1   1   1
# 9   12   1   1   1   1   1   1
# 10  15   1   1   1   1   1   1

轉載請註明出處，本文鏈接：https://www.uj5u.com/net/512668.html

標籤：r、if 語句、比較

上一篇：如何檢測Discord訊息是否包含鏈接？[復制]

下一篇：VBA：計算每一行的值，但變數取決于一列