我有這樣的資料框:
# Example input: one row per (id, target) observation; an id may repeat a
# target, and the same target may occur under several ids.
df <- tibble(
  id = c(
    "ls1", "ls1", "ls1", "ls2", "ls2", "ls3",
    "ls5", "ls5", "ls10", "ls10", "ls14"
  ),
  target = c(
    "A", "A", "B", "G", "H", "A",
    "B", "B", "G", "HA", "B"
  )
)
我想找出 target 欄中,在同一個 id 組內重複出現的值,以及在不同 id 組之間共同出現的值。結果可能類似於下表:
# Expected result for the example data.
# - withinGroup / numberofRepwithinGroup: whether, and how many times, the
#   row's target occurs within the same id.
# - betweenGroups / numberofRepbetweenGroups: whether the target occurs under
#   more than one id, and under how many distinct ids (0 when only one).
# NOTE(review): the within-group vectors were shifted by one row for rows 7-9
# (ls5/B, ls5/B, ls10/G) and row 6 (ls3/A) listed 3 ids although "A" occurs
# under ls1 and ls3 only; the values below are aligned with the data.
res <- tibble(
  id = c(
    "ls1", "ls1", "ls1", "ls2", "ls2", "ls3",
    "ls5", "ls5", "ls10", "ls10", "ls14"
  ),
  target = c(
    "A", "A", "B", "G", "H", "A",
    "B", "B", "G", "HA", "B"
  ),
  withinGroup = c(
    TRUE, TRUE, FALSE, FALSE, FALSE, FALSE,
    TRUE, TRUE, FALSE, FALSE, FALSE
  ),
  numberofRepwithinGroup = c(2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1),
  betweenGroups = c(
    TRUE, TRUE, TRUE, TRUE, FALSE, TRUE,
    TRUE, TRUE, TRUE, FALSE, TRUE
  ),
  numberofRepbetweenGroups = c(2, 2, 3, 2, 0, 2, 3, 3, 2, 0, 3)
)
知道怎麼做嗎?
uj5u.com熱心網友回復:
你可以用幾個mutate():
library(dplyr)
df |>
  # Within-group view: rows sharing the same (id, target) pair.
  group_by(id, target) |>
  mutate(
    # n() is the number of rows in the current (id, target) group.
    numberofRepwithinGroup = n(),
    # The comparison is already logical; no ifelse(..., T, F) needed.
    withinGroup = numberofRepwithinGroup > 1
  ) |>
  # Between-group view: regrouping by target alone replaces the grouping above.
  group_by(target) |>
  mutate(
    # Helper column (dropped by select() below): distinct ids for this target.
    n_ids = n_distinct(id),
    betweenGroups = n_ids > 1,
    # 0 marks a target that occurs under a single id only.
    numberofRepbetweenGroups = ifelse(betweenGroups, n_ids, 0)
  ) |>
  # Reorder columns; anything not listed (the helper) is dropped.
  select(
    id, target, withinGroup, numberofRepwithinGroup,
    betweenGroups, numberofRepbetweenGroups
  ) |>
  # Remove the leftover grouping by target.
  ungroup()
# A tibble: 11 x 6
id target withinGroup numberofRepwithinGroup betweenGroups numberofRepbetweenGroups
<chr> <chr> <lgl> <int> <lgl> <dbl>
1 ls1 A TRUE 2 TRUE 2
2 ls1 A TRUE 2 TRUE 2
3 ls1 B FALSE 1 TRUE 3
4 ls2 G FALSE 1 TRUE 2
5 ls2 H FALSE 1 FALSE 0
6 ls3 A FALSE 1 TRUE 2
7 ls5 B TRUE 2 TRUE 3
8 ls5 B TRUE 2 TRUE 3
9 ls10 G FALSE 1 TRUE 2
10 ls10 HA FALSE 1 FALSE 0
11 ls14 B FALSE 1 TRUE 3
uj5u.com熱心網友回復:
這是一個選項
library(dplyr)
#' Count the other occurrences of each element within a vector
#'
#' For every element of `x`, looks up how often that value occurs in `x`
#' (via `table()`) and subtracts one, so a value that occurs once yields 0.
#'
#' @param x A vector whose values can be tabulated with `table()`.
#' @return A numeric vector the same length as `x`.
get_reps <- function(x) {
  # Tabulate once and reuse the counts for both the names and the lookup.
  counts <- table(x)
  as.numeric(counts[match(x, names(counts))] - 1)
}
df %>%
  # Within each id: flag targets that occur more than once and count the
  # additional occurrences.
  group_by(id) %>%
  mutate(
    # Combining both directions marks every member of a duplicated set,
    # including its first occurrence.
    withinGroup = duplicated(target) | duplicated(target, fromLast = TRUE),
    numberofRepwithinGroup = get_reps(target)
  ) %>%
  ungroup() %>%
  # Over all rows, ignoring id.
  # NOTE(review): because id is ignored here, a target repeated only inside a
  # single id is also flagged and counted as "between groups".
  mutate(
    betweenGroups = duplicated(target) | duplicated(target, fromLast = TRUE),
    numberofRepbetweenGroups = get_reps(target)
  )
## A tibble: 11 x 6
# id target withinGroup numberofRepwithinGroup betweenGroups numberofRepbetweenGroups
# <chr> <chr> <lgl> <dbl> <lgl> <dbl>
# 1 ls1 A TRUE 1 TRUE 2
# 2 ls1 A TRUE 1 TRUE 2
# 3 ls1 B FALSE 0 TRUE 3
# 4 ls2 G FALSE 0 TRUE 1
# 5 ls2 H FALSE 0 FALSE 0
# 6 ls3 A FALSE 0 TRUE 2
# 7 ls5 B TRUE 1 TRUE 3
# 8 ls5 B TRUE 1 TRUE 3
# 9 ls10 G FALSE 0 TRUE 1
#10 ls10 HA FALSE 0 FALSE 0
#11 ls14 B FALSE 0 TRUE 3
轉載請註明出處,本文鏈接:https://www.uj5u.com/caozuo/360278.html
