我有這張表,其中包含一個熱編碼的變數。我想將這些變數合并為一列。例如,任何具有“高”、“中”或“低”的列,我想成為具有高 = 0、中 = 1 和低 = 2 的數字編碼的一列。如何在dplyrR 中執行此操作? 我懷疑旋轉會有所幫助,但我不確定從哪里開始。生成的列名稱應包含三列的名稱,不帶高、中、低指定。例如,我會將列d-high_cm1, d-med_cm1,d-low_cm1轉換d-cm1為數字編碼。
輸入:
sex age cost_cm d-high_cm1 d-med_cm1 d-low_cm1 c-high_cm1 c-med_cm1 c-low_cm1
f old 1 1 0 0 1 0 0
m young 0 1 0 0 1 0 0
m old 0 0 1 0 0 1 0
f young 0 1 0 0 0 0 1
m old 1 0 0 1 0 0 1
預期輸出:
sex age cost_cm d-cm1 c-cm1
f old 1 0 0
m young 0 0 0
m old 0 1 1
f young 0 0 2
m old 1 2 2
uj5u.com熱心網友回復:
我們可能會做
library(stringr)
library(dplyr)
library(tidyr)
df1 %>%
mutate(across(contains("-"), ~ case_when(str_detect(cur_column(),
'low') ~ . * 2, str_detect(cur_column(), 'med') ~ . * 1,
TRUE ~ .* 0))) %>%
rename_with(~ str_replace(., "-(\\w )_(\\w )", "-\\2_\\1"), contains('-')) %>%
pivot_longer(cols = contains('-'), names_to = c(".value"),
names_pattern = "^([^_] )_.*")%>%
group_by(sex, age, cost_cm) %>%
summarise(across(everything(), max), .groups = 'drop')
-輸出
# A tibble: 5 × 5
sex age cost_cm `d-cm1` `c-cm1`
<chr> <chr> <int> <dbl> <dbl>
1 f old 1 0 0
2 f young 0 0 2
3 m old 0 1 1
4 m old 1 2 2
5 m young 0 0 0
或使用 base R
lst1 <- lapply(split.default(df1[-c(1:3)], sub("-[^_] ", "",
names(df1)[-(1:3)])), function(x) do.call(pmax, x * (0:2)[col(x)]))
cbind(df1[1:3], lst1)
-輸出
sex age cost_cm c_cm1 d_cm1
1 f old 1 0 0
2 m young 0 0 0
3 m old 0 1 1
4 f young 0 2 0
5 m old 1 2 2
資料
df1 <- structure(list(sex = c("f", "m", "m", "f", "m"), age = c("old",
"young", "old", "young", "old"), cost_cm = c(1L, 0L, 0L, 0L,
1L), `d-high_cm1` = c(1L, 1L, 0L, 1L, 0L), `d-med_cm1` = c(0L,
0L, 1L, 0L, 0L), `d-low_cm1` = c(0L, 0L, 0L, 0L, 1L), `c-high_cm1` = c(1L,
1L, 0L, 0L, 0L), `c-med_cm1` = c(0L, 0L, 1L, 0L, 0L), `c-low_cm1` = c(0L,
0L, 0L, 1L, 1L)), class = "data.frame", row.names = c(NA, -5L
))
uj5u.com熱心網友回復:
這是一個替代方案,
library(dplyr)
library(tidyr) # pivot_*, unite
dat %>%
pivot_longer(
-c(sex, age, cost_cm),
names_pattern = "([^.] )-([a-z] )_(.*)",
names_to = c("ltr", "fctr", "key")
) %>%
left_join(fctrs, by = "fctr") %>%
mutate(value = value * fctrval) %>%
unite("key", ltr, key) %>%
group_by(sex, age, cost_cm, key) %>%
summarize(value = max(value)) %>%
ungroup() %>%
pivot_wider(c(sex, age, cost_cm), names_from = "key", values_from = "value")
# # A tibble: 5 x 5
# sex age cost_cm c_cm1 d_cm1
# <chr> <chr> <int> <dbl> <dbl>
# 1 f old 1 0 0
# 2 f young 0 2 0
# 3 m old 0 1 1
# 4 m old 1 2 2
# 5 m young 0 0 0
uj5u.com熱心網友回復:
另一種可能的解決方案:
library(tidyverse)
df %>%
mutate(across(contains("high"), ~ 0),
across(contains("low"), ~ ifelse(.x == 1,2,0))) %>%
mutate(`d-cm1` = rowSums(.[,4:6]), `c-cm1` = rowSums(.[,7:9])) %>%
select(-(4:9))
#> sex age cost_cm d-cm1 c-cm1
#> 1 f old 1 0 0
#> 2 m young 0 0 0
#> 3 m old 0 1 1
#> 4 f young 0 0 2
#> 5 m old 1 2 2
轉載請註明出處,本文鏈接:https://www.uj5u.com/net/390428.html
上一篇:在purrr的map()函式中使用levene_test?
下一篇:如何在R中將事件資料重組為二元
