以下資料集包含 7 列(即 AI_1 到 AI_7),每個 ID 有 1440 個觀察值(總共 42 個 ID)。我想創建一個資料集,根據閾值為每個 AI 生成一個二進制變數。例如,如果 AI_1 > 0.1,它應該在一個名為 ACTIVITY 的新變數中獲得值 1,否則在同一個變數 ACTIVITY 中獲得值 0。我用下面的代碼嘗試了這個,但是當我試圖計算二進制變數的平均值時,結果顯示平均值高于 1。這很奇怪,因為該變數只能取 0 或 1 的值。有人知道如何在同一資料集中生成這 7 個二進制變數,并使它們的均值介于 0 和 1 之間嗎?
structure(list(X = 1:30, x1.time = c("00:00:00", "00:01:00",
"00:02:00", "00:03:00", "00:04:00", "00:05:00", "00:06:00", "00:07:00",
"00:08:00", "00:09:00", "00:10:00", "00:11:00", "00:12:00", "00:13:00",
"00:14:00", "00:15:00", "00:16:00", "00:17:00", "00:18:00", "00:19:00",
"00:20:00", "00:21:00", "00:22:00", "00:23:00", "00:24:00", "00:25:00",
"00:26:00", "00:27:00", "00:28:00", "00:29:00"), AI_1 = c(0.17532896077581,
0.174249939439765, 0.174170544792533, 0.172877357886967, 0.173679017353614,
0.174216799443538, 0.174514454250882, 0.174656389074666, 0.173377175454716,
0.173044040397703, 0.172476572884875, 0.174738790856458, 0.173833445732856,
0.174229265722835, 0.174392878820111, 0.174715890976243, 0.174241614289181,
0.173229751013599, 0.173579164085914, 0.173829069216696, 0.173499039975341,
0.174387946222767, 0.173802854581089, 0.174107580137568, 0.174113709936873,
0.173172609295233, 0.174509255493075, 0.173383120975257, 0.173398927511582,
0.173466516952908), AI_2 = c(0.173549588758752, 0, 0.85729795236214,
0.513925586220723, 0.140789239632585, 0.0989981552300843, 0.321625480480368,
0.62540390366724, 0.00714855410741877, 0, 0, 0, 0.212943798631015,
0, 0, 0.023650258664654, 0.00159158576982517, 0.0172670511608436,
0, 0, 0, 0.25653572767355, 0.41158598021939, 0.433889173147664,
0.442200975044019, 0.471931171507954, 0.415009919603445, 0.43364443321512,
0.449930874231746, 0.48397633182816), AI_3 = c(0.026069149474549,
0.0417747330978121, 0.276687600798659, 0.258591321128928, 0.208790296683244,
0.0300099278967508, 0.15234594700642, 0.26519848659315, 0.34220566727692,
0.352310255219813, 0.297621781376737, 0.292800000618149, 0.481566536382664,
0.337770306519177, 0.743182296874282, 0.256202127993172, 0.201340506649845,
0.200155318345632, 0.237126429055375, 0.234974163009848, 0.235808994849961,
0.302168675921402, 0.377936665388589, 0.416123299239618, 0.389279883023212,
0.357972848973051, 0.305268847437493, 0.290040891577408, 0.197384083463156,
0.258282654013295), AI_4 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00841646877382803,
0), AI_5 = c(0, 0, 0.0015062890214412, 0.00154798776365785, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), AI_6 = c(0.190018331633492, 0.241159552783285, 0.231916111803065,
0.193196835220518, 0.240381778378367, 0.266125762332231, 0.339227319507121,
0.354841547583334, 0.277011867279295, 0.474462632995715, 0.516356521276347,
0.559477604383845, 0.374857636694405, 0.376675155204282, 0.516347133869462,
0.627633542885353, 0.565732682034457, 0.544148310829377, 0.545022418887296,
0.602327138107482, 0.529578366594453, 0.571672817412653, 0.51963881197827,
0.493590581088222, 0.487545798153711, 0.525272191616523, 0.586906227102549,
0.555446579214151, 0.578788883825157, 0.617822898150646), AI_7 = c(0.139608768263461,
0.165583663096789, 0.326959508587122, 0.221739297198209, 0.160657663051105,
0.107439748199699, 0.117594125364214, 0.133528520361788, 0.117950354159875,
0.131428192187155, 0.125355403562937, 0.119185646272255, 0.196285453922129,
0.167061057207379, 0.169855099745761, 0.141077126343563, 0.078433720675593,
0.0999303057993443, 0.0798045801131668, 0.0331137028671696, 0.0920945831761988,
0.0233052285173748, 0, 0, 0, 0.00876293044107867, 0, 0.109134564970416,
0.110323312017635, 0.117772975747077), ID = c("ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1"
), activity = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0), activity2 = c("0",
"1", "0", "0", "0", "1", "0", "0", "1", "1", "1", "1", "0", "1",
"1", "1", "1", "1", "1", "1", "1", "0", "0", "0", "0", "0", "0",
"0", "0", "0"), activity3 = c("1", "1", "0", "0", "0", "1", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0"), activity4 = c("1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1"), activity5 = c("1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), activity6 = c("0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0"), activity7 = c("0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "0", "0", "0")), row.names = c(NA,
30L), class = "data.frame")
這是我使用的代碼
# Build one 0/1 indicator column per AI_1 ... AI_7 column.
#
# Each indicator is 1 when the AI value is <= 0.1 and 0 when it is > 0.1.
# The indicators are stored as integers rather than the strings "1"/"0":
# a character (or factor) column cannot be averaged meaningfully, whereas
# the mean of an integer 0/1 column is the share of rows flagged 1 and
# therefore always lies between 0 and 1.
# A missing AI value yields a missing indicator.
#
# NOTE(review): the accompanying description says values > 0.1 should be
# coded 1; the code codes values <= 0.1 as 1. The direction of the original
# code is kept here -- confirm which one is intended.
#
# The intermediate objects Threshold ... Threshold7 are all kept; each one
# adds a single indicator column to the previous result.
Threshold <- Activity_index_1 %>%
  mutate(activity = as.integer(AI_1 <= 0.1))
Threshold2 <- Threshold %>%
  mutate(activity2 = as.integer(AI_2 <= 0.1))
Threshold3 <- Threshold2 %>%
  mutate(activity3 = as.integer(AI_3 <= 0.1))
Threshold4 <- Threshold3 %>%
  mutate(activity4 = as.integer(AI_4 <= 0.1))
Threshold5 <- Threshold4 %>%
  mutate(activity5 = as.integer(AI_5 <= 0.1))
Threshold6 <- Threshold5 %>%
  mutate(activity6 = as.integer(AI_6 <= 0.1))
Threshold7 <- Threshold6 %>%
  mutate(activity7 = as.integer(AI_7 <= 0.1))
uj5u.com熱心網友回復:
這是一個解決方案:在 mutate/across 中計算邏輯條件(結果為 FALSE/TRUE),然后將其強制轉換為整數 0/1。
發布的資料已經包含 activity 列,所以我首先從資料中刪除它們。
suppressPackageStartupMessages({
library(dplyr)
library(stringr)
})
# Add one integer 0/1 indicator column per AI_* column
# (1 when the value is <= 0.1, 0 otherwise).
Threshold <- Activity_index_1 %>%
  # Drop any existing activity* columns so the freshly computed indicators
  # do not clash with them.
  select(-starts_with("activity")) %>%
  # The comparison yields FALSE/TRUE; as.integer() turns that into 0/1.
  # The new columns are named activity_<source column>, e.g. activity_AI_1.
  mutate(across(
    starts_with("AI_"),
    ~ as.integer(.x <= 0.1),
    .names = "activity_{col}"
  )) %>%
  # Strip the "_AI_" infix so activity_AI_1 becomes activity1, and so on.
  # rename_with() is the current replacement for the superseded
  # rename_at()/vars() pairing.
  rename_with(~ str_remove(.x, "_AI_"), starts_with("activity_AI"))
str(Threshold)
#> 'data.frame': 30 obs. of 17 variables:
#> $ X : int 1 2 3 4 5 6 7 8 9 10 ...
#> $ x1.time : chr "00:00:00" "00:01:00" "00:02:00" "00:03:00" ...
#> $ AI_1 : num 0.175 0.174 0.174 0.173 0.174 ...
#> $ AI_2 : num 0.174 0 0.857 0.514 0.141 ...
#> $ AI_3 : num 0.0261 0.0418 0.2767 0.2586 0.2088 ...
#> $ AI_4 : num 0 0 0 0 0 0 0 0 0 0 ...
#> $ AI_5 : num 0 0 0.00151 0.00155 0 ...
#> $ AI_6 : num 0.19 0.241 0.232 0.193 0.24 ...
#> $ AI_7 : num 0.14 0.166 0.327 0.222 0.161 ...
#> $ ID : chr "ID1" "ID1" "ID1" "ID1" ...
#> $ activity1: int 0 0 0 0 0 0 0 0 0 0 ...
#> $ activity2: int 0 1 0 0 0 1 0 0 1 1 ...
#> $ activity3: int 1 1 0 0 0 1 0 0 0 0 ...
#> $ activity4: int 1 1 1 1 1 1 1 1 1 1 ...
#> $ activity5: int 1 1 1 1 1 1 1 1 1 1 ...
#> $ activity6: int 0 0 0 0 0 0 0 0 0 0 ...
#> $ activity7: int 0 0 0 0 0 0 0 0 0 0 ...
使用reprex v2.0.2創建于 2022-10-10
uj5u.com熱心網友回復:
只需將 AI 變數與 .1 比較、轉換為數字、設定 colnames,然后 cbind。
# Append a 0/1 indicator for every column of `dat` whose name starts with
# "AI": 1 where the value is <= .1, 0 otherwise. The indicator columns reuse
# the source names with "AI" replaced by "activity".
# local() keeps the helper variables out of the calling environment.
res <- local({
  ai_idx <- grep('^AI', names(dat))
  # The comparison gives a logical matrix; raising it to the power 1
  # coerces it to numeric 0/1 while keeping its column names.
  flags <- (dat[ai_idx] <= .1)^1
  colnames(flags) <- gsub('AI', 'activity', colnames(flags))
  cbind(dat, flags)
})
str(res)
# 'data.frame': 30 obs. of 16 variables:
# $ x1.time : chr "00:00:00" "00:01:00" "00:02:00" "00:03:00" ...
# $ AI_1 : num 0.175 0.174 0.174 0.173 0.174 ...
# $ AI_2 : num 0.174 0 0.857 0.514 0.141 ...
# $ AI_3 : num 0.0261 0.0418 0.2767 0.2586 0.2088 ...
# $ AI_4 : num 0 0 0 0 0 0 0 0 0 0 ...
# $ AI_5 : num 0 0 0.00151 0.00155 0 ...
# $ AI_6 : num 0.19 0.241 0.232 0.193 0.24 ...
# $ AI_7 : num 0.14 0.166 0.327 0.222 0.161 ...
# $ ID : chr "ID1" "ID1" "ID1" "ID1" ...
# $ activity_1: num 0 0 0 0 0 0 0 0 0 0 ...
# $ activity_2: num 0 1 0 0 0 1 0 0 1 1 ...
# $ activity_3: num 1 1 0 0 0 1 0 0 0 0 ...
# $ activity_4: num 1 1 1 1 1 1 1 1 1 1 ...
# $ activity_5: num 1 1 1 1 1 1 1 1 1 1 ...
# $ activity_6: num 0 0 0 0 0 0 0 0 0 0 ...
# $ activity_7: num 0 0 0 0 0 0 0 0 0 0 ...
# Example input data used by the base-R solution.
# A 30-row data.frame with these columns:
#   x1.time     - time stamps as character strings, "00:00:00" to "00:29:00"
#                 in one-minute steps
#   AI_1..AI_7  - numeric values that are compared against the threshold
#   ID          - character identifier ("ID1" on every row)
dat <- structure(list(x1.time = c("00:00:00", "00:01:00", "00:02:00",
"00:03:00", "00:04:00", "00:05:00", "00:06:00", "00:07:00", "00:08:00",
"00:09:00", "00:10:00", "00:11:00", "00:12:00", "00:13:00", "00:14:00",
"00:15:00", "00:16:00", "00:17:00", "00:18:00", "00:19:00", "00:20:00",
"00:21:00", "00:22:00", "00:23:00", "00:24:00", "00:25:00", "00:26:00",
"00:27:00", "00:28:00", "00:29:00"), AI_1 = c(0.17532896077581,
0.174249939439765, 0.174170544792533, 0.172877357886967, 0.173679017353614,
0.174216799443538, 0.174514454250882, 0.174656389074666, 0.173377175454716,
0.173044040397703, 0.172476572884875, 0.174738790856458, 0.173833445732856,
0.174229265722835, 0.174392878820111, 0.174715890976243, 0.174241614289181,
0.173229751013599, 0.173579164085914, 0.173829069216696, 0.173499039975341,
0.174387946222767, 0.173802854581089, 0.174107580137568, 0.174113709936873,
0.173172609295233, 0.174509255493075, 0.173383120975257, 0.173398927511582,
0.173466516952908), AI_2 = c(0.173549588758752, 0, 0.85729795236214,
0.513925586220723, 0.140789239632585, 0.0989981552300843, 0.321625480480368,
0.62540390366724, 0.00714855410741877, 0, 0, 0, 0.212943798631015,
0, 0, 0.023650258664654, 0.00159158576982517, 0.0172670511608436,
0, 0, 0, 0.25653572767355, 0.41158598021939, 0.433889173147664,
0.442200975044019, 0.471931171507954, 0.415009919603445, 0.43364443321512,
0.449930874231746, 0.48397633182816), AI_3 = c(0.026069149474549,
0.0417747330978121, 0.276687600798659, 0.258591321128928, 0.208790296683244,
0.0300099278967508, 0.15234594700642, 0.26519848659315, 0.34220566727692,
0.352310255219813, 0.297621781376737, 0.292800000618149, 0.481566536382664,
0.337770306519177, 0.743182296874282, 0.256202127993172, 0.201340506649845,
0.200155318345632, 0.237126429055375, 0.234974163009848, 0.235808994849961,
0.302168675921402, 0.377936665388589, 0.416123299239618, 0.389279883023212,
0.357972848973051, 0.305268847437493, 0.290040891577408, 0.197384083463156,
0.258282654013295), AI_4 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00841646877382803,
0), AI_5 = c(0, 0, 0.0015062890214412, 0.00154798776365785, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), AI_6 = c(0.190018331633492, 0.241159552783285, 0.231916111803065,
0.193196835220518, 0.240381778378367, 0.266125762332231, 0.339227319507121,
0.354841547583334, 0.277011867279295, 0.474462632995715, 0.516356521276347,
0.559477604383845, 0.374857636694405, 0.376675155204282, 0.516347133869462,
0.627633542885353, 0.565732682034457, 0.544148310829377, 0.545022418887296,
0.602327138107482, 0.529578366594453, 0.571672817412653, 0.51963881197827,
0.493590581088222, 0.487545798153711, 0.525272191616523, 0.586906227102549,
0.555446579214151, 0.578788883825157, 0.617822898150646), AI_7 = c(0.139608768263461,
0.165583663096789, 0.326959508587122, 0.221739297198209, 0.160657663051105,
0.107439748199699, 0.117594125364214, 0.133528520361788, 0.117950354159875,
0.131428192187155, 0.125355403562937, 0.119185646272255, 0.196285453922129,
0.167061057207379, 0.169855099745761, 0.141077126343563, 0.078433720675593,
0.0999303057993443, 0.0798045801131668, 0.0331137028671696, 0.0920945831761988,
0.0233052285173748, 0, 0, 0, 0.00876293044107867, 0, 0.109134564970416,
0.110323312017635, 0.117772975747077), ID = c("ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1"
)), row.names = c(NA, 30L), class = "data.frame")
轉載請註明出處,本文鏈接:https://www.uj5u.com/ruanti/512812.html
標籤:r变量二进制临界点
上一篇:Flutter運行失敗[INSTALL_FAILED_OLDER_SDK]需要更新的sdk版本#31(當前版本是#30)
下一篇:如何控制資料框中列的位置?
