我正在嘗試用多種方式在 R 中實作類似 vlookup 的查找:根據特定時間查找對應的值。
[df1]
Start_time
18:48:07
18:48:08
18:48:09
18:48:10
18:48:11
18:48:12
...
[df2]
Time_10min Time_Groupings
0:00 00:00:00>0:09:59
0:10 00:10:00>0:19:59
0:20 00:20:00>0:29:59
0:30 00:30:00>0:39:59
0:40 00:40:00>0:49:59
0:50 00:50:00>0:59:59
1:00 01:00:00>1:09:59
1:10 01:10:00>1:19:59
...
基本上,我希望利用 df2 判斷每個“Start_time”落在“Time_Groupings”變數的哪一個區間內,并在 df1 中新增一個名為 Grouping 的列,填入與 Start_time 對應的 Time_Groupings 值,就像 Excel 中的“vlookup”函式一樣。所以,期望的輸出是這樣的:
Start_time Grouping
18:48:07 18:40:00>18:49:59
18:48:08 18:40:00>18:49:59
18:48:09 18:40:00>18:49:59
18:48:10 18:40:00>18:49:59
18:48:11 18:40:00>18:49:59
18:48:12 18:40:00>18:49:59
18:48:13 18:40:00>18:49:59
...
我已經嘗試過這些功能,但到目前為止還沒有運氣。
使用 findInterval:
# Asker's attempt: take the first piece of each Time_Groupings string as the
# bin's lower bound and use findInterval() to pick the bins row for each
# Start_time.
# NOTE(review): the sample Time_Groupings values are separated by '>' rather
# than '-', so this split presumably leaves each string whole -- confirm.
# NOTE(review): as.numeric() on a clock string such as "00:00:00" yields NA;
# the times likely need converting to seconds before findInterval() -- confirm.
df$grouping <- bins$Time_10min[
findInterval(df$Start_time,
sapply(strsplit(bins$Time_Groupings, '-'),
function(x) as.numeric(x[1])))]
使用資料表:
# Asker's attempt: rolling join of df onto bins with roll = "nearest".
# The result is assigned by reference to a column named `df` inside df.
# NOTE(review): `on` is given the values df$Start_time; data.table's `on`
# argument expects column names, so this likely errors -- confirm.
setDT(df)[, df := setDT(bins)[df, bins , on = df$Start_time, roll = "nearest"]]
我沒有使用 merge 和 left_join,因為我的印象是它們只能用于依據單一列進行查找,而我這裡是要在一張查找表(含 Time_10min 和 Time_Groupings 兩列)中查找“Start_time”變數所屬的區間。
任何幫助表示贊賞。謝謝!
uj5u.com熱心網友回復:
不需要查找表的解決方案,直接由 df1 計算出所需的輸出:
library(data.table)
library(lubridate)
# Convert df1 to a data.table in place so `:=` can add columns by reference.
setDT(df1)
# Start of the 10-minute bin: the number of whole 10-minute periods contained
# in Start_time, multiplied by 10 and stored as a duration in minutes.
df1[, group_from := duration(
  10L * hms(Start_time) %/% period(10L, units = "minutes"),
  units = "minutes"
)]
# End of the bin: one second before the following bin starts.
df1[, group_to := as.period(group_from + dminutes(10L) - dseconds(1L))]
df1[, group_from := as.period(group_from)]
# Period objects are S4, so their components are read with `@`.
# `%02d` zero-pads every field to two digits (e.g. minute 9 -> "09").
df1[, .(
  Start_time,
  Time_Groupings = sprintf(
    "%02d:%02d:%02d>%02d:%02d:%02d",
    group_from@hour, group_from@minute, 0L,
    group_to@hour, group_to@minute, 59L
  )
)]
# Start_time Time_Groupings
# 1: 00:18:07 00:10:00>00:19:59
# 2: 00:28:08 00:20:00>00:29:59
# 3: 00:38:09 00:30:00>00:39:59
# 4: 00:48:10 00:40:00>00:49:59
# 5: 00:48:11 00:40:00>00:49:59
# 6: 00:48:12 00:40:00>00:49:59
uj5u.com熱心網友回復:
您可以將各時間段的起止時間轉換為 numeric(秒數),然後執行非等值聯結(non-equi join)。
# Example data: the start times to classify (df1) and the 10-minute lookup
# table (df2). The clock strings are read as character columns;
# stringsAsFactors = FALSE keeps that true on R < 4.0 as well, so they can be
# split and parsed later.
df1 <- read.table(text = "Start_time
00:18:07
00:28:08
00:38:09
00:48:10
00:48:11
00:48:12", header = TRUE, stringsAsFactors = FALSE)
df2 <- read.table(text = "Time_10_min Time_Groupings
0:00 00:00:00>0:09:59
0:10 00:10:00>0:19:59
0:20 00:20:00>0:29:59
0:30 00:30:00>0:39:59
0:40 00:40:00>0:49:59
0:50 00:50:00>0:59:59
1:00 01:00:00>1:09:59
1:10 01:10:00>1:19:59", header = TRUE, stringsAsFactors = FALSE)
library(data.table)
library(lubridate)
# Turn both data frames into data.tables by reference.
setDT(df1)
setDT(df2)
# Lookup key: each start time as a number of seconds.
df1[, starttime := as.numeric(lubridate::hms(Start_time))]
# Split every "from>to" label into its two bounds, then convert both bounds
# to seconds so they are comparable with starttime.
df2[, c("start", "end") := tstrsplit(Time_Groupings, ">", fixed = TRUE)]
df2[, `:=`(
  start = as.numeric(lubridate::hms(start)),
  end = as.numeric(lubridate::hms(end))
)]
# Non-equi join: for each df1 row, keep the df2 bin with start <= time <= end.
df2[
  df1,
  .(Time_Groupings, Start_time),
  on = .(start <= starttime, end >= starttime)
]
#> Time_Groupings Start_time
#> <char> <char>
#> 1: 00:10:00>0:19:59 00:18:07
#> 2: 00:20:00>0:29:59 00:28:08
#> 3: 00:30:00>0:39:59 00:38:09
#> 4: 00:40:00>0:49:59 00:48:10
#> 5: 00:40:00>0:49:59 00:48:11
#> 6: 00:40:00>0:49:59 00:48:12
uj5u.com熱心網友回復:
解決您的問題的一種可能方法:
library(data.table)
# Convert df1 in place so the new column can be added by reference.
setDT(df1)
df1[, grouping := {
  # Lower bound of each bin: the first 8 characters ("HH:MM:SS") of the label.
  bin_starts <- as.ITime(substr(df2$Time_Groupings, 1, 8))
  # findInterval() gives, per start time, the index of the last bin whose
  # lower bound is <= that time; use it to pick the bin label.
  bin_index <- findInterval(as.ITime(Start_time), bin_starts)
  df2$Time_Groupings[bin_index]
}]
Start_time grouping
1: 00:18:07 00:10:00>0:19:59
2: 00:28:08 00:20:00>0:29:59
3: 00:38:09 00:30:00>0:39:59
4: 00:48:10 00:40:00>0:49:59
5: 00:48:11 00:40:00>0:49:59
6: 00:48:12 00:40:00>0:49:59
轉載請註明出處,本文鏈接:https://www.uj5u.com/ruanti/514496.html
標籤:r excel datetime data.table lookup
上一篇:VBA用戶表單串列框右鍵選單?
