如何根據日期間隔拆分或剪切資料框-有解無憂

我得到了一個大資料檔案 (x)，其中包含資料框、頻率、檔案名、標題和缺失值等屬性。資料框（data）有 4 列（日期、X、Y 和 Z），包含超過 2000 萬個資料點。我想根據日期間隔將資料框拆分為新的資料框。例如，2000 萬個資料點的范圍從 2016-09-28 09:30:00 到 2016-10-06 20:30:00，而我只想保留日期在 2016-09-30 08:00:00 至 2016-10-02 20:15:00 之間的資料點 X、Y 和 Z。我嘗試使用以下代碼：

這是創建一個僅包含資料而不包含其他屬性的新資料框

# Pull the raw sample table out of the loaded object `x`.
df1 <- x$data
# Inspect the first timestamp only: the result is printed, not assigned, so
# df1$time itself is left untouched by this call.
as.POSIXct(df1$time[1], format = "%Y-%m-%dT%H:%M")

然后我嘗試創建一個日期間隔，其中新的 df2 應該只包含該日期區域內的值

# Keep only the rows whose timestamp lies in (start, end].
# The bounds are built as POSIXct with an explicit time zone instead of bare
# strings: when a POSIXct column is compared with a character value, R converts
# the string with as.POSIXct() in the session's local time zone, which shifts
# the window whenever that zone differs from the one the data is stored in.
# NOTE(review): "GMT" matches the tzone shown in the sample dput() of the data;
# confirm it against attr(df1$time, "tzone") for the full data set.
df2 <- df1[
  df1$time > as.POSIXct("2016-09-30 09:30:00", tz = "GMT") &
    df1$time <= as.POSIXct("2016-10-03 20:15:00", tz = "GMT"),
]

但是，新資料框並沒有從 09:30:01 開始、在 20:14:59 結束。所以問題是：我該如何修改代碼，才能只得到落在這個間隔之內的資料？

structure(list(time = structure(c(1475220600.03333, 1475220600.06667, 
1475220600.1, 1475220600.13333, 1475220600.16667, 1475220600.2, 
1475220600.23333, 1475220600.26667, 1475220600.3, 1475220600.33333, 
1475220600.36667, 1475220600.4, 1475220600.43333, 1475220600.46667, 
1475220600.5, 1475220600.53333, 1475220600.56667, 1475220600.6, 
1475220600.63333, 1475220600.66667, 1475220600.7, 1475220600.73333, 
1475220600.76667, 1475220600.8, 1475220600.83333, 1475220600.86667, 
1475220600.9, 1475220600.93333, 1475220600.96667, 1475220601, 
1475220601.03333, 1475220601.06667, 1475220601.1, 1475220601.13333, 
1475220601.16667, 1475220601.2, 1475220601.23333, 1475220601.26667, 
1475220601.3, 1475220601.33333), class = c("POSIXct", "POSIXt"
), tzone = "GMT"), X = c(0.039, 0.043, 0.043, 0.043, 0.039, 0.043, 
0.035, 0.039, 0.039, 0.043, 0.039, 0.039, 0.043, 0.043, 0.035, 
0.043, 0.035, 0.043, 0.035, 0.039, 0.039, 0.043, 0.043, 0.039, 
0.035, 0.035, 0.039, 0.039, 0.039, 0.031, 0.035, 0.035, 0.035, 
0.039, 0.035, 0.035, 0.039, 0.035, 0.039, 0.043), Y = c(0.016, 
0.012, 0.012, 0.02, 0.016, 0.02, 0.016, 0.012, 0.012, 0.02, 0.012, 
0.016, 0.012, 0.016, 0.02, 0.02, 0.012, 0.02, 0.016, 0.012, 0.02, 
0.012, 0.02, 0.023, 0.016, 0.016, 0.016, 0.02, 0.016, 0.012, 
0.016, 0.012, 0.016, 0.012, 0.016, 0.016, 0.02, 0.016, 0.012, 
0.012), Z = c(-0.977, -0.977, -0.969, -0.977, -0.969, -0.969, 
-0.977, -0.969, -0.973, -0.965, -0.973, -0.977, -0.977, -0.973, 
-0.969, -0.977, -0.973, -0.973, -0.973, -0.977, -0.973, -0.969, 
-0.969, -0.969, -0.973, -0.969, -0.969, -0.973, -0.973, -0.977, 
-0.973, -0.969, -0.973, -0.973, -0.973, -0.977, -0.973, -0.977, 
-0.973, -0.973)), subject_name = "1", time_zone = "02:00:00", missingness = structure(list(
    time = structure(c(1475747248, 1475747249, 1475747250, 1475747292, 
    1475747293, 1475747294), class = c("POSIXct", "POSIXt"), tzone = "GMT"), 
    n_missing = c(30L, 30L, 1230L, 30L, 30L, 32490L)), class = "data.frame", row.names = c("1475747248", 
"1475747249", "1475747250", "1475747292", "1475747293", "1475747294"
)), old_version = FALSE, firmware = "1.5.0", last_sample_time = structure(1475748377, tzone = "GMT", class = c("POSIXct", 
"POSIXt")), serial_prefix = "TAS", sample_rate = 30L, acceleration_min = "-8.0", acceleration_max = "8.0", header = structure(list(
    Field = c("Serial Number", "Device Type", "Firmware", "Battery Voltage", 
    "Sample Rate", "Start Date", "Stop Date", "Last Sample Time", 
    "TimeZone", "Download Date", "Board Revision", "Unexpected Resets", 
    "Acceleration Scale", "Acceleration Min", "Acceleration Max", 
    "Mass", "Age", "Limb", "DateOfBirth", "Subject Name", "Serial Prefix"
    ), Value = c(`Serial Number` = "TAS1E44150325", `Device Type` = "Link", 
    Firmware = "1.5.0", `Battery Voltage` = "3,88", `Sample Rate` = "30", 
    `Start Date` = "2016-09-28 08:00:00", `Stop Date` = "2016-10-07 17:00:00", 
    `Last Sample Time` = "2016-10-06 10:06:17", TimeZone = "02:00:00", 
    `Download Date` = "2016-10-06 10:06:17", `Board Revision` = "5", 
    `Unexpected Resets` = "0", `Acceleration Scale` = "256", 
    `Acceleration Min` = "-8.0", `Acceleration Max` = "8.0", 
    Mass = "77,1107028999572", Age = "22", Limb = "Waist", DateOfBirth = "627890912111111100", 
    `Subject Name` = "1", `Serial Prefix` = "TAS")), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -21L)), start_time = structure(1475049600, tzone = "GMT", class = c("POSIXct", 
"POSIXt")), stop_time = structure(1475859600, tzone = "GMT", class = c("POSIXct", 
"POSIXt")), total_records = 20930820L, bad_samples = FALSE, row.names = 5130002:14067001, class = c("activity_df", 
"activity_df", "data.frame"), n_head = 40)

uj5u.com熱心網友回復：

要擴展我的評論...

您的測試資料框不包含您指定的日期范圍內的任何資料，而且「嗯，它仍然不起作用」這樣的描述所提供的資訊並不多。所以我會用一些人工資料來演示。

library(tidyverse)
library(lubridate)

# Artificial demo data: a single `time` column holding one timestamp every
# 120 seconds (two minutes) from 2016-09-30 07:30:00 to 2016-10-06 20:30:00.
df1 <- tibble(
  time = seq(
    from = ymd_hms("2016-09-30 07:30:00"),
    to = ymd_hms("2016-10-06 20:30:00"),
    by = 120
  )
)
# Print the tibble to check the generated range.
df1
# A tibble: 4,711 × 1
   time               
   <dttm>             
 1 2016-09-30 07:30:00
 2 2016-09-30 07:32:00
 3 2016-09-30 07:34:00
 4 2016-09-30 07:36:00
 5 2016-09-30 07:38:00
 6 2016-09-30 07:40:00
 7 2016-09-30 07:42:00
 8 2016-09-30 07:44:00
 9 2016-09-30 07:46:00
10 2016-09-30 07:48:00
# … with 4,701 more rows

因此，共有 4711 行日期時間，介於 2016-09-30 07:30:00 和 2016-10-06 20:30:00 之間，每兩分鐘一筆。

根據需要過濾。

# Keep only the rows strictly inside the requested window (both bounds are
# exclusive). Conditions passed to filter() as separate arguments are combined
# with AND. ymd_hms() parses in UTC unless a tz is supplied.
df1 %>%
  filter(
    time > ymd_hms("2016-09-30 09:30:00"),
    time < ymd_hms("2016-10-03 20:15:00")
  )
# A tibble: 2,482 × 1
   time               
   <dttm>             
 1 2016-09-30 09:32:00
 2 2016-09-30 09:34:00
 3 2016-09-30 09:36:00
 4 2016-09-30 09:38:00
 5 2016-09-30 09:40:00
 6 2016-09-30 09:42:00
 7 2016-09-30 09:44:00
 8 2016-09-30 09:46:00
 9 2016-09-30 09:48:00
10 2016-09-30 09:50:00
# … with 2,472 more rows

轉載請註明出處，本文鏈接：https://www.uj5u.com/shujuku/516708.html

標籤：r日期分裂间隔切

上一篇：錯誤：ENOENT：沒有這樣的檔案或目錄，scandirdiscordbot

下一篇：Android(Kotlin)MaterialDesignDateRangePicker如何正確設定最小和最大日期？