我正在嘗試將我的資料集從寬格式轉換為長格式,但結果不如預期。我的資料集包含以下各列:rowid, arrest1, arrest2, ..., arrest10, lien1, lien2, ..., lien10,看起來像這樣:
rowid arrest1 arrest2 ... lien1 lien2 ...
1 1/1/2008 NA 2/2/2009 NA
我想得到一個長格式的資料集,其中有一個取值為 1-10 的時間變數 time,以及分別包含日期的兩個變數 arrest 和 lien。我嘗試了以下代碼,但我的時間變數取值為 0-9,而且除了 arrest 和 lien 變數之外,還多出了 arrest1 和 lien2。names_pattern 引數肯定有問題。
# Asker's attempt, kept as posted because it reproduces the reported problem:
# the greedy `(\\w+)` also swallows the first digit of names such as
# "arrest10", leaving `(\\d{1,2})` with only "0". This yields time values
# 0-9 and stray value columns such as arrest1.
df_long <- df_wide %>%
  select(rowid, lien1:lien10, arrest1:arrest10) %>%
  pivot_longer(-rowid,
               names_to = c(".value", "time"),
               names_pattern = "(\\w+).*?(\\d{1,2})")
以下是一些示例資料:
# Sample data (dput()-style literal): a 5-row tibble with an integer `rowid`
# column plus Date columns arrest1..arrest10 and lien1..lien10, in which every
# date value is NA. The literal is not assigned to a name here; presumably it
# should be stored as `df_wide` before running the snippets -- TODO confirm.
structure(list(rowid = c(9317L, 31447L, 37939L, 40198L, 19346L
), arrest1 = structure(c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), class = "Date"), arrest2 = structure(c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), class = "Date"), arrest3 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), arrest4 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), arrest5 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), arrest6 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), arrest7 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), arrest8 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), arrest9 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), arrest10 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), lien1 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), lien2 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), lien3 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), lien4 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), lien5 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), lien6 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), lien7 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), lien8 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), lien9 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date"), lien10 = structure(c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), class = "Date")), row.names = c(NA,
-5L), class = c("tbl_df", "tbl", "data.frame"))
uj5u.com熱心網友回復:
使用 names_sep(搭配正則運算式環視:names_sep = "(?<=\\D)(?=\\d)"),或在 names_pattern 中以捕獲組擷取(names_pattern = "(\\D+)(\\d+)")。這里我們將一個或多個非數字(\\D+)捕獲為一個組((...)),再將其后的一個或多個數字(\\d+)捕獲為另一個組;這兩個組依序對應 names_to 向量中的元素,即 ".value" 會以列名的前綴('arrest'、'lien')作為存放值的新列名,而 "grp" 則是用列名中的后綴數字建立的新列。
library(tidyr)

# Reshape every column except `rowid` from wide to long.
# names_pattern splits each column name into two capture groups:
#   (\\D+) - one or more non-digits -> ".value" (becomes columns arrest / lien)
#   (\\d+) - one or more digits     -> "grp"    (the numeric suffix, as <chr>)
# The `+` quantifiers are required; without them no column name matches.
pivot_longer(
  df_wide,
  cols = -rowid,
  names_to = c(".value", "grp"),
  names_pattern = "(\\D+)(\\d+)"
)
-輸出
# A tibble: 50 × 4
rowid grp arrest lien
<int> <chr> <date> <date>
1 9317 1 NA NA
2 9317 2 NA NA
3 9317 3 NA NA
4 9317 4 NA NA
5 9317 5 NA NA
6 9317 6 NA NA
7 9317 7 NA NA
8 9317 8 NA NA
9 9317 9 NA NA
10 9317 10 NA NA
# … with 40 more rows
轉載請註明出處,本文鏈接:https://www.uj5u.com/qiye/388515.html
下一篇:將列名作為公式中的函式引數傳遞
