我遇到的難題是:如何把嵌套串列(nested list)合併成一個資料框(data frame)以便進行處理。
編輯:下面是我嘗試將它們展平之前的原始嵌套資料的示例。
list(list(list(url = "https://lda.senate.gov/api/v1/filings/5e4bbd96-db94-4ea3-a310-7a7fb1e93fff/",
filing_uuid = "5e4bbd96-db94-4ea3-a310-7a7fb1e93fff", filing_type = "Q1",
filing_type_display = "1st Quarter - Report", filing_year = 2021L,
filing_period = "first_quarter", filing_period_display = "1st Quarter (Jan 1 - Mar 31)",
filing_document_url = "https://lda.senate.gov/filings/public/filing/5e4bbd96-db94-4ea3-a310-7a7fb1e93fff/print/",
filing_document_content_type = "text/html", income = "15000.00",
expenses = NULL, expenses_method = NULL, expenses_method_display = NULL,
posted_by_name = "Christian Smith", dt_posted = "2021-04-30T10:20:59.217000-04:00",
termination_date = NULL, registrant = list(id = 8214L, url = "https://lda.senate.gov/api/v1/registrants/8214/",
house_registrant_id = 31113L, name = "CAPSTONE NATIONAL PARTNERS",
description = "public affairs", address_1 = "501 Capitol Court NE",
address_2 = "Suite 100", address_3 = NULL, address_4 = NULL,
city = "Washington", state = "DC", state_display = "District of Columbia",
zip = "20002", country = "US", country_display = "United States of America",
ppb_country = "US", ppb_country_display = "United States of America",
contact_name = "", contact_telephone = "", dt_updated = "2022-01-13T14:47:31.828778-05:00"),
client = list(id = 111342L, url = "https://lda.senate.gov/api/v1/clients/111342/",
client_id = 303L, name = "OSHKOSH CORPORATION", general_description = "manufacturing",
client_government_entity = FALSE, client_self_select = NULL,
state = "WI", state_display = "Wisconsin", country = "US",
country_display = "United States of America", ppb_state = "WI",
ppb_state_display = "Wisconsin", ppb_country = "US",
ppb_country_display = "United States of America", effective_date = "2016-04-01"),
lobbying_activities = list(list(general_issue_code = "BUD",
general_issue_code_display = "Budget/Appropriations",
description = "FY22 Appropriations", foreign_entity_issues = "",
lobbyists = list(list(lobbyist = list(id = 63767L, prefix = NULL,
prefix_display = NULL, first_name = "WILLIAM", nickname = NULL,
middle_name = NULL, last_name = "STONE", suffix = NULL,
suffix_display = NULL), covered_position = "Chief of Staff, Dave Obey: House Appropriations Committee",
new = FALSE)), government_entities = list(list(id = 2L,
name = "HOUSE OF REPRESENTATIVES"), list(id = 1L,
name = "SENATE")))), conviction_disclosures = list(),
foreign_entities = list(), affiliated_organizations = list())),
list(list(url = "https://lda.senate.gov/api/v1/filings/177b995a-3be2-4127-b962-795e76974617/",
filing_uuid = "177b995a-3be2-4127-b962-795e76974617",
filing_type = "Q1", filing_type_display = "1st Quarter - Report",
filing_year = 2021L, filing_period = "first_quarter",
filing_period_display = "1st Quarter (Jan 1 - Mar 31)",
filing_document_url = "https://lda.senate.gov/filings/public/filing/177b995a-3be2-4127-b962-795e76974617/print/",
filing_document_content_type = "text/html", income = "22500.00",
expenses = NULL, expenses_method = NULL, expenses_method_display = NULL,
posted_by_name = "Doyce Boesch", dt_posted = "2021-04-30T11:22:12.233000-04:00",
termination_date = NULL, registrant = list(id = 400677020L,
url = "https://lda.senate.gov/api/v1/registrants/400677020/",
house_registrant_id = NULL, name = "MR. DOYCE BOESCH",
description = "Government Relations", address_1 = "4515 W Street NW",
address_2 = NULL, address_3 = NULL, address_4 = NULL,
city = "Washington", state = "DC", state_display = "District of Columbia",
zip = "20007", country = "US", country_display = "United States of America",
ppb_country = "US", ppb_country_display = "United States of America",
contact_name = "DOYCE BOESCH", contact_telephone = " 1 202-731-9995",
dt_updated = "2022-01-13T14:59:12.412096-05:00"),
client = list(id = 194057L, url = "https://lda.senate.gov/api/v1/clients/194057/",
client_id = 75L, name = "INVESTMENT COMPANY INSTITUTE",
general_description = "Stock Market and Financial Services",
client_government_entity = FALSE, client_self_select = FALSE,
state = "DC", state_display = "District of Columbia",
country = "US", country_display = "United States of America",
ppb_state = NULL, ppb_state_display = NULL, ppb_country = "US",
ppb_country_display = "United States of America",
effective_date = "2012-07-01"), lobbying_activities = list(
list(general_issue_code = "FIN", general_issue_code_display = "Financial Institutions/Investments/Securities",
description = "providing awareness of members positions",
foreign_entity_issues = "", lobbyists = list(
list(lobbyist = list(id = 52828L, prefix = NULL,
prefix_display = NULL, first_name = "DOYCE",
nickname = NULL, middle_name = NULL, last_name = "BOESCH",
suffix = NULL, suffix_display = NULL), covered_position = NULL,
new = FALSE)), government_entities = list(
list(id = 2L, name = "HOUSE OF REPRESENTATIVES"),
list(id = 1L, name = "SENATE")))), conviction_disclosures = list(),
foreign_entities = list(), affiliated_organizations = list())))
所以,把這份高度嵌套的資料命名為
my.data
然后我嘗試把它展平:
# Flatten only the outermost level of each top-level element of `my.data`;
# deeper sub-lists are kept as lists. Each result is wrapped in a
# one-element list.
flat.df <- lapply(my.data, function(i) list(unlist(i, recursive = FALSE)))
這樣做部分有效,但 flat.df 串列中的每個元素仍然包含多個子串列,例如“lobbying_activities”、“lobbyists”,而且它們沒有被展開(我需要的正是里面的資訊)。
但是,如果我把 recursive 參數設定為 TRUE,展平后的串列中就會出現重復的列名;最令人沮喪的是,我看到有些列的內容錯位了(例如,人名跑進了費用列)。
理想情況下,我想在每個表中展平這樣的子串列,并將整個內容放在一個表中,然后再把它們合併成一個資料框:
# Row-bind the flattened elements, then coerce the result to a data frame.
df <- as.data.frame(do.call(rbind, flat.df))
uj5u.com熱心網友回復:
tibble 是一種很好的格式,因為它支持嵌套的 data.frame。我的目標是得到一個 2 行、寬格式的 tibble;其中每個嵌套串列元素都各自成為一個 data.frame,我們可以在以后需要時再對其進行操作。我會這樣做:
library(tidyverse)
# Remove the outer wrapping level of the input list.
# NOTE(review): assumes `l` holds the nested list shown in the question
# (named `my.data` there) -- confirm before running.
# A new name is used instead of overwriting `l`, so re-running this line does
# not flatten the data one level further each time.
filings <- unlist(l, recursive = FALSE)

# Positions of the fields that are themselves lists, taken from the first
# element; the same positions are then applied to every element.
ind_to_nest <- which(map_lgl(filings[[1]], is.list))

# Split every element into its non-list fields and its list fields.
# setdiff() replaces the negative index because `x[-integer(0)]` selects
# nothing, which would drop every field whenever no field is a list.
non_tbl <- map(filings, ~ .x[setdiff(seq_along(.x), ind_to_nest)])
tbl <- map(filings, ~ .x[ind_to_nest])

df <- bind_rows(non_tbl) %>%
  # row_number() stays valid for zero rows, where 1:n() would give c(1, 0).
  mutate(n = row_number(), .before = 1) %>%
  # Flatten each list field one level and column-bind the pieces; the
  # results are stored in a list-column named `data`.
  mutate(data = map(tbl, ~ map(.x, ~ flatten(.x) %>% bind_cols()))) %>%
  # Spread the list-column into one column per list field, without
  # simplifying the contents.
  unnest_wider(data, simplify = FALSE)
請注意,這確實會引發一堆警告。這是因為串列中存在名稱沖突。
#> New names:
#> * id -> id...5
#> * id -> id...10
可以通過指定命名策略來解決,或者通過重新考慮如何將資料讀入 R 以盡早解決命名沖突。
#> Outer names are only allowed for unnamed scalar atomic inputs
這有點難以解決,但這個問題是一個起點。
在進行分析時,可以視需要對各個子 tibble 再做一些清理,因為不同的任務需要不同的資料形狀。
轉載請註明出處,本文鏈接:https://www.uj5u.com/qiye/426864.html
上一篇:獲取父母所有后代的快速方法
