我有兩個名為 df1 和 df2 的資料框。這兩個資料框都包含用於教育測驗的不同類型的題目,每道題目都有其測量的內容(Measures)和特定的題型(Format)。
# df2: one row per Measures/Format combination, with Number holding how many
# questions of that combination are requested. Row names are kept as the
# original non-consecutive labels "1", "2", "4", "5", "6".
df2 <- data.frame(
  Measures = c(rep("space and shape", 4), "asdaf"),
  Format = c(
    "Constructed Response Expert",
    "Constructed Response Manual",
    "Simple Multiple Choice",
    "Constructed Response Auto-coded",
    "asfas"
  ),
  Number = c(40, 1, 22, 1, 0),
  row.names = c("1", "2", "4", "5", "6"),
  stringsAsFactors = FALSE
)
# df1: data frame with 109 rows and two character columns, Measures and
# Format (there is no Number column here). Row names are the automatic
# integer sequence, stored in the compact c(NA, -109L) form.
# The surrounding text treats each row as one available question.
df1<-structure(list(Measures = c("space and shape", "space and shape",
"space and shape", "space and shape", "space and shape", "change and relationships",
"change and relationships", "change and relationships", "change and relationships",
"change and relationships", "space and shape", "space and shape",
"space and shape", "space and shape", "uncertainty and data",
"quantity", "uncertainty and data", "uncertainty and data", "uncertainty and data",
"quantity", "change and relationships", "change and relationships",
"space and shape", "space and shape", "space and shape", "quantity",
"quantity", "quantity", "quantity", "quantity", "uncertainty and data",
"change and relationships", "quantity", "quantity", "uncertainty and data",
"change and relationships", "uncertainty and data", "quantity",
"change and relationships", "change and relationships", "quantity",
"quantity", "quantity", "quantity", "quantity", "quantity", "change and relationships",
"uncertainty and data", "change and relationships", "uncertainty and data",
"uncertainty and data", "uncertainty and data", "quantity", "quantity",
"quantity", "space and shape", "change and relationships", "quantity",
"space and shape", "space and shape", "change and relationships",
"change and relationships", "uncertainty and data", "uncertainty and data",
"quantity", "change and relationships", "quantity", "change and relationships",
"space and shape", "quantity", "quantity", "quantity", "space and shape",
"space and shape", "space and shape", "uncertainty and data",
"uncertainty and data", "uncertainty and data", "change and relationships",
"change and relationships", "change and relationships", "uncertainty and data",
"uncertainty and data", "uncertainty and data", "change and relationships",
"change and relationships", "change and relationships", "change and relationships",
"change and relationships", "uncertainty and data", "space and shape",
"space and shape", "uncertainty and data", "uncertainty and data",
"uncertainty and data", "uncertainty and data", "uncertainty and data",
"quantity", "quantity", "space and shape", "space and shape",
"space and shape", "space and shape", "change and relationships",
"space and shape", "space and shape", "quantity", "change and relationships",
"change and relationships"), Format = c("Constructed Response Expert",
"Constructed Response Manual", "Constructed Response Expert",
"Simple Multiple Choice", "Constructed Response Auto-coded",
"Constructed Response Expert", "Constructed Response Expert",
"Constructed Response Expert", "Complex Multiple Choice", "Complex Multiple Choice",
"Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert",
"Constructed Response Expert", "Complex Multiple Choice", "Constructed Response Manual",
"Simple Multiple Choice", "Complex Multiple Choice", "Simple Multiple Choice",
"Constructed Response Manual", "Constructed Response Manual",
"Constructed Response Expert", "Simple Multiple Choice", "Constructed Response Expert",
"Constructed Response Auto-coded", "Constructed Response Manual",
"Complex Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice",
"Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice",
"Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Auto-coded",
"Constructed Response Expert", "Constructed Response Manual",
"Constructed Response Manual", "Constructed Response Expert",
"Constructed Response Manual", "Complex Multiple Choice", "Constructed Response Expert",
"Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual",
"Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice",
"Constructed Response Manual", "Simple Multiple Choice", "Simple Multiple Choice",
"Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Manual",
"Simple Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert",
"Constructed Response Manual", "Constructed Response Manual",
"Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Expert",
"Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice",
"Constructed Response Expert", "Constructed Response Manual",
"Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice",
"Constructed Response Manual", "Constructed Response Expert",
"Complex Multiple Choice", "Complex Multiple Choice", "Constructed Response Expert",
"Constructed Response Expert", "Constructed Response Manual",
"Constructed Response Expert", "Constructed Response Manual",
"Constructed Response Expert", "Constructed Response Expert",
"Constructed Response Manual", "Constructed Response Expert",
"Constructed Response Expert", "Simple Multiple Choice", "Simple Multiple Choice",
"Constructed Response Manual", "Constructed Response Expert",
"Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual",
"Complex Multiple Choice", "Constructed Response Manual", "Constructed Response Manual",
"Complex Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice",
"Simple Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice",
"Constructed Response Expert", "Constructed Response Manual",
"Constructed Response Manual", "Constructed Response Expert",
"Constructed Response Manual", "Constructed Response Expert",
"Simple Multiple Choice", "Constructed Response Manual", "Complex Multiple Choice"
)), row.names = c(NA, -109L), class = "data.frame")
我用下面的程式碼,依照 df2 指定的數量(Number)從 df1 中取出對應的列,而且運作得很好。
library(tidyverse)
# For every Measures/Format combination present in both data frames, keep at
# most `Number` rows of df1 (fewer if df1 does not hold that many).
# The join keys are left implicit on purpose so the "Joining, by = ..." message
# is still printed; the shared columns are Measures and Format.
inner_join(df1, df2) %>%
  group_by(Measures, Format) %>%
  # seq_len() is passed unnamed (slice() has no `n` argument) and, unlike
  # 1:min(Number), yields an empty index when Number is 0 instead of c(1, 0),
  # so a combination with zero requested questions keeps no rows.
  # Indices beyond the group size are ignored by slice().
  slice(seq_len(min(Number))) %>%
  ungroup()
Joining, by = c("Measures", "Format")
# A tibble: 17 x 3
Measures Format Number
<chr> <chr> <dbl>
1 space and shape Constructed Response Auto-coded 1
2 space and shape Constructed Response Expert 40
3 space and shape Constructed Response Expert 40
4 space and shape Constructed Response Expert 40
5 space and shape Constructed Response Expert 40
6 space and shape Constructed Response Expert 40
7 space and shape Constructed Response Expert 40
8 space and shape Constructed Response Expert 40
9 space and shape Constructed Response Expert 40
10 space and shape Constructed Response Expert 40
11 space and shape Constructed Response Manual 1
12 space and shape Simple Multiple Choice 22
13 space and shape Simple Multiple Choice 22
14 space and shape Simple Multiple Choice 22
15 space and shape Simple Multiple Choice 22
16 space and shape Simple Multiple Choice 22
17 space and shape Simple Multiple Choice 22
But I also want to find out how many of these are NOT present in df1. For example, I obviously don't have 40 questions of the space and shape / Constructed Response Expert type. I want to know, for each row of df2, how many of the requested questions are NOT available in df1. Only 9 questions of the space and shape / Constructed Response Expert type are available, but I wanted 40, so I should get a data frame telling me that I am missing 31 questions of that type.
uj5u.com熱心網友回復:
怎么樣,
# anti_join() keeps the rows of df1 whose Measures/Format combination has no
# match in df2; the Number column of df2 plays no part in the result.
# NOTE(review): this lists df1 rows that are absent from df2. It does not
# compute, per df2 row, how many requested questions are missing from df1
# (Number minus the rows available in df1) — confirm this is the intent.
df1 %>% anti_join(df2)
Joining, by = c("Measures", "Format")
Measures Format
1 change and relationships Constructed Response Expert
2 change and relationships Constructed Response Expert
3 change and relationships Constructed Response Expert
4 change and relationships Complex Multiple Choice
5 change and relationships Complex Multiple Choice
6 space and shape Complex Multiple Choice
7 uncertainty and data Complex Multiple Choice
8 quantity Constructed Response Manual
轉載請註明出處,本文鏈接:https://www.uj5u.com/caozuo/368710.html
標籤:r
