這是示例資料的樣子:
exp_data
# Name Greg Matt
# 1 Y.L[ 12,000]STISKDLITY.M NA L[ 12]
# 2 Y.L[ 12,000]STISKDLITY.M NA L[ 12]
輸入:
# Reproducible example data (dput-style literal): a 2-row data.frame.
# Name and Matt are factors that keep their full level sets; Greg is all NA.
exp_data <- structure(list(Name = structure(c(71L,71L), .Label = c("F.AM[ 15,995]KTKAAL.A", "F.AMKTKAAL.A", "F.EKIKAAY.L",
"F.EKIKAAYL.S", "F.NPTAGC[ 58,005]ASL[ 12,000]AKEM[ 12,000]F[ 1151,607].A",
"F.QGRVTM[ 15,995].T", "F.SGSNSGNTATL.T", "F.TGYY.M", "F.TNC[ 58,005]DF[ 1151,607]EKIKAAY.L",
"L.DKSITSL[ 370,222]Y.A", "L.DY[ 12,000]WGQGTL.V", "L.DYWGQGTL.V",
"L.EQVSQL.Q", "L.EQVSQLQGLW.R", "L.EWMGW.I", "L.ITY[ 1151,607]M[ 15,995]SGTKSTEF.N",
"L.KQQGGGLEVL.F", "L.KQQGGGLEVLF.Q", "L.L[ 504,270]KQQGGGLEVL.F",
"L.LKQQGGGL.E", "L.LKQQGGGLEVL.F", "L.QGLW.R", "L.RSDDTAVY.Y",
"L.RSDDTAVYY.C", "L.SRLRSDDTAVY.Y", "L.SRLRSDDTAVYY.C", "L.STISKDL[ 12,000]ITY.M",
"L.STISKDLITY.M", "L.STISKDLITY[ 1012,607]M[ 15,995].S", "L.STISKDLITY[ 12,000].M",
"L.STISKDLITY[ 12,000]M[ 386,228].S", "L.STISKDLITY[ 2918,448].M",
"L.STISKDLITY[ 762,322]M[ 15,995].S", "L.STISKDLITYM.S", "L.STISKDLITYM[ 1282,648].S",
"L.STISKDLITYM[ 1456,695].S", "L.STISKDLITYM[ 1490,759].S", "L.STISKDLITYM[ 371,206].S",
"L.TEIQSL.T", "L.TISRVEAGDEADY.Y", "L.TISRVEAGDEADY[ 12,000]Y.C",
"L.TISRVEAGDEADYY.C", "L.TISRVEAGDEADYY[ 12,000].C", "L.VTVSSGGGSEGGGSEGGGSEGGGSGSY.V",
"L.VTVSSGGGSEGGGSEGGGSEGGGSGSY[ 1239,661].V", "L.VTVSSGGGSEGGGSEGGGSEGGGSGSY[ 1987,847].V",
"L.VVY[ 1501,680]DDSDRPSGIPERF.S", "L.VVYDDSDRPSGIPERF.S", "M.KKARKSKVTTNKC[ 58,005]L[ 2909,467]EQVSQLQGL.W",
"M.SGTKSTEF.N", "M.TELDYW.G", "M.TRDTSISTAY.M", "M.TRDTSISTAY[ 12,000].M",
"M.TRDTSISTAYM.E", "M.TRDTSISTAYMEL.S", "W.GQGTL.V", "W.GQGTLVTVSSGGGSEGGGSEGGGSEGGGSGSY.V",
"W.GQGTLVTVSSGGGSEGGGSEGGGSEGGGSGSY[ 1239,661].V", "W.INPNSGGTNY.A",
"W.INPNSGGTNY[ 12,000].A", "W.VRQAPGQGL.E", "W.VRQAPGQGLEW.M",
"W.VRQAPGQGLEW[ 12,000]M[ 486,244].G", "W.VRQAPGQGLEWM.G", "W.Y[ 12,000]QQKPGQAPVLVVY.D",
"W.YQQKPGQAPVL.V", "W.YQQKPGQAPVL[ 12,000]VVY.D", "W.YQQKPGQAPVLVVY.D",
"Y.AQKF.Q", "Y.DDSDRPSGIPERF.S", "Y.L[ 12,000]STISKDLITY.M",
"Y.LSTISKDL.I", "Y.LSTISKDL[ 12,000]ITY.M", "Y.LSTISKDLITY.M",
"Y.M[ 12,000]SGTKSTEF.N", "Y.M[ 15,995]EL.S", "Y.M[ 15,995]ELSRL.R",
"Y.M[ 15,995]SGTKSTEF.N", "Y.MELSRL.R", "Y.MSGTKSTEF.N", "Y.QQKPGQAPVL.V",
"Y.QQKPGQAPVL[ 12,000]VVY.D", "Y.QQKPGQAPVL[ 12,000]VVYDDSDRPSGIPERF.S",
"Y.QQKPGQAPVLVVY.D", "Y.QQKPGQAPVLVVYDDSDRPSGIPERF.S", "Y.TFTGY.Y",
"Y.TFTGYY.M", "Y.TILDKSITSL.Y", "Y.VLTQPPSVSVAPGQTARITC[ 58,005]GGNNIGSKSVHW.Y",
"Y.WGQGTL.V", "Y.YMHW.V"), class = "factor"), Greg = c(NA,
NA), Matt = structure(c(6L, 6L), .Label = c("","C[ 58]", "C[ 58], F[ 1152]", "C[ 58], F[ 1152], L[ 12], M[ 12]",
"C[ 58], L[ 2909]", "L[ 12]", "L[ 370]", "L[ 504]", "M[ 12]",
"M[ 1283]", "M[ 1457]", "M[ 1491]", "M[ 16]", "M[ 16], Y[ 1013]",
"M[ 16], Y[ 1152]", "M[ 16], Y[ 762]", "M[ 371]", "M[ 386], Y[ 12]",
"M[ 486], W[ 12]", "Y[ 12]", "Y[ 1240]", "Y[ 1502]", "Y[ 1988]",
"Y[ 2918]"), class = "factor")), row.names = 1:2, class = "data.frame")
我想專注於名為 Name 的列,並在所有行中只保留字母。資料框非常長,各行在字串的開頭、中間、結尾以及特定字母之間包含各種型別的字元(數字、點、問號等)。我只想在所有這些行中保留字母。
uj5u.com熱心網友回復:
使用gsub:
# Strip every character that is not an ASCII letter from Name.
# The bracket expression must not be followed by a space: "[^A-Za-z] " would
# only match a non-letter that is immediately followed by a space, leaving
# digits, dots, commas and "]" in place.
exp_data$Name <- gsub("[^A-Za-z]", "", exp_data$Name)
uj5u.com熱心網友回復:
# Keep the original Name column and store the letters-only version beside it.
exp_data[["clean_name"]] <- gsub("[^a-zA-Z]", "", exp_data[["Name"]])
exp_data
# Name Greg Matt clean_name
# 1 Y.L[ 12,000]STISKDLITY.M NA L[ 12] YLSTISKDLITYM
# 2 Y.L[ 12,000]STISKDLITY.M NA L[ 12] YLSTISKDLITYM
uj5u.com熱心網友回復:
Gregor 和 Tim 解決方案的另一個變體:
# Same clean-up using the POSIX class [:alpha:] (alphabetic characters; the
# exact set is locale-dependent). The pattern must not end with a space,
# otherwise only "non-letter followed by a space" pairs are removed and the
# output shown below is not reproduced.
exp_data$Name <- gsub("[^[:alpha:]]", "", exp_data$Name)
head(exp_data)
# Name Greg Matt
# 1 YLSTISKDLITYM NA L[ 12]
# 2 YLSTISKDLITYM NA L[ 12]
轉載請註明出處,本文鏈接:https://www.uj5u.com/yidong/377733.html
標籤:r
