4.9 命名捕捉
函数 regexpr(..., perl = TRUE)
和 gregexpr(..., perl = TRUE)
支持命名捕捉
## named capture
# Two sample strings. The first one starts with two spaces and the second
# with a tab, so the matches begin at character 3 and 2 respectively.
notables <- c("  Ben Franklin and Jefferson Davis",
              "\tMillard Fillmore")
# name groups 'first' and 'last'
# (?<name>...) is the PCRE syntax for a named group; it needs perl = TRUE.
name.rex <- "(?<first>[[:upper:]][[:lower:]]+) (?<last>[[:upper:]][[:lower:]]+)"
# regexpr() reports only the first match in each string. The outer
# parentheses print the result as well as assigning it.
(parsed <- regexpr(name.rex, notables, perl = TRUE))
## [1] 3 2
## attr(,"match.length")
## [1] 12 16
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
## first last
## [1,] 3 7
## [2,] 2 10
## attr(,"capture.length")
## first last
## [1,] 3 8
## [2,] 7 8
## attr(,"capture.names")
## [1] "first" "last"
# The names of the capture groups are kept as an attribute of the match object.
attr(x = parsed, which = "capture.names")
## [1] "first" "last"
# Extract the text of the whole match (not the individual groups) per string.
regmatches(x = notables, m = parsed)
## [1] "Ben Franklin" "Millard Fillmore"
希望返回一个 data.frame，列名是指定的 named group 的名字（可借助下文的 tidyr::extract() 实现）。
# A string may contain several matches: gregexpr() returns all of them,
# as a list with one element per input string. The outer parentheses
# print the result as well as assigning it.
(idx <- gregexpr(name.rex, notables, perl = TRUE))
## [[1]]
## [1] 3 20
## attr(,"match.length")
## [1] 12 15
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
## first last
## [1,] 3 7
## [2,] 20 30
## attr(,"capture.length")
## first last
## [1,] 3 8
## [2,] 9 5
## attr(,"capture.names")
## [1] "first" "last"
##
## [[2]]
## [1] 2
## attr(,"match.length")
## [1] 16
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
## first last
## [1,] 2 10
## attr(,"capture.length")
## first last
## [1,] 7 8
## attr(,"capture.names")
## [1] "first" "last"
# Pull out every full match; the result is a list with one element per string.
regmatches(x = notables, m = idx)
## [[1]]
## [1] "Ben Franklin" "Jefferson Davis"
##
## [[2]]
## [1] "Millard Fillmore"
# The capture-group names are stored on each list element separately.
attr(x = idx[[1]], which = "capture.names")
## [1] "first" "last"
library(magrittr)
# Split the `notable` column into one column per capture group of name.rex.
# The new column names come from `into`; remove = FALSE keeps the original
# column next to the extracted ones.
data.frame(notable = notables) %>%
  tidyr::extract(
    col = notable,
    into = c("first", "last"),
    regex = name.rex,
    remove = FALSE
  )
## notable first last
## 1 Ben Franklin and Jefferson Davis Ben Franklin
## 2 \tMillard Fillmore Millard Fillmore