4.9 命名捕捉
函数 regexpr(..., perl = TRUE) 和 gregexpr(..., perl = TRUE) 支持命名捕捉(named capture):在正则表达式中用 (?<name>...) 的形式为捕捉组命名。
## Named capture with regexpr(..., perl = TRUE)

# Sample input. The first element starts with TWO spaces and the second with a
# tab; the match positions printed below (3 and 2) depend on that leading
# whitespace, so it must not be collapsed.
notables <- c(
  "  Ben Franklin and Jefferson Davis",
  "\tMillard Fillmore"
)

# Two capture groups named 'first' and 'last': each is one upper-case letter
# followed by lower-case letters, with a single space between the two groups.
name.rex <- "(?<first>[[:upper:]][[:lower:]]+) (?<last>[[:upper:]][[:lower:]]+)"

# regexpr() reports the first match in each element. With perl = TRUE the
# result also carries capture.start / capture.length / capture.names
# attributes describing every capture group.
parsed <- regexpr(name.rex, notables, perl = TRUE)
parsed
## [1] 3 2
## attr(,"match.length")
## [1] 12 16
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##      first last
## [1,]     3    7
## [2,]     2   10
## attr(,"capture.length")
##      first last
## [1,]     3    8
## [2,]     7    8
## attr(,"capture.names")
## [1] "first" "last"

# The group names are stored in the 'capture.names' attribute.
attr(parsed, "capture.names")
## [1] "first" "last"

# regmatches() pulls out the full matched substrings (not the single groups).
regmatches(notables, parsed)
## [1] "Ben Franklin"     "Millard Fillmore"
如果希望直接得到一个 data.frame,并以命名分组(named group)的名字作为列名,可以使用 tidyr::extract()(见本节末尾的例子)。
# Multiple results: gregexpr() reports every match in each element, so the
# result is a list with one entry per input string.
idx <- gregexpr(name.rex, notables, perl = TRUE)
idx
## [[1]]
## [1]  3 20
## attr(,"match.length")
## [1] 12 15
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##      first last
## [1,]     3    7
## [2,]    20   30
## attr(,"capture.length")
##      first last
## [1,]     3    8
## [2,]     9    5
## attr(,"capture.names")
## [1] "first" "last"
##
## [[2]]
## [1] 2
## attr(,"match.length")
## [1] 16
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##      first last
## [1,]     2   10
## attr(,"capture.length")
##      first last
## [1,]     7    8
## attr(,"capture.names")
## [1] "first" "last"

# All matched substrings, grouped by input element.
regmatches(notables, idx)
## [[1]]
## [1] "Ben Franklin"    "Jefferson Davis"
##
## [[2]]
## [1] "Millard Fillmore"

# Each list entry carries its own 'capture.names' attribute.
attr(idx[[1]], "capture.names")
## [1] "first" "last"

# Data-frame result with one column per named group, via tidyr::extract().
# As the output shows, only the first match of each string is kept.
library(magrittr)
data.frame(notable = notables) %>%
  tidyr::extract(
    notable,
    into = c("first", "last"),
    regex = name.rex,
    remove = FALSE
  )
##                              notable   first     last
## 1   Ben Franklin and Jefferson Davis     Ben Franklin
## 2                 \tMillard Fillmore Millard Fillmore