4.9 命名捕捉

函数 regexpr(..., perl = TRUE) 和 gregexpr(..., perl = TRUE) 支持命名捕捉：在正则表达式中用 (?<name>...) 为分组命名后，返回结果会附带 capture.start、capture.length 和 capture.names 三个属性。

# Named capture: sample strings, each holding one or two "First Last" names
# preceded by whitespace.
notables <- c(
  "  Ben Franklin and Jefferson Davis",
  "\tMillard Fillmore"
)

# Two named groups, `first` and `last`: each is one upper-case letter followed
# by one or more lower-case letters, with a single space between the groups.
name.rex <- "(?<first>[[:upper:]][[:lower:]]+) (?<last>[[:upper:]][[:lower:]]+)"

# regexpr() reports only the first match in each string; perl = TRUE is needed
# for the (?<name>...) syntax.
parsed <- regexpr(pattern = name.rex, text = notables, perl = TRUE)
parsed

## [1] 3 2
## attr(,"match.length")
## [1] 12 16
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##      first last
## [1,]     3    7
## [2,]     2   10
## attr(,"capture.length")
##      first last
## [1,]     3    8
## [2,]     7    8
## attr(,"capture.names")
## [1] "first" "last"

# The group names are stored on the match object itself.
attr(parsed, "capture.names")

## [1] "first" "last"

# Pull out the matched substring (first match only) for each input string.
regmatches(x = notables, m = parsed)

## [1] "Ben Franklin"     "Millard Fillmore"

我们希望最终得到一个 data.frame，其列名就是正则表达式中指定的命名分组（named group）的名字。

# gregexpr() reports every match in each string, so the result is a list with
# one element per input string.
idx <- gregexpr(pattern = name.rex, text = notables, perl = TRUE)
idx

## [[1]]
## [1]  3 20
## attr(,"match.length")
## [1] 12 15
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##      first last
## [1,]     3    7
## [2,]    20   30
## attr(,"capture.length")
##      first last
## [1,]     3    8
## [2,]     9    5
## attr(,"capture.names")
## [1] "first" "last" 
## 
## [[2]]
## [1] 2
## attr(,"match.length")
## [1] 16
## attr(,"index.type")
## [1] "chars"
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##      first last
## [1,]     2   10
## attr(,"capture.length")
##      first last
## [1,]     7    8
## attr(,"capture.names")
## [1] "first" "last"

# With a gregexpr() result, regmatches() returns a list: all matched
# substrings for each input string.
regmatches(x = notables, m = idx)

## [[1]]
## [1] "Ben Franklin"    "Jefferson Davis"
## 
## [[2]]
## [1] "Millard Fillmore"

# Each list element carries its own capture attributes; inspect the first.
attr(idx[[1]], "capture.names")

## [1] "first" "last"

# Attach magrittr for the %>% pipe.
library(magrittr)

# Split each string into `first` and `last` columns using the capture groups
# of name.rex; remove = FALSE keeps the original `notable` column alongside.
data.frame(notable = notables) %>%
  tidyr::extract(
    col = notable,
    into = c("first", "last"),
    regex = name.rex,
    remove = FALSE
  )

##                              notable   first     last
## 1   Ben Franklin and Jefferson Davis     Ben Franklin
## 2                 \tMillard Fillmore Millard Fillmore