4.11 模糊匹配

近似字符串匹配 (Approximate String Matching) 也叫模糊匹配 (Fuzzy Matching)，指在允许少量编辑操作（插入、删除、替换字符）的前提下，查找与给定模式相近的字符串。

R 自带的相关函数有：agrep() 和 agrepl() 用于近似匹配查找，aregexec() 返回近似匹配的位置和长度，adist() 计算字符串之间的编辑距离。

agrep(pattern = "lasy", x = "1 lazy 2")

## [1] 1

agrep("lasy", c(" 1 lazy 2", "1 lasy 2"), max = list(sub = 0))

## [1] 2

agrep("laysy", c("1 lazy", "1", "1 LAZY"), max = 2)

## [1] 1

agrep("laysy", c("1 lazy", "1", "1 LAZY"), max = 2, value = TRUE)

## [1] "1 lazy"

agrep("laysy", c("1 lazy", "1", "1 LAZY"), max = 2, ignore.case = TRUE)

## [1] 1 3

agrepl(pattern = "lasy", x = "1 lazy 2")

## [1] TRUE

## Cf. the examples for agrep.
x <- c("1 lazy", "1", "1 LAZY")

aregexec("laysy", x, max.distance = 2)

## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 4
## 
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
## 
## [[3]]
## [1] -1
## attr(,"match.length")
## [1] -1

aregexec("(lay)(sy)", x, max.distance = 2)

## [[1]]
## [1] 3 3 5
## attr(,"match.length")
## [1] 4 2 2
## 
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
## 
## [[3]]
## [1] -1
## attr(,"match.length")
## [1] -1

aregexec("(lay)(sy)", x, max.distance = 2, ignore.case = TRUE)

## [[1]]
## [1] 3 3 6
## attr(,"match.length")
## [1] 4 3 1
## 
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
## 
## [[3]]
## [1] 3 3 6
## attr(,"match.length")
## [1] 4 3 1

m <- aregexec("(lay)(sy)", x, max.distance = 2)
regmatches(x, m)

## [[1]]
## [1] "lazy" "la"   "zy"  
## 
## [[2]]
## character(0)
## 
## [[3]]
## character(0)

## Cf. https://en.wikipedia.org/wiki/Levenshtein_distance
adist("kitten", "sitting")

##      [,1]
## [1,]    3

## To see the transformation counts for the Levenshtein distance:
drop(attr(adist("kitten", "sitting", counts = TRUE), "counts"))

## ins del sub 
##   1   0   2

## To see the transformation sequences:
attr(adist(c("kitten", "sitting"), counts = TRUE), "trafos")

##      [,1]      [,2]     
## [1,] "MMMMMM"  "SMMMSMI"
## [2,] "SMMMSMD" "MMMMMMM"

## Cf. the examples for agrep:
adist("lasy", "1 lazy 2")

##      [,1]
## [1,]    5

## For a "partial approximate match" (as used for agrep):
adist("lasy", "1 lazy 2", partial = TRUE)

##      [,1]
## [1,]    1

案例

help.search() 是模糊匹配的一个实际应用：搜索帮助文档时，它默认会借助 agrep() 做近似匹配（可通过参数 agrep 控制），因此关键词拼写略有偏差时也能找到相关的帮助页面。