4.11 模糊匹配
近似字符串匹配（Approximate String Matching）也叫模糊匹配（Fuzzy Matching）。R 中与模糊匹配相关的函数有：
agrep()
agrepl()
aregexec()
adist()
# Approximate matching with agrep(): returns the indices of the elements of
# `x` that contain a close-enough match to `pattern`.
agrep(pattern = "lasy", x = "1 lazy 2")
## [1] 1
# Forbid substitutions entirely: only the element containing "lasy" verbatim
# still matches. The argument is spelled out in full (max.distance) rather
# than relying on partial matching of `max`.
agrep("lasy", c(" 1 lazy 2", "1 lasy 2"),
  max.distance = list(substitutions = 0)
)
## [1] 2
# Allow up to 2 edits; matching is case-sensitive by default.
agrep("laysy", c("1 lazy", "1", "1 LAZY"), max.distance = 2)
## [1] 1
# value = TRUE returns the matching strings instead of their indices.
agrep("laysy", c("1 lazy", "1", "1 LAZY"), max.distance = 2, value = TRUE)
## [1] "1 lazy"
# ignore.case = TRUE lets the upper-case element match as well.
agrep("laysy", c("1 lazy", "1", "1 LAZY"),
  max.distance = 2, ignore.case = TRUE
)
## [1] 1 3
# agrepl() is the logical counterpart: one TRUE/FALSE per element of `x`.
agrepl(pattern = "lasy", x = "1 lazy 2")
## [1] TRUE
## Cf. the examples for agrep.
# The subject strings are stored in `x` so that every aregexec() call below
# searches the same inputs.
x <- c("1 lazy", "1", "1 LAZY")
# Approximate match of the whole pattern with at most 2 edits; the result
# holds the match position and length per element, or -1 for no match.
aregexec("laysy", x, max.distance = 2)
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 4
##
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
##
## [[3]]
## [1] -1
## attr(,"match.length")
## [1] -1
# With parenthesized subexpressions the result also reports the position and
# length of each group after the overall match.
aregexec("(lay)(sy)", x, max.distance = 2)
## [[1]]
## [1] 3 3 5
## attr(,"match.length")
## [1] 4 2 2
##
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
##
## [[3]]
## [1] -1
## attr(,"match.length")
## [1] -1
# Ignoring case makes the upper-case element match too.
aregexec("(lay)(sy)", x, max.distance = 2, ignore.case = TRUE)
## [[1]]
## [1] 3 3 6
## attr(,"match.length")
## [1] 4 3 1
##
## [[2]]
## [1] -1
## attr(,"match.length")
## [1] -1
##
## [[3]]
## [1] 3 3 6
## attr(,"match.length")
## [1] 4 3 1
# Keep the match data in `m` and let regmatches() extract the matched
# substrings (overall match first, then each group).
m <- aregexec("(lay)(sy)", x, max.distance = 2)
regmatches(x, m)
## [[1]]
## [1] "lazy" "la" "zy"
##
## [[2]]
## character(0)
##
## [[3]]
## character(0)
## Background: https://en.wikipedia.org/wiki/Levenshtein_distance
# Generalized Levenshtein (edit) distance between two strings.
adist(x = "kitten", y = "sitting")
##      [,1]
## [1,]    3
## Break the distance down into insertions, deletions and substitutions:
drop(
  attr(
    adist(x = "kitten", y = "sitting", counts = TRUE),
    "counts"
  )
)
## ins del sub
##   1   0   2
## Show the edit sequences for every pair of strings:
attr(
  adist(x = c("kitten", "sitting"), counts = TRUE),
  "trafos"
)
##      [,1]      [,2]
## [1,] "MMMMMM"  "SMMMSMI"
## [2,] "SMMMSMD" "MMMMMMM"
## Same strings as in the agrep examples, compared in full:
adist(x = "lasy", y = "1 lazy 2")
##      [,1]
## [1,]    5
## "Partial approximate match" (the mode agrep uses), where the pattern is
## compared against substrings of the target:
adist(x = "lasy", y = "1 lazy 2", partial = TRUE)
##      [,1]
## [1,]    1
案例
help.search()