附录 F 文件操作

考虑添加 Shell 下的命令实现,参考 命令行的艺术

library(magrittr) # 提供管道命令 %>%

fs 包由 Jim Hester 开发,提供文件系统操作的统一接口,相比于 R 默认的文件系统操作函数有显而易见的优点,详情请看 https://fs.r-lib.org/

对于文件操作,Jim Hester 开发了 fs 包,目的是统一文件操作的命令。由于时间和历史原因,R 内置的文件操作函数的命名很不统一,如 path.expand(),normalizePath(),Sys.chmod() 和 file.access()

# 加载 R 包
library(fs)

F.1 查看文件

文件夹只包含文件,目录既包含文件又包含文件夹,list.dirs 列出目录或文件夹,list.files 列出文件或文件夹

  • list.dirs(path = ".", full.names = TRUE, recursive = TRUE)

    • path: 指定完整路径名,默认使用当前路径 getwd()
    • full.names: TRUE 返回相对路径,FALSE 返回目录的名称
    • recursive: 是否递归的方式列出目录,如果是的话,目录下的子目录也会列出
    # list.dirs(path = '.', full.names = TRUE, recursive = TRUE)
    list.dirs(path = '.', full.names = TRUE, recursive = FALSE)
    ##  [1] "./_book"                           "./.git"                           
    ##  [3] "./.github"                         "./bayesian-models_files"          
    ##  [5] "./case-study_cache"                "./case-study_files"               
    ##  [7] "./code"                            "./dashboard"                      
    ##  [9] "./data"                            "./data-manipulation_files"        
    ## [11] "./data-transportation_files"       "./data-visualization_cache"       
    ....
    list.dirs(path = '.', full.names = FALSE, recursive = FALSE)
    ##  [1] "_book"                           ".git"                           
    ##  [3] ".github"                         "bayesian-models_files"          
    ##  [5] "case-study_cache"                "case-study_files"               
    ##  [7] "code"                            "dashboard"                      
    ##  [9] "data"                            "data-manipulation_files"        
    ## [11] "data-transportation_files"       "data-visualization_cache"       
    ....
  • list.files(path = ".", pattern = NULL, all.files = FALSE, full.names = FALSE, recursive = FALSE,ignore.case = FALSE, include.dirs = FALSE, no.. = FALSE)

    列出指定目录下的文件,recursive = TRUE 时以递归的方式列出子目录中的文件

    • path: 指定完整路径名,默认使用当前路径 getwd()
    • full.names: TRUE 返回相对路径,FALSE 返回文件的名称
    • recursive: 是否递归的方式列出目录,如果是的话,目录下的子目录也会列出
  • file.show(..., header = rep("", nfiles), title = "R Information", delete.file = FALSE, pager = getOption("pager"),encoding = "")

    打开文件内容,file.show 会在R终端中新开一个窗口显示文件

    rinternals <- file.path(R.home("include"), "Rinternals.h")
    # file.show(rinternals)
  • file.info(..., extra_cols = TRUE)

    获取文件信息,此外 file.mode(...),file.mtime(...) 和 file.size(...) 分别表示文件的读写权限,修改时间和文件大小。

    file.info(rinternals)
    ##                                          size isdir mode               mtime
    ## /opt/R/4.1.1/lib/R/include/Rinternals.h 63180 FALSE  644 2021-08-10 08:11:27
    ##                                                       ctime               atime
    ## /opt/R/4.1.1/lib/R/include/Rinternals.h 2021-09-25 07:32:42 2021-09-25 07:40:51
    ##                                         uid gid uname grname
    ## /opt/R/4.1.1/lib/R/include/Rinternals.h   0   0  root   root
    file.mode(rinternals)
    ## [1] "644"
    file.mtime(rinternals)
    ## [1] "2021-08-10 08:11:27 UTC"
    file.size(rinternals)
    ## [1] 63180
    # 查看当前目录的权限
    file.info(".")
    ##    size isdir mode               mtime               ctime               atime
    ## . 12288  TRUE  755 2021-09-25 10:13:28 2021-09-25 10:13:28 2021-09-25 10:13:29
    ##    uid gid  uname grname
    ## . 1001 121 runner docker
    # 查看指定目录权限
    file.info("./_book/")    
    ##          size isdir mode               mtime               ctime
    ## ./_book/ 4096  TRUE  755 2021-09-25 09:37:05 2021-09-25 09:37:05
    ##                        atime  uid gid  uname grname
    ## ./_book/ 2021-09-25 09:37:12 1001 121 runner docker
  • file.access(names, mode = 0)

    文件是否可以被访问,第二个参数 mode 一共有四种取值 0,1,2,4,分别表示文件的存在性,可执行,可写和可读四种,返回值 0 表示成功,返回值 -1 表示失败。

    file.access(rinternals,mode = 0)
    ## /opt/R/4.1.1/lib/R/include/Rinternals.h 
    ##                                       0
    file.access(rinternals,mode = 1)
    ## /opt/R/4.1.1/lib/R/include/Rinternals.h 
    ##                                      -1
    file.access(rinternals,mode = 2)
    ## /opt/R/4.1.1/lib/R/include/Rinternals.h 
    ##                                      -1
    file.access(rinternals,mode = 4)
    ## /opt/R/4.1.1/lib/R/include/Rinternals.h 
    ##                                       0
  • dir(path = ".", pattern = NULL, all.files = FALSE, full.names = FALSE, recursive = FALSE, ignore.case = FALSE, include.dirs = FALSE, no.. = FALSE)

    查看目录,首先看看和目录操作有关的函数列表

    apropos("^dir.")
    ##  [1] "dir_copy"   "dir_create" "dir_delete" "dir_exists" "dir_info"  
    ##  [6] "dir_ls"     "dir_map"    "dir_tree"   "dir_walk"   "dir.create"
    ## [11] "dir.exists" "dirname"

    显而易见,dir.create 和 dir.exists 分别是创建目录和查看目录的存在性。dirname 和 basename 是一对函数,用来操作文件路径。以当前目录 /home/runner/work/masr/masr 为例,dirname(getwd()) 返回 /home/runner/work/masr 而 basename(getwd()) 返回 masr。对于文件路径而言,dirname(rinternals) 返回文件所在的目录 /opt/R/4.1.1/lib/R/include,basename(rinternals) 返回文件名 Rinternals.h。dir 函数查看指定路径或目录下的文件,支持以模式匹配和递归的方式查找目录下的文件

    # 当前目录下的子目录和文件
    dir()
    ##   [1] "_book"                                      
    ##   [2] "_bookdown.yml"                              
    ##   [3] "_build.sh"                                  
    ##   [4] "_common.R"                                  
    ##   [5] "_deploy-book.R"                             
    ##   [6] "_output.yml"                                
    ....
    # 查看指定目录的子目录和文件
    dir(path = "./")
    ##   [1] "_book"                                      
    ##   [2] "_bookdown.yml"                              
    ##   [3] "_build.sh"                                  
    ##   [4] "_common.R"                                  
    ##   [5] "_deploy-book.R"                             
    ##   [6] "_output.yml"                                
    ....
    # 只列出以字母R开头的子目录和文件
    dir(path = "./", pattern = "^R")
    ## [1] "README.md"
    # 列出目录下所有目录和文件,包括隐藏文件
    dir(path = "./", all.files = TRUE)
    ##   [1] "_book"                                      
    ##   [2] "_bookdown.yml"                              
    ##   [3] "_build.sh"                                  
    ##   [4] "_common.R"                                  
    ##   [5] "_deploy-book.R"                             
    ##   [6] "_output.yml"                                
    ....
    # 支持正则表达式
    dir(pattern = '^[A-Z]+[.]txt$', full.names=TRUE, system.file('doc', 'SuiteSparse', package='Matrix'))
    ## [1] "/opt/R/4.1.1/lib/R/library/Matrix/doc/SuiteSparse/AMD.txt"    
    ## [2] "/opt/R/4.1.1/lib/R/library/Matrix/doc/SuiteSparse/CHOLMOD.txt"
    ## [3] "/opt/R/4.1.1/lib/R/library/Matrix/doc/SuiteSparse/COLAMD.txt" 
    ## [4] "/opt/R/4.1.1/lib/R/library/Matrix/doc/SuiteSparse/SPQR.txt"
    # 在临时目录下递归创建一个目录
    dir.create(paste0(tempdir(), "/_book/tmp"), recursive = TRUE)

查看当前目录下的文件和文件夹 tree -L 2 . 或者 ls -l .

F.2 操作文件

实现文件增删改查的函数如下

apropos("^file.")
##  [1] "file_access"    "file_chmod"     "file_chown"     "file_copy"     
##  [5] "file_create"    "file_delete"    "file_exists"    "file_info"     
##  [9] "file_move"      "file_show"      "file_size"      "file_temp"     
## [13] "file_temp_pop"  "file_temp_push" "file_test"      "file_touch"    
## [17] "file.access"    "file.append"    "file.choose"    "file.copy"     
## [21] "file.create"    "file.edit"      "file.exists"    "file.info"     
## [25] "file.link"      "file.mode"      "file.mtime"     "file.path"     
## [29] "file.remove"    "file.rename"    "file.show"      "file.size"     
## [33] "file.symlink"   "fileSnapshot"
  1. file.create(..., showWarnings = TRUE)

    创建/删除文件,检查文件的存在性

    file.create('demo.txt')
    ## [1] TRUE
    file.exists('demo.txt')
    ## [1] TRUE
    file.remove('demo.txt')
    ## [1] TRUE
    file.exists('demo.txt')
    ## [1] FALSE
  2. file.rename(from, to) 文件重命名

    file.create('demo.txt')
    ## [1] TRUE
    file.rename(from = 'demo.txt', to = 'tmp.txt')
    ## [1] TRUE
    file.exists('tmp.txt')
    ## [1] TRUE
  3. file.append(file1, file2) 追加文件 file2 的内容到文件 file1

    if(!dir.exists(paths = 'data/')) dir.create(path = 'data/')
    # 创建两个临时文件
    # file.create(c('data/tmp1.md','data/tmp2.md'))
    # 写入内容
    cat("AAA\n", file = 'data/tmp1.md')
    cat("BBB\n", file = 'data/tmp2.md')
    # 追加文件
    file.append(file1 = 'data/tmp1.md', file2 = 'data/tmp2.md')
    ## [1] TRUE
    # 展示文件内容
    readLines('data/tmp1.md')
    ## [1] "AAA" "BBB"
  4. file.copy(from, to, overwrite = recursive, recursive = FALSE,copy.mode = TRUE, copy.date = FALSE) 复制文件,参考 https://blog.csdn.net/wzj_110/article/details/86497860

    file.copy(from = 'Makefile', to = 'data/Makefile')
    ## [1] TRUE
  5. file.symlink(from, to) 创建符号链接,file.link(from, to) 创建硬链接

  6. Sys.junction(from, to) windows 平台上的函数,提供类似符号链接的功能

  7. Sys.readlink(paths) 读取文件的符号链接(软链接)

  8. choose.files 在 Windows 环境下交互式地选择一个或多个文件,所以该函数只运行于 Windows 环境

    # 选择 zip 格式的压缩文件或其它
    if (interactive())
         choose.files(filters = Filters[c("zip", "All"),])

    filters 参数接收一个矩阵(例如内置的 Filters 矩阵),用来描述或标记 R 识别的文件类型,上面这个例子就能筛选出 zip 格式的文件

  9. download.file 文件下载

    download.file(url = 'https://mirrors.tuna.tsinghua.edu.cn/CRAN/src/base/R-latest.tar.gz',
                  destfile = 'data/R-latest.tar.gz', method = 'auto')

F.3 压缩文件

tar 和 zip 是两种常见的压缩文件工具,具有免费和跨平台的特点,因此应用范围广[70]。R 内对应的压缩与解压缩命令是 tar/untar

tar(tarfile, files = NULL,
    compression = c("none", "gzip", "bzip2", "xz"),
    compression_level = 6, tar = Sys.getenv("tar"),
    extra_flags = "")

比较常用的压缩文件格式是 .tar.gz 和 .tar.bz2,将目录 _book/ 及其文件分别压缩成 _book.tar.gz 和 _book.tar.bz2,压缩包的名字可以任意取,后者压缩比率高。.tar.xz 的压缩比率最高,需要确保系统中安装了 gzip,bzip2 和 xz-utils 软件,R 软件自带的 tar 软件来自 Rtools[71],我们可以通过设置系统环境变量 Sys.setenv(tar="path/to/tar") 指定外部 tar。tar 实际支持的压缩类型只有 .tar.gz[72]。zip/unzip 压缩与解压缩就不赘述了。

# Bundle the _book directory into an uncompressed archive
tar(tarfile = 'data/_book.tar', files = '_book', compression = 'none')
# Compress _book into a .tar.xz archive; archive the same '_book' directory
# as every other call here
tar(tarfile = 'data/_book.tar.xz', files = '_book', compression = 'xz')
# Shell equivalent: tar -cf data/_book.tar _book, then xz -z data/_book.tar
# or in a single step: tar -Jcf data/_book.tar.xz _book/

# Extract: xz -d data/_book.tar.xz, then tar -xf data/_book.tar
# or in a single step: tar -Jxf data/_book.tar.xz

# Compress _book into a .tar.gz archive
# Shell equivalent: tar -czf data/_book.tar.gz _book
tar(tarfile = 'data/_book.tar.gz', files = '_book', compression = 'gzip')
# Extract: tar -xzf data/_book.tar.gz

# Compress _book into a .tar.bz2 archive
# Shell equivalent: tar -cjf data/_book.tar.bz2 _book
tar(tarfile = 'data/_book.tar.bz2', files = '_book', compression = 'bzip2')
# Extract: tar -xjf data/_book.tar.bz2
untar(tarfile, files = NULL, list = FALSE, exdir = ".",
      compressed = NA, extras = NULL, verbose = FALSE,
      restore_times =  TRUE, tar = Sys.getenv("TAR"))

F.4 路径操作

环境变量算是路径操作

# 获取环境变量
Sys.getenv("PATH")
## [1] "/home/runner/.TinyTeX/bin/x86_64-linux:/home/linuxbrew/.linuxbrew/bin:/home/linuxbrew/.linuxbrew/sbin:/home/runner/.local/bin:/opt/pipx_bin:/usr/share/rust/.cargo/bin:/home/runner/.config/composer/vendor/bin:/usr/local/.ghcup/bin:/home/runner/.dotnet/tools:/snap/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin"
# 设置环境变量 Windows
# Sys.setenv(R_GSCMD = "C:/Program Files/gs/gs9.26/bin/gswin64c.exe")
# 设置 pandoc 环境变量
pandoc_path <- Sys.getenv("RSTUDIO_PANDOC", NA)
if (Sys.which("pandoc") == "" && !is.na(pandoc_path)) {
  Sys.setenv(PATH = paste(
    Sys.getenv("PATH"), pandoc_path,
    sep = if (.Platform$OS.type == "unix") ":" else ";"
  ))
}

操作文件路径

  1. file.path Construct Path to File

    file.path('./_book')
    ## [1] "./_book"
  2. path.expand(path) Expand File Paths

    path.expand('./_book')
    ## [1] "./_book"
    ## [1] "/home/runner"
  3. normalizePath() Express File Paths in Canonical Form

    ## [1] "/home/runner"
    normalizePath('./_book')
    ## [1] "/home/runner/work/masr/masr/_book"
  4. shortPathName(path) 只在 Windows 下可用,Express File Paths in Short Form

    cat(shortPathName(c(R.home(), tempdir())), sep = "\n")
  5. Sys.glob Wildcard Expansion on File Paths

    Sys.glob(file.path(R.home(), "library", "compiler", "R", "*.rdx")) 
    ## [1] "/opt/R/4.1.1/lib/R/library/compiler/R/compiler.rdx"

F.5 查找文件

here 包用来查找你的文件,查找文件、可执行文件的完整路径、R 包

  1. Sys.which Find Full Paths to Executables

    Sys.which('pandoc')
    ##            pandoc 
    ## "/usr/bin/pandoc"
  2. system.file Find Names of R System Files

    system.file('CITATION',package = 'base')
    ## [1] "/opt/R/4.1.1/lib/R/library/base/CITATION"
  3. R.home

    # R 安装目录
    R.home()
    ## [1] "/opt/R/4.1.1/lib/R"
    # R执行文件目录
    R.home('bin')
    ## [1] "/opt/R/4.1.1/lib/R/bin"
    # 配置文件目录
    R.home('etc')
    ## [1] "/opt/R/4.1.1/lib/R/etc"
    # R 基础扩展包存放目录
    R.home('library')
    ## [1] "/opt/R/4.1.1/lib/R/library"
  4. .libPaths() R 包存放的路径有哪些

    ## [1] "/home/runner/work/_temp/Library" "/opt/R/4.1.1/lib/R/library"
  5. find.package 查找R包所在目录

    find.package(package = 'MASS')
    ## [1] "/opt/R/4.1.1/lib/R/library/MASS"
  6. file.exists 检查文件是否存在

    file.exists(paste(R.home('etc'),"Rprofile.site",sep = .Platform$file.sep))
    ## [1] FALSE
  7. apropos 和 find 查找对象

apropos(what, where = FALSE, ignore.case = TRUE, mode = "any")
find(what, mode = "any", numeric = FALSE, simple.words = TRUE)

匹配含有 find 的函数

apropos("find")
##  [1] "find"                 "Find"                 "find.package"        
##  [4] "findClass"            "findFunction"         "findInterval"        
##  [7] "findLineNum"          "findMethod"           "findMethods"         
## [10] "findMethodSignatures" "findPackageEnv"       "findRestart"         
## [13] "findUnique"

问号 ? 加函数名搜索 R 软件内置函数的帮助文档,如 ?regex。如果不知道具体的函数名,可采用关键词搜索,如

help.search(keyword = "character", package = "base")

browseEnv 函数用来在浏览器中查看当前环境下,对象的列表,默认环境是 sys.frame()

F.6 文件权限

操作目录和文件的权限 Manipulation of Directories and File Permissions

  1. dir.exists(paths) 检查目录是否存在

    dir.exists(c('./_book','./book'))
    ## [1]  TRUE FALSE
  2. dir.create(path, showWarnings = TRUE, recursive = FALSE, mode = "0777") 创建目录

    dir.create('./_book/tmp')
  3. Sys.chmod(paths, mode = "0777", use_umask = TRUE) 修改权限

    Sys.chmod('./_book/tmp')
  4. Sys.umask(mode = NA)

F.7 区域设置

  1. Sys.getlocale(category = "LC_ALL") 查看当前区域设置

    Sys.getlocale(category = "LC_ALL")
    ## [1] "LC_CTYPE=en_US.UTF-8;LC_NUMERIC=C;LC_TIME=en_US.UTF-8;LC_COLLATE=en_US.UTF-8;LC_MONETARY=en_US.UTF-8;LC_MESSAGES=en_US.UTF-8;LC_PAPER=en_US.UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=en_US.UTF-8;LC_IDENTIFICATION=C"
  2. Sys.setlocale(category = "LC_ALL", locale = "") 设置区域

    # 默认设置
    Sys.setlocale(category = "LC_ALL", locale = "")
    ## [1] "LC_CTYPE=en_US.UTF-8;LC_NUMERIC=C;LC_TIME=en_US.UTF-8;LC_COLLATE=en_US.UTF-8;LC_MONETARY=en_US.UTF-8;LC_MESSAGES=en_US.UTF-8;LC_PAPER=en_US.UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=en_US.UTF-8;LC_IDENTIFICATION=C"
    # 保存当前区域设置
    old <- Sys.getlocale()
    
    Sys.setlocale("LC_MONETARY", locale = "")
    ## [1] "en_US.UTF-8"
    ##     decimal_point     thousands_sep          grouping   int_curr_symbol 
    ##               "."                ""                ""            "USD " 
    ##   currency_symbol mon_decimal_point mon_thousands_sep      mon_grouping 
    ##               "$"               "."               ","        "\003\003" 
    ##     positive_sign     negative_sign   int_frac_digits       frac_digits 
    ##                ""               "-"               "2"               "2" 
    ##     p_cs_precedes    p_sep_by_space     n_cs_precedes    n_sep_by_space 
    ##               "1"               "0"               "1"               "0" 
    ##       p_sign_posn       n_sign_posn 
    ##               "1"               "1"
    Sys.setlocale("LC_MONETARY", "de_AT")
    ## Warning in Sys.setlocale("LC_MONETARY", "de_AT"): OS reports request to set
    ## locale to "de_AT" cannot be honored
    ## [1] ""
    ##     decimal_point     thousands_sep          grouping   int_curr_symbol 
    ##               "."                ""                ""            "USD " 
    ##   currency_symbol mon_decimal_point mon_thousands_sep      mon_grouping 
    ##               "$"               "."               ","        "\003\003" 
    ##     positive_sign     negative_sign   int_frac_digits       frac_digits 
    ##                ""               "-"               "2"               "2" 
    ##     p_cs_precedes    p_sep_by_space     n_cs_precedes    n_sep_by_space 
    ##               "1"               "0"               "1"               "0" 
    ##       p_sign_posn       n_sign_posn 
    ##               "1"               "1"
    # 恢复区域设置
    Sys.setlocale(locale = old)
    ## Warning in Sys.setlocale(locale = old): OS reports request to set locale to
    ## "LC_CTYPE=en_US.UTF-8;LC_NUMERIC=C;LC_TIME=en_US.UTF-8;LC_COLLATE=en_US.UTF-8;LC_MONETARY=en_US.UTF-8;LC_MESSAGES=en_US.UTF-8;LC_PAPER=en_US.UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=en_US.UTF-8;LC_IDENTIFICATION=C"
    ## cannot be honored
    ## [1] ""
  3. Sys.localeconv() 当前区域设置下,数字和货币的表示

    ##     decimal_point     thousands_sep          grouping   int_curr_symbol 
    ##               "."                ""                ""            "USD " 
    ##   currency_symbol mon_decimal_point mon_thousands_sep      mon_grouping 
    ##               "$"               "."               ","        "\003\003" 
    ##     positive_sign     negative_sign   int_frac_digits       frac_digits 
    ##                ""               "-"               "2"               "2" 
    ##     p_cs_precedes    p_sep_by_space     n_cs_precedes    n_sep_by_space 
    ##               "1"               "0"               "1"               "0" 
    ##       p_sign_posn       n_sign_posn 
    ##               "1"               "1"

    本地化信息

    ## $MBCS
    ## [1] TRUE
    ## 
    ## $`UTF-8`
    ## [1] TRUE
    ## 
    ## $`Latin-1`
    ## [1] FALSE
    ## 
    ## $codeset
    ## [1] "UTF-8"

F.8 进程管理

ps 包用来查询进程信息

  • Sys.getpid 获取当前运行中的 R 控制台(会话)的进程 ID

    ## [1] 152067
  • proc.time() R 会话运行时间,常用于计算R程序在当前R控制台的运行时间

    t1 <- proc.time()
    tmp <- rnorm(1e6)
    proc.time() - t1
    ##    user  system elapsed 
    ##   0.074   0.001   0.074
  • system.time 计算 R 表达式/程序块运行耗费的CPU时间

    system.time({
      rnorm(1e6)
    }, gcFirst = TRUE)
    ##    user  system elapsed 
    ##   0.065   0.003   0.068
  • gc.time 报告垃圾回收耗费的时间

    ## [1] 0 0 0 0 0

F.9 系统命令

system 和 system2 调用系统命令,推荐使用后者,它更灵活更便携。此外,Jeroen Ooms 开发的 sys 包 可看作 base::system2 的替代品

# Wrapper that shadows base::system(): runs the command with its output
# captured (intern = TRUE) and prints that output one line at a time.
system <- function(...) {
  captured <- base::system(..., intern = TRUE)
  cat(captured, sep = "\n")
}
# Wrapper that shadows base::system2(): runs the command with stdout
# captured (stdout = TRUE) and prints that output one line at a time.
system2 <- function(...) {
  captured <- base::system2(..., stdout = TRUE)
  cat(captured, sep = "\n")
}
system(command = "xelatex --version")
## XeTeX 3.141592653-2.6-0.999993 (TeX Live 2021)
## kpathsea version 6.3.3
## Copyright 2021 SIL International, Jonathan Kew and Khaled Hosny.
## There is NO warranty.  Redistribution of this software is
## covered by the terms of both the XeTeX copyright and
## the Lesser GNU General Public License.
## For more information about these matters, see the file
## named COPYING and the XeTeX source.
## Primary author of XeTeX: Jonathan Kew.
## Compiled with ICU version 68.2; using 68.2
## Compiled with zlib version 1.2.11; using 1.2.11
## Compiled with FreeType2 version 2.10.4; using 2.10.4
## Compiled with Graphite2 version 1.3.14; using 1.3.14
## Compiled with HarfBuzz version 2.7.4; using 2.7.4
## Compiled with libpng version 1.6.37; using 1.6.37
## Compiled with pplib version v2.05 less toxic i hope
## Compiled with fontconfig version 2.11.0; using 2.13.1
system2(command = 'pdflatex', args = '--version')
## pdfTeX 3.141592653-2.6-1.40.23 (TeX Live 2021)
## kpathsea version 6.3.3
## Copyright 2021 Han The Thanh (pdfTeX) et al.
## There is NO warranty.  Redistribution of this software is
## covered by the terms of both the pdfTeX copyright and
## the Lesser GNU General Public License.
## For more information about these matters, see the file
## named COPYING and the pdfTeX source.
## Primary author of pdfTeX: Han The Thanh (pdfTeX) et al.
## Compiled with libpng 1.6.37; using libpng 1.6.37
## Compiled with zlib 1.2.11; using zlib 1.2.11
## Compiled with xpdf version 4.03

F.10 时间管理

  1. Sys.timezone 获取时区信息

    Sys.timezone(location = TRUE)
    ## [1] "UTC"
  2. Sys.time 系统时间,可以给定时区下,显示当前时间,精确到秒,返回数据类型为 POSIXct

    # 此时美国洛杉矶时间
    format(Sys.time(), tz = 'America/Los_Angeles', usetz = TRUE)
    ## [1] "2021-09-25 03:13:30 PDT"
    # 此时加拿大东部时间
    format(Sys.time(), tz = 'Canada/Eastern', usetz = TRUE)
    ## [1] "2021-09-25 06:13:30 EDT"
  3. Sys.Date 显示当前时区下的日期,精确到日,返回数据类型为 Date

    ## [1] "2021-09-25"
  4. date 返回当前系统日期和时间,数据类型是字符串

    ## [1] "Sat Sep 25 10:13:30 2021"
    ## 也可以这样表示
    format(Sys.time(), "%a %b %d %H:%M:%S %Y")
    ## [1] "Sat Sep 25 10:13:30 2021"
  5. as.POSIX* 是一个 Date-time 转换函数

    as.POSIXlt(Sys.time(), "GMT") # the current time in GMT
    ## [1] "2021-09-25 10:13:30 GMT"
  6. 时间计算

    (z <- Sys.time())             # the current date, as class "POSIXct"
    ## [1] "2021-09-25 10:13:30 UTC"
    Sys.time() - 3600             # an hour ago
    ## [1] "2021-09-25 09:13:30 UTC"
  7. .leap.seconds 是内置的日期序列

    .leap.seconds
    ##  [1] "1972-07-01 GMT" "1973-01-01 GMT" "1974-01-01 GMT" "1975-01-01 GMT"
    ##  [5] "1976-01-01 GMT" "1977-01-01 GMT" "1978-01-01 GMT" "1979-01-01 GMT"
    ##  [9] "1980-01-01 GMT" "1981-07-01 GMT" "1982-07-01 GMT" "1983-07-01 GMT"
    ## [13] "1985-07-01 GMT" "1988-01-01 GMT" "1990-01-01 GMT" "1991-01-01 GMT"
    ## [17] "1992-07-01 GMT" "1993-07-01 GMT" "1994-07-01 GMT" "1996-01-01 GMT"
    ## [21] "1997-07-01 GMT" "1999-01-01 GMT" "2006-01-01 GMT" "2009-01-01 GMT"
    ## [25] "2012-07-01 GMT" "2015-07-01 GMT" "2017-01-01 GMT"

    计算日期对应的星期weekdays,月 months 和季度 quarters

    weekdays(.leap.seconds)
    ##  [1] "Saturday"  "Monday"    "Tuesday"   "Wednesday" "Thursday"  "Saturday" 
    ##  [7] "Sunday"    "Monday"    "Tuesday"   "Wednesday" "Thursday"  "Friday"   
    ## [13] "Monday"    "Friday"    "Monday"    "Tuesday"   "Wednesday" "Thursday" 
    ## [19] "Friday"    "Monday"    "Tuesday"   "Friday"    "Sunday"    "Thursday" 
    ## [25] "Sunday"    "Wednesday" "Sunday"
    months(.leap.seconds)
    ##  [1] "July"    "January" "January" "January" "January" "January" "January"
    ##  [8] "January" "January" "July"    "July"    "July"    "July"    "January"
    ## [15] "January" "January" "July"    "July"    "July"    "January" "July"   
    ## [22] "January" "January" "January" "July"    "July"    "January"
    quarters(.leap.seconds)
    ##  [1] "Q3" "Q1" "Q1" "Q1" "Q1" "Q1" "Q1" "Q1" "Q1" "Q3" "Q3" "Q3" "Q3" "Q1" "Q1"
    ## [16] "Q1" "Q3" "Q3" "Q3" "Q1" "Q3" "Q1" "Q1" "Q1" "Q3" "Q3" "Q1"
  8. Sys.setFileTime() 使用系统调用 system call 设置文件或目录的时间

    # 修改时间前
    file.info('./_common.R')
    ##             size isdir mode               mtime               ctime
    ## ./_common.R 1636 FALSE  644 2021-09-25 07:31:51 2021-09-25 07:31:51
    ##                           atime  uid gid  uname grname
    ## ./_common.R 2021-09-25 09:37:05 1001 121 runner docker
    # 修改时间后,对比一下
    Sys.setFileTime(path = './_common.R', time = Sys.time())
    file.info('./_common.R')
    ##             size isdir mode               mtime               ctime
    ## ./_common.R 1636 FALSE  644 2021-09-25 10:13:30 2021-09-25 10:13:30
    ##                           atime  uid gid  uname grname
    ## ./_common.R 2021-09-25 10:13:30 1001 121 runner docker
  9. strptime 用于字符串与 POSIXlt,POSIXct 类对象之间的转化,format 默认 tz = "",usetz = FALSE

    # 存放时区信息的数据库所在目录
    list.files(file.path(R.home("share"), "zoneinfo"))
    ## character(0)
    # 比较不同的打印方式
    strptime(Sys.time(), format ="%Y-%m-%d %H:%M:%S", tz = "Asia/Taipei")
    ## [1] "2021-09-25 10:13:30 CST"
    format(Sys.time(), format = "%Y-%m-%d %H:%M:%S") # 默认情形
    ## [1] "2021-09-25 10:13:30"
    format(Sys.time(), format = "%Y-%m-%d %H:%M:%S", tz = "Asia/Taipei", usetz = TRUE)
    ## [1] "2021-09-25 18:13:30 CST"
  10. 设置时区

    ## [1] "UTC"
    Sys.setenv(TZ = "Asia/Shanghai")
    Sys.timezone()
    ## [1] "Asia/Shanghai"

    全局修改,在文件 /opt/R/4.1.1/lib/R/etc/Rprofile.site 内添加Sys.setenv(TZ="Asia/Shanghai")。 局部修改,就是在本地R项目下,创建 .Rprofile,然后同样添加 Sys.setenv(TZ="Asia/Shanghai")

F.11 R 包管理

相关的函数大致有

apropos('package')
##  [1] ".packages"                      ".packageStartupMessage"        
##  [3] "$.package_version"              "as.package_version"            
##  [5] "aspell_package_C_files"         "aspell_package_R_files"        
##  [7] "aspell_package_Rd_files"        "aspell_package_vignettes"      
##  [9] "available.packages"             "download.packages"             
## [11] "find.package"                   "findPackageEnv"                
## [13] "format.packageInfo"             "getPackageName"                
## [15] "install.packages"               "installed.packages"            
## [17] "is.package_version"             "make.packages.html"            
## [19] "methodsPackageMetaName"         "new.packages"                  
....
  1. .packages(all.available = TRUE) 已安装的 R 包

    # Count the packages available in the library trees; spell out the argument
    # name and TRUE, because the shorthand T is an ordinary, reassignable variable
    .packages(all.available = TRUE) %>% length()
    ## [1] 516
  2. available.packages 查询可用的 R 包

    available.packages()[,"Package"] %>% head()
    ##         A3      aaSEA   AATtools     ABACUS     abbyyR        abc 
    ##       "A3"    "aaSEA" "AATtools"   "ABACUS"   "abbyyR"      "abc"

    查询 repos 的 R 包

    rforge <- available.packages(repos = "https://r-forge.r-project.org/")
    cran <- available.packages(repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
    setdiff(rforge[, "Package"], cran[, "Package"])
  3. download.packages 下载 R 包

    download.packages("Rbooks", destdir = "~/", repos = "https://r-forge.r-project.org/")
  4. install.packages 安装 R 包

    install.packages("rmarkdown")
  5. installed.packages 已安装的 R 包

    installed.packages(fields = c("Package","Version")) %>% head()
  6. remove.packages 卸载/删除/移除已安装的R包

    remove.packages('rmarkdown')
  7. update.packages 更新已安装的 R 包

    update.packages(ask = FALSE)
  8. old.packages 查看过时/可更新的 R 包

    ##         Package   LibPath                           Installed Built   ReposVer 
    ## arrow   "arrow"   "/home/runner/work/_temp/Library" "5.0.0"   "4.1.1" "5.0.0.2"
    ## cpp11   "cpp11"   "/home/runner/work/_temp/Library" "0.3.1"   "4.1.1" "0.4.0"  
    ## deSolve "deSolve" "/home/runner/work/_temp/Library" "1.28"    "4.1.1" "1.29"   
    ## digest  "digest"  "/home/runner/work/_temp/Library" "0.6.27"  "4.1.1" "0.6.28" 
    ## e1071   "e1071"   "/home/runner/work/_temp/Library" "1.7-8"   "4.1.1" "1.7-9"  
    ## gert    "gert"    "/home/runner/work/_temp/Library" "1.3.2"   "4.1.1" "1.4.1"  
    ##         Repository                               
    ## arrow   "https://cloud.r-project.org/src/contrib"
    ## cpp11   "https://cloud.r-project.org/src/contrib"
    ## deSolve "https://cloud.r-project.org/src/contrib"
    ## digest  "https://cloud.r-project.org/src/contrib"
    ## e1071   "https://cloud.r-project.org/src/contrib"
    ## gert    "https://cloud.r-project.org/src/contrib"
  9. new.packages 还没有安装的 R 包

    ## [1] "A3"       "aaSEA"    "AATtools" "ABACUS"   "abbyyR"   "abc"
  10. packageStatus 查看已安装的 R 包状态,可更新、可下载等

    ## Number of installed packages:
    ##                                  
    ##                                    ok upgrade unavailable
    ##   /home/runner/work/_temp/Library 440      35          13
    ##   /opt/R/4.1.1/lib/R/library       25       4           0
    ## 
    ## Number of available packages (each package counted only once):
    ##                                          
    ##                                           installed not installed
    ##   https://cloud.r-project.org/src/contrib       489         17754
  11. packageDescription 查询 R 包描述信息

    packageDescription('graphics')
    ## Package: graphics
    ## Version: 4.1.1
    ## Priority: base
    ## Title: The R Graphics Package
    ## Author: R Core Team and contributors worldwide
    ## Maintainer: R Core Team <do-use-Contact-address@r-project.org>
    ....
  12. 查询 R 包的依赖关系

    # rmarkdown 依赖的 R 包
    tools::package_dependencies('rmarkdown', recursive = TRUE)
    ## $rmarkdown
    ##  [1] "tools"     "utils"     "knitr"     "yaml"      "htmltools" "evaluate" 
    ##  [7] "jsonlite"  "tinytex"   "xfun"      "jquerylib" "methods"   "stringr"  
    ## [13] "digest"    "grDevices" "base64enc" "rlang"     "fastmap"   "highr"    
    ## [19] "glue"      "magrittr"  "stringi"   "stats"
    # 依赖 rmarkdown 的 R 包
    tools::dependsOnPkgs('rmarkdown', recursive = TRUE)
    ##  [1] "bookdown"       "flexdashboard"  "formattable"    "hrbrthemes"    
    ##  [5] "kableExtra"     "prettydoc"      "reprex"         "tint"          
    ##  [9] "packagemetrics" "tidyverse"      "projpred"       "brms"

    ggplot2 生态,仅列出以 gg 开头的 R 包

    pdb <- available.packages()
    gg <- tools::dependsOnPkgs("ggplot2", recursive = FALSE, installed = pdb)
    grep("^gg", gg, value = TRUE)
    ##   [1] "gg.gap"            "ggallin"           "ggalluvial"       
    ##   [4] "ggalt"             "gganimate"         "ggasym"           
    ##   [7] "ggbeeswarm"        "ggborderline"      "ggbreak"          
    ##  [10] "ggBubbles"         "ggbuildr"          "ggbump"           
    ##  [13] "ggcharts"          "ggChernoff"        "ggcleveland"      
    ##  [16] "ggconf"            "ggcorrplot"        "ggdag"            
    ##  [19] "ggdark"            "ggDCA"             "ggdemetra"        
    ##  [22] "ggdendro"          "ggdist"            "ggdmc"            
    ##  [25] "ggeasy"            "ggedit"            "ggenealogy"       
    ##  [28] "ggetho"            "ggExtra"           "ggfan"            
    ##  [31] "ggfittext"         "ggfocus"           "ggforce"          
    ##  [34] "ggformula"         "ggfortify"         "ggfun"            
    ##  [37] "ggfx"              "gggap"             "gggenes"          
    ##  [40] "ggghost"           "gggibbous"         "ggguitar"         
    ##  [43] "ggh4x"             "gghalfnorm"        "gghalves"         
    ##  [46] "ggheatmap"         "gghighlight"       "gghilbertstrings" 
    ##  [49] "ggHoriPlot"        "ggimage"           "ggimg"            
    ##  [52] "gginference"       "gginnards"         "ggip"             
    ##  [55] "ggiraph"           "ggiraphExtra"      "ggjoy"            
    ##  [58] "gglm"              "gglogo"            "ggloop"           
    ##  [61] "gglorenz"          "ggmap"             "ggmcmc"           
    ##  [64] "ggmosaic"          "ggmotif"           "ggmuller"         
    ##  [67] "ggmulti"           "ggnetwork"         "ggnewscale"       
    ##  [70] "ggnormalviolin"    "ggnuplot"          "ggpacman"         
    ##  [73] "ggpage"            "ggparallel"        "ggparliament"     
    ##  [76] "ggparty"           "ggperiodic"        "ggplot.multistats"
    ##  [79] "ggplotAssist"      "ggplotgui"         "ggplotify"        
    ##  [82] "ggplotlyExtra"     "ggpmisc"           "ggPMX"            
    ##  [85] "ggpointdensity"    "ggpol"             "ggpolypath"       
    ##  [88] "ggpp"              "ggprism"           "ggpubr"           
    ##  [91] "ggpval"            "ggQC"              "ggQQunif"         
    ##  [94] "ggquickeda"        "ggquiver"          "ggRandomForests"  
    ##  [97] "ggraph"            "ggraptR"           "ggrasp"           
    ## [100] "ggrastr"           "ggrepel"           "ggResidpanel"     
    ## [103] "ggridges"          "ggrisk"            "ggROC"            
    ## [106] "ggsci"             "ggseas"            "ggseg"            
    ## [109] "ggseqlogo"         "ggshadow"          "ggside"           
    ## [112] "ggsignif"          "ggsn"              "ggsoccer"         
    ## [115] "ggsolvencyii"      "ggsom"             "ggspatial"        
    ## [118] "ggspectra"         "ggstance"          "ggstar"           
    ## [121] "ggstatsplot"       "ggstream"          "ggstudent"        
    ## [124] "ggswissmaps"       "ggtern"            "ggtext"           
    ## [127] "ggThemeAssist"     "ggthemes"          "ggtikz"           
    ## [130] "ggupset"           "ggvenn"            "ggVennDiagram"    
    ## [133] "ggvoronoi"         "ggwordcloud"       "ggx"
  13. 重装R包,与 R 版本号保持一致

    db <- installed.packages()
    db <- as.data.frame(db, stringsAsFactors = FALSE)
    pkgs <- db[db$Built < getRversion(), "Package"]
    install.packages(pkgs)

F.12 查找函数

lookup 包用于查看 R 函数的完整定义,包括编译的代码,S3 和 S4 方法。目前 lookup 包处于开发版,我们可以用 remotes::install_github 函数来安装它

# install.packages("remotes")
remotes::install_github("jimhester/lookup")

R-level 的源代码都可以直接看

body
## function (fun = sys.function(sys.parent())) 
## {
##     if (is.character(fun)) 
##         fun <- get(fun, mode = "function", envir = parent.frame())
##     .Internal(body(fun))
## }
## <bytecode: 0x55730f12a860>
## <environment: namespace:base>

此外,lookup 可以定位到 C-level 的源代码,需要联网才能查看,lookup 基于 Winston Chang 在 Github 上维护的 R 源码镜像

lookup(body)
base::body [closure] 
function (fun = sys.function(sys.parent())) 
{
    if (is.character(fun)) 
        fun <- get(fun, mode = "function", envir = parent.frame())
    .Internal(body(fun))
}
<bytecode: 0x00000000140d6158>
<environment: namespace:base>
// c source: src/main/builtin.c#L264-L277
SEXP attribute_hidden do_body(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    checkArity(op, args);
    if (TYPEOF(CAR(args)) == CLOSXP) {
        SEXP b = BODY_EXPR(CAR(args));
        RAISE_NAMED(b, NAMED(CAR(args)));
        return b;
    } else {
        if(!(TYPEOF(CAR(args)) == BUILTINSXP ||
             TYPEOF(CAR(args)) == SPECIALSXP))
            warningcall(call, _("argument is not a function"));
        return R_NilValue;
    }
}

F.13 运行环境

## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] rmarkdown_2.10 fs_1.5.0       magrittr_2.0.1
## 
## loaded via a namespace (and not attached):
##  [1] knitr_1.33      xml2_1.3.2      downlit_0.2.1   R6_2.5.1       
##  [5] rlang_0.4.11    fastmap_1.1.0   fansi_0.5.0     stringr_1.4.0  
##  [9] tools_4.1.1     xfun_0.25       utf8_1.2.2      jquerylib_0.1.4
## [13] htmltools_0.5.2 ellipsis_0.3.2  yaml_2.2.1      digest_0.6.27  
## [17] tibble_3.1.4    lifecycle_1.0.0 crayon_1.4.1    bookdown_0.24  
## [21] vctrs_0.3.8     sass_0.4.0      curl_4.3.2      evaluate_0.14  
## [25] stringi_1.7.4   compiler_4.1.1  bslib_0.3.0     pillar_1.6.2   
## [29] jsonlite_1.7.2  pkgconfig_2.0.3