Find DOIs with regular expressions
regex_doi(type = c("doi.org", "cr-modern"), ...) doi_patterns(type = c("doi.org", "cr-modern")) str_extract_doi(string) str_extract_all_doi(string, type = "doi.org")
type | a character string giving the type of validation to run. Implemented as regular expressions (see source code). Must be one of these syntax specifications:
|
---|---|
... | Arguments passed on to
|
string | Input vector. Either a character vector, or something coercible to one. |
doi_patterns
: Find DOI fields with regular expressions
str_extract_doi
: Extract the first DOI from each character string
str_extract_all_doi
: Extract all DOIs from character strings
Other doi:
doiEntry
,
doi_api
,
doi_examples()
,
doi_ra
,
doi()
,
view_doi_matches()
regex_doi("doi.org") #> [1] "10[.][0-9]+(?:[.][0-9]+)*/(?:(?![\"&\\'])\\S)+" #> attr(,"options") #> attr(,"options")$case_insensitive #> [1] FALSE #> #> attr(,"options")$comments #> [1] FALSE #> #> attr(,"options")$dotall #> [1] FALSE #> #> attr(,"options")$multiline #> [1] FALSE #> #> attr(,"class") #> [1] "regex" "pattern" "character" regex_doi("cr-modern") #> [1] "10.\\d{4,9}/[-._;()/:A-Z0-9]+" #> attr(,"options") #> attr(,"options")$case_insensitive #> [1] FALSE #> #> attr(,"options")$comments #> [1] FALSE #> #> attr(,"options")$dotall #> [1] FALSE #> #> attr(,"options")$multiline #> [1] FALSE #> #> attr(,"class") #> [1] "regex" "pattern" "character" str_extract_doi(string = c( "10.1594/PANGAEA.726855", # nothing to do here "10.1119/1.16433 ", # remove space " 10.1594/PANGAEA.667386", # remove space "doi:10.3866/PKU.WHXB201112303", # remove DOI "http://dx.doi.org/10.3352/jeehp.2013.10.3", # parse URL "10.3972/water973.0145.db&", # remove forbidden symbol "foo bar" # no DOI here )) #> [1] "10.1594/PANGAEA.726855" "10.1119/1.16433" #> [3] "10.1594/PANGAEA.667386" "10.3866/PKU.WHXB201112303" #> [5] "10.3352/jeehp.2013.10.3" "10.3972/water973.0145.db" #> [7] NA str_extract_all_doi(string = c( # nothing to do here "10.17487/rfc1149", # space separated "10.1016/j.iheduc.2003.11.004 doi:10.7875/leading.author.2.e008", # separated by forbidden "doi:10.6084/m9.figshare.97218&doi:10.1126/science.169.3946.635 ", # separated by linebreak "10.5194/wes-2019-70\n10.5194/wes-5-819-202", # no DOI here "quux" )) #> [,1] [,2] #> [1,] "10.17487/rfc1149" NA #> [2,] "10.1016/j.iheduc.2003.11.004" "10.7875/leading.author.2.e008" #> [3,] "10.6084/m9.figshare.97218" "10.1126/science.169.3946.635" #> [4,] "10.5194/wes-2019-70" "10.5194/wes-5-819-202" #> [5,] NA NA