diff --git a/inst/include/re2r.h b/inst/include/re2r.h index 0f80c61..cf6533e 100644 --- a/inst/include/re2r.h +++ b/inst/include/re2r.h @@ -128,6 +128,6 @@ XPtr cpp_re2_compile(const char* pattern, SEXP toprotect_optstring_sexp(const optstring& input); SEXP toprotect_vec_string_sexp(const vector& input); - +vector> as_vec_opt_string(CharacterVector& input); #endif diff --git a/src/re2r_match.cpp b/src/re2r_match.cpp index e738e44..7d6a671 100644 --- a/src/re2r_match.cpp +++ b/src/re2r_match.cpp @@ -329,7 +329,12 @@ SEXP cpp_detect(CharacterVector& input, LogicalVector res(input.size()); auto resi = res.begin(); for(auto it = 0; it != input.size(); it++, resi ++){ - auto r_char = R_CHAR(STRING_ELT(inputx, it)); + auto rstr = STRING_ELT(inputx, it); + if (rstr == NA_STRING){ + *resi = NA_LOGICAL; + continue; + } + auto r_char = R_CHAR(rstr); *resi = pattern->Match( r_char ,0, strlen(r_char), anchor_type, nullptr, 0); } @@ -338,13 +343,13 @@ SEXP cpp_detect(CharacterVector& input, struct BoolP : public Worker { - vector& input; + vector>& input; RVector output; RE2& tt; RE2::Options& opt; const RE2::Anchor anchor_type; - BoolP (vector& input_,RVector output_, RE2& tt_, RE2::Options& opt_, const RE2::Anchor& anchor_type_) + BoolP (vector>& input_,RVector output_, RE2& tt_, RE2::Options& opt_, const RE2::Anchor& anchor_type_) : input(input_), output(output_), tt(tt_), opt(opt_), anchor_type(anchor_type_){} void operator()(std::size_t begin, std::size_t end) { @@ -352,8 +357,11 @@ struct BoolP : public Worker std::transform(input.begin() + begin, input.begin() + end, output.begin() + begin, - [this,&pattern](string& x)->int{ - return pattern.Match(x, 0, (int) x.length(), + [this,&pattern](tr2::optional& x)->int{ + if (!bool(x)){ + return NA_LOGICAL; + } + return pattern.Match(x.value(), 0, (int) x.value().length(), anchor_type, nullptr, 0); }); } @@ -366,7 +374,7 @@ SEXP cpp_detect_parallel(CharacterVector& input, RE2::Anchor anchor_type){ LogicalVector reso(input.size()); RVector res(reso); - vector inputv = as>(input); + auto inputv = as_vec_opt_string(input); BoolP pobj(inputv, res, *pattern, opt, anchor_type); parallelFor(0, input.size(), pobj, 1000000); return wrap(reso); diff --git a/src/re2r_util.cpp b/src/re2r_util.cpp index 06ee17e..2308573 100644 --- a/src/re2r_util.cpp +++ b/src/re2r_util.cpp @@ -14,6 +14,23 @@ SEXP toprotect_vec_string_sexp(const vector& input){ return x; } +vector> as_vec_opt_string(CharacterVector& input){ + SEXP inputx = input; + vector> res; + res.reserve(input.size()); + + for(auto it = 0; it != input.size(); it++){ + auto rstr = STRING_ELT(inputx, it); + if (rstr == NA_STRING){ + res.push_back(tr2::nullopt); + continue; + }else{ + res.push_back(tr2::make_optional(string(R_CHAR(rstr)))); + } + } + return res; +} + SEXP toprotect_optstring_sexp(const optstring& input){ SEXP x; PROTECT(x = Rf_allocVector(STRSXP, input.size())); diff --git a/tests/testthat/test-match_group.R b/tests/testthat/test-match_group.R index 5f70ea3..5a23916 100644 --- a/tests/testthat/test-match_group.R +++ b/tests/testthat/test-match_group.R @@ -117,3 +117,17 @@ test_that("tolist",{ re2_pmatch_all(str,"(?Pthis)( is)") ) }) + +library(stringi) + +test_that("match NA",{ + expect_identical(structure(c(NA, "sd"), .Dim = c(2L, 1L), .Dimnames = list(NULL, "?nocapture")), re2_match(c(NA,"sd"),"sd")) + expect_identical(re2_match(c(NA,"sd"),"sd"),re2_pmatch(c(NA,"sd"),"sd")) + + expect_identical(c(NA,TRUE), re2_detect(c(NA,"sd"),"sd")) + expect_identical(c(NA, FALSE), re2_detect(c(NA,"sd"),"NA")) + + expect_identical(re2_detect(c(NA,"sd"),"NA"),re2_pdetect(c(NA,"sd"),"NA")) + expect_identical(re2_detect(c(NA,"sd"),"sd"),re2_pdetect(c(NA,"sd"),"sd")) + +})