Skip to content

Commit

Permalink
Fix: re2_detect with NA #8
Browse files Browse the repository at this point in the history
  • Loading branch information
qinwf committed May 8, 2016
1 parent d5deeac commit ca857bd
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 7 deletions.
2 changes: 1 addition & 1 deletion inst/include/re2r.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,6 @@ XPtr<RE2Obj> cpp_re2_compile(const char* pattern,

SEXP toprotect_optstring_sexp(const optstring& input);
SEXP toprotect_vec_string_sexp(const vector<string>& input);

vector<tr2::optional<string>> as_vec_opt_string(CharacterVector& input);

#endif
20 changes: 14 additions & 6 deletions src/re2r_match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,12 @@ SEXP cpp_detect(CharacterVector& input,
LogicalVector res(input.size());
auto resi = res.begin();
for(auto it = 0; it != input.size(); it++, resi ++){
auto r_char = R_CHAR(STRING_ELT(inputx, it));
auto rstr = STRING_ELT(inputx, it);
if (rstr == NA_STRING){
*resi = NA_LOGICAL;
continue;
}
auto r_char = R_CHAR(rstr);
*resi = pattern->Match( r_char ,0, strlen(r_char),
anchor_type, nullptr, 0);
}
Expand All @@ -338,22 +343,25 @@ SEXP cpp_detect(CharacterVector& input,

struct BoolP : public Worker
{
vector<string>& input;
vector<tr2::optional<string>>& input;
RVector<int> output;
RE2& tt;
RE2::Options& opt;
const RE2::Anchor anchor_type;

BoolP (vector<string>& input_,RVector<int> output_, RE2& tt_, RE2::Options& opt_, const RE2::Anchor& anchor_type_)
BoolP (vector<tr2::optional<string>>& input_,RVector<int> output_, RE2& tt_, RE2::Options& opt_, const RE2::Anchor& anchor_type_)
: input(input_), output(output_), tt(tt_), opt(opt_), anchor_type(anchor_type_){}

void operator()(std::size_t begin, std::size_t end) {
RE2 pattern(tt.pattern(),opt);
std::transform(input.begin() + begin,
input.begin() + end,
output.begin() + begin,
[this,&pattern](string& x)->int{
return pattern.Match(x, 0, (int) x.length(),
[this,&pattern](tr2::optional<string>& x)->int{
if (!bool(x)){
return NA_LOGICAL;
}
return pattern.Match(x.value(), 0, (int) x.value().length(),
anchor_type, nullptr, 0);
});
}
Expand All @@ -366,7 +374,7 @@ SEXP cpp_detect_parallel(CharacterVector& input,
RE2::Anchor anchor_type){
LogicalVector reso(input.size());
RVector<int> res(reso);
vector<string> inputv = as<vector<string>>(input);
auto inputv = as_vec_opt_string(input);
BoolP pobj(inputv, res, *pattern, opt, anchor_type);
parallelFor(0, input.size(), pobj, 1000000);
return wrap(reso);
Expand Down
17 changes: 17 additions & 0 deletions src/re2r_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,23 @@ SEXP toprotect_vec_string_sexp(const vector<string>& input){
return x;
}

// Convert an R character vector into a vector of optional strings.
//
// Every element of `input` is copied into a std::string. Elements equal to
// NA_STRING become a disengaged optional (tr2::nullopt), so callers can tell
// a missing value apart from an ordinary string.
//
// @param input  R character vector to convert.
// @return       One optional<string> per element of `input`, in input order.
vector<tr2::optional<string>> as_vec_opt_string(CharacterVector& input){
    SEXP inputx = input;
    auto n = input.size();

    vector<tr2::optional<string>> res;
    res.reserve(n);

    // Index with the same type that size() returns: a plain `int` counter
    // could overflow before reaching `n` when the size type is wider than
    // int (R long vectors).
    for (decltype(n) i = 0; i != n; ++i){
        auto rstr = STRING_ELT(inputx, i);
        if (rstr == NA_STRING){
            // Missing value: keep the slot, but leave it empty.
            res.push_back(tr2::nullopt);
        } else {
            res.emplace_back(string(R_CHAR(rstr)));
        }
    }
    return res;
}

SEXP toprotect_optstring_sexp(const optstring& input){
SEXP x;
PROTECT(x = Rf_allocVector(STRSXP, input.size()));
Expand Down
14 changes: 14 additions & 0 deletions tests/testthat/test-match_group.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,17 @@ test_that("tolist",{
re2_pmatch_all(str,"(?P<testname>this)( is)")
)
})

library(stringi)

test_that("match NA",{
    # Shared fixture: a missing value followed by one ordinary string.
    na_then_sd <- c(NA, "sd")

    # Matching: the NA element must stay NA in the result matrix, and the
    # parallel variant must agree with the serial one.
    expected_match <- structure(
        c(NA, "sd"),
        .Dim = c(2L, 1L),
        .Dimnames = list(NULL, "?nocapture")
    )
    expect_identical(expected_match, re2_match(na_then_sd, "sd"))
    expect_identical(re2_match(na_then_sd, "sd"), re2_pmatch(na_then_sd, "sd"))

    # Detection: NA input yields NA output; the pattern "NA" must not match
    # the missing value.
    expect_identical(c(NA, TRUE), re2_detect(na_then_sd, "sd"))
    expect_identical(c(NA, FALSE), re2_detect(na_then_sd, "NA"))

    # Serial and parallel detection must produce identical results.
    expect_identical(re2_detect(na_then_sd, "NA"), re2_pdetect(na_then_sd, "NA"))
    expect_identical(re2_detect(na_then_sd, "sd"), re2_pdetect(na_then_sd, "sd"))
})

0 comments on commit ca857bd

Please sign in to comment.