Skip to content

Commit

Permalink
feat: String.Pos.isValid (#3959)
Browse files Browse the repository at this point in the history
This adds a function that can be used to check whether a position is on
a UTF-8 byte boundary.
  • Loading branch information
digama0 authored Apr 20, 2024
1 parent 291bb84 commit aeacb7b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/Init/Data/String/Basic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ def append : String → (@& String) → String
def toList (s : String) : List Char :=
s.data

/-- Returns true if `p` is a valid UTF-8 position in the string `s`, meaning that `p ≤ s.endPos`
and `p` lies on a UTF-8 character boundary. This has an O(1) implementation in the runtime. -/
@[extern "lean_string_is_valid_pos"]
def Pos.isValid (s : @&String) (p : @& Pos) : Bool :=
go s.data 0
where
go : List Char → Pos → Bool
| [], i => i = p
| c::cs, i => if i = p then true else go cs (i + c)

def utf8GetAux : List Char → Pos → Pos → Char
| [], _, _ => default
| c::cs, i, p => if i = p then c else utf8GetAux cs (i + c) p
Expand Down
13 changes: 13 additions & 0 deletions src/runtime/object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1922,6 +1922,19 @@ static inline bool is_utf8_first_byte(unsigned char c) {
return (c & 0x80) == 0 || (c & 0xe0) == 0xc0 || (c & 0xf0) == 0xe0 || (c & 0xf8) == 0xf0;
}

extern "C" LEAN_EXPORT uint8 lean_string_is_valid_pos(b_obj_arg s, b_obj_arg i0) {
if (!lean_is_scalar(i0)) {
/* See comment at string_utf8_get */
return false;
}
usize i = lean_unbox(i0);
usize sz = lean_string_size(s) - 1;
if (i > sz) return false;
if (i == sz) return true;
char const * str = lean_string_cstr(s);
return is_utf8_first_byte(str[i]);
}

extern "C" LEAN_EXPORT obj_res lean_string_utf8_extract(b_obj_arg s, b_obj_arg b0, b_obj_arg e0) {
if (!lean_is_scalar(b0) || !lean_is_scalar(e0)) {
/* See comment at string_utf8_get */
Expand Down

0 comments on commit aeacb7b

Please sign in to comment.