Skip to content

Commit

Permalink
ver.2
Browse files Browse the repository at this point in the history
  • Loading branch information
digama0 committed Apr 23, 2024
1 parent 475a10c commit 1e6967c
Show file tree
Hide file tree
Showing 16 changed files with 132 additions and 116 deletions.
2 changes: 1 addition & 1 deletion src/Init/Data/String/Extra.lean
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def toNat! (s : String) : Nat :=
opaque validateUTF8 (a : @& ByteArray) : Bool

/-- Converts a [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoded `ByteArray` string to `String`. -/
@[extern "lean_string_from_utf8"]
@[extern "lean_string_from_utf8_unchecked"]
opaque fromUTF8 (a : @& ByteArray) (h : validateUTF8 a) : String

/-- Converts a [UTF-8](https://en.wikipedia.org/wiki/UTF-8) encoded `ByteArray` string to `String`,
Expand Down
6 changes: 5 additions & 1 deletion src/Lean/Compiler/IR/EmitC.lean
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,11 @@ def emitLit (z : VarId) (t : IRType) (v : LitVal) : M Unit := do
emitLhs z;
match v with
| LitVal.num v => emitNumLit t v; emitLn ";"
| LitVal.str v => emit "lean_mk_string_from_bytes("; emit (quoteString v); emit ", "; emit v.utf8ByteSize; emitLn ");"
| LitVal.str v =>
emit "lean_mk_string_unchecked(";
emit (quoteString v); emit ", ";
emit v.utf8ByteSize; emit ", ";
emit v.length; emitLn ");"

def emitVDecl (z : VarId) (t : IRType) (v : Expr) : M Unit :=
match v with
Expand Down
13 changes: 7 additions & 6 deletions src/Lean/Compiler/IR/EmitLLVM.lean
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,14 @@ def callLeanUnsignedToNatFn (builder : LLVM.Builder llvmctx)
let nv ← constIntUnsigned n
LLVM.buildCall2 builder fnty f #[nv] name

def callLeanMkStringFromBytesFn (builder : LLVM.Builder llvmctx)
(strPtr nBytes : LLVM.Value llvmctx) (name : String) : M llvmctx (LLVM.Value llvmctx) := do
let fnName := "lean_mk_string_from_bytes"
def callLeanMkStringUncheckedFn (builder : LLVM.Builder llvmctx)
(strPtr nBytes nChars : LLVM.Value llvmctx) (name : String) : M llvmctx (LLVM.Value llvmctx) := do
let fnName := "lean_mk_string_unchecked"
let retty ← LLVM.voidPtrType llvmctx
let argtys := #[← LLVM.voidPtrType llvmctx, ← LLVM.size_tType llvmctx]
let argtys := #[← LLVM.voidPtrType llvmctx, ← LLVM.size_tType llvmctx, ← LLVM.size_tType llvmctx]
let fn ← getOrCreateFunctionPrototype (← getLLVMModule) retty fnName argtys
let fnty ← LLVM.functionType retty argtys
LLVM.buildCall2 builder fnty fn #[strPtr, nBytes] name
LLVM.buildCall2 builder fnty fn #[strPtr, nBytes, nChars] name

def callLeanMkString (builder : LLVM.Builder llvmctx)
(strPtr : LLVM.Value llvmctx) (name : String) : M llvmctx (LLVM.Value llvmctx) := do
Expand Down Expand Up @@ -772,7 +772,8 @@ def emitLit (builder : LLVM.Builder llvmctx)
(← LLVM.opaquePointerTypeInContext llvmctx)
str_global #[zero] ""
let nbytes ← constIntSizeT v.utf8ByteSize
callLeanMkStringFromBytesFn builder strPtr nbytes ""
let nchars ← constIntSizeT v.length
callLeanMkStringUncheckedFn builder strPtr nbytes nchars ""
LLVM.buildStore builder zv zslot
return zslot

Expand Down
9 changes: 5 additions & 4 deletions src/include/lean/lean.h
Original file line number Diff line number Diff line change
Expand Up @@ -990,9 +990,10 @@ static inline size_t lean_string_capacity(lean_object * o) { return lean_to_stri
static inline size_t lean_string_byte_size(lean_object * o) { return sizeof(lean_string_object) + lean_string_capacity(o); }
/* instance : inhabited char := ⟨'A'⟩ */
static inline uint32_t lean_char_default_value() { return 'A'; }
LEAN_EXPORT lean_obj_res lean_mk_string_unchecked(char const * s, size_t sz, size_t len);
LEAN_EXPORT lean_obj_res lean_mk_string_from_bytes(char const * s, size_t sz);
LEAN_EXPORT lean_obj_res lean_mk_ascii_string(char const * s);
LEAN_EXPORT lean_obj_res lean_mk_utf8_string(char const * s);
LEAN_EXPORT lean_obj_res lean_mk_string_from_bytes_unchecked(char const * s, size_t sz);
LEAN_EXPORT lean_obj_res lean_mk_ascii_string_unchecked(char const * s);
LEAN_EXPORT lean_obj_res lean_mk_string(char const * s);
static inline char const * lean_string_cstr(b_lean_obj_arg o) {
assert(lean_is_string(o));
Expand Down Expand Up @@ -2066,11 +2067,11 @@ static inline uint8_t lean_version_get_is_release(lean_obj_arg _unit) {
}

static inline lean_obj_res lean_version_get_special_desc(lean_obj_arg _unit) {
return lean_mk_ascii_string(LEAN_SPECIAL_VERSION_DESC);
return lean_mk_string(LEAN_SPECIAL_VERSION_DESC);
}

static inline lean_obj_res lean_system_platform_target(lean_obj_arg _unit) {
return lean_mk_ascii_string(LEAN_PLATFORM_TARGET);
return lean_mk_string(LEAN_PLATFORM_TARGET);
}

static inline uint8_t lean_internal_is_stage0(lean_obj_arg _unit) {
Expand Down
2 changes: 1 addition & 1 deletion src/library/print.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,6 @@ void init_default_print_fn() {
extern "C" LEAN_EXPORT object * lean_expr_dbg_to_string(b_obj_arg e) {
std::ostringstream out;
out << expr(e, true);
return mk_utf8_string(out.str());
return mk_string(out.str());
}
}
2 changes: 1 addition & 1 deletion src/library/trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ static void io_eprint(obj_arg s) {
}

tout::~tout() {
io_eprint(mk_utf8_string(m_out.str()));
io_eprint(mk_string(m_out.str()));
}

std::ostream & operator<<(std::ostream & ios, tclass const & c) {
Expand Down
10 changes: 5 additions & 5 deletions src/runtime/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ extern "C" LEAN_EXPORT obj_res lean_io_get_random_bytes (size_t nbytes, obj_arg
#if !defined(LEAN_WINDOWS)
int fd_urandom = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
if (fd_urandom < 0) {
return io_result_mk_error(decode_io_error(errno, lean_mk_ascii_string("/dev/urandom")));
return io_result_mk_error(decode_io_error(errno, lean_mk_ascii_string_unchecked("/dev/urandom")));
}
#endif

Expand Down Expand Up @@ -618,7 +618,7 @@ extern "C" LEAN_EXPORT obj_res lean_io_timeit(b_obj_arg msg, obj_arg fn, obj_arg
} else {
out << string_cstr(msg) << " " << diff.count() << "s";
}
io_eprintln(mk_utf8_string(out.str()));
io_eprintln(mk_string(out.str()));
return w;
}

Expand All @@ -630,7 +630,7 @@ extern "C" LEAN_EXPORT obj_res lean_io_allocprof(b_obj_arg msg, obj_arg fn, obj_
allocprof prof(out, string_cstr(msg));
res = apply_1(fn, w);
}
io_eprintln(mk_utf8_string(out.str()));
io_eprintln(mk_string(out.str()));
return res;
}

Expand Down Expand Up @@ -822,7 +822,7 @@ extern "C" LEAN_EXPORT obj_res lean_io_rename(b_obj_arg from, b_obj_arg to, lean
if (!ok) {
std::ostringstream s;
s << string_cstr(from) << " and/or " << string_cstr(to);
object_ref out{mk_utf8_string(s.str())};
object_ref out{mk_string(s.str())};
return io_result_mk_error(decode_io_error(errno, out.raw()));
}
#endif
Expand Down Expand Up @@ -1086,7 +1086,7 @@ extern "C" LEAN_EXPORT obj_res lean_io_exit(uint8_t code, obj_arg /* w */) {
}

void initialize_io() {
g_io_error_nullptr_read = lean_mk_io_user_error(mk_ascii_string("null reference read"));
g_io_error_nullptr_read = lean_mk_io_user_error(mk_ascii_string_unchecked("null reference read"));
mark_persistent(g_io_error_nullptr_read);
g_io_handle_external_class = lean_register_external_class(io_handle_finalizer, io_handle_foreach);
#if defined(LEAN_WINDOWS)
Expand Down
100 changes: 53 additions & 47 deletions src/runtime/object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,12 +355,12 @@ extern "C" LEAN_EXPORT lean_object * lean_array_data(lean_obj_arg a) {
}

extern "C" LEAN_EXPORT lean_obj_res lean_array_get_panic(lean_obj_arg def_val) {
return lean_panic_fn(def_val, lean_mk_ascii_string("Error: index out of bounds"));
return lean_panic_fn(def_val, lean_mk_ascii_string_unchecked("Error: index out of bounds"));
}

extern "C" LEAN_EXPORT lean_obj_res lean_array_set_panic(lean_obj_arg a, lean_obj_arg v) {
lean_dec(v);
return lean_panic_fn(a, lean_mk_ascii_string("Error: index out of bounds"));
return lean_panic_fn(a, lean_mk_ascii_string_unchecked("Error: index out of bounds"));
}

// =======================================
Expand Down Expand Up @@ -1552,9 +1552,9 @@ extern "C" LEAN_EXPORT lean_obj_res lean_float_to_string(double a) {
if (isnan(a))
// override NaN because we don't want NaNs to be distinguishable
// because the sign bit / payload bits can be architecture-dependent
return mk_ascii_string("NaN");
return mk_ascii_string_unchecked("NaN");
else
return mk_ascii_string(std::to_string(a));
return mk_ascii_string_unchecked(std::to_string(a));
}

extern "C" LEAN_EXPORT double lean_float_scaleb(double a, b_lean_obj_arg b) {
Expand Down Expand Up @@ -1598,42 +1598,59 @@ static object * string_ensure_capacity(object * o, size_t extra) {
}
}

extern "C" LEAN_EXPORT object * lean_mk_string_core(char const * s, size_t sz, size_t len) {
extern "C" LEAN_EXPORT object * lean_mk_string_unchecked(char const * s, size_t sz, size_t len) {
size_t rsz = sz + 1;
object * r = lean_alloc_string(rsz, rsz, len);
memcpy(w_string_cstr(r), s, sz);
w_string_cstr(r)[sz] = 0;
return r;
}

extern "C" LEAN_EXPORT object * lean_mk_string_from_bytes(char const * s, size_t sz) {
return lean_mk_string_core(s, sz, utf8_strlen(s, sz));
object * lean_mk_string_lossy_recover(char const * s, size_t sz, size_t pos, size_t i) {
std::string str(s, pos);
size_t start = pos;
while (pos < sz) {
if (!validate_utf8_one((const uint8_t *)s, sz, pos)) {
str.append(s + start, pos - start);
str.append("\ufffd"); // U+FFFD REPLACEMENT CHARACTER
do pos++; while (pos < sz && (s[pos] & 0xc0) == 0x80);
start = pos;
}
i++;
}
str.append(s + start, pos - start);
return lean_mk_string_unchecked(str.data(), str.size(), i);
}

extern "C" LEAN_EXPORT object * lean_mk_string(char const * s) {
size_t len = strlen(s);
if (validate_utf8((const uint8_t *)s, len)) {
return lean_mk_string_from_bytes(s, len);
extern "C" LEAN_EXPORT object * lean_mk_string_from_bytes(char const * s, size_t sz) {
size_t pos = 0, i = 0;
if (validate_utf8((const uint8_t *)s, sz, pos, i)) {
return lean_mk_string_unchecked(s, pos, i);
} else {
return lean_mk_string_core("", 0, 0);
return lean_mk_string_lossy_recover(s, sz, pos, i);
}
}

extern "C" LEAN_EXPORT object * lean_mk_utf8_string(char const * s) {
extern "C" LEAN_EXPORT object * lean_mk_string_from_bytes_unchecked(char const * s, size_t sz) {
return lean_mk_string_unchecked(s, sz, utf8_strlen(s, sz));
}

extern "C" LEAN_EXPORT object * lean_mk_string(char const * s) {
return lean_mk_string_from_bytes(s, strlen(s));
}

extern "C" LEAN_EXPORT object * lean_mk_ascii_string(char const * s) {
extern "C" LEAN_EXPORT object * lean_mk_ascii_string_unchecked(char const * s) {
size_t len = strlen(s);
return lean_mk_string_core(s, len, len);
return lean_mk_string_unchecked(s, len, len);
}

extern "C" LEAN_EXPORT obj_res lean_string_from_utf8(b_obj_arg a) {
return lean_mk_string_from_bytes(reinterpret_cast<char *>(lean_sarray_cptr(a)), lean_sarray_size(a));
extern "C" LEAN_EXPORT obj_res lean_string_from_utf8_unchecked(b_obj_arg a) {
return lean_mk_string_from_bytes_unchecked(reinterpret_cast<char *>(lean_sarray_cptr(a)), lean_sarray_size(a));
}

extern "C" LEAN_EXPORT uint8 lean_string_validate_utf8(b_obj_arg a) {
return validate_utf8(lean_sarray_cptr(a), lean_sarray_size(a));
size_t pos = 0, i = 0;
return validate_utf8(lean_sarray_cptr(a), lean_sarray_size(a), pos, i);
}

extern "C" LEAN_EXPORT obj_res lean_string_to_utf8(b_obj_arg s) {
Expand All @@ -1644,19 +1661,11 @@ extern "C" LEAN_EXPORT obj_res lean_string_to_utf8(b_obj_arg s) {
}

object * mk_string(std::string const & s) {
if (validate_utf8((const uint8_t *)s.data(), s.size())) {
return lean_mk_string_from_bytes(s.data(), s.size());
} else {
return lean_mk_string_core("", 0, 0);
}
}

object * mk_utf8_string(std::string const & s) {
return lean_mk_string_from_bytes(s.data(), s.size());
}

object * mk_ascii_string(std::string const & s) {
return lean_mk_string_core(s.data(), s.size(), s.size());
object * mk_ascii_string_unchecked(std::string const & s) {
return lean_mk_string_unchecked(s.data(), s.size(), s.size());
}

std::string string_to_std(b_obj_arg o) {
Expand Down Expand Up @@ -1726,16 +1735,6 @@ extern "C" LEAN_EXPORT bool lean_string_lt(object * s1, object * s2) {
return r < 0 || (r == 0 && sz1 < sz2);
}

static std::string list_as_string(b_obj_arg lst) {
std::string s;
b_obj_arg o = lst;
while (!lean_is_scalar(o)) {
push_unicode_scalar(s, lean_unbox_uint32(lean_ctor_get(o, 0)));
o = lean_ctor_get(o, 1);
}
return s;
}

static obj_res string_to_list_core(std::string const & s, bool reverse = false) {
std::vector<unsigned> tmp;
utf8_decode(s, tmp);
Expand All @@ -1754,9 +1753,16 @@ static obj_res string_to_list_core(std::string const & s, bool reverse = false)
}

extern "C" LEAN_EXPORT obj_res lean_string_mk(obj_arg cs) {
std::string s = list_as_string(cs);
std::string s;
b_obj_arg o = cs;
size_t len = 0;
while (!lean_is_scalar(o)) {
push_unicode_scalar(s, lean_unbox_uint32(lean_ctor_get(o, 0)));
o = lean_ctor_get(o, 1);
len++;
}
lean_dec(cs);
return mk_utf8_string(s);
return lean_mk_string_unchecked(s.data(), s.size(), len);
}

extern "C" LEAN_EXPORT obj_res lean_string_data(obj_arg s) {
Expand Down Expand Up @@ -1889,7 +1895,7 @@ extern "C" LEAN_EXPORT obj_res lean_string_utf8_get_opt(b_obj_arg s, b_obj_arg i
}

static uint32 lean_string_utf8_get_panic() {
lean_panic_fn(lean_box(0), lean_mk_ascii_string("Error: invalid `String.Pos` at `String.get!`"));
lean_panic_fn(lean_box(0), lean_mk_ascii_string_unchecked("Error: invalid `String.Pos` at `String.get!`"));
return lean_char_default_value();
}

Expand Down Expand Up @@ -1970,10 +1976,10 @@ extern "C" LEAN_EXPORT obj_res lean_string_utf8_extract(b_obj_arg s, b_obj_arg b
usize e = lean_unbox(e0);
char const * str = lean_string_cstr(s);
usize sz = lean_string_size(s) - 1;
if (b >= e || b >= sz) return lean_mk_string_core("", 0, 0);
if (b >= e || b >= sz) return lean_mk_string_unchecked("", 0, 0);
/* In the reference implementation if `b` is not pointing to a valid UTF8
character start position, the result is the empty string. */
if (!is_utf8_first_byte(str[b])) return lean_mk_string_core("", 0, 0);
if (!is_utf8_first_byte(str[b])) return lean_mk_string_unchecked("", 0, 0);
if (e > sz) e = sz;
lean_assert(b < e);
lean_assert(e > 0);
Expand All @@ -1982,7 +1988,7 @@ extern "C" LEAN_EXPORT obj_res lean_string_utf8_extract(b_obj_arg s, b_obj_arg b
if (e < sz && !is_utf8_first_byte(str[e])) e = sz;
usize new_sz = e - b;
lean_assert(new_sz > 0);
return lean_mk_string_from_bytes(lean_string_cstr(s) + b, new_sz);
return lean_mk_string_from_bytes_unchecked(lean_string_cstr(s) + b, new_sz);
}

extern "C" LEAN_EXPORT obj_res lean_string_utf8_prev(b_obj_arg s, b_obj_arg i0) {
Expand Down Expand Up @@ -2033,7 +2039,7 @@ extern "C" LEAN_EXPORT obj_res lean_string_utf8_set(obj_arg s, b_obj_arg i0, uin
std::string new_s = string_to_std(s);
dec(s);
new_s.replace(i, get_utf8_char_size_at(new_s, i), tmp);
return mk_utf8_string(new_s);
return lean_mk_string_unchecked(new_s.data(), new_s.size(), lean_string_len(s));
}

extern "C" LEAN_EXPORT uint64 lean_string_hash(b_obj_arg s) {
Expand All @@ -2043,7 +2049,7 @@ extern "C" LEAN_EXPORT uint64 lean_string_hash(b_obj_arg s) {
}

extern "C" LEAN_EXPORT obj_res lean_string_of_usize(size_t n) {
return mk_ascii_string(std::to_string(n));
return mk_ascii_string_unchecked(std::to_string(n));
}

// =======================================
Expand Down Expand Up @@ -2332,7 +2338,7 @@ extern "C" LEAN_EXPORT object * lean_dbg_sleep(uint32 ms, obj_arg fn) {

extern "C" LEAN_EXPORT object * lean_dbg_trace_if_shared(obj_arg s, obj_arg a) {
if (!lean_is_scalar(a) && lean_is_shared(a)) {
io_eprintln(mk_utf8_string(std::string("shared RC ") + lean_string_cstr(s)));
io_eprintln(mk_string(std::string("shared RC ") + lean_string_cstr(s)));
}
return a;
}
Expand Down
5 changes: 1 addition & 4 deletions src/runtime/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,11 +237,8 @@ inline obj_res byte_array_set(obj_arg a, b_obj_arg i, uint8 b) { return lean_byt
inline size_t string_capacity(object * o) { return lean_string_capacity(o); }
inline uint32 char_default_value() { return lean_char_default_value(); }
inline obj_res alloc_string(size_t size, size_t capacity, size_t len) { return lean_alloc_string(size, capacity, len); }
inline obj_res mk_ascii_string(char const * s) { return lean_mk_ascii_string(s); }
inline obj_res mk_utf8_string(char const * s) { return lean_mk_utf8_string(s); }
inline obj_res mk_string(char const * s) { return lean_mk_string(s); }
LEAN_EXPORT obj_res mk_ascii_string(std::string const & s);
LEAN_EXPORT obj_res mk_utf8_string(std::string const & s);
LEAN_EXPORT obj_res mk_ascii_string_unchecked(std::string const & s);
LEAN_EXPORT obj_res mk_string(std::string const & s);
LEAN_EXPORT std::string string_to_std(b_obj_arg o);
inline char const * string_cstr(b_obj_arg o) { return lean_string_cstr(o); }
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ extern "C" LEAN_EXPORT uint8 lean_system_platform_emscripten(obj_arg) {
#endif
}

extern "C" object * lean_get_githash(obj_arg) { return lean_mk_ascii_string(LEAN_GITHASH); }
extern "C" object * lean_get_githash(obj_arg) { return lean_mk_string(LEAN_GITHASH); }

extern "C" LEAN_EXPORT uint8_t lean_internal_has_llvm_backend(lean_obj_arg _unit) {
#ifdef LEAN_LLVM
Expand Down
Loading

0 comments on commit 1e6967c

Please sign in to comment.