Skip to content

Commit

Permalink
feat: implement new methods
Browse files Browse the repository at this point in the history
And deprecate old ones
  • Loading branch information
ObserverOfTime committed Mar 27, 2024
1 parent 6b4f18f commit 81edd6c
Show file tree
Hide file tree
Showing 22 changed files with 378 additions and 221 deletions.
9 changes: 4 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ jobs:
shell: python
# run: python -Wignore:::tree_sitter -munittest
run: |-
import tree_sitter as ts
try:
ts.Language(1, "foo")
except RuntimeError as err:
print(err)
try: __import__('tree_sitter').Language(1048576)
except RuntimeError as err: print(err)
env:
PYTHONFAULTHANDLER: 1
2 changes: 1 addition & 1 deletion .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
CIBW_ARCHS_WINDOWS: AMD64
CIBW_ARCHS_LINUX: x86_64 aarch64
CIBW_ARCHS_MACOS: x86_64 arm64
CIBW_TEST_SKIP: cp312* *arm64 *aarch64
CIBW_TEST_SKIP: "*arm64 *aarch64"
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
],
define_macros=[
("PY_SSIZE_T_CLEAN", None),
("TREE_SITTER_HIDE_SYMBOLS", None),
],
undef_macros=[
"TREE_SITTER_FEATURE_WASM",
Expand Down
2 changes: 1 addition & 1 deletion tests/fixtures/tree-sitter-html
Submodule tree-sitter-html updated 1 files
+1 −1 src/tag.h
132 changes: 57 additions & 75 deletions tree_sitter/binding/language.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,92 +13,91 @@ static void segfault_handler(int signal) {
longjmp(segv_jmp, true);
}
}

// HACK: recover from invalid pointer using a signal handler (Unix)
TSLanguage *language_check_pointer(void *ptr) {
PyOS_setsig(SIGSEGV, segfault_handler);
if (!setjmp(segv_jmp)) {
(void)ts_language_version(ptr);
} else {
PyErr_SetString(PyExc_RuntimeError, "Invalid TSLanguage pointer.");
}
PyOS_setsig(SIGSEGV, SIG_DFL);
return PyErr_Occurred() ? NULL : (TSLanguage *)ptr;
}
#else
#include <windows.h>

// HACK: recover from invalid pointer using SEH (Windows)
TSLanguage *language_check_pointer(void *ptr) {
__try {
(void)ts_language_version(ptr);
} __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ? EXCEPTION_EXECUTE_HANDLER
: EXCEPTION_CONTINUE_SEARCH) {
PyErr_SetString(PyExc_RuntimeError, "Invalid TSLanguage pointer.");
}
return PyErr_Occurred() ? NULL : (TSLanguage *)ptr;
}
#endif

int language_init(Language *self, PyObject *args, PyObject *Py_UNUSED(kwargs)) {
PyObject *ptr;
const char *name;
Py_ssize_t length;

if (!PyArg_ParseTuple(args, "Os#:__init__", &ptr, &name, &length)) {
PyObject *language;
if (!PyArg_ParseTuple(args, "O:__init__", &language)) {
return -1;
}

if (PyLong_AsLong(ptr) < 1) {
if (PyLong_AsLong(language) < 1) {
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError, "Language ID must be a positive int.");
PyErr_SetString(PyExc_ValueError, "The language ID must be positive.");
}
return -1;
}

if (length == 0) {
PyErr_SetString(PyExc_ValueError, "Language name cannot be empty.");
self->language = language_check_pointer(PyLong_AsVoidPtr(language));
if (self->language == NULL) {
return -1;
}

self->language = (TSLanguage *)PyLong_AsVoidPtr(ptr);

// HACK: recover from invalid pointer using a signal handler or SEH
#ifndef _MSC_VER
PyOS_sighandler_t old_handler = PyOS_setsig(SIGSEGV, segfault_handler);
if (!setjmp(segv_jmp)) {
#else
__try {
#endif
self->version = ts_language_version(self->language);
#ifndef _MSC_VER
} else {
#else
} __except (GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ? EXCEPTION_EXECUTE_HANDLER
: EXCEPTION_CONTINUE_SEARCH) {
#endif
PyErr_SetString(PyExc_RuntimeError, "Invalid TSLanguage pointer.");
}
#ifndef _MSC_VER
PyOS_setsig(SIGSEGV, old_handler);
self->version = ts_language_version(self->language);
#if HAS_LANGUAGE_NAMES
self->name = ts_language_name(self->language);
#endif
if (PyErr_Occurred()) {
return -1;
}

self->name = PyMem_Calloc(length + 1, sizeof(char));
strncpy(self->name, name, length);

return 0;
}

void language_dealloc(Language *self) {
PyMem_Free(self->name);
Py_TYPE(self)->tp_free(self);
}
void language_dealloc(Language *self) { Py_TYPE(self)->tp_free(self); }

PyObject *language_repr(Language *self) {
const char *format_string = "<Language id=%" PRIuPTR ", name=\"%s\", version=%u>";
return PyUnicode_FromFormat(format_string, (uintptr_t)self->language, self->name,
self->version);
#if HAS_LANGUAGE_NAMES
if (self->name == NULL) {
return PyUnicode_FromFormat("<Language id=%" PRIuPTR ", version=%u, name=None>",
(Py_uintptr_t)self->language, self->version);
}
return PyUnicode_FromFormat("<Language id=%" PRIuPTR ", version=%u, name=\"%s\">",
(Py_uintptr_t)self->language, self->version, self->name);
#else
return PyUnicode_FromFormat("<Language id=%" PRIuPTR ", version=%u>",
(Py_uintptr_t)self->language, self->version);
#endif
}

PyObject *language_str(Language *self) { return PyUnicode_FromString(self->name); }
PyObject *language_int(Language *self) { return PyLong_FromVoidPtr(self->language); }

PyObject *language_int(Language *self) { return PyLong_FromVoidPtr((void *)self->language); }

Py_hash_t language_hash(Language *self) { return (1 << self->version) * (uintptr_t)self->language; }
Py_hash_t language_hash(Language *self) { return (Py_hash_t)self->language; }

PyObject *language_compare(Language *self, PyObject *other, int op) {
if ((op != Py_EQ && op != Py_NE) || !IS_INSTANCE(other, language_type)) {
Py_RETURN_NOTIMPLEMENTED;
}

Language *lang = (Language *)other;
bool result = self->version == lang->version && strcmp(self->name, lang->name) == 0;
bool result = (Py_uintptr_t)self->language == (Py_uintptr_t)lang->language;
return PyBool_FromLong(result & (op == Py_EQ));
}

#if HAS_LANGUAGE_NAMES
PyObject *language_get_name(Language *self, void *Py_UNUSED(payload)) {
return PyUnicode_FromString(self->name);
}
#endif

PyObject *language_get_version(Language *self, void *Py_UNUSED(payload)) {
return PyLong_FromUnsignedLong(self->version);
Expand All @@ -122,10 +121,7 @@ PyObject *language_node_kind_for_id(Language *self, PyObject *args) {
return NULL;
}
const char *name = ts_language_symbol_name(self->language, symbol);
if (name == NULL) {
Py_RETURN_NONE;
}
return PyUnicode_FromString(name);
return name == NULL ? Py_None : PyUnicode_FromString(name);
}

PyObject *language_id_for_node_kind(Language *self, PyObject *args) {
Expand All @@ -136,10 +132,7 @@ PyObject *language_id_for_node_kind(Language *self, PyObject *args) {
return NULL;
}
TSSymbol symbol = ts_language_symbol_for_name(self->language, kind, length, named);
if (symbol == 0) {
Py_RETURN_NONE;
}
return PyLong_FromUnsignedLong(symbol);
return symbol == 0 ? Py_None : PyLong_FromUnsignedLong(symbol);
}

PyObject *language_node_kind_is_named(Language *self, PyObject *args) {
Expand All @@ -166,12 +159,7 @@ PyObject *language_field_name_for_id(Language *self, PyObject *args) {
return NULL;
}
const char *field_name = ts_language_field_name_for_id(self->language, field_id);

if (field_name == NULL) {
Py_RETURN_NONE;
}

return PyUnicode_FromString(field_name);
return field_name == NULL ? Py_None : PyUnicode_FromString(field_name);
}

PyObject *language_field_id_for_name(Language *self, PyObject *args) {
Expand All @@ -180,14 +168,8 @@ PyObject *language_field_id_for_name(Language *self, PyObject *args) {
if (!PyArg_ParseTuple(args, "s#:field_id_for_name", &field_name, &length)) {
return NULL;
}

TSFieldId field_id = ts_language_field_id_for_name(self->language, field_name, length);

if (field_id == 0) {
Py_RETURN_NONE;
}

return PyLong_FromUnsignedLong(field_id);
return field_id == 0 ? Py_None : PyLong_FromUnsignedLong(field_id);
}

PyObject *language_next_state(Language *self, PyObject *args) {
Expand All @@ -207,21 +189,19 @@ PyObject *language_lookahead_iterator(Language *self, PyObject *args) {
return NULL;
}
TSLookaheadIterator *lookahead_iterator = ts_lookahead_iterator_new(self->language, state_id);

if (lookahead_iterator == NULL) {
Py_RETURN_NONE;
}

return lookahead_iterator_new_internal(state, lookahead_iterator);
}

PyObject *language_query(Language *self, PyObject *args) {
ModuleState *state = PyType_GetModuleState(Py_TYPE(self));
char *source;
Py_ssize_t length;
if (!PyArg_ParseTuple(args, "s#:query", &source, &length)) {
return NULL;
}
ModuleState *state = PyType_GetModuleState(Py_TYPE(self));
return query_new_internal(state, self->language, source, length);
}

Expand Down Expand Up @@ -277,7 +257,9 @@ static PyMethodDef language_methods[] = {
};

static PyGetSetDef language_accessors[] = {
#if HAS_LANGUAGE_NAMES
{"name", (getter)language_get_name, NULL, "The name of the language.", NULL},
#endif
{"version", (getter)language_get_version, NULL,
"Get the ABI version number that indicates which version of "
"the Tree-sitter CLI was used to generate this Language.",
Expand All @@ -295,13 +277,13 @@ static PyType_Slot language_type_slots[] = {
{Py_tp_doc, "A tree-sitter language."},
{Py_tp_init, language_init},
{Py_tp_repr, language_repr},
{Py_tp_str, language_str},
{Py_tp_hash, language_hash},
{Py_tp_richcompare, language_compare},
{Py_tp_dealloc, language_dealloc},
{Py_tp_methods, language_methods},
{Py_tp_getset, language_accessors},
{Py_nb_int, language_int},
{Py_nb_index, language_int},
{0, NULL},
};

Expand Down
7 changes: 5 additions & 2 deletions tree_sitter/binding/language.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ void language_dealloc(Language *self);

PyObject *language_repr(Language *self);

PyObject *language_str(Language *self);

PyObject *language_int(Language *self);

PyObject *language_compare(Language *self, PyObject *other, int op);

Py_hash_t language_hash(Language *self);

#if HAS_LANGUAGE_NAMES
PyObject *language_get_name(Language *self, void *payload);
#endif

PyObject *language_get_version(Language *self, void *payload);

Expand All @@ -43,3 +43,6 @@ PyObject *language_next_state(Language *self, PyObject *args);
PyObject *language_lookahead_iterator(Language *self, PyObject *args);

PyObject *language_query(Language *self, PyObject *args);

// TODO(0.23): remove and replace with a static converter
TSLanguage *language_check_pointer(void *ptr);
Loading

0 comments on commit 81edd6c

Please sign in to comment.