From 42b3449d93f69f759c54a5bcb3ac82e18b463f19 Mon Sep 17 00:00:00 2001 From: Noe Thalheim <2394624+noeleont@users.noreply.github.com> Date: Sun, 5 Nov 2023 16:21:09 +0000 Subject: [PATCH 1/2] feat(since): only sync since last sync Storing the last synchronization timestamp in a file allows for incremental syncing, efficiently updating only the new highlights since the last sync, ideal for large collections. --- zotero2readwise/helper.py | 27 +++++++++++++++++++++++++++ zotero2readwise/run.py | 13 ++++++++++++- zotero2readwise/zt2rw.py | 4 +++- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/zotero2readwise/helper.py b/zotero2readwise/helper.py index 263c2cd..6f8a3d5 100644 --- a/zotero2readwise/helper.py +++ b/zotero2readwise/helper.py @@ -17,3 +17,30 @@ def sanitize_tag(tag: str) -> str: """ return tag.strip().replace(" ", "_") + +def read_library_version(): + """ + Reads the library version from the 'since' file and returns it as an integer. + If the file does not exist or does not include a number, returns 0. + """ + try: + with open('since', 'r', encoding='utf-8') as file: + return int(file.read()) + except FileNotFoundError: + print("since file does not exist, using library version 0") + except ValueError: + print("since file does not include a number, using library version 0") + return 0 + +def write_library_version(zotero_client): + """ + Writes the library version of the given Zotero client to a file named 'since'. + + Args: + zotero_client: A Zotero client object. 
+ + Returns: + None + """ + with open('since', 'w', encoding='utf-8') as file: + file.write(str(zotero_client.last_modified_version())) \ No newline at end of file diff --git a/zotero2readwise/run.py b/zotero2readwise/run.py index fe392d5..83b62dc 100644 --- a/zotero2readwise/run.py +++ b/zotero2readwise/run.py @@ -3,6 +3,8 @@ from zotero2readwise.zt2rw import Zotero2Readwise +from helper import write_library_version, read_library_version + if __name__ == "__main__": parser = ArgumentParser(description="Generate Markdown files") parser.add_argument( @@ -40,6 +42,11 @@ default=[], help="Filter Zotero annotations by given color | Options: '#ffd400' (yellow), '#ff6666' (red), '#5fb236' (green), '#2ea8e5' (blue), '#a28ae5' (purple), '#e56eee' (magenta), '#f19837' (orange), '#aaaaaa' (gray)" ) + parser.add_argument( + "--use_since", + action='store_true', + help="Include Zotero items since last run" + ) args = vars(parser.parse_args()) @@ -52,6 +59,7 @@ f"Invalid value for --{bool_arg}. Use 'n' or 'y' (default)." 
+ ) + since = read_library_version() if args["use_since"] else 0 zt2rw = Zotero2Readwise( readwise_token=args["readwise_token"], zotero_key=args["zotero_key"], @@ -59,6 +67,9 @@ zotero_library_type=args["library_type"], include_annotations=args["include_annotations"], include_notes=args["include_notes"], - filter_colors=args["filter_color"] + filter_colors=args["filter_color"], + since=since ) zt2rw.run() + if args["use_since"]: + write_library_version(zt2rw.zotero_client) \ No newline at end of file diff --git a/zotero2readwise/zt2rw.py b/zotero2readwise/zt2rw.py index b0301e5..1b9a3bc 100644 --- a/zotero2readwise/zt2rw.py +++ b/zotero2readwise/zt2rw.py @@ -18,7 +18,8 @@ def __init__( zotero_library_type: str = "user", include_annotations: bool = True, include_notes: bool = False, - filter_colors: List[str] = [] + filter_colors: List[str] = [], + since: int = 0 ): self.readwise = Readwise(readwise_token) self.zotero_client = get_zotero_client( @@ -29,6 +30,7 @@ def __init__( self.zotero = ZoteroAnnotationsNotes(self.zotero_client, filter_colors) self.include_annots = include_annotations self.include_notes = include_notes + self.since = since def get_all_zotero_items(self) -> List[Dict]: annots, notes = [], [] From 349eaa6a6570e85ee6aea6121a8988f0c9d392e0 Mon Sep 17 00:00:00 2001 From: Noe Thalheim <2394624+noeleont@users.noreply.github.com> Date: Sun, 5 Nov 2023 16:24:34 +0000 Subject: [PATCH 2/2] feat(since): finish functionality - get_all_zotero_items use since - refactor: retrieve_all_* to one function with item_type as parameter --- zotero2readwise/zotero.py | 14 +----------- zotero2readwise/zt2rw.py | 46 ++++++++++++++++++++++++++++++--------- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/zotero2readwise/zotero.py b/zotero2readwise/zotero.py index 811734d..1fbd1d9 100644 --- a/zotero2readwise/zotero.py +++ b/zotero2readwise/zotero.py @@ -230,16 +230,4 @@ def save_failed_items_to_json(self, json_filepath_failed_items: str = None): with
open(out_filepath, "w") as f: dump(self.failed_items, f, indent=4) - print(f"\nZOTERO: Detail of failed items are saved into {out_filepath}\n") - - -def retrieve_all_annotations(zotero_client: Zotero) -> List[Dict]: - print( - "Retrieving ALL annotations from Zotero Database. \nIt may take some time...\n" - ) - return zotero_client.everything(zotero_client.items(itemType="annotation")) - - -def retrieve_all_notes(zotero_client: Zotero) -> List[Dict]: - print("Retrieving ALL notes from Zotero Database. \nIt may take some time...\n") - return zotero_client.everything(zotero_client.items(itemType="note")) + print(f"\nZOTERO: Detail of failed items are saved into {out_filepath}\n") \ No newline at end of file diff --git a/zotero2readwise/zt2rw.py b/zotero2readwise/zt2rw.py index 1b9a3bc..cd912ee 100644 --- a/zotero2readwise/zt2rw.py +++ b/zotero2readwise/zt2rw.py @@ -4,8 +4,6 @@ from zotero2readwise.zotero import ( ZoteroAnnotationsNotes, get_zotero_client, - retrieve_all_annotations, - retrieve_all_notes, ) @@ -33,17 +31,22 @@ def __init__( self.since = since def get_all_zotero_items(self) -> List[Dict]: - annots, notes = [], [] - if self.include_annots: - annots = retrieve_all_annotations(self.zotero_client) + """ + Retrieves all Zotero items of the specified types (notes and/or annotations) that were modified since the specified date. - if self.include_notes: - notes = retrieve_all_notes(self.zotero_client) + Returns: + A list of dictionaries representing the retrieved Zotero items. 
+ """ + items = [] + if self.include_annots: + items.extend(self.retrieve_all("annotation", self.since)) - all_zotero_items = annots + notes - print(f"{len(all_zotero_items)} Zotero items are retrieved.") + if self.include_notes: + items.extend(self.retrieve_all("note", self.since)) - return all_zotero_items + print(f"{len(items)} Zotero items are retrieved.") + + return items def run(self, zot_annots_notes: List[Dict] = None) -> None: if zot_annots_notes is None: @@ -55,3 +58,26 @@ def run(self, zot_annots_notes: List[Dict] = None) -> None: self.zotero.save_failed_items_to_json("failed_zotero_items.json") self.readwise.post_zotero_annotations_to_readwise(formatted_items) + + def retrieve_all(self, item_type: str, since: int = 0): + """ + Retrieves all items of a given type from Zotero Database since a given timestamp. + + Args: + item_type (str): Either "annotation" or "note". + since (int): Timestamp in seconds since the Unix epoch. Defaults to 0. + + Returns: + List[Dict]: List of dictionaries containing the retrieved items. + """ + if item_type not in ["annotation", "note"]: + raise ValueError("item_type must be either 'annotation' or 'note'") + + if since == 0: + print(f"Retrieving ALL {item_type}s from Zotero Database") + else: + print(f"Retrieving {item_type}s since last run from Zotero Database") + + print("It may take some time...") + query = self.zotero_client.items(itemType={item_type}, since=since) + return self.zotero_client.everything(query) \ No newline at end of file