From 1e2393a1415c7974bc0666202de48dd1fbd01b7e Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Thu, 29 Aug 2024 12:55:19 +0200 Subject: [PATCH 01/29] extract:command_extract - in case of unknown errors, log event with file's uuid --- digiarch/extract/extract.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/digiarch/extract/extract.py b/digiarch/extract/extract.py index 99ab6e05..c16340ba 100644 --- a/digiarch/extract/extract.py +++ b/digiarch/extract/extract.py @@ -180,6 +180,7 @@ def command_extract( offset += 1 continue + # noinspection PyBroadException try: extractor = extractor_cls(database, archive_file, root) extracted_files_paths = list(extractor.extract()) @@ -230,6 +231,10 @@ def command_extract( database.history.insert(event) event.log(ERROR, log_file, log_stdout) continue + except Exception as err: + event = HistoryEntry.command_history(ctx, "error", archive_file.uuid, None, repr(err)) + event.log(ERROR, log_file, log_stdout) + raise for path in extracted_files_paths: extracted_file, file_history = identify_file( From ef115deb2b3a8f4fae3db792ceb39a082133e190 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Thu, 29 Aug 2024 12:55:39 +0200 Subject: [PATCH 02/29] version - patch 3.2.16 > 3.2.17 --- digiarch/__version__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/digiarch/__version__.py b/digiarch/__version__.py index 87a7690e..e8c69c35 100644 --- a/digiarch/__version__.py +++ b/digiarch/__version__.py @@ -1 +1 @@ -__version__ = "3.2.16" +__version__ = "3.2.17" diff --git a/pyproject.toml b/pyproject.toml index d39011ed..b3f80d5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "digiarch" -version = "3.2.16" +version = "3.2.17" description = "Tools for the Digital Archive Project at Aarhus Stadsarkiv" authors = ["Aarhus Stadsarkiv "] license = "GPL-3.0" From 9b4da4bf4d6bcd0096c42cddaca0050101a6f735 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: 
Thu, 29 Aug 2024 13:07:57 +0200 Subject: [PATCH 03/29] edit.common:find_files - add limit parameter --- digiarch/edit/common.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/digiarch/edit/common.py b/digiarch/edit/common.py index 30f48cfc..dc015591 100644 --- a/digiarch/edit/common.py +++ b/digiarch/edit/common.py @@ -75,7 +75,13 @@ def inner(callback: FC) -> FC: return inner -def find_files(database: FileDB, ids: tuple[str, ...], id_type: str, id_files: bool) -> Generator[File, None, None]: +def find_files( + database: FileDB, + ids: tuple[str, ...], + id_type: str, + id_files: bool, + limit: int | None = None, +) -> Generator[File, None, None]: if id_files: ids = tuple(i.strip() for f in ids for i in Path(f).read_text().splitlines() if i.strip()) @@ -94,4 +100,4 @@ def find_files(database: FileDB, ids: tuple[str, ...], id_type: str, id_files: b where.append(f"{id_type} = ?") parameters.append(id_value) - yield from database.files.select(where=" or ".join(where), parameters=parameters) + yield from database.files.select(where=" or ".join(where), parameters=parameters, limit=limit) From 87d82d953c4f9336880f353e9d25a4a84e9e2c38 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Thu, 29 Aug 2024 13:11:31 +0200 Subject: [PATCH 04/29] edit.common:find_files - add order by parameter --- digiarch/edit/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/digiarch/edit/common.py b/digiarch/edit/common.py index dc015591..99702c14 100644 --- a/digiarch/edit/common.py +++ b/digiarch/edit/common.py @@ -80,6 +80,7 @@ def find_files( ids: tuple[str, ...], id_type: str, id_files: bool, + order_by: list[tuple[str, str]] | None = None, limit: int | None = None, ) -> Generator[File, None, None]: if id_files: @@ -100,4 +101,4 @@ def find_files( where.append(f"{id_type} = ?") parameters.append(id_value) - yield from database.files.select(where=" or ".join(where), parameters=parameters, limit=limit) + yield from 
database.files.select(where=" or ".join(where), parameters=parameters, order_by=order_by, limit=limit) From de39174bebe55ef795540c078c5c503706c847bf Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Thu, 29 Aug 2024 13:12:48 +0200 Subject: [PATCH 05/29] search:command_search - add command to search and display files --- digiarch/cli.py | 2 ++ digiarch/search.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 digiarch/search.py diff --git a/digiarch/cli.py b/digiarch/cli.py index 03baf20f..d26461af 100644 --- a/digiarch/cli.py +++ b/digiarch/cli.py @@ -10,6 +10,7 @@ from .history import command_history from .identify import command_identify from .identify import command_reidentify +from .search import command_search from .upgrade import command_upgrade @@ -24,6 +25,7 @@ def app(): app.add_command(command_reidentify, command_reidentify.name) app.add_command(command_extract, command_extract.name) app.add_command(group_edit, group_edit.name) +app.add_command(command_search, command_search.name) app.add_command(command_history, command_history.name) app.add_command(command_doctor, command_doctor.name) app.add_command(command_upgrade, command_upgrade.name) diff --git a/digiarch/search.py b/digiarch/search.py new file mode 100644 index 00000000..bb431c92 --- /dev/null +++ b/digiarch/search.py @@ -0,0 +1,67 @@ +from pathlib import Path +from pathlib import PosixPath +from pathlib import WindowsPath +from sys import stdout +from uuid import UUID + +import yaml +from acacore.database import FileDB +from click import Choice +from click import command +from click import Context +from click import IntRange +from click import option +from click import pass_context + +from digiarch.edit.common import argument_ids +from digiarch.edit.common import find_files +from .common import argument_root +from .common import check_database_version +from .common import ctx_params + + +@command("search", no_args_is_help=True, 
short_help="Search the database.") +@argument_root(True) +@argument_ids(True) +@option( + "--order-by", + type=Choice(["relative_path", "size", "action"]), + default="relative_path", + show_default=True, + show_choices=True, + help="Set sorting field.", +) +@option("--sort", type=Choice(["asc", "desc"]), default="asc", help="Set sorting direction.") +@option("--limit", type=IntRange(1), default=None, help="Limit the number of results.") +@pass_context +def command_search( + ctx: Context, + root: Path, + ids: tuple[str], + id_type: str, + id_files: bool, + order_by: str, + sort: str, + limit: int | None, +): + check_database_version(ctx, ctx_params(ctx)["root"], (db_path := root / "_metadata" / "files.db")) + + yaml.add_representer(UUID, lambda dumper, data: dumper.represent_str(str(data))) + yaml.add_representer(Path, lambda dumper, data: dumper.represent_str(str(data))) + yaml.add_representer(PosixPath, lambda dumper, data: dumper.represent_str(str(data))) + yaml.add_representer(WindowsPath, lambda dumper, data: dumper.represent_str(str(data))) + yaml.add_representer( + str, + lambda dumper, data: ( + dumper.represent_str(str(data)) + if len(data) < 200 + else dumper.represent_scalar("tag:yaml.org,2002:str", str(data), style="|") + ), + ) + + with FileDB(db_path) as database: + for file in find_files(database, ids, id_type, id_files, [(order_by, sort)], limit): + model_dump = file.model_dump() + del model_dump["root"] + yaml.dump(model_dump, stdout, yaml.Dumper, sort_keys=False) + print() From ff0cdafb5873cdbab7e3591e06674505e893929a Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Thu, 29 Aug 2024 13:13:47 +0200 Subject: [PATCH 06/29] Revert "version - patch 3.2.16 > 3.2.17" This reverts commit ef115deb2b3a8f4fae3db792ceb39a082133e190. 
--- digiarch/__version__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/digiarch/__version__.py b/digiarch/__version__.py index e8c69c35..87a7690e 100644 --- a/digiarch/__version__.py +++ b/digiarch/__version__.py @@ -1 +1 @@ -__version__ = "3.2.17" +__version__ = "3.2.16" diff --git a/pyproject.toml b/pyproject.toml index b3f80d5e..d39011ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "digiarch" -version = "3.2.17" +version = "3.2.16" description = "Tools for the Digital Archive Project at Aarhus Stadsarkiv" authors = ["Aarhus Stadsarkiv "] license = "GPL-3.0" From 3d6feb6b22ed832b2ab6ac9f60584ad7dd18a390 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Thu, 29 Aug 2024 13:14:04 +0200 Subject: [PATCH 07/29] version - minor 3.2.16 > 3.3.0 --- digiarch/__version__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/digiarch/__version__.py b/digiarch/__version__.py index 87a7690e..88c513ea 100644 --- a/digiarch/__version__.py +++ b/digiarch/__version__.py @@ -1 +1 @@ -__version__ = "3.2.16" +__version__ = "3.3.0" diff --git a/pyproject.toml b/pyproject.toml index d39011ed..3eb5b408 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "digiarch" -version = "3.2.16" +version = "3.3.0" description = "Tools for the Digital Archive Project at Aarhus Stadsarkiv" authors = ["Aarhus Stadsarkiv "] license = "GPL-3.0" From 9bffd2ab554eb592a506dd6e267e0f46586914f0 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Thu, 29 Aug 2024 13:16:31 +0200 Subject: [PATCH 08/29] search - sort imports --- digiarch/search.py | 1 + 1 file changed, 1 insertion(+) diff --git a/digiarch/search.py b/digiarch/search.py index bb431c92..1dd87c86 100644 --- a/digiarch/search.py +++ b/digiarch/search.py @@ -15,6 +15,7 @@ from digiarch.edit.common import argument_ids from digiarch.edit.common import find_files + from .common import 
argument_root from .common import check_database_version from .common import ctx_params From bd897c54806ef7ee531d80d04fa838416e23c0be Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 30 Aug 2024 15:16:01 +0200 Subject: [PATCH 09/29] extract.extractors.extractor_msg:MsgExtractor.extract - use attachment-{n} in case an attachment doesn't have a name --- digiarch/extract/extractors/extractor_msg.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/digiarch/extract/extractors/extractor_msg.py b/digiarch/extract/extractors/extractor_msg.py index 470c3295..3b870045 100644 --- a/digiarch/extract/extractors/extractor_msg.py +++ b/digiarch/extract/extractors/extractor_msg.py @@ -6,6 +6,7 @@ import chardet from acacore.models.file import File +from extract_msg import Attachment from extract_msg import AttachmentBase from extract_msg import Message from extract_msg import MSGFile @@ -117,9 +118,10 @@ def extract(self) -> Generator[Path, None, None]: _, body_html, body_rtf = msg_body(msg) inline_attachments, attachments = msg_attachments(msg, body_html, body_rtf) - for attachment in inline_attachments + attachments: + for n, attachment in enumerate(inline_attachments + attachments): if isinstance(attachment, (Message, MessageSigned)): - path: Path = extract_folder.joinpath(sanitize_path(attachment.filename.replace("/", "_"))) + name = attachment.filename or f"attachment-{n}" + path: Path = extract_folder.joinpath(sanitize_path(name.replace("/", "_"))) if path.suffix != ".msg": path = path.with_name(path.name + ".msg") attachment.export(path) @@ -127,7 +129,8 @@ def extract(self) -> Generator[Path, None, None]: elif attachment.data is not None and not isinstance(attachment.data, bytes): raise ExtractError(self.file, f"Cannot extract attachment with data of type {type(attachment.data)}") else: - name = attachment.longFilename if isinstance(attachment, SignedAttachment) else attachment.getFilename() + name: str = attachment.getFilename() if 
isinstance(attachment, Attachment) else attachment.longFilename + name = name or f"attachment-{n}" path: Path = extract_folder.joinpath(sanitize_path(name.replace("/", "_"))) with path.open("wb") as fh: # noinspection PyTypeChecker From 0448856611ce195f35294d7788b4f3abe70e0ecd Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 30 Aug 2024 15:17:26 +0200 Subject: [PATCH 10/29] extract.extractors.extractor_msg:MsgExtractor.extract - do not set .msg extension for Message attachments They could be EML --- digiarch/extract/extractors/extractor_msg.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/digiarch/extract/extractors/extractor_msg.py b/digiarch/extract/extractors/extractor_msg.py index 3b870045..16bd2dcd 100644 --- a/digiarch/extract/extractors/extractor_msg.py +++ b/digiarch/extract/extractors/extractor_msg.py @@ -122,8 +122,6 @@ def extract(self) -> Generator[Path, None, None]: if isinstance(attachment, (Message, MessageSigned)): name = attachment.filename or f"attachment-{n}" path: Path = extract_folder.joinpath(sanitize_path(name.replace("/", "_"))) - if path.suffix != ".msg": - path = path.with_name(path.name + ".msg") attachment.export(path) yield path elif attachment.data is not None and not isinstance(attachment.data, bytes): From 9a05d709fe42624ad63188d0162c70976d9aa6f9 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 30 Aug 2024 15:18:10 +0200 Subject: [PATCH 11/29] extract.extractors.extractor_msg - format --- digiarch/extract/extractors/extractor_msg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/digiarch/extract/extractors/extractor_msg.py b/digiarch/extract/extractors/extractor_msg.py index 16bd2dcd..e5ad04d9 100644 --- a/digiarch/extract/extractors/extractor_msg.py +++ b/digiarch/extract/extractors/extractor_msg.py @@ -127,7 +127,7 @@ def extract(self) -> Generator[Path, None, None]: elif attachment.data is not None and not isinstance(attachment.data, bytes): raise ExtractError(self.file, f"Cannot extract 
attachment with data of type {type(attachment.data)}") else: - name: str = attachment.getFilename() if isinstance(attachment, Attachment) else attachment.longFilename + name: str = attachment.getFilename() if isinstance(attachment, Attachment) else attachment.longFilename name = name or f"attachment-{n}" path: Path = extract_folder.joinpath(sanitize_path(name.replace("/", "_"))) with path.open("wb") as fh: From 06c902d64108738ee05d3eb0e7557a22ab829908 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 30 Aug 2024 15:28:31 +0200 Subject: [PATCH 12/29] edit.action - add --lock option to all commands --- digiarch/edit/action.py | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/digiarch/edit/action.py b/digiarch/edit/action.py index 6a5dd063..ff9c6fb1 100644 --- a/digiarch/edit/action.py +++ b/digiarch/edit/action.py @@ -40,6 +40,17 @@ from .common import find_files +def set_lock(ctx: Context, database: FileDB, file: File, reason: str, dry_run: bool, *loggers: Logger,): + if file.lock is True: + return + event = HistoryEntry.command_history(ctx, "lock", file.uuid, [file.lock, True], reason) + file.lock = True + if not dry_run: + database.files.update(file, {"uuid": file.uuid}) + database.history.insert(event) + event.log(INFO, *loggers) + + def set_action( ctx: Context, database: FileDB, @@ -83,6 +94,7 @@ def group_action(): callback=param_regex("^(.[a-zA-Z0-9]+)+$"), help='The file extensions to generate. [multiple; required for tools other than "copy"]', ) +@option("--lock", is_flag=True, default=False, help="Lock the edited files.") @option_dry_run() @pass_context def action_convert( @@ -94,6 +106,7 @@ def action_convert( id_files: bool, tool: str, outputs: tuple[str, ...], + lock: bool, dry_run: bool, ): """ @@ -101,6 +114,8 @@ def action_convert( The --outputs option may be omitted when using the "copy" tool. + To lock the file(s) after editing them, use the --lock option. 
+ To see the changes without committing them, use the --dry-run option. For details on the ID arguments, see the edit command. @@ -118,6 +133,8 @@ def action_convert( with ExceptionManager(BaseException) as exception: for file in find_files(database, ids, id_type, id_files): set_action(ctx, database, file, "convert", data, reason, dry_run, log_stdout) + if lock: + set_lock(ctx, database, file, reason, dry_run, log_stdout) end_program(ctx, database, exception, dry_run, log_file, log_stdout) @@ -133,6 +150,7 @@ def action_convert( callback=param_regex(r"^(.[a-zA-Z0-9]+)+$"), help="The extension the file must have for extraction to succeed.", ) +@option("--lock", is_flag=True, default=False, help="Lock the edited files.") @option_dry_run() @pass_context def action_extract( @@ -144,11 +162,14 @@ def action_extract( id_files: bool, tool: str, extension: str | None, + lock: bool, dry_run: bool, ): """ Set files' action to "extract". + To lock the file(s) after editing them, use the --lock option. + To see the changes without committing them, use the --dry-run option. For details on the ID arguments, see the edit command. @@ -163,6 +184,8 @@ def action_extract( with ExceptionManager(BaseException) as exception: for file in find_files(database, ids, id_type, id_files): set_action(ctx, database, file, "extract", data, reason, dry_run, log_stdout) + if lock: + set_lock(ctx, database, file, reason, dry_run, log_stdout) end_program(ctx, database, exception, dry_run, log_file, log_stdout) @@ -186,6 +209,7 @@ def action_extract( callback=param_regex(r"^.*\S.*$"), help="The steps to take to process the file.", ) +@option("--lock", is_flag=True, default=False, help="Lock the edited files.") @option_dry_run() @pass_context def action_manual( @@ -197,11 +221,14 @@ def action_manual( id_files: bool, data_reason: str | None, process: str, + lock: bool, dry_run: bool, ): """ Set files' action to "manual". + To lock the file(s) after editing them, use the --lock option. 
+ To see the changes without committing them, use the --dry-run option. For details on the ID arguments, see the edit command. @@ -216,6 +243,8 @@ def action_manual( with ExceptionManager(BaseException) as exception: for file in find_files(database, ids, id_type, id_files): set_action(ctx, database, file, "manual", data, reason, dry_run, log_stdout) + if lock: + set_lock(ctx, database, file, reason, dry_run, log_stdout) end_program(ctx, database, exception, dry_run, log_file, log_stdout) @@ -238,6 +267,7 @@ def action_manual( callback=param_regex(r"^.*\S.*$"), help='The reason why the file is ignored. [required for "text" template]', ) +@option("--lock", is_flag=True, default=False, help="Lock the edited files.") @option_dry_run() @pass_context @docstring_format(templates="\n".join(f" * {t}" for t in TemplateTypeEnum).strip()) @@ -250,6 +280,7 @@ def action_ignore( id_files: bool, template: TTemplateType, data_reason: str | None, + lock: bool, dry_run: bool, ): """ @@ -260,6 +291,8 @@ def action_ignore( {templates} The --reason option may be omitted when using a template other than "text". + + To lock the file(s) after editing them, use the --lock option. To see the changes without committing them, use the --dry-run option. 
@@ -278,6 +311,8 @@ def action_ignore( with ExceptionManager(BaseException) as exception: for file in find_files(database, ids, id_type, id_files): set_action(ctx, database, file, "ignore", data, reason, dry_run, log_stdout) + if lock: + set_lock(ctx, database, file, reason, dry_run, log_stdout) end_program(ctx, database, exception, dry_run, log_file, log_stdout) @@ -298,6 +333,7 @@ def action_ignore( callback=lambda _ctx, _param, value: Path(value) if value else None, help="Path to a YAML file containing file format actions.", ) +@option("--lock", is_flag=True, default=False, help="Lock the edited files.") @option_dry_run() @pass_context def command_copy( @@ -310,6 +346,7 @@ def command_copy( id_type: str, id_files: bool, actions_file: Path | None, + lock: bool, dry_run: bool, ): """ @@ -323,8 +360,12 @@ def command_copy( * ignore If no actions file is give with --actions, the latest version will be downloaded from GitHub. + + To lock the file(s) after editing them, use the --lock option. To see the changes without committing them, use the --dry-run option. + + For details on the ID arguments, see the edit command. 
""" # noqa: D301 check_database_version(ctx, ctx_params(ctx)["root"], (db_path := root / "_metadata" / "files.db")) @@ -344,6 +385,8 @@ def command_copy( with ExceptionManager(BaseException) as exception: for file in find_files(database, ids, id_type, id_files): set_action(ctx, database, file, action, data, reason, dry_run, log_stdout) + if lock: + set_lock(ctx, database, file, reason, dry_run, log_stdout) end_program(ctx, database, exception, dry_run, log_file, log_stdout) From f6c3a7bbbfda8f81acebd5e52c9554af2db101dd Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 30 Aug 2024 15:34:35 +0200 Subject: [PATCH 13/29] edit.action - format --- digiarch/edit/action.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/digiarch/edit/action.py b/digiarch/edit/action.py index ff9c6fb1..f81f0dfd 100644 --- a/digiarch/edit/action.py +++ b/digiarch/edit/action.py @@ -40,7 +40,14 @@ from .common import find_files -def set_lock(ctx: Context, database: FileDB, file: File, reason: str, dry_run: bool, *loggers: Logger,): +def set_lock( + ctx: Context, + database: FileDB, + file: File, + reason: str, + dry_run: bool, + *loggers: Logger, +): if file.lock is True: return event = HistoryEntry.command_history(ctx, "lock", file.uuid, [file.lock, True], reason) @@ -291,7 +298,7 @@ def action_ignore( {templates} The --reason option may be omitted when using a template other than "text". - + To lock the file(s) after editing them, use the --lock option. To see the changes without committing them, use the --dry-run option. @@ -360,7 +367,7 @@ def command_copy( * ignore If no actions file is give with --actions, the latest version will be downloaded from GitHub. - + To lock the file(s) after editing them, use the --lock option. To see the changes without committing them, use the --dry-run option. 
From eb7f93a7752b668fb771ec5c61a95f9dbdab9dd6 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 30 Aug 2024 15:41:41 +0200 Subject: [PATCH 14/29] extract.extractors.extractor_msg:MsgExtractor.extract - use Message.subject if filename is not available --- digiarch/extract/extractors/extractor_msg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/digiarch/extract/extractors/extractor_msg.py b/digiarch/extract/extractors/extractor_msg.py index e5ad04d9..38bc82e2 100644 --- a/digiarch/extract/extractors/extractor_msg.py +++ b/digiarch/extract/extractors/extractor_msg.py @@ -120,7 +120,7 @@ def extract(self) -> Generator[Path, None, None]: for n, attachment in enumerate(inline_attachments + attachments): if isinstance(attachment, (Message, MessageSigned)): - name = attachment.filename or f"attachment-{n}" + name = attachment.filename or attachment.subject or f"attachment-{n}" path: Path = extract_folder.joinpath(sanitize_path(name.replace("/", "_"))) attachment.export(path) yield path From 11218ea9f2285293c51493b49df0af761c6b7d60 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 30 Aug 2024 15:48:40 +0200 Subject: [PATCH 15/29] doctor:invalid_characters - improve invalid characters list --- digiarch/doctor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/digiarch/doctor.py b/digiarch/doctor.py index ab50e4f1..28ab6cc3 100644 --- a/digiarch/doctor.py +++ b/digiarch/doctor.py @@ -21,7 +21,7 @@ from digiarch.common import option_dry_run from digiarch.common import start_program -invalid_characters: str = r'\?%*|"<>,:;=+[]!@' + bytes(range(20)).decode("ascii") + "\x7f" +invalid_characters: str = '\\#%&{}[]<>*?/$!\'":@+`|=' + bytes(range(32)).decode("ascii") + "\x7f" def sanitize_path(path: str | PathLike) -> Path: From b6b0faf1b6581efe1182b94f1018fe1d1fcd1077 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 30 Aug 2024 15:52:14 +0200 Subject: [PATCH 16/29] doctor - format --- digiarch/doctor.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/digiarch/doctor.py b/digiarch/doctor.py index 28ab6cc3..4fa0986d 100644 --- a/digiarch/doctor.py +++ b/digiarch/doctor.py @@ -21,7 +21,7 @@ from digiarch.common import option_dry_run from digiarch.common import start_program -invalid_characters: str = '\\#%&{}[]<>*?/$!\'":@+`|=' + bytes(range(32)).decode("ascii") + "\x7f" +invalid_characters: str = "\\#%&{}[]<>*?/$!'\":@+`|=" + bytes(range(32)).decode("ascii") + "\x7f" def sanitize_path(path: str | PathLike) -> Path: From 21c0b1e26dcf4f82bc5e3da8c73e3ed02bd70169 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Sun, 1 Sep 2024 10:36:36 +0200 Subject: [PATCH 17/29] readme - update help messages --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index 8bdbba9c..fb31f332 100755 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ * [lock](#digiarch-edit-lock) * [remove](#digiarch-edit-remove) * [rollback](#digiarch-edit-rollback) + * [search](#digiarch-search) * [history](#digiarch-history) * [doctor](#digiarch-doctor) * [upgrade](#digiarch-upgrade) @@ -42,6 +43,7 @@ Commands: reidentify Reidentify files. extract Unpack archives. edit Edit the database. + search Search the database. history View events log. doctor Inspect the database. upgrade Upgrade the database. @@ -466,6 +468,28 @@ Options: --help Show this message and exit. ``` +## digiarch search + +``` +Usage: digiarch search [OPTIONS] ROOT ID... + +Options: + --uuid Use UUIDs as identifiers. [default] + --puid Use PUIDs as identifiers. + --path Use relative paths as identifiers. + --path-like Use relative paths as identifiers, match + with LIKE. + --checksum Use checksums as identifiers. + --warning Use warnings as identifiers. + --from-file Interpret IDs as files from which to read + the IDs. + --order-by [relative_path|size|action] + Set sorting field. [default: relative_path] + --sort [asc|desc] Set sorting direction. 
+ --limit INTEGER RANGE Limit the number of results. [x>=1] + --help Show this message and exit. +``` + ## digiarch history ``` From 590c6b5a511198dceccf554c57c30785c4e2a3fc Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 07:59:02 +0200 Subject: [PATCH 18/29] changelog - update changelog with version 1.0.0 through 3.3.0 --- CHANGELOG.md | 490 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 480 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57921752..087e5e18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,24 +1,494 @@ # Changelog +## v3.3.0 + +### New Features + +* `search` command to search files and display them + * Displays results in YAML format + * Uses the same selectors as the `edit` commands + * Supports sorting by relative path, size, and action (both ascending and descending) +* `edit action` commands have a new `--lock` option that locks the files after editing them + * The default behaviour is to _not_ lock the files + +### Changes + +* When running `extract`, unknown errors are logged with the file's uuid + * The event is not logged to the database, as it is already done with the `end` operation +* `extract` does not automatically add the .msg extension to extracted message attachments, as they could be EML as well +* Improved list of invalid characters for filenames: + * \\#%&{}[]<>*?/$!'`":@+|= + +### Fixes + +* Fix `extract` failing on MSG/EML files that contained message attachments without a filename + * The subject is used, if available, otherwise `attachment-{n}` is used instead, where n is the index of said + attachment + +## v3.2.16 + +### Fixes + +* Fix issues when extracting attachments from MSG/EML files with forward slashes in the attachment file name + +## v3.2.15 + +### Fixes + +* Fix issue with extract when file is already found + +## v3.2.13 + +### New Features + +* Add `files` fix to `doctor` command + * Ensures that all files in the database exist, if not
they are removed +* `edit rollback` supports `doctor` commands + +### Changes + +* `doctor` command events that signal a rename have .rename in their operation name + +## v3.2.12 + +### New Features + +* `completions` command generates completion scripts for Zsh, Bash, and Fish shells +* `edit` commands (and others using identifiers) now accept `@null` as a valid value to match `NULL` fields + +### Changes + +* Use acacore 3.0.8 + * v3.0.7 contained a critical error in the database upgrade function causing `Files.action_data` to be set to + `NULL` + +## v3.2.11 + +### Changes + +* Use acacore 3.0.7 + +## v3.2.10 + +### Changes + +* Folders for extracted files created by `extract` use the UUID of the archive file + * Uses format `_{uuid}` + * Reduces length of nested file paths + * Is still unique to that folder + +## v3.2.9 + +### Changes + +* Use Acacore 3.0.6 + +## v3.2.8 + +### Changes + +* Show start event for `upgrade` immediately + +### Fixes + +* Fix MSG attachments of MSG files not using the proper extension +* Fix `upgrade` adding an end event to the history table only when no update occurred + +## v3.2.7 + +### Fixes + +* Fix issue with MSG empty attachments +* Fix issue with MSG attachment bytes data sometimes being interpreted as a string by extract_msg causing it to throw a + `FileNotFoundError` + +## v3.2.6 + +### Fixes + +* Fix incorrect handling of MSG attachments in MSG files + +## v3.2.5 + +### Changes + +* Use acacore 3.0.5 + * Fix some edge cases with `edit rename` and `doctor` failing when a file had multiple extensions + +### Fixes + +* Fix `doctor` extension deduplication not working on some systems where the SQLite reverse function was not available + +## v3.2.4 + +### Fixes + +* Fix error when extracting HTML and RTF body where they were sometimes None +* Support signed MSG and signed MSG attachments for extraction + +## v3.2.3 + +### New Features + +* `edit rollback` supports extract events + * Archive files are reset to the extract action
+ * Extracted files are removed from the database and the file system + +### Fixes + +* Fix extract events not being saved in History table + +## v3.2.2 + +### Changes + +* Reidentify resets `processed` column to `False` + +## v3.2.1 + +### New Features + +* Use acacore 3.0.4 +* Added "msg" tool to extract MSG files +* Support `extract.on_success` +* `--exclude` option for `identify` to exclude files or folders with globbing patterns + +### Changes + +* Extracted files now use the "extracted-archive" template when they are set to "ignore" + +## v3.2.0 + +### New Features + +* `extract` command to extract archives + * If the extract tool can't be found, the file is skipped and a warning message is displayed + * If the file is encrypted, then the file is set to "ignore" action with template "password-protected" + * Detection of encrypted archives with Patool is experimental and needs testing + * If a file should not be preserved, it is set to ignore + * If other errors occur during extraction, the file is set to manual with reason set to the exception's message + +### Changes + +* Improved docstrings and help messages + +## v3.1.0 + +### Fixes + +* Fix missing help from `history` command when running it without arguments + +### Changes + +* Improved error messages when downloading actions and custom signatures +* `edit remove` command uses a different sub-operation when deleting files so that they can be automatically ignored by + rollback + +### New Features + +* `edit action copy` command to copy an action from an existing format + +## v3.0.0 + +### Changes + +* Use a modular structure for commands and subcommands +* Overhauled `edit action` using subcommands and named options for each field +* Added extensions deduplication to `doctor` command +* Improved rollback +* Simplified history events +* Improved handling of exceptions and argument errors + +## v2.1.1 + +### New Features + +* `--data-puid` option in `edit action` command allows copying data from an
existing identifier in the reference files + * Fix issues #692 + * If the identifier is not found or the action argument is not found in the data, a `KeyError` exception is raised + +### Fixes + +* Fix `--id-files` option not working with `edit lock` command + +## v2.1.0 + +### New Features + +* Add `edit lock` command to lock specific files + * Can be rolled back to the previous value with `edit rollback` + +## v2.0.2 + +### New Features + +* The `upgrade` command backs up the database file before performing the upgrade + * Can be ignored with the `--no-backup` option +* The `edit remove` command can delete files from the disk as well with the `--delete` option + +## v2.0.1 + +### Changes + +* Use acacore 2.0.1 + * Fix upgrade issues + +## v2.0.0 + +### Changes + +* Update to acacore 2.0.0 + * Python 3.11 + * Simpler logging of events + * Database version checks +* Siegfried batching + * Files are identified in batches + * Defaults to 100 files per batch + +### New Features + +* `doctor` command to fix common database issues +* `upgrade` command to upgrade the database to the latest version + +## v1.5.0 + +### New Features + +* `reidentify` command + * Allows running identification process again on specific files + * Files are selected with the same system as the edit commands +* `history` command + * Allows viewing and searching the events log + * Can search by: + * time (from and/or to) + * uuid (allows multiple) + * operation (LIKE with % only, allows multiple) + * reason (LIKE, allows multiple) +* `edit rename` accepts an empty extension + * To set an empty extension, spaces must be used (e.g., `" "`) + * When used with the `--replace` and `--replace-all` options, existing extensions are removed + +### Changes + +* Stricter extension patterns in `edit rename` + * Only allowed characters are a-z, A-Z, and 0-9 + +## v1.4.0 + +### New Features + +* `edit rollback` command + * Undo other edit operations + * Must select a start and end time for history events +* 
`--dry-run` option for `edit rename` + * Show changes without committing them + +### Changes + +* `edit rename` uses replace mode options instead of an f-string + * `--replace` replaces the last suffix with the new extension (default) + * `--replace-all` replaces all valid suffixes (matching the expression `\.[^/<>:"\\|?*\x7F\x00-\x20]+`) with the new + extension + * `--append` appends the new extension if it is not already there + +## v1.3.0 + +### New Features + +* `edit rename` command + * Change the extension of files + * Uses the same selector options as the other `edit` commands + * Ignores changes that would duplicate existing extensions or not alter them + * New extensions can be formatted with: + * `suffix` the last extension of the file + * `suffixes` all the extensions of the file, used for append mode (e.g., `{suffixes}.ext` will change " + file.tar.gz" to "file.tar.gz.ext") + +### Changes + +* Added docstring to `edit action` and `edit remove` commands +* `--siegfried-path` can be set with `SIEGFRIED_PATH` environment variable +* `--siegfried-home` can be set with `SIEGFRIED_HOME` environment variable + +## v1.2.0 + +### New Features + +* Add `--id-files` option to edit commands + * Interpret IDs as files from which to read the IDs + * Each line is considered a separate ID + * Blank lines are ignored + * All IDs are stripped of newlines, carriage returns, and tab characters, but not spaces + +### Changes + +* `--no-update-siegfried-signature` option is now the default + +### Fixes + +* Fix error in `edit remove` command when using `--path-like` + * Was using the like statement to delete files instead of their UUID +* Fix error in `edit action` command + * SQLite cursor was rewinding to start because INSERT statements were executed in-between iteration steps + +### Dependencies + +* acacore 1.2.0 + +## v1.1.1 + +### New Features + +* LIKE matches for paths + * Added new `--path-like` option to match edit IDs with LIKE statements + +### Changes + +* All 
files matching the given IDs are edited, not just the first one +* Removed Siegfried signature update from test workflow + * Is already present in test folder +* Updated PRONOM signature file for Siegfried + +## v1.1.0 + +### New Features + +#### edit remove + +* Added new `edit remove` command to remove files by UUID, path, checksum, PUID, or warning + * Can be used to re-identify files + +### Changes + +* Both digiarch's and acacore's versions are saved with the "start" event + +### Fixes + +* Fix non-matching history events for edit action + +## v1.0.7 + +### Fixes + +* Fix traceback of identification errors not being saved in History table + +### Dependencies + +* Use acacore 1.1.4 + +## v1.0.6 + +### Dependencies + +* Use acacore 1.1.3 + +## v1.0.5 + +### Edit + +#### Action + +* Allow using identifiers other than UUID + * uuid + * puid + * relative path + * checksum + * warnings +* The history event contains both the previous and new action + +## v1.0.4 + +### Identify + +* Improve handling of exceptions + * `OSError` and `IOError` are always raised + * `Exception`, `UnidentifiedImageError`, `DecompressionBombError` are always caught +* Increase maximum size of images before Pillow raises a `DecompressionBombError` + +### Other + +* Improve end events in history by using the exception repr value in the data column, or None if the program ended with + no errors + +## v1.0.3 + +### Fixes + +* Fix incorrect handling of action data when an `UnidentifiedImageError` exception was caught + +### Changes + +* `UnidentifiedImageError` exceptions are logged + +### Tests + +* Add corrupt GIF to test files + +## v1.0.2 + +### Identify + +* Handle `UnidentifiedImageError` exception by setting the file's action to "manual" +* Add `--siegfried-home` option to set the folder that contains the signature files + +### Dependencies + +* Use acacore 1.1.1 + +### Actions + +* Automatically build necessary wheel files and save them as a release on new pushed tags + +## v1.0.1 + 
+### Edit Action + +* New command to change an action +* Can optionally specify new data to be used in action data column + +### Dependencies + +* Use acacore 1.0.2 + +## v1.0.0 - Integrate With acacore + +### Changes + +* Use acacore to handle database and file identification +* Remove all unnecessary files and dependencies + +### Dependencies + +* Use acacore 1.0.1 + ## 0.9.23 -- added which version of DRIOD we use to the log -- makes sure we use most / all of the avaivable info given by `sf` + +* added which version of DROID we use to the log +* makes sure we use most / all of the available info given by `sf` ## 0.9.22a -- added ability to get reference files version and is printing it to stdout + +* added ability to get reference files version and is printing it to stdout ## 0.9.22 -- added check to ensure updates of the changelog + +* added check to ensure updates of the changelog + ## 0.9.21 -- fixed missing identification of aca-fmt/17 (MapInfo Map Files) + +* fixed missing identification of aca-fmt/17 (MapInfo Map Files) ## 0.9.20 -- added x-fmt/111 to signatures that we re-identify, as Mapinfo TAB files are identified as such -- added aca-fmt/19 (MapInfo TAB files) to list of custom signatures + +* added x-fmt/111 to signatures that we re-identify, as Mapinfo TAB files are identified as such +* added aca-fmt/19 (MapInfo TAB files) to list of custom signatures ## 0.9.19 -- added list of puids that we have to identify with our custom signatures even though Siegfried identified them. Currently "fmt/111". -- added aca-fmt/18 (Lotus Aprroach View File) to custom_signatures.json + +* added list of puids that we have to identify with our custom signatures even though Siegfried identified them. + Currently "fmt/111". 
+* added aca-fmt/18 (Lotus Approach View File) to custom_signatures.json ## 0.9.18 -- added signature for 5 versions of Microsoft Access Database + +* added signature for 5 versions of Microsoft Access Database From 4a8ceeb3e12c9fde52196ad9011fee3b60f7e3dc Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 08:07:55 +0200 Subject: [PATCH 19/29] changelog:3.2.14 - add non-pr version --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 087e5e18..3d791f38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,12 @@ * Fix issue with extract when file is already found +## v3.2.14 + +### Fixes + +* Fix `doctor` command `--fix` option not allowing "files" value + ## v3.2.13 ### New Features From a4aa29fd15d47ec2de130f9e50eb8a874cc15fa2 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 09:30:19 +0200 Subject: [PATCH 20/29] search:command_search - use JSON dump mode for model_dump No need to specify representers for YAML output --- digiarch/search.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/digiarch/search.py b/digiarch/search.py index 1dd87c86..7db4c82c 100644 --- a/digiarch/search.py +++ b/digiarch/search.py @@ -47,10 +47,6 @@ def command_search( ): check_database_version(ctx, ctx_params(ctx)["root"], (db_path := root / "_metadata" / "files.db")) - yaml.add_representer(UUID, lambda dumper, data: dumper.represent_str(str(data))) - yaml.add_representer(Path, lambda dumper, data: dumper.represent_str(str(data))) - yaml.add_representer(PosixPath, lambda dumper, data: dumper.represent_str(str(data))) - yaml.add_representer(WindowsPath, lambda dumper, data: dumper.represent_str(str(data))) yaml.add_representer( str, lambda dumper, data: ( @@ -62,7 +58,7 @@ def command_search( with FileDB(db_path) as database: for file in find_files(database, ids, id_type, id_files, [(order_by, sort)], limit): - model_dump = file.model_dump() + model_dump = 
file.model_dump(mode="json") del model_dump["root"] yaml.dump(model_dump, stdout, yaml.Dumper, sort_keys=False) print() From 4ebae7430c1913ccab392fdeca15df4a91f26c97 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 09:31:21 +0200 Subject: [PATCH 21/29] search:command_search - set limit to 100 if there are no IDs arguments --- digiarch/search.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/digiarch/search.py b/digiarch/search.py index 7db4c82c..42ef0049 100644 --- a/digiarch/search.py +++ b/digiarch/search.py @@ -56,6 +56,9 @@ def command_search( ), ) + if not ids: + limit = limit or 100 + with FileDB(db_path) as database: for file in find_files(database, ids, id_type, id_files, [(order_by, sort)], limit): model_dump = file.model_dump(mode="json") From 002f495fa2bb4a45b87f7722f7f7e2155881938c Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 09:31:32 +0200 Subject: [PATCH 22/29] search:command_search - add docstring --- README.md | 10 ++++++++++ digiarch/search.py | 11 +++++++++++ 2 files changed, 21 insertions(+) diff --git a/README.md b/README.md index fb31f332..e00f9d6f 100755 --- a/README.md +++ b/README.md @@ -473,6 +473,16 @@ Options: ``` Usage: digiarch search [OPTIONS] ROOT ID... +Search for specific files in the database. + +Files are displayed in YAML format. + +The ID arguments are interpreted as a list of UUID's by default. This behaviour can be changed with the --puid, +--path, --path-like, --checksum, and --warning options. If the --from-file option is used, each ID argument is +interpreted as the path to a file containing a list of IDs (one per line, empty lines are ignored). + +If there are no ID arguments, then the limit is automatically set to 100 if not set with the --limit option. + Options: --uuid Use UUIDs as identifiers. [default] --puid Use PUIDs as identifiers. 
diff --git a/digiarch/search.py b/digiarch/search.py index 42ef0049..e3fa613b 100644 --- a/digiarch/search.py +++ b/digiarch/search.py @@ -45,6 +45,17 @@ def command_search( sort: str, limit: int | None, ): + """ + Search for specific files in the database. + + Files are displayed in YAML format. + + The ID arguments are interpreted as a list of UUID's by default. This behaviour can be changed with the --puid, + --path, --path-like, --checksum, and --warning options. If the --from-file option is used, each ID argument is + interpreted as the path to a file containing a list of IDs (one per line, empty lines are ignored). + + If there are no ID arguments, then the limit is automatically set to 100 if not set with the --limit option. + """ check_database_version(ctx, ctx_params(ctx)["root"], (db_path := root / "_metadata" / "files.db")) yaml.add_representer( From d05fe3206325f71e08141b541f9723613e56a747 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 09:33:04 +0200 Subject: [PATCH 23/29] search - remove unused imports --- digiarch/search.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/digiarch/search.py b/digiarch/search.py index e3fa613b..7b12d326 100644 --- a/digiarch/search.py +++ b/digiarch/search.py @@ -1,8 +1,5 @@ from pathlib import Path -from pathlib import PosixPath -from pathlib import WindowsPath from sys import stdout -from uuid import UUID import yaml from acacore.database import FileDB From f910a272c58516948f29ba1dbdbb63bf176c879c Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 09:35:07 +0200 Subject: [PATCH 24/29] search:command_search - show defaults for --sort option --- digiarch/search.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/digiarch/search.py b/digiarch/search.py index 7b12d326..c57bd2b9 100644 --- a/digiarch/search.py +++ b/digiarch/search.py @@ -29,7 +29,14 @@ show_choices=True, help="Set sorting field.", ) -@option("--sort", type=Choice(["asc", "desc"]), 
default="asc", help="Set sorting direction.") +@option( + "--sort", + type=Choice(["asc", "desc"]), + default="asc", + show_default=True, + show_choices=True, + help="Set sorting direction.", +) @option("--limit", type=IntRange(1), default=None, help="Limit the number of results.") @pass_context def command_search( From a1f622a65538982abdfa7bf0a36a6ee805a3f4e6 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 09:35:29 +0200 Subject: [PATCH 25/29] readme - update help messages --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e00f9d6f..094b6031 100755 --- a/README.md +++ b/README.md @@ -495,7 +495,7 @@ Options: the IDs. --order-by [relative_path|size|action] Set sorting field. [default: relative_path] - --sort [asc|desc] Set sorting direction. + --sort [asc|desc] Set sorting direction. [default: asc] --limit INTEGER RANGE Limit the number of results. [x>=1] --help Show this message and exit. ``` From 8523a65b32cecd62bd2fbf10f9e9f815a9b1b8b5 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 09:37:51 +0200 Subject: [PATCH 26/29] readme - update help messages --- README.md | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 094b6031..55c4d7dc 100755 --- a/README.md +++ b/README.md @@ -219,6 +219,8 @@ Usage: digiarch edit action convert [OPTIONS] ROOT ID... REASON The --outputs option may be omitted when using the "copy" tool. + To lock the file(s) after editing them, use the --lock option. + To see the changes without committing them, use the --dry-run option. For details on the ID arguments, see the edit command. @@ -234,6 +236,7 @@ Options: --tool TEXT The tool to use for conversion. [required] --outputs TEXT The file extensions to generate. [multiple; required for tools other than "copy"] + --lock Lock the edited files. --dry-run Show changes without committing them. 
--help Show this message and exit. ``` @@ -245,6 +248,8 @@ Usage: digiarch edit action extract [OPTIONS] ROOT ID... REASON Set files' action to "extract". + To lock the file(s) after editing them, use the --lock option. + To see the changes without committing them, use the --dry-run option. For details on the ID arguments, see the edit command. @@ -260,6 +265,7 @@ Options: --tool TEXT The tool to use for extraction. [required] --extension TEXT The extension the file must have for extraction to succeed. + --lock Lock the edited files. --dry-run Show changes without committing them. --help Show this message and exit. ``` @@ -271,6 +277,8 @@ Usage: digiarch edit action manual [OPTIONS] ROOT ID... REASON Set files' action to "manual". + To lock the file(s) after editing them, use the --lock option. + To see the changes without committing them, use the --dry-run option. For details on the ID arguments, see the edit command. @@ -286,6 +294,7 @@ Options: --reason TEXT The reason why the file must be processed manually. [required] --process TEXT The steps to take to process the file. [required] + --lock Lock the edited files. --dry-run Show changes without committing them. --help Show this message and exit. ``` @@ -310,6 +319,8 @@ Usage: digiarch edit action ignore [OPTIONS] ROOT ID... REASON The --reason option may be omitted when using a template other than "text". + To lock the file(s) after editing them, use the --lock option. + To see the changes without committing them, use the --dry-run option. For details on the ID arguments, see the edit command. @@ -325,6 +336,7 @@ Options: --template TEMPLATE The template type to use. [required] --reason TEXT The reason why the file is ignored. [required for "text" template] + --lock Lock the edited files. --dry-run Show changes without committing them. --help Show this message and exit. ``` @@ -346,8 +358,12 @@ Usage: digiarch edit action copy [OPTIONS] ROOT ID... 
PUID If no actions file is give with --actions, the latest version will be downloaded from GitHub. + To lock the file(s) after editing them, use the --lock option. + To see the changes without committing them, use the --dry-run option. + For details on the ID arguments, see the edit command. + Options: --uuid Use UUIDs as identifiers. [default] --puid Use PUIDs as identifiers. @@ -358,6 +374,7 @@ Options: --from-file Interpret IDs as files from which to read the IDs. --actions FILE Path to a YAML file containing file format actions. [env var: DIGIARCH_ACTIONS] + --lock Lock the edited files. --dry-run Show changes without committing them. --help Show this message and exit. ``` @@ -473,15 +490,18 @@ Options: ``` Usage: digiarch search [OPTIONS] ROOT ID... -Search for specific files in the database. + Search for specific files in the database. -Files are displayed in YAML format. + Files are displayed in YAML format. -The ID arguments are interpreted as a list of UUID's by default. This behaviour can be changed with the --puid, ---path, --path-like, --checksum, and --warning options. If the --from-file option is used, each ID argument is -interpreted as the path to a file containing a list of IDs (one per line, empty lines are ignored). + The ID arguments are interpreted as a list of UUID's by default. This + behaviour can be changed with the --puid, --path, --path-like, --checksum, + and --warning options. If the --from-file option is used, each ID argument + is interpreted as the path to a file containing a list of IDs (one per line, + empty lines are ignored). -If there are no ID arguments, then the limit is automatically set to 100 if not set with the --limit option. + If there are no ID arguments, then the limit is automatically set to 100 if + not set with the --limit option. Options: --uuid Use UUIDs as identifiers. 
[default] From 15e51d06d1971bd696266a3d7c18283af5775331 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 09:41:23 +0200 Subject: [PATCH 27/29] history:command_history - add limit option --- digiarch/history.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/digiarch/history.py b/digiarch/history.py index d5f3cd7c..03d8a0a0 100644 --- a/digiarch/history.py +++ b/digiarch/history.py @@ -9,6 +9,7 @@ from click import command from click import Context from click import DateTime +from click import IntRange from click import option from click import pass_context @@ -59,6 +60,7 @@ show_default=True, help="Sort by ascending or descending order.", ) +@option("--limit", type=IntRange(1), default=None, help="Limit the number of results.") @pass_context def command_history( ctx: Context, @@ -69,6 +71,7 @@ def command_history( uuid: tuple[str, ...] | None, reason: tuple[str, ...] | None, ascending: bool, + limit: int | None, ): """ View and search events log. @@ -77,7 +80,7 @@ def command_history( If multiple --uuid, --operation, or --reason options are used, the query will match any of them. - If no query option is given, only the first 100 results will be shown. + If no query option is given, then the limit is automatically set to 100 if not set with the --limit option. 
""" check_database_version(ctx, ctx_params(ctx)["root"], (db_path := root / "_metadata" / "files.db")) @@ -117,12 +120,15 @@ def command_history( ), ) + if not where: + limit = limit or 100 + with FileDB(db_path) as database: for event in database.history.select( where=" and ".join(where) or None, parameters=parameters or None, order_by=[("time", "asc" if ascending else "desc")], - limit=None if where else 100, + limit=limit, ): yaml.dump(event.model_dump(), stdout, yaml.Dumper, sort_keys=False) print() From 594f0d8e12f2c6449bb66dfe465144f58a09116c Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 09:41:53 +0200 Subject: [PATCH 28/29] readme - update help messages --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 55c4d7dc..24ec644c 100755 --- a/README.md +++ b/README.md @@ -533,7 +533,8 @@ Usage: digiarch history [OPTIONS] ROOT If multiple --uuid, --operation, or --reason options are used, the query will match any of them. - If no query option is given, only the first 100 results will be shown. + If no query option is given, then the limit is automatically set to 100 if + not set with the --limit option. Options: --from [%Y-%m-%d|%Y-%m-%dT%H:%M:%S|%Y-%m-%dT%H:%M:%S.%f] @@ -545,6 +546,7 @@ Options: --reason TEXT Event reason. --ascending / --descending Sort by ascending or descending order. [default: ascending] + --limit INTEGER RANGE Limit the number of results. [x>=1] --help Show this message and exit. 
``` From d0e0e592f29fc6f24d562d1c9bb7fcfd541fd7ef Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Mon, 2 Sep 2024 09:44:58 +0200 Subject: [PATCH 29/29] changelog:3.3.0 - update new features --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d791f38..6a2c26b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ * Supports sorting by relative path, size, and action (both ascending and descending) * `edit action` commands have a new `--lock` option that locks the files after editing them * The default behaviour is to _not_ lock the files +* `history` command has a new `--limit` option ### Changes @@ -41,7 +42,7 @@ ### Fixes -* Fix `doctor` command `--fix` option not allowing "files" value +* Fix `doctor` command `--fix` option not allowing "files" value ## v3.2.13