Skip to content

Commit

Permalink
Merge pull request #703
Browse files — browse the repository at this point in the history
v3.2.5
  • Loading branch information
MatteoCampinoti94 authored Aug 22, 2024
2 parents b1eaabb + bf162fa commit 7831678
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 58 deletions.
2 changes: 1 addition & 1 deletion digiarch/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.2.4"
__version__ = "3.2.5"
10 changes: 5 additions & 5 deletions digiarch/doctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,16 @@ def sanitize_paths(ctx: Context, database: FileDB, root: Path, dry_run: bool, *l


def deduplicate_extensions(ctx: Context, database: FileDB, root: Path, dry_run: bool, *loggers: Logger | None):
database.create_function("__reverse", 1, lambda s: s[::-1])
for file in database.files.select(
where="instr(reverse(relative_path), '.') != 0"
where="instr(__reverse(relative_path), '.') != 0"
" and relative_path like '%' ||"
" substr(relative_path, length(relative_path) - instr(reverse(relative_path), '.') + 1) ||"
" substr(relative_path, length(relative_path) - instr(reverse(relative_path), '.') + 1)"
" substr(relative_path, length(relative_path) - instr(__reverse(relative_path), '.') + 1) ||"
" substr(relative_path, length(relative_path) - instr(__reverse(relative_path), '.') + 1)"
):
old_suffixes: list[str] = [s.lower() for s in file.suffixes.split(".") if s]
new_suffixes: list[str] = [s.lower() for s in old_suffixes]
# Deduplicate suffixes
new_suffixes = sorted(set(new_suffixes), key=new_suffixes.index)
new_suffixes: list[str] = sorted(set(old_suffixes), key=old_suffixes.index)
# Restore original letter case
new_suffixes = [next(s2 for s2 in old_suffixes if s2.lower() == s) for s in new_suffixes]
old_name: str = file.name
Expand Down
14 changes: 7 additions & 7 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[tool.poetry]
name = "digiarch"
version = "3.2.4"
version = "3.2.5"
description = "Tools for the Digital Archive Project at Aarhus Stadsarkiv"
authors = ["Aryan Muhammadi Landi <[email protected]>", "Nina Jensen <[email protected]>", "Aarhus Stadsarkiv <[email protected]>"]
authors = ["Aarhus Stadsarkiv <[email protected]>"]
license = "GPL-3.0"
readme = "README.md"
homepage = "https://stadsarkiv.aarhus.dk/"
Expand All @@ -12,10 +12,10 @@ include = ["pyproject.toml"]

[tool.poetry.dependencies]
python = "^3.11"
acacore = {git = "https://github.com/aarhusstadsarkiv/acacore.git", tag = "v3.0.4"}
acacore = {git = "https://github.com/aarhusstadsarkiv/acacore.git", tag = "v3.0.5"}
patool = "^2.4.0"
tnefparse = "^1.4.0"
extract-msg = "^0.48.7"
extract-msg = "^0.49.0"
chardet = "^5.2.0"

[tool.poetry.group.dev.dependencies]
Expand Down
Binary file modified tests/files/_metadata/files.db
Binary file not shown.
143 changes: 102 additions & 41 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pydantic import BaseModel

from digiarch.cli import app
from digiarch.doctor import command_doctor
from digiarch.edit.action import group_action
from digiarch.edit.edit import group_edit
from digiarch.edit.lock import command_lock
Expand Down Expand Up @@ -149,6 +150,107 @@ def test_reidentify(tests_folder: Path, files_folder: Path, files_folder_copy: P
assert "extension mismatch" not in (file_new.warning or [])


# noinspection DuplicatedCode
def test_extract(tests_folder: Path, files_folder: Path, files_folder_copy: Path):
# Run the extract command against a copy of the fixture database and check the
# state recorded afterwards for every file that was marked with action 'extract'.
#
# Parameters (all Path values; presumably pytest fixtures — confirm in conftest):
#   tests_folder      - folder holding fileformats.yml and custom_signatures.yml;
#                       also passed as the --siegfried-home value
#   files_folder      - fixture tree that contains _metadata/files.db
#   files_folder_copy - working tree the command is run on
#
# NOTE(review): leading indentation is missing in this view, so the nesting of the
# statements below (in particular which branch the final `parent = ?` assertion
# belongs to) cannot be read off the text — confirm against the repository file.
# NOTE(review): an identical test_extract definition appears a second time in this
# diff view; it looks like the commit moved the function — confirm only one copy
# remains in the actual module.
# Place a copy of files.db at the same relative location under files_folder_copy.
database_path: Path = files_folder / "_metadata" / "files.db"
database_path_copy: Path = files_folder_copy / database_path.relative_to(files_folder)
database_path_copy.parent.mkdir(parents=True, exist_ok=True)
copy(database_path, database_path_copy)

# Snapshot the rows to extract before the command changes them.
with FileDB(database_path_copy) as database:
files: list[File] = list(database.files.select(where="action = 'extract'"))

# Invoke the CLI in-process.
# NOTE(review): standalone_mode=False presumably lets errors propagate to the test
# instead of exiting the interpreter (click behaviour) — confirm `app` is a click command.
app.main(
[
command_extract.name,
str(files_folder_copy),
"--actions",
str(tests_folder / "fileformats.yml"),
"--custom-signatures",
str(tests_folder / "custom_signatures.yml"),
"--siegfried-home",
str(tests_folder),
],
standalone_mode=False,
)

# Re-read each snapshotted file by uuid and compare against the expected outcome.
with FileDB(database_path_copy) as database:
for file in files:
file2 = database.files.select(where="uuid = ?", parameters=[str(file.uuid)]).fetchone()
assert file2
# Files whose name, up to the first '.', ends with "-encrypted" are expected
# to be ignored with the password-protected template.
if file.relative_path.name.split(".", 1)[0].endswith("-encrypted"):
assert file2.action == "ignore"
assert file2.action_data.ignore.template == "password-protected"
assert file2.processed is True
# When the original extract action names an on_success action, the file
# must have been switched to that action.
elif on_success_action := file.action_data.extract.on_success:
assert file2.action == on_success_action
# Otherwise the file is expected to be ignored as an extracted archive.
else:
assert file2.action == "ignore"
assert file2.action_data.ignore.template == "extracted-archive"
assert file2.processed is True
# At least one row must reference this file's uuid as its parent.
assert database.files.select(where="parent = ?", parameters=[str(file.uuid)]).fetchone()


# noinspection DuplicatedCode
def test_doctor_paths(tests_folder: Path, files_folder: Path, files_folder_copy: Path):
    """Check that ``doctor --fix paths`` sanitizes file names containing ``*``.

    Every file in a copy of the fixture database is renamed on disk with a
    leading ``*`` and the new relative path is written back to the database.
    After running the doctor command, each file must exist under a name in
    which ``*`` has been replaced by ``_``, and the starred path must be gone.

    ``tests_folder`` is accepted but not used by this test.
    """
    source_db: Path = files_folder / "_metadata" / "files.db"
    target_db: Path = files_folder_copy / source_db.relative_to(files_folder)
    target_db.parent.mkdir(parents=True, exist_ok=True)
    copy(source_db, target_db)

    with FileDB(target_db) as db:
        starred_files: list[File] = list(db.files.select())
        for entry in starred_files:
            entry.root = files_folder_copy
            # Rename on disk first, then record the resulting path relative to the copy root.
            current_path: Path = entry.get_absolute_path()
            starred_path: Path = entry.get_absolute_path().with_name("*" + entry.name)
            entry.relative_path = current_path.rename(starred_path).relative_to(files_folder_copy)
            db.files.update({"relative_path": entry.relative_path}, {"uuid": entry.uuid})
        db.commit()

    doctor_args: list[str] = [command_doctor.name, str(files_folder_copy), "--fix", "paths"]
    app.main(doctor_args, standalone_mode=False)

    with FileDB(target_db) as db:
        for entry in starred_files:
            fixed: File | None = db.files.select(where="uuid = ?", parameters=[str(entry.uuid)]).fetchone()
            assert fixed
            # The starred path must no longer exist; the sanitized one must.
            assert not entry.get_absolute_path(files_folder_copy).is_file()
            assert fixed.get_absolute_path(files_folder_copy).is_file()
            sanitized_path = entry.relative_path.with_name(entry.name.replace("*", "_"))
            assert fixed.relative_path == sanitized_path


# noinspection DuplicatedCode
def test_doctor_extensions(tests_folder: Path, files_folder: Path, files_folder_copy: Path):
    """Check that ``doctor --fix extensions`` removes a duplicated file extension.

    Every file with a suffix in a copy of the fixture database is renamed on
    disk so that its suffix appears twice, and the new relative path is written
    back to the database. After running the doctor command, each file must
    exist under the name with the extra suffix stripped, and the doubled-suffix
    path must be gone.

    ``tests_folder`` is accepted but not used by this test.
    """
    source_db: Path = files_folder / "_metadata" / "files.db"
    target_db: Path = files_folder_copy / source_db.relative_to(files_folder)
    target_db.parent.mkdir(parents=True, exist_ok=True)
    copy(source_db, target_db)

    with FileDB(target_db) as db:
        # Only files that have a suffix can have it duplicated.
        doubled_files: list[File] = [row for row in db.files.select() if row.suffix]
        for entry in doubled_files:
            entry.root = files_folder_copy
            # Rename on disk first, then record the resulting path relative to the copy root.
            current_path: Path = entry.get_absolute_path()
            doubled_path: Path = entry.get_absolute_path().with_name(entry.name + entry.suffix)
            entry.relative_path = current_path.rename(doubled_path).relative_to(files_folder_copy)
            db.files.update({"relative_path": entry.relative_path}, {"uuid": entry.uuid})
        db.commit()

    doctor_args: list[str] = [command_doctor.name, str(files_folder_copy), "--fix", "extensions"]
    app.main(doctor_args, standalone_mode=False)

    with FileDB(target_db) as db:
        for entry in doubled_files:
            fixed: File | None = db.files.select(where="uuid = ?", parameters=[str(entry.uuid)]).fetchone()
            assert fixed
            # The doubled-suffix path must no longer exist; the deduplicated one must.
            assert not entry.get_absolute_path(files_folder_copy).is_file()
            assert fixed.get_absolute_path(files_folder_copy).is_file()
            deduplicated_path = entry.relative_path.with_name(entry.name.removesuffix(entry.suffix))
            assert fixed.relative_path == deduplicated_path


# noinspection DuplicatedCode
def test_history(tests_folder: Path, files_folder: Path):
app.main(
Expand Down Expand Up @@ -785,44 +887,3 @@ def test_rollback_extract(tests_folder: Path, files_folder: Path, files_folder_c
file3: File | None = database.files.select(where="uuid = ?", parameters=[str(file.uuid)]).fetchone()
assert not file3
assert not file3.get_absolute_path(files_folder_copy).is_file()


# noinspection DuplicatedCode
def test_extract(tests_folder: Path, files_folder: Path, files_folder_copy: Path):
# Run the extract command against a copy of the fixture database and check the
# state recorded afterwards for every file that was marked with action 'extract'.
#
# Parameters (all Path values; presumably pytest fixtures — confirm in conftest):
#   tests_folder      - folder holding fileformats.yml and custom_signatures.yml;
#                       also passed as the --siegfried-home value
#   files_folder      - fixture tree that contains _metadata/files.db
#   files_folder_copy - working tree the command is run on
#
# NOTE(review): leading indentation is missing in this view, so the nesting of the
# statements below (in particular which branch the final `parent = ?` assertion
# belongs to) cannot be read off the text — confirm against the repository file.
# NOTE(review): an identical test_extract definition appears a second time in this
# diff view; it looks like the commit moved the function — confirm only one copy
# remains in the actual module.
# Place a copy of files.db at the same relative location under files_folder_copy.
database_path: Path = files_folder / "_metadata" / "files.db"
database_path_copy: Path = files_folder_copy / database_path.relative_to(files_folder)
database_path_copy.parent.mkdir(parents=True, exist_ok=True)
copy(database_path, database_path_copy)

# Snapshot the rows to extract before the command changes them.
with FileDB(database_path_copy) as database:
files: list[File] = list(database.files.select(where="action = 'extract'"))

# Invoke the CLI in-process.
# NOTE(review): standalone_mode=False presumably lets errors propagate to the test
# instead of exiting the interpreter (click behaviour) — confirm `app` is a click command.
app.main(
[
command_extract.name,
str(files_folder_copy),
"--actions",
str(tests_folder / "fileformats.yml"),
"--custom-signatures",
str(tests_folder / "custom_signatures.yml"),
"--siegfried-home",
str(tests_folder),
],
standalone_mode=False,
)

# Re-read each snapshotted file by uuid and compare against the expected outcome.
with FileDB(database_path_copy) as database:
for file in files:
file2 = database.files.select(where="uuid = ?", parameters=[str(file.uuid)]).fetchone()
assert file2
# Files whose name, up to the first '.', ends with "-encrypted" are expected
# to be ignored with the password-protected template.
if file.relative_path.name.split(".", 1)[0].endswith("-encrypted"):
assert file2.action == "ignore"
assert file2.action_data.ignore.template == "password-protected"
assert file2.processed is True
# When the original extract action names an on_success action, the file
# must have been switched to that action.
elif on_success_action := file.action_data.extract.on_success:
assert file2.action == on_success_action
# Otherwise the file is expected to be ignored as an extracted archive.
else:
assert file2.action == "ignore"
assert file2.action_data.ignore.template == "extracted-archive"
assert file2.processed is True
# At least one row must reference this file's uuid as its parent.
assert database.files.select(where="parent = ?", parameters=[str(file.uuid)]).fetchone()

0 comments on commit 7831678

Please sign in to comment.