Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exclude users by Tag name or ID #9

Merged
merged 17 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/omero_plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
env:
STAGE: cli
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Checkout omero-test-infra
uses: actions/checkout@master
with:
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ repos:
- id: seed-isort-config

- repo: https://github.com/PyCQA/isort
rev: 5.10.1
rev: 5.11.5
hooks:
- id: isort
args: ["--profile", "black"]

- repo: https://github.com/psf/black
rev: 22.1.0
rev: 22.3.0
hooks:
- id: black
args: [--target-version=py36]
Expand Down
16 changes: 16 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,22 @@ system without running the deletion::

$ omero demo-cleanup --gigabytes 300

To ignore users whose data must not be deleted, you can Tag those users
and specify the `Tag` or parent `Tag Group` by `Name` or `ID`.
This is enabled by default using a Tag named "NO DELETE".
So it is preferable to Tag users on the server with a `Tag` named "NO DELETE" or create
a `Tag Group` named "NO DELETE" containing Tags linked to users.

# Add a Tag to a User via CLI (not possible to see this in the clients)
$ omero obj new ExperimenterAnnotationLink child=TagAnnotation:123 parent=Experimenter:52

# Choose a non-default Tag or Tag Group (by ID or Name) to ignore the tagged users
$ omero demo-cleanup --gigabytes 300 --ignore-tag "Tag Name"

You can also specify individual users by ID or user name, e.g.:

--ignore-users 123,user-1,ben,234

To generate the list of users whose data must be deleted to free 300GB on the
system and run the deletion (WARNING: data belonging to these users will
be removed permanently)::
Expand Down
25 changes: 22 additions & 3 deletions src/omero_demo_cleanup/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@

from omero.cli import BaseControl, Parser
from omero.gateway import BlitzGateway
from omero_demo_cleanup.library import choose_users, delete_data, resource_usage
from omero_demo_cleanup.library import (
choose_users,
delete_data,
resource_usage,
users_by_id_or_username,
users_by_tag,
)

HELP = """Cleanup disk space on OMERO.server """

Expand Down Expand Up @@ -85,11 +91,20 @@ def _configure(self, parser: Parser) -> None:
help="Perform the data deletion rather than running in dry-run mode."
" Default: false.",
)
parser.add_argument(
"--ignore-tag",
"-t",
default="NO DELETE",
help="Members tagged with this Tag (Name or ID) or child Tags are ignored.",
)
parser.add_argument(
"--ignore-users",
help="Ingore users: Comma-separated IDs and/or user-names.",
)
parser.set_defaults(func=self.cleanup)

@gateway_required
def cleanup(self, args: argparse.Namespace) -> None:

if args.inodes == 0 and args.gigabytes == 0:
self.ctx.die(23, "Please specify how much to delete")

Expand All @@ -111,7 +126,11 @@ def cleanup(self, args: argparse.Namespace) -> None:
)
)

stats = resource_usage(self.gateway, minimum_days=args.days)
ignore = users_by_tag(self.gateway, args.ignore_tag)
ignore.extend(users_by_id_or_username(self.gateway, args.ignore_users))
stats = resource_usage(
self.gateway, minimum_days=args.days, ignore_users=ignore
)
users = choose_users(args.inodes, args.gigabytes * 1000**3, stats)
self.ctx.err(f"Found {len(users)} user(s) for deletion.")
for user in users:
Expand Down
86 changes: 80 additions & 6 deletions src/omero_demo_cleanup/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
LegalGraphTargetsResponse,
)
from omero.gateway import BlitzGateway
from omero.rtypes import rlong
from omero.rtypes import rlong, unwrap
from omero.sys import ParametersI

# If adjusting UserStats, find_worst, choose_users then check with unit tests.
Expand Down Expand Up @@ -181,8 +181,78 @@ def delete_data(conn: BlitzGateway, user_id: int, dry_run: bool = True) -> None:
submit(conn, delete, Delete2Response)


def exp_to_str(exp):
    """Return a one-line, human-readable description of an Experimenter.

    The result has the form ``"user-3" (#6) Charles Darwin``: the quoted
    login name, the ID prefixed with ``#``, then first and last name.

    ``exp`` is read through ``.omeName.val`` and ``.id.val``, so it must be
    an object whose fields are wrapped values, not plain Python values.
    The name fields go through ``unwrap`` rather than ``.val``.
    """
    first_name = unwrap(exp.firstName)
    last_name = unwrap(exp.lastName)
    login = exp.omeName.val
    user_id = exp.id.val
    return f'"{login}" (#{user_id}) {first_name} {last_name}'


def users_by_id_or_username(conn: BlitzGateway, ignore_users: str) -> List[int]:
    """Resolve a comma-separated list of user IDs and/or user names to IDs.

    Each entry of ``ignore_users`` (e.g. ``"123,user-1,ben,234"``) is looked
    up as an Experimenter: entries made only of decimal digits are treated
    as IDs, everything else as a user name (``omeName``). Every resolved
    user is printed and its ID collected.

    Whitespace around entries is ignored and empty entries (for example from
    a trailing comma) are skipped, so ``"123, ben,"`` names two users.

    :param conn: gateway used to look up the Experimenters.
    :param ignore_users: comma-separated IDs and/or user names; may be
        empty or ``None``.
    :return: IDs of the named users, in the order given; ``[]`` when
        nothing was specified.
    :raises ValueError: if an entry does not match any Experimenter.
    """
    if not ignore_users:
        return []
    # Strip each entry and drop blanks so that "123, ben" or a stray comma
    # is not turned into a lookup for a user called " ben" or "".
    stripped = (entry.strip() for entry in ignore_users.split(","))
    users = [entry for entry in stripped if entry]
    if not users:
        return []

    exclude = []
    print(f"Ignoring {len(users)} users by ID or Username:")
    for user_str in users:
        # isdecimal() (unlike isnumeric()) only accepts characters that
        # int() can parse, so input such as "²" is treated as a user name
        # instead of crashing in int().
        if user_str.isdecimal():
            exp = conn.getObject("Experimenter", int(user_str))
        else:
            exp = conn.getObject("Experimenter", attributes={"omeName": user_str})
        # Both lookups report a missing user the same way.
        if exp is None:
            raise ValueError("Experimenter: %s not found" % user_str)
        print(" " + exp_to_str(exp._obj))
        exclude.append(exp.id)
    return exclude


def users_by_tag(conn: BlitzGateway, tag_name: str) -> List[int]:
    """Collect the IDs of users linked to a Tag or to any of its child Tags.

    ``tag_name`` is either a Tag ID (all-numeric string) or the Tag's text
    value. Users linked directly to the Tag are reported first; child Tags
    are then processed recursively by ID.

    :param conn: gateway used to load Tags and annotation links.
    :param tag_name: Tag (or Tag Group) name or ID. An empty value or the
        literal string ``"None"`` disables Tag-based exclusion.
    :return: IDs of all users found; a user linked to several Tags
        appears once per link.
    :raises ValueError: if no Tag matches, or if several Tags share the name.
    """
    if not tag_name or tag_name == "None":
        print("No Tag chosen for ingoring users")
        return []

    if tag_name.isnumeric():
        tag = conn.getObject("Annotation", tag_name)
    else:
        matches = list(
            conn.getObjects("TagAnnotation", attributes={"textValue": tag_name})
        )
        # An ambiguous name is an error rather than a silent first-match.
        if len(matches) > 1:
            ids = [match.id for match in matches]
            raise ValueError(f"Multiple Tags with name: {tag_name} ({ids})")
        tag = matches[0] if matches else None
    if tag is None:
        raise ValueError("Tag: %s not found" % tag_name)

    # Child links are present when this Tag acts as a Tag Group.
    child_links = list(conn.getAnnotationLinks("Annotation", parent_ids=[tag.id]))
    user_links = list(conn.getAnnotationLinks("Experimenter", ann_ids=[tag.id]))
    has_children = len(child_links) > 0
    has_users = len(user_links) > 0

    excluded = [user_link.parent.id.val for user_link in user_links]

    # Report direct users; a Tag with neither users nor children still
    # gets this line so the output always says what was found.
    if has_users or not has_children:
        print(
            "Ignoring %s users linked to Tag:%s %s:"
            % (len(user_links), tag.id, tag.textValue)
        )
    for user_link in user_links:
        print(" " + exp_to_str(user_link.parent))

    # Descend into child Tags, addressing each by ID.
    if has_children or not has_users:
        print(f"Tag:{tag.id} {tag.textValue} has {len(child_links)} child Tags...")
    for child_link in child_links:
        excluded.extend(users_by_tag(conn, str(child_link.child.id.val)))

    return excluded


def find_users(
conn: BlitzGateway, minimum_days: int = 0
conn: BlitzGateway, minimum_days: int = 0, ignore_users: List[int] = []
) -> Tuple[Dict[int, str], Dict[int, int]]:
# Determine which users' data to consider deleting.

Expand All @@ -194,7 +264,8 @@ def find_users(
user_id = result[0].val
user_name = result[1].val
if user_name not in ("PUBLIC", "guest", "root", "monitoring"):
users[user_id] = user_name
if user_id not in ignore_users:
users[user_id] = user_name

for result in conn.getQueryService().projection(
"SELECT DISTINCT owner.id FROM Session WHERE closed IS NULL", None
Expand Down Expand Up @@ -235,12 +306,16 @@ def find_users(
return users, logouts


def resource_usage(conn: BlitzGateway, minimum_days: int = 0) -> List[UserStats]:
def resource_usage(
conn: BlitzGateway, minimum_days: int = 0, ignore_users: List[int] = []
) -> List[UserStats]:
# Note users' resource usage.
# DiskUsage2.targetClasses remains too inefficient so iterate.

user_stats = []
users, logouts = find_users(conn, minimum_days=minimum_days)
users, logouts = find_users(
conn, minimum_days=minimum_days, ignore_users=ignore_users
)
for user_id, user_name in users.items():
print(f'Finding disk usage of "{user_name}" (#{user_id}).')
user = {"Experimenter": [user_id]}
Expand Down Expand Up @@ -286,7 +361,6 @@ def perform_delete(


def main() -> None:

with omero.cli.cli_login() as cli:
conn = omero.gateway.BlitzGateway(client_obj=cli.get_client())
try:
Expand Down