From ec6c1a6192d777a946ee1f322dc3978cb41f5e1e Mon Sep 17 00:00:00 2001 From: Sietse Snel Date: Wed, 11 Oct 2023 15:12:56 +0200 Subject: [PATCH] YDA-5345: refactor ARB and add documentation --- docker/images/yoda_irods_icat/Dockerfile | 4 +- docker/images/yoda_irods_icat/rules_uu.cfg | 5 ++ docker/run-cronjob.sh | 4 ++ docs/design/index.md | 1 + .../processes/automatic-resource-balancing.md | 69 +++++++++++++++++++ roles/irods_arb/defaults/main.yml | 3 - roles/irods_arb/meta/main.yml | 1 + roles/irods_arb/tasks/main.yml | 19 ++--- roles/python_irodsclient/defaults/main.yml | 6 ++ roles/python_irodsclient/meta/main.yml | 3 + roles/python_irodsclient/tasks/main.yml | 10 ++- roles/yoda_rulesets/defaults/main.yml | 6 ++ roles/yoda_rulesets/templates/rules_uu.cfg.j2 | 5 ++ 13 files changed, 122 insertions(+), 14 deletions(-) create mode 100644 docs/design/processes/automatic-resource-balancing.md create mode 100644 roles/python_irodsclient/defaults/main.yml diff --git a/docker/images/yoda_irods_icat/Dockerfile b/docker/images/yoda_irods_icat/Dockerfile index a39fa437b..2ded1f8bb 100644 --- a/docker/images/yoda_irods_icat/Dockerfile +++ b/docker/images/yoda_irods_icat/Dockerfile @@ -121,7 +121,9 @@ RUN sudo -u irods python -m pip --no-cache-dir install --user pip==20.2.4 && \ sudo -u irods python -m pip --no-cache-dir install --user setuptools==44.1.1 && \ sudo -u irods python -m pip --no-cache-dir install --user python-irodsclient==1.1.8 && \ sudo -u irods python -m pip --no-cache-dir install --user -r /etc/irods/yoda-ruleset/requirements.txt && \ - sudo -u irods /usr/local/bin/pip3 install --user jsonschema==4.17.3 + sudo -u irods /usr/local/bin/pip3 install --user jsonschema==4.17.3 && \ + sudo -u irods /usr/local/bin/pip3 install --user python-irodsclient==1.1.8 && \ + sudo -u irods /usr/local/bin/pip3 install --user psutil==5.9.5 COPY core.py.template /etc/irods/core.py COPY core.re.template /etc/irods/core.re RUN for script in scheduled-copytovault.sh 
admin-remove-orphan-vault-if-empty.sh admin-vaultactions.sh \ diff --git a/docker/images/yoda_irods_icat/rules_uu.cfg b/docker/images/yoda_irods_icat/rules_uu.cfg index b6b8242d6..38a638fb8 100644 --- a/docker/images/yoda_irods_icat/rules_uu.cfg +++ b/docker/images/yoda_irods_icat/rules_uu.cfg @@ -75,3 +75,8 @@ sram_rest_api_url = 'https://sram-mock.yoda' sram_api_key = 'PLACEHOLDER' sram_verbose_logging = 'true' sram_tls_verify = 'false' + +arb_enabled = 'false' +arb_exempt_resources = '' +arb_min_gb_free = '0' +arb_min_percent_free = '0' diff --git a/docker/run-cronjob.sh b/docker/run-cronjob.sh index 80ee86e4e..e5bf6ae8e 100755 --- a/docker/run-cronjob.sh +++ b/docker/run-cronjob.sh @@ -57,6 +57,10 @@ case "$1" in docker exec "$EXEC_OPTIONS" public.yoda sudo -iu yodadeployment /var/www/moai/yoda-moai/venv/bin/update_moai --config /var/www/moai/settings.ini yoda_moai ;; + arbupdate) + docker exec "$EXEC_OPTIONS" provider.yoda sudo -iu irods /usr/local/bin/python3 /etc/irods/yoda-ruleset/tools/arb-update-resources.py -v + ;; + *) echo "No cronjob or invalid cronjob provided." 
;; diff --git a/docs/design/index.md b/docs/design/index.md index 8bb9898fb..5812ce4d8 100644 --- a/docs/design/index.md +++ b/docs/design/index.md @@ -33,6 +33,7 @@ has_toc: false ## Processes - [Asynchronous and privileged execution](processes/async-system-execution.md) +- [Automatic Resource Balancing](processes/automatic-resource-balancing.md) - [List of asynchronous processes](processes/asynchronous-processes.md) - [Locking mechanism](processes/locking-mechanism.md) - [Publication process](processes/publication-process.md) diff --git a/docs/design/processes/automatic-resource-balancing.md b/docs/design/processes/automatic-resource-balancing.md new file mode 100644 index 000000000..55a611abd --- /dev/null +++ b/docs/design/processes/automatic-resource-balancing.md @@ -0,0 +1,69 @@ +--- +grand_parent: Software Design +parent: Processes +--- + +# Automatic Resource Balancing + +This page contains information about Automatic Resource Balancing (ARB), the process that Yoda optionally uses to ensure that newly +created data objects are stored on iRODS resources that still have enough space available. + +## Background + +One of the functions of iRODS is to facilitate storage abstraction: data can be stored on different types of back-end storage (e.g. object +storage, NFS shares, etc), but is presented to the user in a unified way. iRODS keeps track of storage areas, such as local filesystems or +object storage buckets, as resources. Some resources can be scalable (e.g. object storage), whereas others are not scalable in practice +(e.g. local filesystems, depending on the infrastructure). + +## Problem description + +When a non-scalable storage resource does not have enough space left for new data objects, iRODS needs to be configured to not store +any new data on it, in order to prevent failures due to the resource running out of space.
On environments with a small number of resources, +manually moving resources out of the iRODS tree is workable; however, this becomes cumbersome and prone to errors as the number of resources grows. +Relying on manual administrator action also increases the risk that a resource will fill up outside of office hours, before an administrator +has had time to move it out of the tree. + +Furthermore, moving resources out of the tree causes problems with writes to existing objects. We need to enforce a default resource on uploads of +new files in order to be able to enforce a replica policy (e.g. a data object should have one replica in each of two datacenters). However, when a full +resource is moved out of the tree, the default resource no longer matches with existing data objects on that resource, which results in errors when a user +tries to update existing data objects. In order to resolve this issue, we need to keep full resources in the tree. + +iRODS has a built-in mechanism for enforcing minimum free space on unixfilesystem resources: the +[minimum_free_space_for_create_in_bytes setting](https://docs.irods.org/4.2.12/plugins/composable_resources/#unixfilesystem). However, as of iRODS 4.2.12, +this setting is incompatible with Python-iRODS-client (see e.g. https://github.com/irods/python-irodsclient/issues/462 and https://github.com/irods/irods/issues/7154). +Therefore it's currently not usable in the context of Yoda. + +Considering that we need to determine which resources are full on every data object creation, we need a solution that is optimized for +frequent lookups. + +## Solution + +The current solution for this problem in Yoda is automatic resource balancing (ARB). It consists of the following parts: +- All iRODS systems in the zone (both provider and consumer) have a local cronjob named `arb-update-resources` that periodically retrieves + total and available space for every local unixfilesystem resource.
This information is submitted to an update rule in the ruleset. +- The update rule compares available space to a relative trigger value (configuration setting `irods_arb_min_percent_free`) and absolute + trigger value (configuration setting `irods_arb_min_gb_free`), and checks whether the resource has been + manually configured to be exempt from ARB (configuration setting `irods_arb_exempt_resources`). Based on this, resources + are assigned one of the following ARB values: + + * IGNORE: this resource is irrelevant to ARB, because it is neither a unixfilesystem resource nor a passthrough parent resource of a + unixfilesystem resource. + * EXEMPT: this resource has been manually configured to be ignored by ARB. + * FULL: ARB applies to this resource; it has exceeded one of the trigger values. + * AVAILABLE: ARB applies to this resource; it has not yet exceeded one of the trigger values. + + These values are stored in both a Redis data structure store and a resource AVU. +- When ARB is enabled, the `pep_resource_resolve_hierarchy_pre` policy retrieves the ARB value of a resource on create actions. It applies a write + vote of `0.0` if the ARB value is `FULL`. If possible, the policy retrieves the ARB value from Redis; there is also a fallback lookup that uses + the resource AVU. + +Together, these components ensure that a resource does not get votes for creating new data objects after it has exceeded one of the trigger values +for minimum amount of free space. + +## Limitations + +- ARB checks available space once a minute by default. The configured trigger values need to take into account that some time can pass between + a resource exceeding its minimum amount of free space, and ARB detecting this event. +- ARB only affects creation of new data objects. Expansion of existing data objects is not considered. This needs to be taken into account + when choosing trigger values.
+- ARB can currently only process unixfilesystem resources (and their parent coordinating resources). Other storage resources are ignored. diff --git a/roles/irods_arb/defaults/main.yml b/roles/irods_arb/defaults/main.yml index e2646cf47..8e597c369 100644 --- a/roles/irods_arb/defaults/main.yml +++ b/roles/irods_arb/defaults/main.yml @@ -4,6 +4,3 @@ irods_service_account: irods irods_arb_enabled: false -irods_arb_exempt_resources: "" -irods_arb_min_gb_free: 0 -irods_arb_min_percent_free: 5 diff --git a/roles/irods_arb/meta/main.yml b/roles/irods_arb/meta/main.yml index d5acfebba..308fb2e2a 100644 --- a/roles/irods_arb/meta/main.yml +++ b/roles/irods_arb/meta/main.yml @@ -12,5 +12,6 @@ galaxy_info: dependencies: + - role: python3 - role: python_irodsclient - role: redis diff --git a/roles/irods_arb/tasks/main.yml b/roles/irods_arb/tasks/main.yml index 24c9b22a2..7598753f4 100644 --- a/roles/irods_arb/tasks/main.yml +++ b/roles/irods_arb/tasks/main.yml @@ -4,14 +4,17 @@ become_user: "{{ irods_service_account }}" become: true ansible.builtin.cron: - name: 'update-ufs-resources.py' + name: 'arb-update-resources.py' minute: '*' hour: '*' - job: > - /usr/bin/python /etc/irods/yoda-ruleset/tools/update-ufs-resources.py - -e "{{ irods_arb_exempt_resources }}" - update - --min-percent-free "{{ irods_arb_min_percent_free }}" - --min-gb-free "{{ irods_arb_min_gb_free }}" - >& /dev/null + job: '/usr/local/bin/python3 /etc/irods/yoda-ruleset/tools/arb-update-resources.py > /dev/null' state: '{{ "present" if irods_arb_enabled else "absent" }}' + + +- name: Ensure psutil is installed (Python 3) + become_user: '{{ irods_service_account }}' + become: true + ansible.builtin.pip: + name: "psutil==5.9.5" + executable: /usr/local/bin/pip3 + extra_args: --user diff --git a/roles/python_irodsclient/defaults/main.yml b/roles/python_irodsclient/defaults/main.yml new file mode 100644 index 000000000..90d115d66 --- /dev/null +++ b/roles/python_irodsclient/defaults/main.yml @@ -0,0 +1,6 @@ 
+--- +# copyright Utrecht University + +irods_service_account: irods + +python_irodsclient_version: 1.1.8 diff --git a/roles/python_irodsclient/meta/main.yml b/roles/python_irodsclient/meta/main.yml index 9e6978707..5be928bd4 100644 --- a/roles/python_irodsclient/meta/main.yml +++ b/roles/python_irodsclient/meta/main.yml @@ -9,3 +9,6 @@ galaxy_info: platforms: - name: EL version: 7 + +dependencies: + - role: python3 diff --git a/roles/python_irodsclient/tasks/main.yml b/roles/python_irodsclient/tasks/main.yml index c4fce3fc8..f3bee0c72 100644 --- a/roles/python_irodsclient/tasks/main.yml +++ b/roles/python_irodsclient/tasks/main.yml @@ -16,8 +16,14 @@ state: present -- name: Ensure python-irodsclient is installed +- name: Ensure python-irodsclient is installed (Python 2) ansible.builtin.pip: name: - - python-irodsclient==1.1.8 + - "python-irodsclient=={{ python_irodsclient_version }}" state: present + + +- name: Ensure python-irodsclient is installed (Python 3) + ansible.builtin.pip: + name: "python_irodsclient=={{ python_irodsclient_version }}" + executable: /usr/local/bin/pip3 diff --git a/roles/yoda_rulesets/defaults/main.yml b/roles/yoda_rulesets/defaults/main.yml index b107a5346..256a28fe3 100644 --- a/roles/yoda_rulesets/defaults/main.yml +++ b/roles/yoda_rulesets/defaults/main.yml @@ -213,3 +213,9 @@ yoda_davrods_anonymous_port: 443 # External Users configuration external_users: true external_users_domain_filter: uu.nl | acc.uu.nl # Domains that should use OIDC (list, wildcard character *) + +# Automatic resource balancing configuration +irods_arb_enabled: false +irods_arb_exempt_resources: "" +irods_arb_min_gb_free: 0 +irods_arb_min_percent_free: 5 diff --git a/roles/yoda_rulesets/templates/rules_uu.cfg.j2 b/roles/yoda_rulesets/templates/rules_uu.cfg.j2 index 1c8359dac..d5def54eb 100644 --- a/roles/yoda_rulesets/templates/rules_uu.cfg.j2 +++ b/roles/yoda_rulesets/templates/rules_uu.cfg.j2 @@ -97,3 +97,8 @@ sram_flow = '{{ sram_flow }}' sram_verbose_logging 
= '{{ ["false", "true"][sram_verbose_logging|int] }}' sram_tls_verify = '{{ ["false", "true"][sram_tls_verify|int] }}' {% endif %} + +arb_enabled = '{{ ["false", "true"][irods_arb_enabled|int] }}' +arb_exempt_resources = '{{ irods_arb_exempt_resources }}' +arb_min_gb_free = '{{ irods_arb_min_gb_free }}' +arb_min_percent_free = '{{ irods_arb_min_percent_free }}'