Skip to content

Commit

Permalink
Add check for free space before downloading a backup (#663)
Browse files Browse the repository at this point in the history
* Add check for free space before downloading a backup

* Expand the low disk space comment with more info

* In ITs, get newer java-driver to fix its SSL bug

* Make ITs wait for schema agreement after tables are created
  • Loading branch information
rzvoncek authored Oct 11, 2023
1 parent 49d1cb9 commit 98f6d63
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 12 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,19 @@ jobs:
ccm start -v
ccm showlastlog|tail -100
ccm stop
# if we are dealing with C* 4.0+, we need to fix the java-driver dependency.
# it has a bug that breaks sstableloader tests with client encryption enabled
# if cassandra version starts with 4.0 or 4.1
if [[ "${{ matrix.cassandra-version }}" =~ ^4\.[01]\. ]];
then
ccm create driver-fix-cluster -v ${{ matrix.cassandra-version }} -n 1
# enclosed in () so we return to current pwd once we are done fixing the driver
(
cd ~/.ccm/repository/${{ matrix.cassandra-version }}/lib
rm cassandra-driver-core-3.11.0-shaded.jar
wget https://repo1.maven.org/maven2/com/datastax/cassandra/cassandra-driver-core/3.11.5/cassandra-driver-core-3.11.5-shaded.jar
)
fi
if [ "${{ matrix.it-backend }}" == "s3" ]
then
# AWS S3 Storage tests
Expand Down
34 changes: 33 additions & 1 deletion medusa/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,21 @@
import logging
import json
import pathlib
import shutil
import sys

from medusa.storage import Storage
from medusa.storage.abstract_storage import AbstractStorage
from medusa.filtering import filter_fqtns


def download_data(storageconfig, backup, fqtns_to_restore, destination):

manifest = json.loads(backup.manifest)

_check_available_space(manifest, destination)

with Storage(config=storageconfig) as storage:
manifest = json.loads(backup.manifest)

for section in manifest:

Expand Down Expand Up @@ -79,3 +85,29 @@ def download_cmd(config, backup_name, download_destination, keyspaces, tables, i

fqtns_to_download, _ = filter_fqtns(keyspaces, tables, node_backup.manifest, ignore_system_keyspaces)
download_data(config.storage, node_backup, fqtns_to_download, download_destination)


def _check_available_space(manifest, destination):
    """Fail fast when ``destination`` lacks the space needed for the backup in ``manifest``.

    The size of the download is computed from the manifest and compared with the free
    space reported for ``destination``. When the download would not fit, two error
    messages are logged (what is missing, and how to point the command elsewhere).

    :param manifest: parsed backup manifest, handed to ``_get_download_size``.
    :param destination: directory the backup is about to be downloaded into.
    :raises RuntimeError: if the download size exceeds the available space.
    """
    download_size = _get_download_size(manifest)
    available_space = _get_available_size(destination)
    logging.debug(f'Download size: {download_size}, available space: {available_space}')
    if download_size > available_space:
        missing = int(download_size) - int(available_space)
        # Adjacent string literals are concatenated verbatim, so the first one must end
        # with a space to keep the size and the "(Missing ...)" note apart.
        logging.error(
            f'Directory {destination} does not have enough space to download backup of size {download_size} '
            f'(Missing roughly {AbstractStorage.human_readable_size(missing)})'
        )
        logging.error(
            'Please add --temp-dir pointing to a directory with enough space to your restore command '
            '(or change where --download-destination of your download command points to).'
        )
        raise RuntimeError('Not enough space available')


def _get_download_size(manifest):
    """Return the combined size of every object listed in ``manifest``.

    ``manifest`` is iterated as a sequence of sections; each section's ``objects``
    entries contribute their ``size``. Sizes go through ``int()``, so both integers
    and numeric strings are accepted. An empty manifest yields ``0``.
    """
    # A generator expression lets sum() consume the sizes without building a throwaway list.
    return sum(int(obj['size']) for section in manifest for obj in section['objects'])


def _get_available_size(destination_dir):
    """Report the free space, in bytes, on the filesystem holding ``destination_dir``.

    The directory (and any missing parents) is created before the lookup, so the
    path is guaranteed to exist by the time its disk usage is queried.
    """
    target = pathlib.Path(destination_dir)
    target.mkdir(parents=True, exist_ok=True)
    usage = shutil.disk_usage(destination_dir)
    return usage.free
2 changes: 1 addition & 1 deletion medusa/storage/abstract_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def additional_upload_headers(self):
return {}

@staticmethod
def _human_readable_size(size, decimal_places=3):
def human_readable_size(size, decimal_places=3):
for unit in ["B", "KiB", "MiB", "GiB", "TiB"]:
if size < 1024.0:
break
Expand Down
2 changes: 1 addition & 1 deletion medusa/storage/azure_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ async def _upload_blob(self, src: str, dest: str) -> ManifestObject:
file_size = os.stat(src).st_size
logging.debug(
'[Azure Storage] Uploading {} ({}) -> azure://{}/{}'.format(
src, self._human_readable_size(file_size), self.config.bucket_name, object_key
src, self.human_readable_size(file_size), self.config.bucket_name, object_key
)
)

Expand Down
2 changes: 1 addition & 1 deletion medusa/storage/google_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ async def _upload_blob(self, src: str, dest: str) -> ManifestObject:
file_size = os.stat(src).st_size
logging.debug(
'[GCS Storage] Uploading {} ({}) -> gs://{}/{}'.format(
src, self._human_readable_size(file_size), self.config.bucket_name, object_key
src, self.human_readable_size(file_size), self.config.bucket_name, object_key
)
)
with open(src, 'rb') as src_file:
Expand Down
2 changes: 1 addition & 1 deletion medusa/storage/local_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ async def _upload_blob(self, src: str, dest: str) -> ManifestObject:
file_size = os.stat(src_file).st_size
logging.debug(
'[Local Storage] Uploading {} ({}) -> {}'.format(
src_file, self._human_readable_size(file_size), dest_file
src_file, self.human_readable_size(file_size), dest_file
)
)
# remove root_dir from dest_file name
Expand Down
2 changes: 1 addition & 1 deletion medusa/storage/s3_base_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ async def _upload_blob(self, src: str, dest: str) -> ManifestObject:
file_size = os.stat(src).st_size
logging.debug(
'[S3 Storage] Uploading {} ({}) -> {}'.format(
src, self._human_readable_size(file_size), object_key
src, self.human_readable_size(file_size), object_key
)
)

Expand Down
73 changes: 73 additions & 0 deletions tests/download_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
# Copyright 2021 DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import shutil
import unittest
import uuid

from unittest.mock import patch

from medusa.download import _get_download_size, _check_available_space


class DownloadTest(unittest.TestCase):
    """Unit tests for the free-space check that runs before a backup is downloaded."""

    def test_get_download_size(self):
        """Object sizes are summed across sections, whether given as ints or numeric strings."""
        manifest = [
            {
                'keyspace': 'k1',
                'columnfamily': 't1-bigBadCfId',
                'objects': [
                    {'path': '', 'size': 100, 'MD5': ''},
                    {'path': '', 'size': 101, 'MD5': ''},
                    {'path': '', 'size': 100, 'MD5': ''}
                ]
            },
            {
                'keyspace': 'k2',
                'columnfamily': 't2-81ffe430e50c11e99f91a15641db358f',
                'objects': [
                    {'path': '', 'size': '100', 'MD5': ''},
                    {'path': '', 'size': '123000', 'MD5': ''}
                ]
            },
        ]
        self.assertEqual(123401, _get_download_size(manifest))

    def test_check_available_space(self):
        """The check raises when space is short, passes otherwise, and creates the destination."""
        # imported locally so this test does not rely on module-level imports beyond the existing ones
        import os
        import tempfile

        destination = 'whatever'
        manifest = [
            {
                'keyspace': 'k1',
                'columnfamily': 't1-bigBadCfId',
                'objects': [
                    {'path': '', 'size': 100, 'MD5': ''}
                ]
            }
        ]
        # we fake only 50 available bytes, which is less than the 100 bytes the manifest needs
        with patch('medusa.download._get_available_size', return_value=50):
            self.assertRaises(RuntimeError, _check_available_space, manifest, destination)
        # now we provide more space and the exception does not happen
        with patch('medusa.download._get_available_size', return_value=200):
            _check_available_space(manifest, destination)

        # the system temp directory should always exist and there ought to be 100 bytes free
        temp_root = tempfile.gettempdir()
        _check_available_space(manifest, destination=temp_root)
        # we also check that a previously non-existing directory is present already at the time of the space check
        random_destination = os.path.join(temp_root, f'medusa-restore-{uuid.uuid4()}')
        try:
            _check_available_space(manifest, random_destination)
            self.assertTrue(os.path.isdir(random_destination))
        finally:
            # ignore_errors keeps a missing directory from masking the real test failure
            shutil.rmtree(random_destination, ignore_errors=True)
shutil.rmtree(random_destination)
11 changes: 5 additions & 6 deletions tests/integration/features/steps/integration_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,15 +524,14 @@ def _i_create_the_whatever_table(context, table_name, keyspace_name):
table = "CREATE TABLE IF NOT EXISTS {}.{} (id timeuuid PRIMARY KEY, value text);"
context.session.execute(table.format(keyspace_name, table_name))

# wait for the table to be created on both nodes
# normally the driver would do this, but for some reason it doesn't.
time.sleep(1)


@when('I create the "{table_name}" table with secondary index in keyspace "{keyspace_name}"')
def _i_create_the_table_with_si(context, table_name, keyspace_name):
keyspace = """CREATE KEYSPACE IF NOT EXISTS {} WITH replication = {{'class':'SimpleStrategy',
'replication_factor':1}}"""
context.session.execute(keyspace.format(keyspace_name))

table = "CREATE TABLE IF NOT EXISTS {}.{} (id timeuuid PRIMARY KEY, value text);"
context.session.execute(table.format(keyspace_name, table_name))
_i_create_the_whatever_table(context, table_name, keyspace_name)

si = "CREATE INDEX IF NOT EXISTS {}_idx ON {}.{} (value);"
context.session.execute(si.format(table_name, keyspace_name, table_name))
Expand Down

0 comments on commit 98f6d63

Please sign in to comment.