Add configuring bucket-name from args for RO commands #589 #590

Open · wants to merge 10 commits into base: master
14 changes: 7 additions & 7 deletions medusa/download.py
@@ -13,18 +13,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import json
import logging
import pathlib
import sys

from medusa.filtering import filter_fqtns
from medusa.storage import Storage, divide_chunks
from medusa.storage.google_storage import GSUTIL_MAX_FILES_PER_CHUNK
from medusa.filtering import filter_fqtns


def download_data(storageconfig, backup, fqtns_to_restore, destination):
storage = Storage(config=storageconfig)
def download_data(storage, backup, fqtns_to_restore, destination):
manifest = json.loads(backup.manifest)

for section in manifest:
@@ -65,8 +64,9 @@ def download_data(storageconfig, backup, fqtns_to_restore, destination):
)


def download_cmd(config, backup_name, download_destination, keyspaces, tables, ignore_system_keyspaces):
storage = Storage(config=config.storage)
def download_cmd(config, backup_name, download_destination, keyspaces, tables, ignore_system_keyspaces,
bucket_name=None, prefix=None):
storage = Storage(config=config.storage, bucket_name=bucket_name, prefix=prefix)

if not download_destination.is_dir():
logging.error('{} is not a directory'.format(download_destination))
@@ -78,4 +78,4 @@ def download_cmd(config, backup_name, download_destination, keyspaces, tables, i
sys.exit(1)

fqtns_to_download, _ = filter_fqtns(keyspaces, tables, node_backup.manifest, ignore_system_keyspaces)
download_data(config.storage, node_backup, fqtns_to_download, download_destination)
download_data(storage, node_backup, fqtns_to_download, download_destination)
4 changes: 2 additions & 2 deletions medusa/fetch_tokenmap.py
@@ -18,8 +18,8 @@
from medusa.storage import Storage


def main(config, backup_name):
storage = Storage(config=config.storage)
def main(config, backup_name, bucket_name, prefix):
storage = Storage(config=config.storage, bucket_name=bucket_name, prefix=prefix)
backup = storage.get_cluster_backup(backup_name)
if not backup:
logging.error('No such backup')
9 changes: 4 additions & 5 deletions medusa/listing.py
@@ -17,12 +17,11 @@
from datetime import datetime
from medusa.storage import Storage


TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


def get_backups(config, show_all):
storage = Storage(config=config.storage)
def get_backups(config, show_all, bucket_name=None, prefix=None):
storage = Storage(config=config.storage, bucket_name=bucket_name, prefix=prefix)

cluster_backups = sorted(
storage.list_cluster_backups(),
@@ -37,8 +36,8 @@ def get_backups(config, show_all):
return cluster_backups


def list_backups(config, show_all):
cluster_backups = get_backups(config, show_all)
def list_backups(config, show_all, bucket_name, prefix):
cluster_backups = get_backups(config, show_all, bucket_name, prefix)
seen_incomplete_backup = False
for cluster_backup in cluster_backups:
finished = cluster_backup.finished
35 changes: 25 additions & 10 deletions medusa/medusacli.py
@@ -167,22 +167,26 @@ def backup_cluster(medusaconfig, backup_name, seed_target, stagger, enable_md5_c

@cli.command(name='fetch-tokenmap')
@click.option('--backup-name', help='backup name', required=True)
@click.option('--bucket-name', help='Bucket with backup', required=False, default=None)
@click.option('--prefix', help='Backup prefix', required=False, default=None)
@pass_MedusaConfig
def fetch_tokenmap(medusaconfig, backup_name):
def fetch_tokenmap(medusaconfig, backup_name, bucket_name, prefix):
"""
Get the token/node mapping for a specific backup
"""
medusa.fetch_tokenmap.main(medusaconfig, backup_name)
medusa.fetch_tokenmap.main(medusaconfig, backup_name, bucket_name, prefix)


@cli.command(name='list-backups')
@click.option('--show-all/--no-show-all', default=False, help="List all backups in the bucket")
@click.option('--bucket-name', help='Bucket with backup', required=False, default=None)
@click.option('--prefix', help='Backup prefix', required=False, default=None)
Comment on lines +182 to +183
Contributor:
Issue: these two new arguments create confusion with the ones defined in the global settings (the ones placed before the command name on the command line).

We'd rather remove these and expose both bucket_name and prefix as-is on the storage object. Then you can use them in your code through the storage object instead of passing them around between methods.
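
For illustration only, a rough sketch of that direction (this is not code from the PR; it assumes Storage resolves any bucket/prefix override internally and exposes the effective values as attributes):

    import logging

    from medusa.storage import Storage

    def get_backups(config, show_all):
        # The override, if any, has already been applied inside Storage,
        # so the command keeps its existing signature.
        storage = Storage(config=config.storage)
        logging.debug('Listing backups in bucket %s under prefix %s',
                      storage.storage_driver.bucket_name, storage.prefix_path)
        # (show_all filtering omitted for brevity)
        return storage.list_cluster_backups()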

Contributor Author:
If I understand your suggestion correctly, I should move bucket_name and prefix into the storage config, i.e. here:

    StorageConfig = collections.namedtuple(
        'StorageConfig',
        ['bucket_name', 'key_file', 'prefix', 'fqdn', 'host_file_separator', 'storage_provider',
         'base_path', 'max_backup_age', 'max_backup_count', 'api_profile', 'transfer_max_bandwidth',
         'concurrent_transfers', 'multi_part_upload_threshold', 'host', 'region', 'port', 'secure',
         'aws_cli_path', 'kms_id', 'backup_grace_period_in_days', 'use_sudo_for_restore', 'k8s_mode']
    )

If so, I had another idea in this PR. Changing the config everywhere for a single restore is overkill: the config holds long-term settings (for regular backups), and changing it for a restore is a potential risk to the backup procedure (the settings have to be put back afterwards, concurrent operations must not conflict, and so on).
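
One possible middle ground, sketched purely as an illustration (the helper name is made up): derive a throwaway copy of the storage settings for the single restore instead of touching the long-lived config. Because StorageConfig is a collections.namedtuple, _replace() returns a new instance and the settings used by regular backups stay untouched.

    def storage_config_with_overrides(storage_config, bucket_name=None, prefix=None):
        # Build a one-off copy of the StorageConfig namedtuple; the original
        # config object used for scheduled backups is never mutated.
        overrides = {}
        if bucket_name is not None:
            overrides['bucket_name'] = bucket_name
        if prefix is not None:
            overrides['prefix'] = prefix
        return storage_config._replace(**overrides) if overrides else storage_config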

@pass_MedusaConfig
def list_backups(medusaconfig, show_all):
def list_backups(medusaconfig, show_all, bucket_name, prefix):
"""
List backups
"""
medusa.listing.list_backups(medusaconfig, show_all)
medusa.listing.list_backups(medusaconfig, show_all, bucket_name, prefix)


@cli.command(name='download')
@@ -194,13 +198,16 @@ def list_backups(medusaconfig, show_all):
multiple=True, default={})
@click.option('--ignore-system-keyspaces', help='Do not download cassandra system keyspaces', required=True,
is_flag=True, default=False)
@click.option('--bucket-name', help='Bucket with backup', required=False, default=None)
@click.option('--prefix', help='Backup prefix', required=False, default=None)
@pass_MedusaConfig
def download(medusaconfig, backup_name, download_destination, keyspaces, tables, ignore_system_keyspaces):
def download(medusaconfig, backup_name, download_destination, keyspaces, tables, ignore_system_keyspaces, bucket_name,
prefix):
"""
Download backup
"""
medusa.download.download_cmd(medusaconfig, backup_name, Path(download_destination), keyspaces, tables,
ignore_system_keyspaces)
ignore_system_keyspaces, bucket_name, prefix)


@cli.command(name='restore-cluster')
@@ -224,9 +231,12 @@ def download(medusaconfig, backup_name, download_destination, keyspaces, tables,
@click.option('--version-target', help='Target Cassandra version', required=False, default="3.11.9")
@click.option('--ignore-racks', help='Disable matching nodes based on rack topology', required=False, default=False,
is_flag=True)
@click.option('--bucket-name', help='Bucket with backup', required=False, default=None)
@click.option('--prefix', help='Backup prefix', required=False, default=None)
@pass_MedusaConfig
def restore_cluster(medusaconfig, backup_name, seed_target, temp_dir, host_list, keep_auth, bypass_checks,
verify, keyspaces, tables, parallel_restores, use_sstableloader, version_target, ignore_racks):
verify, keyspaces, tables, parallel_restores, use_sstableloader, version_target, ignore_racks,
bucket_name, prefix):
"""
Restore Cassandra cluster
"""
@@ -243,7 +253,9 @@ def restore_cluster(medusaconfig, backup_name, seed_target, temp_dir, host_list,
int(parallel_restores),
use_sstableloader,
version_target,
ignore_racks)
ignore_racks,
bucket_name,
prefix)


@cli.command(name='restore-node')
@@ -264,14 +276,17 @@ def restore_cluster(medusaconfig, backup_name, seed_target, temp_dir, host_list,
@click.option('--use-sstableloader', help='Use the sstableloader to load the backup into the cluster',
default=False, is_flag=True)
@click.option('--version-target', help='Target Cassandra version', required=False, default="3.11.9")
@click.option('--bucket-name', help='Bucket with backup', required=False, default=None)
@click.option('--prefix', help='Backup prefix', required=False, default=None)
@pass_MedusaConfig
def restore_node(medusaconfig, temp_dir, backup_name, in_place, keep_auth, seeds, verify, keyspaces, tables,
use_sstableloader, version_target):
use_sstableloader, version_target, bucket_name, prefix):
"""
Restore single Cassandra node
"""
medusa.restore_node.restore_node(medusaconfig, Path(temp_dir), backup_name, in_place, keep_auth, seeds,
verify, set(keyspaces), set(tables), use_sstableloader, version_target)
verify, set(keyspaces), set(tables), use_sstableloader, version_target,
bucket_name, prefix)


@cli.command(name='status')
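
A small smoke test along these lines could confirm the new options are registered on the read-only commands. It is a suggestion rather than part of the PR, and it assumes the top-level group can render --help without a full medusa.ini; if it cannot, pass --config-file pointing at a minimal fixture.

    from click.testing import CliRunner

    from medusa.medusacli import cli

    def test_read_only_commands_accept_bucket_overrides():
        runner = CliRunner()
        for command in ('fetch-tokenmap', 'list-backups', 'download', 'restore-node'):
            # --help exits early, so no storage backend is contacted.
            result = runner.invoke(cli, [command, '--help'])
            assert result.exit_code == 0
            assert '--bucket-name' in result.output
            assert '--prefix' in result.output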
17 changes: 11 additions & 6 deletions medusa/restore_cluster.py
@@ -35,7 +35,8 @@


def orchestrate(config, backup_name, seed_target, temp_dir, host_list, keep_auth, bypass_checks, verify, keyspaces,
tables, parallel_restores, use_sstableloader=False, version_target=None, ignore_racks=False):
tables, parallel_restores, use_sstableloader=False, version_target=None, ignore_racks=False,
bucket_name=None, prefix=None):
monitoring = Monitoring(config=config.monitoring)
try:
restore_start_time = datetime.datetime.now()
@@ -57,7 +58,7 @@ def orchestrate(config, backup_name, seed_target, temp_dir, host_list, keep_auth
logging.error(err_msg)
raise RuntimeError(err_msg)

storage = Storage(config=config.storage)
storage = Storage(config=config.storage, bucket_name=bucket_name, prefix=prefix)

try:
cluster_backup = storage.get_cluster_backup(backup_name)
@@ -68,7 +69,7 @@ def orchestrate(config, backup_name, seed_target, temp_dir, host_list, keep_auth

restore = RestoreJob(cluster_backup, config, temp_dir, host_list, seed_target, keep_auth, verify,
parallel_restores, keyspaces, tables, bypass_checks, use_sstableloader, version_target,
ignore_racks)
ignore_racks, bucket_name, prefix)
restore.execute()

restore_end_time = datetime.datetime.now()
@@ -103,7 +104,7 @@ class RestoreJob(object):

def __init__(self, cluster_backup, config, temp_dir, host_list, seed_target, keep_auth, verify,
parallel_restores, keyspaces=None, tables=None, bypass_checks=False, use_sstableloader=False,
version_target=None, ignore_racks=False):
version_target=None, ignore_racks=False, bucket_name=None, prefix=None):
self.id = uuid.uuid4()
self.ringmap = None
self.cluster_backup = cluster_backup
@@ -129,6 +130,8 @@ def __init__(self, cluster_backup, config, temp_dir, host_list, seed_target, kee
self.fqdn_resolver = HostnameResolver(fqdn_resolver, k8s_mode)
self._version_target = version_target
self.ignore_racks = ignore_racks
self.bucket_name = bucket_name
self.prefix = prefix

def prepare_restore(self):
logging.info('Ensuring the backup is found and is complete')
@@ -427,7 +430,7 @@ def _build_restore_cmd(self):
command = 'mkdir -p {work}; cd {work} && medusa-wrapper {sudo} medusa {config} ' \
'--fqdn=%s -vvv restore-node ' \
'{in_place} {keep_auth} %s {verify} --backup-name {backup} --temp-dir {temp_dir} ' \
'{use_sstableloader} {keyspaces} {tables}' \
'{use_sstableloader} {keyspaces} {tables} {bucket_name} {prefix}' \
Contributor:
Issue: following up on my other comment, the --bucket-name and --prefix args need to be placed before the command name (here restore-node).

Contributor Author:
'mkdir -p {work}; cd {work} && medusa-wrapper {sudo} medusa {config} ' \
    '--fqdn=%s -vvv {bucket_name} {prefix} restore-node ' \
    '{in_place} {keep_auth} %s {verify} --backup-name {backup} --temp-dir {temp_dir} ' \
    '{use_sstableloader} {keyspaces} {tables}'

Like this?

Contributor:
No, like this:

'mkdir -p {work}; cd {work} && medusa-wrapper {sudo} medusa {config} ' \
    '--fqdn=%s -vvv --bucket-name={bucket_name} --prefix={prefix} restore-node ' \
    '{in_place} {keep_auth} %s {verify} --backup-name {backup} --temp-dir {temp_dir} ' \
    '{use_sstableloader} {keyspaces} {tables}'
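
To make the ordering constraint concrete, here is a minimal, self-contained Click example (not medusa code; the option and command names simply mirror the discussion above). Click only passes a subcommand the options declared on that subcommand, so options declared on the group must appear before the subcommand name:

    import click

    @click.group()
    @click.option('--bucket-name', default=None)
    @click.option('--prefix', default=None)
    @click.pass_context
    def cli(ctx, bucket_name, prefix):
        # Group-level options are parsed here, before any subcommand runs.
        ctx.obj = {'bucket_name': bucket_name, 'prefix': prefix}

    @cli.command(name='restore-node')
    @click.option('--backup-name', required=True)
    @click.pass_context
    def restore_node(ctx, backup_name):
        click.echo('restoring {} from bucket {}'.format(backup_name, ctx.obj['bucket_name']))

    if __name__ == '__main__':
        # Works:  python demo.py --bucket-name my-bucket restore-node --backup-name backup1
        # Fails:  python demo.py restore-node --backup-name backup1 --bucket-name my-bucket
        cli()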

.format(work=self.work_dir,
sudo='sudo' if medusa.utils.evaluate_boolean(self.config.cassandra.use_sudo) else '',
config=f'--config-file {self.config.file_path}' if self.config.file_path else '',
@@ -438,7 +441,9 @@ def _build_restore_cmd(self):
temp_dir=self.temp_dir,
use_sstableloader='--use-sstableloader' if self.use_sstableloader else '',
keyspaces=keyspace_options,
tables=table_options)
tables=table_options,
bucket_name=f'--bucket-name {self.bucket_name}' if self.bucket_name is not None else '',
prefix=f'--prefix {self.prefix}' if self.prefix is not None else '')

logging.debug('Preparing to restore on all nodes with the following command: {}'.format(command))

8 changes: 4 additions & 4 deletions medusa/restore_node.py
@@ -38,12 +38,12 @@


def restore_node(config, temp_dir, backup_name, in_place, keep_auth, seeds, verify, keyspaces, tables,
use_sstableloader=False, version_target=None):
use_sstableloader=False, version_target=None, bucket_name=None, prefix=None):
if in_place and keep_auth:
logging.error('Cannot keep system_auth when restoring in-place. It would be overwritten')
sys.exit(1)

storage = Storage(config=config.storage)
storage = Storage(config=config.storage, bucket_name=bucket_name, prefix=prefix)
capture_release_version(storage, version_target)

if not use_sstableloader:
@@ -88,7 +88,7 @@ def restore_node_locally(config, temp_dir, backup_name, in_place, keep_auth, see
# Download the backup
download_dir = temp_dir / 'medusa-restore-{}'.format(uuid.uuid4())
logging.info('Downloading data from backup to {}'.format(download_dir))
download_data(config.storage, node_backup, fqtns_to_restore, destination=download_dir)
download_data(storage, node_backup, fqtns_to_restore, destination=download_dir)

if not medusa.utils.evaluate_boolean(config.kubernetes.enabled if config.kubernetes else False):
logging.info('Stopping Cassandra')
@@ -176,7 +176,7 @@ def restore_node_sstableloader(config, temp_dir, backup_name, in_place, keep_aut
# Download the backup
download_dir = temp_dir / 'medusa-restore-{}'.format(uuid.uuid4())
logging.info('Downloading data from backup to {}'.format(download_dir))
download_data(config.storage, node_backup, fqtns_to_restore, destination=download_dir)
download_data(storage, node_backup, fqtns_to_restore, destination=download_dir)
invoke_sstableloader(config, download_dir, keep_auth, fqtns_to_restore, cassandra.storage_port,
cassandra.native_port)
logging.info('Finished loading backup from {}'.format(fqdn))
4 changes: 2 additions & 2 deletions medusa/status.py
@@ -23,8 +23,8 @@
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


def status(config, backup_name):
storage = Storage(config=config.storage)
def status(config, backup_name, bucket_name=None):
storage = Storage(config=config.storage, bucket_name=bucket_name)

try:
cluster_backup = storage.get_cluster_backup(backup_name)
33 changes: 16 additions & 17 deletions medusa/storage/__init__.py
@@ -21,22 +21,20 @@
import pathlib
import re

from libcloud.storage.providers import Provider
from libcloud.common.types import InvalidCredsError
from libcloud.storage.providers import Provider
from retrying import retry

import medusa.index

from medusa.utils import evaluate_boolean
from medusa.storage.azure_storage import AzureStorage
from medusa.storage.cluster_backup import ClusterBackup
from medusa.storage.node_backup import NodeBackup
from medusa.storage.google_storage import GoogleStorage
from medusa.storage.local_storage import LocalStorage
from medusa.storage.s3_storage import S3Storage
from medusa.storage.s3_rgw import S3RGWStorage
from medusa.storage.azure_storage import AzureStorage
from medusa.storage.node_backup import NodeBackup
from medusa.storage.s3_base_storage import S3BaseStorage

from medusa.storage.s3_rgw import S3RGWStorage
from medusa.storage.s3_storage import S3Storage
from medusa.utils import evaluate_boolean

ManifestObject = collections.namedtuple('ManifestObject', ['path', 'size', 'MD5'])

@@ -66,43 +64,44 @@ def format_bytes_str(value):


class Storage(object):
def __init__(self, *, config):
def __init__(self, *, config, bucket_name=None, prefix=None):
self._config = config
# Used to bypass dependency checks when running in Kubernetes
self._k8s_mode = evaluate_boolean(config.k8s_mode) if config.k8s_mode else False
self._prefix = pathlib.Path(config.prefix or '.')
self._prefix = pathlib.Path(prefix or config.prefix or '.')
self.prefix_path = str(self._prefix) + '/' if len(str(self._prefix)) > 1 else ''
self._bucket_name = bucket_name
self.storage_driver = self._connect_storage()
self.storage_provider = self._config.storage_provider

def _connect_storage(self):
logging.debug('Loading storage_provider: {}'.format(self._config.storage_provider))
if self._config.storage_provider == Provider.GOOGLE_STORAGE:
google_storage = GoogleStorage(self._config)
google_storage = GoogleStorage(self._config, bucket_name=self._bucket_name)
if not self._k8s_mode:
google_storage.check_dependencies()
return google_storage
elif self._config.storage_provider == Provider.AZURE_BLOBS:
azure_storage = AzureStorage(self._config)
azure_storage = AzureStorage(self._config, bucket_name=self._bucket_name)
if not self._k8s_mode:
azure_storage.check_dependencies()
return azure_storage
elif self._config.storage_provider == Provider.S3_RGW:
return S3RGWStorage(self._config)
return S3RGWStorage(self._config, bucket_name=self._bucket_name)
elif self._config.storage_provider.lower() == "s3_compatible":
s3_storage = S3BaseStorage(self._config)
s3_storage = S3BaseStorage(self._config, bucket_name=self._bucket_name)
if not self._k8s_mode:
s3_storage.check_dependencies()
return s3_storage
elif self._config.storage_provider.startswith(Provider.S3):
s3_storage = S3Storage(self._config)
s3_storage = S3Storage(self._config, bucket_name=self._bucket_name)
if not self._k8s_mode:
s3_storage.check_dependencies()
return s3_storage
elif self._config.storage_provider == Provider.LOCAL:
return LocalStorage(self._config)
return LocalStorage(self._config, bucket_name=self._bucket_name)
elif self._config.storage_provider.lower() == "ibm_storage":
s3_storage = S3BaseStorage(self._config)
s3_storage = S3BaseStorage(self._config, bucket_name=self._bucket_name)
if not self._k8s_mode:
s3_storage.check_dependencies()
return s3_storage
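
For reference, a minimal usage sketch of the reworked constructor (medusa_config stands in for an already-loaded MedusaConfig, and the bucket and prefix values are made-up examples). Both keyword arguments are optional; omitting them keeps the current behaviour of reading bucket_name and prefix from the [storage] section of the config.

    from medusa.storage import Storage

    # Defaults from medusa.ini, exactly as before this change.
    storage = Storage(config=medusa_config.storage)

    # One-off read from a different bucket and prefix, e.g. for a restore drill.
    other = Storage(config=medusa_config.storage,
                    bucket_name='dr-site-backups',
                    prefix='old-cluster')
    backups = other.list_cluster_backups()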
6 changes: 3 additions & 3 deletions medusa/storage/abstract_storage.py
@@ -25,7 +25,6 @@
import medusa.storage
import medusa.storage.concurrent


BLOCK_SIZE_BYTES = 65536
MULTIPART_PART_SIZE_IN_MB = 8
MULTIPART_BLOCK_SIZE_BYTES = 65536
@@ -34,10 +33,11 @@

class AbstractStorage(abc.ABC):

def __init__(self, config):
def __init__(self, config, bucket_name=None):
self.config = config
self.bucket_name = bucket_name if bucket_name is not None else config.bucket_name
self.driver = self.connect_storage()
self.bucket = self.driver.get_container(container_name=config.bucket_name)
self.bucket = self.driver.get_container(container_name=self.bucket_name)

@abc.abstractmethod
def connect_storage(self):
2 changes: 1 addition & 1 deletion medusa/storage/azure_blobs_storage/azcli.py
@@ -17,7 +17,7 @@ def __init__(self, storage):

@property
def bucket_name(self):
return self._config.bucket_name
return self.storage.bucket_name

def __enter__(self):
with io.open(os.path.expanduser(self._config.key_file), 'r', encoding='utf-8') as json_fi: