Skip to content

Commit

Permalink
Merge pull request #46 from elixir-cloud-aai/regexp-improve
Browse files Browse the repository at this point in the history
Improve S3 URL support and others
  • Loading branch information
trispera authored Dec 11, 2023
2 parents 1a7b810 + 986dc29 commit fec170a
Show file tree
Hide file tree
Showing 11 changed files with 73 additions and 185 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/tox.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# CI workflow: runs the tox test suite once per Python version listed in the matrix.
name: Python package

# Trigger on every push and on every pull request.
on:
- push
- pull_request

jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
# Versions are quoted so YAML keeps them as strings; an unquoted 3.10 would be read as the number 3.1.
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']

steps:
# Check out the repository so the following steps can see the sources.
- uses: actions/checkout@v3
# Install the interpreter selected by the current matrix entry.
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
# NOTE(review): tox-gh-actions presumably maps the matrix Python version to a tox env; that mapping lives in the tox configuration, which is not shown here - confirm.
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install tox tox-gh-actions
- name: Test with tox
run: tox
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ language: python
dist: focal
cache: pip
python:
- '3.7'
- '3.8'
- '3.9'
- '3.10'
- '3.11'
- '3.12'
install:
- sudo apt update
- sudo apt upgrade
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
INSTALL_DEPS = ['kubernetes==9.0.0',
'requests>=2.20.0',
'urllib3==1.26.5',
'boto3==1.16.18',
'boto3==1.33.9',
]
TEST_DEPS = [ 'pytest',
'pyfakefs',
Expand Down
20 changes: 0 additions & 20 deletions src/tesk_core/extract_endpoint.py

This file was deleted.

2 changes: 0 additions & 2 deletions src/tesk_core/filer.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,8 +413,6 @@ def fileTransputIfEnabled():
elif scheme == 'file':
return fileTransputIfEnabled()
elif scheme in ['http', 'https']:
if 's3' in netloc:
return S3Transput
return HTTPTransput
elif scheme == 's3':
return S3Transput
Expand Down
55 changes: 14 additions & 41 deletions src/tesk_core/filer_s3.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
import re
import botocore
import boto3
import sys
import os
import logging
import re
import botocore
import boto3
from tesk_core.transput import Transput, Type
from tesk_core.extract_endpoint import extract_endpoint

class S3Transput(Transput):
def __init__(self, path, url, ftype):
Transput.__init__(self, path, url, ftype)
self.bucket, self.file_path = self.get_bucket_name_and_file_path()
self.bucket_obj = None

def __enter__(self):
client = boto3.resource('s3', endpoint_url=extract_endpoint())
client = boto3.resource('s3', endpoint_url=self.extract_endpoint())
if self.check_if_bucket_exists(client):
sys.exit(1)
self.bucket_obj = client.Bucket(self.bucket)
return self

def extract_endpoint(self):
return boto3.client('s3').meta.endpoint_url

def check_if_bucket_exists(self, client):
try:
client.meta.client.head_bucket(Bucket=self.bucket)
Expand All @@ -33,40 +36,12 @@ def check_if_bucket_exists(self, client):

def get_bucket_name_and_file_path(self):
"""
if the S3 url is similar to s3://idr-bucket-1/README.txt format
If the S3 url is similar to s3://idr-bucket-1/README.txt format
"""
if self.url.startswith("s3"):
self.url_path = re.sub(r's3:\/', "", self.url)

"""
If the s3 url are of following formats
1. File type = FILE
* http://mybucket.s3.amazonaws.com/file.txt
* http://mybucket.s3-aws-region.amazonaws.com/file.txt
* http://s3.amazonaws.com/mybucket/file.txt
* http://s3-aws-region.amazonaws.com/mybucket/file.txt
* s3://mybucket/file.txt
return values will be
bucket name = mybucket , file path = file.txt
2. File type = DIRECTORY
* http://mybucket.s3.amazonaws.com/dir1/dir2/
* http://mybucket.s3-aws-region.amazonaws.com/dir1/dir2/
* http://s3.amazonaws.com/mybucket/dir1/dir2/
* http://s3-aws-region.amazonaws.com/mybucket/dir1/dir2/
* s3://mybucket/dir1/dir2/
bucket = self.netloc
file_path = self.url_path[1:]

return values will be
bucket name = mybucket , file path = dir1/dir2/
"""

match = re.search('^([^.]+).s3', self.netloc)
if match:
bucket = match.group(1)
else:
bucket = self.url_path.split("/")[1]
file_path = re.sub(r'^\/' + bucket + '\/', "", self.url_path).lstrip("/")
return bucket, file_path

def download_file(self):
Expand Down Expand Up @@ -95,9 +70,7 @@ def upload_dir(self):
elif os.path.isfile(path):
file_type = Type.File
else:
"""
An exception is raised, if the object type is neither file or directory
"""
# An exception is raised if the object is neither a file nor a directory
logging.error("Object is neither file or directory : '%s' ",path)
raise IOError
file_path = os.path.join(self.url, item)
Expand All @@ -112,7 +85,7 @@ def upload_dir(self):

def download_dir(self):
logging.debug('Downloading s3 object: "%s" Target: %s', self.bucket + "/" + self.file_path, self.path)
client = boto3.client('s3', endpoint_url=extract_endpoint())
client = boto3.client('s3', endpoint_url=self.extract_endpoint())
if not self.file_path.endswith('/'):
self.file_path += '/'
objects = client.list_objects_v2(Bucket=self.bucket, Prefix=self.file_path)
Expand Down Expand Up @@ -141,4 +114,4 @@ def get_s3_file(self, file_name, key):
logging.error('Got status code: %s', err.response['Error']['Code'])
logging.error(err.response['Error']['Message'])
return 1
return 0
return 0
2 changes: 1 addition & 1 deletion src/tesk_core/taskmaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def init_pvc(data, filer):
return pvc


def run_task(data, filer_name, filer_version, have_json_pvc):
def run_task(data, filer_name, filer_version, have_json_pvc=False):
task_name = data['executors'][0]['metadata']['labels']['taskmaster-name']
pvc = None

Expand Down
35 changes: 0 additions & 35 deletions tests/test_extract_endpoint.py

This file was deleted.

4 changes: 2 additions & 2 deletions tests/test_filer.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def test_newTransput(self):
self.assertEqual(newTransput('https', 'test.com'), HTTPTransput)
self.assertEqual(newTransput('file', '/home/tfga/workspace/'), FileTransput)
self.assertEqual(newTransput('s3', '/home/tfga/workspace/'), S3Transput)
self.assertEqual(newTransput('http', 's3.aws.com'), S3Transput)
self.assertEqual(newTransput('http', 's3.aws.com'), HTTPTransput)

self.assertThrows(lambda: newTransput('svn', 'example.com')
, UnknownProtocol
Expand Down Expand Up @@ -248,4 +248,4 @@ def test_newTransput_file_disabled(self):

if __name__ == "__main__":
# import sys;sys.argv = ['', 'Test.testName']
unittest.main()
unittest.main()
Loading

0 comments on commit fec170a

Please sign in to comment.