Pull request update/240701 #325

Merged · 6 commits · Jul 1, 2024
14 changes: 14 additions & 0 deletions bulldozer/bulldozer_worker/tasks.py
@@ -40,6 +40,10 @@ class GoalReached(DestroyConditionException):
pass


class TaskInErrorState(Exception):
pass


class TaskState:

STARTING_PREPARING = 1
@@ -270,6 +274,16 @@ def _exec(self):

class SetFinished(Base):

def _handle_error(self, err):
if self.body["try"] < MAX_RETRIES:
self.body["try"] += 1
elif self.body["state"] == TaskState.ERROR:
raise TaskInErrorState('Task is already in ERROR state')
else:
self.body["state"] = TaskState.ERROR
self.body["reason"] = f"{str(err)}" or None
self.on_continue(self.body, self.delayed)

def update_reason(self):

reason = self.body.get("reason")
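
For context, here is a minimal standalone sketch of the bounded-retry pattern `_handle_error` implements: the task body carries a `try` counter and a `state`, retries are scheduled until `MAX_RETRIES` is exhausted, and a task already in `ERROR` raises instead of being re-parked. The constant value and the plain-function form are illustrative, not the worker's actual API:

```python
MAX_RETRIES = 5  # assumed value; the real constant lives in the worker module


class TaskState:
    ERROR = "error"  # illustrative; the worker uses numeric states


class TaskInErrorState(Exception):
    pass


def handle_error(body: dict, err: Exception) -> dict:
    """Retry up to MAX_RETRIES, then move the task to ERROR exactly once."""
    if body["try"] < MAX_RETRIES:
        body["try"] += 1  # another attempt will be scheduled by the caller
    elif body["state"] == TaskState.ERROR:
        raise TaskInErrorState("Task is already in ERROR state")
    else:
        body["state"] = TaskState.ERROR
        body["reason"] = str(err) or None  # None when the message is empty
    return body


# Usage: a task failing for the first time gets its counter bumped
body = handle_error({"try": 0, "state": "running"}, ValueError("boom"))
assert body["try"] == 1
```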
24 changes: 24 additions & 0 deletions ngui/ui/public/docs/datasets.md
@@ -0,0 +1,24 @@
### Summary

Use the Datasets Page to keep track of your machine learning datasets.
Add your datasets, ensuring you include relevant metadata such as the dataset name, description, source path, labels, and validity period. To log a specific dataset from your training code, include the Dataset ID like this: `arcee.dataset("ID")` (see the sketch below). Any datasets you register within your training code will automatically appear on this page.
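
For example, a minimal sketch of registering a dataset from training code; the token, task key, and path are placeholders, and the `arcee.init` call is assumed from the OptScale arcee client:

```python
import optscale_arcee as arcee

# placeholders: your profiling token and task key
with arcee.init("profiling_token", "task_key"):
    # the Dataset ID is typically the path to the data source
    arcee.dataset("s3://my-bucket/training-data.csv")
```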

### View

- Details: Find the dataset's source path (ID) and use it in your code.

### Actions

- Update the Content: Click the Refresh button to view the latest information.

- Add a New Dataset: Easily create a new dataset by clicking the green "Add" button. Specify the ID (path to source) and other properties.

- Manage Datasets: Use the "Actions" column icons to edit/delete the dataset.

### Tips

- Data Cleaning: Regularly clean and preprocess your data to remove noise, handle missing values, and normalize data formats. Consistent preprocessing ensures that models are trained on high-quality data, leading to better performance and reliability.

- Centralized Data Storage: Cloud storage allows easy access and sharing of datasets across teams. This promotes collaboration and ensures that everyone is working with the most up-to-date data.

- Access Controls: Implement robust access controls and permissions to ensure that only authorized users can access sensitive data. This helps in maintaining data security and compliance with privacy regulations.
2 changes: 0 additions & 2 deletions rest_api/rest_api_server/controllers/offer_breakdown.py
@@ -1,8 +1,6 @@
import logging
import json
from clickhouse_driver import Client as ClickHouseClient
from collections import defaultdict
from datetime import datetime, timedelta
from rest_api.rest_api_server.controllers.ri_breakdown import (
RiBreakdownController)
from rest_api.rest_api_server.controllers.base_async import (
43 changes: 15 additions & 28 deletions rest_api/rest_api_server/controllers/organization_gemini.py
@@ -3,18 +3,16 @@
from typing import List

from clickhouse_driver import Client as ClickHouseClient
from sqlalchemy.sql import and_, exists

from tools.optscale_exceptions.common_exc import WrongArgumentsException

from rest_api.rest_api_server.controllers.base import (
BaseController, ClickHouseMixin)
from rest_api.rest_api_server.controllers.base_async import BaseAsyncControllerWrapper
from rest_api.rest_api_server.exceptions import Err
from rest_api.rest_api_server.models.models import (
OrganizationGemini, CloudAccount)
from rest_api.rest_api_server.controllers.base_async import (
BaseAsyncControllerWrapper)
from rest_api.rest_api_server.models.models import OrganizationGemini
from rest_api.rest_api_server.utils import (
check_string_attribute, check_int_attribute, check_dict_attribute)
check_int_attribute, check_dict_attribute, check_list_attribute,
check_float_attribute
)


LOG = logging.getLogger(__name__)
@@ -33,11 +31,15 @@ def _validate_stats(**kwargs):
stats = kwargs.get('stats')
check_dict_attribute('stats', stats, allow_empty=True)
if stats:
for param in ['total_objects', 'considered_objects', 'total_size',
'might_deleted']:
for param in ['total_objects', 'filtered_objects',
'duplicated_objects']:
value = stats.get(param)
if value is not None:
check_int_attribute(param, value)
for param in ['total_size', 'duplicates_size', 'monthly_savings']:
value = stats.get(param)
if value is not None:
check_float_attribute(param, value)

def edit(self, item_id, **kwargs):
self._validate_stats(**kwargs)
@@ -135,31 +137,16 @@ def list(self, organization_id: str = None, **kwargs) -> List[OrganizationGemini
else:
return super().list()

def _validate_filters(self, organization_id, filters):
def _validate_filters(self, filters):
check_dict_attribute('filters', filters, allow_empty=True)
if filters:
if 'buckets' in filters:
check_string_attribute('buckets', filters['buckets'])
check_list_attribute('buckets', filters['buckets'])
if 'min_size' in filters:
check_int_attribute('min_size', filters['min_size'])
if 'cloud_account_id' in filters:
cloud_account_id = filters['cloud_account_id']
check_string_attribute('cloud_account_id', cloud_account_id)
cloud_account_exist = self.session.query(
exists().where(
and_(
CloudAccount.deleted.is_(False),
CloudAccount.organization_id == organization_id,
CloudAccount.id == cloud_account_id
)
)
).scalar()
if not cloud_account_exist:
raise WrongArgumentsException(
Err.OE0217, ['cloud_account_id'])

def create(self, organization_id: str, filters: dict) -> OrganizationGemini:
self._validate_filters(organization_id, filters)
self._validate_filters(filters)
return super().create(organization_id=organization_id,
filters=json.dumps(filters))

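
Taken together, the validation changes above reshape the expected payloads: `stats` now splits integer object counters from float sizes and savings, and `filters['buckets']` must be a list instead of a string. A sketch of payloads that would pass the new checks (all values are invented):

```python
# `stats` for PATCH: counters are ints, sizes/savings are floats
stats = {
    "total_objects": 120_000,     # check_int_attribute
    "filtered_objects": 45_000,
    "duplicated_objects": 3_200,
    "total_size": 512.7,          # check_float_attribute
    "duplicates_size": 14.2,
    "monthly_savings": 9.99,
}

# `filters` for POST: `buckets` is now a list (OE0385 if not)
filters = {
    "buckets": ["logs-bucket", "ml-artifacts"],  # check_list_attribute
    "min_size": 1024,                            # check_int_attribute
}
```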
27 changes: 17 additions & 10 deletions rest_api/rest_api_server/controllers/ri_breakdown.py
@@ -114,16 +114,23 @@ def get_total_stats(self, cloud_account_ids):

def get_flavors(self, cloud_account_ids):
flavor_factor_map = defaultdict(float)
expenses = self.raw_expenses_collection.distinct(
'product/instanceType', {
'cloud_account_id': {'$in': cloud_account_ids},
'start_date': {
'$gte': datetime.fromtimestamp(self.start_date),
'$lte': datetime.fromtimestamp(self.end_date)},
'lineItem/LineItemType': 'DiscountedUsage'
}
)
for flavor_name in expenses:
flavors = self.execute_clickhouse(
"""SELECT DISTINCT instance_type
FROM ri_sp_usage
WHERE cloud_account_id IN cloud_account_ids AND
date >= %(start_date)s AND date <= %(end_date)s AND
offer_type='ri'
""",
params={
'start_date': datetime.fromtimestamp(self.start_date),
'end_date': datetime.fromtimestamp(self.end_date)
},
external_tables=[{'name': 'cloud_account_ids',
'structure': [('id', 'String')],
'data': [{'id': r_id} for r_id in
cloud_account_ids]}])
flavors = [x[0] for x in flavors]
for flavor_name in flavors:
if 'db.' in flavor_name:
# RDS instances don't have normalization factor
# use 1 as default
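
The Mongo `distinct` call above is replaced by a ClickHouse query that streams the account IDs in as an external (temporary) table. A minimal sketch of the same pattern with `clickhouse_driver` directly; the host, dates, and account IDs are placeholders:

```python
from datetime import datetime

from clickhouse_driver import Client

client = Client("localhost")  # placeholder host

rows = client.execute(
    """SELECT DISTINCT instance_type
       FROM ri_sp_usage
       WHERE cloud_account_id IN cloud_account_ids
         AND date >= %(start_date)s AND date <= %(end_date)s
         AND offer_type = 'ri'""",
    params={
        "start_date": datetime(2024, 6, 1),
        "end_date": datetime(2024, 7, 1),
    },
    # the external table is sent along with the query and can be
    # referenced by name like any other table
    external_tables=[{
        "name": "cloud_account_ids",
        "structure": [("id", "String")],
        "data": [{"id": "acc_1"}, {"id": "acc_2"}],
    }],
)
flavors = [r[0] for r in rows]  # execute() returns a list of tuples
```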
39 changes: 17 additions & 22 deletions rest_api/rest_api_server/controllers/sp_breakdown.py
@@ -38,28 +38,24 @@ def get_usage_breakdown(self, start_date, end_date, cloud_account_ids):

def get_flavors(self, cloud_account_ids):
flavor_rate_map = defaultdict(float)
expenses = self.raw_expenses_collection.aggregate([
{'$match': {
'cloud_account_id': {'$in': cloud_account_ids},
'start_date': {
'$gte': datetime.fromtimestamp(self.start_date),
'$lt': datetime.fromtimestamp(self.end_date)
},
'lineItem/LineItemType': 'SavingsPlanCoveredUsage'
}},
{'$group': {
'_id': {
'instance_type': '$product/instanceType',
'description': '$lineItem/LineItemDescription'
},
'rate': {'$last': '$savingsPlan/SavingsPlanRate'}
}}
])
flavors = self.execute_clickhouse(
"""SELECT DISTINCT instance_type, sp_rate
FROM ri_sp_usage
WHERE cloud_account_id IN cloud_account_ids AND
date >= %(start_date)s AND date <= %(end_date)s AND
offer_type='sp' and sp_rate!=0
""",
params={
'start_date': datetime.fromtimestamp(self.start_date),
'end_date': datetime.fromtimestamp(self.end_date)
},
external_tables=[{'name': 'cloud_account_ids',
'structure': [('id', 'String')],
'data': [{'id': r_id} for r_id in
cloud_account_ids]}])
# todo: there could be two different rates for a flavor
for expense in expenses:
_id = expense['_id']
flavor_name = _id.get('instance_type') or _id.get('description')
sp_rate = float(expense.get('rate', 0))
for flavor in flavors:
flavor_name, sp_rate = flavor
if sp_rate:
flavor_rate_map[flavor_name] = sp_rate
return flavor_rate_map
@@ -123,7 +119,6 @@ def fill_overprovisioning(self, flavor_rate_map, cloud_account_usage,
sp_acc_date_exp[cloud_account_id][date] += overprov_exp
for cloud_acc_id, date_exp in cloud_account_usage.items():
for date, data in date_exp.items():
date_ts = int(date.timestamp())
sp_overprov_exp = sp_acc_date_exp[cloud_acc_id].get(date, 0)
data['overprovision'] = sp_overprov_exp
if 'overprovision_hrs' not in data:
2 changes: 2 additions & 0 deletions rest_api/rest_api_server/handlers/v2/organization_geminis.py
@@ -232,6 +232,7 @@ async def patch(self, gemini_id, **kwargs):
description: |
Wrong arguments:
- OE0223: Argument should be integer
- OE0466: Argument should be float
401:
description: |
Unauthorized:
@@ -536,6 +537,7 @@ async def post(self, organization_id, **kwargs):
- OE0214: Argument should be a string
- OE0217: Invalid query parameter
- OE0223: min_size should be integer
- OE0385: buckets should be a list
401:
description: |
Unauthorized
2 changes: 2 additions & 0 deletions rest_api/rest_api_server/tests/unittests/test_api_base.py
@@ -609,12 +609,14 @@ def get_csv(path):
('cloud_account_id', 'String', 'default'),
('resource_id', 'String', 'default'),
('date', 'DateTime', datetime.utcnow()),
('instance_type', 'String', ''),
('offer_id', 'String', 'default'),
('offer_type', "Enum8('ri' = 1, 'sp' = 2)", 1),
('offer_cost', 'Float64', 0),
('on_demand_cost', 'Float64', 0),
('usage', 'Float64', 0),
('ri_norm_factor', 'Float32', 0),
('sp_rate', 'Float32', 0),
('expected_cost', 'Float64', 0),
('sign', 'Int8', 1)
], self.ri_sp_usage
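
The test schema gains `instance_type` and `sp_rate`, matching the new breakdown queries. For orientation, a hedged DDL sketch assembled from this column list: only the columns come from the diff, while the engine and ordering key are assumptions (the `sign` column suggests a CollapsingMergeTree):

```python
from clickhouse_driver import Client

# Engine and ORDER BY are assumptions; the diff shows only the columns.
Client("localhost").execute("""
    CREATE TABLE IF NOT EXISTS ri_sp_usage (
        cloud_account_id String,
        resource_id String,
        date DateTime,
        instance_type String,
        offer_id String,
        offer_type Enum8('ri' = 1, 'sp' = 2),
        offer_cost Float64,
        on_demand_cost Float64,
        usage Float64,
        ri_norm_factor Float32,
        sp_rate Float32,
        expected_cost Float64,
        sign Int8
    ) ENGINE = CollapsingMergeTree(sign)
    ORDER BY (cloud_account_id, date)
""")
```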