
Commit

fix(cli): correct set arguments for `openllm import` and `openllm build` (#775)

* fix(cli): correct set arguments for `openllm import` and `openllm build`

Signed-off-by: Aaron <[email protected]>

* chore: update changelog

Signed-off-by: Aaron <[email protected]>

---------

Signed-off-by: Aaron <[email protected]>
aarnphm authored Dec 13, 2023
1 parent 10f508d commit 2dbcfa8
Showing 5 changed files with 40 additions and 48 deletions.
5 changes: 5 additions & 0 deletions changelog.d/774.feature.md
@@ -0,0 +1,5 @@
Mixtral is now fully supported on BentoCloud.

```bash
openllm start mistralai/Mixtral-8x7B-Instruct-v0.1
```
1 change: 1 addition & 0 deletions changelog.d/775.change.md
@@ -0,0 +1 @@
Correct the arguments for both `openllm import` and `openllm build` so they are consistent with `openllm start`
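
As a quick illustration of the now-unified interface, here is a minimal sketch — assuming the shared options expose the flag names suggested by this diff (`--dtype`, `--model-version`, `--backend`, `--quantize`, `--serialisation`) and that both commands are importable from `openllm_cli.entrypoint` as shown below — checking that the shared flags are registered on both commands:

```python
from click.testing import CliRunner

# Hypothetical check: both commands come from the module changed in this
# commit; the flag names are assumed to mirror the shared option factories.
from openllm_cli.entrypoint import build_command, import_command

runner = CliRunner()
for command in (import_command, build_command):
    result = runner.invoke(command, ['--help'])
    for flag in ('--dtype', '--model-version', '--backend', '--quantize', '--serialisation'):
        assert flag in result.output, f'{flag} missing from {command.name}'
```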
52 changes: 24 additions & 28 deletions openllm-python/src/openllm_cli/_factory.py
@@ -11,6 +11,7 @@
LiteralBackend,
LiteralSerialisation,
ParamSpec,
AnyCallable,
get_literal_args,
)
from openllm_core.utils import DEBUG, compose, dantic, resolve_user_filepath
@@ -25,7 +26,7 @@ class _OpenLLM_GenericInternalConfig(LLMConfig):

class GenerationConfig:
top_k: int = 15
top_p: float = 0.9
top_p: float = 0.78
temperature: float = 0.75
max_new_tokens: int = 128

@@ -118,32 +119,35 @@ def _id_callback(ctx: click.Context, _: click.Parameter, value: t.Tuple[str, ...
ctx.params[_adapter_mapping_key][adapter_id] = name
return None

def optimization_decorator(fn: FC, *, factory=click, _eager=True) -> FC | list[AnyCallable]:
shared = [
dtype_option(factory=factory), model_version_option(factory=factory), #
backend_option(factory=factory), quantize_option(factory=factory), #
serialisation_option(factory=factory),
]
if not _eager: return shared
return compose(*shared)(fn)

def start_decorator(fn: FC) -> FC:
composed = compose(
_OpenLLM_GenericInternalConfig.parse,
_http_server_args,
cog.optgroup.group('General LLM Options', help='The following options are related to running LLM Server.'),
dtype_option(factory=cog.optgroup),
model_version_option(factory=cog.optgroup),
cog.optgroup.option('--server-timeout', type=int, default=None, help='Server timeout in seconds'),
workers_per_resource_option(factory=cog.optgroup),
cors_option(factory=cog.optgroup),
backend_option(factory=cog.optgroup),
parse_serve_args(),
cog.optgroup.group(
'LLM Optimization Options',
help='''Optimization related options.
'LLM Options',
help='''The following options are related to running the LLM server, as well as optimization options.
OpenLLM supports k-bit model quantization (8-bit, 4-bit), GPTQ quantization, and PagedAttention via vLLM.
The following are either on our roadmap or currently being worked on:
- DeepSpeed Inference: [link](https://www.deepspeed.ai/inference/)
- GGML: Fast inference on [bare metal](https://github.com/ggerganov/ggml)
''',
''',
),
quantize_option(factory=cog.optgroup),
serialisation_option(factory=cog.optgroup),
cog.optgroup.option('--server-timeout', type=int, default=None, help='Server timeout in seconds'),
workers_per_resource_option(factory=cog.optgroup),
cors_option(factory=cog.optgroup),
*optimization_decorator(fn, factory=cog.optgroup, _eager=False),
cog.optgroup.option(
'--device',
type=dantic.CUDA,
@@ -200,8 +204,6 @@ def decorator(f: t.Callable[Concatenate[int, t.Optional[str], P], LLMConfig]) ->
return group(f)
return decorator

_http_server_args = parse_serve_args()

def _click_factory_type(*param_decls: t.Any, **attrs: t.Any) -> t.Callable[[FC | None], FC]:
'''General ``@click`` decorator with some sauce.
@@ -234,7 +236,8 @@ def adapter_id_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callab
multiple=True,
callback=_id_callback,
metavar='[PATH | [remote/][adapter_name:]adapter_id][, ...]',
)
**attrs,
)(f)


def cors_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[[FC], FC]:
@@ -291,8 +294,7 @@ def backend_option(f: _AnyCallable | None = None, **attrs: t.Any) -> t.Callable[
envvar='OPENLLM_BACKEND',
show_envvar=True,
help='Runtime to use for both serialisation/inference engine.',
**attrs,
)(f)
**attrs)(f)

def model_name_argument(f: _AnyCallable | None = None, required: bool = True, **attrs: t.Any) -> t.Callable[[FC], FC]:
return cli_argument(
@@ -329,15 +331,9 @@ def quantize_option(f: _AnyCallable | None = None, *, build: bool = False, **att
'''
+ (
'''
> [!NOTE] that this will set the mode for serving within deployment.'''
if build
else ''
)
+ '''
> [!NOTE] that quantization are currently only available in *PyTorch* models.''',
**attrs,
)(f)

> [!NOTE] that this will set the mode for serving within deployment.''' if build else ''
),
**attrs)(f)

def workers_per_resource_option(
f: _AnyCallable | None = None, *, build: bool = False, **attrs: t.Any
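The heart of this refactor is the new `optimization_decorator`: with `_eager=True` it composes the shared options directly onto a command, while `_eager=False` returns them as a plain list so `start_decorator` can embed them inside its `cog.optgroup` group. A self-contained sketch of the same pattern, using simplified stand-in options rather than the real option factories:

```python
from functools import reduce

import click

def compose(*decorators):
    # Apply decorators right-to-left, mirroring stacked @decorator syntax.
    return lambda fn: reduce(lambda f, deco: deco(f), reversed(decorators), fn)

def optimization_decorator(fn=None, *, factory=click, _eager=True):
    # Stand-ins for dtype_option, model_version_option, backend_option,
    # quantize_option, and serialisation_option in the real code.
    shared = [
        factory.option('--backend', default=None, help='Inference backend.'),
        factory.option('--quantize', default=None, help='Quantization scheme.'),
        factory.option('--serialisation', default='safetensors', help='Serialisation format.'),
    ]
    if not _eager:
        return shared            # caller places these inside its own option group
    return compose(*shared)(fn)  # otherwise attach them to the command directly

@click.command()
@optimization_decorator
def import_cmd(backend, quantize, serialisation):
    click.echo(f'backend={backend} quantize={quantize} serialisation={serialisation}')
```

Keeping one source of truth for these options is what prevents `openllm import` and `openllm build` from drifting out of sync with `openllm start` again.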
28 changes: 9 additions & 19 deletions openllm-python/src/openllm_cli/entrypoint.py
@@ -46,15 +46,11 @@
from ._factory import (
FC,
_AnyCallable,
backend_option,
dtype_option,
machine_option,
model_name_argument,
model_version_option,
parse_config_options,
quantize_option,
serialisation_option,
start_decorator,
optimization_decorator,
)

if t.TYPE_CHECKING:
@@ -590,13 +586,11 @@ class ImportModelOutput(t.TypedDict):
metavar='[REMOTE_REPO/MODEL_ID | /path/to/local/model]',
help='Deprecated. Use positional argument instead.',
)
@model_version_option
@backend_option
@quantize_option
@serialisation_option
@optimization_decorator
def import_command(
model_id: str,
deprecated_model_id: str | None,
dtype: LiteralDtype,
model_version: str | None,
backend: LiteralBackend | None,
quantize: LiteralQuantise | None,
@@ -649,6 +643,7 @@ def import_command(
model_version=model_version,
quantize=quantize,
backend=backend,
dtype=dtype,
serialisation=t.cast(
LiteralSerialisation,
first_not_none(
@@ -712,17 +707,13 @@ class BuildBentoOutput(t.TypedDict):
metavar='[REMOTE_REPO/MODEL_ID | /path/to/local/model]',
help='Deprecated. Use positional argument instead.',
)
@dtype_option
@backend_option
@click.option(
'--bento-version',
type=str,
default=None,
help='Optional bento version for this BentoLLM. Default is the model revision.',
)
@click.option('--overwrite', is_flag=True, help='Overwrite existing Bento for given LLM if it already exists.')
@cog.optgroup.group(cls=cog.MutuallyExclusiveOptionGroup, name='Optimisation options') # type: ignore[misc]
@quantize_option(factory=cog.optgroup, build=True)
@click.option(
'--enable-features',
multiple=True,
@@ -732,6 +723,7 @@ class BuildBentoOutput(t.TypedDict):
', '.join(OPTIONAL_DEPENDENCIES)
),
)
@optimization_decorator
@click.option(
'--adapter-id',
default=None,
@@ -740,14 +732,12 @@
help="Optional adapters id to be included within the Bento. Note that if you are using relative path, '--build-ctx' must be passed.",
)
@click.option('--build-ctx', help='Build context. This is required if --adapter-id uses relative path', default=None)
@model_version_option
@click.option(
'--dockerfile-template',
default=None,
type=click.File(),
help='Optional custom dockerfile template to be used with this BentoLLM.',
)
@serialisation_option
@cog.optgroup.group(cls=cog.MutuallyExclusiveOptionGroup, name='Utilities options') # type: ignore[misc]
@cog.optgroup.option(
'--containerize',
@@ -787,20 +777,20 @@ def build_command(
deprecated_model_id: str | None,
bento_version: str | None,
overwrite: bool,
dtype: LiteralDtype,
model_version: str | None,
backend: LiteralBackend | None,
quantize: LiteralQuantise | None,
serialisation: LiteralSerialisation | None,
machine: bool,
dtype: LiteralDtype,
enable_features: tuple[str, ...] | None,
adapter_id: tuple[str, ...],
build_ctx: str | None,
backend: LiteralBackend | None,
model_version: str | None,
dockerfile_template: t.TextIO | None,
max_model_len: int | None,
gpu_memory_utilization: float,
containerize: bool,
push: bool,
serialisation: LiteralSerialisation | None,
force_push: bool,
**_: t.Any,
) -> BuildBentoOutput:
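Beyond reordering, the `import_command` changes mean `--dtype` is now actually parsed and forwarded (the added `dtype=dtype` argument above). A hedged sketch of the corrected wiring, where `prepare_model` is a hypothetical stand-in for openllm's internal import path:

```python
import click

def prepare_model(model_id, *, dtype, backend, quantize, serialisation, model_version):
    # Hypothetical stand-in: before the fix, --dtype never reached this call.
    click.echo(f'importing {model_id} with dtype={dtype}, backend={backend}')

@click.command()
@click.argument('model_id')
@click.option('--dtype', default='auto')
@click.option('--backend', default=None)
@click.option('--quantize', default=None)
@click.option('--serialisation', default='safetensors')
@click.option('--model-version', default=None)
def import_command(model_id, dtype, backend, quantize, serialisation, model_version):
    # Every shared flag, including --dtype, is now threaded through to the call.
    prepare_model(model_id, dtype=dtype, backend=backend, quantize=quantize,
                  serialisation=serialisation, model_version=model_version)
```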
2 changes: 1 addition & 1 deletion openllm-python/src/openllm_cli/extension/list_models.py
@@ -18,7 +18,7 @@
@click.command('list_models', context_settings=termui.CONTEXT_SETTINGS)
@model_name_argument(required=False, shell_complete=model_complete_envvar)
def cli(model_name: str | None) -> DictStrAny:
'''This is equivalent to openllm models --show-available less the nice table.'''
'''List available models in local store to be used with OpenLLM.'''
models = tuple(inflection.dasherize(key) for key in openllm.CONFIG_MAPPING.keys())
ids_in_local_store = {
k: [
