diff --git a/requirements.txt b/requirements.txt
index faef06cf8..a16a15778 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,4 +12,8 @@ multiprocess
packaging
x2paddle
rarfile
-onnx >= 1.6.0
\ No newline at end of file
+gradio
+tritonclient[all]
+attrdict
+psutil
+onnx >= 1.6.0
diff --git a/visualdl/component/inference/fastdeploy_client/__init__.py b/visualdl/component/inference/fastdeploy_client/__init__.py
new file mode 100644
index 000000000..9c19f7b87
--- /dev/null
+++ b/visualdl/component/inference/fastdeploy_client/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =======================================================================
diff --git a/visualdl/component/inference/fastdeploy_client/client_app.py b/visualdl/component/inference/fastdeploy_client/client_app.py
new file mode 100644
index 000000000..397b8255a
--- /dev/null
+++ b/visualdl/component/inference/fastdeploy_client/client_app.py
@@ -0,0 +1,409 @@
+# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =======================================================================
+import gradio as gr
+import numpy as np
+
+from .http_client_manager import get_metric_data
+from .http_client_manager import HttpClientManager
+from .http_client_manager import metrics_table_head
+from .visualizer import visualize_detection
+from .visualizer import visualize_face_alignment
+from .visualizer import visualize_face_detection
+from .visualizer import visualize_headpose
+from .visualizer import visualize_keypoint_detection
+from .visualizer import visualize_matting
+from .visualizer import visualize_ocr
+from .visualizer import visualize_segmentation
+
+# Single client manager for this module; the handlers defined in
+# create_gradio_client_app send their requests through it.
+_http_manager = HttpClientManager()
+
+# Task name (the choices offered by the task radio) -> function that turns a
+# result into something displayable. component_inference calls the selected
+# entry as f(image, data), and only when the task is not 'unspecified', so the
+# one-argument 'unspecified' lambda is not reached through that path.
+supported_tasks = {
+ 'detection': visualize_detection,
+ 'facedet': visualize_face_detection,
+ 'keypointdetection': visualize_keypoint_detection,
+ 'segmentation': visualize_segmentation,
+ 'matting': visualize_matting,
+ 'ocr': visualize_ocr,
+ 'facealignment': visualize_face_alignment,
+ 'headpose': visualize_headpose,
+ 'unspecified': lambda x: str(x)
+}
+
+
+def create_gradio_client_app(): # noqa:C901
+ """Build the gradio page used to send requests to a fastdeploy server.
+
+ Returns:
+ The gr.Blocks object with its components and click handlers attached.
+ Nothing in this function launches it.
+ """
+ css = """
+ .gradio-container {
+ font-family: 'IBM Plex Sans', sans-serif;
+ }
+ .gr-button {
+ color: white;
+ border-color: black;
+ background: black;
+ }
+ input[type='range'] {
+ accent-color: black;
+ }
+ .dark input[type='range'] {
+ accent-color: #dfdfdf;
+ }
+ #gallery {
+ min-height: 22rem;
+ margin-bottom: 15px;
+ margin-left: auto;
+ margin-right: auto;
+ border-bottom-right-radius: .5rem !important;
+ border-bottom-left-radius: .5rem !important;
+ }
+ #gallery>div>.h-full {
+ min-height: 20rem;
+ }
+ .details:hover {
+ text-decoration: underline;
+ }
+ .gr-button {
+ white-space: nowrap;
+ }
+ .gr-button:focus {
+ border-color: rgb(147 197 253 / var(--tw-border-opacity));
+ outline: none;
+ box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
+ --tw-border-opacity: 1;
+ --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) \
+ var(--tw-ring-offset-color);
+ --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px var(--tw-ring-offset-width)) var(--tw-ring-color);
+ --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
+ --tw-ring-opacity: .5;
+ }
+ .footer {
+ margin-bottom: 45px;
+ margin-top: 35px;
+ text-align: center;
+ border-bottom: 1px solid #e5e5e5;
+ }
+ .footer>p {
+ font-size: .8rem;
+ display: inline-block;
+ padding: 0 10px;
+ transform: translateY(10px);
+ background: white;
+ }
+ .dark .footer {
+ border-color: #303030;
+ }
+ .dark .footer>p {
+ background: #0b0f19;
+ }
+ .prompt h4{
+ margin: 1.25em 0 .25em 0;
+ font-weight: bold;
+ font-size: 115%;
+ }
+ """
+
+ block = gr.Blocks(css=css)
+
+ with block:
+ gr.HTML("""
+
+
+
+ FastDeploy Client
+
+
+
+ The client is used for creating requests to fastdeploy server.
+
+
+ """)
+ with gr.Group():
+ with gr.Box():
+ with gr.Column():
+ with gr.Row():
+ server_addr_text = gr.Textbox(
+ label="服务ip",
+ show_label=True,
+ max_lines=1,
+ placeholder="localhost",
+ )
+
+ server_http_port_text = gr.Textbox(
+ label="推理服务端口",
+ show_label=True,
+ max_lines=1,
+ placeholder="8000",
+ )
+
+ server_metric_port_text = gr.Textbox(
+ label="性能服务端口",
+ show_label=True,
+ max_lines=1,
+ placeholder="8002",
+ )
+ with gr.Row():
+ model_name_text = gr.Textbox(
+ label="模型名称",
+ show_label=True,
+ max_lines=1,
+ placeholder="yolov5",
+ )
+ model_version_text = gr.Textbox(
+ label="模型版本",
+ show_label=True,
+ max_lines=1,
+ placeholder="1",
+ )
+
+ with gr.Box():
+ with gr.Tab("组件形式"):
+ check_button = gr.Button("获取模型输入输出")
+ component_format_column = gr.Column(visible=False)
+ with component_format_column:
+ task_radio = gr.Radio(
+ choices=list(supported_tasks.keys()),
+ value='unspecified',
+ label='任务类型',
+ visible=True)
+ gr.Markdown("根据模型需要,挑选文本框或者图像框进行输入")
+ with gr.Row():
+ with gr.Column():
+ gr.Markdown("模型输入")
+ input_accordions = []
+ input_name_texts = []
+ input_images = []
+ input_texts = []
+ # Six input slots are created hidden; get_input_output_name
+ # shows one per input reported by the model.
+ for i in range(6):
+ accordion = gr.Accordion(
+ "输入变量 {}".format(i),
+ open=True,
+ visible=False)
+ with accordion:
+ input_name_text = gr.Textbox(
+ label="变量名", interactive=False)
+ input_image = gr.Image(type='numpy')
+ input_text = gr.Textbox(
+ label="文本框", max_lines=1000)
+ input_accordions.append(accordion)
+ input_name_texts.append(input_name_text)
+ input_images.append(input_image)
+ input_texts.append(input_text)
+
+ with gr.Column():
+ gr.Markdown("模型输出")
+ output_accordions = []
+ output_name_texts = []
+ output_images = []
+ output_texts = []
+ # Six output slots, hidden until get_input_output_name
+ # shows one per output reported by the model.
+ for i in range(6):
+ accordion = gr.Accordion(
+ "输出变量 {}".format(i),
+ open=True,
+ visible=False)
+ with accordion:
+ output_name_text = gr.Textbox(
+ label="变量名", interactive=False)
+ output_text = gr.Textbox(
+ label="服务返回的原数据",
+ interactive=False,
+ show_label=True)
+ output_image = gr.Image(
+ interactive=False)
+ output_accordions.append(accordion)
+ output_name_texts.append(output_name_text)
+ output_images.append(output_image)
+ output_texts.append(output_text)
+ component_submit_button = gr.Button("提交请求")
+ with gr.Tab("原始形式"):
+ gr.Markdown("模型输入")
+ raw_payload_text = gr.Textbox(
+ label="负载数据", max_lines=10000)
+ with gr.Column():
+ gr.Markdown("输出")
+ output_raw_text = gr.Textbox(
+ label="服务返回的原始数据", interactive=False)
+ raw_submit_button = gr.Button("提交请求")
+
+ with gr.Box():
+ with gr.Column():
+ gr.Markdown("服务性能统计(每次提交请求会自动更新数据,您也可以手动点击更新)")
+ output_html_table = gr.HTML(
+ label="metrics",
+ interactive=False,
+ show_label=False,
+ value=metrics_table_head.format('', ''))
+ update_metric_button = gr.Button("更新统计数据")
+
+ status_text = gr.Textbox(
+ label="status",
+ show_label=True,
+ max_lines=1,
+ interactive=False)
+
+ # Every per-slot component; get_input_output_name resets all of them
+ # before filling in the slots the model actually uses.
+ all_input_output_components = input_accordions + input_name_texts + input_images + \
+ input_texts + output_accordions + output_name_texts + output_images + output_texts
+
+ # Handler for check_button: fetches the model's input/output metadata and
+ # reveals one slot per input/output with its name filled in. Returns a
+ # dict keyed by component; on failure only status_text is set.
+ def get_input_output_name(server_ip, server_port, model_name,
+ model_version):
+ try:
+ server_addr = server_ip + ':' + server_port
+ input_metas, output_metas = _http_manager.get_model_meta(
+ server_addr, model_name, model_version)
+ except Exception as e:
+ return {status_text: str(e)}
+ results = {
+ component: None
+ for component in all_input_output_components
+ }
+ results[component_format_column] = gr.update(visible=True)
+ # results[check_button] = gr.update(visible=False)
+ for input_accordio in input_accordions:
+ results[input_accordio] = gr.update(visible=False)
+ for output_accordio in output_accordions:
+ results[output_accordio] = gr.update(visible=False)
+ results[status_text] = 'GetInputOutputName Successful'
+ # NOTE(review): only six slots exist per side, so a model reporting
+ # more than six inputs or outputs would index past these lists.
+ for i, input_meta in enumerate(input_metas):
+ results[input_accordions[i]] = gr.update(visible=True)
+ results[input_name_texts[i]] = input_meta['name']
+ for i, output_meta in enumerate(output_metas):
+ results[output_accordions[i]] = gr.update(visible=True)
+ results[output_name_texts[i]] = output_meta['name']
+ return results
+
+ # Handler for component_submit_button. args arrive in the order of the
+ # `inputs` list given to component_submit_button.click below: five
+ # connection fields, then the slot names, images and texts, then the task.
+ def component_inference(*args):
+ server_ip = args[0]
+ http_port = args[1]
+ metric_port = args[2]
+ model_name = args[3]
+ model_version = args[4]
+ names = args[5:5 + len(input_name_texts)]
+ images = args[5 + len(input_name_texts):5 + len(input_name_texts) +
+ len(input_images)]
+ texts = args[5 + len(input_name_texts) + len(input_images):5 +
+ len(input_name_texts) + len(input_images) +
+ len(input_texts)]
+ task_type = args[-1]
+ server_addr = server_ip + ':' + http_port
+ if server_ip and http_port and model_name and model_version:
+ inputs = {}
+ for i, input_name in enumerate(names):
+ if input_name:
+ # When a slot has both an image and a text, the text is
+ # assigned second and therefore replaces the image.
+ if images[i] is not None:
+ inputs[input_name] = np.array([images[i]])
+ if texts[i]:
+ inputs[input_name] = np.array(
+ [[texts[i].encode('utf-8')]], dtype=np.object_)
+ try:
+ infer_results = _http_manager.infer(
+ server_addr, model_name, model_version, inputs)
+ results = {status_text: 'Inference Successful'}
+ for i, (output_name,
+ data) in enumerate(infer_results.items()):
+ results[output_name_texts[i]] = output_name
+ results[output_texts[i]] = str(data)
+ if task_type != 'unspecified':
+ # Visualization always draws on the first image slot;
+ # a failing visualizer clears the output image instead
+ # of failing the request.
+ try:
+ results[output_images[i]] = supported_tasks[
+ task_type](images[0], data)
+ except Exception:
+ results[output_images[i]] = None
+ if metric_port:
+ html_table = get_metric_data(server_ip, metric_port)
+ results[output_html_table] = html_table
+ return results
+ except Exception as e:
+ return {status_text: 'Error: {}'.format(e)}
+ else:
+ return {
+ status_text:
+ 'Please input server addr, model name and model version.'
+ }
+
+ # Handler for raw_submit_button: passes the payload text to
+ # _http_manager.raw_infer and shows the string it returns.
+ def raw_inference(*args):
+ server_ip = args[0]
+ http_port = args[1]
+ metric_port = args[2]
+ model_name = args[3]
+ model_version = args[4]
+ payload_text = args[5]
+ server_addr = server_ip + ':' + http_port
+ try:
+ result = _http_manager.raw_infer(server_addr, model_name,
+ model_version, payload_text)
+ results = {
+ status_text: 'Get response from server',
+ output_raw_text: result
+ }
+ if server_ip and metric_port:
+ html_table = get_metric_data(server_ip, metric_port)
+ results[output_html_table] = html_table
+ return results
+ except Exception as e:
+ return {status_text: 'Error: {}'.format(e)}
+
+ # Handler for update_metric_button: refreshes the metrics table only.
+ def update_metric(server_ip, metrics_port):
+ if server_ip and metrics_port:
+ try:
+ html_table = get_metric_data(server_ip, metrics_port)
+ return {
+ output_html_table: html_table,
+ status_text: "Successfully update metrics."
+ }
+ except Exception as e:
+ return {status_text: 'Error: {}'.format(e)}
+ else:
+ return {
+ status_text: 'Please input server ip and metrics_port.'
+ }
+
+ # Wire the buttons to their handlers. The handlers return dicts keyed by
+ # component and often set only some of the outputs listed here.
+ check_button.click(
+ fn=get_input_output_name,
+ inputs=[
+ server_addr_text, server_http_port_text, model_name_text,
+ model_version_text
+ ],
+ outputs=[
+ *all_input_output_components, check_button,
+ component_format_column, status_text
+ ])
+ component_submit_button.click(
+ fn=component_inference,
+ inputs=[
+ server_addr_text, server_http_port_text,
+ server_metric_port_text, model_name_text, model_version_text,
+ *input_name_texts, *input_images, *input_texts, task_radio
+ ],
+ outputs=[
+ *output_name_texts, *output_images, *output_texts, status_text,
+ output_html_table
+ ])
+ raw_submit_button.click(
+ fn=raw_inference,
+ inputs=[
+ server_addr_text, server_http_port_text,
+ server_metric_port_text, model_name_text, model_version_text,
+ raw_payload_text
+ ],
+ outputs=[output_raw_text, status_text, output_html_table])
+ update_metric_button.click(
+ fn=update_metric,
+ inputs=[server_addr_text, server_metric_port_text],
+ outputs=[output_html_table, status_text])
+ return block
diff --git a/visualdl/component/inference/fastdeploy_client/http_client_manager.py b/visualdl/component/inference/fastdeploy_client/http_client_manager.py
new file mode 100644
index 000000000..691594152
--- /dev/null
+++ b/visualdl/component/inference/fastdeploy_client/http_client_manager.py
@@ -0,0 +1,304 @@
+# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =======================================================================
+import json
+import re
+
+import numpy as np
+import requests
+import tritonclient.http as httpclient
+from attrdict import AttrDict
+from tritonclient.utils import InferenceServerException
+
+
+def convert_http_metadata_config(metadata):
+    """Return the metadata mapping wrapped in an AttrDict."""
+    return AttrDict(metadata)
+
+
+def prepare_request(inputs_meta, inputs_data, outputs_meta):
+    '''
+    Build the request objects for one inference call.
+
+    Args:
+        inputs_meta: input descriptions from the model metadata; each entry is
+            read by key ('name', 'datatype', 'shape').
+        inputs_data: users input data, a mapping of input name -> numpy array.
+            It is left untouched; converted arrays are kept local.
+        outputs_meta: output descriptions from the model metadata; each entry
+            is read by key ('name').
+    Returns:
+        (inputs, outputs): a list of httpclient.InferInput and a list of
+        httpclient.InferRequestedOutput.
+    Raises:
+        RuntimeError: if an input named in inputs_meta is missing from
+            inputs_data.
+    '''
+    inputs = []
+    for input_dict in inputs_meta:
+        input_name = input_dict['name']
+        if input_name not in inputs_data:
+            raise RuntimeError(
+                'Error: input name {} required for model not existed.'.format(
+                    input_name))
+        # Work on a local reference so the caller's dict keeps its own arrays.
+        data = inputs_data[input_name]
+        shape = input_dict['shape']
+        if input_dict['datatype'] == 'FP32':
+            # image data returned by gradio is uint8, convert to fp32
+            data = data.astype(np.float32) / 255
+        if len(shape) == 3 and shape[0] == 3:  # NCHW
+            # Model shape has no batch axis: take the first element and move
+            # the last axis to the front.
+            data = data[0].transpose(2, 0, 1)
+        elif len(shape) == 4 and shape[1] == 3:  # NCHW
+            data = data.transpose(0, 3, 1, 2)
+        infer_input = httpclient.InferInput(input_name, data.shape,
+                                            input_dict['datatype'])
+        infer_input.set_data_from_numpy(data)
+        inputs.append(infer_input)
+    # Key access, like the inputs above, so plain dicts work as well as
+    # attribute-style mappings.
+    outputs = [
+        httpclient.InferRequestedOutput(output_dict['name'])
+        for output_dict in outputs_meta
+    ]
+    return inputs, outputs
+
+
+# Template for the metrics display. It carries two `{}` placeholders that
+# get_metric_data fills with str.format: the model rows first, then the GPU
+# rows. Callers pass two empty strings to show the table without data.
+metrics_table_head = """
+
+
+
+
+
+ 模型名称 |
+ 执行统计 |
+ 延迟统计 |
+
+
+
+ 请求处理成功数 |
+ 请求处理失败数 |
+ 推理batch数 |
+ 推理样本数 |
+ 请求处理时间(ms) |
+ 任务队列等待时间(ms) |
+ 输入处理时间(ms) |
+ 模型推理时间(ms) |
+ 输出处理时间(ms) |
+
+ {}
+
+
+
+
+
+
+
+
+
+
+ GPU |
+ 性能指标 |
+ 显存 |
+
+
+ 利用率(%) |
+ 功率(W) |
+ 功率限制(W) |
+ 耗电量(W) |
+ 总量(GB) |
+ 已使用(GB) |
+
+ {}
+
+
+"""
+
+
+def get_metric_data(server_addr, metric_port, timeout=10):  # noqa:C901
+    '''
+    Get metrics data from fastdeploy server, and transform it into html table.
+    Args:
+        server_addr(str): fastdeployserver ip address
+        metric_port(int): fastdeployserver metrics port
+        timeout(float): seconds to wait for the metrics endpoint
+    Returns:
+        htmltable(str): html table to show metrics data. The table has no
+            data rows when the metrics endpoint cannot be reached.
+    '''
+    import html  # function-scope import, only needed to escape cell text
+
+    # Cells of a model row are emitted in this order, after the model name.
+    model_data_metric_names = [
+        "nv_inference_request_success", "nv_inference_request_failure",
+        "nv_inference_exec_count", "nv_inference_count",
+        "nv_inference_request_duration_us", "nv_inference_queue_duration_us",
+        "nv_inference_compute_input_duration_us",
+        "nv_inference_compute_infer_duration_us",
+        "nv_inference_compute_output_duration_us"
+    ]
+    # Cells of a GPU row are emitted in this order, after the gpu uuid.
+    gpu_data_metric_names = [
+        "nv_gpu_utilization", "nv_gpu_power_usage", "nv_gpu_power_limit",
+        "nv_energy_consumption", "nv_gpu_memory_total_bytes",
+        "nv_gpu_memory_used_bytes"
+    ]
+    # Divided by 1000 below (us -> ms, matching the "(ms)" column headers).
+    duration_metric_names = {
+        "nv_inference_request_duration_us",
+        "nv_inference_queue_duration_us",
+        "nv_inference_compute_input_duration_us",
+        "nv_inference_compute_infer_duration_us",
+        "nv_inference_compute_output_duration_us"
+    }
+    # Divided by 1024**3 below (bytes -> GB).
+    memory_metric_names = {
+        "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
+    }
+
+    try:
+        # Without a timeout an unresponsive endpoint would block the caller
+        # indefinitely.
+        response = requests.get(
+            "http://{}:{}/metrics".format(server_addr, metric_port),
+            timeout=timeout)
+    except Exception:
+        return metrics_table_head.format('', '')
+
+    # name{label="value",...} value -- the value is any non-space run so
+    # decimals and exponents are kept whole.
+    line_pattern = re.compile(r'(\w+)\{(.*)\}\s+(\S+)')
+    # Labels are matched as key="value" pairs rather than split on ',' / '=',
+    # which breaks on empty label sets and on values containing either
+    # character.
+    label_pattern = re.compile(r'(\w+)="([^"]*)"')
+
+    model_table = {}
+    gpu_table = {}
+    for content in response.text.split('\n'):
+        if content.startswith('#'):
+            continue
+        matched = line_pattern.match(content)
+        if not matched:
+            continue
+        metric_name, labels, value = matched.groups()
+        infos = dict(label_pattern.findall(labels))
+        if metric_name in duration_metric_names:
+            value = str(float(value) / 1000)
+        elif metric_name in memory_metric_names:
+            value = str(float(value) / 1024 / 1024 / 1024)
+        if metric_name in model_data_metric_names:
+            table, row_key = model_table, infos.get('model')
+        elif metric_name in gpu_data_metric_names:
+            table, row_key = gpu_table, infos.get('gpu_uuid')
+        else:
+            # Other metrics (the CPU ones included) are not displayed.
+            continue
+        if row_key is None:
+            # A sample without the identifying label cannot be put in a row.
+            continue
+        table.setdefault(row_key, {})[metric_name] = value
+
+    def render_rows(table, metric_names):
+        # One <tr> per model/GPU; a metric that was not reported gets an
+        # empty cell instead of raising KeyError.
+        rows = []
+        for row_key, metrics in table.items():
+            cells = [row_key] + [metrics.get(name, '') for name in metric_names]
+            rows.append("<tr>" + '\n'.join(
+                "<td>" + html.escape(cell) + "</td>" for cell in cells) + "</tr>")
+        return '\n'.join(rows)
+
+    model_data = render_rows(model_table, model_data_metric_names)
+    gpu_data = render_rows(gpu_table, gpu_data_metric_names)
+    return metrics_table_head.format(model_data, gpu_data)
+
+
+class HttpClientManager:
+    """Create, cache and use inference HTTP clients, one per server url."""
+
+    def __init__(self):
+        self.clients = {}  # server url: httpclient
+
+    def _create_client(self, server_url):
+        """Return the cached client for server_url, creating it on first use.
+
+        Raises:
+            RuntimeError: if the client cannot be constructed; the original
+                exception is attached as the cause.
+        """
+        if server_url in self.clients:
+            return self.clients[server_url]
+        try:
+            fastdeploy_client = httpclient.InferenceServerClient(server_url)
+        except Exception as e:
+            # Adjacent literals instead of a backslash continuation, which
+            # put the next line's indentation into the message.
+            raise RuntimeError(
+                'Can not connect to server {}, please check your '
+                'server address'.format(server_url)) from e
+        self.clients[server_url] = fastdeploy_client
+        return fastdeploy_client
+
+    def infer(self, server_url, model_name, model_version, inputs):
+        """Run one inference request and return {output name: result}.
+
+        Args:
+            server_url: server address used to look up / create the client.
+            model_name: name of the model to call.
+            model_version: version of the model to call.
+            inputs: mapping of input name -> numpy array, handed to
+                prepare_request together with the model metadata.
+        Returns:
+            dict of output name -> value. For 'BYTES' outputs the first
+            element is decoded as json when possible, otherwise the numpy
+            array is returned as is. Other outputs return their first element.
+        """
+        fastdeploy_client = self._create_client(server_url)
+        input_metadata, output_metadata = self.get_model_meta(
+            server_url, model_name, model_version)
+        inputs, outputs = prepare_request(input_metadata, inputs,
+                                          output_metadata)
+        response = fastdeploy_client.infer(
+            model_name, inputs, model_version=model_version, outputs=outputs)
+
+        results = {}
+        for output in output_metadata:
+            result = response.as_numpy(output.name)  # datatype: numpy
+            if output.datatype == 'BYTES':  # datatype: bytes
+                # Best effort on purpose: anything that is not json in the
+                # first element is passed through unchanged.
+                try:
+                    value = result
+                    if 1 <= len(result.shape) <= 3:
+                        value = result[(0, ) * len(result.shape)]
+                    result = json.loads(value)  # datatype: json
+                except Exception:
+                    pass
+            else:
+                result = result[0]
+            results[output.name] = result
+        return results
+
+    def raw_infer(self, server_url, model_name, model_version, raw_input):
+        """Post a raw json payload to the infer endpoint.
+
+        Args:
+            raw_input: json text; it is parsed first, so invalid json raises
+                before any request is sent.
+        Returns:
+            The server's json response, re-serialized as a string.
+        """
+        payload = json.loads(raw_input)
+        url = 'http://{}/v2/models/{}/versions/{}/infer'.format(
+            server_url, model_name, model_version)
+        res = requests.post(url, data=json.dumps(payload))
+        return json.dumps(res.json())
+
+    def get_model_meta(self, server_url, model_name, model_version):
+        """Return (input_metadata, output_metadata) for a model.
+
+        Raises:
+            RuntimeError: if the server rejects the metadata request.
+        """
+        fastdeploy_client = self._create_client(server_url)
+        try:
+            model_metadata = fastdeploy_client.get_model_metadata(
+                model_name=model_name, model_version=model_version)
+        except InferenceServerException as e:
+            raise RuntimeError(
+                "Failed to retrieve the metadata: " + str(e)) from e
+
+        model_metadata = convert_http_metadata_config(model_metadata)
+
+        input_metadata = model_metadata.inputs
+        output_metadata = model_metadata.outputs
+        return input_metadata, output_metadata
diff --git a/visualdl/component/inference/fastdeploy_client/visualizer.py b/visualdl/component/inference/fastdeploy_client/visualizer.py
new file mode 100644
index 000000000..2c6abe0b4
--- /dev/null
+++ b/visualdl/component/inference/fastdeploy_client/visualizer.py
@@ -0,0 +1,184 @@
+# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =======================================================================
+import numpy as np
+
+__all__ = [
+ 'visualize_detection', 'visualize_keypoint_detection',
+ 'visualize_face_detection', 'visualize_face_alignment',
+ 'visualize_segmentation', 'visualize_matting', 'visualize_ocr',
+ 'visualize_headpose'
+]
+
+
+def visualize_detection(image, data):
+    """Draw a detection result on image using fastdeploy.
+
+    Args:
+        image: image handed to fd.vision.vis_detection.
+        data: mapping with keys 'boxes', 'scores', 'label_ids', 'masks' and
+            'contain_masks'.
+    Returns:
+        The value returned by fd.vision.vis_detection.
+    Raises:
+        RuntimeError: if fastdeploy cannot be imported.
+    """
+    try:
+        import fastdeploy as fd
+    except Exception as e:
+        # Adjacent literals keep source indentation out of the message.
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy"
+        ) from e
+    boxes = np.array(data['boxes'])
+    scores = np.array(data['scores'])
+    label_ids = np.array(data['label_ids'])
+    masks = np.array(data['masks'])
+    contain_masks = data['contain_masks']
+    detection_result = fd.C.vision.DetectionResult()
+    detection_result.boxes = boxes
+    detection_result.scores = scores
+    detection_result.label_ids = label_ids
+    detection_result.masks = masks
+    detection_result.contain_masks = contain_masks
+    return fd.vision.vis_detection(image, detection_result)
+
+
+def visualize_keypoint_detection(image, data):
+    """Draw a keypoint detection result on image using fastdeploy.
+
+    Args:
+        image: image handed to fd.vision.vis_keypoint_detection.
+        data: mapping with keys 'keypoints', 'scores' and 'num_joints'.
+    Returns:
+        The value returned by fd.vision.vis_keypoint_detection.
+    Raises:
+        RuntimeError: if fastdeploy cannot be imported.
+    """
+    try:
+        import fastdeploy as fd
+    except Exception as e:
+        # Adjacent literals keep source indentation out of the message.
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy"
+        ) from e
+    keypoints = np.array(data['keypoints'])
+    scores = np.array(data['scores'])
+    num_joints = np.array(data['num_joints'])
+
+    detection_result = fd.C.vision.KeyPointDetectionResult()
+    detection_result.keypoints = keypoints
+    detection_result.scores = scores
+    detection_result.num_joints = num_joints
+
+    return fd.vision.vis_keypoint_detection(image, detection_result)
+
+
+def visualize_face_detection(image, data):
+    """Draw a face detection result on image using fastdeploy.
+
+    Args:
+        image: image handed to fd.vision.vis_face_detection.
+        data: mapping with keys 'data', 'scores', 'landmarks' and
+            'landmarks_per_face'.
+    Returns:
+        The value returned by fd.vision.vis_face_detection.
+    Raises:
+        RuntimeError: if fastdeploy cannot be imported.
+    """
+    try:
+        import fastdeploy as fd
+    except Exception as e:
+        # Adjacent literals keep source indentation out of the message.
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy"
+        ) from e
+    # Kept under its own name: rebinding `data` to this array made every
+    # following data[...] lookup index the array instead of the mapping.
+    # NOTE(review): the 'data' key and attribute names are kept as written;
+    # confirm them against fastdeploy's FaceDetectionResult.
+    face_data = np.array(data['data'])
+    scores = np.array(data['scores'])
+    landmarks = np.array(data['landmarks'])
+    landmarks_per_face = data['landmarks_per_face']
+
+    detection_result = fd.C.vision.FaceDetectionResult()
+    detection_result.data = face_data
+    detection_result.scores = scores
+    detection_result.landmarks = landmarks
+    detection_result.landmarks_per_face = landmarks_per_face
+
+    return fd.vision.vis_face_detection(image, detection_result)
+
+
+def visualize_face_alignment(image, data):
+    """Draw a face alignment result on image using fastdeploy.
+
+    Args:
+        image: image handed to fd.vision.vis_face_alignment.
+        data: mapping with key 'landmarks'.
+    Returns:
+        The value returned by fd.vision.vis_face_alignment.
+    Raises:
+        RuntimeError: if fastdeploy cannot be imported.
+    """
+    try:
+        import fastdeploy as fd
+    except Exception as e:
+        # Adjacent literals keep source indentation out of the message.
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy"
+        ) from e
+    landmarks = np.array(data['landmarks'])
+
+    facealignment_result = fd.C.vision.FaceAlignmentResult()
+    facealignment_result.landmarks = landmarks
+
+    return fd.vision.vis_face_alignment(image, facealignment_result)
+
+
+def visualize_segmentation(image, data):
+    """Draw a segmentation result on image using fastdeploy.
+
+    Args:
+        image: image handed to fd.vision.vis_segmentation.
+        data: mapping with keys 'label_ids', 'score_map' and 'shape'.
+    Returns:
+        The value returned by fd.vision.vis_segmentation.
+    Raises:
+        RuntimeError: if fastdeploy cannot be imported.
+    """
+    try:
+        import fastdeploy as fd
+    except Exception as e:
+        # Adjacent literals keep source indentation out of the message.
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy"
+        ) from e
+    label_ids = np.array(data['label_ids'])
+    score_map = np.array(data['score_map'])
+    shape = np.array(data['shape'])
+
+    segmentation_result = fd.C.vision.SegmentationResult()
+    segmentation_result.shape = shape
+    segmentation_result.score_map = score_map
+    segmentation_result.label_ids = label_ids
+
+    return fd.vision.vis_segmentation(image, segmentation_result)
+
+
+def visualize_matting(image, data):
+    """Draw a matting result on image using fastdeploy.
+
+    Args:
+        image: image handed to fd.vision.vis_matting.
+        data: mapping with keys 'alpha', 'foreground', 'contain_foreground'
+            and 'shape'.
+    Returns:
+        The value returned by fd.vision.vis_matting.
+    Raises:
+        RuntimeError: if fastdeploy cannot be imported.
+    """
+    try:
+        import fastdeploy as fd
+    except Exception as e:
+        # Adjacent literals keep source indentation out of the message.
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy"
+        ) from e
+    alpha = np.array(data['alpha'])
+    foreground = np.array(data['foreground'])
+    contain_foreground = data['contain_foreground']
+    shape = np.array(data['shape'])
+
+    matting_result = fd.C.vision.MattingResult()
+    matting_result.alpha = alpha
+    matting_result.foreground = foreground
+    matting_result.contain_foreground = contain_foreground
+    matting_result.shape = shape
+
+    return fd.vision.vis_matting(image, matting_result)
+
+
+def visualize_ocr(image, data):
+    """Draw an OCR result on image using fastdeploy.
+
+    Args:
+        image: image handed to fd.vision.vis_ppocr.
+        data: mapping with keys 'boxes', 'text', 'rec_scores', 'cls_scores'
+            and 'cls_labels'.
+    Returns:
+        The value returned by fd.vision.vis_ppocr.
+    Raises:
+        RuntimeError: if fastdeploy cannot be imported.
+    """
+    try:
+        import fastdeploy as fd
+    except Exception as e:
+        # Adjacent literals keep source indentation out of the message.
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy"
+        ) from e
+    boxes = np.array(data['boxes'])
+    text = np.array(data['text'])
+    rec_scores = np.array(data['rec_scores'])
+    cls_scores = np.array(data['cls_scores'])
+    cls_labels = data['cls_labels']
+
+    ocr_result = fd.C.vision.OCRResult()
+    ocr_result.boxes = boxes
+    ocr_result.text = text
+    ocr_result.rec_scores = rec_scores
+    ocr_result.cls_scores = cls_scores
+    ocr_result.cls_labels = cls_labels
+
+    return fd.vision.vis_ppocr(image, ocr_result)
+
+
+def visualize_headpose(image, data):
+    """Draw a head pose result on image using fastdeploy.
+
+    Args:
+        image: image handed to fd.vision.vis_headpose.
+        data: mapping with key 'euler_angles'.
+    Returns:
+        The value returned by fd.vision.vis_headpose.
+    Raises:
+        RuntimeError: if fastdeploy cannot be imported.
+    """
+    try:
+        import fastdeploy as fd
+    except Exception as e:
+        # Adjacent literals keep source indentation out of the message.
+        raise RuntimeError(
+            "fastdeploy is required for visualizing results, please refer to "
+            "https://github.com/PaddlePaddle/FastDeploy to install fastdeploy"
+        ) from e
+    euler_angles = np.array(data['euler_angles'])
+
+    headpose_result = fd.C.vision.HeadPoseResult()
+    headpose_result.euler_angles = euler_angles
+
+    return fd.vision.vis_headpose(image, headpose_result)
diff --git a/visualdl/component/inference/fastdeploy_lib.py b/visualdl/component/inference/fastdeploy_lib.py
new file mode 100644
index 000000000..5264c6e77
--- /dev/null
+++ b/visualdl/component/inference/fastdeploy_lib.py
@@ -0,0 +1,790 @@
+# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =======================================================================
+import copy
+import json
+import os
+import random
+import re
+import signal
+import string
+from collections import defaultdict
+from subprocess import Popen
+from subprocess import STDOUT
+
+import google.protobuf.json_format as json_format
+import google.protobuf.text_format as text_format
+import psutil
+import requests
+
+from .proto.model_config_pb2 import ModelConfig
+from visualdl.utils.dir import FASTDEPLOYSERVER_PATH
+
+
def pbtxt2json(content: str):
    '''
    Parse a ModelConfig protocol message written in text format and
    return it serialized as a json format string.
    '''
    parsed_config = text_format.Parse(content, ModelConfig())
    return json_format.MessageToJson(parsed_config)
+
+
def json2pbtxt(content: str):
    '''
    Parse a ModelConfig protocol message written as a json format string and
    return it serialized in text format.
    '''
    parsed_config = json_format.Parse(content, ModelConfig())
    return text_format.MessageToString(parsed_config)
+
+
def validate_data(model_config):
    '''
    Filter a model config received from the front end before it is saved.
    Every entry whose value is falsy (empty string, empty container, None, 0, False)
    is dropped; all other entries are kept unchanged.
    Args:
        model_config: model config to be saved in config file
    Return:
        a new dict holding only the entries with a truthy value.
    '''
    return {
        field: setting
        for field, setting in model_config.items() if setting
    }
+
+
def analyse_config(cur_dir: str):
    '''
    Analyse the model configs found in a model repository directory.
    Every direct sub directory holding at least one file whose name contains '.pbtxt'
    is treated as a model. If such a model has no 'config.pbtxt', one is created from
    its first config file. If it has no version directory, an empty directory '1' is created.
    Args:
        cur_dir(str): path of the model repository.
    Returns:
        all_model_configs(dict): config of each model in json format, indexed by model directory name.
        all_model_versions(dict): for each model, the files found in each version directory.
    Raises:
        Exception: if no model config is found under cur_dir.
    '''
    all_model_configs = {}
    all_model_versions = {}
    # Models can only be put in directories directly under the model repository,
    # so only the first level yielded by os.walk is inspected.
    # next() with a default keeps a StopIteration from leaking when the path cannot be listed.
    top_level = next(os.walk(cur_dir), None)
    if top_level is None:
        raise Exception(
            'The path you choose is not a valid model repository, please choose a valid path.'
        )
    parent_dir, sub_dirs, _ = top_level
    for model_dir_name in sub_dirs:
        model_level = next(
            os.walk(os.path.join(parent_dir, model_dir_name)), None)
        if model_level is None:
            continue
        model_dir, model_sub_dirs, filenames = model_level
        model_name = os.path.basename(model_dir)
        # filenames containing '.pbtxt' are all treated as config files
        config_filenames = [
            filename for filename in filenames if '.pbtxt' in filename
        ]
        if not config_filenames:
            continue
        if 'config.pbtxt' in config_filenames:
            config_filenames.remove('config.pbtxt')
        else:
            # if no config.pbtxt, we choose the first file in config_filenames list to create config.pbtxt
            copy_config_file_to_default_config(model_dir, config_filenames[0])
        default_config_filename = 'config.pbtxt'
        config_filenames.insert(0, default_config_filename)
        # the file is closed as soon as its content has been read
        with open(os.path.join(model_dir, default_config_filename)) as f:
            json_config = json.loads(pbtxt2json(f.read()))
        # Only one filename is stored here; the full list is served by
        # get_config_filenames_for_one_model.
        json_config["config_filenames"] = config_filenames[0]
        # name in config data may be different from the model directory name,
        # so it is overwritten with the directory name.
        json_config['name'] = model_name
        all_model_configs[model_name] = json_config

        versions = {}
        for model_sub_dir in model_sub_dirs:
            if re.match(r'\d+', model_sub_dir):  # version directory starts with digits
                versions[model_sub_dir] = os.listdir(
                    os.path.join(model_dir, model_sub_dir))
        if not versions:
            # a model has config but no version directory: create one for convenience
            os.mkdir(os.path.join(model_dir, '1'))
            versions['1'] = []
        all_model_versions[model_name] = versions

    if not all_model_configs:
        raise Exception(
            'The path you choose is not a valid model repository, please choose a valid path.'
        )
    return all_model_configs, all_model_versions
+
+
def exchange_format_to_original_format(exchange_format):
    '''
    Change config exchange format to original format.
    The model configs found under 'ensembles' and 'models' are modified in place.
    Args:
        exchange_format(dict): may hold the lists 'ensembles' and 'models' of model configs.
    Returns:
        all_models(dict): the modified model configs indexed by their 'name'.
    '''
    # Keys that only exist for the frontend on each ensemble step. 'pos_x' and 'pos_y'
    # are the ones written by calculate_layout_for_frontend.
    frontend_step_keys = ('modelType', 'inputModels', 'outputModels',
                          'inputVars', 'outputVars', 'pos_x', 'pos_y')
    all_models = {}
    alls = exchange_format.get('ensembles', []) + exchange_format.get(
        'models', [])
    for model_config in alls:
        # 1. add 'executionAccelerators' keyword
        if 'optimization' in model_config:
            model_config['optimization'] = {
                'executionAccelerators': model_config['optimization']
            }
        # 2. delete versions information
        model_config.pop('versions', None)
        model_config.pop('config_filenames', None)
        if model_config.get(
                'platform') == 'ensemble' and 'step' in model_config:
            # 3. add 'ensembleScheduling' keyword
            step_configs = model_config.pop('step')
            model_config.setdefault('ensembleScheduling',
                                    {})['step'] = step_configs
            # 4. remove two virtual models(feed, fetch) and the frontend-only keys.
            # The list is filtered in place so it stays the object stored above.
            step_configs[:] = [
                step for step in step_configs
                if step['modelName'] not in ('feed', 'fetch')
            ]
            for step in step_configs:
                for key in frontend_step_keys:
                    # pop with a default tolerates steps that lack a key
                    step.pop(key, None)
        all_models[model_config['name']] = model_config
    return all_models
+
+
def copy_config_file_to_default_config(model_dir, config_name):
    '''
    Create 'config.pbtxt' in a model directory from another config file of that directory.
    The 'name' field of the written config is set to the base name of model_dir.
    Args:
        model_dir(str): model directory.
        config_name(str): name of the config file in model_dir to copy from.
    '''
    # the source file is closed as soon as its content has been read
    with open(os.path.join(model_dir, config_name)) as f:
        json_config = json.loads(pbtxt2json(f.read()))
    json_config['name'] = os.path.basename(model_dir)
    text_proto = json2pbtxt(json.dumps(json_config))
    with open(os.path.join(model_dir, 'config.pbtxt'), 'w') as f:
        f.write(text_proto)
+
+
def original_format_to_exchange_format(original_format, version_info):
    '''
    Change config original format to exchange format.
    Neither argument is modified: every model config is deep copied before it is
    transformed and the version information is converted into a new structure.
    Args:
        original_format(dict): model configs in json format, indexed by model name.
        version_info(dict): for each model name, a dict mapping version name to filenames.
    Returns:
        exchange_format(dict): {'ensembles': [...], 'models': [...]}. Configs whose platform
        is 'ensemble' go to 'ensembles', other configs having a 'backend' go to 'models',
        and configs matching neither are left out.
    '''
    exchange_format = {'ensembles': [], 'models': []}
    # 0. transform version info into component format in frontend
    versions_for_frontend = {}
    for model_name, version_filenames_dict in version_info.items():
        versions_for_frontend[model_name] = [{
            'title': version_name,
            'key': version_name,
            'children': [{
                'title': filename,
                'key': filename
            } for filename in filenames]
        } for version_name, filenames in version_filenames_dict.items()]

    for model_name, model_config in original_format.items():
        transformed_config = copy.deepcopy(model_config)
        # 1. remove 'executionAccelerators' keyword (taken from the copy, so the
        # result never shares objects with original_format)
        if 'executionAccelerators' in transformed_config.get(
                'optimization', {}):
            transformed_config['optimization'] = transformed_config[
                'optimization']['executionAccelerators']
        # 2. add versions information
        if model_name in versions_for_frontend:
            transformed_config['versions'] = versions_for_frontend[model_name]
        if transformed_config.get('platform') == 'ensemble':  # ensemble model
            # 3. remove ensembleScheduling
            if 'step' in transformed_config.get('ensembleScheduling', {}):
                steps = transformed_config.pop('ensembleScheduling')['step']
                transformed_config['step'] = steps
                # 4. add two virtual models(feed, fetch), and
                # "modelType", "inputModels", "outputModels", "inputVars", "outputVars"
                for model_config_in_step in steps:
                    model_config_in_step['modelType'] = 'normal'
                    model_config_in_step['inputModels'] = []
                    model_config_in_step['outputModels'] = []
                    model_config_in_step['inputVars'] = []
                    model_config_in_step['outputVars'] = []
                for virtual_name in ('feed', 'fetch'):
                    steps.append({
                        "modelName": virtual_name,
                        "modelType": "virtual",
                        "inputModels": [],
                        "outputModels": [],
                        "inputVars": [],
                        "outputVars": []
                    })
                analyse_step_relationships(steps, transformed_config['input'],
                                           transformed_config['output'])
            exchange_format['ensembles'].append(transformed_config)
        elif 'backend' in transformed_config:  # single model
            exchange_format['models'].append(transformed_config)
    return exchange_format
+
+
def analyse_step_relationships(step_config, inputs, outputs):  # noqa: C901
    '''
    Work out how the models of an ensemble step are connected through variables, and fill
    "inputModels", "outputModels", "inputVars", "outputVars" of every entry in step_config.
    The layout positions are computed afterwards by calculate_layout_for_frontend.
    step_config: step data in ensemble model config.
    inputs: inputs in ensemble model config.
    outputs: outputs in ensemble model config.
    '''
    models_dict = {}
    vars_dict = {}

    def relations_of(variable):
        # Create the record of a variable the first time it is met.
        if variable not in vars_dict:
            vars_dict[variable] = {'from_models': set(), 'to_models': set()}
        return vars_dict[variable]

    for step in step_config:
        models_dict[step['modelName']] = step
        if step['modelType'] == 'virtual':
            # ensemble inputs are produced by 'feed', ensemble outputs are consumed by 'fetch'
            for ensemble_input in inputs:
                relations_of(ensemble_input['name'])['from_models'].add('feed')
            for ensemble_output in outputs:
                relations_of(ensemble_output['name'])['to_models'].add('fetch')
            continue
        for consumed_var in step['inputMap'].values():
            relations_of(consumed_var)['to_models'].add(step['modelName'])
        for produced_var in step['outputMap'].values():
            relations_of(produced_var)['from_models'].add(step['modelName'])

    for var_name, relationships in vars_dict.items():
        producers = relationships['from_models']
        consumers = relationships['to_models']
        for producer in producers:
            producer_config = models_dict[producer]
            producer_config['outputVars'].append(var_name)
            for consumer in consumers:
                if consumer not in producer_config['outputModels']:
                    producer_config['outputModels'].append(consumer)
        for consumer in consumers:
            consumer_config = models_dict[consumer]
            consumer_config['inputVars'].append(var_name)
            for producer in producers:
                if producer not in consumer_config['inputModels']:
                    consumer_config['inputModels'].append(producer)
    calculate_layout_for_frontend(models_dict)
+
+
def get_config_filenames_for_one_model(cur_dir, name):
    '''
    List the config files stored directly in a model directory.
    Args:
        cur_dir(str): path of the model repository.
        name(str): model directory name.
    Returns:
        config_filenames(list): sorted names containing '.pbtxt' but not 'vdlbackup',
        followed by the sorted names containing both. Empty if the directory cannot be listed.
    '''
    # next() with a default keeps a StopIteration from leaking when os.walk yields nothing.
    _, _, filenames = next(
        os.walk(os.path.join(cur_dir, name)), (None, [], []))
    config_filenames = []
    backup_config_filenames = []
    for filename in filenames:
        if '.pbtxt' not in filename:
            continue
        if 'vdlbackup' in filename:
            backup_config_filenames.append(filename)  # backup config files
        else:
            config_filenames.append(filename)  # normal config files
    return sorted(config_filenames) + sorted(backup_config_filenames)
+
+
def get_config_for_one_model(cur_dir, name, config_filename):
    '''
    Load one config file of a model and return it in exchange format.
    Args:
        cur_dir(str): path of the model repository.
        name(str): model directory name.
        config_filename(str): config file to load from the model directory.
    Returns:
        The config in exchange format (see original_format_to_exchange_format),
        or None if it is neither an ensemble nor a model with a backend.
    '''
    model_dir = os.path.join(cur_dir, name)
    # the file is closed as soon as its content has been read
    with open(os.path.join(model_dir, config_filename)) as f:
        json_config = json.loads(pbtxt2json(f.read()))
    # name in config data may be different from the model directory name,
    # so it is overwritten with the directory name.
    json_config['name'] = name
    json_config["config_filenames"] = config_filename
    all_model_configs = {name: json_config}

    versions = {}
    for model_sub_dir in os.listdir(model_dir):
        version_dir = os.path.join(model_dir, model_sub_dir)
        # os.listdir also returns regular files, so the entry must be checked to be a
        # directory before it is listed (a file named like '1.txt' would raise otherwise).
        if re.match(r'\d+', model_sub_dir) and os.path.isdir(version_dir):
            versions[model_sub_dir] = os.listdir(version_dir)
    all_model_versions = {name: versions}

    model_config = original_format_to_exchange_format(all_model_configs,
                                                      all_model_versions)
    if model_config['ensembles']:
        return model_config['ensembles'][0]
    elif model_config['models']:
        return model_config['models'][0]
    return None
+
+
def calculate_layout_for_frontend(model_config_in_step):
    '''
    Analyse model topology connections and prepare the positions for each model in layout.
    Only the models reached from 'fetch' by following 'inputModels' get a position.
    Dynamic program algorithm:
        depth('feed') = 0
        depth(cur_node) = max([depth(prev_node) for prev_node in cur_node['inputModels']]) + 1
        a node other than 'feed' without any input model gets depth 1.
    Args:
        model_config_in_step(dict): model config in ensemble models' step, indexed by model name.
    Returns:
        None. 'pos_y' (the depth) and 'pos_x' (the rank among models of equal depth)
        are saved in place.
    '''
    path_depth = defaultdict(int)

    def depth_recursive(model):
        name = model['modelName']
        if name == 'feed':
            path_depth[name] = 0
            return 0
        # Reading the defaultdict registers the model on its first visit. A non-zero
        # value means the depth is already known, since only 'feed' has depth 0.
        if path_depth[name] != 0:
            return path_depth[name]
        # default=0 keeps max() from raising on a model without input models
        path_depth[name] = max(
            (depth_recursive(model_config_in_step[input_name])
             for input_name in model_config_in_step[name]['inputModels']),
            default=0) + 1
        return path_depth[name]

    depth_recursive(model_config_in_step['fetch'])
    cur_x = 0
    last_depth = -1
    for model_name, depth in sorted(
            path_depth.items(), key=lambda item: item[1]):
        if depth != last_depth:
            cur_x = 0  # first model of a new depth starts a new row
        model_config_in_step[model_name]['pos_y'] = depth
        model_config_in_step[model_name]['pos_x'] = cur_x
        cur_x += 1
        last_depth = depth
    return
+
+
def launch_process(kwargs: dict):
    '''
    Launch a fastdeploy server according to specified arguments.
    Args:
        kwargs(dict): launch arguments. 'default_model_name', 'server-name', 'ensemble-img'
            and 'gpus' are not passed on the command line; every other entry becomes
            `--key value`. A non-empty 'gpus' is exported as CUDA_VISIBLE_DEVICES.
            'server-name', 'ensemble-img' and 'model-repository' are required keys.
    Returns:
        p(Popen): the launched process.
    Raises:
        RuntimeError: if the server name is already used in FASTDEPLOYSERVER_PATH.
    '''
    cmd = ['fastdeployserver']
    launch_env = os.environ.copy()
    start_args = {}
    for key, value in kwargs.items():
        start_args[key] = value
        # default_model_name fills the client model_name automatically,
        # server-name and ensemble-img are extra information
        if key in ('default_model_name', 'server-name', 'ensemble-img'):
            continue
        if key == 'gpus':
            if value:
                launch_env['CUDA_VISIBLE_DEVICES'] = value
            continue
        cmd.append('--{}'.format(key))
        cmd.append('{}'.format(value))
    if start_args['server-name'] and start_args['server-name'] in os.listdir(
            FASTDEPLOYSERVER_PATH):
        raise RuntimeError(
            "Failed to launch server,server name {} has been used,please write a different server name."
            .format(start_args['server-name']))
    all_model_configs, all_model_versions = analyse_config(
        start_args['model-repository'])
    model_repo_config = original_format_to_exchange_format(
        all_model_configs, all_model_versions)
    model_repo_config['ensemble-img'] = start_args['ensemble-img']
    logfilename = 'logfile-{}'.format(get_random_string(8))
    while os.path.exists(os.path.join(FASTDEPLOYSERVER_PATH, logfilename)):
        logfilename = 'logfile-{}'.format(get_random_string(8))
    # The child process gets its own copy of the log file descriptor, so the
    # handle opened here is closed as soon as the process has been started.
    with open(
            os.path.join(FASTDEPLOYSERVER_PATH, logfilename), 'w',
            buffering=1) as logfile:
        p = Popen(
            cmd,
            stdout=logfile,
            stderr=STDOUT,
            universal_newlines=True,
            env=launch_env)
    server_name = start_args['server-name'] if start_args[
        'server-name'] else p.pid
    with open(
            os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_name)),
            'w') as f:
        # filename ${server_name} contain 4 lines:
        # line1 : the real log filename ${logfilename}
        # line2 : pid
        # line3 : launch arguments
        # line4 : model-repository configuration
        f.write(logfilename + '\n' + str(p.pid) + '\n' +
                json.dumps(start_args) + '\n' + json.dumps(model_repo_config))
    return p
+
+
def get_random_string(length):
    '''
    Build a string of `length` characters, each drawn with random.choice
    from the lowercase ascii letters.
    '''
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
+
+
def get_start_arguments(server_id):
    '''
    Get the start arguments for fastdeployserver process.
    They are read from the third line of the file named after the server
    in FASTDEPLOYSERVER_PATH.
    Args:
        server_id(str): fastdeployserver process name
    Returns:
        args(dict): launch arguments when start fastdeployserver process,
        empty if the file of the server does not exist.
    '''
    record_path = os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))
    try:
        with open(record_path, 'r') as f:
            arguments_json = f.read().split('\n')[2]
    except FileNotFoundError:
        # Opening directly instead of checking for existence first: the file can be
        # removed (see delete_files_for_process) between a check and the open.
        return {}
    return json.loads(arguments_json)
+
+
def get_process_pid(server_id):
    '''
    Get the process id for fastdeployserver process.
    It is read from the second line of the file named after the server
    in FASTDEPLOYSERVER_PATH.
    Args:
        server_id(str): fastdeployserver process name
    Returns:
        pid(int): process id, or None if the file of the server does not exist.
    '''
    record_path = os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))
    try:
        with open(record_path, 'r') as f:
            return int(f.read().split('\n')[1])
    except FileNotFoundError:
        # Opening directly instead of checking for existence first: the file can be
        # removed (see delete_files_for_process) between a check and the open.
        return None
+
+
def get_process_logfile_name(server_id):
    '''
    Get the process logfile name for fastdeployserver process.
    It is read from the first line of the file named after the server
    in FASTDEPLOYSERVER_PATH.
    Args:
        server_id(str): fastdeployserver process name
    Returns:
        logfile(str): logfile name, or None if the file of the server does not exist.
    '''
    record_path = os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))
    try:
        with open(record_path, 'r') as f:
            return f.read().split('\n')[0]
    except FileNotFoundError:
        # Opening directly instead of checking for existence first: the file can be
        # removed (see delete_files_for_process) between a check and the open.
        return None
+
+
def get_process_model_configuration(server_id):
    '''
    Get the model repository configuration for fastdeployserver process.
    It is read from the fourth line of the file named after the server
    in FASTDEPLOYSERVER_PATH.
    Args:
        server_id(str): fastdeployserver process name
    Returns:
        configuration(dict): model repository configuration,
        empty if the file of the server does not exist.
    '''
    record_path = os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))
    try:
        with open(record_path, 'r') as f:
            conf_json = f.read().split('\n')[3]
    except FileNotFoundError:
        # Opening directly instead of checking for existence first: the file can be
        # removed (see delete_files_for_process) between a check and the open.
        return {}
    return json.loads(conf_json)
+
+
def get_process_output(server_id, length):
    '''
    Read the output a launched server has written to its log file.
    Args:
        server_id(str): fastdeployserver process name
        length(int): offset passed to seek() before reading; the rest of the file is returned.
    Returns:
        data(str): log content from the offset on, or None if the file of the server
        or its log file does not exist.
    '''
    record_path = os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))
    if not os.path.exists(record_path):
        return None
    log_path = os.path.join(
        FASTDEPLOYSERVER_PATH,
        '{}'.format(get_process_logfile_name(server_id)))
    if not os.path.exists(log_path):
        return None
    with open(log_path, 'r') as log:
        log.seek(length)
        return log.read()
+
+
def mark_pid_for_dead_process(server_id):
    '''
    Replace the pid stored in the file of a server with -1.
    Resource files for a dead server are only deleted when the user closes the server in
    frontend, and closing a server kills the recorded pid. Because the pid of a dead process
    may be reassigned to a new process, the recorded pid is overwritten with -1 once the
    process has been polled and found dead.
    Nothing happens if the file of the server does not exist.
    Args:
        server_id(str): fastdeployserver process name
    '''
    record_path = os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))
    if not os.path.exists(record_path):
        return
    with open(record_path, 'r') as record:
        lines = record.read().split('\n')
    lines[1] = '-1'  # the second line holds the pid
    with open(record_path, 'w') as record:
        record.write('\n'.join(lines))
+
+
def delete_files_for_process(server_id):
    '''
    Delete the files kept for a fastdeployserver process: its log file, if it exists,
    and the file named after the server. Nothing happens if the latter does not exist.
    Args:
        server_id(str): fastdeployserver process name
    '''
    record_path = os.path.join(FASTDEPLOYSERVER_PATH, '{}'.format(server_id))
    if not os.path.exists(record_path):
        return
    log_path = os.path.join(
        FASTDEPLOYSERVER_PATH,
        '{}'.format(get_process_logfile_name(server_id)))
    if os.path.exists(log_path):
        os.remove(log_path)
    os.remove(record_path)
+
+
def kill_process(process):
    '''
    Stop a launched server, best effort.
    Args:
        process: either a server id (str), whose recorded pid is killed with os.kill,
            or a process object providing kill() and wait(), such as the Popen
            returned by launch_process.
    '''
    if isinstance(process, str):  # server_id, use os.kill to terminate
        pid = get_process_pid(process)
        # None: the server has no file any more; -1 marks a process already found dead
        if pid is None or pid == -1:
            return
        # SIGKILL does not exist on every platform, fall back to SIGTERM there
        kill_signal = getattr(signal, 'SIGKILL', signal.SIGTERM)
        try:
            os.kill(pid, kill_signal)
        except (OSError, OverflowError):
            # the process is already gone or cannot be signalled
            pass
    else:
        process.kill()
        try:
            process.wait(10)
        except Exception:
            # deliberate best effort: a process that does not exit in time is left alone
            pass
+
+
def get_alive_fastdeploy_servers():
    '''
    Collect the server names found in `FASTDEPLOYSERVER_PATH` (every entry whose name does
    not contain 'logfile'). The files of servers reported as not alive by
    check_process_alive are deleted and those servers are left out of the result.
    Returns:
        server_names(list): names of the remaining servers.
    '''
    candidates = [
        entry for entry in os.listdir(FASTDEPLOYSERVER_PATH)
        if 'logfile' not in entry
    ]
    alive_servers = []
    for candidate in candidates:
        if check_process_alive(candidate) is False:
            delete_files_for_process(candidate)
        else:
            alive_servers.append(candidate)
    return alive_servers
+
+
def check_process_zombie(server_id):
    '''
    Given a server id, tell whether its recorded pid is -1, the value used
    to mark a process that has been found dead.
    Args:
        server_id(str): fastdeployserver process name
    Return:
        status(bool): True if the recorded pid is -1.
    '''
    return get_process_pid(server_id) == -1
+
+
def check_process_alive(server_id):
    '''
    Given a server id, check whether the process is alive or not.
    Args:
        server_id(str): fastdeployserver process name
    Return:
        status(bool): True if process is still alive, or if its pid is marked -1.
    '''
    pid = get_process_pid(server_id)
    if pid is None:
        return False
    if pid == -1:
        # -1 marks a process that was found dead. True is returned so that the user
        # can still get the log of the dead process in frontend.
        return True
    try:
        os.kill(pid, 0)  # signal 0 only probes the pid
    except OSError:
        return False
    try:
        process_name = psutil.Process(pid).name()
    except psutil.NoSuchProcess:
        # the process exited between the probe above and the name lookup
        return False
    # The pid may have been reassigned, so the process must be a fastdeployserver.
    # The name is matched without its last letter because psutil was observed to report
    # 'fastdeployserve' (presumably the 15 character limit on process names, TODO confirm).
    return 'fastdeployserve' in process_name
+
+
# Metric names grouped by the table they are reported in.
# generate_metric_table only builds tables for the 'Model' and 'GPU' groups.
_metric_column_name = {
    "Model": {
        "nv_inference_request_success",
        "nv_inference_request_failure",
        "nv_inference_count",
        "nv_inference_exec_count",
        "nv_inference_request_duration_us",
        "nv_inference_queue_duration_us",
        "nv_inference_compute_input_duration_us",
        "nv_inference_compute_infer_duration_us",
        "nv_inference_compute_output_duration_us",
    },
    "GPU": {
        "nv_gpu_power_usage",
        "nv_gpu_power_limit",
        "nv_energy_consumption",
        "nv_gpu_utilization",
        "nv_gpu_memory_total_bytes",
        "nv_gpu_memory_used_bytes",
    },
    "CPU": {
        "nv_cpu_utilization",
        "nv_cpu_memory_total_bytes",
        "nv_cpu_memory_used_bytes",
    },
}
+
+
def generate_metric_table(server_addr, server_port):  # noqa:C901
    '''
    Request the metrics of a server and arrange them in tables.
    Args:
        server_addr(str): address of the metrics service.
        server_port(str|int): port of the metrics service.
    Returns:
        results(dict): {'Model': {model: {metric: value}}, 'GPU': {gpu_uuid: {metric: value}}}.
        Duration metrics are divided by 1000 and gpu memory metrics by 1024**3 (floats).
        Other values stay strings, written without fraction when they are integral.
        None is returned if the request fails.
    '''
    duration_metrics = ("nv_inference_request_duration_us",
                        "nv_inference_queue_duration_us",
                        "nv_inference_compute_input_duration_us",
                        "nv_inference_compute_infer_duration_us",
                        "nv_inference_compute_output_duration_us")
    gpu_memory_metrics = ("nv_gpu_memory_total_bytes",
                          "nv_gpu_memory_used_bytes")
    # name{labels} value -- the value is any run of non-blank characters, so that
    # decimals such as 0.35 are kept whole instead of being cut at the dot.
    sample_pattern = re.compile(r'(\w+)\{(.*)\}\s+(\S+)')
    label_pattern = re.compile(r'(\w+)="([^"]*)"')
    try:
        # Without a timeout an unresponsive server would block the caller forever.
        response = requests.get(
            "http://{}:{}/metrics".format(server_addr, server_port),
            timeout=10)
    except Exception:
        return None

    model_table = {}
    gpu_table = {}
    for content in response.text.split('\n'):
        if content.startswith('#'):
            continue
        matched = sample_pattern.match(content)
        if not matched:
            continue
        metric_name, labels, value = matched.groups()
        # tolerant label parsing: no failure on empty labels or on '=' / ',' inside a value
        infos = dict(label_pattern.findall(labels))
        if metric_name in duration_metrics:
            value = float(value) / 1000
        elif metric_name in gpu_memory_metrics:
            value = float(value) / 1024 / 1024 / 1024
        else:
            # integral values such as '3.000000' keep being reported as '3'
            try:
                number = float(value)
            except ValueError:
                number = None
            if number is not None and number.is_integer():
                value = str(int(number))
        if metric_name in _metric_column_name['Model']:
            row_key = infos.get('model')
            table = model_table
        elif metric_name in _metric_column_name['GPU']:
            row_key = infos.get('gpu_uuid')
            table = gpu_table
        else:
            continue  # 'CPU' metrics and unknown metrics are not reported
        if row_key is None:
            continue  # the label identifying the row is missing
        table.setdefault(row_key, {})[metric_name] = value
    return {'Model': model_table, 'GPU': gpu_table}
diff --git a/visualdl/component/inference/fastdeploy_server.py b/visualdl/component/inference/fastdeploy_server.py
new file mode 100644
index 000000000..89b0b13ff
--- /dev/null
+++ b/visualdl/component/inference/fastdeploy_server.py
@@ -0,0 +1,439 @@
+# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =======================================================================
+import datetime
+import json
+import os
+import re
+import shutil
+import socket
+import time
+from multiprocessing import Process
+from pathlib import Path
+
+import requests
+
+from .fastdeploy_client.client_app import create_gradio_client_app
+from .fastdeploy_lib import analyse_config
+from .fastdeploy_lib import check_process_zombie
+from .fastdeploy_lib import copy_config_file_to_default_config
+from .fastdeploy_lib import delete_files_for_process
+from .fastdeploy_lib import exchange_format_to_original_format
+from .fastdeploy_lib import generate_metric_table
+from .fastdeploy_lib import get_alive_fastdeploy_servers
+from .fastdeploy_lib import get_config_filenames_for_one_model
+from .fastdeploy_lib import get_config_for_one_model
+from .fastdeploy_lib import get_process_model_configuration
+from .fastdeploy_lib import get_process_output
+from .fastdeploy_lib import get_start_arguments
+from .fastdeploy_lib import json2pbtxt
+from .fastdeploy_lib import kill_process
+from .fastdeploy_lib import launch_process
+from .fastdeploy_lib import mark_pid_for_dead_process
+from .fastdeploy_lib import original_format_to_exchange_format
+from .fastdeploy_lib import validate_data
+from visualdl.server.api import gen_result
+from visualdl.server.api import result
+from visualdl.utils.dir import FASTDEPLOYSERVER_PATH
+
+
+class FastDeployServerApi(object):
+ def __init__(self):
+ self.root_dir = Path(os.getcwd())
+ self.opened_servers = {
+ } # Use to store the opened server process pid and process itself
+ self.client_port = None
+
+ @result()
+ def get_directory(self, cur_dir):
+ if self.root_dir not in Path(os.path.abspath(cur_dir)).parents:
+ cur_dir = '.'
+ cur_dir, sub_dirs, filenames = os.walk(cur_dir).send(None)
+ if Path(self.root_dir) != Path(os.path.abspath(cur_dir)):
+ sub_dirs.append('..')
+ sub_dirs = sorted(sub_dirs)
+ directorys = {
+ 'parent_dir':
+ os.path.relpath(Path(os.path.abspath(cur_dir)), self.root_dir),
+ 'sub_dir':
+ sub_dirs
+ }
+ return directorys
+
+ @result()
+ def get_config(self, cur_dir):
+ all_model_configs, all_model_versions = analyse_config(cur_dir)
+ return original_format_to_exchange_format(all_model_configs,
+ all_model_versions)
+
+ @result()
+ def config_update(self, cur_dir, model_name, config, config_filename):
+ config = json.loads(config)
+ all_models = exchange_format_to_original_format(config)
+ model_dir = os.path.join(os.path.abspath(cur_dir), model_name)
+ filtered_config = validate_data(all_models[model_name])
+ text_proto = json2pbtxt(json.dumps(filtered_config))
+ # backup user's config data first, when data corrupted by front-end, we still can recovery data
+ # backup config filename: {original_name}_vdlbackup_{datetime}.pbtxt
+ # backup config can only used to restore config.pbtxt
+ if 'vdlbackup' in config_filename:
+ raise RuntimeError(
+ "Backup config file is not permitted to update.")
+ basename = os.path.splitext(config_filename)[0]
+ shutil.copy(
+ os.path.join(model_dir, config_filename),
+ os.path.join(
+ model_dir, '{}_vdlbackup_{}.pbtxt'.format(
+ basename,
+ datetime.datetime.now().isoformat())))
+ with open(os.path.join(model_dir, config_filename), 'w') as f:
+ f.write(text_proto)
+ return
+
+ @result()
+ def start_server(self, configs):
+ configs = json.loads(configs)
+ process = launch_process(configs)
+ if process.poll() is not None:
+ raise RuntimeError(
+ "Failed to launch fastdeployserver,please check fastdeployserver is installed in environment."
+ )
+ server_name = configs['server-name'] if configs[
+ 'server-name'] else str(process.pid)
+ self.opened_servers[server_name] = process
+ return server_name
+
+ @result()
+ def stop_server(self, server_id):
+ if server_id in self.opened_servers: # check if server_id in self.opened_servers
+ kill_process(self.opened_servers[server_id])
+ del self.opened_servers[server_id]
+ elif server_id in set(
+ os.listdir(FASTDEPLOYSERVER_PATH)): # check if server_id in
+ # FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn)
+ kill_process(server_id)
+ delete_files_for_process(server_id)
+ self._poll_zombie_process()
+
+ @result('text/plain')
+ def get_server_output(self, server_id, length):
+ length = int(length)
+ if server_id in self.opened_servers: # check if server_id in self.opened_servers
+ return get_process_output(server_id, length)
+ elif str(server_id) in set(
+ os.listdir(FASTDEPLOYSERVER_PATH)): # check if server_id in
+ # FASTDEPLOYSERVER_PATH(may be launched by other vdl app instance by gunicorn)
+ return get_process_output(server_id, length)
+ else:
+ return
+
+ @result()
+ def get_server_metric(self, server_id):
+ args = get_start_arguments(server_id)
+ host = 'localhost'
+ port = args.get('metrics-port', 8002)
+ return generate_metric_table(host, port)
+
+ @result()
+ def get_server_list(self):
+ return get_alive_fastdeploy_servers()
+
+ @result()
+ def check_server_alive(self, server_id):
+ self._poll_zombie_process()
+ if check_process_zombie(server_id) is True:
+ raise RuntimeError(
+ "Server {} is down due to exception or killed,please check the reason according to the log, \
+ then close this server.".format(server_id))
+ return
+
+ @result()
+ def get_server_config(self, server_id):
+ return get_process_model_configuration(server_id)
+
+ @result()
+ def get_pretrain_model_list(self):
+ '''
+ Get all available fastdeploy models from hub server.
+ '''
+ res = requests.get(
+ 'http://paddlepaddle.org.cn/paddlehub/fastdeploy_listmodels')
+ result = res.json()
+ if result['status'] != 0:
+ raise RuntimeError(
+ "Failed to get pre-trained model list from hub server.")
+ else:
+ data = result['data']
+ model_list = {}
+ for category, models in data.items():
+ if category not in model_list:
+ model_list[category] = set()
+ for model in models:
+ model_list[category].add(model['name'])
+ # adapt data format for frontend
+ models_info = []
+ for category, model_names in model_list.items():
+ models_info.append({
+ "value": category,
+ "label": category,
+ "children": []
+ })
+ for model_name in sorted(model_names):
+ models_info[-1]["children"].append({
+ "value": model_name,
+ "label": model_name
+ })
+ return models_info
+
+ @result()
+ def download_pretrain_model(self, cur_dir, model_name, version,
+ pretrain_model_name):
+ version_resource_dir = os.path.join(
+ os.path.abspath(cur_dir), model_name, version)
+ try:
+ import fastdeploy as fd
+ except Exception:
+ raise RuntimeError(
+ "fastdeploy is required for visualizing results,please refer to \
+ https://github.com/PaddlePaddle/FastDeploy to install fastdeploy")
+ model_path = fd.download_model(
+ name=pretrain_model_name, path=version_resource_dir)
+ if model_path:
+ if '.onnx' in model_path:
+ shutil.move(
+ model_path,
+ os.path.join(os.path.dirname(model_path), 'model.onnx'))
+ else:
+ for filename in os.listdir(model_path):
+ if '.pdmodel' in filename or '.pdiparams' in filename:
+ shutil.move(
+ os.path.join(model_path, filename),
+ os.path.join(
+ os.path.dirname(model_path), 'model{}'.format(
+ os.path.splitext(filename)[1])))
+ else:
+ shutil.move(
+ os.path.join(model_path, filename),
+ os.path.join(
+ os.path.dirname(model_path), filename))
+ shutil.rmtree(model_path)
+ version_info_for_frontend = []
+ for version_name in os.listdir(os.path.join(cur_dir, model_name)):
+ if re.match(
+ r'\d+',
+ version_name): # version directory consists of numbers
+ version_filenames_dict_for_frontend = {}
+ version_filenames_dict_for_frontend['title'] = version_name
+ version_filenames_dict_for_frontend['key'] = version_name
+ version_filenames_dict_for_frontend['children'] = []
+ for filename in os.listdir(
+ os.path.join(cur_dir, model_name, version_name)):
+ version_filenames_dict_for_frontend['children'].append(
+ {
+ 'title': filename,
+ 'key': filename
+ })
+ version_info_for_frontend.append(
+ version_filenames_dict_for_frontend)
+ return version_info_for_frontend
+ else:
+ raise RuntimeError(
+ "Failed to download pre-trained model {}.".format(
+ pretrain_model_name))
+
+ @result()
+ def get_config_for_model(self, cur_dir, name, config_filename):
+ return get_config_for_one_model(cur_dir, name, config_filename)
+
+ @result()
+ def get_config_filenames_for_model(self, cur_dir, name):
+ return get_config_filenames_for_one_model(cur_dir, name)
+
+ @result()
+ def delete_config_for_model(self, cur_dir, name, config_filename):
+ if self.root_dir not in Path(
+ os.path.abspath(cur_dir)
+ ).parents: # should prevent user remove files outside model-repository
+ raise RuntimeError(
+ 'Failed to delete config file, please check filepath.')
+ if os.path.exists(os.path.join(cur_dir, name, config_filename)):
+ os.remove(os.path.join(cur_dir, name, config_filename))
+ return get_config_filenames_for_one_model(cur_dir, name)
+
+ @result()
+ def set_default_config_for_model(self, cur_dir, name, config_filename):
+ model_dir = os.path.join(os.path.abspath(cur_dir), name)
+ # backup config.pbtxt to config_vdlbackup_{datetime}.pbtxt
+ if os.path.exists(os.path.join(model_dir, 'config.pbtxt')):
+ shutil.copy(
+ os.path.join(model_dir, 'config.pbtxt'),
+ os.path.join(
+ model_dir, 'config_vdlbackup_{}.pbtxt'.format(
+ datetime.datetime.now().isoformat())))
+ if config_filename != 'config.pbtxt':
+ copy_config_file_to_default_config(model_dir, config_filename)
+ return
+
+ @result()
+ def delete_resource_for_model(self, cur_dir, model_name, version,
+ resource_filename):
+ if self.root_dir not in Path(
+ os.path.abspath(cur_dir)
+ ).parents: # should prevent user remove files outside model-repository
+ raise RuntimeError(
+ 'Failed to delete resource file, please check filepath.')
+ resource_path = os.path.join(
+ os.path.abspath(cur_dir), model_name, version, resource_filename)
+ if os.path.exists(resource_path):
+ os.remove(resource_path)
+ version_info_for_frontend = []
+ for version_name in os.listdir(os.path.join(cur_dir, model_name)):
+ if re.match(r'\d+',
+ version_name): # version directory consists of numbers
+ version_filenames_dict_for_frontend = {}
+ version_filenames_dict_for_frontend['title'] = version_name
+ version_filenames_dict_for_frontend['key'] = version_name
+ version_filenames_dict_for_frontend['children'] = []
+ for filename in os.listdir(
+ os.path.join(cur_dir, model_name, version_name)):
+ version_filenames_dict_for_frontend['children'].append({
+ 'title':
+ filename,
+ 'key':
+ filename
+ })
+ version_info_for_frontend.append(
+ version_filenames_dict_for_frontend)
+ return version_info_for_frontend
+
+ @result()
+ def rename_resource_for_model(self, cur_dir, model_name, version,
+ resource_filename, new_filename):
+ if self.root_dir not in Path(
+ os.path.abspath(cur_dir)
+ ).parents: # should prevent user remove files outside model-repository
+ raise RuntimeError(
+ 'Failed to rename resource file, please check filepath.')
+ resource_path = os.path.join(
+ os.path.abspath(cur_dir), model_name, version, resource_filename)
+ new_file_path = os.path.join(
+ os.path.abspath(cur_dir), model_name, version, new_filename)
+ if os.path.exists(resource_path):
+ shutil.move(resource_path, new_file_path)
+ version_info_for_frontend = []
+ for version_name in os.listdir(os.path.join(cur_dir, model_name)):
+ if re.match(r'\d+',
+ version_name): # version directory consists of numbers
+ version_filenames_dict_for_frontend = {}
+ version_filenames_dict_for_frontend['title'] = version_name
+ version_filenames_dict_for_frontend['key'] = version_name
+ version_filenames_dict_for_frontend['children'] = []
+ for filename in os.listdir(
+ os.path.join(cur_dir, model_name, version_name)):
+ version_filenames_dict_for_frontend['children'].append({
+ 'title':
+ filename,
+ 'key':
+ filename
+ })
+ version_info_for_frontend.append(
+ version_filenames_dict_for_frontend)
+ return version_info_for_frontend
+
+ def create_fastdeploy_client(self):
+ if self.client_port is None:
+
+ def get_free_tcp_port():
+ tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ # tcp.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
+ tcp.bind(('localhost', 0))
+ addr, port = tcp.getsockname()
+ tcp.close()
+ return port
+
+ self.client_port = get_free_tcp_port()
+ app = create_gradio_client_app()
+ thread = Process(
+ target=app.launch, kwargs={'server_port': self.client_port})
+ thread.start()
+
+ def check_alive():
+ while True:
+ try:
+ requests.get('http://localhost:{}/'.format(
+ self.client_port))
+ break
+ except Exception:
+ time.sleep(1)
+
+ check_alive()
+ return self.client_port
+
+ def _poll_zombie_process(self):
+ # check if there are servers killed by other vdl app instance and become zoombie
+ should_delete = []
+ for server_id, process in self.opened_servers.items():
+ if process.poll() is not None:
+ mark_pid_for_dead_process(server_id)
+ should_delete.append(server_id)
+
+ for server_id in should_delete:
+ del self.opened_servers[server_id]
+
+
+def create_fastdeploy_api_call():
+ api = FastDeployServerApi()
+ routes = {
+ 'get_directory': (api.get_directory, ['dir']),
+ 'config_update': (api.config_update,
+ ['dir', 'name', 'config', 'config_filename']),
+ 'get_config': (api.get_config, ['dir']),
+ 'get_config_filenames_for_model': (api.get_config_filenames_for_model,
+ ['dir', 'name']),
+ 'get_config_for_model': (api.get_config_for_model,
+ ['dir', 'name', 'config_filename']),
+ 'set_default_config_for_model': (api.set_default_config_for_model,
+ ['dir', 'name', 'config_filename']),
+ 'delete_config_for_model': (api.delete_config_for_model,
+ ['dir', 'name', 'config_filename']),
+ 'start_server': (api.start_server, ['config']),
+ 'stop_server': (api.stop_server, ['server_id']),
+ 'get_server_output': (api.get_server_output, ['server_id', 'length']),
+ 'create_fastdeploy_client': (api.create_fastdeploy_client, []),
+ 'get_server_list': (api.get_server_list, []),
+ 'get_server_metric': (api.get_server_metric, ['server_id']),
+ 'get_server_config': (api.get_server_config, ['server_id']),
+ 'get_pretrain_model_list': (api.get_pretrain_model_list, []),
+ 'check_server_alive': (api.check_server_alive, ['server_id']),
+ 'download_pretrain_model':
+ (api.download_pretrain_model,
+ ['dir', 'name', 'version', 'pretrain_model_name']),
+ 'delete_resource_for_model':
+ (api.delete_resource_for_model,
+ ['dir', 'name', 'version', 'resource_filename']),
+ 'rename_resource_for_model': (api.rename_resource_for_model, [
+ 'dir', 'name', 'version', 'resource_filename', 'new_filename'
+ ])
+ }
+
+ def call(path: str, args):
+ route = routes.get(path)
+ if not route:
+ return json.dumps(gen_result(
+ status=1, msg='api not found')), 'application/json', None
+ method, call_arg_names = route
+ call_args = [args.get(name) for name in call_arg_names]
+ return method(*call_args)
+
+ return call
diff --git a/visualdl/component/inference/proto/__init__.py b/visualdl/component/inference/proto/__init__.py
new file mode 100644
index 000000000..9c19f7b87
--- /dev/null
+++ b/visualdl/component/inference/proto/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2022 VisualDL Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =======================================================================
diff --git a/visualdl/component/inference/proto/model_config.protxt b/visualdl/component/inference/proto/model_config.protxt
new file mode 100644
index 000000000..1751f02f7
--- /dev/null
+++ b/visualdl/component/inference/proto/model_config.protxt
@@ -0,0 +1,1981 @@
+// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of NVIDIA CORPORATION nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright (c) 2018, TensorFlow Authors. All rights reserved.
+
+syntax = "proto3";
+
+package inference;
+
+//@@.. cpp:namespace:: inference
+
+//@@
+//@@.. cpp:enum:: DataType
+//@@
+//@@ Data types supported for input and output tensors.
+//@@
+enum DataType {
+ // NOTE: the enumerator names shown in the doc comments below
+ // (DataType::INVALID, DataType::BOOL, ...) omit the TYPE_ prefix that the
+ // proto identifiers carry (TYPE_INVALID, TYPE_BOOL, ...).
+ //@@ .. cpp:enumerator:: DataType::INVALID = 0
+ TYPE_INVALID = 0;
+
+ //@@ .. cpp:enumerator:: DataType::BOOL = 1
+ TYPE_BOOL = 1;
+
+ //@@ .. cpp:enumerator:: DataType::UINT8 = 2
+ TYPE_UINT8 = 2;
+ //@@ .. cpp:enumerator:: DataType::UINT16 = 3
+ TYPE_UINT16 = 3;
+ //@@ .. cpp:enumerator:: DataType::UINT32 = 4
+ TYPE_UINT32 = 4;
+ //@@ .. cpp:enumerator:: DataType::UINT64 = 5
+ TYPE_UINT64 = 5;
+
+ //@@ .. cpp:enumerator:: DataType::INT8 = 6
+ TYPE_INT8 = 6;
+ //@@ .. cpp:enumerator:: DataType::INT16 = 7
+ TYPE_INT16 = 7;
+ //@@ .. cpp:enumerator:: DataType::INT32 = 8
+ TYPE_INT32 = 8;
+ //@@ .. cpp:enumerator:: DataType::INT64 = 9
+ TYPE_INT64 = 9;
+
+ //@@ .. cpp:enumerator:: DataType::FP16 = 10
+ TYPE_FP16 = 10;
+ //@@ .. cpp:enumerator:: DataType::FP32 = 11
+ TYPE_FP32 = 11;
+ //@@ .. cpp:enumerator:: DataType::FP64 = 12
+ TYPE_FP64 = 12;
+
+ //@@ .. cpp:enumerator:: DataType::STRING = 13
+ TYPE_STRING = 13;
+
+ //@@ .. cpp:enumerator:: DataType::BF16 = 14
+ TYPE_BF16 = 14;
+}
+
+//@@
+//@@ .. cpp:var:: message ModelRateLimiter
+//@@
+//@@ The specifications required by the rate limiter to properly
+//@@ schedule the inference requests across the different models
+//@@ and their instances.
+//@@
+message ModelRateLimiter
+{
+ //@@ .. cpp:var:: message Resource
+ //@@
+ //@@ The resource property.
+ //@@
+ message Resource
+ {
+ //@@ .. cpp:var:: string name
+ //@@
+ //@@ The name associated with the resource.
+ //@@
+ string name = 1;
+
+ //@@ .. cpp:var:: bool global
+ //@@
+ //@@ Whether or not the resource is global. If true then the resource
+ //@@ is assumed to be shared among the devices otherwise specified
+ //@@ count of the resource is assumed for each device associated
+ //@@ with the instance.
+ //@@
+ bool global = 2;
+
+ //@@ .. cpp:var:: uint32 count
+ //@@
+ //@@ The number of resources required for the execution of the model
+ //@@ instance.
+ //@@
+ uint32 count = 3;
+ }
+
+ //@@ .. cpp:var:: Resource resources (repeated)
+ //@@
+ //@@ The resources required to execute the request on a model instance.
+ //@@ Resources are just names with a corresponding count. The execution
+ //@@ of the instance will be blocked until the specified resources are
+ //@@ available. By default an instance uses no rate-limiter resources.
+ //@@
+ repeated Resource resources = 1;
+
+ //@@ .. cpp:var:: uint32 priority
+ //@@
+ //@@ The optional weighting value to be used for prioritizing across
+ //@@ instances. An instance with priority 2 will be given 1/2 the
+ //@@ number of scheduling chances as an instance_group with priority
+ //@@ 1. The default priority is 1. The priority of value 0 will be
+ //@@ treated as priority 1.
+ //@@
+ uint32 priority = 2;
+}
+
+//@@
+//@@.. cpp:var:: message ModelInstanceGroup
+//@@
+//@@ A group of one or more instances of a model and resources made
+//@@ available for those instances.
+//@@
+message ModelInstanceGroup
+{
+ //@@
+ //@@ .. cpp:enum:: Kind
+ //@@
+ //@@ Kind of this instance group.
+ //@@
+ enum Kind {
+ //@@ .. cpp:enumerator:: Kind::KIND_AUTO = 0
+ //@@
+ //@@ This instance group represents instances that can run on either
+ //@@ CPU or GPU. If all GPUs listed in 'gpus' are available then
+ //@@ instances will be created on GPU(s), otherwise instances will
+ //@@ be created on CPU.
+ //@@
+ KIND_AUTO = 0;
+
+ //@@ .. cpp:enumerator:: Kind::KIND_GPU = 1
+ //@@
+ //@@ This instance group represents instances that must run on the
+ //@@ GPU.
+ //@@
+ KIND_GPU = 1;
+
+ //@@ .. cpp:enumerator:: Kind::KIND_CPU = 2
+ //@@
+ //@@ This instance group represents instances that must run on the
+ //@@ CPU.
+ //@@
+ KIND_CPU = 2;
+
+ //@@ .. cpp:enumerator:: Kind::KIND_MODEL = 3
+ //@@
+ //@@ This instance group represents instances that should run on the
+ //@@ CPU and/or GPU(s) as specified by the model or backend itself.
+ //@@ The inference server will not override the model/backend
+ //@@ settings.
+ //@@
+ KIND_MODEL = 3;
+ }
+
+ //@@
+ //@@ .. cpp:var:: message SecondaryDevice
+ //@@
+ //@@ A secondary device required for a model instance.
+ //@@
+ message SecondaryDevice
+ {
+ //@@
+ //@@ .. cpp:enum:: SecondaryDeviceKind
+ //@@
+ //@@ The kind of the secondary device.
+ //@@
+ enum SecondaryDeviceKind {
+ //@@ .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0
+ //@@
+ //@@ An NVDLA core. http://nvdla.org
+ //@@ Currently KIND_NVDLA is only supported by the TensorRT backend.
+ //@@
+ KIND_NVDLA = 0;
+ }
+
+ //@@ .. cpp:var:: SecondaryDeviceKind kind
+ //@@
+ //@@ The secondary device kind.
+ //@@
+ SecondaryDeviceKind kind = 1;
+
+ //@@ .. cpp:var:: int64 device_id
+ //@@
+ //@@ Identifier for the secondary device.
+ //@@
+ int64 device_id = 2;
+ }
+
+ //@@ .. cpp:var:: string name
+ //@@
+ //@@ Optional name of this group of instances. If not specified the
+ //@@ name will be formed as <model name>_<group number>. The name of
+ //@@ individual instances will be further formed by a unique instance
+ //@@ number and GPU index:
+ //@@
+ string name = 1;
+
+ //@@ .. cpp:var:: Kind kind
+ //@@
+ //@@ The kind of this instance group. Default is KIND_AUTO. If
+ //@@ KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and
+ //@@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid
+ //@@ and 'gpu' cannot be specified.
+ //@@
+ Kind kind = 4;
+
+ //@@ .. cpp:var:: int32 count
+ //@@
+ //@@ For a group assigned to GPU, the number of instances created for
+ //@@ each GPU listed in 'gpus'. For a group assigned to CPU the number
+ //@@ of instances created. Default is 1.
+ int32 count = 2;
+
+ //@@ .. cpp:var:: ModelRateLimiter rate_limiter
+ //@@
+ //@@ The rate limiter specific settings to be associated with this
+ //@@ instance group. Optional, if not specified no rate limiting
+ //@@ will be applied to this instance group.
+ //@@
+ ModelRateLimiter rate_limiter = 6;
+
+ //@@ .. cpp:var:: int32 gpus (repeated)
+ //@@
+ //@@ GPU(s) where instances should be available. For each GPU listed,
+ //@@ 'count' instances of the model will be available. Setting 'gpus'
+ //@@ to empty (or not specifying at all) is equivalent to listing all
+ //@@ available GPUs.
+ //@@
+ repeated int32 gpus = 3;
+
+ //@@ .. cpp:var:: SecondaryDevice secondary_devices (repeated)
+ //@@
+ //@@ Secondary devices that are required by instances specified by this
+ //@@ instance group. Optional.
+ //@@
+ repeated SecondaryDevice secondary_devices = 8;
+
+ //@@ .. cpp:var:: string profile (repeated)
+ //@@
+ //@@ For TensorRT models containing multiple optimization profile, this
+ //@@ parameter specifies a set of optimization profiles available to this
+ //@@ instance group. The inference server will choose the optimal profile
+ //@@ based on the shapes of the input tensors. This field should lie
+ //@@ between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1
+ //@@ and be specified only for TensorRT backend, otherwise an error will
+ //@@ be generated. If not specified, the server will select the first
+ //@@ optimization profile by default.
+ //@@
+ repeated string profile = 5;
+
+ //@@ .. cpp:var:: bool passive
+ //@@
+ //@@ Whether the instances within this instance group will be accepting
+ //@@ inference requests from the scheduler. If true, the instances will
+ //@@ not be added to the scheduler. Default value is false.
+ //@@
+ bool passive = 7;
+
+ //@@ .. cpp:var:: string host_policy
+ //@@
+ //@@ The host policy name that the instance to be associated with.
+ //@@ The default value is set to reflect the device kind of the instance,
+ //@@ for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and
+ //@@ KIND_GPU is "gpu_<gpu_id>".
+ //@@
+ string host_policy = 9;
+}
+
+//@@
+//@@.. cpp:var:: message ModelTensorReshape
+//@@
+//@@ Reshape specification for input and output tensors.
+//@@
+message ModelTensorReshape
+{
+ // Used as the type of the 'reshape' field in ModelInput and ModelOutput.
+ //@@ .. cpp:var:: int64 shape (repeated)
+ //@@
+ //@@ The shape to use for reshaping.
+ //@@
+ repeated int64 shape = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelInput
+//@@
+//@@ An input required by the model.
+//@@
+message ModelInput
+{
+ //@@
+ //@@ .. cpp:enum:: Format
+ //@@
+ //@@ The format for the input.
+ //@@
+ enum Format {
+ //@@ .. cpp:enumerator:: Format::FORMAT_NONE = 0
+ //@@
+ //@@ The input has no specific format. This is the default.
+ //@@
+ FORMAT_NONE = 0;
+
+ //@@ .. cpp:enumerator:: Format::FORMAT_NHWC = 1
+ //@@
+ //@@ HWC image format. Tensors with this format require 3 dimensions
+ //@@ if the model does not support batching (max_batch_size = 0) or 4
+ //@@ dimensions if the model does support batching (max_batch_size
+ //@@ >= 1). In either case the 'dims' below should only specify the
+ //@@ 3 non-batch dimensions (i.e. HWC or CHW).
+ //@@
+ FORMAT_NHWC = 1;
+
+ //@@ .. cpp:enumerator:: Format::FORMAT_NCHW = 2
+ //@@
+ //@@ CHW image format. Tensors with this format require 3 dimensions
+ //@@ if the model does not support batching (max_batch_size = 0) or 4
+ //@@ dimensions if the model does support batching (max_batch_size
+ //@@ >= 1). In either case the 'dims' below should only specify the
+ //@@ 3 non-batch dimensions (i.e. HWC or CHW).
+ //@@
+ FORMAT_NCHW = 2;
+ }
+
+ //@@ .. cpp:var:: string name
+ //@@
+ //@@ The name of the input.
+ //@@
+ string name = 1;
+
+ //@@ .. cpp:var:: DataType data_type
+ //@@
+ //@@ The data-type of the input.
+ //@@
+ DataType data_type = 2;
+
+ //@@ .. cpp:var:: Format format
+ //@@
+ //@@ The format of the input. Optional.
+ //@@
+ Format format = 3;
+
+ //@@ .. cpp:var:: int64 dims (repeated)
+ //@@
+ //@@ The dimensions/shape of the input tensor that must be provided
+ //@@ when invoking the inference API for this model.
+ //@@
+ repeated int64 dims = 4;
+
+ //@@ .. cpp:var:: ModelTensorReshape reshape
+ //@@
+ //@@ The shape expected for this input by the backend. The input will
+ //@@ be reshaped to this before being presented to the backend. The
+ //@@ reshape must have the same number of elements as the input shape
+ //@@ specified by 'dims'. Optional.
+ //@@
+ ModelTensorReshape reshape = 5;
+
+ //@@ .. cpp:var:: bool is_shape_tensor
+ //@@
+ //@@ Whether or not the input is a shape tensor to the model. This field
+ //@@ is currently supported only for the TensorRT model. An error will be
+ //@@ generated if this specification does not comply with underlying
+ //@@ model.
+ //@@
+ bool is_shape_tensor = 6;
+
+ //@@ .. cpp:var:: bool allow_ragged_batch
+ //@@
+ //@@ Whether or not the input is allowed to be "ragged" in a dynamically
+ //@@ created batch. Default is false indicating that two requests will
+ //@@ only be batched if this tensor has the same shape in both requests.
+ //@@ True indicates that two requests can be batched even if this tensor
+ //@@ has a different shape in each request.
+ //@@
+ bool allow_ragged_batch = 7;
+
+ //@@ .. cpp:var:: bool optional
+ //@@
+ //@@ Whether or not the input is optional for the model execution.
+ //@@ If true, the input is not required in the inference request.
+ //@@ Default value is false.
+ //@@
+ // NOTE(review): 'optional' is used here as an ordinary field name, while
+ // protobuf also has a field label spelled the same way - confirm the
+ // protoc version used for this file accepts it.
+ bool optional = 8;
+}
+
+//@@
+//@@.. cpp:var:: message ModelOutput
+//@@
+//@@ An output produced by the model.
+//@@
+message ModelOutput
+{
+ //@@ .. cpp:var:: string name
+ //@@
+ //@@ The name of the output.
+ //@@
+ string name = 1;
+
+ //@@ .. cpp:var:: DataType data_type
+ //@@
+ //@@ The data-type of the output.
+ //@@
+ DataType data_type = 2;
+
+ //@@ .. cpp:var:: int64 dims (repeated)
+ //@@
+ //@@ The dimensions/shape of the output tensor.
+ //@@
+ repeated int64 dims = 3;
+
+ //@@ .. cpp:var:: ModelTensorReshape reshape
+ //@@
+ //@@ The shape produced for this output by the backend. The output will
+ //@@ be reshaped from this to the shape specified in 'dims' before being
+ //@@ returned in the inference response. The reshape must have the same
+ //@@ number of elements as the output shape specified by 'dims'. Optional.
+ //@@
+ ModelTensorReshape reshape = 5;
+
+ //@@ .. cpp:var:: string label_filename
+ //@@
+ //@@ The label file associated with this output. Should be specified only
+ //@@ for outputs that represent classifications. Optional.
+ //@@
+ string label_filename = 4;
+
+
+ //@@ .. cpp:var:: bool is_shape_tensor
+ //@@
+ //@@ Whether or not the output is a shape tensor to the model. This field
+ //@@ is currently supported only for the TensorRT model. An error will be
+ //@@ generated if this specification does not comply with underlying
+ //@@ model.
+ //@@
+ bool is_shape_tensor = 6;
+}
+
+//@@ .. cpp:var:: message BatchInput
+//@@
+//@@ A batch input is an additional input that must be added by
+//@@ the backend based on all the requests in a batch.
+//@@
+message BatchInput
+{
+ //@@
+ //@@ .. cpp:enum:: Kind
+ //@@
+ //@@ The kind of the batch input.
+ //@@
+ enum Kind {
+ //@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0
+ //@@
+ //@@ The element count of the 'source_input' will be added as
+ //@@ input with shape [1].
+ //@@
+ BATCH_ELEMENT_COUNT = 0;
+
+ //@@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1
+ //@@
+ //@@ The accumulated element count of the 'source_input' will be
+ //@@ added as input with shape [1]. For example, if there is a
+ //@@ batch of two requests, each with 2 elements, an input of value
+ //@@ 2 will be added to the first request, and an input of value
+ //@@ 4 will be added to the second request.
+ //@@
+ BATCH_ACCUMULATED_ELEMENT_COUNT = 1;
+
+ //@@ .. cpp:enumerator::
+ //@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2
+ //@@
+ //@@ The accumulated element count of the 'source_input' will be
+ //@@ added as input with shape [1], except for the first request
+ //@@ in the batch. For the first request in the batch, the input
+ //@@ will have shape [2] where the first element is value 0.
+ //@@
+ BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2;
+
+ //@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3
+ //@@
+ //@@ Among the requests in the batch, the max element count of the
+ //@@ 'source_input' will be added as input with shape
+ //@@ [max_element_count] for the first request in the batch.
+ //@@ For the other requests, the input will have shape [0].
+ //@@ The data of the tensor will be uninitialized.
+ //@@
+ BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3;
+
+ //@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4
+ //@@
+ //@@ Among the requests in the batch, the shape of the
+ //@@ 'source_input' will be added as input with shape
+ //@@ [batch_size, len(input_dim)]. For example, if one
+ //@@ batch-2 input with shape [3, 1] and one batch-1 input
+ //@@ with shape [2, 2] are batched, the batch input will
+ //@@ have shape [3, 2] and value [[3, 1], [3, 1], [2, 2]].
+ //@@
+ BATCH_ITEM_SHAPE = 4;
+
+ //@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5
+ //@@
+ //@@ Among the requests in the batch, the shape of the
+ //@@ 'source_input' will be added as input with single dimensional
+ //@@ shape [batch_size * len(input_dim)]. For example, if one
+ //@@ batch-2 input with shape [3, 1] and one batch-1 input
+ //@@ with shape [2, 2] are batched, the batch input will
+ //@@ have shape [6] and value [3, 1, 3, 1, 2, 2].
+ //@@
+ BATCH_ITEM_SHAPE_FLATTEN = 5;
+ }
+
+ //@@ .. cpp:var:: Kind kind
+ //@@
+ //@@ The kind of this batch input.
+ //@@
+ Kind kind = 1;
+
+ //@@ .. cpp:var:: string target_name (repeated)
+ //@@
+ //@@ The names of the model inputs that the backend will create
+ //@@ for this batch input.
+ //@@
+ repeated string target_name = 2;
+
+ //@@ .. cpp:var:: DataType data_type
+ //@@
+ //@@ The input's datatype. The data type can be TYPE_INT32 or
+ //@@ TYPE_FP32.
+ //@@
+ DataType data_type = 3;
+
+ //@@ .. cpp:var:: string source_input (repeated)
+ //@@
+ //@@ The backend derives the value for each batch input from one or
+ //@@ more other inputs. 'source_input' gives the names of those
+ //@@ inputs.
+ //@@
+ repeated string source_input = 4;
+}
+
+//@@.. cpp:var:: message BatchOutput
+//@@
+//@@ A batch output is an output produced by the model that must be handled
+//@@ differently by the backend based on all the requests in a batch.
+//@@
+message BatchOutput
+{
+ //@@
+ //@@ .. cpp:enum:: Kind
+ //@@
+ //@@ The kind of the batch output.
+ //@@
+ enum Kind {
+ //@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0
+ //@@
+ //@@ The output should be scattered according to the shape of
+ //@@ 'source_input'. The dynamic dimension of the output will
+ //@@ be set to the value of the same dimension in the input.
+ //@@
+ BATCH_SCATTER_WITH_INPUT_SHAPE = 0;
+ }
+
+ //@@ .. cpp:var:: string target_name (repeated)
+ //@@
+ //@@ The names of the outputs to be produced by this batch output
+ //@@ specification.
+ //@@
+ repeated string target_name = 1;
+
+ //@@ .. cpp:var:: Kind kind
+ //@@
+ //@@ The kind of this batch output.
+ //@@
+ Kind kind = 2;
+
+ //@@ .. cpp:var:: string source_input (repeated)
+ //@@
+ //@@ The backend derives each batch output from one or more inputs.
+ //@@ 'source_input' gives the names of those inputs.
+ //@@
+ repeated string source_input = 3;
+}
+
+//@@
+//@@.. cpp:var:: message ModelVersionPolicy
+//@@
+//@@ Policy indicating which versions of a model should be made
+//@@ available by the inference server.
+//@@
+message ModelVersionPolicy
+{
+ //@@ .. cpp:var:: message Latest
+ //@@
+ //@@ Serve only the latest version(s) of a model. This is
+ //@@ the default policy.
+ //@@
+ message Latest
+ {
+ //@@ .. cpp:var:: uint32 num_versions
+ //@@
+ //@@ Serve only the 'num_versions' highest-numbered versions.
+ //@@ The default value of 'num_versions' is 1, indicating that by
+ //@@ default only the single highest-numbered version of a
+ //@@ model will be served.
+ //@@
+ uint32 num_versions = 1;
+ }
+
+ //@@ .. cpp:var:: message All
+ //@@
+ //@@ Serve all versions of the model.
+ //@@
+ message All {}
+
+ //@@ .. cpp:var:: message Specific
+ //@@
+ //@@ Serve only specific versions of the model.
+ //@@
+ message Specific
+ {
+ //@@ .. cpp:var:: int64 versions (repeated)
+ //@@
+ //@@ The specific versions of the model that will be served.
+ //@@
+ repeated int64 versions = 1;
+ }
+
+ //@@ .. cpp:var:: oneof policy_choice
+ //@@
+ //@@ Each model must implement only a single version policy. The
+ //@@ default policy is 'Latest'.
+ //@@
+ oneof policy_choice
+ {
+ //@@ .. cpp:var:: Latest latest
+ //@@
+ //@@ Serve only latest version(s) of the model.
+ //@@
+ Latest latest = 1;
+
+ //@@ .. cpp:var:: All all
+ //@@
+ //@@ Serve all versions of the model.
+ //@@
+ All all = 2;
+
+ //@@ .. cpp:var:: Specific specific
+ //@@
+ //@@ Serve only specific version(s) of the model.
+ //@@
+ Specific specific = 3;
+ }
+}
+
+//@@
+//@@.. cpp:var:: message ModelOptimizationPolicy
+//@@
+//@@ Optimization settings for a model. These settings control if/how a
+//@@ model is optimized and prioritized by the backend framework when
+//@@ it is loaded.
+//@@
+message ModelOptimizationPolicy
+{
+ //@@
+ //@@ .. cpp:var:: message Graph
+ //@@
+ //@@ Enable generic graph optimization of the model. If not specified
+ //@@ the framework's default level of optimization is used. Supports
+ //@@ TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow
+ //@@ causes XLA to be enabled/disabled for the model. For Onnx defaults
+ //@@ to enabling all optimizations, -1 enables only basic optimizations,
+ //@@ +1 enables only basic and extended optimizations.
+ //@@
+ message Graph
+ {
+ //@@ .. cpp:var:: int32 level
+ //@@
+ //@@ The optimization level. Defaults to 0 (zero) if not specified.
+ //@@
+ //@@ - -1: Disabled
+ //@@ - 0: Framework default
+ //@@ - 1+: Enable optimization level (greater values indicate
+ //@@ higher optimization levels)
+ //@@
+ int32 level = 1;
+ }
+
+ //@@
+ //@@ .. cpp:enum:: ModelPriority
+ //@@
+ //@@ Model priorities. A model will be given scheduling and execution
+ //@@ preference over models at lower priorities. Current model
+ //@@ priorities only work for TensorRT models.
+ //@@
+ enum ModelPriority {
+ //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0
+ //@@
+ //@@ The default model priority.
+ //@@
+ PRIORITY_DEFAULT = 0;
+
+ //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1
+ //@@
+ //@@ The maximum model priority.
+ //@@
+ PRIORITY_MAX = 1;
+
+ //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2
+ //@@
+ //@@ The minimum model priority.
+ //@@
+ PRIORITY_MIN = 2;
+ }
+
+ //@@
+ //@@ .. cpp:var:: message Cuda
+ //@@
+ //@@ CUDA-specific optimization settings.
+ //@@
+ message Cuda
+ {
+ //@@ .. cpp:var:: message GraphSpec
+ //@@
+ //@@ Specification of the CUDA graph to be captured.
+ //@@
+ message GraphSpec
+ {
+ //@@ .. cpp:var:: message Shape
+ //@@
+ //@@ Specification of tensor dimension.
+ //@@
+ message Shape
+ {
+ //@@ .. cpp:var:: int64 dim (repeated)
+ //@@
+ //@@ The dimension.
+ //@@
+ repeated int64 dim = 1;
+ }
+
+ //@@ .. cpp:var:: message LowerBound
+ //@@
+ //@@ Specification of the lower bound of the CUDA graph: the
+ //@@ batch size and input shapes that bound, from below, the range
+ //@@ the captured graph may be used for (see 'graph_lower_bound').
+ //@@
+ message LowerBound
+ {
+ //@@ .. cpp:var:: int32 batch_size
+ //@@
+ //@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
+ //@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
+ //@@ be set to value between 1 and 'max_batch_size'.
+ //@@
+ int32 batch_size = 1;
+
+ //@@ .. cpp:var:: map<string, Shape> input
+ //@@
+ //@@ The specification of the inputs. 'Shape' is the shape of
+ //@@ the input without batching dimension.
+ //@@
+ map<string, Shape> input = 2;
+ }
+
+ //@@ .. cpp:var:: int32 batch_size
+ //@@
+ //@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
+ //@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
+ //@@ be set to value between 1 and 'max_batch_size'.
+ //@@
+ int32 batch_size = 1;
+
+ //@@ .. cpp:var:: map<string, Shape> input
+ //@@
+ //@@ The specification of the inputs. 'Shape' is the shape of the
+ //@@ input without batching dimension.
+ //@@
+ map<string, Shape> input = 2;
+
+ //@@ .. cpp:var:: LowerBound graph_lower_bound
+ //@@
+ //@@ Specify the lower bound of the CUDA graph. Optional.
+ //@@ If specified, the graph can be used for input shapes and
+ //@@ batch sizes that are in closed interval between the lower
+ //@@ bound specification and graph specification. For dynamic
+ //@@ shape model, this allows CUDA graphs to be launched
+ //@@ frequently without capturing all possible shape combinations.
+ //@@ However, using graph for shape combinations different from
+ //@@ the one used for capturing introduces uninitialized data for
+ //@@ execution and it may distort the inference result if
+ //@@ the model is sensitive to uninitialized data.
+ //@@
+ LowerBound graph_lower_bound = 3;
+ }
+
+ //@@ .. cpp:var:: bool graphs
+ //@@
+ //@@ Use CUDA graphs API to capture model operations and execute
+ //@@ them more efficiently. Default value is false.
+ //@@ Currently only recognized by TensorRT backend.
+ //@@
+ bool graphs = 1;
+
+ //@@ .. cpp:var:: bool busy_wait_events
+ //@@
+ //@@ Use busy-waiting to synchronize CUDA events to achieve minimum
+ //@@ latency from event complete to host thread to be notified, with
+ //@@ the cost of high CPU load. Default value is false.
+ //@@ Currently only recognized by TensorRT backend.
+ //@@
+ bool busy_wait_events = 2;
+
+ //@@ .. cpp:var:: GraphSpec graph_spec (repeated)
+ //@@
+ //@@ Specification of the CUDA graph to be captured. If not specified
+ //@@ and 'graphs' is true, the default CUDA graphs will be captured
+ //@@ based on model settings.
+ //@@ Currently only recognized by TensorRT backend.
+ //@@
+ repeated GraphSpec graph_spec = 3;
+
+ //@@ .. cpp:var:: bool output_copy_stream
+ //@@
+ //@@ Uses a CUDA stream separate from the inference stream to copy the
+ //@@ output to host. However, be aware that setting this option to
+ //@@ true will lead to an increase in the memory consumption of the
+ //@@ model as Triton will allocate twice as much GPU memory for its
+ //@@ I/O tensor buffers. Default value is false.
+ //@@ Currently only recognized by TensorRT backend.
+ //@@
+ bool output_copy_stream = 4;
+ }
+
+ //@@
+ //@@ .. cpp:var:: message ExecutionAccelerators
+ //@@
+ //@@ Specify the preferred execution accelerators to be used to execute
+ //@@ the model. Currently only recognized by ONNX Runtime backend and
+ //@@ TensorFlow backend.
+ //@@
+ //@@ For ONNX Runtime backend, it will deploy the model with the execution
+ //@@ accelerators by priority, the priority is determined based on the
+ //@@ order that they are set, i.e. the provider at the front has highest
+ //@@ priority. Overall, the priority will be in the following order:
+ //@@ 'gpu_execution_accelerator' entries (if instance is on GPU)
+ //@@ CUDA Execution Provider (if instance is on GPU)
+ //@@ 'cpu_execution_accelerator' entries
+ //@@ Default CPU Execution Provider
+ //@@
+ message ExecutionAccelerators
+ {
+ //@@
+ //@@ .. cpp:var:: message Accelerator
+ //@@
+ //@@ Specify the accelerator to be used to execute the model.
+ //@@ Accelerator with the same name may accept different parameters
+ //@@ depending on the backends.
+ //@@
+ message Accelerator
+ {
+ //@@ .. cpp:var:: string name
+ //@@
+ //@@ The name of the execution accelerator.
+ //@@
+ string name = 1;
+
+ //@@ .. cpp:var:: map<string, string> parameters
+ //@@
+ //@@ Additional parameters used to configure the accelerator.
+ //@@
+ map<string, string> parameters = 2;
+ }
+
+ //@@ .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
+ //@@
+ //@@ The preferred execution provider to be used if the model instance
+ //@@ is deployed on GPU.
+ //@@
+ //@@ For ONNX Runtime backend, possible value is "tensorrt" as name,
+ //@@ and no parameters are required.
+ //@@
+ //@@ For TensorFlow backend, possible values are "tensorrt",
+ //@@ "auto_mixed_precision", "gpu_io".
+ //@@
+ //@@ For "tensorrt", the following parameters can be specified:
+ //@@ "precision_mode": The precision used for optimization.
+ //@@ Allowed values are "FP32" and "FP16". Default value is "FP32".
+ //@@
+ //@@ "max_cached_engines": The maximum number of cached TensorRT
+ //@@ engines in dynamic TensorRT ops. Default value is 100.
+ //@@
+ //@@ "minimum_segment_size": The smallest model subgraph that will
+ //@@ be considered for optimization by TensorRT. Default value is 3.
+ //@@
+ //@@ "max_workspace_size_bytes": The maximum GPU memory the model
+ //@@ can use temporarily during execution. Default value is 1GB.
+ //@@
+ //@@ For "auto_mixed_precision", no parameters are required. If set,
+ //@@ the model will try to use FP16 for better performance.
+ //@@ This optimization can not be set with "tensorrt".
+ //@@
+ //@@ For "gpu_io", no parameters are required. If set, the model will
+ //@@ be executed using TensorFlow Callable API to set input and output
+ //@@ tensors in GPU memory if possible, which can reduce data transfer
+ //@@ overhead if the model is used in ensemble. However, the Callable
+ //@@ object will be created on model creation and it will request all
+ //@@ outputs for every model execution, which may impact the
+ //@@ performance if a request does not require all outputs. This
+ //@@ optimization will only take effect if the model instance is
+ //@@ created with KIND_GPU.
+ //@@
+ repeated Accelerator gpu_execution_accelerator = 1;
+
+ //@@ .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
+ //@@
+ //@@ The preferred execution provider to be used if the model instance
+ //@@ is deployed on CPU.
+ //@@
+ //@@ For ONNX Runtime backend, possible value is "openvino" as name,
+ //@@ and no parameters are required.
+ //@@
+ repeated Accelerator cpu_execution_accelerator = 2;
+ }
+
+ //@@
+ //@@ .. cpp:var:: message PinnedMemoryBuffer
+ //@@
+ //@@ Specify whether to use a pinned memory buffer when transferring data
+ //@@ between non-pinned system memory and GPU memory. Using a pinned
+ //@@ memory buffer for system from/to GPU transfers will typically provide
+ //@@ increased performance. For example, in the common use case where the
+ //@@ request provides inputs and delivers outputs via non-pinned system
+ //@@ memory, if the model instance accepts GPU IOs, the inputs will be
+ //@@ processed by two copies: from non-pinned system memory to pinned
+ //@@ memory, and from pinned memory to GPU memory. Similarly, pinned
+ //@@ memory will be used for delivering the outputs.
+ //@@
+ message PinnedMemoryBuffer
+ {
+ //@@ .. cpp:var:: bool enable
+ //@@
+ //@@ Use pinned memory buffer. Default is true.
+ //@@
+ bool enable = 1;
+ }
+
+ //@@ .. cpp:var:: Graph graph
+ //@@
+ //@@ The graph optimization setting for the model. Optional.
+ //@@
+ Graph graph = 1;
+
+ //@@ .. cpp:var:: ModelPriority priority
+ //@@
+ //@@ The priority setting for the model. Optional.
+ //@@
+ ModelPriority priority = 2;
+
+ //@@ .. cpp:var:: Cuda cuda
+ //@@
+ //@@ CUDA-specific optimization settings. Optional.
+ //@@
+ Cuda cuda = 3;
+
+ //@@ .. cpp:var:: ExecutionAccelerators execution_accelerators
+ //@@
+ //@@ The accelerators used for the model. Optional.
+ //@@
+ ExecutionAccelerators execution_accelerators = 4;
+
+ //@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory
+ //@@
+ //@@ Use pinned memory buffer when the data transfer for inputs
+ //@@ is between GPU memory and non-pinned system memory.
+ //@@ Default is true.
+ //@@
+ PinnedMemoryBuffer input_pinned_memory = 5;
+
+ //@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory
+ //@@
+ //@@ Use pinned memory buffer when the data transfer for outputs
+ //@@ is between GPU memory and non-pinned system memory.
+ //@@ Default is true.
+ //@@
+ PinnedMemoryBuffer output_pinned_memory = 6;
+
+ //@@ .. cpp:var:: uint32 gather_kernel_buffer_threshold
+ //@@
+ //@@ The backend may use a gather kernel to gather input data if the
+ //@@ device has direct access to the source buffer and the destination
+ //@@ buffer. In such case, the gather kernel will be used only if the
+ //@@ number of buffers to be gathered is greater or equal to
+ //@@ the specified value. If 0, the gather kernel will be disabled.
+ //@@ Default value is 0.
+ //@@ Currently only recognized by TensorRT backend.
+ //@@
+ uint32 gather_kernel_buffer_threshold = 7;
+
+ //@@ .. cpp:var:: bool eager_batching
+ //@@
+ //@@ Start preparing the next batch before the model instance is ready
+ //@@ for the next inference. This option can be used to overlap the
+ //@@ batch preparation with model execution, with the trade-off that
+ //@@ the next batch might be smaller than what it could have been.
+ //@@ Default value is false.
+ //@@ Currently only recognized by TensorRT backend.
+ //@@
+ bool eager_batching = 8;
+}
+
+//@@
+//@@.. cpp:var:: message ModelQueuePolicy
+//@@
+//@@ Queue policy for inference requests.
+//@@
+message ModelQueuePolicy
+{
+ //@@
+ //@@ .. cpp:enum:: TimeoutAction
+ //@@
+ //@@ The action applied to timed-out requests.
+ //@@
+ enum TimeoutAction {
+ //@@ .. cpp:enumerator:: TimeoutAction::REJECT = 0
+ //@@
+ //@@ Reject the request and return error message accordingly.
+ //@@
+ REJECT = 0;
+
+ //@@ .. cpp:enumerator:: TimeoutAction::DELAY = 1
+ //@@
+ //@@ Delay the request until all other requests at the same
+ //@@ (or higher) priority levels that have not reached their timeouts
+ //@@ are processed. A delayed request will eventually be processed,
+ //@@ but may be delayed indefinitely due to newly arriving requests.
+ //@@
+ DELAY = 1;
+ }
+
+ //@@
+ //@@ .. cpp:var:: TimeoutAction timeout_action
+ //@@
+ //@@ The action applied to timed-out requests.
+ //@@ The default action is REJECT.
+ //@@
+ TimeoutAction timeout_action = 1;
+
+ //@@
+ //@@ .. cpp:var:: uint64 default_timeout_microseconds
+ //@@
+ //@@ The default timeout for every request, in microseconds.
+ //@@ The default value is 0 which indicates that no timeout is set.
+ //@@
+ uint64 default_timeout_microseconds = 2;
+
+ //@@
+ //@@ .. cpp:var:: bool allow_timeout_override
+ //@@
+ //@@ Whether an individual request can override the default timeout
+ //@@ value. When true, individual requests can set a timeout that is
+ //@@ less than the default timeout value but may not increase the
+ //@@ timeout. The default value is false.
+ //@@
+ bool allow_timeout_override = 3;
+
+ //@@
+ //@@ .. cpp:var:: uint32 max_queue_size
+ //@@
+ //@@ The maximum queue size for holding requests. A request will be
+ //@@ rejected immediately if it can't be enqueued because the queue is
+ //@@ full. The default value is 0 which indicates that no maximum
+ //@@ queue size is enforced.
+ //@@
+ uint32 max_queue_size = 4;
+}
+
+//@@
+//@@.. cpp:var:: message ModelDynamicBatching
+//@@
+//@@ Dynamic batching configuration. These settings control how dynamic
+//@@ batching operates for the model.
+//@@
+message ModelDynamicBatching
+{
+ //@@ .. cpp:var:: int32 preferred_batch_size (repeated)
+ //@@
+ //@@ Preferred batch sizes for dynamic batching. If a batch of one of
+ //@@ these sizes can be formed it will be executed immediately. If
+ //@@ not specified a preferred batch size will be chosen automatically
+ //@@ based on model and GPU characteristics.
+ //@@
+ repeated int32 preferred_batch_size = 1;
+
+ //@@ .. cpp:var:: uint64 max_queue_delay_microseconds
+ //@@
+ //@@ The maximum time, in microseconds, a request will be delayed in
+ //@@ the scheduling queue to wait for additional requests for
+ //@@ batching. Default is 0.
+ //@@
+ uint64 max_queue_delay_microseconds = 2;
+
+ //@@ .. cpp:var:: bool preserve_ordering
+ //@@
+ //@@ Should the dynamic batcher preserve the ordering of responses to
+ //@@ match the order of requests received by the scheduler. Default is
+ //@@ false. If true, the responses will be returned in the same order as
+ //@@ the order of requests sent to the scheduler. If false, the responses
+ //@@ may be returned in arbitrary order. This option is specifically
+ //@@ needed when a sequence of related inference requests (i.e. inference
+ //@@ requests with the same correlation ID) are sent to the dynamic
+ //@@ batcher to ensure that the sequence responses are in the correct
+ //@@ order.
+ //@@
+ bool preserve_ordering = 3;
+
+ //@@ .. cpp:var:: uint32 priority_levels
+ //@@
+ //@@ The number of priority levels to be enabled for the model,
+ //@@ the priority level starts from 1 and 1 is the highest priority.
+ //@@ Requests are handled in priority order with all priority 1 requests
+ //@@ processed before priority 2, all priority 2 requests processed before
+ //@@ priority 3, etc. Requests with the same priority level will be
+ //@@ handled in the order that they are received.
+ //@@
+ uint32 priority_levels = 4;
+
+ //@@ .. cpp:var:: uint32 default_priority_level
+ //@@
+ //@@ The priority level used for requests that don't specify their
+ //@@ priority. The value must be in the range [ 1, 'priority_levels' ].
+ //@@
+ uint32 default_priority_level = 5;
+
+ //@@ .. cpp:var:: ModelQueuePolicy default_queue_policy
+ //@@
+ //@@ The default queue policy used for requests that don't require
+ //@@ priority handling and requests that specify priority levels where
+ //@@ there is no specific policy given. If not specified, a policy with
+ //@@ default field values will be used.
+ //@@
+ ModelQueuePolicy default_queue_policy = 6;
+
+ //@@ .. cpp:var:: map<uint32, ModelQueuePolicy> priority_queue_policy
+ //@@
+ //@@ Specify the queue policy for the priority level; the map key is
+ //@@ the priority level. The default queue policy will be used if a
+ //@@ priority level doesn't specify a queue policy.
+ //@@
+ map<uint32, ModelQueuePolicy> priority_queue_policy = 7;
+}
+
+//@@
+//@@.. cpp:var:: message ModelSequenceBatching
+//@@
+//@@ Sequence batching configuration. These settings control how sequence
+//@@ batching operates for the model.
+//@@
+message ModelSequenceBatching
+{
+ //@@ .. cpp:var:: message Control
+ //@@
+ //@@ A control is a signal that the sequence batcher uses to
+ //@@ communicate with a backend.
+ //@@
+ message Control
+ {
+ //@@
+ //@@ .. cpp:enum:: Kind
+ //@@
+ //@@ The kind of the control.
+ //@@
+ enum Kind {
+ //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0
+ //@@
+ //@@ A new sequence is/is-not starting. If true a sequence is
+ //@@ starting, if false a sequence is continuing. Must
+ //@@ specify either int32_false_true, fp32_false_true or
+ //@@ bool_false_true for this control. This control is optional.
+ //@@
+ CONTROL_SEQUENCE_START = 0;
+
+ //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1
+ //@@
+ //@@ A sequence is/is-not ready for inference. If true the
+ //@@ input tensor data is valid and should be used. If false
+ //@@ the input tensor data is invalid and inferencing should
+ //@@ be "skipped". Must specify either int32_false_true,
+ //@@ fp32_false_true or bool_false_true for this control. This
+ //@@ control is optional.
+ //@@
+ CONTROL_SEQUENCE_READY = 1;
+
+ //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2
+ //@@
+ //@@ A sequence is/is-not ending. If true a sequence is
+ //@@ ending, if false a sequence is continuing. Must specify
+ //@@ either int32_false_true, fp32_false_true or bool_false_true
+ //@@ for this control. This control is optional.
+ //@@
+ CONTROL_SEQUENCE_END = 2;
+
+ //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3
+ //@@
+ //@@ The correlation ID of the sequence. The correlation ID
+ //@@ is a uint64_t value that is communicated in whole or
+ //@@ in part by the tensor. The tensor's datatype must be
+ //@@ specified by data_type and must be TYPE_UINT64, TYPE_INT64,
+ //@@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified
+ //@@ the correlation ID will be truncated to the low-order 32
+ //@@ bits. This control is optional.
+ //@@
+ CONTROL_SEQUENCE_CORRID = 3;
+ }
+
+ //@@ .. cpp:var:: Kind kind
+ //@@
+ //@@ The kind of this control.
+ //@@
+ Kind kind = 1;
+
+ //@@ .. cpp:var:: int32 int32_false_true (repeated)
+ //@@
+ //@@ The control's true and false setting is indicated by setting
+ //@@ a value in an int32 tensor. The tensor must be a
+ //@@ 1-dimensional tensor with size equal to the batch size of
+ //@@ the request. 'int32_false_true' must have two entries: the
+ //@@ first the false value and the second the true value.
+ //@@
+ repeated int32 int32_false_true = 2;
+
+ //@@ .. cpp:var:: float fp32_false_true (repeated)
+ //@@
+ //@@ The control's true and false setting is indicated by setting
+ //@@ a value in a fp32 tensor. The tensor must be a
+ //@@ 1-dimensional tensor with size equal to the batch size of
+ //@@ the request. 'fp32_false_true' must have two entries: the
+ //@@ first the false value and the second the true value.
+ //@@
+ repeated float fp32_false_true = 3;
+
+ //@@ .. cpp:var:: bool bool_false_true (repeated)
+ //@@
+ //@@ The control's true and false setting is indicated by setting
+ //@@ a value in a bool tensor. The tensor must be a
+ //@@ 1-dimensional tensor with size equal to the batch size of
+ //@@ the request. 'bool_false_true' must have two entries: the
+ //@@ first the false value and the second the true value.
+ //@@
+ repeated bool bool_false_true = 5;
+
+ //@@ .. cpp:var:: DataType data_type
+ //@@
+ //@@ The control's datatype.
+ //@@
+ DataType data_type = 4;
+ }
+
+ //@@ .. cpp:var:: message ControlInput
+ //@@
+ //@@ The sequence control values to communicate by a model input.
+ //@@
+ message ControlInput
+ {
+ //@@ .. cpp:var:: string name
+ //@@
+ //@@ The name of the model input.
+ //@@
+ string name = 1;
+
+ //@@ .. cpp:var:: Control control (repeated)
+ //@@
+ //@@ The control value(s) that should be communicated to the
+ //@@ model using this model input.
+ //@@
+ repeated Control control = 2;
+ }
+
+ //@@
+ //@@ .. cpp:var:: message InitialState
+ //@@
+ //@@ Settings used to initialize data for implicit state.
+ //@@
+ message InitialState
+ {
+ //@@ .. cpp:var:: DataType data_type
+ //@@
+ //@@ The data-type of the state.
+ //@@
+ DataType data_type = 1;
+
+ //@@ .. cpp:var:: int64 dims (repeated)
+ //@@
+ //@@ The shape of the state tensor, not including the batch dimension.
+ //@@
+ repeated int64 dims = 2;
+
+ //@@ .. cpp:var:: oneof state_data
+ //@@
+ //@@ Specify how the initial state data is generated.
+ //@@
+ oneof state_data
+ {
+ //@@
+ //@@ .. cpp:var:: bool zero_data
+ //@@
+ //@@ The identifier for using zeros as initial state data.
+ //@@ Note that the value of 'zero_data' will not be checked,
+ //@@ instead, zero data will be used as long as the field is set.
+ //@@
+ bool zero_data = 3;
+
+ //@@ .. cpp:var:: string data_file
+ //@@
+ //@@ The file whose content will be used as the initial data for
+ //@@ the state in row-major order. The file must be provided in
+ //@@ sub-directory 'initial_state' under the model directory.
+ //@@
+ string data_file = 4;
+ }
+
+ //@@ .. cpp:var:: string name
+ //@@
+ //@@ The name of the state initialization.
+ //@@
+ string name = 5;
+ }
+
+ //@@ .. cpp:var:: message State
+ //@@
+ //@@ An input / output pair of tensors that carry state for the sequence.
+ //@@
+ message State
+ {
+ //@@ .. cpp:var:: string input_name
+ //@@
+ //@@ The name of the model state input.
+ //@@
+ string input_name = 1;
+
+ //@@ .. cpp:var:: string output_name
+ //@@
+ //@@ The name of the model state output.
+ //@@
+ string output_name = 2;
+
+ //@@ .. cpp:var:: DataType data_type
+ //@@
+ //@@ The data-type of the state.
+ //@@
+ DataType data_type = 3;
+
+ //@@ .. cpp:var:: int64 dims (repeated)
+ //@@
+ //@@ The dimension(s) of the state tensor.
+ //@@
+ repeated int64 dims = 4;
+
+ //@@ .. cpp:var:: InitialState initial_state (repeated)
+ //@@
+ //@@ The optional field to specify the initial state for the model.
+ //@@
+ repeated InitialState initial_state = 5;
+ }
+
+ //@@ .. cpp:var:: message StrategyDirect
+ //@@
+ //@@ The sequence batcher uses a specific, unique batch
+ //@@ slot for each sequence. All inference requests in a
+ //@@ sequence are directed to the same batch slot in the same
+ //@@ model instance over the lifetime of the sequence. This
+ //@@ is the default strategy.
+ //@@
+ message StrategyDirect
+ {
+ //@@ .. cpp:var:: uint64 max_queue_delay_microseconds
+ //@@
+ //@@ The maximum time, in microseconds, a candidate request
+ //@@ will be delayed in the sequence batch scheduling queue to
+ //@@ wait for additional requests for batching. Default is 0.
+ //@@
+ uint64 max_queue_delay_microseconds = 1;
+
+ //@@ .. cpp:var:: float minimum_slot_utilization
+ //@@
+ //@@ The minimum slot utilization that must be satisfied to
+ //@@ execute the batch before 'max_queue_delay_microseconds' expires.
+ //@@ For example, a value of 0.5 indicates that the batch should be
+ //@@ executed as soon as 50% or more of the slots are ready even if
+ //@@ the 'max_queue_delay_microseconds' timeout has not expired.
+ //@@ The default is 0.0, indicating that a batch will be executed
+ //@@ before 'max_queue_delay_microseconds' timeout expires if at least
+ //@@ one batch slot is ready. 'max_queue_delay_microseconds' will be
+ //@@ ignored unless minimum_slot_utilization is set to a non-zero
+ //@@ value.
+ //@@
+ float minimum_slot_utilization = 2;
+ }
+
+ //@@ .. cpp:var:: message StrategyOldest
+ //@@
+ //@@ The sequence batcher maintains up to 'max_candidate_sequences'
+ //@@ candidate sequences. 'max_candidate_sequences' can be greater
+ //@@ than the model's 'max_batch_size'. For inferencing the batcher
+ //@@ chooses from the candidate sequences up to 'max_batch_size'
+ //@@ inference requests. Requests are chosen in an oldest-first
+ //@@ manner across all candidate sequences. A given sequence is
+ //@@ not guaranteed to be assigned to the same batch slot for
+ //@@ all inference requests of that sequence.
+ //@@
+ message StrategyOldest
+ {
+ //@@ .. cpp:var:: int32 max_candidate_sequences
+ //@@
+ //@@ Maximum number of candidate sequences that the batcher
+ //@@ maintains. Excess sequences are kept in an ordered backlog
+ //@@ and become candidates when existing candidate sequences
+ //@@ complete.
+ //@@
+ int32 max_candidate_sequences = 1;
+
+ //@@ .. cpp:var:: int32 preferred_batch_size (repeated)
+ //@@
+ //@@ Preferred batch sizes for dynamic batching of candidate
+ //@@ sequences. If a batch of one of these sizes can be formed
+ //@@ it will be executed immediately. If not specified a
+ //@@ preferred batch size will be chosen automatically
+ //@@ based on model and GPU characteristics.
+ //@@
+ repeated int32 preferred_batch_size = 2;
+
+ //@@ .. cpp:var:: uint64 max_queue_delay_microseconds
+ //@@
+ //@@ The maximum time, in microseconds, a candidate request
+ //@@ will be delayed in the dynamic batch scheduling queue to
+ //@@ wait for additional requests for batching. Default is 0.
+ //@@
+ uint64 max_queue_delay_microseconds = 3;
+ }
+
+ //@@ .. cpp:var:: oneof strategy_choice
+ //@@
+ //@@ The strategy used by the sequence batcher. Default strategy
+ //@@ is 'direct'.
+ //@@
+ oneof strategy_choice
+ {
+ //@@ .. cpp:var:: StrategyDirect direct
+ //@@
+ //@@ StrategyDirect scheduling strategy.
+ //@@
+ StrategyDirect direct = 3;
+
+ //@@ .. cpp:var:: StrategyOldest oldest
+ //@@
+ //@@ StrategyOldest scheduling strategy.
+ //@@
+ StrategyOldest oldest = 4;
+ }
+
+ //@@ .. cpp:var:: uint64 max_sequence_idle_microseconds
+ //@@
+ //@@ The maximum time, in microseconds, that a sequence is allowed to
+ //@@ be idle before it is aborted. The inference server considers a
+ //@@ sequence idle when it does not have any inference request queued
+ //@@ for the sequence. If this limit is exceeded, the inference server
+ //@@ will free the sequence slot allocated by the sequence and make it
+ //@@ available for another sequence. If not specified (or specified as
+ //@@ zero) a default value of 1000000 (1 second) is used.
+ //@@
+ uint64 max_sequence_idle_microseconds = 1;
+
+ //@@ .. cpp:var:: ControlInput control_input (repeated)
+ //@@
+ //@@ The model input(s) that the server should use to communicate
+ //@@ sequence start, stop, ready and similar control values to the
+ //@@ model.
+ //@@
+ repeated ControlInput control_input = 2;
+
+ //@@ .. cpp:var:: State state (repeated)
+ //@@
+ //@@ The optional state that can be stored in Triton for performing
+ //@@ inference requests on a sequence. Each sequence holds an implicit
+ //@@ state local to itself. The output state tensor provided by the
+ //@@ model in 'output_name' field of the current inference request will
+ //@@ be transferred as an input tensor named 'input_name' in the next
+ //@@ request of the same sequence. The input state of the first request
+ //@@ in the sequence contains garbage data.
+ //@@
+ repeated State state = 5;
+}
+
+//@@
+//@@.. cpp:var:: message ModelEnsembling
+//@@
+//@@ Model ensembling configuration. These settings specify the models that
+//@@ compose the ensemble and how data flows between the models.
+//@@
+message ModelEnsembling
+{
+ //@@ .. cpp:var:: message Step
+ //@@
+ //@@ Each step specifies a model included in the ensemble,
+ //@@ maps ensemble tensor names to the model input tensors,
+ //@@ and maps model output tensors to ensemble tensor names.
+ //@@
+ message Step
+ {
+ //@@ .. cpp:var:: string model_name
+ //@@
+ //@@ The name of the model to execute for this step of the ensemble.
+ //@@
+ string model_name = 1;
+
+ //@@ .. cpp:var:: int64 model_version
+ //@@
+ //@@ The version of the model to use for inference. If -1
+ //@@ the latest/most-recent version of the model is used.
+ //@@
+ int64 model_version = 2;
+
+ //@@ .. cpp:var:: map<string,string> input_map
+ //@@
+ //@@ Map from name of an input tensor on this step's model to ensemble
+ //@@ tensor name. The ensemble tensor must have the same data type and
+ //@@ shape as the model input. Each model input must be assigned to
+ //@@ one ensemble tensor, but the same ensemble tensor can be assigned
+ //@@ to multiple model inputs.
+ //@@
+ map<string, string> input_map = 3;
+
+ //@@ .. cpp:var:: map<string,string> output_map
+ //@@
+ //@@ Map from name of an output tensor on this step's model to ensemble
+ //@@ tensor name. The data type and shape of the ensemble tensor will
+ //@@ be inferred from the model output. It is optional to assign all
+ //@@ model outputs to ensemble tensors. One ensemble tensor name
+ //@@ can appear in an output map only once.
+ //@@
+ map<string, string> output_map = 4;
+ }
+
+ //@@ .. cpp:var:: Step step (repeated)
+ //@@
+ //@@ The models and the input / output mappings used within the ensemble.
+ //@@
+ repeated Step step = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelParameter
+//@@
+//@@ A model parameter.
+//@@
+message ModelParameter
+{
+ //@@ .. cpp:var:: string string_value
+ //@@
+ //@@ The value of the parameter, represented as a string.
+ //@@
+ string string_value = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelWarmup
+//@@
+//@@ Settings used to construct the request sample for model warmup.
+//@@
+message ModelWarmup
+{
+ //@@
+ //@@ .. cpp:var:: message Input
+ //@@
+ //@@ Meta data associated with an input.
+ //@@
+ message Input
+ {
+ //@@ .. cpp:var:: DataType data_type
+ //@@
+ //@@ The data-type of the input.
+ //@@
+ DataType data_type = 1;
+
+ //@@ .. cpp:var:: int64 dims (repeated)
+ //@@
+ //@@ The shape of the input tensor, not including the batch dimension.
+ //@@
+ repeated int64 dims = 2;
+
+ //@@ .. cpp:var:: oneof input_data_type
+ //@@
+ //@@ Specify how the input data is generated. If the input has STRING
+ //@@ data type and 'random_data' is set, the data generation will fall
+ //@@ back to 'zero_data'.
+ //@@
+ oneof input_data_type
+ {
+ //@@
+ //@@ .. cpp:var:: bool zero_data
+ //@@
+ //@@ The identifier for using zeros as input data. Note that the
+ //@@ value of 'zero_data' will not be checked, instead, zero data
+ //@@ will be used as long as the field is set.
+ //@@
+ bool zero_data = 3;
+
+ //@@
+ //@@ .. cpp:var:: bool random_data
+ //@@
+ //@@ The identifier for using random data as input data. Note that
+ //@@ the value of 'random_data' will not be checked, instead,
+ //@@ random data will be used as long as the field is set.
+ //@@
+ bool random_data = 4;
+
+ //@@ .. cpp:var:: string input_data_file
+ //@@
+ //@@ The file whose content will be used as raw input data in
+ //@@ row-major order. The file must be provided in a sub-directory
+ //@@ 'warmup' under the model directory. The file contents should be
+ //@@ in binary format. For TYPE_STRING data-type, an element is
+ //@@ represented by a 4-byte unsigned integer giving the length
+ //@@ followed by the actual bytes.
+ //@@
+ string input_data_file = 5;
+ }
+ }
+
+ //@@ .. cpp:var:: string name
+ //@@
+ //@@ The name of the request sample.
+ //@@
+ string name = 1;
+
+ //@@ .. cpp:var:: uint32 batch_size
+ //@@
+ //@@ The batch size of the inference request. This must be >= 1. For
+ //@@ models that don't support batching, batch_size must be 1. If
+ //@@ batch_size > 1, the 'inputs' specified below will be duplicated to
+ //@@ match the batch size requested.
+ //@@
+ uint32 batch_size = 2;
+
+ //@@ .. cpp:var:: map<string,Input> inputs
+ //@@
+ //@@ The warmup meta data associated with every model input, including
+ //@@ control tensors.
+ //@@
+ map<string, Input> inputs = 3;
+
+ //@@ .. cpp:var:: uint32 count
+ //@@
+ //@@ The number of iterations that this warmup sample will be executed.
+ //@@ For example, if this field is set to 2, 2 model executions using this
+ //@@ sample will be scheduled for warmup. Default value is 0 which
+ //@@ indicates that this sample will be used only once.
+ //@@ Note that for sequence models, 'count' may not work well
+ //@@ because the model often expects a valid sequence of requests which
+ //@@ should be represented by a series of warmup samples. 'count > 1'
+ //@@ essentially "resends" one of the samples, which may invalidate the
+ //@@ sequence and result in unexpected warmup failure.
+ //@@
+ uint32 count = 4;
+}
+
+//@@
+//@@ .. cpp:var:: message ModelOperations
+//@@
+//@@ The metadata of libraries providing custom operations for this model.
+//@@
+message ModelOperations
+{
+ //@@ .. cpp:var:: string op_library_filename (repeated)
+ //@@
+ //@@ Optional paths (one entry per library) of the libraries providing
+ //@@ custom operations for this model. Valid only for ONNX models.
+ //@@
+ repeated string op_library_filename = 1;
+}
+
+//@@
+//@@ .. cpp:var:: message ModelTransactionPolicy
+//@@
+//@@ The specification that describes the nature of transactions
+//@@ to be expected from the model.
+//@@
+message ModelTransactionPolicy
+{
+ //@@ .. cpp:var:: bool decoupled
+ //@@
+ //@@ Indicates whether responses generated by the model are decoupled from
+ //@@ the requests issued to it, which means the number of responses
+ //@@ generated by the model may differ from the number of requests issued,
+ //@@ and that the responses may be out of order relative to the order of
+ //@@ requests. The default is false, which means the model will generate
+ //@@ exactly one response for each request.
+ //@@
+ bool decoupled = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelRepositoryAgents
+//@@
+//@@ The repository agents for the model.
+//@@
+message ModelRepositoryAgents
+{
+ //@@
+ //@@ .. cpp:var:: message Agent
+ //@@
+ //@@ A repository agent that should be invoked for the specified
+ //@@ repository actions for this model.
+ //@@
+ message Agent
+ {
+ //@@ .. cpp:var:: string name
+ //@@
+ //@@ The name of the agent.
+ //@@
+ string name = 1;
+
+ //@@ .. cpp:var:: map<string,string> parameters
+ //@@
+ //@@ The parameters for the agent.
+ //@@
+ map<string, string> parameters = 2;
+ }
+
+ //@@
+ //@@ .. cpp:var:: Agent agents (repeated)
+ //@@
+ //@@ The ordered list of agents for the model. These agents will be
+ //@@ invoked in order to respond to repository actions occurring for the
+ //@@ model.
+ //@@
+ repeated Agent agents = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelResponseCache
+//@@
+//@@ The response cache setting for the model.
+//@@
+message ModelResponseCache
+{
+ //@@
+ //@@ .. cpp:var:: bool enable
+ //@@
+ //@@ Whether or not to use response cache for the model. If True, the
+ //@@ responses from the model are cached and when an identical request
+ //@@ is encountered, instead of going through the model execution,
+ //@@ the response from the cache is utilized. By default, response
+ //@@ cache is disabled for the models.
+ //@@
+ bool enable = 1;
+}
+
+//@@
+//@@.. cpp:var:: message ModelConfig
+//@@
+//@@ A model configuration.
+//@@
+message ModelConfig
+{
+ //@@ .. cpp:var:: string name
+ //@@
+ //@@ The name of the model.
+ //@@
+ string name = 1;
+
+ //@@ .. cpp:var:: string platform
+ //@@
+ //@@ The framework for the model. Possible values are
+ //@@ "tensorrt_plan", "tensorflow_graphdef",
+ //@@ "tensorflow_savedmodel", "onnxruntime_onnx",
+ //@@ "pytorch_libtorch".
+ //@@
+ string platform = 2;
+
+ //@@ .. cpp:var:: string backend
+ //@@
+ //@@ The backend used by the model.
+ //@@
+ string backend = 17;
+
+ //@@ .. cpp:var:: ModelVersionPolicy version_policy
+ //@@
+ //@@ Policy indicating which version(s) of the model will be served.
+ //@@
+ ModelVersionPolicy version_policy = 3;
+
+ //@@ .. cpp:var:: int32 max_batch_size
+ //@@
+ //@@ Maximum batch size allowed for inference. This can only decrease
+ //@@ what is allowed by the model itself. A max_batch_size value of 0
+ //@@ indicates that batching is not allowed for the model and the
+ //@@ dimension/shape of the input and output tensors must exactly
+ //@@ match what is specified in the input and output configuration. A
+ //@@ max_batch_size value > 0 indicates that batching is allowed and
+ //@@ so the model expects the input tensors to have an additional
+ //@@ initial dimension for the batching that is not specified in the
+ //@@ input (for example, if the model supports batched inputs of
+ //@@ 2-dimensional tensors then the model configuration will specify
+ //@@ the input shape as [ X, Y ] but the model will expect the actual
+ //@@ input tensors to have shape [ N, X, Y ]). For max_batch_size > 0
+ //@@ returned outputs will also have an additional initial dimension
+ //@@ for the batch.
+ //@@
+ int32 max_batch_size = 4;
+
+ //@@ .. cpp:var:: ModelInput input (repeated)
+ //@@
+ //@@ The inputs requested by the model.
+ //@@
+ repeated ModelInput input = 5;
+
+ //@@ .. cpp:var:: ModelOutput output (repeated)
+ //@@
+ //@@ The outputs produced by the model.
+ //@@
+ repeated ModelOutput output = 6;
+
+ //@@ .. cpp:var:: BatchInput batch_input (repeated)
+ //@@
+ //@@ The model input(s) that the server should use to communicate
+ //@@ batch related values to the model.
+ //@@
+ repeated BatchInput batch_input = 20;
+
+ //@@ .. cpp:var:: BatchOutput batch_output (repeated)
+ //@@
+ //@@ The outputs produced by the model that require special handling
+ //@@ by the model backend.
+ //@@
+ repeated BatchOutput batch_output = 21;
+
+ //@@ .. cpp:var:: ModelOptimizationPolicy optimization
+ //@@
+ //@@ Optimization configuration for the model. If not specified
+ //@@ then default optimization policy is used.
+ //@@
+ ModelOptimizationPolicy optimization = 12;
+
+ //@@ .. cpp:var:: oneof scheduling_choice
+ //@@
+ //@@ The scheduling policy for the model. If not specified the
+ //@@ default scheduling policy is used for the model. The default
+ //@@ policy is to execute each inference request independently.
+ //@@
+ oneof scheduling_choice
+ {
+ //@@ .. cpp:var:: ModelDynamicBatching dynamic_batching
+ //@@
+ //@@ If specified, enables the dynamic-batching scheduling
+ //@@ policy. With dynamic-batching the scheduler may group
+ //@@ together independent requests into a single batch to
+ //@@ improve inference throughput.
+ //@@
+ ModelDynamicBatching dynamic_batching = 11;
+
+ //@@ .. cpp:var:: ModelSequenceBatching sequence_batching
+ //@@
+ //@@ If specified, enables the sequence-batching scheduling
+ //@@ policy. With sequence-batching, inference requests
+ //@@ with the same correlation ID are routed to the same
+ //@@ model instance. Multiple sequences of inference requests
+ //@@ may be batched together into a single batch to
+ //@@ improve inference throughput.
+ //@@
+ ModelSequenceBatching sequence_batching = 13;
+
+ //@@ .. cpp:var:: ModelEnsembling ensemble_scheduling
+ //@@
+ //@@ If specified, enables the model-ensembling scheduling
+ //@@ policy. With model-ensembling, inference requests
+ //@@ will be processed according to the specification, such as an
+ //@@ execution sequence of models. The input specified in this model
+ //@@ config will be the input for the ensemble, and the output
+ //@@ specified will be the output of the ensemble.
+ //@@
+ ModelEnsembling ensemble_scheduling = 15;
+ }
+
+ //@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated)
+ //@@
+ //@@ Instances of this model. If not specified, one instance
+ //@@ of the model will be instantiated on each available GPU.
+ //@@
+ repeated ModelInstanceGroup instance_group = 7;
+
+ //@@ .. cpp:var:: string default_model_filename
+ //@@
+ //@@ Optional filename of the model file to use if a
+ //@@ compute-capability specific model is not specified in
+ //@@ :cpp:var:`cc_model_filenames`. If not specified the default name
+ //@@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or
+ //@@ 'model.pt' depending on the model type.
+ //@@
+ string default_model_filename = 8;
+
+ //@@ .. cpp:var:: map<string,string> cc_model_filenames
+ //@@
+ //@@ Optional map from CUDA compute capability to the filename of
+ //@@ the model that supports that compute capability. The filename
+ //@@ refers to a file within the model version directory.
+ //@@
+ map<string, string> cc_model_filenames = 9;
+
+ //@@ .. cpp:var:: map<string,string> metric_tags
+ //@@
+ //@@ Optional metric tags. User-specific key-value pairs for metrics
+ //@@ reported for this model. These tags are applied to the metrics
+ //@@ reported on the HTTP metrics port.
+ //@@
+ map<string, string> metric_tags = 10;
+
+ //@@ .. cpp:var:: map<string,ModelParameter> parameters
+ //@@
+ //@@ Optional model parameters. User-specified parameter values.
+ //@@
+ map<string, ModelParameter> parameters = 14;
+
+ //@@ .. cpp:var:: ModelWarmup model_warmup (repeated)
+ //@@
+ //@@ Warmup setting of this model. If specified, all instances
+ //@@ will be run with the request samples in sequence before
+ //@@ serving the model.
+ //@@ This field can only be specified if the model is not an ensemble
+ //@@ model.
+ //@@
+ repeated ModelWarmup model_warmup = 16;
+
+ //@@ .. cpp:var:: ModelOperations model_operations
+ //@@
+ //@@ Optional metadata of the libraries providing custom operations for
+ //@@ this model.
+ //@@
+ ModelOperations model_operations = 18;
+
+ //@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy
+ //@@
+ //@@ Optional specification that describes the nature of transactions
+ //@@ to be expected from the model.
+ //@@
+ ModelTransactionPolicy model_transaction_policy = 19;
+
+ //@@ .. cpp:var:: ModelRepositoryAgents model_repository_agents
+ //@@
+ //@@ Optional specification of the agent(s) that should be invoked
+ //@@ when repository actions are performed for this model.
+ //@@
+ ModelRepositoryAgents model_repository_agents = 23;
+
+ //@@ .. cpp:var:: ModelResponseCache response_cache
+ //@@
+ //@@ Optional setting for utilizing the response cache for this
+ //@@ model.
+ //@@
+ ModelResponseCache response_cache = 24;
+}
\ No newline at end of file
diff --git a/visualdl/component/inference/proto/model_config_pb2.py b/visualdl/component/inference/proto/model_config_pb2.py
new file mode 100644
index 000000000..70bf7b906
--- /dev/null
+++ b/visualdl/component/inference/proto/model_config_pb2.py
@@ -0,0 +1,856 @@
+# flake8: noqa
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler. DO NOT EDIT!
+# source: model_config.protxt
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import message as _message
+from google.protobuf import reflection as _reflection
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import enum_type_wrapper
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
+ b'\n\x13model_config.protxt\x12\tinference\"\x96\x01\n\x10ModelRateLimiter\x12\x37\n\tresources\x18\x01 \x03(\x0b\x32$.inference.ModelRateLimiter.Resource\x12\x10\n\x08priority\x18\x02 \x01(\r\x1a\x37\n\x08Resource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06global\x18\x02 \x01(\x08\x12\r\n\x05\x63ount\x18\x03 \x01(\r\"\x87\x04\n\x12ModelInstanceGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x30\n\x04kind\x18\x04 \x01(\x0e\x32\".inference.ModelInstanceGroup.Kind\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x31\n\x0crate_limiter\x18\x06 \x01(\x0b\x32\x1b.inference.ModelRateLimiter\x12\x0c\n\x04gpus\x18\x03 \x03(\x05\x12H\n\x11secondary_devices\x18\x08 \x03(\x0b\x32-.inference.ModelInstanceGroup.SecondaryDevice\x12\x0f\n\x07profile\x18\x05 \x03(\t\x12\x0f\n\x07passive\x18\x07 \x01(\x08\x12\x13\n\x0bhost_policy\x18\t \x01(\t\x1a\x9c\x01\n\x0fSecondaryDevice\x12O\n\x04kind\x18\x01 \x01(\x0e\x32\x41.inference.ModelInstanceGroup.SecondaryDevice.SecondaryDeviceKind\x12\x11\n\tdevice_id\x18\x02 \x01(\x03\"%\n\x13SecondaryDeviceKind\x12\x0e\n\nKIND_NVDLA\x10\x00\"A\n\x04Kind\x12\r\n\tKIND_AUTO\x10\x00\x12\x0c\n\x08KIND_GPU\x10\x01\x12\x0c\n\x08KIND_CPU\x10\x02\x12\x0e\n\nKIND_MODEL\x10\x03\"#\n\x12ModelTensorReshape\x12\r\n\x05shape\x18\x01 \x03(\x03\"\xb2\x02\n\nModelInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12,\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1c.inference.ModelInput.Format\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\x12\x1a\n\x12\x61llow_ragged_batch\x18\x07 \x01(\x08\x12\x10\n\x08optional\x18\x08 \x01(\x08\";\n\x06\x46ormat\x12\x0f\n\x0b\x46ORMAT_NONE\x10\x00\x12\x0f\n\x0b\x46ORMAT_NHWC\x10\x01\x12\x0f\n\x0b\x46ORMAT_NCHW\x10\x02\"\xb2\x01\n\x0bModelOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x03 
\x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x16\n\x0elabel_filename\x18\x04 \x01(\t\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\"\xd9\x02\n\nBatchInput\x12(\n\x04kind\x18\x01 \x01(\x0e\x32\x1a.inference.BatchInput.Kind\x12\x13\n\x0btarget_name\x18\x02 \x03(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x14\n\x0csource_input\x18\x04 \x03(\t\"\xcd\x01\n\x04Kind\x12\x17\n\x13\x42\x41TCH_ELEMENT_COUNT\x10\x00\x12#\n\x1f\x42\x41TCH_ACCUMULATED_ELEMENT_COUNT\x10\x01\x12-\n)BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO\x10\x02\x12$\n BATCH_MAX_ELEMENT_COUNT_AS_SHAPE\x10\x03\x12\x14\n\x10\x42\x41TCH_ITEM_SHAPE\x10\x04\x12\x1c\n\x18\x42\x41TCH_ITEM_SHAPE_FLATTEN\x10\x05\"\x8f\x01\n\x0b\x42\x61tchOutput\x12\x13\n\x0btarget_name\x18\x01 \x03(\t\x12)\n\x04kind\x18\x02 \x01(\x0e\x32\x1b.inference.BatchOutput.Kind\x12\x14\n\x0csource_input\x18\x03 \x03(\t\"*\n\x04Kind\x12\"\n\x1e\x42\x41TCH_SCATTER_WITH_INPUT_SHAPE\x10\x00\"\x90\x02\n\x12ModelVersionPolicy\x12\x36\n\x06latest\x18\x01 \x01(\x0b\x32$.inference.ModelVersionPolicy.LatestH\x00\x12\x30\n\x03\x61ll\x18\x02 \x01(\x0b\x32!.inference.ModelVersionPolicy.AllH\x00\x12:\n\x08specific\x18\x03 \x01(\x0b\x32&.inference.ModelVersionPolicy.SpecificH\x00\x1a\x1e\n\x06Latest\x12\x14\n\x0cnum_versions\x18\x01 \x01(\r\x1a\x05\n\x03\x41ll\x1a\x1c\n\x08Specific\x12\x10\n\x08versions\x18\x01 \x03(\x03\x42\x0f\n\rpolicy_choice\"\xfd\r\n\x17ModelOptimizationPolicy\x12\x37\n\x05graph\x18\x01 \x01(\x0b\x32(.inference.ModelOptimizationPolicy.Graph\x12\x42\n\x08priority\x18\x02 \x01(\x0e\x32\x30.inference.ModelOptimizationPolicy.ModelPriority\x12\x35\n\x04\x63uda\x18\x03 \x01(\x0b\x32\'.inference.ModelOptimizationPolicy.Cuda\x12X\n\x16\x65xecution_accelerators\x18\x04 \x01(\x0b\x32\x38.inference.ModelOptimizationPolicy.ExecutionAccelerators\x12R\n\x13input_pinned_memory\x18\x05 
\x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12S\n\x14output_pinned_memory\x18\x06 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12&\n\x1egather_kernel_buffer_threshold\x18\x07 \x01(\r\x12\x16\n\x0e\x65\x61ger_batching\x18\x08 \x01(\x08\x1a\x16\n\x05Graph\x12\r\n\x05level\x18\x01 \x01(\x05\x1a\xba\x05\n\x04\x43uda\x12\x0e\n\x06graphs\x18\x01 \x01(\x08\x12\x18\n\x10\x62usy_wait_events\x18\x02 \x01(\x08\x12\x45\n\ngraph_spec\x18\x03 \x03(\x0b\x32\x31.inference.ModelOptimizationPolicy.Cuda.GraphSpec\x12\x1a\n\x12output_copy_stream\x18\x04 \x01(\x08\x1a\xa4\x04\n\tGraphSpec\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12K\n\x05input\x18\x02 \x03(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry\x12W\n\x11graph_lower_bound\x18\x03 \x01(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound\x1a\x14\n\x05Shape\x12\x0b\n\x03\x64im\x18\x01 \x03(\x03\x1a\xdf\x01\n\nLowerBound\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12V\n\x05input\x18\x02 \x03(\x0b\x32G.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\xa4\x03\n\x15\x45xecutionAccelerators\x12g\n\x19gpu_execution_accelerator\x18\x01 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x12g\n\x19\x63pu_execution_accelerator\x18\x02 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x1a\xb8\x01\n\x0b\x41\x63\x63\x65lerator\x12\x0c\n\x04name\x18\x01 \x01(\t\x12h\n\nparameters\x18\x02 
\x03(\x0b\x32T.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a$\n\x12PinnedMemoryBuffer\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"I\n\rModelPriority\x12\x14\n\x10PRIORITY_DEFAULT\x10\x00\x12\x10\n\x0cPRIORITY_MAX\x10\x01\x12\x10\n\x0cPRIORITY_MIN\x10\x02\"\xdb\x01\n\x10ModelQueuePolicy\x12\x41\n\x0etimeout_action\x18\x01 \x01(\x0e\x32).inference.ModelQueuePolicy.TimeoutAction\x12$\n\x1c\x64\x65\x66\x61ult_timeout_microseconds\x18\x02 \x01(\x04\x12\x1e\n\x16\x61llow_timeout_override\x18\x03 \x01(\x08\x12\x16\n\x0emax_queue_size\x18\x04 \x01(\r\"&\n\rTimeoutAction\x12\n\n\x06REJECT\x10\x00\x12\t\n\x05\x44\x45LAY\x10\x01\"\x9b\x03\n\x14ModelDynamicBatching\x12\x1c\n\x14preferred_batch_size\x18\x01 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x02 \x01(\x04\x12\x19\n\x11preserve_ordering\x18\x03 \x01(\x08\x12\x17\n\x0fpriority_levels\x18\x04 \x01(\r\x12\x1e\n\x16\x64\x65\x66\x61ult_priority_level\x18\x05 \x01(\r\x12\x39\n\x14\x64\x65\x66\x61ult_queue_policy\x18\x06 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy\x12W\n\x15priority_queue_policy\x18\x07 \x03(\x0b\x32\x38.inference.ModelDynamicBatching.PriorityQueuePolicyEntry\x1aW\n\x18PriorityQueuePolicyEntry\x12\x0b\n\x03key\x18\x01 \x01(\r\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy:\x02\x38\x01\"\xef\t\n\x15ModelSequenceBatching\x12\x41\n\x06\x64irect\x18\x03 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyDirectH\x00\x12\x41\n\x06oldest\x18\x04 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyOldestH\x00\x12&\n\x1emax_sequence_idle_microseconds\x18\x01 \x01(\x04\x12\x44\n\rcontrol_input\x18\x02 \x03(\x0b\x32-.inference.ModelSequenceBatching.ControlInput\x12\x35\n\x05state\x18\x05 \x03(\x0b\x32&.inference.ModelSequenceBatching.State\x1a\xb1\x02\n\x07\x43ontrol\x12;\n\x04kind\x18\x01 
\x01(\x0e\x32-.inference.ModelSequenceBatching.Control.Kind\x12\x18\n\x10int32_false_true\x18\x02 \x03(\x05\x12\x17\n\x0f\x66p32_false_true\x18\x03 \x03(\x02\x12\x17\n\x0f\x62ool_false_true\x18\x05 \x03(\x08\x12&\n\tdata_type\x18\x04 \x01(\x0e\x32\x13.inference.DataType\"u\n\x04Kind\x12\x1a\n\x16\x43ONTROL_SEQUENCE_START\x10\x00\x12\x1a\n\x16\x43ONTROL_SEQUENCE_READY\x10\x01\x12\x18\n\x14\x43ONTROL_SEQUENCE_END\x10\x02\x12\x1b\n\x17\x43ONTROL_SEQUENCE_CORRID\x10\x03\x1aW\n\x0c\x43ontrolInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x07\x63ontrol\x18\x02 \x03(\x0b\x32(.inference.ModelSequenceBatching.Control\x1a\x8a\x01\n\x0cInitialState\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x13\n\tdata_file\x18\x04 \x01(\tH\x00\x12\x0c\n\x04name\x18\x05 \x01(\tB\x0c\n\nstate_data\x1a\xac\x01\n\x05State\x12\x12\n\ninput_name\x18\x01 \x01(\t\x12\x13\n\x0boutput_name\x18\x02 \x01(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12\x44\n\rinitial_state\x18\x05 \x03(\x0b\x32-.inference.ModelSequenceBatching.InitialState\x1aX\n\x0eStrategyDirect\x12$\n\x1cmax_queue_delay_microseconds\x18\x01 \x01(\x04\x12 \n\x18minimum_slot_utilization\x18\x02 \x01(\x02\x1au\n\x0eStrategyOldest\x12\x1f\n\x17max_candidate_sequences\x18\x01 \x01(\x05\x12\x1c\n\x14preferred_batch_size\x18\x02 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x03 \x01(\x04\x42\x11\n\x0fstrategy_choice\"\xdd\x02\n\x0fModelEnsembling\x12-\n\x04step\x18\x01 \x03(\x0b\x32\x1f.inference.ModelEnsembling.Step\x1a\x9a\x02\n\x04Step\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\x03\x12@\n\tinput_map\x18\x03 \x03(\x0b\x32-.inference.ModelEnsembling.Step.InputMapEntry\x12\x42\n\noutput_map\x18\x04 \x03(\x0b\x32..inference.ModelEnsembling.Step.OutputMapEntry\x1a/\n\rInputMapEntry\x12\x0b\n\x03key\x18\x01 
\x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x0eOutputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"&\n\x0eModelParameter\x12\x14\n\x0cstring_value\x18\x01 \x01(\t\"\xd9\x02\n\x0bModelWarmup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x32\n\x06inputs\x18\x03 \x03(\x0b\x32\".inference.ModelWarmup.InputsEntry\x12\r\n\x05\x63ount\x18\x04 \x01(\r\x1a\x97\x01\n\x05Input\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x15\n\x0brandom_data\x18\x04 \x01(\x08H\x00\x12\x19\n\x0finput_data_file\x18\x05 \x01(\tH\x00\x42\x11\n\x0finput_data_type\x1aK\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1c.inference.ModelWarmup.Input:\x02\x38\x01\".\n\x0fModelOperations\x12\x1b\n\x13op_library_filename\x18\x01 \x03(\t\"+\n\x16ModelTransactionPolicy\x12\x11\n\tdecoupled\x18\x01 \x01(\x08\"\xe6\x01\n\x15ModelRepositoryAgents\x12\x36\n\x06\x61gents\x18\x01 \x03(\x0b\x32&.inference.ModelRepositoryAgents.Agent\x1a\x94\x01\n\x05\x41gent\x12\x0c\n\x04name\x18\x01 \x01(\t\x12J\n\nparameters\x18\x02 \x03(\x0b\x32\x36.inference.ModelRepositoryAgents.Agent.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"$\n\x12ModelResponseCache\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08\"\xb2\n\n\x0bModelConfig\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08platform\x18\x02 \x01(\t\x12\x0f\n\x07\x62\x61\x63kend\x18\x11 \x01(\t\x12\x35\n\x0eversion_policy\x18\x03 \x01(\x0b\x32\x1d.inference.ModelVersionPolicy\x12\x16\n\x0emax_batch_size\x18\x04 \x01(\x05\x12$\n\x05input\x18\x05 \x03(\x0b\x32\x15.inference.ModelInput\x12&\n\x06output\x18\x06 \x03(\x0b\x32\x16.inference.ModelOutput\x12*\n\x0b\x62\x61tch_input\x18\x14 \x03(\x0b\x32\x15.inference.BatchInput\x12,\n\x0c\x62\x61tch_output\x18\x15 
\x03(\x0b\x32\x16.inference.BatchOutput\x12\x38\n\x0coptimization\x18\x0c \x01(\x0b\x32\".inference.ModelOptimizationPolicy\x12;\n\x10\x64ynamic_batching\x18\x0b \x01(\x0b\x32\x1f.inference.ModelDynamicBatchingH\x00\x12=\n\x11sequence_batching\x18\r \x01(\x0b\x32 .inference.ModelSequenceBatchingH\x00\x12\x39\n\x13\x65nsemble_scheduling\x18\x0f \x01(\x0b\x32\x1a.inference.ModelEnsemblingH\x00\x12\x35\n\x0einstance_group\x18\x07 \x03(\x0b\x32\x1d.inference.ModelInstanceGroup\x12\x1e\n\x16\x64\x65\x66\x61ult_model_filename\x18\x08 \x01(\t\x12H\n\x12\x63\x63_model_filenames\x18\t \x03(\x0b\x32,.inference.ModelConfig.CcModelFilenamesEntry\x12;\n\x0bmetric_tags\x18\n \x03(\x0b\x32&.inference.ModelConfig.MetricTagsEntry\x12:\n\nparameters\x18\x0e \x03(\x0b\x32&.inference.ModelConfig.ParametersEntry\x12,\n\x0cmodel_warmup\x18\x10 \x03(\x0b\x32\x16.inference.ModelWarmup\x12\x34\n\x10model_operations\x18\x12 \x01(\x0b\x32\x1a.inference.ModelOperations\x12\x43\n\x18model_transaction_policy\x18\x13 \x01(\x0b\x32!.inference.ModelTransactionPolicy\x12\x41\n\x17model_repository_agents\x18\x17 \x01(\x0b\x32 .inference.ModelRepositoryAgents\x12\x35\n\x0eresponse_cache\x18\x18 \x01(\x0b\x32\x1d.inference.ModelResponseCache\x1a\x37\n\x15\x43\x63ModelFilenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x31\n\x0fMetricTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 
\x01(\x0b\x32\x19.inference.ModelParameter:\x02\x38\x01\x42\x13\n\x11scheduling_choice*\xfa\x01\n\x08\x44\x61taType\x12\x10\n\x0cTYPE_INVALID\x10\x00\x12\r\n\tTYPE_BOOL\x10\x01\x12\x0e\n\nTYPE_UINT8\x10\x02\x12\x0f\n\x0bTYPE_UINT16\x10\x03\x12\x0f\n\x0bTYPE_UINT32\x10\x04\x12\x0f\n\x0bTYPE_UINT64\x10\x05\x12\r\n\tTYPE_INT8\x10\x06\x12\x0e\n\nTYPE_INT16\x10\x07\x12\x0e\n\nTYPE_INT32\x10\x08\x12\x0e\n\nTYPE_INT64\x10\t\x12\r\n\tTYPE_FP16\x10\n\x12\r\n\tTYPE_FP32\x10\x0b\x12\r\n\tTYPE_FP64\x10\x0c\x12\x0f\n\x0bTYPE_STRING\x10\r\x12\r\n\tTYPE_BF16\x10\x0e\x62\x06proto3'
+)
+
+_DATATYPE = DESCRIPTOR.enum_types_by_name['DataType']  # descriptor of the top-level DataType enum, looked up in the file descriptor
+DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE)  # wrapper object built around that enum descriptor
+TYPE_INVALID = 0  # the constants below mirror the DataType value numbers encoded in the serialized descriptor
+TYPE_BOOL = 1
+TYPE_UINT8 = 2
+TYPE_UINT16 = 3
+TYPE_UINT32 = 4
+TYPE_UINT64 = 5
+TYPE_INT8 = 6
+TYPE_INT16 = 7
+TYPE_INT32 = 8
+TYPE_INT64 = 9
+TYPE_FP16 = 10
+TYPE_FP32 = 11
+TYPE_FP64 = 12
+TYPE_STRING = 13
+TYPE_BF16 = 14
+
+_MODELRATELIMITER = DESCRIPTOR.message_types_by_name['ModelRateLimiter']  # from here: descriptor handles for top-level messages (message_types_by_name) and their nested messages (nested_types_by_name)
+_MODELRATELIMITER_RESOURCE = _MODELRATELIMITER.nested_types_by_name['Resource']
+_MODELINSTANCEGROUP = DESCRIPTOR.message_types_by_name['ModelInstanceGroup']
+_MODELINSTANCEGROUP_SECONDARYDEVICE = _MODELINSTANCEGROUP.nested_types_by_name[
+ 'SecondaryDevice']
+_MODELTENSORRESHAPE = DESCRIPTOR.message_types_by_name['ModelTensorReshape']
+_MODELINPUT = DESCRIPTOR.message_types_by_name['ModelInput']
+_MODELOUTPUT = DESCRIPTOR.message_types_by_name['ModelOutput']
+_BATCHINPUT = DESCRIPTOR.message_types_by_name['BatchInput']
+_BATCHOUTPUT = DESCRIPTOR.message_types_by_name['BatchOutput']
+_MODELVERSIONPOLICY = DESCRIPTOR.message_types_by_name['ModelVersionPolicy']
+_MODELVERSIONPOLICY_LATEST = _MODELVERSIONPOLICY.nested_types_by_name['Latest']
+_MODELVERSIONPOLICY_ALL = _MODELVERSIONPOLICY.nested_types_by_name['All']
+_MODELVERSIONPOLICY_SPECIFIC = _MODELVERSIONPOLICY.nested_types_by_name[
+ 'Specific']
+_MODELOPTIMIZATIONPOLICY = DESCRIPTOR.message_types_by_name[
+ 'ModelOptimizationPolicy']
+_MODELOPTIMIZATIONPOLICY_GRAPH = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
+ 'Graph']
+_MODELOPTIMIZATIONPOLICY_CUDA = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
+ 'Cuda']
+_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC = _MODELOPTIMIZATIONPOLICY_CUDA.nested_types_by_name[
+ 'GraphSpec']
+_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[
+ 'Shape']
+_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[
+ 'LowerBound']
+_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.nested_types_by_name[
+ 'InputEntry']
+_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.nested_types_by_name[
+ 'InputEntry']
+_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
+ 'ExecutionAccelerators']
+_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.nested_types_by_name[
+ 'Accelerator']
+_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.nested_types_by_name[
+ 'ParametersEntry']
+_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER = _MODELOPTIMIZATIONPOLICY.nested_types_by_name[
+ 'PinnedMemoryBuffer']
+_MODELQUEUEPOLICY = DESCRIPTOR.message_types_by_name['ModelQueuePolicy']
+_MODELDYNAMICBATCHING = DESCRIPTOR.message_types_by_name[
+ 'ModelDynamicBatching']
+_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY = _MODELDYNAMICBATCHING.nested_types_by_name[
+ 'PriorityQueuePolicyEntry']
+_MODELSEQUENCEBATCHING = DESCRIPTOR.message_types_by_name[
+ 'ModelSequenceBatching']
+_MODELSEQUENCEBATCHING_CONTROL = _MODELSEQUENCEBATCHING.nested_types_by_name[
+ 'Control']
+_MODELSEQUENCEBATCHING_CONTROLINPUT = _MODELSEQUENCEBATCHING.nested_types_by_name[
+ 'ControlInput']
+_MODELSEQUENCEBATCHING_INITIALSTATE = _MODELSEQUENCEBATCHING.nested_types_by_name[
+ 'InitialState']
+_MODELSEQUENCEBATCHING_STATE = _MODELSEQUENCEBATCHING.nested_types_by_name[
+ 'State']
+_MODELSEQUENCEBATCHING_STRATEGYDIRECT = _MODELSEQUENCEBATCHING.nested_types_by_name[
+ 'StrategyDirect']
+_MODELSEQUENCEBATCHING_STRATEGYOLDEST = _MODELSEQUENCEBATCHING.nested_types_by_name[
+ 'StrategyOldest']
+_MODELENSEMBLING = DESCRIPTOR.message_types_by_name['ModelEnsembling']
+_MODELENSEMBLING_STEP = _MODELENSEMBLING.nested_types_by_name['Step']
+_MODELENSEMBLING_STEP_INPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[
+ 'InputMapEntry']
+_MODELENSEMBLING_STEP_OUTPUTMAPENTRY = _MODELENSEMBLING_STEP.nested_types_by_name[
+ 'OutputMapEntry']
+_MODELPARAMETER = DESCRIPTOR.message_types_by_name['ModelParameter']
+_MODELWARMUP = DESCRIPTOR.message_types_by_name['ModelWarmup']
+_MODELWARMUP_INPUT = _MODELWARMUP.nested_types_by_name['Input']
+_MODELWARMUP_INPUTSENTRY = _MODELWARMUP.nested_types_by_name['InputsEntry']
+_MODELOPERATIONS = DESCRIPTOR.message_types_by_name['ModelOperations']
+_MODELTRANSACTIONPOLICY = DESCRIPTOR.message_types_by_name[
+ 'ModelTransactionPolicy']
+_MODELREPOSITORYAGENTS = DESCRIPTOR.message_types_by_name[
+ 'ModelRepositoryAgents']
+_MODELREPOSITORYAGENTS_AGENT = _MODELREPOSITORYAGENTS.nested_types_by_name[
+ 'Agent']
+_MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY = _MODELREPOSITORYAGENTS_AGENT.nested_types_by_name[
+ 'ParametersEntry']
+_MODELRESPONSECACHE = DESCRIPTOR.message_types_by_name['ModelResponseCache']
+_MODELCONFIG = DESCRIPTOR.message_types_by_name['ModelConfig']
+_MODELCONFIG_CCMODELFILENAMESENTRY = _MODELCONFIG.nested_types_by_name[
+ 'CcModelFilenamesEntry']
+_MODELCONFIG_METRICTAGSENTRY = _MODELCONFIG.nested_types_by_name[
+ 'MetricTagsEntry']
+_MODELCONFIG_PARAMETERSENTRY = _MODELCONFIG.nested_types_by_name[
+ 'ParametersEntry']
+_MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND = _MODELINSTANCEGROUP_SECONDARYDEVICE.enum_types_by_name[  # from here: descriptor handles for enums declared inside messages (enum_types_by_name)
+ 'SecondaryDeviceKind']
+_MODELINSTANCEGROUP_KIND = _MODELINSTANCEGROUP.enum_types_by_name['Kind']
+_MODELINPUT_FORMAT = _MODELINPUT.enum_types_by_name['Format']
+_BATCHINPUT_KIND = _BATCHINPUT.enum_types_by_name['Kind']
+_BATCHOUTPUT_KIND = _BATCHOUTPUT.enum_types_by_name['Kind']
+_MODELOPTIMIZATIONPOLICY_MODELPRIORITY = _MODELOPTIMIZATIONPOLICY.enum_types_by_name[
+ 'ModelPriority']
+_MODELQUEUEPOLICY_TIMEOUTACTION = _MODELQUEUEPOLICY.enum_types_by_name[
+ 'TimeoutAction']
+_MODELSEQUENCEBATCHING_CONTROL_KIND = _MODELSEQUENCEBATCHING_CONTROL.enum_types_by_name[
+ 'Kind']
+ModelRateLimiter = _reflection.GeneratedProtocolMessageType(  # message class for inference.ModelRateLimiter, built from its descriptor
+ 'ModelRateLimiter',
+ (_message.Message, ),
+ {
+ 'Resource':  # nested message class, reachable as ModelRateLimiter.Resource
+ _reflection.GeneratedProtocolMessageType(
+ 'Resource',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELRATELIMITER_RESOURCE,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter.Resource)
+ }),
+ 'DESCRIPTOR':
+ _MODELRATELIMITER,
+ '__module__':
+ 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter)
+ })
+_sym_db.RegisterMessage(ModelRateLimiter)  # register the outer class, then the nested one, with _sym_db
+_sym_db.RegisterMessage(ModelRateLimiter.Resource)
+
+ModelInstanceGroup = _reflection.GeneratedProtocolMessageType(  # message class for inference.ModelInstanceGroup, built from its descriptor
+ 'ModelInstanceGroup',
+ (_message.Message, ),
+ {
+ 'SecondaryDevice':  # nested message class, reachable as ModelInstanceGroup.SecondaryDevice
+ _reflection.GeneratedProtocolMessageType(
+ 'SecondaryDevice',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELINSTANCEGROUP_SECONDARYDEVICE,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup.SecondaryDevice)
+ }),
+ 'DESCRIPTOR':
+ _MODELINSTANCEGROUP,
+ '__module__':
+ 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup)
+ })
+_sym_db.RegisterMessage(ModelInstanceGroup)  # register the outer class, then the nested one, with _sym_db
+_sym_db.RegisterMessage(ModelInstanceGroup.SecondaryDevice)
+
+ModelTensorReshape = _reflection.GeneratedProtocolMessageType(  # message class for inference.ModelTensorReshape, built from its descriptor
+ 'ModelTensorReshape',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELTENSORRESHAPE,
+ '__module__': 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.ModelTensorReshape)
+ })
+_sym_db.RegisterMessage(ModelTensorReshape)  # register the class with _sym_db
+
+ModelInput = _reflection.GeneratedProtocolMessageType(  # message class for inference.ModelInput, built from its descriptor
+ 'ModelInput',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELINPUT,
+ '__module__': 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.ModelInput)
+ })
+_sym_db.RegisterMessage(ModelInput)  # register the class with _sym_db
+
+ModelOutput = _reflection.GeneratedProtocolMessageType(  # message class for inference.ModelOutput, built from its descriptor
+ 'ModelOutput',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELOUTPUT,
+ '__module__': 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.ModelOutput)
+ })
+_sym_db.RegisterMessage(ModelOutput)  # register the class with _sym_db
+
+BatchInput = _reflection.GeneratedProtocolMessageType(  # message class for inference.BatchInput, built from its descriptor
+ 'BatchInput',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _BATCHINPUT,
+ '__module__': 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.BatchInput)
+ })
+_sym_db.RegisterMessage(BatchInput)  # register the class with _sym_db
+
+BatchOutput = _reflection.GeneratedProtocolMessageType(  # message class for inference.BatchOutput, built from its descriptor
+ 'BatchOutput',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _BATCHOUTPUT,
+ '__module__': 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.BatchOutput)
+ })
+_sym_db.RegisterMessage(BatchOutput)  # register the class with _sym_db
+
+ModelVersionPolicy = _reflection.GeneratedProtocolMessageType(  # message class for inference.ModelVersionPolicy, built from its descriptor
+ 'ModelVersionPolicy',
+ (_message.Message, ),
+ {
+ 'Latest':  # nested message class, reachable as ModelVersionPolicy.Latest
+ _reflection.GeneratedProtocolMessageType(
+ 'Latest',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELVERSIONPOLICY_LATEST,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Latest)
+ }),
+ 'All':  # nested message class, reachable as ModelVersionPolicy.All
+ _reflection.GeneratedProtocolMessageType(
+ 'All',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELVERSIONPOLICY_ALL,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.All)
+ }),
+ 'Specific':  # nested message class, reachable as ModelVersionPolicy.Specific
+ _reflection.GeneratedProtocolMessageType(
+ 'Specific',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELVERSIONPOLICY_SPECIFIC,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Specific)
+ }),
+ 'DESCRIPTOR':
+ _MODELVERSIONPOLICY,
+ '__module__':
+ 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy)
+ })
+_sym_db.RegisterMessage(ModelVersionPolicy)  # register the outer class, then each nested class, with _sym_db
+_sym_db.RegisterMessage(ModelVersionPolicy.Latest)
+_sym_db.RegisterMessage(ModelVersionPolicy.All)
+_sym_db.RegisterMessage(ModelVersionPolicy.Specific)
+
+ModelOptimizationPolicy = _reflection.GeneratedProtocolMessageType(  # message class for inference.ModelOptimizationPolicy; nested classes are built inline, innermost first
+ 'ModelOptimizationPolicy',
+ (_message.Message, ),
+ {
+ 'Graph':  # nested message class, reachable as ModelOptimizationPolicy.Graph
+ _reflection.GeneratedProtocolMessageType(
+ 'Graph',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_GRAPH,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Graph)
+ }),
+ 'Cuda':  # nested message class; itself contains GraphSpec
+ _reflection.GeneratedProtocolMessageType(
+ 'Cuda',
+ (_message.Message, ),
+ {
+ 'GraphSpec':  # contains Shape, LowerBound and InputEntry
+ _reflection.GeneratedProtocolMessageType(
+ 'GraphSpec',
+ (_message.Message, ),
+ {
+ 'Shape':
+ _reflection.GeneratedProtocolMessageType(
+ 'Shape',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR':
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape)
+ }),
+ 'LowerBound':
+ _reflection.GeneratedProtocolMessageType(
+ 'LowerBound',
+ (_message.Message, ),
+ {
+ 'InputEntry':  # bound to the LowerBound-scoped descriptor; a separate class from GraphSpec.InputEntry
+ _reflection.GeneratedProtocolMessageType(
+ 'InputEntry',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR':
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry)
+ }),
+ 'DESCRIPTOR':
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND,
+ '__module__':
+ 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound)
+ }),
+ 'InputEntry':  # bound to the GraphSpec-scoped descriptor
+ _reflection.GeneratedProtocolMessageType(
+ 'InputEntry',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR':
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry)
+ }),
+ 'DESCRIPTOR':
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC,
+ '__module__':
+ 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec)
+ }),
+ 'DESCRIPTOR':
+ _MODELOPTIMIZATIONPOLICY_CUDA,
+ '__module__':
+ 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda)
+ }),
+ 'ExecutionAccelerators':  # nested message class; itself contains Accelerator
+ _reflection.GeneratedProtocolMessageType(
+ 'ExecutionAccelerators',
+ (_message.Message, ),
+ {
+ 'Accelerator':  # contains ParametersEntry
+ _reflection.GeneratedProtocolMessageType(
+ 'Accelerator',
+ (_message.Message, ),
+ {
+ 'ParametersEntry':
+ _reflection.GeneratedProtocolMessageType(
+ 'ParametersEntry',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR':
+ _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry)
+ }),
+ 'DESCRIPTOR':
+ _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR,
+ '__module__':
+ 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator)
+ }),
+ 'DESCRIPTOR':
+ _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS,
+ '__module__':
+ 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators)
+ }),
+ 'PinnedMemoryBuffer':  # nested message class, reachable as ModelOptimizationPolicy.PinnedMemoryBuffer
+ _reflection.GeneratedProtocolMessageType(
+ 'PinnedMemoryBuffer',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.PinnedMemoryBuffer)
+ }),
+ 'DESCRIPTOR':
+ _MODELOPTIMIZATIONPOLICY,
+ '__module__':
+ 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy)
+ })
+_sym_db.RegisterMessage(ModelOptimizationPolicy)  # register the outer class, then every nested class, with _sym_db
+_sym_db.RegisterMessage(ModelOptimizationPolicy.Graph)
+_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda)
+_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec)
+_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.Shape)
+_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound)
+_sym_db.RegisterMessage(
+ ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry)
+_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry)
+_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators)
+_sym_db.RegisterMessage(
+ ModelOptimizationPolicy.ExecutionAccelerators.Accelerator)
+_sym_db.RegisterMessage(
+ ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry)
+_sym_db.RegisterMessage(ModelOptimizationPolicy.PinnedMemoryBuffer)
+
+ModelQueuePolicy = _reflection.GeneratedProtocolMessageType(  # message class for inference.ModelQueuePolicy, built from its descriptor
+ 'ModelQueuePolicy',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELQUEUEPOLICY,
+ '__module__': 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.ModelQueuePolicy)
+ })
+_sym_db.RegisterMessage(ModelQueuePolicy)  # register the class with _sym_db
+
+ModelDynamicBatching = _reflection.GeneratedProtocolMessageType(  # message class for inference.ModelDynamicBatching, built from its descriptor
+ 'ModelDynamicBatching',
+ (_message.Message, ),
+ {
+ 'PriorityQueuePolicyEntry':  # nested message class, reachable as ModelDynamicBatching.PriorityQueuePolicyEntry
+ _reflection.GeneratedProtocolMessageType(
+ 'PriorityQueuePolicyEntry',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching.PriorityQueuePolicyEntry)
+ }),
+ 'DESCRIPTOR':
+ _MODELDYNAMICBATCHING,
+ '__module__':
+ 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching)
+ })
+_sym_db.RegisterMessage(ModelDynamicBatching)  # register the outer class, then the nested one, with _sym_db
+_sym_db.RegisterMessage(ModelDynamicBatching.PriorityQueuePolicyEntry)
+
+ModelSequenceBatching = _reflection.GeneratedProtocolMessageType(  # message class for inference.ModelSequenceBatching; each dict key below adds one nested message class
+ 'ModelSequenceBatching',
+ (_message.Message, ),
+ {
+ 'Control':
+ _reflection.GeneratedProtocolMessageType(
+ 'Control',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROL,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.Control)
+ }),
+ 'ControlInput':
+ _reflection.GeneratedProtocolMessageType(
+ 'ControlInput',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELSEQUENCEBATCHING_CONTROLINPUT,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.ControlInput)
+ }),
+ 'InitialState':
+ _reflection.GeneratedProtocolMessageType(
+ 'InitialState',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELSEQUENCEBATCHING_INITIALSTATE,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.InitialState)
+ }),
+ 'State':
+ _reflection.GeneratedProtocolMessageType(
+ 'State',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STATE,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.State)
+ }),
+ 'StrategyDirect':
+ _reflection.GeneratedProtocolMessageType(
+ 'StrategyDirect',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYDIRECT,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyDirect)
+ }),
+ 'StrategyOldest':
+ _reflection.GeneratedProtocolMessageType(
+ 'StrategyOldest',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELSEQUENCEBATCHING_STRATEGYOLDEST,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyOldest)
+ }),
+ 'DESCRIPTOR':
+ _MODELSEQUENCEBATCHING,
+ '__module__':
+ 'model_config.protxt_pb2'  # NOTE(review): differs from this file's module name (model_config_pb2) - confirm nothing depends on __module__
+ # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching)
+ })
+_sym_db.RegisterMessage(ModelSequenceBatching)  # register the outer class, then each nested class, with _sym_db
+_sym_db.RegisterMessage(ModelSequenceBatching.Control)
+_sym_db.RegisterMessage(ModelSequenceBatching.ControlInput)
+_sym_db.RegisterMessage(ModelSequenceBatching.InitialState)
+_sym_db.RegisterMessage(ModelSequenceBatching.State)
+_sym_db.RegisterMessage(ModelSequenceBatching.StrategyDirect)
+_sym_db.RegisterMessage(ModelSequenceBatching.StrategyOldest)
+
+ModelEnsembling = _reflection.GeneratedProtocolMessageType(  # Message class 'ModelEnsembling'; its nested 'Step' class in turn nests the two map-entry classes below.
+ 'ModelEnsembling',
+ (_message.Message, ),
+ {
+ 'Step':
+ _reflection.GeneratedProtocolMessageType(
+ 'Step',
+ (_message.Message, ),
+ {
+ 'InputMapEntry':
+ _reflection.GeneratedProtocolMessageType(
+ 'InputMapEntry',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELENSEMBLING_STEP_INPUTMAPENTRY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.InputMapEntry)
+ }),
+ 'OutputMapEntry':
+ _reflection.GeneratedProtocolMessageType(
+ 'OutputMapEntry',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELENSEMBLING_STEP_OUTPUTMAPENTRY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.OutputMapEntry)
+ }),
+ 'DESCRIPTOR':
+ _MODELENSEMBLING_STEP,
+ '__module__':
+ 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step)
+ }),
+ 'DESCRIPTOR':
+ _MODELENSEMBLING,
+ '__module__':
+ 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelEnsembling)
+ })
+_sym_db.RegisterMessage(ModelEnsembling)  # register the outer class and every nested level through _sym_db
+_sym_db.RegisterMessage(ModelEnsembling.Step)
+_sym_db.RegisterMessage(ModelEnsembling.Step.InputMapEntry)
+_sym_db.RegisterMessage(ModelEnsembling.Step.OutputMapEntry)
+
+ModelParameter = _reflection.GeneratedProtocolMessageType(  # Message class 'ModelParameter', built from the _MODELPARAMETER descriptor; it has no nested classes.
+ 'ModelParameter',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELPARAMETER,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelParameter)
+ })
+_sym_db.RegisterMessage(ModelParameter)  # register the class through _sym_db
+
+ModelWarmup = _reflection.GeneratedProtocolMessageType(  # Message class 'ModelWarmup' with nested classes 'Input' and 'InputsEntry'.
+ 'ModelWarmup',
+ (_message.Message, ),
+ {
+ 'Input':
+ _reflection.GeneratedProtocolMessageType(
+ 'Input',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELWARMUP_INPUT,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelWarmup.Input)
+ }),
+ 'InputsEntry':
+ _reflection.GeneratedProtocolMessageType(
+ 'InputsEntry',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELWARMUP_INPUTSENTRY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelWarmup.InputsEntry)
+ }),
+ 'DESCRIPTOR':
+ _MODELWARMUP,
+ '__module__':
+ 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelWarmup)
+ })
+_sym_db.RegisterMessage(ModelWarmup)  # register the outer class and both nested classes through _sym_db
+_sym_db.RegisterMessage(ModelWarmup.Input)
+_sym_db.RegisterMessage(ModelWarmup.InputsEntry)
+
+ModelOperations = _reflection.GeneratedProtocolMessageType(  # Message class 'ModelOperations', built from the _MODELOPERATIONS descriptor; it has no nested classes.
+ 'ModelOperations',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELOPERATIONS,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelOperations)
+ })
+_sym_db.RegisterMessage(ModelOperations)  # register the class through _sym_db
+
+ModelTransactionPolicy = _reflection.GeneratedProtocolMessageType(  # Message class 'ModelTransactionPolicy', built from the _MODELTRANSACTIONPOLICY descriptor; it has no nested classes.
+ 'ModelTransactionPolicy',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELTRANSACTIONPOLICY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelTransactionPolicy)
+ })
+_sym_db.RegisterMessage(ModelTransactionPolicy)  # register the class through _sym_db
+
+ModelRepositoryAgents = _reflection.GeneratedProtocolMessageType(  # Message class 'ModelRepositoryAgents'; its nested 'Agent' class in turn nests 'ParametersEntry'.
+ 'ModelRepositoryAgents',
+ (_message.Message, ),
+ {
+ 'Agent':
+ _reflection.GeneratedProtocolMessageType(
+ 'Agent',
+ (_message.Message, ),
+ {
+ 'ParametersEntry':
+ _reflection.GeneratedProtocolMessageType(
+ 'ParametersEntry',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR':
+ _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent.ParametersEntry)
+ }),
+ 'DESCRIPTOR':
+ _MODELREPOSITORYAGENTS_AGENT,
+ '__module__':
+ 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents.Agent)
+ }),
+ 'DESCRIPTOR':
+ _MODELREPOSITORYAGENTS,
+ '__module__':
+ 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelRepositoryAgents)
+ })
+_sym_db.RegisterMessage(ModelRepositoryAgents)  # register the outer class and every nested level through _sym_db
+_sym_db.RegisterMessage(ModelRepositoryAgents.Agent)
+_sym_db.RegisterMessage(ModelRepositoryAgents.Agent.ParametersEntry)
+
+ModelResponseCache = _reflection.GeneratedProtocolMessageType(  # Message class 'ModelResponseCache', built from the _MODELRESPONSECACHE descriptor; it has no nested classes.
+ 'ModelResponseCache',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELRESPONSECACHE,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelResponseCache)
+ })
+_sym_db.RegisterMessage(ModelResponseCache)  # register the class through _sym_db
+
+ModelConfig = _reflection.GeneratedProtocolMessageType(  # Message class 'ModelConfig' with three nested '*Entry' classes (CcModelFilenamesEntry, MetricTagsEntry, ParametersEntry).
+ 'ModelConfig',
+ (_message.Message, ),
+ {
+ 'CcModelFilenamesEntry':
+ _reflection.GeneratedProtocolMessageType(
+ 'CcModelFilenamesEntry',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELCONFIG_CCMODELFILENAMESENTRY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelConfig.CcModelFilenamesEntry)
+ }),
+ 'MetricTagsEntry':
+ _reflection.GeneratedProtocolMessageType(
+ 'MetricTagsEntry',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELCONFIG_METRICTAGSENTRY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelConfig.MetricTagsEntry)
+ }),
+ 'ParametersEntry':
+ _reflection.GeneratedProtocolMessageType(
+ 'ParametersEntry',
+ (_message.Message, ),
+ {
+ 'DESCRIPTOR': _MODELCONFIG_PARAMETERSENTRY,
+ '__module__': 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelConfig.ParametersEntry)
+ }),
+ 'DESCRIPTOR':
+ _MODELCONFIG,
+ '__module__':
+ 'model_config.protxt_pb2'
+ # @@protoc_insertion_point(class_scope:inference.ModelConfig)
+ })
+_sym_db.RegisterMessage(ModelConfig)  # register the outer class and the three nested classes through _sym_db
+_sym_db.RegisterMessage(ModelConfig.CcModelFilenamesEntry)
+_sym_db.RegisterMessage(ModelConfig.MetricTagsEntry)
+_sym_db.RegisterMessage(ModelConfig.ParametersEntry)
+
+if _descriptor._USE_C_DESCRIPTORS == False:  # Runs only when the _USE_C_DESCRIPTORS flag is False: resets options and assigns the _serialized_start/_serialized_end value of every descriptor by hand.
+
+ DESCRIPTOR._options = None
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._options = None  # each *ENTRY descriptor gets _options cleared and _serialized_options set to b'8\001'
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_options = b'8\001'  # NOTE(review): b'8\001' presumably encodes the map_entry=true message option - confirm
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._options = None
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_options = b'8\001'
+ _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._options = None
+ _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_options = b'8\001'
+ _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._options = None
+ _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_options = b'8\001'
+ _MODELENSEMBLING_STEP_INPUTMAPENTRY._options = None
+ _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_options = b'8\001'
+ _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._options = None
+ _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_options = b'8\001'
+ _MODELWARMUP_INPUTSENTRY._options = None
+ _MODELWARMUP_INPUTSENTRY._serialized_options = b'8\001'
+ _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._options = None
+ _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_options = b'8\001'
+ _MODELCONFIG_CCMODELFILENAMESENTRY._options = None
+ _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_options = b'8\001'
+ _MODELCONFIG_METRICTAGSENTRY._options = None
+ _MODELCONFIG_METRICTAGSENTRY._serialized_options = b'8\001'
+ _MODELCONFIG_PARAMETERSENTRY._options = None
+ _MODELCONFIG_PARAMETERSENTRY._serialized_options = b'8\001'
+ _DATATYPE._serialized_start = 8137  # from here on: one start/end pair per descriptor; presumably byte offsets into the serialized file descriptor - confirm
+ _DATATYPE._serialized_end = 8387
+ _MODELRATELIMITER._serialized_start = 35
+ _MODELRATELIMITER._serialized_end = 185
+ _MODELRATELIMITER_RESOURCE._serialized_start = 130
+ _MODELRATELIMITER_RESOURCE._serialized_end = 185
+ _MODELINSTANCEGROUP._serialized_start = 188
+ _MODELINSTANCEGROUP._serialized_end = 707
+ _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_start = 484
+ _MODELINSTANCEGROUP_SECONDARYDEVICE._serialized_end = 640
+ _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_start = 603
+ _MODELINSTANCEGROUP_SECONDARYDEVICE_SECONDARYDEVICEKIND._serialized_end = 640
+ _MODELINSTANCEGROUP_KIND._serialized_start = 642
+ _MODELINSTANCEGROUP_KIND._serialized_end = 707
+ _MODELTENSORRESHAPE._serialized_start = 709
+ _MODELTENSORRESHAPE._serialized_end = 744
+ _MODELINPUT._serialized_start = 747
+ _MODELINPUT._serialized_end = 1053
+ _MODELINPUT_FORMAT._serialized_start = 994
+ _MODELINPUT_FORMAT._serialized_end = 1053
+ _MODELOUTPUT._serialized_start = 1056
+ _MODELOUTPUT._serialized_end = 1234
+ _BATCHINPUT._serialized_start = 1237
+ _BATCHINPUT._serialized_end = 1582
+ _BATCHINPUT_KIND._serialized_start = 1377
+ _BATCHINPUT_KIND._serialized_end = 1582
+ _BATCHOUTPUT._serialized_start = 1585
+ _BATCHOUTPUT._serialized_end = 1728
+ _BATCHOUTPUT_KIND._serialized_start = 1686
+ _BATCHOUTPUT_KIND._serialized_end = 1728
+ _MODELVERSIONPOLICY._serialized_start = 1731
+ _MODELVERSIONPOLICY._serialized_end = 2003
+ _MODELVERSIONPOLICY_LATEST._serialized_start = 1919
+ _MODELVERSIONPOLICY_LATEST._serialized_end = 1949
+ _MODELVERSIONPOLICY_ALL._serialized_start = 1951
+ _MODELVERSIONPOLICY_ALL._serialized_end = 1956
+ _MODELVERSIONPOLICY_SPECIFIC._serialized_start = 1958
+ _MODELVERSIONPOLICY_SPECIFIC._serialized_end = 1986
+ _MODELOPTIMIZATIONPOLICY._serialized_start = 2006
+ _MODELOPTIMIZATIONPOLICY._serialized_end = 3795
+ _MODELOPTIMIZATIONPOLICY_GRAPH._serialized_start = 2536
+ _MODELOPTIMIZATIONPOLICY_GRAPH._serialized_end = 2558
+ _MODELOPTIMIZATIONPOLICY_CUDA._serialized_start = 2561
+ _MODELOPTIMIZATIONPOLICY_CUDA._serialized_end = 3259
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_start = 2711
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC._serialized_end = 3259
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_start = 2910
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE._serialized_end = 2930
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_start = 2933
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND._serialized_end = 3156
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_start = 3055
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._serialized_end = 3156
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_start = 3055  # NOTE(review): same 3055-3156 pair as the LOWERBOUND_INPUTENTRY just above; presumably emitted this way by the generator - do not hand-edit, confirm by regenerating
+ _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._serialized_end = 3156
+ _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_start = 3262
+ _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS._serialized_end = 3682
+ _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_start = 3498
+ _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR._serialized_end = 3682
+ _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_start = 3633
+ _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._serialized_end = 3682
+ _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_start = 3684
+ _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER._serialized_end = 3720
+ _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_start = 3722
+ _MODELOPTIMIZATIONPOLICY_MODELPRIORITY._serialized_end = 3795
+ _MODELQUEUEPOLICY._serialized_start = 3798
+ _MODELQUEUEPOLICY._serialized_end = 4017
+ _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_start = 3979
+ _MODELQUEUEPOLICY_TIMEOUTACTION._serialized_end = 4017
+ _MODELDYNAMICBATCHING._serialized_start = 4020
+ _MODELDYNAMICBATCHING._serialized_end = 4431
+ _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_start = 4344
+ _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._serialized_end = 4431
+ _MODELSEQUENCEBATCHING._serialized_start = 4434
+ _MODELSEQUENCEBATCHING._serialized_end = 5697
+ _MODELSEQUENCEBATCHING_CONTROL._serialized_start = 4759
+ _MODELSEQUENCEBATCHING_CONTROL._serialized_end = 5064
+ _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_start = 4947
+ _MODELSEQUENCEBATCHING_CONTROL_KIND._serialized_end = 5064
+ _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_start = 5066
+ _MODELSEQUENCEBATCHING_CONTROLINPUT._serialized_end = 5153
+ _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_start = 5156
+ _MODELSEQUENCEBATCHING_INITIALSTATE._serialized_end = 5294
+ _MODELSEQUENCEBATCHING_STATE._serialized_start = 5297
+ _MODELSEQUENCEBATCHING_STATE._serialized_end = 5469
+ _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_start = 5471
+ _MODELSEQUENCEBATCHING_STRATEGYDIRECT._serialized_end = 5559
+ _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_start = 5561
+ _MODELSEQUENCEBATCHING_STRATEGYOLDEST._serialized_end = 5678
+ _MODELENSEMBLING._serialized_start = 5700
+ _MODELENSEMBLING._serialized_end = 6049
+ _MODELENSEMBLING_STEP._serialized_start = 5767
+ _MODELENSEMBLING_STEP._serialized_end = 6049
+ _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_start = 5952
+ _MODELENSEMBLING_STEP_INPUTMAPENTRY._serialized_end = 5999
+ _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_start = 6001
+ _MODELENSEMBLING_STEP_OUTPUTMAPENTRY._serialized_end = 6049
+ _MODELPARAMETER._serialized_start = 6051
+ _MODELPARAMETER._serialized_end = 6089
+ _MODELWARMUP._serialized_start = 6092
+ _MODELWARMUP._serialized_end = 6437
+ _MODELWARMUP_INPUT._serialized_start = 6209
+ _MODELWARMUP_INPUT._serialized_end = 6360
+ _MODELWARMUP_INPUTSENTRY._serialized_start = 6362
+ _MODELWARMUP_INPUTSENTRY._serialized_end = 6437
+ _MODELOPERATIONS._serialized_start = 6439
+ _MODELOPERATIONS._serialized_end = 6485
+ _MODELTRANSACTIONPOLICY._serialized_start = 6487
+ _MODELTRANSACTIONPOLICY._serialized_end = 6530
+ _MODELREPOSITORYAGENTS._serialized_start = 6533
+ _MODELREPOSITORYAGENTS._serialized_end = 6763
+ _MODELREPOSITORYAGENTS_AGENT._serialized_start = 6615
+ _MODELREPOSITORYAGENTS_AGENT._serialized_end = 6763
+ _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_start = 3633  # NOTE(review): 3633-3682 repeats the EXECUTIONACCELERATORS ParametersEntry pair and lies outside the enclosing AGENT range 6615-6763; presumably generator output - confirm by regenerating
+ _MODELREPOSITORYAGENTS_AGENT_PARAMETERSENTRY._serialized_end = 3682
+ _MODELRESPONSECACHE._serialized_start = 6765
+ _MODELRESPONSECACHE._serialized_end = 6801
+ _MODELCONFIG._serialized_start = 6804
+ _MODELCONFIG._serialized_end = 8134
+ _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_start = 7929
+ _MODELCONFIG_CCMODELFILENAMESENTRY._serialized_end = 7984
+ _MODELCONFIG_METRICTAGSENTRY._serialized_start = 7986
+ _MODELCONFIG_METRICTAGSENTRY._serialized_end = 8035
+ _MODELCONFIG_PARAMETERSENTRY._serialized_start = 8037
+ _MODELCONFIG_PARAMETERSENTRY._serialized_end = 8113
+# @@protoc_insertion_point(module_scope)
diff --git a/visualdl/server/api.py b/visualdl/server/api.py
index 502bf48f0..0ef7b6dc1 100644
--- a/visualdl/server/api.py
+++ b/visualdl/server/api.py
@@ -417,7 +417,10 @@ def get_component_tabs(*apis, vdl_args, request_args):
all_tabs.update(api('component_tabs', request_args))
all_tabs.add('static_graph')
else:
- return ['static_graph', 'x2paddle', 'fastdeploy_server']
+ return [
+ 'static_graph', 'x2paddle', 'fastdeploy_server',
+ 'fastdeploy_client'
+ ]
return list(all_tabs)
diff --git a/visualdl/server/app.py b/visualdl/server/app.py
index 5f9454fa9..e451c4e21 100644
--- a/visualdl/server/app.py
+++ b/visualdl/server/app.py
@@ -13,12 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
+import json
import multiprocessing
import os
import re
import sys
import threading
import time
+import urllib
import webbrowser
import requests
@@ -32,6 +34,8 @@
import visualdl.server
from visualdl import __version__
+from visualdl.component.inference.fastdeploy_lib import get_start_arguments
+from visualdl.component.inference.fastdeploy_server import create_fastdeploy_api_call
from visualdl.component.inference.model_convert_server import create_model_convert_api_call
from visualdl.component.profiler.profiler_server import create_profiler_api_call
from visualdl.server.api import create_api_call
@@ -71,6 +75,7 @@ def create_app(args): # noqa: C901
api_call = create_api_call(args.logdir, args.model, args.cache_timeout)
profiler_api_call = create_profiler_api_call(args.logdir)
inference_api_call = create_model_convert_api_call()
+ fastdeploy_api_call = create_fastdeploy_api_call()
if args.telemetry:
update_util.PbUpdater(args.product).start()
@@ -153,6 +158,141 @@ def serve_inference_api(method):
return make_response(
Response(data, mimetype=mimetype, headers=headers))
+ @app.route(api_path + '/fastdeploy/', methods=["GET", "POST"])
+ def serve_fastdeploy_api(method):
+ if request.method == 'POST':
+ data, mimetype, headers = fastdeploy_api_call(method, request.form)
+ else:
+ data, mimetype, headers = fastdeploy_api_call(method, request.args)
+ return make_response(
+ Response(data, mimetype=mimetype, headers=headers))
+
+ @app.route(
+ api_path + '/fastdeploy/fastdeploy_client', methods=["GET", "POST"])
+ def serve_fastdeploy_create_fastdeploy_client():
+ try:
+ if request.method == 'POST':
+ fastdeploy_api_call('create_fastdeploy_client', request.form)
+ request_args = request.form
+ else:
+ fastdeploy_api_call('create_fastdeploy_client', request.args)
+ request_args = request.args
+ except Exception as e:
+ error_msg = '{}'.format(e)
+ return make_response(error_msg)
+ args = urllib.parse.urlencode(request_args)
+ if args:
+ return redirect(
+ api_path + "/fastdeploy/fastdeploy_client/app?{}".format(args),
+ code=302)
+ return redirect(
+ api_path + "/fastdeploy/fastdeploy_client/app", code=302)
+
+ @app.route(
+ api_path + "/fastdeploy/fastdeploy_client/",
+ methods=["GET", "POST"])
+ def request_fastdeploy_create_fastdeploy_client_app(path: str):
+ '''
+ Gradio app server url interface. We route urls for gradio app to gradio server.
+
+ Args:
+ path(str): All resource path from gradio server.
+
+ Returns:
+ Any thing from gradio server.
+ '''
+ if request.method == 'POST':
+ port = fastdeploy_api_call('create_fastdeploy_client',
+ request.form)
+ request_args = request.form
+ else:
+ port = fastdeploy_api_call('create_fastdeploy_client',
+ request.args)
+ request_args = request.args
+ if path == 'app':
+ proxy_url = request.url.replace(
+ request.host_url.rstrip('/') + api_path +
+ '/fastdeploy/fastdeploy_client/app',
+ 'http://localhost:{}/'.format(port))
+ else:
+ proxy_url = request.url.replace(
+ request.host_url.rstrip('/') + api_path +
+ '/fastdeploy/fastdeploy_client/',
+ 'http://localhost:{}/'.format(port))
+ resp = requests.request(
+ method=request.method,
+ url=proxy_url,
+ headers={
+ key: value
+ for (key, value) in request.headers if key != 'Host'
+ },
+ data=request.get_data(),
+ cookies=request.cookies,
+ allow_redirects=False)
+ if path == 'app':
+ content = resp.content
+ if request_args and 'server_id' in request_args:
+ server_id = request_args.get('server_id')
+ start_args = get_start_arguments(server_id)
+ http_port = start_args.get('http-port', '')
+ metrics_port = start_args.get('metrics-port', '')
+ model_name = start_args.get('default_model_name', '')
+ content = content.decode()
+ try:
+ default_server_addr = re.search(
+ '"label": {}.*?"value": "".*?}}'.format(
+ json.dumps("服务ip", ensure_ascii=True).replace(
+ '\\', '\\\\')), content).group(0)
+ cur_server_addr = default_server_addr.replace(
+ '"value": ""', '"value": "localhost"')
+ default_http_port = re.search(
+ '"label": {}.*?"value": "".*?}}'.format(
+ json.dumps("推理服务端口", ensure_ascii=True).replace(
+ '\\', '\\\\')), content).group(0)
+ cur_http_port = default_http_port.replace(
+ '"value": ""', '"value": "{}"'.format(http_port))
+ default_metrics_port = re.search(
+ '"label": {}.*?"value": "".*?}}'.format(
+ json.dumps("性能服务端口", ensure_ascii=True).replace(
+ '\\', '\\\\')), content).group(0)
+ cur_metrics_port = default_metrics_port.replace(
+ '"value": ""', '"value": "{}"'.format(metrics_port))
+ default_model_name = re.search(
+ '"label": {}.*?"value": "".*?}}'.format(
+ json.dumps("模型名称", ensure_ascii=True).replace(
+ '\\', '\\\\')), content).group(0)
+ cur_model_name = default_model_name.replace(
+ '"value": ""', '"value": "{}"'.format(model_name))
+ default_model_version = re.search(
+ '"label": {}.*?"value": "".*?}}'.format(
+ json.dumps("模型版本", ensure_ascii=True).replace(
+ '\\', '\\\\')), content).group(0)
+ cur_model_version = default_model_version.replace(
+ '"value": ""', '"value": "{}"'.format('1'))
+ content = content.replace(default_server_addr,
+ cur_server_addr)
+ if http_port:
+ content = content.replace(default_http_port,
+ cur_http_port)
+ if metrics_port:
+ content = content.replace(default_metrics_port,
+ cur_metrics_port)
+ if model_name:
+ content = content.replace(default_model_name,
+ cur_model_name)
+
+ content = content.replace(default_model_version,
+ cur_model_version)
+ except Exception:
+ pass
+ finally:
+ content = content.encode()
+ else:
+ content = resp.content
+ headers = [(name, value) for (name, value) in resp.raw.headers.items()]
+ response = Response(content, resp.status_code, headers)
+ return response
+
@app.route(api_path + '/component_tabs')
def component_tabs():
data, mimetype, headers = get_component_tabs(
diff --git a/visualdl/server/args.py b/visualdl/server/args.py
index cb42422c7..71f97afb1 100644
--- a/visualdl/server/args.py
+++ b/visualdl/server/args.py
@@ -78,7 +78,8 @@ def validate_args(args):
supported_tabs = [
'scalar', 'image', 'text', 'embeddings', 'audio', 'histogram',
'hyper_parameters', 'static_graph', 'dynamic_graph', 'pr_curve',
- 'roc_curve', 'profiler', 'x2paddle', 'fastdeploy_server'
+ 'roc_curve', 'profiler', 'x2paddle', 'fastdeploy_server',
+ 'fastdeploy_client'
]
if args.component_tabs is not None:
for component_tab in args.component_tabs:
diff --git a/visualdl/utils/dir.py b/visualdl/utils/dir.py
index 64199f4cd..b22ed4246 100644
--- a/visualdl/utils/dir.py
+++ b/visualdl/utils/dir.py
@@ -23,6 +23,7 @@
VDL_HOME = os.path.join(USER_HOME, '.visualdl')
CONF_HOME = os.path.join(VDL_HOME, 'conf')
CONFIG_PATH = os.path.join(CONF_HOME, 'config.json')
+FASTDEPLOYSERVER_PATH = os.path.join(VDL_HOME, 'fastdeployserver')
X2PADDLE_CACHE_PATH = os.path.join(VDL_HOME, 'x2paddle')
@@ -32,5 +33,7 @@ def init_vdl_config():
if not os.path.exists(CONFIG_PATH) or 0 == os.path.getsize(CONFIG_PATH):
with open(CONFIG_PATH, 'w') as fp:
fp.write(json.dumps(default_vdl_config))
+ if not os.path.exists(FASTDEPLOYSERVER_PATH):
+ os.makedirs(FASTDEPLOYSERVER_PATH, exist_ok=True)
if not os.path.exists(X2PADDLE_CACHE_PATH):
os.makedirs(X2PADDLE_CACHE_PATH, exist_ok=True)