From d0f68c0cea4bd2118b15335be7c67d5d232443f5 Mon Sep 17 00:00:00 2001 From: Dipanshu Gupta Date: Fri, 1 Mar 2024 14:55:13 +0530 Subject: [PATCH 1/2] Enable GRPC support for model serving routes --- .../mockServingRuntimeK8sResource.ts | 4 + .../mockServingRuntimeTemplateK8sResource.ts | 5 +- .../CustomServingRuntimes.cy.ts | 40 ++++++- .../e2e/modelServing/ServingRuntimeList.cy.ts | 3 + .../cypress/cypress/pages/modelServing.ts | 6 + .../cypress/cypress/pages/servingRuntimes.ts | 48 +++++++- .../src/api/k8s/__tests__/templates.spec.ts | 19 ++- frontend/src/api/k8s/templates.ts | 4 +- frontend/src/k8sTypes.ts | 2 + .../CustomServingRuntimeAPIProtocolLabel.tsx | 34 ++++++ ...ustomServingRuntimeAPIProtocolSelector.tsx | 59 ++++++++++ .../CustomServingRuntimeAddTemplate.tsx | 33 +++++- ...ustomServingRuntimePlatformsLabelGroup.tsx | 4 +- .../CustomServingRuntimePlatformsSelector.tsx | 2 +- .../CustomServingRuntimeTableRow.tsx | 4 + .../customServingRuntimes/templatedData.tsx | 5 + .../customServingRuntimes/utils.ts | 36 +++++- .../global/InferenceServiceAPIProtocol.tsx | 27 +++++ .../global/InferenceServiceTableRow.tsx | 7 ++ .../pages/modelServing/screens/global/data.ts | 10 ++ frontend/src/services/templateService.ts | 21 +++- frontend/src/types.ts | 5 + manifests/modelserving/caikit-ootb.yaml | 110 +++++++++--------- manifests/modelserving/ovms-kserve-ootd.yaml | 93 +++++++-------- manifests/modelserving/ovms-ootb.yaml | 5 +- manifests/modelserving/tgis-ootb.yaml | 85 +++++++------- 26 files changed, 504 insertions(+), 167 deletions(-) create mode 100644 frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAPIProtocolLabel.tsx create mode 100644 frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAPIProtocolSelector.tsx create mode 100644 frontend/src/pages/modelServing/screens/global/InferenceServiceAPIProtocol.tsx diff --git a/frontend/src/__mocks__/mockServingRuntimeK8sResource.ts b/frontend/src/__mocks__/mockServingRuntimeK8sResource.ts index d1b1e4172e..bbf3061cf4 100644 --- a/frontend/src/__mocks__/mockServingRuntimeK8sResource.ts +++ b/frontend/src/__mocks__/mockServingRuntimeK8sResource.ts @@ -1,4 +1,5 @@ import { KnownLabels, ServingRuntimeKind } from '~/k8sTypes'; +import { ServingRuntimeAPIProtocol } from '~/types'; type MockResourceConfigType = { name?: string; @@ -8,6 +9,7 @@ type MockResourceConfigType = { auth?: boolean; route?: boolean; acceleratorName?: string; + apiProtocol?: ServingRuntimeAPIProtocol; }; export const mockServingRuntimeK8sResourceLegacy = ({ @@ -92,6 +94,7 @@ export const mockServingRuntimeK8sResource = ({ route = false, displayName = 'OVMS Model Serving', acceleratorName = '', + apiProtocol = ServingRuntimeAPIProtocol.REST, }: MockResourceConfigType): ServingRuntimeKind => ({ apiVersion: 'serving.kserve.io/v1alpha1', kind: 'ServingRuntime', @@ -108,6 +111,7 @@ export const mockServingRuntimeK8sResource = ({ 'enable-auth': auth ? 'true' : 'false', 'enable-route': route ? 'true' : 'false', 'openshift.io/display-name': displayName, + 'opendatahub.io/apiProtocol': apiProtocol, }, name, namespace, diff --git a/frontend/src/__mocks__/mockServingRuntimeTemplateK8sResource.ts b/frontend/src/__mocks__/mockServingRuntimeTemplateK8sResource.ts index 4a59e8a430..5c6d188f1b 100644 --- a/frontend/src/__mocks__/mockServingRuntimeTemplateK8sResource.ts +++ b/frontend/src/__mocks__/mockServingRuntimeTemplateK8sResource.ts @@ -1,5 +1,5 @@ import { TemplateKind } from '~/k8sTypes'; -import { ServingRuntimePlatform } from '~/types'; +import { ServingRuntimeAPIProtocol, ServingRuntimePlatform } from '~/types'; type MockResourceConfigType = { name?: string; @@ -7,6 +7,7 @@ type MockResourceConfigType = { displayName?: string; replicas?: number; platforms?: ServingRuntimePlatform[]; + apiProtocol?: ServingRuntimeAPIProtocol; isModelmesh?: boolean; }; @@ -16,6 +17,7 @@ export const mockServingRuntimeTemplateK8sResource = ({ displayName = 'New OVMS Server', replicas = 1, isModelmesh = false, + apiProtocol = ServingRuntimeAPIProtocol.REST, platforms, }: MockResourceConfigType): TemplateKind => ({ apiVersion: 'template.openshift.io/v1', @@ -28,6 +30,7 @@ export const mockServingRuntimeTemplateK8sResource = ({ }, annotations: { 'opendatahub.io/modelServingSupport': JSON.stringify(platforms), + 'opendatahub.io/apiProtocol': apiProtocol, }, }, objects: [ diff --git a/frontend/src/__tests__/cypress/cypress/e2e/customServingRuntimes/CustomServingRuntimes.cy.ts b/frontend/src/__tests__/cypress/cypress/e2e/customServingRuntimes/CustomServingRuntimes.cy.ts index 42c6bb43a5..2951533ab3 100644 --- a/frontend/src/__tests__/cypress/cypress/e2e/customServingRuntimes/CustomServingRuntimes.cy.ts +++ b/frontend/src/__tests__/cypress/cypress/e2e/customServingRuntimes/CustomServingRuntimes.cy.ts @@ -4,7 +4,7 @@ import { mockProjectK8sResource } from '~/__mocks__/mockProjectK8sResource'; import { mockServingRuntimeTemplateK8sResource } from '~/__mocks__/mockServingRuntimeTemplateK8sResource'; import { mockStatus } from '~/__mocks__/mockStatus'; import { servingRuntimes } from '~/__tests__/cypress/cypress/pages/servingRuntimes'; -import { ServingRuntimePlatform } from '~/types'; +import { ServingRuntimeAPIProtocol, ServingRuntimePlatform } from '~/types'; describe('Custom serving runtimes', () => { beforeEach(() => { @@ -23,6 +23,7 @@ describe('Custom serving runtimes', () => { name: 'template-2', displayName: 'Caikit', platforms: [ServingRuntimePlatform.SINGLE], + apiProtocol: ServingRuntimeAPIProtocol.GRPC, }), mockServingRuntimeTemplateK8sResource({ name: 'template-3', @@ -54,14 +55,47 @@ describe('Custom serving runtimes', () => { servingRuntimes.getRowById('template-4').shouldBeSingleModel(false).shouldBeMultiModel(true); }); + it('should display api protocol in table row', () => { + servingRuntimes.getRowById('template-1').shouldHaveAPIProtocol(ServingRuntimeAPIProtocol.REST); + servingRuntimes.getRowById('template-2').shouldHaveAPIProtocol(ServingRuntimeAPIProtocol.GRPC); + servingRuntimes.getRowById('template-3').shouldHaveAPIProtocol(ServingRuntimeAPIProtocol.REST); + servingRuntimes.getRowById('template-4').shouldHaveAPIProtocol(ServingRuntimeAPIProtocol.REST); + }); + it('should add a new serving runtime', () => { servingRuntimes.findAddButton().click(); cy.get('h1').should('contain', 'Add serving runtime'); - servingRuntimes.shouldDisplayValues([ + + // Check serving runtime dropdown list + servingRuntimes.shouldDisplayServingRuntimeValues([ 'Single-model serving platform', 'Multi-model serving platform', - 'Single-model and multi-model serving platforms', ]); + servingRuntimes.findSelectServingPlatformButton().click(); + + // Create with single model + servingRuntimes.findCreateButton().should('be.disabled'); + servingRuntimes.shouldSelectPlatform('Single-model serving platform'); + servingRuntimes.shouldDisplayAPIProtocolValues([ + ServingRuntimeAPIProtocol.REST, + ServingRuntimeAPIProtocol.GRPC, + ]); + servingRuntimes.shouldSelectAPIProtocol(ServingRuntimeAPIProtocol.REST); + servingRuntimes.findStartFromScratchButton().click(); + servingRuntimes.shouldEnterData(); + servingRuntimes.findCreateButton().should('be.enabled'); + servingRuntimes.findCancelButton().click(); + + servingRuntimes.findAddButton().click(); + + // Create with multi model + servingRuntimes.findCreateButton().should('be.disabled'); + servingRuntimes.shouldSelectPlatform('Multi-model serving platform'); + servingRuntimes.findSelectAPIProtocolButton().should('not.be.enabled'); + servingRuntimes.findSelectAPIProtocolButton().should('include.text', 'REST'); + servingRuntimes.findStartFromScratchButton().click(); + servingRuntimes.shouldEnterData(); + servingRuntimes.findCreateButton().should('be.enabled'); }); it('should duplicate a serving runtime', () => { diff --git a/frontend/src/__tests__/cypress/cypress/e2e/modelServing/ServingRuntimeList.cy.ts b/frontend/src/__tests__/cypress/cypress/e2e/modelServing/ServingRuntimeList.cy.ts index 8e9e903f2d..e310d6c43b 100644 --- a/frontend/src/__tests__/cypress/cypress/e2e/modelServing/ServingRuntimeList.cy.ts +++ b/frontend/src/__tests__/cypress/cypress/e2e/modelServing/ServingRuntimeList.cy.ts @@ -480,6 +480,9 @@ describe('Serving Runtime List', () => { // Check status of deployed model which loaded successfully after an error modelServingSection.findStatusTooltip('Loaded model').should('be.visible'); modelServingSection.findStatusTooltipValue('Loaded model', 'Loaded'); + + // Check API protocol in row + modelServingSection.findAPIProtocol('Loaded model').should('have.text', 'REST'); }); }); diff --git a/frontend/src/__tests__/cypress/cypress/pages/modelServing.ts b/frontend/src/__tests__/cypress/cypress/pages/modelServing.ts index a97df715cb..e0c2b4920f 100644 --- a/frontend/src/__tests__/cypress/cypress/pages/modelServing.ts +++ b/frontend/src/__tests__/cypress/cypress/pages/modelServing.ts @@ -283,6 +283,12 @@ class ModelServingSection { this.findStatusTooltip(name).trigger('mouseleave'); }); } + + findAPIProtocol(name: string) { + return this.findInferenceServiceTable() + .contains('tr', name) + .find('td[data-label="API protocol"]'); + } } export const modelServingGlobal = new ModelServingGlobal(); diff --git a/frontend/src/__tests__/cypress/cypress/pages/servingRuntimes.ts b/frontend/src/__tests__/cypress/cypress/pages/servingRuntimes.ts index 46d836aba5..8236cb5de1 100644 --- a/frontend/src/__tests__/cypress/cypress/pages/servingRuntimes.ts +++ b/frontend/src/__tests__/cypress/cypress/pages/servingRuntimes.ts @@ -1,4 +1,5 @@ import { appChrome } from '~/__tests__/cypress/cypress/pages/appChrome'; +import { ServingRuntimeAPIProtocol } from '~/types'; class ServingRuntimeRow { constructor(public readonly id: string) {} @@ -22,6 +23,10 @@ class ServingRuntimeRow { .should(enabled ? 'exist' : 'not.exist'); return this; } + + shouldHaveAPIProtocol(apiProtocol: ServingRuntimeAPIProtocol) { + this.find().get('[data-label="API protocol"]').should('include.text', apiProtocol); + } } class ServingRuntimes { @@ -54,16 +59,51 @@ class ServingRuntimes { return cy.findByRole('button', { name: 'Add serving runtime' }); } - findSelectValueButton() { - return cy.findByRole('button', { name: 'Select a value' }); + findStartFromScratchButton() { + return cy.findByRole('button', { name: 'Start from scratch' }); + } + + findCreateButton() { + return cy.findByRole('button', { name: 'Create' }); + } + + findCancelButton() { + return cy.findByRole('button', { name: 'Cancel' }); + } + + findSelectServingPlatformButton() { + return cy.findByTestId('custom-serving-runtime-selection'); + } + + findSelectAPIProtocolButton() { + return cy.findByTestId('custom-serving-api-protocol-selection'); } - shouldDisplayValues(values: string[]) { - this.findSelectValueButton().click(); + shouldDisplayServingRuntimeValues(values: string[]) { + this.findSelectServingPlatformButton().click(); values.forEach((value) => cy.findByRole('menuitem', { name: value }).should('exist')); return this; } + shouldDisplayAPIProtocolValues(values: ServingRuntimeAPIProtocol[]) { + this.findSelectAPIProtocolButton().click(); + values.forEach((value) => cy.findByRole('menuitem', { name: value }).should('exist')); + return this; + } + + shouldSelectPlatform(value: string) { + this.findSelectServingPlatformButton().click(); + cy.findByRole('menuitem', { name: value }).click(); + } + + shouldSelectAPIProtocol(value: string) { + cy.findByRole('menuitem', { name: value }).click(); + } + + shouldEnterData() { + cy.get('.view-lines.monaco-mouse-cursor-text').type('test'); + } + getRowById(id: string) { return new ServingRuntimeRow(id); } diff --git a/frontend/src/api/k8s/__tests__/templates.spec.ts b/frontend/src/api/k8s/__tests__/templates.spec.ts index 23d1b86926..23df1ddc5c 100644 --- a/frontend/src/api/k8s/__tests__/templates.spec.ts +++ b/frontend/src/api/k8s/__tests__/templates.spec.ts @@ -6,7 +6,7 @@ import { mockServingRuntimeTemplateK8sResource } from '~/__mocks__/mockServingRu import { assembleServingRuntimeTemplate, deleteTemplate, listTemplates } from '~/api'; import { TemplateModel } from '~/api/models'; import { K8sDSGResource, TemplateKind } from '~/k8sTypes'; -import { ServingRuntimePlatform } from '~/types'; +import { ServingRuntimeAPIProtocol, ServingRuntimePlatform } from '~/types'; import { genRandomChars } from '~/utilities/string'; jest.mock('@openshift/dynamic-plugin-sdk-utils', () => ({ @@ -38,6 +38,7 @@ describe('assembleServingRuntimeTemplate', () => { servingRuntimeMock, namespace, [ServingRuntimePlatform.MULTI], + ServingRuntimeAPIProtocol.REST, 'template-1', ); expect(result).toStrictEqual( @@ -47,9 +48,12 @@ describe('assembleServingRuntimeTemplate', () => { it('should assemble serving runtime template without templateName', () => { genRandomCharsMock.mockReturnValue('123'); const servingRuntimeMock = JSON.stringify(createServingRuntime('template-123')); - const result = assembleServingRuntimeTemplate(servingRuntimeMock, namespace, [ - ServingRuntimePlatform.MULTI, - ]); + const result = assembleServingRuntimeTemplate( + servingRuntimeMock, + namespace, + [ServingRuntimePlatform.MULTI], + ServingRuntimeAPIProtocol.REST, + ); expect(result).toStrictEqual( mockServingRuntimeTemplateK8sResource({ name: 'template-123', @@ -61,7 +65,12 @@ describe('assembleServingRuntimeTemplate', () => { it('should throw an error when servingRuntime name doesnt exist', () => { const servingRuntimeMock = JSON.stringify(createServingRuntime('')); expect(() => { - assembleServingRuntimeTemplate(servingRuntimeMock, namespace, [ServingRuntimePlatform.MULTI]); + assembleServingRuntimeTemplate( + servingRuntimeMock, + namespace, + [ServingRuntimePlatform.MULTI], + ServingRuntimeAPIProtocol.REST, + ); }).toThrow('Serving runtime name is required'); }); }); diff --git a/frontend/src/api/k8s/templates.ts b/frontend/src/api/k8s/templates.ts index e3b37490ed..5d90ea50cd 100644 --- a/frontend/src/api/k8s/templates.ts +++ b/frontend/src/api/k8s/templates.ts @@ -3,12 +3,13 @@ import { k8sDeleteResource, k8sListResource } from '@openshift/dynamic-plugin-sd import { ServingRuntimeKind, TemplateKind } from '~/k8sTypes'; import { TemplateModel } from '~/api/models'; import { genRandomChars } from '~/utilities/string'; -import { ServingRuntimePlatform } from '~/types'; +import { ServingRuntimeAPIProtocol, ServingRuntimePlatform } from '~/types'; export const assembleServingRuntimeTemplate = ( body: string, namespace: string, platforms: ServingRuntimePlatform[], + apiProtocol: ServingRuntimeAPIProtocol | undefined, templateName?: string, ): TemplateKind & { objects: ServingRuntimeKind[] } => { const servingRuntime: ServingRuntimeKind = YAML.parse(body); @@ -30,6 +31,7 @@ export const assembleServingRuntimeTemplate = ( }, annotations: { 'opendatahub.io/modelServingSupport': JSON.stringify(platforms), + ...(apiProtocol && { 'opendatahub.io/apiProtocol': apiProtocol }), }, }, objects: [servingRuntime], diff --git a/frontend/src/k8sTypes.ts b/frontend/src/k8sTypes.ts index 649070b58e..8a0613c5f0 100644 --- a/frontend/src/k8sTypes.ts +++ b/frontend/src/k8sTypes.ts @@ -110,6 +110,7 @@ export type ServingRuntimeAnnotations = Partial<{ 'opendatahub.io/disable-gpu': string; 'opendatahub.io/recommended-accelerators': string; 'opendatahub.io/accelerator-name': string; + 'opendatahub.io/apiProtocol': string; 'enable-route': string; 'enable-auth': string; 'modelmesh-enabled': 'true' | 'false'; @@ -1074,6 +1075,7 @@ export type TemplateKind = K8sResourceCommon & { iconClass?: string; 'opendatahub.io/template-enabled': string; 'opendatahub.io/modelServingSupport': string; + 'opendatahub.io/apiProtocol': string; }>; name: string; namespace: string; diff --git a/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAPIProtocolLabel.tsx b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAPIProtocolLabel.tsx new file mode 100644 index 0000000000..ef9818a9ec --- /dev/null +++ b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAPIProtocolLabel.tsx @@ -0,0 +1,34 @@ +import * as React from 'react'; +import { Label, Text, TextVariants } from '@patternfly/react-core'; +import { TemplateKind } from '~/k8sTypes'; +import { + getAPIProtocolFromTemplate, + getEnabledPlatformsFromTemplate, +} from '~/pages/modelServing/customServingRuntimes/utils'; +import { ServingRuntimeAPIProtocol, ServingRuntimePlatform } from '~/types'; + +type CustomServingRuntimeAPIProtocolLabelProps = { + template: TemplateKind; +}; + +const CustomServingRuntimeAPIProtocolLabel: React.FC = ({ + template, +}) => { + const apiProtocol = getAPIProtocolFromTemplate(template); + const isMultiModel = getEnabledPlatformsFromTemplate(template).includes( + ServingRuntimePlatform.MULTI, + ); + + // If it is multi-model, we use REST as default + if (!apiProtocol && isMultiModel) { + return ; + } + + if (!apiProtocol || !Object.values(ServingRuntimeAPIProtocol).includes(apiProtocol)) { + return Not defined; + } + + return ; +}; + +export default CustomServingRuntimeAPIProtocolLabel; diff --git a/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAPIProtocolSelector.tsx b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAPIProtocolSelector.tsx new file mode 100644 index 0000000000..447a192f64 --- /dev/null +++ b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAPIProtocolSelector.tsx @@ -0,0 +1,59 @@ +import * as React from 'react'; +import { FormGroup } from '@patternfly/react-core'; +import { ServingRuntimeAPIProtocol, ServingRuntimePlatform } from '~/types'; +import SimpleDropdownSelect from '~/components/SimpleDropdownSelect'; + +type CustomServingRuntimeAPIProtocolSelectorProps = { + selectedAPIProtocol: ServingRuntimeAPIProtocol | undefined; + setSelectedAPIProtocol: (apiProtocol: ServingRuntimeAPIProtocol) => void; + selectedPlatforms: ServingRuntimePlatform[]; +}; + +const CustomServingRuntimeAPIProtocolSelector: React.FC< + CustomServingRuntimeAPIProtocolSelectorProps +> = ({ selectedAPIProtocol, setSelectedAPIProtocol, selectedPlatforms }) => { + const isOnlyModelMesh = + selectedPlatforms.includes(ServingRuntimePlatform.MULTI) && + !selectedPlatforms.includes(ServingRuntimePlatform.SINGLE); + + React.useEffect(() => { + if (isOnlyModelMesh) { + setSelectedAPIProtocol(ServingRuntimeAPIProtocol.REST); + } + }, [isOnlyModelMesh, setSelectedAPIProtocol]); + + const options = [ + { + key: ServingRuntimeAPIProtocol.REST, + label: ServingRuntimeAPIProtocol.REST, + }, + ...(isOnlyModelMesh + ? [] + : [ + { + key: ServingRuntimeAPIProtocol.GRPC, + label: ServingRuntimeAPIProtocol.GRPC, + }, + ]), + ]; + + return ( + + setSelectedAPIProtocol(key as ServingRuntimeAPIProtocol)} + /> + + ); +}; + +export default CustomServingRuntimeAPIProtocolSelector; diff --git a/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAddTemplate.tsx b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAddTemplate.tsx index aad3982952..20239d27da 100644 --- a/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAddTemplate.tsx +++ b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimeAddTemplate.tsx @@ -21,15 +21,17 @@ import { createServingRuntimeTemplateBackend, updateServingRuntimeTemplateBackend, } from '~/services/templateService'; -import { ServingRuntimePlatform } from '~/types'; +import { ServingRuntimeAPIProtocol, ServingRuntimePlatform } from '~/types'; import CustomServingRuntimePlatformsSelector from '~/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsSelector'; import { + getAPIProtocolFromTemplate, getEnabledPlatformsFromTemplate, getServingRuntimeDisplayNameFromTemplate, getServingRuntimeNameFromTemplate, isServingRuntimeKind, } from './utils'; import { CustomServingRuntimeContext } from './CustomServingRuntimeContext'; +import CustomServingRuntimeAPIProtocolSelector from './CustomServingRuntimeAPIProtocolSelector'; type CustomServingRuntimeAddTemplateProps = { existingTemplate?: TemplateKind; @@ -83,11 +85,27 @@ const CustomServingRuntimeAddTemplate: React.FC (state ? getAPIProtocolFromTemplate(state.template) : undefined), + [state], + ); + + const apiProtocol: ServingRuntimeAPIProtocol | undefined = React.useMemo( + () => + existingTemplate + ? getAPIProtocolFromTemplate(existingTemplate) + : copiedServingRuntimeAPIProtocol, + [existingTemplate, copiedServingRuntimeAPIProtocol], + ); + const [code, setCode] = React.useState(stringifiedTemplate); const [selectedPlatforms, setSelectedPlatforms] = React.useState(enabledPlatforms); const isSinglePlatformEnabled = selectedPlatforms.includes(ServingRuntimePlatform.SINGLE); const isMultiPlatformEnabled = selectedPlatforms.includes(ServingRuntimePlatform.MULTI); + const [selectedAPIProtocol, setSelectedAPIProtocol] = React.useState< + ServingRuntimeAPIProtocol | undefined + >(apiProtocol); const [loading, setIsLoading] = React.useState(false); const [error, setError] = React.useState(undefined); const navigate = useNavigate(); @@ -96,9 +114,11 @@ const CustomServingRuntimeAddTemplate: React.FC + + + { diff --git a/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsLabelGroup.tsx b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsLabelGroup.tsx index a8b05e7804..860ffb999a 100644 --- a/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsLabelGroup.tsx +++ b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsLabelGroup.tsx @@ -25,7 +25,9 @@ const CustomServingRuntimePlatformsLabelGroup: React.FC< return ( {platforms.map((platform, i) => ( - + ))} ); diff --git a/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsSelector.tsx b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsSelector.tsx index d719104539..a6057287d0 100644 --- a/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsSelector.tsx +++ b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsSelector.tsx @@ -48,7 +48,7 @@ const CustomServingRuntimePlatformsSelector: React.FC< isRequired > + + + [] = [ label: 'Serving platforms supported', sortable: false, }, + { + field: 'apiProtocol', + label: 'API protocol', + sortable: false, + }, { field: 'kebab', label: '', diff --git a/frontend/src/pages/modelServing/customServingRuntimes/utils.ts b/frontend/src/pages/modelServing/customServingRuntimes/utils.ts index 7440f4b262..4b879d0bf8 100644 --- a/frontend/src/pages/modelServing/customServingRuntimes/utils.ts +++ b/frontend/src/pages/modelServing/customServingRuntimes/utils.ts @@ -1,7 +1,7 @@ import { K8sResourceCommon } from '@openshift/dynamic-plugin-sdk-utils'; import { ServingRuntimeKind, TemplateKind } from '~/k8sTypes'; import { getDisplayNameFromK8sResource } from '~/pages/projects/utils'; -import { ServingRuntimePlatform } from '~/types'; +import { ServingRuntimeAPIProtocol, ServingRuntimePlatform } from '~/types'; export const getTemplateEnabled = ( template: TemplateKind, @@ -89,7 +89,20 @@ export const getServingRuntimeFromTemplate = ( } catch (e) { return undefined; } - return template.objects[0]; + + // Add apiProtocol annotation if exists in template + const apiProtocolAttribute = 'opendatahub.io/apiProtocol'; + const servingRuntimeObj = { ...template.objects[0] }; + const metadata = { ...template.objects[0].metadata }; + + if (metadata.annotations && template.metadata.annotations?.[apiProtocolAttribute]) { + metadata.annotations[apiProtocolAttribute] = + template.metadata.annotations[apiProtocolAttribute]; + } + + servingRuntimeObj.metadata = metadata; + + return servingRuntimeObj; }; export const getDisplayNameFromServingRuntimeTemplate = (resource: ServingRuntimeKind): string => { @@ -122,3 +135,22 @@ export const getEnabledPlatformsFromTemplate = ( return [ServingRuntimePlatform.MULTI]; } }; + +export const getAPIProtocolFromTemplate = ( + template: TemplateKind, +): ServingRuntimeAPIProtocol | undefined => { + if (!template.metadata.annotations?.['opendatahub.io/apiProtocol']) { + return undefined; + } + + return template.metadata.annotations['opendatahub.io/apiProtocol'] as ServingRuntimeAPIProtocol; +}; + +export const getAPIProtocolFromServingRuntime = ( + resource: ServingRuntimeKind, +): ServingRuntimeAPIProtocol | undefined => { + if (!resource.metadata.annotations?.['opendatahub.io/apiProtocol']) { + return undefined; + } + return resource.metadata.annotations['opendatahub.io/apiProtocol'] as ServingRuntimeAPIProtocol; +}; diff --git a/frontend/src/pages/modelServing/screens/global/InferenceServiceAPIProtocol.tsx b/frontend/src/pages/modelServing/screens/global/InferenceServiceAPIProtocol.tsx new file mode 100644 index 0000000000..155feb0289 --- /dev/null +++ b/frontend/src/pages/modelServing/screens/global/InferenceServiceAPIProtocol.tsx @@ -0,0 +1,27 @@ +import * as React from 'react'; +import { Label, Text, TextVariants } from '@patternfly/react-core'; +import { ServingRuntimeKind } from '~/k8sTypes'; +import { getAPIProtocolFromServingRuntime } from '~/pages/modelServing/customServingRuntimes/utils'; +import { ServingRuntimeAPIProtocol } from '~/types'; + +type Props = { + servingRuntime?: ServingRuntimeKind; + isMultiModel?: boolean; +}; + +const InferenceServiceAPIProtocol: React.FC = ({ servingRuntime, isMultiModel }) => { + const apiProtocol = + (servingRuntime && getAPIProtocolFromServingRuntime(servingRuntime)) ?? undefined; + + // If it is multi-model, we use REST as default + if (!apiProtocol && isMultiModel) { + return ; + } + + if (!apiProtocol || !Object.values(ServingRuntimeAPIProtocol).includes(apiProtocol)) { + return Not defined; + } + + return ; +}; +export default InferenceServiceAPIProtocol; diff --git a/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx b/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx index cf176d9aba..e04d42e7e9 100644 --- a/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx +++ b/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx @@ -11,6 +11,7 @@ import InferenceServiceEndpoint from './InferenceServiceEndpoint'; import InferenceServiceProject from './InferenceServiceProject'; import InferenceServiceStatus from './InferenceServiceStatus'; import InferenceServiceServingRuntime from './InferenceServiceServingRuntime'; +import InferenceServiceAPIProtocol from './InferenceServiceAPIProtocol'; type InferenceServiceTableRowProps = { obj: InferenceServiceKind; @@ -72,6 +73,12 @@ const InferenceServiceTableRow: React.FC = ({ isKserve={!isModelMesh(inferenceService)} /> + + + = { sortable: false, }; +const COL_API_PROTOCOL: SortableData = { + field: 'apiProtocol', + label: 'API protocol', + width: 10, + sortable: false, +}; + const COL_STATUS: SortableData = { field: 'status', label: 'Status', @@ -69,12 +76,14 @@ export const getGlobalInferenceServiceColumns = ( buildProjectCol(projects), COL_SERVING_RUNTIME, COL_ENDPOINT, + COL_API_PROTOCOL, COL_STATUS, COL_KEBAB, ]; export const getProjectInferenceServiceColumns = (): SortableData[] => [ COL_NAME, COL_ENDPOINT, + COL_API_PROTOCOL, COL_STATUS, COL_KEBAB, ]; @@ -83,6 +92,7 @@ export const getKServeInferenceServiceColumns = (): SortableData => { try { - const template = assembleServingRuntimeTemplate(body, namespace, platforms); + const template = assembleServingRuntimeTemplate(body, namespace, platforms, apiProtocol); const servingRuntime = template.objects[0]; const servingRuntimeName = servingRuntime.metadata.name; @@ -62,6 +63,7 @@ export const updateServingRuntimeTemplateBackend = ( body: string, namespace: string, platforms: ServingRuntimePlatform[], + apiProtocol: ServingRuntimeAPIProtocol | undefined, ): Promise => { try { const { name } = existingTemplate.metadata; @@ -83,7 +85,7 @@ export const updateServingRuntimeTemplateBackend = ( path: '/objects/0', value: servingRuntime, }, - existingTemplate.metadata.annotations + existingTemplate.metadata.annotations?.['opendatahub.io/modelServingSupport'] ? { op: 'replace', path: '/metadata/annotations/opendatahub.io~1modelServingSupport', @@ -96,6 +98,19 @@ export const updateServingRuntimeTemplateBackend = ( 'opendatahub.io/modelServingSupport': JSON.stringify(platforms), }, }, + existingTemplate.metadata.annotations?.['opendatahub.io/apiProtocol'] + ? { + op: 'replace', + path: '/metadata/annotations/opendatahub.io~1apiProtocol', + value: apiProtocol, + } + : { + op: 'add', + path: '/metadata/annotations', + value: { + ...(apiProtocol && { 'opendatahub.io/apiProtocol': apiProtocol }), + }, + }, ]) .then((response) => response.data), ); diff --git a/frontend/src/types.ts b/frontend/src/types.ts index c20b0ebadb..e761988743 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -710,3 +710,8 @@ export enum ServingRuntimePlatform { SINGLE = 'single', MULTI = 'multi', } + +export enum ServingRuntimeAPIProtocol { + REST = 'REST', + GRPC = 'gRPC', +} diff --git a/manifests/modelserving/caikit-ootb.yaml b/manifests/modelserving/caikit-ootb.yaml index a0c966a52f..b869636e85 100644 --- a/manifests/modelserving/caikit-ootb.yaml +++ b/manifests/modelserving/caikit-ootb.yaml @@ -12,60 +12,62 @@ metadata: template.openshift.io/long-description: This template defines resources needed to deploy caikit-tgis-serving servingruntime with Red Hat Data Science KServe for LLM model template.openshift.io/support-url: https://access.redhat.com opendatahub.io/modelServingSupport: '["single"]' + opendatahub.io/apiProtocol: 'REST' name: caikit-tgis-serving-template objects: -- apiVersion: serving.kserve.io/v1alpha1 - kind: ServingRuntime - metadata: - name: caikit-tgis-runtime - annotations: - openshift.io/display-name: Caikit TGIS ServingRuntime for KServe - opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' - labels: - opendatahub.io/dashboard: 'true' - spec: - multiModel: false - supportedModelFormats: - - autoSelect: true - name: caikit - containers: - - name: kserve-container - image: text-generation-inference - command: - - text-generation-launcher - args: - - --model-name=/mnt/models/artifacts/ - env: - - name: TRANSFORMERS_CACHE - value: /tmp/transformers_cache - - name: transformer-container - image: caikit-tgis-serving - env: - - name: RUNTIME_LOCAL_MODELS_DIR - value: /mnt/models - - name: TRANSFORMERS_CACHE - value: /tmp/transformers_cache - - name: RUNTIME_GRPC_ENABLED - value: "false" - - name: RUNTIME_HTTP_ENABLED - value: "true" - - name: RUNTIME_GRPC_SERVER_THREAD_POOL_SIZE - value: "64" - ports: - - containerPort: 8080 - protocol: TCP - readinessProbe: - exec: - command: - - python - - -m - - caikit_health_probe - - readiness - livenessProbe: - exec: - command: - - python - - -m - - caikit_health_probe - - liveness + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + metadata: + name: caikit-tgis-runtime + annotations: + openshift.io/display-name: Caikit TGIS ServingRuntime for KServe + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + + labels: + opendatahub.io/dashboard: 'true' + spec: + multiModel: false + supportedModelFormats: + - autoSelect: true + name: caikit + containers: + - name: kserve-container + image: text-generation-inference + command: + - text-generation-launcher + args: + - --model-name=/mnt/models/artifacts/ + env: + - name: TRANSFORMERS_CACHE + value: /tmp/transformers_cache + - name: transformer-container + image: caikit-tgis-serving + env: + - name: RUNTIME_LOCAL_MODELS_DIR + value: /mnt/models + - name: TRANSFORMERS_CACHE + value: /tmp/transformers_cache + - name: RUNTIME_GRPC_ENABLED + value: 'false' + - name: RUNTIME_HTTP_ENABLED + value: 'true' + - name: RUNTIME_GRPC_SERVER_THREAD_POOL_SIZE + value: '64' + ports: + - containerPort: 8080 + protocol: TCP + readinessProbe: + exec: + command: + - python + - -m + - caikit_health_probe + - readiness + livenessProbe: + exec: + command: + - python + - -m + - caikit_health_probe + - liveness parameters: [] diff --git a/manifests/modelserving/ovms-kserve-ootd.yaml b/manifests/modelserving/ovms-kserve-ootd.yaml index 8adce131ed..9ba4fa5b67 100644 --- a/manifests/modelserving/ovms-kserve-ootd.yaml +++ b/manifests/modelserving/ovms-kserve-ootd.yaml @@ -9,50 +9,51 @@ metadata: tags: 'kserve-ovms,servingruntime' description: 'OpenVino Model Serving Definition' opendatahub.io/modelServingSupport: '["single"]' + opendatahub.io/apiProtocol: 'REST' objects: -- apiVersion: serving.kserve.io/v1alpha1 - kind: ServingRuntime - labels: - opendatahub.io/dashboard: 'true' - metadata: - annotations: - openshift.io/display-name: OpenVINO Model Server - opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' - name: kserve-ovms - spec: - multiModel: false - annotations: - prometheus.kserve.io/port: '8888' - prometheus.kserve.io/path: /metrics - supportedModelFormats: - - name: openvino_ir - version: opset11 - autoSelect: true - - name: onnx - version: '1' - - name: tensorflow - version: '1' - autoSelect: true - - name: tensorflow - version: '2' - autoSelect: true - - name: paddle - version: '2' - autoSelect: true - protocolVersions: - - v2 - - grpc-v2 - containers: - - name: kserve-container - image: 'ovms-kserve' - args: - - '--model_name={{.Name}}' - - '--port=8001' - - '--rest_port=8888' - - '--model_path=/mnt/models' - - '--file_system_poll_wait_seconds=0' - - '--grpc_bind_address=127.0.0.1' - - '--rest_bind_address=127.0.0.1' - ports: - - containerPort: 8888 - protocol: TCP \ No newline at end of file + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + labels: + opendatahub.io/dashboard: 'true' + metadata: + annotations: + openshift.io/display-name: OpenVINO Model Server + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + name: kserve-ovms + spec: + multiModel: false + annotations: + prometheus.kserve.io/port: '8888' + prometheus.kserve.io/path: /metrics + supportedModelFormats: + - name: openvino_ir + version: opset11 + autoSelect: true + - name: onnx + version: '1' + - name: tensorflow + version: '1' + autoSelect: true + - name: tensorflow + version: '2' + autoSelect: true + - name: paddle + version: '2' + autoSelect: true + protocolVersions: + - v2 + - grpc-v2 + containers: + - name: kserve-container + image: 'ovms-kserve' + args: + - '--model_name={{.Name}}' + - '--port=8001' + - '--rest_port=8888' + - '--model_path=/mnt/models' + - '--file_system_poll_wait_seconds=0' + - '--grpc_bind_address=127.0.0.1' + - '--rest_bind_address=127.0.0.1' + ports: + - containerPort: 8888 + protocol: TCP diff --git a/manifests/modelserving/ovms-ootb.yaml b/manifests/modelserving/ovms-ootb.yaml index e0eaeb61ed..f6f469d9a1 100644 --- a/manifests/modelserving/ovms-ootb.yaml +++ b/manifests/modelserving/ovms-ootb.yaml @@ -9,6 +9,7 @@ metadata: tags: 'ovms,servingruntime' description: 'OpenVino Model Serving Definition' opendatahub.io/modelServingSupport: '["multi"]' + opendatahub.io/apiProtocol: 'REST' objects: - apiVersion: serving.kserve.io/v1alpha1 kind: ServingRuntime @@ -60,5 +61,5 @@ objects: version: '1' - autoSelect: true name: tensorflow - version: "2" -parameters: [] \ No newline at end of file + version: '2' +parameters: [] diff --git a/manifests/modelserving/tgis-ootb.yaml b/manifests/modelserving/tgis-ootb.yaml index 51977a6a47..f81e136900 100644 --- a/manifests/modelserving/tgis-ootb.yaml +++ b/manifests/modelserving/tgis-ootb.yaml @@ -6,53 +6,54 @@ metadata: opendatahub.io/ootb: 'true' annotations: description: Text Generation Inference Server (TGIS) is a high performance inference engine that deploys and serves Large Language Models. - openshift.io/display-name: TGIS Standalone ServingRuntime for KServe (gRPC) + openshift.io/display-name: TGIS Standalone ServingRuntime for KServe openshift.io/provider-display-name: Red Hat, Inc. tags: rhods,rhoai,kserve,servingruntime template.openshift.io/documentation-url: https://github.com/opendatahub-io/text-generation-inference template.openshift.io/long-description: This template defines resources needed to deploy TGIS standalone servingruntime with KServe in Red Hat OpenShift AI opendatahub.io/modelServingSupport: '["single"]' + opendatahub.io/apiProtocol: 'gRPC' name: tgis-grpc-serving-template objects: -- apiVersion: serving.kserve.io/v1alpha1 - kind: ServingRuntime - metadata: - name: tgis-grpc-runtime - annotations: - openshift.io/display-name: TGIS Standalone ServingRuntime for KServe (gRPC) - opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' - labels: - opendatahub.io/dashboard: 'true' - spec: - multiModel: false - supportedModelFormats: - - autoSelect: true - name: pytorch - containers: - - name: kserve-container - image: text-generation-inference - command: ["text-generation-launcher"] - args: - - "--model-name=/mnt/models/" - - "--port=3000" - - "--grpc-port=8033" - env: - - name: TRANSFORMERS_CACHE - value: /tmp/transformers_cache - readinessProbe: - exec: - command: - - curl - - localhost:3000/health - initialDelaySeconds: 5 - livenessProbe: - exec: - command: - - curl - - localhost:3000/health - initialDelaySeconds: 5 - ports: - - containerPort: 8033 - name: h2c - protocol: TCP + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + metadata: + name: tgis-grpc-runtime + annotations: + openshift.io/display-name: TGIS Standalone ServingRuntime for KServe + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + labels: + opendatahub.io/dashboard: 'true' + spec: + multiModel: false + supportedModelFormats: + - autoSelect: true + name: pytorch + containers: + - name: kserve-container + image: text-generation-inference + command: ['text-generation-launcher'] + args: + - '--model-name=/mnt/models/' + - '--port=3000' + - '--grpc-port=8033' + env: + - name: TRANSFORMERS_CACHE + value: /tmp/transformers_cache + readinessProbe: + exec: + command: + - curl + - localhost:3000/health + initialDelaySeconds: 5 + livenessProbe: + exec: + command: + - curl + - localhost:3000/health + initialDelaySeconds: 5 + ports: + - containerPort: 8033 + name: h2c + protocol: TCP parameters: [] From 22921fd4e1ac2965d34dc44b5ac4e8735457aed3 Mon Sep 17 00:00:00 2001 From: Dipanshu Gupta Date: Fri, 1 Mar 2024 18:04:52 +0530 Subject: [PATCH 2/2] Remove support for creating both Single and multi model custom serving runtime --- .../customServingRuntimes/CustomServingRuntimes.cy.ts | 10 +++++----- .../CustomServingRuntimePlatformsSelector.tsx | 4 ---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/frontend/src/__tests__/cypress/cypress/e2e/customServingRuntimes/CustomServingRuntimes.cy.ts b/frontend/src/__tests__/cypress/cypress/e2e/customServingRuntimes/CustomServingRuntimes.cy.ts index 2951533ab3..2c57f3c5ea 100644 --- a/frontend/src/__tests__/cypress/cypress/e2e/customServingRuntimes/CustomServingRuntimes.cy.ts +++ b/frontend/src/__tests__/cypress/cypress/e2e/customServingRuntimes/CustomServingRuntimes.cy.ts @@ -17,7 +17,7 @@ describe('Custom serving runtimes', () => { mockServingRuntimeTemplateK8sResource({ name: 'template-1', displayName: 'Multi Platform', - platforms: [ServingRuntimePlatform.SINGLE, ServingRuntimePlatform.MULTI], + platforms: [ServingRuntimePlatform.SINGLE], }), mockServingRuntimeTemplateK8sResource({ name: 'template-2', @@ -49,10 +49,10 @@ describe('Custom serving runtimes', () => { }); it('should display platform labels in table rows', () => { - servingRuntimes.getRowById('template-1').shouldBeSingleModel(true).shouldBeMultiModel(true); - servingRuntimes.getRowById('template-2').shouldBeSingleModel(true).shouldBeMultiModel(false); - servingRuntimes.getRowById('template-3').shouldBeSingleModel(false).shouldBeMultiModel(true); - servingRuntimes.getRowById('template-4').shouldBeSingleModel(false).shouldBeMultiModel(true); + servingRuntimes.getRowById('template-1').shouldBeSingleModel(true); + servingRuntimes.getRowById('template-2').shouldBeSingleModel(true); + servingRuntimes.getRowById('template-3').shouldBeMultiModel(true); + servingRuntimes.getRowById('template-4').shouldBeMultiModel(true); }); it('should display api protocol in table row', () => { diff --git a/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsSelector.tsx b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsSelector.tsx index a6057287d0..330fb86e78 100644 --- a/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsSelector.tsx +++ b/frontend/src/pages/modelServing/customServingRuntimes/CustomServingRuntimePlatformsSelector.tsx @@ -27,10 +27,6 @@ const CustomServingRuntimePlatformsSelector: React.FC< key: ServingRuntimePlatform.MULTI, label: RuntimePlatformSelectOptionLabels[ServingRuntimePlatform.MULTI], }, - { - key: 'both', - label: RuntimePlatformSelectOptionLabels.both, - }, ]; const selection =