Source code for mcli.api.inference_deployments.api_ping

""" Ping a InferenceDeployment """
from __future__ import annotations

from typing import Callable, Optional, Union, cast

import requests
import validators
from requests import Response

from mcli.api.exceptions import InferenceServerException
from mcli.api.inference_deployments import get_inference_deployment
from mcli.api.model.inference_deployment import InferenceDeployment

__all__ = ['ping']


[docs]def ping( deployment: Union[InferenceDeployment, str], *, timeout: Optional[float] = 10, ) -> dict: """Pings an inference deployment that has been launched in the MosaicML platform and returns the status of the deployment. The deployment must have a '/ping' endpoint defined. Arguments: deployment: The deployment to check the status of. Can be a InferenceDeployment object, the name of an deployment, or a string which is of the form https://<deployment dns>. timeout: Time, in seconds, in which the call should complete. If the call takes too long, a TimeoutError will be raised. Raises: HTTPError: If pinging the endpoint fails MAPIException: If connecting to MAPI, raised when a MAPI communication error occurs """ validate_url = cast(Callable[[str], bool], validators.url) if isinstance(deployment, str) and not validate_url(deployment): # if a string is passed in that is not a url then lookup the deployment and get the name deployment = get_inference_deployment(deployment) base_url = deployment if isinstance(deployment, InferenceDeployment): base_url = f'https://{deployment.public_dns}' try: resp: Response = requests.get(url=f'{base_url}/ping', timeout=timeout) if resp.ok: return {"status": resp.status_code} else: raise InferenceServerException.from_server_error_response(resp.content.decode().strip(), resp.status_code) except requests.exceptions.ConnectionError as e: raise InferenceServerException.from_requests_error(e) from e