Complete Azure ML Managed Endpoints Tutorial: Production Model Deployment
Azure ML Managed Endpoints provide a fully managed solution for deploying machine learning models at scale. They handle infrastructure, scaling, security, and monitoring automatically.
Why Managed Endpoints?
Key Benefits:
- Fully managed: No infrastructure to manage
- Auto-scaling: Scale based on traffic
- Blue-green deployments: Safe rollouts
- Built-in monitoring: Metrics and logging
- Security: Authentication and network isolation
- Online endpoints: Real-time inference
- Batch endpoints: Large-scale batch processing
Prerequisites
pip install azure-ai-ml azure-identity
# Azure CLI
az login
az extension add -n ml
Online Endpoints
1. Create Online Endpoint
from azure.ai.ml import MLClient
from azure.ai.ml.entities import ManagedOnlineEndpoint
from azure.identity import DefaultAzureCredential
# Authenticate and bind the client to a single workspace; every later call
# goes through this client.
mlclient = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id="your-subscription-id",
    resource_group_name="my-resource-group",
    workspace_name="my-ml-workspace",
)

# Create endpoint
endpoint = ManagedOnlineEndpoint(
    name="my-online-endpoint",
    description="Online endpoint for real-time inference",
    auth_mode="key",  # or "aml_token"
    tags={"environment": "production"},
)

# begin_create_or_update returns a poller; .result() blocks until the
# operation has finished.
mlclient.online_endpoints.begin_create_or_update(endpoint).result()
print(f"Endpoint created: {endpoint.name}")
2. Create Deployment
from azure.ai.ml.entities import (
ManagedOnlineDeployment,
Model,
Environment,
CodeConfiguration
)
# Create deployment
bluedeployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name="my-online-endpoint",
    model=Model(path="./model"),
    # Directory holding the scoring code and the entry script inside it.
    code_configuration=CodeConfiguration(
        code="./scoring",
        scoring_script="score.py",
    ),
    # Base image plus the conda dependencies layered on top of it.
    environment=Environment(
        conda_file="./environment.yml",
        image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    ),
    instance_type="Standard_DS3_v2",
    instance_count=1,
)

# .result() blocks until the deployment operation has finished.
mlclient.online_deployments.begin_create_or_update(bluedeployment).result()
print("Deployment created")
3. Scoring Script
# scoring/score.py
import json
import joblib
import numpy as np
import os
import logging
def init():
    """Initialize model on startup.

    Loads ``model.joblib`` from the directory named by the
    ``AZUREML_MODEL_DIR`` environment variable and stores it in the
    module-level ``model`` global that ``run`` reads.
    """
    global model
    model_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), "model.joblib")
    model = joblib.load(model_path)
    logging.info("Model loaded successfully")
def run(rawdata):
    """Run inference on incoming data.

    Args:
        rawdata: JSON text of the request. It must decode to an object with
            a "features" entry, which is converted with ``np.array`` and
            passed to the model.

    Returns:
        A dict with "predictions" and "probabilities" lists on success, or
        ``{"error": <message>}`` if parsing or scoring raises.
    """
    try:
        data = json.loads(rawdata)
        features = np.array(data["features"])
        # Run prediction
        predictions = model.predict(features)
        probabilities = model.predict_proba(features)
        return {
            "predictions": predictions.tolist(),
            "probabilities": probabilities.tolist(),
        }
    except Exception as e:
        # Broad on purpose: any failure is logged and reported back to the
        # caller as an error payload instead of propagating.
        logging.error(f"Error: {str(e)}")
        return {"error": str(e)}
4. Set Traffic
# Route all traffic to blue deployment
endpoint.traffic = {"blue": 100}
mlclient.online_endpoints.begin_create_or_update(endpoint).result()

# Get endpoint details
endpoint = mlclient.online_endpoints.get("my-online-endpoint")
print(f"Scoring URI: {endpoint.scoring_uri}")
print(f"Traffic: {endpoint.traffic}")
5. Test Endpoint
import json
# Test data
testdata = {
    "features": [[5.1, 3.5, 1.4, 0.2], [6.2, 3.4, 5.4, 2.3]]
}

# request_file takes a path to a JSON file, not the JSON text itself, so the
# payload is written to disk first.
with open("sample-request.json", "w", encoding="utf-8") as f:
    json.dump(testdata, f)

# Invoke endpoint
response = mlclient.online_endpoints.invoke(
    endpoint_name="my-online-endpoint",
    request_file="sample-request.json",
)