3. Build the Load Testing Component
Now, we are going to create a component to test the performance of your model server.
We are going to use Locust, a Python performance-testing tool.
import os
import subprocess
from lightning import BuildConfig, LightningWork
class Locust(LightningWork):
    def __init__(self, num_users: int = 100):
        """This component checks the performance of a server. The server url is passed to its run method.

        Arguments:
            num_users: Number of users emulated by Locust
        """
        # Note: Using the default port 8089 of Locust.
        super().__init__(
            port=8089,
            parallel=True,
            cloud_build_config=BuildConfig(requirements=["locust"]),
        )
        self.num_users = num_users

    def run(self, load_tested_url: str):
        # 1: Build the locust command line as an argv list. Passing a list with
        # shell=False (the default) avoids shell-quoting/injection issues with
        # the interpolated URL, unlike joining into a single shell string.
        cmd = [
            "locust",
            "--master-host",
            str(self.host),
            "--master-port",
            str(self.port),
            "--host",
            str(load_tested_url),
            "-u",
            str(self.num_users),
        ]
        # 2: Launch locust in a child process, from this file's directory so it
        # can find the adjacent locustfile.py.
        process = subprocess.Popen(cmd, cwd=os.path.dirname(__file__))
        # 3: Wait for the process to finish. As locust is a server,
        # this waits infinitely or until the process is killed.
        process.wait()
Finally, once the component is done, we need to create a locustfile.py
file which defines the format of the request to send to your model server.
The endpoint to hit has the following format: /v2/models/{MODEL_NAME}/versions/{VERSION}/infer.
from locust import FastHttpUser, task
from sklearn import datasets
from sklearn.model_selection import train_test_split
class HelloWorldUser(FastHttpUser):
    """Locust user that repeatedly posts a digit sample to the model server."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Build the request payload once per simulated user, up front.
        self._prepare_inference_request()

    @task
    def predict(self):
        # Each task iteration fires one POST against the V2 inference endpoint.
        self.client.post(
            "/v2/models/mnist-svm/versions/v0.0.1/infer",
            json=self.inference_request,
        )

    def _prepare_inference_request(self):
        # Load the scikit-learn digits dataset.
        digits = datasets.load_digits()
        # A classifier needs flat feature vectors, so reshape the images
        # into a (samples, features) matrix.
        flattened = digits.images.reshape((len(digits.images), -1))
        # Split into train/test halves; we only keep the test features
        # and take the first sample as the request body.
        _, X_test, _, _ = train_test_split(flattened, digits.target, test_size=0.5, shuffle=False)
        sample = X_test[0:1]
        self.inference_request = {
            "inputs": [
                {
                    "name": "predict",
                    "shape": sample.shape,
                    "datatype": "FP32",
                    "data": sample.tolist(),
                }
            ]
        }