Initial PoC commit
This commit is contained in:
36
Server/vllm-htr.service
Normal file
36
Server/vllm-htr.service
Normal file
@@ -0,0 +1,36 @@
|
||||
# ===========================================
# vLLM HTR Service
# Reads its configuration from /home/fenix/htr-api/.env
# ===========================================
# BACKUP COPY - after editing, install and reload with:
#   sudo cp ~/htr-api/vllm-htr.service /etc/systemd/system/
#   sudo systemctl daemon-reload && sudo systemctl restart vllm-htr
# ===========================================
|
||||
|
||||
# Unit metadata and start-up ordering.
[Unit]
Description=vLLM Server for HTR
# Ordering only: this unit is started after network.target has been reached.
After=network.target
|
||||
|
||||
# How the vLLM server process is launched and supervised.
[Service]
# The process started by ExecStart is the main service process.
Type=simple
# Run as user "fenix" with /llm as the working directory.
User=fenix
WorkingDirectory=/llm

# Load environment variables from .env
EnvironmentFile=/home/fenix/htr-api/.env

# The ${VLLM_*} references are substituted by systemd itself (no shell is
# involved); each one becomes exactly one argument. They are expected to be
# defined in the .env file loaded above.
ExecStart=/llm/env/bin/vllm serve ${VLLM_MODEL_PATH} \
    --host ${VLLM_HOST} \
    --port ${VLLM_PORT} \
    --max-model-len ${VLLM_MAX_MODEL_LEN} \
    --gpu-memory-utilization ${VLLM_GPU_UTIL} \
    --trust-remote-code \
    --served-model-name ${VLLM_MODEL_NAME}

# Restart only after an unclean exit, waiting 10 seconds between attempts.
Restart=on-failure
RestartSec=10
# NOTE(review): with Type=simple the unit counts as started as soon as the
# process has been spawned, so this 300 s limit probably does not cover model
# loading time - confirm the intent.
TimeoutStartSec=300
|
||||
|
||||
# Enablement: "systemctl enable" makes multi-user.target pull in this unit.
[Install]
WantedBy=multi-user.target
|
||||
Reference in New Issue
Block a user