# ===========================================
# vLLM HTR Service
# Reads configuration from /home/fenix/htr-api/.env
# ===========================================
# BACKUP COPY - After editing, run:
#   sudo cp ~/htr-api/vllm-htr.service /etc/systemd/system/
#   sudo systemctl daemon-reload && sudo systemctl restart vllm-htr
# ===========================================

[Unit]
Description=vLLM Server for HTR
# Ordering only: start after the network management stack has been brought up.
# NOTE(review): network.target does not guarantee that addresses are
# configured; if VLLM_HOST is a specific non-loopback address, consider
# network-online.target instead - confirm against the .env value.
After=network.target

[Service]
# The vllm process started by ExecStart is the main process of the unit.
Type=simple
User=fenix
WorkingDirectory=/llm

# Load the VLLM_* variables referenced by ExecStart from the .env file.
EnvironmentFile=/home/fenix/htr-api/.env

# ${VAR} (braced form) is substituted by systemd as a single argument.
# Every continued line must end with a backslash, and no other line may sit
# between the continuation lines, otherwise the command is cut short.
ExecStart=/llm/env/bin/vllm serve ${VLLM_MODEL_PATH} \
    --host ${VLLM_HOST} \
    --port ${VLLM_PORT} \
    --max-model-len ${VLLM_MAX_MODEL_LEN} \
    --gpu-memory-utilization ${VLLM_GPU_UTIL} \
    --trust-remote-code \
    --served-model-name ${VLLM_MODEL_NAME}

# Restart only after an unclean exit, waiting 10 seconds between attempts.
Restart=on-failure
RestartSec=10
# Start-up timeout in seconds.
# NOTE(review): with Type=simple the unit counts as started once the process
# is spawned, so this timeout likely has little effect here - confirm.
TimeoutStartSec=300

[Install]
# "systemctl enable" hooks the unit into multi-user.target.
WantedBy=multi-user.target