# ===========================================
# vLLM HTR Service
# Reads configuration from /home/fenix/htr-api/.env
# ===========================================
# BACKUP COPY - After editing, run:
#   sudo cp ~/htr-api/vllm-htr.service /etc/systemd/system/
#   sudo systemctl daemon-reload && sudo systemctl restart vllm-htr
# ===========================================

[Unit]
Description=vLLM Server for HTR
After=network.target

[Service]
Type=simple
User=fenix
WorkingDirectory=/llm

# Load environment variables from .env.
# The path has no "-" prefix, so a missing or unreadable file makes the
# start fail instead of launching vllm with empty arguments.
EnvironmentFile=/home/fenix/htr-api/.env

# Use environment variables in ExecStart.
# The braced ${VAR} form is substituted by systemd itself (no shell is
# involved) and always yields exactly one argument, even if the value
# contains whitespace. Every variable referenced below must be defined in
# the .env file above.
ExecStart=/llm/env/bin/vllm serve ${VLLM_MODEL_PATH} \
    --host ${VLLM_HOST} \
    --port ${VLLM_PORT} \
    --max-model-len ${VLLM_MAX_MODEL_LEN} \
    --gpu-memory-utilization ${VLLM_GPU_UTIL} \
    --trust-remote-code \
    --served-model-name ${VLLM_MODEL_NAME}

# Restart only after an unclean exit, waiting 10 seconds between attempts.
Restart=on-failure
RestartSec=10
# NOTE(review): with Type=simple the unit counts as started as soon as the
# process is forked, so this 300 s start timeout does not cover model
# loading time - confirm that is the intent.
TimeoutStartSec=300

[Install]
WantedBy=multi-user.target