# Start a detached container named "bge-reranker-v2-m3" from the
# vllm/vllm-openai:latest image, serving the BAAI/bge-reranker-v2-m3 model.
#
#   --runtime nvidia --gpus all   use the NVIDIA runtime and expose every GPU
#   -v <host>:<container>         bind-mount the host's ~/.cache/modelscope
#                                 at /root/.cache/huggingface in the container
#   --env VLLM_USE_MODELSCOPE     set to "True" inside the container
#   -p 8001:8000                  publish container port 8000 on host port 8001
#   --ipc=host                    share the host's IPC namespace
#
# Everything after the image name is passed to the image's entrypoint.
#
# NOTE(review): the host ModelScope cache is mounted at the container's
# Hugging Face cache path; confirm that is where downloads actually land when
# VLLM_USE_MODELSCOPE=True, otherwise the model is re-downloaded on each run.
docker run \
  --name bge-reranker-v2-m3 \
  -d \
  --runtime nvidia \
  --gpus all \
  -v ~/.cache/modelscope:/root/.cache/huggingface \
  --env "VLLM_USE_MODELSCOPE=True" \
  -p 8001:8000 \
  --ipc=host \
  vllm/vllm-openai:latest \
  --model BAAI/bge-reranker-v2-m3 \
  --gpu_memory_utilization 0.9
# Links:
# https://www.ppmy.cn/news/1748773.html?action=onClick