Document Sync by Tina
Chivier committed Sep 19, 2024
1 parent 89b65c2 commit e90f9a1
Showing 1 changed file with 12 additions and 19 deletions: docs/stable/store/quickstart.md
````diff
@@ -158,7 +158,10 @@ class VllmModelDownloader:
         from vllm import LLM
         from vllm.config import LoadFormat
 
-        def _run_writer(input_dir, output_dir):
+        # set the model storage path
+        storage_path = os.getenv("STORAGE_PATH", "./models")
+
+        def _run_writer(input_dir, model_name):
             # load models from the input directory
             llm_writer = LLM(
                 model=input_dir,
@@ -169,10 +172,11 @@ class VllmModelDownloader:
                 enforce_eager=True,
                 max_model_len=1,
             )
+            model_path = os.path.join(storage_path, model_name)
             model_executer = llm_writer.llm_engine.model_executor
             # save the models in the ServerlessLLM format
             model_executer.save_serverless_llm_state(
-                path=output_dir, pattern=pattern, max_size=max_size
+                path=model_path, pattern=pattern, max_size=max_size
             )
             for file in os.listdir(input_dir):
                 # Copy the metadata files into the output directory
@@ -182,48 +186,37 @@
                     ".safetensors",
                 ):
                     src_path = os.path.join(input_dir, file)
-                    dest_path = os.path.join(output_dir, file)
+                    dest_path = os.path.join(model_path, file)
                     if os.path.isdir(src_path):
                         shutil.copytree(src_path, dest_path)
                     else:
-                        shutil.copy(src_path, output_dir)
+                        shutil.copy(src_path, dest_path)
             del model_executer
             del llm_writer
             gc.collect()
             if torch.cuda.is_available():
                 torch.cuda.empty_cache()
                 torch.cuda.synchronize()
 
-        # set the model storage path
-        storage_path = os.getenv("STORAGE_PATH", "./models")
-        model_dir = os.path.join(storage_path, model_name)
-
-        # create the output directory
-        if os.path.exists(model_dir):
-            print(f"Already exists: {model_dir}")
-            return
-        os.makedirs(model_dir, exist_ok=True)
-
         try:
             with TemporaryDirectory() as cache_dir:
-                # download model from huggingface
+                # download from huggingface
                 input_dir = snapshot_download(
                     model_name,
                     cache_dir=cache_dir,
                     allow_patterns=["*.safetensors", "*.bin", "*.json", "*.txt"],
                 )
-                _run_writer(input_dir, model_dir)
+                _run_writer(input_dir, model_name)
         except Exception as e:
             print(f"An error occurred while saving the model: {e}")
             # remove the output dir
-            shutil.rmtree(model_dir)
+            shutil.rmtree(os.path.join(storage_path, model_name))
             raise RuntimeError(
-                f"Failed to save model {model_name} for vllm backend: {e}"
+                f"Failed to save {model_name} for vllm backend: {e}"
             )
 
 downloader = VllmModelDownloader()
 downloader.download_vllm_model("facebook/opt-1.3b", "float16", 1)
 
 ```
````

After downloading the model, you can launch the checkpoint store server and load the model in vLLM through the `serverless_llm` load format, as sketched below.
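For instance, the loading step might look roughly like this. This is a minimal sketch, not the full quickstart: it assumes the checkpoint store server is already running, that `STORAGE_PATH` matches the value used during download, and that your vLLM installation is patched with ServerlessLLM support.

```python
import os

from vllm import LLM

# Assumptions: the checkpoint store server is already running, and
# STORAGE_PATH points at the same directory the downloader wrote to.
storage_path = os.getenv("STORAGE_PATH", "./models")
model_name = "facebook/opt-1.3b"

# load the saved checkpoint through the serverless_llm load format
llm = LLM(
    model=os.path.join(storage_path, model_name),
    load_format="serverless_llm",
    dtype="float16",
)

outputs = llm.generate(["Hello, my name is"])
print(outputs[0].outputs[0].text)
```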
