
Commit

improve docs
merrymercy committed Oct 9, 2023
1 parent 85833d4 commit 0adf31c
Showing 5 changed files with 10 additions and 9 deletions.
fastchat/model/model_adapter.py (6 changes: 3 additions & 3 deletions)
@@ -27,9 +27,6 @@
 )
 
 from fastchat.constants import CPU_ISA
-from fastchat.modules.gptq import GptqConfig, load_gptq_quantized
-from fastchat.modules.awq import AWQConfig, load_awq_quantized
-from fastchat.modules.exllama import ExllamaConfig, load_exllama_model
 from fastchat.conversation import Conversation, get_conv_template
 from fastchat.model.compression import load_compress_model
 from fastchat.model.llama_condense_monkey_patch import replace_llama_with_condense
@@ -40,6 +37,9 @@
 from fastchat.model.monkey_patch_non_inplace import (
     replace_llama_attn_with_non_inplace_operations,
 )
+from fastchat.modules.awq import AWQConfig, load_awq_quantized
+from fastchat.modules.exllama import ExllamaConfig, load_exllama_model
+from fastchat.modules.gptq import GptqConfig, load_gptq_quantized
 from fastchat.utils import get_gpu_memory
 
 # Check an environment variable to check if we should be sharing Peft model
fastchat/model/model_exllama.py (5 changes: 3 additions & 2 deletions)
@@ -1,8 +1,9 @@
-import sys
-import torch
 import gc
+import sys
 from typing import Dict
+
+import torch
 
 
 def generate_stream_exllama(
     model,
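This hunk regroups the file's imports in the conventional PEP 8 layout: standard-library modules (gc, sys, typing) first and alphabetized, third-party packages (torch) after a blank line. A minimal sketch restating the "after" side of the hunk, with the grouping made explicit in comments:

# Standard-library group, alphabetized.
import gc
import sys
from typing import Dict

# Third-party group, separated from the stdlib group by a blank line.
import torch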
fastchat/serve/cli.py (2 changes: 1 addition & 1 deletion)
@@ -29,9 +29,9 @@
 import torch
 
 from fastchat.model.model_adapter import add_model_args
-from fastchat.modules.gptq import GptqConfig
 from fastchat.modules.awq import AWQConfig
 from fastchat.modules.exllama import ExllamaConfig
+from fastchat.modules.gptq import GptqConfig
 from fastchat.serve.inference import ChatIO, chat_loop
 from fastchat.utils import str_to_torch_dtype
 
fastchat/serve/inference.py (2 changes: 1 addition & 1 deletion)
@@ -35,8 +35,8 @@
     get_conversation_template,
     get_generate_stream_function,
 )
-from fastchat.modules.gptq import GptqConfig
 from fastchat.modules.awq import AWQConfig
+from fastchat.modules.gptq import GptqConfig
 from fastchat.modules.exllama import ExllamaConfig
 from fastchat.utils import is_partial_stop, is_sentence_complete, get_context_length
 
fastchat/serve/model_worker.py (4 changes: 2 additions & 2 deletions)
@@ -45,15 +45,15 @@
     get_conversation_template,
     get_generate_stream_function,
 )
-from fastchat.modules.gptq import GptqConfig
 from fastchat.modules.awq import AWQConfig
+from fastchat.modules.exllama import ExllamaConfig
+from fastchat.modules.gptq import GptqConfig
 from fastchat.utils import (
     build_logger,
     pretty_print_semaphore,
     get_context_length,
     str_to_torch_dtype,
 )
-from fastchat.modules.exllama import ExllamaConfig
 from fastchat.utils import build_logger, pretty_print_semaphore, get_context_length
 
 
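The hunks above all apply the same mechanical cleanup: the fastchat.modules imports are regrouped, alphabetically (awq, exllama, gptq) in every file except fastchat/serve/inference.py. The commit does not say what tooling, if any, produced the ordering; purely as an illustration, the third-party isort library reproduces the model_exllama.py result:

import isort  # third-party library: pip install isort

# The import block as it stood before the commit (first hunk of
# fastchat/model/model_exllama.py above).
before = (
    "import sys\n"
    "import torch\n"
    "import gc\n"
    "from typing import Dict\n"
)

# isort's defaults place the standard-library group first, third-party
# packages after a blank line, and alphabetize within each group --
# the same layout the hunk introduces.
print(isort.code(before))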
