diff --git a/flow_judge/models/llamafile.py b/flow_judge/models/llamafile.py
index 7005e71..1c44098 100644
--- a/flow_judge/models/llamafile.py
+++ b/flow_judge/models/llamafile.py
@@ -240,7 +240,7 @@ def start_llamafile_server(self):
             f"--threads {self.generation_params.get('thread_count', os.cpu_count() or 1)} " \
             f"--nobrowser -b {self.generation_params.get('batch_size', 32)} " \
             f"--parallel {self.generation_params.get('max_concurrent_requests', 1)} " \
-            f"--cont-batching'"
+            f"--cont-batching"
 
         if self.disable_kv_offload:
             command += " -nkvo"
@@ -262,6 +262,8 @@ def start_llamafile_server(self):
                 command += f" --{key} {value}"
                 logging.info(f"Additional server argument added: --{key} {value}")
 
+        command += "'"
+
         logging.info(f"Starting llamafile server with command: {command}")
 
         def log_output(pipe, log_func):