Skip to content

Commit

Permalink
feat(docker-compose): add Docker Compose file generation
Browse files Browse the repository at this point in the history
Signed-off-by: David Anyatonwu <davidanyatonwu@gmail.com>
  • Loading branch information
onyedikachi-david committed Oct 10, 2024
1 parent 2dbc975 commit 5fbbc5f
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 39 deletions.
96 changes: 70 additions & 26 deletions helpers/devcontainer_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
from schemas import DevContainerModel
from supabase_client import supabase
from models import DevContainer
import yaml


import logging
import tiktoken


def truncate_context(context, max_tokens=120000):
logging.info(f"Starting truncate_context with max_tokens={max_tokens}")
logging.debug(f"Initial context length: {len(context)} characters")
Expand All @@ -36,21 +38,29 @@ def truncate_context(context, max_tokens=120000):
logging.debug(f"Structure end position: {structure_end}")
logging.debug(f"Languages end position: {languages_end}")

important_content = context[:languages_end] + "<<END_SECTION: Repository Languages >>\n\n"
remaining_content = context[languages_end + len("<<END_SECTION: Repository Languages >>\n\n"):]
important_content = (
context[:languages_end] + "<<END_SECTION: Repository Languages >>\n\n"
)
remaining_content = context[
languages_end + len("<<END_SECTION: Repository Languages >>\n\n") :
]

important_tokens = encoding.encode(important_content)
logging.debug(f"Important content token count: {len(important_tokens)}")

if len(important_tokens) > max_tokens:
logging.warning("Important content alone exceeds max_tokens. Truncating important content.")
logging.warning(
"Important content alone exceeds max_tokens. Truncating important content."
)
important_content = encoding.decode(important_tokens[:max_tokens])
return important_content

remaining_tokens = max_tokens - len(important_tokens)
logging.info(f"Tokens available for remaining content: {remaining_tokens}")

truncated_remaining = encoding.decode(encoding.encode(remaining_content)[:remaining_tokens])
truncated_remaining = encoding.decode(
encoding.encode(remaining_content)[:remaining_tokens]
)

final_context = important_content + truncated_remaining
final_tokens = encoding.encode(final_context)
Expand All @@ -60,65 +70,85 @@ def truncate_context(context, max_tokens=120000):

return final_context

# generate_devcontainer_json: build a prompt from the repository context, ask
# instructor_client for a DevContainerModel response, serialize it, validate the
# result, and retry on failure.
#
# NOTE(review): this span appears to be a rendered commit diff with indentation
# stripped: pre-change and post-change lines sit back to back without +/-
# markers (the one-line signature below is immediately followed by its
# multi-line replacement; several log/return lines likewise appear in pairs).
# The later line of each pair looks like the current version -- confirm against
# the repository before relying on either.
#
# Parameters (from the signature): instructor_client, repo_url, repo_context,
# devcontainer_url=None, max_retries=2, regenerate=False.
# Returns: the older lines return a 2-tuple (devcontainer_json, url); the newer
# lines return a 3-tuple (devcontainer_json, docker_compose_yml, url).
# Raises: ValueError when validation still fails on the last attempt; any other
# exception from the last attempt is re-raised.
def generate_devcontainer_json(instructor_client, repo_url, repo_context, devcontainer_url=None, max_retries=2, regenerate=False):

def generate_devcontainer_json(
instructor_client,
repo_url,
repo_context,
devcontainer_url=None,
max_retries=2,
regenerate=False,
):
existing_devcontainer = None
existing_docker_compose = None
# Extract an existing devcontainer.json embedded in the context between its
# <<EXISTING_DEVCONTAINER>> / <<END_EXISTING_DEVCONTAINER>> markers.
if "<<EXISTING_DEVCONTAINER>>" in repo_context:
logging.info("Existing devcontainer.json found in the repository.")
existing_devcontainer = (
repo_context.split("<<EXISTING_DEVCONTAINER>>")[1]
.split("<<END_EXISTING_DEVCONTAINER>>")[0]
.strip()
)
if not regenerate and devcontainer_url:
logging.info(f"Using existing devcontainer.json from URL: {devcontainer_url}")
return existing_devcontainer, devcontainer_url
# Same extraction for an existing docker-compose.yml.
if "<<EXISTING_DOCKER_COMPOSE>>" in repo_context:
logging.info("Existing docker-compose.yml found in the repository.")
existing_docker_compose = (
repo_context.split("<<EXISTING_DOCKER_COMPOSE>>")[1]
.split("<<END_EXISTING_DOCKER_COMPOSE>>")[0]
.strip()
)
# Short-circuit: when not regenerating and a devcontainer_url is known, return
# the extracted values without calling the model.
# NOTE(review): whether this check is nested under one of the marker checks
# above cannot be determined from the flattened text -- confirm. If it is not
# nested, either extracted value may still be None here.
if not regenerate and devcontainer_url:
logging.info(f"Using existing devcontainer.json from URL: {devcontainer_url}")
return existing_devcontainer, existing_docker_compose, devcontainer_url

logging.info("Generating devcontainer.json...")
logging.info("Generating devcontainer.json and docker-compose.yml...")

# Truncate the context to fit within token limits
truncated_context = truncate_context(repo_context, max_tokens=126000)

# Values passed to process_template for rendering the prompt.
template_data = {
"repo_url": repo_url,
"repo_context": truncated_context,
"existing_devcontainer": existing_devcontainer
"existing_devcontainer": existing_devcontainer,
"existing_docker_compose": existing_docker_compose
}

prompt = process_template("prompts/devcontainer.jinja", template_data)
prompt = process_template("prompts/devcontainer_docker_compose.jinja", template_data)

# range(max_retries + 1): one initial attempt plus max_retries retries.
for attempt in range(max_retries + 1):
try:
logging.debug(f"Attempt {attempt + 1} to generate devcontainer.json")
logging.debug(f"Attempt {attempt + 1} to generate devcontainer.json and docker-compose.yml")
# The model name is read from the MODEL environment variable; the reply is
# requested as a DevContainerModel via response_model.
response = instructor_client.chat.completions.create(
model=os.getenv("MODEL"),
response_model=DevContainerModel,
messages=[
{"role": "system", "content": "You are a helpful assistant that generates devcontainer.json files."},
{"role": "system", "content": "You are a helpful assistant that generates devcontainer.json and docker-compose.yml files."},
{"role": "user", "content": prompt},
],
)
# The docker_compose field is left out of the JSON document and dumped
# separately as YAML; docker_compose_yml is None when the response carries no
# docker_compose value.
devcontainer_json = json.dumps(response.dict(exclude_none=True), indent=2)
devcontainer_json = json.dumps(response.dict(exclude={'docker_compose'}, exclude_none=True), indent=2)
docker_compose_yml = yaml.dump(response.docker_compose.dict(exclude_none=True), sort_keys=False) if response.docker_compose else None

# The compose file is optional: it is validated only when one was produced.
if validate_devcontainer_json(devcontainer_json):
logging.info("Successfully generated and validated devcontainer.json")
if existing_devcontainer and not regenerate:
return existing_devcontainer, devcontainer_url
if validate_devcontainer_json(devcontainer_json) and (docker_compose_yml is None or validate_docker_compose_yml(docker_compose_yml)):
logging.info("Successfully generated and validated devcontainer.json and docker-compose.yml")
# When existing files were found and regeneration was not requested, the
# existing content is returned in place of the freshly generated content.
if existing_devcontainer and existing_docker_compose and not regenerate:
return existing_devcontainer, existing_docker_compose, devcontainer_url
else:
return devcontainer_json, None # Return None as URL for generated content
return devcontainer_json, docker_compose_yml, None
else:
logging.warning(f"Generated JSON failed validation on attempt {attempt + 1}")
logging.warning(f"Generated files failed validation on attempt {attempt + 1}")
if attempt == max_retries:
raise ValueError("Failed to generate valid devcontainer.json after maximum retries")
raise ValueError("Failed to generate valid files after maximum retries")
# Every exception raised in the try body (including the ValueError above) is
# logged here; it is re-raised only on the final attempt, otherwise the loop
# moves on to the next attempt.
except Exception as e:
logging.error(f"Error on attempt {attempt + 1}: {str(e)}")
if attempt == max_retries:
raise

raise ValueError("Failed to generate valid devcontainer.json after maximum retries")
raise ValueError("Failed to generate valid files after maximum retries")


def validate_devcontainer_json(devcontainer_json):
logging.info("Validating devcontainer.json...")
schema_path = os.path.join(os.path.dirname(__file__), "..", "schemas", "devContainer.base.schema.json")
schema_path = os.path.join(
os.path.dirname(__file__), "..", "schemas", "devContainer.base.schema.json"
)
with open(schema_path, "r") as schema_file:
schema = json.load(schema_file)
try:
Expand All @@ -130,10 +160,24 @@ def validate_devcontainer_json(devcontainer_json):
logging.error(f"Validation failed: {e}")
return False


def validate_docker_compose_yml(docker_compose_yml):
    """Check that a string is YAML with the basic shape of a Compose file.

    Parsing alone is not enough: an empty string, a bare scalar, or a list are
    all valid YAML but can never be a Compose file. Beyond syntax, this requires
    a top-level mapping that contains a ``services`` mapping, which the Compose
    specification makes mandatory.

    Args:
        docker_compose_yml: The docker-compose.yml content as a string.

    Returns:
        True when the content parses and has the expected top-level shape,
        False otherwise. Failures are logged, not raised.
    """
    logging.info("Validating docker-compose.yml...")
    try:
        # safe_load never constructs arbitrary Python objects, so it is the
        # right loader for model-generated text.
        document = yaml.safe_load(docker_compose_yml)
    except yaml.YAMLError as e:
        logging.error(f"Docker Compose YAML validation failed: {e}")
        return False

    if not isinstance(document, dict):
        logging.error(
            "Docker Compose YAML validation failed: top-level element must be a mapping"
        )
        return False

    if not isinstance(document.get("services"), dict):
        logging.error(
            "Docker Compose YAML validation failed: missing or invalid 'services' mapping"
        )
        return False

    logging.info("Docker Compose YAML validation successful.")
    return True


def save_devcontainer(new_devcontainer):
    """Insert a devcontainer record into the Supabase ``devcontainers`` table.

    Args:
        new_devcontainer: Object exposing ``.dict()``; the resulting dictionary
            is used as the row payload.

    Returns:
        The first element of the insert response's ``data``, or ``None`` when
        ``data`` is empty.

    Raises:
        Exception: Any error raised while inserting is logged and re-raised
            unchanged.
    """
    try:
        table = supabase.table("devcontainers")
        response = table.insert(new_devcontainer.dict()).execute()
        if not response.data:
            return None
        return response.data[0]
    except Exception as e:
        logging.error(f"Error saving devcontainer to Supabase: {str(e)}")
        raise
32 changes: 23 additions & 9 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ async def get():

@rt("/generate", methods=["post"])
async def post(repo_url: str, regenerate: bool = False):
logging.info(f"Generating devcontainer.json for: {repo_url}")
logging.info(f"Generating devcontainer.json and docker-compose.yml for: {repo_url}")

# Normalize the repo_url by stripping trailing slashes
repo_url = repo_url.rstrip('/')
Expand All @@ -112,22 +112,23 @@ async def post(repo_url: str, regenerate: bool = False):
exists, existing_record = check_url_exists(repo_url)
logging.info(f"URL check result: exists={exists}, existing_record={existing_record}")

repo_context, existing_devcontainer, devcontainer_url = fetch_repo_context(repo_url)
repo_context, existing_devcontainer, existing_docker_compose, devcontainer_url = fetch_repo_context(repo_url)
logging.info(f"Fetched repo context. Existing devcontainer: {'Yes' if existing_devcontainer else 'No'}")
logging.info(f"Existing docker-compose: {'Yes' if existing_docker_compose else 'No'}")
logging.info(f"Devcontainer URL: {devcontainer_url}")

if exists and not regenerate:
logging.info(f"URL already exists in database. Returning existing devcontainer_json for: {repo_url}")
logging.info(f"URL already exists in database. Returning existing files for: {repo_url}")
devcontainer_json = existing_record['devcontainer_json']
docker_compose_yml = existing_record['docker_compose_yml']
generated = existing_record['generated']
source = "database"
url = existing_record['devcontainer_url']
else:
devcontainer_json, url = generate_devcontainer_json(instructor_client, repo_url, repo_context, devcontainer_url, regenerate=regenerate)
devcontainer_json, docker_compose_yml, url = generate_devcontainer_json(instructor_client, repo_url, repo_context, devcontainer_url, regenerate=regenerate)
generated = True
source = "generated" if url is None else "repository"


if not exists or regenerate:
logging.info("Saving to database...")
try:
Expand All @@ -145,16 +146,16 @@ async def post(repo_url: str, regenerate: bool = False):
new_devcontainer = DevContainer(
url=repo_url,
devcontainer_json=devcontainer_json,
docker_compose_yml=docker_compose_yml,
devcontainer_url=devcontainer_url,
repo_context=repo_context,
tokens=count_tokens(repo_context),
model=os.getenv("MODEL"),
embedding=embedding_json,
generated=generated,
created_at=datetime.utcnow().isoformat() # Ensure this is a string
created_at=datetime.utcnow().isoformat()
)

# Convert the Pydantic model to a dictionary and handle datetime serialization
devcontainer_dict = json.loads(new_devcontainer.json(exclude_unset=True))

result = supabase.table("devcontainers").insert(devcontainer_dict).execute()
Expand All @@ -164,7 +165,7 @@ async def post(repo_url: str, regenerate: bool = False):
raise

return Div(
Article(f"Devcontainer.json {'found in ' + source if source in ['database', 'repository'] else 'generated'}"),
Article(f"Files {'found in ' + source if source in ['database', 'repository'] else 'generated'}"),
Pre(
Code(devcontainer_json, id="devcontainer-code", cls="overflow-auto"),
Div(
Expand All @@ -185,7 +186,20 @@ async def post(repo_url: str, regenerate: bool = False):
cls="button-group"
),
cls="code-container relative"
)
),
Pre(
Code(docker_compose_yml, id="docker-compose-code", cls="overflow-auto"),
Div(
Button(
Img(cls="w-4 h-4", src="assets/icons/copy-icon.svg", alt="Copy"),
cls="icon-button copy-button",
title="Copy to clipboard",
),
Span(cls="action-text", id="action-text"),
cls="button-group"
),
cls="code-container relative"
) if docker_compose_yml else None
)
except Exception as e:
logging.error(f"An error occurred: {str(e)}", exc_info=True)
Expand Down
86 changes: 86 additions & 0 deletions prompts/devcontainer_docker_compose.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
Given the following context from a GitHub repository:

{{ repo_context }}

{% if existing_devcontainer %}
An existing devcontainer.json file was found in the repository:

{{ existing_devcontainer }}

Please use this as a reference and improve upon it, incorporating any new requirements or best practices.
{% endif %}

{% if existing_docker_compose %}
An existing docker-compose.yml file was found in the repository:

{{ existing_docker_compose }}

Please use this as a reference and improve upon it, incorporating any new requirements or best practices.
{% endif %}

Begin by applying Chain of Thought (CoT) reasoning to decompose the context and task into logical, manageable components. Think slowly and pay attention to all important facts in the context such as the ports used by the application and the ports used for testing.

Generate both a devcontainer.json file and a docker-compose.yml file for this project. The files should include appropriate settings for the development environment based on the project's requirements and structure.

For the devcontainer.json:
- The 'features' field is essential and should include a dictionary of features to enable within the container.
- Include comments to explain what each line or block of code does.

For the docker-compose.yml:
- Define the necessary services, volumes, and networks.
- Use appropriate images or build contexts for each service.
- Set up the correct port mappings and environment variables.
- Define dependencies between services if needed.

Here's an example of a devcontainer.json with Docker Compose integration:

```json
{
"name": "Project Dev Container",
"dockerComposeFile": "docker-compose.yml",
"service": "app",
"workspaceFolder": "/workspace",
"features": {
"ghcr.io/devcontainers/features/docker-in-docker:2": {}
},
"forwardPorts": [3000, 5432],
"customizations": {
"vscode": {
"extensions": [
"ms-azuretools.vscode-docker",
"ms-python.python"
]
}
},
"postCreateCommand": "pip install -r requirements.txt"
}
```

And here's an example of a corresponding docker-compose.yml:

```yaml
version: '3.8'
services:
app:
build:
context: .
dockerfile: Dockerfile
volumes:
- ..:/workspace:cached
command: sleep infinity
network_mode: service:db
db:
image: postgres:latest
restart: unless-stopped
volumes:
- postgres-data:/var/lib/postgresql/data
environment:
POSTGRES_PASSWORD: postgres
POSTGRES_USER: postgres
POSTGRES_DB: myapp

volumes:
postgres-data:
```

Your goal is to deliver the most logical, secure, efficient, and well-documented devcontainer.json and docker-compose.yml files for the given project.
Loading

0 comments on commit 5fbbc5f

Please sign in to comment.