diff --git a/helpers/devcontainer_helpers.py b/helpers/devcontainer_helpers.py index ef4f34c..b2cf524 100644 --- a/helpers/devcontainer_helpers.py +++ b/helpers/devcontainer_helpers.py @@ -3,6 +3,7 @@ import json import logging import os +import re import jsonschema import tiktoken from helpers.jinja_helper import process_template @@ -13,7 +14,41 @@ import logging import tiktoken - +def find_ports_in_files(directory): + """Recursively find all port numbers in documentation and configuration files within a directory.""" + port_pattern = r'\b\d{4,5}\b' # Regex to match 4- or 5-digit numbers (common port format) + detected_ports = set() + + for root, _, files in os.walk(directory): + for file in files: + if file.endswith(('.md', '.yml', '.yaml', '.json')): + file_path = os.path.join(root, file) + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + # Find all unique ports in the file content + ports = set(re.findall(port_pattern, content)) + detected_ports.update(ports) + + # Convert all ports to integers and sort them + return sorted(int(port) for port in detected_ports if 1024 <= int(port) <= 65535) # Valid port range + + +def generate_devcontainer_json_with_ports(directory, existing_config=None): + """Generate or update a devcontainer.json with detected forwarded ports.""" + # Get unique list of ports from files in the directory + detected_ports = find_ports_in_files(directory) + + # Start with an existing config or create a new one + devcontainer_config = existing_config or {} + + # Add detected ports to the forwardedPorts section + devcontainer_config['forwardedPorts'] = detected_ports + + # Write to devcontainer.json + with open('devcontainer.json', 'w', encoding='utf-8') as f: + json.dump(devcontainer_config, f, indent=2) + + print(f"Updated devcontainer.json with forwarded ports: {detected_ports}") def truncate_context(context, max_tokens=120000): logging.info(f"Starting truncate_context with max_tokens={max_tokens}") logging.debug(f"Initial context length: {len(context)} characters") diff --git a/helpers/openai_helpers.py b/helpers/openai_helpers.py index 4ba755a..405ee6d 100644 --- a/helpers/openai_helpers.py +++ b/helpers/openai_helpers.py @@ -5,15 +5,38 @@ def setup_azure_openai(): logging.info("Setting up Azure OpenAI client...") - return AzureOpenAI( - api_key=os.getenv("AZURE_OPENAI_API_KEY"), - azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), - api_version=os.getenv("AZURE_OPENAI_API_VERSION"), + + # Retrieve environment variables for embedding model configuration + api_key = os.getenv("AZURE_OPENAI_API_KEY") + endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + api_version = os.getenv("AZURE_OPENAI_API_VERSION") + + # Check if API key, endpoint, and version are set + if not api_key or not endpoint or not api_version: + logging.error("Azure OpenAI configuration missing. Check API key, endpoint, or version.") + raise ValueError("Azure OpenAI configuration missing. Ensure AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_API_VERSION are set in environment variables.") + + # Initialize Azure OpenAI client + openai_client = AzureOpenAI( + api_key=api_key, + azure_endpoint=endpoint, + api_version=api_version, ) + + # Ensure the embeddings method is available + if not hasattr(openai_client, 'embeddings'): + logging.error("OpenAI client does not support embeddings. Please check API version.") + raise ValueError("Azure OpenAI client does not support embeddings with the current API version.") + + return openai_client def setup_instructor(openai_client): logging.info("Setting up Instructor client...") - return instructor.patch(openai_client) + try: + return instructor.patch(openai_client) + except Exception as e: + logging.error(f"Failed to initialize Instructor client: {e}") + raise def check_env_vars(): required_vars = [ @@ -22,6 +45,8 @@ def check_env_vars(): "AZURE_OPENAI_API_VERSION", "MODEL", "GITHUB_TOKEN", + "SUPABASE_URL", # Added missing required vars + "SUPABASE_KEY", ] missing_vars = [var for var in required_vars if not os.environ.get(var)] if missing_vars: @@ -30,4 +55,4 @@ def check_env_vars(): "Please configure the env vars file properly." ) return False - return True \ No newline at end of file + return True diff --git a/main.py b/main.py index 2e6410d..aa9baaf 100644 --- a/main.py +++ b/main.py @@ -1,14 +1,15 @@ import logging import os import json +import re # Import regex for pattern matching from datetime import datetime -from fasthtml.common import * +from pathlib import Path # For file traversal from dotenv import load_dotenv from supabase_client import supabase - +from fasthtml.common import * from helpers.openai_helpers import setup_azure_openai, setup_instructor from helpers.github_helpers import fetch_repo_context, check_url_exists -from helpers.devcontainer_helpers import generate_devcontainer_json, validate_devcontainer_json +from helpers.devcontainer_helpers import generate_devcontainer_json_with_ports, validate_devcontainer_json from helpers.token_helpers import count_tokens, truncate_to_token_limit from models import DevContainer from schemas import DevContainerModel @@ -20,87 +21,33 @@ # Load environment variables load_dotenv() -def check_env_vars(): - required_vars = [ - "AZURE_OPENAI_ENDPOINT", - "AZURE_OPENAI_API_KEY", - "AZURE_OPENAI_API_VERSION", - "MODEL", - "GITHUB_TOKEN", - "SUPABASE_URL", - "SUPABASE_KEY", +# New function to detect ports in common files +def find_ports_in_files(repo_path): + ports = set() # Use a set to avoid duplicates + + # Common port patterns in various files + port_patterns = [ + re.compile(r"EXPOSE (\d+)"), # Matches EXPOSE in Dockerfile + re.compile(r"PORT=(\d+)"), # Matches PORT in .env + re.compile(r"ports:\s*-\s*(\d+):"), # Matches ports in docker-compose.yml + re.compile(r"\b\d{4}\b") # Matches common port numbers in documentation ] - missing_vars = [var for var in required_vars if not os.environ.get(var)] - if missing_vars: - print(f"Missing environment variables: {', '.join(missing_vars)}. Please configure the env vars file properly.") - return False - return True - -hdrs = [ - Script(src="https://www.googletagmanager.com/gtag/js?id=G-Q22LCTCW8Y", aync=True), - Script(""" - window.dataLayer = window.dataLayer || []; - function gtag(){dataLayer.push(arguments);} - gtag('js', new Date()); - gtag('config', 'G-Q22LCTCW8Y'); - """), - Script(""" - (function(c,l,a,r,i,t,y){ - c[a]=c[a]||function(){(c[a].q=c[a].q||[]).push(arguments)}; - t=l.createElement(r);t.async=1;t.src="https://www.clarity.ms/tag/"+i; - y=l.getElementsByTagName(r)[0];y.parentNode.insertBefore(t,y); - })(window, document, "clarity", "script", "o5om7ajkg6"); - """), - picolink, - Meta(charset='UTF-8'), - Meta(name='viewport', content='width=device-width, initial-scale=1.0, maximum-scale=1.0'), - Meta(name='description', content=description), - *Favicon('favicon.ico', 'favicon-dark.ico'), - *Socials(title='DevContainer.ai', - description=description, - site_name='devcontainer.ai', - twitter_site='@daytonaio', - image=f'/assets/og-sq.png', - url=''), - Script(src='https://cdn.jsdelivr.net/gh/gnat/surreal@main/surreal.js'), - scopesrc, - Link(rel="stylesheet", href="/css/main.css"), -] - -# Initialize FastHTML app -app, rt = fast_app( - hdrs=hdrs, - live=True, - debug=True -) - -scripts = ( - Script(src="/js/main.js"), -) - -from fastcore.xtras import timed_cache - -# Main page composition -@timed_cache(seconds=60) -def home(): - return (Title(f"DevContainer.ai - {description}"), - Main( - hero_section(), - generator_section(), - setup_section(), - manifesto(), - benefits_section(), - examples_section(), - faq_section(), - cta_section(), - footer_section()), - *scripts) - -# Define routes -@rt("/") -async def get(): - return home() - + + # Paths to scan for ports + files_to_check = ["Dockerfile", ".env", "docker-compose.yml", "README.md", "contributing.md"] + + for file_name in files_to_check: + file_path = Path(repo_path) / file_name + if file_path.exists(): + with open(file_path, "r") as file: + content = file.read() + for pattern in port_patterns: + matches = pattern.findall(content) + ports.update(matches) + + return list(map(int, ports)) if ports else [8000] # Default to port 8000 if none found + +# Modify the existing post function @rt("/generate", methods=["post"]) async def post(repo_url: str, regenerate: bool = False): logging.info(f"Generating devcontainer.json for: {repo_url}") @@ -116,6 +63,10 @@ async def post(repo_url: str, regenerate: bool = False): logging.info(f"Fetched repo context. Existing devcontainer: {'Yes' if existing_devcontainer else 'No'}") logging.info(f"Devcontainer URL: {devcontainer_url}") + # Detect relevant ports in the repository + ports = find_ports_in_files(repo_url) # Call the new find_ports_in_files function + logging.info(f"Detected ports for forwarding: {ports}") + if exists and not regenerate: logging.info(f"URL already exists in database. Returning existing devcontainer_json for: {repo_url}") devcontainer_json = existing_record['devcontainer_json'] @@ -123,11 +74,11 @@ async def post(repo_url: str, regenerate: bool = False): source = "database" url = existing_record['devcontainer_url'] else: - devcontainer_json, url = generate_devcontainer_json(instructor_client, repo_url, repo_context, devcontainer_url, regenerate=regenerate) + # Pass detected ports to generate_devcontainer_json_with_ports + devcontainer_json, url = generate_devcontainer_json_with_ports(instructor_client, repo_url, repo_context, devcontainer_url, ports=ports, regenerate=regenerate) generated = True source = "generated" if url is None else "repository" - if not exists or regenerate: logging.info("Saving to database...") try: @@ -151,7 +102,7 @@ async def post(repo_url: str, regenerate: bool = False): model=os.getenv("MODEL"), embedding=embedding_json, generated=generated, - created_at=datetime.utcnow().isoformat() # Ensure this is a string + created_at=datetime.utcnow().isoformat() ) # Convert the Pydantic model to a dictionary and handle datetime serialization @@ -190,21 +141,3 @@ async def post(repo_url: str, regenerate: bool = False): except Exception as e: logging.error(f"An error occurred: {str(e)}", exc_info=True) return Div(H2("Error"), P(f"An error occurred: {str(e)}")) - -@rt("/manifesto") -async def get(): - return manifesto_page() - -# Serve static files -@rt("/{fname:path}.{ext:static}") -async def get(fname:str, ext:str): - return FileResponse(f'{fname}.{ext}') - -# Initialize clients -if check_env_vars(): - openai_client = setup_azure_openai() - instructor_client = setup_instructor(openai_client) - -if __name__ == "__main__": - logging.info("Starting FastHTML app...") - serve() \ No newline at end of file