diff --git a/.github/workflows/publish-image.yml b/.github/workflows/publish-image.yml index 7ba64fb..72d0f0a 100644 --- a/.github/workflows/publish-image.yml +++ b/.github/workflows/publish-image.yml @@ -4,6 +4,10 @@ on: push: branches: - main + pull_request: + branches: + - main + workflow_dispatch: jobs: push_to_registry: diff --git a/Dockerfile b/Dockerfile index 93148b9..48c9d84 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ WORKDIR /app COPY . /app RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir --user Flask Flask_cors PyYAML openai waitress + pip install --no-cache-dir --user -r requirements.txt # Runtime stage FROM python:3.9-slim as runtime diff --git a/README.md b/README.md index 2b12a70..d4834cd 100644 --- a/README.md +++ b/README.md @@ -30,15 +30,40 @@ To run the plugin, enter the following command: python main.py ``` -Once the local server is running: +Once the local server is running... -1. Navigate to https://chat.openai.com. +### Test the ChatGPT Plugin + +1. Navigate to https://chat.openai.com. 2. In the Model drop down, select "Plugins" (note, if you don't see it there, you don't have access yet). 3. Select "Plugin store" 4. Select "Develop your own plugin" 5. Enter in `localhost:5003` since this is the URL the server is running on locally, then select "Find manifest file". -The plugin should now be installed and enabled! You can start with a question like "What is on my todo list" and then try adding something to it as well! +The plugin should now be installed and enabled! You can start with a question like "What is on my todo list" and then try adding something to it as well! + +### Test the Ask GPT feature from developer.tbd.website + +Execute the following curl requests: + +```sh +ASK_QUERY="How to connect to web5 and create a record?" +ASK_QUERY=$(sed "s/ /%20/g" <<<"$ASK_QUERY") # encodes whitespaces +curl "http://localhost:5003/ask_chat?query=$ASK_QUERY" +``` + +## Running with Docker + +1. 
Install Docker on your computer +2. Set the OPENAI_API_KEY in the `docker-compose.yaml` file +3. Execute `docker compose up` + +## Deployment settings + +Environment variables used in the server: + +- `WEB5GPT_MONTHLY_USAGE_LIMIT_USD` - sets the monthly usage limit in USD for the `/ask_chat` endpoint. The service actually just sets a daily limit by dividing this number by 30. By default this is $500/30 = ~$16.66 per day. +- `OPENAI_API_KEY` - defines the OpenAI API Key ## Getting help diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..eb69757 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,12 @@ +version: "3" + +services: + app: + build: + context: . + dockerfile: Dockerfile + ports: + - "5003:5003" + environment: + OPENAI_API_KEY: ${OPENAI_API_KEY} + WEB5GPT_MONTHLY_USAGE_LIMIT_USD: 30 diff --git a/main.py b/main.py index b09f990..964e535 100644 --- a/main.py +++ b/main.py @@ -5,8 +5,10 @@ from flask_cors import CORS import yaml from openai import OpenAI +from usage_cost_tracker import UsageCostTracker client = OpenAI() +usage_cost_tracker = UsageCostTracker() app = Flask(__name__) CORS(app) # Enables CORS for all routes @@ -98,8 +100,10 @@ def openapi_spec(): @app.route("/ask_chat", methods=['GET']) def ask_chat_route(): - query = request.args.get('query') - + usage_cost_tracker.check_usage_costs() + + query = request.args.get('query') + messages = [{"role": "system", "content": "you are a helpful assistant to find the best names that match what knowledge is being asked for. Return only a list of matching names as requested."}, {"role": "user", "content": "I will provide you lists of json objects which map a name of a piece of knowledge to a description.
You then take a question from the website developer.tbd.website and return a list of names that best the question, 2 to 3 ideally."}, {"role": "assistant", "content": "Got it."}, @@ -116,13 +120,12 @@ def ask_chat_route(): response = client.chat.completions.create(model="gpt-4-1106-preview", messages=messages) + usage_cost_tracker.compute_response_costs(response) - - response_message = response.choices[0].message csv_list = response_message.content print("csv_list", csv_list) - + csv_list = csv_list.split(',') @@ -141,7 +144,7 @@ def ask_chat_route(): _, code = content.split('-----', 1) knowledge += f"{item}:\n\n{code}\n\n" - + messages = [{"role": "system", "content": "You are a helpful assistant that provides code examples and explanations when context is provided. Please don't invent APIs. Code examples should be surrounded with markdown backticks to make presentation easy."}, {"role": "user", "content": "Please don't hallucinate responses if you don't know what the API is, stick to the content you know. Also remember code examples should be surrounded with markdown backticks to make presentation easy."}, @@ -152,27 +155,34 @@ def ask_chat_route(): def stream(): + response_tokens = 0 + if knowledge == '': yield 'data: Sorry, I don\'t know about that topic. 
Please try again.\n\n' return completion = client.chat.completions.create(model="gpt-3.5-turbo-16k", messages=messages, stream=True) + usage_cost_tracker.compute_messages_cost(messages, "gpt-3.5-turbo-16k") for line in completion: print(line.choices[0]) chunk = line.choices[0].delta.content - if chunk: + if chunk: + response_tokens += usage_cost_tracker.count_tokens(chunk) + if chunk.endswith("\n"): - yield 'data: %s|CR|\n\n' % chunk.rstrip() + yield 'data: %s|CR|\n\n' % chunk.rstrip() else: - yield 'data: %s\n\n' % chunk + yield 'data: %s\n\n' % chunk - - return flask.Response(stream(), mimetype='text/event-stream') + # Post process the response to add the cost + usage_cost_tracker.compute_tokens_cost(response_tokens, "gpt-3.5-turbo-16k", is_output=True) + + return flask.Response(stream(), mimetype='text/event-stream') def get_chat_functions(): - functions = [] + functions = [] for filename in os.listdir('content'): if filename.endswith('.txt'): topic = filename[:-4] @@ -182,7 +192,7 @@ def get_chat_functions(): "name": f"{topic}", "description": explanation.strip() }) - + return functions def main(): diff --git a/requirements.txt b/requirements.txt index f62051c..b01eada 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ Flask>1.0 Flask-cors PyYAML openai -waitress \ No newline at end of file +waitress +tiktoken \ No newline at end of file diff --git a/usage_cost_tracker.py b/usage_cost_tracker.py new file mode 100644 index 0000000..2b9567f --- /dev/null +++ b/usage_cost_tracker.py @@ -0,0 +1,106 @@ +import datetime +import tiktoken +import os +from openai.types.chat import ChatCompletion + +MONTHLY_USAGE_LIMIT_USD = os.getenv("WEB5GPT_MONTHLY_USAGE_LIMIT_USD") + +DEFAULT_MONTHLY_USAGE_LIMIT_USD = 500.0 + +# Pricing rates updated on 2023-12-19 +MODELS_COSTS = { + "gpt-3.5-turbo-16k": { "price_per_thousand_tokens": { "input": 0.001, "output": 0.0020 } }, + "gpt-4-1106-preview": { "price_per_thousand_tokens": { "input": 0.01, "output": 0.03 } }, 
+} + +class UsageCostTracker: + def __init__(self): + self.current_date = datetime.datetime.now().date() + self.monthly_usage_cost = 0.0 + self.tokenizer = tiktoken.get_encoding("cl100k_base") + + # initialize monthly usage limit in USD + if MONTHLY_USAGE_LIMIT_USD: + self.total_monthly_limit = float(MONTHLY_USAGE_LIMIT_USD) + else: + self.total_monthly_limit = DEFAULT_MONTHLY_USAGE_LIMIT_USD + print(">>> Total monthly cost limit: $%.2f" % self.total_monthly_limit) + + def check_usage_costs(self): + today = datetime.datetime.now().date() + + # Reset the monthly usage cost if it's a new month + if self.current_date.month != today.month: + print(">>> New month, resetting monthly usage cost") + print(">>> Current Monthly usage cost: $%.4f (of $%.2f)" % (self.monthly_usage_cost, self.total_monthly_limit)) + self.monthly_usage_cost = 0.0 + self.current_date = today + + # Check if the monthly usage cost has exceeded the limit + if self.monthly_usage_cost >= self.total_monthly_limit: + raise Exception(f">>> ERROR! Monthly usage cost ${self.monthly_usage_cost:.2f} exceeded limit of ${self.total_monthly_limit:.2f}") + + def compute_response_costs(self, response: ChatCompletion): + if not response.usage or not response.model: + print(">>> WARNING! No model/usage in response, impossible to compute costs") + return + + response_model = response.model + if response_model not in MODELS_COSTS: + print(">>> WARNING! 
Model not found in MODELS_COSTS, impossible to compute costs") + return + + model_costs = MODELS_COSTS[response_model] + + print(">>> Computing costs for Model: %s" % response_model) + + price_per_thousand_tokens = model_costs["price_per_thousand_tokens"] + input_cost = calculate_tokens_cost(response.usage.prompt_tokens, price_per_thousand_tokens["input"]) + output_cost = calculate_tokens_cost(response.usage.completion_tokens, price_per_thousand_tokens["output"]) + total_cost = input_cost + output_cost + print(">>> Total cost: $%.4f" % total_cost) + + self.monthly_usage_cost += total_cost + print(">>> Current Monthly usage cost: $%.4f (of $%.2f)" % (self.monthly_usage_cost, self.total_monthly_limit)) + + + def compute_messages_cost(self, messages, model_name): + tokens_per_message = 3 + tokens_per_name = 1 + num_tokens = 0 + + for message in messages: + num_tokens += tokens_per_message + for key, value in message.items(): + num_tokens += self.count_tokens(value) + if key == "name": + num_tokens += tokens_per_name + + num_tokens += 3 # every reply is primed with <|start|>assistant<|message|> + + self.compute_tokens_cost(num_tokens, model_name) + + + def compute_tokens_cost(self, tokens, model_name, is_output=False): + if model_name not in MODELS_COSTS: + print(">>> WARNING! 
Model not found in MODELS_COSTS, impossible to compute costs") + return + + model_costs = MODELS_COSTS[model_name] + input_type = "output" if is_output else "input" + price_per_thousand_tokens = model_costs["price_per_thousand_tokens"][input_type] + + usage_cost = calculate_tokens_cost(tokens, price_per_thousand_tokens) + + self.monthly_usage_cost += usage_cost + print(f">>> Added cost: ${usage_cost:.4f}, New monthly usage: ${self.monthly_usage_cost:.4f} (of ${self.total_monthly_limit:.2f})") + + def count_tokens(self, text): + return len(self.tokenizer.encode(text)) + + + +def calculate_tokens_cost(tokens, price_per_thousand_tokens): + cost_per_token = price_per_thousand_tokens / 1000 + return cost_per_token * tokens + \ No newline at end of file