Add lambda and dynamodb infra for exposed key checker

thinkst · Nov 19, 2024 · 1b03f86 · 1b03f86
1 parent 2763cf4
commit 1b03f86
Show file tree

Hide file tree

Showing 15 changed files with 1,048 additions and 0 deletions.
diff --git a/aws-exposed-key-checker-infra/.gitignore b/aws-exposed-key-checker-infra/.gitignore
@@ -0,0 +1,6 @@
+.terraform
+.tfvars
+*.zip
+*.tfstate
+*.tfstate.backup
+test_ticket_data.json
diff --git a/aws-exposed-key-checker-infra/.terraform.lock.hcl b/aws-exposed-key-checker-infra/.terraform.lock.hcl
diff --git a/aws-exposed-key-checker-infra/lambda_outputs/.gitignore b/aws-exposed-key-checker-infra/lambda_outputs/.gitignore
@@ -0,0 +1,2 @@
+**/*
+!.gitignore
diff --git a/aws-exposed-key-checker-infra/lambda_source/exposed_key_checker/database.py b/aws-exposed-key-checker-infra/lambda_source/exposed_key_checker/database.py
@@ -0,0 +1,39 @@
+from typing import TYPE_CHECKING
+import boto3
+from botocore.config import Config
+from exposed_key_checker.util import create_support_ticket
+
+if TYPE_CHECKING:
+    from exposed_keys import ExposedKeyData
+
+# Name of the DynamoDB table that Database passes as TableName to put_item and query.
+DB_TABLE_NAME = "ExposedKeyCheckerProcessed"
+# botocore configuration that pins the DynamoDB client to the us-east-1 region.
+BOTO_CONFIG = Config(region_name="us-east-1")
+
+
+class Database:
+    def __init__(self):
+        self._db = boto3.client("dynamodb", config=BOTO_CONFIG)
+
+    def mark_key_as_processed(self, key_data: "ExposedKeyData"):
+        try:
+            self._db.put_item(TableName=DB_TABLE_NAME, Item=key_data.to_db_item())
+        except Exception as e:
+            text = f"The key checker could not save the following item as processed in DynamoDB: {key_data}. The exception was {e}."
+            create_support_ticket(
+                "Exposed AWS Key Checker could not save processed state",
+                text,
+                "exposed-aws-key-checker-save-error",
+            )
+
+    def has_key_been_processed(self, key_data: "ExposedKeyData") -> bool:
+        res = self._db.query(
+            TableName=DB_TABLE_NAME,
+            Select="COUNT",
+            ExpressionAttributeValues={
+                ":v1": {
+                    "S": key_data.iam_user,
+                },
+            },
+            KeyConditionExpression="IamUser = :v1",
+        )
+        return res.get("Count", 0) > 0
diff --git a/aws-exposed-key-checker-infra/lambda_source/exposed_key_checker/exposed_keys.py b/aws-exposed-key-checker-infra/lambda_source/exposed_key_checker/exposed_keys.py
@@ -0,0 +1,109 @@
+from dataclasses import dataclass
+from datetime import datetime
+import re
+from exposed_key_checker.ticket_manager import TicketData
+
+
+def parse_tickets(
+    tickets: "list[TicketData]",
+) -> "tuple[list[ExposedKeyData], list[int]]":
+    exposed_data: list[ExposedKeyData] = []
+    parse_error_ids: list[int] = []
+    for ticket in tickets:
+        data = ExposedKeyData.from_ticket(ticket)
+        if data is None and not _should_ignore_ticket_parse_failure(ticket):
+            parse_error_ids.append(ticket.id)
+        elif data is not None:
+            exposed_data.append(data)
+
+    return exposed_data, parse_error_ids
+
+
+def _should_ignore_ticket_parse_failure(ticket: TicketData) -> bool:
+    """
+    Ignore ticket parse failures if we don't expect the email to have iam_user / key details
+
+    There are a few different types of emails, like follow up emails, case resolved emails, etc.
+    """
+    ignore_strs = [
+        "correspondence was added to case",
+        "following up",
+        "following-up",
+        "follow up",
+        "follow-up",
+        "previous notice",
+        "we have not heard back from you",
+        "duplicate of case",
+        "case has been resolved",
+    ]
+
+    text = ticket.description.lower()
+    for match_str in ignore_strs:
+        if match_str in text:
+            return True
+    else:
+        return False
+
+
+@dataclass
+class ExposedKeyData:
+    ticket: TicketData
+    iam_user: str
+    access_key: str
+    public_location: str
+    case_no: str
+
+    @property
+    def tokens_server(self) -> str:
+        return self.iam_user.split("@@")[0]
+
+    @property
+    def token(self) -> str:
+        return self.iam_user.split("@@")[1]
+
+    def to_db_item(self) -> dict:
+        return {
+            "IamUser": {"S": self.iam_user.lower()},
+            "AccessKey": {"S": self.access_key.lower()},
+            "PublicLocation": {"S": self.public_location},
+            "ProcessedAt": {"N": f"{datetime.now().timestamp():.0f}"},
+            "ZendeskTicketId": {"N": str(self.ticket.id)},
+            "TicketCreatedAt": {"N": f"{self.ticket.created_dt.timestamp():.0f}"},
+        }
+
+    @classmethod
+    def from_ticket(cls, ticket: TicketData) -> "ExposedKeyData | None":
+        iam_user = ""
+        access_key = ""
+        location = ""
+        case_no = "<unknown>"
+
+        account_details_match = re.search(
+            r"access key * (\w+).*user *([\w-]+\.(?:com|net|org)@@\w+)",
+            ticket.description,
+            re.IGNORECASE,
+        )
+        if account_details_match is None:
+            return None
+
+        access_key, iam_user = account_details_match.groups()
+
+        case_match = re.search(r"case (\d+)", ticket.subject, re.IGNORECASE)
+        if case_match is not None:
+            case_no = case_match.group(1)
+
+        location_match = re.search(
+            r"online at *(http[s]?://[\w%./#-]+) *\. *(?:to)?",
+            ticket.description,
+            re.IGNORECASE,
+        )
+        if location_match is not None:
+            location = location_match.group(1)
+
+        return cls(ticket, iam_user.lower(), access_key.lower(), location, case_no)
+
+    def __str__(self) -> str:
+        return f"<Key with IAM user = {self.iam_user}, key = {self.access_key} and ticket ID = {self.ticket.id}>"
+
+    def __repr__(self) -> str:
+        return self.__str__()
diff --git a/aws-exposed-key-checker-infra/lambda_source/exposed_key_checker/lambda_handler.py b/aws-exposed-key-checker-infra/lambda_source/exposed_key_checker/lambda_handler.py
@@ -0,0 +1,123 @@
+import json
+import os
+from datetime import datetime, timedelta
+import requests
+import boto3
+from botocore.config import Config
+
+from exposed_key_checker.database import Database
+from exposed_key_checker.ticket_manager import ZendeskTicketManager
+from exposed_key_checker.util import create_support_ticket
+from exposed_key_checker.exposed_keys import ExposedKeyData, parse_tickets
+
+# NOTE(review): same value as DB_TABLE_NAME in database.py; it is not referenced
+# by the functions shown in this module — confirm whether it is still needed.
+DB_TABLE_NAME = "ExposedKeyCheckerProcessed"
+# gather_data stops reading ticket batches once the last parsed ticket of a
+# batch is older than this many days.
+MAX_PROCESS_AGE_DAYS = 7
+# Required environment variable; a missing value raises KeyError at import time.
+ZENDESK_EXPOSED_TICKET_TAG = os.environ["ZENDESK_EXPOSED_TICKET_TAG"]
+# Required environment variable: Secrets Manager secret ID read by get_zendesk_auth.
+ZENDESK_AUTH_SECRET_ID = os.environ["ZENDESK_AUTH_SECRET_ID"]
+# Required environment variable: comma-separated server names, whitespace-stripped.
+# process_data ignores items whose tokens_server is not in this list.
+TOKENS_SERVERS_ALLOW_LIST = [
+    s.strip() for s in os.environ["TOKENS_SERVERS_ALLOW_LIST"].split(",")
+]
+# Optional environment variable; when set, send_to_tokens_server posts to this
+# URL instead of the item's own tokens server.
+TOKENS_POST_URL_OVERRIDE = os.getenv("TOKENS_POST_URL_OVERRIDE")
+
+# botocore configuration that pins the Secrets Manager client to us-east-1.
+BOTO_CONFIG = Config(region_name="us-east-1")
+
+
+def lambda_handler(_event, _context):
+    db = Database()
+
+    try:
+        ticket_manager = ZendeskTicketManager(*get_zendesk_auth())
+        key_data, failed_ids = gather_data(ticket_manager)
+    except Exception as e:
+        text = f"The key checker could not query the Zendesk API for tickets.\nThe exception was {e}."
+        create_support_ticket(
+            "Exposed AWS Key Checker could not query the Zendesk API",
+            text,
+            "exposed-aws-key-checker-zendesk-api-error",
+        )
+        return
+
+    process_data(db, key_data)
+
+    if failed_ids:
+        text = f"The key checker could not parse the following Zendesk ticket IDs: {failed_ids}"
+        create_support_ticket(
+            "Exposed AWS Key Checker could not parse Zendesk tickets",
+            text,
+            "exposed-aws-key-checker-parse-error",
+        )
+
+
+def process_data(db: Database, data: "list[ExposedKeyData]"):
+    unprocessed_items = [d for d in data if not db.has_key_been_processed(d)]
+    processed_count = len(data) - len(unprocessed_items)
+    print(f"Skipping {processed_count} items that were already processed.")
+
+    items_to_process = []
+    for item in unprocessed_items:
+        if item.tokens_server not in TOKENS_SERVERS_ALLOW_LIST:
+            print(
+                f"Ignoring the following item because its server is not in the allow list: {item}"
+            )
+            continue
+
+        items_to_process.append(item)
+
+    print(f"Processing {len(items_to_process)} unprocessed items: {items_to_process}.")
+
+    for item in items_to_process:
+        try:
+            send_to_tokens_server(item)
+        except Exception as e:
+            text = f"The key checker could not post the exposed event to the tokens server for the following item: {item}\nThe exception was: {e}.\n\nThis post will be retried automatically on the next run of the lambda. This only needs to be investigated if the failures continue."
+            create_support_ticket(
+                "Exposed AWS Key Checker could not post to tokens server",
+                text,
+                "exposed-aws-key-checker-post-error",
+            )
+        else:
+            db.mark_key_as_processed(item)
+
+
+def gather_data(
+    ticket_manager: "ZendeskTicketManager",
+) -> "tuple[list[ExposedKeyData], list[int]]":
+    data: list[ExposedKeyData] = []
+    error_ids: list[int] = []
+
+    num_tickets = 0
+    for tickets in ticket_manager.read_all_tickets_in_batches():
+        num_tickets += len(tickets)
+        key_data, eids = parse_tickets(tickets)
+        data.extend(key_data)
+        error_ids.extend(eids)
+
+        age = datetime.now() - key_data[-1].ticket.created_dt
+        if age > timedelta(days=MAX_PROCESS_AGE_DAYS):
+            # Only check the last week's data
+            break
+
+    print(f"Got {len(data)} exposed keys from {num_tickets} tickets.")
+
+    return data, error_ids
+
+
+def send_to_tokens_server(data: "ExposedKeyData"):
+    post_url = TOKENS_POST_URL_OVERRIDE or data.tokens_server
+    print(f"Sending key exposed event to {post_url} for token {data.token}")
+
+    post_data = {
+        "token_exposed": True,
+        "exposed_time": int(data.ticket.created_dt.strftime("%s")),
+        "public_location": data.public_location,
+    }
+
+    res = requests.post(post_url, data=post_data)
+    res.raise_for_status()
+
+
+def get_zendesk_auth():
+    client = boto3.client("secretsmanager", config=BOTO_CONFIG)
+    res = client.get_secret_value(SecretId=ZENDESK_AUTH_SECRET_ID)
+    data = json.loads(res.get("SecretString"))
+    return data["api_token"], data["user"], data["search_endpoint"]