Skip to content

Commit

Permalink
Add lambda and dynamodb infra for exposed key checker
Browse files Browse the repository at this point in the history
  • Loading branch information
gjcthinkst committed Nov 19, 2024
1 parent 2763cf4 commit 1b03f86
Show file tree
Hide file tree
Showing 15 changed files with 1,048 additions and 0 deletions.
6 changes: 6 additions & 0 deletions aws-exposed-key-checker-infra/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.terraform
.tfvars
*.zip
*.tfstate
*.tfstate.backup
test_ticket_data.json
62 changes: 62 additions & 0 deletions aws-exposed-key-checker-infra/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions aws-exposed-key-checker-infra/lambda_outputs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
**/*
!.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from typing import TYPE_CHECKING
import boto3
from botocore.config import Config
from exposed_key_checker.util import create_support_ticket

if TYPE_CHECKING:
from exposed_keys import ExposedKeyData

# DynamoDB table that Database reads from and writes to.
DB_TABLE_NAME = "ExposedKeyCheckerProcessed"
# Shared botocore configuration; pins the AWS region used by the DynamoDB client.
BOTO_CONFIG = Config(region_name="us-east-1")


class Database:
    """Wrapper around the DynamoDB table that records processed exposed keys."""

    def __init__(self):
        self._db = boto3.client("dynamodb", config=BOTO_CONFIG)

    def mark_key_as_processed(self, key_data: "ExposedKeyData"):
        """Store ``key_data`` in the table so later runs can skip it.

        Saving is best-effort: any exception from DynamoDB is swallowed and
        reported through ``create_support_ticket`` instead of propagating.
        """
        try:
            self._db.put_item(TableName=DB_TABLE_NAME, Item=key_data.to_db_item())
        except Exception as e:
            text = f"The key checker could not save the following item as processed in DynamoDB: {key_data}. The exception was {e}."
            create_support_ticket(
                "Exposed AWS Key Checker could not save processed state",
                text,
                "exposed-aws-key-checker-save-error",
            )

    def has_key_been_processed(self, key_data: "ExposedKeyData") -> bool:
        """Return True if at least one item exists for this key's IAM user.

        The lookup matches on ``IamUser`` only; the access key is not part
        of the condition.
        NOTE(review): the key condition presumes ``IamUser`` is the table's
        partition key -- confirm against the table definition.
        """
        res = self._db.query(
            TableName=DB_TABLE_NAME,
            Select="COUNT",
            ExpressionAttributeValues={
                ":v1": {
                    # to_db_item() stores IamUser lower-cased, so the lookup
                    # must be normalised the same way to find the item.
                    "S": key_data.iam_user.lower(),
                },
            },
            KeyConditionExpression="IamUser = :v1",
        )
        return res.get("Count", 0) > 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from dataclasses import dataclass
from datetime import datetime
import re
from exposed_key_checker.ticket_manager import TicketData


def parse_tickets(
    tickets: "list[TicketData]",
) -> "tuple[list[ExposedKeyData], list[int]]":
    """Split tickets into parsed key data and IDs of tickets that failed to parse.

    Returns a ``(parsed, failed_ids)`` pair. A ticket that cannot be parsed
    is only reported in ``failed_ids`` when
    ``_should_ignore_ticket_parse_failure`` does not excuse it.
    """
    parsed: list[ExposedKeyData] = []
    failed_ids: list[int] = []

    for ticket in tickets:
        key_data = ExposedKeyData.from_ticket(ticket)
        if key_data is not None:
            parsed.append(key_data)
            continue
        # Unparseable ticket: record it unless it is a known benign email.
        if not _should_ignore_ticket_parse_failure(ticket):
            failed_ids.append(ticket.id)

    return parsed, failed_ids


def _should_ignore_ticket_parse_failure(ticket: "TicketData") -> bool:
    """
    Ignore ticket parse failures if we don't expect the email to have iam_user / key details
    There are a few different types of emails, like follow up emails, case resolved emails, etc.

    Returns True when the lower-cased ticket description contains any of the
    known phrases below, False otherwise.
    """
    ignore_strs = (
        "correspondence was added to case",
        "following up",
        "following-up",
        "follow up",
        "follow-up",
        "previous notice",
        "we have not heard back from you",
        "duplicate of case",
        "case has been resolved",
    )

    # Phrases are lower-case, so compare against a lower-cased description.
    text = ticket.description.lower()
    # Every phrase must be considered, not only the first one.
    return any(match_str in text for match_str in ignore_strs)


@dataclass
class ExposedKeyData:
    """Details of one exposed AWS key, extracted from a support ticket."""

    ticket: TicketData
    iam_user: str
    access_key: str
    public_location: str
    case_no: str

    @property
    def tokens_server(self) -> str:
        """Part of ``iam_user`` before the first ``@@`` separator."""
        server, *_ = self.iam_user.split("@@")
        return server

    @property
    def token(self) -> str:
        """Part of ``iam_user`` after the first ``@@`` separator."""
        pieces = self.iam_user.split("@@")
        return pieces[1]

    def to_db_item(self) -> dict:
        """Build the DynamoDB attribute-value map for this key.

        The IAM user and access key are lower-cased; timestamps are written
        as whole-second epoch numbers.
        """
        processed_at = format(datetime.now().timestamp(), ".0f")
        ticket_created_at = format(self.ticket.created_dt.timestamp(), ".0f")
        return {
            "IamUser": {"S": self.iam_user.lower()},
            "AccessKey": {"S": self.access_key.lower()},
            "PublicLocation": {"S": self.public_location},
            "ProcessedAt": {"N": processed_at},
            "ZendeskTicketId": {"N": str(self.ticket.id)},
            "TicketCreatedAt": {"N": ticket_created_at},
        }

    @classmethod
    def from_ticket(cls, ticket: TicketData) -> "ExposedKeyData | None":
        """Parse a ticket into key data, or return None if no key details are found.

        The access key and IAM user are required and read from the ticket
        description. The case number (from the subject) and the public
        location (from the description) are optional and fall back to
        ``"<unknown>"`` and ``""`` respectively.
        """
        details = re.search(
            r"access key * (\w+).*user *([\w-]+\.(?:com|net|org)@@\w+)",
            ticket.description,
            re.IGNORECASE,
        )
        if details is None:
            return None
        access_key, iam_user = details.groups()

        case_hit = re.search(r"case (\d+)", ticket.subject, re.IGNORECASE)
        case_no = case_hit.group(1) if case_hit is not None else "<unknown>"

        location_hit = re.search(
            r"online at *(http[s]?://[\w%./#-]+) *\. *(?:to)?",
            ticket.description,
            re.IGNORECASE,
        )
        location = location_hit.group(1) if location_hit is not None else ""

        return cls(ticket, iam_user.lower(), access_key.lower(), location, case_no)

    def __str__(self) -> str:
        return f"<Key with IAM user = {self.iam_user}, key = {self.access_key} and ticket ID = {self.ticket.id}>"

    def __repr__(self) -> str:
        # Use the same compact form in logs and in containers.
        return str(self)
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import json
import os
from datetime import datetime, timedelta
import requests
import boto3
from botocore.config import Config

from exposed_key_checker.database import Database
from exposed_key_checker.ticket_manager import ZendeskTicketManager
from exposed_key_checker.util import create_support_ticket
from exposed_key_checker.exposed_keys import ExposedKeyData, parse_tickets

# DynamoDB table name. NOTE(review): not referenced by the code visible in
# this module -- the Database class appears to carry its own copy; confirm.
DB_TABLE_NAME = "ExposedKeyCheckerProcessed"
# gather_data stops reading ticket batches once it sees a key whose ticket
# is older than this many days.
MAX_PROCESS_AGE_DAYS = 7
# Required environment variables: a missing one raises KeyError at import time.
# NOTE(review): ZENDESK_EXPOSED_TICKET_TAG is not referenced by the code
# visible in this module -- confirm where it is consumed.
ZENDESK_EXPOSED_TICKET_TAG = os.environ["ZENDESK_EXPOSED_TICKET_TAG"]
# Secrets Manager secret ID read by get_zendesk_auth.
ZENDESK_AUTH_SECRET_ID = os.environ["ZENDESK_AUTH_SECRET_ID"]
# Comma-separated list of tokens servers; surrounding whitespace is stripped
# from each entry. Items whose server is not listed are skipped by process_data.
TOKENS_SERVERS_ALLOW_LIST = [
s.strip() for s in os.environ["TOKENS_SERVERS_ALLOW_LIST"].split(",")
]
# Optional: when set (and non-empty), every exposure event is posted to this
# URL instead of the item's own tokens server.
TOKENS_POST_URL_OVERRIDE = os.getenv("TOKENS_POST_URL_OVERRIDE")

# Shared botocore configuration; pins the AWS region used by the Secrets Manager client.
BOTO_CONFIG = Config(region_name="us-east-1")


def lambda_handler(_event, _context):
    """Lambda entry point: collect exposed-key tickets, process them, report parse failures.

    Both arguments are ignored. Any exception while fetching credentials or
    gathering tickets is reported through ``create_support_ticket`` and ends
    the run early; nothing is processed in that case.
    """
    db = Database()

    try:
        zendesk_auth = get_zendesk_auth()
        ticket_manager = ZendeskTicketManager(*zendesk_auth)
        key_data, failed_ids = gather_data(ticket_manager)
    except Exception as e:
        create_support_ticket(
            "Exposed AWS Key Checker could not query the Zendesk API",
            f"The key checker could not query the Zendesk API for tickets.\nThe exception was {e}.",
            "exposed-aws-key-checker-zendesk-api-error",
        )
        return

    process_data(db, key_data)

    # Parse failures are reported only after the parseable keys were handled.
    if not failed_ids:
        return
    create_support_ticket(
        "Exposed AWS Key Checker could not parse Zendesk tickets",
        f"The key checker could not parse the following Zendesk ticket IDs: {failed_ids}",
        "exposed-aws-key-checker-parse-error",
    )


def process_data(db: Database, data: "list[ExposedKeyData]"):
    """Post each new, allow-listed exposed key to its tokens server.

    Items already recorded in ``db`` are skipped, as are items whose tokens
    server is not in ``TOKENS_SERVERS_ALLOW_LIST``. An item is marked as
    processed only after its post succeeds; a failed post is reported
    through ``create_support_ticket`` and the item is left unmarked.
    """
    pending = []
    for candidate in data:
        if not db.has_key_been_processed(candidate):
            pending.append(candidate)
    already_done = len(data) - len(pending)
    print(f"Skipping {already_done} items that were already processed.")

    items_to_process = []
    for item in pending:
        if item.tokens_server in TOKENS_SERVERS_ALLOW_LIST:
            items_to_process.append(item)
        else:
            print(
                f"Ignoring the following item because its server is not in the allow list: {item}"
            )

    print(f"Processing {len(items_to_process)} unprocessed items: {items_to_process}.")

    for item in items_to_process:
        try:
            send_to_tokens_server(item)
        except Exception as e:
            create_support_ticket(
                "Exposed AWS Key Checker could not post to tokens server",
                f"The key checker could not post the exposed event to the tokens server for the following item: {item}\nThe exception was: {e}.\n\nThis post will be retried automatically on the next run of the lambda. This only needs to be investigated if the failures continue.",
                "exposed-aws-key-checker-post-error",
            )
            # Leave the item unmarked so it is picked up again next run.
            continue
        db.mark_key_as_processed(item)


def gather_data(
    ticket_manager: "ZendeskTicketManager",
) -> "tuple[list[ExposedKeyData], list[int]]":
    """Read ticket batches and parse them until the data is older than the cut-off.

    Returns ``(data, error_ids)``: every exposed key parsed from the batches
    read, and the IDs of tickets that could not be parsed. Reading stops
    after the first batch whose last parsed key comes from a ticket older
    than ``MAX_PROCESS_AGE_DAYS``.
    NOTE(review): the early stop presumes batches arrive newest-first --
    confirm against ZendeskTicketManager.read_all_tickets_in_batches.
    """
    data: list[ExposedKeyData] = []
    error_ids: list[int] = []

    num_tickets = 0
    for tickets in ticket_manager.read_all_tickets_in_batches():
        num_tickets += len(tickets)
        key_data, eids = parse_tickets(tickets)
        data.extend(key_data)
        error_ids.extend(eids)

        if not key_data:
            # A batch may contain no parseable keys (e.g. only follow-up
            # emails). There is then no key to judge the age by, and
            # indexing key_data[-1] would raise IndexError, so keep reading.
            continue

        age = datetime.now() - key_data[-1].ticket.created_dt
        if age > timedelta(days=MAX_PROCESS_AGE_DAYS):
            # Only check the last week's data
            break

    print(f"Got {len(data)} exposed keys from {num_tickets} tickets.")

    return data, error_ids


def send_to_tokens_server(data: "ExposedKeyData", *, timeout: float = 10.0):
    """POST a key-exposed event for ``data`` to its tokens server.

    The target is ``TOKENS_POST_URL_OVERRIDE`` when set, otherwise
    ``data.tokens_server``.

    Args:
        data: The exposed key to report.
        timeout: Seconds to wait for the server before giving up, so a
            stalled server cannot hang the whole run.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx response (via ``raise_for_status``).
    """
    # NOTE(review): tokens_server is the part of iam_user before "@@", which
    # looks like a bare host name with no URL scheme -- confirm the intended
    # URL when no override is configured.
    post_url = TOKENS_POST_URL_OVERRIDE or data.tokens_server
    print(f"Sending key exposed event to {post_url} for token {data.token}")

    post_data = {
        "token_exposed": True,
        # timestamp() is portable and honours tzinfo; strftime("%s") is a
        # platform-specific extension that ignores the timezone.
        "exposed_time": int(data.ticket.created_dt.timestamp()),
        "public_location": data.public_location,
    }

    res = requests.post(post_url, data=post_data, timeout=timeout)
    res.raise_for_status()


def get_zendesk_auth():
    """Fetch Zendesk credentials from AWS Secrets Manager.

    Reads the secret named by ``ZENDESK_AUTH_SECRET_ID``, decodes its
    ``SecretString`` as JSON and returns the tuple
    ``(api_token, user, search_endpoint)``.
    """
    secrets_client = boto3.client("secretsmanager", config=BOTO_CONFIG)
    response = secrets_client.get_secret_value(SecretId=ZENDESK_AUTH_SECRET_ID)
    secret = json.loads(response.get("SecretString"))

    api_token = secret["api_token"]
    user = secret["user"]
    search_endpoint = secret["search_endpoint"]
    return api_token, user, search_endpoint
Loading

0 comments on commit 1b03f86

Please sign in to comment.