Skip to content

Commit

Permalink
Scrub sensitive data from Sentry events
Browse files Browse the repository at this point in the history
Why these changes are being introduced:

Though data scrubbing is configured in Sentry for this project
removing sensitive data before sending provides another layer
of protection from exposing sensitive data in Sentry issues.

How this addresses that need:
* Utilizes Sentry's before_send() method to remove all variables
and scope data from Sentry events before sending

Side effects of this change:
* Sentry events do not have variables from stack trace,
but still contain helpful debug information

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/HRQB-39
  • Loading branch information
ghukill committed Jul 12, 2024
1 parent ed392aa commit 021b6b6
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 65 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ requests-mock = "*"
freezegun = "*"
sqlalchemy-stubs = "*"
boto3 = "*"
pytest-mock = "*"

[requires]
python_version = "3.11"
Expand Down
137 changes: 73 additions & 64 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 31 additions & 1 deletion hrqb/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import copy
import logging
import os
from typing import Any

import sentry_sdk
from sentry_sdk.types import Event


class Config:
Expand Down Expand Up @@ -75,6 +77,34 @@ def configure_sentry() -> str:
env = os.getenv("WORKSPACE")
sentry_dsn = os.getenv("SENTRY_DSN")
if sentry_dsn and sentry_dsn.lower() != "none":
sentry_sdk.init(sentry_dsn, environment=env)
sentry_sdk.init(
sentry_dsn,
environment=env,
before_send=sentry_before_send_callback,
)
return f"Sentry DSN found, exceptions will be sent to Sentry with env={env}"
return "No Sentry DSN found, exceptions will not be sent to Sentry"


def sentry_before_send_callback(event: Event, _hint: dict) -> Event:
"""Callback for modifying sentry event data before sending.
This function is difficult to mock given how it's registered with sentry_sdk.init(),
where calling another functions for the actual work allows for mocking there.
"""
return _remove_sensitive_scope_variables(event)


def _remove_sensitive_scope_variables(event: Event) -> Event:
"""Removes sensitive data from Sentry event.
copy.deepcopy() is used to allow testing of the original object and the returned
object separately.
"""
new_event = copy.deepcopy(event)
if "exception" in new_event:
for exc_type in new_event["exception"]["values"]:
for stacktrace in exc_type.get("stacktrace", {}).get("frames", []):
for item in ["vars", "pre_context", "post_context"]:
stacktrace.pop(item, None)
return new_event
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,3 +919,11 @@ def task_transform_years_complete(
task = TransformYears(pipeline=all_tasks_pipeline_name)
task.run()
return task


@pytest.fixture
def sensitive_scope_variable():
return {
"note": "I am a dictionary with sensitive information",
"secret": "very-secret-abc123",
}
31 changes: 31 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# ruff: noqa: S105, TRY301, TRY002, BLE001

import json
import logging

import sentry_sdk

from hrqb import config
from hrqb.config import configure_logger, configure_sentry


Expand Down Expand Up @@ -35,3 +41,28 @@ def test_configure_sentry_env_variable_is_dsn(monkeypatch):
monkeypatch.setenv("SENTRY_DSN", "https://1234567890@00000.ingest.sentry.io/123456")
result = configure_sentry()
assert result == "Sentry DSN found, exceptions will be sent to Sentry with env=test"


def test_sentry_scope_variables_removed_from_sent_event(
mocker,
sensitive_scope_variable,
):
not_secret_value = "Everyone can see this exception message."
secret_value = "very-secret-abc123"

spy_scrubber = mocker.spy(config, "_remove_sensitive_scope_variables")
try:
raise Exception(not_secret_value)
except Exception as exc:
sentry_sdk.capture_exception(exc)

original_event = json.dumps(spy_scrubber.call_args)
scrubbed_event = json.dumps(spy_scrubber.spy_return)

# not secret value present before and after scrubbing
assert not_secret_value in original_event
assert not_secret_value in scrubbed_event

# secret value present in original, but absent from scrubbed
assert secret_value in original_event
assert secret_value not in scrubbed_event

0 comments on commit 021b6b6

Please sign in to comment.