Skip to content

Commit

Permalink
INTR-447 - Export search as CSV (#842)
Browse files Browse the repository at this point in the history
Co-authored-by: Cameron Lamb <cameron.lamb@digital.trade.gov.uk>
  • Loading branch information
david-okeke1337 and CamLamb authored Dec 17, 2024
1 parent 9a6bfb9 commit 36488da
Show file tree
Hide file tree
Showing 10 changed files with 229 additions and 9 deletions.
18 changes: 18 additions & 0 deletions src/content/management/commands/create_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@
"can_change_home_page_content",
]

# Name of the group that receives the search export permission, and the
# permission codenames assigned to it. The codenames are resolved against the
# "extended_search" app's permissions in Command.search_exporters_permissions.
SEARCH_EXPORTERS_GROUP_NAME = "Search Exporters"
SEARCH_EXPORTERS_PERMISSIONS = [
    "export_search",
]


class Command(BaseCommand):
help = "Create page permissions"
Expand Down Expand Up @@ -245,6 +250,18 @@ def event_permissions(self):
EVENT_EDITORS_PAGE_PERMISSIONS,
)

def search_exporters_permissions(self):
    """Create the search exporters group (if missing) and set its permissions.

    The group's permissions are replaced with the ``extended_search``
    permissions whose codenames appear in SEARCH_EXPORTERS_PERMISSIONS.
    """
    group = Group.objects.get_or_create(name=SEARCH_EXPORTERS_GROUP_NAME)[0]
    export_permissions = Permission.objects.filter(
        codename__in=SEARCH_EXPORTERS_PERMISSIONS,
        content_type__app_label="extended_search",
    )
    group.permissions.set(export_permissions)

def handle(self, *args, **options):
news_moderators, _ = Group.objects.get_or_create(
name="News Moderators",
Expand Down Expand Up @@ -354,3 +371,4 @@ def handle(self, *args, **options):

self.home_page_permissions()
self.event_permissions()
self.search_exporters_permissions()
4 changes: 2 additions & 2 deletions src/content/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from extended_search.index import DWIndexedField as IndexedField
from extended_search.index import Indexed, RelatedFields
from peoplefinder.widgets import PersonChooser
from search.utils import split_query
from user.models import User as UserModel


Expand Down Expand Up @@ -527,8 +526,9 @@ class SearchKeywordOrPhrase(models.Model):

class SearchKeywordOrPhraseQuerySet(models.QuerySet):
def filter_by_query(self, query):
query_parts = split_query(query)
from search.utils import split_query

query_parts = split_query(query)
return self.filter(search_keyword_or_phrase__keyword_or_phrase__in=query_parts)


Expand Down
3 changes: 2 additions & 1 deletion src/core/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,5 @@ class PageProblemFoundForm(forms.Form):
class WagtailUserEditForm(UserEditForm):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
del self.fields["is_superuser"]
if "is_superuser" in self.fields:
del self.fields["is_superuser"]
22 changes: 22 additions & 0 deletions src/extended_search/migrations/0003_alter_setting_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.2.16 on 2024-11-25 15:24

from django.db import migrations


class Migration(migrations.Migration):
    """Set the custom permissions declared in the ``setting`` model's options.

    After this migration the model options list two permissions:
    ``view_explore`` and ``export_search``.
    """

    # Applied after the previous extended_search migration.
    dependencies = [
        ("extended_search", "0002_alter_setting_options_alter_setting_key"),
    ]

    operations = [
        migrations.AlterModelOptions(
            name="setting",
            options={
                "permissions": (
                    ("view_explore", "View the global search explore page"),
                    ("export_search", "Export the search result as csv"),
                )
            },
        ),
    ]
5 changes: 4 additions & 1 deletion src/extended_search/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ class Setting(models.Model):
)

class Meta:
permissions = (("view_explore", "View the global search explore page"),)
permissions = (
("view_explore", "View the global search explore page"),
("export_search", "Export the search result as csv"),
)

def __str__(self):
return self.key
5 changes: 5 additions & 0 deletions src/search/templates/search/partials/search_results.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ <h1 class="govuk-heading-m">Start typing to search</h1>
{% endif %}
</div>

{% if perms.extended_search.export_search %}
    {# Only users holding the export permission are shown the download link. #}
    {# The query is URL-encoded so characters such as "&", "#" or "+" stay part of the "query" value. #}
    <a class="govuk-link"
       href="{% url 'search:export_search' search_category %}?query={{ search_query|urlencode }}">Download search results</a>
{% endif %}

<script>
function searchFeedbackData(formData) {
const searchData = JSON.parse(formData.get("search_data") || {});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ <h3 class="govuk-heading-s">Featured {{ result_type_display }}</h3>
{% else %}
{% if tab_name != 'all' and not tab_override %}
<script lang="javascript">
url = '{% url 'search:category' 'all' %}?query={{search_query}}'
url = "{% url 'search:category' 'all' %}?query={{search_query}}"
document.onload = window.location = url;
</script>
{% endif %}
Expand Down
3 changes: 2 additions & 1 deletion src/search/urls.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from django.urls import path
from django.views.generic import RedirectView

from .views import autocomplete, explore, search
from .views import autocomplete, explore, export_search, search


app_name = "search"
Expand All @@ -22,5 +22,6 @@
path("explore/", explore, name="explore"),
path("autocomplete/", autocomplete, name="autocomplete"),
path("<str:category>/", search, name="category"),
path("<str:category>/export_search/", export_search, name="export_search"),
path("", search, name="home"),
]
123 changes: 122 additions & 1 deletion src/search/utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,25 @@
import re
import unicodedata
from typing import Optional
from typing import TYPE_CHECKING, Optional

from django.conf import settings
from django.db import models
from django.http import HttpRequest
from django.urls import reverse
from wagtail.search.query import Fuzzy, Or, Phrase, PlainText

from content.models import BasePage
from extended_search import settings as search_settings
from extended_search.index import Indexed
from extended_search.query import Nested, OnlyFields
from extended_search.query_builder import CustomQueryBuilder
from news.models import NewsPage
from peoplefinder.models import Person, Team


if TYPE_CHECKING:
from content.models import BasePage
from peoplefinder.models import Person, Team


def sanitize_search_query(query: Optional[str] = None) -> str:
Expand Down Expand Up @@ -236,3 +247,113 @@ def has_only_bad_results(query, category, pinned_results, search_results):
bad_score_threshold = get_bad_score_threshold(query, category)
highest_score = search_results[0]._score
return highest_score <= bad_score_threshold


#
# EXPORT UTILS
#


def get_content_owner(page) -> dict:
    """Return the name and email of the page's content owner.

    A page without a ``content_owner`` attribute, or with a falsy one, yields
    empty strings for both values.
    """
    owner = getattr(page, "content_owner", None)
    if not owner:
        return {"name": "", "email": ""}
    return {"name": owner.full_name, "email": owner.email}


def get_content_author(page) -> dict:
    """Return the name and email of the person credited as the page's author.

    Resolution order:

    1. If the page has a truthy ``perm_sec_as_author`` attribute, the name is
       ``settings.PERM_SEC_NAME`` and the email is left empty.
    2. For ``NewsPage`` instances that expose ``get_first_publisher``, the
       first publisher is used.
    3. Otherwise the user attached to the page's latest revision is used.

    When no publisher, revision or revision user is available, both values
    are empty strings, so one page with incomplete history cannot abort a
    whole export.
    """
    content_author = {"name": "", "email": ""}

    if getattr(page, "perm_sec_as_author", False):
        content_author["name"] = settings.PERM_SEC_NAME
        return content_author

    if isinstance(page, NewsPage) and hasattr(page, "get_first_publisher"):
        # May be None when no revision of the page has a user attached.
        author = page.get_first_publisher()
    else:
        # A page can exist without any revision, so guard before reading .user.
        latest_revision = page.get_latest_revision()
        author = latest_revision.user if latest_revision else None

    if author:
        content_author["name"] = author.get_full_name()
        content_author["email"] = author.email
    return content_author


def get_page_export_row(page_result: "BasePage", request: HttpRequest) -> list[str]:
    """Build the CSV export row for a single page search result.

    Values are ordered as: title, absolute page URL, absolute Wagtail admin
    edit URL, content owner name and email, content author name and email,
    first and last published timestamps, and the page's class name.
    """
    owner = get_content_owner(page_result)
    author = get_content_author(page_result)
    page_url = request.build_absolute_uri(page_result.get_url())
    edit_path = reverse("wagtailadmin_pages:edit", args=[page_result.id])
    edit_url = request.build_absolute_uri(edit_path)
    page_type = type(page_result).__name__

    row = [page_result.title, page_url, edit_url]
    row += [owner["name"], owner["email"]]
    row += [author["name"], author["email"]]
    row += [page_result.first_published_at, page_result.last_published_at]
    row.append(page_type)
    return row


def get_person_export_row(person_result: "Person", request: HttpRequest) -> list[str]:
    """Build the CSV export row for a single person search result.

    Values are ordered as: first name, last name, email, primary phone
    number, absolute profile URL, and a dict mapping each role's job title to
    its team name.
    """
    roles_by_job_title = {}
    profile_url = request.build_absolute_uri(person_result.get_absolute_url())
    for role in person_result.roles.all():
        roles_by_job_title[role.job_title] = role.team.name

    return [
        person_result.first_name,
        person_result.last_name,
        person_result.email,
        person_result.primary_phone_number,
        profile_url,
        roles_by_job_title,
    ]


def get_team_export_row(team_result: "Team", request: HttpRequest) -> list[str]:
    """Build the CSV export row for a single team search result.

    Values are ordered as: team name, absolute team URL, and the absolute URL
    of the team's edit page.
    """
    team_url = request.build_absolute_uri(team_result.get_absolute_url())
    edit_path = reverse("team-edit", args=[team_result.slug])
    edit_url = request.build_absolute_uri(edit_path)
    return [team_result.name, team_url, edit_url]


# Maps a model class to its CSV export configuration:
#   "header"               - the column titles written as the first CSV row
#   "item_to_row_function" - callable(result, request) returning the matching
#                            row values for one search result
# Entries are keyed by class (not instance); the export view picks the first
# entry, in insertion order, whose key the searched model is a subclass of.
SEARCH_EXPORT_MAPPINGS: dict[type[models.Model], dict] = {
    BasePage: {
        "header": [
            "Title",
            "URL",
            "Edit URL",
            "Content Owner Name",
            "Content Owner Email",
            "Content Author Name",
            "Content Author Email",
            "First Published",
            "Last Updated",
            "Page Type",
        ],
        "item_to_row_function": get_page_export_row,
    },
    Person: {
        "header": [
            "First Name",
            "Last Name",
            "Email",
            "Phone",
            "Profile URL",
            "Roles {'Job Title': 'Team Name'}",
        ],
        "item_to_row_function": get_person_export_row,
    },
    Team: {
        "header": ["Title", "URL", "Edit URL"],
        "item_to_row_function": get_team_export_row,
    },
}
53 changes: 51 additions & 2 deletions src/search/views.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import csv
import logging

import sentry_sdk
Expand All @@ -14,7 +15,6 @@
from extended_search.settings import settings_singleton
from peoplefinder.models import Person, Team
from search.templatetags import search as search_template_tag
from search.utils import get_query_info_for_model


logger = logging.getLogger(__name__)
Expand All @@ -24,6 +24,10 @@ def can_view_explore():
return user_passes_test(lambda u: u.has_perm("extended_search.view_explore"))


def can_export_search():
    """Return a view decorator that admits only users who may export searches.

    The check is ``user.has_perm("extended_search.export_search")``.
    """

    def _has_export_permission(user):
        return user.has_perm("extended_search.export_search")

    return user_passes_test(_has_export_permission)


@require_http_methods(["GET"])
def autocomplete(request: HttpRequest) -> HttpResponse:
_category = "autocomplete"
Expand Down Expand Up @@ -58,7 +62,7 @@ def autocomplete(request: HttpRequest) -> HttpResponse:


@require_http_methods(["GET"])
def search(request: HttpRequest, category: str = None) -> HttpResponse:
def search(request: HttpRequest, category: str | None = None) -> HttpResponse:
query = request.GET.get("query", "")
page = request.GET.get("page", "1")
tab_override = request.GET.get("tab_override", False)
Expand Down Expand Up @@ -97,6 +101,8 @@ def explore(request: HttpRequest) -> HttpResponse:
"""
Administrative view for exploring search options, boosts, etc
"""
from search.utils import get_query_info_for_model

if request.method == "POST":
if not request.user.has_perm("extended_search.change_setting"):
messages.error(request, "You are not authorised to edit settings")
Expand Down Expand Up @@ -132,3 +138,46 @@ def explore(request: HttpRequest) -> HttpResponse:
}

return TemplateResponse(request, "search/explore.html", context=context)


@can_export_search()
@require_http_methods(["GET"])
def export_search(request: HttpRequest, category: str) -> HttpResponse:
    """
    Administrative view for exporting search results as csv

    Runs the search for ``category`` (``"all"`` maps to the ``"all_pages"``
    search vector) with the ``query`` GET parameter and returns the results
    as a CSV attachment. The header and rows come from the
    SEARCH_EXPORT_MAPPINGS entry matching the search vector's model.

    Raises:
        Http404: if ``category`` does not name a known search vector.
        TypeError: if the search vector's model has no export mapping.
    """
    from django.http import Http404

    from search.utils import SEARCH_EXPORT_MAPPINGS

    query = request.GET.get("query", "")

    # The category comes straight from the URL, so an unknown value is a
    # client error (404) rather than a server error.
    vector_key = "all_pages" if category == "all" else category
    try:
        search_vector_class = search_template_tag.SEARCH_VECTORS[vector_key]
    except KeyError:
        raise Http404(f"Unknown search category '{category}'") from None
    search_vector = search_vector_class(request)

    search_results = search_vector.search(query)
    search_model = search_vector.model

    # First mapping (in declaration order) whose key is a base of the model.
    export_mapping = next(
        (
            mapping
            for model, mapping in SEARCH_EXPORT_MAPPINGS.items()
            if issubclass(search_model, model)
        ),
        None,
    )
    if export_mapping is None:
        raise TypeError(
            f"'{search_model}' is not a model that is configured for export"
        )

    filename = f"search_export_{category}.csv"
    response = HttpResponse(
        content_type="text/csv",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )

    # NOTE(review): cell values are written as-is; if exported fields can hold
    # user-controlled text starting with "=", "+", "-" or "@", consider
    # neutralising them to avoid spreadsheet formula injection.
    writer = csv.writer(response)
    writer.writerow(export_mapping["header"])

    item_to_row = export_mapping["item_to_row_function"]
    for result in search_results:
        writer.writerow(item_to_row(result, request))

    return response

0 comments on commit 36488da

Please sign in to comment.