Skip to content

Commit

Permalink
fix: Handle missing date fields in XML submissions (#5744)
Browse files Browse the repository at this point in the history
* refactor: Eliminate _construct_creation_date helper

* fix: Use xml2rfc method for filling in missing date fields

* fix: Set options.date for xml2rfc writers

* test: Test handling of missing date element/fields
  • Loading branch information
jennifer-richards authored Jun 2, 2023
1 parent a3452d0 commit d33a6f3
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 48 deletions.
20 changes: 19 additions & 1 deletion ietf/submit/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3347,7 +3347,8 @@ def test_process_submission_xml(self):
"test_submission.xml",
title="Correct Draft Title",
)
xml_path.write_text(xml.read())
xml_contents = xml.read()
xml_path.write_text(xml_contents)
output = process_submission_xml("draft-somebody-test", "00")
self.assertEqual(output["filename"], "draft-somebody-test")
self.assertEqual(output["rev"], "00")
Expand All @@ -3362,6 +3363,23 @@ def test_process_submission_xml(self):
self.assertIsNone(output["formal_languages"])
self.assertEqual(output["xml_version"], "3")

# Should behave on missing or partial <date> elements
xml_path.write_text(re.sub(r"<date.+>", "", xml_contents)) # strip <date...> entirely
output = process_submission_xml("draft-somebody-test", "00")
self.assertEqual(output["document_date"], None)

xml_path.write_text(re.sub(r"<date year=.+ month", "<date month", xml_contents)) # remove year
output = process_submission_xml("draft-somebody-test", "00")
self.assertEqual(output["document_date"], date_today())

xml_path.write_text(re.sub(r"(<date.+) month=.+day=(.+>)", r"\1 day=\2", xml_contents)) # remove month
output = process_submission_xml("draft-somebody-test", "00")
self.assertEqual(output["document_date"], date_today())

xml_path.write_text(re.sub(r"<date(.+) day=.+>", r"<date\1>", xml_contents)) # remove day
output = process_submission_xml("draft-somebody-test", "00")
self.assertEqual(output["document_date"], date_today())

# name mismatch
xml, _ = submission_file(
"draft-somebody-wrong-name-00", # name that appears in the file
Expand Down
4 changes: 4 additions & 0 deletions ietf/submit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,8 +941,10 @@ def render_missing_formats(submission):
xmltree.tree = v2v3.convert2to3()

# --- Prep the xml ---
today = date_today()
prep = xml2rfc.PrepToolWriter(xmltree, quiet=True, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET])
prep.options.accept_prepped = True
prep.options.date = today
xmltree.tree = prep.prep()
if xmltree.tree == None:
raise SubmissionError(f'Error from xml2rfc (prep): {prep.errors}')
Expand All @@ -952,6 +954,7 @@ def render_missing_formats(submission):
if not txt_path.exists():
writer = xml2rfc.TextWriter(xmltree, quiet=True)
writer.options.accept_prepped = True
writer.options.date = today
writer.write(txt_path)
log.log(
'In %s: xml2rfc %s generated %s from %s (version %s)' % (
Expand All @@ -966,6 +969,7 @@ def render_missing_formats(submission):
# --- Convert to html ---
html_path = staging_path(submission.name, submission.rev, '.html')
writer = xml2rfc.HtmlWriter(xmltree, quiet=True)
writer.options.date = today
writer.write(str(html_path))
log.log(
'In %s: xml2rfc %s generated %s from %s (version %s)' % (
Expand Down
62 changes: 21 additions & 41 deletions ietf/utils/draft.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,46 +190,6 @@ def get_title(self):
def get_wordcount(self):
raise NotImplementedError

@staticmethod
def _construct_creation_date(year, month, day=None):
    """Construct a date for the document

    Roughly follows RFC 7991 section 2.17, but only the day may be missing.
    When no day is given, today's day is used if month/year are the current
    month/year; otherwise the 15th is assumed.

    year: integer or string with 4-digit year
    month: integer or string with numeric or English month. Some abbreviations recognized.
    day: integer or string with numeric day of month. Optional; None, an
         empty string, or 0 all mean "not specified".

    Returns a datetime.date.

    Raises ValueError if there is a problem interpreting the data. This
    includes a missing (None) year or month, so callers only need to catch
    ValueError.
    """
    try:
        year = int(year)
        # int(None) would raise TypeError, so treat the "no day" markers
        # explicitly instead of converting them.
        day = 0 if day is None or day == "" else int(day)
    except TypeError as err:
        # Keep the documented contract: every bad input is a ValueError.
        raise ValueError("Unusable year or day") from err

    if isinstance(month, str):
        month = month.lower()
        # Same lookup order as before: full names, then 3- and 4-letter forms.
        for names in (month_names, month_names_abbrev3, month_names_abbrev4):
            if month in names:
                month = names.index(month) + 1
                break
        else:
            if month.isdigit() and int(month) in range(1, 13):
                month = int(month)
            else:
                raise ValueError("Unrecognized month")
    elif not isinstance(month, int):
        # e.g. None from a missing XML attribute; datetime.date() would
        # otherwise raise TypeError here.
        raise ValueError("Unrecognized month")

    if not day:
        # if the date was given with only month and year, use
        # today's date if month and year is today's month and
        # year, otherwise pick the middle of the month.
        # Don't use today's day for month and year in the past
        today = date_today()
        if month == today.month and year == today.year:
            day = today.day
        else:
            day = 15
    # datetime.date raises ValueError for out-of-range year/month/day.
    return datetime.date(year, month, day)


# ----------------------------------------------------------------------

class PlaintextDraft(Draft):
Expand Down Expand Up @@ -500,7 +460,27 @@ def get_creation_date(self):
day = int( md.get( 'day', 0 ) )
year = int( md['year'] )
try:
self._creation_date = self._construct_creation_date(year, mon, day)
if mon in month_names:
month = month_names.index( mon ) + 1
elif mon in month_names_abbrev3:
month = month_names_abbrev3.index( mon ) + 1
elif mon in month_names_abbrev4:
month = month_names_abbrev4.index( mon ) + 1
elif mon.isdigit() and int(mon) in range(1,13):
month = int(mon)
else:
continue
today = date_today()
if day==0:
# if the date was given with only month and year, use
# today's date if month and year is today's month and
# year, otherwise pick the middle of the month.
# Don't use today's day for month and year in the past
if month==today.month and year==today.year:
day = today.day
else:
day = 15
self._creation_date = datetime.date(year, month, day)
return self._creation_date
except ValueError:
# mon abbreviation not in _MONTH_NAMES
Expand Down
22 changes: 16 additions & 6 deletions ietf/utils/xmldraft.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
# Copyright The IETF Trust 2022, All Rights Reserved
# -*- coding: utf-8 -*-
import datetime
import io
import re
import xml2rfc

import debug # pyflakes: ignore

from contextlib import ExitStack
from xml2rfc.util.date import augment_date, extract_date
from ietf.utils.timezone import date_today

from .draft import Draft

Expand Down Expand Up @@ -136,12 +139,19 @@ def get_title(self):
def get_creation_date(self):
date_elt = self.xmlroot.find("front/date")
if date_elt is not None:
try:
year = date_elt.get("year")
month = date_elt.get("month")
return self._construct_creation_date(year, month, date_elt.get("day", None))
except ValueError:
pass
# this mimics handling of date elements in the xml2rfc text/html writers
today = date_today()
year, month, day = extract_date(date_elt, today)
year, month, day = augment_date(year, month, day, today)
if day is None:
# Must choose a day for a datetime.date. Per RFC 7991 sect 2.17, we use
# today's date if it is consistent with the rest of the date. Otherwise,
# arbitrarily (and consistent with the text parser) assume the 15th.
if year == today.year and month == today.month:
day = today.day
else:
day = 15
return datetime.date(year, month, day)
return None

# todo fix the implementation of XMLDraft.get_abstract()
Expand Down

0 comments on commit d33a6f3

Please sign in to comment.