From a8b09d8fd6582a0cbe2c1a449aaf48cf977b13d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Mon, 30 Sep 2024 12:17:57 +1300 Subject: [PATCH] Add word-count exercise --- config.json | 8 + .../practice/word-count/.docs/instructions.md | 47 +++++ .../practice/word-count/.docs/introduction.md | 8 + .../practice/word-count/.meta/Example.roc | 39 +++++ .../practice/word-count/.meta/config.json | 18 ++ .../practice/word-count/.meta/template.j2 | 18 ++ .../practice/word-count/.meta/tests.toml | 57 ++++++ exercises/practice/word-count/WordCount.roc | 5 + .../practice/word-count/word-count-test.roc | 164 ++++++++++++++++++ 9 files changed, 364 insertions(+) create mode 100644 exercises/practice/word-count/.docs/instructions.md create mode 100644 exercises/practice/word-count/.docs/introduction.md create mode 100644 exercises/practice/word-count/.meta/Example.roc create mode 100644 exercises/practice/word-count/.meta/config.json create mode 100644 exercises/practice/word-count/.meta/template.j2 create mode 100644 exercises/practice/word-count/.meta/tests.toml create mode 100644 exercises/practice/word-count/WordCount.roc create mode 100644 exercises/practice/word-count/word-count-test.roc diff --git a/config.json b/config.json index cecb887..0e7cb53 100644 --- a/config.json +++ b/config.json @@ -423,6 +423,14 @@ "prerequisites": [], "difficulty": 3 }, + { + "slug": "word-count", + "name": "Word Count", + "uuid": "225716ce-363d-4ece-bfe2-b25e2cb25d52", + "practices": [], + "prerequisites": [], + "difficulty": 3 + }, { "slug": "yacht", "name": "Yacht", diff --git a/exercises/practice/word-count/.docs/instructions.md b/exercises/practice/word-count/.docs/instructions.md new file mode 100644 index 0000000..064393c --- /dev/null +++ b/exercises/practice/word-count/.docs/instructions.md @@ -0,0 +1,47 @@ +# Instructions + +Your task is to count how many times each word occurs in a subtitle of a drama. + +The subtitles from these dramas use only ASCII characters. + +The characters often speak in casual English, using contractions like _they're_ or _it's_. +Though these contractions come from two words (e.g. _we are_), the contraction (_we're_) is considered a single word. + +Words can be separated by any form of punctuation (e.g. ":", "!", or "?") or whitespace (e.g. "\t", "\n", or " "). +The only punctuation that does not separate words is the apostrophe in contractions. + +Numbers are considered words. +If the subtitles say _It costs 100 dollars._ then _100_ will be its own word. + +Words are case insensitive. +For example, the word _you_ occurs three times in the following sentence: + +> You come back, you hear me? DO YOU HEAR ME? + +The ordering of the word counts in the results doesn't matter. + +Here's an example that incorporates several of the elements discussed above: + +- simple words +- contractions +- numbers +- case insensitive words +- punctuation (including apostrophes) to separate words +- different forms of whitespace to separate words + +`"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.` + +The mapping for this subtitle would be: + +```text +123: 1 +agent: 1 +cried: 1 +fled: 1 +i: 1 +password: 2 +so: 1 +special: 1 +that's: 1 +the: 2 +``` diff --git a/exercises/practice/word-count/.docs/introduction.md b/exercises/practice/word-count/.docs/introduction.md new file mode 100644 index 0000000..1654508 --- /dev/null +++ b/exercises/practice/word-count/.docs/introduction.md @@ -0,0 +1,8 @@ +# Introduction + +You teach English as a foreign language to high school students. + +You've decided to base your entire curriculum on TV shows. +You need to analyze which words are used, and how often they're repeated. + +This will let you choose the simplest shows to start with, and to gradually increase the difficulty as time passes. diff --git a/exercises/practice/word-count/.meta/Example.roc b/exercises/practice/word-count/.meta/Example.roc new file mode 100644 index 0000000..25da047 --- /dev/null +++ b/exercises/practice/word-count/.meta/Example.roc @@ -0,0 +1,39 @@ +module [countWords] + +countWords : Str -> Dict Str U64 +countWords = \sentence -> + sentence + |> Str.toUtf8 + |> List.append ' ' # to ensure the last word is added + |> List.walk { words: [], word: [], contractionStarted: Bool.false } \state, char -> + { words, word, contractionStarted } = state + when char is + c if c >= 'A' && c <= 'Z' -> + { words, word: word |> List.append (c - 'A' + 'a'), contractionStarted: Bool.false } + + c if c >= 'a' && c <= 'z' || c >= '0' && c <= '9' -> + { words, word: word |> List.append c, contractionStarted: Bool.false } + + c -> + if List.isEmpty word then + state + else if c != '\'' || contractionStarted then + if contractionStarted then + { words: words |> List.append (word |> List.dropLast 1), word: [], contractionStarted: Bool.false } + else + { words: words |> List.append word, word: [], contractionStarted: Bool.false } + else + + { words, word: word |> List.append c, contractionStarted: Bool.true } + |> .words + |> List.dropIf List.isEmpty + |> List.walk (Dict.empty {}) \result, chars -> + word = + when chars |> Str.fromUtf8 is + Ok parsedWord -> parsedWord + Err (BadUtf8 _ _) -> crash "Unreachable: we only use ASCII characters" + result + |> Dict.update word \maybeCount -> + when maybeCount is + Ok count -> Ok (count + 1) + Err Missing -> Ok 1 diff --git a/exercises/practice/word-count/.meta/config.json b/exercises/practice/word-count/.meta/config.json new file mode 100644 index 0000000..1031709 --- /dev/null +++ b/exercises/practice/word-count/.meta/config.json @@ -0,0 +1,18 @@ +{ + "authors": [ + "ageron" + ], + "files": { + "solution": [ + "WordCount.roc" + ], + "test": [ + "word-count-test.roc" + ], + "example": [ + ".meta/Example.roc" + ] + }, + "blurb": "Given a phrase, count the occurrences of each word in that phrase.", + "source": "This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour." +} diff --git a/exercises/practice/word-count/.meta/template.j2 b/exercises/practice/word-count/.meta/template.j2 new file mode 100644 index 0000000..8902bfa --- /dev/null +++ b/exercises/practice/word-count/.meta/template.j2 @@ -0,0 +1,18 @@ +{%- import "generator_macros.j2" as macros with context -%} +{{ macros.canonical_ref() }} +{{ macros.header() }} + +import {{ exercise | to_pascal }} exposing [{{ cases[0]["property"] | to_camel }}] + +{% for case in cases -%} +# {{ case["description"] }} +expect + result = {{ case["property"] | to_camel }} {{ case["input"]["sentence"] | to_roc }} + expected = Dict.fromList [ + {%- for word, count in case["expected"].items() %} + ({{ word | to_roc }}, {{ count }}), + {%- endfor %} + ] + result == expected + +{% endfor %} diff --git a/exercises/practice/word-count/.meta/tests.toml b/exercises/practice/word-count/.meta/tests.toml new file mode 100644 index 0000000..1be425b --- /dev/null +++ b/exercises/practice/word-count/.meta/tests.toml @@ -0,0 +1,57 @@ +# This is an auto-generated file. +# +# Regenerating this file via `configlet sync` will: +# - Recreate every `description` key/value pair +# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications +# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion) +# - Preserve any other key/value pair +# +# As user-added comments (using the # character) will be removed when this file +# is regenerated, comments can be added via a `comment` key. + +[61559d5f-2cad-48fb-af53-d3973a9ee9ef] +description = "count one word" + +[5abd53a3-1aed-43a4-a15a-29f88c09cbbd] +description = "count one of each word" + +[2a3091e5-952e-4099-9fac-8f85d9655c0e] +description = "multiple occurrences of a word" + +[e81877ae-d4da-4af4-931c-d923cd621ca6] +description = "handles cramped lists" + +[7349f682-9707-47c0-a9af-be56e1e7ff30] +description = "handles expanded lists" + +[a514a0f2-8589-4279-8892-887f76a14c82] +description = "ignore punctuation" + +[d2e5cee6-d2ec-497b-bdc9-3ebe092ce55e] +description = "include numbers" + +[dac6bc6a-21ae-4954-945d-d7f716392dbf] +description = "normalize case" + +[4185a902-bdb0-4074-864c-f416e42a0f19] +description = "with apostrophes" +include = false + +[4ff6c7d7-fcfc-43ef-b8e7-34ff1837a2d3] +description = "with apostrophes" +reimplements = "4185a902-bdb0-4074-864c-f416e42a0f19" + +[be72af2b-8afe-4337-b151-b297202e4a7b] +description = "with quotations" + +[8d6815fe-8a51-4a65-96f9-2fb3f6dc6ed6] +description = "substrings from the beginning" + +[c5f4ef26-f3f7-4725-b314-855c04fb4c13] +description = "multiple spaces not detected as a word" + +[50176e8a-fe8e-4f4c-b6b6-aa9cf8f20360] +description = "alternating word separators not detected as a word" + +[6d00f1db-901c-4bec-9829-d20eb3044557] +description = "quotation for word with apostrophe" diff --git a/exercises/practice/word-count/WordCount.roc b/exercises/practice/word-count/WordCount.roc new file mode 100644 index 0000000..c6ac117 --- /dev/null +++ b/exercises/practice/word-count/WordCount.roc @@ -0,0 +1,5 @@ +module [countWords] + +countWords : Str -> Dict Str U64 +countWords = \sentence -> + crash "Please implement the 'countWords' function" diff --git a/exercises/practice/word-count/word-count-test.roc b/exercises/practice/word-count/word-count-test.roc new file mode 100644 index 0000000..56cc2ce --- /dev/null +++ b/exercises/practice/word-count/word-count-test.roc @@ -0,0 +1,164 @@ +# These tests are auto-generated with test data from: +# https://github.com/exercism/problem-specifications/tree/main/exercises/word-count/canonical-data.json +# File last updated on 2024-09-29 +app [main] { + pf: platform "https://github.com/roc-lang/basic-cli/releases/download/0.15.0/SlwdbJ-3GR7uBWQo6zlmYWNYOxnvo8r6YABXD-45UOw.tar.br", +} + +main = + Task.ok {} + +import WordCount exposing [countWords] + +# count one word +expect + result = countWords "word" + expected = Dict.fromList [ + ("word", 1), + ] + result == expected + +# count one of each word +expect + result = countWords "one of each" + expected = Dict.fromList [ + ("one", 1), + ("of", 1), + ("each", 1), + ] + result == expected + +# multiple occurrences of a word +expect + result = countWords "one fish two fish red fish blue fish" + expected = Dict.fromList [ + ("one", 1), + ("fish", 4), + ("two", 1), + ("red", 1), + ("blue", 1), + ] + result == expected + +# handles cramped lists +expect + result = countWords "one,two,three" + expected = Dict.fromList [ + ("one", 1), + ("two", 1), + ("three", 1), + ] + result == expected + +# handles expanded lists +expect + result = countWords "one,\ntwo,\nthree" + expected = Dict.fromList [ + ("one", 1), + ("two", 1), + ("three", 1), + ] + result == expected + +# ignore punctuation +expect + result = countWords "car: carpet as java: javascript!!&@$%^&" + expected = Dict.fromList [ + ("car", 1), + ("carpet", 1), + ("as", 1), + ("java", 1), + ("javascript", 1), + ] + result == expected + +# include numbers +expect + result = countWords "testing, 1, 2 testing" + expected = Dict.fromList [ + ("testing", 2), + ("1", 1), + ("2", 1), + ] + result == expected + +# normalize case +expect + result = countWords "go Go GO Stop stop" + expected = Dict.fromList [ + ("go", 3), + ("stop", 2), + ] + result == expected + +# with apostrophes +expect + result = countWords "'First: don't laugh. Then: don't cry. You're getting it.'" + expected = Dict.fromList [ + ("first", 1), + ("don't", 2), + ("laugh", 1), + ("then", 1), + ("cry", 1), + ("you're", 1), + ("getting", 1), + ("it", 1), + ] + result == expected + +# with quotations +expect + result = countWords "Joe can't tell between 'large' and large." + expected = Dict.fromList [ + ("joe", 1), + ("can't", 1), + ("tell", 1), + ("between", 1), + ("large", 2), + ("and", 1), + ] + result == expected + +# substrings from the beginning +expect + result = countWords "Joe can't tell between app, apple and a." + expected = Dict.fromList [ + ("joe", 1), + ("can't", 1), + ("tell", 1), + ("between", 1), + ("app", 1), + ("apple", 1), + ("and", 1), + ("a", 1), + ] + result == expected + +# multiple spaces not detected as a word +expect + result = countWords " multiple whitespaces" + expected = Dict.fromList [ + ("multiple", 1), + ("whitespaces", 1), + ] + result == expected + +# alternating word separators not detected as a word +expect + result = countWords ",\n,one,\n ,two \n 'three'" + expected = Dict.fromList [ + ("one", 1), + ("two", 1), + ("three", 1), + ] + result == expected + +# quotation for word with apostrophe +expect + result = countWords "can, can't, 'can't'" + expected = Dict.fromList [ + ("can", 1), + ("can't", 2), + ] + result == expected +