Add word-count exercise (#127)

exercism · Oct 2, 2024 · d0c1825 · d0c1825
1 parent e6d3bb7
commit d0c1825
Show file tree

Hide file tree

Showing 9 changed files with 364 additions and 0 deletions.
diff --git a/config.json b/config.json
@@ -431,6 +431,14 @@
         "prerequisites": [],
         "difficulty": 3
       },
+      {
+        "slug": "word-count",
+        "name": "Word Count",
+        "uuid": "225716ce-363d-4ece-bfe2-b25e2cb25d52",
+        "practices": [],
+        "prerequisites": [],
+        "difficulty": 3
+      },
       {
         "slug": "yacht",
         "name": "Yacht",

diff --git a/exercises/practice/word-count/.docs/instructions.md b/exercises/practice/word-count/.docs/instructions.md
@@ -0,0 +1,47 @@
+# Instructions
+
+Your task is to count how many times each word occurs in a subtitle of a drama.
+
+The subtitles from these dramas use only ASCII characters.
+
+The characters often speak in casual English, using contractions like _they're_ or _it's_.
+Though these contractions come from two words (e.g. _we are_), the contraction (_we're_) is considered a single word.
+
+Words can be separated by any form of punctuation (e.g. ":", "!", or "?") or whitespace (e.g. "\t", "\n", or " ").
+The only punctuation that does not separate words is the apostrophe in contractions.
+
+Numbers are considered words.
+If the subtitles say _It costs 100 dollars._ then _100_ will be its own word.
+
+Words are case insensitive.
+For example, the word _you_ occurs three times in the following sentence:
+
+> You come back, you hear me? DO YOU HEAR ME?
+
+The ordering of the word counts in the results doesn't matter.
+
+Here's an example that incorporates several of the elements discussed above:
+
+- simple words
+- contractions
+- numbers
+- case insensitive words
+- punctuation (including apostrophes) to separate words
+- different forms of whitespace to separate words
+
+`"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.`
+
+The mapping for this subtitle would be:
+
+```text
+123: 1
+agent: 1
+cried: 1
+fled: 1
+i: 1
+password: 2
+so: 1
+special: 1
+that's: 1
+the: 2
+```
diff --git a/exercises/practice/word-count/.docs/introduction.md b/exercises/practice/word-count/.docs/introduction.md
@@ -0,0 +1,8 @@
+# Introduction
+
+You teach English as a foreign language to high school students.
+
+You've decided to base your entire curriculum on TV shows.
+You need to analyze which words are used, and how often they're repeated.
+
+This will let you choose the simplest shows to start with, and gradually increase the difficulty as time passes.
diff --git a/exercises/practice/word-count/.meta/Example.roc b/exercises/practice/word-count/.meta/Example.roc
@@ -0,0 +1,39 @@
+module [countWords]
+
+## Count how many times each word occurs in `sentence`.
+##
+## A word is a run of ASCII letters and digits, stored in lowercase so that
+## counting is case insensitive. An apostrophe is kept only when it sits between
+## letters/digits (as in "don't"); every other byte acts as a word separator.
+## Returns a dictionary mapping each word to its number of occurrences.
+countWords : Str -> Dict Str U64
+countWords = \sentence ->
+    sentence
+    |> Str.toUtf8
+    |> List.append ' ' # to ensure the last word is added
+    |> List.walk { words: [], word: [], contractionStarted: Bool.false } \state, char ->
+        { words, word, contractionStarted } = state
+        when char is
+            # Uppercase letters are appended in lowercase
+            c if c >= 'A' && c <= 'Z' ->
+                { words, word: word |> List.append (c - 'A' + 'a'), contractionStarted: Bool.false }
+
+            # Lowercase letters and digits are appended as-is
+            c if c >= 'a' && c <= 'z' || c >= '0' && c <= '9' ->
+                { words, word: word |> List.append c, contractionStarted: Bool.false }
+
+            c ->
+                if List.isEmpty word then
+                    # No word in progress: separators and leading apostrophes are ignored
+                    state
+                else if c != '\'' || contractionStarted then
+                    if contractionStarted then
+                        # The previous byte was an apostrophe that no letter or digit
+                        # followed, so it was a closing quote: drop it before storing the word
+                        { words: words |> List.append (word |> List.dropLast 1), word: [], contractionStarted: Bool.false }
+                    else
+                        { words: words |> List.append word, word: [], contractionStarted: Bool.false }
+                else
+                    # Apostrophe directly after a letter or digit: keep it tentatively; it
+                    # only stays in the word if a letter or digit comes next
+                    { words, word: word |> List.append c, contractionStarted: Bool.true }
+    |> .words
+    |> List.dropIf List.isEmpty
+    |> List.walk (Dict.empty {}) \result, chars ->
+        word =
+            when chars |> Str.fromUtf8 is
+                Ok parsedWord -> parsedWord
+                Err (BadUtf8 _ _) -> crash "Unreachable: we only use ASCII characters"
+        result
+        |> Dict.update word \maybeCount ->
+            when maybeCount is
+                Ok count -> Ok (count + 1)
+                Err Missing -> Ok 1
diff --git a/exercises/practice/word-count/.meta/config.json b/exercises/practice/word-count/.meta/config.json
@@ -0,0 +1,18 @@
+{
+  "authors": [
+    "ageron"
+  ],
+  "files": {
+    "solution": [
+      "WordCount.roc"
+    ],
+    "test": [
+      "word-count-test.roc"
+    ],
+    "example": [
+      ".meta/Example.roc"
+    ]
+  },
+  "blurb": "Given a phrase, count the occurrences of each word in that phrase.",
+  "source": "This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour."
+}
diff --git a/exercises/practice/word-count/.meta/template.j2 b/exercises/practice/word-count/.meta/template.j2
@@ -0,0 +1,18 @@
+{%- import "generator_macros.j2" as macros with context -%}
+{{ macros.canonical_ref() }}
+{{ macros.header() }}
+
+import {{ exercise | to_pascal }} exposing [{{ cases[0]["property"] | to_camel }}]
+
+{% for case in cases -%}
+# {{ case["description"] }}
+expect
+    result = {{ case["property"] | to_camel }} {{ case["input"]["sentence"] | to_roc }}
+    expected = Dict.fromList [
+      {%- for word, count in case["expected"].items() %}
+      ({{ word | to_roc }}, {{ count }}),
+      {%- endfor %}
+    ]
+    result == expected
+
+{% endfor %}
diff --git a/exercises/practice/word-count/.meta/tests.toml b/exercises/practice/word-count/.meta/tests.toml
@@ -0,0 +1,57 @@
+# This is an auto-generated file.
+#
+# Regenerating this file via `configlet sync` will:
+# - Recreate every `description` key/value pair
+# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
+# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
+# - Preserve any other key/value pair
+#
+# As user-added comments (using the # character) will be removed when this file
+# is regenerated, comments can be added via a `comment` key.
+
+[61559d5f-2cad-48fb-af53-d3973a9ee9ef]
+description = "count one word"
+
+[5abd53a3-1aed-43a4-a15a-29f88c09cbbd]
+description = "count one of each word"
+
+[2a3091e5-952e-4099-9fac-8f85d9655c0e]
+description = "multiple occurrences of a word"
+
+[e81877ae-d4da-4af4-931c-d923cd621ca6]
+description = "handles cramped lists"
+
+[7349f682-9707-47c0-a9af-be56e1e7ff30]
+description = "handles expanded lists"
+
+[a514a0f2-8589-4279-8892-887f76a14c82]
+description = "ignore punctuation"
+
+[d2e5cee6-d2ec-497b-bdc9-3ebe092ce55e]
+description = "include numbers"
+
+[dac6bc6a-21ae-4954-945d-d7f716392dbf]
+description = "normalize case"
+
+[4185a902-bdb0-4074-864c-f416e42a0f19]
+description = "with apostrophes"
+include = false
+
+[4ff6c7d7-fcfc-43ef-b8e7-34ff1837a2d3]
+description = "with apostrophes"
+reimplements = "4185a902-bdb0-4074-864c-f416e42a0f19"
+
+[be72af2b-8afe-4337-b151-b297202e4a7b]
+description = "with quotations"
+
+[8d6815fe-8a51-4a65-96f9-2fb3f6dc6ed6]
+description = "substrings from the beginning"
+
+[c5f4ef26-f3f7-4725-b314-855c04fb4c13]
+description = "multiple spaces not detected as a word"
+
+[50176e8a-fe8e-4f4c-b6b6-aa9cf8f20360]
+description = "alternating word separators not detected as a word"
+
+[6d00f1db-901c-4bec-9829-d20eb3044557]
+description = "quotation for word with apostrophe"
diff --git a/exercises/practice/word-count/WordCount.roc b/exercises/practice/word-count/WordCount.roc
@@ -0,0 +1,5 @@
+module [countWords]
+
+## Count how many times each word occurs in `sentence`.
+## Takes the text as a `Str` and returns a `Dict` from each word to its count.
+countWords : Str -> Dict Str U64
+countWords = \sentence ->
+    # Placeholder body: crashes until the function is implemented
+    crash "Please implement the 'countWords' function"
diff --git a/exercises/practice/word-count/word-count-test.roc b/exercises/practice/word-count/word-count-test.roc
@@ -0,0 +1,164 @@
+# These tests are auto-generated with test data from:
+# https://github.com/exercism/problem-specifications/tree/main/exercises/word-count/canonical-data.json
+# File last updated on 2024-09-29
+app [main] {
+    pf: platform "https://github.com/roc-lang/basic-cli/releases/download/0.15.0/SlwdbJ-3GR7uBWQo6zlmYWNYOxnvo8r6YABXD-45UOw.tar.br",
+}
+
+main =
+    Task.ok {}
+
+import WordCount exposing [countWords]
+
+# count one word
+expect
+    result = countWords "word"
+    expected = Dict.fromList [
+        ("word", 1),
+    ]
+    result == expected
+
+# count one of each word
+expect
+    result = countWords "one of each"
+    expected = Dict.fromList [
+        ("one", 1),
+        ("of", 1),
+        ("each", 1),
+    ]
+    result == expected
+
+# multiple occurrences of a word
+expect
+    result = countWords "one fish two fish red fish blue fish"
+    expected = Dict.fromList [
+        ("one", 1),
+        ("fish", 4),
+        ("two", 1),
+        ("red", 1),
+        ("blue", 1),
+    ]
+    result == expected
+
+# handles cramped lists
+expect
+    result = countWords "one,two,three"
+    expected = Dict.fromList [
+        ("one", 1),
+        ("two", 1),
+        ("three", 1),
+    ]
+    result == expected
+
+# handles expanded lists
+expect
+    result = countWords "one,\ntwo,\nthree"
+    expected = Dict.fromList [
+        ("one", 1),
+        ("two", 1),
+        ("three", 1),
+    ]
+    result == expected
+
+# ignore punctuation
+expect
+    result = countWords "car: carpet as java: javascript!!&@$%^&"
+    expected = Dict.fromList [
+        ("car", 1),
+        ("carpet", 1),
+        ("as", 1),
+        ("java", 1),
+        ("javascript", 1),
+    ]
+    result == expected
+
+# include numbers
+expect
+    result = countWords "testing, 1, 2 testing"
+    expected = Dict.fromList [
+        ("testing", 2),
+        ("1", 1),
+        ("2", 1),
+    ]
+    result == expected
+
+# normalize case
+expect
+    result = countWords "go Go GO Stop stop"
+    expected = Dict.fromList [
+        ("go", 3),
+        ("stop", 2),
+    ]
+    result == expected
+
+# with apostrophes
+expect
+    result = countWords "'First: don't laugh. Then: don't cry. You're getting it.'"
+    expected = Dict.fromList [
+        ("first", 1),
+        ("don't", 2),
+        ("laugh", 1),
+        ("then", 1),
+        ("cry", 1),
+        ("you're", 1),
+        ("getting", 1),
+        ("it", 1),
+    ]
+    result == expected
+
+# with quotations
+expect
+    result = countWords "Joe can't tell between 'large' and large."
+    expected = Dict.fromList [
+        ("joe", 1),
+        ("can't", 1),
+        ("tell", 1),
+        ("between", 1),
+        ("large", 2),
+        ("and", 1),
+    ]
+    result == expected
+
+# substrings from the beginning
+expect
+    result = countWords "Joe can't tell between app, apple and a."
+    expected = Dict.fromList [
+        ("joe", 1),
+        ("can't", 1),
+        ("tell", 1),
+        ("between", 1),
+        ("app", 1),
+        ("apple", 1),
+        ("and", 1),
+        ("a", 1),
+    ]
+    result == expected
+
+# multiple spaces not detected as a word
+expect
+    result = countWords " multiple   whitespaces"
+    expected = Dict.fromList [
+        ("multiple", 1),
+        ("whitespaces", 1),
+    ]
+    result == expected
+
+# alternating word separators not detected as a word
+expect
+    result = countWords ",\n,one,\n ,two \n 'three'"
+    expected = Dict.fromList [
+        ("one", 1),
+        ("two", 1),
+        ("three", 1),
+    ]
+    result == expected
+
+# quotation for word with apostrophe
+expect
+    result = countWords "can, can't, 'can't'"
+    expected = Dict.fromList [
+        ("can", 1),
+        ("can't", 2),
+    ]
+    result == expected
+