Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add run-length-encoding exercise #146

Merged
merged 2 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,14 @@
"prerequisites": [],
"difficulty": 4
},
{
"slug": "run-length-encoding",
"name": "Run-Length Encoding",
"uuid": "d6b8a979-8ce8-4f46-a6cd-35cd44b19701",
"practices": [],
"prerequisites": [],
"difficulty": 4
},
{
"slug": "all-your-base",
"name": "All Your Base",
Expand Down
20 changes: 20 additions & 0 deletions exercises/practice/run-length-encoding/.docs/instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Instructions

Implement run-length encoding and decoding.

Run-length encoding (RLE) is a simple form of data compression, where runs (consecutive data elements) are replaced by just one data value and count.

For example, the 53-character string below can be represented with only 13 characters.

```text
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB" -> "12WB12W3B24WB"
```

RLE allows the original data to be perfectly reconstructed from the compressed data, which makes it a lossless form of data compression.

```text
"AABCCCDEEEE" -> "2AB3CD4E" -> "AABCCCDEEEE"
```

For simplicity, you can assume that the unencoded string will only contain the letters A through Z (either lower or upper case) and whitespace.
This way data to be encoded will never contain any numbers and numbers inside data to be decoded always represent the count for the following character.
44 changes: 44 additions & 0 deletions exercises/practice/run-length-encoding/.meta/Example.roc
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
module [encode, decode]

## Run-length encode a string, working on its UTF-8 bytes.
##
## A run of two or more identical bytes is written as its decimal length
## followed by the byte; a byte that is not repeated is written as-is, with no
## count. The encoded bytes are converted back to a `Str` at the end, which is
## where a `BadUtf8` error can come from.
encode : Str -> Result Str [BadUtf8 _ _]
encode = \string ->
    # Append the pending run (optional count, then the byte) to the output.
    flushRun = \run ->
        if run.count == 0 then
            # No run was ever opened, i.e. the input was empty.
            []
        else if run.count == 1 then
            List.append run.chars run.lastChar
        else
            countBytes = Str.toUtf8 (Num.toStr run.count)
            run.chars |> List.concat countBytes |> List.append run.lastChar

    initial = { chars: [], lastChar: 0, count: 0 }

    step = \acc, byte ->
        if acc.count == 0 then
            # First byte of the input: open the first run.
            { chars: [], lastChar: byte, count: 1 }
        else if acc.lastChar == byte then
            # Same byte again: extend the current run.
            { acc & count: acc.count + 1 }
        else
            # A different byte: emit the finished run and open a new one.
            { chars: flushRun acc, lastChar: byte, count: 1 }

    bytes = Str.toUtf8 string
    final = List.walk bytes initial step

    # The last run is still pending once the walk is over.
    Str.fromUtf8 (flushRun final)

## Decode a run-length encoded string, working on its UTF-8 bytes.
##
## Each run of ASCII digits is read as a decimal count for the byte that
## follows it; a byte with no digits in front of it is copied once.
##
## Errors:
## - `InvalidNumStr` when a count cannot be parsed as a `U64`, or when the
##   input ends with digits that have no following byte to apply to.
## - `BadUtf8` when the pending digits or the decoded bytes are not valid UTF-8.
decode : Str -> Result Str [BadUtf8 _ _, InvalidNumStr]
decode = \string ->
    string
    |> Str.toUtf8
    |> List.walkTry? { chars: [], digits: [] } \state, char ->
        if char >= '0' && char <= '9' then
            # Accumulate the digits of a (possibly multi-digit) count.
            digits = state.digits |> List.append char
            Ok { state & digits }
        else if state.digits == [] then
            # No count in front of this byte: it stands for itself.
            chars = state.chars |> List.append char
            Ok { state & chars }
        else
            countStr = Str.fromUtf8? state.digits
            count = Str.toU64? countStr
            chars = state.chars |> List.concat (List.repeat char count)
            Ok { chars, digits: [] }
    |> \final ->
        if final.digits == [] then
            Str.fromUtf8 final.chars
        else
            # A trailing count with nothing to repeat is malformed input;
            # report it instead of silently dropping the digits.
            Err InvalidNumStr
19 changes: 19 additions & 0 deletions exercises/practice/run-length-encoding/.meta/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"authors": [
"ageron"
],
"files": {
"solution": [
"RunLengthEncoding.roc"
],
"test": [
"run-length-encoding-test.roc"
],
"example": [
".meta/Example.roc"
]
},
"blurb": "Implement run-length encoding and decoding.",
"source": "Wikipedia",
"source_url": "https://en.wikipedia.org/wiki/Run-length_encoding"
}
27 changes: 27 additions & 0 deletions exercises/practice/run-length-encoding/.meta/template.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{%- import "generator_macros.j2" as macros with context -%}
{{ macros.canonical_ref() }}
{{ macros.header() }}

import {{ exercise | to_pascal }} exposing [encode, decode]

{% for supercase in cases %}
##
## {{ supercase["description"] }}
##

{% for case in supercase["cases"] -%}
# {{ case["description"] }}
expect
string = {{ case["input"]["string"] | to_roc }}
{%- if case["property"] == "consistency" %}
result = string |> encode |> Result.try decode
result == Ok string
{%- else %}
result = string |> {{ case["property"] | to_camel }}
expected = {{ case["expected"] | to_roc }}
result == Ok expected
{%- endif %}

{% endfor %}
{% endfor %}

49 changes: 49 additions & 0 deletions exercises/practice/run-length-encoding/.meta/tests.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# This is an auto-generated file.
#
# Regenerating this file via `configlet sync` will:
# - Recreate every `description` key/value pair
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
# - Preserve any other key/value pair
#
# As user-added comments (using the # character) will be removed when this file
# is regenerated, comments can be added via a `comment` key.

[ad53b61b-6ffc-422f-81a6-61f7df92a231]
description = "run-length encode a string -> empty string"

[52012823-b7e6-4277-893c-5b96d42f82de]
description = "run-length encode a string -> single characters only are encoded without count"

[b7868492-7e3a-415f-8da3-d88f51f80409]
description = "run-length encode a string -> string with no single characters"

[859b822b-6e9f-44d6-9c46-6091ee6ae358]
description = "run-length encode a string -> single characters mixed with repeated characters"

[1b34de62-e152-47be-bc88-469746df63b3]
description = "run-length encode a string -> multiple whitespace mixed in string"

[abf176e2-3fbd-40ad-bb2f-2dd6d4df721a]
description = "run-length encode a string -> lowercase characters"

[7ec5c390-f03c-4acf-ac29-5f65861cdeb5]
description = "run-length decode a string -> empty string"

[ad23f455-1ac2-4b0e-87d0-b85b10696098]
description = "run-length decode a string -> single characters only"

[21e37583-5a20-4a0e-826c-3dee2c375f54]
description = "run-length decode a string -> string with no single characters"

[1389ad09-c3a8-4813-9324-99363fba429c]
description = "run-length decode a string -> single characters with repeated characters"

[3f8e3c51-6aca-4670-b86c-a213bf4706b0]
description = "run-length decode a string -> multiple whitespace mixed in string"

[29f721de-9aad-435f-ba37-7662df4fb551]
description = "run-length decode a string -> lowercase string"

[2a762efd-8695-4e04-b0d6-9736899fbc16]
description = "encode and then decode -> encode followed by decode gives original string"
9 changes: 9 additions & 0 deletions exercises/practice/run-length-encoding/RunLengthEncoding.roc
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module [encode, decode]

## Run-length encode `string`.
##
## Placeholder for the exercise: calling it crashes with the message below
## until an implementation is supplied. The error type is left as `_`.
encode : Str -> Result Str _
encode = \string ->
crash "Please implement the 'encode' function"

## Decode a run-length encoded `string`.
##
## Placeholder for the exercise: calling it crashes with the message below
## until an implementation is supplied. The error type is left as `_`.
decode : Str -> Result Str _
decode = \string ->
crash "Please implement the 'decode' function"
114 changes: 114 additions & 0 deletions exercises/practice/run-length-encoding/run-length-encoding-test.roc
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# These tests are auto-generated with test data from:
# https://github.com/exercism/problem-specifications/tree/main/exercises/run-length-encoding/canonical-data.json
# File last updated on 2024-10-10
app [main] {
pf: platform "https://github.com/roc-lang/basic-cli/releases/download/0.15.0/SlwdbJ-3GR7uBWQo6zlmYWNYOxnvo8r6YABXD-45UOw.tar.br",
}

main =
Task.ok {}

import RunLengthEncoding exposing [encode, decode]

##
## run-length encode a string
##

# empty string
expect
string = ""
result = string |> encode
expected = ""
result == Ok expected

# single characters only are encoded without count
expect
string = "XYZ"
result = string |> encode
expected = "XYZ"
result == Ok expected

# string with no single characters
expect
string = "AABBBCCCC"
result = string |> encode
expected = "2A3B4C"
result == Ok expected

# single characters mixed with repeated characters
expect
string = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB"
result = string |> encode
expected = "12WB12W3B24WB"
result == Ok expected

# multiple whitespace mixed in string
expect
    # The input starts and ends with a run of TWO spaces; those runs are what
    # produce the leading and trailing "2 " in the expected encoding.
    string = "  hsqq qww  "
    result = string |> encode
    expected = "2 hs2q q2w2 "
    result == Ok expected

# lowercase characters
expect
string = "aabbbcccc"
result = string |> encode
expected = "2a3b4c"
result == Ok expected

##
## run-length decode a string
##

# empty string
expect
string = ""
result = string |> decode
expected = ""
result == Ok expected

# single characters only
expect
string = "XYZ"
result = string |> decode
expected = "XYZ"
result == Ok expected

# string with no single characters
expect
string = "2A3B4C"
result = string |> decode
expected = "AABBBCCCC"
result == Ok expected

# single characters with repeated characters
expect
string = "12WB12W3B24WB"
result = string |> decode
expected = "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB"
result == Ok expected

# multiple whitespace mixed in string
expect
    string = "2 hs2q q2w2 "
    result = string |> decode
    # The leading and trailing "2 " each decode to a run of TWO spaces.
    expected = "  hsqq qww  "
    result == Ok expected

# lowercase string
expect
string = "2a3b4c"
result = string |> decode
expected = "aabbbcccc"
result == Ok expected

##
## encode and then decode
##

# encode followed by decode gives original string
expect
string = "zzz ZZ zZ"
result = string |> encode |> Result.try decode
result == Ok string