From 8edda64673bbe6d9ac643acce0e2fe074f7d664a Mon Sep 17 00:00:00 2001 From: MallardDuck Date: Mon, 14 Dec 2020 20:10:59 -0500 Subject: [PATCH 01/13] Track both the total Bytes and Byte Position of the Stream --- src/Internal/Position.php | 31 +++++++++++++++--- src/StringStream.php | 2 +- tests/Internal/PositionTest.php | 40 +++++++++++++++++++++++- tests/Internal/StringStreamTest.php | 6 ++-- tests/ParseResult/ErrorReportingTest.php | 6 ++-- 5 files changed, 73 insertions(+), 12 deletions(-) diff --git a/src/Internal/Position.php b/src/Internal/Position.php index 62b6459..99e9123 100644 --- a/src/Internal/Position.php +++ b/src/Internal/Position.php @@ -18,27 +18,33 @@ */ final class Position { + /** @psalm-readonly */ + private int $totalBytes; /** @psalm-readonly */ private string $filename; /** @psalm-readonly */ private int $line; /** @psalm-readonly */ private int $column; + /** @psalm-readonly */ + private int $bytePosition; - function __construct(string $filename, int $line, int $column) + function __construct(int $totalBytes, string $filename, int $line, int $column, int $bytePosition) { + $this->totalBytes = $totalBytes; $this->filename = $filename; $this->line = $line; $this->column = $column; + $this->bytePosition = $bytePosition; } /** * Initial position (line 1, column 1). The optional filename is the source of the input, and is really just a label * to make more useful error messages. */ - public static function initial(string $filename = ""): Position + public static function initial(int $totalBytes = 0, string $filename = ""): Position { - return new Position($filename, 1, 1); + return new Position($totalBytes, $filename, 1, 1, 0); } /** @@ -53,6 +59,7 @@ public function advance(string $parsed): Position { $column = $this->column; $line = $this->line; + $bytePosition = $this->bytePosition; /** @psalm-var string $char */ foreach (mb_str_split($parsed, 1) as $char) { switch ($char) { @@ -67,9 +74,10 @@ public function advance(string $parsed): Position default: $column++; } + $bytePosition += strlen($char); } - return new Position($this->filename, $line, $column); + return new Position($this->totalBytes, $this->filename, $line, $column, $bytePosition); } public function filename(): string @@ -86,4 +94,19 @@ public function column(): int { return $this->column; } + + public function totalBytes(): int + { + return $this->totalBytes; + } + + public function bytePosition(): int + { + return $this->bytePosition; + } + + public function unreadBytes(): int + { + return $this->totalBytes - $this->bytePosition; + } } diff --git a/src/StringStream.php b/src/StringStream.php index a1e3ea9..4fe148d 100644 --- a/src/StringStream.php +++ b/src/StringStream.php @@ -28,7 +28,7 @@ final class StringStream implements Stream public function __construct(string $string, ?Position $position = null) { $this->string = $string; - $this->position = $position ?? Position::initial(); + $this->position = $position ?? Position::initial(strlen($string)); } /** diff --git a/tests/Internal/PositionTest.php b/tests/Internal/PositionTest.php index 6688242..ca8c967 100644 --- a/tests/Internal/PositionTest.php +++ b/tests/Internal/PositionTest.php @@ -20,18 +20,56 @@ final class PositionTest extends TestCase /** @test */ public function update() { - $position = Position::initial(); + $position = Position::initial(8); $this->assertEquals(1, $position->line()); $this->assertEquals(1, $position->column()); + $this->assertEquals(8, $position->totalBytes()); + $this->assertEquals(8, $position->unreadBytes()); + $this->assertEquals(0, $position->bytePosition()); $position = $position->advance("a"); $this->assertEquals(1, $position->line()); $this->assertEquals(2, $position->column()); + $this->assertEquals(7, $position->unreadBytes()); + $this->assertEquals(1, $position->bytePosition()); $position = $position->advance("\n"); $this->assertEquals(2, $position->line()); $this->assertEquals(1, $position->column()); + $this->assertEquals(6, $position->unreadBytes()); + $this->assertEquals(2, $position->bytePosition()); $position = $position->advance("\n\n\nabc"); $this->assertEquals(5, $position->line()); $this->assertEquals(4, $position->column()); + $this->assertEquals(0, $position->unreadBytes()); + $this->assertEquals(8, $position->bytePosition()); + } + + /** + * The german word Über (over) is counted as 5:4 based on strlen:mb_strlen respectively. + * @test + */ + public function multibyte_update() + { + $position = Position::initial(strlen('Über')); + $this->assertEquals(1, $position->line()); + $this->assertEquals(1, $position->column()); + $this->assertEquals(5, $position->totalBytes()); + $this->assertEquals(5, $position->unreadBytes()); + $this->assertEquals(0, $position->bytePosition()); + $position = $position->advance("Ü"); + $this->assertEquals(1, $position->line()); + $this->assertEquals(2, $position->column()); + $this->assertEquals(3, $position->unreadBytes()); + $this->assertEquals(2, $position->bytePosition()); + $position = $position->advance("b"); + $this->assertEquals(1, $position->line()); + $this->assertEquals(3, $position->column()); + $this->assertEquals(2, $position->unreadBytes()); + $this->assertEquals(3, $position->bytePosition()); + $position = $position->advance("er"); + $this->assertEquals(1, $position->line()); + $this->assertEquals(5, $position->column()); + $this->assertEquals(0, $position->unreadBytes()); + $this->assertEquals(5, $position->bytePosition()); } /** @test */ diff --git a/tests/Internal/StringStreamTest.php b/tests/Internal/StringStreamTest.php index cab0ec7..5470154 100644 --- a/tests/Internal/StringStreamTest.php +++ b/tests/Internal/StringStreamTest.php @@ -23,7 +23,7 @@ public function take1() $s = new StringStream("abc"); $t = $s->take1(); $this->assertEquals("a", $t->chunk()); - $expectedPosition = new Position("", 1, 2); + $expectedPosition = new Position(3, "", 1, 2, 1); $expectedStream = new StringStream("bc", $expectedPosition); $this->assertEquals($expectedStream, $t->stream()); } @@ -34,7 +34,7 @@ public function takeN() $s = new StringStream("abcde"); $t = $s->takeN(3); $this->assertEquals("abc", $t->chunk()); - $expectedPosition = new Position("", 1, 4); + $expectedPosition = new Position(5, "", 1, 4, 3); $expectedStream = new StringStream("de", $expectedPosition); $this->assertEquals($expectedStream, $t->stream()); } @@ -45,7 +45,7 @@ public function takeWhile() $s = new StringStream("abc\nde"); $t = $s->takeWhile(fn($c) => $c !== "\n"); $this->assertEquals("abc", $t->chunk()); - $expectedPosition = new Position("", 1, 4); + $expectedPosition = new Position(6, "", 1, 4, 3); $expectedStream = new StringStream("\nde", $expectedPosition); $this->assertEquals($expectedStream, $t->stream()); } diff --git a/tests/ParseResult/ErrorReportingTest.php b/tests/ParseResult/ErrorReportingTest.php index 91cdab5..972a64e 100644 --- a/tests/ParseResult/ErrorReportingTest.php +++ b/tests/ParseResult/ErrorReportingTest.php @@ -49,7 +49,7 @@ public function failing_on_the_first_token() public function failing_with_an_advanced_position() { $parser = char('a'); - $input = new StringStream("bcd", new Position("/path/to/file", 5, 10)); + $input = new StringStream("bcd", new Position(3,"/path/to/file", 5, 10, 0)); $result = $parser->run($input); $expected = <<run($input); $expected = <<sequence(char('a')); - $input = new StringStream("\n\n\nbcd\nxyz", Position::initial("/path/to/file")); + $input = new StringStream("\n\n\nbcd\nxyz", Position::initial(10,"/path/to/file")); $result = $parser->run($input); $expected = << Date: Mon, 14 Dec 2020 20:17:41 -0500 Subject: [PATCH 02/13] Pull back to just the bare minimum --- src/Internal/Position.php | 21 ++++----------------- src/StringStream.php | 2 +- tests/Internal/PositionTest.php | 14 ++------------ tests/Internal/StringStreamTest.php | 6 +++--- tests/ParseResult/ErrorReportingTest.php | 6 +++--- 5 files changed, 13 insertions(+), 36 deletions(-) diff --git a/src/Internal/Position.php b/src/Internal/Position.php index 99e9123..0552848 100644 --- a/src/Internal/Position.php +++ b/src/Internal/Position.php @@ -18,8 +18,6 @@ */ final class Position { - /** @psalm-readonly */ - private int $totalBytes; /** @psalm-readonly */ private string $filename; /** @psalm-readonly */ @@ -29,9 +27,8 @@ final class Position /** @psalm-readonly */ private int $bytePosition; - function __construct(int $totalBytes, string $filename, int $line, int $column, int $bytePosition) + function __construct(string $filename, int $line, int $column, int $bytePosition) { - $this->totalBytes = $totalBytes; $this->filename = $filename; $this->line = $line; $this->column = $column; @@ -42,9 +39,9 @@ function __construct(int $totalBytes, string $filename, int $line, int $column, * Initial position (line 1, column 1). The optional filename is the source of the input, and is really just a label * to make more useful error messages. */ - public static function initial(int $totalBytes = 0, string $filename = ""): Position + public static function initial(string $filename = ""): Position { - return new Position($totalBytes, $filename, 1, 1, 0); + return new Position($filename, 1, 1, 0); } /** @@ -77,7 +74,7 @@ public function advance(string $parsed): Position $bytePosition += strlen($char); } - return new Position($this->totalBytes, $this->filename, $line, $column, $bytePosition); + return new Position($this->filename, $line, $column, $bytePosition); } public function filename(): string @@ -95,18 +92,8 @@ public function column(): int return $this->column; } - public function totalBytes(): int - { - return $this->totalBytes; - } - public function bytePosition(): int { return $this->bytePosition; } - - public function unreadBytes(): int - { - return $this->totalBytes - $this->bytePosition; - } } diff --git a/src/StringStream.php b/src/StringStream.php index 4fe148d..a1e3ea9 100644 --- a/src/StringStream.php +++ b/src/StringStream.php @@ -28,7 +28,7 @@ final class StringStream implements Stream public function __construct(string $string, ?Position $position = null) { $this->string = $string; - $this->position = $position ?? Position::initial(strlen($string)); + $this->position = $position ?? Position::initial(); } /** diff --git a/tests/Internal/PositionTest.php b/tests/Internal/PositionTest.php index ca8c967..6bf0c9f 100644 --- a/tests/Internal/PositionTest.php +++ b/tests/Internal/PositionTest.php @@ -20,26 +20,21 @@ final class PositionTest extends TestCase /** @test */ public function update() { - $position = Position::initial(8); + $position = Position::initial(); $this->assertEquals(1, $position->line()); $this->assertEquals(1, $position->column()); - $this->assertEquals(8, $position->totalBytes()); - $this->assertEquals(8, $position->unreadBytes()); $this->assertEquals(0, $position->bytePosition()); $position = $position->advance("a"); $this->assertEquals(1, $position->line()); $this->assertEquals(2, $position->column()); - $this->assertEquals(7, $position->unreadBytes()); $this->assertEquals(1, $position->bytePosition()); $position = $position->advance("\n"); $this->assertEquals(2, $position->line()); $this->assertEquals(1, $position->column()); - $this->assertEquals(6, $position->unreadBytes()); $this->assertEquals(2, $position->bytePosition()); $position = $position->advance("\n\n\nabc"); $this->assertEquals(5, $position->line()); $this->assertEquals(4, $position->column()); - $this->assertEquals(0, $position->unreadBytes()); $this->assertEquals(8, $position->bytePosition()); } @@ -49,26 +44,21 @@ public function update() */ public function multibyte_update() { - $position = Position::initial(strlen('Über')); + $position = Position::initial(); $this->assertEquals(1, $position->line()); $this->assertEquals(1, $position->column()); - $this->assertEquals(5, $position->totalBytes()); - $this->assertEquals(5, $position->unreadBytes()); $this->assertEquals(0, $position->bytePosition()); $position = $position->advance("Ü"); $this->assertEquals(1, $position->line()); $this->assertEquals(2, $position->column()); - $this->assertEquals(3, $position->unreadBytes()); $this->assertEquals(2, $position->bytePosition()); $position = $position->advance("b"); $this->assertEquals(1, $position->line()); $this->assertEquals(3, $position->column()); - $this->assertEquals(2, $position->unreadBytes()); $this->assertEquals(3, $position->bytePosition()); $position = $position->advance("er"); $this->assertEquals(1, $position->line()); $this->assertEquals(5, $position->column()); - $this->assertEquals(0, $position->unreadBytes()); $this->assertEquals(5, $position->bytePosition()); } diff --git a/tests/Internal/StringStreamTest.php b/tests/Internal/StringStreamTest.php index 5470154..e25183b 100644 --- a/tests/Internal/StringStreamTest.php +++ b/tests/Internal/StringStreamTest.php @@ -23,7 +23,7 @@ public function take1() $s = new StringStream("abc"); $t = $s->take1(); $this->assertEquals("a", $t->chunk()); - $expectedPosition = new Position(3, "", 1, 2, 1); + $expectedPosition = new Position("", 1, 2, 1); $expectedStream = new StringStream("bc", $expectedPosition); $this->assertEquals($expectedStream, $t->stream()); } @@ -34,7 +34,7 @@ public function takeN() $s = new StringStream("abcde"); $t = $s->takeN(3); $this->assertEquals("abc", $t->chunk()); - $expectedPosition = new Position(5, "", 1, 4, 3); + $expectedPosition = new Position("", 1, 4, 3); $expectedStream = new StringStream("de", $expectedPosition); $this->assertEquals($expectedStream, $t->stream()); } @@ -45,7 +45,7 @@ public function takeWhile() $s = new StringStream("abc\nde"); $t = $s->takeWhile(fn($c) => $c !== "\n"); $this->assertEquals("abc", $t->chunk()); - $expectedPosition = new Position(6, "", 1, 4, 3); + $expectedPosition = new Position("", 1, 4, 3); $expectedStream = new StringStream("\nde", $expectedPosition); $this->assertEquals($expectedStream, $t->stream()); } diff --git a/tests/ParseResult/ErrorReportingTest.php b/tests/ParseResult/ErrorReportingTest.php index 972a64e..d3dcab2 100644 --- a/tests/ParseResult/ErrorReportingTest.php +++ b/tests/ParseResult/ErrorReportingTest.php @@ -49,7 +49,7 @@ public function failing_on_the_first_token() public function failing_with_an_advanced_position() { $parser = char('a'); - $input = new StringStream("bcd", new Position(3,"/path/to/file", 5, 10, 0)); + $input = new StringStream("bcd", new Position("/path/to/file", 5, 10, 0)); $result = $parser->run($input); $expected = <<run($input); $expected = <<sequence(char('a')); - $input = new StringStream("\n\n\nbcd\nxyz", Position::initial(10,"/path/to/file")); + $input = new StringStream("\n\n\nbcd\nxyz", Position::initial("/path/to/file")); $result = $parser->run($input); $expected = << Date: Tue, 15 Dec 2020 10:57:06 -0500 Subject: [PATCH 03/13] Add initial generic text file stream implementation --- src/TextFileStream.php | 149 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 src/TextFileStream.php diff --git a/src/TextFileStream.php b/src/TextFileStream.php new file mode 100644 index 0000000..bd2a834 --- /dev/null +++ b/src/TextFileStream.php @@ -0,0 +1,149 @@ +filename(), $position); + } + + public function __construct(string $filePath, ?Position $position = null) + { + $this->filePath = $filePath; + $this->fileHandle = fopen($this->filePath, 'rb'); + $this->position = $position ?? Position::initial($this->filePath); + if (!is_null($position)) { + fseek($this->fileHandle, $this->position->bytePosition()); + } + } + + /** + * @throws EndOfStream + */ + private function guardEndOfStream(): void + { + if ($this->isEOF()) { + throw new EndOfStream("End of stream was reached in " . $this->position->pretty()); + } + } + + /** + * @inheritDoc + */ + public function take1(): TakeResult + { + $this->guardEndOfStream(); + + $token = fgetc($this->fileHandle); + $position = $this->position->advance($token); + + return new TakeResult( + $token, + self::createFromPosition($position) + ); + } + + /** + * @inheritDoc + */ + public function takeN(int $n): TakeResult + { + if ($n <= 0) { + return new TakeResult("", $this); + } + + $this->guardEndOfStream(); + + $chunk = fread($this->fileHandle, $n); + $position = $this->position->advance($chunk); + + return new TakeResult( + $chunk, + self::createFromPosition($position) + ); + } + + /** + * @inheritDoc + */ + public function takeWhile(callable $predicate): TakeResult + { + if ($this->isEOF()) { + return new TakeResult("", $this); + } + + /** + * Variable to track if loop breaks due to EOF. + * @var bool $eof + */ + $eof = false; + + $chunk = ""; // Init the result buffer + $nextToken = fgetc($this->fileHandle); + while ($predicate($nextToken)) { + $chunk .= $nextToken; + if (!feof($this->fileHandle)) { + $nextToken = fgetc($this->fileHandle); + } else { + $eof = true; + break; + } + } + // If the loop breaks because EOF then skip this. + if (!$eof) { + // However if the loop breaks because the predicate, then step one byte back. + fseek($this->fileHandle, -1, SEEK_CUR); + } + $position = $this->position->advance($chunk); + + return new TakeResult( + $chunk, + self::createFromPosition($position) + ); + } + + /** + * @inheritDoc + */ + public function __toString(): string + { + if (0 === ($size = filesize($this->filePath))) { + return ""; + } + + $stringData = fread($this->fileHandle, $size); + fseek($this->fileHandle, $this->position->bytePosition()); + return $stringData; + } + + /** + * @inheritDoc + */ + public function isEOF(): bool + { + return feof($this->fileHandle); + } + + /** + * @inheritDoc + */ + public function position(): Position + { + return $this->position; + } + + public function filePath(): string + { + return $this->filePath; + } +} From 840b03b26b8acd183bfe14e1a78a01af21c210c8 Mon Sep 17 00:00:00 2001 From: MallardDuck Date: Tue, 15 Dec 2020 10:57:42 -0500 Subject: [PATCH 04/13] Add test stubs for TextFileStream needs.. ...and include editorconfig changes to prevent that affecting tests --- .editorconfig | 3 +++ tests/stubs/abc-return-de.txt | 2 ++ tests/stubs/abc.txt | 1 + tests/stubs/abcde.txt | 1 + 4 files changed, 7 insertions(+) create mode 100644 tests/stubs/abc-return-de.txt create mode 100644 tests/stubs/abc.txt create mode 100644 tests/stubs/abcde.txt diff --git a/.editorconfig b/.editorconfig index b26a9a9..2a4e1bf 100644 --- a/.editorconfig +++ b/.editorconfig @@ -13,3 +13,6 @@ trim_trailing_whitespace = true [*.md] trim_trailing_whitespace = false + +[*.txt] +insert_final_newline = false diff --git a/tests/stubs/abc-return-de.txt b/tests/stubs/abc-return-de.txt new file mode 100644 index 0000000..e1fbf29 --- /dev/null +++ b/tests/stubs/abc-return-de.txt @@ -0,0 +1,2 @@ +abc +de \ No newline at end of file diff --git a/tests/stubs/abc.txt b/tests/stubs/abc.txt new file mode 100644 index 0000000..f2ba8f8 --- /dev/null +++ b/tests/stubs/abc.txt @@ -0,0 +1 @@ +abc \ No newline at end of file diff --git a/tests/stubs/abcde.txt b/tests/stubs/abcde.txt new file mode 100644 index 0000000..6a81654 --- /dev/null +++ b/tests/stubs/abcde.txt @@ -0,0 +1 @@ +abcde \ No newline at end of file From 679d668efcf3052b790006d90c6d605f53315901 Mon Sep 17 00:00:00 2001 From: MallardDuck Date: Tue, 15 Dec 2020 10:57:57 -0500 Subject: [PATCH 05/13] Add tests to cover TextFileStream --- tests/Internal/TextFileStreamTest.php | 71 +++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 tests/Internal/TextFileStreamTest.php diff --git a/tests/Internal/TextFileStreamTest.php b/tests/Internal/TextFileStreamTest.php new file mode 100644 index 0000000..4725d43 --- /dev/null +++ b/tests/Internal/TextFileStreamTest.php @@ -0,0 +1,71 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Tests\Verraes\Parsica\Internal; + +use PHPUnit\Framework\TestCase; +use Verraes\Parsica\Internal\Position; +use Verraes\Parsica\TextFileStream; + +final class TextFileStreamTest extends TestCase +{ + + private string $stubBasePath; + + public function setUp(): void + { + parent::setUp(); // TODO: Change the autogenerated stub + $this->stubBasePath = dirname(__DIR__) . "/stubs/"; + } + + /** @test */ + public function take1() + { + $testStub = $this->stubBasePath . "abc.txt"; + + $s = new TextFileStream($testStub); + $t = $s->take1(); + $this->assertEquals("a", $t->chunk()); + $expectedPosition = new Position($testStub, 1, 2, 1); + $expectedStream = new TextFileStream($testStub, $expectedPosition); + // Because the file socket stream is unique we compare individual aspects... + $this->assertEquals($expectedStream->position(), $t->stream()->position()); + $this->assertEquals($expectedStream->filePath(), $t->stream()->filePath()); + $this->assertEquals((string) $expectedStream, (string) $t->stream()); + } + + /** @test */ + public function takeN() + { + $testStub = $this->stubBasePath . "abcde.txt"; + $s = new TextFileStream($testStub); + $t = $s->takeN(3); + $this->assertEquals("abc", $t->chunk()); + $expectedPosition = new Position($testStub, 1, 4, 3); + $expectedStream = new TextFileStream($testStub, $expectedPosition); + $this->assertEquals($expectedStream->position(), $t->stream()->position()); + $this->assertEquals($expectedStream->filePath(), $t->stream()->filePath()); + $this->assertEquals((string) $expectedStream, (string) $t->stream()); + } + + /** @test */ + public function takeWhile() + { + $testStub = $this->stubBasePath . "abc-return-de.txt"; + $s = new TextFileStream($testStub); + $t = $s->takeWhile(fn($c) => $c !== "\n"); + $this->assertEquals("abc", $t->chunk()); + $expectedPosition = new Position($testStub, 1, 4, 3); + $expectedStream = new TextFileStream($testStub, $expectedPosition); + $this->assertEquals($expectedStream->position(), $t->stream()->position()); + $this->assertEquals($expectedStream->filePath(), $t->stream()->filePath()); + $this->assertEquals((string) $expectedStream, (string) $t->stream()); + } +} From d86e26e2feae5ad994b255849a245debbfc0cae5 Mon Sep 17 00:00:00 2001 From: MallardDuck Date: Tue, 15 Dec 2020 14:48:54 -0500 Subject: [PATCH 06/13] Some refactors to keep the filePointer position sane... ...might be worth doing a special class to wrap the immutable resource? --- src/TextFileStream.php | 48 +++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/src/TextFileStream.php b/src/TextFileStream.php index bd2a834..66c1834 100644 --- a/src/TextFileStream.php +++ b/src/TextFileStream.php @@ -1,11 +1,13 @@ -filePath = $filePath; $this->fileHandle = fopen($this->filePath, 'rb'); $this->position = $position ?? Position::initial($this->filePath); - if (!is_null($position)) { + if (true !== is_null($position)) { fseek($this->fileHandle, $this->position->bytePosition()); } } + public function __destruct() + { + if (is_resource($this->fileHandle)) { + fclose($this->fileHandle); + } + } + /** * @throws EndOfStream */ @@ -37,6 +49,18 @@ private function guardEndOfStream(): void } } + private function safeRead(?int $n = null): string + { + if (is_null($n)) { + $tokenChunk = fgetc($this->fileHandle); + } else { + $tokenChunk = fread($this->fileHandle, $n); + } + rewind($this->fileHandle); + fseek($this->fileHandle, $this->position->bytePosition()); + return !$tokenChunk ? '' : $tokenChunk; + } + /** * @inheritDoc */ @@ -44,7 +68,7 @@ public function take1(): TakeResult { $this->guardEndOfStream(); - $token = fgetc($this->fileHandle); + $token = $this->safeRead(); $position = $this->position->advance($token); return new TakeResult( @@ -64,7 +88,7 @@ public function takeN(int $n): TakeResult $this->guardEndOfStream(); - $chunk = fread($this->fileHandle, $n); + $chunk = $this->safeRead($n); $position = $this->position->advance($chunk); return new TakeResult( @@ -82,12 +106,6 @@ public function takeWhile(callable $predicate): TakeResult return new TakeResult("", $this); } - /** - * Variable to track if loop breaks due to EOF. - * @var bool $eof - */ - $eof = false; - $chunk = ""; // Init the result buffer $nextToken = fgetc($this->fileHandle); while ($predicate($nextToken)) { @@ -95,16 +113,11 @@ public function takeWhile(callable $predicate): TakeResult if (!feof($this->fileHandle)) { $nextToken = fgetc($this->fileHandle); } else { - $eof = true; break; } } - // If the loop breaks because EOF then skip this. - if (!$eof) { - // However if the loop breaks because the predicate, then step one byte back. - fseek($this->fileHandle, -1, SEEK_CUR); - } $position = $this->position->advance($chunk); + $this->safeRead(); return new TakeResult( $chunk, @@ -121,6 +134,7 @@ public function __toString(): string return ""; } + fseek($this->fileHandle, $this->position->bytePosition()); $stringData = fread($this->fileHandle, $size); fseek($this->fileHandle, $this->position->bytePosition()); return $stringData; From 6c578032b0e7cd5cdc5603e3c0b24d9c2cc7eb2b Mon Sep 17 00:00:00 2001 From: MallardDuck Date: Tue, 15 Dec 2020 14:49:29 -0500 Subject: [PATCH 07/13] Psalm tweaks to account for file resource stream --- src/Stream.php | 2 +- src/StringStream.php | 2 +- src/TextFileStream.php | 7 +++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Stream.php b/src/Stream.php index 6765764..c4d2d57 100644 --- a/src/Stream.php +++ b/src/Stream.php @@ -16,7 +16,7 @@ /** * Represents an input stream. This allows us to have different types of input, each with their own optimizations. * - * @psalm-immutable + * @psalm-external-mutation-free */ interface Stream { diff --git a/src/StringStream.php b/src/StringStream.php index a1e3ea9..3fa1d70 100644 --- a/src/StringStream.php +++ b/src/StringStream.php @@ -15,7 +15,7 @@ use Parsica\Parsica\Internal\TakeResult; /** - * @psalm-immutable + * @psalm-external-mutation-free */ final class StringStream implements Stream { diff --git a/src/TextFileStream.php b/src/TextFileStream.php index 66c1834..7543e1b 100644 --- a/src/TextFileStream.php +++ b/src/TextFileStream.php @@ -7,10 +7,17 @@ use Verraes\Parsica\Internal\Position; use Verraes\Parsica\Internal\TakeResult; +/** + * @psalm-external-mutation-free + */ final class TextFileStream implements Stream { private string $filePath; + /** + * @psalm-allow-private-mutation + * @var resource + */ private $fileHandle; private Position $position; From 27d7fdcd15fedc4c3559495daf89fce014398f7e Mon Sep 17 00:00:00 2001 From: MallardDuck Date: Tue, 15 Dec 2020 14:50:23 -0500 Subject: [PATCH 08/13] Add a file based example and tweak excel example to be reuseable --- tests/Examples/ExcelClasses.php | 57 ++++++++++++++ tests/Examples/ExcelTest.php | 54 +------------- tests/Examples/ExcelTextFileStreamTest.php | 86 ++++++++++++++++++++++ tests/stubs/example-excel-1.txt | 1 + tests/stubs/example-excel-2.txt | 1 + 5 files changed, 146 insertions(+), 53 deletions(-) create mode 100644 tests/Examples/ExcelClasses.php create mode 100644 tests/Examples/ExcelTextFileStreamTest.php create mode 100644 tests/stubs/example-excel-1.txt create mode 100644 tests/stubs/example-excel-2.txt diff --git a/tests/Examples/ExcelClasses.php b/tests/Examples/ExcelClasses.php new file mode 100644 index 0000000..1a656e8 --- /dev/null +++ b/tests/Examples/ExcelClasses.php @@ -0,0 +1,57 @@ +col = $col; + $this->row = $row; + } +} +class Range +{ + private Cell $from; + private Cell $to; + + function __construct(Cell $from, Cell $to) + { + $this->from = $from; + $this->to = $to; + } +} +class Intersection +{ + private Range $l; + private Range $r; + + function __construct(Range $l, Range $r) + { + $this->l = $l; + $this->r = $r; + } +} +class Sum +{ + private Intersection $intersection; + + function __construct(Intersection $intersection) + { + $this->intersection = $intersection; + } +} +class Ampersand +{ + private Cell $l; + private Cell $r; + + function __construct(Cell $l, Cell $r) + { + $this->l = $l; + $this->r = $r; + } +} diff --git a/tests/Examples/ExcelTest.php b/tests/Examples/ExcelTest.php index 98476b6..269de34 100644 --- a/tests/Examples/ExcelTest.php +++ b/tests/Examples/ExcelTest.php @@ -76,56 +76,4 @@ private function excelParser(): Parser } -class Cell -{ - private $col; - private $row; - - function __construct($col, $row) - { - $this->col = $col; - $this->row = $row; - } -} -class Range -{ - private Cell $from; - private Cell $to; - - function __construct(Cell $from, Cell $to) - { - $this->from = $from; - $this->to = $to; - } -} -class Intersection -{ - private Range $l; - private Range $r; - - function __construct(Range $l, Range $r) - { - $this->l = $l; - $this->r = $r; - } -} -class Sum -{ - private Intersection $intersection; - - function __construct(Intersection $intersection) - { - $this->intersection = $intersection; - } -} -class Ampersand -{ - private Cell $l; - private Cell $r; - - function __construct(Cell $l, Cell $r) - { - $this->l = $l; - $this->r = $r; - } -} +require_once __DIR__ . '/ExcelClasses.php'; diff --git a/tests/Examples/ExcelTextFileStreamTest.php b/tests/Examples/ExcelTextFileStreamTest.php new file mode 100644 index 0000000..c3a94ac --- /dev/null +++ b/tests/Examples/ExcelTextFileStreamTest.php @@ -0,0 +1,86 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Tests\Verraes\Parsica\Examples; + +use PHPUnit\Framework\TestCase; +use Verraes\Parsica\Parser; +use Verraes\Parsica\PHPUnit\ParserAssertions; +use Verraes\Parsica\TextFileStream; +use function Verraes\Parsica\{alphaChar, between, char, collect, digitChar, skipHSpace1, space, string}; + +final class ExcelTextFileStreamTest extends TestCase +{ + use ParserAssertions; + + private string $stubBasePath; + + public function setUp(): void + { + parent::setUp(); // TODO: Change the autogenerated stub + $this->stubBasePath = dirname(__DIR__) . "/stubs/"; + } + + /** @test */ + public function spaceOrOperatorDependingOnContext() + { + // https://twitter.com/Mark_Baker/status/1309919606887374849?s=20 + // and https://twitter.com/Mark_Baker/status/1309960902482026498?s=20 + // `=SUM(B7:D7 C6:C8)` where space is the intersection operator for the + // intersection between the two ranges B7:D7 and C6:C8 (ie. C7), + // and `=A1 & B1` where the space is simply whitespace and should be ignored + + $parser = $this->excelParser(); + + $input = new TextFileStream($this->stubBasePath . 'example-excel-1.txt'); + $expected = new Sum( + new Intersection( + new Range(new Cell("B", "7"), new Cell("D", "7")), + new Range(new Cell("C", "6"), new Cell("C", "8")), + ) + ); + $this->assertParsesStream($input, $parser, $expected); + + $input = new TextFileStream($this->stubBasePath . 'example-excel-2.txt'); + $expected = new Ampersand( + new Cell("A", "1"), + new Cell("B", "1"), + ); + $this->assertParsesStream($input, $parser, $expected); + } + + private function excelParser(): Parser + { + $parens = fn(Parser $p): Parser => between(char('('), char(')'), $p); + $cell = collect(alphaChar(), digitChar()) + ->map(fn($o) => new Cell($o[0], $o[1])); + $range = collect($cell, char(':'), $cell) + ->map(fn($o) => new Range($o[0], $o[2])); + $intersection = collect($range, space(), $range) + ->map(fn($o) => new Intersection($o[0], $o[2])); + $sum = (string('=SUM')->followedBy($parens($intersection))) + ->map(fn($o) => new Sum($o)); + + + // consumes space before and after Parser $p + $token = fn(Parser $p): Parser => between(skipHSpace1(), skipHSpace1(), $p); + $ampersand = char('=')->followedBy(collect( + $cell, + $token(char('&')), + $cell + ))->map(fn($o) => new Ampersand($o[0], $o[2])); + + + return $sum->or($ampersand); + } + +} + +require_once __DIR__ . '/ExcelClasses.php'; diff --git a/tests/stubs/example-excel-1.txt b/tests/stubs/example-excel-1.txt new file mode 100644 index 0000000..9d3cdbc --- /dev/null +++ b/tests/stubs/example-excel-1.txt @@ -0,0 +1 @@ +=SUM(B7:D7 C6:C8) \ No newline at end of file diff --git a/tests/stubs/example-excel-2.txt b/tests/stubs/example-excel-2.txt new file mode 100644 index 0000000..f70f8bf --- /dev/null +++ b/tests/stubs/example-excel-2.txt @@ -0,0 +1 @@ +=A1 & B1 \ No newline at end of file From a83e2be1154cd3be1df9e0228c03c80c2e87c2be Mon Sep 17 00:00:00 2001 From: MallardDuck Date: Tue, 22 Dec 2020 18:10:35 -0500 Subject: [PATCH 09/13] update to use immutable read file package --- composer.json | 3 +- src/TextFileStream.php | 74 ++++++++++++++++++++---------------------- 2 files changed, 37 insertions(+), 40 deletions(-) diff --git a/composer.json b/composer.json index 8c1e807..e9b4c7c 100644 --- a/composer.json +++ b/composer.json @@ -25,7 +25,8 @@ "require": { "php": "^7.4 || ^8.0", "ext-mbstring": "*", - "cypresslab/php-curry": "^0.5.0" + "cypresslab/php-curry": "^0.5.0", + "mallardduck/immutable-read-file": "^0.5.2" }, "require-dev": { "ext-json": "*", diff --git a/src/TextFileStream.php b/src/TextFileStream.php index 7543e1b..586161c 100644 --- a/src/TextFileStream.php +++ b/src/TextFileStream.php @@ -3,6 +3,7 @@ namespace Verraes\Parsica; use InvalidArgumentException; +use MallardDuck\ImmutableReadFile\ImmutableFile; use Verraes\Parsica\Internal\EndOfStream; use Verraes\Parsica\Internal\Position; use Verraes\Parsica\Internal\TakeResult; @@ -16,9 +17,8 @@ final class TextFileStream implements Stream private string $filePath; /** * @psalm-allow-private-mutation - * @var resource */ - private $fileHandle; + private ImmutableFile $fileHandle; private Position $position; public static function createFromPosition(Position $position): self @@ -28,22 +28,15 @@ public static function createFromPosition(Position $position): self public function __construct(string $filePath, ?Position $position = null) { + /** + * @psalm-suppress ImpureFunctionCall + */ if (!is_file($filePath)) { throw new InvalidArgumentException("The file path for the text-file is not a valid file."); } $this->filePath = $filePath; - $this->fileHandle = fopen($this->filePath, 'rb'); $this->position = $position ?? Position::initial($this->filePath); - if (true !== is_null($position)) { - fseek($this->fileHandle, $this->position->bytePosition()); - } - } - - public function __destruct() - { - if (is_resource($this->fileHandle)) { - fclose($this->fileHandle); - } + $this->fileHandle = ImmutableFile::fromFilePathWithPosition($this->filePath, $this->position->bytePosition()); } /** @@ -56,18 +49,6 @@ private function guardEndOfStream(): void } } - private function safeRead(?int $n = null): string - { - if (is_null($n)) { - $tokenChunk = fgetc($this->fileHandle); - } else { - $tokenChunk = fread($this->fileHandle, $n); - } - rewind($this->fileHandle); - fseek($this->fileHandle, $this->position->bytePosition()); - return !$tokenChunk ? '' : $tokenChunk; - } - /** * @inheritDoc */ @@ -75,7 +56,10 @@ public function take1(): TakeResult { $this->guardEndOfStream(); - $token = $this->safeRead(); + /** + * @psalm-suppress ImpureMethodCall + */ + $token = $this->fileHandle->fgetc(); $position = $this->position->advance($token); return new TakeResult( @@ -95,7 +79,10 @@ public function takeN(int $n): TakeResult $this->guardEndOfStream(); - $chunk = $this->safeRead($n); + /** + * @psalm-suppress ImpureMethodCall + */ + $chunk = $this->fileHandle->fread($n); $position = $this->position->advance($chunk); return new TakeResult( @@ -113,22 +100,31 @@ public function takeWhile(callable $predicate): TakeResult return new TakeResult("", $this); } + $remaining = $this->fileHandle; + /** + * @psalm-suppress ImpureMethodCall + */ + $nextToken = $this->fileHandle->fgetc(); $chunk = ""; // Init the result buffer - $nextToken = fgetc($this->fileHandle); while ($predicate($nextToken)) { $chunk .= $nextToken; - if (!feof($this->fileHandle)) { - $nextToken = fgetc($this->fileHandle); + /** + * @psalm-suppress ImpureMethodCall + */ + $remaining = $remaining->advanceBytePosition(); + if (!$remaining->feof()) { + /** + * @psalm-suppress ImpureMethodCall + */ + $nextToken = $remaining->fgetc(); } else { break; } } - $position = $this->position->advance($chunk); - $this->safeRead(); return new TakeResult( $chunk, - self::createFromPosition($position) + self::createFromPosition($this->position->advance($chunk)) ); } @@ -137,14 +133,14 @@ public function takeWhile(callable $predicate): TakeResult */ public function __toString(): string { - if (0 === ($size = filesize($this->filePath))) { + /** + * @psalm-suppress ImpureMethodCall + */ + if (0 === $this->fileHandle->getFileSize()) { return ""; } - fseek($this->fileHandle, $this->position->bytePosition()); - $stringData = fread($this->fileHandle, $size); - fseek($this->fileHandle, $this->position->bytePosition()); - return $stringData; + return (string) $this->fileHandle; } /** @@ -152,7 +148,7 @@ public function __toString(): string */ public function isEOF(): bool { - return feof($this->fileHandle); + return $this->fileHandle->feof(); } /** From 535ad36313805ae74bd76900afd5755835aa30e6 Mon Sep 17 00:00:00 2001 From: Dan Pock Date: Mon, 19 Apr 2021 13:14:48 -0400 Subject: [PATCH 10/13] update tests for new namespace --- composer.json | 4 ++-- src/PHPUnit/ParserAssertions.php | 24 ++++++++++++++++++++++ src/TextFileStream.php | 8 ++++---- tests/Examples/ExcelClasses.php | 2 +- tests/Examples/ExcelTextFileStreamTest.php | 10 ++++----- tests/Internal/TextFileStreamTest.php | 6 +++--- 6 files changed, 39 insertions(+), 15 deletions(-) diff --git a/composer.json b/composer.json index e9b4c7c..928386e 100644 --- a/composer.json +++ b/composer.json @@ -26,7 +26,7 @@ "php": "^7.4 || ^8.0", "ext-mbstring": "*", "cypresslab/php-curry": "^0.5.0", - "mallardduck/immutable-read-file": "^0.5.2" + "mallardduck/immutable-read-file": "^0.5.3" }, "require-dev": { "ext-json": "*", @@ -56,7 +56,7 @@ }, "autoload-dev": { "psr-4": { - "Tests\\Verraes\\Parsica\\": "tests/" + "Tests\\Parsica\\Parsica\\": "tests/" } }, "scripts": { diff --git a/src/PHPUnit/ParserAssertions.php b/src/PHPUnit/ParserAssertions.php index fae0ec8..19e1af4 100644 --- a/src/PHPUnit/ParserAssertions.php +++ b/src/PHPUnit/ParserAssertions.php @@ -12,6 +12,7 @@ use Exception; use Parsica\Parsica\Parser; +use Parsica\Parsica\Stream; use Parsica\Parsica\StringStream; /** @@ -46,6 +47,29 @@ protected function assertParses(string $input, Parser $parser, $expectedOutput, } } + /** + * @psalm-param mixed $expectedOutput + * + * @api + */ + protected function assertParsesStream(Stream $input, Parser $parser, $expectedOutput, string $message = ""): void + { + $actualResult = $parser->run($input); + if ($actualResult->isSuccess()) { + $this->assertStrictlyEquals( + $expectedOutput, + $actualResult->output(), + $message . "\n" . "The parser succeeded but the output doesn't match your expected output." + ); + } else { + $this->fail( + $message . "\n" + ."The parser failed with the following error message:\n" + .$actualResult->errorMessage()."\n" + ); + } + } + /** * Behaves like assertSame for primitives, behaves like assertEquals for objects of the same type, and fails * for everything else. diff --git a/src/TextFileStream.php b/src/TextFileStream.php index 586161c..720e838 100644 --- a/src/TextFileStream.php +++ b/src/TextFileStream.php @@ -1,12 +1,12 @@ Date: Mon, 19 Apr 2021 16:11:26 -0400 Subject: [PATCH 11/13] Add benchmarks for TextFileStream specific things --- benchmarks/JSONBench.php | 10 +++ benchmarks/JSONbench.json | 31 ++++++++ benchmarks/ManyBench.txt | 1 + benchmarks/ManyBenchTextFileStream.php | 97 ++++++++++++++++++++++++++ 4 files changed, 139 insertions(+) create mode 100644 benchmarks/JSONbench.json create mode 100644 benchmarks/ManyBench.txt create mode 100644 benchmarks/ManyBenchTextFileStream.php diff --git a/benchmarks/JSONBench.php b/benchmarks/JSONBench.php index 38831a7..1b85444 100644 --- a/benchmarks/JSONBench.php +++ b/benchmarks/JSONBench.php @@ -10,6 +10,7 @@ use Parsica\Parsica\JSON\JSON as ParsicaJSON; use Json as BaseMaxJson; +use Parsica\Parsica\TextFileStream; class JSONBench { @@ -17,6 +18,7 @@ class JSONBench function __construct() { + $this->fileData = new TextFileStream(__DIR__ . '/JSONbench.json'); $this->data = <<tryString($this->data); } + /** + * @Revs(5) + * @Iterations(3) + */ + public function bench_Parsica_TextFileStream_JSON() + { + $result = ParsicaJSON::json()->try($this->fileData); + } /** * @Revs(5) diff --git a/benchmarks/JSONbench.json b/benchmarks/JSONbench.json new file mode 100644 index 0000000..273a725 --- /dev/null +++ b/benchmarks/JSONbench.json @@ -0,0 +1,31 @@ +{ + "name": "mathiasverraes/parsica", + "type": "library", + "alotoftext": [ + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet", + "Lorem Ipsum dolor sit amet" + ], + "alotmoretext": "Fuga iusto dolores ipsam. Qui excepturi veniam iste autem ducimus porro et voluptas. Veniam veniam ducimus cumque facere repudiandae corrupti sint quas. Cupiditate asperiores iure omnis dolores nihil asperiores qui quo. Assumenda quia iure deserunt deserunt. Perspiciatis velit quia et.\n\nExplicabo non dolores aut facere. Perferendis in est voluptate. Et laboriosam et autem voluptatum rem nam et aut. Voluptatem praesentium et earum fugit accusamus tempore consectetur natus. Beatae sunt nisi rerum blanditiis consequatur rerum ut.\n\nIure ipsa sit assumenda. Vitae nisi qui vero. Eveniet cum aliquam molestiae molestias. Nisi aut ea alias quo ea voluptatem. Minus ea mollitia quis.", + "description": "The easiest way to build robust parsers in PHP.", + "keywords": [ + "parser", + "parser-combinator", + "parser combinator", + "parsing" + ] +} diff --git a/benchmarks/ManyBench.txt b/benchmarks/ManyBench.txt new file mode 100644 index 0000000..54cbcee --- /dev/null +++ b/benchmarks/ManyBench.txt @@ -0,0 +1 @@ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \ No newline at end of file diff --git a/benchmarks/ManyBenchTextFileStream.php b/benchmarks/ManyBenchTextFileStream.php new file mode 100644 index 0000000..982c821 --- /dev/null +++ b/benchmarks/ManyBenchTextFileStream.php @@ -0,0 +1,97 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +use Parsica\Parsica\Parser; +use Parsica\Parsica\TextFileStream; +use function Parsica\Parsica\any; +use function Parsica\Parsica\char; +use function Parsica\Parsica\collect; +use function Parsica\Parsica\many; +use function Parsica\Parsica\map; +use function Parsica\Parsica\pure; +use function Parsica\Parsica\recursive; +use function Parsica\Parsica\satisfy; +use function Parsica\Parsica\takeWhile; + +class ManyBenchTextFileStream +{ + private string $data; + + function __construct() + { + $this->fileData = new TextFileStream(__DIR__ . '/ManyBench.txt'); + + $this->takeWhile = takeWhile(fn(string $c): bool => $c === 'a'); + $this->manySatisfy = many(satisfy(fn(string $c): bool => $c === 'a')); + $this->manyChar = many(char('a')); + $this->oldManySatisfy = static::oldMany(satisfy(fn(string $c): bool => $c === 'a')); + $this->oldManyChar = static::oldMany(char('a')); + } + + /** + * @Revs(10) + * @Iterations(10) + */ + public function bench_takeWhile() + { + $result = $this->takeWhile->try($this->fileData); + } + + /** + * @Revs(10) + * @Iterations(10) + */ + public function bench_manySatisfy() + { + $result = $this->manySatisfy->try($this->fileData); + } + + /** + * @Revs(10) + * @Iterations(10) + */ + public function bench_manyChar() + { + $result = $this->manyChar->try($this->fileData); + } + + /** + * @Revs(10) + * @Iterations(10) + */ + public function bench_oldManySatisfy() + { + $result = $this->oldManySatisfy->try($this->fileData); + } + + /** + * @Revs(10) + * @Iterations(10) + */ + public function bench_oldManyChar() + { + $result = $this->oldManyChar->try($this->fileData); + } + + public static function oldMany(Parser $parser) + { + $rec = recursive(); + $rec->recurse( + any( + map( + collect($parser, $rec), + fn(array $o): array => array_merge([$o[0]], $o[1]) + ), + pure([]), + ) + ); + return $rec; + } +} From e66965d63337092bbb3557033d57fe133e3c8c68 Mon Sep 17 00:00:00 2001 From: Dan Pock Date: Sat, 24 Apr 2021 19:13:37 -0400 Subject: [PATCH 12/13] bump immutable-read-file --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 928386e..1f30a4e 100644 --- a/composer.json +++ b/composer.json @@ -26,7 +26,7 @@ "php": "^7.4 || ^8.0", "ext-mbstring": "*", "cypresslab/php-curry": "^0.5.0", - "mallardduck/immutable-read-file": "^0.5.3" + "mallardduck/immutable-read-file": "^1.0.0" }, "require-dev": { "ext-json": "*", From ec763a0e04508164c583dfe6d5d13067698ee43b Mon Sep 17 00:00:00 2001 From: Dan Pock Date: Sat, 24 Apr 2021 19:40:52 -0400 Subject: [PATCH 13/13] add a different Position implementation for TextFileStreams --- src/Internal/BasePosition.php | 13 ++++ src/Internal/Position.php | 18 +---- src/Internal/PositionWithBytes.php | 99 ++++++++++++++++++++++++ src/Stream.php | 4 +- src/TextFileStream.php | 12 +-- tests/Internal/PositionTest.php | 8 -- tests/Internal/PositionWithBytesTest.php | 95 +++++++++++++++++++++++ tests/Internal/TextFileStreamTest.php | 8 +- 8 files changed, 223 insertions(+), 34 deletions(-) create mode 100644 src/Internal/BasePosition.php create mode 100644 src/Internal/PositionWithBytes.php create mode 100644 tests/Internal/PositionWithBytesTest.php diff --git a/src/Internal/BasePosition.php b/src/Internal/BasePosition.php new file mode 100644 index 0000000..86f3276 --- /dev/null +++ b/src/Internal/BasePosition.php @@ -0,0 +1,13 @@ +filename = $filename; $this->line = $line; $this->column = $column; - $this->bytePosition = $bytePosition; } /** @@ -41,7 +38,7 @@ function __construct(string $filename, int $line, int $column, int $bytePosition */ public static function initial(string $filename = ""): Position { - return new Position($filename, 1, 1, 0); + return new Position($filename, 1, 1); } /** @@ -56,7 +53,6 @@ public function advance(string $parsed): Position { $column = $this->column; $line = $this->line; - $bytePosition = $this->bytePosition; /** @psalm-var string $char */ foreach (mb_str_split($parsed, 1) as $char) { switch ($char) { @@ -71,10 +67,9 @@ public function advance(string $parsed): Position default: $column++; } - $bytePosition += strlen($char); } - return new Position($this->filename, $line, $column, $bytePosition); + return new Position($this->filename, $line, $column); } public function filename(): string @@ -91,9 +86,4 @@ public function column(): int { return $this->column; } - - public function bytePosition(): int - { - return $this->bytePosition; - } } diff --git a/src/Internal/PositionWithBytes.php b/src/Internal/PositionWithBytes.php new file mode 100644 index 0000000..f268e6d --- /dev/null +++ b/src/Internal/PositionWithBytes.php @@ -0,0 +1,99 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Parsica\Parsica\Internal; + +/** + * File, line, and column position of the parser. + * + * @psalm-immutable + * @psalm-external-mutation-free + */ +final class PositionWithBytes implements BasePosition +{ + /** @psalm-readonly */ + private string $filename; + /** @psalm-readonly */ + private int $line; + /** @psalm-readonly */ + private int $column; + /** @psalm-readonly */ + private int $bytePosition; + + function __construct(string $filename, int $line, int $column, int $bytePosition) + { + $this->filename = $filename; + $this->line = $line; + $this->column = $column; + $this->bytePosition = $bytePosition; + } + + /** + * Initial position (line 1, column 1). The optional filename is the source of the input, and is really just a label + * to make more useful error messages. + */ + public static function initial(string $filename = ""): PositionWithBytes + { + return new PositionWithBytes($filename, 1, 1, 0); + } + + /** + * Pretty print as "filename:line:column" + */ + public function pretty(): string + { + return $this->filename . ":" . $this->line . ":" . $this->column; + } + + public function advance(string $parsed): PositionWithBytes + { + $column = $this->column; + $line = $this->line; + $bytePosition = $this->bytePosition; + /** @psalm-var string $char */ + foreach (mb_str_split($parsed, 1) as $char) { + switch ($char) { + case "\n": + case "\r": + $line++; + $column = 1; + break; + case "\t": + $column = $column + 4 - (($column - 1) % 4); + break; + default: + $column++; + } + $bytePosition += strlen($char); + } + + return new PositionWithBytes($this->filename, $line, $column, $bytePosition); + } + + public function filename(): string + { + return $this->filename; + } + + public function line(): int + { + return $this->line; + } + + public function column(): int + { + return $this->column; + } + + public function bytePosition(): int + { + return $this->bytePosition; + } +} diff --git a/src/Stream.php b/src/Stream.php index c4d2d57..4d4b709 100644 --- a/src/Stream.php +++ b/src/Stream.php @@ -10,7 +10,7 @@ namespace Parsica\Parsica; -use Parsica\Parsica\Internal\Position; +use Parsica\Parsica\Internal\BasePosition; use Parsica\Parsica\Internal\TakeResult; /** @@ -67,5 +67,5 @@ public function isEOF(): bool; * * @internal */ - public function position() : Position; + public function position() : BasePosition; } diff --git a/src/TextFileStream.php b/src/TextFileStream.php index 720e838..ce09dbc 100644 --- a/src/TextFileStream.php +++ b/src/TextFileStream.php @@ -5,7 +5,7 @@ use InvalidArgumentException; use MallardDuck\ImmutableReadFile\ImmutableFile; use Parsica\Parsica\Internal\EndOfStream; -use Parsica\Parsica\Internal\Position; +use Parsica\Parsica\Internal\PositionWithBytes; use Parsica\Parsica\Internal\TakeResult; /** @@ -19,14 +19,14 @@ final class TextFileStream implements Stream * @psalm-allow-private-mutation */ private ImmutableFile $fileHandle; - private Position $position; + private PositionWithBytes $position; - public static function createFromPosition(Position $position): self + public static function createFromPosition(PositionWithBytes $position): self { return new self($position->filename(), $position); } - public function __construct(string $filePath, ?Position $position = null) + public function __construct(string $filePath, ?PositionWithBytes $position = null) { /** * @psalm-suppress ImpureFunctionCall @@ -35,7 +35,7 @@ public function __construct(string $filePath, ?Position $position = null) throw new InvalidArgumentException("The file path for the text-file is not a valid file."); } $this->filePath = $filePath; - $this->position = $position ?? Position::initial($this->filePath); + $this->position = $position ?? PositionWithBytes::initial($this->filePath); $this->fileHandle = ImmutableFile::fromFilePathWithPosition($this->filePath, $this->position->bytePosition()); } @@ -154,7 +154,7 @@ public function isEOF(): bool /** * @inheritDoc */ - public function position(): Position + public function position(): PositionWithBytes { return $this->position; } diff --git a/tests/Internal/PositionTest.php b/tests/Internal/PositionTest.php index 6bf0c9f..50adc32 100644 --- a/tests/Internal/PositionTest.php +++ b/tests/Internal/PositionTest.php @@ -23,19 +23,15 @@ public function update() $position = Position::initial(); $this->assertEquals(1, $position->line()); $this->assertEquals(1, $position->column()); - $this->assertEquals(0, $position->bytePosition()); $position = $position->advance("a"); $this->assertEquals(1, $position->line()); $this->assertEquals(2, $position->column()); - $this->assertEquals(1, $position->bytePosition()); $position = $position->advance("\n"); $this->assertEquals(2, $position->line()); $this->assertEquals(1, $position->column()); - $this->assertEquals(2, $position->bytePosition()); $position = $position->advance("\n\n\nabc"); $this->assertEquals(5, $position->line()); $this->assertEquals(4, $position->column()); - $this->assertEquals(8, $position->bytePosition()); } /** @@ -47,19 +43,15 @@ public function multibyte_update() $position = Position::initial(); $this->assertEquals(1, $position->line()); $this->assertEquals(1, $position->column()); - $this->assertEquals(0, $position->bytePosition()); $position = $position->advance("Ü"); $this->assertEquals(1, $position->line()); $this->assertEquals(2, $position->column()); - $this->assertEquals(2, $position->bytePosition()); $position = $position->advance("b"); $this->assertEquals(1, $position->line()); $this->assertEquals(3, $position->column()); - $this->assertEquals(3, $position->bytePosition()); $position = $position->advance("er"); $this->assertEquals(1, $position->line()); $this->assertEquals(5, $position->column()); - $this->assertEquals(5, $position->bytePosition()); } /** @test */ diff --git a/tests/Internal/PositionWithBytesTest.php b/tests/Internal/PositionWithBytesTest.php new file mode 100644 index 0000000..fb54519 --- /dev/null +++ b/tests/Internal/PositionWithBytesTest.php @@ -0,0 +1,95 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Tests\Parsica\Parsica\Internal; + +use Parsica\Parsica\TextFileStream; +use PHPUnit\Framework\TestCase; +use Parsica\Parsica\Internal\PositionWithBytes; +use Parsica\Parsica\StringStream; +use function Parsica\Parsica\char; + +final class PositionWithBytesTest extends TestCase +{ + /** @test */ + public function update() + { + $position = PositionWithBytes::initial(); + $this->assertEquals(1, $position->line()); + $this->assertEquals(1, $position->column()); + $this->assertEquals(0, $position->bytePosition()); + $position = $position->advance("a"); + $this->assertEquals(1, $position->line()); + $this->assertEquals(2, $position->column()); + $this->assertEquals(1, $position->bytePosition()); + $position = $position->advance("\n"); + $this->assertEquals(2, $position->line()); + $this->assertEquals(1, $position->column()); + $this->assertEquals(2, $position->bytePosition()); + $position = $position->advance("\n\n\nabc"); + $this->assertEquals(5, $position->line()); + $this->assertEquals(4, $position->column()); + $this->assertEquals(8, $position->bytePosition()); + } + + /** + * The german word Über (over) is counted as 5:4 based on strlen:mb_strlen respectively. + * @test + */ + public function multibyte_update() + { + $position = PositionWithBytes::initial(); + $this->assertEquals(1, $position->line()); + $this->assertEquals(1, $position->column()); + $this->assertEquals(0, $position->bytePosition()); + $position = $position->advance("Ü"); + $this->assertEquals(1, $position->line()); + $this->assertEquals(2, $position->column()); + $this->assertEquals(2, $position->bytePosition()); + $position = $position->advance("b"); + $this->assertEquals(1, $position->line()); + $this->assertEquals(3, $position->column()); + $this->assertEquals(3, $position->bytePosition()); + $position = $position->advance("er"); + $this->assertEquals(1, $position->line()); + $this->assertEquals(5, $position->column()); + $this->assertEquals(5, $position->bytePosition()); + } + + /** @test */ + public function position_in_sequence() + { + $filePath = dirname(__DIR__) . '/stubs/abc.txt'; + $parser = char('a')->followedBy(char('b')); + $input = new TextFileStream($filePath, PositionWithBytes::initial($filePath)); + $result = $parser->run($input); + + $expectedColumn = 3; + $actualColumn = $result->remainder()->position()->column(); + $this->assertEquals($expectedColumn, $actualColumn); + } + + /** @test */ + public function position_with_tabs() + { + $expected = 10; + // All of these move the column position to 10 + $position = PositionWithBytes::initial()->advance("123456789"); + $this->assertEquals($expected, $position->column()); + $position = PositionWithBytes::initial()->advance("\t56789"); + $this->assertEquals($expected, $position->column()); + $position = PositionWithBytes::initial()->advance("\t\t9"); + $this->assertEquals($expected, $position->column()); + $position = PositionWithBytes::initial()->advance("1\t56789"); + $this->assertEquals($expected, $position->column()); + $position = PositionWithBytes::initial()->advance("123\t56789"); + $this->assertEquals($expected, $position->column()); + } +} diff --git a/tests/Internal/TextFileStreamTest.php b/tests/Internal/TextFileStreamTest.php index b4732e7..ead045a 100644 --- a/tests/Internal/TextFileStreamTest.php +++ b/tests/Internal/TextFileStreamTest.php @@ -11,7 +11,7 @@ namespace Tests\Parsica\Parsica\Internal; use PHPUnit\Framework\TestCase; -use Parsica\Parsica\Internal\Position; +use Parsica\Parsica\Internal\PositionWithBytes; use Parsica\Parsica\TextFileStream; final class TextFileStreamTest extends TestCase @@ -33,7 +33,7 @@ public function take1() $s = new TextFileStream($testStub); $t = $s->take1(); $this->assertEquals("a", $t->chunk()); - $expectedPosition = new Position($testStub, 1, 2, 1); + $expectedPosition = new PositionWithBytes($testStub, 1, 2, 1); $expectedStream = new TextFileStream($testStub, $expectedPosition); // Because the file socket stream is unique we compare individual aspects... $this->assertEquals($expectedStream->position(), $t->stream()->position()); @@ -48,7 +48,7 @@ public function takeN() $s = new TextFileStream($testStub); $t = $s->takeN(3); $this->assertEquals("abc", $t->chunk()); - $expectedPosition = new Position($testStub, 1, 4, 3); + $expectedPosition = new PositionWithBytes($testStub, 1, 4, 3); $expectedStream = new TextFileStream($testStub, $expectedPosition); $this->assertEquals($expectedStream->position(), $t->stream()->position()); $this->assertEquals($expectedStream->filePath(), $t->stream()->filePath()); @@ -62,7 +62,7 @@ public function takeWhile() $s = new TextFileStream($testStub); $t = $s->takeWhile(fn($c) => $c !== "\n"); $this->assertEquals("abc", $t->chunk()); - $expectedPosition = new Position($testStub, 1, 4, 3); + $expectedPosition = new PositionWithBytes($testStub, 1, 4, 3); $expectedStream = new TextFileStream($testStub, $expectedPosition); $this->assertEquals($expectedStream->position(), $t->stream()->position()); $this->assertEquals($expectedStream->filePath(), $t->stream()->filePath());