-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from graze/tokeniser-states
Tokeniser states
- Loading branch information
Showing
10 changed files
with
259 additions
and
82 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
<?php | ||
|
||
namespace Graze\CsvToken\Tokeniser; | ||
|
||
use RuntimeException; | ||
|
||
/**
 * A single node in the tokeniser's state machine.
 *
 * A state knows which search strings it recognises (and the token type each one
 * produces), and which state to move to once a token of a given type was seen.
 */
class State
{
    // Identifiers for the tokeniser states.
    const S_ANY             = 0;
    const S_IN_QUOTE        = 1;
    const S_IN_ESCAPE       = 2;
    const S_IN_QUOTE_ESCAPE = 4;

    // Bitmasks of Token types associated with each of the states above.
    const S_ANY_TOKENS             = Token::T_ANY & ~Token::T_DOUBLE_QUOTE;
    const S_IN_QUOTE_TOKENS        = Token::T_CONTENT | Token::T_QUOTE | Token::T_DOUBLE_QUOTE | Token::T_ESCAPE;
    const S_IN_ESCAPE_TOKENS       = Token::T_CONTENT;
    const S_IN_QUOTE_ESCAPE_TOKENS = Token::T_CONTENT;

    /** @var array Map of search string => token type recognised by this state */
    private $types;
    /** @var State[] Map of token type bitmask => state to move to */
    private $states = [];

    /**
     * State constructor.
     *
     * @param array $types Map of search string => token type, checked in order by match()
     */
    public function __construct(array $types)
    {
        $this->types = $types;
    }

    /**
     * Find the state to move to after the supplied token type was seen.
     *
     * Targets are checked in the order they were registered; the first mask that
     * shares a bit with the token wins.
     *
     * @param int $token
     *
     * @return State
     *
     * @throws RuntimeException When no registered mask overlaps the supplied token
     */
    public function getNextState($token)
    {
        foreach ($this->states as $mask => $state) {
            if ($mask & $token) {
                return $state;
            }
        }

        throw new RuntimeException("The supplied token: {$token} has no target state");
    }

    /**
     * Register the state to move to for any token type contained in the mask.
     *
     * Registering the same mask twice replaces the earlier target.
     *
     * @param int   $tokenMask
     * @param State $target
     */
    public function addStateTarget($tokenMask, State $target)
    {
        $this->states[$tokenMask] = $target;
    }

    /**
     * Build the token found at the start of the buffer.
     *
     * The first search string that the buffer starts with determines the token;
     * when none matches, the first character of the buffer becomes a content token.
     *
     * NOTE(review): the fallback reads $buffer[0], so callers are presumably
     * expected to pass a non-empty buffer - confirm against the tokeniser.
     *
     * @param int    $position
     * @param string $buffer
     *
     * @return Token
     */
    public function match($position, $buffer)
    {
        foreach ($this->types as $search => $tokenType) {
            // PHP turns numeric-string array keys (e.g. "0") into integers. Cast back
            // to string so the comparison below is always string against string.
            $search = (string) $search;

            // Byte-wise prefix check; a loose `==` would compare numeric-looking
            // strings as numbers (and, before PHP 8, treat any text as equal to 0).
            if (strncmp($buffer, $search, strlen($search)) === 0) {
                return new Token($tokenType, $search, $position);
            }
        }

        return new Token(Token::T_CONTENT, $buffer[0], $position);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
<?php | ||
|
||
namespace Graze\CsvToken\Tokeniser; | ||
|
||
trait StateBuilder
{
    /**
     * Create the tokeniser states and wire up the transitions between them.
     *
     * Each state only receives the entries of $types whose token type is part of
     * that state's token mask.
     *
     * @param array $types
     *
     * @return State The default starting state
     */
    public function buildStates(array $types)
    {
        // Pick the entries whose type shares a bit with the mask, keeping keys and order.
        $typesFor = function ($mask) use ($types) {
            $selected = [];
            foreach ($types as $key => $type) {
                if ($type & $mask) {
                    $selected[$key] = $type;
                }
            }

            return $selected;
        };

        $default      = new State($typesFor(State::S_ANY_TOKENS));
        $quoted       = new State($typesFor(State::S_IN_QUOTE_TOKENS));
        $escaped      = new State($typesFor(State::S_IN_ESCAPE_TOKENS));
        $quotedEscape = new State($typesFor(State::S_IN_QUOTE_ESCAPE_TOKENS));

        // Outside of quotes: everything loops back except a quote or an escape.
        $stayInDefault = Token::T_ANY & ~Token::T_QUOTE & ~Token::T_ESCAPE;
        $default->addStateTarget($stayInDefault, $default);
        $default->addStateTarget(Token::T_QUOTE, $quoted);
        $default->addStateTarget(Token::T_ESCAPE, $escaped);

        // Inside quotes: content and doubled quotes stay, a quote leaves, an escape nests.
        $stayInQuote = Token::T_CONTENT | Token::T_DOUBLE_QUOTE;
        $quoted->addStateTarget($stayInQuote, $quoted);
        $quoted->addStateTarget(Token::T_QUOTE, $default);
        $quoted->addStateTarget(Token::T_ESCAPE, $quotedEscape);

        // After an escape, content returns to the state the escape came from.
        $escaped->addStateTarget(Token::T_CONTENT, $default);
        $quotedEscape->addStateTarget(Token::T_CONTENT, $quoted);

        return $default;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<?php | ||
|
||
namespace Graze\CsvToken\Test\Unit\Tokeniser; | ||
|
||
use Graze\CsvToken\Test\TestCase; | ||
use Graze\CsvToken\Tokeniser\State; | ||
use Graze\CsvToken\Tokeniser\Token; | ||
use RuntimeException; | ||
|
||
class StateTest extends TestCase
{
    /**
     * A token covered by a registered mask resolves to its target state, while a
     * token without any registered target makes getNextState() throw.
     */
    public function testCallGetNextStateWithAnInvalidTokenWillThrowAnException()
    {
        $loop = new State([]);
        $loop->addStateTarget(Token::T_CONTENT, $loop);

        $next = $loop->getNextState(Token::T_CONTENT);
        self::assertSame($loop, $next);

        // No target was registered for escape tokens.
        $this->expectException(RuntimeException::class);
        $loop->getNextState(Token::T_ESCAPE);
    }
}
Oops, something went wrong.