From c10c3d0f9368085a3c7edafbae459338bcb818af Mon Sep 17 00:00:00 2001
From: Colin Roberts
Date: Tue, 12 Nov 2024 18:27:43 -0700
Subject: [PATCH 01/14] feat: hash based JSON verification

---
 circuits.json                                 |  15 +
 circuits/json/parser/hash_machine.circom      | 414 ++++++++++++++++++
 circuits/json/parser/hash_parser.circom       |  72 +++
 .../test/json/parser/hash_machine.test.ts     |  52 +++
 4 files changed, 553 insertions(+)
 create mode 100644 circuits/json/parser/hash_machine.circom
 create mode 100644 circuits/json/parser/hash_parser.circom
 create mode 100644 circuits/test/json/parser/hash_machine.test.ts

diff --git a/circuits.json b/circuits.json
index 97ce86e..f8136f6 100644
--- a/circuits.json
+++ b/circuits.json
@@ -267,5 +267,20 @@
       1024,
       10
     ]
+  },
+  "hash_machine": {
+    "file": "json/parser/hash_machine",
+    "template": "StateUpdateHasher",
+    "params": [
+      10
+    ]
+  },
+  "hash_parser": {
+    "file": "json/parser/hash_parser",
+    "template": "ParserHasher",
+    "params": [
+      1024,
+      10
+    ]
   }
 }
\ No newline at end of file
diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom
new file mode 100644
index 0000000..5d960d3
--- /dev/null
+++ b/circuits/json/parser/hash_machine.circom
@@ -0,0 +1,414 @@
+/*
+# `machine`
+This module consists of the core parsing components for generating proofs of selective disclosure in JSON.
+
+## Layout
+The key ingredients of `parser` are:
+ - `StateUpdate`: has as input a current state of a stack-machine parser.
+    Also takes in a `byte` as input which combines with the current state
+    to produce the `next_*` states.
+ - `StateToMask`: Reads the current state to decide whether to accept instruction tokens
+    or ignore them for the current task (e.g., ignore `[` if `parsing_string == 1`).
+ - `GetTopOfStack`: Helper function that yields the topmost allocated stack value
+    and a pointer (index) to that value.
+ - `RewriteStack`: Combines all the above data and produces the `next_stack`.
+
+`parser` brings in many functions from the `utils` module and `language`.
+The inclusion of `language` allows for this file to (eventually) be generic over
+a grammar for different applications (e.g., HTTP, YAML, TOML, etc.).
+
+## Testing
+Tests for this module are located in the files: `circuits/test/parser/*.test.ts`
+*/
+
+pragma circom 2.1.9;
+
+include "../../utils/array.circom";
+include "../../utils/bytes.circom";
+include "../../utils/operators.circom";
+include "../../utils/hash.circom";
+include "language.circom";
+
+/*
+This template is for updating the state of the parser from a current state to a next state.
+
+# Params:
+ - `MAX_STACK_HEIGHT`: the maximum stack height that can be used before triggering overflow.
+
+# Inputs:
+ - `byte`                      : the byte value of ASCII that was read by the parser.
+ - `stack[MAX_STACK_HEIGHT][2]`: the stack machine's current stack.
+ - `parsing_string`            : a bool flag that indicates whether the parser is currently parsing a string or not.
+ - `parsing_number`            : a bool flag that indicates whether the parser is currently parsing a number or not.
+
+# Outputs:
+ - `next_stack[MAX_STACK_HEIGHT][2]`: the stack machine's stack after reading `byte`.
+ - `next_parsing_string`       : a bool flag that indicates whether the parser is currently parsing a string or not after reading `byte`.
+ - `next_parsing_number`       : a bool flag that indicates whether the parser is currently parsing a number or not after reading `byte`.
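+
+The `tree_hasher[MAX_STACK_HEIGHT]` input and `next_tree_hasher[MAX_STACK_HEIGHT]` output carry the
+tree hasher state before and after reading `byte`; per the note inside the template body, this is
+intended to hold one accumulating hash slot per stack depth.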
+*/ +template StateUpdateHasher(MAX_STACK_HEIGHT) { + signal input byte; + + signal input stack[MAX_STACK_HEIGHT][2]; + signal input parsing_string; + signal input parsing_number; + signal input tree_hasher[MAX_STACK_HEIGHT]; + + signal output next_stack[MAX_STACK_HEIGHT][2]; + signal output next_parsing_string; + signal output next_parsing_number; + signal output next_tree_hasher[MAX_STACK_HEIGHT]; + + component Command = Command(); + + //--------------------------------------------------------------------------------------------// + // Break down what was read + // * read in a start brace `{` * + component readStartBrace = IsEqual(); + readStartBrace.in <== [byte, 123]; + // * read in an end brace `}` * + component readEndBrace = IsEqual(); + readEndBrace.in <== [byte, 125]; + // * read in a start bracket `[` * + component readStartBracket = IsEqual(); + readStartBracket.in <== [byte, 91]; + // * read in an end bracket `]` * + component readEndBracket = IsEqual(); + readEndBracket.in <== [byte, 93]; + // * read in a colon `:` * + component readColon = IsEqual(); + readColon.in <== [byte, 58]; + // * read in a comma `,` * + component readComma = IsEqual(); + readComma.in <== [byte, 44]; + // * read in some delimeter * + signal readDelimeter <== readStartBrace.out + readEndBrace.out + readStartBracket.out + readEndBracket.out + + readColon.out + readComma.out; + // * read in some number * + component readNumber = InRange(8); + readNumber.in <== byte; + readNumber.range <== [48, 57]; // This is the range where ASCII digits are + // * read in a quote `"` * + component readQuote = IsEqual(); + readQuote.in <== [byte, 34]; + component readOther = IsZero(); + readOther.in <== readDelimeter + readNumber.out + readQuote.out; + //--------------------------------------------------------------------------------------------// + // Yield instruction based on what byte we read * + component readStartBraceInstruction = ScalarArrayMul(3); + readStartBraceInstruction.scalar <== readStartBrace.out; + readStartBraceInstruction.array <== Command.START_BRACE; + component readEndBraceInstruction = ScalarArrayMul(3); + readEndBraceInstruction.scalar <== readEndBrace.out; + readEndBraceInstruction.array <== Command.END_BRACE; + component readStartBracketInstruction = ScalarArrayMul(3); + readStartBracketInstruction.scalar <== readStartBracket.out; + readStartBracketInstruction.array <== Command.START_BRACKET; + component readEndBracketInstruction = ScalarArrayMul(3); + readEndBracketInstruction.scalar <== readEndBracket.out; + readEndBracketInstruction.array <== Command.END_BRACKET; + component readColonInstruction = ScalarArrayMul(3); + readColonInstruction.scalar <== readColon.out; + readColonInstruction.array <== Command.COLON; + component readCommaInstruction = ScalarArrayMul(3); + readCommaInstruction.scalar <== readComma.out; + readCommaInstruction.array <== Command.COMMA; + component readNumberInstruction = ScalarArrayMul(3); + readNumberInstruction.scalar <== readNumber.out; + readNumberInstruction.array <== Command.NUMBER; + component readQuoteInstruction = ScalarArrayMul(3); + readQuoteInstruction.scalar <== readQuote.out; + readQuoteInstruction.array <== Command.QUOTE; + + component Instruction = GenericArrayAdd(3,8); + Instruction.arrays <== [readStartBraceInstruction.out, readEndBraceInstruction.out, + readStartBracketInstruction.out, readEndBracketInstruction.out, + readColonInstruction.out, readCommaInstruction.out, + readNumberInstruction.out, readQuoteInstruction.out]; + 
//--------------------------------------------------------------------------------------------// + // Apply state changing data + // * get the instruction mask based on current state * + component mask = StateToMask(MAX_STACK_HEIGHT); + mask.readDelimeter <== readDelimeter; + mask.readNumber <== readNumber.out; + mask.parsing_string <== parsing_string; + mask.parsing_number <== parsing_number; + // * multiply the mask array elementwise with the instruction array * + component mulMaskAndOut = ArrayMul(3); + mulMaskAndOut.lhs <== mask.out; + mulMaskAndOut.rhs <== [Instruction.out[0], Instruction.out[1], Instruction.out[2] - readOther.out]; + // * compute the new stack * + component topOfStack = GetTopOfStack(MAX_STACK_HEIGHT); + topOfStack.stack <== stack; + signal pointer <== topOfStack.pointer; + signal current_value[2] <== topOfStack.value; + component newStack = RewriteStack(MAX_STACK_HEIGHT); + newStack.stack <== stack; + newStack.pointer <== pointer; + newStack.current_value <== current_value; + newStack.read_write_value <== mulMaskAndOut.out[0]; + newStack.readStartBrace <== readStartBrace.out; + newStack.readStartBracket <== readStartBracket.out; + newStack.readEndBrace <== readEndBrace.out; + newStack.readEndBracket <== readEndBracket.out; + newStack.readColon <== readColon.out; + newStack.readComma <== readComma.out; + // * set all the next state of the parser * + next_stack <== newStack.next_stack; + next_parsing_string <== parsing_string + mulMaskAndOut.out[1]; + next_parsing_number <== parsing_number + mulMaskAndOut.out[2]; + signal next_pointer <== newStack.next_pointer; + //--------------------------------------------------------------------------------------------// + + //--------------------------------------------------------------------------------------------// + // Get the next tree hasher state + /* + Idea: + We basically want a hasher that only hashes the KVs in a tree structure, so we have it + store a hash array for the KV hash at a given depth. We will have to accumulate bytes + into the hasher state while reading a value, so ultimately we want to check the hash array + pointer changes right after we get a hash match on the key byte sequence. + + To start, let's just get something that hashes into the array like a buffer. 
+    */
+    // Get the next state hash
+    component packedState = GenericBytePackArray(4,1);
+    packedState.in <== [ [byte], [pointer], [current_value[0]], [current_value[1]] ];
+    signal state_hash <== IndexSelector(MAX_STACK_HEIGHT)(tree_hasher, pointer - 1);
+    signal next_state_hash <== PoseidonChainer()([state_hash, packedState.out[0]]);
+
+    // TODO: can probably output these from rewrite stack
+    // Now, use this to know how to modify the tree_hasher
+    signal is_push <== IsZero()(next_pointer - (pointer + 1));
+    signal is_pop  <== IsZero()(next_pointer - (pointer - 1));
+
+
+    // signal was_write <== parsing_number + parsing_string; // only write to slot if we are parsing a value type
+    // signal is_next_write <== next_parsing_number + next_parsing_string; // only write to slot if we are parsing a value type
+    // signal is_write <== was_write * is_next_write;
+
+    signal was_and_is_parsing_string <== parsing_string * next_parsing_string;
+    signal is_write <== was_and_is_parsing_string + next_parsing_number;
+
+    // signal what_to_write <== is_write * next_state_hash;
+    // signal where_to_write_at[MAX_STACK_HEIGHT];
+    // signal what_to_write_at[MAX_STACK_HEIGHT];
+    // for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) {
+    //     what_to_write_at[i] <== what_to_write
+    // }
+
+    // for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) {
+    //     next_tree_hasher[i] <== tree_hasher[i] * (1 - is_pop) + what_to_write_at[i]; // Rewrite the array, replacing at `i`
+    // }
+
+    signal stack_hashes[MAX_STACK_HEIGHT];
+    for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++){
+        stack_hashes[i] <== PoseidonChainer()(next_stack[i]);
+    }
+    // signal base_hashes[MAX_STACK_HEIGHT] <== ArrayAdd(MAX_STACK_HEIGHT)(stack_hashes, tree_hasher);
+    component writeTo = WriteToIndex(MAX_STACK_HEIGHT, 1);
+    writeTo.array_to_write_to <== stack_hashes;
+    /*
+    IDEA:
+    if push, we write `[state_hash, 0]` at pointer
+    if pop, we write `[0,0]` at pointer
+    if neither, we write `[next_state_hash IF is_write ELSE 0, 0]`
+    */
+
+    signal to_write_if_is_write <== next_state_hash * is_write;
+    signal to_write_if_is_push  <== state_hash * is_push;
+    writeTo.array_to_write_at_index <== [to_write_if_is_write + to_write_if_is_push];
+    writeTo.index <== next_pointer;
+    next_tree_hasher <== writeTo.out;
+    log("--------------------------------");
+    log("state_hash: ", state_hash);
+    log("pointer: ", pointer);
+    log("next_pointer: ", next_pointer);
+    log("byte: ", byte);
+    log("--------------------------------");
+}
+
+/*
+This template takes in the current parsing flags together with the byte classification flags and
+produces the masks used to update the stack and the parsing flags.
+
+# Params:
+ - `n`: tunable parameter for the number of `parsing_states` needed (TODO: could be removed).
+
+# Inputs:
+ - `readDelimeter` : a bool flag that indicates whether the byte value read was a delimeter.
+ - `readNumber`    : a bool flag that indicates whether the byte value read was a number.
+ - `parsing_string`: a bool flag that indicates whether the parser is currently parsing a string or not.
+ - `parsing_number`: a bool flag that indicates whether the parser is currently parsing a number or not.
+
+# Outputs:
+ - `out[3]`: an array of values fed to update the stack and the parsing state flags.
+    - 0: mask for `read_write_value`
+    - 1: mask for `parsing_string`
+    - 2: mask for `parsing_number`
+*/
+template StateToMask(n) {
+    // TODO: Probably need to assert things are bits where necessary.
+ signal input readDelimeter; + signal input readNumber; + signal input parsing_string; + signal input parsing_number; + signal output out[3]; + + + // `read_write_value`can change: IF NOT `parsing_string` + out[0] <== (1 - parsing_string); + + // `parsing_string` can change: + out[1] <== 1 - 2 * parsing_string; + + + //--------------------------------------------------------------------------------------------// + // `parsing_number` is more complicated to deal with + /* We have the possible relevant states below: + [isParsingString, isParsingNumber, readNumber, readDelimeter]; + 1 2 4 8 + Above is the binary value for each if is individually enabled + This is a total of 2^4 states + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]; + and the above is what we want to set `next_parsing_number` to given those + possible. + Below is an optimized version that could instead be done with a `Switch` + */ + signal parsingNumberReadDelimeter <== parsing_number * (readDelimeter); + signal readNumberNotParsingNumber <== (1 - parsing_number) * readNumber; + signal notParsingStringAndParsingNumberReadDelimeterOrReadNumberNotParsingNumber <== (1 - parsing_string) * (parsingNumberReadDelimeter + readNumberNotParsingNumber); + // 10 above ^^^^^^^^^^^^^^^^^ 4 above ^^^^^^^^^^^^^^^^^^ + signal parsingNumberNotReadNumber <== parsing_number * (1 - readNumber) ; + signal parsingNumberNotReadNumberNotReadDelimeter <== parsingNumberNotReadNumber * (1-readDelimeter); + out[2] <== notParsingStringAndParsingNumberReadDelimeterOrReadNumberNotParsingNumber + parsingNumberNotReadNumberNotReadDelimeter; + // Sorry about the long names, but they hopefully read clearly! +} + +// TODO: Check if underconstrained +/* +This template is for getting the values at the top of the stack as well as the pointer to the top. + +# Params: + - `n`: tunable parameter for the stack height. + +# Inputs: + - `stack[n][2]` : the stack to get the values and pointer of. + +# Outputs: + - `value[2]`: the value at the top of the stack + - `pointer` : the pointer for the top of stack index +*/ +template GetTopOfStack(n) { + signal input stack[n][2]; + signal output value[2]; + signal output pointer; + + component isUnallocated[n]; + component atTop = SwitchArray(n,2); + var selector = 0; + for(var i = 0; i < n; i++) { + isUnallocated[i] = IsEqualArray(2); + isUnallocated[i].in[0] <== [0,0]; + isUnallocated[i].in[1] <== stack[i]; + selector += (1 - isUnallocated[i].out); + atTop.branches[i] <== i + 1; + atTop.vals[i] <== stack[i]; + } + atTop.case <== selector; + _ <== atTop.match; + value <== atTop.out; + pointer <== selector; +} + +// TODO: IMPORTANT NOTE, THE STACK IS CONSTRAINED TO 2**8 so the InRange work (could be changed) +/* +This template is for updating the stack given the current stack and the byte we read in `StateUpdate`. + +# Params: + - `n`: tunable parameter for the number of bits needed to represent the `MAX_STACK_HEIGHT`. + +# Inputs: + - `read_write_value` : what value should be pushed to or popped from the stack. + - `readStartBrace` : a bool flag that indicates whether the byte value read was a start brace `{`. + - `readEndBrace` : a bool flag that indicates whether the byte value read was a end brace `}`. + - `readStartBracket` : a bool flag that indicates whether the byte value read was a start bracket `[`. + - `readEndBracket` : a bool flag that indicates whether the byte value read was a end bracket `]`. 
+ - `readColon` : a bool flag that indicates whether the byte value read was a colon `:`. + - `readComma` : a bool flag that indicates whether the byte value read was a comma `,`. + +# Outputs: + - `next_stack[n][2]`: the next stack of the parser. +*/ +template RewriteStack(n) { + assert(n < 2**8); + signal input stack[n][2]; + signal input pointer; + signal input current_value[2]; + + signal input read_write_value; + signal input readStartBrace; + signal input readStartBracket; + signal input readEndBrace; + signal input readEndBracket; + signal input readColon; + signal input readComma; + + signal output next_stack[n][2]; + + //--------------------------------------------------------------------------------------------// + // * scan value on top of stack * + // component topOfStack = GetTopOfStack(n); + // topOfStack.stack <== stack; + // signal pointer <== topOfStack.pointer; + // signal current_value[2] <== topOfStack.value; + // * check if we are currently in a value of an object * + // * check if value indicates currently in an array * + component inArray = IsEqual(); + inArray.in[0] <== current_value[0]; + inArray.in[1] <== 2; + //--------------------------------------------------------------------------------------------// + + //--------------------------------------------------------------------------------------------// + // * composite signals * + signal readCommaInArray <== readComma * inArray.out; + signal readCommaNotInArray <== readComma * (1 - inArray.out); + //--------------------------------------------------------------------------------------------// + + //--------------------------------------------------------------------------------------------// + // * determine whether we are pushing or popping from the stack * + component isPush = IsEqual(); + isPush.in <== [readStartBrace + readStartBracket, 1]; + component isPop = IsEqual(); + isPop.in <== [readEndBrace + readEndBracket, 1]; + // * set an indicator array for where we are pushing to or popping from* + component indicator[n]; + for(var i = 0; i < n; i++) { + // Points + indicator[i] = IsZero(); + indicator[i].in <== pointer - isPop.out - readColon - readComma - i; // Note, pointer points to unallocated region! 
+ } + //--------------------------------------------------------------------------------------------// + + //--------------------------------------------------------------------------------------------// + // * loop to modify the stack by rebuilding it * + signal stack_change_value[2] <== [(isPush.out + isPop.out) * read_write_value, readColon + readCommaInArray - readCommaNotInArray]; + signal second_index_clear[n]; + for(var i = 0; i < n; i++) { + next_stack[i][0] <== stack[i][0] + indicator[i].out * stack_change_value[0]; + second_index_clear[i] <== stack[i][1] * (readEndBrace + readEndBracket); // Checking if we read some end char + next_stack[i][1] <== stack[i][1] + indicator[i].out * (stack_change_value[1] - second_index_clear[i]); + } + //--------------------------------------------------------------------------------------------// + + //--------------------------------------------------------------------------------------------// + // * check for under or overflow + component isUnderflowOrOverflow = InRange(8); + isUnderflowOrOverflow.in <== pointer - isPop.out + isPush.out; + isUnderflowOrOverflow.range <== [0,n]; + isUnderflowOrOverflow.out === 1; + //--------------------------------------------------------------------------------------------// + + signal output next_pointer <== pointer - isPop.out + isPush.out; +} \ No newline at end of file diff --git a/circuits/json/parser/hash_parser.circom b/circuits/json/parser/hash_parser.circom new file mode 100644 index 0000000..9d6e341 --- /dev/null +++ b/circuits/json/parser/hash_parser.circom @@ -0,0 +1,72 @@ +pragma circom 2.1.9; + +include "../../utils/bytes.circom"; +include "hash_machine.circom"; + +template ParserHasher(DATA_BYTES, MAX_STACK_HEIGHT) { + signal input data[DATA_BYTES]; + + // TODO: Add assertions on the inputs here! 
+ + //--------------------------------------------------------------------------------------------// + //-CONSTRAINTS--------------------------------------------------------------------------------// + //--------------------------------------------------------------------------------------------// + component dataASCII = ASCII(DATA_BYTES); + dataASCII.in <== data; + //--------------------------------------------------------------------------------------------// + // Initialze the parser + component State[DATA_BYTES]; + State[0] = StateUpdateHasher(MAX_STACK_HEIGHT); + State[0].byte <== data[0]; + for(var i = 0; i < MAX_STACK_HEIGHT; i++) { + State[0].stack[i] <== [0,0]; + State[0].tree_hasher[i] <== PoseidonChainer()([0,0]); + } + State[0].parsing_string <== 0; + State[0].parsing_number <== 0; + + // Debugging + for(var i = 0; i { + let circuit: WitnessTester<["data"]>; + + it(`array only input`, async () => { + let filename = "array_only"; + let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, [0]); + + circuit = await circomkit.WitnessTester(`Parser`, { + file: "json/parser/hash_parser", + template: "ParserHasher", + params: [input.length, 3], + }); + console.log("#constraints:", await circuit.getConstraintCount()); + + await circuit.expectPass({ + data: input + }); + }); + + console.log("[0,0] hash: ", PoseidonModular([0, 0])); + console.log("[2,0] hash: ", PoseidonModular([2, 0])); + console.log("[2,1] hash: ", PoseidonModular([2, 1])); + console.log("[1,0] hash: ", PoseidonModular([1, 0])); + // [0,0] hash: 14744269619966411208579211824598458697587494354926760081771325075741142829156n + // [2,0] hash: 17525667638260400994329361135304146970274213890416440938331684485841550124768n + // [2,1] hash: 9708419728795563670286566418307042748092204899363634976546883453490873071450n + // [1,0] hash: 18423194802802147121294641945063302532319431080857859605204660473644265519999n + + // TODO: Check that the hash of the packedState.in getting the next_state_hash is correct, the stack hashes are correct. 
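+  // For reference, the pairs hashed above are stack entries of the machine: [0,0] is an unallocated
+  // slot, a first component of 2 marks an array (see `inArray` in `RewriteStack`), and the second
+  // component tracks the colon / comma position (e.g. [2,1] is an array after its first comma).
+  // These are the stack values the tree hasher folds in alongside each byte.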
+ + // it(`example input`, async () => { + // let filename = "example"; + // let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["a"]); + + // circuit = await circomkit.WitnessTester(`Parser`, { + // file: "json/parser/hash_parser", + // template: "ParserHasher", + // params: [input.length, 7], + // }); + // console.log("#constraints:", await circuit.getConstraintCount()); + + // await circuit.expectPass({ + // data: input + // }); + // }); + + + +}) \ No newline at end of file From cb44e39e51d4d72328ebe283c97cb0b9e2b1433b Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Wed, 13 Nov 2024 06:16:39 -0700 Subject: [PATCH 02/14] WIP: save --- circuits/json/parser/hash_machine.circom | 1 + .../test/json/parser/hash_machine.test.ts | 42 ++++++++++++------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index 5d960d3..0008299 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -356,6 +356,7 @@ template RewriteStack(n) { signal input readComma; signal output next_stack[n][2]; + signal output next_tree_hasher[n][2] //--------------------------------------------------------------------------------------------// // * scan value on top of stack * diff --git a/circuits/test/json/parser/hash_machine.test.ts b/circuits/test/json/parser/hash_machine.test.ts index 3ffd729..2ebbd79 100644 --- a/circuits/test/json/parser/hash_machine.test.ts +++ b/circuits/test/json/parser/hash_machine.test.ts @@ -4,7 +4,7 @@ import { PoseidonModular } from "../../common/poseidon"; describe("hash_machine", () => { let circuit: WitnessTester<["data"]>; - it(`array only input`, async () => { + it(`array_only_input`, async () => { let filename = "array_only"; let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, [0]); @@ -31,22 +31,36 @@ describe("hash_machine", () => { // TODO: Check that the hash of the packedState.in getting the next_state_hash is correct, the stack hashes are correct. 
- // it(`example input`, async () => { - // let filename = "example"; - // let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["a"]); + it(`example_input`, async () => { + let filename = "example"; + let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["a"]); - // circuit = await circomkit.WitnessTester(`Parser`, { - // file: "json/parser/hash_parser", - // template: "ParserHasher", - // params: [input.length, 7], - // }); - // console.log("#constraints:", await circuit.getConstraintCount()); + circuit = await circomkit.WitnessTester(`Parser`, { + file: "json/parser/hash_parser", + template: "ParserHasher", + params: [input.length, 7], + }); + console.log("#constraints:", await circuit.getConstraintCount()); - // await circuit.expectPass({ - // data: input - // }); - // }); + await circuit.expectPass({ + data: input + }); + }); + it(`spotify_input`, async () => { + let filename = "spotify"; + let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["data"]); + circuit = await circomkit.WitnessTester(`Parser`, { + file: "json/parser/hash_parser", + template: "ParserHasher", + params: [input.length, 7], + }); + console.log("#constraints:", await circuit.getConstraintCount()); + + await circuit.expectPass({ + data: input + }); + }); }) \ No newline at end of file From 01691f42ef64ec7a7e506cbfda5cd69c12d0ca57 Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Thu, 14 Nov 2024 18:20:54 -0700 Subject: [PATCH 03/14] resetting for clearer approach --- circuits/json/parser/hash_machine.circom | 146 ++++++++++-------- circuits/json/parser/hash_parser.circom | 26 +--- .../test/json/parser/hash_machine.test.ts | 60 +++---- 3 files changed, 117 insertions(+), 115 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index 0008299..0542c9f 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -52,12 +52,12 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { signal input stack[MAX_STACK_HEIGHT][2]; signal input parsing_string; signal input parsing_number; - signal input tree_hasher[MAX_STACK_HEIGHT]; + signal input tree_hasher[MAX_STACK_HEIGHT][2]; signal output next_stack[MAX_STACK_HEIGHT][2]; signal output next_parsing_string; signal output next_parsing_number; - signal output next_tree_hasher[MAX_STACK_HEIGHT]; + signal output next_tree_hasher[MAX_STACK_HEIGHT][2]; component Command = Command(); @@ -144,6 +144,8 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { signal current_value[2] <== topOfStack.value; component newStack = RewriteStack(MAX_STACK_HEIGHT); newStack.stack <== stack; + newStack.tree_hasher <== tree_hasher; + newStack.byte <== byte; newStack.pointer <== pointer; newStack.current_value <== current_value; newStack.read_write_value <== mulMaskAndOut.out[0]; @@ -157,74 +159,74 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { next_stack <== newStack.next_stack; next_parsing_string <== parsing_string + mulMaskAndOut.out[1]; next_parsing_number <== parsing_number + mulMaskAndOut.out[2]; - signal next_pointer <== newStack.next_pointer; + next_tree_hasher <== newStack.next_tree_hasher; //--------------------------------------------------------------------------------------------// - //--------------------------------------------------------------------------------------------// - // Get the next tree hasher state - /* - Idea: - We basically want a hasher that only hashes the KVs in a tree structure, so we have it - store a hash array 
for the KV hash at a given depth. We will have to accumulate bytes - into the hasher state while reading a value, so ultimately we want to check the hash array - pointer changes right after we get a hash match on the key byte sequence. - - To start, let's just get something that hashes into the array like a buffer. - */ - // Get the next state hash - component packedState = GenericBytePackArray(4,1); - packedState.in <== [ [byte], [pointer], [current_value[0]], [current_value[1]] ]; - signal state_hash <== IndexSelector(MAX_STACK_HEIGHT)(tree_hasher, pointer - 1); - signal next_state_hash <== PoseidonChainer()([state_hash, packedState.out[0]]); - - // TODO: can probably output these from rewrite stack - // Now, use this to know how to modify the tree_hasher - signal is_push <== IsZero()(next_pointer - (pointer + 1)); - signal is_pop <== IsZero()(next_pointer - (pointer - 1)); - - - // signal was_write <== parsing_number + parsing_string; // only write to slot if we are parsing a value type - // signal is_next_write <== next_parsing_number + next_parsing_string; // only write to slot if we are parsing a value type - // signal is_write <== was_write * is_next_write; - - signal was_and_is_parsing_string <== parsing_string * next_parsing_string; - signal is_write <== was_and_is_parsing_string + next_parsing_number; - - // signal what_to_write <== is_write * next_state_hash; - // signal where_to_write_at[MAX_STACK_HEIGHT]; - // signal what_to_write_at[MAX_STACK_HEIGHT]; - // for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) { - // what_to_write_at[i] <== what_to_write - // } - - // for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) { - // next_tree_hasher[i] <== tree_hasher[i] * (1 - is_pop) + what_to_write_at[i]; // Rewrite the array, replacing at `i` - // } + // //--------------------------------------------------------------------------------------------// + // // Get the next tree hasher state + // /* + // Idea: + // We basically want a hasher that only hashes the KVs in a tree structure, so we have it + // store a hash array for the KV hash at a given depth. We will have to accumulate bytes + // into the hasher state while reading a value, so ultimately we want to check the hash array + // pointer changes right after we get a hash match on the key byte sequence. + + // To start, let's just get something that hashes into the array like a buffer. 
+ // */ + // // Get the next state hash + // component packedState = GenericBytePackArray(4,1); + // packedState.in <== [ [byte], [pointer], [current_value[0]], [current_value[1]] ]; + // signal state_hash <== IndexSelector(MAX_STACK_HEIGHT)(tree_hasher, pointer - 1); + // signal next_state_hash <== PoseidonChainer()([state_hash, packedState.out[0]]); + + // // TODO: can probably output these from rewrite stack + // // Now, use this to know how to modify the tree_hasher + // signal is_push <== IsZero()(next_pointer - (pointer + 1)); + // signal is_pop <== IsZero()(next_pointer - (pointer - 1)); + + + // // signal was_write <== parsing_number + parsing_string; // only write to slot if we are parsing a value type + // // signal is_next_write <== next_parsing_number + next_parsing_string; // only write to slot if we are parsing a value type + // // signal is_write <== was_write * is_next_write; + + // signal was_and_is_parsing_string <== parsing_string * next_parsing_string; + // signal is_write <== was_and_is_parsing_string + next_parsing_number; + + // // signal what_to_write <== is_write * next_state_hash; + // // signal where_to_write_at[MAX_STACK_HEIGHT]; + // // signal what_to_write_at[MAX_STACK_HEIGHT]; + // // for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) { + // // what_to_write_at[i] <== what_to_write + // // } + + // // for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) { + // // next_tree_hasher[i] <== tree_hasher[i] * (1 - is_pop) + what_to_write_at[i]; // Rewrite the array, replacing at `i` + // // } - signal stack_hashes[MAX_STACK_HEIGHT]; - for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++){ - stack_hashes[i] <== PoseidonChainer()(next_stack[i]); - } - // signal base_hashes[MAX_STACK_HEIGHT] <== ArrayAdd(MAX_STACK_HEIGHT)(stack_hashes, tree_hasher); - component writeTo = WriteToIndex(MAX_STACK_HEIGHT, 1); - writeTo.array_to_write_to <== stack_hashes; - /* - IDEA: - if push, we write `[state_hash, 0]` at pointer - if pop, we write `[0,0]` at pointer - if neither, we write `[next_state_hash IF is_write ELSE 0, 0 ] - - */ + // signal stack_hashes[MAX_STACK_HEIGHT]; + // for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++){ + // stack_hashes[i] <== PoseidonChainer()(next_stack[i]); + // } + // // signal base_hashes[MAX_STACK_HEIGHT] <== ArrayAdd(MAX_STACK_HEIGHT)(stack_hashes, tree_hasher); + // component writeTo = WriteToIndex(MAX_STACK_HEIGHT, 1); + // writeTo.array_to_write_to <== stack_hashes; + // /* + // IDEA: + // if push, we write `[state_hash, 0]` at pointer + // if pop, we write `[0,0]` at pointer + // if neither, we write `[next_state_hash IF is_write ELSE 0, 0 ] + + // */ - signal to_write_if_is_write <== next_state_hash * is_write; - signal to_write_if_is_push <== state_hash * is_push; - writeTo.array_to_write_at_index <== [to_write_if_is_write + to_write_if_is_push]; - writeTo.index <== next_pointer; - next_tree_hasher <== writeTo.out; + // signal to_write_if_is_write <== next_state_hash * is_write; + // signal to_write_if_is_push <== state_hash * is_push; + // writeTo.array_to_write_at_index <== [to_write_if_is_write + to_write_if_is_push]; + // writeTo.index <== next_pointer; + // next_tree_hasher <== writeTo.out; log("--------------------------------"); - log("state_hash: ", state_hash); - log("pointer: ", pointer); - log("next_pointer: ", next_pointer); + // log("state_hash: ", state_hash); + // log("pointer: ", pointer); + // log("next_pointer: ", next_pointer); log("byte: ", byte); log("--------------------------------"); } @@ -344,9 +346,12 @@ This template is for updating the stack 
given the current stack and the byte we template RewriteStack(n) { assert(n < 2**8); signal input stack[n][2]; + signal input tree_hasher[n][2]; signal input pointer; signal input current_value[2]; + signal input byte; + signal input read_write_value; signal input readStartBrace; signal input readStartBracket; @@ -356,7 +361,7 @@ template RewriteStack(n) { signal input readComma; signal output next_stack[n][2]; - signal output next_tree_hasher[n][2] + signal output next_tree_hasher[n][2]; //--------------------------------------------------------------------------------------------// // * scan value on top of stack * @@ -392,6 +397,12 @@ template RewriteStack(n) { } //--------------------------------------------------------------------------------------------// + /* TODO: Okay, for sake of simplicity, it would probably be much easier to just use the + WriteToIndex here for both the stack and tree hasher. Much more ergonomic and can probably + replace a good amount of this. + */ + + //--------------------------------------------------------------------------------------------// // * loop to modify the stack by rebuilding it * signal stack_change_value[2] <== [(isPush.out + isPop.out) * read_write_value, readColon + readCommaInArray - readCommaNotInArray]; @@ -400,6 +411,9 @@ template RewriteStack(n) { next_stack[i][0] <== stack[i][0] + indicator[i].out * stack_change_value[0]; second_index_clear[i] <== stack[i][1] * (readEndBrace + readEndBracket); // Checking if we read some end char next_stack[i][1] <== stack[i][1] + indicator[i].out * (stack_change_value[1] - second_index_clear[i]); + + next_tree_hasher[i][0] <== tree_hasher[i][0] + indicator[i].out * (stack_change_value[0] + byte); + next_tree_hasher[i][1] <== tree_hasher[i][1] + indicator[i].out; } //--------------------------------------------------------------------------------------------// diff --git a/circuits/json/parser/hash_parser.circom b/circuits/json/parser/hash_parser.circom index 9d6e341..d22b36a 100644 --- a/circuits/json/parser/hash_parser.circom +++ b/circuits/json/parser/hash_parser.circom @@ -6,34 +6,26 @@ include "hash_machine.circom"; template ParserHasher(DATA_BYTES, MAX_STACK_HEIGHT) { signal input data[DATA_BYTES]; - // TODO: Add assertions on the inputs here! 
- //--------------------------------------------------------------------------------------------// - //-CONSTRAINTS--------------------------------------------------------------------------------// - //--------------------------------------------------------------------------------------------// - component dataASCII = ASCII(DATA_BYTES); - dataASCII.in <== data; //--------------------------------------------------------------------------------------------// // Initialze the parser component State[DATA_BYTES]; State[0] = StateUpdateHasher(MAX_STACK_HEIGHT); State[0].byte <== data[0]; for(var i = 0; i < MAX_STACK_HEIGHT; i++) { - State[0].stack[i] <== [0,0]; - State[0].tree_hasher[i] <== PoseidonChainer()([0,0]); + State[0].stack[i] <== [0,0]; + State[0].tree_hasher[i] <== [0,0]; } State[0].parsing_string <== 0; State[0].parsing_number <== 0; // Debugging for(var i = 0; i { }); }); - console.log("[0,0] hash: ", PoseidonModular([0, 0])); - console.log("[2,0] hash: ", PoseidonModular([2, 0])); - console.log("[2,1] hash: ", PoseidonModular([2, 1])); - console.log("[1,0] hash: ", PoseidonModular([1, 0])); + // console.log("[0,0] hash: ", PoseidonModular([0, 0])); + // console.log("[2,0] hash: ", PoseidonModular([2, 0])); + // console.log("[2,1] hash: ", PoseidonModular([2, 1])); + // console.log("[1,0] hash: ", PoseidonModular([1, 0])); // [0,0] hash: 14744269619966411208579211824598458697587494354926760081771325075741142829156n // [2,0] hash: 17525667638260400994329361135304146970274213890416440938331684485841550124768n // [2,1] hash: 9708419728795563670286566418307042748092204899363634976546883453490873071450n @@ -31,36 +31,36 @@ describe("hash_machine", () => { // TODO: Check that the hash of the packedState.in getting the next_state_hash is correct, the stack hashes are correct. 
- it(`example_input`, async () => { - let filename = "example"; - let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["a"]); + // it(`example_input`, async () => { + // let filename = "example"; + // let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["a"]); - circuit = await circomkit.WitnessTester(`Parser`, { - file: "json/parser/hash_parser", - template: "ParserHasher", - params: [input.length, 7], - }); - console.log("#constraints:", await circuit.getConstraintCount()); + // circuit = await circomkit.WitnessTester(`Parser`, { + // file: "json/parser/hash_parser", + // template: "ParserHasher", + // params: [input.length, 7], + // }); + // console.log("#constraints:", await circuit.getConstraintCount()); - await circuit.expectPass({ - data: input - }); - }); + // await circuit.expectPass({ + // data: input + // }); + // }); - it(`spotify_input`, async () => { - let filename = "spotify"; - let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["data"]); + // it(`spotify_input`, async () => { + // let filename = "spotify"; + // let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["data"]); - circuit = await circomkit.WitnessTester(`Parser`, { - file: "json/parser/hash_parser", - template: "ParserHasher", - params: [input.length, 7], - }); - console.log("#constraints:", await circuit.getConstraintCount()); + // circuit = await circomkit.WitnessTester(`Parser`, { + // file: "json/parser/hash_parser", + // template: "ParserHasher", + // params: [input.length, 7], + // }); + // console.log("#constraints:", await circuit.getConstraintCount()); - await circuit.expectPass({ - data: input - }); - }); + // await circuit.expectPass({ + // data: input + // }); + // }); }) \ No newline at end of file From b64014e02e30ae69af44485ad4f3efe0d2d35b20 Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Fri, 15 Nov 2024 09:37:39 -0700 Subject: [PATCH 04/14] good save state --- circuits/json/parser/hash_machine.circom | 55 +++++++++++++++--------- circuits/json/parser/hash_parser.circom | 29 ++++++------- 2 files changed, 47 insertions(+), 37 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index 0542c9f..f666e1e 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -365,6 +365,7 @@ template RewriteStack(n) { //--------------------------------------------------------------------------------------------// // * scan value on top of stack * + // TODO: We do this outside rn // component topOfStack = GetTopOfStack(n); // topOfStack.stack <== stack; // signal pointer <== topOfStack.pointer; @@ -374,6 +375,12 @@ template RewriteStack(n) { component inArray = IsEqual(); inArray.in[0] <== current_value[0]; inArray.in[1] <== 2; + + + // TODO: doing the same now for tree hasher + component topOfTreeHasher = GetTopOfStack(n); + topOfTreeHasher.stack <== tree_hasher; + signal tree_hasher_current_value[2] <== topOfTreeHasher.value; //--------------------------------------------------------------------------------------------// //--------------------------------------------------------------------------------------------// @@ -384,16 +391,13 @@ template RewriteStack(n) { //--------------------------------------------------------------------------------------------// // * determine whether we are pushing or popping from the stack * - component isPush = IsEqual(); - isPush.in <== [readStartBrace + readStartBracket, 1]; - component isPop = 
IsEqual(); - isPop.in <== [readEndBrace + readEndBracket, 1]; - // * set an indicator array for where we are pushing to or popping from* - component indicator[n]; + signal isPush <== IsEqual()([readStartBrace + readStartBracket, 1]); + signal isPop <== IsEqual()([readEndBrace + readEndBracket, 1]); + signal nextPointer <== pointer + isPush - isPop; + // // * set an indicator array for where we are pushing to or popping from* + signal indicator[n]; for(var i = 0; i < n; i++) { - // Points - indicator[i] = IsZero(); - indicator[i].in <== pointer - isPop.out - readColon - readComma - i; // Note, pointer points to unallocated region! + indicator[i] <== IsZero()(pointer - isPop - readColon - readComma - i); // Note, pointer points to unallocated region! } //--------------------------------------------------------------------------------------------// @@ -401,29 +405,40 @@ template RewriteStack(n) { WriteToIndex here for both the stack and tree hasher. Much more ergonomic and can probably replace a good amount of this. */ + // signal stack0[n]; + // signal stack1[n]; + // for(var i = 0 ; i < n ; i++) { + // stack0[i] <== stack[i][0]; + // stack1[i] <== stack[i][1]; + // } + // signal stack0Change[2] <== [isPush * current_value[0], isPop * 0 + current_value[0]]; + // signal newStack0[n] <== WriteToIndex(n, 2)(stack0, stack0Change, pointer); + + // signal stack1Change[2] <== [isPush * current_value[1], isPop * 0 + current_value[1]]; + // signal newStack1[n] <== WriteToIndex(n, 2)(stack1, stack1Change, pointer); //--------------------------------------------------------------------------------------------// // * loop to modify the stack by rebuilding it * - signal stack_change_value[2] <== [(isPush.out + isPop.out) * read_write_value, readColon + readCommaInArray - readCommaNotInArray]; + + signal stack_change_value[2] <== [(isPush + isPop) * read_write_value, readColon + readCommaInArray - readCommaNotInArray]; + // signal tree_hash_change_value[2] <== [(isPush + isPop), readColon + readCommaInArray - readCommaNotInArray]; signal second_index_clear[n]; + signal tree_hash_index_clear[2] <== [tree_hasher_current_value[0] * isPop, tree_hasher_current_value[1] * isPop]; + signal tree_hash_index_add[2] <== [(isPush + isPop) * byte, (readColon + readCommaInArray - readCommaNotInArray) * byte]; for(var i = 0; i < n; i++) { - next_stack[i][0] <== stack[i][0] + indicator[i].out * stack_change_value[0]; + next_stack[i][0] <== stack[i][0] + indicator[i] * stack_change_value[0]; second_index_clear[i] <== stack[i][1] * (readEndBrace + readEndBracket); // Checking if we read some end char - next_stack[i][1] <== stack[i][1] + indicator[i].out * (stack_change_value[1] - second_index_clear[i]); + next_stack[i][1] <== stack[i][1] + indicator[i] * (stack_change_value[1] - second_index_clear[i]); - next_tree_hasher[i][0] <== tree_hasher[i][0] + indicator[i].out * (stack_change_value[0] + byte); - next_tree_hasher[i][1] <== tree_hasher[i][1] + indicator[i].out; + next_tree_hasher[i][0] <== tree_hasher[i][0] + indicator[i] * (tree_hash_index_add[0] - tree_hash_index_clear[0]); + next_tree_hasher[i][1] <== tree_hasher[i][1] + indicator[i] * (tree_hash_index_add[1] - tree_hash_index_clear[1]); } //--------------------------------------------------------------------------------------------// //--------------------------------------------------------------------------------------------// // * check for under or overflow - component isUnderflowOrOverflow = InRange(8); - isUnderflowOrOverflow.in <== pointer - isPop.out + 
isPush.out; - isUnderflowOrOverflow.range <== [0,n]; - isUnderflowOrOverflow.out === 1; + signal isUnderflowOrOverflow <== InRange(8)(pointer - isPop + isPush, [0,n]); + isUnderflowOrOverflow === 1; //--------------------------------------------------------------------------------------------// - - signal output next_pointer <== pointer - isPop.out + isPush.out; } \ No newline at end of file diff --git a/circuits/json/parser/hash_parser.circom b/circuits/json/parser/hash_parser.circom index d22b36a..88fdf41 100644 --- a/circuits/json/parser/hash_parser.circom +++ b/circuits/json/parser/hash_parser.circom @@ -21,11 +21,13 @@ template ParserHasher(DATA_BYTES, MAX_STACK_HEIGHT) { // Debugging for(var i = 0; i Date: Fri, 15 Nov 2024 10:04:40 -0700 Subject: [PATCH 05/14] feat: working hash version Though this will be too expensive, the idea works! --- circuits/json/parser/hash_machine.circom | 136 +++-------------------- circuits/json/parser/hash_parser.circom | 27 +++-- 2 files changed, 33 insertions(+), 130 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index f666e1e..60f00df 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -52,12 +52,12 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { signal input stack[MAX_STACK_HEIGHT][2]; signal input parsing_string; signal input parsing_number; - signal input tree_hasher[MAX_STACK_HEIGHT][2]; + signal input tree_hash; signal output next_stack[MAX_STACK_HEIGHT][2]; signal output next_parsing_string; signal output next_parsing_number; - signal output next_tree_hasher[MAX_STACK_HEIGHT][2]; + signal output next_tree_hash; component Command = Command(); @@ -137,17 +137,8 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { component mulMaskAndOut = ArrayMul(3); mulMaskAndOut.lhs <== mask.out; mulMaskAndOut.rhs <== [Instruction.out[0], Instruction.out[1], Instruction.out[2] - readOther.out]; - // * compute the new stack * - component topOfStack = GetTopOfStack(MAX_STACK_HEIGHT); - topOfStack.stack <== stack; - signal pointer <== topOfStack.pointer; - signal current_value[2] <== topOfStack.value; component newStack = RewriteStack(MAX_STACK_HEIGHT); newStack.stack <== stack; - newStack.tree_hasher <== tree_hasher; - newStack.byte <== byte; - newStack.pointer <== pointer; - newStack.current_value <== current_value; newStack.read_write_value <== mulMaskAndOut.out[0]; newStack.readStartBrace <== readStartBrace.out; newStack.readStartBracket <== readStartBracket.out; @@ -159,74 +150,21 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { next_stack <== newStack.next_stack; next_parsing_string <== parsing_string + mulMaskAndOut.out[1]; next_parsing_number <== parsing_number + mulMaskAndOut.out[2]; - next_tree_hasher <== newStack.next_tree_hasher; + //--------------------------------------------------------------------------------------------// + // Hash the next_* states to produce hash we need + signal not_to_hash <== IsZero()(parsing_string * next_parsing_string + next_parsing_number); + signal option_hash[MAX_STACK_HEIGHT]; + signal hashes[MAX_STACK_HEIGHT + 1]; + hashes[0] <== tree_hash; + for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) { + option_hash[i] <== PoseidonChainer()([hashes[i],stack[i][0] + (2**8)*stack[i][1] + (2**16)*byte]); + hashes[i+1] <== not_to_hash * (hashes[i] - option_hash[i]) + option_hash[i]; // same as: (1 - not_to_hash[i]) * option_hash[i] + not_to_hash[i] * hash[i]; + } + next_tree_hash <== hashes[MAX_STACK_HEIGHT]; + 
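+    // Worked example of the packing above (illustrative only): if stack[i] is [1, 0] and the byte
+    // is 'a' (ASCII 97), the word absorbed for that slot is 1 + 0*2^8 + 97*2^16 = 6356993, and
+    // hashes[i+1] = PoseidonChainer()([hashes[i], 6356993]). Bytes read outside of a string or a
+    // number leave the running hash untouched, since then not_to_hash == 1 and hashes[i+1] == hashes[i].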
//--------------------------------------------------------------------------------------------// - // //--------------------------------------------------------------------------------------------// - // // Get the next tree hasher state - // /* - // Idea: - // We basically want a hasher that only hashes the KVs in a tree structure, so we have it - // store a hash array for the KV hash at a given depth. We will have to accumulate bytes - // into the hasher state while reading a value, so ultimately we want to check the hash array - // pointer changes right after we get a hash match on the key byte sequence. - - // To start, let's just get something that hashes into the array like a buffer. - // */ - // // Get the next state hash - // component packedState = GenericBytePackArray(4,1); - // packedState.in <== [ [byte], [pointer], [current_value[0]], [current_value[1]] ]; - // signal state_hash <== IndexSelector(MAX_STACK_HEIGHT)(tree_hasher, pointer - 1); - // signal next_state_hash <== PoseidonChainer()([state_hash, packedState.out[0]]); - - // // TODO: can probably output these from rewrite stack - // // Now, use this to know how to modify the tree_hasher - // signal is_push <== IsZero()(next_pointer - (pointer + 1)); - // signal is_pop <== IsZero()(next_pointer - (pointer - 1)); - - - // // signal was_write <== parsing_number + parsing_string; // only write to slot if we are parsing a value type - // // signal is_next_write <== next_parsing_number + next_parsing_string; // only write to slot if we are parsing a value type - // // signal is_write <== was_write * is_next_write; - - // signal was_and_is_parsing_string <== parsing_string * next_parsing_string; - // signal is_write <== was_and_is_parsing_string + next_parsing_number; - - // // signal what_to_write <== is_write * next_state_hash; - // // signal where_to_write_at[MAX_STACK_HEIGHT]; - // // signal what_to_write_at[MAX_STACK_HEIGHT]; - // // for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) { - // // what_to_write_at[i] <== what_to_write - // // } - - // // for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) { - // // next_tree_hasher[i] <== tree_hasher[i] * (1 - is_pop) + what_to_write_at[i]; // Rewrite the array, replacing at `i` - // // } - - // signal stack_hashes[MAX_STACK_HEIGHT]; - // for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++){ - // stack_hashes[i] <== PoseidonChainer()(next_stack[i]); - // } - // // signal base_hashes[MAX_STACK_HEIGHT] <== ArrayAdd(MAX_STACK_HEIGHT)(stack_hashes, tree_hasher); - // component writeTo = WriteToIndex(MAX_STACK_HEIGHT, 1); - // writeTo.array_to_write_to <== stack_hashes; - // /* - // IDEA: - // if push, we write `[state_hash, 0]` at pointer - // if pop, we write `[0,0]` at pointer - // if neither, we write `[next_state_hash IF is_write ELSE 0, 0 ] - - // */ - - // signal to_write_if_is_write <== next_state_hash * is_write; - // signal to_write_if_is_push <== state_hash * is_push; - // writeTo.array_to_write_at_index <== [to_write_if_is_write + to_write_if_is_push]; - // writeTo.index <== next_pointer; - // next_tree_hasher <== writeTo.out; log("--------------------------------"); - // log("state_hash: ", state_hash); - // log("pointer: ", pointer); - // log("next_pointer: ", next_pointer); log("byte: ", byte); log("--------------------------------"); } @@ -346,12 +284,6 @@ This template is for updating the stack given the current stack and the byte we template RewriteStack(n) { assert(n < 2**8); signal input stack[n][2]; - signal input tree_hasher[n][2]; - signal input pointer; - signal input 
current_value[2]; - - signal input byte; - signal input read_write_value; signal input readStartBrace; signal input readStartBracket; @@ -361,26 +293,17 @@ template RewriteStack(n) { signal input readComma; signal output next_stack[n][2]; - signal output next_tree_hasher[n][2]; //--------------------------------------------------------------------------------------------// // * scan value on top of stack * - // TODO: We do this outside rn - // component topOfStack = GetTopOfStack(n); - // topOfStack.stack <== stack; - // signal pointer <== topOfStack.pointer; - // signal current_value[2] <== topOfStack.value; - // * check if we are currently in a value of an object * + component topOfStack = GetTopOfStack(n); + topOfStack.stack <== stack; + signal pointer <== topOfStack.pointer; + signal current_value[2] <== topOfStack.value; // * check if value indicates currently in an array * component inArray = IsEqual(); inArray.in[0] <== current_value[0]; inArray.in[1] <== 2; - - - // TODO: doing the same now for tree hasher - component topOfTreeHasher = GetTopOfStack(n); - topOfTreeHasher.stack <== tree_hasher; - signal tree_hasher_current_value[2] <== topOfTreeHasher.value; //--------------------------------------------------------------------------------------------// //--------------------------------------------------------------------------------------------// @@ -401,38 +324,15 @@ template RewriteStack(n) { } //--------------------------------------------------------------------------------------------// - /* TODO: Okay, for sake of simplicity, it would probably be much easier to just use the - WriteToIndex here for both the stack and tree hasher. Much more ergonomic and can probably - replace a good amount of this. - */ - // signal stack0[n]; - // signal stack1[n]; - // for(var i = 0 ; i < n ; i++) { - // stack0[i] <== stack[i][0]; - // stack1[i] <== stack[i][1]; - // } - - // signal stack0Change[2] <== [isPush * current_value[0], isPop * 0 + current_value[0]]; - // signal newStack0[n] <== WriteToIndex(n, 2)(stack0, stack0Change, pointer); - - // signal stack1Change[2] <== [isPush * current_value[1], isPop * 0 + current_value[1]]; - // signal newStack1[n] <== WriteToIndex(n, 2)(stack1, stack1Change, pointer); - //--------------------------------------------------------------------------------------------// // * loop to modify the stack by rebuilding it * signal stack_change_value[2] <== [(isPush + isPop) * read_write_value, readColon + readCommaInArray - readCommaNotInArray]; - // signal tree_hash_change_value[2] <== [(isPush + isPop), readColon + readCommaInArray - readCommaNotInArray]; signal second_index_clear[n]; - signal tree_hash_index_clear[2] <== [tree_hasher_current_value[0] * isPop, tree_hasher_current_value[1] * isPop]; - signal tree_hash_index_add[2] <== [(isPush + isPop) * byte, (readColon + readCommaInArray - readCommaNotInArray) * byte]; for(var i = 0; i < n; i++) { next_stack[i][0] <== stack[i][0] + indicator[i] * stack_change_value[0]; second_index_clear[i] <== stack[i][1] * (readEndBrace + readEndBracket); // Checking if we read some end char next_stack[i][1] <== stack[i][1] + indicator[i] * (stack_change_value[1] - second_index_clear[i]); - - next_tree_hasher[i][0] <== tree_hasher[i][0] + indicator[i] * (tree_hash_index_add[0] - tree_hash_index_clear[0]); - next_tree_hasher[i][1] <== tree_hasher[i][1] + indicator[i] * (tree_hash_index_add[1] - tree_hash_index_clear[1]); } //--------------------------------------------------------------------------------------------// diff 
--git a/circuits/json/parser/hash_parser.circom b/circuits/json/parser/hash_parser.circom index 88fdf41..f5a92a2 100644 --- a/circuits/json/parser/hash_parser.circom +++ b/circuits/json/parser/hash_parser.circom @@ -14,18 +14,20 @@ template ParserHasher(DATA_BYTES, MAX_STACK_HEIGHT) { State[0].byte <== data[0]; for(var i = 0; i < MAX_STACK_HEIGHT; i++) { State[0].stack[i] <== [0,0]; - State[0].tree_hasher[i] <== [0,0]; + } + State[0].tree_hash <== 0; State[0].parsing_string <== 0; State[0].parsing_number <== 0; // Debugging for(var i = 0; i Date: Fri, 15 Nov 2024 11:01:42 -0700 Subject: [PATCH 06/14] WIP: need to clear after comma --- circuits/json/parser/hash_machine.circom | 61 ++++++++++++++++-------- circuits/json/parser/hash_parser.circom | 19 ++++---- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index 60f00df..74fd0a1 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -52,12 +52,12 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { signal input stack[MAX_STACK_HEIGHT][2]; signal input parsing_string; signal input parsing_number; - signal input tree_hash; + signal input tree_hash[MAX_STACK_HEIGHT]; signal output next_stack[MAX_STACK_HEIGHT][2]; signal output next_parsing_string; signal output next_parsing_number; - signal output next_tree_hash; + signal output next_tree_hash[MAX_STACK_HEIGHT]; component Command = Command(); @@ -137,8 +137,13 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { component mulMaskAndOut = ArrayMul(3); mulMaskAndOut.lhs <== mask.out; mulMaskAndOut.rhs <== [Instruction.out[0], Instruction.out[1], Instruction.out[2] - readOther.out]; + + next_parsing_string <== parsing_string + mulMaskAndOut.out[1]; + next_parsing_number <== parsing_number + mulMaskAndOut.out[2]; + component newStack = RewriteStack(MAX_STACK_HEIGHT); newStack.stack <== stack; + newStack.tree_hash <== tree_hash; newStack.read_write_value <== mulMaskAndOut.out[0]; newStack.readStartBrace <== readStartBrace.out; newStack.readStartBracket <== readStartBracket.out; @@ -146,23 +151,15 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { newStack.readEndBracket <== readEndBracket.out; newStack.readColon <== readColon.out; newStack.readComma <== readComma.out; + newStack.parsing_string <== parsing_string; + newStack.next_parsing_string <== next_parsing_string; + newStack.next_parsing_number <== next_parsing_number; + newStack.byte <== byte; // * set all the next state of the parser * next_stack <== newStack.next_stack; - next_parsing_string <== parsing_string + mulMaskAndOut.out[1]; - next_parsing_number <== parsing_number + mulMaskAndOut.out[2]; - //--------------------------------------------------------------------------------------------// - // Hash the next_* states to produce hash we need - signal not_to_hash <== IsZero()(parsing_string * next_parsing_string + next_parsing_number); - signal option_hash[MAX_STACK_HEIGHT]; - signal hashes[MAX_STACK_HEIGHT + 1]; - hashes[0] <== tree_hash; - for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) { - option_hash[i] <== PoseidonChainer()([hashes[i],stack[i][0] + (2**8)*stack[i][1] + (2**16)*byte]); - hashes[i+1] <== not_to_hash * (hashes[i] - option_hash[i]) + option_hash[i]; // same as: (1 - not_to_hash[i]) * option_hash[i] + not_to_hash[i] * hash[i]; - } - next_tree_hash <== hashes[MAX_STACK_HEIGHT]; + next_tree_hash <== newStack.next_tree_hash; + - 
//--------------------------------------------------------------------------------------------// log("--------------------------------"); log("byte: ", byte); @@ -284,6 +281,7 @@ This template is for updating the stack given the current stack and the byte we template RewriteStack(n) { assert(n < 2**8); signal input stack[n][2]; + signal input tree_hash[n]; signal input read_write_value; signal input readStartBrace; signal input readStartBracket; @@ -292,7 +290,13 @@ template RewriteStack(n) { signal input readColon; signal input readComma; + signal input parsing_string; + signal input next_parsing_string; + signal input next_parsing_number; + signal input byte; + signal output next_stack[n][2]; + signal output next_tree_hash[n]; //--------------------------------------------------------------------------------------------// // * scan value on top of stack * @@ -319,20 +323,33 @@ template RewriteStack(n) { signal nextPointer <== pointer + isPush - isPop; // // * set an indicator array for where we are pushing to or popping from* signal indicator[n]; + signal tree_hash_indicator[n]; for(var i = 0; i < n; i++) { indicator[i] <== IsZero()(pointer - isPop - readColon - readComma - i); // Note, pointer points to unallocated region! + tree_hash_indicator[i] <== IsZero()(pointer - i - 1); // Note, pointer points to unallocated region! } //--------------------------------------------------------------------------------------------// //--------------------------------------------------------------------------------------------// - // * loop to modify the stack by rebuilding it * + // Hash the next_* states to produce hash we need + signal state_hash <== IndexSelector(n)(tree_hash, pointer - 1); + signal not_to_hash <== IsZero()(parsing_string * next_parsing_string + next_parsing_number); + signal option_hash <== PoseidonChainer()([state_hash, current_value[0] + (2**8)*current_value[1] + (2**16)*byte]); + log("not_to_hash: ", not_to_hash); + signal next_state_hash <== not_to_hash * (state_hash - option_hash) + option_hash; // same as: (1 - not_to_hash[i]) * option_hash[i] + not_to_hash[i] * hash[i]; + //--------------------------------------------------------------------------------------------// + //--------------------------------------------------------------------------------------------// + // * loop to modify the stack and tree hash by rebuilding it * signal stack_change_value[2] <== [(isPush + isPop) * read_write_value, readColon + readCommaInArray - readCommaNotInArray]; signal second_index_clear[n]; + signal not_changed[n]; for(var i = 0; i < n; i++) { - next_stack[i][0] <== stack[i][0] + indicator[i] * stack_change_value[0]; - second_index_clear[i] <== stack[i][1] * (readEndBrace + readEndBracket); // Checking if we read some end char - next_stack[i][1] <== stack[i][1] + indicator[i] * (stack_change_value[1] - second_index_clear[i]); + next_stack[i][0] <== stack[i][0] + indicator[i] * stack_change_value[0]; + second_index_clear[i] <== stack[i][1] * (readEndBrace + readEndBracket); // Checking if we read some end char + next_stack[i][1] <== stack[i][1] + indicator[i] * (stack_change_value[1] - second_index_clear[i]); + not_changed[i] <== tree_hash[i] * (1 - tree_hash_indicator[i]); + next_tree_hash[i] <== not_changed[i] + tree_hash_indicator[i] * next_state_hash; } //--------------------------------------------------------------------------------------------// @@ -341,4 +358,8 @@ template RewriteStack(n) { signal isUnderflowOrOverflow <== InRange(8)(pointer - isPop + isPush, [0,n]); 
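    // Constraining this to 1 enforces that the stack pointer stays within [0, n] after the push/pop, i.e., the stack can neither underflow nor overflow.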
isUnderflowOrOverflow === 1; //--------------------------------------------------------------------------------------------// + + + + } \ No newline at end of file diff --git a/circuits/json/parser/hash_parser.circom b/circuits/json/parser/hash_parser.circom index f5a92a2..b8bbfd9 100644 --- a/circuits/json/parser/hash_parser.circom +++ b/circuits/json/parser/hash_parser.circom @@ -14,9 +14,8 @@ template ParserHasher(DATA_BYTES, MAX_STACK_HEIGHT) { State[0].byte <== data[0]; for(var i = 0; i < MAX_STACK_HEIGHT; i++) { State[0].stack[i] <== [0,0]; - + State[0].tree_hash[i] <== 0; } - State[0].tree_hash <== 0; State[0].parsing_string <== 0; State[0].parsing_number <== 0; @@ -24,10 +23,10 @@ template ParserHasher(DATA_BYTES, MAX_STACK_HEIGHT) { for(var i = 0; i Date: Fri, 15 Nov 2024 15:58:14 -0700 Subject: [PATCH 07/14] WIP: good progress --- circuits/json/parser/hash_machine.circom | 68 +++++++++++++++---- circuits/json/parser/hash_parser.circom | 7 +- .../test/json/parser/hash_machine.test.ts | 8 ++- 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index 74fd0a1..efd7655 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -52,12 +52,12 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { signal input stack[MAX_STACK_HEIGHT][2]; signal input parsing_string; signal input parsing_number; - signal input tree_hash[MAX_STACK_HEIGHT]; + signal input tree_hash[MAX_STACK_HEIGHT][2]; signal output next_stack[MAX_STACK_HEIGHT][2]; signal output next_parsing_string; signal output next_parsing_number; - signal output next_tree_hash[MAX_STACK_HEIGHT]; + signal output next_tree_hash[MAX_STACK_HEIGHT][2]; component Command = Command(); @@ -151,7 +151,9 @@ template StateUpdateHasher(MAX_STACK_HEIGHT) { newStack.readEndBracket <== readEndBracket.out; newStack.readColon <== readColon.out; newStack.readComma <== readComma.out; + newStack.readQuote <== readQuote.out; newStack.parsing_string <== parsing_string; + newStack.parsing_number <== parsing_number; newStack.next_parsing_string <== next_parsing_string; newStack.next_parsing_number <== next_parsing_number; newStack.byte <== byte; @@ -281,7 +283,7 @@ This template is for updating the stack given the current stack and the byte we template RewriteStack(n) { assert(n < 2**8); signal input stack[n][2]; - signal input tree_hash[n]; + signal input tree_hash[n][2]; signal input read_write_value; signal input readStartBrace; signal input readStartBracket; @@ -289,14 +291,16 @@ template RewriteStack(n) { signal input readEndBracket; signal input readColon; signal input readComma; + signal input readQuote; + signal input parsing_number; signal input parsing_string; signal input next_parsing_string; signal input next_parsing_number; signal input byte; signal output next_stack[n][2]; - signal output next_tree_hash[n]; + signal output next_tree_hash[n][2]; //--------------------------------------------------------------------------------------------// // * scan value on top of stack * @@ -323,33 +327,63 @@ template RewriteStack(n) { signal nextPointer <== pointer + isPush - isPop; // // * set an indicator array for where we are pushing to or popping from* signal indicator[n]; - signal tree_hash_indicator[n]; + signal tree_hash_indicator[n][2]; for(var i = 0; i < n; i++) { indicator[i] <== IsZero()(pointer - isPop - readColon - readComma - i); // Note, pointer points to unallocated region! 
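    // The tree-hash indicator marks the topmost allocated stack slot (index pointer - 1), which is where the running hash for the value currently being read accumulates.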
- tree_hash_indicator[i] <== IsZero()(pointer - i - 1); // Note, pointer points to unallocated region! + tree_hash_indicator[i][0] <== IsZero()(pointer - i - 1); + tree_hash_indicator[i][1] <== IsZero()(pointer - i - 1); } //--------------------------------------------------------------------------------------------// //--------------------------------------------------------------------------------------------// // Hash the next_* states to produce hash we need - signal state_hash <== IndexSelector(n)(tree_hash, pointer - 1); + // TODO: This could be optimized -- we don't really need to do the index selector, we can just accumulate elsewhere + component stateHash[2]; + stateHash[0] = IndexSelector(n); + stateHash[0].index <== pointer - 1; + stateHash[1] = IndexSelector(n); + stateHash[1].index <== pointer - 1; + for(var i = 0 ; i < n ; i++) { + stateHash[0].in[i] <== tree_hash[i][0]; + stateHash[1].in[i] <== tree_hash[i][1]; + } + + signal not_to_hash <== IsZero()(parsing_string * next_parsing_string + next_parsing_number); - signal option_hash <== PoseidonChainer()([state_hash, current_value[0] + (2**8)*current_value[1] + (2**16)*byte]); - log("not_to_hash: ", not_to_hash); - signal next_state_hash <== not_to_hash * (state_hash - option_hash) + option_hash; // same as: (1 - not_to_hash[i]) * option_hash[i] + not_to_hash[i] * hash[i]; + signal option_hash[2]; + option_hash[0] <== PoseidonChainer()([stateHash[0].out, byte]); // TODO: Trying this now so we just hash the byte stream of KVs + option_hash[1] <== PoseidonChainer()([stateHash[1].out, byte]); // TODO: Now we are double hashing, we certainly don't need to do this, so should optimize this out + log("to_hash: ", (1-not_to_hash)); + signal next_state_hash[2]; + next_state_hash[0] <== not_to_hash * (stateHash[0].out - option_hash[0]) + option_hash[0]; // same as: (1 - not_to_hash[i]) * option_hash[i] + not_to_hash[i] * hash[i]; + next_state_hash[1] <== not_to_hash * (stateHash[1].out - option_hash[1]) + option_hash[1]; + // ^^^^ next_state_hash is the previous value (state_hash) or it is the newly computed value (option_hash) //--------------------------------------------------------------------------------------------// //--------------------------------------------------------------------------------------------// // * loop to modify the stack and tree hash by rebuilding it * signal stack_change_value[2] <== [(isPush + isPop) * read_write_value, readColon + readCommaInArray - readCommaNotInArray]; signal second_index_clear[n]; - signal not_changed[n]; + signal not_changed[n][2]; + + // TODO: need two signals that say whether to hash into 0 or 1 index of tree hash + signal is_object_key <== IsEqualArray(2)([current_value,[1,0]]); + signal is_object_value <== IsEqualArray(2)([current_value,[1,1]]); + signal is_array <== IsEqual()([current_value[0], 2]); + + signal end_char_for_first <== IsZero()(readColon + readComma + readQuote + (1-next_parsing_number)); + signal to_change_first <== end_char_for_first * (is_object_value + is_array); + signal tree_hash_change_value[2] <== [(1-(isPush + isPop)) * next_state_hash[0], to_change_first * next_state_hash[1]]; for(var i = 0; i < n; i++) { next_stack[i][0] <== stack[i][0] + indicator[i] * stack_change_value[0]; second_index_clear[i] <== stack[i][1] * (readEndBrace + readEndBracket); // Checking if we read some end char next_stack[i][1] <== stack[i][1] + indicator[i] * (stack_change_value[1] - second_index_clear[i]); - not_changed[i] <== tree_hash[i] * (1 - tree_hash_indicator[i]); - 
next_tree_hash[i] <== not_changed[i] + tree_hash_indicator[i] * next_state_hash; + + // Tree hash + // not_changed[i][0] <== tree_hash[i][0] * (1 - tree_hash_indicator[i][0]); + // not_changed[i][1] <== tree_hash[i][1] * (1 - tree_hash_indicator[i][1]); + next_tree_hash[i][0] <== 0; //tree_hash[i][0] + tree_hash_indicator[i][0] * (tree_hash_change_value[0] - tree_hash[i][1]); + next_tree_hash[i][1] <== tree_hash[i][1] + tree_hash_indicator[i][1] * (tree_hash_change_value[1] - tree_hash[i][1]); } //--------------------------------------------------------------------------------------------// @@ -358,8 +392,12 @@ template RewriteStack(n) { signal isUnderflowOrOverflow <== InRange(8)(pointer - isPop + isPush, [0,n]); isUnderflowOrOverflow === 1; //--------------------------------------------------------------------------------------------// +} +/* + NOTES: + Actually, if we check that the stack matches and we get a hash match in all the positions too, then we are good. So we can pass in a target stack and the target hashes and check for the single match (fairly cheap). - -} \ No newline at end of file + For KV pairs, it may be good to just have the tree hashes hash the k into [i][0] and the v into [i][1]. This is just the most sensible way to do things. Still just one hash per loop +*/ \ No newline at end of file diff --git a/circuits/json/parser/hash_parser.circom b/circuits/json/parser/hash_parser.circom index b8bbfd9..d42b17a 100644 --- a/circuits/json/parser/hash_parser.circom +++ b/circuits/json/parser/hash_parser.circom @@ -6,7 +6,6 @@ include "hash_machine.circom"; template ParserHasher(DATA_BYTES, MAX_STACK_HEIGHT) { signal input data[DATA_BYTES]; - //--------------------------------------------------------------------------------------------// // Initialze the parser component State[DATA_BYTES]; @@ -14,7 +13,7 @@ template ParserHasher(DATA_BYTES, MAX_STACK_HEIGHT) { State[0].byte <== data[0]; for(var i = 0; i < MAX_STACK_HEIGHT; i++) { State[0].stack[i] <== [0,0]; - State[0].tree_hash[i] <== 0; + State[0].tree_hash[i] <== [0,0]; } State[0].parsing_string <== 0; State[0].parsing_number <== 0; @@ -24,7 +23,7 @@ template ParserHasher(DATA_BYTES, MAX_STACK_HEIGHT) { log("State[", 0, "].next_stack[", i,"] = [",State[0].next_stack[i][0], "][", State[0].next_stack[i][1],"]" ); } for(var i = 0; i { }); }); - // console.log("[0,0] hash: ", PoseidonModular([0, 0])); + // Numbers for the 42 read in 0th index + console.log("[0,\"4\"] hash: ", PoseidonModular([0, 52])); + console.log("[prev,\"2\"] hash: ", PoseidonModular([BigInt("10851631763548351427431043290272583122934382613350600043660274710013149244741"), 50])); + + // Numbers for the "b" read inside object in 1st index + console.log("[0,\"b\"] hash: ", PoseidonModular([0, 98])); + // console.log("[2,0] hash: ", PoseidonModular([2, 0])); // console.log("[2,1] hash: ", PoseidonModular([2, 1])); // console.log("[1,0] hash: ", PoseidonModular([1, 0])); From 8abc6fe2fd978dd7eba4422a49bca53c02b7f4e9 Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Fri, 15 Nov 2024 16:06:07 -0700 Subject: [PATCH 08/14] WIP: getting keys also now --- circuits/json/parser/hash_machine.circom | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index efd7655..69c90c6 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -371,9 +371,12 @@ template RewriteStack(n) { signal is_object_value <== 
IsEqualArray(2)([current_value,[1,1]]); signal is_array <== IsEqual()([current_value[0], 2]); + signal still_parsing_string <== parsing_string * next_parsing_string; + signal to_change_zeroth <== still_parsing_string * is_object_key; + signal end_char_for_first <== IsZero()(readColon + readComma + readQuote + (1-next_parsing_number)); signal to_change_first <== end_char_for_first * (is_object_value + is_array); - signal tree_hash_change_value[2] <== [(1-(isPush + isPop)) * next_state_hash[0], to_change_first * next_state_hash[1]]; + signal tree_hash_change_value[2] <== [to_change_zeroth * next_state_hash[0], to_change_first * next_state_hash[1]]; for(var i = 0; i < n; i++) { next_stack[i][0] <== stack[i][0] + indicator[i] * stack_change_value[0]; second_index_clear[i] <== stack[i][1] * (readEndBrace + readEndBracket); // Checking if we read some end char @@ -382,7 +385,7 @@ template RewriteStack(n) { // Tree hash // not_changed[i][0] <== tree_hash[i][0] * (1 - tree_hash_indicator[i][0]); // not_changed[i][1] <== tree_hash[i][1] * (1 - tree_hash_indicator[i][1]); - next_tree_hash[i][0] <== 0; //tree_hash[i][0] + tree_hash_indicator[i][0] * (tree_hash_change_value[0] - tree_hash[i][1]); + next_tree_hash[i][0] <== tree_hash[i][0] + tree_hash_indicator[i][0] * (tree_hash_change_value[0] - tree_hash[i][0]); next_tree_hash[i][1] <== tree_hash[i][1] + tree_hash_indicator[i][1] * (tree_hash_change_value[1] - tree_hash[i][1]); } //--------------------------------------------------------------------------------------------// From 7c4fa9177634f40af5464fc8b1c299c71628161e Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Fri, 15 Nov 2024 16:26:05 -0700 Subject: [PATCH 09/14] feat: (mostly?) working tree hasher --- circuits/json/parser/hash_machine.circom | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index 69c90c6..a8e063f 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -373,19 +373,20 @@ template RewriteStack(n) { signal still_parsing_string <== parsing_string * next_parsing_string; signal to_change_zeroth <== still_parsing_string * is_object_key; + signal end_kv <== readComma + readEndBrace;// TODO: This is true if we hit a comma or an end brace + signal end_hash0[n]; - signal end_char_for_first <== IsZero()(readColon + readComma + readQuote + (1-next_parsing_number)); - signal to_change_first <== end_char_for_first * (is_object_value + is_array); + signal not_end_char_for_first <== IsZero()(readColon + readComma + readQuote + (1-next_parsing_number)); + signal to_change_first <== not_end_char_for_first * (is_object_value + is_array) + still_parsing_string; signal tree_hash_change_value[2] <== [to_change_zeroth * next_state_hash[0], to_change_first * next_state_hash[1]]; + for(var i = 0; i < n; i++) { next_stack[i][0] <== stack[i][0] + indicator[i] * stack_change_value[0]; second_index_clear[i] <== stack[i][1] * (readEndBrace + readEndBracket); // Checking if we read some end char next_stack[i][1] <== stack[i][1] + indicator[i] * (stack_change_value[1] - second_index_clear[i]); - // Tree hash - // not_changed[i][0] <== tree_hash[i][0] * (1 - tree_hash_indicator[i][0]); - // not_changed[i][1] <== tree_hash[i][1] * (1 - tree_hash_indicator[i][1]); - next_tree_hash[i][0] <== tree_hash[i][0] + tree_hash_indicator[i][0] * (tree_hash_change_value[0] - tree_hash[i][0]); + end_hash0[i] <== tree_hash[i][0] * end_kv; + 
next_tree_hash[i][0] <== tree_hash[i][0] + tree_hash_indicator[i][0] * (tree_hash_change_value[0] - end_hash0[i]); next_tree_hash[i][1] <== tree_hash[i][1] + tree_hash_indicator[i][1] * (tree_hash_change_value[1] - tree_hash[i][1]); } //--------------------------------------------------------------------------------------------// From 9a90ea8409b13c83d6cda78138097cad3b346743 Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Fri, 15 Nov 2024 16:34:45 -0700 Subject: [PATCH 10/14] seems to be correct for spotify --- circuits/json/parser/hash_machine.circom | 2 +- .../test/json/parser/hash_machine.test.ts | 52 +++++++++---------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index a8e063f..6ebd9e4 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -377,7 +377,7 @@ template RewriteStack(n) { signal end_hash0[n]; signal not_end_char_for_first <== IsZero()(readColon + readComma + readQuote + (1-next_parsing_number)); - signal to_change_first <== not_end_char_for_first * (is_object_value + is_array) + still_parsing_string; + signal to_change_first <== (not_end_char_for_first + still_parsing_string) * (is_object_value + is_array); signal tree_hash_change_value[2] <== [to_change_zeroth * next_state_hash[0], to_change_first * next_state_hash[1]]; for(var i = 0; i < n; i++) { diff --git a/circuits/test/json/parser/hash_machine.test.ts b/circuits/test/json/parser/hash_machine.test.ts index c1bad89..54a9b97 100644 --- a/circuits/test/json/parser/hash_machine.test.ts +++ b/circuits/test/json/parser/hash_machine.test.ts @@ -4,21 +4,21 @@ import { PoseidonModular } from "../../common/poseidon"; describe("hash_machine", () => { let circuit: WitnessTester<["data"]>; - it(`array_only_input`, async () => { - let filename = "array_only"; - let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, [0]); + // it(`array_only_input`, async () => { + // let filename = "array_only"; + // let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, [0]); - circuit = await circomkit.WitnessTester(`Parser`, { - file: "json/parser/hash_parser", - template: "ParserHasher", - params: [input.length, 3], - }); - console.log("#constraints:", await circuit.getConstraintCount()); + // circuit = await circomkit.WitnessTester(`Parser`, { + // file: "json/parser/hash_parser", + // template: "ParserHasher", + // params: [input.length, 3], + // }); + // console.log("#constraints:", await circuit.getConstraintCount()); - await circuit.expectPass({ - data: input - }); - }); + // await circuit.expectPass({ + // data: input + // }); + // }); // Numbers for the 42 read in 0th index console.log("[0,\"4\"] hash: ", PoseidonModular([0, 52])); @@ -54,19 +54,19 @@ describe("hash_machine", () => { // }); - // it(`spotify_input`, async () => { - // let filename = "spotify"; - // let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["data"]); + it(`spotify_input`, async () => { + let filename = "spotify"; + let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["data"]); - // circuit = await circomkit.WitnessTester(`Parser`, { - // file: "json/parser/hash_parser", - // template: "ParserHasher", - // params: [input.length, 7], - // }); - // console.log("#constraints:", await circuit.getConstraintCount()); + circuit = await circomkit.WitnessTester(`Parser`, { + file: "json/parser/hash_parser", + template: "ParserHasher", + params: 
[input.length, 7], + }); + console.log("#constraints:", await circuit.getConstraintCount()); - // await circuit.expectPass({ - // data: input - // }); - // }); + await circuit.expectPass({ + data: input + }); + }); }) \ No newline at end of file From 7075dc16cfc1e28c7dc3f654e25a286833c3115c Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Fri, 15 Nov 2024 16:44:47 -0700 Subject: [PATCH 11/14] perf: first optimization --- circuits/json/parser/hash_machine.circom | 21 ++++---- .../test/json/parser/hash_machine.test.ts | 54 ++++++++++--------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index 6ebd9e4..922a4ab 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -348,15 +348,21 @@ template RewriteStack(n) { stateHash[1].in[i] <== tree_hash[i][1]; } + // TODO: need two signals that say whether to hash into 0 or 1 index of tree hash + signal is_object_key <== IsEqualArray(2)([current_value,[1,0]]); + signal is_object_value <== IsEqualArray(2)([current_value,[1,1]]); + signal is_array <== IsEqual()([current_value[0], 2]); signal not_to_hash <== IsZero()(parsing_string * next_parsing_string + next_parsing_number); - signal option_hash[2]; - option_hash[0] <== PoseidonChainer()([stateHash[0].out, byte]); // TODO: Trying this now so we just hash the byte stream of KVs - option_hash[1] <== PoseidonChainer()([stateHash[1].out, byte]); // TODO: Now we are double hashing, we certainly don't need to do this, so should optimize this out + signal hash_0 <== is_object_key * stateHash[0].out; + signal hash_1 <== (is_object_value + is_array) * stateHash[1].out; + signal option_hash; + option_hash <== PoseidonChainer()([hash_0 + hash_1, byte]); // TODO: Trying this now so we just hash the byte stream of KVs + // option_hash <== PoseidonChainer()([stateHash[1].out, byte]); // TODO: Now we are double hashing, we certainly don't need to do this, so should optimize this out log("to_hash: ", (1-not_to_hash)); signal next_state_hash[2]; - next_state_hash[0] <== not_to_hash * (stateHash[0].out - option_hash[0]) + option_hash[0]; // same as: (1 - not_to_hash[i]) * option_hash[i] + not_to_hash[i] * hash[i]; - next_state_hash[1] <== not_to_hash * (stateHash[1].out - option_hash[1]) + option_hash[1]; + next_state_hash[0] <== not_to_hash * (stateHash[0].out - option_hash) + option_hash; // same as: (1 - not_to_hash[i]) * option_hash[i] + not_to_hash[i] * hash[i]; + next_state_hash[1] <== not_to_hash * (stateHash[1].out - option_hash) + option_hash; // ^^^^ next_state_hash is the previous value (state_hash) or it is the newly computed value (option_hash) //--------------------------------------------------------------------------------------------// @@ -366,10 +372,7 @@ template RewriteStack(n) { signal second_index_clear[n]; signal not_changed[n][2]; - // TODO: need two signals that say whether to hash into 0 or 1 index of tree hash - signal is_object_key <== IsEqualArray(2)([current_value,[1,0]]); - signal is_object_value <== IsEqualArray(2)([current_value,[1,1]]); - signal is_array <== IsEqual()([current_value[0], 2]); + signal still_parsing_string <== parsing_string * next_parsing_string; signal to_change_zeroth <== still_parsing_string * is_object_key; diff --git a/circuits/test/json/parser/hash_machine.test.ts b/circuits/test/json/parser/hash_machine.test.ts index 54a9b97..c72d8f4 100644 --- a/circuits/test/json/parser/hash_machine.test.ts +++ 
b/circuits/test/json/parser/hash_machine.test.ts @@ -4,26 +4,28 @@ import { PoseidonModular } from "../../common/poseidon"; describe("hash_machine", () => { let circuit: WitnessTester<["data"]>; - // it(`array_only_input`, async () => { - // let filename = "array_only"; - // let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, [0]); + it(`array_only_input`, async () => { + let filename = "array_only"; + let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, [0]); - // circuit = await circomkit.WitnessTester(`Parser`, { - // file: "json/parser/hash_parser", - // template: "ParserHasher", - // params: [input.length, 3], - // }); - // console.log("#constraints:", await circuit.getConstraintCount()); + circuit = await circomkit.WitnessTester(`Parser`, { + file: "json/parser/hash_parser", + template: "ParserHasher", + params: [input.length, 3], + }); + console.log("#constraints:", await circuit.getConstraintCount()); - // await circuit.expectPass({ - // data: input - // }); - // }); + await circuit.expectPass({ + data: input + }); + }); // Numbers for the 42 read in 0th index console.log("[0,\"4\"] hash: ", PoseidonModular([0, 52])); console.log("[prev,\"2\"] hash: ", PoseidonModular([BigInt("10851631763548351427431043290272583122934382613350600043660274710013149244741"), 50])); + // Number for the "a" + console.log("[0,\"a\"] hash: ", PoseidonModular([0, 97])); // Numbers for the "b" read inside object in 1st index console.log("[0,\"b\"] hash: ", PoseidonModular([0, 98])); @@ -54,19 +56,19 @@ describe("hash_machine", () => { // }); - it(`spotify_input`, async () => { - let filename = "spotify"; - let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["data"]); + // it(`spotify_input`, async () => { + // let filename = "spotify"; + // let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["data"]); - circuit = await circomkit.WitnessTester(`Parser`, { - file: "json/parser/hash_parser", - template: "ParserHasher", - params: [input.length, 7], - }); - console.log("#constraints:", await circuit.getConstraintCount()); + // circuit = await circomkit.WitnessTester(`Parser`, { + // file: "json/parser/hash_parser", + // template: "ParserHasher", + // params: [input.length, 7], + // }); + // console.log("#constraints:", await circuit.getConstraintCount()); - await circuit.expectPass({ - data: input - }); - }); + // await circuit.expectPass({ + // data: input + // }); + // }); }) \ No newline at end of file From d41b22f3b67ef84bf11211cb221c2e78776ee93f Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Thu, 21 Nov 2024 18:27:14 -0700 Subject: [PATCH 12/14] wip: brain hurty left a note to myself --- circuits/json/parser/hash_machine.circom | 4 ++ .../test/json/parser/hash_machine.test.ts | 52 +++++++++---------- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index 922a4ab..bccbcbf 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -357,10 +357,12 @@ template RewriteStack(n) { signal hash_0 <== is_object_key * stateHash[0].out; signal hash_1 <== (is_object_value + is_array) * stateHash[1].out; signal option_hash; + log("hash_0 + hash_1 = ", hash_0 + hash_1); option_hash <== PoseidonChainer()([hash_0 + hash_1, byte]); // TODO: Trying this now so we just hash the byte stream of KVs // option_hash <== PoseidonChainer()([stateHash[1].out, byte]); // TODO: Now we are double hashing, we 
certainly don't need to do this, so should optimize this out log("to_hash: ", (1-not_to_hash)); signal next_state_hash[2]; + log("option_hash = ", option_hash); next_state_hash[0] <== not_to_hash * (stateHash[0].out - option_hash) + option_hash; // same as: (1 - not_to_hash[i]) * option_hash[i] + not_to_hash[i] * hash[i]; next_state_hash[1] <== not_to_hash * (stateHash[1].out - option_hash) + option_hash; // ^^^^ next_state_hash is the previous value (state_hash) or it is the newly computed value (option_hash) @@ -383,6 +385,8 @@ template RewriteStack(n) { signal to_change_first <== (not_end_char_for_first + still_parsing_string) * (is_object_value + is_array); signal tree_hash_change_value[2] <== [to_change_zeroth * next_state_hash[0], to_change_first * next_state_hash[1]]; + + // TODO (autoparallel): Okay, this isn't clearing off the previous hash value and is instead adding them to each other. I suppose this isn't wrong, but it's not what is intended. I really need to refactor this shit. for(var i = 0; i < n; i++) { next_stack[i][0] <== stack[i][0] + indicator[i] * stack_change_value[0]; second_index_clear[i] <== stack[i][1] * (readEndBrace + readEndBracket); // Checking if we read some end char diff --git a/circuits/test/json/parser/hash_machine.test.ts b/circuits/test/json/parser/hash_machine.test.ts index c72d8f4..268373a 100644 --- a/circuits/test/json/parser/hash_machine.test.ts +++ b/circuits/test/json/parser/hash_machine.test.ts @@ -4,21 +4,21 @@ import { PoseidonModular } from "../../common/poseidon"; describe("hash_machine", () => { let circuit: WitnessTester<["data"]>; - it(`array_only_input`, async () => { - let filename = "array_only"; - let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, [0]); + // it(`array_only_input`, async () => { + // let filename = "array_only"; + // let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, [0]); - circuit = await circomkit.WitnessTester(`Parser`, { - file: "json/parser/hash_parser", - template: "ParserHasher", - params: [input.length, 3], - }); - console.log("#constraints:", await circuit.getConstraintCount()); + // circuit = await circomkit.WitnessTester(`Parser`, { + // file: "json/parser/hash_parser", + // template: "ParserHasher", + // params: [input.length, 3], + // }); + // console.log("#constraints:", await circuit.getConstraintCount()); - await circuit.expectPass({ - data: input - }); - }); + // await circuit.expectPass({ + // data: input + // }); + // }); // Numbers for the 42 read in 0th index console.log("[0,\"4\"] hash: ", PoseidonModular([0, 52])); @@ -56,19 +56,19 @@ describe("hash_machine", () => { // }); - // it(`spotify_input`, async () => { - // let filename = "spotify"; - // let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["data"]); + it(`spotify_input`, async () => { + let filename = "spotify"; + let [input, keyUnicode, output] = readJSONInputFile(`${filename}.json`, ["data"]); - // circuit = await circomkit.WitnessTester(`Parser`, { - // file: "json/parser/hash_parser", - // template: "ParserHasher", - // params: [input.length, 7], - // }); - // console.log("#constraints:", await circuit.getConstraintCount()); + circuit = await circomkit.WitnessTester(`Parser`, { + file: "json/parser/hash_parser", + template: "ParserHasher", + params: [input.length, 7], + }); + console.log("#constraints:", await circuit.getConstraintCount()); - // await circuit.expectPass({ - // data: input - // }); - // }); + await circuit.expectPass({ + data: input + }); + }); }) \ No 
newline at end of file From b50e163f72fa62388aff820054ca545bf99ac871 Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Fri, 22 Nov 2024 09:28:58 -0700 Subject: [PATCH 13/14] fix: tree hasher seems correct now --- circuits/json/parser/hash_machine.circom | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index bccbcbf..e5b5e06 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -374,8 +374,6 @@ template RewriteStack(n) { signal second_index_clear[n]; signal not_changed[n][2]; - - signal still_parsing_string <== parsing_string * next_parsing_string; signal to_change_zeroth <== still_parsing_string * is_object_key; signal end_kv <== readComma + readEndBrace;// TODO: This is true if we hit a comma or an end brace @@ -383,7 +381,8 @@ template RewriteStack(n) { signal not_end_char_for_first <== IsZero()(readColon + readComma + readQuote + (1-next_parsing_number)); signal to_change_first <== (not_end_char_for_first + still_parsing_string) * (is_object_value + is_array); - signal tree_hash_change_value[2] <== [to_change_zeroth * next_state_hash[0], to_change_first * next_state_hash[1]]; + // signal tree_hash_change_value[2] <== [to_change_zeroth * next_state_hash[0], to_change_first * next_state_hash[1]]; + signal tree_hash_change_value[2] <== [(1-end_kv) * next_state_hash[0], to_change_first * next_state_hash[1]]; // TODO (autoparallel): Okay, this isn't clearing off the previous hash value and is instead adding them to each other. I suppose this isn't wrong, but it's not what is intended. I really need to refactor this shit. @@ -393,7 +392,8 @@ template RewriteStack(n) { next_stack[i][1] <== stack[i][1] + indicator[i] * (stack_change_value[1] - second_index_clear[i]); end_hash0[i] <== tree_hash[i][0] * end_kv; - next_tree_hash[i][0] <== tree_hash[i][0] + tree_hash_indicator[i][0] * (tree_hash_change_value[0] - end_hash0[i]); + // next_tree_hash[i][0] <== tree_hash[i][0] + tree_hash_indicator[i][0] * (tree_hash_change_value[0] - end_hash0[i]); + next_tree_hash[i][0] <== tree_hash[i][0] + tree_hash_indicator[i][0] * (tree_hash_change_value[0] - tree_hash[i][0]); next_tree_hash[i][1] <== tree_hash[i][1] + tree_hash_indicator[i][1] * (tree_hash_change_value[1] - tree_hash[i][1]); } //--------------------------------------------------------------------------------------------// From 5855e773612ce358386b2cfab27b3b09fcc0c33b Mon Sep 17 00:00:00 2001 From: Colin Roberts Date: Fri, 22 Nov 2024 09:53:29 -0700 Subject: [PATCH 14/14] TODO: note to self --- circuits/json/parser/hash_machine.circom | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/circuits/json/parser/hash_machine.circom b/circuits/json/parser/hash_machine.circom index e5b5e06..9b7778a 100644 --- a/circuits/json/parser/hash_machine.circom +++ b/circuits/json/parser/hash_machine.circom @@ -376,7 +376,7 @@ template RewriteStack(n) { signal still_parsing_string <== parsing_string * next_parsing_string; signal to_change_zeroth <== still_parsing_string * is_object_key; - signal end_kv <== readComma + readEndBrace;// TODO: This is true if we hit a comma or an end brace + signal end_kv <== readComma + readEndBrace;// TODO: This is true if we hit a comma or an end brace (should also make sure we are not parsing string!) signal end_hash0[n]; signal not_end_char_for_first <== IsZero()(readColon + readComma + readQuote + (1-next_parsing_number));
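    // not_end_char_for_first is 1 only while a number is still being parsed (next_parsing_number == 1) and the byte read is not a colon, comma, or quote.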