Fix key based matching in setCandidates
rajendrant committed Jan 7, 2019
1 parent 33c3224 commit 3d71c73
Showing 5 changed files with 83 additions and 31 deletions.
15 changes: 13 additions & 2 deletions fuzzaldrin.coffee
@@ -17,12 +17,23 @@ class FuzzaldrinPlusFast
     @obj = new binding.Fuzzaldrin()

   setCandidates: (candidates, options = {}) ->
+    if options.key?
+      @candidates = candidates
+      @candidate_key = options.key
+      candidates = candidates.map((item) => item[@candidate_key])
+    else
+      @candidates = null
+      @candidate_key = null
     @obj.setCandidates(candidates)

   filter: (query, options = {}) ->
     options = parseOptions(options)
-    @obj.filter query, options.maxResults,
-      options.usePathScoring, options.useExtensionBonus
+    returnIndexes = @candidate_key?
+    res = @obj.filter query, options.maxResults,
+      options.usePathScoring, options.useExtensionBonus, returnIndexes
+    if returnIndexes
+      res = res.map((ind) => @candidates[ind])
+    return res

 module.exports =
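This wrapper is the heart of the fix: when a key is given, the original objects are kept on the wrapper, only the keyed strings are handed to the native addon, and filter asks the addon for indexes so results can be mapped back to the original objects. A minimal usage sketch, assuming the module is required under its package name 'fuzzaldrin-plus-fast' (the require path is not shown in this diff):

  fuzzaldrinplusfast = require 'fuzzaldrin-plus-fast'

  files = [
    {uri: '/usr/bin/ls', fname: 'ls'}
    {uri: '/usr/sbin/ls', fname: 'ls'}
  ]

  obj = fuzzaldrinplusfast.New()
  obj.setCandidates files, key: 'fname'
  # The addon matches on 'fname' and returns indexes, which the wrapper
  # maps back to the full objects, so both 'ls' entries survive.
  console.log obj.filter('l')
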
41 changes: 34 additions & 7 deletions spec/filter-options-spec.coffee
@@ -79,10 +79,37 @@ describe "filtering", ->
     ]
     expect(fuzzaldrinplusfast.filter(candidates, 'i', key: 'fname')).toEqual([candidates[3], candidates[2], candidates[1]])

-  # This test fails right now
-  # it "candidates with duplicate values when indexed by key are returned properly", ->
-  #   candidates = [
-  #     {uri: '/usr/bin/ls', fname: 'ls'},
-  #     {uri: '/usr/sbin/ls', fname: 'ls'}
-  #   ]
-  #   expect(fuzzaldrinplusfast.filter(candidates, 'l', key: 'fname')).toEqual([candidates[0], candidates[1]])
+  it "candidates with duplicate values when indexed by key are returned properly", ->
+    candidates = [
+      {uri: '/usr/bin/ls', fname: 'ls'},
+      {uri: '/usr/sbin/ls', fname: 'ls'}
+    ]
+    expect(fuzzaldrinplusfast.filter(candidates, 'l', key: 'fname')).toEqual([candidates[0], candidates[1]])
+
+describe "filtering by creating an object", ->
+  it "with default options", ->
+    obj = fuzzaldrinplusfast.New()
+    obj.setCandidates ['ab', 'abc', 'cd', 'de']
+    expect(obj.filter('a')).toEqual(['ab', 'abc'])
+    expect(obj.filter('b')).toEqual(['ab', 'abc'])
+    expect(obj.filter('c')).toEqual(['cd', 'abc'])
+
+  it "candidates are able to be indexed by a given key", ->
+    candidates = [
+      {uri: '/usr/bin/ls', fname: 'ls'},
+      {uri: '/usr/bin/mkdir', fname: 'mkdir'},
+      {uri: '/usr/sbin/find', fname: 'find'},
+      {uri: '/usr/local/bin/git', fname: 'git'},
+    ]
+    obj = fuzzaldrinplusfast.New()
+    obj.setCandidates candidates, key: 'fname'
+    expect(obj.filter('i')).toEqual([candidates[3], candidates[2], candidates[1]])
+
+  it "candidates with duplicate values when indexed by key are returned properly", ->
+    candidates = [
+      {uri: '/usr/bin/ls', fname: 'ls'},
+      {uri: '/usr/sbin/ls', fname: 'ls'}
+    ]
+    obj = fuzzaldrinplusfast.New()
+    obj.setCandidates candidates, key: 'fname'
+    expect(obj.filter('l')).toEqual([candidates[0], candidates[1]])
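The previously-disabled duplicate test passes now because results round-trip as indexes. A sketch of the failure mode that value-based mapping runs into; this is illustrative only, not code from the repository, and the lookup-table mechanism is an assumption about how string results would be mapped back:

  candidates = [
    {uri: '/usr/bin/ls', fname: 'ls'}
    {uri: '/usr/sbin/ls', fname: 'ls'}
  ]

  # Value-based mapping: resolving a matched string back to its object
  # (e.g. via a table keyed by value) collapses duplicates -- both 'ls'
  # results resolve to the same first object.
  byValue = {}
  byValue[c.fname] = c for c in candidates
  console.log (byValue[s] for s in ['ls', 'ls'])   # same object twice

  # Index-based mapping is unambiguous: each result keeps its own object.
  console.log (candidates[i] for i in [0, 1])
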
1 change: 1 addition & 0 deletions src/common.h
@@ -11,6 +11,7 @@

 using namespace std;

+const size_t kMaxThreads = 16;

 #ifdef ENABLE_DEBUG
 // Safe string class that logs error when index is accessed outside the string.
28 changes: 14 additions & 14 deletions src/filter.cc
@@ -3,6 +3,7 @@
 #include <functional>
 #include <thread>
 #include <limits>
+#include <cassert>

 #include "ConcurrentQueue.h"
@@ -95,22 +96,22 @@ CandidateIndexes filter(const vector<Candidates> &candidates, const Element &query
   }

   // Split the dataset and pass down to multiple threads.
-  const size_t max_threads = candidates.size();
+  assert(candidates.size() == kMaxThreads);
   vector<thread> threads;
-  vector<ThreadState> thread_state(max_threads);
-  for (size_t i = 0; i < max_threads; i++) {
+  vector<ThreadState> thread_state(kMaxThreads);
+  for (size_t i = 0; i < kMaxThreads; i++) {
     threads.emplace_back(
         thread_worker_filter, ref(thread_state[i]), i,
         &candidates[i],
         ref(query), ref(options), max_results);
   }
   // Push an empty vector for the threads to terminate.
-  for (size_t i = 0; i < max_threads; i++) {
+  for (size_t i = 0; i < kMaxThreads; i++) {
     Candidates t;
     thread_state[i].input.push(t);
   }
   // Wait for threads to complete and merge the results.
-  for (size_t i = 0; i < max_threads; i++) {
+  for (size_t i = 0; i < kMaxThreads; i++) {
     threads[i].join();
     auto &results = thread_state[i].results;
     while(!results.empty()) {
@@ -131,16 +132,15 @@ Napi::Value filter_with_candidates(Napi::Env env, const Napi::Array &candidates,
     max_results = std::numeric_limits<size_t>::max();

   Napi::Array res = Napi::Array::New(env);
-  const size_t max_threads = 8;
   vector<thread> threads;
-  vector<ThreadState> thread_state(max_threads);
+  vector<ThreadState> thread_state(kMaxThreads);
   vector<size_t> chunks;
-  vector<Candidates> initial_candidates(max_threads);
+  vector<Candidates> initial_candidates(kMaxThreads);
   size_t cur_start = 0;
-  for (size_t i = 0; i < max_threads; i++) {
-    size_t chunk_size = candidates.Length() / max_threads;
+  for (size_t i = 0; i < kMaxThreads; i++) {
+    size_t chunk_size = candidates.Length() / kMaxThreads;
     // Distribute remainder among the chunks.
-    if (i < candidates.Length() % max_threads) {
+    if (i < candidates.Length() % kMaxThreads) {
       chunk_size++;
     }
     for(size_t j=0; j<1000 && j<chunk_size; j++) {
@@ -153,20 +153,20 @@ Napi::Value filter_with_candidates(Napi::Env env, const Napi::Array &candidates,
     cur_start += chunk_size;
     chunks.push_back(cur_start);
   }
-  for (size_t i = 0; i < max_threads; i++) {
+  for (size_t i = 0; i < kMaxThreads; i++) {
     Candidates c;
     for(size_t j=(i==0)?1000:chunks[i-1]+1000; j<chunks[i]; j++) {
       c.push_back(candidates[j].ToObject().Get(key).ToString());
     }
     thread_state[i].input.push(c);
   }
   // Push an empty vector for the threads to terminate.
-  for (size_t i = 0; i < max_threads; i++) {
+  for (size_t i = 0; i < kMaxThreads; i++) {
     Candidates t;
     thread_state[i].input.push(t);
   }
   // Wait for threads to complete and merge the results.
-  for (size_t i = 0; i < max_threads; i++) {
+  for (size_t i = 0; i < kMaxThreads; i++) {
     threads[i].join();
     auto &results = thread_state[i].results;
     while(!results.empty()) {
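The chunking arithmetic in filter_with_candidates splits candidates.Length() items over kMaxThreads workers, giving the first (length mod kMaxThreads) chunks one extra item so sizes differ by at most one. A small CoffeeScript sketch of the same remainder distribution; chunkSizes is a hypothetical helper, not part of the addon:

  # Split n items into k chunks whose sizes differ by at most one,
  # mirroring the loop in filter_with_candidates.
  chunkSizes = (n, k) ->
    for i in [0...k]
      size = Math.floor(n / k)
      size += 1 if i < n % k
      size

  console.log chunkSizes(10, 16)
  # [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0] -- sums to 10
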
29 changes: 21 additions & 8 deletions src/fuzzaldrin.cc
@@ -4,21 +4,35 @@

 Napi::Value Fuzzaldrin::Filter(const Napi::CallbackInfo& info) {
   Napi::Array res = Napi::Array::New(info.Env());
-  if (info.Length() != 4 || !info[0].IsString() || !info[1].IsNumber() ||
-      !info[2].IsBoolean() || !info[3].IsBoolean()) {
+  if (info.Length() != 5 || !info[0].IsString() || !info[1].IsNumber() ||
+      !info[2].IsBoolean() || !info[3].IsBoolean() || !info[4].IsBoolean()) {
     Napi::TypeError::New(info.Env(), "Invalid arguments").ThrowAsJavaScriptException();
     return Napi::Boolean();
   }
   std::string query = info[0].As<Napi::String>();
   size_t maxResults = info[1].As<Napi::Number>().Uint32Value();
   bool usePathScoring = info[2].As<Napi::Boolean>();
   bool useExtensionBonus = info[3].As<Napi::Boolean>();
+  bool returnIndexes = info[4].As<Napi::Boolean>();
   Options options(query, maxResults, usePathScoring, useExtensionBonus);
   const auto matches = filter(candidates_, query, options);

+  std::vector<size_t> counts;
+  if (returnIndexes) {
+    size_t start = 0;
+    for(const auto &c : candidates_) {
+      counts.push_back(start);
+      start += c.size();
+    }
+  }
+
   for(uint32_t i=0; i<matches.size(); i++) {
     const auto &index = matches[i];
-    res[i] = Napi::String::New(info.Env(), candidates_[index.thread_id][index.index]);
+    if (returnIndexes) {
+      res[i] = Napi::Number::New(info.Env(), counts[index.thread_id] + index.index);
+    } else {
+      res[i] = Napi::String::New(info.Env(), candidates_[index.thread_id][index.index]);
+    }
   }
   return res;
 }
@@ -28,16 +42,15 @@ Napi::Value Fuzzaldrin::SetCandidates(const Napi::CallbackInfo& info) {
     Napi::TypeError::New(info.Env(), "Invalid arguments").ThrowAsJavaScriptException();
     return Napi::Boolean();
   }
-  const size_t max_threads = 8;
   Napi::Array candidates = info[0].As<Napi::Array>();
   candidates_.clear();
-  candidates_.resize(max_threads);
+  candidates_.resize(kMaxThreads);
   const size_t N = candidates.Length();
   size_t cur_start = 0;
-  for(size_t i=0; i<max_threads; i++) {
-    size_t chunk_size = N / max_threads;
+  for(size_t i=0; i<kMaxThreads; i++) {
+    size_t chunk_size = N / kMaxThreads;
     // Distribute remainder among the chunks.
-    if (i < N % max_threads) {
+    if (i < N % kMaxThreads) {
       chunk_size++;
     }
     for(size_t j=cur_start; j < cur_start+chunk_size; j++) {
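With candidates stored per thread, a match is addressed by (thread_id, index). The counts vector built in Filter is a prefix sum of the per-thread bucket sizes, so a candidate's global position is counts[thread_id] + index, which is exactly what the CoffeeScript wrapper uses to look up @candidates. A CoffeeScript sketch of that mapping; the bucket contents here are made up for illustration:

  # Per-thread candidate buckets, laid out the way candidates_ is in the addon.
  buckets = [['ab', 'abc'], ['cd'], ['de']]

  # Prefix sums: counts[t] is the global index of bucket t's first element.
  counts = []
  start = 0
  for b in buckets
    counts.push start
    start += b.length
  # counts is now [0, 2, 3]

  globalIndex = (threadId, index) -> counts[threadId] + index
  console.log globalIndex(1, 0)   # 2, i.e. 'cd' in the flattened candidate list
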
