Fix key based matching in setCandidates
rajendrant committed Jan 7, 2019
1 parent 33c3224 commit 3d71c73
Showing 5 changed files with 83 additions and 31 deletions.
15 changes: 13 additions & 2 deletions fuzzaldrin.coffee
@@ -17,12 +17,23 @@ class FuzzaldrinPlusFast
     @obj = new binding.Fuzzaldrin()

   setCandidates: (candidates, options = {}) ->
+    if options.key?
+      @candidates = candidates
+      @candidate_key = options.key
+      candidates = candidates.map((item) => item[@candidate_key])
+    else
+      @candidates = null
+      @candidate_key = null
     @obj.setCandidates(candidates)

   filter: (query, options = {}) ->
     options = parseOptions(options)
-    @obj.filter query, options.maxResults,
-      options.usePathScoring, options.useExtensionBonus
+    returnIndexes = @candidate_key?
+    res = @obj.filter query, options.maxResults,
+      options.usePathScoring, options.useExtensionBonus, returnIndexes
+    if returnIndexes
+      res = res.map((ind) => @candidates[ind])
+    return res

 module.exports =
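This wrapper is the heart of the fix: when a key is given, the original objects are kept on the wrapper, only the keyed strings are handed to the native addon, and filter asks the addon for indexes so results can be mapped back to the original objects. A minimal usage sketch, assuming the module is required under its package name 'fuzzaldrin-plus-fast' (the require path is not shown in this diff):

  fuzzaldrinplusfast = require 'fuzzaldrin-plus-fast'

  files = [
    {uri: '/usr/bin/ls', fname: 'ls'}
    {uri: '/usr/sbin/ls', fname: 'ls'}
  ]

  obj = fuzzaldrinplusfast.New()
  obj.setCandidates files, key: 'fname'
  # The addon matches on 'fname' and returns indexes, which the wrapper
  # maps back to the full objects, so both 'ls' entries survive.
  console.log obj.filter('l')
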
41 changes: 34 additions & 7 deletions spec/filter-options-spec.coffee
@@ -79,10 +79,37 @@ describe "filtering", ->
     ]
     expect(fuzzaldrinplusfast.filter(candidates, 'i', key: 'fname')).toEqual([candidates[3], candidates[2], candidates[1]])

-  # This test fails right now
-  # it "candidates with duplicate values when indexed by key are returned properly", ->
-  #   candidates = [
-  #     {uri: '/usr/bin/ls', fname: 'ls'},
-  #     {uri: '/usr/sbin/ls', fname: 'ls'}
-  #   ]
-  #   expect(fuzzaldrinplusfast.filter(candidates, 'l', key: 'fname')).toEqual([candidates[0], candidates[1]])
+  it "candidates with duplicate values when indexed by key are returned properly", ->
+    candidates = [
+      {uri: '/usr/bin/ls', fname: 'ls'},
+      {uri: '/usr/sbin/ls', fname: 'ls'}
+    ]
+    expect(fuzzaldrinplusfast.filter(candidates, 'l', key: 'fname')).toEqual([candidates[0], candidates[1]])
+
+describe "filtering by creating an object", ->
+  it "with default options", ->
+    obj = fuzzaldrinplusfast.New()
+    obj.setCandidates ['ab', 'abc', 'cd', 'de']
+    expect(obj.filter('a')).toEqual(['ab', 'abc'])
+    expect(obj.filter('b')).toEqual(['ab', 'abc'])
+    expect(obj.filter('c')).toEqual(['cd', 'abc'])
+
+  it "candidates are able to be indexed by a given key", ->
+    candidates = [
+      {uri: '/usr/bin/ls', fname: 'ls'},
+      {uri: '/usr/bin/mkdir', fname: 'mkdir'},
+      {uri: '/usr/sbin/find', fname: 'find'},
+      {uri: '/usr/local/bin/git', fname: 'git'},
+    ]
+    obj = fuzzaldrinplusfast.New()
+    obj.setCandidates candidates, key: 'fname'
+    expect(obj.filter('i')).toEqual([candidates[3], candidates[2], candidates[1]])
+
+  it "candidates with duplicate values when indexed by key are returned properly", ->
+    candidates = [
+      {uri: '/usr/bin/ls', fname: 'ls'},
+      {uri: '/usr/sbin/ls', fname: 'ls'}
+    ]
+    obj = fuzzaldrinplusfast.New()
+    obj.setCandidates candidates, key: 'fname'
+    expect(obj.filter('l')).toEqual([candidates[0], candidates[1]])
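The previously-disabled duplicate test passes now because results round-trip as indexes. A sketch of the failure mode that value-based mapping runs into; this is illustrative only, not code from the repository, and the lookup-table mechanism is an assumption about how string results would be mapped back:

  candidates = [
    {uri: '/usr/bin/ls', fname: 'ls'}
    {uri: '/usr/sbin/ls', fname: 'ls'}
  ]

  # Value-based mapping: resolving a matched string back to its object
  # (e.g. via a table keyed by value) collapses duplicates -- both 'ls'
  # results resolve to the same first object.
  byValue = {}
  byValue[c.fname] = c for c in candidates
  console.log (byValue[s] for s in ['ls', 'ls'])   # same object twice

  # Index-based mapping is unambiguous: each result keeps its own object.
  console.log (candidates[i] for i in [0, 1])
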
1 change: 1 addition & 0 deletions src/common.h
@@ -11,6 +11,7 @@

 using namespace std;

+const size_t kMaxThreads = 16;

 #ifdef ENABLE_DEBUG
 // Safe string class that logs error when index is accessed outside the string.
28 changes: 14 additions & 14 deletions src/filter.cc
@@ -3,6 +3,7 @@
 #include <functional>
 #include <thread>
 #include <limits>
+#include <cassert>

 #include "ConcurrentQueue.h"
@@ -95,22 +96,22 @@ CandidateIndexes filter(const vector<Candidates> &candidates, const Element &query
   }

   // Split the dataset and pass down to multiple threads.
-  const size_t max_threads = candidates.size();
+  assert(candidates.size() == kMaxThreads);
   vector<thread> threads;
-  vector<ThreadState> thread_state(max_threads);
-  for (size_t i = 0; i < max_threads; i++) {
+  vector<ThreadState> thread_state(kMaxThreads);
+  for (size_t i = 0; i < kMaxThreads; i++) {
     threads.emplace_back(
         thread_worker_filter, ref(thread_state[i]), i,
         &candidates[i],
         ref(query), ref(options), max_results);
   }
   // Push an empty vector for the threads to terminate.
-  for (size_t i = 0; i < max_threads; i++) {
+  for (size_t i = 0; i < kMaxThreads; i++) {
     Candidates t;
     thread_state[i].input.push(t);
   }
   // Wait for threads to complete and merge the results.
-  for (size_t i = 0; i < max_threads; i++) {
+  for (size_t i = 0; i < kMaxThreads; i++) {
     threads[i].join();
     auto &results = thread_state[i].results;
     while(!results.empty()) {
@@ -131,16 +132,15 @@ Napi::Value filter_with_candidates(Napi::Env env, const Napi::Array &candidates,
     max_results = std::numeric_limits<size_t>::max();

   Napi::Array res = Napi::Array::New(env);
-  const size_t max_threads = 8;
   vector<thread> threads;
-  vector<ThreadState> thread_state(max_threads);
+  vector<ThreadState> thread_state(kMaxThreads);
   vector<size_t> chunks;
-  vector<Candidates> initial_candidates(max_threads);
+  vector<Candidates> initial_candidates(kMaxThreads);
   size_t cur_start = 0;
-  for (size_t i = 0; i < max_threads; i++) {
-    size_t chunk_size = candidates.Length() / max_threads;
+  for (size_t i = 0; i < kMaxThreads; i++) {
+    size_t chunk_size = candidates.Length() / kMaxThreads;
     // Distribute remainder among the chunks.
-    if (i < candidates.Length() % max_threads) {
+    if (i < candidates.Length() % kMaxThreads) {
       chunk_size++;
     }
     for(size_t j=0; j<1000 && j<chunk_size; j++) {
@@ -153,20 +153,20 @@ Napi::Value filter_with_candidates(Napi::Env env, const Napi::Array &candidates,
     cur_start += chunk_size;
     chunks.push_back(cur_start);
   }
-  for (size_t i = 0; i < max_threads; i++) {
+  for (size_t i = 0; i < kMaxThreads; i++) {
     Candidates c;
     for(size_t j=(i==0)?1000:chunks[i-1]+1000; j<chunks[i]; j++) {
       c.push_back(candidates[j].ToObject().Get(key).ToString());
     }
     thread_state[i].input.push(c);
   }
   // Push an empty vector for the threads to terminate.
-  for (size_t i = 0; i < max_threads; i++) {
+  for (size_t i = 0; i < kMaxThreads; i++) {
     Candidates t;
     thread_state[i].input.push(t);
   }
   // Wait for threads to complete and merge the results.
-  for (size_t i = 0; i < max_threads; i++) {
+  for (size_t i = 0; i < kMaxThreads; i++) {
     threads[i].join();
     auto &results = thread_state[i].results;
     while(!results.empty()) {
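The chunking arithmetic in filter_with_candidates splits candidates.Length() items over kMaxThreads workers, giving the first (length mod kMaxThreads) chunks one extra item so sizes differ by at most one. A small CoffeeScript sketch of the same remainder distribution; chunkSizes is a hypothetical helper, not part of the addon:

  # Split n items into k chunks whose sizes differ by at most one,
  # mirroring the loop in filter_with_candidates.
  chunkSizes = (n, k) ->
    for i in [0...k]
      size = Math.floor(n / k)
      size += 1 if i < n % k
      size

  console.log chunkSizes(10, 16)
  # [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0] -- sums to 10
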
29 changes: 21 additions & 8 deletions src/fuzzaldrin.cc
@@ -4,21 +4,35 @@

 Napi::Value Fuzzaldrin::Filter(const Napi::CallbackInfo& info) {
   Napi::Array res = Napi::Array::New(info.Env());
-  if (info.Length() != 4 || !info[0].IsString() || !info[1].IsNumber() ||
-      !info[2].IsBoolean() || !info[3].IsBoolean()) {
+  if (info.Length() != 5 || !info[0].IsString() || !info[1].IsNumber() ||
+      !info[2].IsBoolean() || !info[3].IsBoolean() || !info[4].IsBoolean()) {
     Napi::TypeError::New(info.Env(), "Invalid arguments").ThrowAsJavaScriptException();
     return Napi::Boolean();
   }
   std::string query = info[0].As<Napi::String>();
   size_t maxResults = info[1].As<Napi::Number>().Uint32Value();
   bool usePathScoring = info[2].As<Napi::Boolean>();
   bool useExtensionBonus = info[3].As<Napi::Boolean>();
+  bool returnIndexes = info[4].As<Napi::Boolean>();
   Options options(query, maxResults, usePathScoring, useExtensionBonus);
   const auto matches = filter(candidates_, query, options);

+  std::vector<size_t> counts;
+  if (returnIndexes) {
+    size_t start = 0;
+    for(const auto &c : candidates_) {
+      counts.push_back(start);
+      start += c.size();
+    }
+  }
+
   for(uint32_t i=0; i<matches.size(); i++) {
     const auto &index = matches[i];
-    res[i] = Napi::String::New(info.Env(), candidates_[index.thread_id][index.index]);
+    if (returnIndexes) {
+      res[i] = Napi::Number::New(info.Env(), counts[index.thread_id] + index.index);
+    } else {
+      res[i] = Napi::String::New(info.Env(), candidates_[index.thread_id][index.index]);
+    }
   }
   return res;
 }
@@ -28,16 +42,15 @@ Napi::Value Fuzzaldrin::SetCandidates(const Napi::CallbackInfo& info) {
     Napi::TypeError::New(info.Env(), "Invalid arguments").ThrowAsJavaScriptException();
     return Napi::Boolean();
   }
-  const size_t max_threads = 8;
   Napi::Array candidates = info[0].As<Napi::Array>();
   candidates_.clear();
-  candidates_.resize(max_threads);
+  candidates_.resize(kMaxThreads);
   const size_t N = candidates.Length();
   size_t cur_start = 0;
-  for(size_t i=0; i<max_threads; i++) {
-    size_t chunk_size = N / max_threads;
+  for(size_t i=0; i<kMaxThreads; i++) {
+    size_t chunk_size = N / kMaxThreads;
     // Distribute remainder among the chunks.
-    if (i < N % max_threads) {
+    if (i < N % kMaxThreads) {
       chunk_size++;
     }
     for(size_t j=cur_start; j < cur_start+chunk_size; j++) {
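With candidates stored per thread, a match is addressed by (thread_id, index). The counts vector built in Filter is a prefix sum of the per-thread bucket sizes, so a candidate's global position is counts[thread_id] + index, which is exactly what the CoffeeScript wrapper uses to look up @candidates. A CoffeeScript sketch of that mapping; the bucket contents here are made up for illustration:

  # Per-thread candidate buckets, laid out the way candidates_ is in the addon.
  buckets = [['ab', 'abc'], ['cd'], ['de']]

  # Prefix sums: counts[t] is the global index of bucket t's first element.
  counts = []
  start = 0
  for b in buckets
    counts.push start
    start += b.length
  # counts is now [0, 2, 3]

  globalIndex = (threadId, index) -> counts[threadId] + index
  console.log globalIndex(1, 0)   # 2, i.e. 'cd' in the flattened candidate list
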
