feat: move to fts fuzzy match with some modifications
This commit is contained in:
parent
d7df043eb9
commit
7439ff98b0
6 changed files with 233 additions and 11 deletions
212
cpp/fts_fuzzy_match.hpp
Normal file
212
cpp/fts_fuzzy_match.hpp
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
// LICENSE
|
||||
//
|
||||
// This software is dual-licensed to the public domain and under the following
|
||||
// license: you are granted a perpetual, irrevocable license to copy, modify,
|
||||
// publish, and distribute this file as you see fit.
|
||||
//
|
||||
// VERSION
|
||||
// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
|
||||
// 0.1.0 (2016-03-28) Initial release
|
||||
//
|
||||
// AUTHOR
|
||||
// Forrest Smith
|
||||
//
|
||||
// NOTES
|
||||
// Compiling
|
||||
// You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
|
||||
//
|
||||
// fuzzy_match_simple(...)
|
||||
// Returns true if each character in pattern is found sequentially within str
|
||||
//
|
||||
// fuzzy_match(...)
|
||||
// Returns true if pattern is found AND calculates a score.
|
||||
// Performs exhaustive search via recursion to find all possible matches and match with highest score.
|
||||
// Scores values have no intrinsic meaning. Possible score range is not normalized and varies with pattern.
|
||||
// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
|
||||
// Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
|
||||
// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.
|
||||
|
||||
#ifndef FTS_FUZZY_MATCH_H
|
||||
#define FTS_FUZZY_MATCH_H
|
||||
|
||||
#include <ctype.h> // ::tolower, ::toupper
|
||||
|
||||
#include <cstdint> // uint8_t
|
||||
#include <cstdio>
|
||||
#include <cstring> // memcpy
|
||||
#include <iostream>
|
||||
|
||||
// Public interface
|
||||
namespace fts {
|
||||
static bool fuzzy_match_simple(char const* pattern, char const* str);
|
||||
static bool fuzzy_match(char const* pattern, char const* str, int& outScore);
|
||||
static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches);
|
||||
} // namespace fts
|
||||
|
||||
#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
|
||||
namespace fts {
|
||||
|
||||
// Forward declarations for "private" implementation
|
||||
namespace fuzzy_internal {
|
||||
static bool fuzzy_match_recursive(const char* pattern, const char* str, int& outScore, const char* strBegin,
|
||||
uint8_t const* srcMatches, uint8_t* newMatches, int maxMatches, int nextMatch,
|
||||
int& recursionCount, int recursionLimit);
|
||||
}
|
||||
|
||||
// Public interface
|
||||
static bool fuzzy_match_simple(char const* pattern, char const* str) {
|
||||
while (*pattern != '\0' && *str != '\0') {
|
||||
if (tolower(*pattern) == tolower(*str))
|
||||
++pattern;
|
||||
++str;
|
||||
}
|
||||
|
||||
return *pattern == '\0' ? true : false;
|
||||
}
|
||||
|
||||
static bool fuzzy_match(char const* pattern, char const* str, int& outScore) {
|
||||
uint8_t matches[256];
|
||||
return fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
|
||||
}
|
||||
|
||||
static bool fuzzy_match(char const* pattern, char const* str, int& outScore, uint8_t* matches, int maxMatches) {
|
||||
int recursionCount = 0;
|
||||
int recursionLimit = 10;
|
||||
|
||||
return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, maxMatches, 0, recursionCount, recursionLimit);
|
||||
}
|
||||
|
||||
// Private implementation
|
||||
static bool fuzzy_internal::fuzzy_match_recursive(const char* pattern, const char* str, int& outScore,
|
||||
const char* strBegin, uint8_t const* srcMatches, uint8_t* matches, int maxMatches,
|
||||
int nextMatch, int& recursionCount, int recursionLimit) {
|
||||
// Count recursions
|
||||
++recursionCount;
|
||||
if (recursionCount >= recursionLimit)
|
||||
return false;
|
||||
|
||||
// Detect end of strings
|
||||
if (*pattern == '\0' || *str == '\0')
|
||||
return false;
|
||||
|
||||
// Recursion params
|
||||
bool recursiveMatch = false;
|
||||
uint8_t bestRecursiveMatches[256];
|
||||
int bestRecursiveScore = 0;
|
||||
|
||||
// Loop through pattern and str looking for a match
|
||||
bool first_match = true;
|
||||
while (*pattern != '\0' && *str != '\0') {
|
||||
// Found match. We assume a space in the pattern is match so we can match paths better
|
||||
if (tolower(*pattern) == tolower(*str) || *pattern == ' ') {
|
||||
// Supplied matches buffer was too short
|
||||
if (nextMatch >= maxMatches) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// "Copy-on-Write" srcMatches into matches
|
||||
if (first_match && srcMatches) {
|
||||
memcpy(matches, srcMatches, nextMatch);
|
||||
first_match = false;
|
||||
}
|
||||
|
||||
// Recursive call that "skips" this match
|
||||
uint8_t recursiveMatches[256];
|
||||
int recursiveScore;
|
||||
if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) {
|
||||
// Pick best recursive score
|
||||
if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
|
||||
memcpy(bestRecursiveMatches, recursiveMatches, 256);
|
||||
bestRecursiveScore = recursiveScore;
|
||||
}
|
||||
recursiveMatch = true;
|
||||
}
|
||||
|
||||
// Advance
|
||||
matches[nextMatch++] = (uint8_t)(str - strBegin);
|
||||
++pattern;
|
||||
}
|
||||
++str;
|
||||
}
|
||||
|
||||
// Determine if full pattern was matched
|
||||
bool matched = *pattern == '\0' ? true : false;
|
||||
|
||||
// Calculate score
|
||||
if (matched) {
|
||||
const int sequential_bonus = 15; // bonus for adjacent matches
|
||||
const int separator_bonus = 30; // bonus if match occurs after a separator
|
||||
const int camel_bonus = 30; // bonus if match is uppercase and prev is lower
|
||||
const int first_letter_bonus = 15; // bonus if the first letter is matched
|
||||
|
||||
const int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match
|
||||
const int max_leading_letter_penalty = -15; // maximum penalty for leading letters
|
||||
const int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
|
||||
|
||||
// Iterate str to end
|
||||
while (*str != '\0')
|
||||
++str;
|
||||
|
||||
// Initialize score
|
||||
outScore = 100;
|
||||
|
||||
// Apply leading letter penalty
|
||||
int penalty = leading_letter_penalty * matches[0];
|
||||
if (penalty < max_leading_letter_penalty)
|
||||
penalty = max_leading_letter_penalty;
|
||||
outScore += penalty;
|
||||
|
||||
// Apply unmatched penalty
|
||||
int unmatched = (int)(str - strBegin) - nextMatch;
|
||||
outScore += unmatched_letter_penalty * unmatched;
|
||||
|
||||
// Apply ordering bonuses
|
||||
for (int i = 0; i < nextMatch; ++i) {
|
||||
uint8_t currIdx = matches[i];
|
||||
|
||||
if (i > 0) {
|
||||
uint8_t prevIdx = matches[i - 1];
|
||||
|
||||
// Sequential
|
||||
if (currIdx == (prevIdx + 1))
|
||||
outScore += sequential_bonus;
|
||||
}
|
||||
|
||||
// Check for bonuses based on neighbor character value
|
||||
if (currIdx > 0) {
|
||||
// Camel case
|
||||
char neighbor = strBegin[currIdx - 1];
|
||||
char curr = strBegin[currIdx];
|
||||
if (::islower(neighbor) && ::isupper(curr))
|
||||
outScore += camel_bonus;
|
||||
|
||||
// Separator
|
||||
bool neighborSeparator = neighbor == '_' || neighbor == ' ' || neighbor == '/' || neighbor == '-';
|
||||
if (neighborSeparator)
|
||||
outScore += separator_bonus;
|
||||
} else {
|
||||
// First letter
|
||||
outScore += first_letter_bonus;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return best result
|
||||
if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
|
||||
// Recursive score is better than "this"
|
||||
memcpy(matches, bestRecursiveMatches, maxMatches);
|
||||
outScore = bestRecursiveScore;
|
||||
return true;
|
||||
} else if (matched) {
|
||||
// "this" score is better than recursive
|
||||
return true;
|
||||
} else {
|
||||
// no match
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} // namespace fts
|
||||
|
||||
#endif // FTS_FUZZY_MATCH_IMPLEMENTATION
|
||||
|
||||
#endif // FTS_FUZZY_MATCH_H
|
||||
|
|
@ -3,8 +3,9 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define FTS_FUZZY_MATCH_IMPLEMENTATION
|
||||
#include "./file_scanner.hpp"
|
||||
#include "./fuzzy_match.hpp"
|
||||
#include "./fts_fuzzy_match.hpp"
|
||||
#include "./match.hpp"
|
||||
#include "./sorter.hpp"
|
||||
|
||||
|
|
@ -18,8 +19,10 @@ extern "C" void ivy_init(const char* dir) {
|
|||
}
|
||||
|
||||
extern "C" int ivy_match(const char* pattern, const char* text) {
|
||||
auto matcher = ivy::FuzzyMatcher(pattern, 0);
|
||||
return matcher.match(text, false);
|
||||
int score = 0;
|
||||
fts::fuzzy_match(pattern, text, score);
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
extern "C" char* ivy_files(const char* search, const char* base_dir) {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "./fuzzy_match.hpp"
|
||||
#define FTS_FUZZY_MATCH_IMPLEMENTATION
|
||||
#include "./fts_fuzzy_match.hpp"
|
||||
#include "./match.hpp"
|
||||
#include "./thread_pool.hpp"
|
||||
|
||||
|
|
@ -9,16 +10,16 @@ namespace ivy {
|
|||
class Sorter {
|
||||
ivy::ThreadPool m_thread_pool;
|
||||
|
||||
std::string_view m_term;
|
||||
std::string m_term;
|
||||
|
||||
std::mutex m_matches_lock;
|
||||
std::vector<Match> m_matches;
|
||||
|
||||
inline void add_entry(const std::string& file) {
|
||||
ivy::FuzzyMatcher matcher(m_term, 0);
|
||||
int score = matcher.match(file, false);
|
||||
int score = 0;
|
||||
fts::fuzzy_match(m_term.c_str(), file.c_str(), score);
|
||||
|
||||
if (score > -200) {
|
||||
if (score > 50) {
|
||||
std::unique_lock<std::mutex> lock(m_matches_lock);
|
||||
m_matches.emplace_back(Match{score, std::move(file)});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
local library_path = (function()
|
||||
local dirname = string.sub(debug.getinfo(1).source, 2, #"/fzf_lib.lua" * -1)
|
||||
-- return dirname .. "/../../build/Debug/lib/libivy.so"
|
||||
return dirname .. "/../../build/Release/lib/libivy.so"
|
||||
end)()
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ local libivy = require "ivy.libivy"
|
|||
it("should run a simple match", function(t)
|
||||
local score = libivy.ivy_match("term", "I am a serch term")
|
||||
|
||||
if score > 0 then
|
||||
t.error "Score should not be grater than 0"
|
||||
if score <= 0 then
|
||||
t.error("Score should not be less than 0 found " .. score)
|
||||
end
|
||||
end)
|
||||
|
|
|
|||
|
|
@ -22,6 +22,13 @@ it("sould match path separator", function(t)
|
|||
end
|
||||
end)
|
||||
|
||||
it("sould match pattern with spaces", function(t)
|
||||
local result = match_test("so fi", "some/file.lua", "somefile.lua")
|
||||
if result then
|
||||
t.error(result)
|
||||
end
|
||||
end)
|
||||
|
||||
it("sould match the start of a string", function(t)
|
||||
local result = match_test("file", "file.lua", "somefile.lua")
|
||||
if result then
|
||||
|
|
|
|||
Loading…
Reference in a new issue