feat: add initial implementation of cpp file finder

This uses the LuaJIT FFI and a C++ shared library to implement a file finder in
C++ so we can use threads more effectively and get better performance.
This commit is contained in:
Ade Attwood 2022-07-23 08:49:45 +01:00
parent b82f1af2a1
commit 3f6149d3e1
17 changed files with 698 additions and 19 deletions

3
.clang-format Normal file
View file

@ -0,0 +1,3 @@
Language: Cpp
BasedOnStyle: Google
ColumnLimit: 0

20
.clang-tidy Normal file
View file

@ -0,0 +1,20 @@
# clang-tidy configuration: start from no checks, then enable the Google set
# plus a handful of LLVM, misc, modernize and readability checks.
Checks: '
  -*,
  google-*,
  -google-runtime-references,
  -google-readability-avoid-underscore-in-googletest-name,
  llvm-include-order,
  llvm-namespace-comment,
  misc-throw-by-value-catch-by-reference,
  modernize*,
  -modernize-use-trailing-return-type,
  readability-container-size-empty,
  '
# Every enabled check fails the run when it fires.
WarningsAsErrors: '*'
# HeaderFilterRegex is a regular expression (not a shell glob) matched against
# header paths. The C++ sources and headers of this project live in `cpp/`.
HeaderFilterRegex: 'cpp/.*'
CheckOptions:
  - key: google-readability-braces-around-statements.ShortStatementLines
    value: '3'

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
build
.cache
compile_commands.json
.luacheckcache

60
CMakeLists.txt Normal file
View file

@ -0,0 +1,60 @@
cmake_minimum_required(VERSION 3.16)

set(PROJECT_VERSION_NAME "v0.0.1")

# Split and sanitize the project version so that its parts can be used as the
# CMake project version; "v1.1.1" is not a valid version number for project().
string(REPLACE "v" "" PROJECT_VERSION "${PROJECT_VERSION_NAME}")
string(REPLACE "." ";" VERSION_LIST "${PROJECT_VERSION}")
list(GET VERSION_LIST 0 PROJECT_VERSION_MAJOR)
list(GET VERSION_LIST 1 PROJECT_VERSION_MINOR)
list(GET VERSION_LIST 2 PROJECT_VERSION_PATCH)

project("Ivy" VERSION ${PROJECT_VERSION} LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# CMAKE_BUILD_TYPE only has a meaning for single-config generators; multi-config
# generators (Visual Studio, Xcode, Ninja Multi-Config) pick the configuration
# at build time, so only default and validate it in the single-config case.
get_property(IVY_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT IVY_IS_MULTI_CONFIG)
  # Default the build type when it has not been set
  if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release")
  endif()

  # Ensure the build type is valid
  set(IVY_ALLOWED_BUILD_TYPES Debug Release MinSizeRel RelWithDebInfo)
  if(NOT "${CMAKE_BUILD_TYPE}" IN_LIST IVY_ALLOWED_BUILD_TYPES)
    message(FATAL_ERROR "Unknown build type \"${CMAKE_BUILD_TYPE}\". Allowed values are Debug, Release, RelWithDebInfo, and MinSizeRel.")
  endif()
endif()

# Report the operating system and host processor
message(STATUS "We are on a ${CMAKE_SYSTEM_NAME} system")
message(STATUS "The host processor is ${CMAKE_HOST_SYSTEM_PROCESSOR}")

# Place executables created with `add_executable` into the GNU `bin` directory
# of the build tree.
include(GNUInstallDirs)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}")

# Libraries always go to `<build>/lib`. CMAKE_INSTALL_LIBDIR is not used here
# because it can be `lib64` (or a multiarch directory) on some distributions,
# while lua/ivy/libivy.lua loads the library from `build/Release/lib/`.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")

# Per-configuration optimisation defaults for GNU compilers
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g3")
  set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
endif()

# Warning flags for GNU compilers; attached to the targets below instead of
# being appended to the global CMAKE_CXX_FLAGS string.
set(IVY_GNU_WARNING_FLAGS
  -Wall
  -Wunreachable-code
  -Wno-unknown-pragmas
  -Wno-sign-compare
  -Wwrite-strings
  -Wno-unused
)

# NOTE(review): nothing in this file uses pkg-config after this call; it is
# kept because the README lists pkg-config as a build dependency.
find_package(PkgConfig REQUIRED)
find_package(Threads REQUIRED)

# CONFIGURE_DEPENDS re-runs the glob at build time so that added or removed
# source files are picked up without a manual re-configure.
file(GLOB_RECURSE IVY_HEADER CONFIGURE_DEPENDS "${CMAKE_CURRENT_LIST_DIR}/cpp/*.hpp")
file(GLOB_RECURSE IVY_SOURCE CONFIGURE_DEPENDS "${CMAKE_CURRENT_LIST_DIR}/cpp/*.cpp")

# Tests and the command line entry point are not part of the library sources
list(FILTER IVY_SOURCE EXCLUDE REGEX "_test\\.cpp$")
list(FILTER IVY_SOURCE EXCLUDE REGEX "cli\\.cpp$")

# Shared library that is loaded from Lua through the FFI
add_library(ivy SHARED ${IVY_SOURCE} ${IVY_HEADER})

# Stand-alone command line client built from the same sources plus cli.cpp
add_executable(ivycli ${IVY_SOURCE} ${IVY_HEADER} "${CMAKE_CURRENT_LIST_DIR}/cpp/cli.cpp")

foreach(ivy_target IN ITEMS ivy ivycli)
  target_compile_options(${ivy_target} PRIVATE
    "$<$<CXX_COMPILER_ID:GNU>:${IVY_GNU_WARNING_FLAGS}>"
  )
  target_link_libraries(${ivy_target} PRIVATE Threads::Threads)
endforeach()

View file

@ -19,6 +19,21 @@ git clone https://github.com/AdeAttwood/ivy.nvim ~/.config/nvim/pack/bundle/star
TODO: Add docs for the plugin managers; I don't use any.
### Compiling
To use the native searching, you will need to compile the shared library. You
can do that by running the command below in the root of the plugin.
```sh
cmake -DCMAKE_BUILD_TYPE=Release -B build/Release && (cd build/Release; make -j)
```
If you are missing build dependencies, you can install them via apt.
```sh
sudo apt-get install build-essential pkg-config cmake
```
## Features
### Commands

35
cpp/cli.cpp Normal file
View file

@ -0,0 +1,35 @@
#include <filesystem>
#include <iostream>
#include <optional>
#include <regex>
#include <string>
#include "./file_scanner.hpp"
#include "./sorter.hpp"
namespace {

// Builds the regex used to highlight matched characters: a capture group
// around a bracket expression containing every character of `term`.
//
// Characters that are special inside a bracket expression (`\`, `]`, `[`, `^`
// and `-`) are escaped so that a search term such as "a]b" or "a-z" matches
// those characters literally instead of producing an invalid or unintended
// pattern.
std::regex highlight_pattern(const std::string& term) {
  std::string char_class;
  char_class.reserve(term.size() * 2);
  for (const char c : term) {
    if (c == '\\' || c == ']' || c == '[' || c == '^' || c == '-') {
      char_class += '\\';
    }
    char_class += c;
  }
  return std::regex("([" + char_class + "])");
}

}  // namespace

// Command line front end: fuzzy-searches the files below the current working
// directory for the first argument and prints "<score> <path>" per match with
// the characters of the search term rendered in bold.
//
// Returns 1 when no (or an empty) search term was given, 0 otherwise.
int main(int argc, char* argv[]) {
  std::vector<std::string> args;
  args.reserve(argc);

  // Skip the first argument because that will be the programme name.
  for (int i = 1; i < argc; i++) {
    args.emplace_back(argv[i]);
  }

  // An empty term cannot match anything and would yield an empty bracket
  // expression, so it is rejected the same way as a missing one.
  if (args.empty() || args.front().empty()) {
    std::cout << "Missing required search term" << std::endl;
    return 1;
  }

  const auto base_dir = std::filesystem::current_path();
  const std::string search = args.at(0);

  auto sorter = ivy::Sorter(search);
  auto scanner = ivy::FileScanner(base_dir.string());
  const std::regex pattern = highlight_pattern(search);

  for (ivy::Match const& match : sorter.sort(scanner.scan())) {
    // '\n' instead of std::endl: no need to flush after every result line.
    std::cout << match.score << " " << std::regex_replace(match.content, pattern, "\033[1m$&\033[0m") << '\n';
  }

  return 0;
}

35
cpp/file_scanner.hpp Normal file
View file

@ -0,0 +1,35 @@
#pragma once
#include <filesystem>
#include <string>
#include <vector>
namespace fs = std::filesystem;

namespace ivy {

// Collects the regular files found below a base directory.
class FileScanner {
  std::string m_base_dir;

 public:
  // `base_dir` is the directory that scan() walks; returned paths are relative
  // to it.
  explicit FileScanner(const std::string& base_dir) : m_base_dir(base_dir) {}

  // Walks the base directory recursively and returns the path of every regular
  // file, relative to the base directory.
  //
  // Entries named exactly ".git" are skipped and never descended into. Names
  // that merely contain ".git" (".gitignore", ".github", a base directory
  // called "project.git") are scanned normally.
  //
  // File system errors do not throw: unreadable directories are skipped, and
  // an error while iterating ends the scan with the results gathered so far
  // (an unreadable or missing base directory yields an empty list).
  std::vector<std::string> scan() {
    std::vector<std::string> results;

    std::error_code walk_error;
    fs::recursive_directory_iterator it(m_base_dir, fs::directory_options::skip_permission_denied, walk_error);
    const fs::recursive_directory_iterator end;

    while (!walk_error && it != end) {
      const fs::directory_entry& dir_entry = *it;

      // TODO(ade): sort out some kind of ignore thing. This will be needed
      // when we start adding wildcard ignore functionality
      if (dir_entry.path().filename() == ".git") {
        // Prune the whole subtree instead of visiting and discarding each
        // entry inside it. This is a no-op when `.git` is a plain file.
        it.disable_recursion_pending();
      } else {
        std::error_code entry_error;
        if (dir_entry.is_regular_file(entry_error)) {
          const fs::path relative_path = fs::relative(dir_entry.path(), m_base_dir, entry_error);
          if (!entry_error) {
            results.emplace_back(relative_path.string());
          }
        }
      }

      it.increment(walk_error);
    }

    return results;
  }
};
}  // namespace ivy

198
cpp/fuzzy_match.cpp Normal file
View file

@ -0,0 +1,198 @@
// Copyright 2017-2018 ccls Authors
// SPDX-License-Identifier: Apache-2.0
// https://github.com/MaskRay/ccls/blob/master/src/fuzzy_match.cc
#include "fuzzy_match.hpp"
#include <ctype.h>
#include <stdio.h>
#include <algorithm>
#include <vector>
namespace ivy {
namespace {
// Coarse class of a single character.
enum CharClass { Other,
                 Lower,
                 Upper };
// Role of a character inside a word: `Head` starts a word segment, `Tail`
// continues one and `None` is assigned to characters of class `Other`.
enum CharRole { None,
                Tail,
                Head };

// Classifies `c` as lower case, upper case or other.
//
// The <ctype.h> functions are only defined for values representable as
// `unsigned char` (or EOF). `c` usually comes from a plain `char`, which is
// negative for non-ASCII bytes on platforms where `char` is signed, so it is
// narrowed to `unsigned char` first.
CharClass getCharClass(int c) {
  const int ch = static_cast<unsigned char>(c);
  if (islower(ch))
    return Lower;
  if (isupper(ch))
    return Upper;
  return Other;
}

// Computes the role of every character of `s`.
//
// s:         the string to analyse.
// roles:     output array with room for at least `s.size()` entries;
//            `roles[i]` receives the CharRole of `s[i]`.
// class_set: output bit set with bit `1 << CharClass` set for every character
//            class that occurs in `s` (0 for an empty string).
void calculateRoles(std::string_view s, int roles[], int *class_set) {
  if (s.empty()) {
    *class_set = 0;
    return;
  }
  // Sliding window over the classes of the previous, current and next char.
  CharClass pre = Other, cur = getCharClass(s[0]), suc = Other;
  *class_set = 1 << cur;
  auto fn = [&]() {
    if (cur == Other)
      return None;
    // U(U)L is Head while U(U)U is Tail
    return pre == Other || (cur == Upper && (pre == Lower || suc == Lower))
               ? Head
               : Tail;
  };
  for (size_t i = 0; i < s.size() - 1; i++) {
    suc = getCharClass(s[i + 1]);
    *class_set |= 1 << suc;
    roles[i] = fn();
    pre = cur;
    cur = suc;
  }
  // The last character has no successor; `suc` still holds its own class here.
  roles[s.size() - 1] = fn();
}
}  // namespace
// Penalty for leaving text[j] unmatched.
//
// j:    index into the text being scored.
// last: extra penalty flag; match() passes `true` when coming from the DP
//       state whose previous text character was matched.
//
// The base penalty is -3, with a further -10 when `last` is set and a further
// -10 when text[j] starts a word segment (role Head).
int FuzzyMatcher::missScore(int j, bool last) {
  const int base = last ? -13 : -3;
  return text_role[j] == Head ? base - 10 : base;
}
// Score contribution for matching pat[i] against text[j].
//
// i:    index into the pattern.
// j:    index into the text.
// last: true when the previous text character was matched as well.
int FuzzyMatcher::matchScore(int i, int j, bool last) {
  const int role = text_role[j];
  int score = 0;

  // Case matching.
  if (pat[i] == text[j]) {
    // One extra point when pat contains uppercase letters or for prefix
    // matching (same index in pattern and text).
    const bool bonus = (pat_set & (1 << Upper)) != 0 || i == j;
    score += bonus ? 2 : 1;
  }

  // A pattern head prefers a text head and dislikes a text tail.
  if (pat_role[i] == Head) {
    if (role == Head) {
      score += 30;
    } else if (role == Tail) {
      score -= 10;
    }
  }

  if (role == Tail) {
    if (i == 0) {
      // First char of pat matches a tail.
      score -= 40;
    } else if (!last) {
      // Matching a tail while previous char wasn't matched.
      score -= 30;
    }
  }

  return score;
}
// Prepares a matcher for `pattern`.
//
// pattern:     the search pattern; spaces are dropped from it. Only the first
//              kMaxPat characters are used.
// sensitivity: 0 = case-insensitive, 1 = case-folded (becomes case-sensitive
//              when the pattern contains an uppercase letter, otherwise
//              insensitive), 2 = case-sensitive.
FuzzyMatcher::FuzzyMatcher(std::string_view pattern, int sensitivity) {
  // pat_role and low_pat are fixed arrays of kMaxPat entries; clamp the
  // pattern so an over-long search term cannot write past their end.
  if (pattern.size() > static_cast<size_t>(kMaxPat))
    pattern = pattern.substr(0, kMaxPat);

  calculateRoles(pattern, pat_role, &pat_set);
  if (sensitivity == 1)
    sensitivity = pat_set & 1 << Upper ? 2 : 0;
  case_sensitivity = sensitivity;

  // Compact the pattern in place: `n` counts the characters kept so far, so
  // the role of a kept character moves from index `i` down to index `n`.
  size_t n = 0;
  for (size_t i = 0; i < pattern.size(); i++)
    if (pattern[i] != ' ') {
      pat += pattern[i];
      // tolower() is only defined for unsigned char values (or EOF).
      low_pat[n] = (char)::tolower(static_cast<unsigned char>(pattern[i]));
      pat_role[n] = pat_role[i];
      n++;
    }
}
// Scores `text` against the pattern given to the constructor.
//
// text:   the candidate string; it is referenced (not copied) for the
//         duration of the call.
// strict: when true, the first pattern character and the first text character
//         must either both have a role or both have none, otherwise kMinScore
//         is returned.
//
// Returns kMinScore when exactly one of pattern and text is empty or when no
// alignment exists, kMinScore + 1 when the text is longer than kMaxText, and
// otherwise the best alignment score (higher is better).
int FuzzyMatcher::match(std::string_view text, bool strict) {
  if (pat.empty() != text.empty())
    return kMinScore;
  int n = int(text.size());
  if (n > kMaxText)
    return kMinScore + 1;
  this->text = text;
  for (int i = 0; i < n; i++)
    // tolower() is only defined for unsigned char values (or EOF); text may
    // contain non-ASCII bytes, which are negative when `char` is signed.
    low_text[i] = (char)::tolower(static_cast<unsigned char>(text[i]));
  calculateRoles(text, text_role, &text_set);
  if (strict && n && !!pat_role[0] != !!text_role[0])
    return kMinScore;

  // dp[row][j][k]: best score after consuming j text characters, where k == 1
  // means text[j - 1] was matched and k == 0 means it was skipped. Only two
  // rows are kept; row (i & 1) holds the state after i pattern characters.
  dp[0][0][0] = dp[0][0][1] = 0;
  for (int j = 0; j < n; j++) {
    dp[0][j + 1][0] = dp[0][j][0] + missScore(j, false);
    dp[0][j + 1][1] = kMinScore * 2;
  }
  for (int i = 0; i < int(pat.size()); i++) {
    int(*pre)[2] = dp[i & 1];
    int(*cur)[2] = dp[(i + 1) & 1];
    cur[i][0] = cur[i][1] = kMinScore;
    for (int j = i; j < n; j++) {
      cur[j + 1][0] = std::max(cur[j][0] + missScore(j, false),
                               cur[j][1] + missScore(j, true));
      // For the first char of pattern, apply extra restriction to filter bad
      // candidates (e.g. |int| in |PRINT|)
      cur[j + 1][1] = (case_sensitivity ? pat[i] == text[j]
                                        : low_pat[i] == low_text[j] &&
                                              (i || text_role[j] != Tail ||
                                               pat[i] == text[j]))
                          ? std::max(pre[j][0] + matchScore(i, j, false),
                                     pre[j][1] + matchScore(i, j, true))
                          : kMinScore * 2;
    }
  }

  // Enumerate the end position of the match in str. Each removed trailing
  // character has a penalty.
  int ret = kMinScore;
  for (int j = pat.size(); j <= n; j++)
    ret = std::max(ret, dp[pat.size() & 1][j][1] - 2 * (n - j));
  return ret;
}
} // namespace ivy
// Test suite carried over with the ccls source this file is based on. It is
// compiled out by `#if 0` and is kept for reference only: it calls
// `fuzzy.Match(text)`, whereas the class in this file exposes
// `match(text, strict)` inside namespace `ivy`, so it would not build as-is.
#if 0
TEST_SUITE("fuzzy_match") {
// Returns true when the scores of `texts` against `pat` are in non-increasing
// order; prints every text with its score when they are not.
bool Ranks(std::string_view pat, std::vector<const char*> texts) {
FuzzyMatcher fuzzy(pat, 0);
std::vector<int> scores;
for (auto text : texts)
scores.push_back(fuzzy.Match(text));
bool ret = true;
for (size_t i = 0; i < texts.size() - 1; i++)
if (scores[i] < scores[i + 1]) {
ret = false;
break;
}
if (!ret) {
for (size_t i = 0; i < texts.size(); i++)
printf("%s %d ", texts[i], scores[i]);
puts("");
}
return ret;
}
TEST_CASE("test") {
FuzzyMatcher fuzzy("", 0);
CHECK(fuzzy.Match("") == 0);
CHECK(fuzzy.Match("aaa") < 0);
// case
CHECK(Ranks("monad", {"monad", "Monad", "mONAD"}));
// initials
CHECK(Ranks("ab", {"ab", "aoo_boo", "acb"}));
CHECK(Ranks("CC", {"CamelCase", "camelCase", "camelcase"}));
CHECK(Ranks("cC", {"camelCase", "CamelCase", "camelcase"}));
CHECK(Ranks("c c", {"camelCase", "camel case", "CamelCase", "camelcase",
"camel ace"}));
CHECK(Ranks("Da.Te",
{"Data.Text", "Data.Text.Lazy", "Data.Aeson.Encoding.text"}));
CHECK(Ranks("foo bar.h", {"foo/bar.h", "foobar.h"}));
// prefix
CHECK(Ranks("is", {"isIEEE", "inSuf"}));
// shorter
CHECK(Ranks("ma", {"map", "many", "maximum"}));
CHECK(Ranks("print", {"printf", "sprintf"}));
// score(PRINT) = kMinScore
CHECK(Ranks("ast", {"ast", "AST", "INT_FAST16_MAX"}));
// score(PRINT) > kMinScore
CHECK(Ranks("Int", {"int", "INT", "PRINT"}));
}
}
#endif

37
cpp/fuzzy_match.hpp Normal file
View file

@ -0,0 +1,37 @@
// Copyright 2017-2018 ccls Authors
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include <climits>
#include <string>
#include <string_view>
namespace ivy {
// Scores how well a text matches a fixed pattern. The pattern is prepared once
// by the constructor; match() is then called per candidate text.
class FuzzyMatcher {
public:
// Capacity of the fixed per-pattern buffers (low_pat, pat_role).
constexpr static int kMaxPat = 100;
// Capacity of the fixed per-text buffers (low_text, text_role, dp).
constexpr static int kMaxText = 200;
// Negative but far from INT_MIN so that intermediate results are hard to
// overflow.
constexpr static int kMinScore = INT_MIN / 4;
// 0: case-insensitive
// 1: case-folded, i.e. insensitive if no input character is uppercase.
// 2: case-sensitive
FuzzyMatcher(std::string_view pattern, int case_sensitivity);
// Returns the score of `text` against the pattern; see the definition in
// fuzzy_match.cpp for the meaning of `strict`.
int match(std::string_view text, bool strict);
private:
// Case sensitivity mode passed to the constructor (0, 1 or 2 as listed above).
int case_sensitivity;
// The pattern as stored by the constructor.
std::string pat;
// View of the text passed to the current match() call; not owned.
std::string_view text;
// Character-class bit sets of the pattern and of the current text.
int pat_set, text_set;
// Lower-cased copies of the pattern and of the current text.
char low_pat[kMaxPat], low_text[kMaxText];
// Per-character roles of the pattern and of the current text.
int pat_role[kMaxPat], text_role[kMaxText];
// Score table used by match().
int dp[2][kMaxText + 1][2];
int matchScore(int i, int j, bool last);
int missScore(int j, bool last);
};
} // namespace ivy

41
cpp/lib.cpp Normal file
View file

@ -0,0 +1,41 @@
#include <cstring>
#include <map>
#include <string>
#include <vector>
#include "./file_scanner.hpp"
#include "./fuzzy_match.hpp"
#include "./match.hpp"
#include "./sorter.hpp"
namespace ivy {
// Scan results keyed by the base directory string. ivy_init() stores or
// replaces an entry, ivy_files() fills in a missing entry on first use, and
// nothing in this file removes entries.
// NOTE(review): the map is accessed without any locking; presumably the
// exported functions are only called from a single thread, confirm.
static std::map<std::string, std::vector<std::string>> file_cache;
}; // namespace ivy
extern "C" void ivy_init(const char* dir) {
auto scanner = ivy::FileScanner(dir);
ivy::file_cache[std::string(dir)] = scanner.scan();
}
extern "C" int ivy_match(const char* pattern, const char* text) {
auto matcher = ivy::FuzzyMatcher(pattern, 0);
return matcher.match(text, false);
}
// Returns the files below `base_dir` that match `search`, best match first,
// as one NUL-terminated string with a "\n" after every path.
//
// The directory is scanned on first use and served from the cache afterwards.
//
// Ownership: the returned buffer belongs to the library and stays valid only
// until the next call to ivy_files() on the same thread. The caller must copy
// it (the Lua wrapper does so via ffi.string) and must not free it. This
// replaces the earlier `new std::string` per call, which was never released.
//
// A null `search` or `base_dir` yields an empty string.
extern "C" char* ivy_files(const char* search, const char* base_dir) {
  static thread_local std::string output;
  output.clear();

  if (search == nullptr || base_dir == nullptr) {
    return output.data();
  }

  const std::string cache_key(base_dir);
  auto cached = ivy::file_cache.find(cache_key);
  if (cached == ivy::file_cache.end()) {
    auto scanner = ivy::FileScanner(cache_key);
    cached = ivy::file_cache.emplace(cache_key, scanner.scan()).first;
  }

  auto sorter = ivy::Sorter(search);
  for (ivy::Match const& match : sorter.sort(cached->second)) {
    output.append(match.content);
    output.push_back('\n');
  }

  return output.data();
}

14
cpp/match.hpp Normal file
View file

@ -0,0 +1,14 @@
#pragma once
#include <string>
namespace ivy {

// A candidate string together with the score it was given.
struct Match {
  int score;
  std::string content;
};

// Comparison predicate that orders matches by descending score.
//
// Declared `inline` rather than `static`: this is a header, and `static`
// would give every translation unit that includes it a private copy.
inline bool sort_match(const Match& a, const Match& b) { return a.score > b.score; }

}  // namespace ivy

45
cpp/sorter.hpp Normal file
View file

@ -0,0 +1,45 @@
#pragma once
#include <algorithm>
#include <cstddef>
#include <mutex>
#include <string>
#include <string_view>
#include <thread>
#include <vector>

#include "./fuzzy_match.hpp"
#include "./match.hpp"
#include "./thread_pool.hpp"
namespace ivy {
class Sorter {
ivy::ThreadPool m_thread_pool;
std::string_view m_term;
std::mutex m_matches_lock;
std::vector<Match> m_matches;
inline void add_entry(const std::string& file) {
ivy::FuzzyMatcher matcher(m_term, 0);
int score = matcher.match(file, false);
if (score > -200) {
std::unique_lock<std::mutex> lock(m_matches_lock);
m_matches.emplace_back(Match{score, file});
}
}
public:
explicit Sorter(std::string_view term) : m_term(term) {}
~Sorter() { m_thread_pool.shutdown(); }
inline std::vector<Match> sort(std::vector<std::string> list) {
for (auto item : list) {
m_thread_pool.push([item, this]() { add_entry(item); });
}
while (!m_thread_pool.empty()) {
// Wait for all of the jobs to be finished
}
std::sort(m_matches.begin(), m_matches.end(), sort_match);
return m_matches;
}
};
} // namespace ivy

70
cpp/thread_pool.cpp Normal file
View file

@ -0,0 +1,70 @@
// Copyright 2021 Practically.io All rights reserved
//
// Use of this source is governed by a BSD-style
// licence that can be found in the LICENCE file or at
// https://www.practically.io/copyright/
#include "thread_pool.hpp"
namespace ivy {
void ThreadPool::run_job() {
std::function<void()> job;
while (true) {
{
std::unique_lock<std::mutex> lock(m_queue_lock);
m_condition.wait(lock, [&]() { return !m_queue.empty() || m_stop; });
if (m_queue.empty()) {
return;
}
job = m_queue.front();
m_queue.pop();
}
job();
{
// Only decrement the job count when the job has finished running.
std::unique_lock<std::mutex> lock(m_count_lock);
m_job_count--;
}
}
}
// Starts the worker threads of the pool.
//
// thread_count: number of workers to start. A value of 0 is treated as 1:
//               std::thread::hardware_concurrency() is allowed to return 0
//               when the value is not computable, and a pool without workers
//               would never run any pushed job.
void ThreadPool::create_threads(unsigned int thread_count) {
  if (thread_count == 0) {
    thread_count = 1;
  }

  m_threads.reserve(thread_count);
  for (unsigned int i = 0; i < thread_count; i++) {
    m_threads.emplace_back([this] { run_job(); });
  }
}
// Queues `job` for execution on one of the pool threads and wakes one worker.
void ThreadPool::push(std::function<void()> job) {
  // Count the job before it becomes visible in the queue, so that empty()
  // reports it as pending from this point on.
  {
    std::lock_guard<std::mutex> count_guard(m_count_lock);
    ++m_job_count;
  }

  {
    std::lock_guard<std::mutex> queue_guard(m_queue_lock);
    m_queue.push(job);
  }

  m_condition.notify_one();
}
// Returns true when no pushed job is pending, i.e. every job pushed so far
// has finished running (queued and currently running jobs both count as
// pending).
bool ThreadPool::empty() {
  std::lock_guard<std::mutex> count_guard(m_count_lock);
  const bool idle = (m_job_count == 0);
  return idle;
}
// Stops the pool: signals every worker to exit once the queue has drained and
// blocks until all of them have finished.
//
// Safe to call more than once. Threads that were already joined by an earlier
// call are skipped, because joining a non-joinable std::thread throws.
void ThreadPool::shutdown() {
  {
    std::unique_lock<std::mutex> lock(m_queue_lock);
    m_stop = true;
  }

  m_condition.notify_all();
  for (auto &thread : m_threads) {
    if (thread.joinable()) {
      thread.join();
    }
  }
}
} // namespace ivy

66
cpp/thread_pool.hpp Normal file
View file

@ -0,0 +1,66 @@
// Copyright 2021 Practically.io All rights reserved
//
// Use of this source is governed by a BSD-style
// licence that can be found in the LICENCE file or at
// https://www.practically.io/copyright/
#pragma once
#include <condition_variable>
#include <functional>
#include <queue>
#include <thread>
namespace ivy {
// Basic thread pool implementation to run callbacks distributed across
// specified number of threads
//
// Example:
//
// ivy::ThreadPool thread_pool;
// for (int i = 0; i < 10; i++) {
// thread_pool.push([i]() {
// std::cout << "The number is " << i << std::endl;
// });
// }
//
// thread_pool.shutdown();
//
class ThreadPool {
// Set by shutdown(); tells the workers to exit once the queue is empty.
bool m_stop = false;
// Need to track the number of jobs that need to be processed separately
// because we can't rely on the queue length to check if the pool has finished
// all the jobs. It does not take into account the jobs that have already been
// picked up by a thread.
int m_job_count = 0;
// Guards m_queue and m_stop.
std::mutex m_queue_lock;
// Jobs that have been pushed but not yet picked up by a worker.
std::queue<std::function<void()>> m_queue;
// Guards m_job_count.
std::mutex m_count_lock;
// The worker threads, started by create_threads() and joined by shutdown().
std::vector<std::thread> m_threads;
// Signalled when a job is pushed and when the pool is shut down.
std::condition_variable m_condition;
// Worker loop run by every thread of the pool.
void run_job();
// Starts `thread_count` worker threads.
void create_threads(unsigned int thread_count);
public:
// Create a new thread pool with the maximum number of threads you can have on
// the current machine
ThreadPool() { create_threads(std::thread::hardware_concurrency()); }
// Create a thread pool that will use the specified number of threads
explicit ThreadPool(unsigned int thread_count) {
create_threads(thread_count);
}
// Push a callback function into the queue that will be run on the thread
// pool at some time.
void push(std::function<void()>);
// Tests to see if there are any jobs that still need to be processed by the
// queue
bool empty();
// Shuts down the thread pool and waits for the queue to be empty. This must
// be called when all of the jobs have been pushed into the queue. This is a
// blocking operation and will not exit until the queue is empty and all of
// the pushed jobs have been handled.
// NOTE(review): no destructor is declared in this class, so the owner has to
// call shutdown() before the pool is destroyed; destroying a std::thread that
// is still joinable terminates the program.
void shutdown();
};
} // namespace ivy

30
lua/ivy/libivy.lua Normal file
View file

@ -0,0 +1,30 @@
-- Path of the compiled shared library, resolved relative to the directory of
-- this file (`<plugin root>/lua/ivy`).
local library_path = (function()
  -- `source` is "@<path of this file>": drop the leading "@" and the trailing
  -- file name to get the directory. This no longer depends on the length of a
  -- hard-coded file name. Falls back to "." when the source carries no
  -- directory part.
  local source = debug.getinfo(1, "S").source
  local dirname = source:sub(2):match "^(.*)/[^/]*$" or "."

  -- CMake names shared libraries `libivy.dylib` on macOS and `libivy.so` on
  -- other Unix systems. `jit` is the LuaJIT global.
  local extension = (jit and jit.os == "OSX") and "dylib" or "so"

  -- Swap `Release` for `Debug` in the path below to load a debug build.
  return dirname .. "/../../build/Release/lib/libivy." .. extension
end)()
local ffi = require "ffi"
-- Handle to the compiled shared library.
local ivy_c = ffi.load(library_path)

-- C declarations of the functions exported by the shared library.
ffi.cdef [[
void ivy_init(const char*);
int ivy_match(const char*, const char*);
char* ivy_files(const char*, const char*);
]]

-- Thin Lua wrappers around the exported C functions.
local libivy = {}

-- Calls the native `ivy_init` for `dir`.
function libivy.ivy_init(dir)
  ivy_c.ivy_init(dir)
end

-- Returns the integer result of the native `ivy_match` for `pattern` and
-- `text`.
function libivy.ivy_match(pattern, text)
  local score = ivy_c.ivy_match(pattern, text)
  return score
end

-- Calls the native `ivy_files` for `pattern` and `base_dir` and returns its
-- result copied into a Lua string.
function libivy.ivy_files(pattern, base_dir)
  local raw = ivy_c.ivy_files(pattern, base_dir)
  return ffi.string(raw)
end

return libivy

View file

@ -12,7 +12,7 @@ local chars = {
"[", "]", " ",
}
local function parse_lines(lines)
local function string_to_table(lines)
local items = {}
for line in lines:gmatch "[^\r\n]+" do
table.insert(items, line)
@ -21,8 +21,18 @@ local function parse_lines(lines)
return items
end
local function parse_array(arr)
return arr
local function set_items_string(buffer, lines)
vim.api.nvim_buf_set_lines(buffer, 0, 9999, false, string_to_table(lines))
end
local function set_items_array(buffer, lines)
if type(lines[1]) == "string" then
vim.api.nvim_buf_set_lines(buffer, 0, 9999, false, lines)
else
for i = 1, #lines do
vim.api.nvim_buf_set_lines(buffer, i - 1, 9999, false, { lines[i][2] })
end
end
end
local window = {}
@ -100,28 +110,21 @@ window.update = function()
end
window.set_items = function(items)
local lines = {}
if type(items) == "string" then
lines = parse_lines(items)
if #items == 0 then
vim.api.nvim_buf_set_lines(window.get_buffer(), 0, 9999, false, { "-- No Items --" })
elseif type(items) == "string" then
set_items_string(window.get_buffer(), items)
elseif type(items) == "table" then
lines = parse_array(items)
set_items_array(window.get_buffer(), items)
end
if #lines == 0 then
lines = { "-- No Items --" }
end
vim.api.nvim_buf_set_lines(window.get_buffer(), 0, 9999, false, lines)
local line_count = #lines
window.index = 0
local line_count = vim.api.nvim_buf_line_count(window.buffer)
window.index = line_count - 1
if line_count > 10 then
line_count = 10
end
vim.api.nvim_win_set_height(window.window, line_count)
vim.api.nvim_win_set_height(window.window, line_count)
window.update()
end

View file

@ -1,5 +1,6 @@
local controller = require "ivy.controller"
local utils = require "ivy.utils"
local libivy = require "ivy.libivy"
-- Put the controller in to the vim global so we can access it in mappings
-- better without requires. You can call controller commands like `vim.ivy.xxx`.
@ -10,7 +11,9 @@ vim.api.nvim_create_user_command("IvyAg", function()
end, { bang = true, desc = "Run ag to search for content in files" })
vim.api.nvim_create_user_command("IvyFd", function()
vim.ivy.run(utils.command_finder("fd --hidden --type f --exclude .git", 0), utils.file_action())
vim.ivy.run(function(term)
return libivy.ivy_files(term, vim.fn.getcwd())
end, utils.file_action())
end, { bang = true, desc = "Find files in the project" })
vim.api.nvim_create_user_command("IvyBuffers", function()