From e7b7dc1e4cc055dc1c63be2f7956ae2c99846eb7 Mon Sep 17 00:00:00 2001 From: Ade Attwood Date: Sat, 13 Aug 2022 15:09:48 +0100 Subject: [PATCH 01/15] feat: experimental first rust implementation of libivy --- .gitignore | 1 + Cargo.lock | 196 ++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 17 ++++ lua/ivy/libivy.lua | 2 +- rust/finder.rs | 26 ++++++ rust/lib.rs | 68 +++++++++++++++ rust/matcher.rs | 18 ++++ rust/sorter.rs | 48 +++++++++++ rust/thread_pool.rs | 87 ++++++++++++++++++++ 9 files changed, 462 insertions(+), 1 deletion(-) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 rust/finder.rs create mode 100644 rust/lib.rs create mode 100644 rust/matcher.rs create mode 100644 rust/sorter.rs create mode 100644 rust/thread_pool.rs diff --git a/.gitignore b/.gitignore index 88eb5a1..e138ae8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ build +target .cache compile_commands.json .luacheckcache \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..6c62469 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,196 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "memchr", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crossbeam-utils" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "fuzzy-matcher" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94" +dependencies = [ + "thread_local", +] + +[[package]] +name = "globset" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a1e17342619edbc21a964c2afbeb6c820c6a2560032872f397bb97ea127bd0a" +dependencies = [ + "aho-corasick", + "bstr", + "fnv", + "log", + "regex", +] + +[[package]] +name = "ignore" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "713f1b139373f96a2e0ce3ac931cd01ee973c3c5dd7c40c0c2efe96ad2b6751d" +dependencies = [ + "crossbeam-utils", + "globset", + "lazy_static", + "log", + "memchr", + "regex", + "same-file", + "thread_local", + "walkdir", + "winapi-util", 
+] + +[[package]] +name = "ivy" +version = "0.0.1" +dependencies = [ + "fuzzy-matcher", + "ignore", + "lazy_static", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "once_cell" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" + +[[package]] +name = "regex" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thread_local" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +dependencies = [ + "once_cell", +] + +[[package]] +name = "walkdir" +version = "2.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..cd601b6 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "ivy" +version = "0.0.1" +edition = "2021" + +[lib] +name = "ivyrs" +crate-type = ["cdylib"] +path = "rust/lib.rs" + +[dependencies] +ignore = "0.4" +fuzzy-matcher = "0.3.7" +lazy_static = "1.4.0" + +[profile.release] +opt-level = 3 diff --git a/lua/ivy/libivy.lua b/lua/ivy/libivy.lua index d6222fa..0b00c2f 100644 --- a/lua/ivy/libivy.lua +++ b/lua/ivy/libivy.lua @@ -1,6 +1,6 @@ local library_path = (function() local dirname = string.sub(debug.getinfo(1).source, 2, #"/fzf_lib.lua" * -1) - return dirname .. "/../../build/Release/lib/libivy.so" + return dirname .. 
"/../../target/release/libivyrs.so" end)() local ffi = require "ffi" diff --git a/rust/finder.rs b/rust/finder.rs new file mode 100644 index 0000000..4a3ccf5 --- /dev/null +++ b/rust/finder.rs @@ -0,0 +1,26 @@ +use ignore::WalkBuilder; +use std::fs; + +pub struct Options { + pub directory: String, +} + +pub fn find_files(options: Options) -> Vec { + let mut files: Vec = Vec::new(); + let base_path = &fs::canonicalize(options.directory).unwrap(); + + let mut builder = WalkBuilder::new(base_path); + builder.ignore(true).hidden(true); + + for result in builder.build() { + let absolute_candidate = result.unwrap(); + let candidate_path = absolute_candidate.path().strip_prefix(base_path).unwrap(); + if candidate_path.is_dir() { + continue; + } + + files.push(candidate_path.to_str().unwrap().to_string()); + } + + return files; +} diff --git a/rust/lib.rs b/rust/lib.rs new file mode 100644 index 0000000..52b861d --- /dev/null +++ b/rust/lib.rs @@ -0,0 +1,68 @@ +mod matcher; +mod finder; +mod sorter; +mod thread_pool; + +use std::sync::Mutex; +use std::collections::HashMap; +use std::os::raw::{c_int, c_char}; +use std::ffi::CString; +use std::ffi::CStr; + +#[macro_use] +extern crate lazy_static; + +lazy_static! 
{ + static ref GLOBAL_FILE_CACHE: Mutex>> = return Mutex::new(HashMap::new()) ; +} + +fn to_string(input: *const c_char) -> String { + return unsafe { CStr::from_ptr(input) }.to_str().unwrap().to_string(); +} + +fn get_files(directory: &String) -> Vec { + let mut cache = GLOBAL_FILE_CACHE.lock().unwrap(); + if !cache.contains_key(directory) { + let finder_options = finder::Options{ directory: directory.clone() }; + cache.insert( directory.clone(), finder::find_files(finder_options)); + } + + return cache.get(directory).unwrap().to_vec(); +} + +#[no_mangle] +pub extern "C" fn ivy_init() {} + +#[no_mangle] +pub extern "C" fn ivy_match(c_pattern: *const c_char, c_text: *const c_char) -> c_int { + let pattern = to_string(c_pattern); + let text = to_string(c_text); + + let m = matcher::Matcher{ pattern }; + return m.score(text) as i32; +} + +#[no_mangle] +pub extern "C" fn ivy_files(c_pattern: *const c_char, c_base_dir: *const c_char) -> *const c_char { + let pattern = to_string(c_pattern); + let directory = to_string(c_base_dir); + + // Bail out early if the pattern is empty its never going to find anything + if pattern.is_empty() { + return CString::new("").unwrap().into_raw() + } + + let files = get_files(&directory); + + let mut output = String::new(); + let sorter_options = sorter::Options::new(pattern); + + let files = sorter::sort_strings(sorter_options, files); + for file in files.lock().unwrap().iter() { + output.push_str(&file.content); + output.push('\n'); + } + + return CString::new(output).unwrap().into_raw() +} + diff --git a/rust/matcher.rs b/rust/matcher.rs new file mode 100644 index 0000000..43d364d --- /dev/null +++ b/rust/matcher.rs @@ -0,0 +1,18 @@ +use fuzzy_matcher::FuzzyMatcher; +use fuzzy_matcher::skim::SkimMatcherV2; + +pub struct Matcher { + /// The search pattern that we want to match against some text + pub pattern: String, +} + +impl Matcher { + pub fn score(self: &Self, text: String) -> i64 { + let matcher = SkimMatcherV2::default(); + if 
let Some((score, _indices)) = matcher.fuzzy_indices(&text, &self.pattern) { + return score; + } + + return 0; + } +} diff --git a/rust/sorter.rs b/rust/sorter.rs new file mode 100644 index 0000000..3bb5c47 --- /dev/null +++ b/rust/sorter.rs @@ -0,0 +1,48 @@ +use super::matcher; +use super::thread_pool; + + +use std::sync::Mutex; +use std::sync::Arc; + +pub struct Match { + pub score: i64, + pub content: String, +} + +pub struct Options { + pub pattern: String, + pub minimun_score: i64, +} + +impl Options { + pub fn new(pattern: String) -> Self { + return Self { pattern, minimun_score: 20 }; + } +} + +pub fn sort_strings(options: Options, strings: Vec) -> Arc>> { + let matches: Arc>> = Arc::new(Mutex::new(Vec::new())); + let matcher = Arc::new(Mutex::new(matcher::Matcher{ pattern: options.pattern })); + + let pool = thread_pool::ThreadPool::new(std::thread::available_parallelism().unwrap().get()); + + for string in strings { + let thread_matcher = Arc::clone(&matcher); + let thread_matches = Arc::clone(&matches); + pool.execute(move || { + let score = thread_matcher.lock().unwrap().score(string.to_string()); + if score > 25 { + let mut tmp = thread_matches.lock().unwrap(); + let content = string.clone(); + tmp.push(Match{ score, content }); + } + }) + } + + drop(pool); + + matches.lock().unwrap().sort_by(|a, b| a.score.cmp(&b.score)); + return matches; +} + diff --git a/rust/thread_pool.rs b/rust/thread_pool.rs new file mode 100644 index 0000000..df49872 --- /dev/null +++ b/rust/thread_pool.rs @@ -0,0 +1,87 @@ +use std::sync::mpsc; +use std::sync::Arc; +use std::sync::Mutex; +use std::thread; + +enum Message { + NewJob(Job), + Terminate, +} + +pub struct ThreadPool { + jobs: mpsc::Sender, + threads: Vec, +} + +trait FnBox { + fn call_box(self: Box); +} + +impl FnBox for F { + fn call_box(self: Box) { + (*self)() + } +} + +type Job = Box; + +impl ThreadPool { + pub fn new(thread_count: usize) -> Self { + let (jobs, receiver) = mpsc::channel(); + let receiver = 
Arc::new(Mutex::new(receiver)); + + let mut threads: Vec = Vec::new(); + for id in 1..thread_count { + threads.push(Worker::new(id, Arc::clone(&receiver))); + } + + return ThreadPool { jobs, threads }; + } + + pub fn execute(&self, f: F) + where + F: FnOnce() + Send + 'static, + { + let job = Box::new(f); + self.jobs.send(Message::NewJob(job)).unwrap(); + } +} + +impl Drop for ThreadPool { + fn drop(&mut self) { + for _ in &mut self.threads { + self.jobs.send(Message::Terminate).unwrap(); + } + + for worker in &mut self.threads { + if let Some(thread) = worker.thread.take() { + thread.join().unwrap(); + } + } + } +} + +struct Worker { + id: usize, + thread: Option>, +} + +impl Worker { + fn new(id: usize, receiver: Arc>>) -> Worker { + let thread = thread::spawn(move || loop { + let message = receiver.lock().unwrap().recv().unwrap(); + + match message { + Message::NewJob(job) => job.call_box(), + Message::Terminate => { + break; + } + } + }); + + return Worker { + id, + thread: Some(thread), + }; + } +} From 9fdb633f3e9c909010f0b7c03547f04964517715 Mon Sep 17 00:00:00 2001 From: Ade Attwood Date: Thu, 25 Aug 2022 19:40:49 +0100 Subject: [PATCH 02/15] chore: cache directory when ivy_init is called This is now implemented in rust --- rust/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rust/lib.rs b/rust/lib.rs index 52b861d..a0f0faf 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -31,7 +31,10 @@ fn get_files(directory: &String) -> Vec { } #[no_mangle] -pub extern "C" fn ivy_init() {} +pub extern "C" fn ivy_init(c_base_dir: *const c_char) { + let directory = to_string(c_base_dir); + get_files(&directory); +} #[no_mangle] pub extern "C" fn ivy_match(c_pattern: *const c_char, c_text: *const c_char) -> c_int { From f4a65a574cc143571c292a8a584bff6b4a56df01 Mon Sep 17 00:00:00 2001 From: Ade Attwood Date: Thu, 25 Aug 2022 19:42:22 +0100 Subject: [PATCH 03/15] perf: add instance prop of SkimMatcherV2 This is so we are not crating an new instance 
of this each time we are scoring a match. --- rust/lib.rs | 2 +- rust/matcher.rs | 11 +++++++++-- rust/sorter.rs | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/rust/lib.rs b/rust/lib.rs index a0f0faf..7baaad6 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -41,7 +41,7 @@ pub extern "C" fn ivy_match(c_pattern: *const c_char, c_text: *const c_char) -> let pattern = to_string(c_pattern); let text = to_string(c_text); - let m = matcher::Matcher{ pattern }; + let m = matcher::Matcher::new( pattern ); return m.score(text) as i32; } diff --git a/rust/matcher.rs b/rust/matcher.rs index 43d364d..28eac59 100644 --- a/rust/matcher.rs +++ b/rust/matcher.rs @@ -4,12 +4,19 @@ use fuzzy_matcher::skim::SkimMatcherV2; pub struct Matcher { /// The search pattern that we want to match against some text pub pattern: String, + matcher: SkimMatcherV2, } impl Matcher { + pub fn new(pattern: String) -> Self { + return Self { + pattern, + matcher: SkimMatcherV2::default(), + } + } + pub fn score(self: &Self, text: String) -> i64 { - let matcher = SkimMatcherV2::default(); - if let Some((score, _indices)) = matcher.fuzzy_indices(&text, &self.pattern) { + if let Some((score, _indices)) = self.matcher.fuzzy_indices(&text, &self.pattern) { return score; } diff --git a/rust/sorter.rs b/rust/sorter.rs index 3bb5c47..eccb179 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -23,7 +23,7 @@ impl Options { pub fn sort_strings(options: Options, strings: Vec) -> Arc>> { let matches: Arc>> = Arc::new(Mutex::new(Vec::new())); - let matcher = Arc::new(Mutex::new(matcher::Matcher{ pattern: options.pattern })); + let matcher = Arc::new(Mutex::new(matcher::Matcher::new(options.pattern))); let pool = thread_pool::ThreadPool::new(std::thread::available_parallelism().unwrap().get()); From ac01e9e9a40bb401e7880ce5997b327e894a027c Mon Sep 17 00:00:00 2001 From: Ade Attwood Date: Thu, 25 Aug 2022 21:01:19 +0100 Subject: [PATCH 04/15] docs: update the build info and benchmark status --- 
README.md | 29 ++++++++++++++++++----------- scripts/benchmark.lua | 2 +- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index c4b5903..76fe9c4 100644 --- a/README.md +++ b/README.md @@ -25,14 +25,11 @@ For the native searching, you will need to compile the shard library. You can do that by running the below command in the root of the plugin. ```sh -cmake -DCMAKE_BUILD_TYPE=Release -B build/Release && (cd build/Release; make -j) +cargo build --release ``` -If you are missing build dependencies, you can install them via apt. - -```sh -sudo apt-get install build-essential pkg-config cmake -``` +You will need to have the rust toolchain installed. You can find more about +that [here](https://www.rust-lang.org/tools/install) ## Features @@ -95,12 +92,22 @@ bash ./scripts/fixtures.bash luajit ./scripts/benchmark.lua ``` -Current benchmark status with 8 CPU(s) Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz +Current benchmark status running on a `e2-standard-2` 2 vCPU + 8 GB memory VM +running on GCP. 
-| Name | Total | Adverage | Min | Max | -| ---------------------------- | ------------- | ------------- | ------------- | ------------- | -| ivy_match(file.lua) 1000000x | 02.353386 (s) | 00.000002 (s) | 00.000002 (s) | 00.000049 (s) | -| ivy_files(kubneties) 100x | 24.809576 (s) | 00.248096 (s) | 00.203167 (s) | 00.270263 (s) | +Rust + +| Name | Total | Adverage | Min | Max | +|--------------------------------|---------------|---------------|---------------|---------------| +| ivy_match(file.lua) 1000000x | 03.961640 (s) | 00.000004 (s) | 00.000003 (s) | 00.002146 (s) | +| ivy_files(kubernetes) 100x | 03.895758 (s) | 00.038958 (s) | 00.034903 (s) | 00.043660 (s) | + +CPP + +| Name | Total | Adverage | Min | Max | +|--------------------------------|---------------|---------------|---------------|---------------| +| ivy_match(file.lua) 1000000x | 01.855197 (s) | 00.000002 (s) | 00.000001 (s) | 00.000177 (s) | +| ivy_files(kubernetes) 100x | 14.696396 (s) | 00.146964 (s) | 00.056604 (s) | 00.168478 (s) | ## Other stuff you might like diff --git a/scripts/benchmark.lua b/scripts/benchmark.lua index 24a1e7a..2f1937b 100644 --- a/scripts/benchmark.lua +++ b/scripts/benchmark.lua @@ -43,6 +43,6 @@ benchmark("ivy_match(file.lua) 1000000x", 1000000, function() end) libivy.ivy_init "/tmp/ivy-trees/kubernetes" -benchmark("ivy_files(kubneties) 100x", 100, function() +benchmark("ivy_files(kubernetes) 100x", 100, function() libivy.ivy_files("file.go", "/tmp/ivy-trees/kubernetes") end) From 12a1a64c54f97ab4d2a61a1d731ecf84d6adb14b Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 10:25:05 +0100 Subject: [PATCH 05/15] Format and clippy --- README.md | 8 ++++---- rust/finder.rs | 2 +- rust/lib.rs | 31 ++++++++++++++++++------------- rust/matcher.rs | 15 +++++++-------- rust/sorter.rs | 18 +++++++++++------- rust/thread_pool.rs | 10 +++++----- scripts/benchmark.lua | 2 +- 7 files changed, 47 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 
76fe9c4..d523b32 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ to optimize, you will probably need to get a baseline on your hardware. There are fixtures provided that will create the directory structure of the [kubernetes](https://github.com/kubernetes/kubernetes) source code, from -somewhere arround commit sha 985c9202ccd250a5fe22c01faf0d8f83d804b9f3. This will +somewhere around commit sha 985c9202ccd250a5fe22c01faf0d8f83d804b9f3. This will create a directory tree of 23511 files a relative large source tree to get a good idea of performance. To create the source tree under `/tmp/ivy-trees/kubernetes` run the following command. This will need to be run @@ -95,16 +95,16 @@ luajit ./scripts/benchmark.lua Current benchmark status running on a `e2-standard-2` 2 vCPU + 8 GB memory VM running on GCP. -Rust +Rust -| Name | Total | Adverage | Min | Max | +| Name | Total | Average | Min | Max | |--------------------------------|---------------|---------------|---------------|---------------| | ivy_match(file.lua) 1000000x | 03.961640 (s) | 00.000004 (s) | 00.000003 (s) | 00.002146 (s) | | ivy_files(kubernetes) 100x | 03.895758 (s) | 00.038958 (s) | 00.034903 (s) | 00.043660 (s) | CPP -| Name | Total | Adverage | Min | Max | +| Name | Total | Average | Min | Max | |--------------------------------|---------------|---------------|---------------|---------------| | ivy_match(file.lua) 1000000x | 01.855197 (s) | 00.000002 (s) | 00.000001 (s) | 00.000177 (s) | | ivy_files(kubernetes) 100x | 14.696396 (s) | 00.146964 (s) | 00.056604 (s) | 00.168478 (s) | diff --git a/rust/finder.rs b/rust/finder.rs index 4a3ccf5..3a4dd31 100644 --- a/rust/finder.rs +++ b/rust/finder.rs @@ -22,5 +22,5 @@ pub fn find_files(options: Options) -> Vec { files.push(candidate_path.to_str().unwrap().to_string()); } - return files; + files } diff --git a/rust/lib.rs b/rust/lib.rs index 7baaad6..db271bf 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -1,30 +1,35 @@ -mod matcher; mod finder; +mod 
matcher; mod sorter; mod thread_pool; -use std::sync::Mutex; use std::collections::HashMap; -use std::os::raw::{c_int, c_char}; -use std::ffi::CString; use std::ffi::CStr; +use std::ffi::CString; +use std::os::raw::{c_char, c_int}; +use std::sync::Mutex; #[macro_use] extern crate lazy_static; lazy_static! { - static ref GLOBAL_FILE_CACHE: Mutex>> = return Mutex::new(HashMap::new()) ; + static ref GLOBAL_FILE_CACHE: Mutex>> = Mutex::new(HashMap::new()); } fn to_string(input: *const c_char) -> String { - return unsafe { CStr::from_ptr(input) }.to_str().unwrap().to_string(); + unsafe { CStr::from_ptr(input) } + .to_str() + .unwrap() + .to_string() } fn get_files(directory: &String) -> Vec { let mut cache = GLOBAL_FILE_CACHE.lock().unwrap(); if !cache.contains_key(directory) { - let finder_options = finder::Options{ directory: directory.clone() }; - cache.insert( directory.clone(), finder::find_files(finder_options)); + let finder_options = finder::Options { + directory: directory.clone(), + }; + cache.insert(directory.clone(), finder::find_files(finder_options)); } return cache.get(directory).unwrap().to_vec(); @@ -41,8 +46,9 @@ pub extern "C" fn ivy_match(c_pattern: *const c_char, c_text: *const c_char) -> let pattern = to_string(c_pattern); let text = to_string(c_text); - let m = matcher::Matcher::new( pattern ); - return m.score(text) as i32; + let m = matcher::Matcher::new(pattern); + + m.score(text) as i32 } #[no_mangle] @@ -52,7 +58,7 @@ pub extern "C" fn ivy_files(c_pattern: *const c_char, c_base_dir: *const c_char) // Bail out early if the pattern is empty its never going to find anything if pattern.is_empty() { - return CString::new("").unwrap().into_raw() + return CString::new("").unwrap().into_raw(); } let files = get_files(&directory); @@ -66,6 +72,5 @@ pub extern "C" fn ivy_files(c_pattern: *const c_char, c_base_dir: *const c_char) output.push('\n'); } - return CString::new(output).unwrap().into_raw() + CString::new(output).unwrap().into_raw() } - diff 
--git a/rust/matcher.rs b/rust/matcher.rs index 28eac59..0560717 100644 --- a/rust/matcher.rs +++ b/rust/matcher.rs @@ -1,5 +1,5 @@ -use fuzzy_matcher::FuzzyMatcher; use fuzzy_matcher::skim::SkimMatcherV2; +use fuzzy_matcher::FuzzyMatcher; pub struct Matcher { /// The search pattern that we want to match against some text @@ -9,17 +9,16 @@ pub struct Matcher { impl Matcher { pub fn new(pattern: String) -> Self { - return Self { + Self { pattern, matcher: SkimMatcherV2::default(), } } - pub fn score(self: &Self, text: String) -> i64 { - if let Some((score, _indices)) = self.matcher.fuzzy_indices(&text, &self.pattern) { - return score; - } - - return 0; + pub fn score(&self, text: String) -> i64 { + self.matcher + .fuzzy_indices(&text, &self.pattern) + .map(|(score, _indices)| score) + .unwrap_or_default() } } diff --git a/rust/sorter.rs b/rust/sorter.rs index eccb179..cdb9983 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -1,9 +1,8 @@ use super::matcher; use super::thread_pool; - -use std::sync::Mutex; use std::sync::Arc; +use std::sync::Mutex; pub struct Match { pub score: i64, @@ -17,7 +16,10 @@ pub struct Options { impl Options { pub fn new(pattern: String) -> Self { - return Self { pattern, minimun_score: 20 }; + Self { + pattern, + minimun_score: 20, + } } } @@ -35,14 +37,16 @@ pub fn sort_strings(options: Options, strings: Vec) -> Arc 25 { let mut tmp = thread_matches.lock().unwrap(); let content = string.clone(); - tmp.push(Match{ score, content }); + tmp.push(Match { score, content }); } }) } drop(pool); - matches.lock().unwrap().sort_by(|a, b| a.score.cmp(&b.score)); - return matches; + matches + .lock() + .unwrap() + .sort_by(|a, b| a.score.cmp(&b.score)); + matches } - diff --git a/rust/thread_pool.rs b/rust/thread_pool.rs index df49872..d2d287e 100644 --- a/rust/thread_pool.rs +++ b/rust/thread_pool.rs @@ -35,7 +35,7 @@ impl ThreadPool { threads.push(Worker::new(id, Arc::clone(&receiver))); } - return ThreadPool { jobs, threads }; + ThreadPool { 
jobs, threads } } pub fn execute(&self, f: F) @@ -62,7 +62,7 @@ impl Drop for ThreadPool { } struct Worker { - id: usize, + _id: usize, thread: Option>, } @@ -79,9 +79,9 @@ impl Worker { } }); - return Worker { - id, + Worker { + _id: id, thread: Some(thread), - }; + } } } diff --git a/scripts/benchmark.lua b/scripts/benchmark.lua index 2f1937b..457b671 100644 --- a/scripts/benchmark.lua +++ b/scripts/benchmark.lua @@ -35,7 +35,7 @@ local benchmark = function(name, n, callback) ) end -print "| Name | Total | Adverage | Min | Max |" +print "| Name | Total | Average | Min | Max |" print "|--------------------------------|---------------|---------------|---------------|---------------|" benchmark("ivy_match(file.lua) 1000000x", 1000000, function() From 8ab074b5377bee7608abfdc390b3b62e2cb88fb7 Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 14:20:17 +0100 Subject: [PATCH 06/15] Add benchmarks and an example bin for profiling --- Cargo.lock | 466 ++++++++++++++++++++++++++++++++++++ Cargo.toml | 16 +- benches/ivy_files.rs | 17 ++ benches/ivy_match.rs | 17 ++ examples/filename_search.rs | 5 + rust/lib.rs | 16 +- 6 files changed, 533 insertions(+), 4 deletions(-) create mode 100644 benches/ivy_files.rs create mode 100644 benches/ivy_match.rs create mode 100644 examples/filename_search.rs diff --git a/Cargo.lock b/Cargo.lock index 6c62469..11ab482 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,21 +11,141 @@ dependencies = [ "memchr", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bstr" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" dependencies = [ + "lazy_static", "memchr", + "regex-automata", + "serde", ] +[[package]] +name = "bumpalo" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "bitflags", + "textwrap", + "unicode-width", +] + +[[package]] +name = "criterion" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = 
"crossbeam-channel" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "once_cell", + "scopeguard", +] + [[package]] name = "crossbeam-utils" version = "0.8.11" @@ -36,6 +156,34 @@ dependencies = [ "once_cell", ] +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa 0.4.8", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "either" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" + [[package]] name = "fnv" version = "1.0.7" @@ -64,6 +212,21 @@ dependencies = [ "regex", ] +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "ignore" version = "0.4.18" @@ -82,21 +245,58 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "itertools" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "itoa" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" + [[package]] name = "ivy" version = "0.0.1" dependencies = [ + "criterion", "fuzzy-matcher", "ignore", "lazy_static", ] +[[package]] +name = "js-sys" +version = "0.3.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "libc" +version = "0.2.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" + [[package]] name = "log" version = "0.4.17" @@ -112,12 +312,116 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "plotters" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "716b4eeb6c4a1d3ecc956f75b43ec2e8e8ba80026413e70a3f41fd3313d3492b" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" + +[[package]] +name = "plotters-svg" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "proc-macro2" +version = "1.0.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = 
"quote" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + [[package]] name = "regex" version = "1.6.0" @@ -129,12 +433,24 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + [[package]] name = "regex-syntax" version = "0.6.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +[[package]] +name = "ryu" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" + [[package]] name = "same-file" version = "1.0.6" @@ -144,6 +460,70 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.144" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860" + 
+[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.144" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94ed3a816fb1d101812f83e789f888322c34e291f894f19590dc310963e87a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" +dependencies = [ + "itoa 1.0.3", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "thread_local" version = "1.1.4" @@ -153,6 +533,28 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-ident" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + [[package]] name = "walkdir" version = "2.3.2" @@ -164,6 +566,70 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a" + +[[package]] +name = "web-sys" +version = "0.3.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed055ab27f941423197eb86b2035720b1a3ce40504df082cac2ecc6ed73335a1" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index cd601b6..44b677a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" 
[lib] name = "ivyrs" -crate-type = ["cdylib"] +crate-type = ["cdylib", "rlib"] path = "rust/lib.rs" [dependencies] @@ -13,5 +13,19 @@ ignore = "0.4" fuzzy-matcher = "0.3.7" lazy_static = "1.4.0" +[dev-dependencies] +criterion = "0.3.6" + [profile.release] opt-level = 3 + +[profile.bench] +debug = true + +[[bench]] +name = "ivy_match" +harness = false + +[[bench]] +name = "ivy_files" +harness = false diff --git a/benches/ivy_files.rs b/benches/ivy_files.rs new file mode 100644 index 0000000..eac785f --- /dev/null +++ b/benches/ivy_files.rs @@ -0,0 +1,17 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use ivyrs::inner_files; + +pub fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("ivy_files(kubernetes)", |b| { + b.iter(|| { + inner_files( + black_box("file.go".to_owned()), + black_box("/tmp/ivy-trees/kubernetes".to_owned()), + ) + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/benches/ivy_match.rs b/benches/ivy_match.rs new file mode 100644 index 0000000..a3e16b9 --- /dev/null +++ b/benches/ivy_match.rs @@ -0,0 +1,17 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use ivyrs::inner_match; + +pub fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("ivy_match(file.lua)", |b| { + b.iter(|| { + inner_match( + black_box("file.lua".to_owned()), + black_box("some/long/path/to/file/file.lua".to_owned()), + ) + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/examples/filename_search.rs b/examples/filename_search.rs new file mode 100644 index 0000000..0e91b96 --- /dev/null +++ b/examples/filename_search.rs @@ -0,0 +1,5 @@ +use ivyrs::inner_files; + +pub fn main() { + inner_files("file.go".to_owned(), "/tmp/ivy-trees/kubernetes".to_owned()); +} diff --git a/rust/lib.rs b/rust/lib.rs index db271bf..0189f15 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -46,6 +46,10 @@ pub extern "C" fn 
ivy_match(c_pattern: *const c_char, c_text: *const c_char) -> let pattern = to_string(c_pattern); let text = to_string(c_text); + inner_match(pattern, text) +} + +pub fn inner_match(pattern: String, text: String) -> i32 { let m = matcher::Matcher::new(pattern); m.score(text) as i32 @@ -56,12 +60,18 @@ pub extern "C" fn ivy_files(c_pattern: *const c_char, c_base_dir: *const c_char) let pattern = to_string(c_pattern); let directory = to_string(c_base_dir); + let output = inner_files(pattern, directory); + + CString::new(output).unwrap().into_raw() +} + +pub fn inner_files(pattern: String, base_dir: String) -> String { // Bail out early if the pattern is empty its never going to find anything if pattern.is_empty() { - return CString::new("").unwrap().into_raw(); + return String::new(); } - let files = get_files(&directory); + let files = get_files(&base_dir); let mut output = String::new(); let sorter_options = sorter::Options::new(pattern); @@ -72,5 +82,5 @@ pub extern "C" fn ivy_files(c_pattern: *const c_char, c_base_dir: *const c_char) output.push('\n'); } - CString::new(output).unwrap().into_raw() + output } From ce28b248fa1a0c891dcce59a8c40a4d0c1ffc6cb Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 15:17:23 +0100 Subject: [PATCH 07/15] Add results to .gitignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e138ae8..d4be7ab 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ build target .cache compile_commands.json -.luacheckcache \ No newline at end of file +.luacheckcache +benchmarks +flamegraph* From 7fb8be541a2173f91ec0f072a73630806abcba8d Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:01:22 +0100 Subject: [PATCH 08/15] Reduce lock contention (round 1) - Use an async (i.e. unlimited buffer) MPSC channel instead of an Arc<Mutex<Vec<Match>>> for storing the scored matches in Sorter - Use Arc<Matcher> instead of Arc<Mutex<Matcher>> for the matcher, as it's not mutated and appears to be threadsafe.
This cuts average iteration time (on the benchmarked machine) from 25.98ms to 16.08ms for the ivy_files benchmark. --- .gitignore | 1 + rust/lib.rs | 9 +++++---- rust/sorter.rs | 33 ++++++++++++++++++++------------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index d4be7ab..ce5edeb 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ compile_commands.json .luacheckcache benchmarks flamegraph* +perf.data* diff --git a/rust/lib.rs b/rust/lib.rs index 0189f15..b1e6cf0 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -66,18 +66,19 @@ pub extern "C" fn ivy_files(c_pattern: *const c_char, c_base_dir: *const c_char) } pub fn inner_files(pattern: String, base_dir: String) -> String { - // Bail out early if the pattern is empty its never going to find anything + let mut output = String::new(); + + // Bail out early if the pattern is empty; it's never going to find anything if pattern.is_empty() { - return String::new(); + return output; } let files = get_files(&base_dir); - let mut output = String::new(); let sorter_options = sorter::Options::new(pattern); let files = sorter::sort_strings(sorter_options, files); - for file in files.lock().unwrap().iter() { + for file in files.iter() { output.push_str(&file.content); output.push('\n'); } diff --git a/rust/sorter.rs b/rust/sorter.rs index cdb9983..83ba5b9 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -1,8 +1,8 @@ use super::matcher; use super::thread_pool; +use std::sync::mpsc; use std::sync::Arc; -use std::sync::Mutex; pub struct Match { pub score: i64, @@ -23,30 +23,37 @@ impl Options { } } -pub fn sort_strings(options: Options, strings: Vec) -> Arc>> { - let matches: Arc>> = Arc::new(Mutex::new(Vec::new())); - let matcher = Arc::new(Mutex::new(matcher::Matcher::new(options.pattern))); +pub fn sort_strings(options: Options, strings: Vec) -> Vec { + let mut matches = Vec::new(); + let matcher = Arc::new(matcher::Matcher::new(options.pattern)); let pool = 
thread_pool::ThreadPool::new(std::thread::available_parallelism().unwrap().get()); + let (tx, rx) = mpsc::channel::(); + for string in strings { let thread_matcher = Arc::clone(&matcher); - let thread_matches = Arc::clone(&matches); + let thread_transmitter = tx.clone(); pool.execute(move || { - let score = thread_matcher.lock().unwrap().score(string.to_string()); + let score = thread_matcher.score(string.to_string()); if score > 25 { - let mut tmp = thread_matches.lock().unwrap(); - let content = string.clone(); - tmp.push(Match { score, content }); + thread_transmitter + .send(Match { + score, + content: string, + }) + .expect("Failed to push data to channel"); } }) } drop(pool); + drop(tx); - matches - .lock() - .unwrap() - .sort_by(|a, b| a.score.cmp(&b.score)); + while let Ok(result) = rx.recv() { + matches.push(result) + } + + matches.sort_by(|a, b| a.score.cmp(&b.score)); matches } From cec83937709db98117c094455f448a2c355ae8e4 Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:29:11 +0100 Subject: [PATCH 09/15] Remove multithreading for sorting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The relevant processes are so fast, mutexes and mutex locks are so expensive, and iterators so efficient, that it's actually faster to run single-threaded across all the data than to spin up a bunch of threads and have them basically spinlock waiting for the global mutex involved either directly or in a channel. ivy_files(kubernetes) time: [10.209 ms 10.245 ms 10.286 ms] change: [-36.781% -36.178% -35.601%] (p = 0.00 < 0.05) Performance has improved. ivy_match(file.lua) time: [1.1626 µs 1.1668 µs 1.1709 µs] change: [+0.2131% +1.5409% +2.9109%] (p = 0.02 < 0.05) Change within noise threshold. 
--- examples/filename_search.rs | 4 +++- rust/lib.rs | 2 +- rust/matcher.rs | 4 ++-- rust/sorter.rs | 36 ++++++++---------------------------- 4 files changed, 14 insertions(+), 32 deletions(-) diff --git a/examples/filename_search.rs b/examples/filename_search.rs index 0e91b96..68cba68 100644 --- a/examples/filename_search.rs +++ b/examples/filename_search.rs @@ -1,5 +1,7 @@ use ivyrs::inner_files; pub fn main() { - inner_files("file.go".to_owned(), "/tmp/ivy-trees/kubernetes".to_owned()); + let res = inner_files("file.go".to_owned(), "/tmp/ivy-trees/kubernetes".to_owned()); + + println!("{}", res); } diff --git a/rust/lib.rs b/rust/lib.rs index b1e6cf0..74bda52 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -52,7 +52,7 @@ pub extern "C" fn ivy_match(c_pattern: *const c_char, c_text: *const c_char) -> pub fn inner_match(pattern: String, text: String) -> i32 { let m = matcher::Matcher::new(pattern); - m.score(text) as i32 + m.score(text.as_str()) as i32 } #[no_mangle] diff --git a/rust/matcher.rs b/rust/matcher.rs index 0560717..cbbf6d7 100644 --- a/rust/matcher.rs +++ b/rust/matcher.rs @@ -15,9 +15,9 @@ impl Matcher { } } - pub fn score(&self, text: String) -> i64 { + pub fn score(&self, text: &str) -> i64 { self.matcher - .fuzzy_indices(&text, &self.pattern) + .fuzzy_indices(text, &self.pattern) .map(|(score, _indices)| score) .unwrap_or_default() } diff --git a/rust/sorter.rs b/rust/sorter.rs index 83ba5b9..cc7c31b 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -24,36 +24,16 @@ impl Options { } pub fn sort_strings(options: Options, strings: Vec) -> Vec { - let mut matches = Vec::new(); - let matcher = Arc::new(matcher::Matcher::new(options.pattern)); + let matcher = matcher::Matcher::new(options.pattern); - let pool = thread_pool::ThreadPool::new(std::thread::available_parallelism().unwrap().get()); - - let (tx, rx) = mpsc::channel::(); - - for string in strings { - let thread_matcher = Arc::clone(&matcher); - let thread_transmitter = tx.clone(); - 
pool.execute(move || { - let score = thread_matcher.score(string.to_string()); - if score > 25 { - thread_transmitter - .send(Match { - score, - content: string, - }) - .expect("Failed to push data to channel"); - } + let mut matches = strings + .into_iter() + .map(|candidate| Match { + score: matcher.score(candidate.as_str()), + content: candidate, }) - } - - drop(pool); - drop(tx); - - while let Ok(result) = rx.recv() { - matches.push(result) - } - + .filter(|m| m.score > 25) + .collect::>(); matches.sort_by(|a, b| a.score.cmp(&b.score)); matches } From c5e8677a37042c4152b3df9916725cf4ea74cf87 Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:34:15 +0100 Subject: [PATCH 10/15] Introduce Rayon for parallel iteration and sorting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use `into_par_iter()` before setting out to calculate scores and then filter by them This represents a more efficient parallelism approach, with no mutex or global state at top level. ivy_files(kubernetes) time: [4.5800 ms 4.6121 ms 4.6467 ms] change: [-55.056% -54.570% -54.133%] (p = 0.00 < 0.05) Performance has improved. ivy_match(file.lua) time: [1.1514 µs 1.1599 µs 1.1694 µs] change: [+0.4116% +2.0753% +3.6710%] (p = 0.01 < 0.05) Change within noise threshold. 
--- Cargo.lock | 1 + Cargo.toml | 1 + rust/sorter.rs | 7 ++----- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 11ab482..fd5d598 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -274,6 +274,7 @@ dependencies = [ "fuzzy-matcher", "ignore", "lazy_static", + "rayon", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 44b677a..75c6974 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ path = "rust/lib.rs" ignore = "0.4" fuzzy-matcher = "0.3.7" lazy_static = "1.4.0" +rayon = "1.5.3" [dev-dependencies] criterion = "0.3.6" diff --git a/rust/sorter.rs b/rust/sorter.rs index cc7c31b..71d2efa 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -1,8 +1,5 @@ use super::matcher; -use super::thread_pool; - -use std::sync::mpsc; -use std::sync::Arc; +use rayon::prelude::*; pub struct Match { pub score: i64, @@ -27,7 +24,7 @@ pub fn sort_strings(options: Options, strings: Vec) -> Vec { let matcher = matcher::Matcher::new(options.pattern); let mut matches = strings - .into_iter() + .into_par_iter() .map(|candidate| Match { score: matcher.score(candidate.as_str()), content: candidate, From d95d65c6a31aa908ff72ddaf1fcf5dea58493f9b Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:36:34 +0100 Subject: [PATCH 11/15] Use Rayon for sorting as well - For completeness, but also for additional performance when there are extremely large numbers of results, use `par_sort_unstable_by()` for sorting the results. For most sane result sets this will not represent a significant speedup (for the Kubernetes benchmark it's around 1%) but as the set to be sorted grows the impact would be larger. 
--- rust/sorter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/sorter.rs b/rust/sorter.rs index 71d2efa..3cce282 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -31,6 +31,6 @@ pub fn sort_strings(options: Options, strings: Vec) -> Vec { }) .filter(|m| m.score > 25) .collect::>(); - matches.sort_by(|a, b| a.score.cmp(&b.score)); + matches.par_sort_unstable_by(|a, b| a.score.cmp(&b.score)); matches } From de41712291f194d7ea82aec5601138fc0a1102a7 Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:40:12 +0100 Subject: [PATCH 12/15] Return to using `minimum_score` - Update the provided `minimum_score` in `sorter::Option::new` to match what was being used in `sort_strings` - Use the `minimum_score` value instead of a hardcoded number This seems like functionality that was either intended and not added, or added and then part removed. Either way the performance impact is minimal and it's a nice idea. --- rust/sorter.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/sorter.rs b/rust/sorter.rs index 3cce282..f81f622 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -8,14 +8,14 @@ pub struct Match { pub struct Options { pub pattern: String, - pub minimun_score: i64, + pub minimum_score: i64, } impl Options { pub fn new(pattern: String) -> Self { Self { pattern, - minimun_score: 20, + minimum_score: 25, } } } @@ -29,7 +29,7 @@ pub fn sort_strings(options: Options, strings: Vec) -> Vec { score: matcher.score(candidate.as_str()), content: candidate, }) - .filter(|m| m.score > 25) + .filter(|m| m.score > options.minimum_score) .collect::>(); matches.par_sort_unstable_by(|a, b| a.score.cmp(&b.score)); matches From b509a5842f79fa9163b5739449da6f18aea8b8e2 Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:46:51 +0100 Subject: [PATCH 13/15] Remove ThreadPool Having switched to iterators and Rayon this is no longer used. 
--- rust/lib.rs | 1 - rust/thread_pool.rs | 87 --------------------------------------------- 2 files changed, 88 deletions(-) delete mode 100644 rust/thread_pool.rs diff --git a/rust/lib.rs b/rust/lib.rs index 74bda52..e2aca28 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -1,7 +1,6 @@ mod finder; mod matcher; mod sorter; -mod thread_pool; use std::collections::HashMap; use std::ffi::CStr; diff --git a/rust/thread_pool.rs b/rust/thread_pool.rs deleted file mode 100644 index d2d287e..0000000 --- a/rust/thread_pool.rs +++ /dev/null @@ -1,87 +0,0 @@ -use std::sync::mpsc; -use std::sync::Arc; -use std::sync::Mutex; -use std::thread; - -enum Message { - NewJob(Job), - Terminate, -} - -pub struct ThreadPool { - jobs: mpsc::Sender, - threads: Vec, -} - -trait FnBox { - fn call_box(self: Box); -} - -impl FnBox for F { - fn call_box(self: Box) { - (*self)() - } -} - -type Job = Box; - -impl ThreadPool { - pub fn new(thread_count: usize) -> Self { - let (jobs, receiver) = mpsc::channel(); - let receiver = Arc::new(Mutex::new(receiver)); - - let mut threads: Vec = Vec::new(); - for id in 1..thread_count { - threads.push(Worker::new(id, Arc::clone(&receiver))); - } - - ThreadPool { jobs, threads } - } - - pub fn execute(&self, f: F) - where - F: FnOnce() + Send + 'static, - { - let job = Box::new(f); - self.jobs.send(Message::NewJob(job)).unwrap(); - } -} - -impl Drop for ThreadPool { - fn drop(&mut self) { - for _ in &mut self.threads { - self.jobs.send(Message::Terminate).unwrap(); - } - - for worker in &mut self.threads { - if let Some(thread) = worker.thread.take() { - thread.join().unwrap(); - } - } - } -} - -struct Worker { - _id: usize, - thread: Option>, -} - -impl Worker { - fn new(id: usize, receiver: Arc>>) -> Worker { - let thread = thread::spawn(move || loop { - let message = receiver.lock().unwrap().recv().unwrap(); - - match message { - Message::NewJob(job) => job.call_box(), - Message::Terminate => { - break; - } - } - }); - - Worker { - _id: id, - thread: 
Some(thread), - } - } -} From 39febd82e236a9c79f5b408e98cbd20410f11e9e Mon Sep 17 00:00:00 2001 From: Ade Attwood Date: Sun, 28 Aug 2022 10:15:55 +0100 Subject: [PATCH 14/15] docs: add updated benchmark to the readme --- README.md | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index d523b32..dca3d3d 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,16 @@ cargo build --release You will need to have the rust toolchain installed. You can find more about that [here](https://www.rust-lang.org/tools/install) +If you get a linker error you may need to install `build-essential` to get +`ld`. This is a common issue if you are running the [benchmarks](#benchmarks) +in a VM + +``` +error: linker `cc` not found + | + = note: No such file or directory (os error 2) +``` + ## Features ### Commands @@ -95,19 +105,26 @@ luajit ./scripts/benchmark.lua Current benchmark status running on a `e2-standard-2` 2 vCPU + 8 GB memory VM running on GCP. 
-Rust +IvyRs (Lua) -| Name | Total | Average | Min | Max | -|--------------------------------|---------------|---------------|---------------|---------------| -| ivy_match(file.lua) 1000000x | 03.961640 (s) | 00.000004 (s) | 00.000003 (s) | 00.002146 (s) | -| ivy_files(kubernetes) 100x | 03.895758 (s) | 00.038958 (s) | 00.034903 (s) | 00.043660 (s) | +| Name | Total | Average | Min | Max | +| ---------------------------- | ------------- | ------------- | ------------- | ------------- | +| ivy_match(file.lua) 1000000x | 04.153531 (s) | 00.000004 (s) | 00.000003 (s) | 00.002429 (s) | +| ivy_files(kubernetes) 100x | 03.526795 (s) | 00.035268 (s) | 00.021557 (s) | 00.037127 (s) | + +IvyRs (Criterion) + +| Name | Min | Mean | Max | +| --------------------- | --------- | --------- | --------- | +| ivy_files(kubernetes) | 19.727 ms | 19.784 ms | 19.842 ms | +| ivy_match(file.lua) | 2.6772 µs | 2.6822 µs | 2.6873 µs | CPP -| Name | Total | Average | Min | Max | -|--------------------------------|---------------|---------------|---------------|---------------| -| ivy_match(file.lua) 1000000x | 01.855197 (s) | 00.000002 (s) | 00.000001 (s) | 00.000177 (s) | -| ivy_files(kubernetes) 100x | 14.696396 (s) | 00.146964 (s) | 00.056604 (s) | 00.168478 (s) | +| Name | Total | Average | Min | Max | +| ---------------------------- | ------------- | ------------- | ------------- | ------------- | +| ivy_match(file.lua) 1000000x | 01.855197 (s) | 00.000002 (s) | 00.000001 (s) | 00.000177 (s) | +| ivy_files(kubernetes) 100x | 14.696396 (s) | 00.146964 (s) | 00.056604 (s) | 00.168478 (s) | ## Other stuff you might like From 45d61ffc320a0006ed555120c0eefb4c93e2c080 Mon Sep 17 00:00:00 2001 From: Ade Attwood Date: Sun, 28 Aug 2022 18:11:51 +0100 Subject: [PATCH 15/15] ci: add rust build and format into the actions --- .github/workflows/ci.yml | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 
7887a4f..83d39c3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: - name: Lint commits uses: docker://registry.k1.zportal.co.uk/practically-oss/conventional-tools:0.x with: - args: conventional-tools commitlint -l1 + args: conventional-tools commitlint -l1 -f39febd82e236a9c79f5b408e98cbd20410f11e9e luacheck: name: Luacheck @@ -48,6 +48,19 @@ jobs: - name: Run clang format run: find ./cpp -name "*.cpp" -o -name "*.hpp" | xargs clang-format -Werror --dry-run + cargo-format: + name: Cargo Format + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Run cargo format + uses: actions-rs/cargo@v1 + with: + command: fmt + args: --all -- --check + test: name: Build and test runs-on: ubuntu-latest @@ -55,11 +68,16 @@ jobs: - name: Checkout uses: actions/checkout@v2 + - name: Install rust toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + - name: Install dependencies - run: sudo apt update && sudo apt install -y luajit build-essential pkg-config cmake + run: sudo apt update && sudo apt install -y luajit build-essential - name: Build - run: cmake -DCMAKE_BUILD_TYPE=Release -B build/Release && (cd build/Release; make -j) + run: cargo build --release - name: Test run: find lua -name "*_test.lua" | xargs luajit scripts/test.lua