From 12a1a64c54f97ab4d2a61a1d731ecf84d6adb14b Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 10:25:05 +0100 Subject: [PATCH 1/9] Format and clippy --- README.md | 8 ++++---- rust/finder.rs | 2 +- rust/lib.rs | 31 ++++++++++++++++++------------- rust/matcher.rs | 15 +++++++-------- rust/sorter.rs | 18 +++++++++++------- rust/thread_pool.rs | 10 +++++----- scripts/benchmark.lua | 2 +- 7 files changed, 47 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 76fe9c4..d523b32 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ to optimize, you will probably need to get a baseline on your hardware. There are fixtures provided that will create the directory structure of the [kubernetes](https://github.com/kubernetes/kubernetes) source code, from -somewhere arround commit sha 985c9202ccd250a5fe22c01faf0d8f83d804b9f3. This will +somewhere around commit sha 985c9202ccd250a5fe22c01faf0d8f83d804b9f3. This will create a directory tree of 23511 files a relative large source tree to get a good idea of performance. To create the source tree under `/tmp/ivy-trees/kubernetes` run the following command. This will need to be run @@ -95,16 +95,16 @@ luajit ./scripts/benchmark.lua Current benchmark status running on a `e2-standard-2` 2 vCPU + 8 GB memory VM running on GCP. 
-Rust +Rust -| Name | Total | Adverage | Min | Max | +| Name | Total | Average | Min | Max | |--------------------------------|---------------|---------------|---------------|---------------| | ivy_match(file.lua) 1000000x | 03.961640 (s) | 00.000004 (s) | 00.000003 (s) | 00.002146 (s) | | ivy_files(kubernetes) 100x | 03.895758 (s) | 00.038958 (s) | 00.034903 (s) | 00.043660 (s) | CPP -| Name | Total | Adverage | Min | Max | +| Name | Total | Average | Min | Max | |--------------------------------|---------------|---------------|---------------|---------------| | ivy_match(file.lua) 1000000x | 01.855197 (s) | 00.000002 (s) | 00.000001 (s) | 00.000177 (s) | | ivy_files(kubernetes) 100x | 14.696396 (s) | 00.146964 (s) | 00.056604 (s) | 00.168478 (s) | diff --git a/rust/finder.rs b/rust/finder.rs index 4a3ccf5..3a4dd31 100644 --- a/rust/finder.rs +++ b/rust/finder.rs @@ -22,5 +22,5 @@ pub fn find_files(options: Options) -> Vec { files.push(candidate_path.to_str().unwrap().to_string()); } - return files; + files } diff --git a/rust/lib.rs b/rust/lib.rs index 7baaad6..db271bf 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -1,30 +1,35 @@ -mod matcher; mod finder; +mod matcher; mod sorter; mod thread_pool; -use std::sync::Mutex; use std::collections::HashMap; -use std::os::raw::{c_int, c_char}; -use std::ffi::CString; use std::ffi::CStr; +use std::ffi::CString; +use std::os::raw::{c_char, c_int}; +use std::sync::Mutex; #[macro_use] extern crate lazy_static; lazy_static! 
{ - static ref GLOBAL_FILE_CACHE: Mutex>> = return Mutex::new(HashMap::new()) ; + static ref GLOBAL_FILE_CACHE: Mutex>> = Mutex::new(HashMap::new()); } fn to_string(input: *const c_char) -> String { - return unsafe { CStr::from_ptr(input) }.to_str().unwrap().to_string(); + unsafe { CStr::from_ptr(input) } + .to_str() + .unwrap() + .to_string() } fn get_files(directory: &String) -> Vec { let mut cache = GLOBAL_FILE_CACHE.lock().unwrap(); if !cache.contains_key(directory) { - let finder_options = finder::Options{ directory: directory.clone() }; - cache.insert( directory.clone(), finder::find_files(finder_options)); + let finder_options = finder::Options { + directory: directory.clone(), + }; + cache.insert(directory.clone(), finder::find_files(finder_options)); } return cache.get(directory).unwrap().to_vec(); @@ -41,8 +46,9 @@ pub extern "C" fn ivy_match(c_pattern: *const c_char, c_text: *const c_char) -> let pattern = to_string(c_pattern); let text = to_string(c_text); - let m = matcher::Matcher::new( pattern ); - return m.score(text) as i32; + let m = matcher::Matcher::new(pattern); + + m.score(text) as i32 } #[no_mangle] @@ -52,7 +58,7 @@ pub extern "C" fn ivy_files(c_pattern: *const c_char, c_base_dir: *const c_char) // Bail out early if the pattern is empty its never going to find anything if pattern.is_empty() { - return CString::new("").unwrap().into_raw() + return CString::new("").unwrap().into_raw(); } let files = get_files(&directory); @@ -66,6 +72,5 @@ pub extern "C" fn ivy_files(c_pattern: *const c_char, c_base_dir: *const c_char) output.push('\n'); } - return CString::new(output).unwrap().into_raw() + CString::new(output).unwrap().into_raw() } - diff --git a/rust/matcher.rs b/rust/matcher.rs index 28eac59..0560717 100644 --- a/rust/matcher.rs +++ b/rust/matcher.rs @@ -1,5 +1,5 @@ -use fuzzy_matcher::FuzzyMatcher; use fuzzy_matcher::skim::SkimMatcherV2; +use fuzzy_matcher::FuzzyMatcher; pub struct Matcher { /// The search pattern that we want to match 
against some text @@ -9,17 +9,16 @@ pub struct Matcher { impl Matcher { pub fn new(pattern: String) -> Self { - return Self { + Self { pattern, matcher: SkimMatcherV2::default(), } } - pub fn score(self: &Self, text: String) -> i64 { - if let Some((score, _indices)) = self.matcher.fuzzy_indices(&text, &self.pattern) { - return score; - } - - return 0; + pub fn score(&self, text: String) -> i64 { + self.matcher + .fuzzy_indices(&text, &self.pattern) + .map(|(score, _indices)| score) + .unwrap_or_default() } } diff --git a/rust/sorter.rs b/rust/sorter.rs index eccb179..cdb9983 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -1,9 +1,8 @@ use super::matcher; use super::thread_pool; - -use std::sync::Mutex; use std::sync::Arc; +use std::sync::Mutex; pub struct Match { pub score: i64, @@ -17,7 +16,10 @@ pub struct Options { impl Options { pub fn new(pattern: String) -> Self { - return Self { pattern, minimun_score: 20 }; + Self { + pattern, + minimun_score: 20, + } } } @@ -35,14 +37,16 @@ pub fn sort_strings(options: Options, strings: Vec) -> Arc 25 { let mut tmp = thread_matches.lock().unwrap(); let content = string.clone(); - tmp.push(Match{ score, content }); + tmp.push(Match { score, content }); } }) } drop(pool); - matches.lock().unwrap().sort_by(|a, b| a.score.cmp(&b.score)); - return matches; + matches + .lock() + .unwrap() + .sort_by(|a, b| a.score.cmp(&b.score)); + matches } - diff --git a/rust/thread_pool.rs b/rust/thread_pool.rs index df49872..d2d287e 100644 --- a/rust/thread_pool.rs +++ b/rust/thread_pool.rs @@ -35,7 +35,7 @@ impl ThreadPool { threads.push(Worker::new(id, Arc::clone(&receiver))); } - return ThreadPool { jobs, threads }; + ThreadPool { jobs, threads } } pub fn execute(&self, f: F) @@ -62,7 +62,7 @@ impl Drop for ThreadPool { } struct Worker { - id: usize, + _id: usize, thread: Option>, } @@ -79,9 +79,9 @@ impl Worker { } }); - return Worker { - id, + Worker { + _id: id, thread: Some(thread), - }; + } } } diff --git a/scripts/benchmark.lua 
b/scripts/benchmark.lua index 2f1937b..457b671 100644 --- a/scripts/benchmark.lua +++ b/scripts/benchmark.lua @@ -35,7 +35,7 @@ local benchmark = function(name, n, callback) ) end -print "| Name | Total | Adverage | Min | Max |" +print "| Name | Total | Average | Min | Max |" print "|--------------------------------|---------------|---------------|---------------|---------------|" benchmark("ivy_match(file.lua) 1000000x", 1000000, function() From 8ab074b5377bee7608abfdc390b3b62e2cb88fb7 Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 14:20:17 +0100 Subject: [PATCH 2/9] Add benchmarks and an example bin for profiling --- Cargo.lock | 466 ++++++++++++++++++++++++++++++++++++ Cargo.toml | 16 +- benches/ivy_files.rs | 17 ++ benches/ivy_match.rs | 17 ++ examples/filename_search.rs | 5 + rust/lib.rs | 16 +- 6 files changed, 533 insertions(+), 4 deletions(-) create mode 100644 benches/ivy_files.rs create mode 100644 benches/ivy_match.rs create mode 100644 examples/filename_search.rs diff --git a/Cargo.lock b/Cargo.lock index 6c62469..11ab482 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,21 +11,141 @@ dependencies = [ "memchr", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bstr" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" 
dependencies = [ + "lazy_static", "memchr", + "regex-automata", + "serde", ] +[[package]] +name = "bumpalo" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1ad822118d20d2c234f427000d5acc36eabe1e29a348c89b63dd60b13f28e5d" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "bitflags", + "textwrap", + "unicode-width", +] + +[[package]] +name = "criterion" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", + "once_cell", + "scopeguard", +] + [[package]] name = "crossbeam-utils" version = "0.8.11" @@ -36,6 +156,34 @@ dependencies = [ "once_cell", ] +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa 0.4.8", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "either" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" + [[package]] name = "fnv" version = "1.0.7" @@ -64,6 +212,21 @@ dependencies = [ "regex", ] +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "ignore" version = "0.4.18" @@ -82,21 +245,58 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "itertools" 
+version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "itoa" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" + [[package]] name = "ivy" version = "0.0.1" dependencies = [ + "criterion", "fuzzy-matcher", "ignore", "lazy_static", ] +[[package]] +name = "js-sys" +version = "0.3.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "258451ab10b34f8af53416d1fdab72c22e805f0c92a1136d59470ec0b11138b2" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "libc" +version = "0.2.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" + [[package]] name = "log" version = "0.4.17" @@ -112,12 +312,116 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 
+dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "plotters" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "716b4eeb6c4a1d3ecc956f75b43ec2e8e8ba80026413e70a3f41fd3313d3492b" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" + +[[package]] +name = "plotters-svg" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9a81d2759aae1dae668f783c308bc5c8ebd191ff4184aaa1b37f65a6ae5a56f" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "proc-macro2" +version = "1.0.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.5.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + [[package]] name = "regex" version = "1.6.0" @@ -129,12 +433,24 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + [[package]] name = "regex-syntax" version = "0.6.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +[[package]] +name = "ryu" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" + [[package]] name = "same-file" version = "1.0.6" @@ -144,6 +460,70 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.144" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860" + +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + +[[package]] +name = "serde_derive" 
+version = "1.0.144" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94ed3a816fb1d101812f83e789f888322c34e291f894f19590dc310963e87a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" +dependencies = [ + "itoa 1.0.3", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "thread_local" version = "1.1.4" @@ -153,6 +533,28 @@ dependencies = [ "once_cell", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-ident" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + [[package]] name = "walkdir" version = "2.3.2" @@ -164,6 +566,70 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fc7652e3f6c4706c8d9cd54832c4a4ccb9b5336e2c3bd154d5cccfbf1c1f5f7d" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "662cd44805586bd52971b9586b1df85cdbbd9112e4ef4d8f41559c334dc6ac3f" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b260f13d3012071dfb1512849c033b1925038373aea48ced3012c09df952c602" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be8e654bdd9b79216c2929ab90721aa82faf65c48cdf08bdc4e7f51357b80da" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a" + +[[package]] +name = "web-sys" +version = "0.3.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed055ab27f941423197eb86b2035720b1a3ce40504df082cac2ecc6ed73335a1" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index cd601b6..44b677a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" [lib] name = "ivyrs" -crate-type = ["cdylib"] +crate-type = ["cdylib", "rlib"] path = "rust/lib.rs" [dependencies] @@ -13,5 +13,19 @@ ignore = "0.4" fuzzy-matcher = "0.3.7" lazy_static = "1.4.0" +[dev-dependencies] +criterion = "0.3.6" + [profile.release] opt-level = 3 + +[profile.bench] +debug = true + 
+[[bench]] +name = "ivy_match" +harness = false + +[[bench]] +name = "ivy_files" +harness = false diff --git a/benches/ivy_files.rs b/benches/ivy_files.rs new file mode 100644 index 0000000..eac785f --- /dev/null +++ b/benches/ivy_files.rs @@ -0,0 +1,17 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use ivyrs::inner_files; + +pub fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("ivy_files(kubernetes)", |b| { + b.iter(|| { + inner_files( + black_box("file.go".to_owned()), + black_box("/tmp/ivy-trees/kubernetes".to_owned()), + ) + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/benches/ivy_match.rs b/benches/ivy_match.rs new file mode 100644 index 0000000..a3e16b9 --- /dev/null +++ b/benches/ivy_match.rs @@ -0,0 +1,17 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use ivyrs::inner_match; + +pub fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("ivy_match(file.lua)", |b| { + b.iter(|| { + inner_match( + black_box("file.lua".to_owned()), + black_box("some/long/path/to/file/file.lua".to_owned()), + ) + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/examples/filename_search.rs b/examples/filename_search.rs new file mode 100644 index 0000000..0e91b96 --- /dev/null +++ b/examples/filename_search.rs @@ -0,0 +1,5 @@ +use ivyrs::inner_files; + +pub fn main() { + inner_files("file.go".to_owned(), "/tmp/ivy-trees/kubernetes".to_owned()); +} diff --git a/rust/lib.rs b/rust/lib.rs index db271bf..0189f15 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -46,6 +46,10 @@ pub extern "C" fn ivy_match(c_pattern: *const c_char, c_text: *const c_char) -> let pattern = to_string(c_pattern); let text = to_string(c_text); + inner_match(pattern, text) +} + +pub fn inner_match(pattern: String, text: String) -> i32 { let m = matcher::Matcher::new(pattern); m.score(text) as i32 @@ -56,12 +60,18 @@ 
pub extern "C" fn ivy_files(c_pattern: *const c_char, c_base_dir: *const c_char) let pattern = to_string(c_pattern); let directory = to_string(c_base_dir); + let output = inner_files(pattern, directory); + + CString::new(output).unwrap().into_raw() +} + +pub fn inner_files(pattern: String, base_dir: String) -> String { // Bail out early if the pattern is empty its never going to find anything if pattern.is_empty() { - return CString::new("").unwrap().into_raw(); + return String::new(); } - let files = get_files(&directory); + let files = get_files(&base_dir); let mut output = String::new(); let sorter_options = sorter::Options::new(pattern); @@ -72,5 +82,5 @@ pub extern "C" fn ivy_files(c_pattern: *const c_char, c_base_dir: *const c_char) output.push('\n'); } - CString::new(output).unwrap().into_raw() + output } From ce28b248fa1a0c891dcce59a8c40a4d0c1ffc6cb Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 15:17:23 +0100 Subject: [PATCH 3/9] Add results to .gitignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e138ae8..d4be7ab 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ build target .cache compile_commands.json -.luacheckcache \ No newline at end of file +.luacheckcache +benchmarks +flamegraph* From 7fb8be541a2173f91ec0f072a73630806abcba8d Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:01:22 +0100 Subject: [PATCH 4/9] Reduce lock contention (round 1) - Use an async (i.e. unlimited buffer) MPSC channel instead of an Arc<Mutex<Vec<Match>>> for storing the scored matches in Sorter - Use Arc<Matcher> instead of Arc<Mutex<Matcher>> for the matcher, as it's not mutated and appears to be threadsafe. This cuts average iteration time (on the benchmarked machine) from 25.98ms to 16.08ms for the ivy_files benchmark. 
--- .gitignore | 1 + rust/lib.rs | 9 +++++---- rust/sorter.rs | 33 ++++++++++++++++++++------------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index d4be7ab..ce5edeb 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ compile_commands.json .luacheckcache benchmarks flamegraph* +perf.data* diff --git a/rust/lib.rs b/rust/lib.rs index 0189f15..b1e6cf0 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -66,18 +66,19 @@ pub extern "C" fn ivy_files(c_pattern: *const c_char, c_base_dir: *const c_char) } pub fn inner_files(pattern: String, base_dir: String) -> String { - // Bail out early if the pattern is empty its never going to find anything + let mut output = String::new(); + + // Bail out early if the pattern is empty; it's never going to find anything if pattern.is_empty() { - return String::new(); + return output; } let files = get_files(&base_dir); - let mut output = String::new(); let sorter_options = sorter::Options::new(pattern); let files = sorter::sort_strings(sorter_options, files); - for file in files.lock().unwrap().iter() { + for file in files.iter() { output.push_str(&file.content); output.push('\n'); } diff --git a/rust/sorter.rs b/rust/sorter.rs index cdb9983..83ba5b9 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -1,8 +1,8 @@ use super::matcher; use super::thread_pool; +use std::sync::mpsc; use std::sync::Arc; -use std::sync::Mutex; pub struct Match { pub score: i64, @@ -23,30 +23,37 @@ impl Options { } } -pub fn sort_strings(options: Options, strings: Vec) -> Arc>> { - let matches: Arc>> = Arc::new(Mutex::new(Vec::new())); - let matcher = Arc::new(Mutex::new(matcher::Matcher::new(options.pattern))); +pub fn sort_strings(options: Options, strings: Vec) -> Vec { + let mut matches = Vec::new(); + let matcher = Arc::new(matcher::Matcher::new(options.pattern)); let pool = thread_pool::ThreadPool::new(std::thread::available_parallelism().unwrap().get()); + let (tx, rx) = mpsc::channel::(); + for string in 
strings { let thread_matcher = Arc::clone(&matcher); - let thread_matches = Arc::clone(&matches); + let thread_transmitter = tx.clone(); pool.execute(move || { - let score = thread_matcher.lock().unwrap().score(string.to_string()); + let score = thread_matcher.score(string.to_string()); if score > 25 { - let mut tmp = thread_matches.lock().unwrap(); - let content = string.clone(); - tmp.push(Match { score, content }); + thread_transmitter + .send(Match { + score, + content: string, + }) + .expect("Failed to push data to channel"); } }) } drop(pool); + drop(tx); - matches - .lock() - .unwrap() - .sort_by(|a, b| a.score.cmp(&b.score)); + while let Ok(result) = rx.recv() { + matches.push(result) + } + + matches.sort_by(|a, b| a.score.cmp(&b.score)); matches } From cec83937709db98117c094455f448a2c355ae8e4 Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:29:11 +0100 Subject: [PATCH 5/9] Remove multithreading for sorting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The relevant processes are so fast, mutexes and mutex locks are so expensive, and iterators so efficient, that it's actually faster to run single-threaded across all the data than to spin up a bunch of threads and have them basically spinlock waiting for the global mutex involved either directly or in a channel. ivy_files(kubernetes) time: [10.209 ms 10.245 ms 10.286 ms] change: [-36.781% -36.178% -35.601%] (p = 0.00 < 0.05) Performance has improved. ivy_match(file.lua) time: [1.1626 µs 1.1668 µs 1.1709 µs] change: [+0.2131% +1.5409% +2.9109%] (p = 0.02 < 0.05) Change within noise threshold. 
--- examples/filename_search.rs | 4 +++- rust/lib.rs | 2 +- rust/matcher.rs | 4 ++-- rust/sorter.rs | 36 ++++++++---------------------------- 4 files changed, 14 insertions(+), 32 deletions(-) diff --git a/examples/filename_search.rs b/examples/filename_search.rs index 0e91b96..68cba68 100644 --- a/examples/filename_search.rs +++ b/examples/filename_search.rs @@ -1,5 +1,7 @@ use ivyrs::inner_files; pub fn main() { - inner_files("file.go".to_owned(), "/tmp/ivy-trees/kubernetes".to_owned()); + let res = inner_files("file.go".to_owned(), "/tmp/ivy-trees/kubernetes".to_owned()); + + println!("{}", res); } diff --git a/rust/lib.rs b/rust/lib.rs index b1e6cf0..74bda52 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -52,7 +52,7 @@ pub extern "C" fn ivy_match(c_pattern: *const c_char, c_text: *const c_char) -> pub fn inner_match(pattern: String, text: String) -> i32 { let m = matcher::Matcher::new(pattern); - m.score(text) as i32 + m.score(text.as_str()) as i32 } #[no_mangle] diff --git a/rust/matcher.rs b/rust/matcher.rs index 0560717..cbbf6d7 100644 --- a/rust/matcher.rs +++ b/rust/matcher.rs @@ -15,9 +15,9 @@ impl Matcher { } } - pub fn score(&self, text: String) -> i64 { + pub fn score(&self, text: &str) -> i64 { self.matcher - .fuzzy_indices(&text, &self.pattern) + .fuzzy_indices(text, &self.pattern) .map(|(score, _indices)| score) .unwrap_or_default() } diff --git a/rust/sorter.rs b/rust/sorter.rs index 83ba5b9..cc7c31b 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -24,36 +24,16 @@ impl Options { } pub fn sort_strings(options: Options, strings: Vec) -> Vec { - let mut matches = Vec::new(); - let matcher = Arc::new(matcher::Matcher::new(options.pattern)); + let matcher = matcher::Matcher::new(options.pattern); - let pool = thread_pool::ThreadPool::new(std::thread::available_parallelism().unwrap().get()); - - let (tx, rx) = mpsc::channel::(); - - for string in strings { - let thread_matcher = Arc::clone(&matcher); - let thread_transmitter = tx.clone(); - 
pool.execute(move || { - let score = thread_matcher.score(string.to_string()); - if score > 25 { - thread_transmitter - .send(Match { - score, - content: string, - }) - .expect("Failed to push data to channel"); - } + let mut matches = strings + .into_iter() + .map(|candidate| Match { + score: matcher.score(candidate.as_str()), + content: candidate, }) - } - - drop(pool); - drop(tx); - - while let Ok(result) = rx.recv() { - matches.push(result) - } - + .filter(|m| m.score > 25) + .collect::>(); matches.sort_by(|a, b| a.score.cmp(&b.score)); matches } From c5e8677a37042c4152b3df9916725cf4ea74cf87 Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:34:15 +0100 Subject: [PATCH 6/9] Introduce Rayon for parallel iteration and sorting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use `into_par_iter()` before setting out to calculate scores and then filter by them This represents a more efficient parallelism approach, with no mutex or global state at top level. ivy_files(kubernetes) time: [4.5800 ms 4.6121 ms 4.6467 ms] change: [-55.056% -54.570% -54.133%] (p = 0.00 < 0.05) Performance has improved. ivy_match(file.lua) time: [1.1514 µs 1.1599 µs 1.1694 µs] change: [+0.4116% +2.0753% +3.6710%] (p = 0.01 < 0.05) Change within noise threshold. 
--- Cargo.lock | 1 + Cargo.toml | 1 + rust/sorter.rs | 7 ++----- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 11ab482..fd5d598 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -274,6 +274,7 @@ dependencies = [ "fuzzy-matcher", "ignore", "lazy_static", + "rayon", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 44b677a..75c6974 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ path = "rust/lib.rs" ignore = "0.4" fuzzy-matcher = "0.3.7" lazy_static = "1.4.0" +rayon = "1.5.3" [dev-dependencies] criterion = "0.3.6" diff --git a/rust/sorter.rs b/rust/sorter.rs index cc7c31b..71d2efa 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -1,8 +1,5 @@ use super::matcher; -use super::thread_pool; - -use std::sync::mpsc; -use std::sync::Arc; +use rayon::prelude::*; pub struct Match { pub score: i64, @@ -27,7 +24,7 @@ pub fn sort_strings(options: Options, strings: Vec) -> Vec { let matcher = matcher::Matcher::new(options.pattern); let mut matches = strings - .into_iter() + .into_par_iter() .map(|candidate| Match { score: matcher.score(candidate.as_str()), content: candidate, From d95d65c6a31aa908ff72ddaf1fcf5dea58493f9b Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:36:34 +0100 Subject: [PATCH 7/9] Use Rayon for sorting as well - For completeness, but also for additional performance when there are extremely large numbers of results, use `par_sort_unstable_by()` for sorting the results. For most sane result sets this will not represent a significant speedup (for the Kubernetes benchmark it's around 1%) but as the set to be sorted grows the impact would be larger. 
--- rust/sorter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/sorter.rs b/rust/sorter.rs index 71d2efa..3cce282 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -31,6 +31,6 @@ pub fn sort_strings(options: Options, strings: Vec) -> Vec { }) .filter(|m| m.score > 25) .collect::>(); - matches.sort_by(|a, b| a.score.cmp(&b.score)); + matches.par_sort_unstable_by(|a, b| a.score.cmp(&b.score)); matches } From de41712291f194d7ea82aec5601138fc0a1102a7 Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:40:12 +0100 Subject: [PATCH 8/9] Return to using `minimum_score` - Update the provided `minimum_score` in `sorter::Options::new` to match what was being used in `sort_strings` - Use the `minimum_score` value instead of a hardcoded number This seems like functionality that was either intended and not added, or added and then partly removed. Either way the performance impact is minimal and it's a nice idea. --- rust/sorter.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/sorter.rs b/rust/sorter.rs index 3cce282..f81f622 100644 --- a/rust/sorter.rs +++ b/rust/sorter.rs @@ -8,14 +8,14 @@ pub struct Match { pub struct Options { pub pattern: String, - pub minimun_score: i64, + pub minimum_score: i64, } impl Options { pub fn new(pattern: String) -> Self { Self { pattern, - minimun_score: 20, + minimum_score: 25, } } } @@ -29,7 +29,7 @@ pub fn sort_strings(options: Options, strings: Vec) -> Vec { score: matcher.score(candidate.as_str()), content: candidate, }) - .filter(|m| m.score > 25) + .filter(|m| m.score > options.minimum_score) .collect::>(); matches.par_sort_unstable_by(|a, b| a.score.cmp(&b.score)); matches From b509a5842f79fa9163b5739449da6f18aea8b8e2 Mon Sep 17 00:00:00 2001 From: Xymist Date: Fri, 26 Aug 2022 16:46:51 +0100 Subject: [PATCH 9/9] Remove ThreadPool Having switched to iterators and Rayon this is no longer used. 
--- rust/lib.rs | 1 - rust/thread_pool.rs | 87 --------------------------------------------- 2 files changed, 88 deletions(-) delete mode 100644 rust/thread_pool.rs diff --git a/rust/lib.rs b/rust/lib.rs index 74bda52..e2aca28 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -1,7 +1,6 @@ mod finder; mod matcher; mod sorter; -mod thread_pool; use std::collections::HashMap; use std::ffi::CStr; diff --git a/rust/thread_pool.rs b/rust/thread_pool.rs deleted file mode 100644 index d2d287e..0000000 --- a/rust/thread_pool.rs +++ /dev/null @@ -1,87 +0,0 @@ -use std::sync::mpsc; -use std::sync::Arc; -use std::sync::Mutex; -use std::thread; - -enum Message { - NewJob(Job), - Terminate, -} - -pub struct ThreadPool { - jobs: mpsc::Sender, - threads: Vec, -} - -trait FnBox { - fn call_box(self: Box); -} - -impl FnBox for F { - fn call_box(self: Box) { - (*self)() - } -} - -type Job = Box; - -impl ThreadPool { - pub fn new(thread_count: usize) -> Self { - let (jobs, receiver) = mpsc::channel(); - let receiver = Arc::new(Mutex::new(receiver)); - - let mut threads: Vec = Vec::new(); - for id in 1..thread_count { - threads.push(Worker::new(id, Arc::clone(&receiver))); - } - - ThreadPool { jobs, threads } - } - - pub fn execute(&self, f: F) - where - F: FnOnce() + Send + 'static, - { - let job = Box::new(f); - self.jobs.send(Message::NewJob(job)).unwrap(); - } -} - -impl Drop for ThreadPool { - fn drop(&mut self) { - for _ in &mut self.threads { - self.jobs.send(Message::Terminate).unwrap(); - } - - for worker in &mut self.threads { - if let Some(thread) = worker.thread.take() { - thread.join().unwrap(); - } - } - } -} - -struct Worker { - _id: usize, - thread: Option>, -} - -impl Worker { - fn new(id: usize, receiver: Arc>>) -> Worker { - let thread = thread::spawn(move || loop { - let message = receiver.lock().unwrap().recv().unwrap(); - - match message { - Message::NewJob(job) => job.call_box(), - Message::Terminate => { - break; - } - } - }); - - Worker { - _id: id, - thread: 
Some(thread), - } - } -}