use regex::Regex;
use std::collections::HashMap;
/// Returns up to `n` of the most frequent words in `s`, each paired with its count.
///
/// The input is lowercased first, so counting is case-insensitive. A word is any
/// maximal run of characters matched by the regex `\w+`; every other character
/// (whitespace, punctuation, hyphens, ...) acts as a separator. The stop words
/// `a`, `and`, `it` and `the` are ignored.
///
/// The result is ordered by descending count; words with the same count are
/// ordered by ascending string comparison. Fewer than `n` entries are returned
/// when the text has fewer than `n` distinct countable words.
///
/// Note: the pattern is compiled on every call. If this function ends up on a
/// hot path, compile the `Regex` once and share it instead.
fn get_top_n_words(s: &str, n: usize) -> Vec<(String, i32)> {
    // Commonly used words that are excluded from the ranking.
    const STOP_WORDS: [&str; 4] = ["a", "and", "it", "the"];

    let re = Regex::new(r"\w+").expect("the literal pattern `\\w+` is a valid regex");

    // Keep the lowercased text alive in a local so the counts can borrow from it.
    let lowered = s.to_lowercase();

    // Count on borrowed slices: no per-token `String` and no intermediate `Vec`.
    let mut word_count: HashMap<&str, i32> = HashMap::new();
    for word in re.find_iter(&lowered).map(|mat| mat.as_str()) {
        if STOP_WORDS.contains(&word) {
            continue;
        }
        *word_count.entry(word).or_insert(0) += 1;
    }

    let mut ranked: Vec<(&str, i32)> = word_count.into_iter().collect();
    // Map keys are unique, so (count desc, word asc) is a total order and an
    // unstable sort yields exactly the same sequence as a stable one.
    ranked.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));

    // Allocate owned strings only for the entries that are actually returned.
    ranked
        .into_iter()
        .take(n)
        .map(|(word, count)| (word.to_string(), count))
        .collect()
}
/// Demo entry point: ranks the words of a sample paragraph and prints the four
/// most frequent ones, one `word: count` pair per line.
fn main() {
    let text = "Rust is a general-purpose programming language \
                emphasizing performance, type safety, and concurrency. \
                It enforces memory safety, meaning that all references \
                point to valid memory. Rust does so without a traditional \
                garbage collector; instead, memory safety errors \
                and data races are prevented by the borrow checker, \
                which tracks the object lifetime of references at compile time.";
    let limit: usize = 4;

    get_top_n_words(text, limit)
        .into_iter()
        .for_each(|(word, count)| println!("{}: {}", word, count));
}
/*
Output when run:
memory: 3
safety: 3
references: 2
rust: 2
*/