/**
 * Tokenize text into a set of distinct, lowercased words.
 *
 * - A word is a maximal run of letters, decimal digits and combining marks.
 *   Letters from any script count (not only ASCII A-Z), so words such as
 *   "café" or "naïve" are kept whole instead of being cut at the accent.
 * - Every other character (punctuation, whitespace, symbols) separates words
 *   and is discarded.
 * - Words are lowercased; duplicates collapse because the result is a Set.
 *
 * @param text - The text to split into words.
 * @returns The distinct words, in order of first appearance in `text`.
 */
function tokenize(text: string): Set<string> {
  // \p{L} = letter, \p{Nd} = decimal digit, \p{M} = combining mark (keeps
  // decomposed accents attached to their base letter). Property escapes
  // require the `u` flag; `g` makes match() return every run.
  const wordPattern = /[\p{L}\p{Nd}\p{M}]+/gu;

  const words = new Set<string>();
  // match() yields null when there is no word at all (e.g. empty input).
  for (const word of text.match(wordPattern) ?? []) {
    words.add(word.toLowerCase());
  }
  return words;
}
/**
 * Find the words common to every set (the intersection of all sets).
 *
 * Works for any number of sets: an empty array yields an empty set, and a
 * single set yields a copy of that set. The inputs are never mutated, so
 * read-only arrays and sets are accepted.
 *
 * @param allSets - The word sets to intersect.
 * @returns A new set holding the words present in every input set, in the
 *          iteration order of the first set.
 */
function findMatchesMultiple(
  allSets: ReadonlyArray<ReadonlySet<string>>
): Set<string> {
  const [first, ...rest] = allSets;
  if (first === undefined) {
    return new Set<string>();
  }

  // Start from a copy of the first set so the caller's set is left untouched.
  let common = new Set<string>(first);
  for (const other of rest) {
    // Once nothing is shared, no later set can add anything back.
    if (common.size === 0) {
      break;
    }
    common = new Set<string>([...common].filter((word) => other.has(word)));
  }
  return common;
}
// -------------------------------------------------------------
// Demo input: three short passages that share some vocabulary
// -------------------------------------------------------------
const text1 =
  "Machine learning allows computers to learn from data. " +
  "It is widely used in modern applications.";
const text2 =
  "Data science uses machine learning techniques. " +
  "Applications rely on data-driven models.";
const text3 =
  "Modern applications of machine learning include data analysis, " +
  "automation, and intelligent systems.";

// -------------------------------------------------------------
// Reduce each passage to its set of distinct lowercase words
// -------------------------------------------------------------
const words1 = tokenize(text1);
const words2 = tokenize(text2);
const words3 = tokenize(text3);

// Collect the word sets so they can be intersected in one call
const allSets = [words1, words2, words3];

// -------------------------------------------------------------
// Keep only the words that occur in every passage
// -------------------------------------------------------------
const matches = findMatchesMultiple(allSets);

// -------------------------------------------------------------
// Print one matched keyword per line
// -------------------------------------------------------------
console.log("Matched Keywords Across ALL Texts:");
for (const keyword of matches) {
  console.log(`${keyword} `);
}
/*
run:
Matched Keywords Across ALL Texts:
machine
learning
data
applications
*/