/**
 * Tokenize text into a set of unique, lower-cased words.
 *
 * - A word is a maximal run of letters and digits; everything else
 *   (punctuation, whitespace, symbols) acts as a separator.
 * - Valid UTF-8 input is split on Unicode letter/mark/number classes, so
 *   non-ASCII words such as "café" stay in one piece. Input that is not
 *   valid UTF-8 falls back to byte-wise ASCII splitting.
 * - The result simulates a set (like std::set): the words are the array
 *   KEYS, every value is true, and keys appear in order of first occurrence.
 *   PHP stores purely numeric words such as "42" as integer keys.
 *
 * @param string $text Text to tokenize.
 * @return array<array-key, true> Set of words, keyed by the word itself.
 */
function tokenize(string $text): array {
    // With the /u modifier PCRE validates the subject as UTF-8 and
    // preg_split() returns false when the input is malformed.
    $tokens = preg_split('/[^\p{L}\p{M}\p{N}]+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
    $isUtf8 = $tokens !== false;

    if (!$isUtf8) {
        // Not valid UTF-8: treat the text as raw bytes and keep ASCII alphanumerics only.
        $tokens = preg_split('/[^A-Za-z0-9]+/', $text, -1, PREG_SPLIT_NO_EMPTY) ?: [];
    }

    // mbstring is an optional extension; without it only ASCII letters are case-folded.
    $useMultibyteLower = $isUtf8 && function_exists('mb_strtolower');

    $words = [];
    foreach ($tokens as $token) {
        $word = $useMultibyteLower ? mb_strtolower($token, 'UTF-8') : strtolower($token);
        $words[$word] = true; // key = word, so duplicates collapse automatically
    }

    return $words;
}
/**
 * Find the words common to ALL of the given word sets (set intersection).
 *
 * Works for any number of sets, not only three. Each set is an array whose
 * KEYS are the words, as produced by tokenize(); a word counts as present
 * in a set when isset() is true for its key.
 *
 * The outer array may use any keys (list, associative, or sparse such as
 * the output of array_filter()); only its values are used, in order.
 *
 * @param array<array-key, array<array-key, mixed>> $allSets Sets to intersect.
 * @return array<array-key, mixed> Words present in every set, in the order of
 *                                 the first set and mapped to true. A single
 *                                 input set is returned unchanged; no input
 *                                 sets yields [].
 */
function findMatchesMultiple(array $allSets): array {
    if (empty($allSets)) {
        return [];
    }

    // Re-index so the first set is found even when the caller's keys are not 0..n-1.
    $remaining = array_values($allSets);

    // Start with the first set, then narrow it down with each of the others.
    $result = array_shift($remaining);

    foreach ($remaining as $set) {
        if ($result === []) {
            break; // nothing left to match; further intersections cannot add words
        }

        $common = [];
        foreach ($result as $word => $_) {
            if (isset($set[$word])) {
                $common[$word] = true;
            }
        }
        $result = $common;
    }

    return $result;
}
// -------------------------------------------------------------
// Demo input: three short texts whose shared vocabulary we want
// -------------------------------------------------------------
$text1 = "Machine learning allows computers to learn from data. "
    . "It is widely used in modern applications.";

$text2 = "Data science uses machine learning techniques. "
    . "Applications rely on data-driven models.";

$text3 = "Modern applications of machine learning include data analysis, "
    . "automation, and intelligent systems.";

// -------------------------------------------------------------
// Turn each text into its set of lower-cased words
// -------------------------------------------------------------
$words1 = tokenize($text1);
$words2 = tokenize($text2);
$words3 = tokenize($text3);

// Collect the sets so they can be intersected in one call
$allSets = [$words1, $words2, $words3];

// -------------------------------------------------------------
// Keep only the words that occur in every text
// -------------------------------------------------------------
$matches = findMatchesMultiple($allSets);

// -------------------------------------------------------------
// Print the shared words on one line, each followed by a space
// -------------------------------------------------------------
echo "Matched Keywords Across ALL Texts:\n";
foreach (array_keys($matches) as $keyword) {
    // The words are the array keys; the values are just set markers
    echo $keyword, " ";
}
/*
run:
Matched Keywords Across ALL Texts:
machine learning data applications
*/