program KeywordMatching;
{$mode objfpc}{$H+}
type
TStringSet = array of string;
(*
Tokenize text into words.
- Keeps only letters and digits
- Splits on punctuation and spaces
*)
function tokenize(const text: string): TStringSet;
var
words: TStringSet;
size: integer = 0;
buffer: string = '';
i: integer;
c: char;
begin
SetLength(words, 0);
for i := 1 to Length(text) do
begin
c := text[i];
if (c in ['A'..'Z','a'..'z','0'..'9']) then
buffer := buffer + LowerCase(c)
else if buffer <> '' then
begin
SetLength(words, size + 1);
words[size] := buffer;
Inc(size);
buffer := '';
end;
end;
if buffer <> '' then
begin
SetLength(words, size + 1);
words[size] := buffer;
end;
Result := words;
end;
(*
Helper: check if a word exists in a set
*)
function contains(const words: TStringSet; const w: string): boolean;
var
i: integer;
begin
Result := False;
for i := 0 to High(words) do
if words[i] = w then
Exit(True);
end;
(*
// Find keyword matches (set intersection) for TWO texts
// -------------------------------------------------------------
*)
function findMatches(const words1, words2: TStringSet): TStringSet;
var
matches: TStringSet;
size: integer = 0;
i: integer;
begin
SetLength(matches, 0);
for i := 0 to High(words1) do
if contains(words2, words1[i]) then
begin
SetLength(matches, size + 1);
matches[size] := words1[i];
Inc(size);
end;
Result := matches;
end;
(*
// Find keyword matches across THREE OR MORE texts
// -------------------------------------------------------------
This function receives an array of sets.
It returns the intersection of ALL sets.
*)
function findMatchesMultiple(const allSets: array of TStringSet): TStringSet;
var
resultSet, temp: TStringSet;
size, i, j: integer;
begin
if Length(allSets) = 0 then Exit(nil);
(* Start with the first set *)
resultSet := allSets[0];
(* Intersect with each remaining set *)
for i := 1 to High(allSets) do
begin
SetLength(temp, 0);
size := 0;
for j := 0 to High(resultSet) do
if contains(allSets[i], resultSet[j]) then
begin
SetLength(temp, size + 1);
temp[size] := resultSet[j];
Inc(size);
end;
resultSet := temp;
end;
Result := resultSet;
end;
var
text1, text2, text3: string;
words1, words2, words3, matches: TStringSet;
i: integer;
begin
// -------------------------------------------------------------
// Three text blocks to compare
// -------------------------------------------------------------
text1 :=
'Machine learning allows computers to learn from data. ' +
'It is widely used in modern applications.';
text2 :=
'Data science uses machine learning techniques. ' +
'Applications rely on data-driven models.';
text3 :=
'Modern applications of machine learning include data analysis, ' +
'automation, and intelligent systems.';
// -------------------------------------------------------------
// Tokenize all texts
// -------------------------------------------------------------
words1 := tokenize(text1);
words2 := tokenize(text2);
words3 := tokenize(text3);
// -------------------------------------------------------------
// Find keyword matches across ALL texts
// -------------------------------------------------------------
matches := findMatchesMultiple([words1, words2, words3]);
// -------------------------------------------------------------
// Output results
// -------------------------------------------------------------
WriteLn('Matched Keywords Across ALL Texts:');
for i := 0 to High(matches) do
Write(matches[i], ' ');
end.
(*
run:
Matched Keywords Across ALL Texts:
machine learning data applications
*)