program KeywordMatching;
{$mode objfpc}{$H+}
type
(* Dynamic array of strings; the routines in this program use it as an ordered word list. *)
TStringListDyn = array of string;
(*
  contains
  Linear membership test that lets a plain dynamic array stand in for a set.

  list - the words to search; may be empty
  word - the word to look for, compared with '=' (exact match)

  Returns True as soon as an element equal to word is found,
  False when the whole list has been scanned without a match.
*)
function contains(const list: TStringListDyn; const word: string): boolean;
var
  candidate: string;
begin
  for candidate in list do
    if candidate = word then
      Exit(True);
  (* Fell through the loop: nothing matched. *)
  Result := False;
end;
(*
  addWord
  Appends word to list unless an equal entry is already stored.

  list - the word list; grown by one element when word is inserted
  size - element count maintained by the caller; the list is resized to
         size + 1 and size is advanced by one on insertion. The duplicate
         check scans the entire array, so callers are expected to keep
         size equal to Length(list).
  word - the word to insert

  When word is already present, neither list nor size is modified.
*)
procedure addWord(var list: TStringListDyn; var size: integer; const word: string);
begin
  if not contains(list, word) then
  begin
    SetLength(list, size + 1);
    (* After the resize the last slot is index size. *)
    list[High(list)] := word;
    size := Length(list);
  end;
end;
(*
  tokenize
  Splits text into a list of distinct, lower-cased words.

  A word is a maximal run of the characters 'A'..'Z', 'a'..'z' and
  '0'..'9'. Every other character (spaces, punctuation, hyphens, and any
  character outside those ranges) acts as a separator and is discarded.
  Each word is stored once, in order of first appearance.

  text - the input string to split

  Returns the de-duplicated word list; it is empty when text contains
  no letters or digits.
*)
function tokenize(const text: string): TStringListDyn;
const
  WordChars = ['A'..'Z', 'a'..'z', '0'..'9'];
var
  count: integer;
  current: string;
  ch: char;
begin
  Result := nil;
  count := 0;
  current := '';

  for ch in text do
  begin
    if ch in WordChars then
    begin
      (* Still inside a word: accumulate its lower-case form. *)
      current := current + LowerCase(ch);
      Continue;
    end;

    (* Separator reached: flush the pending word, if any. *)
    if current <> '' then
    begin
      addWord(Result, count, current);
      current := '';
    end;
  end;

  (* The text may end in the middle of a word. *)
  if current <> '' then
    addWord(Result, count, current);
end;
(*
  findMatches
  Computes the intersection of two word lists.

  words1 - first word list; it determines the order of the result
  words2 - second word list, used only for membership tests

  Returns a new list holding every word of words1 that also occurs in
  words2. Each word appears at most once, even if words1 repeats it.
*)
function findMatches(const words1, words2: TStringListDyn): TStringListDyn;
var
  candidate: string;
  count: integer;
begin
  Result := nil;
  count := 0;
  for candidate in words1 do
  begin
    if not contains(words2, candidate) then
      Continue;
    addWord(Result, count, candidate);
  end;
end;
(*
  printWords
  Writes every entry of list to standard output, each followed by a
  single space. No line terminator is written.
*)
procedure printWords(const list: TStringListDyn);
var
  entry: string;
begin
  for entry in list do
    Write(entry, ' ');
end;

var
  text1, text2: string;
  words1, words2, matches: TStringListDyn;
begin
  (* The two text blocks to compare. *)
  text1 :=
    'Machine learning allows computers to learn from data. ' +
    'It is widely used in modern applications.';
  text2 :=
    'Data science uses machine learning techniques. ' +
    'Applications rely on data-driven models.';

  (* Reduce each text to its distinct lower-case words, then intersect. *)
  words1 := tokenize(text1);
  words2 := tokenize(text2);
  matches := findMatches(words1, words2);

  (*
    Report the results. The #10#10 prefix ends the preceding word line
    and leaves one blank line before the next heading.
  *)
  WriteLn('Keywords in Text 1:');
  printWords(words1);
  WriteLn(#10#10'Keywords in Text 2:');
  printWords(words2);
  WriteLn(#10#10'Matched Keywords:');
  printWords(matches);
  WriteLn;
end.
(*
Expected output when run (each word line ends with a trailing space, and
the #10#10 prefixes leave one blank line between sections):

Keywords in Text 1:
machine learning allows computers to learn from data it is widely used in modern applications

Keywords in Text 2:
data science uses machine learning techniques applications rely on driven models

Matched Keywords:
machine learning data applications
*)