How to remove stop words from a string in Pascal

1 Answer

0 votes
program RemoveStopWords;

// A stopwords list is a collection of commonly used words in a language
// that are often removed during text processing tasks. 

uses
  SysUtils; // Trim

const
  // Table of English stop words consulted by IsStopWord.
  // Every entry is written in lowercase.
  // The declared bounds (0..114) must equal the number of entries (115);
  // adjust both together when a word is added or removed.
  StopWords: array[0..114] of string = (
    'i','me','my','myself','we','our','ours','ourselves','you','your',
    'yours','yourself','yourselves','he','him','his','himself','she','her',
    'hers','herself','it','its','itself','they','them','their','theirs',
    'themselves','what','which','who','whom','this','that','these','those',
    'am','is','are','was','were','be','been','being','have','has','had',
    'having','do','does','did','doing','a','an','the','and','but','if','or',
    'because','as','until','while','of','at','by','for','with','about','against',
    'between','into','through','to','from','in','out','on','off','over','further',
    'then','here','there','when','where','why','how','all','any','both','each',
    'few','more','most','other','some','such','no','nor','not','only','own',
    'same','so','than','too','very','can','will','just','don','should','now'
  );

// Reports whether `word` is one of the entries of the StopWords table.
//
// The comparison ignores ASCII letter case: the table holds lowercase
// entries only, so 'The' or 'AND' would otherwise never be recognised.
//
// Returns true on a match and false otherwise (including for '').
function IsStopWord(word: string): boolean;
var
  i: integer;
  needle: string;
begin
  IsStopWord := false;

  // Fold the probe to lowercase once, outside the loop, instead of
  // normalising on every comparison.
  needle := LowerCase(word);

  for i := Low(StopWords) to High(StopWords) do
    if StopWords[i] = needle then
    begin
      IsStopWord := true;
      exit;
    end;
end;

// Returns `input` with every stop word (as decided by IsStopWord) removed.
//
// Words are maximal runs of characters greater than ' '; every character
// less than or equal to ' ' (space, tab, CR, LF, ...) acts as a separator.
// The surviving words are joined with exactly one space, so the result has
// no leading, trailing or repeated blanks. An empty or all-blank input
// yields ''.
//
// The text is scanned once and words are appended to the result as they
// are found, so there is no upper limit on the number of words.
function RemoveStopWords(input: string): string;
var
  token: string;   // word currently being collected
  kept: string;    // result built so far
  idx: integer;

  // Ends the current word: appends it to `kept` unless it is empty or a
  // stop word, then resets `token` for the next word.
  procedure FlushToken;
  begin
    if token = '' then
      exit;
    if not IsStopWord(token) then
    begin
      // Separator goes in front of every word except the first, so the
      // result never needs trimming afterwards.
      if kept <> '' then
        kept := kept + ' ';
      kept := kept + token;
    end;
    token := '';
  end;

begin
  kept := '';
  token := '';

  for idx := 1 to Length(input) do
    if input[idx] > ' ' then
      token := token + input[idx]
    else
      FlushToken;

  // The last word is not followed by a separator; flush it explicitly.
  FlushToken;

  RemoveStopWords := kept;
end;

var
  sample: string;
begin
  // Demo: print a sample sentence, then the same sentence with its
  // stop words stripped.
  sample := 'a pascal and java to python a we if c# then a and aa';
  writeln(sample);

  writeln(RemoveStopWords(sample));
end.


   
(*
run:
  
a pascal and java to python a we if c# then a and aa
pascal java python c# aa
  
*)


 



answered Jul 17 by avibootz
...