How to get the top N words of a string by number of occurrences in C

1 Answer

0 votes
#include <stdio.h>
#include <string.h>
#include <stdlib.h> // qsort

#define MAX_WORDS 1024  // capacity of the word table handed to add_word
#define MAX_WORD_LEN 16 // bytes per stored word, including the terminating NUL

// One distinct word together with the number of times it has been seen.
typedef struct {
    char word[MAX_WORD_LEN]; // NUL-terminated, at most MAX_WORD_LEN - 1 characters
    int count;
} WordCount;

// Records one occurrence of `word` in the table `words`.
//
//   words - table with room for at least MAX_WORDS entries; the first *index
//           entries are in use
//   index - in/out: number of entries in use; incremented when a new word is
//           appended
//   word  - NUL-terminated token; NULL and "" are ignored
//
// Stop words are skipped. Matching is case-sensitive, so "It" is skipped but
// "it" is not.
// A word longer than MAX_WORD_LEN - 1 characters is truncated to that length
// before it is looked up or stored, so it can never overflow an entry; two
// long words sharing that prefix are counted as the same word.
// Once the table holds MAX_WORDS distinct words, further new words are dropped
// silently; words already in the table keep being counted.
void add_word(WordCount words[], int *index, const char *word) {
    // Exclude stop words (commonly used words)
    static const char *const stop_words[] = {"is", "a", "the", "It", "in", "and"};

    if (word == NULL || *word == '\0') {
        return;
    }

    // Work on a bounded copy so that lookup and storage see the same key.
    // snprintf always NUL-terminates and truncates instead of overflowing.
    char key[MAX_WORD_LEN] = {0};
    snprintf(key, sizeof key, "%s", word);

    for (size_t i = 0; i < sizeof stop_words / sizeof stop_words[0]; i++) {
        if (strcmp(key, stop_words[i]) == 0) {
            return;
        }
    }

    for (int i = 0; i < *index; i++) {
        if (strcmp(words[i].word, key) == 0) {
            words[i].count++;
            return;
        }
    }

    // New word: only append while there is still a free slot.
    if (*index >= MAX_WORDS) {
        return;
    }

    memcpy(words[*index].word, key, sizeof key);
    words[*index].count = 1;
    (*index)++;
}

// qsort comparator: orders WordCount entries by count, highest first.
//
// Returns a positive value when `b` has the larger count, a negative value
// when `a` has the larger count, and 0 when the counts are equal. Entries with
// equal counts compare equal, so qsort leaves their relative order unspecified.
int compare(const void *a, const void *b) {
    const WordCount *word1 = a;
    const WordCount *word2 = b;

    // Three-way compare instead of a subtraction: the difference of two ints
    // can overflow, while the comparison results cannot.
    return (word2->count > word1->count) - (word2->count < word1->count); // Sort descending
}

// Demo: counts the words of a sample text (minus the stop words filtered by
// add_word) and prints the N most frequent ones, one per line.
int main(void) {
    char str[] = "C is a general-purpose programming language. "
                 "It was created in the 1970s by Dennis Ritchie and "
                 "remains very widely used and influential. "
                 "By design, C's features cleanly reflect the capabilities "
                 "of the targeted CPUs. It has found lasting use in operating systems code";
    // Word separators: whitespace plus sentence punctuation, so that a token
    // such as "design," is counted as "design". Apostrophes and hyphens stay
    // part of the word ("C's", "general-purpose").
    static const char delimiters[] = " \t\r\n.,;:!?\"()";
    const int N = 4; // Get top N words
    WordCount words[MAX_WORDS] = {0};
    int index = 0;

    // Tokenize the string into words. strtok overwrites delimiters in str
    // with NULs, which is why str is a modifiable array and not a literal.
    for (char *token = strtok(str, delimiters); token != NULL;
         token = strtok(NULL, delimiters)) {
        add_word(words, &index, token);
    }

    // Sort words by occurrences, most frequent first
    qsort(words, (size_t)index, sizeof words[0], compare);

    // Print the top N words (fewer if the text has fewer distinct words)
    for (int i = 0; i < N && i < index; i++) {
        printf("%s\n", words[i].word);
    }

    return 0;
}
   
      
       
/*
run:
    
C
general-purpose
programming
language
   
*/

 



answered Feb 1, 2025 by avibootz
edited Feb 1, 2025 by avibootz

Related questions

1 answer 86 views
1 answer 130 views
1 answer 114 views
1 answer 128 views
1 answer 112 views
1 answer 120 views
...