How to get the top N words of a string by number of occurrences in Python

1 Answer

0 votes
from collections import Counter
import re

def remove_word(s, word):
    """Return ``s`` normalized to lowercase tokens with ``word`` removed.

    The text is lowercased and split into runs of regex word characters,
    so punctuation and original spacing are discarded. Tokens equal to
    ``word`` (compared exactly, against the lowercased tokens) are dropped
    and the remaining tokens are joined with single spaces.
    """
    kept = []
    for token in re.findall(r'\w+', s.lower()):
        if token == word:
            continue
        kept.append(token)

    return ' '.join(kept)

# Common English words ignored by default when ranking word frequencies.
_DEFAULT_STOP_WORDS = ("is", "a", "to", "as", "can", "that", "on", "and")


def get_top_n_words(s, n, stop_words=None):
    """Return the ``n`` most frequent words of ``s`` with their counts.

    The text is lowercased and split into runs of regex word characters,
    so punctuation is ignored and hyphenated terms count as separate words.

    Args:
        s: Text to analyse.
        n: Maximum number of words to return. Values of zero or less
            yield an empty result.
        stop_words: Optional iterable of words to exclude from the ranking
            (matched case-insensitively). Defaults to a small built-in
            list of common English words.

    Returns:
        A dict mapping word -> occurrence count, ordered from most to
        least frequent; ties are broken alphabetically.
    """
    if stop_words is None:
        stop_words = _DEFAULT_STOP_WORDS
    excluded = {word.lower() for word in stop_words}

    # Tokenize once and filter against a set, instead of rebuilding and
    # re-tokenizing the whole string for every stop word.
    words = [w for w in re.findall(r'\w+', s.lower()) if w not in excluded]

    # Count the occurrences of each remaining word.
    word_count = Counter(words)

    # Highest count first; alphabetical order keeps ties deterministic.
    ranked = sorted(word_count.items(), key=lambda item: (-item[1], item[0]))

    # Clamp n so a negative value cannot slice from the end of the ranking.
    return dict(ranked[:max(n, 0)])


# Sample text whose most frequent words are reported below.
s = (
    "Python is a high-level, general-purpose programming language. "
    "Its design philosophy emphasizes code readability with the use of "
    "significant indentation. Python is dynamically type-checked and "
    "garbage-collected. It supports multiple programming paradigms, "
    "including procedural, object-oriented and functional programming. "
)

# Number of top-ranked words to print.
n = 4

top_n_words = get_top_n_words(s, n)

# Iterating the returned dict yields its keys, i.e. the words themselves.
for key in top_n_words:
    print(key)



'''
run:

programming
python
checked
code

'''

 



answered Feb 2 by avibootz
...