"""
Tokenize text into a set of unique, lower-cased words.
- Keeps only letters and digits
- Splits on punctuation, spaces, and any other non-alphanumeric character
"""
def tokenize(text: str) -> set:
    """Return the set of unique, lower-cased alphanumeric words in *text*.

    A word is a maximal run of characters for which ``str.isalnum()`` is
    true. Every other character (spaces, punctuation, hyphens,
    underscores, ...) acts as a separator and is discarded.

    Args:
        text: The text to split into words.

    Returns:
        A set of lower-cased words. Duplicates collapse and the original
        word order is not preserved. Empty when *text* has no
        alphanumeric characters.
    """
    words = set()
    current = []  # characters of the word currently being built
    for c in text:
        if c.isalnum():
            # Lower-case per character so matching is case-insensitive.
            current.append(c.lower())
        elif current:
            # A separator ends the word in progress.
            words.add("".join(current))
            current.clear()
    # Flush the last word when the text does not end with a separator.
    if current:
        words.add("".join(current))
    return words
"""
Find keyword matches across any number of texts
-------------------------------------------------------------
This function receives a list of sets.
It returns the intersection of ALL sets (an empty set if the list is empty).
"""
def findMatchesMultiple(allSets: list) -> set:
    """Return the elements common to every set in *allSets*.

    Args:
        allSets: A list of sets (for example, the word sets of several
            texts). The sets themselves are not modified.

    Returns:
        A new set holding the intersection of all sets. With a single
        set the result is a copy of it; with an empty list the result
        is an empty set.
    """
    if not allSets:
        return set()
    first, *rest = allSets
    # Copy the first set so the caller's data is never mutated, then let
    # the built-in intersection handle the remaining sets.
    return set(first).intersection(*rest)
def main():
    """Print the keywords shared by three sample texts.

    Each text is tokenized into a set of words, the sets are
    intersected, and the common words are printed on one line in
    alphabetical order.
    """
    # -------------------------------------------------------------
    # Three text blocks to compare
    # -------------------------------------------------------------
    text1 = (
        "Machine learning allows computers to learn from data. "
        "It is widely used in modern applications."
    )
    text2 = (
        "Data science uses machine learning techniques. "
        "Applications rely on data-driven models."
    )
    text3 = (
        "Modern applications of machine learning include data analysis, "
        "automation, and intelligent systems."
    )

    # -------------------------------------------------------------
    # Tokenize all texts into a list for multi-text comparison
    # -------------------------------------------------------------
    allSets = [tokenize(text) for text in (text1, text2, text3)]

    # -------------------------------------------------------------
    # Find keyword matches across ALL texts
    # -------------------------------------------------------------
    matches = findMatchesMultiple(allSets)

    # -------------------------------------------------------------
    # Output results
    # -------------------------------------------------------------
    print("Matched Keywords Across ALL Texts:")
    # Sets iterate in arbitrary order, so sort to keep the output
    # reproducible from run to run; join avoids a trailing space and
    # ends the line with a newline.
    print(" ".join(sorted(matches)))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
"""
run:
Matched Keywords Across ALL Texts:
applications data learning machine
"""