import re
import unicodedata
def standardize_text(text: str) -> str:
    """Return *text* reduced to lowercase ASCII words separated by single spaces.

    The steps are applied in this order:

    1. NFKD-decompose the text and drop every code point that is not ASCII
       afterwards (accented letters keep their base letter; characters with
       no ASCII decomposition disappear).
    2. Lowercase the result.
    3. Delete every character that is neither a word character nor
       whitespace.
    4. Collapse each whitespace run into one space and trim both ends.

    Args:
        text: The string to standardize.

    Returns:
        The standardized string (empty if nothing survives the filtering).
    """
    # Fold to ASCII first: NFKD splits accented and compatibility characters
    # into simpler parts, and encoding with 'ignore' discards whatever is
    # still non-ASCII.
    ascii_text = (
        unicodedata.normalize('NFKD', text)
        .encode('ascii', 'ignore')
        .decode('ascii')
    )
    # Lowercase only after folding. Some compatibility characters (e.g. the
    # trade mark sign, which decomposes to "TM") have no lowercase mapping of
    # their own, so lowercasing before normalization would let uppercase
    # ASCII letters slip into the result.
    ascii_text = ascii_text.lower()
    # Remove punctuation. Note that \w matches the underscore, so
    # underscores are kept.
    without_punctuation = re.sub(r'[^\w\s]', '', ascii_text)
    # Remove extra whitespace.
    return re.sub(r'\s+', ' ', without_punctuation).strip()
# Demo: two sentences that differ in case, punctuation, an accent and one
# extra word, run through standardize_text so they can be compared.
sentence1 = "the Quick, BROWN Fox Isnt Jumps OVER the lazy dog!!!"
sentence2 = "The quick; BROWN big Fox Isn't Jumps OVER the lãzy dog!"

# Standardize both inputs in order, then print one result per line.
std_sentence1, std_sentence2 = map(standardize_text, (sentence1, sentence2))
print(std_sentence1, std_sentence2, sep="\n")
'''
run:
the quick brown fox isnt jumps over the lazy dog
the quick brown big fox isnt jumps over the lazy dog
'''