How to get the word before the last word from a string (edge‑case‑safe) in Python

1 Answer

0 votes
import re

# A word is a run of Unicode word characters (letters, digits, underscore).
# Apostrophes (ASCII ' or typographic U+2019) are kept only when they sit
# between word characters, so contractions such as "don't" stay in one piece
# instead of being split into "don" and "t".
_WORD_PATTERN = re.compile(r"\w+(?:['\u2019]\w+)*")


def get_word_before_last(text: str) -> str:
    """
    Return the word before the last word in a string.

    Words are maximal runs of Unicode word characters (``\\w``: letters,
    digits and the underscore); an apostrophe enclosed by word characters is
    treated as part of the word. Everything else (whitespace, ASCII and
    Unicode punctuation, hyphens, symbols) acts as a separator.

    Args:
        text: The string to inspect.

    Returns:
        The second-to-last word, or an empty string when ``text`` contains
        fewer than two words.
    """
    # Matching the words directly (rather than splitting on separators)
    # never yields empty strings for leading/trailing punctuation, so no
    # extra filtering step is required.
    words = _WORD_PATTERN.findall(text)

    if len(words) < 2:
        return ""

    return words[-2]

def main():
    """Print the result of get_word_before_last for a set of sample inputs."""
    print("=== Testing: Get Word Before Last ===\n")

    sample_inputs = (
        "python c++",
        "  many   spaces   here   now  ",
        "OneWord",
        "",
        "   ",
        "Hello, world!",
        "Tabs\tand\nnewlines work too",
        "Unicode 世界、こんにちは",
        "Ends with punctuation.",
        "Multiple words, with punctuation, here!",
        "state-of-the-art program example",
    )
    separator = "-" * 40

    for sample in sample_inputs:
        # An empty result is shown as the literal text "null".
        shown = get_word_before_last(sample) or "null"

        print(f'Input: "{sample}"')
        print(f'Output: {shown}')
        print(separator)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


"""
OUTPUT:

=== Testing: Get Word Before Last ===

Input: "python c++"
Output: python
----------------------------------------
Input: "  many   spaces   here   now  "
Output: here
----------------------------------------
Input: "OneWord"
Output: null
----------------------------------------
Input: ""
Output: null
----------------------------------------
Input: "   "
Output: null
----------------------------------------
Input: "Hello, world!"
Output: Hello
----------------------------------------
Input: "Tabs	and
newlines work too"
Output: work
----------------------------------------
Input: "Unicode 世界、こんにちは"
Output: 世界
----------------------------------------
Input: "Ends with punctuation."
Output: with
----------------------------------------
Input: "Multiple words, with punctuation, here!"
Output: punctuation
----------------------------------------
Input: "state-of-the-art program example"
Output: program
----------------------------------------

"""

 



answered Mar 28 by avibootz

Related questions

...