How to get the word before the last word from a string (edge‑case‑safe) in TypeScript

1 Answer

0 votes
/**
 * Returns the word before the last word from a given string.
 *
 * A "word" is a maximal run of Unicode letters, numbers, and combining marks;
 * everything else (whitespace, punctuation, symbols) acts as a separator.
 * Combining marks are kept inside words so that decomposed accents
 * (e.g. "e" followed by U+0301) and scripts that rely on vowel signs
 * (e.g. Devanagari) are not cut into fragments.
 *
 * @param text - The input string to process
 * @returns The second-to-last word, or the literal string "null" if fewer than two words exist
 */
function getWordBeforeLast(text: string): string {
    // Regex breakdown:
    // [\p{L}\p{N}\p{M}]+ : Match one or more Unicode Letters, Numbers, or combining Marks.
    // /g : Collect every word in the string.
    // /u : Enables Unicode support for the property escapes.
    // match() yields null when nothing matches, so fall back to an empty list.
    const words: string[] = text.match(/[\p{L}\p{N}\p{M}]+/gu) ?? [];

    // Plain indexing works on every compile target (Array.prototype.at needs the ES2022 lib).
    // With fewer than two words the index is negative, the lookup yields undefined,
    // and the "null" fallback is returned.
    return words[words.length - 2] ?? "null";
}

/**
 * Demo driver: runs getWordBeforeLast over a fixed list of sample inputs
 * and prints each input together with the result to the console.
 */
function main(): void {
    console.log("=== Testing: Get Word Before Last ===\n");

    // Visual separator printed after every sample.
    const divider = "-".repeat(40);

    // Samples cover plain text, extra whitespace, too-few-word inputs,
    // punctuation, control whitespace, CJK text and hyphenated words.
    const samples: readonly string[] = [
        "python typescript",
        "  many   spaces   here   now  ",
        "OneWord",
        "",
        "   ",
        "Hello, world!",
        "Tabs\tand\nnewlines work too",
        "Unicode 世界、こんにちは",
        "Ends with punctuation.",
        "Multiple words, with punctuation, here!",
        "state-of-the-art program example"
    ];

    for (const sample of samples) {
        const wordBeforeLast = getWordBeforeLast(sample);

        console.log(`Input: "${sample}"`);
        console.log(`Output: ${wordBeforeLast}`);
        console.log(divider);
    }
}

// Script entry point: run the demo as soon as the file is executed.
main();



/*
OUTPUT:

=== Testing: Get Word Before Last ===

Input: "python typescript"
Output: python
----------------------------------------
Input: "  many   spaces   here   now  "
Output: here
----------------------------------------
Input: "OneWord"
Output: null
----------------------------------------
Input: ""
Output: null
----------------------------------------
Input: "   "
Output: null
----------------------------------------
Input: "Hello, world!"
Output: Hello
----------------------------------------
Input: "Tabs	and
newlines work too"
Output: work
----------------------------------------
Input: "Unicode 世界、こんにちは"
Output: 世界
----------------------------------------
Input: "Ends with punctuation."
Output: with
----------------------------------------
Input: "Multiple words, with punctuation, here!"
Output: punctuation
----------------------------------------
Input: "state-of-the-art program example"
Output: program
----------------------------------------

*/

 



answered Mar 29 by avibootz
edited Mar 29 by avibootz

Related questions

...