How to get the word before the last word from a string (edge‑case‑safe) in Java

1 Answer

0 votes
import java.util.List;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates extracting the word that precedes the last word of a string.
 *
 * <p>{@link #main(String[])} prints the result for a fixed set of sample
 * inputs; {@link #getWordBeforeLast(String)} contains the actual logic.
 */
public class Program {

    /**
     * Matches a single word: a run of Unicode letters and combining marks,
     * optionally continued by apostrophe-joined runs (ASCII {@code '} or the
     * typographic apostrophe U+2019) so that contractions such as
     * {@code can't} stay in one piece.
     *
     * <p>Combining marks ({@code \p{M}}) are included because decomposed
     * accents and the vowel signs of many scripts are not in {@code \p{L}};
     * without them such words would be cut apart in the middle.
     * Digits, hyphens and all other punctuation act as separators.
     *
     * <p>Compiled once because {@link Pattern} is immutable and thread-safe.
     */
    private static final Pattern WORD_PATTERN =
            Pattern.compile("[\\p{L}\\p{M}]+(?:['\u2019][\\p{L}\\p{M}]+)*");

    /**
     * Runs {@link #getWordBeforeLast(String)} over a fixed list of sample
     * inputs and prints each input together with its result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        System.out.println("=== Testing: Get Word Before Last ===\n");

        // Sample inputs covering blanks, punctuation, mixed whitespace,
        // non-Latin scripts and hyphenated text.
        List<String> testCases = Arrays.asList(
            "python java",
            "  many   spaces   here   now  ",
            "OneWord",
            "",
            "   ",
            "Hello, world!",
            "Tabs\tand\nnewlines work too",
            "Unicode 世界、こんにちは",
            "Ends with punctuation.",
            "Multiple words, with punctuation, here!",
            "state-of-the-art program example"
        );

        for (String test : testCases) {
            String result = getWordBeforeLast(test);

            System.out.println("Input: \"" + test + "\"");
            System.out.println("Output: " + (result != null ? result : "null"));
            System.out.println("----------------------------------------");
        }
    }

    /**
     * Returns the word immediately before the last word of {@code text}.
     *
     * <p>A word is a run of Unicode letters (with their combining marks),
     * optionally containing apostrophes between letters. Whitespace, digits,
     * hyphens and other punctuation separate words.
     *
     * @param text the string to scan; may be {@code null}
     * @return the second-to-last word, or {@code null} when {@code text} is
     *         {@code null} or contains fewer than two words
     */
    public static String getWordBeforeLast(String text) {

        if (text == null) {
            return null;
        }

        Matcher matcher = WORD_PATTERN.matcher(text);

        // Only the two most recent matches matter, so slide a two-element
        // window over the matches instead of collecting all of them.
        String beforeLast = null;
        String last = null;

        while (matcher.find()) {
            beforeLast = last;
            last = matcher.group();
        }

        // Still null when fewer than two words were found.
        return beforeLast;
    }
}


/*
OUTPUT:

=== Testing: Get Word Before Last ===

Input: "python java"
Output: python
----------------------------------------
Input: "  many   spaces   here   now  "
Output: here
----------------------------------------
Input: "OneWord"
Output: null
----------------------------------------
Input: ""
Output: null
----------------------------------------
Input: "   "
Output: null
----------------------------------------
Input: "Hello, world!"
Output: Hello
----------------------------------------
Input: "Tabs	and
newlines work too"
Output: work
----------------------------------------
Input: "Unicode 世界、こんにちは"
Output: 世界
----------------------------------------
Input: "Ends with punctuation."
Output: with
----------------------------------------
Input: "Multiple words, with punctuation, here!"
Output: punctuation
----------------------------------------
Input: "state-of-the-art program example"
Output: program
----------------------------------------

*/

 



answered Mar 28 by avibootz

Related questions

...