/**
 * Returns the second-to-last word of $text.
 *
 * Words are the non-empty pieces left after splitting $text on runs of
 * Unicode punctuation (\p{P}) and/or whitespace (\s). Hyphens therefore
 * separate words ("state-of-the-art" is four words), and a CJK comma such
 * as "、" separates "世界" from "こんにちは".
 *
 * @param string $text Text to inspect; expected to be valid UTF-8.
 * @return string The word preceding the last word, or "" when $text holds
 *                fewer than two words or cannot be split (e.g. it is not
 *                valid UTF-8).
 */
function getWordBeforeLast(string $text): string {
    // The trailing 'u' modifier makes PCRE treat pattern and subject as UTF-8.
    // PREG_SPLIT_NO_EMPTY drops the empty pieces produced by leading or
    // trailing separators.
    $words = preg_split('/[\p{P}\s]+/u', $text, -1, PREG_SPLIT_NO_EMPTY);

    // preg_split() returns false when matching fails, which happens for
    // malformed UTF-8 under /u. Passing false to count() throws a TypeError
    // on PHP 8, so report "no word" instead.
    if ($words === false) {
        return "";
    }

    $total = count($words);
    if ($total < 2) {
        return "";
    }

    // Index $total - 2 is the element just before the last one.
    return $words[$total - 2];
}
/**
 * Demo driver: prints a banner, then for each sample input prints the input,
 * the result of getWordBeforeLast() (shown as "null" when the result is the
 * empty string), and a 40-dash separator line.
 */
function main() {
    echo "=== Testing: Get Word Before Last ===\n\n";

    // Sample inputs: plain words, extra whitespace, single/empty input,
    // punctuation, mixed whitespace, CJK text and hyphenated words.
    $samples = [
        "python php",
        " many spaces here now ",
        "OneWord",
        "",
        " ",
        "Hello, world!",
        "Tabs\tand\nnewlines work too",
        "Unicode 世界、こんにちは",
        "Ends with punctuation.",
        "Multiple words, with punctuation, here!",
        "state-of-the-art program example"
    ];

    foreach ($samples as $t) {
        $word = getWordBeforeLast($t);

        // An empty result means "no word before the last"; print it as "null".
        if ($word === "") {
            $display = "null";
        } else {
            $display = $word;
        }

        echo "Input: \"$t\"\n", "Output: $display\n", str_repeat("-", 40) . "\n";
    }
}
// Run the demo unconditionally whenever this file is executed or included.
main();
/*
OUTPUT:
=== Testing: Get Word Before Last ===
Input: "python php"
Output: python
----------------------------------------
Input: " many spaces here now "
Output: here
----------------------------------------
Input: "OneWord"
Output: null
----------------------------------------
Input: ""
Output: null
----------------------------------------
Input: " "
Output: null
----------------------------------------
Input: "Hello, world!"
Output: Hello
----------------------------------------
Input: "Tabs and
newlines work too"
Output: work
----------------------------------------
Input: "Unicode 世界、こんにちは"
Output: 世界
----------------------------------------
Input: "Ends with punctuation."
Output: with
----------------------------------------
Input: "Multiple words, with punctuation, here!"
Output: punctuation
----------------------------------------
Input: "state-of-the-art program example"
Output: program
----------------------------------------
*/