How to naturally sort a string like humans do (text & numbers sort the number parts as numbers) in C++

2 Answers

0 votes
#include <algorithm>
#include <iostream>
#include <cctype>
#include <string>
#include <vector>

/*
    The most natural way to sort strings “like humans do” is to use a custom comparator 
    that splits each string into alternating text‑chunks and number‑chunks,
    then compares number chunks numerically instead of lexicographically.  
    It ensures "file2" comes before "file10".
*/

// Extract the next chunk (digits or non-digits)
static std::string nextChunk(const std::string &s, size_t &i) {
    if (i >= s.size()) return "";

    size_t start = i;
    bool digit = std::isdigit((unsigned char)s[i]);

    while (i < s.size() && std::isdigit((unsigned char)s[i]) == digit) {
        i++;
    }
    
    return s.substr(start, i - start);
}

// Compare two chunks: numeric chunks as numbers, text chunks lexicographically
static int compareChunk(const std::string &a, const std::string &b) {
    bool aNum = !a.empty() && std::isdigit((unsigned char)a[0]);
    bool bNum = !b.empty() && std::isdigit((unsigned char)b[0]);

    if (aNum && bNum) {
        // Strip leading zeros
        size_t i = a.find_first_not_of('0');
        size_t j = b.find_first_not_of('0');
        std::string aa = (i == std::string::npos ? "0" : a.substr(i));
        std::string bb = (j == std::string::npos ? "0" : b.substr(j));

        // Compare by length first
        if (aa.size() != bb.size()) return aa.size() < bb.size() ? -1 : 1;

        // Same length → lexicographic compare
        if (aa != bb) return aa < bb ? -1 : 1;
        return 0;
    }

    // Case-insensitive text comparison
    std::string aa = a, bb = b;
    std::transform(aa.begin(), aa.end(), aa.begin(), ::tolower);
    std::transform(bb.begin(), bb.end(), bb.begin(), ::tolower);

    if (aa != bb) return aa < bb ? -1 : 1;
    
    return 0;
}

// Natural sort comparator
bool naturalLess(const std::string &a, const std::string &b) {
    size_t i = 0, j = 0;

    while (i < a.size() || j < b.size()) {
        std::string ca = nextChunk(a, i);
        std::string cb = nextChunk(b, j);

        int cmp = compareChunk(ca, cb);
        if (cmp != 0) return cmp < 0;
    }

    return false; // equal
}

int main() {
    std::vector<std::string> items = {
        "file1",
        "file10",
        "file2",
        "file20",
        "file3",
        "file11",
        "file100",
        "file21"
    };

    std::sort(items.begin(), items.end(), naturalLess);

    std::cout << "Natural sort result:\n";
    for (const auto &s : items) {
        std::cout << s << "\n";
    }
}



/*
run:

Natural sort result:
file1
file2
file3
file10
file11
file20
file21
file100

*/

 



answered 4 days ago by avibootz
edited 4 days ago by avibootz
0 votes
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <vector>

bool natural_compare(const std::string &a, const std::string &b) {
    size_t i = 0, j = 0;
    size_t la = a.size(), lb = b.size();

    while (i < la && j < lb) {

        // If both characters are digits → compare number chunks
        if (std::isdigit((unsigned char)a[i]) &&
            std::isdigit((unsigned char)b[j])) {

            // Skip leading zeros
            size_t ia = i, ib = j;
            while (ia < la && a[ia] == '0') ia++;
            while (ib < lb && b[ib] == '0') ib++;

            // Find end of digit runs
            size_t start_a = ia, start_b = ib;
            while (ia < la && std::isdigit((unsigned char)a[ia])) ia++;
            while (ib < lb && std::isdigit((unsigned char)b[ib])) ib++;

            size_t len_a = ia - start_a;
            size_t len_b = ib - start_b;

            // Compare by length (numeric magnitude)
            if (len_a != len_b)
                return len_a < len_b;

            // Same length → lexicographic compare
            for (size_t k = 0; k < len_a; k++) {
                if (a[start_a + k] != b[start_b + k])
                    return a[start_a + k] < b[start_b + k];
            }

            // Numbers equal → continue
            i = ia;
            j = ib;
        }
        else {
            // Case-insensitive text comparison
            char ca = std::tolower((unsigned char)a[i]);
            char cb = std::tolower((unsigned char)b[j]);

            if (ca != cb)
                return ca < cb;

            i++;
            j++;
        }
    }

    // One string ended first
    return la < lb;
}

int main() {
    std::vector<std::string> items = {
        "file1",
        "file10",
        "file2",
        "file20",
        "file3",
        "file11",
        "file100",
        "file21"
    };

    std::sort(items.begin(), items.end(), natural_compare);

    std::cout << "Natural sort result:\n";
    for (const auto &s : items)
        std::cout << s << "\n";

    return 0;
}


/*
run:

Natural sort result:
file1
file2
file3
file10
file11
file20
file21
file100

*/

 



answered 4 days ago by avibootz
...