How to split a string on multiple multi‑character delimiters (and keep them) in C++

3 Answers

0 votes
#include <iostream>
#include <string>
#include <vector>

// Splits `s` on any of the (possibly multi-character) delimiters in `delims`
// and returns the pieces in order, keeping each matched delimiter as its own
// element between the text pieces.
//
// Parameters:
//   s      - text to split.
//   delims - delimiters to look for. When several delimiters match at the
//            same position, the one listed first wins. Empty delimiters are
//            ignored: a zero-length match would never advance the scan.
//
// Returns: text runs and delimiters in the order they occur in `s`. No empty
// strings are produced; an empty `s` yields an empty vector.
std::vector<std::string> split_keep_multi_delims(
        std::string_view s,
        const std::vector<std::string_view>& delims)
{
    // First non-empty delimiter (in list order) that matches at offset `at`,
    // or an empty view when none matches there.
    const auto match_at = [&](std::size_t at) -> std::string_view {
        for (const std::string_view d : delims) {
            // substr clamps the length, so a delimiter that would run past
            // the end of `s` simply compares unequal.
            if (!d.empty() && s.substr(at, d.size()) == d) {
                return d;
            }
        }
        return {};
    };

    std::vector<std::string> result;
    std::size_t text_start = 0;  // start of the pending run of normal text
    std::size_t pos = 0;

    while (pos < s.size()) {
        const std::string_view d = match_at(pos);
        if (d.empty()) {
            // Ordinary character: extend the pending text run.
            ++pos;
            continue;
        }

        // Flush the text collected before this delimiter, if any.
        if (pos > text_start) {
            result.emplace_back(s.substr(text_start, pos - text_start));
        }

        // Keep the delimiter itself, then resume scanning right after it.
        result.emplace_back(d);
        pos += d.size();
        text_start = pos;
    }

    // Trailing text after the last delimiter.
    if (text_start < s.size()) {
        result.emplace_back(s.substr(text_start));
    }

    return result;
}

// Demo: splits a sample string on three multi-character delimiters and
// prints every resulting piece (text and delimiters alike) in brackets.
int main() {
    const std::string input = "aa==bbb---cccc++++ddddd";
    const std::vector<std::string_view> separators = {"==", "---", "++++"};

    // Each piece is written as "[piece] " on a single line.
    for (const auto& piece : split_keep_multi_delims(input, separators)) {
        std::cout << "[" << piece << "] ";
    }

    return 0;
}



/*
run:

[aa] [==] [bbb] [---] [cccc] [++++] [ddddd] 

*/

 



answered Mar 9 by avibootz
0 votes
#include <iostream>
#include <string>
#include <vector>

// Reports whether `c` is the first character of any non-empty entry in
// `delims`. Only the leading character of each entry is inspected.
bool isDelimChar(char c, const std::vector<std::string_view>& delims) {
    for (size_t i = 0; i < delims.size(); ++i) {
        const std::string_view candidate = delims[i];

        // Empty entries have no leading character to compare against.
        if (candidate.empty())
            continue;

        if (candidate.front() == c)
            return true;
    }

    return false;
}

// Splits `s` into alternating pieces of ordinary text and delimiter runs,
// keeping both. A delimiter run is a maximal stretch of one repeated
// character for which isDelimChar(c, delims) is true; ordinary text is a
// maximal stretch of characters for which it is false.
std::vector<std::string> splitKeepMultiDelims(
        std::string_view s,
        const std::vector<std::string_view>& delims)
{
    std::vector<std::string> pieces;

    for (size_t cursor = 0; cursor < s.size(); ) {
        const size_t begin = cursor;
        const char lead = s[begin];

        // The character at `begin` always belongs to the piece it opens, so
        // each branch consumes it first and then extends the piece.
        if (isDelimChar(lead, delims)) {
            // Delimiter run: continue while the same character repeats.
            do {
                ++cursor;
            } while (cursor < s.size() && s[cursor] == lead);
        } else {
            // Ordinary text: continue until a delimiter character shows up.
            do {
                ++cursor;
            } while (cursor < s.size() && !isDelimChar(s[cursor], delims));
        }

        pieces.emplace_back(s.substr(begin, cursor - begin));
    }

    return pieces;
}

// Demo: splits a sample string on runs of '=', '-' and '+' and prints every
// resulting piece (text and delimiter runs alike) in brackets.
int main() {
    const std::string input = "aa==bbb---cccc++++ddddd";
    const std::vector<std::string_view> separators = {"=", "-", "+"};

    // Each piece is written as "[piece] " on a single line.
    for (const auto& piece : splitKeepMultiDelims(input, separators)) {
        std::cout << "[" << piece << "] ";
    }

    return 0;
}



/*
run:

[aa] [==] [bbb] [---] [cccc] [++++] [ddddd] 

*/

 



answered Mar 9 by avibootz
0 votes
#include <iostream>
#include <string>
#include <vector>
#include <unordered_set>

// Reports whether `c` is a member of the delimiter set `delims`.
bool isDelimChar(char c, const std::unordered_set<char>& delims) {
    const auto hit = delims.find(c);
    return hit != delims.end();
}

// Cuts `text` into consecutive segments and returns all of them, delimiters
// included. A new segment begins wherever the text switches between
// delimiter and non-delimiter characters, or where one delimiter character
// is followed by a different delimiter character. Empty input yields an
// empty vector.
std::vector<std::string> splitAndKeep(std::string_view text,
                                      const std::unordered_set<char>& delims)
{
    std::vector<std::string> segments;
    if (text.empty()) {
        return segments;
    }

    size_t segBegin = 0;
    // Classification of the previous character, carried between iterations.
    bool lastWasDelim = isDelimChar(text[0], delims);

    for (size_t idx = 1; idx < text.size(); ++idx) {
        const bool nowIsDelim = isDelimChar(text[idx], delims);

        // Two neighbours stay in one segment when they are of the same kind
        // and, if both are delimiters, are the very same character.
        const bool sameKind = (lastWasDelim == nowIsDelim);
        const bool sameRun =
            sameKind && (!nowIsDelim || text[idx] == text[idx - 1]);

        if (!sameRun) {
            segments.emplace_back(text.substr(segBegin, idx - segBegin));
            segBegin = idx;
        }

        lastWasDelim = nowIsDelim;
    }

    // Whatever remains after the last boundary is the final segment.
    segments.emplace_back(text.substr(segBegin));

    return segments;
}

// Demo: splits a sample string on runs of '=', '-' and '+' and prints every
// resulting segment (text and delimiter runs alike) in brackets.
int main() {
    const std::string input = "aa==bbb---cccc++++ddddd";
    const std::unordered_set<char> separators = {'=', '-', '+'};

    // Each segment is written as "[segment] " on a single line.
    for (const auto& segment : splitAndKeep(input, separators)) {
        std::cout << "[" << segment << "] ";
    }

    return 0;
}



/*
run:

[aa] [==] [bbb] [---] [cccc] [++++] [ddddd]  

*/

 



answered Mar 10 by avibootz

Related questions

...