How to split a string on multiple multi‑character delimiters (and keep them) in C

3 Answers

0 votes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  
/*
 * Reports whether the string s begins with prefix.
 *
 * Returns 1 when every character of prefix equals the character at the
 * same position in s (so an empty prefix always matches), otherwise 0.
 */
int starts_with(const char* s, const char* prefix) {
    while (*prefix != '\0') {
        // A shorter s fails here too: its '\0' differs from *prefix.
        if (*s != *prefix) {
            return 0;
        }
        s++;
        prefix++;
    }

    return 1;
}
  
/*
 * Appends a NUL-terminated heap copy of the len bytes at src to the token
 * array *list, doubling the array first when it is full.
 *
 * Returns 1 on success, 0 if an allocation failed; on failure *list, *count
 * and *capacity still describe the tokens stored so far.
 */
static int append_token(char*** list, int* count, int* capacity,
                        const char* src, size_t len) {
    if (*count >= *capacity) {
        int new_capacity = *capacity * 2;
        // Keep the old pointer until realloc has succeeded.
        char** grown = realloc(*list, (size_t)new_capacity * sizeof **list);
        if (grown == NULL) {
            return 0;
        }
        *list = grown;
        *capacity = new_capacity;
    }

    char* copy = malloc(len + 1);
    if (copy == NULL) {
        return 0;
    }
    memcpy(copy, src, len);
    copy[len] = '\0';

    (*list)[(*count)++] = copy;
    return 1;
}

/*
 * Splits s on any of the delim_count strings in delims and keeps the
 * delimiters as tokens of their own.
 *
 * At each position the delimiters are tried in array order and the first
 * one that matches wins. Empty delimiter strings are ignored, because they
 * would match everywhere without consuming input.
 *
 * Returns a malloc'd array of malloc'd strings and stores its length in
 * *out_count; the caller frees every string and then the array. Empty
 * input yields a non-NULL array with *out_count == 0. If an allocation
 * fails, everything is released, *out_count is 0 and NULL is returned.
 */
char** split_keep_multi_delims(const char* s, const char** delims, int delim_count, int* out_count) {
    int capacity = 16;
    int count = 0;
    char** result = malloc((size_t)capacity * sizeof *result);

    *out_count = 0;
    if (result == NULL) {
        return NULL;
    }

    // Pending plain text is the slice [text_start, p) of s itself, so its
    // length is not limited by any fixed-size scratch buffer.
    const char* text_start = s;
    const char* p = s;

    while (*p) {
        size_t match_len = 0;

        for (int i = 0; i < delim_count; i++) {
            size_t len = strlen(delims[i]);

            if (len > 0 && strncmp(p, delims[i], len) == 0) {
                match_len = len;
                break;
            }
        }

        if (match_len == 0) {
            p++;
            continue;
        }

        // Flush the text in front of the delimiter, then the delimiter.
        // Each append checks the capacity itself, so two appends in one
        // iteration cannot run past the end of the array.
        if (p > text_start &&
            !append_token(&result, &count, &capacity, text_start, (size_t)(p - text_start))) {
            goto fail;
        }
        if (!append_token(&result, &count, &capacity, p, match_len)) {
            goto fail;
        }

        p += match_len;
        text_start = p;
    }

    // Trailing text after the last delimiter.
    if (p > text_start &&
        !append_token(&result, &count, &capacity, text_start, (size_t)(p - text_start))) {
        goto fail;
    }

    *out_count = count;

    return result;

fail:
    for (int i = 0; i < count; i++) {
        free(result[i]);
    }
    free(result);

    return NULL;
}
  
/*
 * Demo: splits a sample string on three multi-character delimiters and
 * prints every token in square brackets, releasing each one after use.
 */
int main() {
    const char* delims[] = {"==", "---", "++++"};
    const char* s = "aa==bbb---cccc++++ddddd";
    int size;

    char** parts = split_keep_multi_delims(s, delims, 3, &size);

    int i = 0;
    while (i < size) {
        char* token = parts[i++];

        printf("[%s] ", token);
        free(token);
    }

    free(parts);

    return 0;
}
  
  
  
/*
  
[aa] [==] [bbb] [---] [cccc] [++++] [ddddd] 
  
*/

 



answered Mar 9 by avibootz
edited Mar 9 by avibootz
0 votes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Reports whether c is the first character of any of the delim_count
 * strings in delims.
 *
 * Returns 1 on the first match, 0 when nothing matches (including when
 * delim_count is zero or negative).
 */
int is_delim_char(char c, const char** delims, int delim_count) {
    int found = 0;
    int idx = 0;

    while (!found && idx < delim_count) {
        found = (delims[idx][0] == c);
        idx++;
    }

    return found;
}

/*
 * Appends a NUL-terminated heap copy of the len bytes at src to the array
 * *parts, doubling the array first when it is full.
 *
 * Returns 1 on success, 0 if an allocation failed; on failure *parts,
 * *count and *capacity still describe the entries stored so far.
 */
static int push_part(char*** parts, int* count, int* capacity,
                     const char* src, size_t len)
{
    if (*count >= *capacity) {
        int new_capacity = *capacity * 2;
        // Keep the old pointer until realloc has succeeded.
        char** grown = realloc(*parts, (size_t)new_capacity * sizeof **parts);
        if (grown == NULL) {
            return 0;
        }
        *parts = grown;
        *capacity = new_capacity;
    }

    char* copy = malloc(len + 1);
    if (copy == NULL) {
        return 0;
    }
    memcpy(copy, src, len);
    copy[len] = '\0';

    (*parts)[(*count)++] = copy;
    return 1;
}

/*
 * Splits s into alternating text and delimiter tokens, keeping the
 * delimiters.
 *
 * A character starts a delimiter token when it equals the first character
 * of any of the delim_count strings in delims; the token then extends over
 * the whole run of that same character. Everything between two delimiter
 * tokens is one text token.
 *
 * Returns a malloc'd array of malloc'd strings and stores its length in
 * *out_count; the caller frees every string and then the array. Empty
 * input yields a non-NULL array with *out_count == 0. If an allocation
 * fails, everything is released, *out_count is 0 and NULL is returned.
 */
char** split_keep_multi_delims(const char* s,
                               const char** delims,
                               int delim_count,
                               int* out_count)
{
    int capacity = 16;
    int count = 0;
    char** result = malloc((size_t)capacity * sizeof *result);

    *out_count = 0;
    if (result == NULL) {
        return NULL;
    }

    // Pending plain text is the slice [text_start, p) of s itself, so its
    // length is not limited by any fixed-size scratch buffer.
    const char* text_start = s;
    const char* p = s;

    while (*p) {
        int starts_delim = 0;

        for (int i = 0; i < delim_count; i++) {
            if (delims[i][0] == *p) {
                starts_delim = 1;
                break;
            }
        }

        if (!starts_delim) {
            p++;
            continue;
        }

        // flush pending text; each push checks the capacity itself, so two
        // pushes in one iteration cannot run past the end of the array
        if (p > text_start &&
            !push_part(&result, &count, &capacity, text_start, (size_t)(p - text_start))) {
            goto fail;
        }

        // consume the run of repeated delimiter characters
        const char* run_start = p;
        char d = *p;
        while (*p == d) p++;

        if (!push_part(&result, &count, &capacity, run_start, (size_t)(p - run_start))) {
            goto fail;
        }

        text_start = p;
    }

    // flush remaining text
    if (p > text_start &&
        !push_part(&result, &count, &capacity, text_start, (size_t)(p - text_start))) {
        goto fail;
    }

    *out_count = count;

    return result;

fail:
    for (int i = 0; i < count; i++) {
        free(result[i]);
    }
    free(result);

    return NULL;
}

/*
 * Demo: splits a sample string on runs of '=', '-' and '+' and prints
 * every token in square brackets, releasing each one after use.
 */
int main() {
    const char* delims[] = {"=", "-", "+"};
    const char* s = "aa==bbb---cccc++++ddddd";
    int size;

    char** parts = split_keep_multi_delims(s, delims, 3, &size);

    int i = 0;
    while (i < size) {
        char* token = parts[i];

        printf("[%s] ", token);
        free(token);
        i++;
    }

    free(parts);

    return 0;
}

 
 
/*
 
[aa] [==] [bbb] [---] [cccc] [++++] [ddddd] 
 
*/

 



answered Mar 9 by avibootz
0 votes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Reports whether c is one of the characters in the NUL-terminated set
 * delims.
 *
 * Returns 1 if c occurs in delims, otherwise 0. The NUL character is never
 * a delimiter.
 */
int is_delim_char(char c, const char *delims) {
    // strchr() treats the terminating NUL as part of the string, so a
    // search for '\0' would always succeed; rule it out explicitly.
    return c != '\0' && strchr(delims, c) != NULL;
}

/*
 * Appends a NUL-terminated heap copy of the len bytes at src to the array
 * *segments, doubling the array first when it is full.
 *
 * Returns 1 on success, 0 if an allocation failed; on failure *segments,
 * *count and *capacity still describe the entries stored so far.
 */
static int add_segment(char ***segments, int *count, int *capacity,
                       const char *src, size_t len) {
    if (*count >= *capacity) {
        int new_capacity = *capacity * 2;
        // Keep the old pointer until realloc has succeeded.
        char **grown = realloc(*segments, (size_t)new_capacity * sizeof **segments);
        if (grown == NULL) {
            return 0;
        }
        *segments = grown;
        *capacity = new_capacity;
    }

    char *copy = malloc(len + 1);
    if (copy == NULL) {
        return 0;
    }
    memcpy(copy, src, len);
    copy[len] = '\0';

    (*segments)[(*count)++] = copy;
    return 1;
}

/*
 * Splits s into maximal runs and keeps the delimiters: a run is either a
 * stretch of characters that are not in delims, or a stretch of one
 * repeated character from delims.
 *
 * Returns a malloc'd array of malloc'd strings and stores its length in
 * *out_count; the caller frees every string and then the array. Segments
 * are never empty, so empty input yields a non-NULL array with
 * *out_count == 0. If an allocation fails, everything is released,
 * *out_count is 0 and NULL is returned.
 */
char **split_and_keep(const char *s, const char *delims, int *out_count) {
    int capacity = 16;
    int count = 0;
    char **result = malloc((size_t)capacity * sizeof *result);

    *out_count = 0;
    if (result == NULL) {
        return NULL;
    }

    size_t len = strlen(s);
    size_t start = 0;

    // i is a candidate boundary between s[i - 1] and s[i]; i == len closes
    // the final segment. Empty input never enters the loop.
    for (size_t i = 1; i <= len; i++) {
        if (i < len) {
            char prev = s[i - 1];
            char curr = s[i];

            // prev and curr are never '\0' here, so strchr cannot match
            // the terminator of delims.
            int prev_is_delim = strchr(delims, prev) != NULL;
            int curr_is_delim = strchr(delims, curr) != NULL;

            int should_split =
                (prev_is_delim != curr_is_delim) ||               // text <-> delim
                (prev_is_delim && curr_is_delim && prev != curr); // delim type changed

            if (!should_split) {
                continue;
            }
        }

        if (!add_segment(&result, &count, &capacity, s + start, i - start)) {
            for (int k = 0; k < count; k++) {
                free(result[k]);
            }
            free(result);
            return NULL;
        }
        start = i;
    }

    *out_count = count;
    return result;
}

/*
 * Demo: splits a sample string on the delimiter characters '=', '-' and
 * '+' and prints every segment in square brackets, releasing each one
 * after use.
 */
int main() {
    const char *delims = "=-+";
    const char *s = "aa==bbb---cccc++++ddddd";

    int n;
    char **parts = split_and_keep(s, delims, &n);

    int i = 0;
    while (i < n) {
        char *segment = parts[i];

        printf("[%s] ", segment);
        free(segment);
        i++;
    }

    free(parts);

    return 0;
}

  
  
/*
  
[aa] [==] [bbb] [---] [cccc] [++++] [ddddd] 
  
*/

 



answered Mar 10 by avibootz

Related questions

...