How to compress and decompress repeated string characters by storing the repeated length (RLE) in C

1 Answer

0 votes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* ---------------------------------------------------------
   Compress: turn "aaabbcccc" into "a3b2c4"
   This is run-length encoding (RLE)
   --------------------------------------------------------- */
char* compress(const char *s) {
    if (s[0] == '\0') {
        char *empty = malloc(1);
        empty[0] = '\0';
        return empty;   // Edge case: empty string
    }

    size_t len = strlen(s);
    char *out = malloc(1);
    out[0] = '\0';
    size_t outSize = 1;

    int count = 1;  // Count of repeated characters

    // Start from index 1 and compare with previous character
    for (size_t i = 1; i <= len; i++) {

        // If still repeating the same character, increase count
        if (i < len && s[i] == s[i - 1]) {
            count++;
        } 
        else {
            // Character changed OR reached end of string
            char buffer[32];
            sprintf(buffer, "%c%d", s[i - 1], count);

            // Grow output buffer
            outSize += strlen(buffer);
            out = realloc(out, outSize);

            strcat(out, buffer);

            count = 1;  // Reset counter
        }
    }

    return out;
}

/* ---------------------------------------------------------
   Decompress: turn "a3b2c4" into "aaabbcccc"
   Reads a letter, then reads digits, expands them.
   --------------------------------------------------------- */
char* decompress(const char *s) {
    char *out = malloc(1);
    out[0] = '\0';
    size_t outSize = 1;

    char currentChar = '\0';  // The character being processed
    char number[32];
    int numIndex = 0;

    for (size_t i = 0; s[i] != '\0'; i++) {
        char c = s[i];

        if (isalpha((unsigned char)c)) {  // Found a letter
            // If we already have a previous letter + number, expand it
            if (currentChar != '\0' && numIndex > 0) {
                number[numIndex] = '\0';
                int count = atoi(number);

                // Expand
                outSize += count;
                out = realloc(out, outSize);

                for (int k = 0; k < count; k++) {
                    size_t len = strlen(out);
                    out[len] = currentChar;
                    out[len + 1] = '\0';
                }
            }

            currentChar = c;   // Start new character
            numIndex = 0;      // Reset number buffer
        }
        else if (isdigit((unsigned char)c)) {  // Found a digit
            number[numIndex++] = c;            // Build multi-digit number
        }
    }

    // Handle the last character + number pair
    if (currentChar != '\0' && numIndex > 0) {
        number[numIndex] = '\0';
        int count = atoi(number);

        outSize += count;
        out = realloc(out, outSize);

        for (int k = 0; k < count; k++) {
            size_t len = strlen(out);
            out[len] = currentChar;
            out[len + 1] = '\0';
        }
    }

    return out;
}

int main() {
    char s[] = "wwwwwwwwwwwwbwwwwwwwwwwbbbwwwwwwccccwwwwwwwwwwww";

    char *c = compress(s);
    char *d = decompress(c);

    printf("Original:   %s\n", s);
    printf("Compressed: %s\n", c);
    printf("Decompressed: %s\n", d);

    free(c);
    free(d);
}



/*
run:

Original:   wwwwwwwwwwwwbwwwwwwwwwwbbbwwwwwwccccwwwwwwwwwwww
Compressed: w12b1w10b3w6c4w12
Decompressed: wwwwwwwwwwwwbwwwwwwwwwwbbbwwwwwwccccwwwwwwwwwwww

*/

 



answered 2 hours ago by avibootz
...