How to compress and decompress repeated string characters by storing the repeated length (RLE) in Python

2 Answers

0 votes
# ---------------------------------------------------------
#   Compress: turn "aaabbcccc" into "a3b2c4"
#   This is run-length encoding (RLE)
# ---------------------------------------------------------
def compress(s: str) -> str:
    if not s:
        return ""   # Edge case: empty string

    out = []        # Result string (list for efficiency)
    count = 1       # Count of repeated characters

    # Start from index 1 and compare with previous character
    for i in range(1, len(s) + 1):

        # If still repeating the same character, increase count
        if i < len(s) and s[i] == s[i - 1]:
            count += 1
        else:
            # Character changed OR reached end of string
            out.append(s[i - 1])     # Add the character
            out.append(str(count))   # Add how many times it repeated
            count = 1                # Reset counter

    return "".join(out)


# ---------------------------------------------------------
#   Decompress: turn "a3b2c4" into "aaabbcccc"
#   Reads a letter, then reads digits, expands them.
# ---------------------------------------------------------
def decompress(s: str) -> str:
    out = []                     # Result string
    current_char = None          # The character being processed
    number = []                  # Digits representing the count

    for c in s:

        if c.isalpha():          # Found a letter
            # If we already have a previous letter + number, expand it
            if current_char is not None and number:
                count = int("".join(number))
                out.append(current_char * count)

            current_char = c     # Start new character
            number = []          # Reset number buffer

        elif c.isdigit():        # Found a digit
            number.append(c)     # Build multi-digit number

    # Handle the last character + number pair
    if current_char is not None and number:
        count = int("".join(number))
        out.append(current_char * count)

    return "".join(out)


def main():
    s = "wwwwwwwwwwwwbwwwwwwwwwwbbbwwwwwwccccwwwwwwwwwwww"

    c = compress(s)
    d = decompress(c)

    print("Original:   ", s)
    print("Compressed: ", c)
    print("Decompressed:", d)


if __name__ == "__main__":
    main()



"""
run:

Original:    wwwwwwwwwwwwbwwwwwwwwwwbbbwwwwwwccccwwwwwwwwwwww
Compressed:  w12b1w10b3w6c4w12
Decompressed: wwwwwwwwwwwwbwwwwwwwwwwbbbwwwwwwccccwwwwwwwwwwww

"""

 



answered 1 day ago by avibootz
0 votes
from itertools import groupby

# ---------------------------------------------------------
#   Compress: turn "aaabbcccc" into "a3b2c4"
#   This is run-length encoding (RLE)
# ---------------------------------------------------------
def compress(s: str) -> str:
    if not s:
        return ""   # Edge case: empty string

    # groupby automatically groups consecutive identical characters
    parts = []
    for char, group in groupby(s):
        count = sum(1 for _ in group)   # Count how many times it repeats
        parts.append(f"{char}{count}")  # Add "char + count"

    return "".join(parts)


# ---------------------------------------------------------
#   Decompress: turn "a3b2c4" into "aaabbcccc"
#   Reads a letter, then reads digits, expands them.
# ---------------------------------------------------------
def decompress(s: str) -> str:
    out = []                     # Result string
    current_char = None          # The character being processed
    number = []                  # Digits representing the count

    for c in s:

        if c.isalpha():          # Found a letter
            # If we already have a previous letter + number, expand it
            if current_char is not None and number:
                out.append(current_char * int("".join(number)))

            current_char = c     # Start new character
            number = []          # Reset number buffer

        elif c.isdigit():        # Found a digit
            number.append(c)     # Build multi-digit number

    # Handle the last character + number pair
    if current_char is not None and number:
        out.append(current_char * int("".join(number)))

    return "".join(out)


def main():
    s = "wwwwwwwwwwwwbwwwwwwwwwwbbbwwwwwwccccwwwwwwwwwwww"

    c = compress(s)
    d = decompress(c)

    print("Original:   ", s)
    print("Compressed: ", c)
    print("Decompressed:", d)


if __name__ == "__main__":
    main()



"""
run:

Original:    wwwwwwwwwwwwbwwwwwwwwwwbbbwwwwwwccccwwwwwwwwwwww
Compressed:  w12b1w10b3w6c4w12
Decompressed: wwwwwwwwwwwwbwwwwwwwwwwbbbwwwwwwccccwwwwwwwwwwww

"""

 



answered 1 day ago by avibootz

Related questions

...