How to detect and remove invisible or non‑printing characters that break a string in C

1 Answer

0 votes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

// Return a newly allocated copy of `input` that contains only the bytes for
// which isprint() is true; every other byte is dropped.
// Returns NULL if the allocation fails. The caller owns the result and must
// free() it.
char* removeNonPrintable(const char* input) {
    // The filtered text can never be longer than the input, so one buffer of
    // the input's size (plus the terminator) is always enough.
    char* result = malloc(strlen(input) + 1);
    if (result == NULL) {
        return NULL;
    }

    char* dst = result;
    for (const char* src = input; *src != '\0'; ++src) {
        // <ctype.h> functions require a value representable as unsigned char.
        unsigned char byte = (unsigned char)*src;
        if (!isprint(byte)) {
            continue;
        }
        *dst++ = (char)byte;
    }
    *dst = '\0';
    return result;
}

// Remove control characters but keep spaces, tabs ('\t') and newlines ('\n').
// All other bytes for which iscntrl() is true (including '\r' and DEL) are
// dropped; bytes that are not control characters are copied unchanged.
// Returns a newly allocated string that the caller must free(), or NULL if
// the allocation fails.
char* removeControlChars(const char* input) {
    size_t len = strlen(input);
    // The output is never longer than the input.
    char* out = malloc(len + 1);
    if (!out) return NULL;

    size_t j = 0;
    for (size_t i = 0; i < len; i++) {
        // <ctype.h> functions require a value representable as unsigned char.
        unsigned char ch = (unsigned char)input[i];
        // iscntrl() is true for '\t' and '\n', so they have to be exempted
        // explicitly to honour the "keep tabs and newlines" contract.
        if (ch == '\t' || ch == '\n' || !iscntrl(ch)) {
            out[j++] = (char)ch;
        }
    }
    out[j] = '\0';

    return out;
}

// Debug print: write `s` to stdout followed by a newline. Bytes for which
// isprint() is true are written as-is; every other byte is written as a
// \xNN escape (two lowercase hex digits).
void debugPrint(const char* s) {
    // Walk the string as unsigned bytes so that values above 0x7F are passed
    // to isprint() and printed in hex as non-negative numbers.
    const unsigned char* cursor = (const unsigned char*)s;
    while (*cursor != '\0') {
        unsigned char byte = *cursor++;
        if (!isprint(byte)) {
            printf("\\x%02x", byte);
            continue;
        }
        putchar(byte);
    }
    putchar('\n');
}

// Demo driver: prints the raw sample string in debug form, then the result of
// each cleaning function. Returns EXIT_FAILURE if either cleaner cannot
// allocate its output buffer.
int main(void) {
    // Contains ASCII control chars + UTF-8 zero-width space (E2 80 8B)
    const char* raw = "Hello\x01\x02 Milky Way Galaxy\xe2\x80\x8b!";

    printf("Original (debug): ");
    debugPrint(raw);

    char* asciiClean = removeNonPrintable(raw);
    if (!asciiClean) {
        // debugPrint() dereferences its argument, so NULL must not reach it.
        fprintf(stderr, "removeNonPrintable: out of memory\n");
        return EXIT_FAILURE;
    }
    printf("After ASCII clean: ");
    debugPrint(asciiClean);
    free(asciiClean);

    // Reuse the same input so both cleaners are shown on identical data.
    char* removeControlCharsstr = removeControlChars(raw);
    if (!removeControlCharsstr) {
        fprintf(stderr, "removeControlChars: out of memory\n");
        return EXIT_FAILURE;
    }
    printf("After ASCII removeControlChars(): ");
    debugPrint(removeControlCharsstr);
    free(removeControlCharsstr);

    return 0;
}


/*
run:

Original (debug): Hello\x01\x02 Milky Way Galaxy\xe2\x80\x8b!
After ASCII clean: Hello Milky Way Galaxy!
After ASCII removeControlChars(): Hello Milky Way Galaxy\xe2\x80\x8b!

*/

 



answered Dec 23, 2025 by avibootz
...