Welcome to collectivesolver - Programming & Software Q&A with code examples. A website with trusted programming answers. All programs are tested and work.

Contact: aviboots(AT)netvision.net.il

Buy a domain name - Register cheap domain names from $0.99 - Namecheap

Scalable Hosting That Grows With You

Secure & Reliable Web Hosting, Free Domain, Free SSL, 1-Click WordPress Install, Expert 24/7 Support

Semrush - keyword research tool

Boost your online presence with premium web hosting and servers

Disclosure: My content contains affiliate links.

39,890 questions

51,817 answers

573 users

How to convert 16-bit wide character (UTF-16) to its narrow multibyte character (UTF-8) in C

2 Answers

0 votes
#include <stdio.h>
#include <locale.h>
#include <uchar.h> // c16rtomb
#include <stdlib.h> // MB_CUR_MAX 

/*
 * Converts a UTF-16 string literal, including its terminating null code
 * unit, to the current locale's multibyte encoding with c16rtomb() and
 * prints the input and output code units in hexadecimal.
 */
int main(void)
{
    // c16rtomb() targets the current locale's multibyte encoding, so a
    // UTF-8 locale has to be selected to obtain UTF-8 output.
    if (setlocale(LC_ALL, "en_US.utf8") == NULL) {
        fputs("warning: locale en_US.utf8 is not available\n", stderr);
    }

    char16_t char16[] = u"aßb£c";
    // Element count of the array; this includes the terminating null.
    size_t char16_size = sizeof char16 / sizeof *char16;

    printf("Convert %zu UTF-16 with codes: [ ", char16_size);
    for (size_t i = 0; i < char16_size; i++) {
        // %x expects unsigned int; char16_t would otherwise promote to int.
        printf("%#x, ", (unsigned)char16[i]);
    }
    puts("]");

    // MB_CUR_MAX = A macro that indicates the maximum number
    // of bytes in a multibyte character for the current locale,
    // so this buffer is large enough for every input code unit.
    char result[MB_CUR_MAX * char16_size];
    char *p = result;

    // The conversion state must start zero-initialized (the initial
    // conversion state); passing an uninitialized object is undefined.
    mbstate_t state = {0};
    for (size_t i = 0; i < char16_size; i++) {
        size_t byteswritten = c16rtomb(p, char16[i], &state);
        if (byteswritten == (size_t)-1) {
            // Stop at the first code unit that cannot be converted and
            // print whatever was produced up to that point.
            fputs("warning: c16rtomb failed; output is truncated\n", stderr);
            break;
        }
        p += byteswritten;
    }

    size_t result_size = (size_t)(p - result);
    printf("into %zu UTF-8 code units: [ ", result_size);
    for (size_t i = 0; i < result_size; i++) {
        // Go through unsigned char first so bytes >= 0x80 do not sign-extend.
        printf("%#x, ", (unsigned)(unsigned char)result[i]);
    }
    puts("]");
}


 
/*
run:
   
Convert 6 UTF-16 with codes: [ 0x61, 0xdf, 0x62, 0xa3, 0x63, 0, ]
into 8 UTF-8 code units: [ 0x61, 0xc3, 0x9f, 0x62, 0xc2, 0xa3, 0x63, 0, ]
  
*/

 



answered Dec 17, 2024 by avibootz
edited Dec 18, 2024 by avibootz
0 votes
#include <stdio.h>
#include <locale.h>
#include <uchar.h>
#include <stdlib.h> 

/*
 * Converts a UTF-16 string literal, excluding its terminating null code
 * unit, to the current locale's multibyte encoding with c16rtomb() and
 * prints the input and output code units in hexadecimal.
 */
int main(void)
{
    // c16rtomb() targets the current locale's multibyte encoding, so a
    // UTF-8 locale has to be selected to obtain UTF-8 output.
    if (setlocale(LC_ALL, "en_US.utf8") == NULL) {
        fputs("warning: locale en_US.utf8 is not available\n", stderr);
    }

    char16_t char16[] = u"aßb£c";
    // Element count of the array minus one, to skip the terminating null.
    size_t char16_size = (sizeof char16 / sizeof *char16) - 1;

    printf("Convert %zu UTF-16 with codes: [ ", char16_size);
    for (size_t i = 0; i < char16_size; i++) {
        // %x expects unsigned int; char16_t would otherwise promote to int.
        printf("%#x, ", (unsigned)char16[i]);
    }
    puts("]");

    // Each input code unit yields at most MB_CUR_MAX bytes, so this
    // buffer is large enough for the whole conversion.
    char result[MB_CUR_MAX * char16_size];
    char *p = result;

    // The conversion state must start zero-initialized (the initial
    // conversion state); passing an uninitialized object is undefined.
    mbstate_t state = {0};
    for (size_t i = 0; i < char16_size; i++) {
        size_t byteswritten = c16rtomb(p, char16[i], &state);
        if (byteswritten == (size_t)-1) {
            // Stop at the first code unit that cannot be converted and
            // print whatever was produced up to that point.
            fputs("warning: c16rtomb failed; output is truncated\n", stderr);
            break;
        }
        p += byteswritten;
    }

    size_t result_size = (size_t)(p - result);
    printf("into %zu UTF-8 code units: [ ", result_size);
    for (size_t i = 0; i < result_size; i++) {
        // Go through unsigned char first so bytes >= 0x80 do not sign-extend.
        printf("%#x, ", (unsigned)(unsigned char)result[i]);
    }
    puts("]");
}

 
/*
run:
   
Convert 5 UTF-16 with codes: [ 0x61, 0xdf, 0x62, 0xa3, 0x63, ]
into 7 UTF-8 code units: [ 0x61, 0xc3, 0x9f, 0x62, 0xc2, 0xa3, 0x63, ]
  
*/





 



answered Dec 18, 2024 by avibootz
...