LCOV - code coverage report
Current view: top level - u8stream/source/utf8stringbuf - utf8stringbuf.c (source / functions) Hit Total Coverage
Test: crystal-facet-uml_v1.65.6_covts Lines: 62 62 100.0 %
Date: 2025-09-25 21:07:53 Functions: 4 4 100.0 %

          Line data    Source code
       1             : /* File: utf8stringbuf.c; Copyright and License: see below */
       2             : 
       3             : #include <inttypes.h>
       4             : #include "utf8stringbuf/utf8stringbuf.h"
       5             : 
       6             : /* utf8stringbuf_private_empty_buf always holds just a 0 byte (the empty string), but may be overwritten by a 0 - therefore it is stored in a read-writable memory page */
       7             : char utf8stringbuf_private_empty_buf[1] = "";
       8             : 
       9             : const char *utf8stringbuf_private_format_signed_64_bit_int = "%" PRIi64;  /* printf-style format: "%" followed by the <inttypes.h> conversion specifier for a signed 64 bit integer */
      10             : 
      11             : const char *utf8stringbuf_private_format_64_bit_hex = "%" PRIx64;  /* printf-style format: "%" followed by the <inttypes.h> lowercase hexadecimal conversion specifier for a 64 bit integer */
      12             : 
      13             : /*!
      14             :  *  \fn utf8stringbuf_private_write_char( char *destination, unsigned int max_size, const uint32_t source, int *out_len )
      15             :  *  \private
      16             :  */
      17             : /* function to write a code point as utf8; stores the number of bytes written in *out_len and returns an error code */
      18             : static inline utf8error_t utf8stringbuf_private_write_char( char *destination, unsigned int max_size, const uint32_t source, int *out_len );
      19             : 
      20             : /* utf8 sequences of 2 or more bytes start with a byte that has its 2 highest bits set: 0xc0 */
      21             : /* utf8 sequences of 3 or more bytes start with a byte that has its 3 highest bits set: 0xe0 */
      22             : /* utf8 sequences of exactly  4 bytes start with a byte that has its 4 highest bits set: 0xf0 */
      23             : static const unsigned char utf8stringbuf_private_pattern_to_detect_half_utf8_sequences[5] = { 0, 0, 0xc0, 0xe0, 0xf0 };  /* indexed by the look-back distance (2..4) from the end of the buffer; entries 0 and 1 are not read by the look-back loop */
      24             : 
      25             : /* Zeroes the end of the buffer and returns the index of the first zeroed byte; if one of the bytes 2..4 positions before the end looks like the start of a utf8 sequence that would reach into the last byte, everything from that byte on is zeroed. Note: There is some magic in the design of utf8 which makes the implementation of this function quite short */
      26          85 : unsigned int utf8_string_buf_private_make_null_termination( utf8stringbuf_t *this_ ) {
      27             :     unsigned int truncatedLength;
      28          85 :     int clearAtEnd = 1;  /* number of bytes to zero at the end of the buffer; by default only the last byte */
      29             : 
      30         309 :     for ( int searchBackwards = 2; searchBackwards <= 4; searchBackwards ++ ) {  /* inspect the bytes 2, 3 and 4 positions before the end of the buffer */
      31         237 :         if ( searchBackwards > (*this_).size ) {  /* the buffer is too small to look back that far */
      32           4 :             break;
      33             :         }
      34         233 :         const char pattern = utf8stringbuf_private_pattern_to_detect_half_utf8_sequences[searchBackwards];  /* 0xc0, 0xe0 or 0xf0 for a look-back of 2, 3 or 4 */
      35         233 :         if ( ( (*this_).buf[(*this_).size-searchBackwards] & pattern ) == pattern ) {  /* all bits of the pattern are set in the inspected byte */
      36           9 :             clearAtEnd = searchBackwards;  /* zero from the inspected byte up to the end of the buffer */
      37           9 :             break;
      38             :         }
      39             :     }
      40             : 
      41          85 :     truncatedLength = (*this_).size - clearAtEnd;  /* NOTE(review): clearAtEnd is at least 1, so a size of 0 would go below zero here - presumably size is always >= 1; confirm with the callers */
      42             :     /* (*this_).buf[truncatedLength] = '\0'; */ /* Note: some functions like splitIn2 require complete zeroed trailings */
      43          85 :     memset( &((*this_).buf[truncatedLength]), '\0', clearAtEnd );  /* zeroes the last clearAtEnd bytes of the buffer */
      44          85 :     return truncatedLength;
      45             : }
      46             : 
      47          14 : utf8error_t utf8stringbuf_append_char( utf8stringbuf_t *this_, const uint32_t appendix ) {  /* appends the code point appendix, encoded as utf8, and returns the error code of the encoding step */
      48             :     utf8error_t result;
      49          14 :     const unsigned int start = utf8stringbuf_get_length( this_ );  /* write position; presumably the current string length - utf8stringbuf_get_length is defined elsewhere */
      50             :     int appendLen;
      51          14 :     result = utf8stringbuf_private_write_char( &((*this_).buf[start]), (*this_).size - start - 1, appendix, &appendLen );  /* the space offered excludes one byte, which is kept for the terminating zero */
      52          14 :     if ( result == UTF8ERROR_SUCCESS ) {  /* on any error the buffer is not modified by this function */
      53          11 :         (*this_).buf[start+appendLen] = '\0';  /* terminate directly behind the newly written bytes */
      54             :     }
      55          14 :     return result;
      56             : }
      57             : 
      58           5 : utf8error_t utf8stringbuf_append_wstr( utf8stringbuf_t *this_, const wchar_t *appendix ) {  /* appends the zero-terminated wide string appendix, encoding each wide character as utf8 */
      59           5 :     utf8error_t result = UTF8ERROR_NULL_PARAM;  /* returned unchanged if appendix is NULL */
      60           5 :     if ( appendix != NULL ) {
      61           4 :         unsigned int start = utf8stringbuf_get_length( this_ );  /* write position; presumably the current string length - utf8stringbuf_get_length is defined elsewhere */
      62           4 :         result = UTF8ERROR_SUCCESS;
      63           9 :         for( ; appendix[0]!=L'\0'; appendix = &(appendix[1]) ) {  /* NOTE(review): every wide character is encoded on its own, surrogate pairs are not combined - confirm this is intended for 16 bit wchar_t */
      64             :             int appendLen;
      65           7 :             result |= utf8stringbuf_private_write_char( &((*this_).buf[start]), (*this_).size - start - 1, appendix[0], &appendLen );  /* error bits accumulate over all characters; one byte is kept for the terminating zero */
      66           7 :             if ( result != UTF8ERROR_SUCCESS ) {
      67           2 :                 if ( ( result & UTF8ERROR_TRUNCATED ) != 0 ) {  /* only a truncation stops the loop; after other errors the loop continues with the next character */
      68           2 :                     break;
      69             :                 }
      70             :             }
      71           5 :             start = start + appendLen;  /* appendLen is 0 if nothing was written for this character */
      72             :         }
      73           4 :         (*this_).buf[start] = '\0';  /* always terminate at the current write position, also after errors */
      74             :     }
      75           5 :     return result;
      76             : }
      77             : 
      78          21 : static inline utf8error_t utf8stringbuf_private_write_char( char *destination, unsigned int max_size, const uint32_t source, int *out_len ) {  /* encodes the code point source as utf8 into destination using at most max_size bytes; no terminating zero is written */
      79          21 :     *out_len = 0;  /* stays 0 unless a character is written */
      80          21 :     utf8error_t result = UTF8ERROR_TRUNCATED;  /* default result, kept whenever max_size is too small for the needed encoding */
      81          21 :     if ( source <= 0x7ff ) {  /* 1 or 2 byte encodings */
      82          12 :         if ( source <= 0x7f ) {
      83             :             /* 1 byte character */
      84             :             /* check if there is enough space for the character */
      85          10 :             if ( max_size >= 1 ) {
      86           8 :                 destination[0] = source;
      87           8 :                 *out_len = 1;
      88           8 :                 result = UTF8ERROR_SUCCESS;
      89             :             }
      90             :         }
      91             :         else {
      92             :             /* 2 byte character */
      93           2 :             if ( max_size >= 2 ) {
      94           2 :                 destination[0] = 0xc0 | ( source >> 6 );  /* lead byte: bits 110 followed by the upper 5 bits */
      95           2 :                 destination[1] = 0x80 | ( source & 0x3f );  /* continuation byte: bits 10 followed by the lower 6 bits */
      96           2 :                 *out_len = 2;
      97           2 :                 result = UTF8ERROR_SUCCESS;
      98             :             }
      99             :         }
     100             :     }
     101             :     else {
     102           9 :         if ( source <= 0x10ffff ) {  /* 3 or 4 byte encodings */
     103           8 :             if ( source <= 0xffff ) {  /* NOTE(review): 0xd800-0xdfff (utf16 surrogates) also take this branch and are encoded - confirm this is intended */
     104             :                 /* 3 byte character */
     105           6 :                 if ( max_size >= 3 ) {
     106           4 :                     destination[0] = 0xe0 | ( source >> 12 );  /* lead byte: bits 1110 followed by the upper 4 bits */
     107           4 :                     destination[1] = 0x80 | (( source >> 6 ) & 0x3f );
     108           4 :                     destination[2] = 0x80 | ( source & 0x3f );
     109           4 :                     *out_len = 3;
     110           4 :                     result = UTF8ERROR_SUCCESS;
     111             :                 }
     112             :             }
     113             :             else {
     114             :                 /* 4 byte character */
     115           2 :                 if ( max_size >= 4 ) {
     116           2 :                     destination[0] = 0xf0 | ( source >> 18 );  /* lead byte: bits 11110 followed by the upper 3 bits */
     117           2 :                     destination[1] = 0x80 | (( source >> 12 ) & 0x3f );
     118           2 :                     destination[2] = 0x80 | (( source >> 6 ) & 0x3f );
     119           2 :                     destination[3] = 0x80 | ( source & 0x3f );
     120           2 :                     *out_len = 4;
     121           2 :                     result = UTF8ERROR_SUCCESS;
     122             :                 }
     123             :             }
     124             :         }
     125             :         else {
     126             :             /* note: utf8 can not encode more than 21 bits per character, and even there only 0-0x10ffff is allowed. */
     127           1 :             result = UTF8ERROR_NOT_A_CODEPOINT;  /* nothing is written and *out_len stays 0 */
     128             :         }
     129             :     }
     130          21 :     return result;
     131             : }
     132             : 
     133             : 
     134             : /*
     135             :  * Copyright 2012-2025 Andreas Warnke
     136             :  *
     137             :  * Licensed under the Apache License, Version 2.0 (the "License");
     138             :  * you may not use this file except in compliance with the License.
     139             :  * You may obtain a copy of the License at
     140             :  *
     141             :  *    http://www.apache.org/licenses/LICENSE-2.0
     142             :  *
     143             :  * Unless required by applicable law or agreed to in writing, software
     144             :  * distributed under the License is distributed on an "AS IS" BASIS,
     145             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     146             :  * See the License for the specific language governing permissions and
     147             :  * limitations under the License.
     148             :  */

Generated by: LCOV version 1.16