LCOV - code coverage report
Current view: top level - u8stream/source/utf8stringbuf - utf8stringbuf.c (source / functions) Coverage Total Hit
Test: crystal-facet-uml_v1.63.2_covts Lines: 100.0 % 62 62
Test Date: 2025-05-01 10:10:14 Functions: 100.0 % 4 4

            Line data    Source code
       1              : /* File: utf8stringbuf.c; Copyright and License: see below */
       2              : 
       3              : #include <inttypes.h>
       4              : #include "utf8stringbuf/utf8stringbuf.h"
       5              : 
       /*
        * Storage behind an empty string buffer: one byte that always holds '\0'.
        * It is deliberately not const, because code may store a '\0' into it again;
        * the object therefore has to live in writable memory.
        */
       char utf8stringbuf_private_empty_buf[1] = { '\0' };

       /* printf-style conversion for an int64_t, printed as signed decimal */
       const char *utf8stringbuf_private_format_signed_64_bit_int = "%" PRIi64;

       /* printf-style conversion for a 64 bit unsigned value, printed as lower-case hexadecimal */
       const char *utf8stringbuf_private_format_64_bit_hex = "%" PRIx64;
      12              : 
      13              : /*!
      14              :  *  \fn utf8stringbuf_private_write_char( char *destination, unsigned int max_size, const uint32_t source, int *out_len )
      15              :  *  \private
      16              :  */
      17              : /* function to write a code point as utf8; stores the number of bytes written to *out_len and returns an error code */
      18              : static inline utf8error_t utf8stringbuf_private_write_char( char *destination, unsigned int max_size, const uint32_t source, int *out_len );
      19              : 
      /*
       * Bit patterns of utf8 lead bytes, indexed by the minimum sequence length in bytes:
       *   [2] = 0xc0: sequences of 2 or more bytes start with a byte that has its 2 highest bits set
       *   [3] = 0xe0: sequences of 3 or more bytes start with a byte that has its 3 highest bits set
       *   [4] = 0xf0: sequences of exactly 4 bytes start with a byte that has its 4 highest bits set
       * Entries [0] and [1] are fillers so that the length can be used as index directly.
       */
      static const unsigned char utf8stringbuf_private_pattern_to_detect_half_utf8_sequences[5] = {
          0x00, 0x00, 0xc0, 0xe0, 0xf0
      };
      24              : 
      25              : /* Note: There is some magic in the design of utf8 which makes the implementation of this function quite short */
      26           85 : unsigned int utf8_string_buf_private_make_null_termination( utf8stringbuf_t *this_ ) {
      27              :     unsigned int truncatedLength;
      28           85 :     int clearAtEnd = 1;
      29              : 
      30          309 :     for ( int searchBackwards = 2; searchBackwards <= 4; searchBackwards ++ ) {
      31          237 :         if ( searchBackwards > (*this_).size ) {
      32            4 :             break;
      33              :         }
      34          233 :         const char pattern = utf8stringbuf_private_pattern_to_detect_half_utf8_sequences[searchBackwards];
      35          233 :         if ( ( (*this_).buf[(*this_).size-searchBackwards] & pattern ) == pattern ) {
      36            9 :             clearAtEnd = searchBackwards;
      37            9 :             break;
      38              :         }
      39              :     }
      40              : 
      41           85 :     truncatedLength = (*this_).size - clearAtEnd;
      42              :     /* (*this_).buf[truncatedLength] = '\0'; */ /* Note: some functions like splitIn2 require complete zeroed trailings */
      43           85 :     memset( &((*this_).buf[truncatedLength]), '\0', clearAtEnd );
      44           85 :     return truncatedLength;
      45              : }
      46              : 
      47           14 : utf8error_t utf8stringbuf_append_char( utf8stringbuf_t *this_, const uint32_t appendix ) {
      48              :     utf8error_t result;
      49           14 :     const unsigned int start = utf8stringbuf_get_length( this_ );
      50              :     int appendLen;
      51           14 :     result = utf8stringbuf_private_write_char( &((*this_).buf[start]), (*this_).size - start - 1, appendix, &appendLen );
      52           14 :     if ( result == UTF8ERROR_SUCCESS ) {
      53           11 :         (*this_).buf[start+appendLen] = '\0';
      54              :     }
      55           14 :     return result;
      56              : }
      57              : 
      58            5 : utf8error_t utf8stringbuf_append_wstr( utf8stringbuf_t *this_, const wchar_t *appendix ) {
      59            5 :     utf8error_t result = UTF8ERROR_NULL_PARAM;
      60            5 :     if ( appendix != NULL ) {
      61            4 :         unsigned int start = utf8stringbuf_get_length( this_ );
      62            4 :         result = UTF8ERROR_SUCCESS;
      63            9 :         for( ; appendix[0]!=L'\0'; appendix = &(appendix[1]) ) {
      64              :             int appendLen;
      65            7 :             result |= utf8stringbuf_private_write_char( &((*this_).buf[start]), (*this_).size - start - 1, appendix[0], &appendLen );
      66            7 :             if ( result != UTF8ERROR_SUCCESS ) {
      67            2 :                 if ( ( result & UTF8ERROR_TRUNCATED ) != 0 ) {
      68            2 :                     break;
      69              :                 }
      70              :             }
      71            5 :             start = start + appendLen;
      72              :         }
      73            4 :         (*this_).buf[start] = '\0';
      74              :     }
      75            5 :     return result;
      76              : }
      77              : 
      78           21 : static inline utf8error_t utf8stringbuf_private_write_char( char *destination, unsigned int max_size, const uint32_t source, int *out_len ) {
      79           21 :     *out_len = 0;
      80           21 :     utf8error_t result = UTF8ERROR_TRUNCATED;
      81           21 :     if ( source <= 0x7ff ) {
      82           12 :         if ( source <= 0x7f ) {
      83              :             /* 1 byte character */
      84              :             /* check if there is enough space for the character */
      85           10 :             if ( max_size >= 1 ) {
      86            8 :                 destination[0] = source;
      87            8 :                 *out_len = 1;
      88            8 :                 result = UTF8ERROR_SUCCESS;
      89              :             }
      90              :         }
      91              :         else {
      92              :             /* 2 byte character */
      93            2 :             if ( max_size >= 2 ) {
      94            2 :                 destination[0] = 0xc0 | ( source >> 6 );
      95            2 :                 destination[1] = 0x80 | ( source & 0x3f );
      96            2 :                 *out_len = 2;
      97            2 :                 result = UTF8ERROR_SUCCESS;
      98              :             }
      99              :         }
     100              :     }
     101              :     else {
     102            9 :         if ( source <= 0x10ffff ) {
     103            8 :             if ( source <= 0xffff ) {
     104              :                 /* 3 byte character */
     105            6 :                 if ( max_size >= 3 ) {
     106            4 :                     destination[0] = 0xe0 | ( source >> 12 );
     107            4 :                     destination[1] = 0x80 | (( source >> 6 ) & 0x3f );
     108            4 :                     destination[2] = 0x80 | ( source & 0x3f );
     109            4 :                     *out_len = 3;
     110            4 :                     result = UTF8ERROR_SUCCESS;
     111              :                 }
     112              :             }
     113              :             else {
     114              :                 /* 4 byte character */
     115            2 :                 if ( max_size >= 4 ) {
     116            2 :                     destination[0] = 0xf0 | ( source >> 18 );
     117            2 :                     destination[1] = 0x80 | (( source >> 12 ) & 0x3f );
     118            2 :                     destination[2] = 0x80 | (( source >> 6 ) & 0x3f );
     119            2 :                     destination[3] = 0x80 | ( source & 0x3f );
     120            2 :                     *out_len = 4;
     121            2 :                     result = UTF8ERROR_SUCCESS;
     122              :                 }
     123              :             }
     124              :         }
     125              :         else {
     126              :             /* note: utf8 can not encode more than 21 bits per character, and even there only 0-0x10ffff is allowed. */
     127            1 :             result = UTF8ERROR_NOT_A_CODEPOINT;
     128              :         }
     129              :     }
     130           21 :     return result;
     131              : }
     132              : 
     133              : 
     134              : /*
     135              :  * Copyright 2012-2025 Andreas Warnke
     136              :  *
     137              :  * Licensed under the Apache License, Version 2.0 (the "License");
     138              :  * you may not use this file except in compliance with the License.
     139              :  * You may obtain a copy of the License at
     140              :  *
     141              :  *    http://www.apache.org/licenses/LICENSE-2.0
     142              :  *
     143              :  * Unless required by applicable law or agreed to in writing, software
     144              :  * distributed under the License is distributed on an "AS IS" BASIS,
     145              :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     146              :  * See the License for the specific language governing permissions and
     147              :  * limitations under the License.
     148              :  */
        

Generated by: LCOV version 2.0-1