LCOV - code coverage report
Current view: top level - u8stream/include/utf8stringbuf - utf8stringview.inl (source / functions) Coverage Total Hit
Test: crystal-facet-uml_v1.63.2_covts Lines: 100.0 % 206 206
Test Date: 2025-05-01 10:10:14 Functions: 100.0 % 19 19

            Line data    Source code
       1              : /* File: utf8stringview.inl; Copyright and License: see below */
       2              : 
       3              : #include "u8/u8_i32.h"
       4              : 
       5              : #ifdef __cplusplus
       6              : extern "C" {
       7              : #endif
       8              : 
       9           86 : static inline utf8error_t utf8stringview_init( utf8stringview_t *this_, const char* start, size_t length )
      10              : {
      11           86 :     assert( start != NULL );
      12           86 :     utf8error_t result = UTF8ERROR_SUCCESS;
      13              :     /* clean type would have been:  char ( *start_arr )[] = (char(*)[]) start; */
      14              : 
      15              :     /* check start */
      16           86 :     char start_copy[4] = {'\0','\0','\0','\0'};
      17           86 :     const size_t start_len = ( length >= 4 ) ? 4 : length;
      18           86 :     memcpy( &start_copy, start, start_len );
      19              : 
      20           86 :     if ( ( 0xc0 & (start_copy[0]) ) == 0x80 )
      21              :     {
      22            5 :         if ( ( 0xc0 & (start_copy[1]) ) == 0x80 )
      23              :         {
      24            2 :             if ( ( 0xc0 & (start_copy[2]) ) == 0x80 )
      25              :             {
      26            1 :                 start += 3;
      27            1 :                 length -= 3;  /* length was greater than 2 - otherwise start_copy[2] would have been 0x0 */
      28            1 :                 result = UTF8ERROR_OUT_OF_RANGE;
      29              :             }
      30              :             else
      31              :             {
      32            1 :                 start += 2;
      33            1 :                 length -= 2;  /* length was greater than 1 - otherwise start_copy[1] would have been 0x0 */
      34            1 :                 result = UTF8ERROR_OUT_OF_RANGE;
      35              :             }
      36              :         }
      37              :         else
      38              :         {
      39            3 :             start += 1;
      40            3 :             length -= 1;  /* length was greater than 0 - otherwise start_copy[0] would have been 0x0 */
      41            3 :             result = UTF8ERROR_OUT_OF_RANGE;
      42              :         }
      43              :     }
      44              :     else
      45              :     {
      46              :         /* valid start */
      47              :     }
      48              : 
      49              :     /* check end */
      50           86 :     char end_copy[4] = {'\0','\0','\0','\0'};
      51           86 :     const size_t end_len = ( length >= 4 ) ? 4 : length;
      52           86 :     memcpy( &(end_copy[4-end_len]), &(start[length-end_len]), end_len );
      53              : 
      54           86 :     if ( ( 0x80 & (end_copy[3]) ) == 0x00 )
      55              :     {
      56              :         /* valid single-byte end */
      57              :     }
      58              :     else
      59              :     {
      60            9 :         if ( ( 0xe0 & (end_copy[2]) ) == 0xc0 )
      61              :         {
      62              :             /* valid 2 byte end */
      63              :         }
      64            8 :         else if ( ( 0x80 & (end_copy[2]) ) == 0x00 )
      65              :         {
      66              :             /* 1 byte char at end_copy[2] */
      67            1 :             length -= 1;  /* length was greater than 0 - otherwise end_copy[3] would have been 0x0 */
      68            1 :             result = UTF8ERROR_OUT_OF_RANGE;
      69              :         }
      70              :         else
      71              :         {
      72            7 :             if ( ( 0xf0 & (end_copy[1]) ) == 0xe0 )
      73              :             {
      74              :                 /* valid 3 byte end */
      75              :             }
      76            6 :             else if ( ( 0xe0 & (end_copy[1]) ) == 0xc0 )
      77              :             {
      78              :                 /* 2 byte char at end_copy[1] */
      79            1 :                 length -= 1;  /* length was greater than 0 - otherwise end_copy[3] would have been 0x0 */
      80            1 :                 result = UTF8ERROR_OUT_OF_RANGE;
      81              :             }
      82            5 :             else if ( ( 0x80 & (end_copy[1]) ) == 0x00 )
      83              :             {
      84              :                 /* 1 byte char at end_copy[1] */
      85            1 :                 length -= 2;  /* length was greater than 1 - otherwise end_copy[2] would have been 0x0 */
      86            1 :                 result = UTF8ERROR_OUT_OF_RANGE;
      87              :             }
      88              :             else
      89              :             {
      90            4 :                 if ( ( 0xf8 & (end_copy[0]) ) == 0xf0 )
      91              :                 {
      92              :                     /* valid 4 byte end */
      93              :                 }
      94              :                 else
      95              :                 {
      96              :                     /* assume 1 byte char at end_copy[0] */
      97            2 :                     length -= 3;  /* length was greater than 2 - otherwise end_copy[1] would have been 0x0 */
      98            2 :                     result = UTF8ERROR_OUT_OF_RANGE;
      99              :                 }
     100              :             }
     101              :         }
     102              :     }
     103              : 
     104           86 :     *this_ = (utf8stringview_t){.start=start,.length=length};
     105           86 :     return result;
     106              : }
     107              : 
     108           61 : static inline void utf8stringview_init_str( utf8stringview_t *this_, const char* cstring )
     109              : {
     110           61 :     *this_ = (utf8stringview_t){.start=cstring,.length=(cstring==NULL)?0:strlen(cstring)};
     111           61 : }
     112              : 
     113           16 : static inline utf8error_t utf8stringview_init_region( utf8stringview_t *this_, const char* cstring, size_t start_idx, size_t length )
     114              : {
     115           16 :     assert( cstring != NULL );
     116           16 :     utf8error_t result = UTF8ERROR_SUCCESS;
     117           16 :     const size_t cstring_len = strlen( cstring );
     118           16 :     if ( start_idx > cstring_len )
     119              :     {
     120            1 :         *this_ = (utf8stringview_t){.start=cstring+start_idx,.length=0};
     121            1 :         result |= UTF8ERROR_OUT_OF_RANGE;
     122              :     }
     123              :     else
     124              :     {
     125           15 :         const size_t max_len = cstring_len - start_idx;
     126           15 :         result |= utf8stringview_init( this_, cstring+start_idx, u8_i32_min2( length, max_len ) );
     127           15 :         if ( length > max_len )
     128              :         {
     129              :             /* notify that stringview cannot exceed the cstring */
     130            1 :             result |= UTF8ERROR_OUT_OF_RANGE;
     131              :         }
     132              :     }
     133           16 :     return result;
     134              : }
     135              : 
     136          143 : static inline void utf8stringview_destroy( utf8stringview_t *this_ )
     137              : {
     138          143 :     *this_ = (utf8stringview_t){.start=NULL,.length=0};
     139          143 : }
     140              : 
     141        52192 : static inline const char* utf8stringview_get_start( const utf8stringview_t *this_ ) {
     142        52192 :     return (*this_).start;
     143              : }
     144              : 
     145        49746 : static inline size_t utf8stringview_get_length( const utf8stringview_t *this_ ) {
     146        49746 :     return (*this_).length;
     147              : }
     148              : 
     149            3 : static inline size_t utf8stringview_count_codepoints( const utf8stringview_t *this_ ) {
     150            3 :     size_t result = 0;
     151            3 :     unsigned int skip = 0;
     152            3 :     if ( (*this_).start != NULL ) {
     153           38 :         for ( size_t pos = 0; pos < (*this_).length; pos ++ )
     154              :         {
     155           35 :             if ( skip > 0 )
     156              :             {
     157           16 :                 skip --;
     158           16 :                 if ( skip == 0 ) {
     159            7 :                     result ++;  /* This is the last byte of a multi byte code point */
     160              :                 }
     161              :             }
     162              :             else
     163              :             {
     164           19 :                 const unsigned char firstByte = (const unsigned char) ((*this_).start[pos]);
     165           19 :                 if (( 0x80 & firstByte ) == 0x00 )
     166              :                 {
     167           10 :                     result ++;  /* This is a 1 byte code point */
     168              :                 }
     169            9 :                 else if (( 0xc0 & firstByte ) == 0x80 )
     170              :                 {
     171              :                     /* This is not a valid first byte, skipping to the next byte... */
     172              :                 }
     173            8 :                 else if (( 0xe0 & firstByte ) == 0xc0 )
     174              :                 {
     175            2 :                     skip = 1;  /* This is the start of a 2 byte code point */
     176              :                 }
     177            6 :                 else if (( 0xf0 & firstByte ) == 0xe0 )
     178              :                 {
     179            3 :                     skip = 2;  /* This is the start of a 3 byte code point */
     180              :                 }
     181            3 :                 else if (( 0xf8 & firstByte ) == 0xf0 )
     182              :                 {
     183            3 :                     skip = 3;  /* This is the start of a 4 byte code point */
     184              :                 }
     185              :                 else
     186              :                 {
     187              :                     /* This is not a valid first byte, skipping to the next byte... */
     188              :                 }
     189              :             }
     190              :         }
     191              :     }
     192            3 :     return result;
     193              : }
     194              : 
     195         2112 : static inline bool utf8stringview_equals_str( const utf8stringview_t *this_, const char *that )
     196              : {
     197              :     bool result;
     198         2112 :     if ( that != NULL )
     199              :     {
     200         2111 :         size_t len = strlen( that );
     201         2111 :         if ( len == (*this_).length )
     202              :         {
     203         1541 :             if ( ( len == 0 )/*&&( this_.length == 0 )*/)
     204              :             {
     205            4 :                 result = true;
     206              :             }
     207              :             else
     208              :             {
     209         1537 :                 result = ( 0 == memcmp ( (*this_).start, that, len ) );
     210              :             }
     211              :         }
     212              :         else
     213              :         {
     214          570 :             result = false;
     215              :         }
     216              :     }
     217              :     else
     218              :     {
     219            1 :         result = false;
     220              :     }
     221         2112 :     return result;
     222              : }
     223              : 
     224            4 : static inline bool utf8stringview_equals_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     225              : {
     226            4 :     assert( that != NULL );
     227              :     bool result;
     228            4 :     if ( (*that).length == (*this_).length )
     229              :     {
     230            2 :         if ( ( (*that).length == 0 )/*&&( this_.length == 0 )*/)
     231              :         {
     232            1 :             result = true;
     233              :         }
     234              :         else
     235              :         {
     236            1 :             result = ( 0 == memcmp ( (*this_).start, (*that).start, (*that).length ) );
     237              :         }
     238              :     }
     239              :     else
     240              :     {
     241            2 :         result = false;
     242              :     }
     243            4 :     return result;
     244              : }
     245              : 
     246           15 : static inline bool utf8stringview_starts_with_str( const utf8stringview_t *this_, utf8string_t *that )
     247              : {
     248           15 :     bool result = false;
     249           15 :     if (( this_ != NULL )&&( that != NULL ))
     250              :     {
     251           14 :         const size_t that_len = strlen( that );
     252           14 :         if ( that_len <= (*this_).length )
     253              :         {
     254           10 :             result = ( 0 == memcmp( (*this_).start, that, that_len ) );
     255              :         }
     256              :         else
     257              :         {
     258            4 :             result = false;
     259              :         }
     260              :     }
     261           15 :     return result;
     262              : }
     263              : 
     264            5 : static inline bool utf8stringview_starts_with_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     265              : {
     266            5 :     assert( that != NULL );
     267            5 :     bool result = false;
     268            5 :     if (( this_ != NULL )&&( that != NULL ))
     269              :     {
     270            5 :         if ( (*that).length <= (*this_).length )
     271              :         {
     272            4 :             result = ( 0 == memcmp( (*this_).start, (*that).start, (*that).length ) );
     273              :         }
     274              :         else
     275              :         {
     276            1 :             result = false;
     277              :         }
     278              :     }
     279            5 :     return result;
     280              : }
     281              : 
     282            6 : static inline bool utf8stringview_ends_with_str( const utf8stringview_t *this_, utf8string_t *that )
     283              : {
     284            6 :     bool result = false;
     285            6 :     if (( this_ != NULL )&&( that != NULL ))
     286              :     {
     287            5 :         const size_t that_len = strlen( that );
     288            5 :         if ( that_len <= (*this_).length )
     289              :         {
     290            4 :             result = ( 0 == memcmp( (*this_).start + (*this_).length - that_len, that, that_len ) );
     291              :         }
     292              :         else
     293              :         {
     294            1 :             result = false;
     295              :         }
     296              :     }
     297            6 :     return result;
     298              : }
     299              : 
     300            5 : static inline bool utf8stringview_ends_with_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     301              : {
     302            5 :     assert( that != NULL );
     303            5 :     bool result = false;
     304            5 :     if (( this_ != NULL )&&( that != NULL ))
     305              :     {
     306            5 :         if ( (*that).length <= (*this_).length )
     307              :         {
     308            4 :             result = ( 0 == memcmp( (*this_).start + (*this_).length - (*that).length, (*that).start, (*that).length ) );
     309              :         }
     310              :         else
     311              :         {
     312            1 :             result = false;
     313              :         }
     314              :     }
     315            5 :     return result;
     316              : }
     317              : 
     318            6 : static inline bool utf8stringview_contains_str( const utf8stringview_t *this_, utf8string_t *that )
     319              : {
     320            6 :     bool result = false;
     321            6 :     if (( this_ != NULL )&&( that != NULL ))
     322              :     {
     323            5 :         const size_t that_len = strlen( that );
     324            5 :         if ( that_len <= (*this_).length )
     325              :         {
     326            4 :             const char *const end = (*this_).start + (*this_).length - that_len;
     327           13 :             for ( const char* pos = (*this_).start; ( pos <= end )&&( result == false ); pos ++ )
     328              :             {
     329            9 :                 if ( 0 == memcmp( pos, that, that_len ) )
     330              :                 {
     331            3 :                     result = true;
     332              :                 }
     333              :             }
     334              :         }
     335              :     }
     336            6 :     return result;
     337              : }
     338              : 
     339            5 : static inline bool utf8stringview_contains_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     340              : {
     341            5 :     assert( that != NULL );
     342            5 :     bool result = false;
     343            5 :     if (( this_ != NULL )&&( that != NULL ))
     344              :     {
     345            5 :         if ( (*that).length <= (*this_).length )
     346              :         {
     347            4 :             const char *const end = (*this_).start + (*this_).length - (*that).length;
     348           13 :             for ( const char* pos = (*this_).start; ( pos <= end )&&( result == false ); pos ++ )
     349              :             {
     350            9 :                 if ( 0 == memcmp( pos, (*that).start, (*that).length ) )
     351              :                 {
     352            3 :                     result = true;
     353              :                 }
     354              :             }
     355              :         }
     356              :     }
     357            5 :     return result;
     358              : }
     359              : 
     360           20 : static inline utf8error_t utf8stringview_split_at_first_str( const utf8stringview_t *this_,
     361              :                                                              utf8string_t *pattern,
     362              :                                                              utf8stringview_t *out_before,
     363              :                                                              utf8stringview_t *out_after )
     364              : {
     365           20 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     366              : 
     367           20 :     if (( pattern != NULL )&&( this_ != NULL ))
     368           19 :     {
     369           19 :         const size_t pattern_len = strlen( pattern );
     370           19 :         if ( pattern_len <= (*this_).length )
     371              :         {
     372           15 :             const char *const end = (*this_).start + (*this_).length - pattern_len;
     373           62 :             for ( const char* pos = (*this_).start; ( pos <= end )&&( result == UTF8ERROR_NOT_FOUND ); pos ++ )
     374              :             {
     375           47 :                 if ( 0 == memcmp( pos, pattern, pattern_len ) )
     376              :                 {
     377           13 :                     result = UTF8ERROR_SUCCESS;
     378           13 :                     if ( out_before != NULL )
     379              :                     {
     380           12 :                         *out_before = (utf8stringview_t){ .start = (*this_).start, .length = ( pos - (*this_).start ) };
     381              :                     }
     382           13 :                     if ( out_after != NULL )
     383              :                     {
     384           12 :                         *out_after = (utf8stringview_t){ .start = ( pos + pattern_len ), .length = ( end - pos ) };
     385              :                     }
     386              :                 }
     387              :             }
     388              :         }
     389              :     }
     390              :     else
     391              :     {
     392            1 :         result = UTF8ERROR_NULL_PARAM;
     393              :     }
     394              : 
     395           20 :     return result;
     396              : }
     397              : 
     398            5 : static inline utf8error_t utf8stringview_split_at_first_view( const utf8stringview_t *this_,
     399              :                                                               const utf8stringview_t *pattern,
     400              :                                                               utf8stringview_t *out_before,
     401              :                                                               utf8stringview_t *out_after )
     402              : {
     403            5 :     assert( pattern != NULL );
     404            5 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     405              : 
     406            5 :     if ( (*pattern).length <= (*this_).length )
     407              :     {
     408            4 :         const char *const end = (*this_).start + (*this_).length - (*pattern).length;
     409           12 :         for ( const char* pos = (*this_).start; ( pos <= end )&&( result == UTF8ERROR_NOT_FOUND ); pos ++ )
     410              :         {
     411            8 :             if ( 0 == memcmp( pos, (*pattern).start, (*pattern).length ) )
     412              :             {
     413            3 :                 result = UTF8ERROR_SUCCESS;
     414            3 :                 if ( out_before != NULL )
     415              :                 {
     416            2 :                     *out_before = (utf8stringview_t){ .start = (*this_).start, .length = ( pos - (*this_).start ) };
     417              :                 }
     418            3 :                 if ( out_after != NULL )
     419              :                 {
     420            2 :                     *out_after = (utf8stringview_t){ .start = ( pos + (*pattern).length ), .length = ( end - pos ) };
     421              :                 }
     422              :             }
     423              :         }
     424              :     }
     425              : 
     426            5 :     return result;
     427              : }
     428              : 
     429           18 : static inline utf8error_t utf8stringview_split_at_last_str( const utf8stringview_t *this_,
     430              :                                                             utf8string_t *pattern,
     431              :                                                             utf8stringview_t *out_before,
     432              :                                                             utf8stringview_t *out_after )
     433              : {
     434           18 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     435              : 
     436           18 :     if (( pattern != NULL )&&( this_ != NULL ))
     437           17 :     {
     438           17 :         const size_t pattern_len = strlen( pattern );
     439           17 :         if ( pattern_len <= (*this_).length )
     440              :         {
     441          358 :             for ( ptrdiff_t pos = (*this_).length - pattern_len; ( pos >= 0 )&&( result == UTF8ERROR_NOT_FOUND ); pos -- )
     442              :             {
     443          342 :                 if ( 0 == memcmp( (*this_).start + pos, pattern, pattern_len ) )
     444              :                 {
     445            7 :                     result = UTF8ERROR_SUCCESS;
     446            7 :                     if ( out_before != NULL )
     447              :                     {
     448            6 :                         *out_before = (utf8stringview_t){ .start = (*this_).start, .length = pos };
     449              :                     }
     450            7 :                     if ( out_after != NULL )
     451              :                     {
     452            6 :                         *out_after = (utf8stringview_t){ .start = ( (*this_).start + pos + pattern_len ), .length = ( (*this_).length - pattern_len - pos ) };
     453              :                     }
     454              :                 }
     455              :             }
     456              :         }
     457              :     }
     458              :     else
     459              :     {
     460            1 :         result = UTF8ERROR_NULL_PARAM;
     461              :     }
     462              : 
     463           18 :     return result;
     464              : }
     465              : 
     466            5 : static inline utf8error_t utf8stringview_split_at_last_view( const utf8stringview_t *this_,
     467              :                                                              const utf8stringview_t *pattern,
     468              :                                                              utf8stringview_t *out_before,
     469              :                                                              utf8stringview_t *out_after )
     470              : {
     471            5 :     assert( pattern != NULL );
     472            5 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     473              : 
     474            5 :     if ( (*pattern).length <= (*this_).length )
     475              :     {
     476           14 :         for ( ptrdiff_t pos = (*this_).length - (*pattern).length; ( pos >= 0 )&&( result == UTF8ERROR_NOT_FOUND ); pos -- )
     477              :         {
     478           10 :             if ( 0 == memcmp( (*this_).start + pos, (*pattern).start, (*pattern).length ) )
     479              :             {
     480            3 :                 result = UTF8ERROR_SUCCESS;
     481            3 :                 if ( out_before != NULL )
     482              :                 {
     483            2 :                     *out_before = (utf8stringview_t){ .start = (*this_).start, .length = pos };
     484              :                 }
     485            3 :                 if ( out_after != NULL )
     486              :                 {
     487            2 :                     *out_after = (utf8stringview_t){ .start = ( (*this_).start + pos + (*pattern).length ), .length = ( (*this_).length - (*pattern).length - pos ) };
     488              :                 }
     489              :             }
     490              :         }
     491              :     }
     492              : 
     493            5 :     return result;
     494              : }
     495              : 
     496              : #ifdef __cplusplus
     497              : }
     498              : #endif
     499              : 
     500              : 
     501              : /*
     502              :  * Copyright 2021-2025 Andreas Warnke
     503              :  *
     504              :  * Licensed under the Apache License, Version 2.0 (the "License");
     505              :  * you may not use this file except in compliance with the License.
     506              :  * You may obtain a copy of the License at
     507              :  *
     508              :  *    http://www.apache.org/licenses/LICENSE-2.0
     509              :  *
     510              :  * Unless required by applicable law or agreed to in writing, software
     511              :  * distributed under the License is distributed on an "AS IS" BASIS,
     512              :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     513              :  * See the License for the specific language governing permissions and
     514              :  * limitations under the License.
     515              :  */
        

Generated by: LCOV version 2.0-1