LCOV - code coverage report
Current view: top level - u8stream/include/utf8stringbuf - utf8stringview.inl (source / functions) Hit Total Coverage
Test: crystal-facet-uml_v1.61.0_covts Lines: 206 206 100.0 %
Date: 2024-10-26 21:44:38 Functions: 19 19 100.0 %

          Line data    Source code
       1             : /* File: utf8stringview.inl; Copyright and License: see below */
       2             : 
       3             : #include "u8/u8_i32.h"
       4             : 
       5             : #ifdef __cplusplus
       6             : extern "C" {
       7             : #endif
       8             : 
       9          71 : static inline utf8error_t utf8stringview_init( utf8stringview_t *this_, const char* start, size_t length )
      10             : {
      11          71 :     assert( start != NULL );
      12          71 :     utf8error_t result = UTF8ERROR_SUCCESS;
      13             :     /* clean type would have been:  char ( *start_arr )[] = (char(*)[]) start; */
      14             : 
      15             :     /* check start */
      16          71 :     char start_copy[4] = {'\0','\0','\0','\0'};
      17          71 :     const size_t start_len = ( length >= 4 ) ? 4 : length;
      18          71 :     memcpy( &start_copy, start, start_len );
      19             : 
      20          71 :     if ( ( 0xc0 & (start_copy[0]) ) == 0x80 )
      21             :     {
      22           5 :         if ( ( 0xc0 & (start_copy[1]) ) == 0x80 )
      23             :         {
      24           2 :             if ( ( 0xc0 & (start_copy[2]) ) == 0x80 )
      25             :             {
      26           1 :                 start += 3;
      27           1 :                 length -= 3;  /* length was greater than 2 - otherwise start_copy[2] would have been 0x0 */
      28           1 :                 result = UTF8ERROR_OUT_OF_RANGE;
      29             :             }
      30             :             else
      31             :             {
      32           1 :                 start += 2;
      33           1 :                 length -= 2;  /* length was greater than 1 - otherwise start_copy[1] would have been 0x0 */
      34           1 :                 result = UTF8ERROR_OUT_OF_RANGE;
      35             :             }
      36             :         }
      37             :         else
      38             :         {
      39           3 :             start += 1;
      40           3 :             length -= 1;  /* length was greater than 0 - otherwise start_copy[0] would have been 0x0 */
      41           3 :             result = UTF8ERROR_OUT_OF_RANGE;
      42             :         }
      43             :     }
      44             :     else
      45             :     {
      46             :         /* valid start */
      47             :     }
      48             : 
      49             :     /* check end */
      50          71 :     char end_copy[4] = {'\0','\0','\0','\0'};
      51          71 :     const size_t end_len = ( length >= 4 ) ? 4 : length;
      52          71 :     memcpy( &(end_copy[4-end_len]), &(start[length-end_len]), end_len );
      53             : 
      54          71 :     if ( ( 0x80 & (end_copy[3]) ) == 0x00 )
      55             :     {
      56             :         /* valid single-byte end */
      57             :     }
      58             :     else
      59             :     {
      60           9 :         if ( ( 0xe0 & (end_copy[2]) ) == 0xc0 )
      61             :         {
      62             :             /* valid 2 byte end */
      63             :         }
      64           8 :         else if ( ( 0x80 & (end_copy[2]) ) == 0x00 )
      65             :         {
      66             :             /* 1 byte char at end_copy[2] */
      67           1 :             length -= 1;  /* length was greater than 0 - otherwise end_copy[3] would have been 0x0 */
      68           1 :             result = UTF8ERROR_OUT_OF_RANGE;
      69             :         }
      70             :         else
      71             :         {
      72           7 :             if ( ( 0xf0 & (end_copy[1]) ) == 0xe0 )
      73             :             {
      74             :                 /* valid 3 byte end */
      75             :             }
      76           6 :             else if ( ( 0xe0 & (end_copy[1]) ) == 0xc0 )
      77             :             {
      78             :                 /* 2 byte char at end_copy[1] */
      79           1 :                 length -= 1;  /* length was greater than 0 - otherwise end_copy[3] would have been 0x0 */
      80           1 :                 result = UTF8ERROR_OUT_OF_RANGE;
      81             :             }
      82           5 :             else if ( ( 0x80 & (end_copy[1]) ) == 0x00 )
      83             :             {
      84             :                 /* 1 byte char at end_copy[1] */
      85           1 :                 length -= 2;  /* length was greater than 1 - otherwise end_copy[2] would have been 0x0 */
      86           1 :                 result = UTF8ERROR_OUT_OF_RANGE;
      87             :             }
      88             :             else
      89             :             {
      90           4 :                 if ( ( 0xf8 & (end_copy[0]) ) == 0xf0 )
      91             :                 {
      92             :                     /* valid 4 byte end */
      93             :                 }
      94             :                 else
      95             :                 {
      96             :                     /* assume 1 byte char at end_copy[0] */
      97           2 :                     length -= 3;  /* length was greater than 2 - otherwise end_copy[1] would have been 0x0 */
      98           2 :                     result = UTF8ERROR_OUT_OF_RANGE;
      99             :                 }
     100             :             }
     101             :         }
     102             :     }
     103             : 
     104          71 :     *this_ = (utf8stringview_t){.start=start,.length=length};
     105          71 :     return result;
     106             : }
     107             : 
     108          61 : static inline void utf8stringview_init_str( utf8stringview_t *this_, const char* cstring )
     109             : {
     110          61 :     *this_ = (utf8stringview_t){.start=cstring,.length=(cstring==NULL)?0:strlen(cstring)};
     111          61 : }
     112             : 
     113          16 : static inline utf8error_t utf8stringview_init_region( utf8stringview_t *this_, const char* cstring, size_t start_idx, size_t length )
     114             : {
     115          16 :     assert( cstring != NULL );
     116          16 :     utf8error_t result = UTF8ERROR_SUCCESS;
     117          16 :     const size_t cstring_len = strlen( cstring );
     118          16 :     if ( start_idx > cstring_len )
     119             :     {
     120           1 :         *this_ = (utf8stringview_t){.start=cstring+start_idx,.length=0};
     121           1 :         result |= UTF8ERROR_OUT_OF_RANGE;
     122             :     }
     123             :     else
     124             :     {
     125          15 :         const size_t max_len = cstring_len - start_idx;
     126          15 :         result |= utf8stringview_init( this_, cstring+start_idx, u8_i32_min2( length, max_len ) );
     127          15 :         if ( length > max_len )
     128             :         {
     129             :             /* notify that stringview cannot exceed the cstring */
     130           1 :             result |= UTF8ERROR_OUT_OF_RANGE;
     131             :         }
     132             :     }
     133          16 :     return result;
     134             : }
     135             : 
     136         128 : static inline void utf8stringview_destroy( utf8stringview_t *this_ )
     137             : {
     138         128 :     *this_ = (utf8stringview_t){.start=NULL,.length=0};
     139         128 : }
     140             : 
     141       78552 : static inline const char* utf8stringview_get_start( const utf8stringview_t *this_ ) {
     142       78552 :     return (*this_).start;
     143             : }
     144             : 
     145       76104 : static inline size_t utf8stringview_get_length( const utf8stringview_t *this_ ) {
     146       76104 :     return (*this_).length;
     147             : }
     148             : 
     149           3 : static inline size_t utf8stringview_count_codepoints( const utf8stringview_t *this_ ) {
     150           3 :     size_t result = 0;
     151           3 :     unsigned int skip = 0;
     152           3 :     if ( (*this_).start != NULL ) {
     153          38 :         for ( size_t pos = 0; pos < (*this_).length; pos ++ )
     154             :         {
     155          35 :             if ( skip > 0 )
     156             :             {
     157          16 :                 skip --;
     158          16 :                 if ( skip == 0 ) {
     159           7 :                     result ++;  /* This is the last byte of a multi byte code point */
     160             :                 }
     161             :             }
     162             :             else
     163             :             {
     164          19 :                 const unsigned char firstByte = (const unsigned char) ((*this_).start[pos]);
     165          19 :                 if (( 0x80 & firstByte ) == 0x00 )
     166             :                 {
     167          10 :                     result ++;  /* This is a 1 byte code point */
     168             :                 }
     169           9 :                 else if (( 0xc0 & firstByte ) == 0x80 )
     170             :                 {
     171             :                     /* This is not a valid first byte, skipping to the next byte... */
     172             :                 }
     173           8 :                 else if (( 0xe0 & firstByte ) == 0xc0 )
     174             :                 {
     175           2 :                     skip = 1;  /* This is the start of a 2 byte code point */
     176             :                 }
     177           6 :                 else if (( 0xf0 & firstByte ) == 0xe0 )
     178             :                 {
     179           3 :                     skip = 2;  /* This is the start of a 3 byte code point */
     180             :                 }
     181           3 :                 else if (( 0xf8 & firstByte ) == 0xf0 )
     182             :                 {
     183           3 :                     skip = 3;  /* This is the start of a 4 byte code point */
     184             :                 }
     185             :                 else
     186             :                 {
     187             :                     /* This is not a valid first byte, skipping to the next byte... */
     188             :                 }
     189             :             }
     190             :         }
     191             :     }
     192           3 :     return result;
     193             : }
     194             : 
     195        1236 : static inline bool utf8stringview_equals_str( const utf8stringview_t *this_, const char *that )
     196             : {
     197             :     bool result;
     198        1236 :     if ( that != NULL )
     199             :     {
     200        1235 :         size_t len = strlen( that );
     201        1235 :         if ( len == (*this_).length )
     202             :         {
     203         905 :             if ( ( len == 0 )/*&&( this_.length == 0 )*/)
     204             :             {
     205           4 :                 result = true;
     206             :             }
     207             :             else
     208             :             {
     209         901 :                 result = ( 0 == memcmp ( (*this_).start, that, len ) );
     210             :             }
     211             :         }
     212             :         else
     213             :         {
     214         330 :             result = false;
     215             :         }
     216             :     }
     217             :     else
     218             :     {
     219           1 :         result = false;
     220             :     }
     221        1236 :     return result;
     222             : }
     223             : 
     224           4 : static inline bool utf8stringview_equals_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     225             : {
     226           4 :     assert( that != NULL );
     227             :     bool result;
     228           4 :     if ( (*that).length == (*this_).length )
     229             :     {
     230           2 :         if ( ( (*that).length == 0 )/*&&( this_.length == 0 )*/)
     231             :         {
     232           1 :             result = true;
     233             :         }
     234             :         else
     235             :         {
     236           1 :             result = ( 0 == memcmp ( (*this_).start, (*that).start, (*that).length ) );
     237             :         }
     238             :     }
     239             :     else
     240             :     {
     241           2 :         result = false;
     242             :     }
     243           4 :     return result;
     244             : }
     245             : 
     246          14 : static inline bool utf8stringview_starts_with_str( const utf8stringview_t *this_, utf8string_t *that )
     247             : {
     248          14 :     bool result = false;
     249          14 :     if (( this_ != NULL )&&( that != NULL ))
     250             :     {
     251          13 :         const size_t that_len = strlen( that );
     252          13 :         if ( that_len <= (*this_).length )
     253             :         {
     254           9 :             result = ( 0 == memcmp( (*this_).start, that, that_len ) );
     255             :         }
     256             :         else
     257             :         {
     258           4 :             result = false;
     259             :         }
     260             :     }
     261          14 :     return result;
     262             : }
     263             : 
     264           5 : static inline bool utf8stringview_starts_with_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     265             : {
     266           5 :     assert( that != NULL );
     267           5 :     bool result = false;
     268           5 :     if (( this_ != NULL )&&( that != NULL ))
     269             :     {
     270           5 :         if ( (*that).length <= (*this_).length )
     271             :         {
     272           4 :             result = ( 0 == memcmp( (*this_).start, (*that).start, (*that).length ) );
     273             :         }
     274             :         else
     275             :         {
     276           1 :             result = false;
     277             :         }
     278             :     }
     279           5 :     return result;
     280             : }
     281             : 
     282           6 : static inline bool utf8stringview_ends_with_str( const utf8stringview_t *this_, utf8string_t *that )
     283             : {
     284           6 :     bool result = false;
     285           6 :     if (( this_ != NULL )&&( that != NULL ))
     286             :     {
     287           5 :         const size_t that_len = strlen( that );
     288           5 :         if ( that_len <= (*this_).length )
     289             :         {
     290           4 :             result = ( 0 == memcmp( (*this_).start + (*this_).length - that_len, that, that_len ) );
     291             :         }
     292             :         else
     293             :         {
     294           1 :             result = false;
     295             :         }
     296             :     }
     297           6 :     return result;
     298             : }
     299             : 
     300           5 : static inline bool utf8stringview_ends_with_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     301             : {
     302           5 :     assert( that != NULL );
     303           5 :     bool result = false;
     304           5 :     if (( this_ != NULL )&&( that != NULL ))
     305             :     {
     306           5 :         if ( (*that).length <= (*this_).length )
     307             :         {
     308           4 :             result = ( 0 == memcmp( (*this_).start + (*this_).length - (*that).length, (*that).start, (*that).length ) );
     309             :         }
     310             :         else
     311             :         {
     312           1 :             result = false;
     313             :         }
     314             :     }
     315           5 :     return result;
     316             : }
     317             : 
     318           6 : static inline bool utf8stringview_contains_str( const utf8stringview_t *this_, utf8string_t *that )
     319             : {
     320           6 :     bool result = false;
     321           6 :     if (( this_ != NULL )&&( that != NULL ))
     322             :     {
     323           5 :         const size_t that_len = strlen( that );
     324           5 :         if ( that_len <= (*this_).length )
     325             :         {
     326           4 :             const char *const end = (*this_).start + (*this_).length - that_len;
     327          13 :             for ( const char* pos = (*this_).start; ( pos <= end )&&( result == false ); pos ++ )
     328             :             {
     329           9 :                 if ( 0 == memcmp( pos, that, that_len ) )
     330             :                 {
     331           3 :                     result = true;
     332             :                 }
     333             :             }
     334             :         }
     335             :     }
     336           6 :     return result;
     337             : }
     338             : 
     339           5 : static inline bool utf8stringview_contains_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     340             : {
     341           5 :     assert( that != NULL );
     342           5 :     bool result = false;
     343           5 :     if (( this_ != NULL )&&( that != NULL ))
     344             :     {
     345           5 :         if ( (*that).length <= (*this_).length )
     346             :         {
     347           4 :             const char *const end = (*this_).start + (*this_).length - (*that).length;
     348          13 :             for ( const char* pos = (*this_).start; ( pos <= end )&&( result == false ); pos ++ )
     349             :             {
     350           9 :                 if ( 0 == memcmp( pos, (*that).start, (*that).length ) )
     351             :                 {
     352           3 :                     result = true;
     353             :                 }
     354             :             }
     355             :         }
     356             :     }
     357           5 :     return result;
     358             : }
     359             : 
     360          16 : static inline utf8error_t utf8stringview_split_at_first_str( const utf8stringview_t *this_,
     361             :                                                              utf8string_t *pattern,
     362             :                                                              utf8stringview_t *out_before,
     363             :                                                              utf8stringview_t *out_after )
     364             : {
     365          16 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     366             : 
     367          16 :     if (( pattern != NULL )&&( this_ != NULL ))
     368          15 :     {
     369          15 :         const size_t pattern_len = strlen( pattern );
     370          15 :         if ( pattern_len <= (*this_).length )
     371             :         {
     372          11 :             const char *const end = (*this_).start + (*this_).length - pattern_len;
     373          34 :             for ( const char* pos = (*this_).start; ( pos <= end )&&( result == UTF8ERROR_NOT_FOUND ); pos ++ )
     374             :             {
     375          23 :                 if ( 0 == memcmp( pos, pattern, pattern_len ) )
     376             :                 {
     377           9 :                     result = UTF8ERROR_SUCCESS;
     378           9 :                     if ( out_before != NULL )
     379             :                     {
     380           8 :                         *out_before = (utf8stringview_t){ .start = (*this_).start, .length = ( pos - (*this_).start ) };
     381             :                     }
     382           9 :                     if ( out_after != NULL )
     383             :                     {
     384           8 :                         *out_after = (utf8stringview_t){ .start = ( pos + pattern_len ), .length = ( end - pos ) };
     385             :                     }
     386             :                 }
     387             :             }
     388             :         }
     389             :     }
     390             :     else
     391             :     {
     392           1 :         result = UTF8ERROR_NULL_PARAM;
     393             :     }
     394             : 
     395          16 :     return result;
     396             : }
     397             : 
     398           5 : static inline utf8error_t utf8stringview_split_at_first_view( const utf8stringview_t *this_,
     399             :                                                               const utf8stringview_t *pattern,
     400             :                                                               utf8stringview_t *out_before,
     401             :                                                               utf8stringview_t *out_after )
     402             : {
     403           5 :     assert( pattern != NULL );
     404           5 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     405             : 
     406           5 :     if ( (*pattern).length <= (*this_).length )
     407             :     {
     408           4 :         const char *const end = (*this_).start + (*this_).length - (*pattern).length;
     409          12 :         for ( const char* pos = (*this_).start; ( pos <= end )&&( result == UTF8ERROR_NOT_FOUND ); pos ++ )
     410             :         {
     411           8 :             if ( 0 == memcmp( pos, (*pattern).start, (*pattern).length ) )
     412             :             {
     413           3 :                 result = UTF8ERROR_SUCCESS;
     414           3 :                 if ( out_before != NULL )
     415             :                 {
     416           2 :                     *out_before = (utf8stringview_t){ .start = (*this_).start, .length = ( pos - (*this_).start ) };
     417             :                 }
     418           3 :                 if ( out_after != NULL )
     419             :                 {
     420           2 :                     *out_after = (utf8stringview_t){ .start = ( pos + (*pattern).length ), .length = ( end - pos ) };
     421             :                 }
     422             :             }
     423             :         }
     424             :     }
     425             : 
     426           5 :     return result;
     427             : }
     428             : 
     429          18 : static inline utf8error_t utf8stringview_split_at_last_str( const utf8stringview_t *this_,
     430             :                                                             utf8string_t *pattern,
     431             :                                                             utf8stringview_t *out_before,
     432             :                                                             utf8stringview_t *out_after )
     433             : {
     434          18 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     435             : 
     436          18 :     if (( pattern != NULL )&&( this_ != NULL ))
     437          17 :     {
     438          17 :         const size_t pattern_len = strlen( pattern );
     439          17 :         if ( pattern_len <= (*this_).length )
     440             :         {
     441         358 :             for ( ptrdiff_t pos = (*this_).length - pattern_len; ( pos >= 0 )&&( result == UTF8ERROR_NOT_FOUND ); pos -- )
     442             :             {
     443         342 :                 if ( 0 == memcmp( (*this_).start + pos, pattern, pattern_len ) )
     444             :                 {
     445           7 :                     result = UTF8ERROR_SUCCESS;
     446           7 :                     if ( out_before != NULL )
     447             :                     {
     448           6 :                         *out_before = (utf8stringview_t){ .start = (*this_).start, .length = pos };
     449             :                     }
     450           7 :                     if ( out_after != NULL )
     451             :                     {
     452           6 :                         *out_after = (utf8stringview_t){ .start = ( (*this_).start + pos + pattern_len ), .length = ( (*this_).length - pattern_len - pos ) };
     453             :                     }
     454             :                 }
     455             :             }
     456             :         }
     457             :     }
     458             :     else
     459             :     {
     460           1 :         result = UTF8ERROR_NULL_PARAM;
     461             :     }
     462             : 
     463          18 :     return result;
     464             : }
     465             : 
     466           5 : static inline utf8error_t utf8stringview_split_at_last_view( const utf8stringview_t *this_,
     467             :                                                              const utf8stringview_t *pattern,
     468             :                                                              utf8stringview_t *out_before,
     469             :                                                              utf8stringview_t *out_after )
     470             : {
     471           5 :     assert( pattern != NULL );
     472           5 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     473             : 
     474           5 :     if ( (*pattern).length <= (*this_).length )
     475             :     {
     476          14 :         for ( ptrdiff_t pos = (*this_).length - (*pattern).length; ( pos >= 0 )&&( result == UTF8ERROR_NOT_FOUND ); pos -- )
     477             :         {
     478          10 :             if ( 0 == memcmp( (*this_).start + pos, (*pattern).start, (*pattern).length ) )
     479             :             {
     480           3 :                 result = UTF8ERROR_SUCCESS;
     481           3 :                 if ( out_before != NULL )
     482             :                 {
     483           2 :                     *out_before = (utf8stringview_t){ .start = (*this_).start, .length = pos };
     484             :                 }
     485           3 :                 if ( out_after != NULL )
     486             :                 {
     487           2 :                     *out_after = (utf8stringview_t){ .start = ( (*this_).start + pos + (*pattern).length ), .length = ( (*this_).length - (*pattern).length - pos ) };
     488             :                 }
     489             :             }
     490             :         }
     491             :     }
     492             : 
     493           5 :     return result;
     494             : }
     495             : 
     496             : #ifdef __cplusplus
     497             : }
     498             : #endif
     499             : 
     500             : 
     501             : /*
     502             :  * Copyright 2021-2024 Andreas Warnke
     503             :  *
     504             :  * Licensed under the Apache License, Version 2.0 (the "License");
     505             :  * you may not use this file except in compliance with the License.
     506             :  * You may obtain a copy of the License at
     507             :  *
     508             :  *    http://www.apache.org/licenses/LICENSE-2.0
     509             :  *
     510             :  * Unless required by applicable law or agreed to in writing, software
     511             :  * distributed under the License is distributed on an "AS IS" BASIS,
     512             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     513             :  * See the License for the specific language governing permissions and
     514             :  * limitations under the License.
     515             :  */

Generated by: LCOV version 1.16