LCOV - code coverage report
Current view: top level - u8stream/include/utf8stringbuf - utf8stringview.inl (source / functions) Hit Total Coverage
Test: crystal-facet-uml_v1.57.0_covts Lines: 208 210 99.0 %
Date: 2024-04-07 11:14:42 Functions: 19 19 100.0 %

          Line data    Source code
       1             : /* File: utf8stringview.inl; Copyright and License: see below */
       2             : 
       3             : #include "u8/u8_i32.h"
       4             : 
       5             : #ifdef __cplusplus
       6             : extern "C" {
       7             : #endif
       8             : 
       9          98 : static inline utf8error_t utf8stringview_init( utf8stringview_t *this_, const char* start, size_t length )
      10             : {
      11          98 :     assert( start != NULL );
      12          98 :     utf8error_t result = UTF8ERROR_SUCCESS;
      13             :     /* clean type would have been:  char ( *start_arr )[] = (char(*)[]) start; */
      14             : 
      15             :     /* check start */
      16          98 :     char start_copy[4] = {'\0','\0','\0','\0'};
      17          98 :     const size_t start_len = ( length >= 4 ) ? 4 : length;
      18          98 :     memcpy( &start_copy, start, start_len );
      19             : 
      20          98 :     if ( ( 0xc0 & (start_copy[0]) ) == 0x80 )
      21             :     {
      22           5 :         if ( ( 0xc0 & (start_copy[1]) ) == 0x80 )
      23             :         {
      24           2 :             if ( ( 0xc0 & (start_copy[2]) ) == 0x80 )
      25             :             {
      26           1 :                 start += 3;
      27           1 :                 length -= 3;  /* length was greater than 2 - otherwise start_copy[2] would have been 0x0 */
      28           1 :                 result = UTF8ERROR_OUT_OF_RANGE;
      29             :             }
      30             :             else
      31             :             {
      32           1 :                 start += 2;
      33           1 :                 length -= 2;  /* length was greater than 1 - otherwise start_copy[1] would have been 0x0 */
      34           1 :                 result = UTF8ERROR_OUT_OF_RANGE;
      35             :             }
      36             :         }
      37             :         else
      38             :         {
      39           3 :             start += 1;
      40           3 :             length -= 1;  /* length was greater than 0 - otherwise start_copy[0] would have been 0x0 */
      41           3 :             result = UTF8ERROR_OUT_OF_RANGE;
      42             :         }
      43             :     }
      44             :     else
      45             :     {
      46             :         /* valid start */
      47             :     }
      48             : 
      49             :     /* check end */
      50          98 :     char end_copy[4] = {'\0','\0','\0','\0'};
      51          98 :     const size_t end_len = ( length >= 4 ) ? 4 : length;
      52          98 :     memcpy( &(end_copy[4-end_len]), &(start[length-end_len]), end_len );
      53             : 
      54          98 :     if ( ( 0x80 & (end_copy[3]) ) == 0x00 )
      55             :     {
      56             :         /* valid single-byte end */
      57             :     }
      58             :     else
      59             :     {
      60          10 :         if ( ( 0xe0 & (end_copy[2]) ) == 0xc0 )
      61             :         {
      62             :             /* valid 2 byte end */
      63             :         }
      64           9 :         else if ( ( 0x80 & (end_copy[2]) ) == 0x00 )
      65             :         {
      66             :             /* 1 byte char at end_copy[2] */
      67           1 :             length -= 1;  /* length was greater than 0 - otherwise end_copy[3] would have been 0x0 */
      68           1 :             result = UTF8ERROR_OUT_OF_RANGE;
      69             :         }
      70             :         else
      71             :         {
      72           8 :             if ( ( 0xf0 & (end_copy[1]) ) == 0xe0 )
      73             :             {
      74             :                 /* valid 3 byte end */
      75             :             }
      76           7 :             else if ( ( 0xe0 & (end_copy[1]) ) == 0xc0 )
      77             :             {
      78             :                 /* 2 byte char at end_copy[1] */
      79           1 :                 length -= 1;  /* length was greater than 0 - otherwise end_copy[3] would have been 0x0 */
      80           1 :                 result = UTF8ERROR_OUT_OF_RANGE;
      81             :             }
      82           6 :             else if ( ( 0x80 & (end_copy[1]) ) == 0x00 )
      83             :             {
      84             :                 /* 1 byte char at end_copy[1] */
      85           2 :                 length -= 2;  /* length was greater than 1 - otherwise end_copy[2] would have been 0x0 */
      86           2 :                 result = UTF8ERROR_OUT_OF_RANGE;
      87             :             }
      88             :             else
      89             :             {
      90           4 :                 if ( ( 0xf8 & (end_copy[0]) ) == 0xf0 )
      91             :                 {
      92             :                     /* valid 4 byte end */
      93             :                 }
      94             :                 else
      95             :                 {
      96             :                     /* assume 1 byte char at end_copy[0] */
      97           2 :                     length -= 3;  /* length was greater than 2 - otherwise end_copy[1] would have been 0x0 */
      98           2 :                     result = UTF8ERROR_OUT_OF_RANGE;
      99             :                 }
     100             :             }
     101             :         }
     102             :     }
     103             : 
     104          98 :     *this_ = (utf8stringview_t){.start=start,.length=length};
     105          98 :     return result;
     106             : }
     107             : 
     108          53 : static inline void utf8stringview_init_str( utf8stringview_t *this_, const char* cstring )
     109             : {
     110          53 :     *this_ = (utf8stringview_t){.start=cstring,.length=(cstring==NULL)?0:strlen(cstring)};
     111          53 : }
     112             : 
     113           4 : static inline utf8error_t utf8stringview_init_region( utf8stringview_t *this_, const char* cstring, size_t start_idx, size_t length )
     114             : {
     115           4 :     assert( cstring != NULL );
     116           4 :     utf8error_t result = UTF8ERROR_SUCCESS;
     117           4 :     const size_t cstring_len = strlen( cstring );
     118           4 :     if ( start_idx > cstring_len )
     119             :     {
     120           1 :         *this_ = (utf8stringview_t){.start=cstring+start_idx,.length=0};
     121           1 :         result |= UTF8ERROR_OUT_OF_RANGE;
     122             :     }
     123             :     else
     124             :     {
     125           3 :         const size_t max_len = cstring_len - start_idx;
     126           3 :         result |= utf8stringview_init( this_, cstring+start_idx, u8_i32_min2( length, max_len ) );
     127           3 :         if ( length > max_len )
     128             :         {
     129             :             /* notify that stringview cannot exceed the cstring */
     130           1 :             result |= UTF8ERROR_OUT_OF_RANGE;
     131             :         }
     132             :     }
     133           4 :     return result;
     134             : }
     135             : 
     136         151 : static inline void utf8stringview_destroy( utf8stringview_t *this_ )
     137             : {
     138         151 :     *this_ = (utf8stringview_t){.start=NULL,.length=0};
     139         151 : }
     140             : 
     141       28134 : static inline const char* utf8stringview_get_start( const utf8stringview_t *this_ ) {
     142       28134 :     return (*this_).start;
     143             : }
     144             : 
     145       25606 : static inline size_t utf8stringview_get_length( const utf8stringview_t *this_ ) {
     146       25606 :     return (*this_).length;
     147             : }
     148             : 
     149           3 : static inline size_t utf8stringview_count_codepoints( const utf8stringview_t *this_ ) {
     150           3 :     size_t result = 0;
     151           3 :     unsigned int skip = 0;
     152           3 :     if ( (*this_).start != NULL ) {
     153          38 :         for ( size_t pos = 0; pos < (*this_).length; pos ++ )
     154             :         {
     155          35 :             if ( skip > 0 )
     156             :             {
     157          16 :                 skip --;
     158          16 :                 if ( skip == 0 ) {
     159           7 :                     result ++;  /* This is the last byte of a multi byte code point */
     160             :                 }
     161             :             }
     162             :             else
     163             :             {
     164          19 :                 const unsigned char firstByte = (const unsigned char) ((*this_).start[pos]);
     165          19 :                 if (( 0x80 & firstByte ) == 0x00 )
     166             :                 {
     167          10 :                     result ++;  /* This is a 1 byte code point */
     168             :                 }
     169           9 :                 else if (( 0xc0 & firstByte ) == 0x80 )
     170             :                 {
     171             :                     /* This is not a valid first byte, skipping to the next byte... */
     172             :                 }
     173           8 :                 else if (( 0xe0 & firstByte ) == 0xc0 )
     174             :                 {
     175           2 :                     skip = 1;  /* This is the start of a 2 byte code point */
     176             :                 }
     177           6 :                 else if (( 0xf0 & firstByte ) == 0xe0 )
     178             :                 {
     179           3 :                     skip = 2;  /* This is the start of a 3 byte code point */
     180             :                 }
     181           3 :                 else if (( 0xf8 & firstByte ) == 0xf0 )
     182             :                 {
     183           3 :                     skip = 3;  /* This is the start of a 4 byte code point */
     184             :                 }
     185             :                 else
     186             :                 {
     187             :                     /* This is not a valid first byte, skipping to the next byte... */
     188             :                 }
     189             :             }
     190             :         }
     191             :     }
     192           3 :     return result;
     193             : }
     194             : 
     195        1228 : static inline bool utf8stringview_equals_str( const utf8stringview_t *this_, const char *that )
     196             : {
     197             :     bool result;
     198        1228 :     if ( that != NULL )
     199             :     {
     200        1227 :         size_t len = strlen( that );
     201        1227 :         if ( len == (*this_).length )
     202             :         {
     203         899 :             if ( ( len == 0 )/*&&( this_.length == 0 )*/)
     204             :             {
     205           1 :                 result = true;
     206             :             }
     207             :             else
     208             :             {
     209         898 :                 result = ( 0 == memcmp ( (*this_).start, that, len ) );
     210             :             }
     211             :         }
     212             :         else
     213             :         {
     214         328 :             result = false;
     215             :         }
     216             :     }
     217             :     else
     218             :     {
     219           1 :         result = false;
     220             :     }
     221        1228 :     return result;
     222             : }
     223             : 
     224           4 : static inline bool utf8stringview_equals_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     225             : {
     226           4 :     assert( that != NULL );
     227             :     bool result;
     228           4 :     if ( (*that).length == (*this_).length )
     229             :     {
     230           2 :         if ( ( (*that).length == 0 )/*&&( this_.length == 0 )*/)
     231             :         {
     232           1 :             result = true;
     233             :         }
     234             :         else
     235             :         {
     236           1 :             result = ( 0 == memcmp ( (*this_).start, (*that).start, (*that).length ) );
     237             :         }
     238             :     }
     239             :     else
     240             :     {
     241           2 :         result = false;
     242             :     }
     243           4 :     return result;
     244             : }
     245             : 
     246           6 : static inline bool utf8stringview_starts_with_str( const utf8stringview_t *this_, utf8string_t *that )
     247             : {
     248           6 :     bool result = false;
     249           6 :     if (( this_ != NULL )&&( that != NULL ))
     250             :     {
     251           5 :         const size_t that_len = strlen( that );
     252           5 :         if ( that_len <= (*this_).length )
     253             :         {
     254           4 :             result = ( 0 == memcmp( (*this_).start, that, that_len ) );
     255             :         }
     256             :         else
     257             :         {
     258           1 :             result = false;
     259             :         }
     260             :     }
     261           6 :     return result;
     262             : }
     263             : 
     264           5 : static inline bool utf8stringview_starts_with_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     265             : {
     266           5 :     assert( that != NULL );
     267           5 :     bool result = false;
     268           5 :     if (( this_ != NULL )&&( that != NULL ))
     269             :     {
     270           5 :         if ( (*that).length <= (*this_).length )
     271             :         {
     272           4 :             result = ( 0 == memcmp( (*this_).start, (*that).start, (*that).length ) );
     273             :         }
     274             :         else
     275             :         {
     276           1 :             result = false;
     277             :         }
     278             :     }
     279           5 :     return result;
     280             : }
     281             : 
     282           6 : static inline bool utf8stringview_ends_with_str( const utf8stringview_t *this_, utf8string_t *that )
     283             : {
     284           6 :     bool result = false;
     285           6 :     if (( this_ != NULL )&&( that != NULL ))
     286             :     {
     287           5 :         const size_t that_len = strlen( that );
     288           5 :         if ( that_len <= (*this_).length )
     289             :         {
     290           4 :             result = ( 0 == memcmp( (*this_).start + (*this_).length - that_len, that, that_len ) );
     291             :         }
     292             :         else
     293             :         {
     294           1 :             result = false;
     295             :         }
     296             :     }
     297           6 :     return result;
     298             : }
     299             : 
     300           5 : static inline bool utf8stringview_ends_with_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     301             : {
     302           5 :     assert( that != NULL );
     303           5 :     bool result = false;
     304           5 :     if (( this_ != NULL )&&( that != NULL ))
     305             :     {
     306           5 :         if ( (*that).length <= (*this_).length )
     307             :         {
     308           4 :             result = ( 0 == memcmp( (*this_).start + (*this_).length - (*that).length, (*that).start, (*that).length ) );
     309             :         }
     310             :         else
     311             :         {
     312           1 :             result = false;
     313             :         }
     314             :     }
     315           5 :     return result;
     316             : }
     317             : 
     318           6 : static inline bool utf8stringview_contains_str( const utf8stringview_t *this_, utf8string_t *that )
     319             : {
     320           6 :     bool result = false;
     321           6 :     if (( this_ != NULL )&&( that != NULL ))
     322             :     {
     323           5 :         const size_t that_len = strlen( that );
     324           5 :         if ( that_len <= (*this_).length )
     325             :         {
     326           4 :             const char *const end = (*this_).start + (*this_).length - that_len;
     327          13 :             for ( const char* pos = (*this_).start; ( pos <= end )&&( result == false ); pos ++ )
     328             :             {
     329           9 :                 if ( 0 == memcmp( pos, that, that_len ) )
     330             :                 {
     331           3 :                     result = true;
     332             :                 }
     333             :             }
     334             :         }
     335             :     }
     336           6 :     return result;
     337             : }
     338             : 
     339           5 : static inline bool utf8stringview_contains_view( const utf8stringview_t *this_, const utf8stringview_t *that )
     340             : {
     341           5 :     assert( that != NULL );
     342           5 :     bool result = false;
     343           5 :     if (( this_ != NULL )&&( that != NULL ))
     344             :     {
     345           5 :         if ( (*that).length <= (*this_).length )
     346             :         {
     347           4 :             const char *const end = (*this_).start + (*this_).length - (*that).length;
     348          13 :             for ( const char* pos = (*this_).start; ( pos <= end )&&( result == false ); pos ++ )
     349             :             {
     350           9 :                 if ( 0 == memcmp( pos, (*that).start, (*that).length ) )
     351             :                 {
     352           3 :                     result = true;
     353             :                 }
     354             :             }
     355             :         }
     356             :     }
     357           5 :     return result;
     358             : }
     359             : 
     360          16 : static inline utf8error_t utf8stringview_split_at_first_str( const utf8stringview_t *this_,
     361             :                                                              utf8string_t *pattern,
     362             :                                                              utf8stringview_t *out_before,
     363             :                                                              utf8stringview_t *out_after )
     364             : {
     365          16 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     366          16 :     if (( pattern != NULL )&&( this_ != NULL ))
     367          15 :     {
     368          15 :         const size_t pattern_len = strlen( pattern );
     369          15 :         if ( pattern_len <= (*this_).length )
     370             :         {
     371          11 :             const char *const end = (*this_).start + (*this_).length - pattern_len;
     372          34 :             for ( const char* pos = (*this_).start; ( pos <= end )&&( result == UTF8ERROR_NOT_FOUND ); pos ++ )
     373             :             {
     374          23 :                 if ( 0 == memcmp( pos, pattern, pattern_len ) )
     375             :                 {
     376           9 :                     result = UTF8ERROR_SUCCESS;
     377           9 :                     if ( out_before != NULL )
     378             :                     {
     379           8 :                         *out_before = (utf8stringview_t){ .start = (*this_).start, .length = ( pos - (*this_).start ) };
     380             :                     }
     381           9 :                     if ( out_after != NULL )
     382             :                     {
     383           8 :                         *out_after = (utf8stringview_t){ .start = ( pos + pattern_len ), .length = ( end - pos ) };
     384             :                     }
     385             :                 }
     386             :             }
     387             :         }
     388             :     }
     389             :     else
     390             :     {
     391           1 :         result = UTF8ERROR_NULL_PARAM;
     392             :     }
     393          16 :     return result;
     394             : }
     395             : 
     396           5 : static inline utf8error_t utf8stringview_split_at_first_view( const utf8stringview_t *this_,
     397             :                                                               const utf8stringview_t *pattern,
     398             :                                                               utf8stringview_t *out_before,
     399             :                                                               utf8stringview_t *out_after )
     400             : {
     401           5 :     assert( pattern != NULL );
     402           5 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     403           5 :     if (( pattern != NULL )&&( this_ != NULL ))
     404             :     {
     405           5 :         if ( (*pattern).length <= (*this_).length )
     406             :         {
     407           4 :             const char *const end = (*this_).start + (*this_).length - (*pattern).length;
     408          12 :             for ( const char* pos = (*this_).start; ( pos <= end )&&( result == UTF8ERROR_NOT_FOUND ); pos ++ )
     409             :             {
     410           8 :                 if ( 0 == memcmp( pos, (*pattern).start, (*pattern).length ) )
     411             :                 {
     412           3 :                     result = UTF8ERROR_SUCCESS;
     413           3 :                     if ( out_before != NULL )
     414             :                     {
     415           2 :                         *out_before = (utf8stringview_t){ .start = (*this_).start, .length = ( pos - (*this_).start ) };
     416             :                     }
     417           3 :                     if ( out_after != NULL )
     418             :                     {
     419           2 :                         *out_after = (utf8stringview_t){ .start = ( pos + (*pattern).length ), .length = ( end - pos ) };
     420             :                     }
     421             :                 }
     422             :             }
     423             :         }
     424             :     }
     425             :     else
     426             :     {
     427           0 :         result = UTF8ERROR_NULL_PARAM;
     428             :     }
     429           5 :     return result;
     430             : }
     431             : 
     432          18 : static inline utf8error_t utf8stringview_split_at_last_str( const utf8stringview_t *this_,
     433             :                                                             utf8string_t *pattern,
     434             :                                                             utf8stringview_t *out_before,
     435             :                                                             utf8stringview_t *out_after )
     436             : {
     437          18 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     438          18 :     if (( pattern != NULL )&&( this_ != NULL ))
     439          17 :     {
     440          17 :         const size_t pattern_len = strlen( pattern );
     441          17 :         if ( pattern_len <= (*this_).length )
     442             :         {
     443         358 :             for ( ptrdiff_t pos = (*this_).length - pattern_len; ( pos >= 0 )&&( result == UTF8ERROR_NOT_FOUND ); pos -- )
     444             :             {
     445         342 :                 if ( 0 == memcmp( (*this_).start + pos, pattern, pattern_len ) )
     446             :                 {
     447           7 :                     result = UTF8ERROR_SUCCESS;
     448           7 :                     if ( out_before != NULL )
     449             :                     {
     450           6 :                         *out_before = (utf8stringview_t){ .start = (*this_).start, .length = pos };
     451             :                     }
     452           7 :                     if ( out_after != NULL )
     453             :                     {
     454           6 :                         *out_after = (utf8stringview_t){ .start = ( (*this_).start + pos + pattern_len ), .length = ( (*this_).length - pattern_len - pos ) };
     455             :                     }
     456             :                 }
     457             :             }
     458             :         }
     459             :     }
     460             :     else
     461             :     {
     462           1 :         result = UTF8ERROR_NULL_PARAM;
     463             :     }
     464          18 :     return result;
     465             : }
     466             : 
     467           5 : static inline utf8error_t utf8stringview_split_at_last_view( const utf8stringview_t *this_,
     468             :                                                              const utf8stringview_t *pattern,
     469             :                                                              utf8stringview_t *out_before,
     470             :                                                              utf8stringview_t *out_after )
     471             : {
     472           5 :     assert( pattern != NULL );
     473           5 :     utf8error_t result = UTF8ERROR_NOT_FOUND;
     474           5 :     if (( pattern != NULL )&&( this_ != NULL ))
     475             :     {
     476           5 :         if ( (*pattern).length <= (*this_).length )
     477             :         {
     478          14 :             for ( ptrdiff_t pos = (*this_).length - (*pattern).length; ( pos >= 0 )&&( result == UTF8ERROR_NOT_FOUND ); pos -- )
     479             :             {
     480          10 :                 if ( 0 == memcmp( (*this_).start + pos, (*pattern).start, (*pattern).length ) )
     481             :                 {
     482           3 :                     result = UTF8ERROR_SUCCESS;
     483           3 :                     if ( out_before != NULL )
     484             :                     {
     485           2 :                         *out_before = (utf8stringview_t){ .start = (*this_).start, .length = pos };
     486             :                     }
     487           3 :                     if ( out_after != NULL )
     488             :                     {
     489           2 :                         *out_after = (utf8stringview_t){ .start = ( (*this_).start + pos + (*pattern).length ), .length = ( (*this_).length - (*pattern).length - pos ) };
     490             :                     }
     491             :                 }
     492             :             }
     493             :         }
     494             :     }
     495             :     else
     496             :     {
     497           0 :         result = UTF8ERROR_NULL_PARAM;
     498             :     }
     499           5 :     return result;
     500             : }
     501             : 
     502             : #ifdef __cplusplus
     503             : }
     504             : #endif
     505             : 
     506             : 
     507             : /*
     508             :  * Copyright 2021-2024 Andreas Warnke
     509             :  *
     510             :  * Licensed under the Apache License, Version 2.0 (the "License");
     511             :  * you may not use this file except in compliance with the License.
     512             :  * You may obtain a copy of the License at
     513             :  *
     514             :  *    http://www.apache.org/licenses/LICENSE-2.0
     515             :  *
     516             :  * Unless required by applicable law or agreed to in writing, software
     517             :  * distributed under the License is distributed on an "AS IS" BASIS,
     518             :  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     519             :  * See the License for the specific language governing permissions and
     520             :  * limitations under the License.
     521             :  */

Generated by: LCOV version 1.16