Line data Source code
1 : /* File: universal_escaping_output_stream.c; Copyright and License: see below */ 2 : 3 : #include "u8stream/universal_escaping_output_stream.h" 4 : #include "u8stream/universal_output_stream_if.h" 5 : #include "u8/u8_trace.h" 6 : #include "u8/u8_log.h" 7 : #include <string.h> 8 : #include <assert.h> 9 : 10 : /* the vmt implementing the interface */ 11 : static const universal_output_stream_if_t universal_escaping_output_stream_private_if 12 : = { 13 : .write = (u8_error_t (*)(universal_output_stream_impl_t*, const void*, size_t)) &universal_escaping_output_stream_write, 14 : .flush = (u8_error_t (*)(universal_output_stream_impl_t*)) &universal_escaping_output_stream_flush 15 : }; 16 : 17 1178 : void universal_escaping_output_stream_init ( universal_escaping_output_stream_t *this_, 18 : const char *const ((*patterns_and_replacements)[][2]), 19 : universal_output_stream_t *sink ) 20 : { 21 1178 : U8_TRACE_BEGIN(); 22 1178 : assert( patterns_and_replacements != NULL ); 23 1178 : assert( sink != NULL ); 24 : 25 1178 : (*this_).patterns_and_replacements = patterns_and_replacements; 26 1178 : (*this_).sink = sink; 27 1178 : universal_output_stream_private_init( &((*this_).output_stream), &universal_escaping_output_stream_private_if, this_ ); 28 : 29 1178 : U8_TRACE_END(); 30 1178 : } 31 : 32 1178 : u8_error_t universal_escaping_output_stream_destroy( universal_escaping_output_stream_t *this_ ) 33 : { 34 1178 : U8_TRACE_BEGIN(); 35 1178 : u8_error_t err = U8_ERROR_NONE; 36 : 37 1178 : (*this_).patterns_and_replacements = NULL; 38 1178 : (*this_).sink = NULL; 39 1178 : universal_output_stream_private_destroy( &((*this_).output_stream) ); 40 : 41 1178 : U8_TRACE_END_ERR(err); 42 1178 : return err; 43 : } 44 : 45 32 : void universal_escaping_output_stream_change_rules( universal_escaping_output_stream_t *this_, 46 : const char *const ((*patterns_and_replacements)[][2]) ) 47 : { 48 32 : U8_TRACE_BEGIN(); 49 32 : assert( patterns_and_replacements != NULL ); 50 : 51 32 : (*this_).patterns_and_replacements = patterns_and_replacements; 52 : 53 32 : U8_TRACE_END(); 54 32 : } 55 : 56 2392 : u8_error_t universal_escaping_output_stream_write ( universal_escaping_output_stream_t *this_, const void *start, size_t length ) 57 : { 58 : /*U8_TRACE_BEGIN();*/ 59 2392 : assert( start != NULL ); 60 2392 : assert( (*this_).patterns_and_replacements != NULL ); 61 2392 : assert( (*this_).sink != NULL ); 62 2392 : u8_error_t err = U8_ERROR_NONE; 63 2392 : const char (*char_buf)[] = (void*)start; 64 : 65 : /* count and analyze input patterns */ 66 2392 : unsigned int pattern_count = 0; 67 2392 : char head_common_bits = '\x00'; /* optimization, improves xml export by 5% */ 68 2392 : char head_common_pattern = '\x00'; /* optimization, improves xml export by 5% */ 69 12571 : for ( unsigned int pattern_idx = 0; (*((*this_).patterns_and_replacements))[pattern_idx][0] != NULL; pattern_idx++ ) 70 : { 71 10179 : pattern_count++; 72 10179 : if ( pattern_idx == 0 ) 73 : { 74 2392 : head_common_bits = '\xff'; 75 2392 : head_common_pattern = *((*((*this_).patterns_and_replacements))[pattern_idx][0]); 76 : } 77 : else 78 : { 79 7787 : const char unequal_bits = head_common_pattern ^ *((*((*this_).patterns_and_replacements))[pattern_idx][0]); 80 7787 : head_common_bits = head_common_bits & ( ~ unequal_bits ); 81 : } 82 : }; 83 2392 : head_common_pattern &= head_common_bits; 84 : 85 : /* search and replace patterns */ 86 2392 : size_t bytes_already_written = 0; 87 42559 : for ( size_t index = 0; index < length; index ++ ) 88 : { 89 : /* check if a pattern matches */ 90 40167 : int matching_pattern_idx = -1; 91 40167 : const char chr_at_idx = (*char_buf)[index]; 92 40167 : if (( chr_at_idx & head_common_bits ) == head_common_pattern ) 93 : { 94 1035 : for ( unsigned int pattern_idx = 0; ( pattern_idx < pattern_count )&&( matching_pattern_idx == -1 ); pattern_idx++ ) 95 : { 96 856 : const char * pattern = (*((*this_).patterns_and_replacements))[pattern_idx][0]; 97 856 : const unsigned int pattern_len = strlen( pattern ); 98 856 : if (( index + pattern_len <= length )&&( pattern_len > 0 )) 99 : { 100 852 : if ( 0 == memcmp( &((*char_buf)[index]), pattern, pattern_len ) ) 101 : { 102 122 : matching_pattern_idx = pattern_idx; 103 : /*fprintf(stderr,"found pattern %d at pos %zd\n",matching_pattern_idx,index);*/ 104 : } 105 : } 106 : } 107 : } 108 : 109 : /* replace pattern */ 110 40167 : if ( matching_pattern_idx != -1 ) 111 : { 112 : /* write previously processed bytes */ 113 122 : err |= universal_output_stream_write( (*this_).sink, &((*char_buf)[bytes_already_written]), index-bytes_already_written ); 114 122 : bytes_already_written = index; 115 : /* write pattern */ 116 122 : const char * pattern = (*((*this_).patterns_and_replacements))[matching_pattern_idx][0]; 117 122 : const unsigned int pattern_len = strlen( pattern ); 118 122 : const char * replacement = (*((*this_).patterns_and_replacements))[matching_pattern_idx][1]; 119 122 : unsigned int replace_len = 0; 120 122 : if ( replacement != NULL ) 121 : { 122 122 : replace_len = strlen(replacement); 123 : } 124 122 : err |= universal_output_stream_write( (*this_).sink, replacement, replace_len ); 125 122 : bytes_already_written += pattern_len; 126 : /* forward index */ 127 122 : index = index + pattern_len - 1; 128 : } 129 : 130 40167 : if ( (index+1)==length ) /* is last? */ 131 : { 132 2280 : err |= universal_output_stream_write( (*this_).sink, &((*char_buf)[bytes_already_written]), length-bytes_already_written ); 133 2280 : bytes_already_written = length; 134 : } 135 : } 136 : 137 : /*U8_TRACE_END_ERR(err);*/ 138 2392 : return err; 139 : } 140 : 141 2878 : u8_error_t universal_escaping_output_stream_flush( universal_escaping_output_stream_t *this_ ) 142 : { 143 2878 : U8_TRACE_BEGIN(); 144 2878 : assert( (*this_).sink != NULL ); 145 : 146 2878 : const u8_error_t err = universal_output_stream_flush( (*this_).sink ); 147 : 148 2878 : U8_TRACE_END_ERR(err); 149 2878 : return err; 150 : } 151 : 152 1175 : universal_output_stream_t* universal_escaping_output_stream_get_output_stream( universal_escaping_output_stream_t *this_ ) 153 : { 154 1175 : U8_TRACE_BEGIN(); 155 : 156 1175 : universal_output_stream_t* result = &((*this_).output_stream); 157 : 158 1175 : U8_TRACE_END(); 159 1175 : return result; 160 : } 161 : 162 : 163 : /* 164 : Copyright 2020-2024 Andreas Warnke 165 : 166 : Licensed under the Apache License, Version 2.0 (the "License"); 167 : you may not use this file except in compliance with the License. 168 : You may obtain a copy of the License at 169 : 170 : http://www.apache.org/licenses/LICENSE-2.0 171 : 172 : Unless required by applicable law or agreed to in writing, software 173 : distributed under the License is distributed on an "AS IS" BASIS, 174 : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 175 : See the License for the specific language governing permissions and 176 : limitations under the License. 177 : */