Line data Source code
1 : /* File: universal_escaping_output_stream.c; Copyright and License: see below */
2 :
3 : #include "u8stream/universal_escaping_output_stream.h"
4 : #include "u8stream/universal_output_stream_if.h"
5 : #include "u8/u8_trace.h"
6 : #include "u8/u8_log.h"
7 : #include <string.h>
8 : #include <assert.h>
9 :
10 : /* the vmt implementing the interface */
11 : static const universal_output_stream_if_t universal_escaping_output_stream_private_if
12 : = {
13 : .write = (u8_error_t (*)(universal_output_stream_impl_t*, const void*, size_t)) &universal_escaping_output_stream_write,
14 : .flush = (u8_error_t (*)(universal_output_stream_impl_t*)) &universal_escaping_output_stream_flush
15 : };
16 :
17 1235 : void universal_escaping_output_stream_init ( universal_escaping_output_stream_t *this_,
18 : const char *const ((*patterns_and_replacements)[][2]),
19 : universal_output_stream_t *sink )
20 : {
21 1235 : U8_TRACE_BEGIN();
22 1235 : assert( patterns_and_replacements != NULL );
23 1235 : assert( sink != NULL );
24 :
25 1235 : (*this_).patterns_and_replacements = patterns_and_replacements;
26 1235 : (*this_).sink = sink;
27 1235 : universal_output_stream_private_init( &((*this_).output_stream), &universal_escaping_output_stream_private_if, this_ );
28 :
29 1235 : U8_TRACE_END();
30 1235 : }
31 :
32 1235 : u8_error_t universal_escaping_output_stream_destroy( universal_escaping_output_stream_t *this_ )
33 : {
34 1235 : U8_TRACE_BEGIN();
35 1235 : u8_error_t err = U8_ERROR_NONE;
36 :
37 1235 : (*this_).patterns_and_replacements = NULL;
38 1235 : (*this_).sink = NULL;
39 1235 : universal_output_stream_private_destroy( &((*this_).output_stream) );
40 :
41 1235 : U8_TRACE_END_ERR(err);
42 1235 : return err;
43 : }
44 :
45 52 : void universal_escaping_output_stream_change_rules( universal_escaping_output_stream_t *this_,
46 : const char *const ((*patterns_and_replacements)[][2]) )
47 : {
48 52 : U8_TRACE_BEGIN();
49 52 : assert( patterns_and_replacements != NULL );
50 :
51 52 : (*this_).patterns_and_replacements = patterns_and_replacements;
52 :
53 52 : U8_TRACE_END();
54 52 : }
55 :
56 5123 : u8_error_t universal_escaping_output_stream_write ( universal_escaping_output_stream_t *this_, const void *start, size_t length )
57 : {
58 : /*U8_TRACE_BEGIN();*/
59 5123 : assert( start != NULL );
60 5123 : assert( (*this_).patterns_and_replacements != NULL );
61 5123 : assert( (*this_).sink != NULL );
62 5123 : u8_error_t err = U8_ERROR_NONE;
63 5123 : const char (*char_buf)[] = (void*)start;
64 :
65 : /* count and analyze input patterns */
66 5123 : unsigned int pattern_count = 0;
67 5123 : char head_common_bits = '\x00'; /* optimization, improves xml export by 5% */
68 5123 : char head_common_pattern = '\x00'; /* optimization, improves xml export by 5% */
69 18453 : for ( unsigned int pattern_idx = 0; (*((*this_).patterns_and_replacements))[pattern_idx][0] != NULL; pattern_idx++ )
70 : {
71 13330 : pattern_count++;
72 13330 : if ( pattern_idx == 0 )
73 : {
74 5123 : head_common_bits = '\xff';
75 5123 : head_common_pattern = *((*((*this_).patterns_and_replacements))[pattern_idx][0]);
76 : }
77 : else
78 : {
79 8207 : const char unequal_bits = head_common_pattern ^ *((*((*this_).patterns_and_replacements))[pattern_idx][0]);
80 8207 : head_common_bits = head_common_bits & ( ~ unequal_bits );
81 : }
82 : };
83 5123 : head_common_pattern &= head_common_bits;
84 :
85 : /* search and replace patterns */
86 5123 : size_t bytes_already_written = 0;
87 100882 : for ( size_t index = 0; index < length; index ++ )
88 : {
89 : /* check if a pattern matches */
90 95759 : int matching_pattern_idx = -1;
91 95759 : const char chr_at_idx = (*char_buf)[index];
92 95759 : if (( chr_at_idx & head_common_bits ) == head_common_pattern )
93 : {
94 2115 : for ( unsigned int pattern_idx = 0; ( pattern_idx < pattern_count )&&( matching_pattern_idx == -1 ); pattern_idx++ )
95 : {
96 1799 : const char * pattern = (*((*this_).patterns_and_replacements))[pattern_idx][0];
97 1799 : const unsigned int pattern_len = strlen( pattern );
98 1799 : if (( index + pattern_len <= length )&&( pattern_len > 0 ))
99 : {
100 1795 : if ( 0 == memcmp( &((*char_buf)[index]), pattern, pattern_len ) )
101 : {
102 121 : matching_pattern_idx = pattern_idx;
103 : /*fprintf(stderr,"found pattern %d at pos %zd\n",matching_pattern_idx,index);*/
104 : }
105 : }
106 : }
107 : }
108 :
109 : /* replace pattern */
110 95759 : if ( matching_pattern_idx != -1 )
111 : {
112 : /* write previously processed bytes */
113 121 : err |= universal_output_stream_write( (*this_).sink, &((*char_buf)[bytes_already_written]), index-bytes_already_written );
114 121 : bytes_already_written = index;
115 : /* write pattern */
116 121 : const char * pattern = (*((*this_).patterns_and_replacements))[matching_pattern_idx][0];
117 121 : const unsigned int pattern_len = strlen( pattern );
118 121 : const char * replacement = (*((*this_).patterns_and_replacements))[matching_pattern_idx][1];
119 121 : unsigned int replace_len = 0;
120 121 : if ( replacement != NULL )
121 : {
122 121 : replace_len = strlen(replacement);
123 : }
124 121 : err |= universal_output_stream_write( (*this_).sink, replacement, replace_len );
125 121 : bytes_already_written += pattern_len;
126 : /* forward index */
127 121 : index = index + pattern_len - 1;
128 : }
129 :
130 95759 : if ( (index+1)==length ) /* is last? */
131 : {
132 5008 : err |= universal_output_stream_write( (*this_).sink, &((*char_buf)[bytes_already_written]), length-bytes_already_written );
133 5008 : bytes_already_written = length;
134 : }
135 : }
136 :
137 : /*U8_TRACE_END_ERR(err);*/
138 5123 : return err;
139 : }
140 :
141 6437 : u8_error_t universal_escaping_output_stream_flush( universal_escaping_output_stream_t *this_ )
142 : {
143 6437 : U8_TRACE_BEGIN();
144 6437 : assert( (*this_).sink != NULL );
145 :
146 6437 : const u8_error_t err = universal_output_stream_flush( (*this_).sink );
147 :
148 6437 : U8_TRACE_END_ERR(err);
149 6437 : return err;
150 : }
151 :
152 1228 : universal_output_stream_t* universal_escaping_output_stream_get_output_stream( universal_escaping_output_stream_t *this_ )
153 : {
154 1228 : U8_TRACE_BEGIN();
155 :
156 1228 : universal_output_stream_t* result = &((*this_).output_stream);
157 :
158 1228 : U8_TRACE_END();
159 1228 : return result;
160 : }
161 :
162 :
163 : /*
164 : Copyright 2020-2025 Andreas Warnke
165 :
166 : Licensed under the Apache License, Version 2.0 (the "License");
167 : you may not use this file except in compliance with the License.
168 : You may obtain a copy of the License at
169 :
170 : http://www.apache.org/licenses/LICENSE-2.0
171 :
172 : Unless required by applicable law or agreed to in writing, software
173 : distributed under the License is distributed on an "AS IS" BASIS,
174 : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
175 : See the License for the specific language governing permissions and
176 : limitations under the License.
177 : */
|