1
2
3
4
5
6
7
8
9
10
11
12
13#include "qemu/osdep.h"
14#include "qemu/cutils.h"
15#include "qemu/host-utils.h"
16#include "xbzrle.h"
17
18
19
20
21
22
23
24
25
26
27
28int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
29 uint8_t *dst, int dlen)
30{
31 uint32_t zrun_len = 0, nzrun_len = 0;
32 int d = 0, i = 0;
33 long res;
34 uint8_t *nzrun_start = NULL;
35
36 g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
37 sizeof(long)));
38
39 while (i < slen) {
40
41 if (d + 2 > dlen) {
42 return -1;
43 }
44
45
46 res = (slen - i) % sizeof(long);
47 while (res && old_buf[i] == new_buf[i]) {
48 zrun_len++;
49 i++;
50 res--;
51 }
52
53
54 if (!res) {
55 while (i < slen &&
56 (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
57 i += sizeof(long);
58 zrun_len += sizeof(long);
59 }
60
61
62 while (i < slen && old_buf[i] == new_buf[i]) {
63 zrun_len++;
64 i++;
65 }
66 }
67
68
69 if (zrun_len == slen) {
70 return 0;
71 }
72
73
74 if (i == slen) {
75 return d;
76 }
77
78 d += uleb128_encode_small(dst + d, zrun_len);
79
80 zrun_len = 0;
81 nzrun_start = new_buf + i;
82
83
84 if (d + 2 > dlen) {
85 return -1;
86 }
87
88 res = (slen - i) % sizeof(long);
89 while (res && old_buf[i] != new_buf[i]) {
90 i++;
91 nzrun_len++;
92 res--;
93 }
94
95
96 if (!res) {
97
98 unsigned long mask = (unsigned long)0x0101010101010101ULL;
99 while (i < slen) {
100 unsigned long xor;
101 xor = *(unsigned long *)(old_buf + i)
102 ^ *(unsigned long *)(new_buf + i);
103 if ((xor - mask) & ~xor & (mask << 7)) {
104
105 while (old_buf[i] != new_buf[i]) {
106 nzrun_len++;
107 i++;
108 }
109 break;
110 } else {
111 i += sizeof(long);
112 nzrun_len += sizeof(long);
113 }
114 }
115 }
116
117 d += uleb128_encode_small(dst + d, nzrun_len);
118
119 if (d + nzrun_len > dlen) {
120 return -1;
121 }
122 memcpy(dst + d, nzrun_start, nzrun_len);
123 d += nzrun_len;
124 nzrun_len = 0;
125 }
126
127 return d;
128}
129
130int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
131{
132 int i = 0, d = 0;
133 int ret;
134 uint32_t count = 0;
135
136 while (i < slen) {
137
138
139 if ((slen - i) < 2) {
140 return -1;
141 }
142
143 ret = uleb128_decode_small(src + i, &count);
144 if (ret < 0 || (i && !count)) {
145 return -1;
146 }
147 i += ret;
148 d += count;
149
150
151 if (d > dlen) {
152 return -1;
153 }
154
155
156 if ((slen - i) < 2) {
157 return -1;
158 }
159
160 ret = uleb128_decode_small(src + i, &count);
161 if (ret < 0 || !count) {
162 return -1;
163 }
164 i += ret;
165
166
167 if (d + count > dlen || i + count > slen) {
168 return -1;
169 }
170
171 memcpy(dst + d, src + i, count);
172 d += count;
173 i += count;
174 }
175
176 return d;
177}
178
179#if defined(CONFIG_AVX512BW_OPT)
180#pragma GCC push_options
181#pragma GCC target("avx512bw")
182#include <immintrin.h>
183int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
184 uint8_t *dst, int dlen)
185{
186 uint32_t zrun_len = 0, nzrun_len = 0;
187 int d = 0, i = 0, num = 0;
188 uint8_t *nzrun_start = NULL;
189
190 uint32_t count512s = (slen >> 6) + 1;
191
192 uint32_t count_residual = slen & 0b111111;
193 bool never_same = true;
194 uint64_t mask_residual = 1;
195 mask_residual <<= count_residual;
196 mask_residual -= 1;
197 __m512i r = _mm512_set1_epi32(0);
198
199 while (count512s) {
200 int bytes_to_check = 64;
201 uint64_t mask = 0xffffffffffffffff;
202 if (count512s == 1) {
203 bytes_to_check = count_residual;
204 mask = mask_residual;
205 }
206 __m512i old_data = _mm512_mask_loadu_epi8(r,
207 mask, old_buf + i);
208 __m512i new_data = _mm512_mask_loadu_epi8(r,
209 mask, new_buf + i);
210 uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
211 count512s--;
212
213 bool is_same = (comp & 0x1);
214 while (bytes_to_check) {
215 if (d + 2 > dlen) {
216 return -1;
217 }
218 if (is_same) {
219 if (nzrun_len) {
220 d += uleb128_encode_small(dst + d, nzrun_len);
221 if (d + nzrun_len > dlen) {
222 return -1;
223 }
224 nzrun_start = new_buf + i - nzrun_len;
225 memcpy(dst + d, nzrun_start, nzrun_len);
226 d += nzrun_len;
227 nzrun_len = 0;
228 }
229
230 if (count512s && (comp == 0xffffffffffffffff)) {
231 i += 64;
232 zrun_len += 64;
233 break;
234 }
235 never_same = false;
236 num = ctz64(~comp);
237 num = (num < bytes_to_check) ? num : bytes_to_check;
238 zrun_len += num;
239 bytes_to_check -= num;
240 comp >>= num;
241 i += num;
242 if (bytes_to_check) {
243
244 d += uleb128_encode_small(dst + d, zrun_len);
245 zrun_len = 0;
246 } else {
247 break;
248 }
249 }
250 if (never_same || zrun_len) {
251
252
253
254
255 d += uleb128_encode_small(dst + d, zrun_len);
256 zrun_len = 0;
257 never_same = false;
258 }
259
260 if ((bytes_to_check == 64) && (comp == 0x0)) {
261 i += 64;
262 nzrun_len += 64;
263 break;
264 }
265 num = ctz64(comp);
266 num = (num < bytes_to_check) ? num : bytes_to_check;
267 nzrun_len += num;
268 bytes_to_check -= num;
269 comp >>= num;
270 i += num;
271 if (bytes_to_check) {
272
273 d += uleb128_encode_small(dst + d, nzrun_len);
274
275 if (d + nzrun_len > dlen) {
276 return -1;
277 }
278 nzrun_start = new_buf + i - nzrun_len;
279 memcpy(dst + d, nzrun_start, nzrun_len);
280 d += nzrun_len;
281 nzrun_len = 0;
282 is_same = true;
283 }
284 }
285 }
286
287 if (nzrun_len != 0) {
288 d += uleb128_encode_small(dst + d, nzrun_len);
289
290 if (d + nzrun_len > dlen) {
291 return -1;
292 }
293 nzrun_start = new_buf + i - nzrun_len;
294 memcpy(dst + d, nzrun_start, nzrun_len);
295 d += nzrun_len;
296 }
297 return d;
298}
299#pragma GCC pop_options
300#endif
301