qemu/migration/xbzrle.c
<<
>>
Prefs
   1/*
   2 * Xor Based Zero Run Length Encoding
   3 *
   4 * Copyright 2013 Red Hat, Inc. and/or its affiliates
   5 *
   6 * Authors:
   7 *  Orit Wasserman  <owasserm@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 *
  12 */
  13#include "qemu/osdep.h"
  14#include "qemu/cutils.h"
  15#include "qemu/host-utils.h"
  16#include "xbzrle.h"
  17
  18#if defined(CONFIG_AVX512BW_OPT)
  19#include <immintrin.h>
  20#include "host/cpuinfo.h"
  21
  22static int __attribute__((target("avx512bw")))
  23xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
  24                            uint8_t *dst, int dlen)
  25{
  26    uint32_t zrun_len = 0, nzrun_len = 0;
  27    int d = 0, i = 0, num = 0;
  28    uint8_t *nzrun_start = NULL;
  29    /* add 1 to include residual part in main loop */
  30    uint32_t count512s = (slen >> 6) + 1;
  31    /* countResidual is tail of data, i.e., countResidual = slen % 64 */
  32    uint32_t count_residual = slen & 0b111111;
  33    bool never_same = true;
  34    uint64_t mask_residual = 1;
  35    mask_residual <<= count_residual;
  36    mask_residual -= 1;
  37    __m512i r = _mm512_set1_epi32(0);
  38
  39    while (count512s) {
  40        int bytes_to_check = 64;
  41        uint64_t mask = 0xffffffffffffffff;
  42        if (count512s == 1) {
  43            bytes_to_check = count_residual;
  44            mask = mask_residual;
  45        }
  46        __m512i old_data = _mm512_mask_loadu_epi8(r,
  47                                                  mask, old_buf + i);
  48        __m512i new_data = _mm512_mask_loadu_epi8(r,
  49                                                  mask, new_buf + i);
  50        uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
  51        count512s--;
  52
  53        bool is_same = (comp & 0x1);
  54        while (bytes_to_check) {
  55            if (d + 2 > dlen) {
  56                return -1;
  57            }
  58            if (is_same) {
  59                if (nzrun_len) {
  60                    d += uleb128_encode_small(dst + d, nzrun_len);
  61                    if (d + nzrun_len > dlen) {
  62                        return -1;
  63                    }
  64                    nzrun_start = new_buf + i - nzrun_len;
  65                    memcpy(dst + d, nzrun_start, nzrun_len);
  66                    d += nzrun_len;
  67                    nzrun_len = 0;
  68                }
  69                /* 64 data at a time for speed */
  70                if (count512s && (comp == 0xffffffffffffffff)) {
  71                    i += 64;
  72                    zrun_len += 64;
  73                    break;
  74                }
  75                never_same = false;
  76                num = ctz64(~comp);
  77                num = (num < bytes_to_check) ? num : bytes_to_check;
  78                zrun_len += num;
  79                bytes_to_check -= num;
  80                comp >>= num;
  81                i += num;
  82                if (bytes_to_check) {
  83                    /* still has different data after same data */
  84                    d += uleb128_encode_small(dst + d, zrun_len);
  85                    zrun_len = 0;
  86                } else {
  87                    break;
  88                }
  89            }
  90            if (never_same || zrun_len) {
  91                /*
  92                 * never_same only acts if
  93                 * data begins with diff in first count512s
  94                 */
  95                d += uleb128_encode_small(dst + d, zrun_len);
  96                zrun_len = 0;
  97                never_same = false;
  98            }
  99            /* has diff, 64 data at a time for speed */
 100            if ((bytes_to_check == 64) && (comp == 0x0)) {
 101                i += 64;
 102                nzrun_len += 64;
 103                break;
 104            }
 105            num = ctz64(comp);
 106            num = (num < bytes_to_check) ? num : bytes_to_check;
 107            nzrun_len += num;
 108            bytes_to_check -= num;
 109            comp >>= num;
 110            i += num;
 111            if (bytes_to_check) {
 112                /* mask like 111000 */
 113                d += uleb128_encode_small(dst + d, nzrun_len);
 114                /* overflow */
 115                if (d + nzrun_len > dlen) {
 116                    return -1;
 117                }
 118                nzrun_start = new_buf + i - nzrun_len;
 119                memcpy(dst + d, nzrun_start, nzrun_len);
 120                d += nzrun_len;
 121                nzrun_len = 0;
 122                is_same = true;
 123            }
 124        }
 125    }
 126
 127    if (nzrun_len != 0) {
 128        d += uleb128_encode_small(dst + d, nzrun_len);
 129        /* overflow */
 130        if (d + nzrun_len > dlen) {
 131            return -1;
 132        }
 133        nzrun_start = new_buf + i - nzrun_len;
 134        memcpy(dst + d, nzrun_start, nzrun_len);
 135        d += nzrun_len;
 136    }
 137    return d;
 138}
 139
 140static int xbzrle_encode_buffer_int(uint8_t *old_buf, uint8_t *new_buf,
 141                                    int slen, uint8_t *dst, int dlen);
 142
 143static int (*accel_func)(uint8_t *, uint8_t *, int, uint8_t *, int);
 144
 145static void __attribute__((constructor)) init_accel(void)
 146{
 147    unsigned info = cpuinfo_init();
 148    if (info & CPUINFO_AVX512BW) {
 149        accel_func = xbzrle_encode_buffer_avx512;
 150    } else {
 151        accel_func = xbzrle_encode_buffer_int;
 152    }
 153}
 154
 155int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
 156                         uint8_t *dst, int dlen)
 157{
 158    return accel_func(old_buf, new_buf, slen, dst, dlen);
 159}
 160
 161#define xbzrle_encode_buffer xbzrle_encode_buffer_int
 162#endif
 163
 164/*
 165  page = zrun nzrun
 166       | zrun nzrun page
 167
 168  zrun = length
 169
 170  nzrun = length byte...
 171
 172  length = uleb128 encoded integer
 173 */
 174int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
 175                         uint8_t *dst, int dlen)
 176{
 177    uint32_t zrun_len = 0, nzrun_len = 0;
 178    int d = 0, i = 0;
 179    long res;
 180    uint8_t *nzrun_start = NULL;
 181
 182    g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
 183               sizeof(long)));
 184
 185    while (i < slen) {
 186        /* overflow */
 187        if (d + 2 > dlen) {
 188            return -1;
 189        }
 190
 191        /* not aligned to sizeof(long) */
 192        res = (slen - i) % sizeof(long);
 193        while (res && old_buf[i] == new_buf[i]) {
 194            zrun_len++;
 195            i++;
 196            res--;
 197        }
 198
 199        /* word at a time for speed */
 200        if (!res) {
 201            while (i < slen &&
 202                   (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
 203                i += sizeof(long);
 204                zrun_len += sizeof(long);
 205            }
 206
 207            /* go over the rest */
 208            while (i < slen && old_buf[i] == new_buf[i]) {
 209                zrun_len++;
 210                i++;
 211            }
 212        }
 213
 214        /* buffer unchanged */
 215        if (zrun_len == slen) {
 216            return 0;
 217        }
 218
 219        /* skip last zero run */
 220        if (i == slen) {
 221            return d;
 222        }
 223
 224        d += uleb128_encode_small(dst + d, zrun_len);
 225
 226        zrun_len = 0;
 227        nzrun_start = new_buf + i;
 228
 229        /* overflow */
 230        if (d + 2 > dlen) {
 231            return -1;
 232        }
 233        /* not aligned to sizeof(long) */
 234        res = (slen - i) % sizeof(long);
 235        while (res && old_buf[i] != new_buf[i]) {
 236            i++;
 237            nzrun_len++;
 238            res--;
 239        }
 240
 241        /* word at a time for speed, use of 32-bit long okay */
 242        if (!res) {
 243            /* truncation to 32-bit long okay */
 244            unsigned long mask = (unsigned long)0x0101010101010101ULL;
 245            while (i < slen) {
 246                unsigned long xor;
 247                xor = *(unsigned long *)(old_buf + i)
 248                    ^ *(unsigned long *)(new_buf + i);
 249                if ((xor - mask) & ~xor & (mask << 7)) {
 250                    /* found the end of an nzrun within the current long */
 251                    while (old_buf[i] != new_buf[i]) {
 252                        nzrun_len++;
 253                        i++;
 254                    }
 255                    break;
 256                } else {
 257                    i += sizeof(long);
 258                    nzrun_len += sizeof(long);
 259                }
 260            }
 261        }
 262
 263        d += uleb128_encode_small(dst + d, nzrun_len);
 264        /* overflow */
 265        if (d + nzrun_len > dlen) {
 266            return -1;
 267        }
 268        memcpy(dst + d, nzrun_start, nzrun_len);
 269        d += nzrun_len;
 270        nzrun_len = 0;
 271    }
 272
 273    return d;
 274}
 275
 276int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
 277{
 278    int i = 0, d = 0;
 279    int ret;
 280    uint32_t count = 0;
 281
 282    while (i < slen) {
 283
 284        /* zrun */
 285        if ((slen - i) < 2) {
 286            return -1;
 287        }
 288
 289        ret = uleb128_decode_small(src + i, &count);
 290        if (ret < 0 || (i && !count)) {
 291            return -1;
 292        }
 293        i += ret;
 294        d += count;
 295
 296        /* overflow */
 297        if (d > dlen) {
 298            return -1;
 299        }
 300
 301        /* nzrun */
 302        if ((slen - i) < 2) {
 303            return -1;
 304        }
 305
 306        ret = uleb128_decode_small(src + i, &count);
 307        if (ret < 0 || !count) {
 308            return -1;
 309        }
 310        i += ret;
 311
 312        /* overflow */
 313        if (d + count > dlen || i + count > slen) {
 314            return -1;
 315        }
 316
 317        memcpy(dst + d, src + i, count);
 318        d += count;
 319        i += count;
 320    }
 321
 322    return d;
 323}
 324