qemu/migration/xbzrle.c
<<
>>
Prefs
   1/*
   2 * Xor Based Zero Run Length Encoding
   3 *
   4 * Copyright 2013 Red Hat, Inc. and/or its affiliates
   5 *
   6 * Authors:
   7 *  Orit Wasserman  <owasserm@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 *
  12 */
  13#include "qemu/osdep.h"
  14#include "qemu/cutils.h"
  15#include "qemu/host-utils.h"
  16#include "xbzrle.h"
  17
  18/*
  19  page = zrun nzrun
  20       | zrun nzrun page
  21
  22  zrun = length
  23
  24  nzrun = length byte...
  25
  26  length = uleb128 encoded integer
  27 */
  28int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
  29                         uint8_t *dst, int dlen)
  30{
  31    uint32_t zrun_len = 0, nzrun_len = 0;
  32    int d = 0, i = 0;
  33    long res;
  34    uint8_t *nzrun_start = NULL;
  35
  36    g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
  37               sizeof(long)));
  38
  39    while (i < slen) {
  40        /* overflow */
  41        if (d + 2 > dlen) {
  42            return -1;
  43        }
  44
  45        /* not aligned to sizeof(long) */
  46        res = (slen - i) % sizeof(long);
  47        while (res && old_buf[i] == new_buf[i]) {
  48            zrun_len++;
  49            i++;
  50            res--;
  51        }
  52
  53        /* word at a time for speed */
  54        if (!res) {
  55            while (i < slen &&
  56                   (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
  57                i += sizeof(long);
  58                zrun_len += sizeof(long);
  59            }
  60
  61            /* go over the rest */
  62            while (i < slen && old_buf[i] == new_buf[i]) {
  63                zrun_len++;
  64                i++;
  65            }
  66        }
  67
  68        /* buffer unchanged */
  69        if (zrun_len == slen) {
  70            return 0;
  71        }
  72
  73        /* skip last zero run */
  74        if (i == slen) {
  75            return d;
  76        }
  77
  78        d += uleb128_encode_small(dst + d, zrun_len);
  79
  80        zrun_len = 0;
  81        nzrun_start = new_buf + i;
  82
  83        /* overflow */
  84        if (d + 2 > dlen) {
  85            return -1;
  86        }
  87        /* not aligned to sizeof(long) */
  88        res = (slen - i) % sizeof(long);
  89        while (res && old_buf[i] != new_buf[i]) {
  90            i++;
  91            nzrun_len++;
  92            res--;
  93        }
  94
  95        /* word at a time for speed, use of 32-bit long okay */
  96        if (!res) {
  97            /* truncation to 32-bit long okay */
  98            unsigned long mask = (unsigned long)0x0101010101010101ULL;
  99            while (i < slen) {
 100                unsigned long xor;
 101                xor = *(unsigned long *)(old_buf + i)
 102                    ^ *(unsigned long *)(new_buf + i);
 103                if ((xor - mask) & ~xor & (mask << 7)) {
 104                    /* found the end of an nzrun within the current long */
 105                    while (old_buf[i] != new_buf[i]) {
 106                        nzrun_len++;
 107                        i++;
 108                    }
 109                    break;
 110                } else {
 111                    i += sizeof(long);
 112                    nzrun_len += sizeof(long);
 113                }
 114            }
 115        }
 116
 117        d += uleb128_encode_small(dst + d, nzrun_len);
 118        /* overflow */
 119        if (d + nzrun_len > dlen) {
 120            return -1;
 121        }
 122        memcpy(dst + d, nzrun_start, nzrun_len);
 123        d += nzrun_len;
 124        nzrun_len = 0;
 125    }
 126
 127    return d;
 128}
 129
 130int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
 131{
 132    int i = 0, d = 0;
 133    int ret;
 134    uint32_t count = 0;
 135
 136    while (i < slen) {
 137
 138        /* zrun */
 139        if ((slen - i) < 2) {
 140            return -1;
 141        }
 142
 143        ret = uleb128_decode_small(src + i, &count);
 144        if (ret < 0 || (i && !count)) {
 145            return -1;
 146        }
 147        i += ret;
 148        d += count;
 149
 150        /* overflow */
 151        if (d > dlen) {
 152            return -1;
 153        }
 154
 155        /* nzrun */
 156        if ((slen - i) < 2) {
 157            return -1;
 158        }
 159
 160        ret = uleb128_decode_small(src + i, &count);
 161        if (ret < 0 || !count) {
 162            return -1;
 163        }
 164        i += ret;
 165
 166        /* overflow */
 167        if (d + count > dlen || i + count > slen) {
 168            return -1;
 169        }
 170
 171        memcpy(dst + d, src + i, count);
 172        d += count;
 173        i += count;
 174    }
 175
 176    return d;
 177}
 178
 179#if defined(CONFIG_AVX512BW_OPT)
 180#pragma GCC push_options
 181#pragma GCC target("avx512bw")
 182#include <immintrin.h>
 183int xbzrle_encode_buffer_avx512(uint8_t *old_buf, uint8_t *new_buf, int slen,
 184                             uint8_t *dst, int dlen)
 185{
 186    uint32_t zrun_len = 0, nzrun_len = 0;
 187    int d = 0, i = 0, num = 0;
 188    uint8_t *nzrun_start = NULL;
 189    /* add 1 to include residual part in main loop */
 190    uint32_t count512s = (slen >> 6) + 1;
 191    /* countResidual is tail of data, i.e., countResidual = slen % 64 */
 192    uint32_t count_residual = slen & 0b111111;
 193    bool never_same = true;
 194    uint64_t mask_residual = 1;
 195    mask_residual <<= count_residual;
 196    mask_residual -= 1;
 197    __m512i r = _mm512_set1_epi32(0);
 198
 199    while (count512s) {
 200        int bytes_to_check = 64;
 201        uint64_t mask = 0xffffffffffffffff;
 202        if (count512s == 1) {
 203            bytes_to_check = count_residual;
 204            mask = mask_residual;
 205        }
 206        __m512i old_data = _mm512_mask_loadu_epi8(r,
 207                                                  mask, old_buf + i);
 208        __m512i new_data = _mm512_mask_loadu_epi8(r,
 209                                                  mask, new_buf + i);
 210        uint64_t comp = _mm512_cmpeq_epi8_mask(old_data, new_data);
 211        count512s--;
 212
 213        bool is_same = (comp & 0x1);
 214        while (bytes_to_check) {
 215            if (d + 2 > dlen) {
 216                return -1;
 217            }
 218            if (is_same) {
 219                if (nzrun_len) {
 220                    d += uleb128_encode_small(dst + d, nzrun_len);
 221                    if (d + nzrun_len > dlen) {
 222                        return -1;
 223                    }
 224                    nzrun_start = new_buf + i - nzrun_len;
 225                    memcpy(dst + d, nzrun_start, nzrun_len);
 226                    d += nzrun_len;
 227                    nzrun_len = 0;
 228                }
 229                /* 64 data at a time for speed */
 230                if (count512s && (comp == 0xffffffffffffffff)) {
 231                    i += 64;
 232                    zrun_len += 64;
 233                    break;
 234                }
 235                never_same = false;
 236                num = ctz64(~comp);
 237                num = (num < bytes_to_check) ? num : bytes_to_check;
 238                zrun_len += num;
 239                bytes_to_check -= num;
 240                comp >>= num;
 241                i += num;
 242                if (bytes_to_check) {
 243                    /* still has different data after same data */
 244                    d += uleb128_encode_small(dst + d, zrun_len);
 245                    zrun_len = 0;
 246                } else {
 247                    break;
 248                }
 249            }
 250            if (never_same || zrun_len) {
 251                /*
 252                 * never_same only acts if
 253                 * data begins with diff in first count512s
 254                 */
 255                d += uleb128_encode_small(dst + d, zrun_len);
 256                zrun_len = 0;
 257                never_same = false;
 258            }
 259            /* has diff, 64 data at a time for speed */
 260            if ((bytes_to_check == 64) && (comp == 0x0)) {
 261                i += 64;
 262                nzrun_len += 64;
 263                break;
 264            }
 265            num = ctz64(comp);
 266            num = (num < bytes_to_check) ? num : bytes_to_check;
 267            nzrun_len += num;
 268            bytes_to_check -= num;
 269            comp >>= num;
 270            i += num;
 271            if (bytes_to_check) {
 272                /* mask like 111000 */
 273                d += uleb128_encode_small(dst + d, nzrun_len);
 274                /* overflow */
 275                if (d + nzrun_len > dlen) {
 276                    return -1;
 277                }
 278                nzrun_start = new_buf + i - nzrun_len;
 279                memcpy(dst + d, nzrun_start, nzrun_len);
 280                d += nzrun_len;
 281                nzrun_len = 0;
 282                is_same = true;
 283            }
 284        }
 285    }
 286
 287    if (nzrun_len != 0) {
 288        d += uleb128_encode_small(dst + d, nzrun_len);
 289        /* overflow */
 290        if (d + nzrun_len > dlen) {
 291            return -1;
 292        }
 293        nzrun_start = new_buf + i - nzrun_len;
 294        memcpy(dst + d, nzrun_start, nzrun_len);
 295        d += nzrun_len;
 296    }
 297    return d;
 298}
 299#pragma GCC pop_options
 300#endif
 301