qemu/tests/qemu-iotests/060
<<
>>
Prefs
   1#!/usr/bin/env bash
   2# group: rw auto quick
   3#
   4# Test case for image corruption (overlapping data structures) in qcow2
   5#
   6# Copyright (C) 2013 Red Hat, Inc.
   7#
   8# This program is free software; you can redistribute it and/or modify
   9# it under the terms of the GNU General Public License as published by
  10# the Free Software Foundation; either version 2 of the License, or
  11# (at your option) any later version.
  12#
  13# This program is distributed in the hope that it will be useful,
  14# but WITHOUT ANY WARRANTY; without even the implied warranty of
  15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16# GNU General Public License for more details.
  17#
  18# You should have received a copy of the GNU General Public License
  19# along with this program.  If not, see <http://www.gnu.org/licenses/>.
  20#
  21
  22# creator
  23owner=mreitz@redhat.com
  24
  25seq="$(basename $0)"
  26echo "QA output created by $seq"
  27
  28status=1        # failure is the default!
  29
  30_cleanup()
  31{
  32        _cleanup_test_img
  33}
  34trap "_cleanup; exit \$status" 0 1 2 3 15
  35
  36# Sometimes the error line might be dumped before/after an event
  37# randomly.  Mask it out for specific test that may trigger this
  38# uncertainty for current test for now.
  39_filter_io_error()
  40{
  41    sed '/Input\/output error/d'
  42}
  43
  44# get standard environment, filters and checks
  45. ./common.rc
  46. ./common.filter
  47
  48# This tests qcow2-specific low-level functionality
  49_supported_fmt qcow2
  50_supported_proto file fuse
  51_supported_os Linux
  52# These tests only work for compat=1.1 images without an external
  53# data file with refcount_bits=16
  54_unsupported_imgopts 'compat=0.10' data_file \
  55    'refcount_bits=\([^1]\|.\([^6]\|$\)\)'
  56
  57# The repair process will create a large file - so check for availability first
  58_require_large_file 64G
  59
  60rt_offset=65536  # 0x10000 (XXX: just an assumption)
  61rb_offset=131072 # 0x20000 (XXX: just an assumption)
  62l1_offset=196608 # 0x30000 (XXX: just an assumption)
  63l2_offset=262144 # 0x40000 (XXX: just an assumption)
  64l2_offset_after_snapshot=524288 # 0x80000 (XXX: just an assumption)
  65
  66OPEN_RW="open -o overlap-check=all $TEST_IMG"
  67# Overlap checks are done before write operations only, therefore opening an
  68# image read-only makes the overlap-check option irrelevant
  69OPEN_RO="open -r $TEST_IMG"
  70
  71echo
  72echo "=== Testing L2 reference into L1 ==="
  73echo
  74_make_test_img 64M
  75# Link first L1 entry (first L2 table) onto itself
  76# (Note the MSb in the L1 entry is set, ensuring the refcount is one - else any
  77# later write will result in a COW operation, effectively ruining this attempt
  78# on image corruption)
  79poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x03\x00\x00"
  80_check_test_img
  81
  82# The corrupt bit should not be set anyway
  83$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
  84
  85# Try to write something, thereby forcing the corrupt bit to be set
  86$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
  87
  88# The corrupt bit must now be set
  89$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
  90
  91# This information should be available through qemu-img info
  92_img_info --format-specific
  93
  94# Try to open the image R/W (which should fail)
  95$QEMU_IO -c "$OPEN_RW" -c "read 0 512" 2>&1 | _filter_qemu_io \
  96                                            | _filter_testdir \
  97                                            | _filter_imgfmt
  98
  99# Try to open it RO (which should succeed)
 100$QEMU_IO -c "$OPEN_RO" -c "read 0 512" | _filter_qemu_io
 101
 102# We could now try to fix the image, but this would probably fail (how should an
 103# L2 table linked onto the L1 table be fixed?)
 104
 105echo
 106echo "=== Testing cluster data reference into refcount block ==="
 107echo
 108_make_test_img 64M
 109# Allocate L2 table
 110truncate -s "$(($l2_offset+65536))" "$TEST_IMG"
 111poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x00\x00"
 112# Mark cluster as used
 113poke_file "$TEST_IMG" "$(($rb_offset+8))" "\x00\x01"
 114# Redirect new data cluster onto refcount block
 115poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x02\x00\x00"
 116_check_test_img
 117$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 118$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
 119$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 120
 121# Try to fix it
 122_check_test_img -r all
 123
 124# The corrupt bit should be cleared
 125$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 126
 127# Look if it's really really fixed
 128$QEMU_IO -c "$OPEN_RW" -c "write -P 0x2a 0 512" | _filter_qemu_io
 129$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 130
 131echo
 132echo "=== Testing cluster data reference into inactive L2 table ==="
 133echo
 134_make_test_img 64M
 135$QEMU_IO -c "$OPEN_RW" -c "write -P 1 0 512" | _filter_qemu_io
 136$QEMU_IMG snapshot -c foo "$TEST_IMG"
 137$QEMU_IO -c "$OPEN_RW" -c "write -P 2 0 512" | _filter_qemu_io
 138# The inactive L2 table remains at its old offset
 139poke_file "$TEST_IMG" "$l2_offset_after_snapshot" \
 140                      "\x80\x00\x00\x00\x00\x04\x00\x00"
 141_check_test_img
 142$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 143$QEMU_IO -c "$OPEN_RW" -c "write -P 3 0 512" | _filter_qemu_io
 144$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 145_check_test_img -r all
 146$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 147$QEMU_IO -c "$OPEN_RW" -c "write -P 4 0 512" | _filter_qemu_io
 148$PYTHON qcow2.py "$TEST_IMG" dump-header | grep incompatible_features
 149
 150# Check data
 151$QEMU_IO -c "$OPEN_RO" -c "read -P 4 0 512" | _filter_qemu_io
 152$QEMU_IMG snapshot -a foo "$TEST_IMG"
 153_check_test_img
 154$QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" | _filter_qemu_io
 155
 156echo
 157echo "=== Testing overlap while COW is in flight ==="
 158echo
 159BACKING_IMG=$TEST_IMG.base
 160TEST_IMG=$BACKING_IMG _make_test_img 1G
 161
 162$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io
 163
 164_make_test_img -b "$BACKING_IMG" -F $IMGFMT 1G
 165# Write two clusters, the second one enforces creation of an L2 table after
 166# the first data cluster.
 167$QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
 168# Free the first cluster. This cluster will soon enough be reallocated and
 169# used for COW.
 170poke_file "$TEST_IMG" "$l2_offset" "\x00\x00\x00\x00\x00\x00\x00\x00"
 171poke_file "$TEST_IMG" "$(($rb_offset+10))" "\x00\x00"
 172# Now, corrupt the image by marking the second L2 table cluster as free.
 173poke_file "$TEST_IMG" "$(($rb_offset+12))" "\x00\x00"
 174# Start a write operation requiring COW on the image stopping it right before
 175# doing the read; then, trigger the corruption prevention by writing anything to
 176# any unallocated cluster, leading to an attempt to overwrite the second L2
 177# table. Finally, resume the COW write and see it fail (but not crash).
 178echo "open -o file.driver=blkdebug $TEST_IMG
 179break cow_read 0
 180aio_write 0k 1k
 181wait_break 0
 182write 64k 64k
 183resume 0" | $QEMU_IO | _filter_qemu_io
 184
 185echo
 186echo "=== Testing unallocated image header ==="
 187echo
 188_make_test_img 64M
 189# Create L1/L2
 190$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 191poke_file "$TEST_IMG" "$rb_offset" "\x00\x00"
 192$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
 193
 194echo
 195echo "=== Testing unaligned L1 entry ==="
 196echo
 197_make_test_img 64M
 198$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 199# This will be masked with ~(512 - 1) = ~0x1ff, so whether the lower 9 bits are
 200# aligned or not does not matter
 201poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
 202$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
 203
 204# Test how well zero cluster expansion can cope with this
 205_make_test_img 64M
 206$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 207poke_file "$TEST_IMG" "$l1_offset" "\x80\x00\x00\x00\x00\x04\x2a\x00"
 208$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
 209
 210echo
 211echo "=== Testing unaligned L2 entry ==="
 212echo
 213_make_test_img 64M
 214$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 215poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
 216$QEMU_IO -c "read 0 64k" "$TEST_IMG" | _filter_qemu_io
 217
 218echo
 219echo "=== Testing unaligned pre-allocated zero cluster ==="
 220echo
 221_make_test_img 64M
 222$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 223poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x01"
 224# zero cluster expansion
 225$QEMU_IMG amend -o compat=0.10 "$TEST_IMG"
 226
 227echo
 228echo "=== Testing unaligned reftable entry ==="
 229echo
 230_make_test_img 64M
 231poke_file "$TEST_IMG" "$rt_offset" "\x00\x00\x00\x00\x00\x02\x2a\x00"
 232$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 233
 234echo
 235echo "=== Testing non-fatal corruption on freeing ==="
 236echo
 237_make_test_img 64M
 238$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 239poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
 240$QEMU_IO -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
 241
 242echo
 243echo "=== Testing read-only corruption report ==="
 244echo
 245_make_test_img 64M
 246$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 247poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x05\x2a\x00"
 248# Should only emit a single error message
 249$QEMU_IO -c "$OPEN_RO" -c "read 0 64k" -c "read 0 64k" | _filter_qemu_io
 250
 251echo
 252echo "=== Testing non-fatal and then fatal corruption report ==="
 253echo
 254_make_test_img 64M
 255$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
 256poke_file "$TEST_IMG" "$l2_offset"        "\x80\x00\x00\x00\x00\x05\x2a\x00"
 257poke_file "$TEST_IMG" "$(($l2_offset+8))" "\x80\x00\x00\x00\x00\x06\x2a\x00"
 258# Should emit two error messages
 259$QEMU_IO -c "discard 0 64k" -c "read 64k 64k" "$TEST_IMG" | _filter_qemu_io
 260
 261echo
 262echo "=== Testing empty refcount table ==="
 263echo
 264_make_test_img 64M
 265poke_file "$TEST_IMG" "$rt_offset"        "\x00\x00\x00\x00\x00\x00\x00\x00"
 266$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 267# Repair the image
 268_check_test_img -r all
 269
 270echo
 271echo "=== Testing empty refcount table with valid L1 and L2 tables ==="
 272echo
 273_make_test_img 64M
 274$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 275poke_file "$TEST_IMG" "$rt_offset"        "\x00\x00\x00\x00\x00\x00\x00\x00"
 276# Since the first data cluster is already allocated this triggers an
 277# allocation with an explicit offset (using qcow2_alloc_clusters_at())
 278# causing a refcount block to be allocated at offset 0
 279$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
 280# Repair the image
 281_check_test_img -r all
 282
 283echo
 284echo "=== Testing empty refcount block ==="
 285echo
 286_make_test_img 64M
 287poke_file "$TEST_IMG" "$rb_offset"        "\x00\x00\x00\x00\x00\x00\x00\x00"
 288$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 289# Repair the image
 290_check_test_img -r all
 291
 292echo
 293echo "=== Testing empty refcount block with compressed write ==="
 294echo
 295_make_test_img 64M
 296$QEMU_IO -c "write 64k 64k" "$TEST_IMG" | _filter_qemu_io
 297poke_file "$TEST_IMG" "$rb_offset"        "\x00\x00\x00\x00\x00\x00\x00\x00"
 298# The previous write already allocated an L2 table, so now this new
 299# write will try to allocate a compressed data cluster at offset 0.
 300$QEMU_IO -c "write -c 0k 64k" "$TEST_IMG" | _filter_qemu_io
 301# Repair the image
 302_check_test_img -r all
 303
 304echo
 305echo "=== Testing zero refcount table size ==="
 306echo
 307_make_test_img 64M
 308poke_file "$TEST_IMG" "56"                "\x00\x00\x00\x00"
 309$QEMU_IO -c "write 0 64k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
 310# Repair the image
 311_check_test_img -r all
 312
 313echo
 314echo "=== Testing incorrect refcount table offset ==="
 315echo
 316_make_test_img 64M
 317poke_file "$TEST_IMG" "48"                "\x00\x00\x00\x00\x00\x00\x00\x00"
 318$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 319
 320echo
 321echo "=== Testing dirty corrupt image ==="
 322echo
 323
 324_make_test_img 64M
 325
 326# Let the refblock appear unaligned
 327poke_file "$TEST_IMG" "$rt_offset"        "\x00\x00\x00\x00\xff\xff\x2a\x00"
 328# Mark the image dirty, thus forcing an automatic check when opening it
 329poke_file "$TEST_IMG" 72 "\x00\x00\x00\x00\x00\x00\x00\x01"
 330# Open the image (qemu should refuse to do so)
 331$QEMU_IO -c close "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt
 332
 333echo '--- Repairing ---'
 334
 335# The actual repair should have happened (because of the dirty bit),
 336# but some cleanup may have failed (like freeing the old reftable)
 337# because the image was already marked corrupt by that point
 338_check_test_img -r all
 339
 340echo
 341echo "=== Writing to an unaligned preallocated zero cluster ==="
 342echo
 343
 344_make_test_img 64M
 345
 346# Allocate the L2 table
 347$QEMU_IO -c "write 0 64k" -c "discard 0 64k" "$TEST_IMG" | _filter_qemu_io
 348# Pretend there is a preallocated zero cluster somewhere inside the
 349# image header
 350poke_file "$TEST_IMG" "$l2_offset" "\x80\x00\x00\x00\x00\x00\x2a\x01"
 351# Let's write to it!
 352$QEMU_IO -c "write 0 64k" "$TEST_IMG" | _filter_qemu_io
 353
 354echo '--- Repairing ---'
 355_check_test_img -r all
 356
 357echo
 358echo '=== Discarding with an unaligned refblock ==='
 359echo
 360
 361_make_test_img 64M
 362
 363$QEMU_IO -c "write 0 128k" "$TEST_IMG" | _filter_qemu_io
 364# Make our refblock unaligned
 365poke_file "$TEST_IMG" "$(($rt_offset))" "\x00\x00\x00\x00\x00\x00\x2a\x00"
 366# Now try to discard something that will be submitted as two requests
 367# (main part + tail)
 368$QEMU_IO -c "discard 0 65537" "$TEST_IMG"
 369
 370echo '--- Repairing ---'
 371# Fails the first repair because the corruption prevents the check
 372# function from double-checking
 373# (Using -q for the first invocation, because otherwise the
 374#  double-check error message appears above the summary for some
 375#  reason -- so let's just hide the summary)
 376_check_test_img -q -r all
 377_check_test_img -r all
 378
 379echo
 380echo "=== Discarding an out-of-bounds refblock ==="
 381echo
 382
 383_make_test_img 64M
 384
 385# Pretend there's a refblock really up high
 386poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\xff\xff\xff\x00\x00\x00\x00"
 387# Let's try to shrink the qcow2 image so that the block driver tries
 388# to discard that refblock (and see what happens!)
 389$QEMU_IMG resize --shrink "$TEST_IMG" 32M
 390
 391echo '--- Checking and retrying ---'
 392# Image should not be resized
 393_img_info | grep 'virtual size'
 394# But it should pass this check, because the "partial" resize has
 395# already overwritten refblocks past the end
 396_check_test_img -r all
 397# So let's try again
 398$QEMU_IMG resize --shrink "$TEST_IMG" 32M
 399_img_info | grep 'virtual size'
 400
 401echo
 402echo "=== Discarding a non-covered in-bounds refblock ==="
 403echo
 404
 405_make_test_img -o 'refcount_bits=1' 64M
 406
 407# Pretend there's a refblock somewhere where there is no refblock to
 408# cover it (but the covering refblock has a valid index in the
 409# reftable)
 410# Every refblock covers 65536 * 8 * 65536 = 32 GB, so we have to point
 411# to 0x10_0000_0000 (64G) to point to the third refblock
 412poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
 413$QEMU_IMG resize --shrink "$TEST_IMG" 32M
 414
 415echo '--- Checking and retrying ---'
 416# Image should not be resized
 417_img_info | grep 'virtual size'
 418# But it should pass this check, because the "partial" resize has
 419# already overwritten refblocks past the end
 420_check_test_img -r all
 421# So let's try again
 422$QEMU_IMG resize --shrink "$TEST_IMG" 32M
 423_img_info | grep 'virtual size'
 424
 425echo
 426echo "=== Discarding a refblock covered by an unaligned refblock ==="
 427echo
 428
 429_make_test_img -o 'refcount_bits=1' 64M
 430
 431# Same as above
 432poke_file "$TEST_IMG" "$(($rt_offset+8))" "\x00\x00\x00\x10\x00\x00\x00\x00"
 433# But now we actually "create" an unaligned third refblock
 434poke_file "$TEST_IMG" "$(($rt_offset+16))" "\x00\x00\x00\x00\x00\x00\x02\x00"
 435$QEMU_IMG resize --shrink "$TEST_IMG" 32M
 436
 437echo '--- Repairing ---'
 438# Fails the first repair because the corruption prevents the check
 439# function from double-checking
 440# (Using -q for the first invocation, because otherwise the
 441#  double-check error message appears above the summary for some
 442#  reason -- so let's just hide the summary)
 443_check_test_img -q -r all
 444_check_test_img -r all
 445
 446echo
 447echo "=== Testing the QEMU shutdown with a corrupted image ==="
 448echo
 449_make_test_img 64M
 450poke_file "$TEST_IMG" "$rt_offset"        "\x00\x00\x00\x00\x00\x00\x00\x00"
 451echo "{'execute': 'qmp_capabilities'}
 452      {'execute': 'human-monitor-command',
 453       'arguments': {'command-line': 'qemu-io drive \"write 0 512\"'}}
 454      {'execute': 'quit'}" \
 455    | $QEMU -qmp stdio -nographic -nodefaults \
 456            -drive if=none,node-name=drive,file="$TEST_IMG",driver=qcow2 \
 457    | _filter_qmp | _filter_qemu_io
 458
 459echo
 460echo "=== Testing incoming inactive corrupted image ==="
 461echo
 462
 463_make_test_img 64M
 464# Create an unaligned L1 entry, so qemu will signal a corruption when
 465# reading from the covered area
 466poke_file "$TEST_IMG" "$l1_offset" "\x00\x00\x00\x00\x2a\x2a\x2a\x2a"
 467
 468# Inactive images are effectively read-only images, so this should be a
 469# non-fatal corruption (which does not modify the image)
 470echo "{'execute': 'qmp_capabilities'}
 471      {'execute': 'human-monitor-command',
 472       'arguments': {'command-line': 'qemu-io drive \"read 0 512\"'}}
 473      {'execute': 'quit'}" \
 474    | $QEMU -qmp stdio -nographic -nodefaults \
 475            -blockdev "{'node-name': 'drive',
 476                        'driver': 'qcow2',
 477                        'file': {
 478                            'driver': 'file',
 479                            'filename': '$TEST_IMG'
 480                        }}" \
 481            -incoming exec:'cat /dev/null' \
 482            2>&1 \
 483    | _filter_qmp | _filter_qemu_io | _filter_io_error
 484
 485echo
 486# Image should not have been marked corrupt
 487_img_info --format-specific | grep 'corrupt:'
 488
 489# success, all done
 490echo "*** done"
 491rm -f $seq.full
 492status=0
 493