qemu/tests/avocado/tesseract_utils.py
<<
>>
Prefs
   1# ...
   2#
   3# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
   4#
   5# This work is licensed under the terms of the GNU GPL, version 2 or
   6# later. See the COPYING file in the top-level directory.
   7
   8import re
   9import logging
  10
  11from avocado.utils import process
  12from avocado.utils.path import find_command, CmdNotFoundError
  13
  14def tesseract_available(expected_version):
  15    try:
  16        find_command('tesseract')
  17    except CmdNotFoundError:
  18        return False
  19    res = process.run('tesseract --version')
  20    try:
  21        version = res.stdout_text.split()[1]
  22    except IndexError:
  23        version = res.stderr_text.split()[1]
  24    return int(version.split('.')[0]) == expected_version
  25
  26    match = re.match(r'tesseract\s(\d)', res)
  27    if match is None:
  28        return False
  29    # now this is guaranteed to be a digit
  30    return int(match.groups()[0]) == expected_version
  31
  32
  33def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3):
  34    console_logger = logging.getLogger('tesseract')
  35    console_logger.debug(image_path)
  36    if tesseract_version == 4:
  37        tesseract_args += ' --oem 1'
  38    proc = process.run("tesseract {} {} stdout".format(tesseract_args,
  39                                                       image_path))
  40    lines = []
  41    for line in proc.stdout_text.split('\n'):
  42        sline = line.strip()
  43        if len(sline):
  44            console_logger.debug(sline)
  45            lines += [sline]
  46    return lines
  47