Merge pull request #120349 from lukegb/debug-release-2009

nixos/test-driver: use a variety of different Tesseract settings for OCR
This commit is contained in:
Luke Granger-Brown 2021-04-23 21:04:02 +01:00 committed by GitHub
commit 6e4f8b06f5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 70 additions and 33 deletions

View file

@ -186,6 +186,25 @@ start_all()
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term>
<methodname>get_screen_text_variants</methodname>
</term>
<listitem>
<para>
Return a list of different interpretations of what is currently visible
on the machine's screen using optical character recognition. The number
and order of the interpretations are not specified and are subject to
change, but if no exception is raised, at least one will be returned.
</para>
<note>
<para>
This requires passing <option>enableOCR</option> to the test attribute
set.
</para>
</note>
</listitem>
</varlistentry>
<varlistentry> <varlistentry>
<term> <term>
<methodname>get_screen_text</methodname> <methodname>get_screen_text</methodname>
@ -350,7 +369,8 @@ start_all()
<para> <para>
Wait until the supplied regular expression matches the textual contents Wait until the supplied regular expression matches the textual contents
of the screen by using optical character recognition (see of the screen by using optical character recognition (see
<methodname>get_screen_text</methodname>). <methodname>get_screen_text</methodname> and
<methodname>get_screen_text_variants</methodname>).
</para> </para>
<note> <note>
<para> <para>

View file

@ -1,7 +1,7 @@
#! /somewhere/python3 #! /somewhere/python3
from contextlib import contextmanager, _GeneratorContextManager from contextlib import contextmanager, _GeneratorContextManager
from queue import Queue, Empty from queue import Queue, Empty
from typing import Tuple, Any, Callable, Dict, Iterator, Optional, List from typing import Tuple, Any, Callable, Dict, Iterator, Optional, List, Iterable
from xml.sax.saxutils import XMLGenerator from xml.sax.saxutils import XMLGenerator
import queue import queue
import io import io
@ -205,6 +205,37 @@ class Logger:
self.xml.endElement("nest") self.xml.endElement("nest")
def _perform_ocr_on_screenshot(
    screenshot_path: str, model_ids: Iterable[int]
) -> List[str]:
    """Run Tesseract over a screenshot, once per requested engine mode.

    The screenshot is first preprocessed with ImageMagick's ``convert`` and
    written next to the input as ``<screenshot_path>.tiff``. Tesseract is then
    run on that TIFF once for every entry of ``model_ids``; each entry is
    passed through as the value of Tesseract's ``--oem`` option.

    Args:
        screenshot_path: Path of the screenshot image to recognise.
        model_ids: Values for ``--oem``, one Tesseract run per value.

    Returns:
        The decoded standard output of each Tesseract run, in the order the
        ids were supplied.

    Raises:
        Exception: if ``tesseract`` is not on ``PATH``, or if ``convert`` or
            ``tesseract`` exits with a non-zero status.
        OSError: if ``convert`` cannot be executed at all (e.g. it is not
            installed); previously the shell reported this as exit code 127.
    """
    if shutil.which("tesseract") is None:
        raise Exception("OCR requested but enableOCR is false")

    tiff_path = f"{screenshot_path}.tiff"

    magick_args = [
        "-filter", "Catrom", "-density", "72", "-resample", "300",
        "-contrast", "-normalize", "-despeckle", "-type", "grayscale",
        "-sharpen", "1", "-posterize", "3", "-negate", "-gamma", "100",
        "-blur", "1x65535",
    ]
    tess_args = ["-c", "debug_file=/dev/null", "--psm", "11"]

    # Commands are passed as argv lists (no shell) so that a screenshot path
    # containing spaces or shell metacharacters reaches the tools verbatim.
    ret = subprocess.run(
        ["convert", *magick_args, screenshot_path, f"tiff:{tiff_path}"],
        capture_output=True,
    )
    if ret.returncode != 0:
        raise Exception(f"TIFF conversion failed with exit code {ret.returncode}")

    model_results = []
    for model_id in model_ids:
        # "-" as the output base makes tesseract write the text to stdout.
        ret = subprocess.run(
            ["tesseract", tiff_path, "-", *tess_args, "--oem", str(model_id)],
            capture_output=True,
        )
        if ret.returncode != 0:
            raise Exception(f"OCR failed with exit code {ret.returncode}")
        model_results.append(ret.stdout.decode("utf-8"))

    return model_results
class Machine: class Machine:
def __init__(self, args: Dict[str, Any]) -> None: def __init__(self, args: Dict[str, Any]) -> None:
if "name" in args: if "name" in args:
@ -637,43 +668,29 @@ class Machine:
"""Debugging: Dump the contents of the TTY<n>""" """Debugging: Dump the contents of the TTY<n>"""
self.execute("fold -w 80 /dev/vcs{} | systemd-cat".format(tty)) self.execute("fold -w 80 /dev/vcs{} | systemd-cat".format(tty))
def _get_screen_text_variants(self, model_ids: Iterable[int]) -> List[str]:
    """Dump the machine's screen to a temporary file and OCR it.

    The screenshot is written via the ``screendump`` monitor command into a
    throw-away directory, then handed to ``_perform_ocr_on_screenshot``
    together with ``model_ids``. The directory is removed on exit.
    """
    with tempfile.TemporaryDirectory() as workdir:
        dump_file = os.path.join(workdir, "ppm")
        self.send_monitor_command(f"screendump {dump_file}")
        ocr_texts = _perform_ocr_on_screenshot(dump_file, model_ids)
    return ocr_texts
def get_screen_text_variants(self) -> List[str]:
    """Return the OCR readings of the current screen for model ids 0, 1 and 2.

    Delegates to ``_get_screen_text_variants``; one string is produced per
    model id.
    """
    model_ids = [0, 1, 2]
    return self._get_screen_text_variants(model_ids)
def get_screen_text(self) -> str:
    """Return a single OCR reading of what is currently on the screen.

    Uses only model id 2, the same ``--oem 2`` setting this method used before
    the variants helper existed, and returns the one resulting string.
    Any exception raised by ``_get_screen_text_variants`` propagates.
    """
    return self._get_screen_text_variants([2])[0]
def wait_for_text(self, regex: str) -> None:
    """Wait until ``regex`` matches the OCR'd contents of the screen.

    Every attempt fetches all OCR variants of the screen via
    ``get_screen_text_variants`` and succeeds as soon as any one of them
    matches ``regex`` (using ``re.search``). Polling is delegated to
    ``retry``.
    """

    def screen_matches(last: bool) -> bool:
        variants = self.get_screen_text_variants()
        for text in variants:
            if re.search(regex, text) is not None:
                return True

        # Reaching this point means no variant matched; on the final attempt
        # log everything that was recognised to aid debugging.
        if last:
            self.log("Last OCR attempt failed. Text was: {}".format(variants))

        return False

    with self.nested("waiting for {} to appear on screen".format(regex)):
        retry(screen_matches)