From bce6fc562427e7d73ce320d8c450f77f9e415133 Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Tue, 17 Feb 2026 15:25:13 +0000 Subject: [PATCH] Add TEXT_FUZZY_VECTORS define. --- docs/vars.rst | 4 ++++ src/__init__.py | 1 + 2 files changed, 5 insertions(+) diff --git a/docs/vars.rst b/docs/vars.rst index 7759418ad..31e23dada 100644 --- a/docs/vars.rst +++ b/docs/vars.rst @@ -279,6 +279,10 @@ For the PyMuPDF programmer, some combination (using Python's `|` operator, or si 1048576 -- Delay vector blocks in the extraction slightly to avoid breaking what would otherwise be continuous lines of text. +.. py:data:: TEXT_FUZZY_VECTORS + + 2097152 -- If this option is set, we 'fuzzily' collect rectangular vectors of the same colour together. This enables us to spot where 'pixels' or 'slices' of vectors are used to create the appearance of characters on the page without exploding the storage and processing time requirements. + The following constants represent the default combinations of the above for text extraction and searching: .. py:data:: TEXTFLAGS_TEXT diff --git a/src/__init__.py b/src/__init__.py index 2d4e36c35..bde83e99c 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -17576,6 +17576,7 @@ def width(self): TEXT_CLIP = mupdf.FZ_STEXT_CLIP if mupdf_version_tuple >= (1, 27, 1): TEXT_LAZY_VECTORS = mupdf.FZ_STEXT_LAZY_VECTORS + TEXT_FUZZY_VECTORS = mupdf.FZ_STEXT_FUZZY_VECTORS TEXT_PARAGRAPH_BREAK = mupdf.FZ_STEXT_PARAGRAPH_BREAK TEXT_TABLE_HUNT = mupdf.FZ_STEXT_TABLE_HUNT