From e22166acdd842a48fd641482e5d788e727019525 Mon Sep 17 00:00:00 2001 From: zooowooxo Date: Mon, 26 Jan 2026 18:22:53 +0800 Subject: [PATCH 1/3] feat:add histoqc operator to runtime ops --- runtime/ops/histoqc_op/_init_.py | 6 ++ runtime/ops/histoqc_op/histoqc_src/HistoQC | 1 + runtime/ops/histoqc_op/metadata.yml | 25 +++++ runtime/ops/histoqc_op/process.py | 114 +++++++++++++++++++++ 4 files changed, 146 insertions(+) create mode 100644 runtime/ops/histoqc_op/_init_.py create mode 160000 runtime/ops/histoqc_op/histoqc_src/HistoQC create mode 100644 runtime/ops/histoqc_op/metadata.yml create mode 100644 runtime/ops/histoqc_op/process.py diff --git a/runtime/ops/histoqc_op/_init_.py b/runtime/ops/histoqc_op/_init_.py new file mode 100644 index 00000000..0534bbb9 --- /dev/null +++ b/runtime/ops/histoqc_op/_init_.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +from datamate.core.base_op import OPERATORS +OPERATORS.register_module( + module_name='HistoQCMapper', + module_path='ops.user.histoqc_op.process' +) \ No newline at end of file diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC b/runtime/ops/histoqc_op/histoqc_src/HistoQC new file mode 160000 index 00000000..b7956129 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC @@ -0,0 +1 @@ +Subproject commit b79561292e49d153e2e4c1401c9c48d3d5f925e1 diff --git a/runtime/ops/histoqc_op/metadata.yml b/runtime/ops/histoqc_op/metadata.yml new file mode 100644 index 00000000..c133462d --- /dev/null +++ b/runtime/ops/histoqc_op/metadata.yml @@ -0,0 +1,25 @@ +name: 'HistoQC' +description: '集成HistoQC输出处理 : 提取伪影坐标(GeoJSON)、汇总质量指标(TSV)并关联全套掩码图像(PNG)。' +language: 'python' +vendor: 'huawei' +raw_id: 'HistoQCMapper' +version: '1.0.0' +modal: 'multimodal' # 多模态,因为涉及文本(TSV/JSON)和图像(PNG) +inputs: 'image' +outputs: 'multimodal' +settings: + scaleFactor: + name: '缩放倍率' + description: '掩码相对于原图的下采样倍率(HistoQC log_factor)。' + type: 'inputNumber' + defaultVal: 1.0 + exportFullImages: + name: '导出全套图像' + description: '是否将所有生成的掩码图(Equalized/Mask/Blur)关联至输出。' + type: 'switch' + defaultVal: 'true' + includeStats: + name: '包含指标统计' + description: '是否从results.tsv中提取该切片的具体质量.' + type: 'switch' + defaultVal: 'true' \ No newline at end of file diff --git a/runtime/ops/histoqc_op/process.py b/runtime/ops/histoqc_op/process.py new file mode 100644 index 00000000..2a57c981 --- /dev/null +++ b/runtime/ops/histoqc_op/process.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +import cv2 +import numpy as np +import json +import os +import pandas as pd +import subprocess +from pathlib import Path +from typing import Dict, Any +from datamate.core.base_op import Mapper + +class HistoQCMapper(Mapper): + def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]: + # 动态路径与环境变量初始化 + current_dir = os.path.dirname(os.path.abspath(__file__)) + # 定位算子包内部的配置文件 + config_path = os.path.join(current_dir, "histoqc_src", "histoqc", "config", "config_v2.1.ini") + # 定位 HistoQC 源代码目录 + histoqc_src_dir = os.path.join(current_dir, "histoqc_src") + + file_path = sample.get("filePath", "") + out_dir = sample.get("export_path", os.path.dirname(file_path)) + if not os.path.exists(out_dir): + os.makedirs(out_dir, exist_ok=True) + + # HistoQC 在 TSV 和图片命名中通常保留完整文件名(含后缀) + base_name = os.path.basename(file_path) + + # 注入环境变量,确保能找到包内 HistoQC 模块 + custom_env = os.environ.copy() + custom_env["PYTHONPATH"] = f"{histoqc_src_dir}{os.pathsep}{custom_env.get('PYTHONPATH', '')}" + + #启动 HistoQC 算法 + cmd = [ + "python3", "-m", "histoqc", + "-o", out_dir, + "-c", config_path, + "-n", "4", + file_path + ] + + try: + + # 执行算法,设置 30 分钟超时 + result = subprocess.run(cmd, capture_output=True, text=True, env=custom_env, timeout=1800) + + # 如果被系统 Kill (返回码 -9) 或其他错误,记录在 text 字段 + if result.returncode != 0: + sample["text"] = json.dumps({"error": f"HistoQC failed with return code {result.returncode}", "stderr": result.stderr}) + return sample + + except Exception as e: + sample["text"] = json.dumps({"error": f"Subprocess exception: {str(e)}"}) + return sample + + #结果解析与归档 + final_report = { + "slide_name": base_name, + "metrics": {}, + "annotations": [], + "generated_images": [] + } + + # 解析 TSV 质量指标 + + tsv_path = Path(out_dir) / "results.tsv" + + if tsv_path.exists(): + try: + # comment='#' 跳过 HistoQC 的元数据行,on_bad_lines='skip' 增强容错 + df = pd.read_csv(tsv_path, sep='\t', comment='#', on_bad_lines='skip') + # 匹配第一列文件名 + row = df[df.iloc[:, 0].astype(str) == base_name] + if not row.empty: + final_report["metrics"] = row.iloc[-1].to_dict() + except Exception as e: + final_report["metrics_error"] = str(e) + + # 提取伪影坐标并转换为 GeoJSON (适应多种路径结构) + scale_factor = float(getattr(self, 'scaleFactor', 1.0)) + artifact_types = { + "pen_markings": {"name": "Pen Marking", "color": -65536}, + "coverslip_edge": {"name": "Coverslip Edge", "color": -16711936} + } + + # 考虑到 HistoQC 可能会创建子文件夹,使用 glob 进行深度搜索 + search_dir = Path(out_dir) / base_name + if not search_dir.exists(): + search_dir = Path(out_dir) + + for key, info in artifact_types.items(): + # 搜索包含文件名和伪影类型的图片 + found_files = list(search_dir.glob(f"*{base_name}*{key}*.png")) + if found_files: + mask_path = found_files[0] + mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE) + if mask is not None and np.max(mask) > 0: + _, thresh = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY) + contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + for contour in contours: + pts = (contour.reshape(-1, 2) * scale_factor).tolist() + if len(pts) < 3: continue + pts.append(pts[0]) + final_report["annotations"].append({ + "type": "Feature", + "properties": {"classification": {"name": info["name"], "colorRGB": info["color"]}}, + "geometry": {"type": "Polygon", "coordinates": [pts]} + }) + final_report["generated_images"].append(mask_path.name) + # 写回 DataMate 样本数据 + sample["text"] = json.dumps(final_report,ensure_ascii=False,indent=2) + if final_report["metrics"]: + sample["extra_info"] = final_report["metrics"] + return sample \ No newline at end of file From bc85f69f45318ee119d80e189f684862be251bd3 Mon Sep 17 00:00:00 2001 From: zooowooxo Date: Thu, 29 Jan 2026 16:31:09 +0800 Subject: [PATCH 2/3] feat:complete integration of histoqc --- runtime/ops/histoqc_op/histoqc_src/HistoQC | 1 - .../histoqc_src/HistoQC/LICENSE.txt | 32 + .../histoqc_op/histoqc_src/HistoQC/README.md | 19 + .../HistoQC/histoqc/AnnotationModule.py | 126 ++++ .../histoqc/ArtifactToGeoJSONModule.py | 84 +++ .../histoqc_src/HistoQC/histoqc/BaseImage.py | 363 +++++++++++ .../HistoQC/histoqc/BasicModule.py | 83 +++ .../HistoQC/histoqc/BlurDetectionModule.py | 66 ++ .../HistoQC/histoqc/BrightContrastModule.py | 123 ++++ .../HistoQC/histoqc/BubbleRegionByRegion.py | 128 ++++ .../HistoQC/histoqc/ClassificationModule.py | 252 ++++++++ .../HistoQC/histoqc/DeconvolutionModule.py | 76 +++ .../HistoQC/histoqc/HistogramModule.py | 78 +++ .../HistoQC/histoqc/LightDarkModule.py | 188 ++++++ .../histoqc/LocalTextureEstimationModule.py | 57 ++ .../HistoQC/histoqc/MorphologyModule.py | 161 +++++ .../histoqc_src/HistoQC/histoqc/SaveModule.py | 175 +++++ .../HistoQC/histoqc/TileExtractionModule.py | 610 ++++++++++++++++++ .../histoqc_src/HistoQC/histoqc/__init__.py | 4 + .../histoqc_src/HistoQC/histoqc/__main__.py | 215 ++++++ .../histoqc_src/HistoQC/histoqc/_parser.py | 44 ++ .../histoqc_src/HistoQC/histoqc/_pipeline.py | 409 ++++++++++++ .../histoqc_src/HistoQC/histoqc/_worker.py | 106 +++ .../HistoQC/histoqc/annotations/__init__.py | 0 .../histoqc/annotations/annot_collection.py | 66 ++ .../annotations/annotation/__init__.py | 1 + .../histoqc/annotations/annotation/base.py | 176 +++++ .../histoqc/annotations/annotation/geojson.py | 83 +++ .../annotations/annotation/imagescope.py | 93 +++ .../histoqc/annotations/io_utils/__init__.py | 1 + .../histoqc/annotations/io_utils/json.py | 12 + .../HistoQC/histoqc/config/__init__.py | 38 ++ .../HistoQC/histoqc/config/__main__.py | 40 ++ .../HistoQC/histoqc/config/_parser.py | 14 + .../HistoQC/histoqc/config/config.ini | 200 ++++++ .../histoqc/config/config_clinical.ini | 228 +++++++ .../HistoQC/histoqc/config/config_first.ini | 92 +++ .../HistoQC/histoqc/config/config_ihc.ini | 246 +++++++ .../HistoQC/histoqc/config/config_light.ini | 187 ++++++ .../HistoQC/histoqc/config/config_v2.1.ini | 262 ++++++++ .../HistoQC/histoqc/data/__init__.py | 162 +++++ .../HistoQC/histoqc/data/__main__.py | 29 + .../coverslip_edge_he/coverslip_edge.png | Bin 0 -> 1117386 bytes .../coverslip_edge_he/coverslip_edge_mask.png | Bin 0 -> 2999 bytes .../coverslip_edge_ihc/coverslip_edge.png | Bin 0 -> 497487 bytes .../coverslip_edge_mask.png | Bin 0 -> 1868 bytes .../data/models/pen_markings_he/he.tsv | 26 + .../data/models/pen_markings_he/pen_green.png | Bin 0 -> 92581 bytes .../models/pen_markings_he/pen_green_mask.png | Bin 0 -> 712 bytes .../data/models/pen_markings_he/pen_red.png | Bin 0 -> 364582 bytes .../models/pen_markings_he/pen_red_mask.png | Bin 0 -> 1662 bytes .../histoqc/data/pen/1k_version/pen_black.png | Bin 0 -> 529786 bytes .../data/pen/1k_version/pen_black_mask.png | Bin 0 -> 12802 bytes .../histoqc/data/pen/1k_version/pen_green.png | Bin 0 -> 1165369 bytes .../data/pen/1k_version/pen_green_mask.png | Bin 0 -> 2819 bytes .../histoqc/data/pen/1k_version/pen_red.png | Bin 0 -> 1345874 bytes .../data/pen/1k_version/pen_red_mask.png | Bin 0 -> 3849 bytes .../data/pen/1k_version/pen_red_mask_back.png | Bin 0 -> 282004 bytes .../histoqc/data/pen/1k_version/red_mask.png | Bin 0 -> 116 bytes .../histoqc/data/templates/template1.png | Bin 0 -> 579612 bytes .../histoqc/data/templates/template2.png | Bin 0 -> 567065 bytes .../histoqc/data/templates/template3.png | Bin 0 -> 595367 bytes .../histoqc/data/templates/template4.png | Bin 0 -> 588290 bytes .../histoqc/import_wrapper/__init__.py | 0 .../HistoQC/histoqc/import_wrapper/helper.py | 23 + .../histoqc/import_wrapper/openslide.py | 9 + .../HistoQC/histoqc/import_wrapper/typing.py | 6 + .../histoqc_src/HistoQC/requirements.txt | 15 + 68 files changed, 5408 insertions(+), 1 deletion(-) delete mode 160000 runtime/ops/histoqc_op/histoqc_src/HistoQC create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/LICENSE.txt create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/README.md create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/AnnotationModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ArtifactToGeoJSONModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BaseImage.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BasicModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BlurDetectionModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BrightContrastModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BubbleRegionByRegion.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ClassificationModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/DeconvolutionModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/HistogramModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LightDarkModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LocalTextureEstimationModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/MorphologyModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/SaveModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/TileExtractionModule.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__init__.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__main__.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_parser.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_pipeline.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_worker.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/__init__.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annot_collection.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/__init__.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/base.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/geojson.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/imagescope.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/__init__.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/json.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__init__.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__main__.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/_parser.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config.ini create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_clinical.ini create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_first.ini create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_ihc.ini create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_light.ini create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_v2.1.ini create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/__init__.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/__main__.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/models/coverslip_edge_he/coverslip_edge.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/models/coverslip_edge_he/coverslip_edge_mask.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/models/coverslip_edge_ihc/coverslip_edge.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/models/coverslip_edge_ihc/coverslip_edge_mask.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/models/pen_markings_he/he.tsv create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/models/pen_markings_he/pen_green.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/models/pen_markings_he/pen_green_mask.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/models/pen_markings_he/pen_red.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/models/pen_markings_he/pen_red_mask.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/pen/1k_version/pen_black.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/pen/1k_version/pen_black_mask.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/pen/1k_version/pen_green.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/pen/1k_version/pen_green_mask.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/pen/1k_version/pen_red.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/pen/1k_version/pen_red_mask.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/pen/1k_version/pen_red_mask_back.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/pen/1k_version/red_mask.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/templates/template1.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/templates/template2.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/templates/template3.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/templates/template4.png create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/import_wrapper/__init__.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/import_wrapper/helper.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/import_wrapper/openslide.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/import_wrapper/typing.py create mode 100644 runtime/ops/histoqc_op/histoqc_src/HistoQC/requirements.txt diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC b/runtime/ops/histoqc_op/histoqc_src/HistoQC deleted file mode 160000 index b7956129..00000000 --- a/runtime/ops/histoqc_op/histoqc_src/HistoQC +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b79561292e49d153e2e4c1401c9c48d3d5f925e1 diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/LICENSE.txt b/runtime/ops/histoqc_op/histoqc_src/HistoQC/LICENSE.txt new file mode 100644 index 00000000..382cc7a0 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/LICENSE.txt @@ -0,0 +1,32 @@ +The Clear BSD License + +Copyright (c) 2019 Andrew Janowczyk +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted (subject to the limitations in the disclaimer +below) provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY +THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/README.md b/runtime/ops/histoqc_op/histoqc_src/HistoQC/README.md new file mode 100644 index 00000000..fe7d91db --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/README.md @@ -0,0 +1,19 @@ +# HistoQC Mapper for DataMate + +[![Original Project](https://github.com/choosehappy/HistoQC)] + + **说明**: 本算子是 [HistoQC]在 DataMate 算子管线中的标准集成实现。包含了针对 WSI 质量控制产物的结构化提取逻辑。 + + +## 主要改动 (Modifications) + +本算子在集成过程中,针对 DataMate 的 `Mapper` 架构进行了以下逻辑增强: + +### 1. 空间坐标提取 (Spatial Coordinates Extraction) +**掩码转坐标**:新增了对 HistoQC 输出掩码(Mask PNG)的后处理逻辑。 +**GeoJSON 产出**:通过 OpenCV 轮廓检测算法,将伪影区域(如笔迹、盖玻片边缘)转换为标准的 GeoJSON 多边形数据,并封装于 `sample["text"]` 中。 +**缩放对齐**:支持通过 `scaleFactor` 参数对检测坐标进行重采样映射,确保坐标体系与原图一致。 + +### 2. 性能与耗时监控 (Performance & Timing) +**执行计时**:在算子执行周期内引入了计时器,记录每个切片任务的实际处理时长。 +**指标关联**:将处理耗时(Processing Time)与输入文件大小(File Size)作为元数据存入输出结果,便于后续进行算法性能评估。 diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/AnnotationModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/AnnotationModule.py new file mode 100644 index 00000000..96912590 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/AnnotationModule.py @@ -0,0 +1,126 @@ +import logging +from typing import List, Tuple +from histoqc.BaseImage import printMaskHelper +from skimage import io, img_as_ubyte +import os +from pathlib import PurePosixPath, Path +from shapely.geometry import Polygon +from shapely import affinity +from PIL import Image, ImageDraw +import numpy as np +from histoqc.annotations.annot_collection import AnnotCollection, PARSER_BUILDER_MAP, TYPE_SUPPORTED_PARSER, Region + + +def rescale_by_img_bbox(polygon: Polygon, offset_xy: Tuple[float, float], resize_factor: float) -> Polygon: + if isinstance(offset_xy, float): + offset_xy = (offset_xy, offset_xy) + x_off, y_off = offset_xy + polygon = affinity.translate(polygon, xoff=x_off, yoff=y_off) + polygon = affinity.scale(polygon, xfact=resize_factor, yfact=resize_factor, origin=(0, 0)) + return polygon + + +def polygon_filled(draw_pil: ImageDraw, polygon: Polygon, offset_xy: Tuple[float, float], resize_factor: float): + polygon = rescale_by_img_bbox(polygon, offset_xy, resize_factor) + # outer + exterior_coords = list(polygon.exterior.coords) + draw_pil.polygon(exterior_coords, fill=1, outline=1, width=0) + for component in polygon.interiors: + interior_coord = list(component.coords) + draw_pil.polygon(interior_coord, fill=0, outline=0, width=0) + return draw_pil + + +def annotation_to_mask(width: int, height: int, annot_collection: AnnotCollection, offset_xy: Tuple[float, float], + resize_factor: float) -> np.ndarray: + # binary + mask = Image.new(mode="1", size=(width, height)) + draw_pil = ImageDraw.Draw(mask) + all_regions: List[Region] = annot_collection.all_regions + for region in all_regions: + polygon: Polygon = region['polygon'] + # skip if empty ring (e.g., misclick in qupath) + if polygon.is_empty or (not polygon.is_valid): + continue + draw_pil = polygon_filled(draw_pil, polygon, offset_xy, resize_factor) + # noinspection PyTypeChecker + return np.array(mask) + + +def getParams(s, params): + # read params - format: xml, json; file_path; suffix; + ann_format = params.get("format", None) + file_path = params.get("file_path", None) + suffix = params.get("suffix", "") + + # try use default value if the params are not provided + if not ann_format: + # set default format + ann_format = "xml" + # warning msg + msg = f"format is not provided, using xml as the default format." + logging.warning(f"{s['filename']} - {msg}") + s["warnings"].append(msg) + + if not file_path: + # set default file path + file_path = s["dir"] + # warning msg + msg = f"file path is not provided, using \"{s['dir']}\" as the default file path" + logging.warning(f"{s['filename']} - {msg}") + s["warnings"].append(msg) + + return ann_format, file_path, suffix + + +def saveAnnotationMask(s, params): + logging.info(f"{s['filename']} - \tgetAnnotationMask") + + (ann_format, file_path, suffix) = getParams(s, params) + + # annotation file path + f_path = f"{file_path}{os.sep}{PurePosixPath(s['filename']).stem}{suffix}.{ann_format}" + + if not Path(f_path).is_file(): + msg = f"Annotation file {f_path} does not exist. Skipping..." + logging.warning(f"{s['filename']} - {msg}") + s["warnings"].append(msg) + return + + logging.info(f"{s['filename']} - \tusing {f_path}") + + # todo better using the Py3.10 match statement - so it will be a Literal + # noinspection PyTypeChecker + annotation_type: TYPE_SUPPORTED_PARSER = ann_format.lower() + logging.info(f"{s['filename']} - \tusing {annotation_type}") + # read points set + if annotation_type in PARSER_BUILDER_MAP: # xml + annot_collection = AnnotCollection.build(parser_type=annotation_type, uri=f_path, label_map=None) + # get_points_from_geojson(s, f_path) + else: # unsupported format + msg = f"unsupported file format '{ann_format}'. Skipping..." + logging.warning(f"{s['filename']} - {msg}") + s["warnings"].append(msg) + return + + (off_x, off_y, ncol, nrow) = s["img_bbox"] + resize_factor = np.shape(s["img_mask_use"])[1] / ncol + height, width = s["img_mask_use"].shape + annotationMask = annotation_to_mask(width, height, annot_collection, (off_x, off_y), resize_factor) > 0 + + mask_file_name = f"{s['outdir']}{os.sep}{s['filename']}_annot_{ann_format.lower()}.png" + io.imsave(mask_file_name, img_as_ubyte(annotationMask)) + + prev_mask = s["img_mask_use"] + s["img_mask_use"] = prev_mask & annotationMask + s.addToPrintList("getAnnotationMask", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning( + f"{s['filename']} - After AnnotationModule.getAnnotationMask " + f"NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append( + f"After AnnotationModule.getAnnotationMask NO tissue remains detectable!" + f" Downstream modules likely to be incorrect/fail") + return diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ArtifactToGeoJSONModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ArtifactToGeoJSONModule.py new file mode 100644 index 00000000..67fa70d8 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ArtifactToGeoJSONModule.py @@ -0,0 +1,84 @@ +import cv2 +import numpy as np +import json +import os +import logging +from pathlib import Path + +def export_geojson(di, params): + # 获取当前切片的输出目录 + outdir = di.get("outdir", ".") + # 获取切片名称 (HistoQC 通常以这个名字开头保存图片) + sname = di.get("sname") or di.get("img_base") + if not sname and "filename" in di: + sname = Path(di["filename"]).stem + + if not sname: + logging.error("无法获取切片名称,跳过 GeoJSON 生成。") + return + + # 获取缩放因子 (WSI原图与处理图的倍率) + scale_factor = di.get("log_factor", 1.0) + + # 定义想要识别的后缀和对应 QuPath 类别 + # 注意:这里务必和 output 文件夹里生成的文件名后缀一致 + ARTIFACT_CONFIG = { + "_pen_markings.png": {"name": "Pen Marking", "color": -65536}, + "_coverslip_edge.png": {"name": "Coverslip Edge", "color": -16711936}, + "_flat.png": {"name": "Air Bubble", "color": -16776961} + } + + all_features = [] + found_files = [] + + logging.info(f" [切片: {sname}] 启动后期扫描提取 ") + + # 扫描文件夹中对应的 PNG + for suffix, info in ARTIFACT_CONFIG.items(): + # HistoQC 保存的规律通常是 {sname}{suffix} + # 例如: TCGA-XXX_pen_markings.png + target_path = Path(outdir) / f"{sname}{suffix}" + + if not target_path.exists(): + # 兼容性扫描:如果文件名里有多余的点或空格,进行模糊匹配 + potential = list(Path(outdir).glob(f"*{suffix}")) + if potential: + target_path = potential[0] + else: + continue + + # 读取并转换 + mask = cv2.imread(str(target_path), cv2.IMREAD_GRAYSCALE) + if mask is None or np.max(mask) == 0: + continue + + found_files.append(target_path.name) + + # 提取轮廓 + _, thresh = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY) + contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + for contour in contours: + pts = (contour.reshape(-1, 2) * scale_factor).tolist() + if len(pts) < 3: continue + pts.append(pts[0]) # 闭合 + + all_features.append({ + "type": "Feature", + "properties": { + "objectType": "annotation", + "classification": {"name": info["name"], "colorRGB": info["color"]}, + "isLocked": False + }, + "geometry": {"type": "Polygon", "coordinates": [pts]} + }) + # 保存 JSON + if all_features: + output_file = Path(outdir) / f"{sname}_artifacts.json" + with open(output_file, 'w') as f: + json.dump({"type": "FeatureCollection", "features": all_features}, f) + logging.info(f"[成功] 从 {found_files} 提取了 {len(all_features)} 个坐标点") + else: + logging.warning(f"[跳过] 在目录 {outdir} 中未找到有效伪影图像或图像为空") + + return \ No newline at end of file diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BaseImage.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BaseImage.py new file mode 100644 index 00000000..93a51f44 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BaseImage.py @@ -0,0 +1,363 @@ +import logging +import os +import numpy as np +import zlib, dill +from distutils.util import strtobool +from PIL import Image +import re +from typing import Union, Tuple +#os.environ['PATH'] = 'C:\\research\\openslide\\bin' + ';' + os.environ['PATH'] #can either specify openslide bin path in PATH, or add it dynamically +from histoqc.import_wrapper.openslide import openslide +from skimage import measure, morphology +# there is no branch reset group in re +# compatible with the previous definition of valid input: leading zero and leading decimals are supported +_REGEX_MAG = r"^(\d?\.?\d*X?)" +_PATTERN_MAG: re.Pattern = re.compile(_REGEX_MAG, flags=re.IGNORECASE) +MAG_NA = None + + +class BaseImage(dict): + + def __init__(self, fname, fname_outdir, seed, params): + dict.__init__(self) + + self.in_memory_compression = strtobool(params.get("in_memory_compression", "False")) + + self["warnings"] = [''] # this needs to be first key in case anything else wants to add to it + self["output"] = [] + + # these 2 need to be first for UI to work + self.addToPrintList("filename", os.path.basename(fname)) + self.addToPrintList("comments", " ") + + self["outdir"] = fname_outdir + self["seed"] = seed + self["dir"] = os.path.dirname(fname) + + self["os_handle"] = openslide.OpenSlide(fname) + self["image_base_size"] = self["os_handle"].dimensions + self["enable_bounding_box"] = strtobool(params.get("enable_bounding_box","False")) + # check if the bbox if doesn't have bbox set enable_bounding_box to False + self.setBBox() + self.addToPrintList("image_bounding_box", self["img_bbox"]) + self["image_work_size"] = params.get("image_work_size", "1.25x") + self["mask_statistics"] = params.get("mask_statistics", "relative2mask") + + self["base_mag"] = getMag(self, params) + + if not self["base_mag"]: + logging.error(f"{self['filename']}: Has unknown or uncalculated base magnification, cannot specify magnification scale! Did you try getMag?") + return -1 + + self.addToPrintList("base_mag", self["base_mag"]) + + mask_statistics_types = ["relative2mask", "absolute", "relative2image"] + if (self["mask_statistics"] not in mask_statistics_types): + logging.error( + f"mask_statistic type '{self['mask_statistics']}' is not one of the 3 supported options relative2mask, absolute, relative2image!") + exit() + + self["img_mask_use"] = np.ones(self.getImgThumb(self["image_work_size"]).shape[0:2], dtype=bool) + self["img_mask_force"] = [] + + self["completed"] = [] + + def __getitem__(self, key): + value = super(BaseImage, self).__getitem__(key) + if hasattr(self,"in_memory_compression") and self.in_memory_compression and key.startswith("img"): + value = dill.loads(zlib.decompress(value)) + return value + + def __setitem__(self, key, value): + if hasattr(self,"in_memory_compression") and self.in_memory_compression and key.startswith("img"): + value = zlib.compress(dill.dumps(value), level=5) + + return super(BaseImage, self).__setitem__(key,value) + + # setbounding box start coordinate and size + def setBBox(self): + # add self["img_bbox"] = (x, y, width, heigh) + osh = self["os_handle"] + # set default bbox + (dim_width, dim_height) = osh.dimensions + self["img_bbox"] = (0, 0, dim_width, dim_height) + # try to get bbox if bounding_box is ture + if self["enable_bounding_box"]: + # try get bbox from os handle properties + try: + x = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 'NA')) + y = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 'NA')) + width = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, 'NA')) + height = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, 'NA')) + self["img_bbox"] = (x, y, width, height) + except: + # no bbox info in slide set enable_bounding_box as Flase + self["enable_bounding_box"] = False + logging.warning(f"{self['filename']}: Bounding Box requested but could not read") + self["warnings"].append("Bounding Box requested but could not read") + + def addToPrintList(self, name, val): + self[name] = val + self["output"].append(name) + + # find the next higher level by giving a downsample factor + # return (level, isFindCloseLevel) + def getBestLevelForDownsample(self, downsample_factor: float) -> Tuple[int, bool]: + osh = self["os_handle"] + relative_down_factors_idx=[np.isclose(i/downsample_factor,1,atol=.01) for i in osh.level_downsamples] + level=np.where(relative_down_factors_idx)[0] + if level.size: + return (level[0], True) + else: + return (osh.get_best_level_for_downsample(downsample_factor), False) + + @staticmethod + def is_valid_size(size: str): + size = str(size) + return _PATTERN_MAG.fullmatch(size) is not None + + @staticmethod + def validate_slide_size(size: str, assertion: bool = False): + size = str(size) + if assertion: + assert BaseImage.is_valid_size(size), f"{size}: does not match pattern {_REGEX_MAG}" + # for now just cast it to str + return size + + def getImgThumb(self, size: str): + # note that while size is annotated as str, a bunch of functions in process Modules like SaveModule doesn't + # really handle it that way, and trace of previous coding also suggest that there actually lack a params + # type protocol in xxxModules. I think an extra layer of data sanitizing is necessary here. + size = BaseImage.validate_slide_size(size, assertion=False) + # get img key with size + key = "img_" + str(size) + # return the img if it exists + if key in self: + return self[key] + + # get open slide handle + osh = self["os_handle"] + + # get the size of view on current img - the current size of view by using the bounding box. + # bounding box could be the size of whole img or read the size from the slide mate data. + (bx, by, bwidth, bheight) = self["img_bbox"] + img_base_size = (bwidth, bheight) + + # barricade the invalid input first + # can't determine operation. + if not BaseImage.is_valid_size(size): + # print out error message + err_msg = f"{self['filename']}: invalid arguments - {size}" + logging.error(err_msg) + self["warnings"].append(err_msg) + return + + # specifies a desired operating magnification + if size.endswith(("X", "x")) and size[:-1].replace(".", "0", 1).isdigit(): + target_mag = float(size.upper().split("X")[0]) + # magnification + base_mag = self["base_mag"] + target_sampling_factor = base_mag / target_mag + target_dims = tuple(np.rint(np.asarray(img_base_size) / target_sampling_factor).astype(int)) + + # generate the thumb img + self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor) + + # the size of the img is number + elif size.replace(".", "0", 1).isdigit(): + size = float(size) + # specifies a desired downscaling factor + if size < 1: + target_downscaling_factor = size + target_sampling_factor = 1 / target_downscaling_factor + target_dims = tuple(np.rint(np.asarray(img_base_size) * target_downscaling_factor).astype(int)) + + # generate the thumb img + self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor) + + # specifies a desired level of open slide + elif size < 100: + target_level = int(size) + if target_level >= osh.level_count: + target_level = osh.level_count - 1 + msg = f"Desired Image Level {size+1} does not exist! Instead using level {osh.level_count-1}! Downstream output may not be correct" + logging.error(f"{self['filename']}: {msg}" ) + self["warnings"].append(msg) + size = (tuple((np.array(img_base_size)/osh.level_downsamples[target_level]).astype(int)) + if self["enable_bounding_box"] + else osh.level_dimensions[target_level]) + logging.info( + f"{self['filename']} - \t\tloading image from level {target_level} of size {osh.level_dimensions[target_level]}") + tile = osh.read_region((bx, by), target_level, size) + self[key] = (np.asarray(rgba2rgb(self, tile)) + if np.shape(tile)[-1]==4 + else np.asarray(tile)) + + # specifies a desired size of thumbnail + else: + # recommend having the dimension is less than 10k + if size > 10000: + # warning message for the memory overhead + msg = f"Suggest using the smaller dimension thumbnail image because of the memory overhead." + logging.warning(msg) + self["warnings"].append(msg) + target_dims = getDimensionsByOneDim(self, int(size)) + target_sampling_factor = img_base_size[0] / target_dims[0] + self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor) + return self[key] + +''' +the following are helper functions +''' +def getBestThumb(s: BaseImage, x: int, y: int, dims: Tuple[int, int], target_sampling_factor: float): + osh = s["os_handle"] + + # get thumb from og + if not s["enable_bounding_box"]: + max_dim = dims[0] if dims[0] > dims[1] else dims[1] + return np.array(osh.get_thumbnail((max_dim, max_dim))) + + (level, isExactLevel) = s.getBestLevelForDownsample(target_sampling_factor) + + # check if get the existing level + if isExactLevel: + tile = osh.read_region((x, y), level, dims) + return np.asarray(rgba2rgb(s, tile)) if np.shape(tile)[-1]==4 else np.asarray(tile) + # scale down the thumb img from the next high level + else: + return resizeTileDownward(s, target_sampling_factor, level) + + +def resizeTileDownward(self, target_downsampling_factor, level): + osh = self["os_handle"] + (bx, by, bwidth, bheight) = self["img_bbox"] + end_x = bx + bwidth + end_y = by + bheight + + cloest_downsampling_factor = osh.level_downsamples[level] + win_size = 2048 + + # create a new img + output = [] + for x in range(bx, end_x, win_size): + row_piece = [] + for y in range(by, end_y, win_size): + win_width, win_height = [win_size] * 2 + # Adjust extraction size for endcut + if end_x < x + win_width: + win_width = end_x - x + if end_y < y + win_height: + win_height = end_y - y + + + win_down_width = int(round(win_width / target_downsampling_factor)) + win_down_height = int(round(win_height / target_downsampling_factor)) + + win_width = int(round(win_width / cloest_downsampling_factor)) + win_height = int(round(win_height / cloest_downsampling_factor)) + + # TODO Note: this isn't very efficient, and if more efficiency isneeded + # We should likely refactor using "paste" from Image. + # Or even just set the pixels directly with indexing. + cloest_region = osh.read_region((x, y), level, (win_width, win_height)) + if np.shape(cloest_region)[-1]==4: + cloest_region = rgba2rgb(self, cloest_region) + target_region = cloest_region.resize((win_down_width, win_down_height)) + row_piece.append(target_region) + row_piece = np.concatenate(row_piece, axis=0) + + output.append(row_piece) + output = np.concatenate(output, axis=1) + return output + + +def rgba2rgb(s: BaseImage, img): + bg_color = "#" + s["os_handle"].properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, "ffffff") + thumb = Image.new("RGB", img.size, bg_color) + thumb.paste(img, None, img) + return thumb + + +def printMaskHelper(type: str, prev_mask, curr_mask): + if type == "relative2mask": + if len(prev_mask.nonzero()[0]) == 0: + return str(-100) + else: + return str(1 - len(curr_mask.nonzero()[0]) / len(prev_mask.nonzero()[0])) + elif type == "relative2image": + return str(len(curr_mask.nonzero()[0]) / np.prod(curr_mask.shape)) + elif type == "absolute": + return str(len(curr_mask.nonzero()[0])) + else: + return str(-1) + + +def parsed_mag(mag: Union[str, int, float]) -> Union[None, float]: + """Parse magnification to float + Args: + mag: + + Returns: + Validated size factor either as a float number or "NA" (MAG_NA) + """ + if isinstance(mag, (int, float)): + return float(mag) + numeric_mag_str_flag = BaseImage.is_valid_size(mag) + invalid_flag = mag == MAG_NA or not numeric_mag_str_flag + if invalid_flag: + return MAG_NA + # regex determines X must either be abscent or at the end of the string + if "X" in mag.upper(): + mag = mag[0:-1] + return float(mag) + + +# this function is seperated out because in the future we hope to have automatic detection of +# magnification if not present in open slide, and/or to confirm openslide base magnification +def getMag(s: BaseImage, params) -> Union[float, None]: + logging.info(f"{s['filename']} - \tgetMag") + osh = s["os_handle"] + mag = osh.properties.get("openslide.objective-power") or \ + osh.properties.get("aperio.AppMag") or MAG_NA + # if mag or strtobool(params.get("confirm_base_mag", "False")): + # # do analysis work here + # logging.warning(f"{s['filename']} - Unknown base magnification for file") + # s["warnings"].append(f"{s['filename']} - Unknown base magnification for file") + # return None + # else: + # workaround for unspecified mag -- with or without automatic detection it might be preferred to have + # mag predefined + mag = mag or parsed_mag(params.get("base_mag")) + # mag is santized after invoking getMag regarding whether it's None. Therefore, it should not raise + # the exception here. + return float(mag) if mag is not MAG_NA else MAG_NA + + +def getDimensionsByOneDim(s: BaseImage, dim: int) -> Tuple[int, int]: + (x, y, width, height) = s["img_bbox"] + # calulate the width or height depends on dim + if width > height: + h = int(dim * height / width) + return dim, h + else: + w = int(dim * width / height) + return w, dim + + +def getMaskRegionsStats(img)-> dict: + + rps = measure.regionprops(morphology.label(img)) + if rps: + areas = np.asarray([rp.area for rp in rps]) + num = len(rps) + area_min = areas.min() + area_max = areas.max() + area_mean = areas.mean() + else: + num = area_min = area_max = area_mean = 0 + return { + 'num': num, + 'area_min': area_min, + 'area_max': area_max, + 'area_mean': area_mean + } \ No newline at end of file diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BasicModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BasicModule.py new file mode 100644 index 00000000..55361be6 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BasicModule.py @@ -0,0 +1,83 @@ +import logging +import os +from histoqc.BaseImage import printMaskHelper, getMaskRegionsStats +from skimage.morphology import remove_small_objects, binary_opening, disk +from skimage import io, color, img_as_ubyte + +import matplotlib.pyplot as plt + + +def getBasicStats(s, params): + logging.info(f"{s['filename']} - \tgetBasicStats") + osh = s["os_handle"] + s.addToPrintList("type", osh.properties.get("openslide.vendor", "NA")) + s.addToPrintList("levels", osh.properties.get("openslide.level-count", "NA")) + s.addToPrintList("height", osh.properties.get("openslide.level[0].height", "NA")) + s.addToPrintList("width", osh.properties.get("openslide.level[0].width", "NA")) + s.addToPrintList("mpp_x", osh.properties.get("openslide.mpp-x", "NA")) + s.addToPrintList("mpp_y", osh.properties.get("openslide.mpp-y", "NA")) + s.addToPrintList("comment", osh.properties.get("openslide.comment", "NA").replace("\n", " ").replace("\r", " ")) + return + + +def finalComputations(s, params): + mask = s["img_mask_use"] + mask_statistics = params.get("mask_statistics", s["mask_statistics"]) + if mask_statistics == "relative2mask": + warning_message = "relative2mask is not supported in BasicModule.finalComputations. Using absolute instead." + logging.warning(f"{s['filename']} - {warning_message}") + s["warnings"].append(warning_message) + mask_statistics = "absolute" + s.addToPrintList("pixels_to_use", printMaskHelper(mask_statistics, mask, mask)) + + +def finalProcessingSpur(s, params): + logging.info(f"{s['filename']} - \tfinalProcessingSpur") + disk_radius = int(params.get("disk_radius", "25")) + selem = disk(disk_radius) + mask = s["img_mask_use"] + mask_opened = binary_opening(mask, selem) + mask_spur = ~mask_opened & mask + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_spur.png", img_as_ubyte(mask_spur)) + + prev_mask = s["img_mask_use"] + s["img_mask_use"] = mask_opened + + s.addToPrintList("spur_pixels", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning( + f"{s['filename']} - After BasicModule.finalProcessingSpur NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append( + f"After BasicModule.finalProcessingSpur NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + + +def finalProcessingArea(s, params): + logging.info(f"{s['filename']} - \tfinalProcessingArea") + area_thresh = int(params.get("area_threshold", "1000")) + mask = s["img_mask_use"] + + mask_opened = remove_small_objects(mask, min_size=area_thresh) + mask_removed_area = ~mask_opened & mask + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_areathresh.png", img_as_ubyte(mask_removed_area)) + + prev_mask = s["img_mask_use"] + s["img_mask_use"] = mask_opened > 0 + + s.addToPrintList("areaThresh", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + warning_message = "After BasicModule.finalProcessingArea NO tissue remains detectable! Downstream modules likely to be incorrect/fail" + logging.warning( + f"{s['filename']} - {warning_message}") + s["warnings"].append(warning_message) + + +def countTissuePieces(s, params): + mask = s["img_mask_use"] + stats = getMaskRegionsStats(mask) + s.addToPrintList("#pieces_of_tissue", str(stats.get('num', 0))) diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BlurDetectionModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BlurDetectionModule.py new file mode 100644 index 00000000..afea9f6b --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BlurDetectionModule.py @@ -0,0 +1,66 @@ +import logging +import os + +import skimage +from histoqc.BaseImage import printMaskHelper, getMaskRegionsStats +from skimage import io, img_as_ubyte, morphology, measure +from skimage.color import rgb2gray +from skimage.filters import rank +import numpy as np + +import matplotlib.pyplot as plt + + +# Analysis of focus measure operators for shape-from-focus +# Said Pertuza,, Domenec Puiga, Miguel Angel Garciab, 2012 +# https://pdfs.semanticscholar.org/8c67/5bf5b542b98bf81dcf70bd869ab52ab8aae9.pdf + + +def identifyBlurryRegions(s, params): + logging.info(f"{s['filename']} - \tidentifyBlurryRegions") + + blur_radius = int(params.get("blur_radius", 7)) + blur_threshold = float(params.get("blur_threshold", .1)) + + img = s.getImgThumb(params.get("image_work_size", "2.5x")) + img = rgb2gray(img) + img_laplace = np.abs(skimage.filters.laplace(img)) + mask = skimage.filters.gaussian(img_laplace, sigma=blur_radius) <= blur_threshold + + mask = skimage.transform.resize(mask, s.getImgThumb(s["image_work_size"]).shape, order=0)[:, :, + 1] # for some reason resize takes a grayscale and produces a 3chan + mask = s["img_mask_use"] & (mask > 0) + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_blurry.png", img_as_ubyte(mask)) + s["img_mask_blurry"] = (mask * 255) > 0 + + prev_mask = s["img_mask_use"] + s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_blurry"] + + # rps = measure.regionprops(morphology.label(mask)) + # if rps: + # areas = np.asarray([rp.area for rp in rps]) + # nobj = len(rps) + # area_max = areas.max() + # area_mean = areas.mean() + # else: + # nobj = area_max = area_mean = 0 + stat_rs = getMaskRegionsStats(mask) + + + s.addToPrintList("blurry_removed_num_regions", str(stat_rs.get('num', 0))) + s.addToPrintList("blurry_removed_mean_area", str(stat_rs.get('area_mean',0))) + s.addToPrintList("blurry_removed_max_area", str(stat_rs.get('area_max',0))) + + + s.addToPrintList("blurry_removed_percent", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning( + f"{s['filename']} - After BlurDetectionModule.identifyBlurryRegions NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append( + f"After BlurDetectionModule.identifyBlurryRegions NO tissue remains detectable! Downstream modules likely to be incorrect/fail") + + + return diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BrightContrastModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BrightContrastModule.py new file mode 100644 index 00000000..89dbfbd2 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BrightContrastModule.py @@ -0,0 +1,123 @@ +import logging +import numpy as np +from skimage.filters import sobel +from skimage.color import convert_colorspace, rgb2gray +from distutils.util import strtobool + + +def getBrightnessGray(s, params): + prefix = params.get("prefix", None) + prefix = prefix+"_" if prefix else "" + logging.info(f"{s['filename']} - \tgetContrast:{prefix}") + + limit_to_mask = strtobool(params.get("limit_to_mask", "True")) + invert = strtobool(params.get("invert", "False")) + mask_name = params.get("mask_name", "img_mask_use") + + img = s.getImgThumb(s["image_work_size"]) + + img_g = rgb2gray(img) + if limit_to_mask: + + mask = s[mask_name] if not invert else ~s[mask_name] + + img_g = img_g[mask] + if img_g.size == 0: + img_g = np.array(-100) + + s.addToPrintList(f"{prefix}grayscale_brightness", str(img_g.mean())) + s.addToPrintList(f"{prefix}grayscale_brightness_std", str(img_g.std())) + + return + + +def getBrightnessByChannelinColorSpace(s, params): + prefix = params.get("prefix", None) + prefix = prefix + "_" if prefix else "" + + logging.info(f"{s['filename']} - \tgetContrast:{prefix}") + + to_color_space = params.get("to_color_space", "RGB") + limit_to_mask = strtobool(params.get("limit_to_mask", "True")) + mask_name = params.get("mask_name", "img_mask_use") + + invert = strtobool(params.get("invert", "False")) + + + img = s.getImgThumb(s["image_work_size"]) + + suffix = "" + if (to_color_space != "RGB"): + img = convert_colorspace(img, "RGB", to_color_space) + suffix = "_" + to_color_space + + for chan in range(0, 3): + vals = img[:, :, chan] + if (limit_to_mask): + + mask = s[mask_name] if not invert else ~s[mask_name] + vals = vals[mask] + + if vals.size == 0: + vals = np.array(-100) + + s.addToPrintList(f"{prefix}chan{chan+1}_brightness{suffix}", str(vals.mean())) + s.addToPrintList(f"{prefix}chan{chan+1}_brightness_std{suffix}", str(vals.std())) + + return + + +def getContrast(s, params): + prefix = params.get("prefix", None) + prefix = prefix + "_" if prefix else "" + + logging.info(f"{s['filename']} - \tgetContrast:{prefix}") + limit_to_mask = strtobool(params.get("limit_to_mask", True)) + mask_name = params.get("mask_name", "img_mask_use") + + invert = strtobool(params.get("invert", "False")) + + + img = s.getImgThumb(s["image_work_size"]) + img = rgb2gray(img) + + sobel_img = sobel(img) ** 2 + + if limit_to_mask: + + mask = s[mask_name] if not invert else ~s[mask_name] + + sobel_img = sobel_img[mask] + img = img[s["img_mask_use"]] + + if img.size == 0: # need a check to ensure that mask wasn't empty AND limit_to_mask is true, still want to + # produce metrics for completeness with warning + + s.addToPrintList(f"{prefix}tenenGrad_contrast", str(-100)) + s.addToPrintList(f"{prefix}michelson_contrast", str(-100)) + s.addToPrintList(f"{prefix}rms_contrast", str(-100)) + + + logging.warning(f"{s['filename']} - After BrightContrastModule.getContrast: NO tissue " + f"detected, statistics are impossible to compute, defaulting to -100 !") + s["warnings"].append(f"After BrightContrastModule.getContrast: NO tissue remains " + f"detected, statistics are impossible to compute, defaulting to -100 !") + + return + + + # tenenGrad - Note this must be performed on full image and then subsetted if limiting to mask + tenenGrad_contrast = np.sqrt(np.sum(sobel_img)) / img.size + s.addToPrintList(f"{prefix}tenenGrad_contrast", str(tenenGrad_contrast)) + + # Michelson contrast + max_img = img.max() + min_img = img.min() + contrast = (max_img - min_img) / (max_img + min_img) + s.addToPrintList(f"{prefix}michelson_contrast", str(contrast)) + + # RMS contrast + rms_contrast = np.sqrt(pow(img - img.mean(), 2).sum() / img.size) + s.addToPrintList(f"{prefix}rms_contrast", str(rms_contrast)) + + return diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BubbleRegionByRegion.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BubbleRegionByRegion.py new file mode 100644 index 00000000..afd62f57 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BubbleRegionByRegion.py @@ -0,0 +1,128 @@ +import logging +import os +import sys + +from ast import literal_eval as make_tuple + +from distutils.util import strtobool +from histoqc.BaseImage import printMaskHelper + +import scipy.signal + +from skimage import io, img_as_ubyte +from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace +from skimage.color import rgb2gray +from skimage.morphology import remove_small_objects, disk, binary_opening +from skimage.feature import local_binary_pattern + +from skimage.transform import rescale, resize, downscale_local_mean + +from math import ceil + +from sklearn.naive_bayes import GaussianNB +from sklearn.ensemble import RandomForestClassifier + +from skimage import io, color + + +import numpy as np + +import matplotlib.pyplot as plt + +global_holder = {} + +#WARNING: Not as robust as other modules +def roiWise(s, params): + name = params.get("name", "classTask") + print("\tpixelWise:\t", name, end="") + + level = int(params.get("level", 1)) + win_size = int(params.get("win_size", 2048)) #the size of the ROI which will be iteratively considered + + osh = s["os_handle"] + + dim_base = osh.level_dimensions[0] + dims = osh.level_dimensions[level] + + ratio_x = dim_base[0] / dims[0] #figure out the difference between desi + ratio_y = dim_base[1] / dims[1] + + frangi_scale_range = (1, 6) + frangi_scale_step = 2 + frangi_beta1 = .5 + frangi_beta2 = 100 + frangi_black_ridges = True + + mask = [] + for x in range(0, dim_base[0], round(win_size * ratio_x)): + row_piece = [] + print('.', end='', flush=True) + for y in range(0, dim_base[1], round(win_size * ratio_y)): + region = np.asarray(osh.read_region((x, y), 1, (win_size, win_size))) + region = region[:, :, 0:3] # remove alpha channel + g = rgb2gray(region) + feat = frangi(g, frangi_scale_range, frangi_scale_step, frangi_beta1, frangi_beta2, frangi_black_ridges) + feat = feat / 8.875854409275627e-08 + region_mask = np.bitwise_and(g < .3, feat > 5) + region_mask = remove_small_objects(region_mask, min_size=100, in_place=True) + # region_std = region.std(axis=2) + # region_gray = rgb2gray(region) + # region_mask = np.bitwise_and(region_std < 20, region_gray < 100/255) + # region_mask = scipy.ndimage.morphology.binary_dilation(region_mask, iterations=1) + # region_mask = resize(region_mask , (region_mask.shape[0] / 2, region_mask.shape[1] / 2)) + row_piece.append(region_mask) + row_piece = np.concatenate(row_piece, axis=0) + mask.append(row_piece) + + mask = np.concatenate(mask, axis=1) + + if params.get("area_threshold", "") != "": + mask = remove_small_objects(mask, min_size=int(params.get("area_threshold", "")), in_place=True) + + s.addToPrintList(name, str(mask.mean())) + + #TODO, migrate to printMaskHelper, but currently don't see how this output affects final mask + #s.addToPrintList(name, + # printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_BubbleBounds.png", img_as_ubyte(mask)) #.astype(np.uint8) * 255) + + return + + +def detectSmoothness(s, params): + logging.info(f"{s['filename']} - \tBubbleRegionByRegion.detectSmoothness") + thresh = float(params.get("threshold", ".01" )) + kernel_size = int(params.get("kernel_size", "10")) + min_object_size = int(params.get("min_object_size", "100")) + + img = s.getImgThumb(s["image_work_size"]) + img = color.rgb2gray(img) + avg = np.ones((kernel_size, kernel_size)) / (kernel_size**2) + + imf = scipy.signal.convolve2d(img, avg, mode="same") + mask_flat = abs(imf - img) < thresh + + mask_flat = remove_small_objects(mask_flat, min_size=min_object_size) + mask_flat = ~remove_small_objects(~mask_flat, min_size=min_object_size) + + prev_mask = s["img_mask_use"] + s["img_mask_flat"] = mask_flat + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_flat.png", img_as_ubyte(mask_flat & prev_mask)) + + s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_flat"] + + + s.addToPrintList("flat_areas", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, + s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After BubbleRegionByRegion.detectSmoothness: NO tissue " + f"remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") + + return + diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ClassificationModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ClassificationModule.py new file mode 100644 index 00000000..660145d5 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ClassificationModule.py @@ -0,0 +1,252 @@ +import logging +import os +import re +import sys + +from ast import literal_eval as make_tuple + +from distutils.util import strtobool + +from histoqc.BaseImage import printMaskHelper +from skimage import io, img_as_ubyte, img_as_bool +from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace +from skimage.color import rgb2gray +from skimage.morphology import remove_small_objects, disk, dilation +from skimage.feature import local_binary_pattern + +from scipy import ndimage as ndi + +from sklearn.naive_bayes import GaussianNB +from sklearn.ensemble import RandomForestClassifier + +import numpy as np + +import matplotlib.pyplot as plt + + +def pixelWise(s, params): + name = params.get("name", "classTask") + logging.info(f"{s['filename']} - \tpixelWise:\t", name) + + thresh = float(params.get("threshold", .5)) + + fname = params.get("tsv_file", "") + if fname == "": + logging.error(f"{s['filename']} - tsv_file not set in ClassificationModule.pixelWise for ", name) + sys.exit(1) + return + model_vals = np.loadtxt(fname, delimiter="\t", skiprows=1) + + img = s.getImgThumb(s["image_work_size"]) + + gnb = GaussianNB() + gnb.fit(model_vals[:, 1:], model_vals[:, 0]) + cal = gnb.predict_proba(img.reshape(-1, 3)) + + cal = cal.reshape(img.shape[0], img.shape[1], 2) + mask = cal[:, :, 1] > thresh + + mask = s["img_mask_use"] & (mask > 0) + + s.addToPrintList(name, str(mask.mean())) + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(mask)) + s["img_mask_" + name] = (mask * 255) > 0 + prev_mask = s["img_mask_use"] + s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_" + name] + + s.addToPrintList(name, + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After ClassificationModule.pixelWise:{name} NO tissue " + f"remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After ClassificationModule.pixelWise:{name} NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") + + return + + +# extract_patches_2d(image, patch_size, max_patches=None, random_state=None + +def compute_rgb(img, params): + return img + + +def compute_laplace(img, params): + laplace_ksize = int(params.get("laplace_ksize", 3)) + return laplace(rgb2gray(img), ksize=laplace_ksize)[:, :, None] + + +def compute_lbp(img, params): + lbp_radius = float(params.get("lbp_radius", 3)) + lbp_points = int(params.get("lbp_points", 24)) # example sets radius * 8 + lbp_method = params.get("lbp_method", "default") + + return local_binary_pattern(rgb2gray(img), P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None] + + +def compute_gaussian(img, params): + gaussian_sigma = int(params.get("gaussian_sigma", 1)) + gaussian_multichan = strtobool(params.get("gaussian_multichan", False)) + + if (gaussian_multichan): + return gaussian(img, sigma=gaussian_sigma, multichannel=gaussian_multichan) + else: + return gaussian(rgb2gray(img), sigma=gaussian_sigma)[:, :, None] + + +def compute_median(img, params): + median_disk_size = int(params.get("median_disk_size", 3)) + return median(rgb2gray(img), selem=disk(median_disk_size))[:, :, None] + + +def compute_gabor(img, params): + if not params["shared_dict"].get("gabor_kernels", False): + gabor_theta = int(params.get("gabor_theta", 4)) + gabor_sigma = make_tuple(params.get("gabor_sigma", "(1,3)")) + gabor_frequency = make_tuple(params.get("gabor_frequency", "(0.05, 0.25)")) + + kernels = [] + for theta in range(gabor_theta): + theta = theta / 4. * np.pi + for sigma in gabor_sigma: + for frequency in gabor_frequency: + kernel = np.real(gabor_kernel(frequency, theta=theta, + sigma_x=sigma, sigma_y=sigma)) + kernels.append(kernel) + params["shared_dict"]["gabor_kernels"] = kernels + + kernels = params["shared_dict"]["gabor_kernels"] + imgg = rgb2gray(img) + feats = np.zeros((imgg.shape[0], imgg.shape[1], len(kernels)), dtype=np.double) + for k, kernel in enumerate(kernels): + filtered = ndi.convolve(imgg, kernel, mode='wrap') + feats[:, :, k] = filtered + return feats + + +def compute_frangi(img, params): + frangi_scale_range = make_tuple(params.get("frangi_scale_range", "(1, 10)")) + frangi_scale_step = float(params.get("frangi_scale_step", 2)) + frangi_beta1 = float(params.get("frangi_beta1", .5)) + frangi_beta2 = float(params.get("frangi_beta2", 15)) + frangi_black_ridges = strtobool(params.get("frangi_black_ridges", "True")) + feat = frangi(rgb2gray(img), scale_range = frangi_scale_range, scale_step =frangi_scale_step, beta =frangi_beta1, gamma=frangi_beta2, black_ridges =frangi_black_ridges) + return feat[:, :, None] # add singleton dimension + + +def compute_features(img, params): + features = params.get("features", "") + + feats = [] + for feature in features.splitlines(): + func = getattr(sys.modules[__name__], f"compute_{feature}") + feats.append(func(img, params)) + + return np.concatenate(feats, axis=2) + + +def byExampleWithFeatures(s, params): + name = params.get("name", "classTask") + logging.info(f"{s['filename']} - \tClassificationModule.byExample:\t{name}") + + thresh = float(params.get("threshold", .5)) + nsamples_per_example = float(params.get("nsamples_per_example", -1)) + + examples = params.get("examples", "") + if examples == "": + logging.error(f"{s['filename']} - No examples provided in ClassificationModule.byExample for {name} !!") + sys.exit(1) + return + + if params.get("features", "") == "": + logging.error(f"{s['filename']} - No features provided in ClassificationModule.byExample for {name} !!") + sys.exit(1) + return + + with params["lock"]: # this lock is shared across all threads such that only one thread needs to train the model + # then it is shared with all other modules + if not params["shared_dict"].get("model_" + name, False): + logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}") + + model_vals = [] + model_labels = np.empty([0, 1]) + + for ex in params["examples"].splitlines(): + ex = re.split(r'(? 1 else int(mask.shape[0]*nsamples_per_example) + + # set seed to random function if seed is not None + if s["seed"] is not None: + np.random.seed(int(s["seed"])) + + idxkeep = np.random.choice(mask.shape[0], size=int(nitems)) + eximg = eximg[idxkeep, :] + mask = mask[idxkeep] + + + model_vals.append(eximg) + model_labels = np.vstack((model_labels, mask)) + + # do stuff here with model_vals + model_vals = np.vstack(model_vals) + clf = RandomForestClassifier(n_jobs=-1) + clf.fit(model_vals, model_labels.ravel()) + params["shared_dict"]["model_" + name] = clf + logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}....done") + + clf = params["shared_dict"]["model_" + name] + img = s.getImgThumb(s["image_work_size"]) + feats = compute_features(img, params) + cal = clf.predict_proba(feats.reshape(-1, feats.shape[2])) + cal = cal.reshape(img.shape[0], img.shape[1], 2) + + mask = cal[:, :, 1] > thresh + + area_thresh = int(params.get("area_threshold", "5")) + if area_thresh > 0: + mask = remove_small_objects(mask, min_size=area_thresh, in_place=True) + + dilate_kernel_size = int(params.get("dilate_kernel_size", "0")) + if dilate_kernel_size > 0: + mask = dilation(mask, selem=np.ones((dilate_kernel_size, dilate_kernel_size))) + + mask = s["img_mask_use"] & (mask > 0) + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(mask)) + s["img_mask_" + name] = (mask * 255) > 0 + prev_mask = s["img_mask_use"] + s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_" + name] + + s.addToPrintList(name, + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After ClassificationModule.byExampleWithFeatures:{name} NO tissue " + f"remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After ClassificationModule.byExampleWithFeatures:{name} NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") + + s["img_mask_force"].append("img_mask_" + name) + s["completed"].append(f"byExampleWithFeatures:{name}") + + return diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/DeconvolutionModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/DeconvolutionModule.py new file mode 100644 index 00000000..dbb41f1a --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/DeconvolutionModule.py @@ -0,0 +1,76 @@ +import logging +import os +import sys +import numpy as np +from skimage import io, color, img_as_ubyte +from skimage.exposure import rescale_intensity +from skimage.color import separate_stains +from skimage.color import hed_from_rgb, hdx_from_rgb, fgx_from_rgb, bex_from_rgb, rbd_from_rgb +from skimage.color import gdx_from_rgb, hax_from_rgb, bro_from_rgb, bpx_from_rgb, ahx_from_rgb, \ + hpx_from_rgb # need to load all of these in case the user selects them +from distutils.util import strtobool + +import matplotlib.pyplot as plt + + +def separateStains(s, params): + logging.info(f"{s['filename']} - \tseparateStains") + stain = params.get("stain", "") + use_mask = strtobool(params.get("use_mask", "True")) + + if stain == "": + logging.error(f"{s['filename']} - stain not set in DeconvolutionModule.separateStains") + sys.exit(1) + return + + stain_matrix = getattr(sys.modules[__name__], stain, None) + + if stain_matrix is None: + logging.error(f"{s['filename']} - Unknown stain matrix specified in DeconolutionModule.separateStains") + sys.exit(1) + return + + mask = s["img_mask_use"] + + if use_mask and len(mask.nonzero()[0]) == 0: #-- lets just error check at the top if mask is empty and abort early + for c in range(3): + s.addToPrintList(f"deconv_c{c}_std", str(-100)) + s.addToPrintList(f"deconv_c{c}_mean", str(-100)) + io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(np.zeros(mask.shape))) + + logging.warning(f"{s['filename']} - DeconvolutionModule.separateStains: NO tissue " + f"remains detectable! Saving Black images") + s["warnings"].append(f"DeconvolutionModule.separateStains: NO tissue " + f"remains detectable! Saving Black images") + + return + + img = s.getImgThumb(s["image_work_size"]) + dimg = separate_stains(img, stain_matrix) + + for c in range(0, 3): + dc = dimg[:, :, c] + + clip_max_val = np.quantile(dc.flatten(), .99) + dc = np.clip(dc, a_min=0, a_max=clip_max_val) + + + if use_mask: + dc_sub = dc[mask] + dc_min = dc_sub.min() + dc_max = dc_sub.max() + + s.addToPrintList(f"deconv_c{c}_mean", str(dc_sub.mean())) + s.addToPrintList(f"deconv_c{c}_std", str(dc_sub.std())) + else: + mask = 1.0 + dc_min = dc.min() + dc_max = dc.max() + + s.addToPrintList(f"deconv_c{c}_mean", str(dc.mean())) + s.addToPrintList(f"deconv_c{c}_std", str(dc.std())) + + dc = (dc - dc_min) / float(dc_max - dc_min) * mask + io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(dc)) + + return diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/HistogramModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/HistogramModule.py new file mode 100644 index 00000000..e58707c8 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/HistogramModule.py @@ -0,0 +1,78 @@ +import logging +import os +import numpy as np +from skimage import io +import matplotlib.pyplot as plt +from distutils.util import strtobool + +global_holder = {} #this holds a local copy of the histograms of the template images so that they need only be computed once + + +def getHistogram(s, params): + logging.info(f"{s['filename']} - \tgetHistogram") + limit_to_mask = strtobool(params.get("limit_to_mask", True)) + bins = int(params.get("bins", 20)) + + img = s.getImgThumb(s["image_work_size"]) + if limit_to_mask: + img = img[s["img_mask_use"]] + else: + img = img.reshape(-1, 3) + + ax = plt.axes() + ax.hist(img, bins=bins, density=True, range=(0, 255), histtype='step', color=("r", "g", "b")) + + ax.grid(True) + ax.set_title('Color Distribution for ' + s["filename"]) + ax.set_xlabel('Pixel Val') + ax.set_ylabel('Density') + plt.savefig(s["outdir"] + os.sep + s["filename"] + "_hist.png") + plt.close() + return + + +def computeHistogram(img, bins, mask=-1): + result = np.zeros(shape=(bins, 3)) + for chan in range(0, 3): + vals = img[:, :, chan].flatten() + if (isinstance(mask, np.ndarray)): + vals = vals[mask.flatten()] + + result[:, chan] = np.histogram(vals, bins=bins, density=True, range=[0, 255])[0] + + return result + + +def compareToTemplates(s, params): + logging.info(f"{s['filename']} - \tcompareToTemplates") + bins = int(params.get("bins", 20)) + limit_to_mask = strtobool(params.get("limit_to_mask", True)) + if (not global_holder.get("templates", False)): #if the histograms haven't already been computed, compute and store them now + templates = {} + for template in params["templates"].splitlines(): + templates[os.path.splitext(os.path.basename(template))[0]] = computeHistogram(io.imread(template), bins) + # compute each of their histograms + global_holder["templates"] = templates + + img = s.getImgThumb(s["image_work_size"]) + + if (limit_to_mask): + mask = s["img_mask_use"] + if len(mask.nonzero()[0]) == 0: + + logging.warning(f"{s['filename']} - HistogramModule.compareToTemplates NO tissue " + f"remains detectable in mask!") + s["warnings"].append(f"HistogramModule.compareToTemplates NO tissue " + f"remains detectable in mask!") + + imghst = np.zeros((bins,3)) + + else: + imghst = computeHistogram(img, bins, mask) + else: + imghst = computeHistogram(img, bins) + + for template in global_holder["templates"]: + val = np.sum(pow(abs(global_holder["templates"][template] - imghst), 2)) + s.addToPrintList(template + "_MSE_hist", str(val)) + return diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LightDarkModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LightDarkModule.py new file mode 100644 index 00000000..1f8d3069 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LightDarkModule.py @@ -0,0 +1,188 @@ +import logging +import os +import numpy as np +from histoqc.BaseImage import printMaskHelper +from skimage import io, color, img_as_ubyte +from distutils.util import strtobool +from skimage.filters import threshold_otsu, rank +from skimage.morphology import disk +from sklearn.cluster import KMeans +from skimage import exposure + +import matplotlib.pyplot as plt + + + +def getIntensityThresholdOtsu(s, params): + logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdOtsu") + name = params.get("name", "classTask") + local = strtobool(params.get("local", "False")) + radius = float(params.get("radius", 15)) + selem = disk(radius) + + img = s.getImgThumb(s["image_work_size"]) + img = color.rgb2gray(img) + + if local: + thresh = rank.otsu(img, selem) + else: + thresh = threshold_otsu(img) + + map = img < thresh + + s["img_mask_" + name] = map > 0 + if strtobool(params.get("invert", "False")): + s["img_mask_" + name] = ~s["img_mask_" + name] + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(s["img_mask_" + name])) + + prev_mask = s["img_mask_use"] + s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name] + + s.addToPrintList(name, + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After LightDarkModule.getIntensityThresholdOtsu:{name} NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After LightDarkModule.getIntensityThresholdOtsu:{name} NO tissue remains detectable! " + f"Downstream modules likely to be incorrect/fail") + + return + + +def getIntensityThresholdPercent(s, params): + name = params.get("name", "classTask") + logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdPercent:\t {name}") + + lower_thresh = float(params.get("lower_threshold", "-inf")) + upper_thresh = float(params.get("upper_threshold", "inf")) + + # Prepare parameter names due to issues #213 and #219 + # set lower standard deviation + lower_std = float(params.get("lower_var") or params.get("lower_std") or "-inf") + # set upper standard deviation + upper_std = float(params.get("upper_var") or params.get("upper_std") or "inf") + + img = s.getImgThumb(s["image_work_size"]) + img_std = img.std(axis=2) + + map_std = np.bitwise_and(img_std > lower_std, img_std < upper_std) + + img = color.rgb2gray(img) + map = np.bitwise_and(img > lower_thresh, img < upper_thresh) + + map = np.bitwise_and(map, map_std) + + s["img_mask_" + name] = map > 0 + + + + if strtobool(params.get("invert", "False")): + s["img_mask_" + name] = ~s["img_mask_" + name] + + prev_mask = s["img_mask_use"] + s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name] + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(prev_mask & ~s["img_mask_" + name])) + + s.addToPrintList(name, + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After LightDarkModule.getIntensityThresholdPercent:{name} NO tissue " + f"remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After LightDarkModule.getIntensityThresholdPercent:{name} NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") + + return + + + + + + +def removeBrightestPixels(s, params): + logging.info(f"{s['filename']} - \tLightDarkModule.removeBrightestPixels") + + # lower_thresh = float(params.get("lower_threshold", -float("inf"))) + # upper_thresh = float(params.get("upper_threshold", float("inf"))) + # + # lower_var = float(params.get("lower_variance", -float("inf"))) + # upper_var = float(params.get("upper_variance", float("inf"))) + + img = s.getImgThumb(s["image_work_size"]) + img = color.rgb2gray(img) + + kmeans = KMeans(n_clusters=3, n_init=1).fit(img.reshape([-1, 1])) + brightest_cluster = np.argmax(kmeans.cluster_centers_) + darkest_point_in_brightest_cluster = (img.reshape([-1, 1])[kmeans.labels_ == brightest_cluster]).min() + + s["img_mask_bright"] = img > darkest_point_in_brightest_cluster + + + + if strtobool(params.get("invert", "False")): + s["img_mask_bright"] = ~s["img_mask_bright"] + + prev_mask = s["img_mask_use"] + s["img_mask_use"] = s["img_mask_use"] & s["img_mask_bright"] + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_bright.png", img_as_ubyte(prev_mask & ~s["img_mask_bright"])) + + s.addToPrintList("brightestPixels", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After LightDarkModule.removeBrightestPixels NO tissue " + f"remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After LightDarkModule.removeBrightestPixels NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") + + return + + + +def minimumPixelIntensityNeighborhoodFiltering(s,params): + logging.info(f"{s['filename']} - \tLightDarkModule.minimumPixelNeighborhoodFiltering") + disk_size = int(params.get("disk_size", 10000)) + threshold = int(params.get("upper_threshold", 200)) + + img = s.getImgThumb(s["image_work_size"]) + img = color.rgb2gray(img) + img = (img * 255).astype(np.uint8) + selem = disk(disk_size) + + imgfilt = rank.minimum(img, selem) + s["img_mask_bright"] = imgfilt > threshold + + + if strtobool(params.get("invert", "True")): + s["img_mask_bright"] = ~s["img_mask_bright"] + + prev_mask = s["img_mask_use"] + s["img_mask_use"] = s["img_mask_use"] & s["img_mask_bright"] + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_bright.png", img_as_ubyte(prev_mask & ~s["img_mask_bright"])) + + s.addToPrintList("brightestPixels", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After LightDarkModule.minimumPixelNeighborhoodFiltering NO tissue " + f"remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After LightDarkModule.minimumPixelNeighborhoodFiltering NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") + + return + +def saveEqualisedImage(s,params): + logging.info(f"{s['filename']} - \tLightDarkModule.saveEqualisedImage") + + img = s.getImgThumb(s["image_work_size"]) + img = color.rgb2gray(img) + + out = exposure.equalize_hist((img*255).astype(np.uint8)) + io.imsave(s["outdir"] + os.sep + s["filename"] + "_equalized_thumb.png", img_as_ubyte(out)) + + return diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LocalTextureEstimationModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LocalTextureEstimationModule.py new file mode 100644 index 00000000..cbe7cb28 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LocalTextureEstimationModule.py @@ -0,0 +1,57 @@ +import logging +import numpy as np +from skimage import color +from distutils.util import strtobool +from skimage.feature import graycomatrix, graycoprops +import matplotlib.pyplot as plt + + + +def estimateGreyComatrixFeatures(s, params): + + prefix = params.get("prefix", None) + prefix = prefix+"_" if prefix else "" + + logging.info(f"{s['filename']} - \tLocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix}") + patch_size = int(params.get("patch_size", 32)) + npatches = int(params.get("npatches", 100)) + nlevels = int(params.get("nlevels", 8)) + feats = params.get("feats","contrast:dissimilarity:homogeneity:ASM:energy:correlation").split(':') + invert = strtobool(params.get("invert", "False")) + mask_name = params.get("mask_name","img_mask_use") + + # set seed to random function if seed is not None + if s["seed"] is not None: + np.random.seed(int(s["seed"])) + + img = s.getImgThumb(s["image_work_size"]) + img = color.rgb2gray(img) + + mask = s[mask_name] if not invert else ~s[mask_name] + if len(mask.nonzero()[0]) == 0: # add warning in case the no tissus detected in mask + msg = f"LocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix} Can not estimate the empty mask since NO tissue remains detectable in mask" + logging.warning(f"{s['filename']} - {msg}") + s["warnings"].append(msg) + return + + maskidx = mask.nonzero() + maskidx = np.asarray(maskidx).transpose() + idx = np.random.choice(maskidx.shape[0], npatches) + + results = [] + + for id in idx: + r, c = maskidx[id, :] + patch = img[r:r + patch_size, c:c + patch_size] + glcm = graycomatrix(np.digitize(patch,np.linspace(0,1,num=nlevels),right=True), distances=[5], + angles=[0], levels=nlevels, symmetric=True, normed=True) + + results.append([graycoprops(glcm, prop=feat) for feat in feats]) + + results = np.asarray(results).squeeze() + + for vals, feat in zip(results.transpose(), feats): + s.addToPrintList(f"{prefix}{feat}", str(vals.mean())) + s.addToPrintList(f"{prefix}{feat}_std", str(vals.std())) + + return diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/MorphologyModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/MorphologyModule.py new file mode 100644 index 00000000..9bfac7dd --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/MorphologyModule.py @@ -0,0 +1,161 @@ +import logging +import os +import numpy as np +from histoqc.BaseImage import printMaskHelper, getMaskRegionsStats +from skimage import io, morphology, img_as_ubyte, measure + +from scipy import ndimage as ndi + +import matplotlib.pyplot as plt # these 2 are used for debugging +from histoqc.SaveModule import blend2Images #for easier debugging + + +def removeSmallObjects(s, params): + logging.info(f"{s['filename']} - \tremoveSmallObjects") + min_size = int(params.get("min_size", 64)) + img_reduced = morphology.remove_small_objects(s["img_mask_use"], min_size=min_size) + img_small = np.invert(img_reduced) & s["img_mask_use"] + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_remove.png", img_as_ubyte(img_small)) + s["img_mask_small_filled"] = (img_small * 255) > 0 + + prev_mask = s["img_mask_use"] + s["img_mask_use"] = img_reduced + + + # rps = measure.regionprops(morphology.label(img_small)) + # if rps: + # areas = np.asarray([rp.area for rp in rps]) + # nobj = len(rps) + # area_max = areas.max() + # area_mean = areas.mean() + # else: + # nobj = area_max = area_mean = 0 + stats = getMaskRegionsStats(img_small) + + + # s.addToPrintList("small_tissue_removed_num_regions", str(nobj)) + # s.addToPrintList("small_tissue_removed_mean_area", str(area_mean)) + # s.addToPrintList("small_tissue_removed_max_area", str(area_max)) + s.addToPrintList("small_tissue_removed_num_regions", str(stats.get('num',0))) + s.addToPrintList("small_tissue_removed_mean_area", str(stats.get('area_mean',0))) + s.addToPrintList("small_tissue_removed_max_area", str(stats.get('area_max',0))) + + + + + s.addToPrintList("small_tissue_removed_percent", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After MorphologyModule.removeSmallObjects: NO tissue " + f"remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After MorphologyModule.removeSmallObjects: NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") + + return + + +def remove_large_objects(img, max_size): + # code taken from morphology.remove_small_holes, except switched < with > + selem = ndi.generate_binary_structure(img.ndim, 1) + ccs = np.zeros_like(img, dtype=np.int32) + ndi.label(img, selem, output=ccs) + component_sizes = np.bincount(ccs.ravel()) + too_big = component_sizes > max_size + too_big_mask = too_big[ccs] + img_out = img.copy() + img_out[too_big_mask] = 0 + return img_out + + +def removeFatlikeTissue(s, params): + logging.info(f"{s['filename']} - \tremoveFatlikeTissue") + fat_cell_size = int(params.get("fat_cell_size", 64)) + kernel_size = int(params.get("kernel_size", 3)) + max_keep_size = int(params.get("max_keep_size", 1000)) + + img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=fat_cell_size) + img_small = img_reduced & np.invert(s["img_mask_use"]) + img_small = ~morphology.remove_small_holes(~img_small, area_threshold=9) + + mask_dilate = morphology.dilation(img_small, selem=np.ones((kernel_size, kernel_size))) + mask_dilate_removed = remove_large_objects(mask_dilate, max_keep_size) + + mask_fat = mask_dilate & ~mask_dilate_removed + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_fatlike.png", img_as_ubyte(mask_fat)) + s["img_mask_fatlike"] = (mask_fat * 255) > 0 + + prev_mask = s["img_mask_use"] + s["img_mask_use"] = prev_mask & ~mask_fat + + # rps = measure.regionprops(morphology.label(mask_fat)) + # if rps: + # areas = np.asarray([rp.area for rp in rps]) + # nobj = len(rps) + # area_max = areas.max() + # area_mean = areas.mean() + # else: + # nobj = area_max = area_mean = 0 + stats = getMaskRegionsStats(mask_fat) + + # s.addToPrintList("fatlike_tissue_removed_num_regions", str(nobj)) + # s.addToPrintList("fatlike_tissue_removed_mean_area", str(area_mean)) + # s.addToPrintList("fatlike_tissue_removed_max_area", str(area_max)) + s.addToPrintList("fatlike_tissue_removed_num_regions", str(stats.get('num',0))) + s.addToPrintList("fatlike_tissue_removed_mean_area", str(stats.get('area_mean',0))) + s.addToPrintList("fatlike_tissue_removed_max_area", str(stats.get('area_max',0))) + + + + s.addToPrintList("fatlike_tissue_removed_percent", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After MorphologyModule.removeFatlikeTissue: NO tissue " + f"remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After MorphologyModule.removeFatlikeTissue: NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") + + +def fillSmallHoles(s, params): + logging.info(f"{s['filename']} - \tfillSmallHoles") + min_size = int(params.get("min_size", 64)) + img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=min_size) + img_small = img_reduced & np.invert(s["img_mask_use"]) + + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_fill.png", img_as_ubyte(img_small)) + s["img_mask_small_removed"] = (img_small * 255) > 0 + + prev_mask = s["img_mask_use"] + s["img_mask_use"] = img_reduced + + # rps = measure.regionprops(morphology.label(img_small)) + # if rps: + # areas = np.asarray([rp.area for rp in rps]) + # nobj = len(rps) + # area_max = areas.max() + # area_mean = areas.mean() + # else: + # nobj = area_max = area_mean = 0 + stats = getMaskRegionsStats(img_small) + + # s.addToPrintList("small_tissue_filled_num_regions", str(nobj)) + # s.addToPrintList("small_tissue_filled_mean_area", str(area_mean)) + # s.addToPrintList("small_tissue_filled_max_area", str(area_max)) + s.addToPrintList("small_tissue_filled_num_regions", str(stats.get('num', 0))) + s.addToPrintList("small_tissue_filled_mean_area", str(stats.get('area_mean', 0))) + s.addToPrintList("small_tissue_filled_max_area", str(stats.get('area_max', 0))) + + s.addToPrintList("small_tissue_filled_percent", + printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"])) + + if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty + logging.warning(f"{s['filename']} - After MorphologyModule.fillSmallHoles: NO tissue " + f"remains detectable! Downstream modules likely to be incorrect/fail") + s["warnings"].append(f"After MorphologyModule.fillSmallHoles: NO tissue remains " + f"detectable! Downstream modules likely to be incorrect/fail") + return diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/SaveModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/SaveModule.py new file mode 100644 index 00000000..f3ad2723 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/SaveModule.py @@ -0,0 +1,175 @@ +import logging +import os +import cv2 +import uuid +import json +import numpy as np +from skimage import io, img_as_ubyte +from distutils.util import strtobool +from skimage import color, measure +from copy import deepcopy +from geojson import Polygon, Feature, FeatureCollection, dump + +def blend2Images(img, mask): + if (img.ndim == 3): + img = color.rgb2gray(img) + if (mask.ndim == 3): + mask = color.rgb2gray(mask) + img = img[:, :, None] * 1.0 # can't use boolean + mask = mask[:, :, None] * 1.0 + out = np.concatenate((mask, img, mask), 2) + return out + +def binaryMask2Geojson(s, mask): + # get the dimension of slide + (dim_width, dim_height) = s['os_handle'].dimensions + # get the dimension of mask + (mask_height, mask_width) = mask.shape + + # convert binary mask to contours + # contours = measure.find_contours(mask) + contours, hierarchy = cv2.findContours(img_as_ubyte(mask), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + children = [[] for i in range(len(contours))] + for i, cnt in enumerate(contours): + # first_child_idx = hier[0, i, 2] + parent_idx = hierarchy[0, i, 3] + if (parent_idx == -1): + continue + # add contour to parent's children node + children[parent_idx].append(cnt) + + # no contours detected + if not len(contours): + # set default format + ann_format = "xml" + # warning msg + msg = f"No contour detected at use mask image. Geojson annotation won't be generated." + logging.warning(f"{s['filename']} - {msg}") + s["warnings"].append(msg) + return None + + features = [] + for i, contour in enumerate(contours): + first_child_idx = hierarchy[0, i, 2] + parent_idx = hierarchy[0, i, 3] + + if (parent_idx != -1): + continue + + geometry = [] + points = np.asarray(contour / [mask_height, mask_width] * [dim_height, dim_width],dtype="int") + points = np.append(points, [points[0]], axis=0) + points = points[:,0].tolist() + points = [tuple(p) for p in points] + geometry.append(points) + if first_child_idx != -1: + for child in children[i]: + child_points = np.asarray(child / [mask_height, mask_width] * [dim_height, dim_width],dtype="int") + child_points = np.append(child_points, [child_points[0]], axis=0) + child_points = child_points[:,0].tolist() + child_points = [tuple(p) for p in child_points] + geometry.append(child_points) + new_feature = Feature(id=uuid.uuid4().hex, geometry=Polygon(geometry),properties={"objectType": "annotation"}) + + features.append(new_feature) + feature_collection = FeatureCollection(features) + + return feature_collection + + +def saveFinalMask(s, params): + logging.info(f"{s['filename']} - \tsaveUsableRegion") + + mask = s["img_mask_use"] + for mask_force in s["img_mask_force"]: + mask[s[mask_force]] = 0 + + io.imsave(s["outdir"] + os.sep + s["filename"] + "_mask_use.png", img_as_ubyte(mask)) + + + if strtobool(params.get("use_mask", "True")): # should we create and save the fusion mask? + img = s.getImgThumb(s["image_work_size"]) + out = blend2Images(img, mask) + io.imsave(s["outdir"] + os.sep + s["filename"] + "_fuse.png", img_as_ubyte(out)) + + return + + +def saveAssociatedImage(s, key:str, dim:int): + logging.info(f"{s['filename']}- save{key.capitalize()}") + osh = s["os_handle"] + + if not key in osh.associated_images: + message = f"{s['filename']}- save{key.capitalize()} Can't Read '{key}' Image from Slide's Associated Images" + logging.warning(message) + s["warnings"].append(message) + return + + # get asscociated image by key + associated_img = osh.associated_images[key] + (width, height) = associated_img.size + + # calulate the width or height depends on dim + if width > height: + h = round(dim * height / width) + size = (dim, h) + else: + w = round(dim * width / height) + size = (w, dim) + + associated_img = associated_img.resize(size) + associated_img = np.asarray(associated_img)[:, :, 0:3] + io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{key}.png", associated_img) + +def saveMacro(s, params): + dim = params.get("small_dim", 500) + saveAssociatedImage(s, "macro", dim) + return + +def saveMask(s, params): + logging.info(f"{s['filename']} - \tsaveMaskUse") + suffix = params.get("suffix", None) + # check suffix param + if not suffix: + msg = f"{s['filename']} - \tPlease set the suffix for mask use." + logging.error(msg) + return + + # save mask + io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{suffix}.png", img_as_ubyte(s["img_mask_use"])) + + +def saveMask2Geojson(s, params): + + mask_name = params.get('mask_name', 'img_mask_use') + suffix = params.get("suffix", None) + logging.info(f"{s['filename']} - \tsaveMaskUse2Geojson: {mask_name}") + # check suffix param + if not suffix: + msg = f"{s['filename']} - \tPlease set the suffix for mask use in geojson." + logging.error(msg) + return + + # check if the mask name exists + if s.get(mask_name, None) is None: + msg = f"{s['filename']} - \tThe `{mask_name}` mask dosen't exist. Please use correct mask name." + logging.error(msg) + return + # convert mask to geojson + geojson = binaryMask2Geojson(s, s[mask_name]) + + # save mask as genjson file + with open(f"{s['outdir']}{os.sep}{s['filename']}_{suffix}.geojson", 'w') as f: + dump(geojson, f) + + + +def saveThumbnails(s, params): + logging.info(f"{s['filename']} - \tsaveThumbnail") + # we create 2 thumbnails for usage in the front end, one relatively small one, and one larger one + img = s.getImgThumb(params.get("image_work_size", "1.25x")) + io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb.png", img) + + img = s.getImgThumb(params.get("small_dim", 500)) + io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb_small.png", img) + return diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/TileExtractionModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/TileExtractionModule.py new file mode 100644 index 00000000..0101ca49 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/TileExtractionModule.py @@ -0,0 +1,610 @@ +""" +A standalone tile extraction module to locate tile bounding boxes in usable tissue region obtained by previous steps. +Coordinates are saved in the half-open 4-tuple convention of (left, top, right, bottom), where `right` and `bottom` +are open. +""" +import os +import openslide +import json +from histoqc.BaseImage import BaseImage +from typing import Callable, Dict, Any, List, Tuple, Union +import numpy as np +from PIL import Image, ImageDraw +from skimage.measure import regionprops +from contextlib import contextmanager +from distutils.util import strtobool +import logging +from histoqc.import_wrapper.typing import Literal, get_args +# from histoqc.import_wrapper.helper import dynamic_import +# __TYPE_GET_ARGS = Callable[[Type, ], Tuple[Any, ...]] +# Literal: TypeVar = dynamic_import("typing", "Literal", "typing_extensions") +# get_args: __TYPE_GET_ARGS = dynamic_import("typing", "get_args", "typing_extensions") + +TYPE_TILE_SIZE = Literal['tile_size'] +TYPE_TILE_STRIDE = Literal['tile_stride'] +TYPE_TISSUE_RATIO = Literal['tissue_ratio'] +TYPE_OUTPUT_ROOT = Literal['tile_output'] +TYPE_LOCK = Literal['lock'] +TYPE_SUFFIX = Literal['suffix'] +TYPE_OUTLINE = Literal['outline'] +TYPE_WIDTH = Literal['width'] +TYPE_SAVE_FLAG = Literal['save_image'] +PARAMS = Literal[TYPE_TILE_SIZE, TYPE_TILE_STRIDE, TYPE_TISSUE_RATIO, + TYPE_OUTPUT_ROOT, TYPE_SUFFIX, + TYPE_LOCK, TYPE_OUTLINE, TYPE_WIDTH, TYPE_SAVE_FLAG] + + +TYPE_BBOX_FLOAT = Tuple[float, float, float, float] +TYPE_BBOX_INT = Tuple[int, int, int, int] + + +def default_screen_identity(img: np.ndarray): + return True + + +class MaskTileWindows: + """ + Locate the window of tiles in the given downsampled mask. Output Convention: (left, top, right, bottom). + Coordinates are half-open as [left, right) and [top, bottom). + """ + + __rp_list: List + __mask: np.ndarray + __mask_pil: Image.Image + __tissue_thresh: float + # note that the tile size on the corresponding downsampled masks may no longer be integer, therefore cause + # loss of precision when convert back to original tile size after working on mask + __windows_on_mask: List[List[TYPE_BBOX_FLOAT]] + __windows_on_original_image: List[List[Tuple[int, int, int, int]]] + + @property + def mask_pil(self) -> Image.Image: + return Image.fromarray(self._mask) + + @property + def _mask(self) -> np.ndarray: + return self.__mask + + @property + def _rp_list(self) -> List: + """ + + Returns: + List of regionprops objects (see sklearn.regionprops) + """ + attr_name = '__rp_list' + if not hasattr(self, attr_name) or getattr(self, attr_name) is None: + setattr(self, attr_name, regionprops(self.__mask)) + return getattr(self, attr_name) + + def __init_mask(self, mask: np.ndarray): + self.__mask = mask + + def __init__(self, mask: np.ndarray, *, work_tile_size: int, work_stride: int, + size_factor: float, + tissue_thresh: float): + """ + Args: + mask: + work_tile_size: + work_stride: + size_factor: size ratio of image to mask + """ + self.__init_mask(mask.astype(np.int32)) + self.work_tile_size = work_tile_size + self.work_stride = work_stride + self.__size_factor = size_factor + self.__tissue_thresh = tissue_thresh + + @staticmethod + def validate_tile_mask_area_thresh(mask_pil: Image.Image, + tile_window_on_mask: TYPE_BBOX_FLOAT, + tissue_thresh: float) -> bool: + """ Validate whether the given tile window (left, top, right, bottom) contains sufficient tissue. This is + computed by calculating the tissue % in the corresponding mask region. + Note that if the coordinates are not int the actual area of region may be different + Args: + mask_pil: + tile_window_on_mask: List of (left, top, right, bottom). Open on right and bottom + tissue_thresh: minimum requirement of tissue percentage + + Returns: + True if the window has sufficient tissue. + """ + left, top, right, bottom = tile_window_on_mask + # window_on_mask_work = tuple(round(x) for x in tile_window_on_mask) + window_on_mask_work = round(left), round(top), round(right), round(bottom) + window_pil = mask_pil.crop(window_on_mask_work) + # noinspection PyTypeChecker + window_np = np.array(window_pil, copy=False) + window_bool = window_np > 0 + return window_bool.mean() >= tissue_thresh + + @staticmethod + def _valid_tile_windows_on_mask_helper(mask_pil: Image.Image, + tile_cand_pil_window_on_mask: List[TYPE_BBOX_FLOAT], + tissue_thresh: float) -> List[TYPE_BBOX_FLOAT]: + """ All tile windows with sufficient usable tissue from the grid of window candidates + Args: + mask_pil: + tile_cand_pil_window_on_mask: left, top, right, bottom. Potential candidates of windows + tissue_thresh: minimum requirement of tissue % + Returns: + List of validated windows (left, top, right, bottom) from the given candidates. + """ + # output = [] + # for window in tile_cand_pil_window: + # has_enough_usable_tissue = _MaskTileLocator.validate_tile_mask_area_thresh(mask_pil, window, + # tissue_thresh) + # if not has_enough_usable_tissue: + # continue + return [window for window in tile_cand_pil_window_on_mask + if MaskTileWindows.validate_tile_mask_area_thresh(mask_pil, window, tissue_thresh)] + + @staticmethod + def region_tile_cand_pil_window_on_mask(rp_bbox: TYPE_BBOX_INT, + work_tile_size: float, + work_stride: float) -> List[TYPE_BBOX_FLOAT]: + """ Split the region given by the region property bounding box into a grid of tile windows. Support overlapping. + This computes the all possible window given by the rp regardless of the tissue condition. Refinement can be + performed in further steps. + Args: + rp_bbox: sklearn region property style: [top, left, bottom, right]. Half-open + -- [Left, Right) and [Top, Bottom) + work_tile_size: + work_stride: + + Returns: + List of (left, top, right, bottom) tuples. Half-open + """ + top_rp, left_rp, bottom_rp, right_rp = rp_bbox + # top/left of the right/bottom most tile + tile_max_top, tile_max_left = MaskTileWindows.max_tile_bbox_top_left_coord(rp_bbox, + work_tile_size, + work_stride) + # obtain the top/left coord of all tile bboxes + all_tops = np.arange(top_rp, tile_max_top + 1, work_stride, dtype=int) + all_lefts = np.arange(left_rp, tile_max_left + 1, work_stride, dtype=int) + # since it's open on right and bottom, right = left + size and bottom = top + size, wherein the right-1 + # is the actual right most pixel and likewise bottom-1 is the actual bottom-most pixel. + def window(left, top, size): return left, top, (left + size), (top + size) + # get full tile bbox representation + all_tile_pil_window = [window(left, top, work_tile_size) for left in all_lefts for top in all_tops] + return all_tile_pil_window + + @staticmethod + def rp_tile_windows_on_mask(mask_pil, + rp_bbox: TYPE_BBOX_INT, + work_tile_size: float, + work_stride: float, + tissue_thresh: float) -> List[TYPE_BBOX_FLOAT]: + """Wrapper. Obtain the valid window with sufficient tissue from a list of region property objects based on + a given mask. For each individual region, a list of window in format (left, top, right, bottom) is obtained. + Resulting lists of windows of all regions are nested into a list as the object. + Args: + mask_pil: PIL handle of the downsampled mask + rp_bbox: bounding box of sklearn region properties. Note that its convention is [top, left, bottom, right], + which is different to the (left, top, right, bottom) in PIL and OpenSlide. Int coords. + work_tile_size: Working tile size on the downsampled mask + work_stride: Working stride size on the downsampled mask + tissue_thresh: Minimum requirement of tissue % in each window + + Returns: + List of (left, top, right, bottom) + """ + candidates = MaskTileWindows.region_tile_cand_pil_window_on_mask(rp_bbox, work_tile_size, work_stride) + return MaskTileWindows._valid_tile_windows_on_mask_helper(mask_pil, candidates, tissue_thresh) + + def _tile_windows_on_mask(self) -> List[List[TYPE_BBOX_FLOAT]]: + """Helper function to locate the windows of each region in format of (left, top, right, bottom) + Note that to retain the precision the coordinates are in the float form, rather than cast to int. + (Noticeably the right/bottom coords due to that size may not be integer) + THe actual validation of corresponding bbox regions on mask on the other hand should convert the coords + correspondingly. + Returns: + List of List of (left, top, right, bottom), nested by connected regions in the mask + """ + result_list: List[List[TYPE_BBOX_FLOAT]] = [] + # loop the regionprop list + for region in self._rp_list: + # get bounding box of the individual region + rp_bbox = region.bbox + # get list of possible tile bounding boxes within the region bounding box, computed from + # tile size, stride, and tissue thresh + windows: List[TYPE_BBOX_FLOAT] = MaskTileWindows.rp_tile_windows_on_mask(self.mask_pil, + rp_bbox, + self.work_tile_size, + self.work_stride, + self.__tissue_thresh) + # result_list += windows + result_list.append(windows) + return result_list + + @property + def windows_on_mask(self) -> List[List[TYPE_BBOX_FLOAT]]: + """ + Returns: + Obtain the cached tile windows on the given mask. Results are cached. + """ + if not hasattr(self, '__windows_on_mask') or self.__windows_on_mask is None: + self.__windows_on_mask = self._tile_windows_on_mask() + return self.__windows_on_mask + + @property + def windows_on_original_image(self) -> List[List[TYPE_BBOX_INT]]: + """Zoom the windows from the mask (which is often downsampled) to the original image, using the defined + size factor + Returns: + Zoomed windows on the original image (left, top, right, bottom) + """ + if not hasattr(self, '__windows_on_original_image') or self.__windows_on_original_image is None: + self.__windows_on_original_image = MaskTileWindows.__window_list_resize(self.windows_on_mask, + self.__size_factor) + return self.__windows_on_original_image + + @staticmethod + def max_tile_bbox_top_left_coord(rp_bbox: TYPE_BBOX_INT, work_tile_size: float, work_stride: float) \ + -> Tuple[int, int]: + """ find the coords of the top/left corner of the most right / bottom tile ever possible given the current + size and stride. + Args: + rp_bbox: [top, left, bottom, right]. Half-open -- [Left, Right) and [Top, Bottom). Note that this is the + convention of sklearn's region properties, which is different to the (left, top, right, bottom) used by + PIL or OpenSlide. The bbox of connected tissue regions on mask. Int coords. + work_tile_size: Tile size on the working mask, which might be downsampled. + work_stride: Stride size on the working mask, which might be downsampled. + Returns: + Tuple[int, int] + """ + assert work_stride > 0, f"work stride must be greater than 0 - got {work_stride}" + assert work_tile_size > 0, f"work tile size must be greater than 0 - got {work_tile_size}" + + # not for skimage regionprops, the bbox is half-open at the bottom / right coordinates. + # [left, right) and [top, bottom). Hence, the "+1" operation below for coord computation + # is already priced-in + top_rp, left_rp, bottom_rp, right_rp = rp_bbox + # start + n_step * stride + tile_size = bottom/rightmost --> (rp_limit - tile_size) // stride = max step + max_step_horiz = (right_rp - left_rp - work_tile_size) / work_stride + max_step_vert = (bottom_rp - top_rp - work_tile_size) / work_stride + tile_max_left = left_rp + max_step_horiz * work_stride + tile_max_top = top_rp + max_step_vert * work_stride + + assert round(tile_max_left + work_tile_size) <= right_rp,\ + f"left + size check" \ + f" {tile_max_left + work_tile_size} = {tile_max_left} + {work_tile_size} <= {right_rp} fail" + assert round(tile_max_top + work_tile_size) <= bottom_rp,\ + f"top + size check" \ + f" {tile_max_top + work_tile_size} = {tile_max_top} + {work_tile_size} <= {bottom_rp} fail" + return int(tile_max_top), int(tile_max_left) + + @staticmethod + def __window_resize_helper(window_on_mask: Union[TYPE_BBOX_FLOAT, TYPE_BBOX_INT], size_factor) -> TYPE_BBOX_INT: + """Helper function to zoom the window coordinates on downsampled mask to the original sized image. + Convert back to int. + Args: + window_on_mask: (left, top, right, bottom) + size_factor: size_factor = img_size / mask_size + + Returns: + Resized (left, top, right, bottom) + """ + # work around of the type check of IDE. Otherwise, I will return directly + left, top, right, bottom, *rest = tuple(int(r * size_factor) for r in window_on_mask) + return left, top, right, bottom + + @staticmethod + def __window_list_resize(window_on_mask: Union[List[List[TYPE_BBOX_FLOAT]], List[List[TYPE_BBOX_INT]]], + size_factor: float) -> List[List[Tuple[int, int, int, int]]]: + """ + Args: + window_on_mask: list of list of (left, top, right, bottom), nested at region-level. + size_factor: size_factor = img_size / mask_size + + Returns: + Resized (left, top, right, bottom) + """ + return [ + [ + MaskTileWindows.__window_resize_helper(win, size_factor) + for win in win_list_region + ] + for win_list_region in window_on_mask + ] + + +DRAW_TARGET_IMG_THUMB = Literal['img_thumb'] +DRAW_TARGET_MASK = Literal['mask_thumb'] + + +class TileExtractor: + + __tile_window_cache: Dict[str, MaskTileWindows] + + def __init__(self, s: BaseImage): + self.__tile_window_cache = dict() + self.baseimage = s + self._filename = s['filename'] + + @staticmethod + def _tile_windows_helper(mask_use_for_tiles, + img_w, img_h, + tile_size: int = 256, tile_stride: int = 256, + tissue_thresh: float = 0.5, + filename: str = '') -> MaskTileWindows: + """ + Initiated the mask_tile_window using a given tile_size, tile_stride, and tissue_thresh. + Args: + mask_use_for_tiles: + img_w: width of base image in original mag + img_h: height of base image in original mag + tile_size: + tile_stride: + tissue_thresh: + + Returns: + + """ + mask = mask_use_for_tiles + # image_handle: openslide.OpenSlide = s["os_handle"] + # img_w, img_h = image_handle.dimensions + assert mask is not None, f"{filename}: mask is not initialized" + assert isinstance(mask, np.ndarray), f"The mask is expected to be a Numpy NDArray" + + # shape --> row column --> height width + mask_w, mask_h = mask.shape[1], mask.shape[0] + size_factor = img_w / mask_w + size_factor_ref = img_h / mask_h + assert size_factor > 0, f"{size_factor} negative" + if round(size_factor) != round(size_factor_ref): + logging.warning(f"{filename}: Aspect Ratio Mismatch: {img_w, img_h} vs. " + f"{mask_w, mask_h}") + logging.debug(f"{filename}: size ratio between img and thumb: {size_factor}") + work_tile_size = tile_size / size_factor + work_stride = tile_stride / size_factor + mask_tile_windows = MaskTileWindows(mask, work_tile_size=work_tile_size, + work_stride=work_stride, + size_factor=size_factor, tissue_thresh=tissue_thresh) + + return mask_tile_windows + + # @staticmethod + # def _img_use_for_tiles(s: BaseImage): + # return s.getImgThumb(s["image_work_size"]) + # + # @staticmethod + # def _mask_use_for_tiles(s: BaseImage): + # """ + # For now, we just use the img_mask_use field in BaseImage + # Args: + # s: + # + # Returns: + # + # """ + # return s['img_mask_use'] + + def tile_windows(self, + mask_use_for_tiles: np.ndarray, + img_w, img_h, + tile_size: int = 256, tile_stride: int = 256, + tissue_thresh: float = 0.5, *, force_rewrite: bool = False) -> MaskTileWindows: + # s = self.baseimage + key = f"{tile_size}_{tile_stride}_{tissue_thresh}" + root_dict = self.__tile_window_cache + entry = root_dict.get(key, None) + if entry is None or force_rewrite: + # img_w, img_h = s['os_handle'].dimensions + root_dict[key] = TileExtractor._tile_windows_helper(mask_use_for_tiles, img_w, img_h, + tile_size, + tile_stride, tissue_thresh) + return root_dict[key] + + def clear_tile_window(self, tile_size: int = 256, tile_stride: int = 256, + tissue_thresh: float = 0.5): + key = f"{tile_size}_{tile_stride}_{tissue_thresh}" + root_dict = self.__tile_window_cache + root_dict.pop(key, None) + + @staticmethod + def __bbox_overlay_helper(img: np.ndarray, + windows_grouped_by_region: Union[List[List[TYPE_BBOX_INT]], List[List[TYPE_BBOX_FLOAT]]], + outline: str = 'green', width: int = 2) -> Image.Image: + """ + Helper function to draw bbox and overlay to the img thumbnail + Args: + img: image to overlay the bboxes + windows_grouped_by_region: + outline: outline color of the bboxes + width: width of bboxes outlines + + Returns: + the PIL object after drawing the bboxes over the img + """ + # avoid inplace change + copy = np.array(img, copy=True) + draw_pil = Image.fromarray(copy, mode="RGB") + draw_context = ImageDraw.Draw(draw_pil, mode="RGB") + for window_list in windows_grouped_by_region: + for window in window_list: + # ImageDraw accepts x0 y0 x1 y1 + window = tuple(round(x) for x in window) + left, top, right, bottom = window + draw_context.rectangle((left, top, right, bottom), outline=outline, width=width) + + return draw_pil + + def bbox_overlay(self, + img_use_for_tiles, + mask_use_for_tiles: np.ndarray, + img_w, img_h, + target: Literal[DRAW_TARGET_IMG_THUMB, DRAW_TARGET_MASK] = 'img_thumb', + tile_size_on_img: int = 256, tile_stride_on_img: int = 256, + tissue_thresh: float = 0.5, *, force_rewrite: bool = False, outline='green', width=2 + ) -> Image.Image: + # note that the window_on_image corresponding to the original image at base-mag. + # the thumb image is at the same size of masks + tile_windows: MaskTileWindows = self.tile_windows(mask_use_for_tiles, img_w, img_h, + tile_size_on_img, tile_stride_on_img, tissue_thresh, + force_rewrite=force_rewrite) + windows_on_mask: List[List[TYPE_BBOX_FLOAT]] = tile_windows.windows_on_mask + # all properties below are cached + mapping: Dict[Literal[DRAW_TARGET_IMG_THUMB, DRAW_TARGET_MASK], np.ndarray] = { + get_args(DRAW_TARGET_IMG_THUMB)[0]: img_use_for_tiles, + get_args(DRAW_TARGET_MASK)[0]: mask_use_for_tiles, + } + target_img_arr = mapping[target] + return self.__bbox_overlay_helper(target_img_arr, windows_grouped_by_region=windows_on_mask, outline=outline, + width=width) + + @staticmethod + def __window_convert(window: Tuple[int, int, int, int]) -> Tuple[Tuple[int, int], Tuple[int, int]]: + """TO OpenSlide style (left, top) + (width, height) + Args: + window: + + Returns: + + """ + left, top, right, bottom = window + + left = int(left) + top = int(top) + right = int(right) + bottom = int(bottom) + + location = (left, top) + width = right - left + height = bottom - top + size = (width, height) + return location, size + + @contextmanager + def mp_tile_window_manager(self, + mask_use_for_tiles, + img_w, + img_h, + tile_size: int = 256, + tile_stride: int = 256, tissue_thresh: float = 0.5, force_rewrite: bool = False): + """Avoid pickling the MaskTileWindow in MP. Clear cached objects on exit + Args: + mask_use_for_tiles: tissue mask for tile extraction + img_w: width of the image under original magnification + img_h: height of the image under original magnification + tile_size: + tile_stride: + tissue_thresh: + force_rewrite: + + Returns: + + """ + self.tile_windows(mask_use_for_tiles, img_w, img_h, + tile_size, tile_stride, tissue_thresh, force_rewrite=force_rewrite) + yield + # clearn cache + self.clear_tile_window(tile_size, tile_stride, tissue_thresh) + + @staticmethod + def __save_on_flag(path: str, prefix: str, suffix: str, + window: Tuple[int, int, int, int], + region: Image.Image, + save_image: bool): + + if save_image: + full_export_dest = os.path.join(path, f"{prefix}_{window}{suffix}") + actual_path, actual_base = os.path.split(full_export_dest) + os.makedirs(actual_path, exist_ok=True) + region.save(full_export_dest) + + def valid_tile_extraction(self, + s: BaseImage, + mask_use_for_tiles, + img_w, + img_h, + path, *, prefix='', suffix='.png', + screen_callbacks: Callable = default_screen_identity, + tile_size: int = 256, + tile_stride: int = 256, + tissue_thresh: float = 0.5, + save_image: bool = True, + force_rewrite: bool = False) -> List[List[Tuple[int, int, int, int]]]: + + tw: MaskTileWindows = self.tile_windows(mask_use_for_tiles, img_w, img_h, + tile_size, tile_stride, tissue_thresh, force_rewrite=force_rewrite) + window_list_of_regions = tw.windows_on_original_image + image_handle: openslide.OpenSlide = s["os_handle"] + valid_window_list_all_regions: List[List[Tuple[int, int, int, int]]] = [] + for region_windows in window_list_of_regions: + region_windows: List[Tuple[int, int, int, int]] + valid_windows_curr: List[Tuple[int, int, int, int]] = [] + for window in region_windows: + window: Tuple[int, int, int, int] + # just to make the convention clear + location, size = TileExtractor.__window_convert(window) + region = image_handle.read_region(location, 0, size) + tile_np = np.array(region, copy=False) + valid_flag = screen_callbacks(tile_np) + if not valid_flag: + continue + valid_windows_curr.append(window) + # prefix can be the slide name + TileExtractor.__save_on_flag(path=path, prefix=prefix, suffix=suffix, window=window, + region=region, save_image=save_image) + if len(valid_windows_curr) > 0: + valid_window_list_all_regions.append(valid_windows_curr) + return valid_window_list_all_regions + + +def extract(s: BaseImage, params: Dict[PARAMS, Any]): + logging.info(f"{s['filename']} - \textract") + with params['lock']: + slide_out = s['outdir'] + tile_output_dir = params.get('tile_output', os.path.join(slide_out, 'tiles')) + os.makedirs(tile_output_dir, exist_ok=True) + prefix, _ = os.path.splitext(os.path.basename(s['filename'])) + + suffix: str = params.get('suffix', '.png') + outline: str = params.get('outline', "green") + width: int = int(params.get('width', 2)) + save_image: bool = bool(strtobool(params.get("save_image", "False"))) + tile_size = int(params.get('tile_size', 256)) + tile_stride = int(params.get('tile_stride', 256)) + tissue_thresh = float(params.get('tissue_ratio', 0.5)) + + img_use_for_tiles = s.getImgThumb(s["image_work_size"]) + mask_use_for_tiles = s['img_mask_use'] + image_handle: openslide.OpenSlide = s['os_handle'] + img_w, img_h = image_handle.dimensions + + tile_extractor = TileExtractor(s) + # binding MaskTileWindow into the BaseImage might not be the best idea in multiprocessing + # clear the cached MaskTileWindow to prevent hanging of the pool + with tile_extractor.mp_tile_window_manager(mask_use_for_tiles, img_w, img_h, + tile_size, tile_stride, tissue_thresh, False): + window_list_of_regions: List[List[Tuple[int, int, int, int]]]\ + = tile_extractor.valid_tile_extraction(s, + mask_use_for_tiles, + img_w, img_h, + tile_output_dir, + prefix=prefix, + suffix=suffix, + tile_size=tile_size, + tile_stride=tile_stride, + tissue_thresh=tissue_thresh, + save_image=save_image, + force_rewrite=False) + # # + overlay_export = os.path.join(s["outdir"], f"{s['filename']}_tile_bbox.png") + bbox_overlay = tile_extractor.bbox_overlay(img_use_for_tiles, mask_use_for_tiles, + img_w, img_h, + target="img_thumb", + tile_size_on_img=tile_size, + tile_stride_on_img=tile_stride, + tissue_thresh=tissue_thresh, + force_rewrite=False, outline=outline, width=width) + + bbox_overlay.save(overlay_export) + bbox_json_loc = os.path.join(slide_out, f'{s["filename"]}_bbox.json') + with open(bbox_json_loc, 'w') as root: + json.dump(window_list_of_regions, root, indent=4) + return diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__init__.py new file mode 100644 index 00000000..3349fda4 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__init__.py @@ -0,0 +1,4 @@ +try: + from ._version import version as __version__ +except ImportError: # pragma: no cover + __version__ = "not-installed" diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__main__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__main__.py new file mode 100644 index 00000000..333782a1 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__main__.py @@ -0,0 +1,215 @@ +import argparse +import configparser +import datetime +import glob +import logging +import multiprocessing +import os +import sys +import time +from functools import partial + +from histoqc._pipeline import BatchedResultFile +from histoqc._pipeline import MultiProcessingLogManager +from histoqc._pipeline import load_pipeline +from histoqc._pipeline import log_pipeline +from histoqc._pipeline import move_logging_file_handler +from histoqc._pipeline import setup_logging +from histoqc._pipeline import setup_plotting_backend +from histoqc._worker import worker +from histoqc._worker import worker_setup +from histoqc._worker import worker_success +from histoqc._worker import worker_error +from histoqc.config import read_config_template +from histoqc.data import managed_pkg_data +from histoqc._parser import get_argument_parser + +parser = get_argument_parser() + +@managed_pkg_data +def main(argv=None): + """main entry point for histoqc pipelines""" + if argv is None: + argv = sys.argv[1:] + + args = parser.parse_args(argv) + + # --- multiprocessing and logging setup ----------------------------------- + + setup_logging(capture_warnings=True, filter_warnings='ignore') + mpm = multiprocessing.Manager() + lm = MultiProcessingLogManager('histoqc', manager=mpm) + + # --- parse the pipeline configuration ------------------------------------ + config = configparser.ConfigParser() + if not args.config: + lm.logger.warning(f"Configuration file not set (--config), using default") + config.read_string(read_config_template('default')) + elif os.path.exists(args.config): + config.read(args.config) #Will read the config file + else: + lm.logger.warning(f"Configuration file {args.config} assuming to be a template...checking.") + config.read_string(read_config_template(args.config)) + + # --- provide models, pen and templates as fallbacks from package data ---- + + managed_pkg_data.inject_pkg_data_fallback(config) + + # --- load the process queue (error early) -------------------------------- + + _steps = log_pipeline(config, log_manager=lm) + process_queue = load_pipeline(config) + + # --- check symlink target ------------------------------------------------ + + if args.symlink is not None: + if not os.path.isdir(args.symlink): + lm.logger.error("error: --symlink {args.symlink} is not a directory") + return -1 + + # --- create output directory and move log -------------------------------- + args.outdir = os.path.expanduser(args.outdir) + os.makedirs(args.outdir, exist_ok=True) + move_logging_file_handler(logging.getLogger(), args.outdir, args.debug) + + if BatchedResultFile.results_in_path(args.outdir): + if args.force: + lm.logger.info("Previous run detected....overwriting (--force set)") + else: + lm.logger.info("Previous run detected....skipping completed (--force not set)") + + results = BatchedResultFile(args.outdir, + manager=mpm, + batch_size=args.batch, + force_overwrite=args.force) + + # --- document configuration in results ----------------------------------- + + results.add_header(f"start_time:\t{datetime.datetime.now()}") + results.add_header(f"pipeline:\t{' '.join(_steps)}") + results.add_header(f"outdir:\t{os.path.realpath(args.outdir)}") + results.add_header(f"config_file:\t{os.path.realpath(args.config) if args.config is not None else 'default'}") + results.add_header(f"command_line_args:\t{' '.join(argv)}") + + # --- receive input file list (there are 3 options) ----------------------- + args.basepath = os.path.expanduser(args.basepath) + if len(args.input_pattern) > 1: + # more than one input_pattern is interpreted as a list of files + # (basepath is ignored) + files = list(args.input_pattern) + + elif args.input_pattern[0].endswith('.tsv'): + # input_pattern is a tsv file containing a list of files + files = [] + with open(args.input_pattern[0], 'rt') as f: + for line in f: + if line[0] == "#": + continue + fn = line.strip().split("\t")[0] + files.append(os.path.join(args.basepath, fn)) + + else: + # input_pattern is a glob pattern + pth = os.path.join(args.basepath, args.input_pattern[0]) + files = glob.glob(pth, recursive=True) + + lm.logger.info("-" * 80) + num_files = len(files) + lm.logger.info(f"Number of files detected by pattern:\t{num_files}") + + # --- start worker processes ---------------------------------------------- + + _shared_state = { + 'process_queue': process_queue, + 'config': config, + 'outdir': args.outdir, + 'log_manager': lm, + 'lock': mpm.Lock(), + 'shared_dict': mpm.dict(), + 'num_files': num_files, + 'force': args.force, + 'seed': args.seed, + 'debug': args.debug + } + failed = mpm.list() + setup_plotting_backend(lm.logger, debug=args.debug) + + try: + if args.nprocesses > 1: + + with lm.logger_thread(): + print(args.nprocesses) + with multiprocessing.Pool(processes=args.nprocesses, + initializer=worker_setup, + initargs=(config,)) as pool: + try: + for idx, file_name in enumerate(files): + _ = pool.apply_async( + func=worker, + args=(idx, file_name), + kwds=_shared_state, + callback=partial(worker_success, result_file=results), + error_callback=partial(worker_error, failed=failed), + ) + + finally: + pool.close() + pool.join() + + else: + for idx, file_name in enumerate(files): + try: + _success = worker(idx, file_name, **_shared_state) + except Exception as exc: + worker_error(exc, failed) + continue + else: + worker_success(_success, results) + + except KeyboardInterrupt: + lm.logger.info("-----REQUESTED-ABORT-----\n") + + else: + lm.logger.info("----------Done-----------\n") + + finally: + lm.logger.info(f"There are {len(failed)} explicitly failed images (available also in error.log)," + " warnings are listed in warnings column in output") + + for file_name, error, tb in failed: + lm.logger.info(f"{file_name}\t{error}\n{tb}") + timing_file = os.path.join(args.outdir, "processing_times.tsv") + file_exists = os.path.exists(timing_file) + with open(timing_file, 'a') as tf: + if not file_exists: + tf.write("slide_name\tfile_size_mb\tprocessing_time_sec\n") + + for file_name in files: + base_name = os.path.basename(file_name) + file_size_bytes = os.path.getsize(file_name) if os.path.exists(file_name) else 0 + file_size_mb = file_size_bytes / (1024 * 1024) + + time_key = f"time_{base_name}" + time_taken = _shared_state['shared_dict'].get(time_key, 0.0) + + tf.write(f"{base_name}\t{file_size_mb:.2f}\t{time_taken:.2f}\n") + lm.logger.info(f"Processing times appended to {timing_file}") + + if args.symlink is not None: + origin = os.path.realpath(args.outdir) + target = os.path.join( + os.path.realpath(args.symlink), + os.path.basename(origin) + ) + try: + os.symlink(origin, target, target_is_directory=True) + lm.logger.info("Symlink to output directory created") + except (FileExistsError, FileNotFoundError): + lm.logger.error( + f"Error creating symlink to output in '{args.symlink}', " + f"Please create manually: ln -s {origin} {target}" + ) + return 0 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_parser.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_parser.py new file mode 100644 index 00000000..26ff1d07 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_parser.py @@ -0,0 +1,44 @@ +import argparse +import time + +def get_argument_parser(): + """Return the argument parser for histoqc""" + parser = argparse.ArgumentParser(prog="histoqc", description='Run HistoQC main quality control pipeline for digital pathology images') + parser.add_argument('input_pattern', + help="input filename pattern (try: *.svs or target_path/*.svs )," + " or tsv file containing list of files to analyze", + nargs="+") + parser.add_argument('-o', '--outdir', + help="outputdir, default ./histoqc_output_YYMMDD-hhmmss", + default=f"./histoqc_output_{time.strftime('%Y%m%d-%H%M%S')}", + type=str) + parser.add_argument('-p', '--basepath', + help="base path to add to file names," + " helps when producing data using existing output file as input", + default="", + type=str) + parser.add_argument('-c', '--config', + help="config file to use, either by name supplied by histoqc.config (e.g., v2.1) or filename", + type=str) + parser.add_argument('-f', '--force', + help="force overwriting of existing files", + action="store_true") + parser.add_argument('-b', '--batch', + help="break results file into subsets of this size", + type=int, + default=None) + parser.add_argument('-s', '--seed', + help="set a seed used to produce a random number in all modules", + type=int, + default=None) + parser.add_argument('-n', '--nprocesses', + help="number of processes to launch", + type=int, + default=1) + parser.add_argument('--symlink', metavar="TARGET_DIR", + help="create symlink to outdir in TARGET_DIR", + default=None) + + parser.add_argument('--debug', action='store_true', help="trigger debugging behavior") + + return parser \ No newline at end of file diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_pipeline.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_pipeline.py new file mode 100644 index 00000000..51727424 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_pipeline.py @@ -0,0 +1,409 @@ +"""histoqc._pipeline + +helper utilities for running the HistoQC pipelines + +""" +import glob +import logging +import multiprocessing +import os +import platform +import shutil +import threading +import warnings +from contextlib import ExitStack +from contextlib import contextmanager +from contextlib import nullcontext +from importlib import import_module +from logging.config import dictConfig +from logging.handlers import QueueHandler + + +# --- logging helpers ------------------------------------------------- + +DEFAULT_LOG_FN = "error.log" + + +def setup_logging(*, capture_warnings, filter_warnings): + """configure histoqc's logging instance + + Parameters + ---------- + capture_warnings: `bool` + flag if warnings should be captured by the logging system + filter_warnings: `str` + action for warnings.filterwarnings + """ + dictConfig({ + 'version': 1, + 'formatters': { + 'default': { + 'class': 'logging.Formatter', + 'format': '%(asctime)s - %(levelname)s - %(message)s', + } + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'level': 'INFO', + 'formatter': 'default', + }, + 'logfile': { + 'class': 'logging.FileHandler', + 'level': 'WARNING', + 'filename': DEFAULT_LOG_FN, + 'mode': 'w', # we initially start overwriting existing logs + 'formatter': 'default', + }, + }, + 'root': { + 'level': 'INFO', + 'handlers': ['console', 'logfile'] + } + }) + + # configure warnings too... + warnings.filterwarnings(filter_warnings) + logging.captureWarnings(capture_warnings) + + + +def move_logging_file_handler(logger, destination, debug=False): + """ + Redirect FileHandlers to a new destination directory, moving the log file. + + Parameters + ---------- + logger : logging.Logger + Logger instance whose FileHandlers will be moved. + destination : str + Directory path for the new log files. + debug : bool + If True, set the log level of the new handlers to DEBUG. If False, the new handlers retain the original handler's log level. + """ + for handler in reversed(logger.handlers): + if not isinstance(handler, logging.FileHandler): + continue + if handler.baseFilename != os.path.join(os.getcwd(), DEFAULT_LOG_FN): + continue + if not os.path.exists(handler.baseFilename): + logger.warning(f"Original log file {handler.baseFilename!r} does not exist, cannot move.") + continue + + if not destination.endswith(handler.baseFilename): + destination = os.path.join(destination, os.path.relpath(handler.baseFilename, os.getcwd())) + logger.info(f'moving fileHandler {handler.baseFilename!r} to {destination!r}') + + # remove handler + logger.removeHandler(handler) + handler.close() + + new_filename = shutil.copy2(handler.baseFilename, destination) + try: + os.remove(handler.baseFilename) + except OSError: + # on WSL/Windows, file might still be locked briefly + logger.warning(f"Could not remove original log file {handler.baseFilename!r}") + + # create and attach new handler + new_handler = logging.FileHandler(new_filename, mode='a') + new_handler.setLevel(logging.DEBUG if debug else handler.level) + new_handler.setFormatter(handler.formatter) + logger.addHandler(new_handler) + + +class MultiProcessingLogManager: + + def __init__(self, logger_name, *, manager): + """create a MultiProcessingLogManager + + Note: this uses a multiprocessing Queue to correctly transfer + logging information from worker processes to the main + process logging instance + + Parameters + ---------- + logger_name : str + the name of the logger instance + manager : multiprocessing.Manager + the mp Manager instance used for creating sharable context + """ + self._logger_name = logger_name + self._log_queue = manager.Queue() + self._log_thread_active = False + + @property + def is_main_process(self): + return multiprocessing.current_process().name == "MainProcess" + + @property + def logger(self): + """returns the logger instance""" + if self.is_main_process: + return logging.getLogger(self._logger_name) + else: + root = logging.getLogger() + if not root.hasHandlers(): + qh = QueueHandler(self._log_queue) + root.setLevel(logging.INFO) + root.addHandler(qh) + # note: this should be revisited and set by the main process + warnings.filterwarnings('ignore') + logging.captureWarnings(True) + return root + + @contextmanager + def logger_thread(self): + """context manager for multiprocess logging + + Note: this starts the thread responsible for handing the log records + emitted by child processes to the main logger instance + """ + assert self.is_main_process + assert not self._log_thread_active # not reentrant... + self._log_thread_active = True + + def process_queue(q, ln): + main_logger = logging.getLogger(ln) + while True: + log_record = q.get() + if log_record is None: + break + main_logger.handle(log_record) + + lt = threading.Thread(target=process_queue, args=(self._log_queue, self._logger_name)) + lt.start() + try: + yield + finally: + self._log_queue.put(None) + lt.join() + self._log_thread_active = False + + +def log_pipeline(config, log_manager): + """log the pipeline information + + Parameters + ---------- + config : configparser.ConfigParser + log_manager : MultiProcessingLogManager + """ + assert log_manager.is_main_process + steps = config.get(section='pipeline', option='steps').splitlines() + + log_manager.logger.info("the pipeline will use these steps:") + for process in steps: + mod_name, func_name = process.split('.') + log_manager.logger.info(f"\t\t{mod_name}\t{func_name}") + return steps + + +# --- worker process helpers ------------------------------------------ + +def setup_plotting_backend(logger=None, debug=False): + """loads the correct matplotlib backend + + Parameters + ---------- + logger : + the logging.Logger instance + """ + import matplotlib + + if debug and (platform.system() == "Windows" or os.environ.get('DISPLAY')): + matplotlib.use('TkAgg') + logger.info("Display found and debug mode enabled. Using TkAgg backend for matplotlib.") + + else: + matplotlib.use('Agg') + + +class BatchedResultFile: + """BatchedResultFile encapsulates the results writing + + Note: this is multiprocessing safe + """ + FILENAME_GLOB = "results*.tsv" + FILENAME_NO_BATCH = "results.tsv" + FILENAME_BATCH = "results_{:d}.tsv" + + def __init__(self, dst, *, manager, batch_size=None, force_overwrite=False): + """create a BatchedResultFile instance + + Parameters + ---------- + dst : os.PathLike + the output directory for the result files + manager : multiprocessing.Manager + the mp Manager instance used for creating sharable context + batch_size : int or None + after `batch_size` calls to increment_counter() the results + file will be rotated + force_overwrite : bool + overwrite result files if they are already present. default + is to append. + """ + if not os.path.isdir(dst): + raise ValueError(f"dst {dst!r} is not a directory or does not exist") + if batch_size is not None: + batch_size = int(batch_size) + if batch_size < 1: + raise ValueError(f"batch_size must be > 0, got {batch_size}") + self.dst = os.path.abspath(dst) + self.batch_size = batch_size + self.force_overwrite = bool(force_overwrite) + + # multiprocessing safety + self._headers = manager.list() + self._rlock = manager.RLock() + + # internal state + self._batch = 0 + self._completed = 0 + self._first = True + + # contextmanager + self._f = None + self._stack = None + + def __enter__(self): + self._stack = ExitStack() + self._stack.callback(self.increment_counter) + self._stack.enter_context(self._rlock) + self._f = nullcontext(self._stack.enter_context(self._file())) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._stack.close() + self._stack = None + self._f = None + + def _file(self): + if self._f is not None: + return self._f # we're in the context manager + + if self.batch_size is None: + fn = self.FILENAME_NO_BATCH + else: + fn = self.FILENAME_BATCH.format(self._batch) + pth = os.path.join(self.dst, fn) + + mode = "a" + if self._first and os.path.isfile(pth): + if self.force_overwrite: + mode = "w" + else: + mode = "a" + self._first = False + return open(pth, mode=mode) + + def add_header(self, header): + """add a new header to the results file + + Parameters + ---------- + header : + a string that can be written to the file by calling the + write_headers method + """ + self._headers.append(header) + + def is_empty_file(self): + """return if the current file is empty + + Note: this is useful to determine if you want to write_headers + ... technically the name is incorrect, but in this use case + pos 0 is equivalent to an empty file + """ + with self._rlock, self._file() as f: + return f.tell() == 0 + + def write_headers(self, *args): + """write the internally collected headers to the current file + + Parameters + ---------- + state : dict + the current histoqc implementation writes the outputs to + the header files, so *args supports `state` for now. + overwrite in subclass to control header output behavior + """ + with self._rlock: + # write headers + for line in self._headers: + self.write_line(f"#{line}") + # histoqc specific + _state, = args + _outputs = '\t'.join(_state['output']) + line = f"#dataset:{_outputs}\twarnings" + self.write_line(line) + + def write_line(self, text, end="\n"): + """write text to the file + + Parameters + ---------- + text : str + end : str + defaults to newline + """ + with self._rlock, self._file() as f: + f.write(text) + if end: + f.write(end) + + def increment_counter(self): + """increment the completed counter + + moves to the next batch as determined by batch_size + """ + # move on to the next batch if needed + with self._rlock: + self._completed += 1 + if self._completed and self.batch_size and self._completed % self.batch_size == 0: + self._batch += 1 + self._first = True + + @classmethod + def results_in_path(cls, dst): + """return if a dst path contains results files + + Parameters + ---------- + dst : os.PathLike + """ + return bool(glob.glob(os.path.join(dst, cls.FILENAME_GLOB))) + + +def load_pipeline(config): + """load functions and parameters from config + + Parameters + ---------- + config : configparser.ConfigParser + """ + steps = config.get(section='pipeline', option='steps').splitlines() + + process_queue = [] + for process in steps: + mod_name, func_name = process.split('.') + + try: + mod = import_module(f"histoqc.{mod_name}") + except ImportError: + raise NameError(f"Unknown module in pipeline from config file:\t {mod_name}") + + func_name = func_name.split(":")[0] # take base of function name + try: + func = getattr(mod, func_name) + except AttributeError: + raise NameError(f"Unknown function from module in pipeline from config file:\t {mod_name}.{func_name}") + + if config.has_section(process): + params = dict(config.items(section=process)) + else: + params = {} + + process_queue.append((func, params)) + return process_queue diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_worker.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_worker.py new file mode 100644 index 00000000..cc722adc --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_worker.py @@ -0,0 +1,106 @@ +"""histoqc worker functions""" +import os +import shutil +import numpy as np +from histoqc.BaseImage import BaseImage +from histoqc._pipeline import load_pipeline +from histoqc._pipeline import setup_plotting_backend +import time + + +# --- worker functions -------------------------------------------------------- + +def worker_setup(c): + """needed for multiprocessing worker setup""" + setup_plotting_backend() + load_pipeline(config=c) + + +def worker(idx, file_name, *, + process_queue, config, outdir, log_manager, lock, shared_dict, num_files, force, seed, debug): + """pipeline worker function""" + # set the seed + if seed is not None: + np.random.seed(seed) + # --- output directory preparation -------------------------------- + fname_outdir = os.path.join(outdir, os.path.basename(file_name)) + if os.path.isdir(fname_outdir): # directory exists + if not force: + log_manager.logger.warning( + f"{file_name} already seems to be processed (output directory exists)," + " skipping. To avoid this behavior use --force" + ) + return + else: + # remove entire directory to ensure no old files are present + shutil.rmtree(fname_outdir) + # create output dir + os.makedirs(fname_outdir) + + log_manager.logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}") + + try: + s = BaseImage(file_name, fname_outdir, seed, dict(config.items("BaseImage.BaseImage"))) + # 计时 + start_time = time.perf_counter() + for process, process_params in process_queue: + process_params["lock"] = lock + process_params["shared_dict"] = shared_dict + process(s, process_params) + s["completed"].append(process.__name__) + + except Exception as exc: + # reproduce histoqc error string + _oneline_doc_str = exc.__doc__.replace('\n', '') + err_str = f"{exc.__class__} {_oneline_doc_str} {exc}" + + log_manager.logger.error( + f"{file_name} - Error analyzing file (skipping): \t {err_str}" + ) + if exc.__traceback__.tb_next is not None: + func_tb_obj = str(exc.__traceback__.tb_next.tb_frame.f_code) + else: + func_tb_obj = str(exc.__traceback__) + + exc.__histoqc_err__ = (file_name, err_str, func_tb_obj) + raise exc + + else: + # TODO: + # the histoqc workaround below is due an implementation detail in BaseImage: + # BaseImage keeps an OpenSlide instance stored under os_handle and leaks a + # file handle. This will need fixing in BaseImage. + # -> best solution would be to make BaseImage a contextmanager and close + # and cleanup the OpenSlide handle on __exit__ + s["os_handle"] = None # need to get rid of handle because it can't be pickled + #记录成功处理的耗时 + elapsed = time.perf_counter() - start_time + base_name = os.path.basename(file_name) + shared_dict[f"time_{base_name}"] = elapsed + return s + + +def worker_success(s, result_file): + """success callback""" + if s is None: + return + + with result_file: + if result_file.is_empty_file(): + result_file.write_headers(s) + + _fields = '\t'.join([str(s[field]) for field in s['output']]) + _warnings = '|'.join(s['warnings']) + result_file.write_line("\t".join([_fields, _warnings])) + + +def worker_error(e, failed): + """error callback""" + if hasattr(e, '__histoqc_err__'): + file_name, err_str, tb = e.__histoqc_err__ + else: + # error outside of pipeline + # todo: it would be better to handle all of this as a decorator + # around the worker function + file_name, err_str, tb = "N/A", f"error outside of pipeline {e!r}", None + failed.append((file_name, err_str, tb)) diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annot_collection.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annot_collection.py new file mode 100644 index 00000000..30a187c0 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annot_collection.py @@ -0,0 +1,66 @@ +from typing import List, Dict, Union, Type, Tuple, Mapping # Literal, get_args, +from types import MappingProxyType +# from shapely.strtree import STRtree +# from shapely.geometry import box as shapely_box +from histoqc.import_wrapper.typing import Literal, get_args +from lazy_property import LazyProperty +from .annotation.base import Annotation, Region, TYPE_RAW_LABEL +from .annotation.imagescope import ImageScopeAnnotation +from .annotation.geojson import GEOJsonAnnotation + + +TYPE_BBOX = Tuple[int, int, int, int] + +TYPE_GEO = Literal["geojson"] +TYPE_IMAGESCOPE = Literal["imagescope"] +TYPE_JSON = Literal["json"] +TYPE_XML = Literal["xml"] + +TYPE_SUPPORTED_PARSER = Literal[TYPE_GEO, TYPE_IMAGESCOPE, TYPE_JSON, TYPE_XML] + +PARSER_BUILDER_MAP: Dict[str, Type[Annotation]] = { + get_args(TYPE_GEO)[0]: GEOJsonAnnotation, + get_args(TYPE_IMAGESCOPE)[0]: ImageScopeAnnotation, + # for HistoQC + get_args(TYPE_JSON)[0]: GEOJsonAnnotation, # duplicate + get_args(TYPE_XML)[0]: ImageScopeAnnotation, +} + + +class AnnotCollection: + _annotation_list: List[Annotation] + _label_to_regions_map: Mapping[TYPE_RAW_LABEL, List[Region]] + + @LazyProperty + def all_regions(self) -> List[Region]: + region_list = [] + for annotation in self._annotation_list: + region_list += annotation.regions + return region_list + + # @LazyProperty + # def multipolygons(self) -> MultiPolygon: + # for annotation in self._annotation_list: + + def __init__(self, annotation_list: List[Annotation]): + self._annotation_list = annotation_list + self._label_to_regions_map = self._new_label_to_regions_map() + + @classmethod + def build(cls, parser_type: TYPE_SUPPORTED_PARSER, uri: str, label_map: Union[Dict[Union[str, int], int], None]): + construct = PARSER_BUILDER_MAP[parser_type] + annotation_list = construct.build_from_uri(uri=uri, label_map=label_map) + return cls(annotation_list) + + def _new_label_to_regions_map(self) -> Mapping[TYPE_RAW_LABEL, List[Region]]: + out_dict: Dict[TYPE_RAW_LABEL, List[Region]] = dict() + for region in self.all_regions: + region: Region + label: TYPE_RAW_LABEL = region['label'] + out_dict[label] = out_dict.get(label, []) + out_dict[label].append(region) + return MappingProxyType(out_dict) + + @property + def label_to_regions_map(self) -> Mapping[TYPE_RAW_LABEL, List[Region]]: + return self._label_to_regions_map diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/__init__.py new file mode 100644 index 00000000..b6e690fd --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/__init__.py @@ -0,0 +1 @@ +from . import * diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/base.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/base.py new file mode 100644 index 00000000..36da7251 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/base.py @@ -0,0 +1,176 @@ +from abc import ABC, abstractmethod +from typing import Generic, TypeVar, Dict, Union, List, Tuple, TypedDict +from lazy_property import LazyProperty +from shapely.geometry import Polygon, MultiPolygon +import logging + +T = TypeVar("T") +TYPE_POINT = Tuple[int, int] +TYPE_POINT_SET = List[TYPE_POINT] +TYPE_HOLED_SET = Tuple[TYPE_POINT_SET, Union[List[TYPE_POINT_SET], None]] +# TYPE_HOLED_SET_COLLECTION = List[TYPE_HOLED_SET] + +TYPE_LABEL = Union[int, None] +TYPE_RAW_LABEL = Union[str, None, TYPE_LABEL] + +WARNING_NOT_SIMPLE_POLYGON = f"Not a Simple Polygon: buffer of the polygon " \ + f"with 0-distance resulting multiple polygons." \ + f"The shape of these polygons may not be identical to" \ + f"input annotations" + + +class Region(TypedDict): + polygon: Polygon + point_set: TYPE_HOLED_SET # TYPE_POINT_SET + label: TYPE_RAW_LABEL + raw_label: TYPE_RAW_LABEL + uri: str + + +class Annotation(ABC, Generic[T]): + """ + Annotation --> an atomic annotation that may contain one or multiple regions. + One label is assigned to one annotation. + """ + + _label_map: Union[Dict[TYPE_RAW_LABEL, TYPE_LABEL], None] + _ann_data: T + _uri: str + + @staticmethod + def point_to_int(point_xy: Tuple[Union[float, str], Union[float, str]]) -> TYPE_POINT: + raw_x, raw_y = point_xy + return int(float(raw_x)), int(float(raw_y)) + + @abstractmethod + def point_set_list(self) -> List[TYPE_HOLED_SET]: + return NotImplemented + + @abstractmethod + def label_from_annotation(self) -> TYPE_RAW_LABEL: + return NotImplemented + + @staticmethod + @abstractmethod + def annotation_list_from_uri(uri) -> List[T]: + return NotImplemented + + @staticmethod + def _enough_points(point_set: TYPE_POINT_SET): + return len(point_set) >= 3 + + @staticmethod + def _sanitized_points_helper(point_set: TYPE_HOLED_SET) -> Union[TYPE_HOLED_SET, None]: + outer, inner = point_set + # if shell has less than 3 --> discard directly + if not Annotation._enough_points(outer): + return None + inner = [hole for hole in inner if Annotation._enough_points(hole)] if inner is not None else None + return outer, inner + + @staticmethod + def _sanitized_points(point_set_list: List[TYPE_HOLED_SET]) -> List[TYPE_HOLED_SET]: + out_list = [] + for point_set in point_set_list: + sanitized = Annotation._sanitized_points_helper(point_set) + if sanitized is None: + continue + out_list.append(sanitized) + return out_list + + @staticmethod + def valid_polygon_helper(polygon: Polygon, point_set: TYPE_HOLED_SET) -> Tuple[List[Polygon], List[TYPE_HOLED_SET]]: + """ + In case + Returns: + + """ + if polygon.is_valid: + return [polygon, ], [point_set, ] + + valid_poly = polygon.buffer(0) + if isinstance(valid_poly, Polygon): + return [valid_poly, ], [point_set, ] + # if not simple polygon but multiple polygons + assert isinstance(valid_poly, MultiPolygon) + logging.warning(WARNING_NOT_SIMPLE_POLYGON) + # warning + polygon_list: List[Polygon] = list(valid_poly.geoms) + exterior_list: List[List[TYPE_POINT_SET]] = [list(x.exterior.coords) for x in polygon_list] + interior_list: List[List[List[TYPE_POINT_SET]]] = [[list(interior.coords) for interior in x.interiors] + for x in polygon_list] + point_set_list: List[TYPE_HOLED_SET] = [(outer, inner) + for outer, inner in zip(exterior_list, interior_list)] + return polygon_list, point_set_list + + @staticmethod + def valid_polygon(point_set: TYPE_HOLED_SET) -> Tuple[List[Polygon], List[TYPE_HOLED_SET]]: + outer, inner = point_set + polygon = Polygon(outer, holes=inner) + # if not polygon.is_valid: + # return polygon.buffer(0) + # assert not isinstance(polygon, MultiPolygon) + # return [polygon, ], [point_set, ] + return Annotation.valid_polygon_helper(polygon, point_set) + + @staticmethod + def regions_accumulate_helper(polygon_list: List[Polygon], + point_set_list: List[TYPE_HOLED_SET], label, raw_label, uri) -> List[Region]: + return [Region(polygon=polygon, point_set=point_set, label=label, raw_label=raw_label, uri=uri) + for polygon, point_set in zip(polygon_list, point_set_list)] + + @LazyProperty + def regions(self) -> List[Region]: + point_set_list: List[TYPE_HOLED_SET] = self.point_set_list() + clean_list = Annotation._sanitized_points(point_set_list) + region_list: List[Region] = [] + for point_set in clean_list: + point_set: TYPE_HOLED_SET + # polygon: Polygon = Annotation.valid_polygon(point_set) + polygon_list, point_set_list = Annotation.valid_polygon(point_set) + label = self.label + raw_label = self.raw_label + uri = self._uri + # curr_region = Region(polygon=polygon, point_set=point_set, + # label=label, raw_label=raw_label, uri=self._uri) + # region_list.append(curr_region) + curr_region_list = Annotation.regions_accumulate_helper(polygon_list, + point_set_list, label, raw_label, uri) + region_list += curr_region_list + return region_list + + @staticmethod + def _mapped_label(label_map: Dict[TYPE_RAW_LABEL, TYPE_LABEL], + label_var: TYPE_RAW_LABEL) -> Union[TYPE_RAW_LABEL, TYPE_LABEL]: + if label_map is None or len(label_map) == 0: + return label_var + assert label_var in label_map + return label_map[label_var] + + @LazyProperty + def raw_label(self) -> TYPE_RAW_LABEL: + return self.label_from_annotation() + + @LazyProperty + def label(self) -> Union[TYPE_RAW_LABEL, TYPE_LABEL]: + raw_label = self.raw_label + label = Annotation._mapped_label(self._label_map, raw_label) + return label + + @property + def ann_data(self) -> T: + return self._ann_data + + def __init__(self, uri: str, ann_data: T, label_map: Dict[Union[str, int], int]): + self._uri = uri + self._ann_data = ann_data + self._label_map = label_map + + @classmethod + def build(cls, uri: str, ann_data: T, label_map: Dict[Union[str, int], int]) -> "Annotation": + return cls(uri=uri, ann_data=ann_data, label_map=label_map) + + @classmethod + def build_from_uri(cls, uri: str, label_map: Union[Dict[TYPE_RAW_LABEL, TYPE_LABEL], None]) -> List["Annotation"]: + ann_data_list: List[T] = cls.annotation_list_from_uri(uri) + return [cls.build(uri=uri, ann_data=ann_data, label_map=label_map) for ann_data in ann_data_list] diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/geojson.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/geojson.py new file mode 100644 index 00000000..cf5bf2e7 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/geojson.py @@ -0,0 +1,83 @@ +from typing import List, Dict, Callable, Any # Literal, get_args +from histoqc.import_wrapper.typing import Literal, get_args +from ..io_utils.json import load_json +from .base import Annotation, TYPE_POINT_SET, TYPE_RAW_LABEL, TYPE_POINT, TYPE_HOLED_SET + +TYPE_GEO_MULTIPOLYGON = Literal['MultiPolygon'] +TYPE_GEO_POLYGON = Literal['Polygon'] +TYPE_GEO_LINE_STRING = Literal['LineString'] + +TYPE_GEOTYPE = Literal[TYPE_GEO_MULTIPOLYGON, TYPE_GEO_POLYGON, TYPE_GEO_LINE_STRING] + + +class GEOJsonAnnotation(Annotation[Dict]): + PROP: str = "properties" + CLASS: str = "classification" + NAME: str = "name" + + """ + Parses a typical GeoJSON file containing one or more Polygon or MultiPolygon features. + These JSON files are the preferred way to serialize QuPath annotations, for example. + See https://qupath.readthedocs.io/en/latest/docs/scripting/overview.html#serialization-json + """ + + @staticmethod + def point_set_helper_multipolygon(coordinates: List[List[TYPE_POINT_SET]]) -> List[TYPE_HOLED_SET]: + out_list = [] + for roi in coordinates: + out_list += GEOJsonAnnotation.point_set_helper_polygon(roi) + return out_list + + @staticmethod + def point_set_helper_polygon(coordinates: List[TYPE_POINT_SET]) -> List[TYPE_HOLED_SET]: + inner_list = [] + outer = GEOJsonAnnotation._point_set_single(coordinates[0]) + inner_source = coordinates[1:] + for points in inner_source: + points: List[TYPE_POINT] + inner_list.append(GEOJsonAnnotation._point_set_single(points)) + # if len(inner_list) == 0: + # inner_list = None + holed: TYPE_HOLED_SET = outer, inner_list + return [holed] + + @staticmethod + def point_set_helper_lines(coordinates: List[TYPE_POINT]) -> List[TYPE_HOLED_SET]: + holed_set: TYPE_HOLED_SET = GEOJsonAnnotation._point_set_single(coordinates), None + return [holed_set] + + @staticmethod + def _point_set_single(coordinates: List[TYPE_POINT]) -> List[TYPE_POINT]: + return [Annotation.point_to_int((coord[0], coord[1])) for coord in coordinates] + + @staticmethod + def _func_from_geom_type(geom_type: TYPE_GEOTYPE) -> Callable[[Any], List[TYPE_HOLED_SET]]: + GEOMETRY_MAP: Dict[str, Callable[[Any], List[TYPE_HOLED_SET]]] = { + get_args(TYPE_GEO_MULTIPOLYGON)[0]: GEOJsonAnnotation.point_set_helper_multipolygon, + get_args(TYPE_GEO_POLYGON)[0]: GEOJsonAnnotation.point_set_helper_polygon, + get_args(TYPE_GEO_LINE_STRING)[0]: GEOJsonAnnotation.point_set_helper_lines, + } + assert geom_type in GEOMETRY_MAP, f"Unsupported Geometry Type: {geom_type}" + return GEOMETRY_MAP[geom_type] + + def point_set_list(self) -> List[TYPE_HOLED_SET]: + geometry = self.ann_data['geometry'] + geom_type = geometry['type'] + coordinates = geometry['coordinates'] + func = GEOJsonAnnotation._func_from_geom_type(geom_type) + return func(coordinates) + + def label_from_annotation(self) -> TYPE_RAW_LABEL: + prop = self.ann_data.get(GEOJsonAnnotation.PROP) + classification = prop.get(GEOJsonAnnotation.CLASS) + if classification is not None: + return classification.get(GEOJsonAnnotation.NAME) + return None + + @staticmethod + def annotation_list_from_uri(uri) -> List[Dict]: + data = load_json(uri) + if isinstance(data, Dict): + return [data] + assert isinstance(data, List) + return data diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/imagescope.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/imagescope.py new file mode 100644 index 00000000..f5e0b5b0 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/imagescope.py @@ -0,0 +1,93 @@ +from typing import List, Dict, Union +from xml.etree import ElementTree as ET +from xml.etree.ElementTree import Element +from .base import Annotation, TYPE_POINT_SET, TYPE_RAW_LABEL, TYPE_POINT, TYPE_HOLED_SET + + +class ImageScopeAnnotation(Annotation[Element]): + ANNOTATION_TAG_NAME = "Annotation" + + TAG_REGION_ALL = "Regions" + TAG_REGION = "Region" + VERTICES: str = 'Vertices' + VERTEX: str = "Vertex" + X: str = 'X' + Y: str = 'Y' + + CLASS_ATTR = "Name" + _ann_data: Element + + """ + Parses the xml file to get those annotations as lists of verticies + xmlMask will create a mask that is true inside the annotated region described in the specified xml file. + The xml file must follow the ImageScope format, the minimal components of which are: + ``` + + + + + + + + + + + + + + ``` + With more or blocks as needed for additional annotations. There is no functional difference + between multiple blocks and one blocks with multiple blocks + """ + + def label_from_annotation(self) -> TYPE_RAW_LABEL: + """ + Read the label of the whole annotated region. + Assume the annotation class label is under element's "Name" attribute. + + Returns: + int + """ + return self._ann_data.get(ImageScopeAnnotation.CLASS_ATTR) + + @staticmethod + def vertex_from_node(vertex: Element) -> TYPE_POINT: + raw_x: str = vertex.get(ImageScopeAnnotation.X) + raw_y: str = vertex.get(ImageScopeAnnotation.Y) + raw_point = (raw_x, raw_y) + return Annotation.point_to_int(raw_point) + + def point_set_list(self) -> List[TYPE_HOLED_SET]: + """ + I doubt it is ever standardized how to define holes so herein we just ignore all holes. + Returns: + + """ + out_list = [] + for regions_all in self.ann_data.findall(ImageScopeAnnotation.TAG_REGION_ALL): + for region in regions_all.findall(ImageScopeAnnotation.TAG_REGION): + for vertices in region.findall(ImageScopeAnnotation.VERTICES): + # the X and Y attributes are float strings --> cast to float first before flooring down to int + xy_list: TYPE_POINT_SET = [ImageScopeAnnotation.vertex_from_node(vertex) + for vertex in vertices.findall(ImageScopeAnnotation.VERTEX)] + holes = None + holed_point_set: TYPE_HOLED_SET = xy_list, holes + out_list.append(holed_point_set) + return out_list + + @staticmethod + def validated_ann(ann_data: Element): + # todo perhaps using xmlschema and a predefined image scope xml as the schema to validate the structure? + assert ann_data.tag == ImageScopeAnnotation.ANNOTATION_TAG_NAME + return ann_data + + @staticmethod + def annotation_list_from_uri(uri) -> List[Element]: + tree = ET.parse(uri) + root = tree.getroot() + return root.findall(ImageScopeAnnotation.ANNOTATION_TAG_NAME) + + @classmethod + def build(cls, uri: str, ann_data: Element, label_map: Dict[Union[str, int], int]) -> Annotation: + ann_data = ImageScopeAnnotation.validated_ann(ann_data) + return super().build(uri=uri, ann_data=ann_data, label_map=label_map) diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/__init__.py new file mode 100644 index 00000000..b6e690fd --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/__init__.py @@ -0,0 +1 @@ +from . import * diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/json.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/json.py new file mode 100644 index 00000000..7fe1b3d9 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/json.py @@ -0,0 +1,12 @@ +import json + + +def write_json(fname: str, obj, **kwargs): + with open(fname, 'w') as root: + json.dump(obj, root, **kwargs) + + +def load_json(fname: str, **kwargs): + with open(fname, 'r') as root: + return json.load(root, **kwargs) + diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__init__.py new file mode 100644 index 00000000..01a2a7b2 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__init__.py @@ -0,0 +1,38 @@ +"""histoqc.config + +helpers for providing configuration ini files for histoqc + +""" +import re +try: + import importlib.resources as _resources +except ImportError: + # fallback for Python < 3.7 + import importlib_resources as _resources + + +# note to developers: +# to store a new ini file in the histoqc package add an ini file that +# matches the regular expression below to this directory +CONFIG_TEMPLATE_RE = re.compile(r'config(_(?P[A-Za-z][A-Za-z0-9.]+))?[.]ini') + + +def list_config_templates(): + """return a dictionary mapping config template names to filenames + + note: the default config is stored under name `None` + """ + templates = {} + for filename in _resources.contents('histoqc.config'): + m = CONFIG_TEMPLATE_RE.match(filename) + if m: + templates[m.group('name') or 'default'] = filename + return templates + + +def read_config_template(name=None): + """return the contents of a configuration template""" + templates = list_config_templates() + if name not in templates: + raise KeyError(f'no configuration template found under key {name!r}') + return _resources.read_text('histoqc.config', templates[name]) diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__main__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__main__.py new file mode 100644 index 00000000..5211cf16 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__main__.py @@ -0,0 +1,40 @@ +import argparse +import sys + +from histoqc.config import list_config_templates +from histoqc.config import read_config_template +from histoqc.config._parser import get_argument_parser + + +def main(argv=None): + if argv is None: + argv = sys.argv[1:] + + parser = get_argument_parser() + args = parser.parse_args(argv) + + if args.list: + print("# available example configurations:") + for name, filename in list_config_templates().items(): + print(f"- {name}:".ljust(11), f"{filename}") + return 0 + + elif args.show: + name = args.show + try: + config = read_config_template(name) + except KeyError: + print(f"unknown configuration '{name}'", file=sys.stderr) + return -1 + + print(f"# example configuration '{name}'") + print(config) + return 0 + + else: + parser.print_help() + return -1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/_parser.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/_parser.py new file mode 100644 index 00000000..dbafd568 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/_parser.py @@ -0,0 +1,14 @@ +import argparse + +def get_argument_parser(): + """Return the argument parser for histoqc.config""" + parser = argparse.ArgumentParser(prog="histoqc.config",description="Show example configuration files") + parser.add_argument('--list', + action='store_true', + help='list available configs') + parser.add_argument('--show', + metavar='NAME', + type=str, + default=None, + help='show named example config') + return parser diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config.ini new file mode 100644 index 00000000..34251aa3 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config.ini @@ -0,0 +1,200 @@ +[pipeline] +steps= BasicModule.getBasicStats + ClassificationModule.byExampleWithFeatures:pen_markings + ClassificationModule.byExampleWithFeatures:coverslip_edge + LightDarkModule.getIntensityThresholdPercent:tissue + LightDarkModule.getIntensityThresholdPercent:darktissue + BubbleRegionByRegion.detectSmoothness + MorphologyModule.removeFatlikeTissue + MorphologyModule.fillSmallHoles + MorphologyModule.removeSmallObjects + BlurDetectionModule.identifyBlurryRegions + BasicModule.finalProcessingSpur + BasicModule.finalProcessingArea + HistogramModule.compareToTemplates + HistogramModule.getHistogram + BrightContrastModule.getContrast + BrightContrastModule.getBrightnessGray + BrightContrastModule.getBrightnessByChannelinColorSpace:RGB + BrightContrastModule.getBrightnessByChannelinColorSpace:YUV + DeconvolutionModule.separateStains + SaveModule.saveFinalMask + SaveModule.saveMacro + SaveModule.saveThumbnails + BasicModule.finalComputations + + +[BaseImage.BaseImage] +image_work_size = 1.25x + +#not yet implemented +confirm_base_mag: False + +#three options: relative2mask, absolute, relative2image +mask_statistics = relative2mask + +[BasicModule.getBasicStats] +image_work_size = 1.25x + +#[ClassificationModule.byExample] +[ClassificationModule.byExampleWithFeatures:pen_markings] +name: pen_markings +threshold: .5 +examples: ./pen/1k_version/pen_green.png:./pen/1k_version/pen_green_mask.png + +area_threshold: 100 +features: frangi + laplace + rgb + #lbp + #gabor + #median + #gaussian + +laplace_ksize: 3 + +frangi_scale_range: (1,10) +frangi_scale_step: 2 +frangi_beta1: .5 +frangi_beta2: 15 +frangi_black_ridges: True + +gabor_theta: 4 +gabor_sigma: (1,3) +gabor_frequency: (0.05, 0.25) + +lbp_radius: 3 +lbp_points: 24 +lbp_method: default + +median_disk_size: 3 + +#gaussian_sigma: 1 +#gaussian_multichan: False + + + +[ClassificationModule.byExampleWithFeatures:coverslip_edge] +name: coverslip_edge +threshold: .5 + +examples: ./models/coverslip_edge_he/coverslip_edge.png:./models/coverslip_edge_he/coverslip_edge_mask.png + +area_threshold: 15 +features: frangi + laplace + rgb + +dilate_kernel_size: 5 + +[LightDarkModule.getIntensityThresholdPercent:bubble] +name: bubble +upper_threshold: .94 +lower_threshold: .82 +upper_variance: 11 +invert: true + + +[LightDarkModule.getIntensityThresholdPercent:tissue] +name: bright +upper_threshold: .9 +lower_std: 10 + +[LightDarkModule.getIntensityThresholdPercent:darktissue] +name: dark +upper_threshold: .15 +invert: true + + +[LightDarkModule.getTissuePercent] +threshold: .8 + +[LightDarkModule.getDarkTissuePercent] +threshold: .15 + +[MorphologyModule.removeSmallObjects] +min_size: 64 + +[MorphologyModule.removeFatlikeTissue] +kernel_size: 10 +max_keep_size: 1000 +fat_cell_size: 64 + +[MorphologyModule.fillSmallHoles] +min_size: 1000 + +[HistogramModule.compareToTemplates] +limit_to_mask: True +bins: 20 +templates= ./templates/template1.png + ./templates/template2.png + ./templates/template3.png + ./templates/template4.png + +[HistogramModule.getHistogram] +limit_to_mask: True +bins: 20 + +[BrightContrastModule.getContrast] +limit_to_mask: True + + +[BrightContrastModule.getBrightnessGray] +limit_to_mask: True + +[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB] +limit_to_mask: True + + +[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV] +limit_to_mask: True +#pick a color space in the list from 'RGB', 'HSV', 'RGB CIE', 'XYZ', 'YUV', 'YIQ', 'YPbPr', 'YCbCr' : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace +to_color_space: YUV + +[SaveModule.saveFinalMask] +overlay: True + +[SaveModule.saveMacro] +dim: 500 + +[SaveModule.saveThumbnails] +image_work_size: 1.25x +small_dim: 500 + +[BlurDetectionModule.identifyBlurryRegions] +image_work_size = 2.5x +blur_radius: 100 +blur_threshold: .15 + + +[BasicModule.finalComputations] +; two options: absolute, relative2image. relative2mask is not available here +mask_statistics = absolute + +[BasicModule.finalProcessingSpur] +disk_radius: 5 + +[BasicModule.finalProcessingArea] +#area_threshold: 90000 +area_threshold: 10000 + +[DeconvolutionModule.separateStains] +;hed_from_rgb: Hematoxylin + Eosin + DAB +;hdx_from_rgb: Hematoxylin + DAB +;fgx_from_rgb: Feulgen + Light Green +;bex_from_rgb: Giemsa stain : Methyl Blue + Eosin +;rbd_from_rgb: FastRed + FastBlue + DAB +;gdx_from_rgb: Methyl Green + DAB +;hax_from_rgb: Hematoxylin + AEC +;bro_from_rgb: Blue matrix Anilline Blue + Red matrix Azocarmine + Orange matrix Orange-G +;bpx_from_rgb: Methyl Blue + Ponceau Fuchsin +;ahx_from_rgb: Alcian Blue + Hematoxylin +;hpx_from_rgb: Hematoxylin + PAS +stain: hed_from_rgb +use_mask: True + + +[BubbleRegionByRegion.detectSmoothness] +threshold: .01 +kernel_size: 10 +min_object_size: 500 \ No newline at end of file diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_clinical.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_clinical.ini new file mode 100644 index 00000000..dde03b84 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_clinical.ini @@ -0,0 +1,228 @@ +[pipeline] +steps= BasicModule.getBasicStats + #ClassificationModule.byExampleWithFeatures:pen_markings + ClassificationModule.byExampleWithFeatures:coverslip_edge + LightDarkModule.getIntensityThresholdPercent:tissue + LightDarkModule.getIntensityThresholdPercent:darktissue + BubbleRegionByRegion.detectSmoothness + SaveModule.saveMask:first + SaveModule.saveMask2Geojson:geo_first + MorphologyModule.removeFatlikeTissue + MorphologyModule.fillSmallHoles + MorphologyModule.removeSmallObjects + SaveModule.saveMask:second + SaveModule.saveMask2Geojson:geo_second + BlurDetectionModule.identifyBlurryRegions + BasicModule.finalProcessingSpur + BasicModule.finalProcessingArea + HistogramModule.compareToTemplates + HistogramModule.getHistogram + BrightContrastModule.getContrast + BrightContrastModule.getBrightnessGray + BrightContrastModule.getBrightnessByChannelinColorSpace:RGB + BrightContrastModule.getBrightnessByChannelinColorSpace:YUV + SaveModule.saveMask:third + SaveModule.saveMask2Geojson:geo_third + DeconvolutionModule.separateStains + SaveModule.saveFinalMask + SaveModule.saveMacro + SaveModule.saveThumbnails + BasicModule.finalComputations + + +[BaseImage.BaseImage] +image_work_size = 1.25x + +#not yet implemented +confirm_base_mag: False + +#three options: relative2mask, absolute, relative2image +mask_statistics = relative2mask + +[BasicModule.getBasicStats] +image_work_size = 1.25x + +#[ClassificationModule.byExample] +[ClassificationModule.byExampleWithFeatures:pen_markings] +name: pen_markings +threshold: .5 +examples: ./pen/1k_version/pen_green.png:./pen/1k_version/pen_green_mask.png + +area_threshold: 100 +features: frangi + laplace + rgb + #lbp + #gabor + #median + #gaussian + +laplace_ksize: 3 + +frangi_scale_range: (1,10) +frangi_scale_step: 2 +frangi_beta1: .5 +frangi_beta2: 15 +frangi_black_ridges: True + +gabor_theta: 4 +gabor_sigma: (1,3) +gabor_frequency: (0.05, 0.25) + +lbp_radius: 3 +lbp_points: 24 +lbp_method: default + +median_disk_size: 3 + +#gaussian_sigma: 1 +#gaussian_multichan: False + + + +[ClassificationModule.byExampleWithFeatures:coverslip_edge] +name: coverslip_edge +threshold: .5 + +examples: ./models/coverslip_edge_he/coverslip_edge.png:./models/coverslip_edge_he/coverslip_edge_mask.png + +area_threshold: 15 +features: frangi + laplace + rgb + +dilate_kernel_size: 5 + +[LightDarkModule.getIntensityThresholdPercent:bubble] +name: bubble +upper_threshold: .94 +lower_threshold: .82 +upper_variance: 11 +invert: true + + +[LightDarkModule.getIntensityThresholdPercent:tissue] +name: nonwhite +upper_threshold: .9 +lower_std: 10 + +[LightDarkModule.getIntensityThresholdPercent:darktissue] +name: dark +upper_threshold: .15 +invert: true + + +[LightDarkModule.getTissuePercent] +threshold: .8 + +[LightDarkModule.getDarkTissuePercent] +threshold: .15 + +[MorphologyModule.removeSmallObjects] +min_size: 64 + +[MorphologyModule.removeFatlikeTissue] +kernel_size: 10 +max_keep_size: 1000 +fat_cell_size: 64 + +[MorphologyModule.fillSmallHoles] +min_size: 1000 + +[HistogramModule.compareToTemplates] +limit_to_mask: True +bins: 20 +templates= ./templates/template1.png + ./templates/template2.png + ./templates/template3.png + ./templates/template4.png + +[HistogramModule.getHistogram] +limit_to_mask: True +bins: 20 + +[BrightContrastModule.getContrast] +limit_to_mask: True + + +[BrightContrastModule.getBrightnessGray] +limit_to_mask: True + +[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB] +limit_to_mask: True + + +[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV] +limit_to_mask: True +#pick a color space in the list from ‘RGB’, ‘HSV’, ‘RGB CIE’, ‘XYZ’, ‘YUV’, ‘YIQ’, ‘YPbPr’, ‘YCbCr’ : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace +to_color_space: YUV + +[SaveModule.saveFinalMask] +overlay: True + +[SaveModule.saveMacro] +dim: 500 + +[SaveModule.saveThumbnails] +image_work_size: 1.25x +small_dim: 500 + +[BlurDetectionModule.identifyBlurryRegions] +image_work_size = 2.5x +blur_radius: 100 +blur_threshold: .15 + + +[BasicModule.finalComputations] +; two options: absolute, relative2image. relative2mask is not available here +mask_statistics = absolute + +[BasicModule.finalProcessingSpur] +disk_radius: 5 + +[BasicModule.finalProcessingArea] +#area_threshold: 90000 +area_threshold: 10000 + +[DeconvolutionModule.separateStains] +;hed_from_rgb: Hematoxylin + Eosin + DAB +;hdx_from_rgb: Hematoxylin + DAB +;fgx_from_rgb: Feulgen + Light Green +;bex_from_rgb: Giemsa stain : Methyl Blue + Eosin +;rbd_from_rgb: FastRed + FastBlue + DAB +;gdx_from_rgb: Methyl Green + DAB +;hax_from_rgb: Hematoxylin + AEC +;bro_from_rgb: Blue matrix Anilline Blue + Red matrix Azocarmine + Orange matrix Orange-G +;bpx_from_rgb: Methyl Blue + Ponceau Fuchsin +;ahx_from_rgb: Alcian Blue + Hematoxylin +;hpx_from_rgb: Hematoxylin + PAS +stain: hed_from_rgb +use_mask: True + + +[BubbleRegionByRegion.detectSmoothness] +threshold: .01 +kernel_size: 10 +min_object_size: 500 + +[SaveModule.saveMask:first] +suffix: first + +[SaveModule.saveMask:second] +suffix: second + + +[SaveModule.saveMask:third] +suffix: third + + +[SaveModule.saveMask2Geojson:geo_first] +suffix: geo_first +mask_name: img_mask_flat + +[SaveModule.saveMask2Geojson:geo_second] +suffix: geo_second +mask_name: img_mask_small_filled + +[SaveModule.saveMask2Geojson:geo_third] +suffix: geo_third \ No newline at end of file diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_first.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_first.ini new file mode 100644 index 00000000..49b71ee7 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_first.ini @@ -0,0 +1,92 @@ +[pipeline] +steps= BasicModule.getBasicStats + LightDarkModule.getIntensityThresholdPercent:tissue + BrightContrastModule.getContrast:background + BrightContrastModule.getBrightnessGray:background + BrightContrastModule.getBrightnessByChannelinColorSpace:RGB:background + HistogramModule.compareToTemplates + HistogramModule.getHistogram + BrightContrastModule.getContrast + BrightContrastModule.getBrightnessGray + BrightContrastModule.getBrightnessByChannelinColorSpace:RGB + BrightContrastModule.getBrightnessByChannelinColorSpace:YUV + SaveModule.saveFinalMask + SaveModule.saveMacro + SaveModule.saveThumbnails + BasicModule.finalComputations + + +[BaseImage.BaseImage] +image_work_size = 1.25x + +#three options: relative2mask, absolute, relative2image +mask_statistics = relative2mask + +confirm_base_mag: False + + +[BasicModule.getBasicStats] +image_work_size = 1.25x + + +[LightDarkModule.getIntensityThresholdPercent:tissue] +name: nonwhite +upper_threshold: .9 +lower_std: 10 + +[HistogramModule.compareToTemplates] +limit_to_mask: True +bins: 20 +templates= ./templates/template1.png + ./templates/template2.png + ./templates/template3.png + ./templates/template4.png + +[HistogramModule.getHistogram] +limit_to_mask: True +bins: 20 + +[BrightContrastModule.getContrast] +limit_to_mask: True + +[BrightContrastModule.getContrast:background] +limit_to_mask: True +invert: True +prefix: background + + +[BrightContrastModule.getBrightnessGray] +limit_to_mask: True + +[BrightContrastModule.getBrightnessGray:background] +limit_to_mask: True +invert: True +prefix: background + + +[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB] +limit_to_mask: True + +[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB:background] +limit_to_mask: True +invert: True +prefix: background + + + +[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV] +limit_to_mask: True +#pick a color space in the list from ‘RGB’, ‘HSV’, ‘RGB CIE’, ‘XYZ’, ‘YUV’, ‘YIQ’, ‘YPbPr’, ‘YCbCr’ : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace +to_color_space: YUV + + +[SaveModule.saveFinalMask] +overlay: True + +[SaveModule.saveMacro] +dim: 500 + +[SaveModule.saveThumbnails] +image_work_size: 1.25x +small_dim: 500 + diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_ihc.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_ihc.ini new file mode 100644 index 00000000..2aeb1d41 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_ihc.ini @@ -0,0 +1,246 @@ +[pipeline] +steps= BasicModule.getBasicStats + LightDarkModule.saveEqualisedImage + LightDarkModule.minimumPixelIntensityNeighborhoodFiltering + ;LightDarkModule.getIntensityThresholdPercent:darktissue + MorphologyModule.fillSmallHoles + MorphologyModule.removeSmallObjects + LocalTextureEstimationModule.estimateGreyComatrixFeatures:background + BrightContrastModule.getContrast:background + BrightContrastModule.getBrightnessGray:background + BrightContrastModule.getBrightnessByChannelinColorSpace:RGB_background + ClassificationModule.byExampleWithFeatures:pen_markings + ClassificationModule.byExampleWithFeatures:coverslip_edge + LocalTextureEstimationModule.estimateGreyComatrixFeatures:final + BrightContrastModule.getContrast + BrightContrastModule.getBrightnessGray + BrightContrastModule.getBrightnessByChannelinColorSpace:RGB + BrightContrastModule.getBrightnessByChannelinColorSpace:YUV + DeconvolutionModule.separateStains + HistogramModule.getHistogram + SaveModule.saveFinalMask + SaveModule.saveMacro + SaveModule.saveThumbnails + BasicModule.finalComputations + + +[BaseImage.BaseImage] +image_work_size = 1.25x +in_memory_compression = True + +#not yet implemented +confirm_base_mag: False + +#three options: relative2mask, absolute, relative2image +mask_statistics = relative2mask + +[BasicModule.getBasicStats] +image_work_size = 1.25x + +#[ClassificationModule.byExample] +[ClassificationModule.byExampleWithFeatures:pen_markings] +name: pen_markings +threshold: .5 +examples: ./pen/1k_version/pen_green.png:./pen/1k_version/pen_green_mask.png + ./pen/1k_version/pen_red.png:./pen/1k_version/pen_red_mask.png + ./pen/1k_version/pen_black.png:./pen/1k_version/pen_black_mask.png + +nsamples_per_example: 1000 +area_threshold: 100 +features: frangi + laplace + rgb + #lbp + #gabor + #median + #gaussian + +laplace_ksize: 3 + +frangi_scale_range: (1,10) +frangi_scale_step: 2 +frangi_beta1: .5 +frangi_beta2: 15 +frangi_black_ridges: True + +gabor_theta: 4 +gabor_sigma: (1,3) +gabor_frequency: (0.05, 0.25) + +lbp_radius: 3 +lbp_points: 24 +lbp_method: default + +median_disk_size: 3 + +#gaussian_sigma: 1 +#gaussian_multichan: False + + + +[ClassificationModule.byExampleWithFeatures:coverslip_edge] +name: coverslip_edge +threshold: .5 + +examples: ./models/coverslip_edge_ihc/coverslip_edge.png:./models/coverslip_edge_ihc/coverslip_edge_mask.png + +area_threshold: 15 +features: frangi + laplace + rgb + +dilate_kernel_size: 5 + +[LightDarkModule.getIntensityThresholdPercent:bubble] +name: bubble +upper_threshold: .94 +lower_threshold: .82 +upper_variance: 11 +invert: true + +[LightDarkModule.getIntensityThresholdPercent:tissue] +name: bright +upper_threshold: .9 +lower_std: 10 + +[LightDarkModule.getIntensityThresholdPercent:darktissue] +name: dark +upper_threshold: .15 +invert: true + + +[LightDarkModule.getTissuePercent] +threshold: .8 +[LightDarkModule.getDarkTissuePercent] +threshold: .15 + +[MorphologyModule.removeSmallObjects] +min_size: 10000 + +[MorphologyModule.removeFatlikeTissue] +kernel_size: 10 +max_keep_size: 1000 +fat_cell_size: 64 + +[MorphologyModule.fillSmallHoles] +min_size: 1000 + +[HistogramModule.compareToTemplates] +limit_to_mask: True +bins: 20 +templates= ./templates/template1.png + ./templates/template2.png + ./templates/template3.png + ./templates/template4.png + +[HistogramModule.getHistogram] +limit_to_mask: True +bins: 20 + +[BrightContrastModule.getContrast] +limit_to_mask: True + + +[BrightContrastModule.getBrightnessGray] +limit_to_mask: True + +[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB] +limit_to_mask: True + + + +[BrightContrastModule.getContrast:background] +prefix: background +limit_to_mask: True +invert: True +mask_name: img_mask_use + +[BrightContrastModule.getBrightnessGray:background] +prefix: background +limit_to_mask: True +invert: True +mask_name: img_mask_use + +[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB_background] +prefix: background +limit_to_mask: True +invert: True +mask_name: img_mask_use + +[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV] +limit_to_mask: True +#pick a color space in the list from 'RGB', 'HSV', 'RGB CIE', 'XYZ', 'YUV', 'YIQ', 'YPbPr', 'YCbCr' : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace +to_color_space: YUV + +[SaveModule.saveFinalMask] +overlay: True + +[SaveModule.saveMacro] +dim: 500 + +[SaveModule.saveThumbnails] +image_work_size: 1.25x +small_dim: 500 + +[BlurDetectionModule.identifyBlurryRegions] +image_work_size = 2.5x +blur_radius: 100 +blur_threshold: .15 + + +[BasicModule.finalComputations] +; two options: absolute, relative2image. relative2mask is not available here +mask_statistics = absolute + +[BasicModule.finalProcessingSpur] +disk_radius: 5 + +[BasicModule.finalProcessingArea] +#area_threshold: 90000 +area_threshold: 10000 + +[DeconvolutionModule.separateStains] +;hed_from_rgb: Hematoxylin + Eosin + DAB +;hdx_from_rgb: Hematoxylin + DAB +;fgx_from_rgb: Feulgen + Light Green +;bex_from_rgb: Giemsa stain : Methyl Blue + Eosin +;rbd_from_rgb: FastRed + FastBlue + DAB +;gdx_from_rgb: Methyl Green + DAB +;hax_from_rgb: Hematoxylin + AEC +;bro_from_rgb: Blue matrix Anilline Blue + Red matrix Azocarmine + Orange matrix Orange-G +;bpx_from_rgb: Methyl Blue + Ponceau Fuchsin +;ahx_from_rgb: Alcian Blue + Hematoxylin +;hpx_from_rgb: Hematoxylin + PAS +stain: hdx_from_rgb +use_mask: True + + +[BubbleRegionByRegion.detectSmoothness] +threshold: .01 +kernel_size: 10 +min_object_size: 500 + + +[LocalTextureEstimationModule.estimateGreyComatrixFeatures:background] +prefix: background +patch_size: 32 +npatches: 1000 +nlevels: 8 +feats: contrast:dissimilarity:homogeneity:ASM:energy:correlation +invert: True +mask_name: img_mask_use + +[LocalTextureEstimationModule.estimateGreyComatrixFeatures:final] +prefix: final +patch_size: 32 +nlevels: 8 +npatches: 1000 +feats: contrast:dissimilarity:homogeneity:ASM:energy:correlation +invert: False +mask_name: img_mask_use + + +[LightDarkModule.minimumPixelIntensityNeighborhoodFiltering] +disk_size: 5 +upper_threshold: 210 +invert: True diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_light.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_light.ini new file mode 100644 index 00000000..0edfc9b4 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_light.ini @@ -0,0 +1,187 @@ +[pipeline] +steps= BasicModule.getBasicStats +# ClassificationModule.byExampleWithFeatures:pen_markings +; ClassificationModule.byExampleWithFeatures:coverslip_edge + LightDarkModule.getIntensityThresholdPercent:tissue + BubbleRegionByRegion.detectSmoothness + LightDarkModule.getIntensityThresholdPercent:darktissue + MorphologyModule.fillSmallHoles + MorphologyModule.removeSmallObjects + BlurDetectionModule.identifyBlurryRegions + BasicModule.finalProcessingSpur + BasicModule.finalProcessingArea +; HistogramModule.compareToTemplates +; HistogramModule.getHistogram +; BrightContrastModule.getContrast +; BrightContrastModule.getBrightnessGray +; BrightContrastModule.getBrightnessByChannelinColorSpace:RGB +; BrightContrastModule.getBrightnessByChannelinColorSpace:YUV +; DeconvolutionModule.separateStains + SaveModule.saveFinalMask + SaveModule.saveMacro + SaveModule.saveThumbnails + BasicModule.finalComputations + + +[BaseImage.BaseImage] +image_work_size = 1.25x + +#not yet implemented +confirm_base_mag: False + +#three options: relative2mask, absolute, relative2image +mask_statistics = relative2mask + +[BasicModule.getBasicStats] +image_work_size = 1.25x + +#[ClassificationModule.byExample] +[ClassificationModule.byExampleWithFeatures:pen_markings] +name: pen_markings +threshold: .5 +examples: ./pen/1k_version/pen_green.png:./pen/1k_version/pen_green_mask.png + +area_threshold: 100 +features: frangi + laplace + rgb + #lbp + #gabor + #median + #gaussian + +laplace_ksize: 3 + +frangi_scale_range: (1,10) +frangi_scale_step: 2 +frangi_beta1: .5 +frangi_beta2: 15 +frangi_black_ridges: True + +gabor_theta: 4 +gabor_sigma: (1,3) +gabor_frequency: (0.05, 0.25) + +lbp_radius: 3 +lbp_points: 24 +lbp_method: default + +median_disk_size: 3 + +#gaussian_sigma: 1 +#gaussian_multichan: False + + + +[ClassificationModule.byExampleWithFeatures:coverslip_edge] +name: coverslip_edge +threshold: .5 + +examples: ./models/coverslip_edge_he/coverslip_edge.png:./models/coverslip_edge_he/coverslip_edge_mask.png + +area_threshold: 16000 +features: frangi + laplace + rgb + + +[LightDarkModule.getIntensityThresholdPercent:bubble] +name: bubble +upper_threshold: .94 +lower_threshold: .82 +upper_variance: 11 +invert: true + + +[LightDarkModule.getIntensityThresholdPercent:tissue] +name: nonwhite +upper_threshold: .99 +lower_std: 10 + +[LightDarkModule.getIntensityThresholdPercent:darktissue] +name: dark +upper_threshold: .15 +invert: true + + +[LightDarkModule.getTissuePercent] +threshold: .8 + +[LightDarkModule.getDarkTissuePercent] +threshold: .15 + +[MorphologyModule.removeSmallObjects] +min_size: 64 + +[MorphologyModule.fillSmallHoles] +min_size: 1000 + +[HistogramModule.compareToTemplates] +limit_to_mask: True +bins: 20 +templates= ./templates/template1.png + ./templates/template2.png + ./templates/template3.png + ./templates/template4.png + +[HistogramModule.getHistogram] +limit_to_mask: True +bins: 20 + +[BrightContrastModule.getContrast] +limit_to_mask: True + + +[BrightContrastModule.getBrightnessGray] +limit_to_mask: True + +[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB] +limit_to_mask: True + + +[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV] +limit_to_mask: True +#pick a color space in the list from ‘RGB’, ‘HSV’, ‘RGB CIE’, ‘XYZ’, ‘YUV’, ‘YIQ’, ‘YPbPr’, ‘YCbCr’ : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace +to_color_space: YUV + +[SaveModule.saveFinalMask] +overlay: True + +[SaveModule.saveMacro] +dim: 500 + +[SaveModule.saveThumbnails] +image_work_size: 1.25x +small_dim: 500 + +[BlurDetectionModule.identifyBlurryRegions] +image_work_size = 2.5x +blur_radius: 7 +blur_threshold: .05 + + +[BasicModule.finalComputations] +; two options: absolute, relative2image. relative2mask is not available here +mask_statistics = absolute + +[BasicModule.finalProcessingSpur] +disk_radius: 7 + +[BasicModule.finalProcessingArea] +area_threshold: 90000 + +[DeconvolutionModule.separateStains] +;hed_from_rgb: Hematoxylin + Eosin + DAB +;hdx_from_rgb: Hematoxylin + DAB +;fgx_from_rgb: Feulgen + Light Green +;bex_from_rgb: Giemsa stain : Methyl Blue + Eosin +;rbd_from_rgb: FastRed + FastBlue + DAB +;gdx_from_rgb: Methyl Green + DAB +;hax_from_rgb: Hematoxylin + AEC +;bro_from_rgb: Blue matrix Anilline Blue + Red matrix Azocarmine + Orange matrix Orange-G +;bpx_from_rgb: Methyl Blue + Ponceau Fuchsin +;ahx_from_rgb: Alcian Blue + Hematoxylin +;hpx_from_rgb: Hematoxylin + PAS +stain: hed_from_rgb +use_mask: True + diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_v2.1.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_v2.1.ini new file mode 100644 index 00000000..59bfc33b --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_v2.1.ini @@ -0,0 +1,262 @@ +[pipeline] +steps= BasicModule.getBasicStats + LightDarkModule.saveEqualisedImage +# ClassificationModule.byExampleWithFeatures:pen_markings +# ClassificationModule.byExampleWithFeatures:coverslip_edge + LightDarkModule.minimumPixelIntensityNeighborhoodFiltering + LightDarkModule.getIntensityThresholdPercent:darktissue + BubbleRegionByRegion.detectSmoothness + MorphologyModule.removeFatlikeTissue + MorphologyModule.fillSmallHoles + MorphologyModule.removeSmallObjects + LocalTextureEstimationModule.estimateGreyComatrixFeatures:background + BrightContrastModule.getContrast:background + BrightContrastModule.getBrightnessGray:background + BrightContrastModule.getBrightnessByChannelinColorSpace:RGB_background + BlurDetectionModule.identifyBlurryRegions + BasicModule.finalProcessingSpur + BasicModule.finalProcessingArea + HistogramModule.compareToTemplates + HistogramModule.getHistogram + LocalTextureEstimationModule.estimateGreyComatrixFeatures:final + BrightContrastModule.getContrast + BrightContrastModule.getBrightnessGray + BrightContrastModule.getBrightnessByChannelinColorSpace:RGB + BrightContrastModule.getBrightnessByChannelinColorSpace:YUV + DeconvolutionModule.separateStains + SaveModule.saveFinalMask + SaveModule.saveMacro + SaveModule.saveThumbnails + ArtifactToGeoJSONModule.export_geojson + BasicModule.countTissuePieces + BasicModule.finalComputations + + +[BaseImage.BaseImage] +image_work_size = 1.25x +in_memory_compression = True + +#not yet implemented +confirm_base_mag: False + +#three options: relative2mask, absolute, relative2image +mask_statistics = relative2mask + +[BasicModule.getBasicStats] +image_work_size = 1.25x + +#[ClassificationModule.byExample] +[ClassificationModule.byExampleWithFeatures:pen_markings] +name: pen_markings +threshold: .5 +examples: ./pen/1k_version/pen_green.png:./pen/1k_version/pen_green_mask.png + ./pen/1k_version/pen_red.png:./pen/1k_version/pen_red_mask.png + ./pen/1k_version/pen_black.png:./pen/1k_version/pen_black_mask.png + +nsamples_per_example: 10000 +area_threshold: 100 +features: frangi + laplace + rgb + #lbp + #gabor + #median + #gaussian + +laplace_ksize: 3 + +frangi_scale_range: (1,10) +frangi_scale_step: 2 +frangi_beta1: .5 +frangi_beta2: 15 +frangi_black_ridges: True + +gabor_theta: 4 +gabor_sigma: (1,3) +gabor_frequency: (0.05, 0.25) + +lbp_radius: 3 +lbp_points: 24 +lbp_method: default + +median_disk_size: 3 + +#gaussian_sigma: 1 +#gaussian_multichan: False + + + +[ClassificationModule.byExampleWithFeatures:coverslip_edge] +name: coverslip_edge +threshold: .5 + +examples: ./models/coverslip_edge_he/coverslip_edge.png:./models/coverslip_edge_he/coverslip_edge_mask.png + +nsamples_per_example: 10000 + +area_threshold: 15 +features: frangi + laplace + rgb + +dilate_kernel_size: 5 + +[LightDarkModule.getIntensityThresholdPercent:bubble] +name: bubble +upper_threshold: .94 +lower_threshold: .82 +upper_variance: 11 +invert: true + + +[LightDarkModule.getIntensityThresholdPercent:tissue] +name: bright +upper_threshold: .9 +lower_std: 10 + +[LightDarkModule.getIntensityThresholdPercent:darktissue] +name: dark +upper_threshold: .15 +invert: true + + +[LightDarkModule.getTissuePercent] +threshold: .8 + +[LightDarkModule.getDarkTissuePercent] +threshold: .15 + +[MorphologyModule.removeSmallObjects] +min_size: 64 + +[MorphologyModule.removeFatlikeTissue] +kernel_size: 10 +max_keep_size: 1000 +fat_cell_size: 64 + +[MorphologyModule.fillSmallHoles] +min_size: 1000 + +[HistogramModule.compareToTemplates] +limit_to_mask: True +bins: 20 +templates= ./templates/template1.png + ./templates/template2.png + ./templates/template3.png + ./templates/template4.png + +[HistogramModule.getHistogram] +limit_to_mask: True +bins: 20 + +[BrightContrastModule.getContrast] +limit_to_mask: True + + +[BrightContrastModule.getBrightnessGray] +limit_to_mask: True + +[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB] +limit_to_mask: True + + + +[BrightContrastModule.getContrast:background] +prefix: background +limit_to_mask: True +invert: True +mask_name: img_mask_use + +[BrightContrastModule.getBrightnessGray:background] +prefix: background +limit_to_mask: True +invert: True +mask_name: img_mask_use + +[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB_background] +prefix: background +limit_to_mask: True +invert: True +mask_name: img_mask_use + +[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV] +limit_to_mask: True +#pick a color space in the list from 'RGB', 'HSV', 'RGB CIE', 'XYZ', 'YUV', 'YIQ', 'YPbPr', 'YCbCr' : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace +to_color_space: YUV + +[SaveModule.saveFinalMask] +overlay: True + +[SaveModule.saveMacro] +dim: 500 + +[SaveModule.saveThumbnails] +image_work_size: 1.25x +small_dim: 500 + +[BlurDetectionModule.identifyBlurryRegions] +image_work_size = 2.5x +blur_radius: 100 +blur_threshold: .15 + + +[BasicModule.finalComputations] +; two options: absolute, relative2image. relative2mask is not available here +mask_statistics = absolute + +[BasicModule.finalProcessingSpur] +disk_radius: 5 + +[BasicModule.finalProcessingArea] +#area_threshold: 90000 +area_threshold: 10000 + +[DeconvolutionModule.separateStains] +;hed_from_rgb: Hematoxylin + Eosin + DAB +;hdx_from_rgb: Hematoxylin + DAB +;fgx_from_rgb: Feulgen + Light Green +;bex_from_rgb: Giemsa stain : Methyl Blue + Eosin +;rbd_from_rgb: FastRed + FastBlue + DAB +;gdx_from_rgb: Methyl Green + DAB +;hax_from_rgb: Hematoxylin + AEC +;bro_from_rgb: Blue matrix Anilline Blue + Red matrix Azocarmine + Orange matrix Orange-G +;bpx_from_rgb: Methyl Blue + Ponceau Fuchsin +;ahx_from_rgb: Alcian Blue + Hematoxylin +;hpx_from_rgb: Hematoxylin + PAS +stain: hed_from_rgb +use_mask: True + + +[BubbleRegionByRegion.detectSmoothness] +threshold: .01 +kernel_size: 10 +min_object_size: 500 + + + +[LocalTextureEstimationModule.estimateGreyComatrixFeatures:background] +prefix: background +patch_size: 32 +npatches: 1000 +nlevels: 8 +feats: contrast:dissimilarity:homogeneity:ASM:energy:correlation +invert: True +mask_name: img_mask_use + +[LocalTextureEstimationModule.estimateGreyComatrixFeatures:final] +prefix: final +patch_size: 32 +nlevels: 8 +npatches: 1000 +feats: contrast:dissimilarity:homogeneity:ASM:energy:correlation +invert: False +mask_name: img_mask_use + +[LightDarkModule.minimumPixelIntensityNeighborhoodFiltering] +disk_size: 5 +upper_threshold: 210 +invert: True + +[BasicModule.countTissuePieces] + +[ArtifactToGeoJSONModule.export_geojson] diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/__init__.py new file mode 100644 index 00000000..956e42f9 --- /dev/null +++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/__init__.py @@ -0,0 +1,162 @@ +"""histoqc.data + +helpers for providing data resources for histoqc modules + +""" +import os +import re +from configparser import ConfigParser +from contextlib import ContextDecorator +from contextlib import ExitStack +from pathlib import Path +from tempfile import TemporaryDirectory +from types import MethodType + +try: + from importlib.resources import files as _files +except ImportError: + from importlib_resources import files as _files + + +# --- dispatch config rewriting --------------------------------------- + +# noinspection PyPep8Naming +class _ManagedPkgData(ContextDecorator): + + _section_re = re.compile( + r'(?P[A-Za-z_][A-Za-z0-9_]+[.][A-Za-z_][A-Za-z0-9_]+)' + r'([:](?P