diff --git a/runtime/ops/histoqc_op/_init_.py b/runtime/ops/histoqc_op/_init_.py
new file mode 100644
index 00000000..0534bbb9
--- /dev/null
+++ b/runtime/ops/histoqc_op/_init_.py
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+from datamate.core.base_op import OPERATORS
+OPERATORS.register_module(
+ module_name='HistoQCMapper',
+ module_path='ops.user.histoqc_op.process'
+)
\ No newline at end of file
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/LICENSE.txt b/runtime/ops/histoqc_op/histoqc_src/HistoQC/LICENSE.txt
new file mode 100644
index 00000000..382cc7a0
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/LICENSE.txt
@@ -0,0 +1,32 @@
+The Clear BSD License
+
+Copyright (c) 2019 Andrew Janowczyk
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted (subject to the limitations in the disclaimer
+below) provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from this
+ software without specific prior written permission.
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/README.md b/runtime/ops/histoqc_op/histoqc_src/HistoQC/README.md
new file mode 100644
index 00000000..30487fb0
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/README.md
@@ -0,0 +1,19 @@
+# HistoQC Mapper for DataMate
+
+[Original Project](https://github.com/choosehappy/HistoQC)
+
+ **说明**: 本算子是 [HistoQC]在 DataMate 算子管线中的标准集成实现。包含了针对 WSI 质量控制产物的结构化提取逻辑。
+
+
+## 主要改动 (Modifications)
+
+本算子在集成过程中,针对 DataMate 的 `Mapper` 架构进行了以下逻辑增强:
+
+### 1. 空间坐标提取 (Spatial Coordinates Extraction)
+**掩码转坐标**:新增了对 HistoQC 输出掩码(Mask PNG)的后处理逻辑。
+**GeoJSON 产出**:通过 OpenCV 轮廓检测算法,将伪影区域(如笔迹、盖玻片边缘)转换为标准的 GeoJSON 多边形数据,并封装于 `sample["text"]` 中。
+**缩放对齐**:支持通过 `scaleFactor` 参数对检测坐标进行重采样映射,确保坐标体系与原图一致。
+
+### 2. 性能与耗时监控 (Performance & Timing)
+**执行计时**:在算子执行周期内引入了计时器,记录每个切片任务的实际处理时长。
+**指标关联**:将处理耗时(Processing Time)与输入文件大小(File Size)作为元数据存入输出结果,便于后续进行算法性能评估。
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/AnnotationModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/AnnotationModule.py
new file mode 100644
index 00000000..96912590
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/AnnotationModule.py
@@ -0,0 +1,126 @@
+import logging
+from typing import List, Tuple
+from histoqc.BaseImage import printMaskHelper
+from skimage import io, img_as_ubyte
+import os
+from pathlib import PurePosixPath, Path
+from shapely.geometry import Polygon
+from shapely import affinity
+from PIL import Image, ImageDraw
+import numpy as np
+from histoqc.annotations.annot_collection import AnnotCollection, PARSER_BUILDER_MAP, TYPE_SUPPORTED_PARSER, Region
+
+
+def rescale_by_img_bbox(polygon: Polygon, offset_xy: Tuple[float, float], resize_factor: float) -> Polygon:
+ if isinstance(offset_xy, float):
+ offset_xy = (offset_xy, offset_xy)
+ x_off, y_off = offset_xy
+ polygon = affinity.translate(polygon, xoff=x_off, yoff=y_off)
+ polygon = affinity.scale(polygon, xfact=resize_factor, yfact=resize_factor, origin=(0, 0))
+ return polygon
+
+
+def polygon_filled(draw_pil: ImageDraw, polygon: Polygon, offset_xy: Tuple[float, float], resize_factor: float):
+ polygon = rescale_by_img_bbox(polygon, offset_xy, resize_factor)
+ # outer
+ exterior_coords = list(polygon.exterior.coords)
+ draw_pil.polygon(exterior_coords, fill=1, outline=1, width=0)
+ for component in polygon.interiors:
+ interior_coord = list(component.coords)
+ draw_pil.polygon(interior_coord, fill=0, outline=0, width=0)
+ return draw_pil
+
+
+def annotation_to_mask(width: int, height: int, annot_collection: AnnotCollection, offset_xy: Tuple[float, float],
+ resize_factor: float) -> np.ndarray:
+ # binary
+ mask = Image.new(mode="1", size=(width, height))
+ draw_pil = ImageDraw.Draw(mask)
+ all_regions: List[Region] = annot_collection.all_regions
+ for region in all_regions:
+ polygon: Polygon = region['polygon']
+ # skip if empty ring (e.g., misclick in qupath)
+ if polygon.is_empty or (not polygon.is_valid):
+ continue
+ draw_pil = polygon_filled(draw_pil, polygon, offset_xy, resize_factor)
+ # noinspection PyTypeChecker
+ return np.array(mask)
+
+
+def getParams(s, params):
+ # read params - format: xml, json; file_path; suffix;
+ ann_format = params.get("format", None)
+ file_path = params.get("file_path", None)
+ suffix = params.get("suffix", "")
+
+ # try use default value if the params are not provided
+ if not ann_format:
+ # set default format
+ ann_format = "xml"
+ # warning msg
+ msg = f"format is not provided, using xml as the default format."
+ logging.warning(f"{s['filename']} - {msg}")
+ s["warnings"].append(msg)
+
+ if not file_path:
+ # set default file path
+ file_path = s["dir"]
+ # warning msg
+ msg = f"file path is not provided, using \"{s['dir']}\" as the default file path"
+ logging.warning(f"{s['filename']} - {msg}")
+ s["warnings"].append(msg)
+
+ return ann_format, file_path, suffix
+
+
+def saveAnnotationMask(s, params):
+ logging.info(f"{s['filename']} - \tgetAnnotationMask")
+
+ (ann_format, file_path, suffix) = getParams(s, params)
+
+ # annotation file path
+ f_path = f"{file_path}{os.sep}{PurePosixPath(s['filename']).stem}{suffix}.{ann_format}"
+
+ if not Path(f_path).is_file():
+ msg = f"Annotation file {f_path} does not exist. Skipping..."
+ logging.warning(f"{s['filename']} - {msg}")
+ s["warnings"].append(msg)
+ return
+
+ logging.info(f"{s['filename']} - \tusing {f_path}")
+
+ # todo better using the Py3.10 match statement - so it will be a Literal
+ # noinspection PyTypeChecker
+ annotation_type: TYPE_SUPPORTED_PARSER = ann_format.lower()
+ logging.info(f"{s['filename']} - \tusing {annotation_type}")
+ # read points set
+ if annotation_type in PARSER_BUILDER_MAP: # xml
+ annot_collection = AnnotCollection.build(parser_type=annotation_type, uri=f_path, label_map=None)
+ # get_points_from_geojson(s, f_path)
+ else: # unsupported format
+ msg = f"unsupported file format '{ann_format}'. Skipping..."
+ logging.warning(f"{s['filename']} - {msg}")
+ s["warnings"].append(msg)
+ return
+
+ (off_x, off_y, ncol, nrow) = s["img_bbox"]
+ resize_factor = np.shape(s["img_mask_use"])[1] / ncol
+ height, width = s["img_mask_use"].shape
+ annotationMask = annotation_to_mask(width, height, annot_collection, (off_x, off_y), resize_factor) > 0
+
+ mask_file_name = f"{s['outdir']}{os.sep}{s['filename']}_annot_{ann_format.lower()}.png"
+ io.imsave(mask_file_name, img_as_ubyte(annotationMask))
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = prev_mask & annotationMask
+ s.addToPrintList("getAnnotationMask",
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(
+ f"{s['filename']} - After AnnotationModule.getAnnotationMask "
+ f"NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(
+ f"After AnnotationModule.getAnnotationMask NO tissue remains detectable!"
+ f" Downstream modules likely to be incorrect/fail")
+ return
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ArtifactToGeoJSONModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ArtifactToGeoJSONModule.py
new file mode 100644
index 00000000..67fa70d8
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ArtifactToGeoJSONModule.py
@@ -0,0 +1,84 @@
+import cv2
+import numpy as np
+import json
+import os
+import logging
+from pathlib import Path
+
+def export_geojson(di, params):
+ # 获取当前切片的输出目录
+ outdir = di.get("outdir", ".")
+ # 获取切片名称 (HistoQC 通常以这个名字开头保存图片)
+ sname = di.get("sname") or di.get("img_base")
+ if not sname and "filename" in di:
+ sname = Path(di["filename"]).stem
+
+ if not sname:
+ logging.error("无法获取切片名称,跳过 GeoJSON 生成。")
+ return
+
+ # 获取缩放因子 (WSI原图与处理图的倍率)
+ scale_factor = di.get("log_factor", 1.0)
+
+ # 定义想要识别的后缀和对应 QuPath 类别
+ # 注意:这里务必和 output 文件夹里生成的文件名后缀一致
+ ARTIFACT_CONFIG = {
+ "_pen_markings.png": {"name": "Pen Marking", "color": -65536},
+ "_coverslip_edge.png": {"name": "Coverslip Edge", "color": -16711936},
+ "_flat.png": {"name": "Air Bubble", "color": -16776961}
+ }
+
+ all_features = []
+ found_files = []
+
+ logging.info(f" [切片: {sname}] 启动后期扫描提取 ")
+
+ # 扫描文件夹中对应的 PNG
+ for suffix, info in ARTIFACT_CONFIG.items():
+ # HistoQC 保存的规律通常是 {sname}{suffix}
+ # 例如: TCGA-XXX_pen_markings.png
+ target_path = Path(outdir) / f"{sname}{suffix}"
+
+ if not target_path.exists():
+ # 兼容性扫描:如果文件名里有多余的点或空格,进行模糊匹配
+ potential = list(Path(outdir).glob(f"*{suffix}"))
+ if potential:
+ target_path = potential[0]
+ else:
+ continue
+
+ # 读取并转换
+ mask = cv2.imread(str(target_path), cv2.IMREAD_GRAYSCALE)
+ if mask is None or np.max(mask) == 0:
+ continue
+
+ found_files.append(target_path.name)
+
+ # 提取轮廓
+ _, thresh = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)
+ contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+ for contour in contours:
+ pts = (contour.reshape(-1, 2) * scale_factor).tolist()
+ if len(pts) < 3: continue
+ pts.append(pts[0]) # 闭合
+
+ all_features.append({
+ "type": "Feature",
+ "properties": {
+ "objectType": "annotation",
+ "classification": {"name": info["name"], "colorRGB": info["color"]},
+ "isLocked": False
+ },
+ "geometry": {"type": "Polygon", "coordinates": [pts]}
+ })
+ # 保存 JSON
+ if all_features:
+ output_file = Path(outdir) / f"{sname}_artifacts.json"
+ with open(output_file, 'w') as f:
+ json.dump({"type": "FeatureCollection", "features": all_features}, f)
+ logging.info(f"[成功] 从 {found_files} 提取了 {len(all_features)} 个坐标点")
+ else:
+ logging.warning(f"[跳过] 在目录 {outdir} 中未找到有效伪影图像或图像为空")
+
+ return
\ No newline at end of file
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BaseImage.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BaseImage.py
new file mode 100644
index 00000000..93a51f44
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BaseImage.py
@@ -0,0 +1,363 @@
+import logging
+import os
+import numpy as np
+import zlib, dill
+from distutils.util import strtobool
+from PIL import Image
+import re
+from typing import Union, Tuple
+#os.environ['PATH'] = 'C:\\research\\openslide\\bin' + ';' + os.environ['PATH'] #can either specify openslide bin path in PATH, or add it dynamically
+from histoqc.import_wrapper.openslide import openslide
+from skimage import measure, morphology
+# there is no branch reset group in re
+# compatible with the previous definition of valid input: leading zero and leading decimals are supported
+_REGEX_MAG = r"^(\d?\.?\d*X?)"
+_PATTERN_MAG: re.Pattern = re.compile(_REGEX_MAG, flags=re.IGNORECASE)
+MAG_NA = None
+
+
+class BaseImage(dict):
+
+ def __init__(self, fname, fname_outdir, seed, params):
+ dict.__init__(self)
+
+ self.in_memory_compression = strtobool(params.get("in_memory_compression", "False"))
+
+ self["warnings"] = [''] # this needs to be first key in case anything else wants to add to it
+ self["output"] = []
+
+ # these 2 need to be first for UI to work
+ self.addToPrintList("filename", os.path.basename(fname))
+ self.addToPrintList("comments", " ")
+
+ self["outdir"] = fname_outdir
+ self["seed"] = seed
+ self["dir"] = os.path.dirname(fname)
+
+ self["os_handle"] = openslide.OpenSlide(fname)
+ self["image_base_size"] = self["os_handle"].dimensions
+ self["enable_bounding_box"] = strtobool(params.get("enable_bounding_box","False"))
+ # check if the bbox if doesn't have bbox set enable_bounding_box to False
+ self.setBBox()
+ self.addToPrintList("image_bounding_box", self["img_bbox"])
+ self["image_work_size"] = params.get("image_work_size", "1.25x")
+ self["mask_statistics"] = params.get("mask_statistics", "relative2mask")
+
+ self["base_mag"] = getMag(self, params)
+
+ if not self["base_mag"]:
+ logging.error(f"{self['filename']}: Has unknown or uncalculated base magnification, cannot specify magnification scale! Did you try getMag?")
+ return -1
+
+ self.addToPrintList("base_mag", self["base_mag"])
+
+ mask_statistics_types = ["relative2mask", "absolute", "relative2image"]
+ if (self["mask_statistics"] not in mask_statistics_types):
+ logging.error(
+ f"mask_statistic type '{self['mask_statistics']}' is not one of the 3 supported options relative2mask, absolute, relative2image!")
+ exit()
+
+ self["img_mask_use"] = np.ones(self.getImgThumb(self["image_work_size"]).shape[0:2], dtype=bool)
+ self["img_mask_force"] = []
+
+ self["completed"] = []
+
+ def __getitem__(self, key):
+ value = super(BaseImage, self).__getitem__(key)
+ if hasattr(self,"in_memory_compression") and self.in_memory_compression and key.startswith("img"):
+ value = dill.loads(zlib.decompress(value))
+ return value
+
+ def __setitem__(self, key, value):
+ if hasattr(self,"in_memory_compression") and self.in_memory_compression and key.startswith("img"):
+ value = zlib.compress(dill.dumps(value), level=5)
+
+ return super(BaseImage, self).__setitem__(key,value)
+
+ # setbounding box start coordinate and size
+ def setBBox(self):
+ # add self["img_bbox"] = (x, y, width, heigh)
+ osh = self["os_handle"]
+ # set default bbox
+ (dim_width, dim_height) = osh.dimensions
+ self["img_bbox"] = (0, 0, dim_width, dim_height)
+ # try to get bbox if bounding_box is ture
+ if self["enable_bounding_box"]:
+ # try get bbox from os handle properties
+ try:
+ x = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_X, 'NA'))
+ y = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_Y, 'NA'))
+ width = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_WIDTH, 'NA'))
+ height = int(osh.properties.get(openslide.PROPERTY_NAME_BOUNDS_HEIGHT, 'NA'))
+ self["img_bbox"] = (x, y, width, height)
+ except:
+ # no bbox info in slide set enable_bounding_box as Flase
+ self["enable_bounding_box"] = False
+ logging.warning(f"{self['filename']}: Bounding Box requested but could not read")
+ self["warnings"].append("Bounding Box requested but could not read")
+
+ def addToPrintList(self, name, val):
+ self[name] = val
+ self["output"].append(name)
+
+ # find the next higher level by giving a downsample factor
+ # return (level, isFindCloseLevel)
+ def getBestLevelForDownsample(self, downsample_factor: float) -> Tuple[int, bool]:
+ osh = self["os_handle"]
+ relative_down_factors_idx=[np.isclose(i/downsample_factor,1,atol=.01) for i in osh.level_downsamples]
+ level=np.where(relative_down_factors_idx)[0]
+ if level.size:
+ return (level[0], True)
+ else:
+ return (osh.get_best_level_for_downsample(downsample_factor), False)
+
+ @staticmethod
+ def is_valid_size(size: str):
+ size = str(size)
+ return _PATTERN_MAG.fullmatch(size) is not None
+
+ @staticmethod
+ def validate_slide_size(size: str, assertion: bool = False):
+ size = str(size)
+ if assertion:
+ assert BaseImage.is_valid_size(size), f"{size}: does not match pattern {_REGEX_MAG}"
+ # for now just cast it to str
+ return size
+
+ def getImgThumb(self, size: str):
+ # note that while size is annotated as str, a bunch of functions in process Modules like SaveModule doesn't
+ # really handle it that way, and trace of previous coding also suggest that there actually lack a params
+ # type protocol in xxxModules. I think an extra layer of data sanitizing is necessary here.
+ size = BaseImage.validate_slide_size(size, assertion=False)
+ # get img key with size
+ key = "img_" + str(size)
+ # return the img if it exists
+ if key in self:
+ return self[key]
+
+ # get open slide handle
+ osh = self["os_handle"]
+
+ # get the size of view on current img - the current size of view by using the bounding box.
+ # bounding box could be the size of whole img or read the size from the slide mate data.
+ (bx, by, bwidth, bheight) = self["img_bbox"]
+ img_base_size = (bwidth, bheight)
+
+ # barricade the invalid input first
+ # can't determine operation.
+ if not BaseImage.is_valid_size(size):
+ # print out error message
+ err_msg = f"{self['filename']}: invalid arguments - {size}"
+ logging.error(err_msg)
+ self["warnings"].append(err_msg)
+ return
+
+ # specifies a desired operating magnification
+ if size.endswith(("X", "x")) and size[:-1].replace(".", "0", 1).isdigit():
+ target_mag = float(size.upper().split("X")[0])
+ # magnification
+ base_mag = self["base_mag"]
+ target_sampling_factor = base_mag / target_mag
+ target_dims = tuple(np.rint(np.asarray(img_base_size) / target_sampling_factor).astype(int))
+
+ # generate the thumb img
+ self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor)
+
+ # the size of the img is number
+ elif size.replace(".", "0", 1).isdigit():
+ size = float(size)
+ # specifies a desired downscaling factor
+ if size < 1:
+ target_downscaling_factor = size
+ target_sampling_factor = 1 / target_downscaling_factor
+ target_dims = tuple(np.rint(np.asarray(img_base_size) * target_downscaling_factor).astype(int))
+
+ # generate the thumb img
+ self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor)
+
+ # specifies a desired level of open slide
+ elif size < 100:
+ target_level = int(size)
+ if target_level >= osh.level_count:
+ target_level = osh.level_count - 1
+ msg = f"Desired Image Level {size+1} does not exist! Instead using level {osh.level_count-1}! Downstream output may not be correct"
+ logging.error(f"{self['filename']}: {msg}" )
+ self["warnings"].append(msg)
+ size = (tuple((np.array(img_base_size)/osh.level_downsamples[target_level]).astype(int))
+ if self["enable_bounding_box"]
+ else osh.level_dimensions[target_level])
+ logging.info(
+ f"{self['filename']} - \t\tloading image from level {target_level} of size {osh.level_dimensions[target_level]}")
+ tile = osh.read_region((bx, by), target_level, size)
+ self[key] = (np.asarray(rgba2rgb(self, tile))
+ if np.shape(tile)[-1]==4
+ else np.asarray(tile))
+
+ # specifies a desired size of thumbnail
+ else:
+ # recommend having the dimension is less than 10k
+ if size > 10000:
+ # warning message for the memory overhead
+ msg = f"Suggest using the smaller dimension thumbnail image because of the memory overhead."
+ logging.warning(msg)
+ self["warnings"].append(msg)
+ target_dims = getDimensionsByOneDim(self, int(size))
+ target_sampling_factor = img_base_size[0] / target_dims[0]
+ self[key] = getBestThumb(self, bx, by, target_dims, target_sampling_factor)
+ return self[key]
+
+'''
+the following are helper functions
+'''
+def getBestThumb(s: BaseImage, x: int, y: int, dims: Tuple[int, int], target_sampling_factor: float):
+ osh = s["os_handle"]
+
+ # get thumb from og
+ if not s["enable_bounding_box"]:
+ max_dim = dims[0] if dims[0] > dims[1] else dims[1]
+ return np.array(osh.get_thumbnail((max_dim, max_dim)))
+
+ (level, isExactLevel) = s.getBestLevelForDownsample(target_sampling_factor)
+
+ # check if get the existing level
+ if isExactLevel:
+ tile = osh.read_region((x, y), level, dims)
+ return np.asarray(rgba2rgb(s, tile)) if np.shape(tile)[-1]==4 else np.asarray(tile)
+ # scale down the thumb img from the next high level
+ else:
+ return resizeTileDownward(s, target_sampling_factor, level)
+
+
+def resizeTileDownward(self, target_downsampling_factor, level):
+ osh = self["os_handle"]
+ (bx, by, bwidth, bheight) = self["img_bbox"]
+ end_x = bx + bwidth
+ end_y = by + bheight
+
+ cloest_downsampling_factor = osh.level_downsamples[level]
+ win_size = 2048
+
+ # create a new img
+ output = []
+ for x in range(bx, end_x, win_size):
+ row_piece = []
+ for y in range(by, end_y, win_size):
+ win_width, win_height = [win_size] * 2
+ # Adjust extraction size for endcut
+ if end_x < x + win_width:
+ win_width = end_x - x
+ if end_y < y + win_height:
+ win_height = end_y - y
+
+
+ win_down_width = int(round(win_width / target_downsampling_factor))
+ win_down_height = int(round(win_height / target_downsampling_factor))
+
+ win_width = int(round(win_width / cloest_downsampling_factor))
+ win_height = int(round(win_height / cloest_downsampling_factor))
+
+ # TODO Note: this isn't very efficient, and if more efficiency isneeded
+ # We should likely refactor using "paste" from Image.
+ # Or even just set the pixels directly with indexing.
+ cloest_region = osh.read_region((x, y), level, (win_width, win_height))
+ if np.shape(cloest_region)[-1]==4:
+ cloest_region = rgba2rgb(self, cloest_region)
+ target_region = cloest_region.resize((win_down_width, win_down_height))
+ row_piece.append(target_region)
+ row_piece = np.concatenate(row_piece, axis=0)
+
+ output.append(row_piece)
+ output = np.concatenate(output, axis=1)
+ return output
+
+
+def rgba2rgb(s: BaseImage, img):
+ bg_color = "#" + s["os_handle"].properties.get(openslide.PROPERTY_NAME_BACKGROUND_COLOR, "ffffff")
+ thumb = Image.new("RGB", img.size, bg_color)
+ thumb.paste(img, None, img)
+ return thumb
+
+
+def printMaskHelper(type: str, prev_mask, curr_mask):
+ if type == "relative2mask":
+ if len(prev_mask.nonzero()[0]) == 0:
+ return str(-100)
+ else:
+ return str(1 - len(curr_mask.nonzero()[0]) / len(prev_mask.nonzero()[0]))
+ elif type == "relative2image":
+ return str(len(curr_mask.nonzero()[0]) / np.prod(curr_mask.shape))
+ elif type == "absolute":
+ return str(len(curr_mask.nonzero()[0]))
+ else:
+ return str(-1)
+
+
+def parsed_mag(mag: Union[str, int, float]) -> Union[None, float]:
+ """Parse magnification to float
+ Args:
+ mag:
+
+ Returns:
+ Validated size factor either as a float number or "NA" (MAG_NA)
+ """
+ if isinstance(mag, (int, float)):
+ return float(mag)
+ numeric_mag_str_flag = BaseImage.is_valid_size(mag)
+ invalid_flag = mag == MAG_NA or not numeric_mag_str_flag
+ if invalid_flag:
+ return MAG_NA
+ # regex determines X must either be abscent or at the end of the string
+ if "X" in mag.upper():
+ mag = mag[0:-1]
+ return float(mag)
+
+
+# this function is seperated out because in the future we hope to have automatic detection of
+# magnification if not present in open slide, and/or to confirm openslide base magnification
+def getMag(s: BaseImage, params) -> Union[float, None]:
+ logging.info(f"{s['filename']} - \tgetMag")
+ osh = s["os_handle"]
+ mag = osh.properties.get("openslide.objective-power") or \
+ osh.properties.get("aperio.AppMag") or MAG_NA
+ # if mag or strtobool(params.get("confirm_base_mag", "False")):
+ # # do analysis work here
+ # logging.warning(f"{s['filename']} - Unknown base magnification for file")
+ # s["warnings"].append(f"{s['filename']} - Unknown base magnification for file")
+ # return None
+ # else:
+ # workaround for unspecified mag -- with or without automatic detection it might be preferred to have
+ # mag predefined
+ mag = mag or parsed_mag(params.get("base_mag"))
+ # mag is santized after invoking getMag regarding whether it's None. Therefore, it should not raise
+ # the exception here.
+ return float(mag) if mag is not MAG_NA else MAG_NA
+
+
+def getDimensionsByOneDim(s: BaseImage, dim: int) -> Tuple[int, int]:
+ (x, y, width, height) = s["img_bbox"]
+ # calulate the width or height depends on dim
+ if width > height:
+ h = int(dim * height / width)
+ return dim, h
+ else:
+ w = int(dim * width / height)
+ return w, dim
+
+
+def getMaskRegionsStats(img)-> dict:
+
+ rps = measure.regionprops(morphology.label(img))
+ if rps:
+ areas = np.asarray([rp.area for rp in rps])
+ num = len(rps)
+ area_min = areas.min()
+ area_max = areas.max()
+ area_mean = areas.mean()
+ else:
+ num = area_min = area_max = area_mean = 0
+ return {
+ 'num': num,
+ 'area_min': area_min,
+ 'area_max': area_max,
+ 'area_mean': area_mean
+ }
\ No newline at end of file
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BasicModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BasicModule.py
new file mode 100644
index 00000000..55361be6
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BasicModule.py
@@ -0,0 +1,83 @@
+import logging
+import os
+from histoqc.BaseImage import printMaskHelper, getMaskRegionsStats
+from skimage.morphology import remove_small_objects, binary_opening, disk
+from skimage import io, color, img_as_ubyte
+
+import matplotlib.pyplot as plt
+
+
+def getBasicStats(s, params):
+ logging.info(f"{s['filename']} - \tgetBasicStats")
+ osh = s["os_handle"]
+ s.addToPrintList("type", osh.properties.get("openslide.vendor", "NA"))
+ s.addToPrintList("levels", osh.properties.get("openslide.level-count", "NA"))
+ s.addToPrintList("height", osh.properties.get("openslide.level[0].height", "NA"))
+ s.addToPrintList("width", osh.properties.get("openslide.level[0].width", "NA"))
+ s.addToPrintList("mpp_x", osh.properties.get("openslide.mpp-x", "NA"))
+ s.addToPrintList("mpp_y", osh.properties.get("openslide.mpp-y", "NA"))
+ s.addToPrintList("comment", osh.properties.get("openslide.comment", "NA").replace("\n", " ").replace("\r", " "))
+ return
+
+
+def finalComputations(s, params):
+ mask = s["img_mask_use"]
+ mask_statistics = params.get("mask_statistics", s["mask_statistics"])
+ if mask_statistics == "relative2mask":
+ warning_message = "relative2mask is not supported in BasicModule.finalComputations. Using absolute instead."
+ logging.warning(f"{s['filename']} - {warning_message}")
+ s["warnings"].append(warning_message)
+ mask_statistics = "absolute"
+ s.addToPrintList("pixels_to_use", printMaskHelper(mask_statistics, mask, mask))
+
+
+def finalProcessingSpur(s, params):
+ logging.info(f"{s['filename']} - \tfinalProcessingSpur")
+ disk_radius = int(params.get("disk_radius", "25"))
+ selem = disk(disk_radius)
+ mask = s["img_mask_use"]
+ mask_opened = binary_opening(mask, selem)
+ mask_spur = ~mask_opened & mask
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_spur.png", img_as_ubyte(mask_spur))
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = mask_opened
+
+ s.addToPrintList("spur_pixels",
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(
+ f"{s['filename']} - After BasicModule.finalProcessingSpur NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(
+ f"After BasicModule.finalProcessingSpur NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
+
+
+def finalProcessingArea(s, params):
+ logging.info(f"{s['filename']} - \tfinalProcessingArea")
+ area_thresh = int(params.get("area_threshold", "1000"))
+ mask = s["img_mask_use"]
+
+ mask_opened = remove_small_objects(mask, min_size=area_thresh)
+ mask_removed_area = ~mask_opened & mask
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_areathresh.png", img_as_ubyte(mask_removed_area))
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = mask_opened > 0
+
+ s.addToPrintList("areaThresh",
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ warning_message = "After BasicModule.finalProcessingArea NO tissue remains detectable! Downstream modules likely to be incorrect/fail"
+ logging.warning(
+ f"{s['filename']} - {warning_message}")
+ s["warnings"].append(warning_message)
+
+
+def countTissuePieces(s, params):
+ mask = s["img_mask_use"]
+ stats = getMaskRegionsStats(mask)
+ s.addToPrintList("#pieces_of_tissue", str(stats.get('num', 0)))
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BlurDetectionModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BlurDetectionModule.py
new file mode 100644
index 00000000..afea9f6b
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BlurDetectionModule.py
@@ -0,0 +1,66 @@
+import logging
+import os
+
+import skimage
+from histoqc.BaseImage import printMaskHelper, getMaskRegionsStats
+from skimage import io, img_as_ubyte, morphology, measure
+from skimage.color import rgb2gray
+from skimage.filters import rank
+import numpy as np
+
+import matplotlib.pyplot as plt
+
+
+# Analysis of focus measure operators for shape-from-focus
+# Said Pertuza,, Domenec Puiga, Miguel Angel Garciab, 2012
+# https://pdfs.semanticscholar.org/8c67/5bf5b542b98bf81dcf70bd869ab52ab8aae9.pdf
+
+
+def identifyBlurryRegions(s, params):
+ logging.info(f"{s['filename']} - \tidentifyBlurryRegions")
+
+ blur_radius = int(params.get("blur_radius", 7))
+ blur_threshold = float(params.get("blur_threshold", .1))
+
+ img = s.getImgThumb(params.get("image_work_size", "2.5x"))
+ img = rgb2gray(img)
+ img_laplace = np.abs(skimage.filters.laplace(img))
+ mask = skimage.filters.gaussian(img_laplace, sigma=blur_radius) <= blur_threshold
+
+ mask = skimage.transform.resize(mask, s.getImgThumb(s["image_work_size"]).shape, order=0)[:, :,
+ 1] # for some reason resize takes a grayscale and produces a 3chan
+ mask = s["img_mask_use"] & (mask > 0)
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_blurry.png", img_as_ubyte(mask))
+ s["img_mask_blurry"] = (mask * 255) > 0
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_blurry"]
+
+ # rps = measure.regionprops(morphology.label(mask))
+ # if rps:
+ # areas = np.asarray([rp.area for rp in rps])
+ # nobj = len(rps)
+ # area_max = areas.max()
+ # area_mean = areas.mean()
+ # else:
+ # nobj = area_max = area_mean = 0
+ stat_rs = getMaskRegionsStats(mask)
+
+
+ s.addToPrintList("blurry_removed_num_regions", str(stat_rs.get('num', 0)))
+ s.addToPrintList("blurry_removed_mean_area", str(stat_rs.get('area_mean',0)))
+ s.addToPrintList("blurry_removed_max_area", str(stat_rs.get('area_max',0)))
+
+
+ s.addToPrintList("blurry_removed_percent",
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(
+ f"{s['filename']} - After BlurDetectionModule.identifyBlurryRegions NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(
+ f"After BlurDetectionModule.identifyBlurryRegions NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
+
+
+ return
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BrightContrastModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BrightContrastModule.py
new file mode 100644
index 00000000..89dbfbd2
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BrightContrastModule.py
@@ -0,0 +1,123 @@
+import logging
+import numpy as np
+from skimage.filters import sobel
+from skimage.color import convert_colorspace, rgb2gray
+from distutils.util import strtobool
+
+
+def getBrightnessGray(s, params):
+ prefix = params.get("prefix", None)
+ prefix = prefix+"_" if prefix else ""
+ logging.info(f"{s['filename']} - \tgetContrast:{prefix}")
+
+ limit_to_mask = strtobool(params.get("limit_to_mask", "True"))
+ invert = strtobool(params.get("invert", "False"))
+ mask_name = params.get("mask_name", "img_mask_use")
+
+ img = s.getImgThumb(s["image_work_size"])
+
+ img_g = rgb2gray(img)
+ if limit_to_mask:
+
+ mask = s[mask_name] if not invert else ~s[mask_name]
+
+ img_g = img_g[mask]
+ if img_g.size == 0:
+ img_g = np.array(-100)
+
+ s.addToPrintList(f"{prefix}grayscale_brightness", str(img_g.mean()))
+ s.addToPrintList(f"{prefix}grayscale_brightness_std", str(img_g.std()))
+
+ return
+
+
+def getBrightnessByChannelinColorSpace(s, params):
+ prefix = params.get("prefix", None)
+ prefix = prefix + "_" if prefix else ""
+
+ logging.info(f"{s['filename']} - \tgetContrast:{prefix}")
+
+ to_color_space = params.get("to_color_space", "RGB")
+ limit_to_mask = strtobool(params.get("limit_to_mask", "True"))
+ mask_name = params.get("mask_name", "img_mask_use")
+
+ invert = strtobool(params.get("invert", "False"))
+
+
+ img = s.getImgThumb(s["image_work_size"])
+
+ suffix = ""
+ if (to_color_space != "RGB"):
+ img = convert_colorspace(img, "RGB", to_color_space)
+ suffix = "_" + to_color_space
+
+ for chan in range(0, 3):
+ vals = img[:, :, chan]
+ if (limit_to_mask):
+
+ mask = s[mask_name] if not invert else ~s[mask_name]
+ vals = vals[mask]
+
+ if vals.size == 0:
+ vals = np.array(-100)
+
+ s.addToPrintList(f"{prefix}chan{chan+1}_brightness{suffix}", str(vals.mean()))
+ s.addToPrintList(f"{prefix}chan{chan+1}_brightness_std{suffix}", str(vals.std()))
+
+ return
+
+
+def getContrast(s, params):
+ prefix = params.get("prefix", None)
+ prefix = prefix + "_" if prefix else ""
+
+ logging.info(f"{s['filename']} - \tgetContrast:{prefix}")
+ limit_to_mask = strtobool(params.get("limit_to_mask", True))
+ mask_name = params.get("mask_name", "img_mask_use")
+
+ invert = strtobool(params.get("invert", "False"))
+
+
+ img = s.getImgThumb(s["image_work_size"])
+ img = rgb2gray(img)
+
+ sobel_img = sobel(img) ** 2
+
+ if limit_to_mask:
+
+ mask = s[mask_name] if not invert else ~s[mask_name]
+
+ sobel_img = sobel_img[mask]
+ img = img[s["img_mask_use"]]
+
+ if img.size == 0: # need a check to ensure that mask wasn't empty AND limit_to_mask is true, still want to
+ # produce metrics for completeness with warning
+
+ s.addToPrintList(f"{prefix}tenenGrad_contrast", str(-100))
+ s.addToPrintList(f"{prefix}michelson_contrast", str(-100))
+ s.addToPrintList(f"{prefix}rms_contrast", str(-100))
+
+
+ logging.warning(f"{s['filename']} - After BrightContrastModule.getContrast: NO tissue "
+ f"detected, statistics are impossible to compute, defaulting to -100 !")
+ s["warnings"].append(f"After BrightContrastModule.getContrast: NO tissue remains "
+ f"detected, statistics are impossible to compute, defaulting to -100 !")
+
+ return
+
+
+ # tenenGrad - Note this must be performed on full image and then subsetted if limiting to mask
+ tenenGrad_contrast = np.sqrt(np.sum(sobel_img)) / img.size
+ s.addToPrintList(f"{prefix}tenenGrad_contrast", str(tenenGrad_contrast))
+
+ # Michelson contrast
+ max_img = img.max()
+ min_img = img.min()
+ contrast = (max_img - min_img) / (max_img + min_img)
+ s.addToPrintList(f"{prefix}michelson_contrast", str(contrast))
+
+ # RMS contrast
+ rms_contrast = np.sqrt(pow(img - img.mean(), 2).sum() / img.size)
+ s.addToPrintList(f"{prefix}rms_contrast", str(rms_contrast))
+
+ return
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BubbleRegionByRegion.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BubbleRegionByRegion.py
new file mode 100644
index 00000000..afd62f57
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/BubbleRegionByRegion.py
@@ -0,0 +1,128 @@
+import logging
+import os
+import sys
+
+from ast import literal_eval as make_tuple
+
+from distutils.util import strtobool
+from histoqc.BaseImage import printMaskHelper
+
+import scipy.signal
+
+from skimage import io, img_as_ubyte
+from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace
+from skimage.color import rgb2gray
+from skimage.morphology import remove_small_objects, disk, binary_opening
+from skimage.feature import local_binary_pattern
+
+from skimage.transform import rescale, resize, downscale_local_mean
+
+from math import ceil
+
+from sklearn.naive_bayes import GaussianNB
+from sklearn.ensemble import RandomForestClassifier
+
+from skimage import io, color
+
+
+import numpy as np
+
+import matplotlib.pyplot as plt
+
+global_holder = {}
+
+#WARNING: Not as robust as other modules
+def roiWise(s, params):
+ name = params.get("name", "classTask")
+ print("\tpixelWise:\t", name, end="")
+
+ level = int(params.get("level", 1))
+ win_size = int(params.get("win_size", 2048)) #the size of the ROI which will be iteratively considered
+
+ osh = s["os_handle"]
+
+ dim_base = osh.level_dimensions[0]
+ dims = osh.level_dimensions[level]
+
+ ratio_x = dim_base[0] / dims[0] #figure out the difference between desi
+ ratio_y = dim_base[1] / dims[1]
+
+ frangi_scale_range = (1, 6)
+ frangi_scale_step = 2
+ frangi_beta1 = .5
+ frangi_beta2 = 100
+ frangi_black_ridges = True
+
+ mask = []
+ for x in range(0, dim_base[0], round(win_size * ratio_x)):
+ row_piece = []
+ print('.', end='', flush=True)
+ for y in range(0, dim_base[1], round(win_size * ratio_y)):
+ region = np.asarray(osh.read_region((x, y), 1, (win_size, win_size)))
+ region = region[:, :, 0:3] # remove alpha channel
+ g = rgb2gray(region)
+ feat = frangi(g, frangi_scale_range, frangi_scale_step, frangi_beta1, frangi_beta2, frangi_black_ridges)
+ feat = feat / 8.875854409275627e-08
+ region_mask = np.bitwise_and(g < .3, feat > 5)
+ region_mask = remove_small_objects(region_mask, min_size=100, in_place=True)
+ # region_std = region.std(axis=2)
+ # region_gray = rgb2gray(region)
+ # region_mask = np.bitwise_and(region_std < 20, region_gray < 100/255)
+ # region_mask = scipy.ndimage.morphology.binary_dilation(region_mask, iterations=1)
+ # region_mask = resize(region_mask , (region_mask.shape[0] / 2, region_mask.shape[1] / 2))
+ row_piece.append(region_mask)
+ row_piece = np.concatenate(row_piece, axis=0)
+ mask.append(row_piece)
+
+ mask = np.concatenate(mask, axis=1)
+
+ if params.get("area_threshold", "") != "":
+ mask = remove_small_objects(mask, min_size=int(params.get("area_threshold", "")), in_place=True)
+
+ s.addToPrintList(name, str(mask.mean()))
+
+ #TODO, migrate to printMaskHelper, but currently don't see how this output affects final mask
+ #s.addToPrintList(name,
+ # printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_BubbleBounds.png", img_as_ubyte(mask)) #.astype(np.uint8) * 255)
+
+ return
+
+
+def detectSmoothness(s, params):
+ logging.info(f"{s['filename']} - \tBubbleRegionByRegion.detectSmoothness")
+ thresh = float(params.get("threshold", ".01" ))
+ kernel_size = int(params.get("kernel_size", "10"))
+ min_object_size = int(params.get("min_object_size", "100"))
+
+ img = s.getImgThumb(s["image_work_size"])
+ img = color.rgb2gray(img)
+ avg = np.ones((kernel_size, kernel_size)) / (kernel_size**2)
+
+ imf = scipy.signal.convolve2d(img, avg, mode="same")
+ mask_flat = abs(imf - img) < thresh
+
+ mask_flat = remove_small_objects(mask_flat, min_size=min_object_size)
+ mask_flat = ~remove_small_objects(~mask_flat, min_size=min_object_size)
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_flat"] = mask_flat
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_flat.png", img_as_ubyte(mask_flat & prev_mask))
+
+ s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_flat"]
+
+
+ s.addToPrintList("flat_areas",
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask,
+ s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(f"{s['filename']} - After BubbleRegionByRegion.detectSmoothness: NO tissue "
+ f"remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(f"After BubbleRegionByRegion.detectSmoothness: NO tissue remains "
+ f"detectable! Downstream modules likely to be incorrect/fail")
+
+ return
+
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ClassificationModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ClassificationModule.py
new file mode 100644
index 00000000..660145d5
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/ClassificationModule.py
@@ -0,0 +1,252 @@
+import logging
+import os
+import re
+import sys
+
+from ast import literal_eval as make_tuple
+
+from distutils.util import strtobool
+
+from histoqc.BaseImage import printMaskHelper
+from skimage import io, img_as_ubyte, img_as_bool
+from skimage.filters import gabor_kernel, frangi, gaussian, median, laplace
+from skimage.color import rgb2gray
+from skimage.morphology import remove_small_objects, disk, dilation
+from skimage.feature import local_binary_pattern
+
+from scipy import ndimage as ndi
+
+from sklearn.naive_bayes import GaussianNB
+from sklearn.ensemble import RandomForestClassifier
+
+import numpy as np
+
+import matplotlib.pyplot as plt
+
+
+def pixelWise(s, params):
+ name = params.get("name", "classTask")
+ logging.info(f"{s['filename']} - \tpixelWise:\t", name)
+
+ thresh = float(params.get("threshold", .5))
+
+ fname = params.get("tsv_file", "")
+ if fname == "":
+ logging.error(f"{s['filename']} - tsv_file not set in ClassificationModule.pixelWise for ", name)
+ sys.exit(1)
+ return
+ model_vals = np.loadtxt(fname, delimiter="\t", skiprows=1)
+
+ img = s.getImgThumb(s["image_work_size"])
+
+ gnb = GaussianNB()
+ gnb.fit(model_vals[:, 1:], model_vals[:, 0])
+ cal = gnb.predict_proba(img.reshape(-1, 3))
+
+ cal = cal.reshape(img.shape[0], img.shape[1], 2)
+ mask = cal[:, :, 1] > thresh
+
+ mask = s["img_mask_use"] & (mask > 0)
+
+ s.addToPrintList(name, str(mask.mean()))
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(mask))
+ s["img_mask_" + name] = (mask * 255) > 0
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_" + name]
+
+ s.addToPrintList(name,
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(f"{s['filename']} - After ClassificationModule.pixelWise:{name} NO tissue "
+ f"remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(f"After ClassificationModule.pixelWise:{name} NO tissue remains "
+ f"detectable! Downstream modules likely to be incorrect/fail")
+
+ return
+
+
+# extract_patches_2d(image, patch_size, max_patches=None, random_state=None
+
+def compute_rgb(img, params):
+ return img
+
+
+def compute_laplace(img, params):
+ laplace_ksize = int(params.get("laplace_ksize", 3))
+ return laplace(rgb2gray(img), ksize=laplace_ksize)[:, :, None]
+
+
+def compute_lbp(img, params):
+ lbp_radius = float(params.get("lbp_radius", 3))
+ lbp_points = int(params.get("lbp_points", 24)) # example sets radius * 8
+ lbp_method = params.get("lbp_method", "default")
+
+ return local_binary_pattern(rgb2gray(img), P=lbp_points, R=lbp_radius, method=lbp_method)[:, :, None]
+
+
+def compute_gaussian(img, params):
+ gaussian_sigma = int(params.get("gaussian_sigma", 1))
+ gaussian_multichan = strtobool(params.get("gaussian_multichan", False))
+
+ if (gaussian_multichan):
+ return gaussian(img, sigma=gaussian_sigma, multichannel=gaussian_multichan)
+ else:
+ return gaussian(rgb2gray(img), sigma=gaussian_sigma)[:, :, None]
+
+
+def compute_median(img, params):
+ median_disk_size = int(params.get("median_disk_size", 3))
+ return median(rgb2gray(img), selem=disk(median_disk_size))[:, :, None]
+
+
+def compute_gabor(img, params):
+ if not params["shared_dict"].get("gabor_kernels", False):
+ gabor_theta = int(params.get("gabor_theta", 4))
+ gabor_sigma = make_tuple(params.get("gabor_sigma", "(1,3)"))
+ gabor_frequency = make_tuple(params.get("gabor_frequency", "(0.05, 0.25)"))
+
+ kernels = []
+ for theta in range(gabor_theta):
+ theta = theta / 4. * np.pi
+ for sigma in gabor_sigma:
+ for frequency in gabor_frequency:
+ kernel = np.real(gabor_kernel(frequency, theta=theta,
+ sigma_x=sigma, sigma_y=sigma))
+ kernels.append(kernel)
+ params["shared_dict"]["gabor_kernels"] = kernels
+
+ kernels = params["shared_dict"]["gabor_kernels"]
+ imgg = rgb2gray(img)
+ feats = np.zeros((imgg.shape[0], imgg.shape[1], len(kernels)), dtype=np.double)
+ for k, kernel in enumerate(kernels):
+ filtered = ndi.convolve(imgg, kernel, mode='wrap')
+ feats[:, :, k] = filtered
+ return feats
+
+
+def compute_frangi(img, params):
+ frangi_scale_range = make_tuple(params.get("frangi_scale_range", "(1, 10)"))
+ frangi_scale_step = float(params.get("frangi_scale_step", 2))
+ frangi_beta1 = float(params.get("frangi_beta1", .5))
+ frangi_beta2 = float(params.get("frangi_beta2", 15))
+ frangi_black_ridges = strtobool(params.get("frangi_black_ridges", "True"))
+ feat = frangi(rgb2gray(img), scale_range = frangi_scale_range, scale_step =frangi_scale_step, beta =frangi_beta1, gamma=frangi_beta2, black_ridges =frangi_black_ridges)
+ return feat[:, :, None] # add singleton dimension
+
+
+def compute_features(img, params):
+ features = params.get("features", "")
+
+ feats = []
+ for feature in features.splitlines():
+ func = getattr(sys.modules[__name__], f"compute_{feature}")
+ feats.append(func(img, params))
+
+ return np.concatenate(feats, axis=2)
+
+
+def byExampleWithFeatures(s, params):
+ name = params.get("name", "classTask")
+ logging.info(f"{s['filename']} - \tClassificationModule.byExample:\t{name}")
+
+ thresh = float(params.get("threshold", .5))
+ nsamples_per_example = float(params.get("nsamples_per_example", -1))
+
+ examples = params.get("examples", "")
+ if examples == "":
+ logging.error(f"{s['filename']} - No examples provided in ClassificationModule.byExample for {name} !!")
+ sys.exit(1)
+ return
+
+ if params.get("features", "") == "":
+ logging.error(f"{s['filename']} - No features provided in ClassificationModule.byExample for {name} !!")
+ sys.exit(1)
+ return
+
+ with params["lock"]: # this lock is shared across all threads such that only one thread needs to train the model
+ # then it is shared with all other modules
+ if not params["shared_dict"].get("model_" + name, False):
+ logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}")
+
+ model_vals = []
+ model_labels = np.empty([0, 1])
+
+ for ex in params["examples"].splitlines():
+ ex = re.split(r'(? 1 else int(mask.shape[0]*nsamples_per_example)
+
+ # set seed to random function if seed is not None
+ if s["seed"] is not None:
+ np.random.seed(int(s["seed"]))
+
+ idxkeep = np.random.choice(mask.shape[0], size=int(nitems))
+ eximg = eximg[idxkeep, :]
+ mask = mask[idxkeep]
+
+
+ model_vals.append(eximg)
+ model_labels = np.vstack((model_labels, mask))
+
+ # do stuff here with model_vals
+ model_vals = np.vstack(model_vals)
+ clf = RandomForestClassifier(n_jobs=-1)
+ clf.fit(model_vals, model_labels.ravel())
+ params["shared_dict"]["model_" + name] = clf
+ logging.info(f"{s['filename']} - Training model ClassificationModule.byExample:{name}....done")
+
+ clf = params["shared_dict"]["model_" + name]
+ img = s.getImgThumb(s["image_work_size"])
+ feats = compute_features(img, params)
+ cal = clf.predict_proba(feats.reshape(-1, feats.shape[2]))
+ cal = cal.reshape(img.shape[0], img.shape[1], 2)
+
+ mask = cal[:, :, 1] > thresh
+
+ area_thresh = int(params.get("area_threshold", "5"))
+ if area_thresh > 0:
+ mask = remove_small_objects(mask, min_size=area_thresh, in_place=True)
+
+ dilate_kernel_size = int(params.get("dilate_kernel_size", "0"))
+ if dilate_kernel_size > 0:
+ mask = dilation(mask, selem=np.ones((dilate_kernel_size, dilate_kernel_size)))
+
+ mask = s["img_mask_use"] & (mask > 0)
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(mask))
+ s["img_mask_" + name] = (mask * 255) > 0
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = s["img_mask_use"] & ~s["img_mask_" + name]
+
+ s.addToPrintList(name,
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(f"{s['filename']} - After ClassificationModule.byExampleWithFeatures:{name} NO tissue "
+ f"remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(f"After ClassificationModule.byExampleWithFeatures:{name} NO tissue remains "
+ f"detectable! Downstream modules likely to be incorrect/fail")
+
+ s["img_mask_force"].append("img_mask_" + name)
+ s["completed"].append(f"byExampleWithFeatures:{name}")
+
+ return
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/DeconvolutionModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/DeconvolutionModule.py
new file mode 100644
index 00000000..dbb41f1a
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/DeconvolutionModule.py
@@ -0,0 +1,76 @@
+import logging
+import os
+import sys
+import numpy as np
+from skimage import io, color, img_as_ubyte
+from skimage.exposure import rescale_intensity
+from skimage.color import separate_stains
+from skimage.color import hed_from_rgb, hdx_from_rgb, fgx_from_rgb, bex_from_rgb, rbd_from_rgb
+from skimage.color import gdx_from_rgb, hax_from_rgb, bro_from_rgb, bpx_from_rgb, ahx_from_rgb, \
+ hpx_from_rgb # need to load all of these in case the user selects them
+from distutils.util import strtobool
+
+import matplotlib.pyplot as plt
+
+
+def separateStains(s, params):
+ logging.info(f"{s['filename']} - \tseparateStains")
+ stain = params.get("stain", "")
+ use_mask = strtobool(params.get("use_mask", "True"))
+
+ if stain == "":
+ logging.error(f"{s['filename']} - stain not set in DeconvolutionModule.separateStains")
+ sys.exit(1)
+ return
+
+ stain_matrix = getattr(sys.modules[__name__], stain, None)
+
+ if stain_matrix is None:
+ logging.error(f"{s['filename']} - Unknown stain matrix specified in DeconolutionModule.separateStains")
+ sys.exit(1)
+ return
+
+ mask = s["img_mask_use"]
+
+ if use_mask and len(mask.nonzero()[0]) == 0: #-- lets just error check at the top if mask is empty and abort early
+ for c in range(3):
+ s.addToPrintList(f"deconv_c{c}_std", str(-100))
+ s.addToPrintList(f"deconv_c{c}_mean", str(-100))
+ io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(np.zeros(mask.shape)))
+
+ logging.warning(f"{s['filename']} - DeconvolutionModule.separateStains: NO tissue "
+ f"remains detectable! Saving Black images")
+ s["warnings"].append(f"DeconvolutionModule.separateStains: NO tissue "
+ f"remains detectable! Saving Black images")
+
+ return
+
+ img = s.getImgThumb(s["image_work_size"])
+ dimg = separate_stains(img, stain_matrix)
+
+ for c in range(0, 3):
+ dc = dimg[:, :, c]
+
+ clip_max_val = np.quantile(dc.flatten(), .99)
+ dc = np.clip(dc, a_min=0, a_max=clip_max_val)
+
+
+ if use_mask:
+ dc_sub = dc[mask]
+ dc_min = dc_sub.min()
+ dc_max = dc_sub.max()
+
+ s.addToPrintList(f"deconv_c{c}_mean", str(dc_sub.mean()))
+ s.addToPrintList(f"deconv_c{c}_std", str(dc_sub.std()))
+ else:
+ mask = 1.0
+ dc_min = dc.min()
+ dc_max = dc.max()
+
+ s.addToPrintList(f"deconv_c{c}_mean", str(dc.mean()))
+ s.addToPrintList(f"deconv_c{c}_std", str(dc.std()))
+
+ dc = (dc - dc_min) / float(dc_max - dc_min) * mask
+ io.imsave(s["outdir"] + os.sep + s["filename"] + f"_deconv_c{c}.png", img_as_ubyte(dc))
+
+ return
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/HistogramModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/HistogramModule.py
new file mode 100644
index 00000000..e58707c8
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/HistogramModule.py
@@ -0,0 +1,78 @@
+import logging
+import os
+import numpy as np
+from skimage import io
+import matplotlib.pyplot as plt
+from distutils.util import strtobool
+
+global_holder = {} #this holds a local copy of the histograms of the template images so that they need only be computed once
+
+
+def getHistogram(s, params):
+ logging.info(f"{s['filename']} - \tgetHistogram")
+ limit_to_mask = strtobool(params.get("limit_to_mask", True))
+ bins = int(params.get("bins", 20))
+
+ img = s.getImgThumb(s["image_work_size"])
+ if limit_to_mask:
+ img = img[s["img_mask_use"]]
+ else:
+ img = img.reshape(-1, 3)
+
+ ax = plt.axes()
+ ax.hist(img, bins=bins, density=True, range=(0, 255), histtype='step', color=("r", "g", "b"))
+
+ ax.grid(True)
+ ax.set_title('Color Distribution for ' + s["filename"])
+ ax.set_xlabel('Pixel Val')
+ ax.set_ylabel('Density')
+ plt.savefig(s["outdir"] + os.sep + s["filename"] + "_hist.png")
+ plt.close()
+ return
+
+
+def computeHistogram(img, bins, mask=-1):
+ result = np.zeros(shape=(bins, 3))
+ for chan in range(0, 3):
+ vals = img[:, :, chan].flatten()
+ if (isinstance(mask, np.ndarray)):
+ vals = vals[mask.flatten()]
+
+ result[:, chan] = np.histogram(vals, bins=bins, density=True, range=[0, 255])[0]
+
+ return result
+
+
+def compareToTemplates(s, params):
+ logging.info(f"{s['filename']} - \tcompareToTemplates")
+ bins = int(params.get("bins", 20))
+ limit_to_mask = strtobool(params.get("limit_to_mask", True))
+ if (not global_holder.get("templates", False)): #if the histograms haven't already been computed, compute and store them now
+ templates = {}
+ for template in params["templates"].splitlines():
+ templates[os.path.splitext(os.path.basename(template))[0]] = computeHistogram(io.imread(template), bins)
+ # compute each of their histograms
+ global_holder["templates"] = templates
+
+ img = s.getImgThumb(s["image_work_size"])
+
+ if (limit_to_mask):
+ mask = s["img_mask_use"]
+ if len(mask.nonzero()[0]) == 0:
+
+ logging.warning(f"{s['filename']} - HistogramModule.compareToTemplates NO tissue "
+ f"remains detectable in mask!")
+ s["warnings"].append(f"HistogramModule.compareToTemplates NO tissue "
+ f"remains detectable in mask!")
+
+ imghst = np.zeros((bins,3))
+
+ else:
+ imghst = computeHistogram(img, bins, mask)
+ else:
+ imghst = computeHistogram(img, bins)
+
+ for template in global_holder["templates"]:
+ val = np.sum(pow(abs(global_holder["templates"][template] - imghst), 2))
+ s.addToPrintList(template + "_MSE_hist", str(val))
+ return
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LightDarkModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LightDarkModule.py
new file mode 100644
index 00000000..1f8d3069
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LightDarkModule.py
@@ -0,0 +1,188 @@
+import logging
+import os
+import numpy as np
+from histoqc.BaseImage import printMaskHelper
+from skimage import io, color, img_as_ubyte
+from distutils.util import strtobool
+from skimage.filters import threshold_otsu, rank
+from skimage.morphology import disk
+from sklearn.cluster import KMeans
+from skimage import exposure
+
+import matplotlib.pyplot as plt
+
+
+
+def getIntensityThresholdOtsu(s, params):
+ logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdOtsu")
+ name = params.get("name", "classTask")
+ local = strtobool(params.get("local", "False"))
+ radius = float(params.get("radius", 15))
+ selem = disk(radius)
+
+ img = s.getImgThumb(s["image_work_size"])
+ img = color.rgb2gray(img)
+
+ if local:
+ thresh = rank.otsu(img, selem)
+ else:
+ thresh = threshold_otsu(img)
+
+ map = img < thresh
+
+ s["img_mask_" + name] = map > 0
+ if strtobool(params.get("invert", "False")):
+ s["img_mask_" + name] = ~s["img_mask_" + name]
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(s["img_mask_" + name]))
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name]
+
+ s.addToPrintList(name,
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(f"{s['filename']} - After LightDarkModule.getIntensityThresholdOtsu:{name} NO tissue remains "
+ f"detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(f"After LightDarkModule.getIntensityThresholdOtsu:{name} NO tissue remains detectable! "
+ f"Downstream modules likely to be incorrect/fail")
+
+ return
+
+
+def getIntensityThresholdPercent(s, params):
+ name = params.get("name", "classTask")
+ logging.info(f"{s['filename']} - \tLightDarkModule.getIntensityThresholdPercent:\t {name}")
+
+ lower_thresh = float(params.get("lower_threshold", "-inf"))
+ upper_thresh = float(params.get("upper_threshold", "inf"))
+
+ # Prepare parameter names due to issues #213 and #219
+ # set lower standard deviation
+ lower_std = float(params.get("lower_var") or params.get("lower_std") or "-inf")
+ # set upper standard deviation
+ upper_std = float(params.get("upper_var") or params.get("upper_std") or "inf")
+
+ img = s.getImgThumb(s["image_work_size"])
+ img_std = img.std(axis=2)
+
+ map_std = np.bitwise_and(img_std > lower_std, img_std < upper_std)
+
+ img = color.rgb2gray(img)
+ map = np.bitwise_and(img > lower_thresh, img < upper_thresh)
+
+ map = np.bitwise_and(map, map_std)
+
+ s["img_mask_" + name] = map > 0
+
+
+
+ if strtobool(params.get("invert", "False")):
+ s["img_mask_" + name] = ~s["img_mask_" + name]
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = s["img_mask_use"] & s["img_mask_" + name]
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_" + name + ".png", img_as_ubyte(prev_mask & ~s["img_mask_" + name]))
+
+ s.addToPrintList(name,
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(f"{s['filename']} - After LightDarkModule.getIntensityThresholdPercent:{name} NO tissue "
+ f"remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(f"After LightDarkModule.getIntensityThresholdPercent:{name} NO tissue remains "
+ f"detectable! Downstream modules likely to be incorrect/fail")
+
+ return
+
+
+
+
+
+
+def removeBrightestPixels(s, params):
+ logging.info(f"{s['filename']} - \tLightDarkModule.removeBrightestPixels")
+
+ # lower_thresh = float(params.get("lower_threshold", -float("inf")))
+ # upper_thresh = float(params.get("upper_threshold", float("inf")))
+ #
+ # lower_var = float(params.get("lower_variance", -float("inf")))
+ # upper_var = float(params.get("upper_variance", float("inf")))
+
+ img = s.getImgThumb(s["image_work_size"])
+ img = color.rgb2gray(img)
+
+ kmeans = KMeans(n_clusters=3, n_init=1).fit(img.reshape([-1, 1]))
+ brightest_cluster = np.argmax(kmeans.cluster_centers_)
+ darkest_point_in_brightest_cluster = (img.reshape([-1, 1])[kmeans.labels_ == brightest_cluster]).min()
+
+ s["img_mask_bright"] = img > darkest_point_in_brightest_cluster
+
+
+
+ if strtobool(params.get("invert", "False")):
+ s["img_mask_bright"] = ~s["img_mask_bright"]
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = s["img_mask_use"] & s["img_mask_bright"]
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_bright.png", img_as_ubyte(prev_mask & ~s["img_mask_bright"]))
+
+ s.addToPrintList("brightestPixels",
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(f"{s['filename']} - After LightDarkModule.removeBrightestPixels NO tissue "
+ f"remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(f"After LightDarkModule.removeBrightestPixels NO tissue remains "
+ f"detectable! Downstream modules likely to be incorrect/fail")
+
+ return
+
+
+
+def minimumPixelIntensityNeighborhoodFiltering(s,params):
+ logging.info(f"{s['filename']} - \tLightDarkModule.minimumPixelNeighborhoodFiltering")
+ disk_size = int(params.get("disk_size", 10000))
+ threshold = int(params.get("upper_threshold", 200))
+
+ img = s.getImgThumb(s["image_work_size"])
+ img = color.rgb2gray(img)
+ img = (img * 255).astype(np.uint8)
+ selem = disk(disk_size)
+
+ imgfilt = rank.minimum(img, selem)
+ s["img_mask_bright"] = imgfilt > threshold
+
+
+ if strtobool(params.get("invert", "True")):
+ s["img_mask_bright"] = ~s["img_mask_bright"]
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = s["img_mask_use"] & s["img_mask_bright"]
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_bright.png", img_as_ubyte(prev_mask & ~s["img_mask_bright"]))
+
+ s.addToPrintList("brightestPixels",
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(f"{s['filename']} - After LightDarkModule.minimumPixelNeighborhoodFiltering NO tissue "
+ f"remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(f"After LightDarkModule.minimumPixelNeighborhoodFiltering NO tissue remains "
+ f"detectable! Downstream modules likely to be incorrect/fail")
+
+ return
+
+def saveEqualisedImage(s,params):
+ logging.info(f"{s['filename']} - \tLightDarkModule.saveEqualisedImage")
+
+ img = s.getImgThumb(s["image_work_size"])
+ img = color.rgb2gray(img)
+
+ out = exposure.equalize_hist((img*255).astype(np.uint8))
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_equalized_thumb.png", img_as_ubyte(out))
+
+ return
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LocalTextureEstimationModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LocalTextureEstimationModule.py
new file mode 100644
index 00000000..cbe7cb28
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/LocalTextureEstimationModule.py
@@ -0,0 +1,57 @@
+import logging
+import numpy as np
+from skimage import color
+from distutils.util import strtobool
+from skimage.feature import graycomatrix, graycoprops
+import matplotlib.pyplot as plt
+
+
+
+def estimateGreyComatrixFeatures(s, params):
+
+ prefix = params.get("prefix", None)
+ prefix = prefix+"_" if prefix else ""
+
+ logging.info(f"{s['filename']} - \tLocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix}")
+ patch_size = int(params.get("patch_size", 32))
+ npatches = int(params.get("npatches", 100))
+ nlevels = int(params.get("nlevels", 8))
+ feats = params.get("feats","contrast:dissimilarity:homogeneity:ASM:energy:correlation").split(':')
+ invert = strtobool(params.get("invert", "False"))
+ mask_name = params.get("mask_name","img_mask_use")
+
+ # set seed to random function if seed is not None
+ if s["seed"] is not None:
+ np.random.seed(int(s["seed"]))
+
+ img = s.getImgThumb(s["image_work_size"])
+ img = color.rgb2gray(img)
+
+ mask = s[mask_name] if not invert else ~s[mask_name]
+ if len(mask.nonzero()[0]) == 0: # add warning in case the no tissus detected in mask
+ msg = f"LocalTextureEstimationModule.estimateGreyComatrixFeatures:{prefix} Can not estimate the empty mask since NO tissue remains detectable in mask"
+ logging.warning(f"{s['filename']} - {msg}")
+ s["warnings"].append(msg)
+ return
+
+ maskidx = mask.nonzero()
+ maskidx = np.asarray(maskidx).transpose()
+ idx = np.random.choice(maskidx.shape[0], npatches)
+
+ results = []
+
+ for id in idx:
+ r, c = maskidx[id, :]
+ patch = img[r:r + patch_size, c:c + patch_size]
+ glcm = graycomatrix(np.digitize(patch,np.linspace(0,1,num=nlevels),right=True), distances=[5],
+ angles=[0], levels=nlevels, symmetric=True, normed=True)
+
+ results.append([graycoprops(glcm, prop=feat) for feat in feats])
+
+ results = np.asarray(results).squeeze()
+
+ for vals, feat in zip(results.transpose(), feats):
+ s.addToPrintList(f"{prefix}{feat}", str(vals.mean()))
+ s.addToPrintList(f"{prefix}{feat}_std", str(vals.std()))
+
+ return
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/MorphologyModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/MorphologyModule.py
new file mode 100644
index 00000000..9bfac7dd
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/MorphologyModule.py
@@ -0,0 +1,161 @@
+import logging
+import os
+import numpy as np
+from histoqc.BaseImage import printMaskHelper, getMaskRegionsStats
+from skimage import io, morphology, img_as_ubyte, measure
+
+from scipy import ndimage as ndi
+
+import matplotlib.pyplot as plt # these 2 are used for debugging
+from histoqc.SaveModule import blend2Images #for easier debugging
+
+
+def removeSmallObjects(s, params):
+ logging.info(f"{s['filename']} - \tremoveSmallObjects")
+ min_size = int(params.get("min_size", 64))
+ img_reduced = morphology.remove_small_objects(s["img_mask_use"], min_size=min_size)
+ img_small = np.invert(img_reduced) & s["img_mask_use"]
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_remove.png", img_as_ubyte(img_small))
+ s["img_mask_small_filled"] = (img_small * 255) > 0
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = img_reduced
+
+
+ # rps = measure.regionprops(morphology.label(img_small))
+ # if rps:
+ # areas = np.asarray([rp.area for rp in rps])
+ # nobj = len(rps)
+ # area_max = areas.max()
+ # area_mean = areas.mean()
+ # else:
+ # nobj = area_max = area_mean = 0
+ stats = getMaskRegionsStats(img_small)
+
+
+ # s.addToPrintList("small_tissue_removed_num_regions", str(nobj))
+ # s.addToPrintList("small_tissue_removed_mean_area", str(area_mean))
+ # s.addToPrintList("small_tissue_removed_max_area", str(area_max))
+ s.addToPrintList("small_tissue_removed_num_regions", str(stats.get('num',0)))
+ s.addToPrintList("small_tissue_removed_mean_area", str(stats.get('area_mean',0)))
+ s.addToPrintList("small_tissue_removed_max_area", str(stats.get('area_max',0)))
+
+
+
+
+ s.addToPrintList("small_tissue_removed_percent",
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(f"{s['filename']} - After MorphologyModule.removeSmallObjects: NO tissue "
+ f"remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(f"After MorphologyModule.removeSmallObjects: NO tissue remains "
+ f"detectable! Downstream modules likely to be incorrect/fail")
+
+ return
+
+
+def remove_large_objects(img, max_size):
+ # code taken from morphology.remove_small_holes, except switched < with >
+ selem = ndi.generate_binary_structure(img.ndim, 1)
+ ccs = np.zeros_like(img, dtype=np.int32)
+ ndi.label(img, selem, output=ccs)
+ component_sizes = np.bincount(ccs.ravel())
+ too_big = component_sizes > max_size
+ too_big_mask = too_big[ccs]
+ img_out = img.copy()
+ img_out[too_big_mask] = 0
+ return img_out
+
+
+def removeFatlikeTissue(s, params):
+ logging.info(f"{s['filename']} - \tremoveFatlikeTissue")
+ fat_cell_size = int(params.get("fat_cell_size", 64))
+ kernel_size = int(params.get("kernel_size", 3))
+ max_keep_size = int(params.get("max_keep_size", 1000))
+
+ img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=fat_cell_size)
+ img_small = img_reduced & np.invert(s["img_mask_use"])
+ img_small = ~morphology.remove_small_holes(~img_small, area_threshold=9)
+
+ mask_dilate = morphology.dilation(img_small, selem=np.ones((kernel_size, kernel_size)))
+ mask_dilate_removed = remove_large_objects(mask_dilate, max_keep_size)
+
+ mask_fat = mask_dilate & ~mask_dilate_removed
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_fatlike.png", img_as_ubyte(mask_fat))
+ s["img_mask_fatlike"] = (mask_fat * 255) > 0
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = prev_mask & ~mask_fat
+
+ # rps = measure.regionprops(morphology.label(mask_fat))
+ # if rps:
+ # areas = np.asarray([rp.area for rp in rps])
+ # nobj = len(rps)
+ # area_max = areas.max()
+ # area_mean = areas.mean()
+ # else:
+ # nobj = area_max = area_mean = 0
+ stats = getMaskRegionsStats(mask_fat)
+
+ # s.addToPrintList("fatlike_tissue_removed_num_regions", str(nobj))
+ # s.addToPrintList("fatlike_tissue_removed_mean_area", str(area_mean))
+ # s.addToPrintList("fatlike_tissue_removed_max_area", str(area_max))
+ s.addToPrintList("fatlike_tissue_removed_num_regions", str(stats.get('num',0)))
+ s.addToPrintList("fatlike_tissue_removed_mean_area", str(stats.get('area_mean',0)))
+ s.addToPrintList("fatlike_tissue_removed_max_area", str(stats.get('area_max',0)))
+
+
+
+ s.addToPrintList("fatlike_tissue_removed_percent",
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(f"{s['filename']} - After MorphologyModule.removeFatlikeTissue: NO tissue "
+ f"remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(f"After MorphologyModule.removeFatlikeTissue: NO tissue remains "
+ f"detectable! Downstream modules likely to be incorrect/fail")
+
+
+def fillSmallHoles(s, params):
+ logging.info(f"{s['filename']} - \tfillSmallHoles")
+ min_size = int(params.get("min_size", 64))
+ img_reduced = morphology.remove_small_holes(s["img_mask_use"], area_threshold=min_size)
+ img_small = img_reduced & np.invert(s["img_mask_use"])
+
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_small_fill.png", img_as_ubyte(img_small))
+ s["img_mask_small_removed"] = (img_small * 255) > 0
+
+ prev_mask = s["img_mask_use"]
+ s["img_mask_use"] = img_reduced
+
+ # rps = measure.regionprops(morphology.label(img_small))
+ # if rps:
+ # areas = np.asarray([rp.area for rp in rps])
+ # nobj = len(rps)
+ # area_max = areas.max()
+ # area_mean = areas.mean()
+ # else:
+ # nobj = area_max = area_mean = 0
+ stats = getMaskRegionsStats(img_small)
+
+ # s.addToPrintList("small_tissue_filled_num_regions", str(nobj))
+ # s.addToPrintList("small_tissue_filled_mean_area", str(area_mean))
+ # s.addToPrintList("small_tissue_filled_max_area", str(area_max))
+ s.addToPrintList("small_tissue_filled_num_regions", str(stats.get('num', 0)))
+ s.addToPrintList("small_tissue_filled_mean_area", str(stats.get('area_mean', 0)))
+ s.addToPrintList("small_tissue_filled_max_area", str(stats.get('area_max', 0)))
+
+ s.addToPrintList("small_tissue_filled_percent",
+ printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))
+
+ if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
+ logging.warning(f"{s['filename']} - After MorphologyModule.fillSmallHoles: NO tissue "
+ f"remains detectable! Downstream modules likely to be incorrect/fail")
+ s["warnings"].append(f"After MorphologyModule.fillSmallHoles: NO tissue remains "
+ f"detectable! Downstream modules likely to be incorrect/fail")
+ return
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/SaveModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/SaveModule.py
new file mode 100644
index 00000000..f3ad2723
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/SaveModule.py
@@ -0,0 +1,175 @@
+import logging
+import os
+import cv2
+import uuid
+import json
+import numpy as np
+from skimage import io, img_as_ubyte
+from distutils.util import strtobool
+from skimage import color, measure
+from copy import deepcopy
+from geojson import Polygon, Feature, FeatureCollection, dump
+
+def blend2Images(img, mask):
+ if (img.ndim == 3):
+ img = color.rgb2gray(img)
+ if (mask.ndim == 3):
+ mask = color.rgb2gray(mask)
+ img = img[:, :, None] * 1.0 # can't use boolean
+ mask = mask[:, :, None] * 1.0
+ out = np.concatenate((mask, img, mask), 2)
+ return out
+
+def binaryMask2Geojson(s, mask):
+ # get the dimension of slide
+ (dim_width, dim_height) = s['os_handle'].dimensions
+ # get the dimension of mask
+ (mask_height, mask_width) = mask.shape
+
+ # convert binary mask to contours
+ # contours = measure.find_contours(mask)
+ contours, hierarchy = cv2.findContours(img_as_ubyte(mask), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+ children = [[] for i in range(len(contours))]
+ for i, cnt in enumerate(contours):
+ # first_child_idx = hier[0, i, 2]
+ parent_idx = hierarchy[0, i, 3]
+ if (parent_idx == -1):
+ continue
+ # add contour to parent's children node
+ children[parent_idx].append(cnt)
+
+ # no contours detected
+ if not len(contours):
+ # set default format
+ ann_format = "xml"
+ # warning msg
+ msg = f"No contour detected at use mask image. Geojson annotation won't be generated."
+ logging.warning(f"{s['filename']} - {msg}")
+ s["warnings"].append(msg)
+ return None
+
+ features = []
+ for i, contour in enumerate(contours):
+ first_child_idx = hierarchy[0, i, 2]
+ parent_idx = hierarchy[0, i, 3]
+
+ if (parent_idx != -1):
+ continue
+
+ geometry = []
+ points = np.asarray(contour / [mask_height, mask_width] * [dim_height, dim_width],dtype="int")
+ points = np.append(points, [points[0]], axis=0)
+ points = points[:,0].tolist()
+ points = [tuple(p) for p in points]
+ geometry.append(points)
+ if first_child_idx != -1:
+ for child in children[i]:
+ child_points = np.asarray(child / [mask_height, mask_width] * [dim_height, dim_width],dtype="int")
+ child_points = np.append(child_points, [child_points[0]], axis=0)
+ child_points = child_points[:,0].tolist()
+ child_points = [tuple(p) for p in child_points]
+ geometry.append(child_points)
+ new_feature = Feature(id=uuid.uuid4().hex, geometry=Polygon(geometry),properties={"objectType": "annotation"})
+
+ features.append(new_feature)
+ feature_collection = FeatureCollection(features)
+
+ return feature_collection
+
+
+def saveFinalMask(s, params):
+ logging.info(f"{s['filename']} - \tsaveUsableRegion")
+
+ mask = s["img_mask_use"]
+ for mask_force in s["img_mask_force"]:
+ mask[s[mask_force]] = 0
+
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_mask_use.png", img_as_ubyte(mask))
+
+
+ if strtobool(params.get("use_mask", "True")): # should we create and save the fusion mask?
+ img = s.getImgThumb(s["image_work_size"])
+ out = blend2Images(img, mask)
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_fuse.png", img_as_ubyte(out))
+
+ return
+
+
+def saveAssociatedImage(s, key:str, dim:int):
+ logging.info(f"{s['filename']}- save{key.capitalize()}")
+ osh = s["os_handle"]
+
+ if not key in osh.associated_images:
+ message = f"{s['filename']}- save{key.capitalize()} Can't Read '{key}' Image from Slide's Associated Images"
+ logging.warning(message)
+ s["warnings"].append(message)
+ return
+
+ # get asscociated image by key
+ associated_img = osh.associated_images[key]
+ (width, height) = associated_img.size
+
+ # calulate the width or height depends on dim
+ if width > height:
+ h = round(dim * height / width)
+ size = (dim, h)
+ else:
+ w = round(dim * width / height)
+ size = (w, dim)
+
+ associated_img = associated_img.resize(size)
+ associated_img = np.asarray(associated_img)[:, :, 0:3]
+ io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{key}.png", associated_img)
+
+def saveMacro(s, params):
+ dim = params.get("small_dim", 500)
+ saveAssociatedImage(s, "macro", dim)
+ return
+
+def saveMask(s, params):
+ logging.info(f"{s['filename']} - \tsaveMaskUse")
+ suffix = params.get("suffix", None)
+ # check suffix param
+ if not suffix:
+ msg = f"{s['filename']} - \tPlease set the suffix for mask use."
+ logging.error(msg)
+ return
+
+ # save mask
+ io.imsave(f"{s['outdir']}{os.sep}{s['filename']}_{suffix}.png", img_as_ubyte(s["img_mask_use"]))
+
+
+def saveMask2Geojson(s, params):
+
+ mask_name = params.get('mask_name', 'img_mask_use')
+ suffix = params.get("suffix", None)
+ logging.info(f"{s['filename']} - \tsaveMaskUse2Geojson: {mask_name}")
+ # check suffix param
+ if not suffix:
+ msg = f"{s['filename']} - \tPlease set the suffix for mask use in geojson."
+ logging.error(msg)
+ return
+
+ # check if the mask name exists
+ if s.get(mask_name, None) is None:
+ msg = f"{s['filename']} - \tThe `{mask_name}` mask dosen't exist. Please use correct mask name."
+ logging.error(msg)
+ return
+ # convert mask to geojson
+ geojson = binaryMask2Geojson(s, s[mask_name])
+
+ # save mask as genjson file
+ with open(f"{s['outdir']}{os.sep}{s['filename']}_{suffix}.geojson", 'w') as f:
+ dump(geojson, f)
+
+
+
+def saveThumbnails(s, params):
+ logging.info(f"{s['filename']} - \tsaveThumbnail")
+ # we create 2 thumbnails for usage in the front end, one relatively small one, and one larger one
+ img = s.getImgThumb(params.get("image_work_size", "1.25x"))
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb.png", img)
+
+ img = s.getImgThumb(params.get("small_dim", 500))
+ io.imsave(s["outdir"] + os.sep + s["filename"] + "_thumb_small.png", img)
+ return
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/TileExtractionModule.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/TileExtractionModule.py
new file mode 100644
index 00000000..0101ca49
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/TileExtractionModule.py
@@ -0,0 +1,610 @@
+"""
+A standalone tile extraction module to locate tile bounding boxes in usable tissue region obtained by previous steps.
+Coordinates are saved in the half-open 4-tuple convention of (left, top, right, bottom), where `right` and `bottom`
+are open.
+"""
+import os
+import openslide
+import json
+from histoqc.BaseImage import BaseImage
+from typing import Callable, Dict, Any, List, Tuple, Union
+import numpy as np
+from PIL import Image, ImageDraw
+from skimage.measure import regionprops
+from contextlib import contextmanager
+from distutils.util import strtobool
+import logging
+from histoqc.import_wrapper.typing import Literal, get_args
+# from histoqc.import_wrapper.helper import dynamic_import
+# __TYPE_GET_ARGS = Callable[[Type, ], Tuple[Any, ...]]
+# Literal: TypeVar = dynamic_import("typing", "Literal", "typing_extensions")
+# get_args: __TYPE_GET_ARGS = dynamic_import("typing", "get_args", "typing_extensions")
+
+TYPE_TILE_SIZE = Literal['tile_size']
+TYPE_TILE_STRIDE = Literal['tile_stride']
+TYPE_TISSUE_RATIO = Literal['tissue_ratio']
+TYPE_OUTPUT_ROOT = Literal['tile_output']
+TYPE_LOCK = Literal['lock']
+TYPE_SUFFIX = Literal['suffix']
+TYPE_OUTLINE = Literal['outline']
+TYPE_WIDTH = Literal['width']
+TYPE_SAVE_FLAG = Literal['save_image']
+PARAMS = Literal[TYPE_TILE_SIZE, TYPE_TILE_STRIDE, TYPE_TISSUE_RATIO,
+ TYPE_OUTPUT_ROOT, TYPE_SUFFIX,
+ TYPE_LOCK, TYPE_OUTLINE, TYPE_WIDTH, TYPE_SAVE_FLAG]
+
+
+TYPE_BBOX_FLOAT = Tuple[float, float, float, float]
+TYPE_BBOX_INT = Tuple[int, int, int, int]
+
+
+def default_screen_identity(img: np.ndarray):
+ return True
+
+
+class MaskTileWindows:
+ """
+ Locate the window of tiles in the given downsampled mask. Output Convention: (left, top, right, bottom).
+ Coordinates are half-open as [left, right) and [top, bottom).
+ """
+
+ __rp_list: List
+ __mask: np.ndarray
+ __mask_pil: Image.Image
+ __tissue_thresh: float
+ # note that the tile size on the corresponding downsampled masks may no longer be integer, therefore cause
+ # loss of precision when convert back to original tile size after working on mask
+ __windows_on_mask: List[List[TYPE_BBOX_FLOAT]]
+ __windows_on_original_image: List[List[Tuple[int, int, int, int]]]
+
+ @property
+ def mask_pil(self) -> Image.Image:
+ return Image.fromarray(self._mask)
+
+ @property
+ def _mask(self) -> np.ndarray:
+ return self.__mask
+
+ @property
+ def _rp_list(self) -> List:
+ """
+
+ Returns:
+ List of regionprops objects (see sklearn.regionprops)
+ """
+ attr_name = '__rp_list'
+ if not hasattr(self, attr_name) or getattr(self, attr_name) is None:
+ setattr(self, attr_name, regionprops(self.__mask))
+ return getattr(self, attr_name)
+
+ def __init_mask(self, mask: np.ndarray):
+ self.__mask = mask
+
+ def __init__(self, mask: np.ndarray, *, work_tile_size: int, work_stride: int,
+ size_factor: float,
+ tissue_thresh: float):
+ """
+ Args:
+ mask:
+ work_tile_size:
+ work_stride:
+ size_factor: size ratio of image to mask
+ """
+ self.__init_mask(mask.astype(np.int32))
+ self.work_tile_size = work_tile_size
+ self.work_stride = work_stride
+ self.__size_factor = size_factor
+ self.__tissue_thresh = tissue_thresh
+
+ @staticmethod
+ def validate_tile_mask_area_thresh(mask_pil: Image.Image,
+ tile_window_on_mask: TYPE_BBOX_FLOAT,
+ tissue_thresh: float) -> bool:
+ """ Validate whether the given tile window (left, top, right, bottom) contains sufficient tissue. This is
+ computed by calculating the tissue % in the corresponding mask region.
+ Note that if the coordinates are not int the actual area of region may be different
+ Args:
+ mask_pil:
+ tile_window_on_mask: List of (left, top, right, bottom). Open on right and bottom
+ tissue_thresh: minimum requirement of tissue percentage
+
+ Returns:
+ True if the window has sufficient tissue.
+ """
+ left, top, right, bottom = tile_window_on_mask
+ # window_on_mask_work = tuple(round(x) for x in tile_window_on_mask)
+ window_on_mask_work = round(left), round(top), round(right), round(bottom)
+ window_pil = mask_pil.crop(window_on_mask_work)
+ # noinspection PyTypeChecker
+ window_np = np.array(window_pil, copy=False)
+ window_bool = window_np > 0
+ return window_bool.mean() >= tissue_thresh
+
+ @staticmethod
+ def _valid_tile_windows_on_mask_helper(mask_pil: Image.Image,
+ tile_cand_pil_window_on_mask: List[TYPE_BBOX_FLOAT],
+ tissue_thresh: float) -> List[TYPE_BBOX_FLOAT]:
+ """ All tile windows with sufficient usable tissue from the grid of window candidates
+ Args:
+ mask_pil:
+ tile_cand_pil_window_on_mask: left, top, right, bottom. Potential candidates of windows
+ tissue_thresh: minimum requirement of tissue %
+ Returns:
+ List of validated windows (left, top, right, bottom) from the given candidates.
+ """
+ # output = []
+ # for window in tile_cand_pil_window:
+ # has_enough_usable_tissue = _MaskTileLocator.validate_tile_mask_area_thresh(mask_pil, window,
+ # tissue_thresh)
+ # if not has_enough_usable_tissue:
+ # continue
+ return [window for window in tile_cand_pil_window_on_mask
+ if MaskTileWindows.validate_tile_mask_area_thresh(mask_pil, window, tissue_thresh)]
+
+ @staticmethod
+ def region_tile_cand_pil_window_on_mask(rp_bbox: TYPE_BBOX_INT,
+ work_tile_size: float,
+ work_stride: float) -> List[TYPE_BBOX_FLOAT]:
+ """ Split the region given by the region property bounding box into a grid of tile windows. Support overlapping.
+ This computes the all possible window given by the rp regardless of the tissue condition. Refinement can be
+ performed in further steps.
+ Args:
+ rp_bbox: sklearn region property style: [top, left, bottom, right]. Half-open
+ -- [Left, Right) and [Top, Bottom)
+ work_tile_size:
+ work_stride:
+
+ Returns:
+ List of (left, top, right, bottom) tuples. Half-open
+ """
+ top_rp, left_rp, bottom_rp, right_rp = rp_bbox
+ # top/left of the right/bottom most tile
+ tile_max_top, tile_max_left = MaskTileWindows.max_tile_bbox_top_left_coord(rp_bbox,
+ work_tile_size,
+ work_stride)
+ # obtain the top/left coord of all tile bboxes
+ all_tops = np.arange(top_rp, tile_max_top + 1, work_stride, dtype=int)
+ all_lefts = np.arange(left_rp, tile_max_left + 1, work_stride, dtype=int)
+ # since it's open on right and bottom, right = left + size and bottom = top + size, wherein the right-1
+ # is the actual right most pixel and likewise bottom-1 is the actual bottom-most pixel.
+ def window(left, top, size): return left, top, (left + size), (top + size)
+ # get full tile bbox representation
+ all_tile_pil_window = [window(left, top, work_tile_size) for left in all_lefts for top in all_tops]
+ return all_tile_pil_window
+
+ @staticmethod
+ def rp_tile_windows_on_mask(mask_pil,
+ rp_bbox: TYPE_BBOX_INT,
+ work_tile_size: float,
+ work_stride: float,
+ tissue_thresh: float) -> List[TYPE_BBOX_FLOAT]:
+ """Wrapper. Obtain the valid window with sufficient tissue from a list of region property objects based on
+ a given mask. For each individual region, a list of window in format (left, top, right, bottom) is obtained.
+ Resulting lists of windows of all regions are nested into a list as the object.
+ Args:
+ mask_pil: PIL handle of the downsampled mask
+ rp_bbox: bounding box of sklearn region properties. Note that its convention is [top, left, bottom, right],
+ which is different to the (left, top, right, bottom) in PIL and OpenSlide. Int coords.
+ work_tile_size: Working tile size on the downsampled mask
+ work_stride: Working stride size on the downsampled mask
+ tissue_thresh: Minimum requirement of tissue % in each window
+
+ Returns:
+ List of (left, top, right, bottom)
+ """
+ candidates = MaskTileWindows.region_tile_cand_pil_window_on_mask(rp_bbox, work_tile_size, work_stride)
+ return MaskTileWindows._valid_tile_windows_on_mask_helper(mask_pil, candidates, tissue_thresh)
+
+ def _tile_windows_on_mask(self) -> List[List[TYPE_BBOX_FLOAT]]:
+ """Helper function to locate the windows of each region in format of (left, top, right, bottom)
+ Note that to retain the precision the coordinates are in the float form, rather than cast to int.
+ (Noticeably the right/bottom coords due to that size may not be integer)
+ THe actual validation of corresponding bbox regions on mask on the other hand should convert the coords
+ correspondingly.
+ Returns:
+ List of List of (left, top, right, bottom), nested by connected regions in the mask
+ """
+ result_list: List[List[TYPE_BBOX_FLOAT]] = []
+ # loop the regionprop list
+ for region in self._rp_list:
+ # get bounding box of the individual region
+ rp_bbox = region.bbox
+ # get list of possible tile bounding boxes within the region bounding box, computed from
+ # tile size, stride, and tissue thresh
+ windows: List[TYPE_BBOX_FLOAT] = MaskTileWindows.rp_tile_windows_on_mask(self.mask_pil,
+ rp_bbox,
+ self.work_tile_size,
+ self.work_stride,
+ self.__tissue_thresh)
+ # result_list += windows
+ result_list.append(windows)
+ return result_list
+
+ @property
+ def windows_on_mask(self) -> List[List[TYPE_BBOX_FLOAT]]:
+ """
+ Returns:
+ Obtain the cached tile windows on the given mask. Results are cached.
+ """
+ if not hasattr(self, '__windows_on_mask') or self.__windows_on_mask is None:
+ self.__windows_on_mask = self._tile_windows_on_mask()
+ return self.__windows_on_mask
+
+ @property
+ def windows_on_original_image(self) -> List[List[TYPE_BBOX_INT]]:
+ """Zoom the windows from the mask (which is often downsampled) to the original image, using the defined
+ size factor
+ Returns:
+ Zoomed windows on the original image (left, top, right, bottom)
+ """
+ if not hasattr(self, '__windows_on_original_image') or self.__windows_on_original_image is None:
+ self.__windows_on_original_image = MaskTileWindows.__window_list_resize(self.windows_on_mask,
+ self.__size_factor)
+ return self.__windows_on_original_image
+
+ @staticmethod
+ def max_tile_bbox_top_left_coord(rp_bbox: TYPE_BBOX_INT, work_tile_size: float, work_stride: float) \
+ -> Tuple[int, int]:
+ """ find the coords of the top/left corner of the most right / bottom tile ever possible given the current
+ size and stride.
+ Args:
+ rp_bbox: [top, left, bottom, right]. Half-open -- [Left, Right) and [Top, Bottom). Note that this is the
+ convention of sklearn's region properties, which is different to the (left, top, right, bottom) used by
+ PIL or OpenSlide. The bbox of connected tissue regions on mask. Int coords.
+ work_tile_size: Tile size on the working mask, which might be downsampled.
+ work_stride: Stride size on the working mask, which might be downsampled.
+ Returns:
+ Tuple[int, int]
+ """
+ assert work_stride > 0, f"work stride must be greater than 0 - got {work_stride}"
+ assert work_tile_size > 0, f"work tile size must be greater than 0 - got {work_tile_size}"
+
+ # not for skimage regionprops, the bbox is half-open at the bottom / right coordinates.
+ # [left, right) and [top, bottom). Hence, the "+1" operation below for coord computation
+ # is already priced-in
+ top_rp, left_rp, bottom_rp, right_rp = rp_bbox
+ # start + n_step * stride + tile_size = bottom/rightmost --> (rp_limit - tile_size) // stride = max step
+ max_step_horiz = (right_rp - left_rp - work_tile_size) / work_stride
+ max_step_vert = (bottom_rp - top_rp - work_tile_size) / work_stride
+ tile_max_left = left_rp + max_step_horiz * work_stride
+ tile_max_top = top_rp + max_step_vert * work_stride
+
+ assert round(tile_max_left + work_tile_size) <= right_rp,\
+ f"left + size check" \
+ f" {tile_max_left + work_tile_size} = {tile_max_left} + {work_tile_size} <= {right_rp} fail"
+ assert round(tile_max_top + work_tile_size) <= bottom_rp,\
+ f"top + size check" \
+ f" {tile_max_top + work_tile_size} = {tile_max_top} + {work_tile_size} <= {bottom_rp} fail"
+ return int(tile_max_top), int(tile_max_left)
+
+ @staticmethod
+ def __window_resize_helper(window_on_mask: Union[TYPE_BBOX_FLOAT, TYPE_BBOX_INT], size_factor) -> TYPE_BBOX_INT:
+ """Helper function to zoom the window coordinates on downsampled mask to the original sized image.
+ Convert back to int.
+ Args:
+ window_on_mask: (left, top, right, bottom)
+ size_factor: size_factor = img_size / mask_size
+
+ Returns:
+ Resized (left, top, right, bottom)
+ """
+ # work around of the type check of IDE. Otherwise, I will return directly
+ left, top, right, bottom, *rest = tuple(int(r * size_factor) for r in window_on_mask)
+ return left, top, right, bottom
+
+ @staticmethod
+ def __window_list_resize(window_on_mask: Union[List[List[TYPE_BBOX_FLOAT]], List[List[TYPE_BBOX_INT]]],
+ size_factor: float) -> List[List[Tuple[int, int, int, int]]]:
+ """
+ Args:
+ window_on_mask: list of list of (left, top, right, bottom), nested at region-level.
+ size_factor: size_factor = img_size / mask_size
+
+ Returns:
+ Resized (left, top, right, bottom)
+ """
+ return [
+ [
+ MaskTileWindows.__window_resize_helper(win, size_factor)
+ for win in win_list_region
+ ]
+ for win_list_region in window_on_mask
+ ]
+
+
+DRAW_TARGET_IMG_THUMB = Literal['img_thumb']
+DRAW_TARGET_MASK = Literal['mask_thumb']
+
+
+class TileExtractor:
+
+ __tile_window_cache: Dict[str, MaskTileWindows]
+
+ def __init__(self, s: BaseImage):
+ self.__tile_window_cache = dict()
+ self.baseimage = s
+ self._filename = s['filename']
+
+ @staticmethod
+ def _tile_windows_helper(mask_use_for_tiles,
+ img_w, img_h,
+ tile_size: int = 256, tile_stride: int = 256,
+ tissue_thresh: float = 0.5,
+ filename: str = '') -> MaskTileWindows:
+ """
+ Initiated the mask_tile_window using a given tile_size, tile_stride, and tissue_thresh.
+ Args:
+ mask_use_for_tiles:
+ img_w: width of base image in original mag
+ img_h: height of base image in original mag
+ tile_size:
+ tile_stride:
+ tissue_thresh:
+
+ Returns:
+
+ """
+ mask = mask_use_for_tiles
+ # image_handle: openslide.OpenSlide = s["os_handle"]
+ # img_w, img_h = image_handle.dimensions
+ assert mask is not None, f"{filename}: mask is not initialized"
+ assert isinstance(mask, np.ndarray), f"The mask is expected to be a Numpy NDArray"
+
+ # shape --> row column --> height width
+ mask_w, mask_h = mask.shape[1], mask.shape[0]
+ size_factor = img_w / mask_w
+ size_factor_ref = img_h / mask_h
+ assert size_factor > 0, f"{size_factor} negative"
+ if round(size_factor) != round(size_factor_ref):
+ logging.warning(f"{filename}: Aspect Ratio Mismatch: {img_w, img_h} vs. "
+ f"{mask_w, mask_h}")
+ logging.debug(f"{filename}: size ratio between img and thumb: {size_factor}")
+ work_tile_size = tile_size / size_factor
+ work_stride = tile_stride / size_factor
+ mask_tile_windows = MaskTileWindows(mask, work_tile_size=work_tile_size,
+ work_stride=work_stride,
+ size_factor=size_factor, tissue_thresh=tissue_thresh)
+
+ return mask_tile_windows
+
+ # @staticmethod
+ # def _img_use_for_tiles(s: BaseImage):
+ # return s.getImgThumb(s["image_work_size"])
+ #
+ # @staticmethod
+ # def _mask_use_for_tiles(s: BaseImage):
+ # """
+ # For now, we just use the img_mask_use field in BaseImage
+ # Args:
+ # s:
+ #
+ # Returns:
+ #
+ # """
+ # return s['img_mask_use']
+
+ def tile_windows(self,
+ mask_use_for_tiles: np.ndarray,
+ img_w, img_h,
+ tile_size: int = 256, tile_stride: int = 256,
+ tissue_thresh: float = 0.5, *, force_rewrite: bool = False) -> MaskTileWindows:
+ # s = self.baseimage
+ key = f"{tile_size}_{tile_stride}_{tissue_thresh}"
+ root_dict = self.__tile_window_cache
+ entry = root_dict.get(key, None)
+ if entry is None or force_rewrite:
+ # img_w, img_h = s['os_handle'].dimensions
+ root_dict[key] = TileExtractor._tile_windows_helper(mask_use_for_tiles, img_w, img_h,
+ tile_size,
+ tile_stride, tissue_thresh)
+ return root_dict[key]
+
+ def clear_tile_window(self, tile_size: int = 256, tile_stride: int = 256,
+ tissue_thresh: float = 0.5):
+ key = f"{tile_size}_{tile_stride}_{tissue_thresh}"
+ root_dict = self.__tile_window_cache
+ root_dict.pop(key, None)
+
+ @staticmethod
+ def __bbox_overlay_helper(img: np.ndarray,
+ windows_grouped_by_region: Union[List[List[TYPE_BBOX_INT]], List[List[TYPE_BBOX_FLOAT]]],
+ outline: str = 'green', width: int = 2) -> Image.Image:
+ """
+ Helper function to draw bbox and overlay to the img thumbnail
+ Args:
+ img: image to overlay the bboxes
+ windows_grouped_by_region:
+ outline: outline color of the bboxes
+ width: width of bboxes outlines
+
+ Returns:
+ the PIL object after drawing the bboxes over the img
+ """
+ # avoid inplace change
+ copy = np.array(img, copy=True)
+ draw_pil = Image.fromarray(copy, mode="RGB")
+ draw_context = ImageDraw.Draw(draw_pil, mode="RGB")
+ for window_list in windows_grouped_by_region:
+ for window in window_list:
+ # ImageDraw accepts x0 y0 x1 y1
+ window = tuple(round(x) for x in window)
+ left, top, right, bottom = window
+ draw_context.rectangle((left, top, right, bottom), outline=outline, width=width)
+
+ return draw_pil
+
+ def bbox_overlay(self,
+ img_use_for_tiles,
+ mask_use_for_tiles: np.ndarray,
+ img_w, img_h,
+ target: Literal[DRAW_TARGET_IMG_THUMB, DRAW_TARGET_MASK] = 'img_thumb',
+ tile_size_on_img: int = 256, tile_stride_on_img: int = 256,
+ tissue_thresh: float = 0.5, *, force_rewrite: bool = False, outline='green', width=2
+ ) -> Image.Image:
+ # note that the window_on_image corresponding to the original image at base-mag.
+ # the thumb image is at the same size of masks
+ tile_windows: MaskTileWindows = self.tile_windows(mask_use_for_tiles, img_w, img_h,
+ tile_size_on_img, tile_stride_on_img, tissue_thresh,
+ force_rewrite=force_rewrite)
+ windows_on_mask: List[List[TYPE_BBOX_FLOAT]] = tile_windows.windows_on_mask
+ # all properties below are cached
+ mapping: Dict[Literal[DRAW_TARGET_IMG_THUMB, DRAW_TARGET_MASK], np.ndarray] = {
+ get_args(DRAW_TARGET_IMG_THUMB)[0]: img_use_for_tiles,
+ get_args(DRAW_TARGET_MASK)[0]: mask_use_for_tiles,
+ }
+ target_img_arr = mapping[target]
+ return self.__bbox_overlay_helper(target_img_arr, windows_grouped_by_region=windows_on_mask, outline=outline,
+ width=width)
+
+ @staticmethod
+ def __window_convert(window: Tuple[int, int, int, int]) -> Tuple[Tuple[int, int], Tuple[int, int]]:
+ """TO OpenSlide style (left, top) + (width, height)
+ Args:
+ window:
+
+ Returns:
+
+ """
+ left, top, right, bottom = window
+
+ left = int(left)
+ top = int(top)
+ right = int(right)
+ bottom = int(bottom)
+
+ location = (left, top)
+ width = right - left
+ height = bottom - top
+ size = (width, height)
+ return location, size
+
+ @contextmanager
+ def mp_tile_window_manager(self,
+ mask_use_for_tiles,
+ img_w,
+ img_h,
+ tile_size: int = 256,
+ tile_stride: int = 256, tissue_thresh: float = 0.5, force_rewrite: bool = False):
+ """Avoid pickling the MaskTileWindow in MP. Clear cached objects on exit
+ Args:
+ mask_use_for_tiles: tissue mask for tile extraction
+ img_w: width of the image under original magnification
+ img_h: height of the image under original magnification
+ tile_size:
+ tile_stride:
+ tissue_thresh:
+ force_rewrite:
+
+ Returns:
+
+ """
+ self.tile_windows(mask_use_for_tiles, img_w, img_h,
+ tile_size, tile_stride, tissue_thresh, force_rewrite=force_rewrite)
+ yield
+ # clearn cache
+ self.clear_tile_window(tile_size, tile_stride, tissue_thresh)
+
+ @staticmethod
+ def __save_on_flag(path: str, prefix: str, suffix: str,
+ window: Tuple[int, int, int, int],
+ region: Image.Image,
+ save_image: bool):
+
+ if save_image:
+ full_export_dest = os.path.join(path, f"{prefix}_{window}{suffix}")
+ actual_path, actual_base = os.path.split(full_export_dest)
+ os.makedirs(actual_path, exist_ok=True)
+ region.save(full_export_dest)
+
+ def valid_tile_extraction(self,
+ s: BaseImage,
+ mask_use_for_tiles,
+ img_w,
+ img_h,
+ path, *, prefix='', suffix='.png',
+ screen_callbacks: Callable = default_screen_identity,
+ tile_size: int = 256,
+ tile_stride: int = 256,
+ tissue_thresh: float = 0.5,
+ save_image: bool = True,
+ force_rewrite: bool = False) -> List[List[Tuple[int, int, int, int]]]:
+
+ tw: MaskTileWindows = self.tile_windows(mask_use_for_tiles, img_w, img_h,
+ tile_size, tile_stride, tissue_thresh, force_rewrite=force_rewrite)
+ window_list_of_regions = tw.windows_on_original_image
+ image_handle: openslide.OpenSlide = s["os_handle"]
+ valid_window_list_all_regions: List[List[Tuple[int, int, int, int]]] = []
+ for region_windows in window_list_of_regions:
+ region_windows: List[Tuple[int, int, int, int]]
+ valid_windows_curr: List[Tuple[int, int, int, int]] = []
+ for window in region_windows:
+ window: Tuple[int, int, int, int]
+ # just to make the convention clear
+ location, size = TileExtractor.__window_convert(window)
+ region = image_handle.read_region(location, 0, size)
+ tile_np = np.array(region, copy=False)
+ valid_flag = screen_callbacks(tile_np)
+ if not valid_flag:
+ continue
+ valid_windows_curr.append(window)
+ # prefix can be the slide name
+ TileExtractor.__save_on_flag(path=path, prefix=prefix, suffix=suffix, window=window,
+ region=region, save_image=save_image)
+ if len(valid_windows_curr) > 0:
+ valid_window_list_all_regions.append(valid_windows_curr)
+ return valid_window_list_all_regions
+
+
+def extract(s: BaseImage, params: Dict[PARAMS, Any]):
+ logging.info(f"{s['filename']} - \textract")
+ with params['lock']:
+ slide_out = s['outdir']
+ tile_output_dir = params.get('tile_output', os.path.join(slide_out, 'tiles'))
+ os.makedirs(tile_output_dir, exist_ok=True)
+ prefix, _ = os.path.splitext(os.path.basename(s['filename']))
+
+ suffix: str = params.get('suffix', '.png')
+ outline: str = params.get('outline', "green")
+ width: int = int(params.get('width', 2))
+ save_image: bool = bool(strtobool(params.get("save_image", "False")))
+ tile_size = int(params.get('tile_size', 256))
+ tile_stride = int(params.get('tile_stride', 256))
+ tissue_thresh = float(params.get('tissue_ratio', 0.5))
+
+ img_use_for_tiles = s.getImgThumb(s["image_work_size"])
+ mask_use_for_tiles = s['img_mask_use']
+ image_handle: openslide.OpenSlide = s['os_handle']
+ img_w, img_h = image_handle.dimensions
+
+ tile_extractor = TileExtractor(s)
+ # binding MaskTileWindow into the BaseImage might not be the best idea in multiprocessing
+ # clear the cached MaskTileWindow to prevent hanging of the pool
+ with tile_extractor.mp_tile_window_manager(mask_use_for_tiles, img_w, img_h,
+ tile_size, tile_stride, tissue_thresh, False):
+ window_list_of_regions: List[List[Tuple[int, int, int, int]]]\
+ = tile_extractor.valid_tile_extraction(s,
+ mask_use_for_tiles,
+ img_w, img_h,
+ tile_output_dir,
+ prefix=prefix,
+ suffix=suffix,
+ tile_size=tile_size,
+ tile_stride=tile_stride,
+ tissue_thresh=tissue_thresh,
+ save_image=save_image,
+ force_rewrite=False)
+ # #
+ overlay_export = os.path.join(s["outdir"], f"{s['filename']}_tile_bbox.png")
+ bbox_overlay = tile_extractor.bbox_overlay(img_use_for_tiles, mask_use_for_tiles,
+ img_w, img_h,
+ target="img_thumb",
+ tile_size_on_img=tile_size,
+ tile_stride_on_img=tile_stride,
+ tissue_thresh=tissue_thresh,
+ force_rewrite=False, outline=outline, width=width)
+
+ bbox_overlay.save(overlay_export)
+ bbox_json_loc = os.path.join(slide_out, f'{s["filename"]}_bbox.json')
+ with open(bbox_json_loc, 'w') as root:
+ json.dump(window_list_of_regions, root, indent=4)
+ return
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__init__.py
new file mode 100644
index 00000000..3349fda4
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__init__.py
@@ -0,0 +1,4 @@
+try:
+ from ._version import version as __version__
+except ImportError: # pragma: no cover
+ __version__ = "not-installed"
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__main__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__main__.py
new file mode 100644
index 00000000..333782a1
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/__main__.py
@@ -0,0 +1,215 @@
+import argparse
+import configparser
+import datetime
+import glob
+import logging
+import multiprocessing
+import os
+import sys
+import time
+from functools import partial
+
+from histoqc._pipeline import BatchedResultFile
+from histoqc._pipeline import MultiProcessingLogManager
+from histoqc._pipeline import load_pipeline
+from histoqc._pipeline import log_pipeline
+from histoqc._pipeline import move_logging_file_handler
+from histoqc._pipeline import setup_logging
+from histoqc._pipeline import setup_plotting_backend
+from histoqc._worker import worker
+from histoqc._worker import worker_setup
+from histoqc._worker import worker_success
+from histoqc._worker import worker_error
+from histoqc.config import read_config_template
+from histoqc.data import managed_pkg_data
+from histoqc._parser import get_argument_parser
+
+parser = get_argument_parser()
+
+@managed_pkg_data
+def main(argv=None):
+ """main entry point for histoqc pipelines"""
+ if argv is None:
+ argv = sys.argv[1:]
+
+ args = parser.parse_args(argv)
+
+ # --- multiprocessing and logging setup -----------------------------------
+
+ setup_logging(capture_warnings=True, filter_warnings='ignore')
+ mpm = multiprocessing.Manager()
+ lm = MultiProcessingLogManager('histoqc', manager=mpm)
+
+ # --- parse the pipeline configuration ------------------------------------
+ config = configparser.ConfigParser()
+ if not args.config:
+ lm.logger.warning(f"Configuration file not set (--config), using default")
+ config.read_string(read_config_template('default'))
+ elif os.path.exists(args.config):
+ config.read(args.config) #Will read the config file
+ else:
+ lm.logger.warning(f"Configuration file {args.config} assuming to be a template...checking.")
+ config.read_string(read_config_template(args.config))
+
+ # --- provide models, pen and templates as fallbacks from package data ----
+
+ managed_pkg_data.inject_pkg_data_fallback(config)
+
+ # --- load the process queue (error early) --------------------------------
+
+ _steps = log_pipeline(config, log_manager=lm)
+ process_queue = load_pipeline(config)
+
+ # --- check symlink target ------------------------------------------------
+
+ if args.symlink is not None:
+ if not os.path.isdir(args.symlink):
+ lm.logger.error("error: --symlink {args.symlink} is not a directory")
+ return -1
+
+ # --- create output directory and move log --------------------------------
+ args.outdir = os.path.expanduser(args.outdir)
+ os.makedirs(args.outdir, exist_ok=True)
+ move_logging_file_handler(logging.getLogger(), args.outdir, args.debug)
+
+ if BatchedResultFile.results_in_path(args.outdir):
+ if args.force:
+ lm.logger.info("Previous run detected....overwriting (--force set)")
+ else:
+ lm.logger.info("Previous run detected....skipping completed (--force not set)")
+
+ results = BatchedResultFile(args.outdir,
+ manager=mpm,
+ batch_size=args.batch,
+ force_overwrite=args.force)
+
+ # --- document configuration in results -----------------------------------
+
+ results.add_header(f"start_time:\t{datetime.datetime.now()}")
+ results.add_header(f"pipeline:\t{' '.join(_steps)}")
+ results.add_header(f"outdir:\t{os.path.realpath(args.outdir)}")
+ results.add_header(f"config_file:\t{os.path.realpath(args.config) if args.config is not None else 'default'}")
+ results.add_header(f"command_line_args:\t{' '.join(argv)}")
+
+ # --- receive input file list (there are 3 options) -----------------------
+ args.basepath = os.path.expanduser(args.basepath)
+ if len(args.input_pattern) > 1:
+ # more than one input_pattern is interpreted as a list of files
+ # (basepath is ignored)
+ files = list(args.input_pattern)
+
+ elif args.input_pattern[0].endswith('.tsv'):
+ # input_pattern is a tsv file containing a list of files
+ files = []
+ with open(args.input_pattern[0], 'rt') as f:
+ for line in f:
+ if line[0] == "#":
+ continue
+ fn = line.strip().split("\t")[0]
+ files.append(os.path.join(args.basepath, fn))
+
+ else:
+ # input_pattern is a glob pattern
+ pth = os.path.join(args.basepath, args.input_pattern[0])
+ files = glob.glob(pth, recursive=True)
+
+ lm.logger.info("-" * 80)
+ num_files = len(files)
+ lm.logger.info(f"Number of files detected by pattern:\t{num_files}")
+
+ # --- start worker processes ----------------------------------------------
+
+ _shared_state = {
+ 'process_queue': process_queue,
+ 'config': config,
+ 'outdir': args.outdir,
+ 'log_manager': lm,
+ 'lock': mpm.Lock(),
+ 'shared_dict': mpm.dict(),
+ 'num_files': num_files,
+ 'force': args.force,
+ 'seed': args.seed,
+ 'debug': args.debug
+ }
+ failed = mpm.list()
+ setup_plotting_backend(lm.logger, debug=args.debug)
+
+ try:
+ if args.nprocesses > 1:
+
+ with lm.logger_thread():
+ print(args.nprocesses)
+ with multiprocessing.Pool(processes=args.nprocesses,
+ initializer=worker_setup,
+ initargs=(config,)) as pool:
+ try:
+ for idx, file_name in enumerate(files):
+ _ = pool.apply_async(
+ func=worker,
+ args=(idx, file_name),
+ kwds=_shared_state,
+ callback=partial(worker_success, result_file=results),
+ error_callback=partial(worker_error, failed=failed),
+ )
+
+ finally:
+ pool.close()
+ pool.join()
+
+ else:
+ for idx, file_name in enumerate(files):
+ try:
+ _success = worker(idx, file_name, **_shared_state)
+ except Exception as exc:
+ worker_error(exc, failed)
+ continue
+ else:
+ worker_success(_success, results)
+
+ except KeyboardInterrupt:
+ lm.logger.info("-----REQUESTED-ABORT-----\n")
+
+ else:
+ lm.logger.info("----------Done-----------\n")
+
+ finally:
+ lm.logger.info(f"There are {len(failed)} explicitly failed images (available also in error.log),"
+ " warnings are listed in warnings column in output")
+
+ for file_name, error, tb in failed:
+ lm.logger.info(f"{file_name}\t{error}\n{tb}")
+ timing_file = os.path.join(args.outdir, "processing_times.tsv")
+ file_exists = os.path.exists(timing_file)
+ with open(timing_file, 'a') as tf:
+ if not file_exists:
+ tf.write("slide_name\tfile_size_mb\tprocessing_time_sec\n")
+
+ for file_name in files:
+ base_name = os.path.basename(file_name)
+ file_size_bytes = os.path.getsize(file_name) if os.path.exists(file_name) else 0
+ file_size_mb = file_size_bytes / (1024 * 1024)
+
+ time_key = f"time_{base_name}"
+ time_taken = _shared_state['shared_dict'].get(time_key, 0.0)
+
+ tf.write(f"{base_name}\t{file_size_mb:.2f}\t{time_taken:.2f}\n")
+ lm.logger.info(f"Processing times appended to {timing_file}")
+
+ if args.symlink is not None:
+ origin = os.path.realpath(args.outdir)
+ target = os.path.join(
+ os.path.realpath(args.symlink),
+ os.path.basename(origin)
+ )
+ try:
+ os.symlink(origin, target, target_is_directory=True)
+ lm.logger.info("Symlink to output directory created")
+ except (FileExistsError, FileNotFoundError):
+ lm.logger.error(
+ f"Error creating symlink to output in '{args.symlink}', "
+ f"Please create manually: ln -s {origin} {target}"
+ )
+ return 0
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_parser.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_parser.py
new file mode 100644
index 00000000..26ff1d07
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_parser.py
@@ -0,0 +1,44 @@
+import argparse
+import time
+
+def get_argument_parser():
+ """Return the argument parser for histoqc"""
+ parser = argparse.ArgumentParser(prog="histoqc", description='Run HistoQC main quality control pipeline for digital pathology images')
+ parser.add_argument('input_pattern',
+ help="input filename pattern (try: *.svs or target_path/*.svs ),"
+ " or tsv file containing list of files to analyze",
+ nargs="+")
+ parser.add_argument('-o', '--outdir',
+ help="outputdir, default ./histoqc_output_YYMMDD-hhmmss",
+ default=f"./histoqc_output_{time.strftime('%Y%m%d-%H%M%S')}",
+ type=str)
+ parser.add_argument('-p', '--basepath',
+ help="base path to add to file names,"
+ " helps when producing data using existing output file as input",
+ default="",
+ type=str)
+ parser.add_argument('-c', '--config',
+ help="config file to use, either by name supplied by histoqc.config (e.g., v2.1) or filename",
+ type=str)
+ parser.add_argument('-f', '--force',
+ help="force overwriting of existing files",
+ action="store_true")
+ parser.add_argument('-b', '--batch',
+ help="break results file into subsets of this size",
+ type=int,
+ default=None)
+ parser.add_argument('-s', '--seed',
+ help="set a seed used to produce a random number in all modules",
+ type=int,
+ default=None)
+ parser.add_argument('-n', '--nprocesses',
+ help="number of processes to launch",
+ type=int,
+ default=1)
+ parser.add_argument('--symlink', metavar="TARGET_DIR",
+ help="create symlink to outdir in TARGET_DIR",
+ default=None)
+
+ parser.add_argument('--debug', action='store_true', help="trigger debugging behavior")
+
+ return parser
\ No newline at end of file
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_pipeline.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_pipeline.py
new file mode 100644
index 00000000..51727424
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_pipeline.py
@@ -0,0 +1,409 @@
+"""histoqc._pipeline
+
+helper utilities for running the HistoQC pipelines
+
+"""
+import glob
+import logging
+import multiprocessing
+import os
+import platform
+import shutil
+import threading
+import warnings
+from contextlib import ExitStack
+from contextlib import contextmanager
+from contextlib import nullcontext
+from importlib import import_module
+from logging.config import dictConfig
+from logging.handlers import QueueHandler
+
+
+# --- logging helpers -------------------------------------------------
+
+DEFAULT_LOG_FN = "error.log"
+
+
+def setup_logging(*, capture_warnings, filter_warnings):
+ """configure histoqc's logging instance
+
+ Parameters
+ ----------
+ capture_warnings: `bool`
+ flag if warnings should be captured by the logging system
+ filter_warnings: `str`
+ action for warnings.filterwarnings
+ """
+ dictConfig({
+ 'version': 1,
+ 'formatters': {
+ 'default': {
+ 'class': 'logging.Formatter',
+ 'format': '%(asctime)s - %(levelname)s - %(message)s',
+ }
+ },
+ 'handlers': {
+ 'console': {
+ 'class': 'logging.StreamHandler',
+ 'level': 'INFO',
+ 'formatter': 'default',
+ },
+ 'logfile': {
+ 'class': 'logging.FileHandler',
+ 'level': 'WARNING',
+ 'filename': DEFAULT_LOG_FN,
+ 'mode': 'w', # we initially start overwriting existing logs
+ 'formatter': 'default',
+ },
+ },
+ 'root': {
+ 'level': 'INFO',
+ 'handlers': ['console', 'logfile']
+ }
+ })
+
+ # configure warnings too...
+ warnings.filterwarnings(filter_warnings)
+ logging.captureWarnings(capture_warnings)
+
+
+
+def move_logging_file_handler(logger, destination, debug=False):
+ """
+ Redirect FileHandlers to a new destination directory, moving the log file.
+
+ Parameters
+ ----------
+ logger : logging.Logger
+ Logger instance whose FileHandlers will be moved.
+ destination : str
+ Directory path for the new log files.
+ debug : bool
+ If True, set the log level of the new handlers to DEBUG. If False, the new handlers retain the original handler's log level.
+ """
+ for handler in reversed(logger.handlers):
+ if not isinstance(handler, logging.FileHandler):
+ continue
+ if handler.baseFilename != os.path.join(os.getcwd(), DEFAULT_LOG_FN):
+ continue
+ if not os.path.exists(handler.baseFilename):
+ logger.warning(f"Original log file {handler.baseFilename!r} does not exist, cannot move.")
+ continue
+
+ if not destination.endswith(handler.baseFilename):
+ destination = os.path.join(destination, os.path.relpath(handler.baseFilename, os.getcwd()))
+ logger.info(f'moving fileHandler {handler.baseFilename!r} to {destination!r}')
+
+ # remove handler
+ logger.removeHandler(handler)
+ handler.close()
+
+ new_filename = shutil.copy2(handler.baseFilename, destination)
+ try:
+ os.remove(handler.baseFilename)
+ except OSError:
+ # on WSL/Windows, file might still be locked briefly
+ logger.warning(f"Could not remove original log file {handler.baseFilename!r}")
+
+ # create and attach new handler
+ new_handler = logging.FileHandler(new_filename, mode='a')
+ new_handler.setLevel(logging.DEBUG if debug else handler.level)
+ new_handler.setFormatter(handler.formatter)
+ logger.addHandler(new_handler)
+
+
+class MultiProcessingLogManager:
+
+ def __init__(self, logger_name, *, manager):
+ """create a MultiProcessingLogManager
+
+ Note: this uses a multiprocessing Queue to correctly transfer
+ logging information from worker processes to the main
+ process logging instance
+
+ Parameters
+ ----------
+ logger_name : str
+ the name of the logger instance
+ manager : multiprocessing.Manager
+ the mp Manager instance used for creating sharable context
+ """
+ self._logger_name = logger_name
+ self._log_queue = manager.Queue()
+ self._log_thread_active = False
+
+ @property
+ def is_main_process(self):
+ return multiprocessing.current_process().name == "MainProcess"
+
+ @property
+ def logger(self):
+ """returns the logger instance"""
+ if self.is_main_process:
+ return logging.getLogger(self._logger_name)
+ else:
+ root = logging.getLogger()
+ if not root.hasHandlers():
+ qh = QueueHandler(self._log_queue)
+ root.setLevel(logging.INFO)
+ root.addHandler(qh)
+ # note: this should be revisited and set by the main process
+ warnings.filterwarnings('ignore')
+ logging.captureWarnings(True)
+ return root
+
+ @contextmanager
+ def logger_thread(self):
+ """context manager for multiprocess logging
+
+ Note: this starts the thread responsible for handing the log records
+ emitted by child processes to the main logger instance
+ """
+ assert self.is_main_process
+ assert not self._log_thread_active # not reentrant...
+ self._log_thread_active = True
+
+ def process_queue(q, ln):
+ main_logger = logging.getLogger(ln)
+ while True:
+ log_record = q.get()
+ if log_record is None:
+ break
+ main_logger.handle(log_record)
+
+ lt = threading.Thread(target=process_queue, args=(self._log_queue, self._logger_name))
+ lt.start()
+ try:
+ yield
+ finally:
+ self._log_queue.put(None)
+ lt.join()
+ self._log_thread_active = False
+
+
+def log_pipeline(config, log_manager):
+ """log the pipeline information
+
+ Parameters
+ ----------
+ config : configparser.ConfigParser
+ log_manager : MultiProcessingLogManager
+ """
+ assert log_manager.is_main_process
+ steps = config.get(section='pipeline', option='steps').splitlines()
+
+ log_manager.logger.info("the pipeline will use these steps:")
+ for process in steps:
+ mod_name, func_name = process.split('.')
+ log_manager.logger.info(f"\t\t{mod_name}\t{func_name}")
+ return steps
+
+
+# --- worker process helpers ------------------------------------------
+
+def setup_plotting_backend(logger=None, debug=False):
+ """loads the correct matplotlib backend
+
+ Parameters
+ ----------
+ logger :
+ the logging.Logger instance
+ """
+ import matplotlib
+
+ if debug and (platform.system() == "Windows" or os.environ.get('DISPLAY')):
+ matplotlib.use('TkAgg')
+ logger.info("Display found and debug mode enabled. Using TkAgg backend for matplotlib.")
+
+ else:
+ matplotlib.use('Agg')
+
+
+class BatchedResultFile:
+ """BatchedResultFile encapsulates the results writing
+
+ Note: this is multiprocessing safe
+ """
+ FILENAME_GLOB = "results*.tsv"
+ FILENAME_NO_BATCH = "results.tsv"
+ FILENAME_BATCH = "results_{:d}.tsv"
+
+ def __init__(self, dst, *, manager, batch_size=None, force_overwrite=False):
+ """create a BatchedResultFile instance
+
+ Parameters
+ ----------
+ dst : os.PathLike
+ the output directory for the result files
+ manager : multiprocessing.Manager
+ the mp Manager instance used for creating sharable context
+ batch_size : int or None
+ after `batch_size` calls to increment_counter() the results
+ file will be rotated
+ force_overwrite : bool
+ overwrite result files if they are already present. default
+ is to append.
+ """
+ if not os.path.isdir(dst):
+ raise ValueError(f"dst {dst!r} is not a directory or does not exist")
+ if batch_size is not None:
+ batch_size = int(batch_size)
+ if batch_size < 1:
+ raise ValueError(f"batch_size must be > 0, got {batch_size}")
+ self.dst = os.path.abspath(dst)
+ self.batch_size = batch_size
+ self.force_overwrite = bool(force_overwrite)
+
+ # multiprocessing safety
+ self._headers = manager.list()
+ self._rlock = manager.RLock()
+
+ # internal state
+ self._batch = 0
+ self._completed = 0
+ self._first = True
+
+ # contextmanager
+ self._f = None
+ self._stack = None
+
+ def __enter__(self):
+ self._stack = ExitStack()
+ self._stack.callback(self.increment_counter)
+ self._stack.enter_context(self._rlock)
+ self._f = nullcontext(self._stack.enter_context(self._file()))
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ self._stack.close()
+ self._stack = None
+ self._f = None
+
+ def _file(self):
+ if self._f is not None:
+ return self._f # we're in the context manager
+
+ if self.batch_size is None:
+ fn = self.FILENAME_NO_BATCH
+ else:
+ fn = self.FILENAME_BATCH.format(self._batch)
+ pth = os.path.join(self.dst, fn)
+
+ mode = "a"
+ if self._first and os.path.isfile(pth):
+ if self.force_overwrite:
+ mode = "w"
+ else:
+ mode = "a"
+ self._first = False
+ return open(pth, mode=mode)
+
+ def add_header(self, header):
+ """add a new header to the results file
+
+ Parameters
+ ----------
+ header :
+ a string that can be written to the file by calling the
+ write_headers method
+ """
+ self._headers.append(header)
+
+ def is_empty_file(self):
+ """return if the current file is empty
+
+ Note: this is useful to determine if you want to write_headers
+ ... technically the name is incorrect, but in this use case
+ pos 0 is equivalent to an empty file
+ """
+ with self._rlock, self._file() as f:
+ return f.tell() == 0
+
+ def write_headers(self, *args):
+ """write the internally collected headers to the current file
+
+ Parameters
+ ----------
+ state : dict
+ the current histoqc implementation writes the outputs to
+ the header files, so *args supports `state` for now.
+ overwrite in subclass to control header output behavior
+ """
+ with self._rlock:
+ # write headers
+ for line in self._headers:
+ self.write_line(f"#{line}")
+ # histoqc specific
+ _state, = args
+ _outputs = '\t'.join(_state['output'])
+ line = f"#dataset:{_outputs}\twarnings"
+ self.write_line(line)
+
+ def write_line(self, text, end="\n"):
+ """write text to the file
+
+ Parameters
+ ----------
+ text : str
+ end : str
+ defaults to newline
+ """
+ with self._rlock, self._file() as f:
+ f.write(text)
+ if end:
+ f.write(end)
+
+ def increment_counter(self):
+ """increment the completed counter
+
+ moves to the next batch as determined by batch_size
+ """
+ # move on to the next batch if needed
+ with self._rlock:
+ self._completed += 1
+ if self._completed and self.batch_size and self._completed % self.batch_size == 0:
+ self._batch += 1
+ self._first = True
+
+ @classmethod
+ def results_in_path(cls, dst):
+ """return if a dst path contains results files
+
+ Parameters
+ ----------
+ dst : os.PathLike
+ """
+ return bool(glob.glob(os.path.join(dst, cls.FILENAME_GLOB)))
+
+
+def load_pipeline(config):
+ """load functions and parameters from config
+
+ Parameters
+ ----------
+ config : configparser.ConfigParser
+ """
+ steps = config.get(section='pipeline', option='steps').splitlines()
+
+ process_queue = []
+ for process in steps:
+ mod_name, func_name = process.split('.')
+
+ try:
+ mod = import_module(f"histoqc.{mod_name}")
+ except ImportError:
+ raise NameError(f"Unknown module in pipeline from config file:\t {mod_name}")
+
+ func_name = func_name.split(":")[0] # take base of function name
+ try:
+ func = getattr(mod, func_name)
+ except AttributeError:
+ raise NameError(f"Unknown function from module in pipeline from config file:\t {mod_name}.{func_name}")
+
+ if config.has_section(process):
+ params = dict(config.items(section=process))
+ else:
+ params = {}
+
+ process_queue.append((func, params))
+ return process_queue
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_worker.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_worker.py
new file mode 100644
index 00000000..cc722adc
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/_worker.py
@@ -0,0 +1,106 @@
+"""histoqc worker functions"""
+import os
+import shutil
+import numpy as np
+from histoqc.BaseImage import BaseImage
+from histoqc._pipeline import load_pipeline
+from histoqc._pipeline import setup_plotting_backend
+import time
+
+
+# --- worker functions --------------------------------------------------------
+
+def worker_setup(c):
+ """needed for multiprocessing worker setup"""
+ setup_plotting_backend()
+ load_pipeline(config=c)
+
+
+def worker(idx, file_name, *,
+ process_queue, config, outdir, log_manager, lock, shared_dict, num_files, force, seed, debug):
+ """pipeline worker function"""
+ # set the seed
+ if seed is not None:
+ np.random.seed(seed)
+ # --- output directory preparation --------------------------------
+ fname_outdir = os.path.join(outdir, os.path.basename(file_name))
+ if os.path.isdir(fname_outdir): # directory exists
+ if not force:
+ log_manager.logger.warning(
+ f"{file_name} already seems to be processed (output directory exists),"
+ " skipping. To avoid this behavior use --force"
+ )
+ return
+ else:
+ # remove entire directory to ensure no old files are present
+ shutil.rmtree(fname_outdir)
+ # create output dir
+ os.makedirs(fname_outdir)
+
+ log_manager.logger.info(f"-----Working on:\t{file_name}\t\t{idx+1} of {num_files}")
+
+ try:
+ s = BaseImage(file_name, fname_outdir, seed, dict(config.items("BaseImage.BaseImage")))
+ # 计时
+ start_time = time.perf_counter()
+ for process, process_params in process_queue:
+ process_params["lock"] = lock
+ process_params["shared_dict"] = shared_dict
+ process(s, process_params)
+ s["completed"].append(process.__name__)
+
+ except Exception as exc:
+ # reproduce histoqc error string
+ _oneline_doc_str = exc.__doc__.replace('\n', '')
+ err_str = f"{exc.__class__} {_oneline_doc_str} {exc}"
+
+ log_manager.logger.error(
+ f"{file_name} - Error analyzing file (skipping): \t {err_str}"
+ )
+ if exc.__traceback__.tb_next is not None:
+ func_tb_obj = str(exc.__traceback__.tb_next.tb_frame.f_code)
+ else:
+ func_tb_obj = str(exc.__traceback__)
+
+ exc.__histoqc_err__ = (file_name, err_str, func_tb_obj)
+ raise exc
+
+ else:
+ # TODO:
+ # the histoqc workaround below is due an implementation detail in BaseImage:
+ # BaseImage keeps an OpenSlide instance stored under os_handle and leaks a
+ # file handle. This will need fixing in BaseImage.
+ # -> best solution would be to make BaseImage a contextmanager and close
+ # and cleanup the OpenSlide handle on __exit__
+ s["os_handle"] = None # need to get rid of handle because it can't be pickled
+ #记录成功处理的耗时
+ elapsed = time.perf_counter() - start_time
+ base_name = os.path.basename(file_name)
+ shared_dict[f"time_{base_name}"] = elapsed
+ return s
+
+
+def worker_success(s, result_file):
+ """success callback"""
+ if s is None:
+ return
+
+ with result_file:
+ if result_file.is_empty_file():
+ result_file.write_headers(s)
+
+ _fields = '\t'.join([str(s[field]) for field in s['output']])
+ _warnings = '|'.join(s['warnings'])
+ result_file.write_line("\t".join([_fields, _warnings]))
+
+
+def worker_error(e, failed):
+ """error callback"""
+ if hasattr(e, '__histoqc_err__'):
+ file_name, err_str, tb = e.__histoqc_err__
+ else:
+ # error outside of pipeline
+ # todo: it would be better to handle all of this as a decorator
+ # around the worker function
+ file_name, err_str, tb = "N/A", f"error outside of pipeline {e!r}", None
+ failed.append((file_name, err_str, tb))
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annot_collection.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annot_collection.py
new file mode 100644
index 00000000..30a187c0
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annot_collection.py
@@ -0,0 +1,66 @@
+from typing import List, Dict, Union, Type, Tuple, Mapping # Literal, get_args,
+from types import MappingProxyType
+# from shapely.strtree import STRtree
+# from shapely.geometry import box as shapely_box
+from histoqc.import_wrapper.typing import Literal, get_args
+from lazy_property import LazyProperty
+from .annotation.base import Annotation, Region, TYPE_RAW_LABEL
+from .annotation.imagescope import ImageScopeAnnotation
+from .annotation.geojson import GEOJsonAnnotation
+
+
+TYPE_BBOX = Tuple[int, int, int, int]
+
+TYPE_GEO = Literal["geojson"]
+TYPE_IMAGESCOPE = Literal["imagescope"]
+TYPE_JSON = Literal["json"]
+TYPE_XML = Literal["xml"]
+
+TYPE_SUPPORTED_PARSER = Literal[TYPE_GEO, TYPE_IMAGESCOPE, TYPE_JSON, TYPE_XML]
+
+PARSER_BUILDER_MAP: Dict[str, Type[Annotation]] = {
+ get_args(TYPE_GEO)[0]: GEOJsonAnnotation,
+ get_args(TYPE_IMAGESCOPE)[0]: ImageScopeAnnotation,
+ # for HistoQC
+ get_args(TYPE_JSON)[0]: GEOJsonAnnotation, # duplicate
+ get_args(TYPE_XML)[0]: ImageScopeAnnotation,
+}
+
+
+class AnnotCollection:
+ _annotation_list: List[Annotation]
+ _label_to_regions_map: Mapping[TYPE_RAW_LABEL, List[Region]]
+
+ @LazyProperty
+ def all_regions(self) -> List[Region]:
+ region_list = []
+ for annotation in self._annotation_list:
+ region_list += annotation.regions
+ return region_list
+
+ # @LazyProperty
+ # def multipolygons(self) -> MultiPolygon:
+ # for annotation in self._annotation_list:
+
+ def __init__(self, annotation_list: List[Annotation]):
+ self._annotation_list = annotation_list
+ self._label_to_regions_map = self._new_label_to_regions_map()
+
+ @classmethod
+ def build(cls, parser_type: TYPE_SUPPORTED_PARSER, uri: str, label_map: Union[Dict[Union[str, int], int], None]):
+ construct = PARSER_BUILDER_MAP[parser_type]
+ annotation_list = construct.build_from_uri(uri=uri, label_map=label_map)
+ return cls(annotation_list)
+
+ def _new_label_to_regions_map(self) -> Mapping[TYPE_RAW_LABEL, List[Region]]:
+ out_dict: Dict[TYPE_RAW_LABEL, List[Region]] = dict()
+ for region in self.all_regions:
+ region: Region
+ label: TYPE_RAW_LABEL = region['label']
+ out_dict[label] = out_dict.get(label, [])
+ out_dict[label].append(region)
+ return MappingProxyType(out_dict)
+
+ @property
+ def label_to_regions_map(self) -> Mapping[TYPE_RAW_LABEL, List[Region]]:
+ return self._label_to_regions_map
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/__init__.py
new file mode 100644
index 00000000..b6e690fd
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/__init__.py
@@ -0,0 +1 @@
+from . import *
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/base.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/base.py
new file mode 100644
index 00000000..36da7251
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/base.py
@@ -0,0 +1,176 @@
+from abc import ABC, abstractmethod
+from typing import Generic, TypeVar, Dict, Union, List, Tuple, TypedDict
+from lazy_property import LazyProperty
+from shapely.geometry import Polygon, MultiPolygon
+import logging
+
+T = TypeVar("T")
+TYPE_POINT = Tuple[int, int]
+TYPE_POINT_SET = List[TYPE_POINT]
+TYPE_HOLED_SET = Tuple[TYPE_POINT_SET, Union[List[TYPE_POINT_SET], None]]
+# TYPE_HOLED_SET_COLLECTION = List[TYPE_HOLED_SET]
+
+TYPE_LABEL = Union[int, None]
+TYPE_RAW_LABEL = Union[str, None, TYPE_LABEL]
+
+WARNING_NOT_SIMPLE_POLYGON = f"Not a Simple Polygon: buffer of the polygon " \
+ f"with 0-distance resulting multiple polygons." \
+ f"The shape of these polygons may not be identical to" \
+ f"input annotations"
+
+
+class Region(TypedDict):
+ polygon: Polygon
+ point_set: TYPE_HOLED_SET # TYPE_POINT_SET
+ label: TYPE_RAW_LABEL
+ raw_label: TYPE_RAW_LABEL
+ uri: str
+
+
+class Annotation(ABC, Generic[T]):
+ """
+ Annotation --> an atomic annotation that may contain one or multiple regions.
+ One label is assigned to one annotation.
+ """
+
+ _label_map: Union[Dict[TYPE_RAW_LABEL, TYPE_LABEL], None]
+ _ann_data: T
+ _uri: str
+
+ @staticmethod
+ def point_to_int(point_xy: Tuple[Union[float, str], Union[float, str]]) -> TYPE_POINT:
+ raw_x, raw_y = point_xy
+ return int(float(raw_x)), int(float(raw_y))
+
+ @abstractmethod
+ def point_set_list(self) -> List[TYPE_HOLED_SET]:
+ return NotImplemented
+
+ @abstractmethod
+ def label_from_annotation(self) -> TYPE_RAW_LABEL:
+ return NotImplemented
+
+ @staticmethod
+ @abstractmethod
+ def annotation_list_from_uri(uri) -> List[T]:
+ return NotImplemented
+
+ @staticmethod
+ def _enough_points(point_set: TYPE_POINT_SET):
+ return len(point_set) >= 3
+
+ @staticmethod
+ def _sanitized_points_helper(point_set: TYPE_HOLED_SET) -> Union[TYPE_HOLED_SET, None]:
+ outer, inner = point_set
+ # if shell has less than 3 --> discard directly
+ if not Annotation._enough_points(outer):
+ return None
+ inner = [hole for hole in inner if Annotation._enough_points(hole)] if inner is not None else None
+ return outer, inner
+
+ @staticmethod
+ def _sanitized_points(point_set_list: List[TYPE_HOLED_SET]) -> List[TYPE_HOLED_SET]:
+ out_list = []
+ for point_set in point_set_list:
+ sanitized = Annotation._sanitized_points_helper(point_set)
+ if sanitized is None:
+ continue
+ out_list.append(sanitized)
+ return out_list
+
+ @staticmethod
+ def valid_polygon_helper(polygon: Polygon, point_set: TYPE_HOLED_SET) -> Tuple[List[Polygon], List[TYPE_HOLED_SET]]:
+ """
+ In case
+ Returns:
+
+ """
+ if polygon.is_valid:
+ return [polygon, ], [point_set, ]
+
+ valid_poly = polygon.buffer(0)
+ if isinstance(valid_poly, Polygon):
+ return [valid_poly, ], [point_set, ]
+ # if not simple polygon but multiple polygons
+ assert isinstance(valid_poly, MultiPolygon)
+ logging.warning(WARNING_NOT_SIMPLE_POLYGON)
+ # warning
+ polygon_list: List[Polygon] = list(valid_poly.geoms)
+ exterior_list: List[List[TYPE_POINT_SET]] = [list(x.exterior.coords) for x in polygon_list]
+ interior_list: List[List[List[TYPE_POINT_SET]]] = [[list(interior.coords) for interior in x.interiors]
+ for x in polygon_list]
+ point_set_list: List[TYPE_HOLED_SET] = [(outer, inner)
+ for outer, inner in zip(exterior_list, interior_list)]
+ return polygon_list, point_set_list
+
+ @staticmethod
+ def valid_polygon(point_set: TYPE_HOLED_SET) -> Tuple[List[Polygon], List[TYPE_HOLED_SET]]:
+ outer, inner = point_set
+ polygon = Polygon(outer, holes=inner)
+ # if not polygon.is_valid:
+ # return polygon.buffer(0)
+ # assert not isinstance(polygon, MultiPolygon)
+ # return [polygon, ], [point_set, ]
+ return Annotation.valid_polygon_helper(polygon, point_set)
+
+ @staticmethod
+ def regions_accumulate_helper(polygon_list: List[Polygon],
+ point_set_list: List[TYPE_HOLED_SET], label, raw_label, uri) -> List[Region]:
+ return [Region(polygon=polygon, point_set=point_set, label=label, raw_label=raw_label, uri=uri)
+ for polygon, point_set in zip(polygon_list, point_set_list)]
+
+ @LazyProperty
+ def regions(self) -> List[Region]:
+ point_set_list: List[TYPE_HOLED_SET] = self.point_set_list()
+ clean_list = Annotation._sanitized_points(point_set_list)
+ region_list: List[Region] = []
+ for point_set in clean_list:
+ point_set: TYPE_HOLED_SET
+ # polygon: Polygon = Annotation.valid_polygon(point_set)
+ polygon_list, point_set_list = Annotation.valid_polygon(point_set)
+ label = self.label
+ raw_label = self.raw_label
+ uri = self._uri
+ # curr_region = Region(polygon=polygon, point_set=point_set,
+ # label=label, raw_label=raw_label, uri=self._uri)
+ # region_list.append(curr_region)
+ curr_region_list = Annotation.regions_accumulate_helper(polygon_list,
+ point_set_list, label, raw_label, uri)
+ region_list += curr_region_list
+ return region_list
+
+ @staticmethod
+ def _mapped_label(label_map: Dict[TYPE_RAW_LABEL, TYPE_LABEL],
+ label_var: TYPE_RAW_LABEL) -> Union[TYPE_RAW_LABEL, TYPE_LABEL]:
+ if label_map is None or len(label_map) == 0:
+ return label_var
+ assert label_var in label_map
+ return label_map[label_var]
+
+ @LazyProperty
+ def raw_label(self) -> TYPE_RAW_LABEL:
+ return self.label_from_annotation()
+
+ @LazyProperty
+ def label(self) -> Union[TYPE_RAW_LABEL, TYPE_LABEL]:
+ raw_label = self.raw_label
+ label = Annotation._mapped_label(self._label_map, raw_label)
+ return label
+
+ @property
+ def ann_data(self) -> T:
+ return self._ann_data
+
+ def __init__(self, uri: str, ann_data: T, label_map: Dict[Union[str, int], int]):
+ self._uri = uri
+ self._ann_data = ann_data
+ self._label_map = label_map
+
+ @classmethod
+ def build(cls, uri: str, ann_data: T, label_map: Dict[Union[str, int], int]) -> "Annotation":
+ return cls(uri=uri, ann_data=ann_data, label_map=label_map)
+
+ @classmethod
+ def build_from_uri(cls, uri: str, label_map: Union[Dict[TYPE_RAW_LABEL, TYPE_LABEL], None]) -> List["Annotation"]:
+ ann_data_list: List[T] = cls.annotation_list_from_uri(uri)
+ return [cls.build(uri=uri, ann_data=ann_data, label_map=label_map) for ann_data in ann_data_list]
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/geojson.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/geojson.py
new file mode 100644
index 00000000..cf5bf2e7
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/geojson.py
@@ -0,0 +1,83 @@
+from typing import List, Dict, Callable, Any # Literal, get_args
+from histoqc.import_wrapper.typing import Literal, get_args
+from ..io_utils.json import load_json
+from .base import Annotation, TYPE_POINT_SET, TYPE_RAW_LABEL, TYPE_POINT, TYPE_HOLED_SET
+
+TYPE_GEO_MULTIPOLYGON = Literal['MultiPolygon']
+TYPE_GEO_POLYGON = Literal['Polygon']
+TYPE_GEO_LINE_STRING = Literal['LineString']
+
+TYPE_GEOTYPE = Literal[TYPE_GEO_MULTIPOLYGON, TYPE_GEO_POLYGON, TYPE_GEO_LINE_STRING]
+
+
+class GEOJsonAnnotation(Annotation[Dict]):
+ PROP: str = "properties"
+ CLASS: str = "classification"
+ NAME: str = "name"
+
+ """
+ Parses a typical GeoJSON file containing one or more Polygon or MultiPolygon features.
+ These JSON files are the preferred way to serialize QuPath annotations, for example.
+ See https://qupath.readthedocs.io/en/latest/docs/scripting/overview.html#serialization-json
+ """
+
+ @staticmethod
+ def point_set_helper_multipolygon(coordinates: List[List[TYPE_POINT_SET]]) -> List[TYPE_HOLED_SET]:
+ out_list = []
+ for roi in coordinates:
+ out_list += GEOJsonAnnotation.point_set_helper_polygon(roi)
+ return out_list
+
+ @staticmethod
+ def point_set_helper_polygon(coordinates: List[TYPE_POINT_SET]) -> List[TYPE_HOLED_SET]:
+ inner_list = []
+ outer = GEOJsonAnnotation._point_set_single(coordinates[0])
+ inner_source = coordinates[1:]
+ for points in inner_source:
+ points: List[TYPE_POINT]
+ inner_list.append(GEOJsonAnnotation._point_set_single(points))
+ # if len(inner_list) == 0:
+ # inner_list = None
+ holed: TYPE_HOLED_SET = outer, inner_list
+ return [holed]
+
+ @staticmethod
+ def point_set_helper_lines(coordinates: List[TYPE_POINT]) -> List[TYPE_HOLED_SET]:
+ holed_set: TYPE_HOLED_SET = GEOJsonAnnotation._point_set_single(coordinates), None
+ return [holed_set]
+
+ @staticmethod
+ def _point_set_single(coordinates: List[TYPE_POINT]) -> List[TYPE_POINT]:
+ return [Annotation.point_to_int((coord[0], coord[1])) for coord in coordinates]
+
+ @staticmethod
+ def _func_from_geom_type(geom_type: TYPE_GEOTYPE) -> Callable[[Any], List[TYPE_HOLED_SET]]:
+ GEOMETRY_MAP: Dict[str, Callable[[Any], List[TYPE_HOLED_SET]]] = {
+ get_args(TYPE_GEO_MULTIPOLYGON)[0]: GEOJsonAnnotation.point_set_helper_multipolygon,
+ get_args(TYPE_GEO_POLYGON)[0]: GEOJsonAnnotation.point_set_helper_polygon,
+ get_args(TYPE_GEO_LINE_STRING)[0]: GEOJsonAnnotation.point_set_helper_lines,
+ }
+ assert geom_type in GEOMETRY_MAP, f"Unsupported Geometry Type: {geom_type}"
+ return GEOMETRY_MAP[geom_type]
+
+ def point_set_list(self) -> List[TYPE_HOLED_SET]:
+ geometry = self.ann_data['geometry']
+ geom_type = geometry['type']
+ coordinates = geometry['coordinates']
+ func = GEOJsonAnnotation._func_from_geom_type(geom_type)
+ return func(coordinates)
+
+ def label_from_annotation(self) -> TYPE_RAW_LABEL:
+ prop = self.ann_data.get(GEOJsonAnnotation.PROP)
+ classification = prop.get(GEOJsonAnnotation.CLASS)
+ if classification is not None:
+ return classification.get(GEOJsonAnnotation.NAME)
+ return None
+
+ @staticmethod
+ def annotation_list_from_uri(uri) -> List[Dict]:
+ data = load_json(uri)
+ if isinstance(data, Dict):
+ return [data]
+ assert isinstance(data, List)
+ return data
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/imagescope.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/imagescope.py
new file mode 100644
index 00000000..f5e0b5b0
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/annotation/imagescope.py
@@ -0,0 +1,93 @@
+from typing import List, Dict, Union
+from xml.etree import ElementTree as ET
+from xml.etree.ElementTree import Element
+from .base import Annotation, TYPE_POINT_SET, TYPE_RAW_LABEL, TYPE_POINT, TYPE_HOLED_SET
+
+
+class ImageScopeAnnotation(Annotation[Element]):
+ ANNOTATION_TAG_NAME = "Annotation"
+
+ TAG_REGION_ALL = "Regions"
+ TAG_REGION = "Region"
+ VERTICES: str = 'Vertices'
+ VERTEX: str = "Vertex"
+ X: str = 'X'
+ Y: str = 'Y'
+
+ CLASS_ATTR = "Name"
+ _ann_data: Element
+
+ """
+ Parses the xml file to get those annotations as lists of verticies
+ xmlMask will create a mask that is true inside the annotated region described in the specified xml file.
+ The xml file must follow the ImageScope format, the minimal components of which are:
+ ```
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ```
+ With more or blocks as needed for additional annotations. There is no functional difference
+ between multiple blocks and one blocks with multiple blocks
+ """
+
+ def label_from_annotation(self) -> TYPE_RAW_LABEL:
+ """
+ Read the label of the whole annotated region.
+ Assume the annotation class label is under element's "Name" attribute.
+
+ Returns:
+ int
+ """
+ return self._ann_data.get(ImageScopeAnnotation.CLASS_ATTR)
+
+ @staticmethod
+ def vertex_from_node(vertex: Element) -> TYPE_POINT:
+ raw_x: str = vertex.get(ImageScopeAnnotation.X)
+ raw_y: str = vertex.get(ImageScopeAnnotation.Y)
+ raw_point = (raw_x, raw_y)
+ return Annotation.point_to_int(raw_point)
+
+ def point_set_list(self) -> List[TYPE_HOLED_SET]:
+ """
+ I doubt it is ever standardized how to define holes so herein we just ignore all holes.
+ Returns:
+
+ """
+ out_list = []
+ for regions_all in self.ann_data.findall(ImageScopeAnnotation.TAG_REGION_ALL):
+ for region in regions_all.findall(ImageScopeAnnotation.TAG_REGION):
+ for vertices in region.findall(ImageScopeAnnotation.VERTICES):
+ # the X and Y attributes are float strings --> cast to float first before flooring down to int
+ xy_list: TYPE_POINT_SET = [ImageScopeAnnotation.vertex_from_node(vertex)
+ for vertex in vertices.findall(ImageScopeAnnotation.VERTEX)]
+ holes = None
+ holed_point_set: TYPE_HOLED_SET = xy_list, holes
+ out_list.append(holed_point_set)
+ return out_list
+
+ @staticmethod
+ def validated_ann(ann_data: Element):
+ # todo perhaps using xmlschema and a predefined image scope xml as the schema to validate the structure?
+ assert ann_data.tag == ImageScopeAnnotation.ANNOTATION_TAG_NAME
+ return ann_data
+
+ @staticmethod
+ def annotation_list_from_uri(uri) -> List[Element]:
+ tree = ET.parse(uri)
+ root = tree.getroot()
+ return root.findall(ImageScopeAnnotation.ANNOTATION_TAG_NAME)
+
+ @classmethod
+ def build(cls, uri: str, ann_data: Element, label_map: Dict[Union[str, int], int]) -> Annotation:
+ ann_data = ImageScopeAnnotation.validated_ann(ann_data)
+ return super().build(uri=uri, ann_data=ann_data, label_map=label_map)
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/__init__.py
new file mode 100644
index 00000000..b6e690fd
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/__init__.py
@@ -0,0 +1 @@
+from . import *
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/json.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/json.py
new file mode 100644
index 00000000..7fe1b3d9
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/annotations/io_utils/json.py
@@ -0,0 +1,12 @@
+import json
+
+
+def write_json(fname: str, obj, **kwargs):
+ with open(fname, 'w') as root:
+ json.dump(obj, root, **kwargs)
+
+
+def load_json(fname: str, **kwargs):
+ with open(fname, 'r') as root:
+ return json.load(root, **kwargs)
+
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__init__.py
new file mode 100644
index 00000000..01a2a7b2
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__init__.py
@@ -0,0 +1,38 @@
+"""histoqc.config
+
+helpers for providing configuration ini files for histoqc
+
+"""
+import re
+try:
+ import importlib.resources as _resources
+except ImportError:
+ # fallback for Python < 3.7
+ import importlib_resources as _resources
+
+
+# note to developers:
+# to store a new ini file in the histoqc package add an ini file that
+# matches the regular expression below to this directory
+CONFIG_TEMPLATE_RE = re.compile(r'config(_(?P[A-Za-z][A-Za-z0-9.]+))?[.]ini')
+
+
+def list_config_templates():
+ """return a dictionary mapping config template names to filenames
+
+ note: the default config is stored under name `None`
+ """
+ templates = {}
+ for filename in _resources.contents('histoqc.config'):
+ m = CONFIG_TEMPLATE_RE.match(filename)
+ if m:
+ templates[m.group('name') or 'default'] = filename
+ return templates
+
+
+def read_config_template(name=None):
+ """return the contents of a configuration template"""
+ templates = list_config_templates()
+ if name not in templates:
+ raise KeyError(f'no configuration template found under key {name!r}')
+ return _resources.read_text('histoqc.config', templates[name])
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__main__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__main__.py
new file mode 100644
index 00000000..5211cf16
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/__main__.py
@@ -0,0 +1,40 @@
+import argparse
+import sys
+
+from histoqc.config import list_config_templates
+from histoqc.config import read_config_template
+from histoqc.config._parser import get_argument_parser
+
+
+def main(argv=None):
+ if argv is None:
+ argv = sys.argv[1:]
+
+ parser = get_argument_parser()
+ args = parser.parse_args(argv)
+
+ if args.list:
+ print("# available example configurations:")
+ for name, filename in list_config_templates().items():
+ print(f"- {name}:".ljust(11), f"{filename}")
+ return 0
+
+ elif args.show:
+ name = args.show
+ try:
+ config = read_config_template(name)
+ except KeyError:
+ print(f"unknown configuration '{name}'", file=sys.stderr)
+ return -1
+
+ print(f"# example configuration '{name}'")
+ print(config)
+ return 0
+
+ else:
+ parser.print_help()
+ return -1
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/_parser.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/_parser.py
new file mode 100644
index 00000000..dbafd568
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/_parser.py
@@ -0,0 +1,14 @@
+import argparse
+
+def get_argument_parser():
+ """Return the argument parser for histoqc.config"""
+ parser = argparse.ArgumentParser(prog="histoqc.config",description="Show example configuration files")
+ parser.add_argument('--list',
+ action='store_true',
+ help='list available configs')
+ parser.add_argument('--show',
+ metavar='NAME',
+ type=str,
+ default=None,
+ help='show named example config')
+ return parser
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config.ini
new file mode 100644
index 00000000..34251aa3
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config.ini
@@ -0,0 +1,200 @@
+[pipeline]
+steps= BasicModule.getBasicStats
+ ClassificationModule.byExampleWithFeatures:pen_markings
+ ClassificationModule.byExampleWithFeatures:coverslip_edge
+ LightDarkModule.getIntensityThresholdPercent:tissue
+ LightDarkModule.getIntensityThresholdPercent:darktissue
+ BubbleRegionByRegion.detectSmoothness
+ MorphologyModule.removeFatlikeTissue
+ MorphologyModule.fillSmallHoles
+ MorphologyModule.removeSmallObjects
+ BlurDetectionModule.identifyBlurryRegions
+ BasicModule.finalProcessingSpur
+ BasicModule.finalProcessingArea
+ HistogramModule.compareToTemplates
+ HistogramModule.getHistogram
+ BrightContrastModule.getContrast
+ BrightContrastModule.getBrightnessGray
+ BrightContrastModule.getBrightnessByChannelinColorSpace:RGB
+ BrightContrastModule.getBrightnessByChannelinColorSpace:YUV
+ DeconvolutionModule.separateStains
+ SaveModule.saveFinalMask
+ SaveModule.saveMacro
+ SaveModule.saveThumbnails
+ BasicModule.finalComputations
+
+
+[BaseImage.BaseImage]
+image_work_size = 1.25x
+
+#not yet implemented
+confirm_base_mag: False
+
+#three options: relative2mask, absolute, relative2image
+mask_statistics = relative2mask
+
+[BasicModule.getBasicStats]
+image_work_size = 1.25x
+
+#[ClassificationModule.byExample]
+[ClassificationModule.byExampleWithFeatures:pen_markings]
+name: pen_markings
+threshold: .5
+examples: ./pen/1k_version/pen_green.png:./pen/1k_version/pen_green_mask.png
+
+area_threshold: 100
+features: frangi
+ laplace
+ rgb
+ #lbp
+ #gabor
+ #median
+ #gaussian
+
+laplace_ksize: 3
+
+frangi_scale_range: (1,10)
+frangi_scale_step: 2
+frangi_beta1: .5
+frangi_beta2: 15
+frangi_black_ridges: True
+
+gabor_theta: 4
+gabor_sigma: (1,3)
+gabor_frequency: (0.05, 0.25)
+
+lbp_radius: 3
+lbp_points: 24
+lbp_method: default
+
+median_disk_size: 3
+
+#gaussian_sigma: 1
+#gaussian_multichan: False
+
+
+
+[ClassificationModule.byExampleWithFeatures:coverslip_edge]
+name: coverslip_edge
+threshold: .5
+
+examples: ./models/coverslip_edge_he/coverslip_edge.png:./models/coverslip_edge_he/coverslip_edge_mask.png
+
+area_threshold: 15
+features: frangi
+ laplace
+ rgb
+
+dilate_kernel_size: 5
+
+[LightDarkModule.getIntensityThresholdPercent:bubble]
+name: bubble
+upper_threshold: .94
+lower_threshold: .82
+upper_variance: 11
+invert: true
+
+
+[LightDarkModule.getIntensityThresholdPercent:tissue]
+name: bright
+upper_threshold: .9
+lower_std: 10
+
+[LightDarkModule.getIntensityThresholdPercent:darktissue]
+name: dark
+upper_threshold: .15
+invert: true
+
+
+[LightDarkModule.getTissuePercent]
+threshold: .8
+
+[LightDarkModule.getDarkTissuePercent]
+threshold: .15
+
+[MorphologyModule.removeSmallObjects]
+min_size: 64
+
+[MorphologyModule.removeFatlikeTissue]
+kernel_size: 10
+max_keep_size: 1000
+fat_cell_size: 64
+
+[MorphologyModule.fillSmallHoles]
+min_size: 1000
+
+[HistogramModule.compareToTemplates]
+limit_to_mask: True
+bins: 20
+templates= ./templates/template1.png
+ ./templates/template2.png
+ ./templates/template3.png
+ ./templates/template4.png
+
+[HistogramModule.getHistogram]
+limit_to_mask: True
+bins: 20
+
+[BrightContrastModule.getContrast]
+limit_to_mask: True
+
+
+[BrightContrastModule.getBrightnessGray]
+limit_to_mask: True
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB]
+limit_to_mask: True
+
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV]
+limit_to_mask: True
+#pick a color space in the list from 'RGB', 'HSV', 'RGB CIE', 'XYZ', 'YUV', 'YIQ', 'YPbPr', 'YCbCr' : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace
+to_color_space: YUV
+
+[SaveModule.saveFinalMask]
+overlay: True
+
+[SaveModule.saveMacro]
+dim: 500
+
+[SaveModule.saveThumbnails]
+image_work_size: 1.25x
+small_dim: 500
+
+[BlurDetectionModule.identifyBlurryRegions]
+image_work_size = 2.5x
+blur_radius: 100
+blur_threshold: .15
+
+
+[BasicModule.finalComputations]
+; two options: absolute, relative2image. relative2mask is not available here
+mask_statistics = absolute
+
+[BasicModule.finalProcessingSpur]
+disk_radius: 5
+
+[BasicModule.finalProcessingArea]
+#area_threshold: 90000
+area_threshold: 10000
+
+[DeconvolutionModule.separateStains]
+;hed_from_rgb: Hematoxylin + Eosin + DAB
+;hdx_from_rgb: Hematoxylin + DAB
+;fgx_from_rgb: Feulgen + Light Green
+;bex_from_rgb: Giemsa stain : Methyl Blue + Eosin
+;rbd_from_rgb: FastRed + FastBlue + DAB
+;gdx_from_rgb: Methyl Green + DAB
+;hax_from_rgb: Hematoxylin + AEC
+;bro_from_rgb: Blue matrix Anilline Blue + Red matrix Azocarmine + Orange matrix Orange-G
+;bpx_from_rgb: Methyl Blue + Ponceau Fuchsin
+;ahx_from_rgb: Alcian Blue + Hematoxylin
+;hpx_from_rgb: Hematoxylin + PAS
+stain: hed_from_rgb
+use_mask: True
+
+
+[BubbleRegionByRegion.detectSmoothness]
+threshold: .01
+kernel_size: 10
+min_object_size: 500
\ No newline at end of file
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_clinical.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_clinical.ini
new file mode 100644
index 00000000..dde03b84
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_clinical.ini
@@ -0,0 +1,228 @@
+[pipeline]
+steps= BasicModule.getBasicStats
+ #ClassificationModule.byExampleWithFeatures:pen_markings
+ ClassificationModule.byExampleWithFeatures:coverslip_edge
+ LightDarkModule.getIntensityThresholdPercent:tissue
+ LightDarkModule.getIntensityThresholdPercent:darktissue
+ BubbleRegionByRegion.detectSmoothness
+ SaveModule.saveMask:first
+ SaveModule.saveMask2Geojson:geo_first
+ MorphologyModule.removeFatlikeTissue
+ MorphologyModule.fillSmallHoles
+ MorphologyModule.removeSmallObjects
+ SaveModule.saveMask:second
+ SaveModule.saveMask2Geojson:geo_second
+ BlurDetectionModule.identifyBlurryRegions
+ BasicModule.finalProcessingSpur
+ BasicModule.finalProcessingArea
+ HistogramModule.compareToTemplates
+ HistogramModule.getHistogram
+ BrightContrastModule.getContrast
+ BrightContrastModule.getBrightnessGray
+ BrightContrastModule.getBrightnessByChannelinColorSpace:RGB
+ BrightContrastModule.getBrightnessByChannelinColorSpace:YUV
+ SaveModule.saveMask:third
+ SaveModule.saveMask2Geojson:geo_third
+ DeconvolutionModule.separateStains
+ SaveModule.saveFinalMask
+ SaveModule.saveMacro
+ SaveModule.saveThumbnails
+ BasicModule.finalComputations
+
+
+[BaseImage.BaseImage]
+image_work_size = 1.25x
+
+#not yet implemented
+confirm_base_mag: False
+
+#three options: relative2mask, absolute, relative2image
+mask_statistics = relative2mask
+
+[BasicModule.getBasicStats]
+image_work_size = 1.25x
+
+#[ClassificationModule.byExample]
+[ClassificationModule.byExampleWithFeatures:pen_markings]
+name: pen_markings
+threshold: .5
+examples: ./pen/1k_version/pen_green.png:./pen/1k_version/pen_green_mask.png
+
+area_threshold: 100
+features: frangi
+ laplace
+ rgb
+ #lbp
+ #gabor
+ #median
+ #gaussian
+
+laplace_ksize: 3
+
+frangi_scale_range: (1,10)
+frangi_scale_step: 2
+frangi_beta1: .5
+frangi_beta2: 15
+frangi_black_ridges: True
+
+gabor_theta: 4
+gabor_sigma: (1,3)
+gabor_frequency: (0.05, 0.25)
+
+lbp_radius: 3
+lbp_points: 24
+lbp_method: default
+
+median_disk_size: 3
+
+#gaussian_sigma: 1
+#gaussian_multichan: False
+
+
+
+[ClassificationModule.byExampleWithFeatures:coverslip_edge]
+name: coverslip_edge
+threshold: .5
+
+examples: ./models/coverslip_edge_he/coverslip_edge.png:./models/coverslip_edge_he/coverslip_edge_mask.png
+
+area_threshold: 15
+features: frangi
+ laplace
+ rgb
+
+dilate_kernel_size: 5
+
+[LightDarkModule.getIntensityThresholdPercent:bubble]
+name: bubble
+upper_threshold: .94
+lower_threshold: .82
+upper_variance: 11
+invert: true
+
+
+[LightDarkModule.getIntensityThresholdPercent:tissue]
+name: nonwhite
+upper_threshold: .9
+lower_std: 10
+
+[LightDarkModule.getIntensityThresholdPercent:darktissue]
+name: dark
+upper_threshold: .15
+invert: true
+
+
+[LightDarkModule.getTissuePercent]
+threshold: .8
+
+[LightDarkModule.getDarkTissuePercent]
+threshold: .15
+
+[MorphologyModule.removeSmallObjects]
+min_size: 64
+
+[MorphologyModule.removeFatlikeTissue]
+kernel_size: 10
+max_keep_size: 1000
+fat_cell_size: 64
+
+[MorphologyModule.fillSmallHoles]
+min_size: 1000
+
+[HistogramModule.compareToTemplates]
+limit_to_mask: True
+bins: 20
+templates= ./templates/template1.png
+ ./templates/template2.png
+ ./templates/template3.png
+ ./templates/template4.png
+
+[HistogramModule.getHistogram]
+limit_to_mask: True
+bins: 20
+
+[BrightContrastModule.getContrast]
+limit_to_mask: True
+
+
+[BrightContrastModule.getBrightnessGray]
+limit_to_mask: True
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB]
+limit_to_mask: True
+
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV]
+limit_to_mask: True
+#pick a color space in the list from ‘RGB’, ‘HSV’, ‘RGB CIE’, ‘XYZ’, ‘YUV’, ‘YIQ’, ‘YPbPr’, ‘YCbCr’ : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace
+to_color_space: YUV
+
+[SaveModule.saveFinalMask]
+overlay: True
+
+[SaveModule.saveMacro]
+dim: 500
+
+[SaveModule.saveThumbnails]
+image_work_size: 1.25x
+small_dim: 500
+
+[BlurDetectionModule.identifyBlurryRegions]
+image_work_size = 2.5x
+blur_radius: 100
+blur_threshold: .15
+
+
+[BasicModule.finalComputations]
+; two options: absolute, relative2image. relative2mask is not available here
+mask_statistics = absolute
+
+[BasicModule.finalProcessingSpur]
+disk_radius: 5
+
+[BasicModule.finalProcessingArea]
+#area_threshold: 90000
+area_threshold: 10000
+
+[DeconvolutionModule.separateStains]
+;hed_from_rgb: Hematoxylin + Eosin + DAB
+;hdx_from_rgb: Hematoxylin + DAB
+;fgx_from_rgb: Feulgen + Light Green
+;bex_from_rgb: Giemsa stain : Methyl Blue + Eosin
+;rbd_from_rgb: FastRed + FastBlue + DAB
+;gdx_from_rgb: Methyl Green + DAB
+;hax_from_rgb: Hematoxylin + AEC
+;bro_from_rgb: Blue matrix Anilline Blue + Red matrix Azocarmine + Orange matrix Orange-G
+;bpx_from_rgb: Methyl Blue + Ponceau Fuchsin
+;ahx_from_rgb: Alcian Blue + Hematoxylin
+;hpx_from_rgb: Hematoxylin + PAS
+stain: hed_from_rgb
+use_mask: True
+
+
+[BubbleRegionByRegion.detectSmoothness]
+threshold: .01
+kernel_size: 10
+min_object_size: 500
+
+[SaveModule.saveMask:first]
+suffix: first
+
+[SaveModule.saveMask:second]
+suffix: second
+
+
+[SaveModule.saveMask:third]
+suffix: third
+
+
+[SaveModule.saveMask2Geojson:geo_first]
+suffix: geo_first
+mask_name: img_mask_flat
+
+[SaveModule.saveMask2Geojson:geo_second]
+suffix: geo_second
+mask_name: img_mask_small_filled
+
+[SaveModule.saveMask2Geojson:geo_third]
+suffix: geo_third
\ No newline at end of file
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_first.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_first.ini
new file mode 100644
index 00000000..49b71ee7
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_first.ini
@@ -0,0 +1,92 @@
+[pipeline]
+steps= BasicModule.getBasicStats
+ LightDarkModule.getIntensityThresholdPercent:tissue
+ BrightContrastModule.getContrast:background
+ BrightContrastModule.getBrightnessGray:background
+ BrightContrastModule.getBrightnessByChannelinColorSpace:RGB:background
+ HistogramModule.compareToTemplates
+ HistogramModule.getHistogram
+ BrightContrastModule.getContrast
+ BrightContrastModule.getBrightnessGray
+ BrightContrastModule.getBrightnessByChannelinColorSpace:RGB
+ BrightContrastModule.getBrightnessByChannelinColorSpace:YUV
+ SaveModule.saveFinalMask
+ SaveModule.saveMacro
+ SaveModule.saveThumbnails
+ BasicModule.finalComputations
+
+
+[BaseImage.BaseImage]
+image_work_size = 1.25x
+
+#three options: relative2mask, absolute, relative2image
+mask_statistics = relative2mask
+
+confirm_base_mag: False
+
+
+[BasicModule.getBasicStats]
+image_work_size = 1.25x
+
+
+[LightDarkModule.getIntensityThresholdPercent:tissue]
+name: nonwhite
+upper_threshold: .9
+lower_std: 10
+
+[HistogramModule.compareToTemplates]
+limit_to_mask: True
+bins: 20
+templates= ./templates/template1.png
+ ./templates/template2.png
+ ./templates/template3.png
+ ./templates/template4.png
+
+[HistogramModule.getHistogram]
+limit_to_mask: True
+bins: 20
+
+[BrightContrastModule.getContrast]
+limit_to_mask: True
+
+[BrightContrastModule.getContrast:background]
+limit_to_mask: True
+invert: True
+prefix: background
+
+
+[BrightContrastModule.getBrightnessGray]
+limit_to_mask: True
+
+[BrightContrastModule.getBrightnessGray:background]
+limit_to_mask: True
+invert: True
+prefix: background
+
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB]
+limit_to_mask: True
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB:background]
+limit_to_mask: True
+invert: True
+prefix: background
+
+
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV]
+limit_to_mask: True
+#pick a color space in the list from ‘RGB’, ‘HSV’, ‘RGB CIE’, ‘XYZ’, ‘YUV’, ‘YIQ’, ‘YPbPr’, ‘YCbCr’ : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace
+to_color_space: YUV
+
+
+[SaveModule.saveFinalMask]
+overlay: True
+
+[SaveModule.saveMacro]
+dim: 500
+
+[SaveModule.saveThumbnails]
+image_work_size: 1.25x
+small_dim: 500
+
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_ihc.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_ihc.ini
new file mode 100644
index 00000000..2aeb1d41
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_ihc.ini
@@ -0,0 +1,246 @@
+[pipeline]
+steps= BasicModule.getBasicStats
+ LightDarkModule.saveEqualisedImage
+ LightDarkModule.minimumPixelIntensityNeighborhoodFiltering
+ ;LightDarkModule.getIntensityThresholdPercent:darktissue
+ MorphologyModule.fillSmallHoles
+ MorphologyModule.removeSmallObjects
+ LocalTextureEstimationModule.estimateGreyComatrixFeatures:background
+ BrightContrastModule.getContrast:background
+ BrightContrastModule.getBrightnessGray:background
+ BrightContrastModule.getBrightnessByChannelinColorSpace:RGB_background
+ ClassificationModule.byExampleWithFeatures:pen_markings
+ ClassificationModule.byExampleWithFeatures:coverslip_edge
+ LocalTextureEstimationModule.estimateGreyComatrixFeatures:final
+ BrightContrastModule.getContrast
+ BrightContrastModule.getBrightnessGray
+ BrightContrastModule.getBrightnessByChannelinColorSpace:RGB
+ BrightContrastModule.getBrightnessByChannelinColorSpace:YUV
+ DeconvolutionModule.separateStains
+ HistogramModule.getHistogram
+ SaveModule.saveFinalMask
+ SaveModule.saveMacro
+ SaveModule.saveThumbnails
+ BasicModule.finalComputations
+
+
+[BaseImage.BaseImage]
+image_work_size = 1.25x
+in_memory_compression = True
+
+#not yet implemented
+confirm_base_mag: False
+
+#three options: relative2mask, absolute, relative2image
+mask_statistics = relative2mask
+
+[BasicModule.getBasicStats]
+image_work_size = 1.25x
+
+#[ClassificationModule.byExample]
+[ClassificationModule.byExampleWithFeatures:pen_markings]
+name: pen_markings
+threshold: .5
+examples: ./pen/1k_version/pen_green.png:./pen/1k_version/pen_green_mask.png
+ ./pen/1k_version/pen_red.png:./pen/1k_version/pen_red_mask.png
+ ./pen/1k_version/pen_black.png:./pen/1k_version/pen_black_mask.png
+
+nsamples_per_example: 1000
+area_threshold: 100
+features: frangi
+ laplace
+ rgb
+ #lbp
+ #gabor
+ #median
+ #gaussian
+
+laplace_ksize: 3
+
+frangi_scale_range: (1,10)
+frangi_scale_step: 2
+frangi_beta1: .5
+frangi_beta2: 15
+frangi_black_ridges: True
+
+gabor_theta: 4
+gabor_sigma: (1,3)
+gabor_frequency: (0.05, 0.25)
+
+lbp_radius: 3
+lbp_points: 24
+lbp_method: default
+
+median_disk_size: 3
+
+#gaussian_sigma: 1
+#gaussian_multichan: False
+
+
+
+[ClassificationModule.byExampleWithFeatures:coverslip_edge]
+name: coverslip_edge
+threshold: .5
+
+examples: ./models/coverslip_edge_ihc/coverslip_edge.png:./models/coverslip_edge_ihc/coverslip_edge_mask.png
+
+area_threshold: 15
+features: frangi
+ laplace
+ rgb
+
+dilate_kernel_size: 5
+
+[LightDarkModule.getIntensityThresholdPercent:bubble]
+name: bubble
+upper_threshold: .94
+lower_threshold: .82
+upper_variance: 11
+invert: true
+
+[LightDarkModule.getIntensityThresholdPercent:tissue]
+name: bright
+upper_threshold: .9
+lower_std: 10
+
+[LightDarkModule.getIntensityThresholdPercent:darktissue]
+name: dark
+upper_threshold: .15
+invert: true
+
+
+[LightDarkModule.getTissuePercent]
+threshold: .8
+[LightDarkModule.getDarkTissuePercent]
+threshold: .15
+
+[MorphologyModule.removeSmallObjects]
+min_size: 10000
+
+[MorphologyModule.removeFatlikeTissue]
+kernel_size: 10
+max_keep_size: 1000
+fat_cell_size: 64
+
+[MorphologyModule.fillSmallHoles]
+min_size: 1000
+
+[HistogramModule.compareToTemplates]
+limit_to_mask: True
+bins: 20
+templates= ./templates/template1.png
+ ./templates/template2.png
+ ./templates/template3.png
+ ./templates/template4.png
+
+[HistogramModule.getHistogram]
+limit_to_mask: True
+bins: 20
+
+[BrightContrastModule.getContrast]
+limit_to_mask: True
+
+
+[BrightContrastModule.getBrightnessGray]
+limit_to_mask: True
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB]
+limit_to_mask: True
+
+
+
+[BrightContrastModule.getContrast:background]
+prefix: background
+limit_to_mask: True
+invert: True
+mask_name: img_mask_use
+
+[BrightContrastModule.getBrightnessGray:background]
+prefix: background
+limit_to_mask: True
+invert: True
+mask_name: img_mask_use
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB_background]
+prefix: background
+limit_to_mask: True
+invert: True
+mask_name: img_mask_use
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV]
+limit_to_mask: True
+#pick a color space in the list from 'RGB', 'HSV', 'RGB CIE', 'XYZ', 'YUV', 'YIQ', 'YPbPr', 'YCbCr' : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace
+to_color_space: YUV
+
+[SaveModule.saveFinalMask]
+overlay: True
+
+[SaveModule.saveMacro]
+dim: 500
+
+[SaveModule.saveThumbnails]
+image_work_size: 1.25x
+small_dim: 500
+
+[BlurDetectionModule.identifyBlurryRegions]
+image_work_size = 2.5x
+blur_radius: 100
+blur_threshold: .15
+
+
+[BasicModule.finalComputations]
+; two options: absolute, relative2image. relative2mask is not available here
+mask_statistics = absolute
+
+[BasicModule.finalProcessingSpur]
+disk_radius: 5
+
+[BasicModule.finalProcessingArea]
+#area_threshold: 90000
+area_threshold: 10000
+
+[DeconvolutionModule.separateStains]
+;hed_from_rgb: Hematoxylin + Eosin + DAB
+;hdx_from_rgb: Hematoxylin + DAB
+;fgx_from_rgb: Feulgen + Light Green
+;bex_from_rgb: Giemsa stain : Methyl Blue + Eosin
+;rbd_from_rgb: FastRed + FastBlue + DAB
+;gdx_from_rgb: Methyl Green + DAB
+;hax_from_rgb: Hematoxylin + AEC
+;bro_from_rgb: Blue matrix Anilline Blue + Red matrix Azocarmine + Orange matrix Orange-G
+;bpx_from_rgb: Methyl Blue + Ponceau Fuchsin
+;ahx_from_rgb: Alcian Blue + Hematoxylin
+;hpx_from_rgb: Hematoxylin + PAS
+stain: hdx_from_rgb
+use_mask: True
+
+
+[BubbleRegionByRegion.detectSmoothness]
+threshold: .01
+kernel_size: 10
+min_object_size: 500
+
+
+[LocalTextureEstimationModule.estimateGreyComatrixFeatures:background]
+prefix: background
+patch_size: 32
+npatches: 1000
+nlevels: 8
+feats: contrast:dissimilarity:homogeneity:ASM:energy:correlation
+invert: True
+mask_name: img_mask_use
+
+[LocalTextureEstimationModule.estimateGreyComatrixFeatures:final]
+prefix: final
+patch_size: 32
+nlevels: 8
+npatches: 1000
+feats: contrast:dissimilarity:homogeneity:ASM:energy:correlation
+invert: False
+mask_name: img_mask_use
+
+
+[LightDarkModule.minimumPixelIntensityNeighborhoodFiltering]
+disk_size: 5
+upper_threshold: 210
+invert: True
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_light.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_light.ini
new file mode 100644
index 00000000..0edfc9b4
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_light.ini
@@ -0,0 +1,187 @@
+[pipeline]
+steps= BasicModule.getBasicStats
+# ClassificationModule.byExampleWithFeatures:pen_markings
+; ClassificationModule.byExampleWithFeatures:coverslip_edge
+ LightDarkModule.getIntensityThresholdPercent:tissue
+ BubbleRegionByRegion.detectSmoothness
+ LightDarkModule.getIntensityThresholdPercent:darktissue
+ MorphologyModule.fillSmallHoles
+ MorphologyModule.removeSmallObjects
+ BlurDetectionModule.identifyBlurryRegions
+ BasicModule.finalProcessingSpur
+ BasicModule.finalProcessingArea
+; HistogramModule.compareToTemplates
+; HistogramModule.getHistogram
+; BrightContrastModule.getContrast
+; BrightContrastModule.getBrightnessGray
+; BrightContrastModule.getBrightnessByChannelinColorSpace:RGB
+; BrightContrastModule.getBrightnessByChannelinColorSpace:YUV
+; DeconvolutionModule.separateStains
+ SaveModule.saveFinalMask
+ SaveModule.saveMacro
+ SaveModule.saveThumbnails
+ BasicModule.finalComputations
+
+
+[BaseImage.BaseImage]
+image_work_size = 1.25x
+
+#not yet implemented
+confirm_base_mag: False
+
+#three options: relative2mask, absolute, relative2image
+mask_statistics = relative2mask
+
+[BasicModule.getBasicStats]
+image_work_size = 1.25x
+
+#[ClassificationModule.byExample]
+[ClassificationModule.byExampleWithFeatures:pen_markings]
+name: pen_markings
+threshold: .5
+examples: ./pen/1k_version/pen_green.png:./pen/1k_version/pen_green_mask.png
+
+area_threshold: 100
+features: frangi
+ laplace
+ rgb
+ #lbp
+ #gabor
+ #median
+ #gaussian
+
+laplace_ksize: 3
+
+frangi_scale_range: (1,10)
+frangi_scale_step: 2
+frangi_beta1: .5
+frangi_beta2: 15
+frangi_black_ridges: True
+
+gabor_theta: 4
+gabor_sigma: (1,3)
+gabor_frequency: (0.05, 0.25)
+
+lbp_radius: 3
+lbp_points: 24
+lbp_method: default
+
+median_disk_size: 3
+
+#gaussian_sigma: 1
+#gaussian_multichan: False
+
+
+
+[ClassificationModule.byExampleWithFeatures:coverslip_edge]
+name: coverslip_edge
+threshold: .5
+
+examples: ./models/coverslip_edge_he/coverslip_edge.png:./models/coverslip_edge_he/coverslip_edge_mask.png
+
+area_threshold: 16000
+features: frangi
+ laplace
+ rgb
+
+
+[LightDarkModule.getIntensityThresholdPercent:bubble]
+name: bubble
+upper_threshold: .94
+lower_threshold: .82
+upper_variance: 11
+invert: true
+
+
+[LightDarkModule.getIntensityThresholdPercent:tissue]
+name: nonwhite
+upper_threshold: .99
+lower_std: 10
+
+[LightDarkModule.getIntensityThresholdPercent:darktissue]
+name: dark
+upper_threshold: .15
+invert: true
+
+
+[LightDarkModule.getTissuePercent]
+threshold: .8
+
+[LightDarkModule.getDarkTissuePercent]
+threshold: .15
+
+[MorphologyModule.removeSmallObjects]
+min_size: 64
+
+[MorphologyModule.fillSmallHoles]
+min_size: 1000
+
+[HistogramModule.compareToTemplates]
+limit_to_mask: True
+bins: 20
+templates= ./templates/template1.png
+ ./templates/template2.png
+ ./templates/template3.png
+ ./templates/template4.png
+
+[HistogramModule.getHistogram]
+limit_to_mask: True
+bins: 20
+
+[BrightContrastModule.getContrast]
+limit_to_mask: True
+
+
+[BrightContrastModule.getBrightnessGray]
+limit_to_mask: True
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB]
+limit_to_mask: True
+
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV]
+limit_to_mask: True
+#pick a color space in the list from ‘RGB’, ‘HSV’, ‘RGB CIE’, ‘XYZ’, ‘YUV’, ‘YIQ’, ‘YPbPr’, ‘YCbCr’ : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace
+to_color_space: YUV
+
+[SaveModule.saveFinalMask]
+overlay: True
+
+[SaveModule.saveMacro]
+dim: 500
+
+[SaveModule.saveThumbnails]
+image_work_size: 1.25x
+small_dim: 500
+
+[BlurDetectionModule.identifyBlurryRegions]
+image_work_size = 2.5x
+blur_radius: 7
+blur_threshold: .05
+
+
+[BasicModule.finalComputations]
+; two options: absolute, relative2image. relative2mask is not available here
+mask_statistics = absolute
+
+[BasicModule.finalProcessingSpur]
+disk_radius: 7
+
+[BasicModule.finalProcessingArea]
+area_threshold: 90000
+
+[DeconvolutionModule.separateStains]
+;hed_from_rgb: Hematoxylin + Eosin + DAB
+;hdx_from_rgb: Hematoxylin + DAB
+;fgx_from_rgb: Feulgen + Light Green
+;bex_from_rgb: Giemsa stain : Methyl Blue + Eosin
+;rbd_from_rgb: FastRed + FastBlue + DAB
+;gdx_from_rgb: Methyl Green + DAB
+;hax_from_rgb: Hematoxylin + AEC
+;bro_from_rgb: Blue matrix Anilline Blue + Red matrix Azocarmine + Orange matrix Orange-G
+;bpx_from_rgb: Methyl Blue + Ponceau Fuchsin
+;ahx_from_rgb: Alcian Blue + Hematoxylin
+;hpx_from_rgb: Hematoxylin + PAS
+stain: hed_from_rgb
+use_mask: True
+
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_v2.1.ini b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_v2.1.ini
new file mode 100644
index 00000000..59bfc33b
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/config/config_v2.1.ini
@@ -0,0 +1,262 @@
+[pipeline]
+steps= BasicModule.getBasicStats
+ LightDarkModule.saveEqualisedImage
+# ClassificationModule.byExampleWithFeatures:pen_markings
+# ClassificationModule.byExampleWithFeatures:coverslip_edge
+ LightDarkModule.minimumPixelIntensityNeighborhoodFiltering
+ LightDarkModule.getIntensityThresholdPercent:darktissue
+ BubbleRegionByRegion.detectSmoothness
+ MorphologyModule.removeFatlikeTissue
+ MorphologyModule.fillSmallHoles
+ MorphologyModule.removeSmallObjects
+ LocalTextureEstimationModule.estimateGreyComatrixFeatures:background
+ BrightContrastModule.getContrast:background
+ BrightContrastModule.getBrightnessGray:background
+ BrightContrastModule.getBrightnessByChannelinColorSpace:RGB_background
+ BlurDetectionModule.identifyBlurryRegions
+ BasicModule.finalProcessingSpur
+ BasicModule.finalProcessingArea
+ HistogramModule.compareToTemplates
+ HistogramModule.getHistogram
+ LocalTextureEstimationModule.estimateGreyComatrixFeatures:final
+ BrightContrastModule.getContrast
+ BrightContrastModule.getBrightnessGray
+ BrightContrastModule.getBrightnessByChannelinColorSpace:RGB
+ BrightContrastModule.getBrightnessByChannelinColorSpace:YUV
+ DeconvolutionModule.separateStains
+ SaveModule.saveFinalMask
+ SaveModule.saveMacro
+ SaveModule.saveThumbnails
+ ArtifactToGeoJSONModule.export_geojson
+ BasicModule.countTissuePieces
+ BasicModule.finalComputations
+
+
+[BaseImage.BaseImage]
+image_work_size = 1.25x
+in_memory_compression = True
+
+#not yet implemented
+confirm_base_mag: False
+
+#three options: relative2mask, absolute, relative2image
+mask_statistics = relative2mask
+
+[BasicModule.getBasicStats]
+image_work_size = 1.25x
+
+#[ClassificationModule.byExample]
+[ClassificationModule.byExampleWithFeatures:pen_markings]
+name: pen_markings
+threshold: .5
+examples: ./pen/1k_version/pen_green.png:./pen/1k_version/pen_green_mask.png
+ ./pen/1k_version/pen_red.png:./pen/1k_version/pen_red_mask.png
+ ./pen/1k_version/pen_black.png:./pen/1k_version/pen_black_mask.png
+
+nsamples_per_example: 10000
+area_threshold: 100
+features: frangi
+ laplace
+ rgb
+ #lbp
+ #gabor
+ #median
+ #gaussian
+
+laplace_ksize: 3
+
+frangi_scale_range: (1,10)
+frangi_scale_step: 2
+frangi_beta1: .5
+frangi_beta2: 15
+frangi_black_ridges: True
+
+gabor_theta: 4
+gabor_sigma: (1,3)
+gabor_frequency: (0.05, 0.25)
+
+lbp_radius: 3
+lbp_points: 24
+lbp_method: default
+
+median_disk_size: 3
+
+#gaussian_sigma: 1
+#gaussian_multichan: False
+
+
+
+[ClassificationModule.byExampleWithFeatures:coverslip_edge]
+name: coverslip_edge
+threshold: .5
+
+examples: ./models/coverslip_edge_he/coverslip_edge.png:./models/coverslip_edge_he/coverslip_edge_mask.png
+
+nsamples_per_example: 10000
+
+area_threshold: 15
+features: frangi
+ laplace
+ rgb
+
+dilate_kernel_size: 5
+
+[LightDarkModule.getIntensityThresholdPercent:bubble]
+name: bubble
+upper_threshold: .94
+lower_threshold: .82
+upper_variance: 11
+invert: true
+
+
+[LightDarkModule.getIntensityThresholdPercent:tissue]
+name: bright
+upper_threshold: .9
+lower_std: 10
+
+[LightDarkModule.getIntensityThresholdPercent:darktissue]
+name: dark
+upper_threshold: .15
+invert: true
+
+
+[LightDarkModule.getTissuePercent]
+threshold: .8
+
+[LightDarkModule.getDarkTissuePercent]
+threshold: .15
+
+[MorphologyModule.removeSmallObjects]
+min_size: 64
+
+[MorphologyModule.removeFatlikeTissue]
+kernel_size: 10
+max_keep_size: 1000
+fat_cell_size: 64
+
+[MorphologyModule.fillSmallHoles]
+min_size: 1000
+
+[HistogramModule.compareToTemplates]
+limit_to_mask: True
+bins: 20
+templates= ./templates/template1.png
+ ./templates/template2.png
+ ./templates/template3.png
+ ./templates/template4.png
+
+[HistogramModule.getHistogram]
+limit_to_mask: True
+bins: 20
+
+[BrightContrastModule.getContrast]
+limit_to_mask: True
+
+
+[BrightContrastModule.getBrightnessGray]
+limit_to_mask: True
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB]
+limit_to_mask: True
+
+
+
+[BrightContrastModule.getContrast:background]
+prefix: background
+limit_to_mask: True
+invert: True
+mask_name: img_mask_use
+
+[BrightContrastModule.getBrightnessGray:background]
+prefix: background
+limit_to_mask: True
+invert: True
+mask_name: img_mask_use
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:RGB_background]
+prefix: background
+limit_to_mask: True
+invert: True
+mask_name: img_mask_use
+
+[BrightContrastModule.getBrightnessByChannelinColorSpace:YUV]
+limit_to_mask: True
+#pick a color space in the list from 'RGB', 'HSV', 'RGB CIE', 'XYZ', 'YUV', 'YIQ', 'YPbPr', 'YCbCr' : http://scikit-image.org/docs/dev/api/skimage.color.html#skimage.color.convert_colorspace
+to_color_space: YUV
+
+[SaveModule.saveFinalMask]
+overlay: True
+
+[SaveModule.saveMacro]
+dim: 500
+
+[SaveModule.saveThumbnails]
+image_work_size: 1.25x
+small_dim: 500
+
+[BlurDetectionModule.identifyBlurryRegions]
+image_work_size = 2.5x
+blur_radius: 100
+blur_threshold: .15
+
+
+[BasicModule.finalComputations]
+; two options: absolute, relative2image. relative2mask is not available here
+mask_statistics = absolute
+
+[BasicModule.finalProcessingSpur]
+disk_radius: 5
+
+[BasicModule.finalProcessingArea]
+#area_threshold: 90000
+area_threshold: 10000
+
+[DeconvolutionModule.separateStains]
+;hed_from_rgb: Hematoxylin + Eosin + DAB
+;hdx_from_rgb: Hematoxylin + DAB
+;fgx_from_rgb: Feulgen + Light Green
+;bex_from_rgb: Giemsa stain : Methyl Blue + Eosin
+;rbd_from_rgb: FastRed + FastBlue + DAB
+;gdx_from_rgb: Methyl Green + DAB
+;hax_from_rgb: Hematoxylin + AEC
+;bro_from_rgb: Blue matrix Anilline Blue + Red matrix Azocarmine + Orange matrix Orange-G
+;bpx_from_rgb: Methyl Blue + Ponceau Fuchsin
+;ahx_from_rgb: Alcian Blue + Hematoxylin
+;hpx_from_rgb: Hematoxylin + PAS
+stain: hed_from_rgb
+use_mask: True
+
+
+[BubbleRegionByRegion.detectSmoothness]
+threshold: .01
+kernel_size: 10
+min_object_size: 500
+
+
+
+[LocalTextureEstimationModule.estimateGreyComatrixFeatures:background]
+prefix: background
+patch_size: 32
+npatches: 1000
+nlevels: 8
+feats: contrast:dissimilarity:homogeneity:ASM:energy:correlation
+invert: True
+mask_name: img_mask_use
+
+[LocalTextureEstimationModule.estimateGreyComatrixFeatures:final]
+prefix: final
+patch_size: 32
+nlevels: 8
+npatches: 1000
+feats: contrast:dissimilarity:homogeneity:ASM:energy:correlation
+invert: False
+mask_name: img_mask_use
+
+[LightDarkModule.minimumPixelIntensityNeighborhoodFiltering]
+disk_size: 5
+upper_threshold: 210
+invert: True
+
+[BasicModule.countTissuePieces]
+
+[ArtifactToGeoJSONModule.export_geojson]
diff --git a/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/__init__.py b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/__init__.py
new file mode 100644
index 00000000..956e42f9
--- /dev/null
+++ b/runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/data/__init__.py
@@ -0,0 +1,162 @@
+"""histoqc.data
+
+helpers for providing data resources for histoqc modules
+
+"""
+import os
+import re
+from configparser import ConfigParser
+from contextlib import ContextDecorator
+from contextlib import ExitStack
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from types import MethodType
+
+try:
+ from importlib.resources import files as _files
+except ImportError:
+ from importlib_resources import files as _files
+
+
+# --- dispatch config rewriting ---------------------------------------
+
+# noinspection PyPep8Naming
+class _ManagedPkgData(ContextDecorator):
+
+ _section_re = re.compile(
+ r'(?P[A-Za-z_][A-Za-z0-9_]+[.][A-Za-z_][A-Za-z0-9_]+)'
+ r'([:](?P