6 changes: 6 additions & 0 deletions runtime/ops/histoqc_op/__init__.py
@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
from datamate.core.base_op import OPERATORS
OPERATORS.register_module(
    module_name='HistoQCMapper',
    module_path='ops.user.histoqc_op.process'
)
32 changes: 32 additions & 0 deletions runtime/ops/histoqc_op/histoqc_src/HistoQC/LICENSE.txt
@@ -0,0 +1,32 @@
The Clear BSD License

Copyright (c) 2019 Andrew Janowczyk
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted (subject to the limitations in the disclaimer
below) provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.

NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
19 changes: 19 additions & 0 deletions runtime/ops/histoqc_op/histoqc_src/HistoQC/README.md
@@ -0,0 +1,19 @@
# HistoQC Mapper for DataMate

[Original Project](https://github.com/choosehappy/HistoQC)

**Note**: This operator is the standard integration of [HistoQC](https://github.com/choosehappy/HistoQC) into the DataMate operator pipeline. It adds structured extraction of the quality-control artifacts that HistoQC produces for whole-slide images (WSI).


## Modifications

During integration, the following enhancements were made to fit DataMate's `Mapper` architecture:

### 1. Spatial Coordinate Extraction
- **Mask to coordinates**: Adds post-processing of the mask PNGs that HistoQC outputs.
- **GeoJSON output**: Artifact regions (e.g. pen markings, coverslip edges) are converted into standard GeoJSON polygons via OpenCV contour detection and wrapped into `sample["text"]`.
- **Scale alignment**: Detected coordinates can be resampled via the `scaleFactor` parameter so that they align with the coordinate system of the original WSI (a sketch of this step follows below).
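
The mask-to-GeoJSON conversion can be pictured roughly as follows. This is a minimal sketch rather than the operator's actual code: `mask_to_geojson` and `scale_factor` are illustrative names, and the QuPath-style properties are trimmed to a minimum.

```python
import json

import cv2
import numpy as np


def mask_to_geojson(mask: np.ndarray, scale_factor: float = 1.0) -> str:
    """Turn a binary artifact mask into a GeoJSON FeatureCollection string."""
    # Binarize and trace the outer contour of every artifact region.
    _, thresh = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    features = []
    for contour in contours:
        # Rescale contour points back to the original WSI coordinate system.
        pts = (contour.reshape(-1, 2) * scale_factor).tolist()
        if len(pts) < 3:
            continue
        pts.append(pts[0])  # close the polygon ring
        features.append({
            "type": "Feature",
            "properties": {"objectType": "annotation"},
            "geometry": {"type": "Polygon", "coordinates": [pts]},
        })
    return json.dumps({"type": "FeatureCollection", "features": features})


# Inside the Mapper, the resulting string would then be attached to the sample, e.g.:
# sample["text"] = mask_to_geojson(mask, scale_factor)
```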

### 2. Performance & Timing
- **Execution timing**: A timer runs inside the operator's execution cycle and records the actual processing time of every slide task.
- **Metric association**: The processing time and the input file size are stored as metadata in the output, making it easier to evaluate algorithm performance later (see the sketch after this list).
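
A rough sketch of how such metadata could be attached inside the Mapper's processing step; the key names (`meta`, `processing_time`, `file_size`) and the `run_histoqc` helper are assumptions for illustration, not the operator's actual API.

```python
import os
import time


def process(self, sample):
    wsi_path = sample["image"]          # path to the input WSI (illustrative key)
    start = time.perf_counter()

    result = run_histoqc(wsi_path)      # hypothetical call into the HistoQC pipeline

    # Store timing and input size so performance can be analysed downstream.
    sample["meta"] = {
        "processing_time": round(time.perf_counter() - start, 3),  # seconds
        "file_size": os.path.getsize(wsi_path),                    # bytes
    }
    sample["text"] = result
    return sample
```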
126 changes: 126 additions & 0 deletions runtime/ops/histoqc_op/histoqc_src/HistoQC/histoqc/AnnotationModule.py
@@ -0,0 +1,126 @@
import logging
from typing import List, Tuple
from histoqc.BaseImage import printMaskHelper
from skimage import io, img_as_ubyte
import os
from pathlib import PurePosixPath, Path
from shapely.geometry import Polygon
from shapely import affinity
from PIL import Image, ImageDraw
import numpy as np
from histoqc.annotations.annot_collection import AnnotCollection, PARSER_BUILDER_MAP, TYPE_SUPPORTED_PARSER, Region


def rescale_by_img_bbox(polygon: Polygon, offset_xy: Tuple[float, float], resize_factor: float) -> Polygon:
if isinstance(offset_xy, float):
offset_xy = (offset_xy, offset_xy)
x_off, y_off = offset_xy
polygon = affinity.translate(polygon, xoff=x_off, yoff=y_off)
polygon = affinity.scale(polygon, xfact=resize_factor, yfact=resize_factor, origin=(0, 0))
return polygon


def polygon_filled(draw_pil: ImageDraw, polygon: Polygon, offset_xy: Tuple[float, float], resize_factor: float):
polygon = rescale_by_img_bbox(polygon, offset_xy, resize_factor)
# outer
exterior_coords = list(polygon.exterior.coords)
draw_pil.polygon(exterior_coords, fill=1, outline=1, width=0)
for component in polygon.interiors:
interior_coord = list(component.coords)
draw_pil.polygon(interior_coord, fill=0, outline=0, width=0)
return draw_pil


def annotation_to_mask(width: int, height: int, annot_collection: AnnotCollection, offset_xy: Tuple[float, float],
resize_factor: float) -> np.ndarray:
# binary
mask = Image.new(mode="1", size=(width, height))
draw_pil = ImageDraw.Draw(mask)
all_regions: List[Region] = annot_collection.all_regions
for region in all_regions:
polygon: Polygon = region['polygon']
# skip if empty ring (e.g., misclick in qupath)
if polygon.is_empty or (not polygon.is_valid):
continue
draw_pil = polygon_filled(draw_pil, polygon, offset_xy, resize_factor)
# noinspection PyTypeChecker
return np.array(mask)


def getParams(s, params):
# read params - format: xml, json; file_path; suffix;
ann_format = params.get("format", None)
file_path = params.get("file_path", None)
suffix = params.get("suffix", "")

# try use default value if the params are not provided
if not ann_format:
# set default format
ann_format = "xml"
# warning msg
msg = f"format is not provided, using xml as the default format."
logging.warning(f"{s['filename']} - {msg}")
s["warnings"].append(msg)

if not file_path:
# set default file path
file_path = s["dir"]
# warning msg
msg = f"file path is not provided, using \"{s['dir']}\" as the default file path"
logging.warning(f"{s['filename']} - {msg}")
s["warnings"].append(msg)

return ann_format, file_path, suffix


def saveAnnotationMask(s, params):
logging.info(f"{s['filename']} - \tgetAnnotationMask")

(ann_format, file_path, suffix) = getParams(s, params)

# annotation file path
f_path = f"{file_path}{os.sep}{PurePosixPath(s['filename']).stem}{suffix}.{ann_format}"

if not Path(f_path).is_file():
msg = f"Annotation file {f_path} does not exist. Skipping..."
logging.warning(f"{s['filename']} - {msg}")
s["warnings"].append(msg)
return

logging.info(f"{s['filename']} - \tusing {f_path}")

# todo better using the Py3.10 match statement - so it will be a Literal
# noinspection PyTypeChecker
annotation_type: TYPE_SUPPORTED_PARSER = ann_format.lower()
logging.info(f"{s['filename']} - \tusing {annotation_type}")
# read points set
if annotation_type in PARSER_BUILDER_MAP: # xml
annot_collection = AnnotCollection.build(parser_type=annotation_type, uri=f_path, label_map=None)
# get_points_from_geojson(s, f_path)
else: # unsupported format
msg = f"unsupported file format '{ann_format}'. Skipping..."
logging.warning(f"{s['filename']} - {msg}")
s["warnings"].append(msg)
return

(off_x, off_y, ncol, nrow) = s["img_bbox"]
resize_factor = np.shape(s["img_mask_use"])[1] / ncol
height, width = s["img_mask_use"].shape
annotationMask = annotation_to_mask(width, height, annot_collection, (off_x, off_y), resize_factor) > 0

mask_file_name = f"{s['outdir']}{os.sep}{s['filename']}_annot_{ann_format.lower()}.png"
io.imsave(mask_file_name, img_as_ubyte(annotationMask))

prev_mask = s["img_mask_use"]
s["img_mask_use"] = prev_mask & annotationMask
s.addToPrintList("getAnnotationMask",
printMaskHelper(params.get("mask_statistics", s["mask_statistics"]), prev_mask, s["img_mask_use"]))

if len(s["img_mask_use"].nonzero()[0]) == 0: # add warning in case the final tissue is empty
logging.warning(
f"{s['filename']} - After AnnotationModule.getAnnotationMask "
f"NO tissue remains detectable! Downstream modules likely to be incorrect/fail")
s["warnings"].append(
f"After AnnotationModule.getAnnotationMask NO tissue remains detectable!"
f" Downstream modules likely to be incorrect/fail")
return
@@ -0,0 +1,84 @@
import cv2
import numpy as np
import json
import os
import logging
from pathlib import Path

def export_geojson(di, params):
    # Output directory of the current slide
    outdir = di.get("outdir", ".")
    # Slide name (HistoQC usually prefixes its saved images with this name)
    sname = di.get("sname") or di.get("img_base")
    if not sname and "filename" in di:
        sname = Path(di["filename"]).stem

    if not sname:
        logging.error("Unable to determine slide name; skipping GeoJSON generation.")
        return

    # Scale factor (ratio between the original WSI and the processed image)
    scale_factor = di.get("log_factor", 1.0)

    # Mask suffixes to detect and the corresponding QuPath classes.
    # Note: these must match the file name suffixes generated in the output folder.
    ARTIFACT_CONFIG = {
        "_pen_markings.png": {"name": "Pen Marking", "color": -65536},
        "_coverslip_edge.png": {"name": "Coverslip Edge", "color": -16711936},
        "_flat.png": {"name": "Air Bubble", "color": -16776961}
    }

    all_features = []
    found_files = []

    logging.info(f"[Slide: {sname}] Starting post-run artifact extraction")

    # Scan the output folder for the matching PNG masks
    for suffix, info in ARTIFACT_CONFIG.items():
        # HistoQC usually saves masks as {sname}{suffix},
        # e.g. TCGA-XXX_pen_markings.png
        target_path = Path(outdir) / f"{sname}{suffix}"

        if not target_path.exists():
            # Fallback scan: fuzzy match in case the file name contains extra dots or spaces
            potential = list(Path(outdir).glob(f"*{suffix}"))
            if potential:
                target_path = potential[0]
            else:
                continue

        # Read the mask and skip it if it is missing or empty
        mask = cv2.imread(str(target_path), cv2.IMREAD_GRAYSCALE)
        if mask is None or np.max(mask) == 0:
            continue

        found_files.append(target_path.name)

        # Extract contours from the binarized mask
        _, thresh = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours:
            pts = (contour.reshape(-1, 2) * scale_factor).tolist()
            if len(pts) < 3:
                continue
            pts.append(pts[0])  # close the ring

            all_features.append({
                "type": "Feature",
                "properties": {
                    "objectType": "annotation",
                    "classification": {"name": info["name"], "colorRGB": info["color"]},
                    "isLocked": False
                },
                "geometry": {"type": "Polygon", "coordinates": [pts]}
            })

    # Save the collected features as a GeoJSON FeatureCollection
    if all_features:
        output_file = Path(outdir) / f"{sname}_artifacts.json"
        with open(output_file, 'w') as f:
            json.dump({"type": "FeatureCollection", "features": all_features}, f)
        logging.info(f"[Success] Extracted {len(all_features)} annotation polygons from {found_files}")
    else:
        logging.warning(f"[Skipped] No valid artifact masks found in {outdir}, or the masks were empty")

    return
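
For reference, a hypothetical standalone call to `export_geojson` might look like the sketch below; the paths and the `log_factor` value are made up, and the function only reads `outdir`, `sname` (or `img_base` / `filename`) and `log_factor` from the state dict while ignoring `params`.

```python
di = {
    "outdir": "/path/to/histoqc_output/slide_001",   # folder containing the HistoQC mask PNGs
    "sname": "slide_001",
    "log_factor": 32.0,   # assumed ratio between WSI base resolution and mask resolution
}
export_geojson(di, params={})
# Writes slide_001_artifacts.json next to the masks if any artifact PNGs
# (e.g. slide_001_pen_markings.png) are found and non-empty.
```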