epicure.tm_loader
Module for loading TrackMate files into EpiCure.
This loader uses an iterative parsing approach to efficiently handle large XML files.
Relevant metadata such as the time step and pixel size are stored in a dictionary. The cell positions are stored in a single NumPy array with columns for label, time, x position, and y position. Segmentations are stored as NumPy arrays with dimensions matching the original image data. Tracks are stored as a dictionary mapping daughter cell labels to their mother cell labels: {label_of_daughter_cell: [label_of_mother_cell]}
"""
Module for loading TrackMate files into EpiCure.

This loader uses an iterative parsing approach to efficiently handle large XML files.

Relevant metadata such as time step and pixel size are stored into a dictionary.
The cell positions are stored in a single NumPy array with columns for label, time,
x position, and y position.
Segmentations are stored as NumPy arrays with dimensions matching the original image data.
Tracks are stored as a dictionary mapping daughter cell labels to their mother cell labels:
{label_of_daughter_cell: [label_of_mother_cell]}
"""

import xml.etree.ElementTree as ET
from copy import deepcopy
from pathlib import Path
from typing import Iterator, Union

import numpy as np
from skimage.draw import polygon2mask
import epicure.Utils as ut


def _get_ImageData_tag(xml_path: Path) -> ET.Element:
    """
    Extract the 'ImageData' tag from an XML file.

    The file is parsed iteratively. As soon as the 'ImageData' element is
    complete, a new element holding only its attributes is created and the
    parsing stops.

    Args:
        xml_path (Path): The file path of the XML file to be parsed.

    Returns:
        ET.Element: An `ET.Element` object with the 'ImageData' attributes.

    Raises:
        LookupError: If the 'ImageData' tag is not found in the XML file.
    """
    img_data_tag = None
    with open(xml_path, "rb") as f:
        it = ET.iterparse(f, events=["start", "end"])
        _, root = next(it)  # Saving the root of the tree for later cleaning.

        for event, element in it:
            if event != "end":
                continue
            if element.tag == "ImageData":
                # Create a new element with only the attributes (no children subtree).
                img_data_tag = ET.Element(element.tag, element.attrib)
                root.clear()  # Cleaning the tree to free up memory.
                break  # We found what we need, exit early.
            # Any other completed element is of no interest: release it.
            element.clear()

    if img_data_tag is None:
        raise LookupError("The 'ImageData' tag was not found in the XML file.")

    return img_data_tag


def _get_metadata(img_data: ET.Element) -> dict[str, Union[int, float, str]]:
    """
    Extract metadata from the 'ImageData' XML element.

    The attributes 'width', 'height' and 'nframes' are converted to `int`;
    'pixelwidth', 'pixelheight' and 'timeinterval' are converted to `float`.

    Args:
        img_data (ET.Element): The XML element containing the 'ImageData' information.

    Returns:
        dict: A dictionary containing the extracted image metadata.

    Raises:
        KeyError: If one of the expected attributes is missing.
        AssertionError: If the pixel width and pixel height differ.
    """
    int_keys = ["width", "height", "nframes"]
    float_keys = ["pixelwidth", "pixelheight", "timeinterval"]
    metadata: dict[str, Union[int, float, str]] = {}

    # Integer attributes are validated first, then the float ones.
    for keys, convert in ((int_keys, int), (float_keys, float)):
        for key in keys:
            raw_value = img_data.attrib.get(key)
            if raw_value is None:
                raise KeyError(f"No '{key}' attribute in the 'ImageData' XML element.")
            metadata[key] = convert(raw_value)

    # TODO: check if non-square pixels ok.
    # NOTE: this check is an `assert`, so it is skipped when Python runs with -O.
    assert metadata["pixelwidth"] == metadata["pixelheight"], "Non-square pixels. Unsure if supported?"

    return metadata


def _get_units(
    element: ET.Element,
) -> dict[str, str]:
    """Extract units information from an XML element and return it as a dictionary.

    This function deep copies the attributes of the XML element into a dictionary,
    then clears the element to free up memory.

    Args:
        element (ET.Element): The XML element holding the units information.

    Returns:
        dict[str, str]: A dictionary containing the units information.
            Keys 'spatialunits' and 'timeunits' are always present; any other
            attribute of the element is kept as well.

    Warns:
        If the 'spatialunits' or 'timeunits' attributes are not found,
        defaulting them to 'pixel' and 'frame', respectively.
    """
    units = {}  # type: dict[str, str]
    if element.attrib:
        units = deepcopy(element.attrib)
    if "spatialunits" not in units:
        ut.show_warning("No space unit found in the XML file. Setting to 'pixel'.")
        units["spatialunits"] = "pixel"  # TrackMate default value
    if "timeunits" not in units:
        ut.show_warning("No time unit found in the XML file. Setting to 'frame'.")
        units["timeunits"] = "frame"  # TrackMate default value
    element.clear()  # We won't need it anymore so we free up some memory.
    # .clear() does not delete the element: it only removes all subelements
    # and clears or sets to `None` all attributes.
    return units


def _parse_all_spots(
    it: Iterator[tuple[str, ET.Element]],
    positions: np.ndarray,
    segmentation: np.ndarray,
    metadata: dict[str, Union[int, float, str]],
) -> list[int]:
    """
    Parse the 'AllSpots' XML element to extract spot positions and segmentation data.

    This function iterates through the XML elements under 'AllSpots'. For each
    visible spot, a row [label, frame, x, y] is written into `positions` and,
    when the spot has a contour, the corresponding polygon is drawn into
    `segmentation` with the spot ID as value. Both arrays are modified in place.

    Args:
        it (ET.iterparse): An iterator for parsing XML elements.
        positions (np.ndarray): A NumPy array to store the extracted positions.
        segmentation (np.ndarray): A NumPy array to store segmentation data,
            indexed by frame along its first axis.
        metadata (dict[str, Union[int, float, str]]): A dictionary providing
            'pixelwidth' and 'pixelheight' (both default to 1.0 when absent).

    Returns:
        list[int]: A list of spot IDs filtered out in TrackMate.
    """
    px_width = float(metadata.get("pixelwidth", 1.0))
    px_height = float(metadata.get("pixelheight", 1.0))
    spot_index = 0
    invisible_spots_ids = []
    for event, element in it:
        if element.tag == "Spot" and event == "end":
            if not int(element.attrib["VISIBILITY"]):
                invisible_spots_ids.append(int(element.attrib["ID"]))
                element.clear()
                continue  # skip invisible spots

            # Positions are converted from physical units to pixel units.
            t = int(float(element.attrib["FRAME"]))
            x = float(element.attrib["POSITION_X"]) / px_width
            y = float(element.attrib["POSITION_Y"]) / px_height
            label = int(element.attrib["ID"])
            positions[spot_index] = [label, t, x, y]

            contour = element.text
            # A blank text node is treated like a missing contour.
            if contour is not None and contour.strip():
                # 'ROI_N_POINTS' is only read when there is a contour, so that
                # spots without a contour reach the `else` branch below instead
                # of failing on a missing attribute.
                npoints = int(element.attrib["ROI_N_POINTS"])
                coords = np.array([float(value) for value in contour.split()])
                dimension = len(coords) // npoints
                coords = coords.reshape(-1, dimension)
                # Contour coordinates are relative to the spot center.
                coords[:, 0] = x + (coords[:, 0] / px_width)
                coords[:, 1] = y + (coords[:, 1] / px_height)
                contour_rc = np.flip(coords, axis=1)  # x, y to row, col
                mask = polygon2mask(segmentation[t].shape, contour_rc)
                segmentation[t][mask] = label
            else:
                print(f"no contour for spot {label}, frame {t}")

            spot_index += 1
            element.clear()
        elif element.tag == "AllSpots" and event == "end":
            break

    return invisible_spots_ids


def _parse_all_tracks(it: Iterator[tuple[str, ET.Element]], tracks: dict[int, list[int]]) -> None:
    """
    Parse the 'AllTracks' XML element to extract track information.

    This function iterates through the XML elements under 'AllTracks' to extract
    track information and populate the tracks dictionary in place. This dictionary
    maps daughter cell labels to their mother cell labels.

    Args:
        it (ET.iterparse): An iterator for parsing XML elements.
        tracks (dict[int, list[int]]): A dictionary to store the extracted tracks.
    """
    for event, element in it:
        if element.tag == "Edge" and event == "start":
            mother_id = int(element.attrib["SPOT_SOURCE_ID"])
            daughter_id = int(element.attrib["SPOT_TARGET_ID"])
            tracks.setdefault(daughter_id, []).append(mother_id)
            element.clear()

        elif element.tag == "AllTracks" and event == "end":
            break


def _build_label_mapping(positions: np.ndarray, tracks: dict[int, list[int]]) -> dict[int, int]:
    """
    Build a mapping from TrackMate labels to EpiCure labels.

    In TrackMate, each detected spot has a unique label, while in EpiCure,
    labels are constant per tracklet, hence the need for mapping.
    New labels are assigned starting from 1, browsing the spots frame by frame.

    Args:
        positions (np.ndarray): The array of positions, with the label in
            column 0 and the frame in column 1.
        tracks (dict[int, list[int]]): The dictionary of tracks.

    Returns:
        dict[int, int]: A dictionary mapping TrackMate labels to EpiCure labels.

    Raises:
        AssertionError: If the mapped labels do not match the labels in `positions`.
    """
    # Reverse mapping from daughter to mother to get simple edges and division edges.
    mother_to_daughters: dict[int, list[int]] = {}
    for daughter, mothers in tracks.items():
        for mother in mothers:
            mother_to_daughters.setdefault(mother, []).append(daughter)

    edges = {m: d[0] for m, d in mother_to_daughters.items() if len(d) == 1}
    divisions = {m: d for m, d in mother_to_daughters.items() if len(d) > 1}
    fusions = {d: m for d, m in tracks.items() if len(m) > 1}
    fusion_mothers = set()  # a set of mothers that participate in fusions
    for mothers in fusions.values():
        fusion_mothers.update(mothers)

    label_mapping: dict[int, int] = {}
    new_label = 1
    frames = np.unique(positions[:, 1])

    for frame in frames:
        frame_positions = positions[positions[:, 1] == frame]
        for old_label in frame_positions[:, 0]:
            old_label_int = int(old_label)

            # Division edge => mother keeps its label, each daughter gets a new label.
            if old_label_int in divisions:
                # Assign new label to the mother when not already assigned (tracklet start).
                if old_label_int not in label_mapping:
                    label_mapping[old_label_int] = new_label
                    new_label += 1
                # Assign new labels to each daughter.
                for daughter in divisions[old_label_int]:
                    label_mapping[int(daughter)] = new_label
                    new_label += 1

            # Mother in a fusion => gets its own label, tracklet ends here.
            elif old_label_int in fusion_mothers:
                if old_label_int not in label_mapping:
                    label_mapping[old_label_int] = new_label
                    new_label += 1

            # Daughter in a fusion => gets a new label (multiple mothers merge into this).
            elif old_label_int in fusions:
                if old_label_int not in label_mapping:
                    label_mapping[old_label_int] = new_label
                    new_label += 1
                # If this fusion daughter is also a mother in a simple edge, propagate the label.
                if old_label_int in edges:
                    daughter_id = int(edges[old_label_int])
                    if daughter_id not in label_mapping:
                        label_mapping[daughter_id] = label_mapping[old_label_int]

            # Simple edge => mother and daughter share the same label (same tracklet).
            elif old_label_int in edges:
                if old_label_int not in label_mapping:
                    label_mapping[old_label_int] = new_label
                    new_label += 1
                # Propagate the same label to the daughter (continue tracklet).
                daughter_id = int(edges[old_label_int])
                if daughter_id not in label_mapping:
                    label_mapping[daughter_id] = label_mapping[old_label_int]

            # Lone detection or start of new track.
            elif old_label_int not in label_mapping:
                label_mapping[old_label_int] = new_label
                new_label += 1

    # Do we have everyone mapped?
    assert len(label_mapping) == positions.shape[0], "Some labels were not mapped!"
    assert sorted(label_mapping.keys()) == sorted(set(positions[:, 0].astype(int))), "Some labels were not mapped!"

    return label_mapping


def relabel_positions(label_mapping: dict[int, int], positions: np.ndarray) -> np.ndarray:
    """
    Relabel positions to match EpiCure requirements.

    The input array is left untouched; a new array is returned in which the
    label column (column 0) holds the EpiCure labels.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        positions (np.ndarray): The array of positions to be relabeled.

    Returns:
        np.ndarray: The relabeled positions.

    Raises:
        KeyError: If a label of `positions` is absent from `label_mapping`.
    """
    new_positions = np.zeros_like(positions)
    for i in range(positions.shape[0]):
        old_label = int(positions[i, 0])
        new_label = label_mapping[old_label]
        new_positions[i] = positions[i]
        new_positions[i, 0] = new_label
    return new_positions


def relabel_tracks(label_mapping: dict[int, int], tracks: dict[int, list[int]]) -> dict[int, list[int]]:
    """
    Relabel tracks to match EpiCure requirements.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        tracks (dict[int, list[int]]): The dictionary of tracks to be relabeled.

    Returns:
        dict[int, list[int]]: The relabeled tracks.

    Raises:
        KeyError: If a label of `tracks` is absent from `label_mapping`.
    """
    new_tracks = {}
    for daughter_old, mothers_old in tracks.items():
        daughter_new = label_mapping[daughter_old]
        mothers_new = [label_mapping[mother_old] for mother_old in mothers_old]
        # Ignore entries for which the daughter label is identical to the mother(s) label.
        if daughter_new not in mothers_new:
            new_tracks[daughter_new] = mothers_new
    return new_tracks


def relabel_segmentation(label_mapping: dict[int, int], segmentation: np.ndarray) -> np.ndarray:
    """
    Relabel segmentation to match EpiCure requirements.

    The result is built in a fresh array, so that a new label equal to another
    old label cannot be relabeled a second time. Values of `segmentation` that
    are not keys of `label_mapping` are set to 0.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        segmentation (np.ndarray): The segmentation array to be relabeled.

    Returns:
        np.ndarray: The relabeled segmentation.
    """
    new_seg = np.zeros_like(segmentation)
    for old_label, new_label in label_mapping.items():
        new_seg[segmentation == old_label] = new_label
    return new_seg


def _parse_Model_tag(
    xml_path: Path,
    metadata: dict[str, Union[int, float, str]],
    segmentation: np.ndarray,
) -> tuple[np.ndarray, dict[int, list[int]]]:
    """
    Parse the 'Model' tag of an XML file to extract positions, segmentation and tracks.

    The file is parsed iteratively. The units found on the 'Model' tag are
    added to `metadata`, the spots under 'AllSpots' fill the positions array
    and `segmentation`, and the edges under 'AllTracks' fill the tracks
    dictionary. Parsing stops at the end of the 'Model' tag.

    Args:
        xml_path (Path): The file path of the XML file to be parsed.
        metadata (dict[str, int | float]): A dictionary to update with extracted units information.
        segmentation (np.ndarray): A NumPy array to store segmentation data.

    Returns:
        np.ndarray: A NumPy array containing the positions data.
        dict[int, list[int]]: A dictionary containing the tracks data.
    """
    ignored_spots = None
    with open(xml_path, "rb") as f:
        it = ET.iterparse(f, events=["start", "end"])
        _, root = next(it)  # saving the root of the tree for later cleaning

        units: dict[str, str] = {}
        positions: np.ndarray = np.empty((0, 4), dtype=np.float32)
        tracks: dict[int, list[int]] = {}
        for event, element in it:
            # Check for the 'Model' tag
            if element.tag == "Model" and event == "start":
                units = _get_units(element)
                metadata.update(units)
                root.clear()  # cleaning the tree to free up some memory
                # All the browsed subelements of `root` are deleted.

            # From AllSpots we extract the positions and segmentation.
            if element.tag == "AllSpots" and event == "start":
                positions = np.zeros((int(element.attrib["nspots"]), 4), dtype=np.float32)
                ignored_spots = _parse_all_spots(it, positions, segmentation, metadata)
                root.clear()

            # From AllTracks we extract the dict of tracks.
            if element.tag == "AllTracks" and event == "start":
                _parse_all_tracks(it, tracks)
                root.clear()

            if element.tag == "Model" and event == "end":
                root.clear()
                break  # not interested in the following data

    # Only warn (and trim) when at least one spot was actually filtered out.
    if ignored_spots:
        ut.show_warning(f"{len(ignored_spots)} spots were filtered out in TrackMate and will not be loaded into EpiCure. IDs: {ignored_spots}.")
        # The array positions was initialized with the total number of spots,
        # but since some spots were ignored, we need to filter them out.
        positions = positions[: positions.shape[0] - len(ignored_spots)]

    return positions, tracks
def relabel_positions(label_mapping: dict[int, int], positions: np.ndarray) -> np.ndarray:
    """
    Return a copy of `positions` whose label column holds EpiCure labels.

    The input array is not modified. Column 0 of every row is replaced by the
    EpiCure label found in `label_mapping`; the other columns are copied as is.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        positions (np.ndarray): The array of positions to be relabeled.

    Returns:
        np.ndarray: The relabeled positions.
    """
    # Resolve every label first, one row after the other.
    epicure_labels = [label_mapping[int(row[0])] for row in positions]

    relabeled = np.zeros_like(positions)
    relabeled[...] = positions
    for row_index, epicure_label in enumerate(epicure_labels):
        relabeled[row_index, 0] = epicure_label
    return relabeled
Relabel positions to match EpiCure requirements.
Args: label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels. positions (np.ndarray): The array of positions to be relabeled.
Returns: np.ndarray: The relabeled positions.
def relabel_tracks(label_mapping: dict[int, int], tracks: dict[int, list[int]]) -> dict[int, list[int]]:
    """
    Translate a tracks dictionary from TrackMate labels to EpiCure labels.

    Entries whose relabeled daughter also appears among its relabeled mothers
    are left out of the result.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        tracks (dict[int, list[int]]): The dictionary of tracks to be relabeled.

    Returns:
        dict[int, list[int]]: The relabeled tracks.
    """
    relabeled = {}
    for old_daughter, old_mothers in tracks.items():
        daughter = label_mapping[old_daughter]
        mothers = list(map(label_mapping.__getitem__, old_mothers))
        if daughter in mothers:
            # Daughter and mother carry the same label: nothing to record.
            continue
        relabeled[daughter] = mothers
    return relabeled
Relabel tracks to match EpiCure requirements.
Args: label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels. tracks (dict[int, list[int]]): The dictionary of tracks to be relabeled.
Returns: dict[int, list[int]]: The relabeled tracks.
def relabel_segmentation(label_mapping: dict[int, int], segmentation: np.ndarray) -> np.ndarray:
    """
    Build a segmentation in which TrackMate labels are replaced by EpiCure labels.

    The output starts as an all-zero array shaped like `segmentation`, and is
    filled one label at a time, always reading from the untouched input.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        segmentation (np.ndarray): The segmentation array to be relabeled.

    Returns:
        np.ndarray: The relabeled segmentation.
    """
    relabeled = np.zeros_like(segmentation)
    for trackmate_label in label_mapping:
        # Pixels are selected on the input, so already relabeled pixels are never picked again.
        pixels = segmentation == trackmate_label
        relabeled[pixels] = label_mapping[trackmate_label]
    return relabeled
Relabel segmentation to match EpiCure requirements.
Args: label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels. segmentation (np.ndarray): The segmentation array to be relabeled.
Returns: np.ndarray: The relabeled segmentation.