epicure.tm_loader

Module for loading TrackMate files into EpiCure.

This loader uses an iterative parsing approach to efficiently handle large XML files.

Relevant metadata such as time step and pixel size are stored in a dictionary. Cell positions are stored in a single NumPy array with columns for label, time, x position, and y position. Segmentations are stored as NumPy arrays with dimensions matching the original image data. Tracks are stored as a dictionary mapping each daughter cell label to the list of its mother cell labels: {label_of_daughter_cell: [label_of_mother_cell]}.

  1"""
  2Module for loading TrackMate files into EpiCure.
  3
  4This loader uses an iterative parsing approach to efficiently handle large XML files.
  5
  6Relevant metadata such as time step and pixel size are stored into a dictionary.
  7The cells positions are stored in a unique NumPy array with columns for label, time,
  8x position, and y position.
  9Segmentations are stored as NumPy arrays with dimensions matching the original image data.
 10Tracks are stored as a dictionary mapping daughter cell labels to their mother cell labels:
 11{label_of_daughter_cell: [label_of_mother_cell]}
 12"""
 13
 14import xml.etree.ElementTree as ET
 15from copy import deepcopy
 16from pathlib import Path
 17from typing import Iterator, Union
 18
 19import numpy as np
 20from skimage.draw import polygon2mask
 21import epicure.Utils as ut
 22
 23
 24def _get_ImageData_tag(xml_path: Path) -> ET.Element:
 25    """
 26    Extract the 'ImageData' tag from an XML file.
 27
 28    This function parses an XML file to find and extract the 'ImageData' tag.
 29    Once found, a new element with only the attributes is created and returned.
 30
 31    Args:
 32        xml_path (Path): The file path of the XML file to be parsed.
 33
 34    Returns:
 35        ET.Element: An `ET.Element` object with the 'ImageData' attributes.
 36
 37    Raises:
 38        LookupError: If the 'ImageData' tag is not found in the XML file.
 39    """
 40    img_data_tag = None
 41    with open(xml_path, "rb") as f:
 42        it = ET.iterparse(f, events=["start", "end"])
 43        _, root = next(it)  # Saving the root of the tree for later cleaning.
 44
 45        for event, element in it:
 46            if event == "end" and element.tag == "ImageData":
 47                # Create a new element with only the attributes (no children subtree).
 48                img_data_tag = ET.Element(element.tag, element.attrib)
 49                root.clear()  # Cleaning the tree to free up memory.
 50                break  # We found what we need, exit early.
 51            elif event == "end":
 52                element.clear()
 53
 54    if img_data_tag is None:
 55        raise LookupError("The 'ImageData' tag was not found in the XML file.")
 56
 57    return img_data_tag
 58
 59
 60def _get_metadata(img_data: ET.Element) -> dict[str, Union[int, float, str]]:
 61    """
 62    Extract metadata from the 'ImageData' XML element.
 63
 64    Parameters
 65    ----------
 66    img_data : ET.Element
 67        The XML element containing the 'ImageData' information.
 68
 69    Returns
 70    -------
 71    dict
 72        A dictionary containing the extracted image metadata.
 73    """
 74    int_keys = ["width", "height", "nframes"]
 75    float_keys = ["pixelwidth", "pixelheight", "timeinterval"]
 76    metadata = {}
 77    for key in int_keys + float_keys:
 78        metadata[key] = img_data.attrib.get(key)
 79
 80    for key in int_keys:
 81        if metadata[key] is None:
 82            raise KeyError(f"No '{key}' attribute in the 'ImageData' XML element.")
 83        metadata[key] = int(metadata[key])
 84    for key in float_keys:
 85        if metadata[key] is None:
 86            raise KeyError(f"No '{key}' attribute in the 'ImageData' XML element.")
 87        metadata[key] = float(metadata[key])
 88
 89    # TODO: check if non-square pixels ok.
 90    assert metadata["pixelwidth"] == metadata["pixelheight"], "Non-square pixels. Unsure if supported?"
 91
 92    return metadata
 93
 94
 95def _get_units(
 96    element: ET.Element,
 97) -> dict[str, str]:
 98    """Extract units information from an XML element and return it as a dictionary.
 99
100    This function deep copies the attributes of the XML element into a dictionary,
101    then clears the element to free up memory.
102
103    Args:
104        element (ET._Element): The XML element holding the units information.
105
106    Returns:
107        dict[str, str]: A dictionary containing the units information.
108        Keys are 'spatialunits' and 'timeunits'.
109
110    Warns:
111        If the 'spatialunits' or 'timeunits' attributes are not found,
112        defaulting them to 'pixel' and 'frame', respectively.
113    """
114    units = {}  # type: dict[str, str]
115    if element.attrib:
116        units = deepcopy(element.attrib)
117    if "spatialunits" not in units:
118        ut.show_warning("No space unit found in the XML file. Setting to 'pixel'.")
119        units["spatialunits"] = "pixel"  # TrackMate default value
120    if "timeunits" not in units:
121        ut.show_warning("No time unit found in the XML file. Setting to 'frame'.")
122        units["timeunits"] = "frame"  # TrackMate default value
123    element.clear()  # We won't need it anymore so we free up some memory.
124    # .clear() does not delete the element: it only removes all subelements
125    # and clears or sets to `None` all attributes.
126    return units
127
128
129def _parse_all_spots(
130    it: Iterator[tuple[str, ET.Element]],
131    positions: np.ndarray,
132    segmentation: np.ndarray,
133    metadata: dict[str, Union[int, float, str]],
134) -> list[int]:
135    """
136    Parse the 'AllSpots' XML element to extract spot positions and segmentation data.
137
138    This function iterates through the XML elements under 'AllSpots' to extract
139    spot positions and update the segmentation array.
140
141    Args:
142        it (ET.iterparse): An iterator for parsing XML elements.
143        positions (np.ndarray): A NumPy array to store the extracted positions.
144        segmentation (np.ndarray): A NumPy array to store segmentation data.
145        metadata (dict[str, Union[int, float, str]]): A dictionary containing units information.
146
147    Returns:
148        list[int]: A list of spot IDs filtered out in TrackMate.
149    """
150    px_width = float(metadata.get("pixelwidth", 1.0))
151    px_height = float(metadata.get("pixelheight", 1.0))
152    spot_index = 0
153    invisible_spots_ids = []
154    for event, element in it:
155        if element.tag == "Spot" and event == "end":
156            if not int(element.attrib["VISIBILITY"]):
157                invisible_spots_ids.append(int(element.attrib["ID"]))
158                element.clear()
159                continue  # skip invisible spots
160
161            t = int(float(element.attrib["FRAME"]))
162            x = float(element.attrib["POSITION_X"]) / px_width
163            y = float(element.attrib["POSITION_Y"]) / px_height
164            label = int(element.attrib["ID"])
165            positions[spot_index] = [label, t, x, y]
166
167            contour = element.text
168            npoints = int(element.attrib["ROI_N_POINTS"])
169
170            if contour is not None:
171                coords = np.array([float(x) for x in contour.split()])
172                dimension = len(coords) // npoints
173                coords = coords.reshape(-1, dimension)
174                coords[:, 0] = x + (coords[:, 0] / px_width)
175                coords[:, 1] = y + (coords[:, 1] / px_height)
176                contour_rc = np.flip(coords, axis=1)  # x, y to row, col
177                mask = polygon2mask(segmentation[t].shape, contour_rc)
178                segmentation[t][mask] = label
179            else:
180                print(f"no contour for spot {label}, frame {t}")
181
182            spot_index += 1
183            element.clear()
184        elif element.tag == "AllSpots" and event == "end":
185            break
186
187    return invisible_spots_ids
188
189
190def _parse_all_tracks(it: Iterator[tuple[str, ET.Element]], tracks: dict[int, list[int]]) -> None:
191    """
192    Parse the 'AllTracks' XML element to extract track information.
193
194    This function iterates through the XML elements under 'AllTracks' to extract
195    track information and populate the tracks dictionary. This dictionary maps
196    daughter cell labels to their mother cell labels.
197
198    Args:
199        it (ET.iterparse): An iterator for parsing XML elements.
200        tracks (dict[int, list[int]]): A dictionary to store the extracted tracks.
201    """
202    for event, element in it:
203        if element.tag == "Edge" and event == "start":
204            mother_id = int(element.attrib["SPOT_SOURCE_ID"])
205            daughter_id = int(element.attrib["SPOT_TARGET_ID"])
206            if daughter_id not in tracks:
207                tracks[daughter_id] = [mother_id]
208            else:
209                tracks[daughter_id].append(mother_id)
210            element.clear()
211
212        elif element.tag == "AllTracks" and event == "end":
213            break
214
215
216def _build_label_mapping(positions: np.ndarray, tracks: dict[int, list[int]]) -> dict[int, int]:
217    """
218    Build a mapping from TrackMate labels to EpiCure labels.
219
220    In TrackMate, each detected spot has a unique label, while in EpiCure,
221    labels are constant per tracklet, hence the need for mapping.
222
223    Args:
224        positions (np.ndarray): The array of positions.
225        tracks (dict[int, list[int]]): The dictionary of tracks.
226
227    Returns:
228        dict[int, int]: A dictionary mapping TrackMate labels to EpiCure labels.
229    """
230    # Reverse mapping from daughter to mother to get simple edges and division edges.
231    mother_to_daughters = {}
232    for daughter, mothers in tracks.items():
233        for mother in mothers:
234            if mother not in mother_to_daughters:
235                mother_to_daughters[mother] = [daughter]
236            else:
237                mother_to_daughters[mother].append(daughter)
238
239    edges = {m: d[0] for m, d in mother_to_daughters.items() if len(d) == 1}
240    divisions = {m: d for m, d in mother_to_daughters.items() if len(d) > 1}
241    fusions = {d: m for d, m in tracks.items() if len(m) > 1}
242    fusion_mothers = set()  # a set of mothers that participate in fusions
243    for mothers in fusions.values():
244        fusion_mothers.update(mothers)
245
246    label_mapping = {}
247    new_label = 1
248    frames = np.unique(positions[:, 1])
249
250    for frame in frames:
251        frame_positions = positions[positions[:, 1] == frame]
252        for old_label in frame_positions[:, 0]:
253            old_label_int = int(old_label)
254
255            # Division edge => mother keeps its label, each daughter gets a new label.
256            if old_label_int in divisions:
257                # Assign new label to the mother when not already assigned (tracklet start).
258                if old_label_int not in label_mapping:
259                    label_mapping[old_label_int] = new_label
260                    new_label += 1
261                # Assign new labels to each daughter.
262                daughters = divisions[old_label_int]
263                for daughter in daughters:
264                    label_mapping[int(daughter)] = new_label
265                    new_label += 1
266
267            # Mother in a fusion => gets its own label, tracklet ends here.
268            elif old_label_int in fusion_mothers:
269                if old_label_int not in label_mapping:
270                    label_mapping[old_label_int] = new_label
271                    new_label += 1
272
273            # Daughter in a fusion => gets a new label (multiple mothers merge into this).
274            elif old_label_int in fusions:
275                if old_label_int not in label_mapping:
276                    label_mapping[old_label_int] = new_label
277                    new_label += 1
278                # If this fusion daughter is also a mother in a simple edge, propagate the label.
279                if old_label_int in edges:
280                    daughter_id = int(edges[old_label_int])
281                    if daughter_id not in label_mapping:
282                        label_mapping[daughter_id] = label_mapping[old_label_int]
283
284            # Simple edge => mother and daughter share the same label (same tracklet).
285            elif old_label_int in edges:
286                if old_label_int not in label_mapping:
287                    label_mapping[old_label_int] = new_label
288                    new_label += 1
289                # Propagate the same label to the daughter (continue tracklet).
290                daughter_id = int(edges[old_label_int])
291                if daughter_id not in label_mapping:
292                    label_mapping[daughter_id] = label_mapping[old_label_int]
293
294            # Lone detection or start of new track.
295            elif old_label_int not in label_mapping:
296                label_mapping[old_label_int] = new_label
297                new_label += 1
298
299    # Do we have everyone mapped?
300    assert len(label_mapping) == positions.shape[0], "Some labels were not mapped!"
301    assert sorted(label_mapping.keys()) == sorted(set(positions[:, 0].astype(int))), "Some labels were not mapped!"
302
303    return label_mapping
304
305
def relabel_positions(label_mapping: dict[int, int], positions: np.ndarray) -> np.ndarray:
    """
    Relabel positions to match EpiCure requirements.

    A new array is returned; `positions` is left untouched. Only the label
    column (column 0) differs between the two arrays.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        positions (np.ndarray): The array of positions to be relabeled.

    Returns:
        np.ndarray: The relabeled positions.

    Raises:
        KeyError: If a label of `positions` is absent from `label_mapping`.
    """
    # Look every label up first, so that nothing is built for an unknown label.
    relabeled_column = [label_mapping[int(old_label)] for old_label in positions[:, 0]]

    new_positions = np.zeros_like(positions)
    new_positions[...] = positions
    new_positions[:, 0] = relabeled_column
    return new_positions
324
325
def relabel_tracks(label_mapping: dict[int, int], tracks: dict[int, list[int]]) -> dict[int, list[int]]:
    """
    Relabel tracks to match EpiCure requirements.

    Every daughter and mother label is translated through `label_mapping`.
    Links whose daughter ends up with the same label as one of its mothers
    are dropped, since both ends then belong to the same tracklet.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        tracks (dict[int, list[int]]): The dictionary of tracks to be relabeled.

    Returns:
        dict[int, list[int]]: The relabeled tracks.

    Raises:
        KeyError: If a label of `tracks` is absent from `label_mapping`.
    """
    relabeled = {}
    for old_child, old_parents in tracks.items():
        child = label_mapping[old_child]
        parents = [label_mapping[old_parent] for old_parent in old_parents]
        if child in parents:
            continue  # Link internal to a tracklet: nothing to record.
        relabeled[child] = parents
    return relabeled
345
346
def relabel_segmentation(label_mapping: dict[int, int], segmentation: np.ndarray) -> np.ndarray:
    """
    Relabel segmentation to match EpiCure requirements.

    A new array with the shape and dtype of `segmentation` is returned; the
    input is left untouched. Values that are not a key of `label_mapping`
    are set to 0 in the result.

    The relabeling is done in a single pass through a lookup table built on
    the distinct values of `segmentation`, rather than with one full-array
    comparison per label.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        segmentation (np.ndarray): The segmentation array to be relabeled.

    Returns:
        np.ndarray: The relabeled segmentation.
    """
    # `inverse` gives, for each pixel, the index of its value in `old_values`.
    old_values, inverse = np.unique(segmentation, return_inverse=True)
    # One new label per distinct old value; anything unmapped becomes 0.
    lookup = np.array(
        [label_mapping.get(old_value, 0) for old_value in old_values.tolist()],
        dtype=segmentation.dtype,
    )
    # The shape of `inverse` depends on the NumPy version, hence the reshape.
    return lookup[inverse].reshape(segmentation.shape)
362
363
def _parse_Model_tag(
    xml_path: Path,
    metadata: dict[str, Union[int, float, str]],
    segmentation: np.ndarray,
) -> tuple[np.ndarray, dict[int, list[int]]]:
    """
    Parse the 'Model' tag of an XML file to extract positions and tracks.

    The file is streamed with `ET.iterparse` and parsing stops at the closing
    'Model' tag. On the way:

    - the units found on the 'Model' tag are added to `metadata` (in place);
    - the spots under 'AllSpots' fill the positions array and are drawn into
      `segmentation` (in place);
    - the edges under 'AllTracks' fill the tracks dictionary.

    Args:
        xml_path (Path): The file path of the XML file to be parsed.
        metadata (dict[str, int | float]): A dictionary to update with extracted units information.
        segmentation (np.ndarray): A NumPy array to store segmentation data.

    Returns:
        np.ndarray: A NumPy array containing the positions data, one row per
            loaded spot. It is empty with shape (0, 4) when the file has no
            'AllSpots' tag.
        dict[int, list[int]]: A dictionary containing the tracks data.
    """
    ignored_spots: list[int] = []
    # NOTE: labels are stored as float32, which represents integers exactly
    # only up to 2**24.
    positions: np.ndarray = np.empty((0, 4), dtype=np.float32)
    tracks: dict[int, list[int]] = {}

    with open(xml_path, "rb") as f:
        it = ET.iterparse(f, events=["start", "end"])
        _, root = next(it)  # saving the root of the tree for later cleaning

        for event, element in it:
            if event == "start":
                if element.tag == "Model":
                    metadata.update(_get_units(element))
                    # All the browsed subelements of `root` are deleted
                    # to free up some memory.
                    root.clear()

                elif element.tag == "AllSpots":
                    # From AllSpots we extract the positions and segmentation.
                    # One row is allocated per spot listed in the file.
                    positions = np.zeros((int(element.attrib["nspots"]), 4), dtype=np.float32)
                    ignored_spots = _parse_all_spots(it, positions, segmentation, metadata)
                    root.clear()

                elif element.tag == "AllTracks":
                    # From AllTracks we extract the dict of tracks.
                    _parse_all_tracks(it, tracks)
                    root.clear()

            elif element.tag == "Model":
                root.clear()
                break  # not interested in the following data

    # Only warn when spots were actually left out.
    if ignored_spots:
        ut.show_warning(f"{len(ignored_spots)} spots were filtered out in TrackMate and will not be loaded into EpiCure. IDs: {ignored_spots}.")
        # The array positions was initialized with the total number of spots,
        # but since some spots were ignored, we need to drop the unused rows
        # left at its end.
        positions = positions[: positions.shape[0] - len(ignored_spots)]

    return positions, tracks
def relabel_positions(label_mapping: dict[int, int], positions: numpy.ndarray) -> numpy.ndarray:
def relabel_positions(label_mapping: dict[int, int], positions: np.ndarray) -> np.ndarray:
    """
    Relabel positions to match EpiCure requirements.

    A new array is returned; `positions` is left untouched. Only the label
    column (column 0) differs between the two arrays.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        positions (np.ndarray): The array of positions to be relabeled.

    Returns:
        np.ndarray: The relabeled positions.

    Raises:
        KeyError: If a label of `positions` is absent from `label_mapping`.
    """
    # Look every label up first, so that nothing is built for an unknown label.
    relabeled_column = [label_mapping[int(old_label)] for old_label in positions[:, 0]]

    new_positions = np.zeros_like(positions)
    new_positions[...] = positions
    new_positions[:, 0] = relabeled_column
    return new_positions

Relabel positions to match EpiCure requirements.

Args:
    label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
    positions (np.ndarray): The array of positions to be relabeled.

Returns: np.ndarray: The relabeled positions.

def relabel_tracks( label_mapping: dict[int, int], tracks: dict[int, list[int]]) -> dict[int, list[int]]:
def relabel_tracks(label_mapping: dict[int, int], tracks: dict[int, list[int]]) -> dict[int, list[int]]:
    """
    Relabel tracks to match EpiCure requirements.

    Every daughter and mother label is translated through `label_mapping`.
    Links whose daughter ends up with the same label as one of its mothers
    are dropped, since both ends then belong to the same tracklet.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        tracks (dict[int, list[int]]): The dictionary of tracks to be relabeled.

    Returns:
        dict[int, list[int]]: The relabeled tracks.

    Raises:
        KeyError: If a label of `tracks` is absent from `label_mapping`.
    """
    relabeled = {}
    for old_child, old_parents in tracks.items():
        child = label_mapping[old_child]
        parents = [label_mapping[old_parent] for old_parent in old_parents]
        if child in parents:
            continue  # Link internal to a tracklet: nothing to record.
        relabeled[child] = parents
    return relabeled

Relabel tracks to match EpiCure requirements.

Args:
    label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
    tracks (dict[int, list[int]]): The dictionary of tracks to be relabeled.

Returns: dict[int, list[int]]: The relabeled tracks.

def relabel_segmentation( label_mapping: dict[int, int], segmentation: numpy.ndarray) -> numpy.ndarray:
def relabel_segmentation(label_mapping: dict[int, int], segmentation: np.ndarray) -> np.ndarray:
    """
    Relabel segmentation to match EpiCure requirements.

    A new array with the shape and dtype of `segmentation` is returned; the
    input is left untouched. Values that are not a key of `label_mapping`
    are set to 0 in the result.

    The relabeling is done in a single pass through a lookup table built on
    the distinct values of `segmentation`, rather than with one full-array
    comparison per label.

    Args:
        label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
        segmentation (np.ndarray): The segmentation array to be relabeled.

    Returns:
        np.ndarray: The relabeled segmentation.
    """
    # `inverse` gives, for each pixel, the index of its value in `old_values`.
    old_values, inverse = np.unique(segmentation, return_inverse=True)
    # One new label per distinct old value; anything unmapped becomes 0.
    lookup = np.array(
        [label_mapping.get(old_value, 0) for old_value in old_values.tolist()],
        dtype=segmentation.dtype,
    )
    # The shape of `inverse` depends on the NumPy version, hence the reshape.
    return lookup[inverse].reshape(segmentation.shape)

Relabel segmentation to match EpiCure requirements.

Args:
    label_mapping (dict[int, int]): A dictionary mapping TrackMate labels to EpiCure labels.
    segmentation (np.ndarray): The segmentation array to be relabeled.

Returns: np.ndarray: The relabeled segmentation.