Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
f3fe407
Reword and reprioritize main.py TODO list
robertsamples Jun 29, 2026
9feb9ec
Add search-tab run-check, clean up main.py's dead imports
robertsamples Jun 29, 2026
b5a804c
Add GroupSetModel.move() for groupset reordering (model layer only)
robertsamples Jun 29, 2026
504410f
Clarify stale PCA TODO note
robertsamples Jun 29, 2026
f06a971
Merge remote-tracking branch 'origin/main' into todo-cleanup
robertsamples Jun 29, 2026
e110fa1
Merge branch 'main' into todo-cleanup
robertsamples Jun 29, 2026
c0c160c
Add Qt-free multivariate ordination backend (PCA/NMDS/PLS-DA)
robertsamples Jun 29, 2026
9c15631
Rework the mislabeled "PCA" plot into a multivariate ordination tab
robertsamples Jun 29, 2026
c7b6a01
Fix ordination feedback: scaling, axis limits, NMDS %explained, bar s…
robertsamples Jun 29, 2026
ede1968
Add dendrogram purity coloring: technical/biological replicate QC view
robertsamples Jun 29, 2026
573cbfa
Dendrogram: polyphyletic branches in red, add a no-coloring option
robertsamples Jun 29, 2026
bab713d
Replace treemap/upset PNG round-trip with real canvas plots
robertsamples Jun 29, 2026
3be44dc
Dendrogram: bridge-only red coloring; move bootstrap/collapse checkbo…
robertsamples Jun 29, 2026
5ca9baf
Fix dendrogram coloring: red = proven non-monophyly (label-set overlap)
robertsamples Jun 29, 2026
66b3e02
Dendrogram: add Use Sample/Group Names labels; fix AU/BP label scaling
robertsamples Jun 29, 2026
9697aa3
Docs: update mkdocs guide for ordination rework and dendrogram improv…
robertsamples Jun 29, 2026
52c1d37
correlation matrix control improvements
robertsamples Jun 30, 2026
6a28902
Update tests.yml
robertsamples Jun 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
# against a few system libraries the base image doesn't ship.
- if: runner.os == 'Linux'
run: sudo apt-get update && sudo apt-get install -y libgl1 libxkbcommon-x11-0 libxcb-cursor0
- run: pip install "numpy<2" pandas scipy tqdm pytest PyQt5
- run: pip install "numpy<2" pandas scipy scikit-learn tqdm pytest PyQt5
- run: python -m pytest code/tests -v

lint:
Expand Down
108 changes: 108 additions & 0 deletions code/clusterpurity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""
MPACT
Copyright 2022, Robert M. Samples, Sara P. Puckett, and Marcy J. Balunas

Qt-free dendrogram "purity" coloring: a branch is colored green if every
leaf beneath it shares the same group label -- i.e. that group is a
monophyletic clade, it clustered together before merging with anything
else -- magenta if the two subtrees it joins share a label without being
uniform (that label is split across the merge), and left at the neutral
color otherwise. Used by the dendrogram tab
to make it visually obvious whether technical replicates of one Sample
cluster tightly together, and separately whether biological replicates of
one Biolgroup are well separated from other groups.

Default colors are green/magenta rather than the more conventional
green/red -- red-green colorblindness (the most common form) makes the two
indistinguishable; magenta stays distinguishable from green under all
common forms of color vision deficiency.

This module is Qt-free and unit-tested (see ``tests/test_clusterpurity.py``).
"""


def purity_link_color_func(Z, leaf_labels, true_color='green', false_color='magenta', neutral_color='black'):
    """Return a ``link_color_func`` that colors dendrogram links by label purity.

    Each merge in ``Z`` is classified by comparing the label sets found
    beneath its two children, rather than by asking whether the merged node
    is impure (which would paint every ancestor of one mixing event with
    ``false_color`` all the way up to the root):

    - ``true_color``: the union of both children's labels is a single
      label, i.e. every leaf under this link carries the same label.
    - ``false_color``: the children's label sets share at least one label
      but the union holds more than one -- some label has leaves on both
      sides of this merge, so it is split apart here.
    - ``neutral_color``: the children's label sets have nothing in common.
      The merge just joins two unrelated regions, even if either child is
      itself mixed further down. This is what stops a single low-level
      tangle from propagating ``false_color`` to every ancestor.

    Args:
        Z: linkage matrix (``scipy.cluster.hierarchy.linkage`` or a
            drop-in equivalent); only the first two columns of each row
            (the merged node ids) are read. Rows must be in merge order,
            built on observations ordered like ``leaf_labels``.
        leaf_labels: sequence of hashable group labels, one per clustered
            observation (e.g. its Sample or Biolgroup), in the same order
            as the data passed to ``linkage``.
        true_color: color for links whose leaves all share one label.
        false_color: color for links that split a label across both sides.
        neutral_color: color for links joining label-disjoint children;
            also returned for any node id that is not a merge in ``Z``.

    Returns:
        callable: ``link_color_func(k)`` mapping a node id to a color, as
        expected by ``dendrogram``'s ``link_color_func`` argument.
    """
    leaf_count = len(leaf_labels)
    # Labels present beneath every node seen so far; leaves seed the table.
    labels_under = {idx: {leaf_labels[idx]} for idx in range(leaf_count)}
    link_colors = {}
    # Row i of a linkage matrix creates node ``leaf_count + i``.
    for node, merge in enumerate(Z, start=leaf_count):
        left = labels_under[int(merge[0])]
        right = labels_under[int(merge[1])]
        union = left | right
        labels_under[node] = union
        if len(union) == 1:
            link_colors[node] = true_color
        elif left & right:
            # Overlapping but not uniform: a label straddles this merge.
            link_colors[node] = false_color
        else:
            link_colors[node] = neutral_color

    def link_color_func(k):
        return link_colors.get(k, neutral_color)

    return link_color_func


def purity_summary(Z, leaf_labels):
    """Count how many distinct group labels form one complete, pure clade.

    A label counts as "pure" only when some merge in ``Z`` produces a node
    whose leaves are exactly the full set of leaves carrying that label --
    i.e. the group is monophyletic in the dendrogram. A node that holds a
    uniform but incomplete subset of a label (e.g. 2 of a Sample's 3
    technical replicates) does not count, because the remaining replicate
    clustered elsewhere.

    Instead of materialising the leaf-index set under every node and
    comparing it against every label's target set, each node tracks only
    the single label its leaves share (or a "mixed" marker) and how many
    leaves it covers. A node is a label's complete clade exactly when it is
    uniform and its leaf count equals that label's total count. This is
    linear in the number of leaves and gives the same result for any valid
    linkage, where each node is merged at most once.

    NOTE(review): only merge nodes are examined, so a label carried by a
    single leaf is never counted as pure (it still counts toward
    ``n_total``). This matches the previous behavior -- confirm it is the
    intended treatment of unreplicated groups.

    Args:
        Z: linkage matrix; only the first two columns of each row (the
            merged node ids) are read, and rows must be in merge order.
        leaf_labels: sequence of hashable group labels, one per clustered
            observation, in the same order as the data passed to
            ``linkage``.

    Returns:
        (n_pure, n_total): number of distinct labels that are fully pure
        clades, out of the total number of distinct labels in
        ``leaf_labels``.
    """
    from collections import Counter

    n_leaves = len(leaf_labels)
    label_sizes = Counter(leaf_labels)
    # Private sentinel rather than None, which could itself be a real label.
    mixed = object()
    node_label = dict(enumerate(leaf_labels))
    node_size = dict.fromkeys(range(n_leaves), 1)
    pure_labels = set()
    # Row i of a linkage matrix creates node ``n_leaves + i``.
    for node_id, row in enumerate(Z, start=n_leaves):
        a, b = int(row[0]), int(row[1])
        label_a, label_b = node_label[a], node_label[b]
        size = node_size[a] + node_size[b]
        uniform = (
            label_a is not mixed
            and label_b is not mixed
            and label_a == label_b
        )
        node_label[node_id] = label_a if uniform else mixed
        node_size[node_id] = size
        # Uniform and holding every leaf of its label => complete clade.
        if uniform and size == label_sizes[label_a]:
            pure_labels.add(label_a)
    return len(pure_labels), len(label_sizes)
28 changes: 28 additions & 0 deletions code/groupsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,34 @@ def remove(self, index=None):
del self._items[index]
self.select(self._selected)

def move(self, from_index, to_index):
    """Move the groupset at ``from_index`` so that it sits at ``to_index``.

    Both indices are first clamped into ``[0, len - 1]``, so an
    out-of-range index acts on the nearest end of the list rather than
    raising. Nothing happens when the list is empty or when the two
    clamped indices are equal.

    The selection follows the object, not the position: whichever
    groupset ``self.selected`` returned before the move is still the
    selected one afterwards, so a drag-and-drop reorder does not change
    which groupset is being edited.
    """
    if not self._items:
        return
    last = len(self._items) - 1
    src = max(0, min(from_index, last))
    dst = max(0, min(to_index, last))
    if src == dst:
        return
    # Capture the selection before the list is reshuffled.
    previously_selected = self.selected
    self._items.insert(dst, self._items.pop(src))
    if previously_selected is None:
        return
    # Locate by identity rather than '==': GroupSet.__eq__ is value-based,
    # so two distinct groupsets can compare equal (e.g. freshly added ones
    # before either is edited) and list.index() could pick the wrong one.
    new_position = next(
        (pos for pos, candidate in enumerate(self._items)
         if candidate is previously_selected),
        None,
    )
    if new_position is not None:
        self._selected = new_position

def update(self, index, *, name=None, src=None, incl=None, excl=None, colour=None):
"""Overwrite the given fields of the groupset at ``index``."""
groupset = self._items[index]
Expand Down
85 changes: 51 additions & 34 deletions code/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,10 @@
import time
import string

import platform
from PyQt5 import QtCore, QtWidgets
from PyQt5.QtWidgets import QMainWindow, QSizeGrip, QGraphicsDropShadowEffect, QFileDialog, QListWidgetItem, QColorDialog
from PyQt5.QtCore import (QCoreApplication, QPropertyAnimation, QDate, QDateTime, QMetaObject, QObject, QPoint, QRect, QSize, QTime, QUrl, Qt, QEvent)
from PyQt5.QtGui import QBrush, QColor, QIcon, QPalette, QPainter, QPixmap
from PyQt5.QtWidgets import QMainWindow, QSizeGrip
from PyQt5.QtCore import QObject, Qt
from PyQt5.QtGui import QPixmap
from pathlib import Path

# Install/verify non-stock dependencies (epam.indigo, UpSetPlot, squarify)
Expand All @@ -35,14 +34,14 @@
import files

from MSFaST import run_MSFaST, analysis_parameters
from groupsets import GroupSet, GroupSetModel, build_query_dict
from groupsets import GroupSetModel, build_query_dict
from plotslots import PlotSlotRegistry
from paramfields import save_checkbox_fields
from csvcache import cached_read_csv, invalidate as invalidate_csv_cache
from biogroups import compute_biological_groups
from dbsearch import search_npatlas
from searchtree import SearchTreePanel
from plotting import plot_abund, show_spectrum, show_featureplt, plot_heatmap, plot_mzrt, plot_samplecorr, kendrick, plot_volcano, plot_fc3d, plot_dendrogram, plot_PCA, prev_cv, gen_upsetplt, gen_treemap
from plotting import plot_abund, show_spectrum, show_featureplt, plot_heatmap, plot_mzrt, plot_samplecorr, kendrick, plot_volcano, plot_fc3d, plot_dendrogram, plot_ordination, prev_cv, plot_upset, plot_treemap
import getfragdb

from indigo import Indigo
Expand Down Expand Up @@ -71,27 +70,31 @@
-add bypass for plots based on checkmark. possibly use if check: ... else: button.hide() then pass

- distribution of CVs on bottom of cvplt?
- add pca option and allow visualization of key features on multivar plt?

#TODO#
- in source spectra viewer in tab plot
- do overall data quality score, AUC
- in source spectra viewer in spectrum details tab plot with preexisting in source fragment deconvolution algorithm
- clean up import sections and general code for better maintainability and good syntax/standards
~main.py's own import section done (dead PyQt5/stdlib/groupsets imports removed,
verified unused via pyflakes + grep, no behavior change); other files not yet swept
- do overall data quality score, AUC on CV plot or something, may be present in a different form already
- standardize method and class names
- database management, options
- fix up analysisinfo file output

- mzmine msp file import
- add other ordination options
- add terminal output with current line to status bar instead of just static status messages, perhaps with expand button to show full terminal output
- potentially consider other database options like HMDB etc
- fix up analysisinfo file output with better and more useful log info
- add other ordination options like pca, pls-da, etc etc
- add custom keyword arguments for each plot to make calling them easier
- add runcheck before searching when switching to search tab
- Figure out way to have only active plot be updated and then to update others
when plot is switched
- make it so groups can be reordered
- make it so groups can be reordered in the groupsets widgets?
~model-layer support done: GroupSetModel.move() (groupsets.py), tested in
test_groupsets.py. UI drag-drop wiring (listWidget_pltgrps InternalMove +
syncing its rowsMoved signal to model.move()) not done -- needs a live
GUI session to verify the selection-tracking interacts correctly with
updatesets()'s existing blockSignals dance, which isn't something to
guess at unverified
- consider if indexing and feature highly functions in plot options have any easy wins for optimization or disk use. (prob not)
- make goto buttons just one class and lambda an index for the stacked widgets
when connecting!


likely items that need more thought and planning
- maybe have a comparison mode for many different strains with and without elicitor
- specificity/sensitivity plot
- other statistical models
Expand Down Expand Up @@ -238,7 +241,15 @@ def __init__(self):

self.ui.setupUi(self)
self.ui.label_credits.setText('v1.00.01 r26.06.29')


# "PCA" was a misnomer left over from when this checkbox/button only
# ran NMDS (see plotting.plot_ordination) -- the underlying
# checkBox_pca objectName/analysis_params.PCA attribute stay
# unchanged for .mpct save-file compatibility; only the visible text
# and tooltip change.
self.ui.checkBox_pca.setText('Multivariate')
self.ui.btn_pca.setToolTip('Multivariate Ordination (PCA/NMDS/PLS-DA)')

#initialize other dialog windows
self.dialog = dialog()
self.ftrdialog = ftrdialog()
Expand Down Expand Up @@ -759,6 +770,13 @@ def _refresh_highlight(self):
)
self.canvas['kmd'].draw_idle()

# Update the multivariate plot's loadings-view highlight (a separate
# concept from its scores view, which highlights a clicked *sample*
# via parent.pickedsample, not a feature -- so this only applies
# when self.pca exists and is currently showing loadings).
if getattr(self, 'pca', None) is not None:
self.pca.highlight_loading(self.pickedfeature, self.highlightcol)

# Update feature plot with the selected feature
self.highlight['featureplt'].set_data(
[iondict.loc[self.pickedfeature, 'Retention time (min)']],
Expand Down Expand Up @@ -1038,6 +1056,11 @@ def _generate_plots(self):
dfs = self.filtereddfs
grpsts = self.groupsets

self._create_or_reset('treemap', 'treemap',
lambda: plot_treemap(self, 'treemap', self.ui.frame_treemap, pltfile, '', ''),
lambda: self.treemap.reset(pltfile, '', ''))
stop_functime('treemap complete')

if params.CVfil:
self._create_or_reset('prevcv', 'CV plot',
lambda: prev_cv(self, 'cvplt', self.ui.frame_cvplt, 'none', 'none', 'none'),
Expand All @@ -1055,10 +1078,10 @@ def _generate_plots(self):
stop_functime('dendrogram complete')

if params.PCA:
self._create_or_reset('pca', 'PCA/NMDS plot',
lambda: plot_PCA(self, 'pca', self.ui.frame_pca, pltfile, '', ''),
self._create_or_reset('pca', 'multivariate ordination plot',
lambda: plot_ordination(self, 'pca', self.ui.frame_pca, pltfile, '', ''),
lambda: self.pca.reset(pltfile, '', ''))
stop_functime('nmds complete')
stop_functime('ordination complete')

if params.FC3Dplt:
self._create_or_reset('fc3d', '3D fold-change plot',
Expand Down Expand Up @@ -1101,6 +1124,11 @@ def _generate_plots(self):
lambda: self.samplecorr.reset(iondictfile, dfs, grpsts))
stop_functime('samplecorr complete')

self._create_or_reset('upset', 'upset plot',
lambda: plot_upset(self, 'upset', self.ui.frame_upset, iondictfile, '', ''),
lambda: self.upset.reset(iondictfile, '', ''))
stop_functime('upsetplt complete')

def run_analysis(self):
# Ignore re-clicks while an analysis is already running on the worker thread.
if getattr(self, '_analysis_thread', None) is not None and self._analysis_thread.isRunning():
Expand Down Expand Up @@ -1153,12 +1181,6 @@ def _on_compute_finished(self):
self.ui.btn_run.setEnabled(True)

def _finish_analysis(self):
try:
gen_treemap(self) # move back to end
except Exception:
print("not generating tremap due to an error")
stop_functime('treemap complete')

# Used for point opacity based on abundance colouring
iondict = cached_read_csv(self.analysis_paramsgui.outputdir / 'iondict.csv', sep=',', header=[0], index_col=None)
self.analysis_paramsgui.maxval = iondict['logmax'].max()
Expand Down Expand Up @@ -1206,11 +1228,6 @@ def _finish_analysis(self):
self.fillfttree()
self.dbsearchdone = True

try:
gen_upsetplt(self)
except Exception:
print("not generating upset plot due to an error")
stop_functime('upsetplt complete')
self.ui.label_status.setText('Analysis Complete')
stop_functime('analysis complete')
print('')
Expand Down
Loading
Loading