Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: natcpp
Title: Fast C++ Primitives for the 'NeuroAnatomy Toolbox'
Version: 0.2
Version: 0.3.0
Authors@R:
person(given = "Gregory",
family = "Jefferis",
Expand All @@ -18,14 +18,17 @@ Description: Fast functions implemented in C++ via 'Rcpp' to support the
License: GPL (>= 3)
URL: https://github.com/natverse/natcpp, https://natverse.org/natcpp/
BugReports: https://github.com/natverse/natcpp/issues
Imports:
Imports:
Matrix,
methods,
Rcpp (>= 1.0.6)
Suggests:
Suggests:
spelling,
testthat (>= 3.0.0)
LinkingTo:
Rcpp
LinkingTo:
Rcpp,
RcppThread
Config/testthat/edition: 3
Encoding: UTF-8
Language: en-GB
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,8 @@ export(c_sub2ind)
export(c_topntail)
export(c_topntail_list)
export(c_total_cable)
export(c_weighted_jaccard_dense)
export(c_weighted_jaccard_sparse)
import(Rcpp)
importFrom(methods,as)
useDynLib(natcpp, .registration = TRUE)
29 changes: 29 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,32 @@ c_EdgeListFromSegList <- function(L) {
.Call(`_natcpp_c_EdgeListFromSegList`, L)
}

#' @noRd
weighted_jaccard_sparse_fill <- function(x, pattern, transpose = FALSE, display_progress = TRUE, threads = 4L) {
.Call(`_natcpp_weighted_jaccard_sparse_fill`, x, pattern, transpose, display_progress, threads)
}

#' Dense weighted Jaccard similarity via C++
#'
#' Compute the full weighted Jaccard similarity matrix for a
#' \link[Matrix:dgCMatrix-class]{dgCMatrix} using
#' an adaptive dense accumulation strategy. For small output matrices, uses a
#' feature-oriented loop; for large outputs, switches to a column-oriented
#' loop for better cache performance.
#'
#' @param x A \link[Matrix:dgCMatrix-class]{dgCMatrix} (sparse column-compressed matrix)
#' @param transpose If \code{FALSE}, compare columns; if \code{TRUE}, compare
#' rows
#' @param threads Number of threads (default 4). Set to 0 for all cores.
#' @param triangle If \code{TRUE}, return only the lower triangle as a flat
#' numeric vector in \code{\link{dist}} layout. If \code{FALSE} (default),
#' return a full square matrix.
#' @param distance If \code{TRUE}, return distance (\code{1 - similarity})
#' instead of similarity. Default \code{FALSE}.
#' @return A dense numeric similarity matrix, or a numeric vector in
#' \code{\link{dist}} layout when \code{triangle = TRUE}.
#' @export
c_weighted_jaccard_dense <- function(x, transpose = FALSE, threads = 4L, triangle = FALSE, distance = FALSE) {
.Call(`_natcpp_c_weighted_jaccard_dense`, x, transpose, threads, triangle, distance)
}

87 changes: 87 additions & 0 deletions R/weighted_jaccard.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#' Sparse weighted Jaccard similarity via C++
#'
#' Compute the weighted Jaccard similarity matrix for a
#' \link[Matrix:dgCMatrix-class]{dgCMatrix}, returning a
#' sparse result. Uses \code{weighted_jaccard_sparse_fill} to compute min-sums
#' only for column (or row) pairs that share at least one non-zero feature, then
#' normalises to similarity. Only the upper triangle is computed, taking
#' advantage of the symmetry of the Jaccard index.
#'
#' @param x A \link[Matrix:dgCMatrix-class]{dgCMatrix} (sparse column-compressed matrix)
#' @param transpose If \code{FALSE} (default), compare columns; if
#' \code{TRUE}, compare rows.
#' @param display_progress Whether to show a text progress bar (default
#' \code{TRUE}).
#' @param threads Number of threads for parallel computation (default 4).
#' Set to 0 to use all available cores.
#' @param triangle If \code{TRUE}, return a symmetric \code{dsCMatrix}
#' (upper triangle only). If \code{FALSE} (default), return a general
#' \link[Matrix:dgCMatrix-class]{dgCMatrix}.
#' @param distance If \code{TRUE}, return distance (\code{1 - similarity})
#' instead of similarity. Default \code{FALSE}. A warning is issued since
#' sparse distance matrices are typically dense.
#' @return A sparse similarity (or distance) matrix: \code{dsCMatrix} when
#' \code{triangle = TRUE}, \link[Matrix:dgCMatrix-class]{dgCMatrix} otherwise.
#' @importFrom methods as
#' @export
#' @seealso \code{\link{c_weighted_jaccard_dense}} for the dense equivalent
#' @examples
#' \dontrun{
#' library(Matrix)
#' m <- sparseMatrix(i = c(1,2,1,2,3,3), j = c(1,1,2,2,2,3),
#' x = c(4,2,1,3,3,1), dims = c(3,3))
#' c_weighted_jaccard_sparse(m)
#' }
c_weighted_jaccard_sparse <- function(x, transpose = FALSE, display_progress = TRUE,
threads = 4L, triangle = FALSE,
distance = FALSE) {
if (distance)
warning("distance=TRUE with sparse output produces a mostly-dense matrix; ",
"consider using c_weighted_jaccard_dense() with triangle=TRUE instead.")

crossfun <- if (transpose) Matrix::tcrossprod else Matrix::crossprod
n <- if (transpose) nrow(x) else ncol(x)

if (length(x@x) == 0L) {
sim_val <- if (distance) 0 else 1
m <- Matrix::sparseMatrix(i = seq_len(n), j = seq_len(n), x = sim_val,
dims = c(n, n), symmetric = triangle)
if (!triangle)
m <- as(m, "generalMatrix")
return(m)
}

# Sparsity pattern from binarised crossprod — dsCMatrix (upper triangle)
b <- x
b@x <- rep(1, length(b@x))
A <- crossfun(b)

# Column/row totals for denominator
totals <- if (transpose) Matrix::rowSums(x) else Matrix::colSums(x)

# Fill in min_sums using C++ (only upper triangle entries)
A@x <- weighted_jaccard_sparse_fill(x, A, transpose = transpose,
display_progress = display_progress,
threads = threads)

# Convert min_sums to similarity: sim = ms / (s_i + s_j - ms)
col_idx <- rep(seq_along(diff(A@p)), diff(A@p))
row_idx <- A@i + 1L
denom <- totals[row_idx] + totals[col_idx] - A@x
nonzero <- denom != 0
A@x[nonzero] <- A@x[nonzero] / denom[nonzero]
A@x[!nonzero] <- 0

# Set diagonal to 1 for similarity
diag_pos <- which(A@i + 1L == col_idx)
A@x[diag_pos] <- 1

if (distance) {
A@x <- 1 - A@x # diagonal 1→0, off-diag sim→1-sim
}

if (!triangle)
A <- as(A, "generalMatrix")

A
}
2 changes: 2 additions & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
CMD
Codecov
Jaccard
Lifecycle
NeuroAnatomy
Nx
Expand Down
40 changes: 40 additions & 0 deletions man/c_weighted_jaccard_dense.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 58 additions & 0 deletions man/c_weighted_jaccard_sparse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/Makevars
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS)
33 changes: 33 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Generated by using Rcpp::compileAttributes() -> do not edit by hand
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#include <RcppThread.h>
#include <Rcpp.h>

using namespace Rcpp;
Expand Down Expand Up @@ -135,6 +136,36 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// weighted_jaccard_sparse_fill
NumericVector weighted_jaccard_sparse_fill(const S4& x, const S4& pattern, bool transpose, bool display_progress, int threads);
RcppExport SEXP _natcpp_weighted_jaccard_sparse_fill(SEXP xSEXP, SEXP patternSEXP, SEXP transposeSEXP, SEXP display_progressSEXP, SEXP threadsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const S4& >::type x(xSEXP);
Rcpp::traits::input_parameter< const S4& >::type pattern(patternSEXP);
Rcpp::traits::input_parameter< bool >::type transpose(transposeSEXP);
Rcpp::traits::input_parameter< bool >::type display_progress(display_progressSEXP);
Rcpp::traits::input_parameter< int >::type threads(threadsSEXP);
rcpp_result_gen = Rcpp::wrap(weighted_jaccard_sparse_fill(x, pattern, transpose, display_progress, threads));
return rcpp_result_gen;
END_RCPP
}
// c_weighted_jaccard_dense
SEXP c_weighted_jaccard_dense(const S4& x, bool transpose, int threads, bool triangle, bool distance);
RcppExport SEXP _natcpp_c_weighted_jaccard_dense(SEXP xSEXP, SEXP transposeSEXP, SEXP threadsSEXP, SEXP triangleSEXP, SEXP distanceSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const S4& >::type x(xSEXP);
Rcpp::traits::input_parameter< bool >::type transpose(transposeSEXP);
Rcpp::traits::input_parameter< int >::type threads(threadsSEXP);
Rcpp::traits::input_parameter< bool >::type triangle(triangleSEXP);
Rcpp::traits::input_parameter< bool >::type distance(distanceSEXP);
rcpp_result_gen = Rcpp::wrap(c_weighted_jaccard_dense(x, transpose, threads, triangle, distance));
return rcpp_result_gen;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_natcpp_c_seglengths", (DL_FUNC) &_natcpp_c_seglengths, 4},
Expand All @@ -147,6 +178,8 @@ static const R_CallMethodDef CallEntries[] = {
{"_natcpp_c_topntail", (DL_FUNC) &_natcpp_c_topntail, 1},
{"_natcpp_c_topntail_list", (DL_FUNC) &_natcpp_c_topntail_list, 1},
{"_natcpp_c_EdgeListFromSegList", (DL_FUNC) &_natcpp_c_EdgeListFromSegList, 1},
{"_natcpp_weighted_jaccard_sparse_fill", (DL_FUNC) &_natcpp_weighted_jaccard_sparse_fill, 5},
{"_natcpp_c_weighted_jaccard_dense", (DL_FUNC) &_natcpp_c_weighted_jaccard_dense, 5},
{NULL, NULL, 0}
};

Expand Down
Loading
Loading