From b2d45379db03b3c7a9794361b43d68d13ebb223f Mon Sep 17 00:00:00 2001 From: John Muirhead-Gould Date: Wed, 1 Jul 2026 19:17:24 -0400 Subject: [PATCH] fix: expand() handles NULL geno data in sites-only VCFs (#72) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sites-only VCFs (e.g., gnomAD allele frequency files) declare FORMAT fields in the header but have no sample columns. This results in geno data stored as empty list-matrices (nrow × 0). When expand() attempted to process Number=R fields (like AD), .expandAD() would call array(NULL, ...) because unlist() on an empty list returns NULL. Fix: add an early return in .expandAD() when the input list is empty or xcols (number of samples) is 0, returning an appropriately-shaped empty array instead. Also adds a sites-only test VCF and corresponding unit test. Closes #72 --- DESCRIPTION | 2 +- NEWS | 13 ++++++++++++ R/methods-expand.R | 6 ++++++ inst/unitTests/cases/sites_only.vcf | 9 ++++++++ inst/unitTests/test_expand-methods.R | 31 ++++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 inst/unitTests/cases/sites_only.vcf diff --git a/DESCRIPTION b/DESCRIPTION index 9bdc489..7967a72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Type: Package Title: Annotation of Genetic Variants Description: Annotate variants, compute amino acid coding changes, predict coding outcomes. -Version: 1.59.0 +Version: 1.59.1 Authors@R: c( person("Valerie", "Oberchain", role="aut"), person("Martin", "Morgan", role="aut"), diff --git a/NEWS b/NEWS index 6fcc401..d6d0a24 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,16 @@ +CHANGES IN VERSION 1.59.1 +------------------------- + +BUG FIXES + + o expand() no longer errors with "'data' must be of a vector type, + was 'NULL'" when called on sites-only VCFs (e.g., gnomAD allele + frequency files) that declare FORMAT fields in the header but have + no sample columns. 
The internal .expandAD() helper now returns an + appropriately-shaped empty array when the geno data is an empty + list (0 samples), instead of passing NULL to array(). (GitHub + issue #72) + CHANGES IN VERSION 1.36.0 ------------------------- diff --git a/R/methods-expand.R b/R/methods-expand.R index ccce4b8..5bd5b7c 100644 --- a/R/methods-expand.R +++ b/R/methods-expand.R @@ -118,6 +118,12 @@ setMethod("expand", "CollapsedVCF", .expandAD <- function(AD, idxlen, xcols) { if (is.list(AD)) { + ## No data to expand (e.g., sites-only VCF with 0 samples; + ## list elements may all be NULL). Return empty array. + ## (GitHub issue #72) + if (length(AD) == 0L || xcols == 0L) + return(array(integer(0L), c(idxlen, xcols, 2L))) + adpart <- PartitioningByWidth(AD) if (any(zeros <- width(adpart) == 0L)) { AD[zeros] <- list(rep(NA_integer_, 2L)) diff --git a/inst/unitTests/cases/sites_only.vcf b/inst/unitTests/cases/sites_only.vcf new file mode 100644 index 0000000..f8a5f0c --- /dev/null +++ b/inst/unitTests/cases/sites_only.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.2 +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele frequency"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles"> +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 100 rs1 A C,T 50 PASS AF=0.1,0.2;DP=100 +chr1 200 rs2 G A 30 PASS AF=0.3;DP=50 +chr1 300 rs3 T G,C,A 99 PASS AF=0.4,0.3,0.1;DP=200 diff --git a/inst/unitTests/test_expand-methods.R b/inst/unitTests/test_expand-methods.R index 37c1295..69385ec 100644 --- a/inst/unitTests/test_expand-methods.R +++ b/inst/unitTests/test_expand-methods.R @@ -90,3 +90,34 @@ test_expand_adr_adf <- function() }) } + +## --------------------------------------------------------------- +## Test for GitHub issue #72: expand() errors with 'data' must be +## of a vector type, was 'NULL' on sites-only VCFs (e.g. gnomAD). 
+## ---------------------------------------------------------------
+test_expand_sitesOnly_issue72 <- function()
+{
+    fl <- system.file("unitTests", "cases", "sites_only.vcf",
+                      package="VariantAnnotation")
+    vcf <- readVcf(fl, "")
+
+    ## Precondition: the fixture is sites-only (has rows, 0 samples)
+    checkIdentical(ncol(vcf), 0L)
+    checkTrue(nrow(vcf) > 0L)
+
+    ## expand() must not error on sites-only VCF with FORMAT headers
+    exp <- expand(vcf)
+    checkTrue(is(exp, "ExpandedVCF"))
+
+    ## Multi-allelic rows are expanded correctly
+    ## Input: 3 rows with ALT counts 2, 1, 3 => 6 expanded rows
+    checkIdentical(nrow(exp), 6L)
+    checkIdentical(ncol(exp), 0L)
+
+    ## INFO Number=A fields are expanded and scalar
+    checkTrue(is.numeric(info(exp)$AF))
+    checkEquals(info(exp)$AF, c(0.1, 0.2, 0.3, 0.4, 0.3, 0.1))
+
+    ## expand() keeps every geno field (a 'length >= 0' check cannot fail)
+    checkIdentical(length(geno(exp)), length(geno(vcf)))
+}