diff --git a/DESCRIPTION b/DESCRIPTION index 9bdc489..7967a72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Type: Package Title: Annotation of Genetic Variants Description: Annotate variants, compute amino acid coding changes, predict coding outcomes. -Version: 1.59.0 +Version: 1.59.1 Authors@R: c( person("Valerie", "Oberchain", role="aut"), person("Martin", "Morgan", role="aut"), diff --git a/NEWS b/NEWS index 6fcc401..5ccf3db 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,16 @@ +CHANGES IN VERSION 1.59.1 +------------------------- + +BUG FIXES + + o expand() now converts Number=A to Number=1 in both INFO and FORMAT + VCF header metadata. After expansion, each row contains exactly one + ALT allele, so per-ALT fields are scalar. This fixes the round-trip + discrepancy where writeVcf() followed by readVcf() + expand() would + produce a CompressedNumericList for fields like AF instead of a plain + numeric vector. The fix applies to both the multi-allele expansion + path and the biallelic early-return path. (GitHub issue #79) + CHANGES IN VERSION 1.36.0 ------------------------- diff --git a/R/methods-expand.R b/R/methods-expand.R index ccce4b8..acb846f 100644 --- a/R/methods-expand.R +++ b/R/methods-expand.R @@ -13,17 +13,18 @@ setMethod("expand", "CollapsedVCF", if (all(elt == 1L)) { fxd <- fixed(x) fxd$ALT <- unlist(alt(x), use.names=FALSE) - ghdr <- geno(header(x)) - varsR <- rownames(ghdr)[rownames(ghdr) == "AD" | ghdr$Number == "R"] - varsR <- varsR[varsR %in% names(geno(x))] - if (length(varsR) > 0){ - geno(x)[varsR] <- endoapply( - geno(x)[varsR], function(i) - .expandAD(i, nrow(x), ncol(x)) - ) - } - return(VCF(rd, colData(x), metadata(x), fxd, .unlistAltInfo(x), - geno(x), ..., collapsed=FALSE)) + ghdr <- geno(header(x)) + varsR <- rownames(ghdr)[rownames(ghdr) == "AD" | ghdr$Number == "R"] + varsR <- varsR[varsR %in% names(geno(x))] + if (length(varsR) > 0){ + geno(x)[varsR] <- endoapply( + geno(x)[varsR], function(i) + .expandAD(i, nrow(x), ncol(x)) + ) + } + res <- VCF(rd, colData(x), metadata(x), fxd, .unlistAltInfo(x), + geno(x), ..., collapsed=FALSE) + return(.updateHeaderNumberA(res)) } ## info, fixed, geno @@ -40,17 +41,36 @@ setMethod("expand", "CollapsedVCF", mcols(rdexp) <- NULL } else rdexp <- rd[idx, "paramRangeID"] - tmp = VCF(rdexp, colData(x), metadata(x), fexp, iexp, gexp, - ..., collapsed=FALSE) # https://github.com/Bioconductor/VariantAnnotation/issues/79 says 'A' should not occur in info(header())$Number - nhi = info(header(tmp)) - num = nhi$Number - inda = grep("A", num) - if (length(inda)>0) num[inda] = "1" - info(header(tmp))$Number = num - tmp + res <- VCF(rdexp, colData(x), metadata(x), fexp, iexp, gexp, + ..., collapsed=FALSE) + .updateHeaderNumberA(res) } ) +## After expand(), each row has exactly one ALT allele, so per-ALT fields +## (Number=A) are now scalar — update the header to reflect this. +## (GitHub issue #79) +.updateHeaderNumberA <- function(vcf) +{ + ## INFO header + ihdr <- info(header(vcf)) + if (nrow(ihdr) > 0L) { + isA <- ihdr$Number == "A" + if (any(isA)) + ihdr$Number[isA] <- "1" + info(header(vcf)) <- ihdr + } + ## FORMAT/geno header + ghdr <- geno(header(vcf)) + if (nrow(ghdr) > 0L) { + isA <- ghdr$Number == "A" + if (any(isA)) + ghdr$Number[isA] <- "1" + geno(header(vcf)) <- ghdr + } + vcf +} + .expandGeno <- function(x, hdr, elt, idx) { gvar <- geno(x) diff --git a/inst/unitTests/test_expand-methods.R b/inst/unitTests/test_expand-methods.R index 37c1295..86381af 100644 --- a/inst/unitTests/test_expand-methods.R +++ b/inst/unitTests/test_expand-methods.R @@ -90,3 +90,37 @@ test_expand_adr_adf <- function() }) } + +## --------------------------------------------------------------- +## Test for GitHub issue #79: Number=A should become Number=1 in +## both INFO and FORMAT headers after expand(). +## --------------------------------------------------------------- +test_expand_numberA_to_1_issue79 <- function() +{ + ## Test with multi-allele VCF (exercises main expand path) + fl <- system.file("unitTests", "cases", "expand.vcf", + package="VariantAnnotation") + vcf <- suppressWarnings(readVcf(fl, "hg19")) + checkIdentical(info(header(vcf))["AF", "Number"], "A") + exp <- expand(vcf) + checkIdentical(info(header(exp))["AF", "Number"], "1") + checkTrue(is.numeric(info(exp)$AF)) + + ## Test with biallelic VCF (exercises early-return path) + fl2 <- system.file("extdata", "ex2.vcf", package="VariantAnnotation") + vcf2 <- readVcf(fl2) + checkIdentical(info(header(vcf2))["AF", "Number"], "A") + exp2 <- expand(vcf2) + checkIdentical(info(header(exp2))["AF", "Number"], "1") + checkTrue(is.numeric(info(exp2)$AF)) + + ## Round-trip: writeVcf + readVcf + expand should keep AF as numeric + out <- tempfile(fileext=".vcf") + on.exit(unlink(out)) + writeVcf(exp2, out) + rt <- readVcf(out, row.names=FALSE) + rt <- expand(rt) + checkIdentical(info(header(rt))["AF", "Number"], "1") + checkTrue(is.numeric(info(rt)$AF)) + checkEquals(info(exp2)$AF, info(rt)$AF) +}