Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Type: Package
Title: Annotation of Genetic Variants
Description: Annotate variants, compute amino acid coding changes,
predict coding outcomes.
Version: 1.59.0
Version: 1.59.1
Authors@R: c(
person("Valerie", "Oberchain", role="aut"),
person("Martin", "Morgan", role="aut"),
Expand Down
13 changes: 13 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
CHANGES IN VERSION 1.59.1
-------------------------

BUG FIXES

o expand() now converts Number=A to Number=1 in both INFO and FORMAT
VCF header metadata. After expansion, each row contains exactly one
ALT allele, so per-ALT fields are scalar. This fixes the round-trip
discrepancy where writeVcf() followed by readVcf() + expand() would
produce a CompressedNumericList for fields like AF instead of a plain
numeric vector. The fix applies to both the multi-allele expansion
path and the biallelic early-return path. (GitHub issue #79)

CHANGES IN VERSION 1.36.0
-------------------------

Expand Down
58 changes: 39 additions & 19 deletions R/methods-expand.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,18 @@ setMethod("expand", "CollapsedVCF",
if (all(elt == 1L)) {
fxd <- fixed(x)
fxd$ALT <- unlist(alt(x), use.names=FALSE)
ghdr <- geno(header(x))
varsR <- rownames(ghdr)[rownames(ghdr) == "AD" | ghdr$Number == "R"]
varsR <- varsR[varsR %in% names(geno(x))]
if (length(varsR) > 0){
geno(x)[varsR] <- endoapply(
geno(x)[varsR], function(i)
.expandAD(i, nrow(x), ncol(x))
)
}
return(VCF(rd, colData(x), metadata(x), fxd, .unlistAltInfo(x),
geno(x), ..., collapsed=FALSE))
ghdr <- geno(header(x))
varsR <- rownames(ghdr)[rownames(ghdr) == "AD" | ghdr$Number == "R"]
varsR <- varsR[varsR %in% names(geno(x))]
if (length(varsR) > 0){
geno(x)[varsR] <- endoapply(
geno(x)[varsR], function(i)
.expandAD(i, nrow(x), ncol(x))
)
}
res <- VCF(rd, colData(x), metadata(x), fxd, .unlistAltInfo(x),
geno(x), ..., collapsed=FALSE)
return(.updateHeaderNumberA(res))
}

## info, fixed, geno
Expand All @@ -40,17 +41,36 @@ setMethod("expand", "CollapsedVCF",
mcols(rdexp) <- NULL
} else rdexp <- rd[idx, "paramRangeID"]

tmp = VCF(rdexp, colData(x), metadata(x), fexp, iexp, gexp,
..., collapsed=FALSE) # https://github.com/Bioconductor/VariantAnnotation/issues/79 says 'A' should not occur in info(header())$Number
nhi = info(header(tmp))
num = nhi$Number
inda = grep("A", num)
if (length(inda)>0) num[inda] = "1"
info(header(tmp))$Number = num
tmp
res <- VCF(rdexp, colData(x), metadata(x), fexp, iexp, gexp,
..., collapsed=FALSE)
.updateHeaderNumberA(res)
}
)

## After expand(), each row has exactly one ALT allele, so per-ALT fields
## (Number=A) are now scalar — update the header to reflect this.
## (GitHub issue #79)
.updateHeaderNumberA <- function(vcf)
{
## INFO header
ihdr <- info(header(vcf))
if (nrow(ihdr) > 0L) {
isA <- ihdr$Number == "A"
if (any(isA))
ihdr$Number[isA] <- "1"
info(header(vcf)) <- ihdr
}
## FORMAT/geno header
ghdr <- geno(header(vcf))
if (nrow(ghdr) > 0L) {
isA <- ghdr$Number == "A"
if (any(isA))
ghdr$Number[isA] <- "1"
geno(header(vcf)) <- ghdr
}
vcf
}

.expandGeno <- function(x, hdr, elt, idx)
{
gvar <- geno(x)
Expand Down
34 changes: 34 additions & 0 deletions inst/unitTests/test_expand-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,37 @@ test_expand_adr_adf <- function()
})

}

## ---------------------------------------------------------------
## Test for GitHub issue #79: Number=A should become Number=1 in
## both INFO and FORMAT headers after expand().
## ---------------------------------------------------------------
test_expand_numberA_to_1_issue79 <- function()
{
## Test with multi-allele VCF (exercises main expand path)
fl <- system.file("unitTests", "cases", "expand.vcf",
package="VariantAnnotation")
vcf <- suppressWarnings(readVcf(fl, "hg19"))
checkIdentical(info(header(vcf))["AF", "Number"], "A")
exp <- expand(vcf)
checkIdentical(info(header(exp))["AF", "Number"], "1")
checkTrue(is.numeric(info(exp)$AF))

## Test with biallelic VCF (exercises early-return path)
fl2 <- system.file("extdata", "ex2.vcf", package="VariantAnnotation")
vcf2 <- readVcf(fl2)
checkIdentical(info(header(vcf2))["AF", "Number"], "A")
exp2 <- expand(vcf2)
checkIdentical(info(header(exp2))["AF", "Number"], "1")
checkTrue(is.numeric(info(exp2)$AF))

## Round-trip: writeVcf + readVcf + expand should keep AF as numeric
out <- tempfile(fileext=".vcf")
on.exit(unlink(out))
writeVcf(exp2, out)
rt <- readVcf(out, row.names=FALSE)
rt <- expand(rt)
checkIdentical(info(header(rt))["AF", "Number"], "1")
checkTrue(is.numeric(info(rt)$AF))
checkEquals(info(exp2)$AF, info(rt)$AF)
}