Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Type: Package
Title: Annotation of Genetic Variants
Description: Annotate variants, compute amino acid coding changes,
predict coding outcomes.
Version: 1.59.0
Version: 1.59.1
Authors@R: c(
person("Valerie", "Oberchain", role="aut"),
person("Martin", "Morgan", role="aut"),
Expand Down
18 changes: 18 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
CHANGES IN VERSION 1.59.1
-------------------------

BUG FIXES

o writeVcf() now preserves the existing fileDate header value instead
of unconditionally replacing it with the current date. This restores
provenance information and enables faithful round-trips.

o writeVcf() no longer emits bare ##contig=<ID=X> header lines when
seqinfo contains only NA seqlengths and NA genomes. Such lines are
uninformative and were the primary cause of round-trip mismatches
(extra contig lines altered the header structure on re-read).

o Together these changes allow all.equal(readVcf(writeVcf(x)), x)
to succeed for VCFs whose seqinfo is all-NA (e.g., structural.vcf).
(GitHub issue #78)

CHANGES IN VERSION 1.36.0
-------------------------

Expand Down
24 changes: 16 additions & 8 deletions R/methods-writeVcf.R
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,15 @@
header <- c(fileformat, header)
}
contig <- any(grepl("contig", names(header), fixed=TRUE))
if (!contig)
header <- c(header, .contigsFromSeqinfo(seqinfo(obj)))
if (!contig) {
## Only add contig lines if seqinfo carries useful information
## (at least one non-NA seqlength or genome). Bare ##contig=<ID=X>
## lines are uninformative and break round-trip fidelity (#78).
si <- seqinfo(obj)
has_info <- !all(is.na(seqlengths(si))) || !all(is.na(genome(si)))
if (has_info)
header <- c(header, .contigsFromSeqinfo(si))
}

## Last line before data
colnms <- c("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO")
Expand All @@ -171,11 +178,12 @@
if (nms == "META" && ncol(df) == 1L) {
if (!"fileformat" %in% rownames(df))
df <- rbind(DataFrame(Value="VCFv4.3", row.names="fileformat"), df)
fd <- format(Sys.time(), "%Y%m%d")
if ("fileDate" %in% rownames(df))
df[rownames(df) == "fileDate", ] <- fd
else
## Preserve existing fileDate for round-trip fidelity (#78).
## Only add a new fileDate if none exists.
if (!"fileDate" %in% rownames(df)) {
fd <- format(Sys.time(), "%Y%m%d")
df <- rbind(df, DataFrame(Value=fd, row.names="fileDate"))
}
paste("##", rownames(df), "=", df[,1], sep="")
## Support VCF v4.2 and v4.3 PEDIGREE field
} else if(nms == "PEDIGREE" || nms == "ALT") {
Expand All @@ -193,8 +201,8 @@
## (Rsamtools reports unstructured headers as one column named "Value")
} else if(ncol(df) == 1L && names(df)[1] == "Value" && nrow(df) == 1L) {
if (nms == "fileDate") {
fd <- format(Sys.time(), "%Y%m%d")
paste("##fileDate=", fd, sep="")
## Preserve existing fileDate for round-trip fidelity (#78).
paste("##fileDate=", df[1, 1], sep="")
} else
paste("##", nms, "=", df[,1], sep="")
## 'non-simple' key-value pairs
Expand Down
35 changes: 35 additions & 0 deletions inst/unitTests/test_writeVcf-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,38 @@ chk = fixed(header(readVcf(tmp)))$ALT
checkIdentical(chk, good)
}


## ---------------------------------------------------------------
## Test for GitHub issue #78: faithful round-trip via writeVcf + readVcf
## ---------------------------------------------------------------
test_writeVcf_roundtrip_issue78 <- function()
{
## structural.vcf has all-NA seqinfo and an existing fileDate
fl <- system.file("extdata", "structural.vcf", package="VariantAnnotation")
out <- tempfile(fileext=".vcf")
on.exit(unlink(out))

first <- readVcf(fl)
writeVcf(first, out)
roundtrip <- readVcf(out)

## Should be identical (no spurious contig lines, fileDate preserved)
checkTrue(isTRUE(all.equal(roundtrip, first)),
msg="structural.vcf round-trip should be perfect")

## ex2.vcf has real seqinfo — contig lines should still be written
fl2 <- system.file("extdata", "ex2.vcf", package="VariantAnnotation")
out2 <- tempfile(fileext=".vcf")
on.exit(unlink(out2), add=TRUE)

first2 <- readVcf(fl2)
writeVcf(first2, out2)
roundtrip2 <- readVcf(out2)
checkTrue(isTRUE(all.equal(roundtrip2, first2)),
msg="ex2.vcf round-trip should be perfect")

## Verify fileDate is preserved (not replaced with today's date)
lines <- readLines(out2)
fd_line <- grep("^##fileDate=", lines, value=TRUE)
checkIdentical(fd_line, "##fileDate=20090805")
}