diff --git a/DESCRIPTION b/DESCRIPTION index 9bdc489..7967a72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Type: Package Title: Annotation of Genetic Variants Description: Annotate variants, compute amino acid coding changes, predict coding outcomes. -Version: 1.59.0 +Version: 1.59.1 Authors@R: c( person("Valerie", "Oberchain", role="aut"), person("Martin", "Morgan", role="aut"), diff --git a/NEWS b/NEWS index 6fcc401..05d4887 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,21 @@ +CHANGES IN VERSION 1.59.1 +------------------------- + +BUG FIXES + + o writeVcf() now preserves the existing fileDate header value instead + of unconditionally replacing it with the current date. This restores + provenance information and enables faithful round-trips. + + o writeVcf() no longer emits bare ##contig= header lines when + seqinfo contains only NA seqlengths and NA genomes. Such lines are + uninformative and were the primary cause of round-trip mismatches + (extra contig lines altered the header structure on re-read). + + o Together these changes allow all.equal(readVcf(writeVcf(x)), x) + to succeed for VCFs whose seqinfo is all-NA (e.g., structural.vcf). + (GitHub issue #78) + CHANGES IN VERSION 1.36.0 ------------------------- diff --git a/R/methods-writeVcf.R b/R/methods-writeVcf.R index 82b7f7f..0d0abcc 100644 --- a/R/methods-writeVcf.R +++ b/R/methods-writeVcf.R @@ -152,8 +152,15 @@ header <- c(fileformat, header) } contig <- any(grepl("contig", names(header), fixed=TRUE)) - if (!contig) - header <- c(header, .contigsFromSeqinfo(seqinfo(obj))) + if (!contig) { + ## Only add contig lines if seqinfo carries useful information + ## (at least one non-NA seqlength or genome). Bare ##contig= + ## lines are uninformative and break round-trip fidelity (#78). + si <- seqinfo(obj) + has_info <- !all(is.na(seqlengths(si))) || !all(is.na(genome(si))) + if (has_info) + header <- c(header, .contigsFromSeqinfo(si)) + } ## Last line before data colnms <- c("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO") @@ -171,11 +178,12 @@ if (nms == "META" && ncol(df) == 1L) { if (!"fileformat" %in% rownames(df)) df <- rbind(DataFrame(Value="VCFv4.3", row.names="fileformat"), df) - fd <- format(Sys.time(), "%Y%m%d") - if ("fileDate" %in% rownames(df)) - df[rownames(df) == "fileDate", ] <- fd - else + ## Preserve existing fileDate for round-trip fidelity (#78). + ## Only add a new fileDate if none exists. + if (!"fileDate" %in% rownames(df)) { + fd <- format(Sys.time(), "%Y%m%d") df <- rbind(df, DataFrame(Value=fd, row.names="fileDate")) + } paste("##", rownames(df), "=", df[,1], sep="") ## Support VCF v4.2 and v4.3 PEDIGREE field } else if(nms == "PEDIGREE" || nms == "ALT") { @@ -193,8 +201,8 @@ ## (Rsamtools reports unstructured headers as one column named "Value") } else if(ncol(df) == 1L && names(df)[1] == "Value" && nrow(df) == 1L) { if (nms == "fileDate") { - fd <- format(Sys.time(), "%Y%m%d") - paste("##fileDate=", fd, sep="") + ## Preserve existing fileDate for round-trip fidelity (#78). + paste("##fileDate=", df[1, 1], sep="") } else paste("##", nms, "=", df[,1], sep="") ## 'non-simple' key-value pairs diff --git a/inst/unitTests/test_writeVcf-methods.R b/inst/unitTests/test_writeVcf-methods.R index 27d0091..4c49b00 100644 --- a/inst/unitTests/test_writeVcf-methods.R +++ b/inst/unitTests/test_writeVcf-methods.R @@ -133,3 +133,38 @@ chk = fixed(header(readVcf(tmp)))$ALT checkIdentical(chk, good) } + +## --------------------------------------------------------------- +## Test for GitHub issue #78: faithful round-trip via writeVcf + readVcf +## --------------------------------------------------------------- +test_writeVcf_roundtrip_issue78 <- function() +{ + ## structural.vcf has all-NA seqinfo and an existing fileDate + fl <- system.file("extdata", "structural.vcf", package="VariantAnnotation") + out <- tempfile(fileext=".vcf") + on.exit(unlink(out)) + + first <- readVcf(fl) + writeVcf(first, out) + roundtrip <- readVcf(out) + + ## Should be identical (no spurious contig lines, fileDate preserved) + checkTrue(isTRUE(all.equal(roundtrip, first)), + msg="structural.vcf round-trip should be perfect") + + ## ex2.vcf has real seqinfo — contig lines should still be written + fl2 <- system.file("extdata", "ex2.vcf", package="VariantAnnotation") + out2 <- tempfile(fileext=".vcf") + on.exit(unlink(out2), add=TRUE) + + first2 <- readVcf(fl2) + writeVcf(first2, out2) + roundtrip2 <- readVcf(out2) + checkTrue(isTRUE(all.equal(roundtrip2, first2)), + msg="ex2.vcf round-trip should be perfect") + + ## Verify fileDate is preserved (not replaced with today's date) + lines <- readLines(out2) + fd_line <- grep("^##fileDate=", lines, value=TRUE) + checkIdentical(fd_line, "##fileDate=20090805") +}