diff --git a/DESCRIPTION b/DESCRIPTION index 9bdc489..7967a72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Type: Package Title: Annotation of Genetic Variants Description: Annotate variants, compute amino acid coding changes, predict coding outcomes. -Version: 1.59.0 +Version: 1.59.1 Authors@R: c( person("Valerie", "Oberchain", role="aut"), person("Martin", "Morgan", role="aut"), diff --git a/NEWS b/NEWS index 6fcc401..0732d4b 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,18 @@ +CHANGES IN VERSION 1.59.1 +------------------------- + +BUG FIXES + + o writeVcf() now restores '*' (spanning deletion) alleles in multi- + allele ALT fields instead of writing empty alleles that produce + malformed VCF lines (e.g. "CA,,TTA" instead of "CA,*,TTA"). This + fixes IGV/htsjdk errors ("empty alleles are not permitted in VCF + records") when opening files written by VariantAnnotation. + Note: the ambiguous single-allele case (ALT='*' vs ALT='.') cannot + be resolved at write time due to read-time representation loss; + a future fix at the C parsing level would fully resolve this. + (GitHub issue #65) + CHANGES IN VERSION 1.36.0 ------------------------- diff --git a/R/methods-writeVcf.R b/R/methods-writeVcf.R index 82b7f7f..7dd15b1 100644 --- a/R/methods-writeVcf.R +++ b/R/methods-writeVcf.R @@ -39,6 +39,28 @@ if (is(ALT, "XStringSetList")) { ALT <- as(ALT, "CharacterList") } + ## Restore '*' for spanning deletion alleles stored as empty strings + ## at read time (GitHub issue #65). Per VCF spec, '*' represents a + ## spanning deletion allele. At read time, both '*' and '.' (no allele) + ## get converted to empty strings, making them indistinguishable. + ## Conservative fix: only restore '*' when an empty string appears + ## alongside other alleles (multi-allele context), since that's + ## unambiguously a spanning deletion. A lone empty string is written + ## as '.' to preserve monomorphic reference site semantics. 
+    if (is(ALT, "CharacterList")) {
+        ## Work on the flattened alleles instead of calling an R closure per
+        ## record. Test for CharacterList only: is(ALT, "List") also matches
+        ## an XStringSet ALT, whose elements are single sequences, so
+        ## length() there would be sequence width, not allele count.
+        nALT <- elementNROWS(ALT)
+        flat <- unlist(ALT, use.names=FALSE)
+        spanning <- rep(nALT > 1L, nALT) & !is.na(flat) & !nzchar(flat)
+        flat[spanning] <- "*"
+        ALT <- relist(flat, ALT)
+    } else if (is.character(ALT)) {
+        ## NOTE(review): one allele per row; assumes '' is never '.' -- confirm.
+        ALT[!is.na(ALT) & !nzchar(ALT)] <- "*"
+    }
     ALT <- as.character(unstrsplit(ALT, ","))
     ALT[nchar(ALT) == 0L | is.na(ALT)] <- "."
     if (is.null(QUAL <- qual(obj)))