diff --git a/DESCRIPTION b/DESCRIPTION index 9bdc489..7967a72 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Type: Package Title: Annotation of Genetic Variants Description: Annotate variants, compute amino acid coding changes, predict coding outcomes. -Version: 1.59.0 +Version: 1.59.1 Authors@R: c( person("Valerie", "Oberchain", role="aut"), person("Martin", "Morgan", role="aut"), diff --git a/NEWS b/NEWS index 6fcc401..6f39a72 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,21 @@ +CHANGES IN VERSION 1.59.1 +------------------------- + +BUG FIXES + + o locateVariants() no longer silently misses intronic (and UTR, splice- + site, coding) variants when called against a TxDb that contains + transcripts with no features (e.g. non-coding RNA transcripts that + have no CDS/intron records). Root cause: all six internal cache- + population sites called cdsBy()/intronsByTranscript()/etc. without + filtering empty list elements; findOverlaps() on a GRangesList with + empty elements returns spurious or absent hits depending on the + overlap type. Fix: filter lengths(x) > 0L at every cache-fill site + in methods-locateVariants.R. Reproducer from GitHub issue #87 (and + the earlier #76) now returns correct IntronVariants hits without + requiring a manual cache workaround. (GitHub issue #87) + + CHANGES IN VERSION 1.36.0 ------------------------- diff --git a/R/methods-locateVariants.R b/R/methods-locateVariants.R index b85e287..e37f044 100644 --- a/R/methods-locateVariants.R +++ b/R/methods-locateVariants.R @@ -61,8 +61,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "CodingVariants"), ## for width(ranges) == 0 : decrement start to equal end value if (any(insertion <- width(query) == 0)) start(query)[insertion] <- start(query)[insertion] - 1 - if (!exists("cdsbytx", cache, inherits=FALSE)) - cache[["cdsbytx"]] <- cdsBy(subject) + if (!exists("cdsbytx", cache, inherits=FALSE)) { + z <- cdsBy(subject) + cache[["cdsbytx"]] <- z[lengths(z) > 0L] + } res <- callGeneric(query, cache[["cdsbytx"]], region, ..., ignore.strand=ignore.strand, asHits=asHits) if (is(res, "GenomicRanges") & length(res) > 0L) { @@ -95,8 +97,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "IntronVariants"), ## for width(ranges) == 0 : decrement start to equal end value if (any(insertion <- width(query) == 0)) start(query)[insertion] <- start(query)[insertion] - 1 - if (!exists("intbytx", cache, inherits=FALSE)) - cache[["intbytx"]] <- intronsByTranscript(subject) + if (!exists("intbytx", cache, inherits=FALSE)) { + z <- intronsByTranscript(subject) + cache[["intbytx"]] <- z[lengths(z) > 0L] + } res <- callGeneric(query, cache[["intbytx"]], region, ..., ignore.strand=ignore.strand, asHits=asHits) if (is(res, "GenomicRanges") & length(res) > 0L) { @@ -129,8 +133,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "ThreeUTRVariants"), ## for width(ranges) == 0 : decrement start to equal end value if (any(insertion <- width(query) == 0)) start(query)[insertion] <- start(query)[insertion] - 1 - if (!exists("threeUTRbytx", cache, inherits=FALSE)) - cache[["threeUTRbytx"]] <- threeUTRsByTranscript(subject) + if (!exists("threeUTRbytx", cache, inherits=FALSE)) { + z <- threeUTRsByTranscript(subject) + cache[["threeUTRbytx"]] <- z[lengths(z) > 0L] + } res <- callGeneric(query, cache[["threeUTRbytx"]], region, ..., ignore.strand=ignore.strand, asHits=asHits) if (is(res, "GenomicRanges") & length(res) > 0L) { @@ -163,8 +169,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "FiveUTRVariants"), ## for width(ranges) == 0 : decrement start to equal end value if (any(insertion <- width(query) == 0)) start(query)[insertion] <- start(query)[insertion] - 1 - if (!exists("fiveUTRbytx", cache, inherits=FALSE)) - cache[["fiveUTRbytx"]] <- fiveUTRsByTranscript(subject) + if (!exists("fiveUTRbytx", cache, inherits=FALSE)) { + z <- fiveUTRsByTranscript(subject) + cache[["fiveUTRbytx"]] <- z[lengths(z) > 0L] + } res <- callGeneric(query, cache[["fiveUTRbytx"]], region, ..., ignore.strand=ignore.strand, asHits=asHits) if (is(res, "GenomicRanges") & length(res) > 0L) { @@ -199,8 +207,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "IntergenicVariants"), start(query)[insertion] <- start(query)[insertion] - 1 ## PRECEDEID and FOLLOWID as gene or transcript ids if (idType(region) == "gene") { - if (!exists("txbygene", cache, inherits=FALSE)) - cache[["txbygene"]] <- transcriptsBy(subject, "gene") + if (!exists("txbygene", cache, inherits=FALSE)) { + z <- transcriptsBy(subject, "gene") + cache[["txbygene"]] <- z[lengths(z) > 0L] + } callGeneric(query, cache[["txbygene"]], region, ..., ignore.strand=ignore.strand) } else if (idType(region) == "tx") { @@ -235,8 +245,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "SpliceSiteVariants"), ## for width(ranges) == 0 : decrement start to equal end value if (any(insertion <- width(query) == 0)) start(query)[insertion] <- start(query)[insertion] - 1 - if (!exists("intbytx", cache, inherits=FALSE)) - cache[["intbytx"]] <- intronsByTranscript(subject) + if (!exists("intbytx", cache, inherits=FALSE)) { + z <- intronsByTranscript(subject) + cache[["intbytx"]] <- z[lengths(z) > 0L] + } res <- callGeneric(query, cache[["intbytx"]], region, ..., ignore.strand=ignore.strand, asHits=asHits) if (is(res, "GenomicRanges") & length(res) > 0L) { @@ -357,12 +369,13 @@ setMethod("locateVariants", c("GRanges", "TxDb", "AllVariants"), if (!exists("fiveUTRbytx", cache, inherits=FALSE)) { splicings <- GenomicFeatures:::.getSplicingsForTranscriptsWithCDSs(subject) - cache[["fiveUTRbytx"]] <- - GenomicFeatures:::.make5UTRsByTranscript(subject, splicings) + z <- GenomicFeatures:::.make5UTRsByTranscript(subject, splicings) + cache[["fiveUTRbytx"]] <- z[lengths(z) > 0L] + } + if (!exists("threeUTRbytx", cache, inherits=FALSE)) { + z <- GenomicFeatures:::.make3UTRsByTranscript(subject, splicings) + cache[["threeUTRbytx"]] <- z[lengths(z) > 0L] } - if (!exists("threeUTRbytx", cache, inherits=FALSE)) - cache[["threeUTRbytx"]] <- - GenomicFeatures:::.make3UTRsByTranscript(subject, splicings) fiveUTR <- locateVariants(query, subject, FiveUTRVariants(), diff --git a/inst/unitTests/test_locateVariants-methods.R b/inst/unitTests/test_locateVariants-methods.R index f2f181d..13a0275 100644 --- a/inst/unitTests/test_locateVariants-methods.R +++ b/inst/unitTests/test_locateVariants-methods.R @@ -178,3 +178,47 @@ test_locateVariants_match_predictCoding <- function() start=c(5, 77054, 77054, 77058, 77057, 77057, 77055), end=c(55, 77055, 77055, 77058, 77058, 77058, 77054)), paramRangeID=rep(NA, 7)) + +## --------------------------------------------------------------- +## Test for GitHub issue #87: locateVariants failing on intronic +## variants when GRangesList contains empty elements. +## --------------------------------------------------------------- +test_locateVariants_emptyGRangesList_issue87 <- function() +{ + ## Create a GRangesList with one real intron and one empty element. + ## The empty element simulates a transcript with no introns (e.g., + ## single-exon non-coding RNA) — this is what caused the bug. + introns <- GRangesList( + "tx1" = GRanges("chr1", IRanges(100, 200), strand="+"), + "tx2" = GRanges() # empty element — the culprit in #87 + ) + + ## Query that falls within the intron of tx1 + query <- GRanges("chr1", IRanges(150, 150)) + + ## Direct call to GRangesList method (no TxDb involved): + ## Even without the filter, the GRangesList method calls .makeResult() + ## which uses mapToTranscripts — but the original bug is that the TxDb + ## method didn't filter before calling the GRangesList method. + ## Still, verify the GRangesList method works with empty elements: + loc <- locateVariants(query, introns, IntronVariants()) + checkTrue(length(loc) > 0L, + msg="locateVariants should find the intronic variant") + checkIdentical(mcols(loc)$QUERYID, 1L) + checkIdentical(mcols(loc)$TXID, "tx1") + + ## Also test with the TxDb pathway using the chr22 example from the + ## issue report, but only if the TxDb is available: + if (requireNamespace("TxDb.Hsapiens.UCSC.hg19.knownGene", quietly=TRUE)) { + fl <- system.file("extdata", "chr22.vcf.gz", + package="VariantAnnotation") + vcf <- readVcf(fl, "hg19") + seqlevels(vcf) <- "chr22" + rd <- rowRanges(vcf) + test_snps <- rd[1:10] + loc <- locateVariants(test_snps, txdb, IntronVariants()) + checkTrue(length(loc) > 0L, + msg="issue #87: intronic variants must not be silently dropped") + checkTrue(all(mcols(loc)$LOCATION == "intron")) + } +}