Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Type: Package
Title: Annotation of Genetic Variants
Description: Annotate variants, compute amino acid coding changes,
predict coding outcomes.
Version: 1.59.0
Version: 1.59.1
Authors@R: c(
person("Valerie", "Oberchain", role="aut"),
person("Martin", "Morgan", role="aut"),
Expand Down
18 changes: 18 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
CHANGES IN VERSION 1.59.1
-------------------------

BUG FIXES

o locateVariants() no longer silently misses intronic (and UTR, splice-
site, coding) variants when called against a TxDb that contains
transcripts with no features (e.g. non-coding RNA transcripts that
have no CDS/intron records). Root cause: all six internal cache-
population sites called cdsBy()/intronsByTranscript()/etc. without
filtering empty list elements; findOverlaps() on a GRangesList with
empty elements returns spurious or absent hits depending on the
overlap type. Fix: filter lengths(x) > 0L at every cache-fill site
in methods-locateVariants.R. Reproducer from GitHub issue #87 (and
the earlier #76) now returns correct IntronVariants hits without
requiring a manual cache workaround. (GitHub issue #87)


CHANGES IN VERSION 1.36.0
-------------------------

Expand Down
47 changes: 30 additions & 17 deletions R/methods-locateVariants.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "CodingVariants"),
## for width(ranges) == 0 : decrement start to equal end value
if (any(insertion <- width(query) == 0))
start(query)[insertion] <- start(query)[insertion] - 1
if (!exists("cdsbytx", cache, inherits=FALSE))
cache[["cdsbytx"]] <- cdsBy(subject)
if (!exists("cdsbytx", cache, inherits=FALSE)) {
z <- cdsBy(subject)
cache[["cdsbytx"]] <- z[lengths(z) > 0L]
}
res <- callGeneric(query, cache[["cdsbytx"]], region, ...,
ignore.strand=ignore.strand, asHits=asHits)
if (is(res, "GenomicRanges") & length(res) > 0L) {
Expand Down Expand Up @@ -95,8 +97,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "IntronVariants"),
## for width(ranges) == 0 : decrement start to equal end value
if (any(insertion <- width(query) == 0))
start(query)[insertion] <- start(query)[insertion] - 1
if (!exists("intbytx", cache, inherits=FALSE))
cache[["intbytx"]] <- intronsByTranscript(subject)
if (!exists("intbytx", cache, inherits=FALSE)) {
z <- intronsByTranscript(subject)
cache[["intbytx"]] <- z[lengths(z) > 0L]
}
res <- callGeneric(query, cache[["intbytx"]], region, ...,
ignore.strand=ignore.strand, asHits=asHits)
if (is(res, "GenomicRanges") & length(res) > 0L) {
Expand Down Expand Up @@ -129,8 +133,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "ThreeUTRVariants"),
## for width(ranges) == 0 : decrement start to equal end value
if (any(insertion <- width(query) == 0))
start(query)[insertion] <- start(query)[insertion] - 1
if (!exists("threeUTRbytx", cache, inherits=FALSE))
cache[["threeUTRbytx"]] <- threeUTRsByTranscript(subject)
if (!exists("threeUTRbytx", cache, inherits=FALSE)) {
z <- threeUTRsByTranscript(subject)
cache[["threeUTRbytx"]] <- z[lengths(z) > 0L]
}
res <- callGeneric(query, cache[["threeUTRbytx"]], region, ...,
ignore.strand=ignore.strand, asHits=asHits)
if (is(res, "GenomicRanges") & length(res) > 0L) {
Expand Down Expand Up @@ -163,8 +169,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "FiveUTRVariants"),
## for width(ranges) == 0 : decrement start to equal end value
if (any(insertion <- width(query) == 0))
start(query)[insertion] <- start(query)[insertion] - 1
if (!exists("fiveUTRbytx", cache, inherits=FALSE))
cache[["fiveUTRbytx"]] <- fiveUTRsByTranscript(subject)
if (!exists("fiveUTRbytx", cache, inherits=FALSE)) {
z <- fiveUTRsByTranscript(subject)
cache[["fiveUTRbytx"]] <- z[lengths(z) > 0L]
}
res <- callGeneric(query, cache[["fiveUTRbytx"]], region, ...,
ignore.strand=ignore.strand, asHits=asHits)
if (is(res, "GenomicRanges") & length(res) > 0L) {
Expand Down Expand Up @@ -199,8 +207,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "IntergenicVariants"),
start(query)[insertion] <- start(query)[insertion] - 1
## PRECEDEID and FOLLOWID as gene or transcript ids
if (idType(region) == "gene") {
if (!exists("txbygene", cache, inherits=FALSE))
cache[["txbygene"]] <- transcriptsBy(subject, "gene")
if (!exists("txbygene", cache, inherits=FALSE)) {
z <- transcriptsBy(subject, "gene")
cache[["txbygene"]] <- z[lengths(z) > 0L]
}
callGeneric(query, cache[["txbygene"]], region, ...,
ignore.strand=ignore.strand)
} else if (idType(region) == "tx") {
Expand Down Expand Up @@ -235,8 +245,10 @@ setMethod("locateVariants", c("GRanges", "TxDb", "SpliceSiteVariants"),
## for width(ranges) == 0 : decrement start to equal end value
if (any(insertion <- width(query) == 0))
start(query)[insertion] <- start(query)[insertion] - 1
if (!exists("intbytx", cache, inherits=FALSE))
cache[["intbytx"]] <- intronsByTranscript(subject)
if (!exists("intbytx", cache, inherits=FALSE)) {
z <- intronsByTranscript(subject)
cache[["intbytx"]] <- z[lengths(z) > 0L]
}
res <- callGeneric(query, cache[["intbytx"]], region, ...,
ignore.strand=ignore.strand, asHits=asHits)
if (is(res, "GenomicRanges") & length(res) > 0L) {
Expand Down Expand Up @@ -357,12 +369,13 @@ setMethod("locateVariants", c("GRanges", "TxDb", "AllVariants"),
if (!exists("fiveUTRbytx", cache, inherits=FALSE)) {
splicings <-
GenomicFeatures:::.getSplicingsForTranscriptsWithCDSs(subject)
cache[["fiveUTRbytx"]] <-
GenomicFeatures:::.make5UTRsByTranscript(subject, splicings)
z <- GenomicFeatures:::.make5UTRsByTranscript(subject, splicings)
cache[["fiveUTRbytx"]] <- z[lengths(z) > 0L]
}
if (!exists("threeUTRbytx", cache, inherits=FALSE)) {
z <- GenomicFeatures:::.make3UTRsByTranscript(subject, splicings)
cache[["threeUTRbytx"]] <- z[lengths(z) > 0L]
}
if (!exists("threeUTRbytx", cache, inherits=FALSE))
cache[["threeUTRbytx"]] <-
GenomicFeatures:::.make3UTRsByTranscript(subject, splicings)

fiveUTR <-
locateVariants(query, subject, FiveUTRVariants(),
Expand Down
44 changes: 44 additions & 0 deletions inst/unitTests/test_locateVariants-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,47 @@ test_locateVariants_match_predictCoding <- function()
start=c(5, 77054, 77054, 77058, 77057, 77057, 77055),
end=c(55, 77055, 77055, 77058, 77058, 77058, 77054)),
paramRangeID=rep(NA, 7))

## ---------------------------------------------------------------
## Test for GitHub issue #87: locateVariants failing on intronic
## variants when GRangesList contains empty elements.
## ---------------------------------------------------------------
test_locateVariants_emptyGRangesList_issue87 <- function()
{
## Create a GRangesList with one real intron and one empty element.
## The empty element simulates a transcript with no introns (e.g.,
## single-exon non-coding RNA) — this is what caused the bug.
introns <- GRangesList(
"tx1" = GRanges("chr1", IRanges(100, 200), strand="+"),
"tx2" = GRanges() # empty element — the culprit in #87
)

## Query that falls within the intron of tx1
query <- GRanges("chr1", IRanges(150, 150))

## Direct call to GRangesList method (no TxDb involved):
## Even without the filter, the GRangesList method calls .makeResult()
## which uses mapToTranscripts — but the original bug is that the TxDb
## method didn't filter before calling the GRangesList method.
## Still, verify the GRangesList method works with empty elements:
loc <- locateVariants(query, introns, IntronVariants())
checkTrue(length(loc) > 0L,
msg="locateVariants should find the intronic variant")
checkIdentical(mcols(loc)$QUERYID, 1L)
checkIdentical(mcols(loc)$TXID, "tx1")

## Also test with the TxDb pathway using the chr22 example from the
## issue report, but only if the TxDb is available:
if (requireNamespace("TxDb.Hsapiens.UCSC.hg19.knownGene", quietly=TRUE)) {
fl <- system.file("extdata", "chr22.vcf.gz",
package="VariantAnnotation")
vcf <- readVcf(fl, "hg19")
seqlevels(vcf) <- "chr22"
rd <- rowRanges(vcf)
test_snps <- rd[1:10]
loc <- locateVariants(test_snps, txdb, IntronVariants())
checkTrue(length(loc) > 0L,
msg="issue #87: intronic variants must not be silently dropped")
checkTrue(all(mcols(loc)$LOCATION == "intron"))
}
}