Bioconductor · jmg421 · Jul 1, 2026
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,7 +3,7 @@ Type: Package
 Title: Annotation of Genetic Variants
 Description: Annotate variants, compute amino acid coding changes,
         predict coding outcomes.
-Version: 1.59.0
+Version: 1.59.1
 Authors@R: c(
         person("Valerie", "Oberchain", role="aut"),
         person("Martin", "Morgan", role="aut"),

diff --git a/NEWS b/NEWS
@@ -1,3 +1,20 @@
+CHANGES IN VERSION 1.59.1
+-------------------------
+
+BUG FIXES
+
+    o locateVariants() with IntergenicVariants now correctly classifies
+      flanking genes into PRECEDEID and FOLLOWID based on gene strand.
+      Previously, classification was purely positional (genes to the right
+      went to PRECEDEID, genes to the left went to FOLLOWID) regardless
+      of the gene's transcriptional orientation. Now:
+      - PRECEDEID contains genes the variant is upstream of (i.e., the
+        variant precedes the gene's TSS in transcriptional direction)
+      - FOLLOWID contains genes the variant is downstream of (i.e., the
+        variant follows the gene's TSS in transcriptional direction)
+      This matches Ensembl VEP's upstream_gene_variant/downstream_gene_variant
+      classification. (GitHub issue #55)
+
 CHANGES IN VERSION 1.36.0
 -------------------------
 

diff --git a/R/methods-locateVariants.R b/R/methods-locateVariants.R
@@ -484,19 +484,53 @@ setMethod("locateVariants", c("GRanges", "TxDb", "AllVariants"),
         q_range <- query[intergenic]
         s_genes <- rep(names(subject), elementNROWS(s_range))
         s_unlist <- unlist(s_range, use.names=FALSE)
+        s_strand <- as.character(strand(s_unlist))
 
-        ## ID all genes that fall in upstream / downstream range.
-        ## upstream == follow:
+        ## Find genes in upstream window (left of variant for +/* queries)
         f_range <- .shiftRangeUpDown(q_range, upstream(region), TRUE)
-        f_fo <- findOverlaps(f_range, s_unlist, ignore.strand=ignore.strand)
-        f_factor <- factor(queryHits(f_fo), seq_len(queryLength(f_fo)))
-        f_genes <- unname(splitAsList(s_genes[subjectHits(f_fo)], f_factor))
-
-        ## downstream == precede:
+        f_fo <- findOverlaps(f_range, s_unlist, ignore.strand=TRUE)
+
+        ## Find genes in downstream window (right of variant for +/* queries)
         p_range <- .shiftRangeUpDown(q_range, downstream(region), FALSE)
-        p_fo <- findOverlaps(p_range, s_unlist, ignore.strand=ignore.strand)
-        p_factor <- factor(queryHits(p_fo), seq_len(queryLength(p_fo)))
-        p_genes <- unname(splitAsList(s_genes[subjectHits(p_fo)], p_factor))
+        p_fo <- findOverlaps(p_range, s_unlist, ignore.strand=TRUE)
+
+        ## Strand-aware classification (GitHub issue #55):
+        ## PRECEDEID = genes the variant is upstream of (variant precedes gene TSS)
+        ##   - '+' strand gene to the RIGHT of variant (downstream window)
+        ##   - '-' strand gene to the LEFT of variant (upstream window)
+        ## FOLLOWID = genes the variant is downstream of (variant follows gene TSS)
+        ##   - '+' strand gene to the LEFT of variant (upstream window)
+        ##   - '-' strand gene to the RIGHT of variant (downstream window)
+
+        ## From upstream window: '+' genes → FOLLOWID, '-' genes → PRECEDEID
+        f_qhits <- queryHits(f_fo)
+        f_shits <- subjectHits(f_fo)
+        f_strands <- s_strand[f_shits]
+        f_gnames <- s_genes[f_shits]
+
+        ## From downstream window: '+' genes → PRECEDEID, '-' genes → FOLLOWID
+        p_qhits <- queryHits(p_fo)
+        p_shits <- subjectHits(p_fo)
+        p_strands <- s_strand[p_shits]
+        p_gnames <- s_genes[p_shits]
+
+        nq <- length(q_range)
+
+        ## Build PRECEDEID: '+' from downstream + '-' from upstream
+        prec_qhits <- c(p_qhits[p_strands != "-"],
+                         f_qhits[f_strands == "-"])
+        prec_genes <- c(p_gnames[p_strands != "-"],
+                         f_gnames[f_strands == "-"])
+        prec_factor <- factor(prec_qhits, seq_len(nq))
+        p_genes <- unname(splitAsList(prec_genes, prec_factor))
+
+        ## Build FOLLOWID: '+' from upstream + '-' from downstream
+        foll_qhits <- c(f_qhits[f_strands != "-"],
+                         p_qhits[p_strands == "-"])
+        foll_genes <- c(f_gnames[f_strands != "-"],
+                         p_gnames[p_strands == "-"])
+        foll_factor <- factor(foll_qhits, seq_len(nq))
+        f_genes <- unname(splitAsList(foll_genes, foll_factor))
 
         if (ignore.strand)
             strand(q_range) <- "*"

diff --git a/inst/unitTests/test_locateVariants-methods.R b/inst/unitTests/test_locateVariants-methods.R
@@ -178,3 +178,34 @@ test_locateVariants_match_predictCoding <- function()
         start=c(5, 77054, 77054, 77058, 77057, 77057, 77055), 
         end=c(55, 77055, 77055, 77058, 77058, 77058, 77054)),
         paramRangeID=rep(NA, 7))
+
+## ---------------------------------------------------------------
+## Test for GitHub issue #55: PRECEDEID/FOLLOWID should be
+## strand-aware for intergenic variants.
+## ---------------------------------------------------------------
+test_locateVariants_intergenic_strand_issue55 <- function()
+{
+    ## Variant at position 500.
+    ## Gene A: + strand, right of variant (700-1000)
+    ##   → variant is upstream of Gene A → PRECEDEID
+    ## Gene B: - strand, right of variant (700-1000)
+    ##   → variant is downstream of Gene B → FOLLOWID
+    ## Gene C: + strand, left of variant (100-300)
+    ##   → variant is downstream of Gene C → FOLLOWID
+    ## Gene D: - strand, left of variant (100-300)
+    ##   → variant is upstream of Gene D → PRECEDEID
+    query <- GRanges("chr1", IRanges(500, 500))
+    subject <- GRangesList(
+        "GeneA" = GRanges("chr1", IRanges(700, 1000), strand="+"),
+        "GeneB" = GRanges("chr1", IRanges(700, 1000), strand="-"),
+        "GeneC" = GRanges("chr1", IRanges(100, 300), strand="+"),
+        "GeneD" = GRanges("chr1", IRanges(100, 300), strand="-")
+    )
+    loc <- locateVariants(query, subject, IntergenicVariants(500, 500))
+    checkTrue(length(loc) == 1L)
+
+    prec <- sort(as.character(loc$PRECEDEID[[1]]))
+    foll <- sort(as.character(loc$FOLLOWID[[1]]))
+    checkIdentical(prec, c("GeneA", "GeneD"))
+    checkIdentical(foll, c("GeneB", "GeneC"))
+}