diff --git a/bugbug/tools/code_review/database.py b/bugbug/tools/code_review/database.py index 6320e26978..4f19b7c092 100644 --- a/bugbug/tools/code_review/database.py +++ b/bugbug/tools/code_review/database.py @@ -135,7 +135,7 @@ class EvaluationAction(enum.Enum): @dataclass class SuggestionFeedback: - id: int + id: int | str comment: str file_path: str action: Literal["APPROVE", "REJECT", "IGNORE"] diff --git a/bugbug/tools/code_review/langchain_tools.py b/bugbug/tools/code_review/langchain_tools.py index a6b2b0bbc0..71985712c9 100644 --- a/bugbug/tools/code_review/langchain_tools.py +++ b/bugbug/tools/code_review/langchain_tools.py @@ -203,7 +203,7 @@ async def search_text( except SearchfoxNetworkError as e: return _tool_error(f"search failed: {e}") except SearchfoxRequestError as e: - logger.error( + logger.warning( "Bad request searching for %r (path=%r, langs=%r): %s", query, path_filter, @@ -233,7 +233,7 @@ async def get_field_layout( except SearchfoxNetworkError as e: return _tool_error(f"field layout fetch failed: {e}") except SearchfoxRequestError as e: - logger.error("Bad request fetching field layout for %r: %s", class_name, e) + logger.warning("Bad request fetching field layout for %r: %s", class_name, e) return _tool_error(f"field layout fetch failed: {e}") except Exception as e: logger.error("Unexpected error fetching field layout for %r: %s", class_name, e) @@ -265,7 +265,7 @@ async def get_blame( except SearchfoxNetworkError as e: return _tool_error(f"blame fetch failed: {e}") except SearchfoxRequestError as e: - logger.error( + logger.warning( "Bad request fetching blame for %r lines %r: %s", file_path, lines, e ) return _tool_error(f"blame fetch failed: {e}") @@ -304,7 +304,7 @@ async def check_can_gc( except SearchfoxNetworkError as e: return _tool_error(f"GC check failed: {e}") except SearchfoxRequestError as e: - logger.error("Bad request checking GC status for %r: %s", symbol, e) + logger.warning("Bad request checking GC status for %r: %s", symbol, e) return _tool_error(f"GC check failed: {e}") except Exception as e: logger.error("Unexpected error checking GC status for %r: %s", symbol, e) @@ -333,7 +333,7 @@ async def find_definition( except SearchfoxNetworkError as e: return _tool_error(f"definition lookup failed: {e}") except SearchfoxRequestError as e: - logger.error( + logger.warning( "Bad request finding definition for %r (path=%r): %s", name, path_filter, e ) return _tool_error(f"definition lookup failed: {e}") @@ -376,7 +376,7 @@ async def search_identifier( except SearchfoxNetworkError as e: return _tool_error(f"identifier search failed: {e}") except SearchfoxRequestError as e: - logger.error( + logger.warning( "Bad request searching for identifier %r (path=%r, langs=%r): %s", identifier, path_filter, @@ -408,7 +408,7 @@ async def calls_from( except SearchfoxNetworkError as e: return _tool_error(f"call graph fetch failed: {e}") except SearchfoxRequestError as e: - logger.error( + logger.warning( "Bad request fetching calls from %r (depth=%d): %s", symbol, depth, e ) return _tool_error(f"call graph fetch failed: {e}") @@ -436,7 +436,7 @@ async def calls_to( except SearchfoxNetworkError as e: return _tool_error(f"call graph fetch failed: {e}") except SearchfoxRequestError as e: - logger.error( + logger.warning( "Bad request fetching calls to %r (depth=%d): %s", symbol, depth, e ) return _tool_error(f"call graph fetch failed: {e}") @@ -468,7 +468,7 @@ async def calls_between( except SearchfoxNetworkError as e: return _tool_error(f"call graph fetch failed: {e}") except SearchfoxRequestError as e: - logger.error( + logger.warning( "Bad request fetching calls between %r and %r (depth=%d): %s", symbol_a, symbol_b, @@ -508,7 +508,7 @@ async def get_function_at_line( except SearchfoxNetworkError as e: return _tool_error(f"function lookup failed: {e}") except SearchfoxRequestError as e: - logger.error( + logger.warning( "Bad request fetching function at line %d in %r: %s", line, file_path, e ) return _tool_error(f"function lookup failed: {e}") diff --git a/bugbug/tools/core/platforms/bugzilla.py b/bugbug/tools/core/platforms/bugzilla.py index 7096d7c783..bdd157eb78 100644 --- a/bugbug/tools/core/platforms/bugzilla.py +++ b/bugbug/tools/core/platforms/bugzilla.py @@ -10,6 +10,7 @@ from datetime import datetime, timezone from functools import cached_property +import requests from libmozdata.bugzilla import Bugzilla, BugzillaBase, BugzillaUser from libmozdata.config import set_default_value @@ -18,6 +19,22 @@ logger = logging.getLogger(__name__) EDITBUGS_GROUP_ID = 9 + + +class BugNotFoundError(ValueError): + pass + + +class BugzillaAPIError(IOError): + pass + + +class BugzillaTransientError(BugzillaAPIError): + """Transient error from the Bugzilla API (5xx or unexpected response format).""" + + pass + + TRUST_BEFORE_DATE = datetime(2022, 1, 1, tzinfo=timezone.utc) REDACTED_TITLE = "[Unvalidated bug title redacted for security]" @@ -418,15 +435,28 @@ def __getattr__(self, name: str): @classmethod def get(cls, bug_id: int, allow_private: bool = False) -> "Bug": bugs: list[dict] = [] - Bugzilla( - bug_id, - include_fields=["_default", "comments", "history"], - bughandler=lambda bug, data: data.append(bug), - bugdata=bugs, - ).get_data().wait() + try: + Bugzilla( + bug_id, + include_fields=["_default", "comments", "history"], + bughandler=lambda bug, data: data.append(bug), + bugdata=bugs, + ).get_data().wait() + except requests.HTTPError as e: + if e.response is not None and e.response.status_code >= 500: + raise BugzillaTransientError( + f"Bugzilla API error for bug {bug_id}: {e}" + ) from e + raise BugzillaAPIError(f"Bugzilla API error for bug {bug_id}: {e}") from e + except KeyError as e: + raise BugzillaTransientError( + f"Unexpected Bugzilla API response for bug {bug_id}: {e}" + ) from e if not bugs or (not allow_private and bugs[0]["groups"]): - raise ValueError(f"Bug {bug_id} not found") + raise BugNotFoundError( + f"Bug {bug_id} was not found. It may not exist or may be a confidential security bug." + ) bug_data = bugs[0] assert bug_data["id"] == bug_id diff --git a/bugbug/vectordb.py b/bugbug/vectordb.py index fa499166b5..f3882978d3 100644 --- a/bugbug/vectordb.py +++ b/bugbug/vectordb.py @@ -23,8 +23,8 @@ class VectorPoint: @dataclass(order=True) class PayloadScore: - score: int - id: int + score: float + id: int | str payload: dict @@ -134,9 +134,10 @@ def search( ) -> Iterable[PayloadScore]: qdrant_filter = filter.to_qdrant_filter() if filter else None - for item in self.client.search( - self.collection_name, query, qdrant_filter, limit=limit - ): + response = self.client.query_points( + self.collection_name, query=query, query_filter=qdrant_filter, limit=limit + ) + for item in response.points: yield PayloadScore(item.score, item.id, item.payload) def get_existing_ids(self) -> Iterable[int]: diff --git a/services/mcp/src/bugbug_mcp/__init__.py b/services/mcp/src/bugbug_mcp/__init__.py index 720ed0485c..d17591d6d2 100644 --- a/services/mcp/src/bugbug_mcp/__init__.py +++ b/services/mcp/src/bugbug_mcp/__init__.py @@ -3,6 +3,9 @@ import sentry_sdk +from bugbug.tools.core.platforms.bugzilla import BugNotFoundError +from bugbug_mcp.exceptions import InvalidDocPathError, RevisionNotFoundError + __version__ = "0.1.0" @@ -15,4 +18,5 @@ # Add data like request headers and IP for users, # see https://docs.sentry.io/platforms/python/data-management/data-collected/ for more info send_default_pii=True, + ignore_errors=[BugNotFoundError, RevisionNotFoundError, InvalidDocPathError], ) diff --git a/services/mcp/src/bugbug_mcp/exceptions.py b/services/mcp/src/bugbug_mcp/exceptions.py new file mode 100644 index 0000000000..a00d8190e8 --- /dev/null +++ b/services/mcp/src/bugbug_mcp/exceptions.py @@ -0,0 +1,9 @@ +from fastmcp.exceptions import ToolError + + +class RevisionNotFoundError(ToolError): + pass + + +class InvalidDocPathError(ToolError): + pass diff --git a/services/mcp/src/bugbug_mcp/server.py b/services/mcp/src/bugbug_mcp/server.py index a61cf2f9a8..335a37f8ad 100644 --- a/services/mcp/src/bugbug_mcp/server.py +++ b/services/mcp/src/bugbug_mcp/server.py @@ -8,7 +8,6 @@ import httpx from fastmcp import FastMCP -from fastmcp.exceptions import ToolError from fastmcp.resources import FileResource from pydantic import Field @@ -18,6 +17,7 @@ PhabricatorPatch, SanitizedPhabricatorPatch, ) +from bugbug_mcp.exceptions import InvalidDocPathError, RevisionNotFoundError mcp = FastMCP("Firefox Development MCP Server") @@ -146,7 +146,7 @@ def bughandler(bug): def _get_revision_md(revision_id: int) -> str: patch = SanitizedPhabricatorPatch(revision_id=revision_id) if not patch.is_accessible() or not patch.is_public(): - raise ToolError( + raise RevisionNotFoundError( f"Revision D{revision_id} was not found. It may not exist or may be private." ) return patch.to_md() @@ -187,7 +187,7 @@ async def read_fx_doc_section( ) -> str: """Retrieve the content of a section from Firefox Source Tree Documentation.""" if not doc_path.endswith(".md") and not doc_path.endswith(".rst"): - raise ToolError( + raise InvalidDocPathError( f"Invalid path: {doc_path}. Path must end with `.rst` or `.md`. Use the docs://llms.txt resource on this MCP to find valid paths." ) @@ -196,7 +196,7 @@ async def read_fx_doc_section( async with httpx.AsyncClient() as client: response = await client.get(url) if response.status_code == 404: - raise ToolError( + raise InvalidDocPathError( f"Invalid path: {doc_path}. Use the docs://llms.txt resource on this MCP to find valid paths." ) diff --git a/services/reviewhelper-api/app/routers/request.py b/services/reviewhelper-api/app/routers/request.py index e3ba4578b8..044050a534 100644 --- a/services/reviewhelper-api/app/routers/request.py +++ b/services/reviewhelper-api/app/routers/request.py @@ -5,6 +5,7 @@ from fastapi import APIRouter, Depends, status from fastapi.responses import JSONResponse from sqlalchemy import select +from sqlalchemy.exc import IntegrityError from sqlalchemy.ext.asyncio import AsyncSession from app.auth import verify_external_api_key @@ -80,7 +81,19 @@ async def create_or_get_review_request( status=ReviewStatus.PENDING, ) db.add(review_request) - await db.commit() + try: + await db.commit() + except IntegrityError: + await db.rollback() + existing_request = await db.scalar(stmt) + if existing_request is None: + raise + return JSONResponse( + { + "status": existing_request.status.value, + "message": _build_response_message(existing_request), + } + ) schedule_time = ( request.revision_created_at + INITIAL_TASK_DELAY