Skip to content

Commit

Permalink
Merge branch 'main' into extract-report-logic
Browse files Browse the repository at this point in the history
Signed-off-by: Robin <[email protected]>
  • Loading branch information
Robin5605 authored Oct 30, 2024
2 parents b15fc2e + 5f203df commit f270b80
Show file tree
Hide file tree
Showing 9 changed files with 395 additions and 922 deletions.
1 change: 0 additions & 1 deletion compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ services:
MICROSOFT_TENANT_ID: tenant_id
MICROSOFT_CLIENT_ID: client_id
MICROSOFT_CLIENT_SECRET: client_secret
EMAIL_RECIPIENT: email_recipient
DRAGONFLY_GITHUB_TOKEN: test
volumes:
- "./src:/app/src"
Expand Down
1,072 changes: 336 additions & 736 deletions pdm.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies = [
"alembic==1.13.2",
"asgi-correlation-id==4.3.1",
"dragonfly-logging-config @ git+https://github.com/vipyrsec/[email protected]",
"fastapi-pagination>=0.12.25",
"fastapi-slim==0.112.0",
"httpx==0.27.0",
"letsbuilda-pypi==5.1.0",
Expand Down
17 changes: 12 additions & 5 deletions src/mainframe/endpoints/package.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from collections.abc import Iterable
from collections.abc import Iterable, Sequence
import datetime as dt
from typing import Annotated, Optional

import structlog
from fastapi import APIRouter, Depends, HTTPException
from fastapi_pagination import Page, Params
from fastapi_pagination.ext.sqlalchemy import paginate # type: ignore
from letsbuilda.pypi import Package as PyPIPackage, PyPIServices # type: ignore
from letsbuilda.pypi.exceptions import PackageNotFoundError
from sqlalchemy import select, tuple_
Expand Down Expand Up @@ -122,7 +124,9 @@ def lookup_package_info(
since: Optional[int] = None,
name: Optional[str] = None,
version: Optional[str] = None,
):
page: Optional[int] = None,
size: Optional[int] = None,
) -> Page[Package] | Sequence[Package]:
"""
Lookup information on scanned packages based on name, version, or time
scanned. If multiple packages are returned, they are ordered with the most
Expand Down Expand Up @@ -175,10 +179,13 @@ def lookup_package_info(
query = query.where(Scan.finished_at >= dt.datetime.fromtimestamp(since, tz=dt.timezone.utc))

with session, session.begin():
if page and size:
params = Params(page=page, size=size)
return paginate(
session, query, params=params, transformer=lambda items: [Package.from_db(item) for item in items]
)
data = session.scalars(query).unique()
packages = [Package.from_db(result) for result in data]

return packages
return [Package.from_db(result) for result in data]


def _deduplicate_packages(packages: list[PackageSpecifier], session: Session) -> set[tuple[str, str]]:
Expand Down
90 changes: 12 additions & 78 deletions src/mainframe/endpoints/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from mainframe.json_web_token import AuthenticationData
from mainframe.models.orm import Scan
from mainframe.models.schemas import (
EmailReport,
Error,
ObservationKind,
ObservationReport,
Expand Down Expand Up @@ -101,36 +100,6 @@ def _validate_inspector_url(name: str, version: str, body_url: Optional[str], sc
return inspector_url


def _validate_additional_information(body: ReportPackageBody, scan: Scan):
    """
    Check that `additional_information` was supplied when the report needs it.

    The field may be omitted only when the scan has at least one matched rule
    and `body.use_email` is not `False`.

    Returns:
        None if `body.additional_information` is acceptable.

    Raises:
        HTTPException: 400 Bad Request if `additional_information` is missing
        while the scan has no matched rules or `body.use_email` is `False`.
    """
    log = logger.bind(package={"name": body.name, "version": body.version})

    # A supplied value always passes validation.
    if body.additional_information is not None:
        return

    # Omission is tolerated only for a non-Observation report on a scan that
    # matched at least one rule.
    no_rules_matched = len(scan.rules) == 0
    if not (no_rules_matched or body.use_email is False):
        return

    if no_rules_matched:
        detail = (
            f"additional_information is a required field as package "
            f"`{body.name}@{body.version}` has no matched rules in the database"
        )
    else:
        detail = "additional_information is required when using Observation API"

    missing_field_error = HTTPException(400, detail=detail)
    log.error("Missing additional_information field", error_message=detail, tag="missing_additional_information")
    raise missing_field_error


def _validate_pypi(name: str, version: str, http_client: httpx.Client):
log = logger.bind(package={"name": name, "version": version})

Expand All @@ -157,33 +126,16 @@ def report_package(
"""
Report a package to PyPI.
The optional `use_email` field can be used to send reports by email. This
defaults to `False`.
There are some restrictions on what packages can be reported. They must:
- exist in the database
- exist on PyPI
- not already be reported
While the `inspector_url` and `additional_information` fields are optional
in the schema, the API requires you to provide them in certain cases. Some
of those are outlined below.
`inspector_url` and `additional_information` must both be provided if the
package being reported is in a `QUEUED` or `PENDING` state. That is, the
package has not yet been scanned and therefore has no records for
`inspector_url` or any matched rules.
If the package has successfully been scanned (that is, it is in
a `FINISHED` state), and it has been determined to be malicious, then
neither `inspector_url` nor `additional_information` is required. If the
`inspector_url` is omitted, then it will default to a URL that points to
the file with the highest total score.
If the package has successfully been scanned (that is, it is in
a `FINISHED` state), and it has been determined NOT to be malicious (that
is, it has no matched rules), then you must provide `inspector_url` AND
`additional_information`.
The `inspector_url` argument is required if the package has no matched rules.
If the `inspector_url` argument is not provided for a package with matched rules,
the Inspector URL of the file with the highest total score will be used.
If the `inspector_url` argument is provided for a package with matched rules,
the given Inspector URL will override the default one.
"""

name = body.name
Expand Down Expand Up @@ -211,38 +163,21 @@ def report_package(

raise error
inspector_url = _validate_inspector_url(name, version, body.inspector_url, scan.inspector_url)
_validate_additional_information(body, scan)

# If execution reaches here, we must have found a matching scan in our
# database. Check if the package we want to report exists on PyPI.
_validate_pypi(name, version, httpx_client)

rules_matched: list[str] = [rule.name for rule in scan.rules]

if body.use_email is True:
report = EmailReport(
name=body.name,
version=body.version,
rules_matched=rules_matched,
recipient=body.recipient,
inspector_url=inspector_url,
additional_information=body.additional_information,
)

httpx_client.post(f"{mainframe_settings.reporter_url}/report/email", json=jsonable_encoder(report))
else:
# We previously checked this condition, but the typechecker isn't smart
# enough to figure that out
assert body.additional_information is not None

report = ObservationReport(
kind=ObservationKind.Malware,
summary=body.additional_information,
inspector_url=inspector_url,
extra=dict(yara_rules=rules_matched),
)
report = ObservationReport(
kind=ObservationKind.Malware,
summary=body.additional_information,
inspector_url=inspector_url,
extra=dict(yara_rules=rules_matched),
)

httpx_client.post(f"{mainframe_settings.reporter_url}/report/{name}", json=jsonable_encoder(report))
httpx_client.post(f"{mainframe_settings.reporter_url}/report/{name}", json=jsonable_encoder(report))

database.mark_reported(scan=scan, subject=auth.subject)

Expand All @@ -254,7 +189,6 @@ def report_package(
"inspector_url": inspector_url,
"additional_information": body.additional_information,
"rules_matched": rules_matched,
"use_email": body.use_email,
},
reported_by=auth.subject,
)
Expand Down
13 changes: 1 addition & 12 deletions src/mainframe/models/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,19 +92,8 @@ class PackageSpecifier(BaseModel):


class ReportPackageBody(PackageSpecifier):
recipient: Optional[str]
inspector_url: Optional[str]
additional_information: Optional[str]
use_email: bool = False


class EmailReport(PackageSpecifier):
"""Model for a report using email"""

rules_matched: list[str]
recipient: Optional[str] = None
inspector_url: Optional[str]
additional_information: Optional[str]
additional_information: str


# Taken from
Expand Down
3 changes: 3 additions & 0 deletions src/mainframe/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import sentry_sdk
from asgi_correlation_id import CorrelationIdMiddleware, correlation_id
from fastapi import Depends, FastAPI
from fastapi_pagination import add_pagination
from letsbuilda.pypi import PyPIServices
from sentry_sdk.integrations.logging import LoggingIntegration
from structlog_sentry import SentryProcessor
Expand Down Expand Up @@ -104,3 +105,5 @@ async def update_rules():

for router in routers:
app.include_router(router)

add_pagination(app)
46 changes: 29 additions & 17 deletions tests/test_package.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Optional
import datetime

from fastapi_pagination import Page
import pytest
from fastapi import HTTPException
from letsbuilda.pypi import PyPIServices
Expand Down Expand Up @@ -29,33 +30,44 @@


@pytest.mark.parametrize(
"since,name,version",
"since,name,version,page,size",
[
(0, "a", None),
(0, None, None),
(0, "b", None),
(None, "a", "0.1.0"),
(0, "a", None, 1, 50),
(0, None, None, 1, 50),
(0, "b", None, 1, 50),
(None, "a", "0.1.0", 1, 50),
(0, "a", None, None, None), # No pagination parameters
(None, "a", "0.1.0", None, None), # No pagination parameters
],
)
def test_package_lookup(
since: Optional[int],
name: Optional[str],
version: Optional[str],
page: Optional[int],
size: Optional[int],
test_data: list[Scan],
db_session: Session,
):
exp: set[tuple[str, str]] = set()
for scan in test_data:
if since is not None and (scan.finished_at is None or since > int(scan.finished_at.timestamp())):
continue
if name is not None and scan.name != name:
continue
if version is not None and scan.version != version:
continue
exp.add((scan.name, scan.version))

scans = lookup_package_info(db_session, since, name, version)
assert exp == {(scan.name, scan.version) for scan in scans}
expected_scans = {
(scan.name, scan.version)
for scan in test_data
if (
(since is None or (scan.finished_at and since <= int(scan.finished_at.timestamp())))
and (name is None or scan.name == name)
and (version is None or scan.version == version)
)
}

actual_scans = lookup_package_info(db_session, since, name, version, page, size)

actual_scan_set: set[tuple[str, str | None]] = (
{(scan.name, scan.version) for scan in actual_scans.items} # type: ignore
if isinstance(actual_scans, Page)
else {(scan.name, scan.version) for scan in actual_scans}
)

assert expected_scans == actual_scan_set


@pytest.mark.parametrize(
Expand Down
74 changes: 1 addition & 73 deletions tests/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
validate_package,
get_reported_version,
)
from mainframe.endpoints.report import (
_validate_additional_information, # pyright: ignore [reportPrivateUsage]
)
from mainframe.endpoints.report import (
_validate_inspector_url, # pyright: ignore [reportPrivateUsage]
)
Expand Down Expand Up @@ -248,76 +245,7 @@ def test_report_inspector_url(body_url: Optional[str], scan_url: Optional[str]):


@pytest.mark.parametrize(
("body", "scan"),
[
( # No additional information, and no rules with email
ReportPackageBody(
name="c",
version="1.0.0",
recipient=None,
inspector_url="inspector url override",
additional_information=None,
use_email=True,
),
Scan(
name="c",
version="1.0.0",
status=Status.FINISHED,
score=0,
inspector_url=None,
rules=[],
download_urls=[],
queued_at=datetime.now() - timedelta(seconds=60),
queued_by="remmy",
pending_at=datetime.now() - timedelta(seconds=30),
pending_by="remmy",
finished_at=datetime.now() - timedelta(seconds=10),
finished_by="remmy",
reported_at=None,
reported_by=None,
fail_reason=None,
commit_hash="test commit hash",
),
),
( # No additional information with Observations
ReportPackageBody(
name="c",
version="1.0.0",
recipient=None,
inspector_url="inspector url override",
additional_information=None,
use_email=False,
),
Scan(
name="c",
version="1.0.0",
status=Status.FINISHED,
score=0,
inspector_url=None,
rules=[Rule(name="ayo")],
download_urls=[],
queued_at=datetime.now() - timedelta(seconds=60),
queued_by="remmy",
pending_at=datetime.now() - timedelta(seconds=30),
pending_by="remmy",
finished_at=datetime.now() - timedelta(seconds=10),
finished_by="remmy",
reported_at=None,
reported_by=None,
fail_reason=None,
commit_hash="test commit hash",
),
),
],
)
def test_report_missing_additional_information(body: ReportPackageBody, scan: Scan):
with pytest.raises(HTTPException) as e:
_validate_additional_information(body, scan)
assert e.value.status_code == 400


@pytest.mark.parametrize(
("scans", "name", "version", "expected_exception"),
("scans", "name", "version", "expected_status_code"),
[
([], "a", "1.0.0", PackageNotFound),
(
Expand Down

0 comments on commit f270b80

Please sign in to comment.