Migrate Alpine Linux importer to aboutcode pipeline by keshav-space · Pull Request #1737 · aboutcode-org/vulnerablecode · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Migrate Alpine Linux importer to aboutcode pipeline #1737

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

from vulnerabilities.importers import alpine_linux
from vulnerabilities.importers import apache_httpd
from vulnerabilities.importers import apache_kafka
from vulnerabilities.importers import apache_tomcat
Expand Down Expand Up @@ -35,6 +34,7 @@
from vulnerabilities.importers import vulnrichment
from vulnerabilities.importers import xen
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.pipelines import alpine_linux_importer
from vulnerabilities.pipelines import github_importer
from vulnerabilities.pipelines import gitlab_importer
from vulnerabilities.pipelines import nginx_importer
Expand All @@ -44,7 +44,6 @@
from vulnerabilities.pipelines import pysec_importer

IMPORTERS_REGISTRY = [
alpine_linux.AlpineImporter,
openssl.OpensslImporter,
redhat.RedhatImporter,
debian.DebianImporter,
Expand Down Expand Up @@ -78,6 +77,7 @@
github_importer.GitHubAPIImporterPipeline,
nvd_importer.NVDImporterPipeline,
pysec_importer.PyPIImporterPipeline,
alpine_linux_importer.AlpineLinuxImporterPipeline,
]

IMPORTERS_REGISTRY = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

from django.db import migrations

"""
Update the created_by field on Advisory from the old qualified_name
to the new pipeline_id.
"""


def update_created_by(apps, schema_editor):
    """
    Re-label Alpine Linux advisories from the old importer's qualified name
    to the new pipeline id.

    ``apps`` is the app registry handed in by ``migrations.RunPython``; it is
    used to look up the ``Advisory`` model. ``schema_editor`` is unused.
    """
    # Both identifiers are frozen here as literals instead of being imported
    # from the pipeline module: a migration must keep running unchanged even
    # if that module is later renamed, moved or deleted.
    old_created_by = "vulnerabilities.importers.alpine_linux.AlpineImporter"
    new_created_by = "alpine_linux_importer"

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=old_created_by).update(created_by=new_created_by)


def reverse_update_created_by(apps, schema_editor):
    """
    Undo the forward data migration: re-label Alpine Linux advisories from
    the new pipeline id back to the old importer's qualified name.

    ``apps`` is the app registry handed in by ``migrations.RunPython``; it is
    used to look up the ``Advisory`` model. ``schema_editor`` is unused.
    """
    # Both identifiers are frozen here as literals instead of being imported
    # from the pipeline module: a migration must keep running unchanged even
    # if that module is later renamed, moved or deleted.
    pipeline_created_by = "alpine_linux_importer"
    legacy_created_by = "vulnerabilities.importers.alpine_linux.AlpineImporter"

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=pipeline_created_by).update(
        created_by=legacy_created_by
    )


class Migration(migrations.Migration):
    """
    Data migration that rewrites ``Advisory.created_by`` for Alpine Linux
    advisories; reversible through the paired reverse function.
    """

    # Must run after migration 0086_codefix of the "vulnerabilities" app.
    dependencies = [("vulnerabilities", "0086_codefix")]

    operations = [
        migrations.RunPython(
            code=update_created_by,
            reverse_code=reverse_update_created_by,
        ),
    ]
6D40
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
Expand All @@ -21,40 +20,61 @@

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Importer
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.references import WireSharkReference
from vulnerabilities.references import XsaReference
from vulnerabilities.references import ZbxReference
from vulnerabilities.utils import fetch_response
from vulnerabilities.utils import is_cve

LOGGER = logging.getLogger(__name__)
BASE_URL = "https://secdb.alpinelinux.org/"

class AlpineLinuxImporterPipeline(VulnerableCodeBaseImporterPipeline):
"""Collect Alpine Linux advisories."""

pipeline_id = "alpine_linux_importer"

class AlpineImporter(Importer):
spdx_license_expression = "CC-BY-SA-4.0"
license_url = "https://secdb.alpinelinux.org/license.txt"
url = "https://secdb.alpinelinux.org/"
importer_name = "Alpine Linux Importer"

def advisory_data(self) -> Iterable[AdvisoryData]:
page_response_content = fetch_response(BASE_URL).content
advisory_directory_links = fetch_advisory_directory_links(page_response_content)
@classmethod
def steps(cls):
return (
cls.collect_and_store_advisories,
cls.import_new_advisories,
)

def advisories_count(self) -> int:
return 0

def collect_advisories(self) -> Iterable[AdvisoryData]:
page_response_content = fetch_response(self.url).content
advisory_directory_links = fetch_advisory_directory_links(
page_response_content, self.url, self.log
)
advisory_links = []
for advisory_directory_link in advisory_directory_links:
advisory_directory_page = fetch_response(advisory_directory_link).content
advisory_links.extend(
fetch_advisory_links(advisory_directory_page, advisory_directory_link)
fetch_advisory_links(advisory_directory_page, advisory_directory_link, self.log)
)
for link in advisory_links:
record = fetch_response(link).json()
if not record["packages"]:
LOGGER.error(f'"packages" not found in {link!r}')
self.log(
f'"packages" not found in {link!r}',
level=logging.DEBUG,
)
continue
yield from process_record(record=record, url=link)
yield from process_record(record=record, url=link, logger=self.log)


def fetch_advisory_directory_links(page_response_content: str) -> List[str]:
def fetch_advisory_directory_links(
page_response_content: str,
base_url: str,
logger: callable = None,
) -> List[str]:
"""
Return a list of advisory directory links present in `page_response_content` html string
"""
Expand All @@ -66,60 +86,83 @@ def fetch_advisory_directory_links(page_response_content: str) -> List[str]:
]

if not alpine_versions:
LOGGER.error(f"No versions found in {BASE_URL!r}")
if logger:
logger(
f"No versions found in {base_url!r}",
level=logging.DEBUG,
)
return []

advisory_directory_links = [urljoin(BASE_URL, version) for version in alpine_versions]
advisory_directory_links = [urljoin(base_url, version) for version in alpine_versions]

return advisory_directory_links


def fetch_advisory_links(
advisory_directory_page: str, advisory_directory_link: str
advisory_directory_page: str,
advisory_directory_link: str,
logger: callable = None,
) -> Iterable[str]:
"""
Yield json file urls present in `advisory_directory_page`
"""
advisory_directory_page = BeautifulSoup(advisory_directory_page, features="lxml")
anchor_tags = advisory_directory_page.find_all("a")
if not anchor_tags:
LOGGER.error(f"No anchor tags found in {advisory_directory_link!r}")
if logger:
logger(
f"No anchor tags found in {advisory_directory_link!r}",
level=logging.DEBUG,
)
return iter([])
for anchor_tag in anchor_tags:
if anchor_tag.text.endswith("json"):
yield urljoin(advisory_directory_link, anchor_tag.text)


def check_for_attributes(record) -> bool:
def check_for_attributes(record, logger) -> bool:
attributes = ["distroversion", "reponame", "archs"]
for attribute in attributes:
if attribute not in record:
LOGGER.error(f'"{attribute!r}" not found in {record!r}')
if logger:
logger(
f'"{attribute!r}" not found in {record!r}',
level=logging.DEBUG,
)
return False
return True


def process_record(record: dict, url: str) -> Iterable[AdvisoryData]:
def process_record(record: dict, url: str, logger: callable = None) -> Iterable[AdvisoryData]:
"""
Return a list of AdvisoryData objects by processing data
present in that `record`
"""
if not record.get("packages"):
LOGGER.error(f'"packages" not found in this record {record!r}')
if logger:
logger(
f'"packages" not found in this record {record!r}',
level=logging.DEBUG,
)
return []

for package in record["packages"]:
if not package["pkg"]:
LOGGER.error(f'"pkg" not found in this package {package!r}')
if logger:
logger(
f'"pkg" not found in this package {package!r}',
level=logging.DEBUG,
)
continue
if not check_for_attributes(record):
if not check_for_attributes(record, logger):
continue
yield from load_advisories(
pkg_infos=package["pkg"],
distroversion=record["distroversion"],
reponame=record["reponame"],
archs=record["archs"],
url=url,
logger=logger,
)


Expand All @@ -129,24 +172,37 @@ def load_advisories(
reponame: str,
archs: List[str],
url: str,
logger: callable = None,
) -> Iterable[AdvisoryData]:
"""
Yield AdvisoryData by mapping data from `pkg_infos`
and form PURL for AffectedPackages by using
`distroversion`, `reponame`, `archs`
"""
if not pkg_infos.get("name"):
LOGGER.error(f'"name" is not available in package {pkg_infos!r}')
if logger:
logger(
f'"name" is not available in package {pkg_infos!r}',
level=logging.DEBUG,
)
return []

for version, fixed_vulns in pkg_infos["secfixes"].items():
if not fixed_vulns:
LOGGER.error(f"No fixed vulnerabilities in version {version!r}")
if logger:
logger(
f"No fixed vulnerabilities in version {version!r}",
level=logging.DEBUG,
)
continue

for vuln_ids in fixed_vulns:
if not isinstance(vuln_ids, str):
LOGGER.error(f"{vuln_ids!r} is not of `str` instance")
if logger:
logger(
f"{vuln_ids!r} is not of `str` instance",
level=logging.DEBUG,
)
continue
vuln_ids = vuln_ids.split()
aliases = []
Expand Down Expand Up @@ -179,10 +235,18 @@ def load_advisories(
try:
fixed_version = AlpineLinuxVersion(version)
except Exception as e:
LOGGER.error(f"{version!r} is not a valid AlpineVersion {e!r}")
if logger:
logger(
f"{version!r} is not a valid AlpineVersion {e!r}",
level=logging.DEBUG,
)
continue
if not isinstance(archs, List):
LOGGER.error(f"{archs!r} is not of `List` instance")
if logger:
logger(
f"{archs!r} is not of `List` instance",
level=logging.DEBUG,
)
continue
if archs:
for arch in archs:
Expand Down
Loading
0