From 258254bf7a1a757a5d3f854da28a4eccaf9896d3 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 9 Dec 2022 18:33:56 +0530 Subject: [PATCH] Migrate NPM importer Signed-off-by: Tushar Goel --- CHANGELOG.rst | 5 + vulnerabilities/importer.py | 9 + vulnerabilities/importers/__init__.py | 2 + vulnerabilities/importers/npm.py | 284 +++++++----------- vulnerabilities/models.py | 4 + vulnerabilities/tests/conftest.py | 1 - .../tests/test_data/npm_sample.json | 27 ++ .../parse-advisory-npm-expected.json | 58 ++++ vulnerabilities/tests/test_npm.py | 155 ++-------- 9 files changed, 246 insertions(+), 299 deletions(-) create mode 100644 vulnerabilities/tests/test_data/npm_sample.json create mode 100644 vulnerabilities/tests/test_data/parse-advisory-npm-expected.json diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3303014e6..0c2151a22 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,11 @@ Release notes ============= +Version v31.1.0 +---------------- + +- We re-enabled support for the NPM vulnerabilities advisories importer. + Version v31.0.0 ---------------- diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index e0d8b5ab4..576e3a040 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -294,6 +294,7 @@ class Importer: spdx_license_expression = "" license_url = "" notice = "" + vcs_response = None def __init__(self): if not self.spdx_license_expression: @@ -319,6 +320,14 @@ def advisory_data(self) -> Iterable[AdvisoryData]: """ raise NotImplementedError + def clone(self, repo_url): + try: + self.vcs_response = fetch_via_vcs(repo_url) + except Exception as e: + msg = f"Failed to fetch {repo_url} via vcs: {e}" + logger.error(msg) + raise ForkError(msg) from e + class ForkError(Exception): pass diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 0c0cd7f8d..bfdc1f736 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -14,6 +14,7 @@ from vulnerabilities.importers import github from vulnerabilities.importers import gitlab from vulnerabilities.importers import nginx +from vulnerabilities.importers import npm from vulnerabilities.importers import nvd from vulnerabilities.importers import openssl from vulnerabilities.importers import postgresql @@ -37,6 +38,7 @@ archlinux.ArchlinuxImporter, ubuntu.UbuntuImporter, debian_oval.DebianOvalImporter, + npm.NpmImporter, ] IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY} diff --git a/vulnerabilities/importers/npm.py b/vulnerabilities/importers/npm.py index 0b962255b..4b6df9055 100644 --- a/vulnerabilities/importers/npm.py +++ b/vulnerabilities/importers/npm.py @@ -9,190 +9,134 @@ # Author: Navonil Das (@NavonilDas) -import asyncio +from pathlib import Path +from typing import Iterable from typing import List -from typing import Set -from typing import Tuple -from urllib.parse import quote import pytz from dateutil.parser import parse from packageurl import PackageURL -from univers.version_range import VersionRange -from univers.versions import SemverVersion +from univers.version_range import NpmVersionRange from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import GitImporter +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference -from vulnerabilities.package_managers import NpmVersionAPI +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.severity_systems import CVSSV2 +from vulnerabilities.severity_systems import CVSSV3 +from vulnerabilities.utils import build_description from vulnerabilities.utils import load_json -from vulnerabilities.utils import nearest_patched_package -NPM_URL = "https://registry.npmjs.org{}" - -class NpmImporter(GitImporter): - def __enter__(self): - super(NpmImporter, self).__enter__() - if not getattr(self, "_added_files", None): - self._added_files, self._updated_files = self.file_changes( - recursive=True, file_ext="json", subdir="./vuln/npm" +class NpmImporter(Importer): + spdx_license_expression = "MIT" + license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md" + repo_url = "git+https://github.com/nodejs/security-wg" + + def advisory_data(self) -> Iterable[AdvisoryData]: + try: + self.clone(self.repo_url) + path = Path(self.vcs_response.dest_dir) + + vuln = path / "vuln" + npm_vulns = vuln / "npm" + for file in npm_vulns.glob("*.json"): + yield from self.to_advisory_data(file) + finally: + if self.vcs_response: + self.vcs_response.delete() + + def to_advisory_data(self, file: Path) -> List[AdvisoryData]: + data = load_json(file) + id = data.get("id") + description = data.get("overview") or "" + summary = data.get("title") or "" + date_published = parse(data.get("created_at")).replace(tzinfo=pytz.UTC) + references = [] + cvss_vector = data.get("cvss_vector") + cvss_score = data.get("cvss_score") + severities = [] + if cvss_vector and cvss_vector.startswith("CVSS:3.0/"): + severities.append( + VulnerabilitySeverity( + system=CVSSV3, + value=cvss_score, + ) + ) + if cvss_vector and cvss_vector.startswith("CVSS:2.0/"): + severities.append( + VulnerabilitySeverity( + system=CVSSV2, + value=cvss_score, + ) ) - self._versions = NpmVersionAPI() - self.set_api(self.collect_packages()) - - def updated_advisories(self) -> Set[AdvisoryData]: - files = self._updated_files.union(self._added_files) - advisories = [] - for f in files: - processed_data = self.process_file(f) - if processed_data: - advisories.extend(processed_data) - return self.batch_advisories(advisories) - - def set_api(self, packages): - asyncio.run(self._versions.load_api(packages)) - - def collect_packages(self): - packages = set() - files = self._updated_files.union(self._added_files) - for f in files: - data = load_json(f) - packages.add(data["module_name"].strip()) - - return packages - - @property - def versions(self): # quick hack to make it patchable - return self._versions - - def process_file(self, file) -> List[AdvisoryData]: - - record = load_json(file) - advisories = [] - package_name = record["module_name"].strip() - - publish_date = parse(record["updated_at"]) - publish_date = publish_date.replace(tzinfo=pytz.UTC) - - all_versions = self.versions.get(package_name, until=publish_date).valid_versions - aff_range = record.get("vulnerable_versions") - if not aff_range: - aff_range = "" - fixed_range = record.get("patched_versions") - if not fixed_range: - fixed_range = "" - - if aff_range == "*" or fixed_range == "*": - return [] - - impacted_versions, resolved_versions = categorize_versions( - all_versions, aff_range, fixed_range + advisory_reference = Reference( + url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json", + reference_id=id, + severities=severities, ) - impacted_purls = _versions_to_purls(package_name, impacted_versions) - resolved_purls = _versions_to_purls(package_name, resolved_versions) - vuln_reference = [ - Reference( - url=NPM_URL.format(f'/-/npm/v1/advisories/{record["id"]}'), - reference_id=record["id"], - ) - ] - - for cve_id in record.get("cves") or [""]: - advisories.append( - AdvisoryData( - summary=record.get("overview", ""), - vulnerability_id=cve_id, - affected_packages=nearest_patched_package(impacted_purls, resolved_purls), - references=vuln_reference, + for ref in data.get("references") or []: + references.append( + Reference( + url=ref, + severities=severities, ) ) - return advisories - - -def _versions_to_purls(package_name, versions): - purls = {f"pkg:npm/{quote(package_name)}@{v}" for v in versions} - return [PackageURL.from_string(s) for s in purls] - - -def normalize_ranges(version_range_string): - """ - - Splits version range strings with "||" operator into separate ranges. - - Removes spaces between range operator and range operands - - Normalizes 'x' ranges - Example: - >>> z = normalize_ranges(">=6.1.3 < 7.0.0 || >=7.0.3") - >>> assert z == [">=6.1.3,<7.0.0", ">=7.0.3"] - """ - - version_ranges = version_range_string.split("||") - version_ranges = list(map(str.strip, version_ranges)) - for id, version_range in enumerate(version_ranges): - - # TODO: This is cryptic, simplify this if possible - version_ranges[id] = ",".join(version_range.split()) - version_ranges[id] = version_ranges[id].replace(">=,", ">=") - version_ranges[id] = version_ranges[id].replace("<=,", "<=") - version_ranges[id] = version_ranges[id].replace("<=,", "<=") - version_ranges[id] = version_ranges[id].replace("<,", "<") - version_ranges[id] = version_ranges[id].replace(">,", ">") - - # "x" is interpretted as wild card character here. These are not part of semver - # spec. We replace the "x" with aribitarily large number to simulate the effect. - if ".x." in version_ranges[id]: - version_ranges[id] = version_ranges[id].replace(".x", ".10000.0") - if ".x" in version_ranges[id]: - version_ranges[id] = version_ranges[id].replace(".x", ".10000") - - return version_ranges - - -def categorize_versions( - all_versions: Set[str], - affected_version_range: str, - fixed_version_range: str, -) -> Tuple[Set[str], Set[str]]: - """ - Seperate list of affected versions and unaffected versions from all versions - using the ranges specified. - - :return: impacted, resolved versions - """ - if not all_versions: - # NPM registry has no data regarding this package, we skip these - return set(), set() - - aff_spec = [] - fix_spec = [] - - if affected_version_range: - aff_specs = normalize_ranges(affected_version_range) - aff_spec = [ - VersionRange.from_scheme_version_spec_string("semver", spec) - for spec in aff_specs - if len(spec) >= 3 - ] - - if fixed_version_range: - fix_specs = normalize_ranges(fixed_version_range) - fix_spec = [ - VersionRange.from_scheme_version_spec_string("semver", spec) - for spec in fix_specs - if len(spec) >= 3 - ] - aff_ver, fix_ver = set(), set() - # Unaffected version is that version which is in the fixed_version_range - # or which is absent in the affected_version_range - for ver in all_versions: - ver_obj = SemverVersion(ver) - - if not any([ver_obj in spec for spec in aff_spec]) or any( - [ver_obj in spec for spec in fix_spec] - ): - fix_ver.add(ver) - else: - aff_ver.add(ver) - - return aff_ver, fix_ver + + if advisory_reference not in references: + references.append(advisory_reference) + + package_name = data.get("module_name") + affected_packages = [] + if package_name: + affected_packages.append(self.get_affected_package(data, package_name)) + advsisory_aliases = data.get("cves") or [] + + for alias in advsisory_aliases: + yield AdvisoryData( + summary=build_description(summary=summary, description=description), + references=references, + date_published=date_published, + affected_packages=affected_packages, + aliases=[alias], + ) + + def get_affected_package(self, data, package_name): + affected_version_range = None + unaffected_version_range = None + fixed_version = None + + vulnerable_range = data.get("vulnerable_versions") or "" + patched_range = data.get("patched_versions") or "" + + # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L14 + if vulnerable_range == "<=99.999.99999": + vulnerable_range = "*" + if vulnerable_range: + affected_version_range = NpmVersionRange.from_native(vulnerable_range) + + # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L15 + if patched_range == "<0.0.0": + patched_range = None + if patched_range: + unaffected_version_range = NpmVersionRange.from_native(patched_range) + + # We only store single fixed versions and not a range of fixed versions + # If there is a single constraint in the unaffected_version_range + # having comparator as ">=" then we store that as the fixed version + if unaffected_version_range and len(unaffected_version_range.constraints) == 1: + constraint = unaffected_version_range.constraints[0] + if constraint.comparator == ">=": + fixed_version = constraint.version + + return AffectedPackage( + package=PackageURL( + type="npm", + name=package_name, + ), + affected_version_range=affected_version_range, + fixed_version=fixed_version, + ) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index ac59dcbbc..3e55f83d4 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -781,6 +781,10 @@ def url(self): if alias.startswith("GHSA"): return f"https://github.com/advisories/{alias}" + if alias.startswith("NPM-"): + id = alias.lstrip("NPM-") + return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json" + class Advisory(models.Model): """ diff --git a/vulnerabilities/tests/conftest.py b/vulnerabilities/tests/conftest.py index ff5fae3b0..6802907da 100644 --- a/vulnerabilities/tests/conftest.py +++ b/vulnerabilities/tests/conftest.py @@ -35,7 +35,6 @@ def no_rmtree(monkeypatch): "test_models.py", "test_mozilla.py", "test_msr2019.py", - "test_npm.py", "test_package_managers.py", "test_retiredotnet.py", "test_ruby.py", diff --git a/vulnerabilities/tests/test_data/npm_sample.json b/vulnerabilities/tests/test_data/npm_sample.json new file mode 100644 index 000000000..dcc733ac1 --- /dev/null +++ b/vulnerabilities/tests/test_data/npm_sample.json @@ -0,0 +1,27 @@ +{ + "id": 152, + "created_at": "2016-10-27", + "updated_at": "2017-03-23", + "title": "Predictable temp filenames allow overwrite of arbitrary files", + "author": { + "name": "Jérémy Lal", + "website": null, + "username": null + }, + "module_name": "npm", + "publish_date": "2017-03-23", + "cves": [ + "CVE-2013-4116" + ], + "vulnerable_versions": "<1.3.3", + "patched_versions": ">=1.3.3", + "overview": "npm versions before 1.3.3 allows local users to overwrite arbitrary files via a symlink attack on temporary files with predictable names that are created when unpacking archives.", + "recommendation": "Upgrade to the latest version of npm", + "references": [ + "https://github.com/npm/npm/issues/3635", + "http://www.openwall.com/lists/oss-security/2013/07/10/17" + ], + "cvss_vector": "CVSS:3.0/AV:L/AC:L/PR:L/UI:R/S:C/C:N/I:L/A:N", + "cvss_score": 3.2, + "coordinating_vendor": "^Lift Security" + } \ No newline at end of file diff --git a/vulnerabilities/tests/test_data/parse-advisory-npm-expected.json b/vulnerabilities/tests/test_data/parse-advisory-npm-expected.json new file mode 100644 index 000000000..f777f257c --- /dev/null +++ b/vulnerabilities/tests/test_data/parse-advisory-npm-expected.json @@ -0,0 +1,58 @@ +[ + { + "aliases": [ + "CVE-2013-4116" + ], + "summary": "Predictable temp filenames allow overwrite of arbitrary files\nnpm versions before 1.3.3 allows local users to overwrite arbitrary files via a symlink attack on temporary files with predictable names that are created when unpacking archives.", + "affected_packages": [ + { + "package": { + "type": "npm", + "namespace": null, + "name": "npm", + "version": null, + "qualifiers": null, + "subpath": null + }, + "affected_version_range": "vers:npm/<1.3.3", + "fixed_version": "1.3.3" + } + ], + "references": [ + { + "reference_id": "", + "url": "https://github.com/npm/npm/issues/3635", + "severities": [ + { + "system": "cvssv3", + "value": 3.2, + "scoring_elements": "" + } + ] + }, + { + "reference_id": "", + "url": "http://www.openwall.com/lists/oss-security/2013/07/10/17", + "severities": [ + { + "system": "cvssv3", + "value": 3.2, + "scoring_elements": "" + } + ] + }, + { + "reference_id": 152, + "url": "https://github.com/nodejs/security-wg/blob/main/vuln/npm/152.json", + "severities": [ + { + "system": "cvssv3", + "value": 3.2, + "scoring_elements": "" + } + ] + } + ], + "date_published": "2016-10-27T00:00:00+00:00" + } +] \ No newline at end of file diff --git a/vulnerabilities/tests/test_npm.py b/vulnerabilities/tests/test_npm.py index 1b36f10a2..afd72181b 100644 --- a/vulnerabilities/tests/test_npm.py +++ b/vulnerabilities/tests/test_npm.py @@ -9,138 +9,37 @@ # import os -import shutil -import tempfile -import zipfile -from unittest.mock import patch -from django.test import TestCase +from packageurl import PackageURL +from univers.version_constraint import VersionConstraint +from univers.version_range import NpmVersionRange +from univers.versions import SemverVersion -from vulnerabilities import models -from vulnerabilities.import_runner import ImportRunner -from vulnerabilities.importers.npm import categorize_versions -from vulnerabilities.package_managers import NpmVersionAPI -from vulnerabilities.package_managers import Version +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importers.npm import NpmImporter +from vulnerabilities.tests import util_tests +from vulnerabilities.utils import load_json BASE_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DATA = os.path.join(BASE_DIR, "test_data/") -MOCK_VERSION_API = NpmVersionAPI( - cache={ - "jquery": {Version("3.4.0"), Version("3.8.0")}, - "kerberos": {Version("0.5.8"), Version("1.2.0")}, - "@hapi/subtext": { - Version("3.7.0"), - Version("4.1.1"), - Version("6.1.3"), - Version("7.0.0"), - Version("7.0.5"), - }, - } -) - - -@patch("vulnerabilities.importers.NpmImporter._update_from_remote") -class NpmImportTest(TestCase): - - tempdir = None - - @classmethod - def setUpClass(cls) -> None: - cls.tempdir = tempfile.mkdtemp() - zip_path = os.path.join(TEST_DATA, "npm.zip") - - with zipfile.ZipFile(zip_path, "r") as zip_ref: - zip_ref.extractall(cls.tempdir) - - cls.importer = models.Importer.objects.create( - name="npm_unittests", - license="", - last_run=None, - data_source="NpmImporter", - data_source_cfg={ - "repository_url": "https://example.git", - "working_directory": os.path.join(cls.tempdir, "npm/npm_test"), - "create_working_directory": False, - "remove_working_directory": False, - }, - ) - - @classmethod - def tearDownClass(cls) -> None: - # Make sure no requests for unexpected package names have been made during the tests. - shutil.rmtree(cls.tempdir) - assert len(MOCK_VERSION_API.cache) == 3, MOCK_VERSION_API.cache - - def test_import(self, _): - runner = ImportRunner(self.importer, 5) - - with patch("vulnerabilities.importers.NpmImporter.versions", new=MOCK_VERSION_API): - with patch("vulnerabilities.importers.NpmImporter.set_api"): - runner.run() - - assert models.Vulnerability.objects.count() == 3 - assert models.VulnerabilityReference.objects.count() == 3 - assert models.PackageRelatedVulnerability.objects.all().count() == 4 - - assert models.Package.objects.count() == 8 - - self.assert_for_package( - "jquery", {"3.4.0"}, {"3.8.0"}, "1518", vulnerability_id="CVE-2020-11022" - ) # nopep8 - self.assert_for_package("kerberos", {"0.5.8"}, {"1.2.0"}, "1514") - self.assert_for_package("subtext", {"4.1.1", "7.0.0"}, {"6.1.3", "7.0.5"}, "1476") - - def assert_for_package( - self, - package_name, - impacted_versions, - resolved_versions, - vuln_id, - vulnerability_id=None, - ): - vuln = None - - for version in impacted_versions: - pkg = models.Package.objects.get(name=package_name, version=version) - - assert pkg.vulnerabilities.count() == 1 - vuln = pkg.vulnerabilities.first() - if vulnerability_id: - assert vuln.vulnerability_id == vulnerability_id - - ref_url = f"https://registry.npmjs.org/-/npm/v1/advisories/{vuln_id}" - assert models.VulnerabilityReference.objects.get(url=ref_url, vulnerability=vuln) - - for version in resolved_versions: - pkg = models.Package.objects.get(name=package_name, version=version) - assert models.PackageRelatedVulnerability.objects.filter( - patched_package=pkg, vulnerability=vuln - ) - - -def test_categorize_versions_simple_ranges(): - all_versions = {"3.4.0", "3.8.0"} - impacted_ranges = "<3.5.0" - resolved_ranges = ">=3.5.0" - - impacted_versions, resolved_versions = categorize_versions( - all_versions, impacted_ranges, resolved_ranges - ) - - assert impacted_versions == {"3.4.0"} - assert resolved_versions == {"3.8.0"} - - -def test_categorize_versions_complex_ranges(): - all_versions = {"3.7.0", "4.1.1", "6.1.3", "7.0.0", "7.0.5"} - impacted_ranges = ">=4.1.0 <6.1.3 || >= 7.0.0 <7.0.3" - resolved_ranges = ">=6.1.3 <7.0.0 || >=7.0.3" - - impacted_versions, resolved_versions = categorize_versions( - all_versions, impacted_ranges, resolved_ranges - ) - - assert impacted_versions == {"4.1.1", "7.0.0"} - assert resolved_versions == {"3.7.0", "6.1.3", "7.0.5"} +def test_npm_importer(): + file = os.path.join(TEST_DATA, "npm_sample.json") + result = [adv.to_dict() for adv in NpmImporter().to_advisory_data(file=file)] + expected_file = os.path.join(TEST_DATA, f"parse-advisory-npm-expected.json") + util_tests.check_results_against_json(result, expected_file) + + +def test_get_affected_package(): + file = os.path.join(TEST_DATA, "npm_sample.json") + data = load_json(file) + assert AffectedPackage( + package=PackageURL( + type="npm", namespace=None, name="npm", version=None, qualifiers={}, subpath=None + ), + affected_version_range=NpmVersionRange( + constraints=(VersionConstraint(comparator="<", version=SemverVersion(string="1.3.3")),) + ), + fixed_version=SemverVersion(string="1.3.3"), + ) == NpmImporter().get_affected_package(data, "npm")