Migrate npm importer by TG1999 · Pull Request #960 · aboutcode-org/vulnerablecode · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Migrate npm importer #960

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ Release notes
=============


Version v31.1.0
----------------

- We re-enabled the importer for NPM vulnerability advisories.


Version v31.0.0
----------------
Expand Down
9 changes: 9 additions & 0 deletions vulnerabilities/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ class Importer:
spdx_license_expression = ""
license_url = ""
notice = ""
vcs_response = None

def __init__(self):
if not self.spdx_license_expression:
Expand All @@ -319,6 +320,14 @@ def advisory_data(self) -> Iterable[AdvisoryData]:
"""
raise NotImplementedError

def clone(self, repo_url):
    """
    Fetch ``repo_url`` with ``fetch_via_vcs`` and keep the result on
    ``self.vcs_response``.

    Any exception raised while fetching is logged and re-raised as a
    ``ForkError`` chained to the original exception.
    """
    try:
        self.vcs_response = fetch_via_vcs(repo_url)
    except Exception as fetch_error:
        failure = f"Failed to fetch {repo_url} via vcs: {fetch_error}"
        logger.error(failure)
        raise ForkError(failure) from fetch_error


class ForkError(Exception):
    """Error raised when a repository cannot be fetched via VCS."""
Expand Down
2 changes: 2 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from vulnerabilities.importers import github
from vulnerabilities.importers import gitlab
from vulnerabilities.importers import nginx
from vulnerabilities.importers import npm
from vulnerabilities.importers import nvd
from vulnerabilities.importers import openssl
from vulnerabilities.importers import postgresql
Expand All @@ -37,6 +38,7 @@
archlinux.ArchlinuxImporter,
ubuntu.UbuntuImporter,
debian_oval.DebianOvalImporter,
npm.NpmImporter,
]

IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
284 changes: 114 additions & 170 deletions vulnerabilities/importers/npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,190 +9,134 @@

# Author: Navonil Das (@NavonilDas)

import asyncio
from pathlib import Path
from typing import Iterable
from typing import List
from typing import Set
from typing import Tuple
from urllib.parse import quote

import pytz
from dateutil.parser import parse
from packageurl import PackageURL
from univers.version_range import VersionRange
from univers.versions import SemverVersion
from univers.version_range import NpmVersionRange

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import GitImporter
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.package_managers import NpmVersionAPI
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.severity_systems import CVSSV2
from vulnerabilities.severity_systems import CVSSV3
from vulnerabilities.utils import build_description
from vulnerabilities.utils import load_json
from vulnerabilities.utils import nearest_patched_package

NPM_URL = "https://registry.npmjs.org{}"


class NpmImporter(GitImporter):
def __enter__(self):
super(NpmImporter, self).__enter__()
if not getattr(self, "_added_files", None):
self._added_files, self._updated_files = self.file_changes(
recursive=True, file_ext="json", subdir="./vuln/npm"
class NpmImporter(Importer):
spdx_license_expression = "MIT"
license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md"
repo_url = "git+https://github.com/nodejs/security-wg"

def advisory_data(self) -> Iterable[AdvisoryData]:
    """
    Yield the advisories found in the ``vuln/npm`` directory of the
    repository at ``self.repo_url``.

    The repository is cloned first; every ``*.json`` file directly under
    ``vuln/npm`` in the checkout is handed to ``to_advisory_data``. The
    checkout is deleted when iteration ends or fails, provided that
    ``self.vcs_response`` is set.
    """
    try:
        self.clone(self.repo_url)
        advisories_dir = Path(self.vcs_response.dest_dir) / "vuln" / "npm"
        for advisory_file in advisories_dir.glob("*.json"):
            yield from self.to_advisory_data(advisory_file)
    finally:
        # Clean up even when cloning or parsing raised part-way through.
        if self.vcs_response:
            self.vcs_response.delete()

def to_advisory_data(self, file: Path) -> List[AdvisoryData]:
data = load_json(file)
id = data.get("id")
description = data.get("overview") or ""
summary = data.get("title") or ""
date_published = parse(data.get("created_at")).replace(tzinfo=pytz.UTC)
references = []
cvss_vector = data.get("cvss_vector")
cvss_score = data.get("cvss_score")
severities = []
if cvss_vector and cvss_vector.startswith("CVSS:3.0/"):
severities.append(
VulnerabilitySeverity(
system=CVSSV3,
value=cvss_score,
)
)
if cvss_vector and cvss_vector.startswith("CVSS:2.0/"):
severities.append(
VulnerabilitySeverity(
system=CVSSV2,
value=cvss_score,
)
)

self._versions = NpmVersionAPI()
self.set_api(self.collect_packages())

def updated_advisories(self) -> Set[AdvisoryData]:
    """
    Process every added or updated advisory file and return the collected
    advisories, batched by ``self.batch_advisories``.

    Files for which ``process_file`` returns nothing contribute no
    advisories.
    """
    collected = []
    for advisory_file in self._updated_files.union(self._added_files):
        collected.extend(self.process_file(advisory_file) or [])
    return self.batch_advisories(collected)

def set_api(self, packages):
    """
    Run ``self._versions.load_api(packages)`` to completion with
    ``asyncio.run``.
    """
    loading = self._versions.load_api(packages)
    asyncio.run(loading)

def collect_packages(self):
    """
    Return the set of stripped ``module_name`` values read from all added
    and updated advisory files.
    """
    advisory_files = self._updated_files.union(self._added_files)
    return {load_json(path)["module_name"].strip() for path in advisory_files}

@property
def versions(self): # quick hack to make it patchable
    """Return the object stored on ``self._versions``."""
    return self._versions

def process_file(self, file) -> List[AdvisoryData]:

record = load_json(file)
advisories = []
package_name = record["module_name"].strip()

publish_date = parse(record["updated_at"])
publish_date = publish_date.replace(tzinfo=pytz.UTC)

all_versions = self.versions.get(package_name, until=publish_date).valid_versions
aff_range = record.get("vulnerable_versions")
if not aff_range:
aff_range = ""
fixed_range = record.get("patched_versions")
if not fixed_range:
fixed_range = ""

if aff_range == "*" or fixed_range == "*":
return []

impacted_versions, resolved_versions = categorize_versions(
all_versions, aff_range, fixed_range
advisory_reference = Reference(
url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json",
reference_id=id,
severities=severities,
)

impacted_purls = _versions_to_purls(package_name, impacted_versions)
resolved_purls = _versions_to_purls(package_name, resolved_versions)
vuln_reference = [
Reference(
url=NPM_URL.format(f'/-/npm/v1/advisories/{record["id"]}'),
reference_id=record["id"],
)
]

for cve_id in record.get("cves") or [""]:
advisories.append(
AdvisoryData(
summary=record.get("overview", ""),
vulnerability_id=cve_id,
affected_packages=nearest_patched_package(impacted_purls, resolved_purls),
references=vuln_reference,
for ref in data.get("references") or []:
references.append(
Reference(
url=ref,
severities=severities,
)
)
return advisories


def _versions_to_purls(package_name, versions):
purls = {f"pkg:npm/{quote(package_name)}@{v}" for v in versions}
return [PackageURL.from_string(s) for s in purls]


def normalize_ranges(version_range_string):
    """
    Return a list of normalized version range strings.

    - Splits version range strings with "||" operator into separate ranges.
    - Removes spaces between range operator and range operands
    - Normalizes 'x' ranges

    Example:
    >>> z = normalize_ranges(">=6.1.3 < 7.0.0 || >=7.0.3")
    >>> assert z == [">=6.1.3,<7.0.0", ">=7.0.3"]
    >>> assert normalize_ranges("<=2.x.x") == ["<=2.10000.10000"]
    """
    normalized = []
    for raw_range in version_range_string.split("||"):
        # Whitespace separates both constraints and operator/operand pairs:
        # join everything with commas first, then re-attach each operator
        # to the operand that follows it.
        spec = ",".join(raw_range.split())
        for operator in (">=", "<=", "<", ">"):
            spec = spec.replace(f"{operator},", operator)

        # "x" is interpreted as a wildcard character here. It is not part of
        # the semver spec, so every ".x" component is replaced with an
        # arbitrarily large number to simulate the effect. Each wildcard maps
        # to exactly one component so that "2.x.x" stays a three-part version.
        spec = spec.replace(".x", ".10000")
        normalized.append(spec)

    return normalized


def categorize_versions(
    all_versions: Set[str],
    affected_version_range: str,
    fixed_version_range: str,
) -> Tuple[Set[str], Set[str]]:
    """
    Separate ``all_versions`` into affected and unaffected versions using
    the given ranges.

    A version is unaffected when it is in the fixed version range or when
    it is absent from the affected version range.

    :return: impacted, resolved versions
    """
    if not all_versions:
        # NPM registry has no data regarding this package, we skip these
        return set(), set()

    def parse_specs(range_string):
        # An empty range string yields no specs; normalized specs shorter
        # than three characters are dropped.
        if not range_string:
            return []
        return [
            VersionRange.from_scheme_version_spec_string("semver", spec)
            for spec in normalize_ranges(range_string)
            if len(spec) >= 3
        ]

    affected_specs = parse_specs(affected_version_range)
    fixed_specs = parse_specs(fixed_version_range)

    impacted, resolved = set(), set()
    for version in all_versions:
        candidate = SemverVersion(version)
        is_affected = any([candidate in spec for spec in affected_specs])
        if is_affected and not any([candidate in spec for spec in fixed_specs]):
            impacted.add(version)
        else:
            resolved.add(version)

    return impacted, resolved

if advisory_reference not in references:
references.append(advisory_reference)

package_name = data.get("module_name")
affected_packages = []
if package_name:
affected_packages.append(self.get_affected_package(data, package_name))
advsisory_aliases = data.get("cves") or []

for alias in advsisory_aliases:
yield AdvisoryData(
summary=build_description(summary=summary, description=description),
references=references,
date_published=date_published,
affected_packages=affected_packages,
aliases=[alias],
)

def get_affected_package(self, data, package_name):
    """
    Return an AffectedPackage for the npm package ``package_name`` built
    from the ``vulnerable_versions`` and ``patched_versions`` entries of
    the advisory mapping ``data``.

    The affected version range is parsed from ``vulnerable_versions``. A
    fixed version is set only when ``patched_versions`` parses to a single
    ``>=`` constraint; otherwise it stays ``None``.
    """
    vulnerable_spec = data.get("vulnerable_versions") or ""
    patched_spec = data.get("patched_versions") or ""

    # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L14
    if vulnerable_spec == "<=99.999.99999":
        vulnerable_spec = "*"
    affected_range = None
    if vulnerable_spec:
        affected_range = NpmVersionRange.from_native(vulnerable_spec)

    # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L15
    if patched_spec == "<0.0.0":
        patched_spec = None
    patched_versions = NpmVersionRange.from_native(patched_spec) if patched_spec else None

    # We only store single fixed versions and not a range of fixed versions.
    # If the patched range has exactly one constraint and its comparator is
    # ">=", that constraint's version is stored as the fixed version.
    fixed_version = None
    if patched_versions and len(patched_versions.constraints) == 1:
        sole_constraint = patched_versions.constraints[0]
        if sole_constraint.comparator == ">=":
            fixed_version = sole_constraint.version

    purl = PackageURL(type="npm", name=package_name)
    return AffectedPackage(
        package=purl,
        affected_version_range=affected_range,
        fixed_version=fixed_version,
    )
4 changes: 4 additions & 0 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,10 @@ def url(self):
if alias.startswith("GHSA"):
return f"https://github.com/advisories/{alias}"

if alias.startswith("NPM-"):
id = alias.lstrip("NPM-")
return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json"


class Advisory(models.Model):
"""
Expand Down
1 change: 0 additions & 1 deletion vulnerabilities/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ def no_rmtree(monkeypatch):
"test_models.py",
"test_mozilla.py",
"test_msr2019.py",
"test_npm.py",
"test_package_managers.py",
"test_retiredotnet.py",
"test_ruby.py",
Expand Down
Loading
0