diff --git a/requirements.txt b/requirements.txt index 29a9b2490..5caf0c186 100644 --- a/requirements.txt +++ b/requirements.txt @@ -120,3 +120,4 @@ drf-spectacular==0.24.2 coreapi==2.3.3 coreschema==0.0.4 itypes==1.2.0 +progress==1.6 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index a030f0ded..35139efb2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -73,6 +73,7 @@ install_requires = packageurl-python>=0.10.5rc1 univers>=30.11.0 license-expression>=21.6.14 + progress>=1.6 # file and data formats binaryornot>=0.4.4 diff --git a/vulnerabilities/importers/alpine_linux.py b/vulnerabilities/importers/alpine_linux.py index 9ad2a79b5..3f8ce8c1a 100644 --- a/vulnerabilities/importers/alpine_linux.py +++ b/vulnerabilities/importers/alpine_linux.py @@ -17,6 +17,7 @@ from bs4 import BeautifulSoup from packageurl import PackageURL +from progress.bar import ChargingBar from univers.versions import AlpineLinuxVersion from vulnerabilities.importer import AdvisoryData @@ -41,6 +42,10 @@ def advisory_data(self) -> Iterable[AdvisoryData]: page_response_content = fetch_response(BASE_URL).content advisory_directory_links = fetch_advisory_directory_links(page_response_content) advisory_links = [] + progress_for_package_fetch = ChargingBar( + "\tFetching Packages", max=len(advisory_directory_links) + ) + progress_for_package_fetch.start() for advisory_directory_link in advisory_directory_links: advisory_directory_page = fetch_response(advisory_directory_link).content advisory_links.extend( @@ -52,6 +57,8 @@ def advisory_data(self) -> Iterable[AdvisoryData]: LOGGER.error(f'"packages" not found in {link!r}') continue yield from process_record(record=record, url=link) + progress_for_package_fetch.next() + progress_for_package_fetch.finish() def fetch_advisory_directory_links(page_response_content: str) -> List[str]: diff --git a/vulnerabilities/importers/apache_httpd.py b/vulnerabilities/importers/apache_httpd.py index 10a99dd02..e61301d29 100644 --- 
a/vulnerabilities/importers/apache_httpd.py +++ b/vulnerabilities/importers/apache_httpd.py @@ -13,6 +13,7 @@ import requests from bs4 import BeautifulSoup from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_constraint import VersionConstraint from univers.version_range import ApacheVersionRange from univers.versions import SemverVersion @@ -37,9 +38,13 @@ class ApacheHTTPDImporter(Importer): def advisory_data(self): links = fetch_links(self.base_url) + progress_bar_for_fetch_links = ChargingBar("\tFetching Vulnerabilities", max=len(links)) + progress_bar_for_fetch_links.start() for link in links: data = requests.get(link).json() yield self.to_advisory(data) + progress_bar_for_fetch_links.next() + progress_bar_for_fetch_links.finish() def to_advisory(self, data): alias = get_item(data, "CVE_data_meta", "ID") diff --git a/vulnerabilities/importers/apache_kafka.py b/vulnerabilities/importers/apache_kafka.py index 89de85219..4fa6ab5c3 100644 --- a/vulnerabilities/importers/apache_kafka.py +++ b/vulnerabilities/importers/apache_kafka.py @@ -15,6 +15,7 @@ from bs4 import BeautifulSoup from dateutil.parser import parse from packageurl import PackageURL +from progress.bar import ChargingBar from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage @@ -113,6 +114,8 @@ def to_advisory(self, advisory_page): advisory_page = BeautifulSoup(advisory_page, features="lxml") cve_section_beginnings = advisory_page.find_all("h2") + progress_bar_for_cve_fetch = ChargingBar("\tFetching CVEs", max=len(cve_section_beginnings)) + progress_bar_for_cve_fetch.start() for cve_section_beginning in cve_section_beginnings: # This sometimes includes text that follows the CVE on the same line -- sometimes there is a carriage return, sometimes there is not # cve_id = cve_section_beginning.text.split("\n")[0] @@ -195,5 +198,8 @@ def to_advisory(self, advisory_page): url=f"{self.ASF_PAGE_URL}#{cve_id}", ) ) + 
progress_bar_for_cve_fetch.next() + + progress_bar_for_cve_fetch.finish() return advisories diff --git a/vulnerabilities/importers/apache_tomcat.py b/vulnerabilities/importers/apache_tomcat.py index 50a02a0ec..a0344169c 100644 --- a/vulnerabilities/importers/apache_tomcat.py +++ b/vulnerabilities/importers/apache_tomcat.py @@ -15,6 +15,7 @@ import requests from bs4 import BeautifulSoup from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_constraint import VersionConstraint from univers.version_range import ApacheVersionRange from univers.version_range import MavenVersionRange @@ -124,9 +125,14 @@ def fetch_advisory_pages(self): """ Yield the content of each HTML page containing version-related security data. """ - links = self.fetch_advisory_links("https://tomcat.apache.org/security") + links = list(self.fetch_advisory_links("https://tomcat.apache.org/security")) + progress_bar_for_advisory_fetch = ChargingBar("\tFetching Advisories", max=len(links)) + progress_bar_for_advisory_fetch.start() for page_url in links: yield page_url, requests.get(page_url).content + progress_bar_for_advisory_fetch.next() + + progress_bar_for_advisory_fetch.finish() def fetch_advisory_links(self, url): """ diff --git a/vulnerabilities/importers/archlinux.py b/vulnerabilities/importers/archlinux.py index 2e9ef6a87..df7c4eac2 100644 --- a/vulnerabilities/importers/archlinux.py +++ b/vulnerabilities/importers/archlinux.py @@ -12,6 +12,7 @@ from typing import Mapping from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_range import ArchLinuxVersionRange from univers.versions import ArchLinuxVersion @@ -35,8 +36,13 @@ def fetch(self) -> Iterable[Mapping]: return response.json() def advisory_data(self) -> Iterable[AdvisoryData]: - for record in self.fetch(): + records = self.fetch() + progress_bar_for_package_fetch = ChargingBar("\tFetching Packages", max=len(records or [])) + 
progress_bar_for_package_fetch.start() + for record in records: yield from self.parse_advisory(record) + progress_bar_for_package_fetch.next() + progress_bar_for_package_fetch.finish() def parse_advisory(self, record) -> List[AdvisoryData]: advisories = [] diff --git a/vulnerabilities/importers/debian.py b/vulnerabilities/importers/debian.py index 94057675f..a7a29f6f1 100644 --- a/vulnerabilities/importers/debian.py +++ b/vulnerabilities/importers/debian.py @@ -15,6 +15,7 @@ import requests from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_range import DebianVersionRange from univers.versions import DebianVersion @@ -89,8 +90,14 @@ def get_response(self): def advisory_data(self) -> Iterable[AdvisoryData]: response = self.get_response() + progress_bar_for_package_fetch = ChargingBar( + "\tFetching Packages", max=len(response.items()) + ) + progress_bar_for_package_fetch.start() for pkg_name, records in response.items(): yield from self.parse(pkg_name, records) + progress_bar_for_package_fetch.next() + progress_bar_for_package_fetch.finish() def parse(self, pkg_name: str, records: Mapping[str, Any]) -> Iterable[AdvisoryData]: for cve_id, record in records.items(): diff --git a/vulnerabilities/importers/debian_oval.py b/vulnerabilities/importers/debian_oval.py index aa3d6917d..8d3d82eab 100644 --- a/vulnerabilities/importers/debian_oval.py +++ b/vulnerabilities/importers/debian_oval.py @@ -12,6 +12,7 @@ import xml.etree.ElementTree as ET import requests +from progress.bar import ChargingBar from vulnerabilities.importer import OvalImporter @@ -65,6 +66,8 @@ def __init__(self, *args, **kwargs): def _fetch(self): releases = ["wheezy", "stretch", "jessie", "buster", "bullseye"] + progress_bar_for_package_fetch = ChargingBar("\tFetching Packages", max=len(releases)) + progress_bar_for_package_fetch.start() for release in releases: file_url = f"https://www.debian.org/security/oval/oval-definitions-{release}.xml.bz2" self.data_url 
= file_url @@ -74,3 +77,5 @@ def _fetch(self): {"type": "deb", "namespace": "debian", "qualifiers": {"distro": release}}, ET.ElementTree(ET.fromstring(extracted.decode("utf-8"))), ) + progress_bar_for_package_fetch.next() + progress_bar_for_package_fetch.finish() diff --git a/vulnerabilities/importers/elixir_security.py b/vulnerabilities/importers/elixir_security.py index 4fd492a92..7939f5739 100644 --- a/vulnerabilities/importers/elixir_security.py +++ b/vulnerabilities/importers/elixir_security.py @@ -12,6 +12,7 @@ from dateutil import parser as dateparser from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_constraint import VersionConstraint from univers.version_range import HexVersionRange @@ -31,13 +32,19 @@ class ElixirSecurityImporter(Importer): importer_name = "Elixir Security Importer" def advisory_data(self) -> Set[AdvisoryData]: + progress_bar_for_cve_fetch: ChargingBar try: self.clone(self.repo_url) base_path = Path(self.vcs_response.dest_dir) vuln = base_path / "packages" - for file in vuln.glob("**/*.yml"): + vuln_files = list(vuln.glob("**/*.yml")) + progress_bar_for_cve_fetch = ChargingBar("\tFetching CVEs", max=len(vuln_files)) + progress_bar_for_cve_fetch.start() + for file in vuln_files: yield from self.process_file(file, base_path) + progress_bar_for_cve_fetch.next() finally: + progress_bar_for_cve_fetch.finish() if self.vcs_response: self.vcs_response.delete() diff --git a/vulnerabilities/importers/fireeye.py b/vulnerabilities/importers/fireeye.py index f39ff6c45..20f1101b5 100644 --- a/vulnerabilities/importers/fireeye.py +++ b/vulnerabilities/importers/fireeye.py @@ -12,6 +12,8 @@ from typing import Iterable from typing import List +from progress.bar import ChargingBar + from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference @@ -34,12 +36,19 @@ class FireyeImporter(Importer): importer_name = "FireEye Importer" def 
advisory_data(self) -> Iterable[AdvisoryData]: + progress_bar_for_advisory_fetch: ChargingBar try: self.vcs_response = self.clone(repo_url=self.repo_url) base_path = Path(self.vcs_response.dest_dir) - files = filter( - lambda p: p.suffix in [".md", ".MD"], Path(self.vcs_response.dest_dir).glob("**/*") + files = list( + filter( + lambda p: p.suffix in [".md", ".MD"], + Path(self.vcs_response.dest_dir).glob("**/*"), + ) ) + progress_bar_for_advisory_fetch = ChargingBar("\tFetching Advisories", max=len(files)) + progress_bar_for_advisory_fetch.start() + for file in files: if Path(file).stem == "README": continue @@ -48,7 +57,10 @@ def advisory_data(self) -> Iterable[AdvisoryData]: yield parse_advisory_data(raw_data=f.read(), file=file, base_path=base_path) except UnicodeError: logger.error(f"Invalid file {file}") + finally: + progress_bar_for_advisory_fetch.next() finally: + progress_bar_for_advisory_fetch.finish() if self.vcs_response: self.vcs_response.delete() diff --git a/vulnerabilities/importers/gentoo.py b/vulnerabilities/importers/gentoo.py index 2c91f7f2f..f51998575 100644 --- a/vulnerabilities/importers/gentoo.py +++ b/vulnerabilities/importers/gentoo.py @@ -14,6 +14,7 @@ from typing import Iterable from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_constraint import VersionConstraint from univers.version_range import EbuildVersionRange from univers.versions import GentooVersion @@ -34,12 +35,18 @@ class GentooImporter(Importer): importer_name = "Gentoo Importer" def advisory_data(self) -> Iterable[AdvisoryData]: + progress_bar_for_package_fetch: ChargingBar try: self.clone(repo_url=self.repo_url) base_path = Path(self.vcs_response.dest_dir) - for file_path in base_path.glob("**/*.xml"): + base_paths = list(base_path.glob("**/*.xml")) + progress_bar_for_package_fetch = ChargingBar("\tFetching Packages", max=len(base_paths)) + progress_bar_for_package_fetch.start() + for file_path in base_paths: yield from 
self.process_file(file_path) + progress_bar_for_package_fetch.next() finally: + progress_bar_for_package_fetch.finish() if self.vcs_response: self.vcs_response.delete() diff --git a/vulnerabilities/importers/github.py b/vulnerabilities/importers/github.py index f6eb724f5..be64cf9a8 100644 --- a/vulnerabilities/importers/github.py +++ b/vulnerabilities/importers/github.py @@ -14,6 +14,7 @@ from cwe2.database import Database from dateutil import parser as dateparser from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_range import RANGE_CLASS_BY_SCHEMES from univers.version_range import build_range_from_github_advisory_constraint @@ -88,6 +89,10 @@ } """ +progress_bar_for_package_fetch = ChargingBar( + "\tFetching Packages", max=len(PACKAGE_TYPE_BY_GITHUB_ECOSYSTEM.items()) +) + class GitHubAPIImporter(Importer): spdx_license_expression = "CC-BY-4.0" @@ -95,22 +100,31 @@ class GitHubAPIImporter(Importer): license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md" def advisory_data(self) -> Iterable[AdvisoryData]: + progress_bar_for_package_fetch.start() for ecosystem, package_type in PACKAGE_TYPE_BY_GITHUB_ECOSYSTEM.items(): - end_cursor_exp = "" - while True: - graphql_query = {"query": GRAPHQL_QUERY_TEMPLATE % (ecosystem, end_cursor_exp)} - response = utils.fetch_github_graphql_query(graphql_query) + yield from send_graphql_query(ecosystem, package_type) + progress_bar_for_package_fetch.finish() + + +def send_graphql_query(ecosystem: str, package_type: str) -> Iterable[AdvisoryData]: + try: + end_cursor_exp = "" + while True: + graphql_query = {"query": GRAPHQL_QUERY_TEMPLATE % (ecosystem, end_cursor_exp)} + response = utils.fetch_github_graphql_query(graphql_query) - page_info = get_item(response, "data", "securityVulnerabilities", "pageInfo") - end_cursor = get_item(page_info, "endCursor") - if end_cursor: - end_cursor = f'"{end_cursor}"' - end_cursor_exp = f"after: {end_cursor}" + page_info = 
get_item(response, "data", "securityVulnerabilities", "pageInfo") + end_cursor = get_item(page_info, "endCursor") + if end_cursor: + end_cursor = f'"{end_cursor}"' + end_cursor_exp = f"after: {end_cursor}" - yield from process_response(response, package_type=package_type) + yield from process_response(response, package_type=package_type) - if not get_item(page_info, "hasNextPage"): - break + if not get_item(page_info, "hasNextPage"): + break + finally: + progress_bar_for_package_fetch.next() def get_purl(pkg_type: str, github_name: str) -> Optional[PackageURL]: diff --git a/vulnerabilities/importers/gitlab.py b/vulnerabilities/importers/gitlab.py index cd42b24ed..3e7acec35 100644 --- a/vulnerabilities/importers/gitlab.py +++ b/vulnerabilities/importers/gitlab.py @@ -18,6 +18,7 @@ import saneyaml from dateutil import parser as dateparser from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_range import RANGE_CLASS_BY_SCHEMES from univers.version_range import VersionRange from univers.version_range import from_gitlab_native @@ -55,11 +56,14 @@ class GitLabAPIImporter(Importer): repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/" def advisory_data(self, _keep_clone=False) -> Iterable[AdvisoryData]: + progress_bar_for_package_fetch = ChargingBar("\tFetching Packages") try: self.clone(repo_url=self.repo_url) base_path = Path(self.vcs_response.dest_dir) - - for file_path in base_path.glob("**/*.yml"): + file_paths_for_fetched_files = list(base_path.glob("**/*.yml")) + progress_bar_for_package_fetch.max = len(file_paths_for_fetched_files) + progress_bar_for_package_fetch.start() + for file_path in file_paths_for_fetched_files: gitlab_type, package_slug, vuln_id = parse_advisory_path( base_path=base_path, file_path=file_path, @@ -71,7 +75,9 @@ def advisory_data(self, _keep_clone=False) -> Iterable[AdvisoryData]: else: logger.error(f"Unknow package type {gitlab_type!r} in {file_path!r}") continue + 
progress_bar_for_package_fetch.next() finally: + progress_bar_for_package_fetch.finish() if self.vcs_response and not _keep_clone: self.vcs_response.delete() diff --git a/vulnerabilities/importers/istio.py b/vulnerabilities/importers/istio.py index 9341a76de..5dad1a255 100644 --- a/vulnerabilities/importers/istio.py +++ b/vulnerabilities/importers/istio.py @@ -21,6 +21,7 @@ from dateutil import parser from django.db.models.query import QuerySet from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_constraint import VersionConstraint from univers.version_range import GitHubVersionRange from univers.version_range import GolangVersionRange @@ -45,11 +46,17 @@ class IstioImporter(Importer): importer_name = "Istio Importer" def advisory_data(self) -> Set[AdvisoryData]: + progress_bar_for_vuln_fetch: ChargingBar try: self.clone(repo_url=self.repo_url) base_path = Path(self.vcs_response.dest_dir) vuln = base_path / "content/en/news/security/" - for file in vuln.glob("**/*.md"): + file_paths_for_fetched_files = list(vuln.glob("**/*.md")) + progress_bar_for_vuln_fetch = ChargingBar( + "\tFetching Advisories", max=len(file_paths_for_fetched_files) + ) + progress_bar_for_vuln_fetch.start() + for file in file_paths_for_fetched_files: # Istio website has files with name starting with underscore, these contain metadata # required for rendering the website. We're not interested in these. 
# See also https://github.com/nexB/vulnerablecode/issues/563 @@ -57,7 +64,9 @@ def advisory_data(self) -> Set[AdvisoryData]: if file.endswith("_index.md"): continue yield from self.process_file(file=file, base_path=base_path) + progress_bar_for_vuln_fetch.next() finally: + progress_bar_for_vuln_fetch.finish() if self.vcs_response: self.vcs_response.delete() diff --git a/vulnerabilities/importers/mozilla.py b/vulnerabilities/importers/mozilla.py index 11667badc..d03437776 100644 --- a/vulnerabilities/importers/mozilla.py +++ b/vulnerabilities/importers/mozilla.py @@ -17,6 +17,7 @@ from bs4 import BeautifulSoup from markdown import markdown from packageurl import PackageURL +from progress.bar import ChargingBar from univers.versions import SemverVersion from vulnerabilities import severity_systems @@ -40,15 +41,22 @@ class MozillaImporter(Importer): importer_name = "Mozilla Importer" def advisory_data(self) -> Iterable[AdvisoryData]: + progress_bar_for_fetched_files: ChargingBar try: self.clone(self.repo_url) base_path = Path(self.vcs_response.dest_dir) vuln = base_path / "announce" paths = list(vuln.glob("**/*.yml")) + list(vuln.glob("**/*.md")) + progress_bar_for_fetched_files = ChargingBar( + "\tFetching Vulnerabilities", max=len(paths) + ) + progress_bar_for_fetched_files.start() for file_path in paths: yield from to_advisories(file_path, base_path) + progress_bar_for_fetched_files.next() finally: + progress_bar_for_fetched_files.finish() if self.vcs_response: self.vcs_response.delete() diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/importers/nginx.py index 4fe0ca6ae..fe3daab09 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/importers/nginx.py @@ -16,6 +16,7 @@ from bs4 import BeautifulSoup from django.db.models.query import QuerySet from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_range import NginxVersionRange from univers.versions import NginxVersion @@ -52,9 +53,13 @@ def 
advisory_data_from_text(text): """ soup = BeautifulSoup(text, features="lxml") vuln_list = soup.select("li p") + progress_bar_for_package_fetch = ChargingBar("\tFetching Packages", max=len(vuln_list)) + progress_bar_for_package_fetch.start() for vuln_info in vuln_list: - ngnix_adv = parse_advisory_data_from_paragraph(vuln_info) - yield to_advisory_data(ngnix_adv) + nginx_adv = parse_advisory_data_from_paragraph(vuln_info) + yield to_advisory_data(nginx_adv) + progress_bar_for_package_fetch.next() + progress_bar_for_package_fetch.finish() class NginxAdvisory(NamedTuple): diff --git a/vulnerabilities/importers/npm.py b/vulnerabilities/importers/npm.py index 4dcc30705..3fc8c09d0 100644 --- a/vulnerabilities/importers/npm.py +++ b/vulnerabilities/importers/npm.py @@ -16,6 +16,7 @@ import pytz from dateutil.parser import parse from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_range import NpmVersionRange from vulnerabilities.importer import AdvisoryData @@ -36,15 +37,20 @@ class NpmImporter(Importer): importer_name = "Npm Importer" def advisory_data(self) -> Iterable[AdvisoryData]: + progress_bar_for_fetched_files = ChargingBar("\tFetching Files") try: self.clone(self.repo_url) path = Path(self.vcs_response.dest_dir) - vuln = path / "vuln" npm_vulns = vuln / "npm" - for file in npm_vulns.glob("*.json"): + paths_for_files_fetched = list(npm_vulns.glob("*.json")) + progress_bar_for_fetched_files.max = len(paths_for_files_fetched) + progress_bar_for_fetched_files.start() + for file in paths_for_files_fetched: yield from self.to_advisory_data(file) + progress_bar_for_fetched_files.next() finally: + progress_bar_for_fetched_files.finish() if self.vcs_response: self.vcs_response.delete() diff --git a/vulnerabilities/importers/nvd.py b/vulnerabilities/importers/nvd.py index f72e0fc21..b51b443ff 100644 --- a/vulnerabilities/importers/nvd.py +++ b/vulnerabilities/importers/nvd.py @@ -14,6 +14,7 @@ import attr import requests from 
dateutil import parser as dateparser +from progress.bar import ChargingBar from vulnerabilities import severity_systems from vulnerabilities.importer import AdvisoryData @@ -79,10 +80,16 @@ def fetch_cve_data_1_1(starting_year=2002): year since ``starting_year`` defaulting to 2002. """ current_year = date.today().year + progress_bar_for_records_fetched = ChargingBar( + "\tRecords fetched", max=(current_year - starting_year) + 1 + ) + progress_bar_for_records_fetched.start() # NVD json feeds start from 2002. for year in range(starting_year, current_year + 1): download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz" yield year, fetch(url=download_url) + progress_bar_for_records_fetched.next() + progress_bar_for_records_fetched.finish() def to_advisories(cve_data): diff --git a/vulnerabilities/importers/openssl.py b/vulnerabilities/importers/openssl.py index ca69436c9..ac7ee685a 100644 --- a/vulnerabilities/importers/openssl.py +++ b/vulnerabilities/importers/openssl.py @@ -16,6 +16,7 @@ import requests from dateutil import parser as dateparser from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_range import OpensslVersionRange from univers.versions import OpensslVersion @@ -49,11 +50,15 @@ def advisory_data(self) -> Iterable[AdvisoryData]: def parse_vulnerabilities(xml_response) -> Iterable[AdvisoryData]: root = DET.fromstring(xml_response) + progress_bar_for_vulnerability_fetch = ChargingBar("\tFetching Vulnerabilities", max=len(root)) + progress_bar_for_vulnerability_fetch.start() for xml_issue in root: if xml_issue.tag == "issue": advisory = to_advisory_data(xml_issue) if advisory: yield advisory + progress_bar_for_vulnerability_fetch.next() + progress_bar_for_vulnerability_fetch.finish() def to_advisory_data(xml_issue) -> AdvisoryData: diff --git a/vulnerabilities/importers/oss_fuzz.py b/vulnerabilities/importers/oss_fuzz.py index e86df5ce8..dc5006355 100644 --- 
a/vulnerabilities/importers/oss_fuzz.py +++ b/vulnerabilities/importers/oss_fuzz.py @@ -11,6 +11,7 @@ from typing import Iterable import saneyaml +from progress.bar import ChargingBar from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer @@ -27,11 +28,15 @@ class OSSFuzzImporter(Importer): importer_name = "OSS Fuzz Importer" def advisory_data(self) -> Iterable[AdvisoryData]: + progress_bar_for_cve_fetch = ChargingBar("\tFetching CVEs") try: self.clone(repo_url=self.url) base_path = Path(self.vcs_response.dest_dir) path = base_path / "vulns" - for file in path.glob("**/*.yaml"): + files = list(path.glob("**/*.yaml")) + progress_bar_for_cve_fetch.max = len(files) + progress_bar_for_cve_fetch.start() + for file in files: with open(file) as f: yaml_data = saneyaml.load(f.read()) advisory_url = get_advisory_url( @@ -42,6 +47,8 @@ def advisory_data(self) -> Iterable[AdvisoryData]: yield parse_advisory_data( yaml_data, supported_ecosystem="oss-fuzz", advisory_url=advisory_url ) + progress_bar_for_cve_fetch.next() finally: + progress_bar_for_cve_fetch.finish() if self.vcs_response: self.vcs_response.delete() diff --git a/vulnerabilities/importers/postgresql.py b/vulnerabilities/importers/postgresql.py index ee6de3976..1120be104 100644 --- a/vulnerabilities/importers/postgresql.py +++ b/vulnerabilities/importers/postgresql.py @@ -12,6 +12,7 @@ import requests from bs4 import BeautifulSoup from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_range import GenericVersionRange from univers.versions import GenericVersion @@ -34,6 +35,7 @@ def advisory_data(self): known_urls = {self.root_url} visited_urls = set() data_by_url = {} + progress_bar_for_advisory_fetch = ChargingBar("\tFetching Advisories") while True: unvisited_urls = known_urls - visited_urls for url in unvisited_urls: @@ -45,8 +47,12 @@ def advisory_data(self): if known_urls == visited_urls: break + progress_bar_for_advisory_fetch.max 
= len(data_by_url) + progress_bar_for_advisory_fetch.start() for url, data in data_by_url.items(): yield from to_advisories(data) + progress_bar_for_advisory_fetch.next() + progress_bar_for_advisory_fetch.finish() def to_advisories(data): diff --git a/vulnerabilities/importers/project_kb_msr2019.py b/vulnerabilities/importers/project_kb_msr2019.py index e099d3f36..5f7041ddf 100644 --- a/vulnerabilities/importers/project_kb_msr2019.py +++ b/vulnerabilities/importers/project_kb_msr2019.py @@ -6,6 +6,7 @@ # See https://github.com/nexB/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # +from progress.bar import ChargingBar from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer @@ -30,13 +31,18 @@ def advisory_data(self): def to_advisories(self, csv_reader): # Project KB MSR csv file has no header row - for row in csv_reader: + + # This refactoring can cause excessive memory usage. + # But there was no better way to do with current library. 
+ # Might consider upgrading to TQDM in future + rows = list(csv_reader) + progress_bar_for_cve_fetch = ChargingBar("\tFetching CVEs", max=len(rows)) + progress_bar_for_cve_fetch.start() + for row in rows: vuln_id, proj_home, fix_commit, _ = row commit_link = proj_home + "/commit/" + fix_commit - if not is_cve(vuln_id): continue - reference = Reference(url=commit_link) yield AdvisoryData( aliases=[vuln_id], @@ -44,3 +50,5 @@ def to_advisories(self, csv_reader): references=[reference], url=self.url, ) + progress_bar_for_cve_fetch.next() + progress_bar_for_cve_fetch.finish() diff --git a/vulnerabilities/importers/pypa.py b/vulnerabilities/importers/pypa.py index 0f545be55..b5e1c82c7 100644 --- a/vulnerabilities/importers/pypa.py +++ b/vulnerabilities/importers/pypa.py @@ -12,6 +12,7 @@ from typing import Iterable import saneyaml +from progress.bar import ChargingBar from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer @@ -28,16 +29,21 @@ class PyPaImporter(Importer): importer_name = "Pypa Importer" def advisory_data(self) -> Iterable[AdvisoryData]: + progress_bar_for_package_fetch = ChargingBar("\tFetching Packages") try: vcs_response = self.clone(repo_url=self.repo_url) path = Path(vcs_response.dest_dir) + progress_bar_for_package_fetch.max = len(dict(fork_and_get_files(base_path=path))) + progress_bar_for_package_fetch.start() for advisory_url, raw_data in fork_and_get_files(base_path=path): yield parse_advisory_data( raw_data=raw_data, supported_ecosystem="pypi", advisory_url=advisory_url, ) + progress_bar_for_package_fetch.next() finally: + progress_bar_for_package_fetch.finish() if self.vcs_response: self.vcs_response.delete() diff --git a/vulnerabilities/importers/pysec.py b/vulnerabilities/importers/pysec.py index b42d7cb7e..7dfacb5c7 100644 --- a/vulnerabilities/importers/pysec.py +++ b/vulnerabilities/importers/pysec.py @@ -13,12 +13,14 @@ from zipfile import ZipFile import requests +from progress.bar import 
ChargingBar from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer from vulnerabilities.importers.osv import parse_advisory_data logger = logging.getLogger(__name__) +progress_bar_for_package_fetch = ChargingBar("\tFetching Packages") class PyPIImporter(Importer): @@ -33,12 +35,20 @@ def advisory_data(self) -> Iterable[AdvisoryData]: url = "https://osv-vulnerabilities.storage.googleapis.com/PyPI/all.zip" response = requests.get(url).content with ZipFile(BytesIO(response)) as zip_file: - for file_name in zip_file.namelist(): - if not file_name.startswith("PYSEC-"): - logger.error(f"Unsupported PyPI advisory data file: {file_name}") - continue - with zip_file.open(file_name) as f: - vul_info = json.load(f) - yield parse_advisory_data( - raw_data=vul_info, supported_ecosystem="pypi", advisory_url=url - ) + progress_bar_for_package_fetch.max = len(zip_file.namelist()) + yield from process_zipfile_response(zip_file, url) + + +def process_zipfile_response(zip_file: ZipFile, url: str) -> Iterable[AdvisoryData]: + progress_bar_for_package_fetch.start() + for file_name in zip_file.namelist(): + if not file_name.startswith("PYSEC-"): + logger.error(f"Unsupported PyPI advisory data file: {file_name}") + continue + with zip_file.open(file_name) as f: + vul_info = json.load(f) + yield parse_advisory_data( + raw_data=vul_info, supported_ecosystem="pypi", advisory_url=url + ) + progress_bar_for_package_fetch.next() + progress_bar_for_package_fetch.finish() diff --git a/vulnerabilities/importers/redhat.py b/vulnerabilities/importers/redhat.py index a2cc1940b..48e675bc7 100644 --- a/vulnerabilities/importers/redhat.py +++ b/vulnerabilities/importers/redhat.py @@ -15,6 +15,7 @@ import requests from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_range import RpmVersionRange from vulnerabilities import severity_systems @@ -68,9 +69,16 @@ class RedhatImporter(Importer): importer_name = "RedHat Importer" 
def advisory_data(self) -> Iterable[AdvisoryData]: + page_no = 1 for redhat_cves in fetch_cves(): + progress_bar_for_cve_fetch = ChargingBar( + f"\tFetching CVE Set-{page_no}", max=len(redhat_cves) + ) for redhat_cve in redhat_cves: yield to_advisory(redhat_cve) + progress_bar_for_cve_fetch.next() + progress_bar_for_cve_fetch.finish() + page_no += 1 def to_advisory(advisory_data): diff --git a/vulnerabilities/importers/retiredotnet.py b/vulnerabilities/importers/retiredotnet.py index 0ed580ba1..1c67ec38d 100644 --- a/vulnerabilities/importers/retiredotnet.py +++ b/vulnerabilities/importers/retiredotnet.py @@ -14,6 +14,7 @@ from typing import List from packageurl import PackageURL +from progress.bar import ChargingBar from univers.version_range import NugetVersionRange from univers.versions import NugetVersion @@ -31,16 +32,23 @@ class RetireDotnetImporter(Importer): importer_name = "RetireDotNet Importer" def advisory_data(self) -> Iterable[AdvisoryData]: + progress_bar_for_fetched_files: ChargingBar try: self.clone(repo_url=self.repo_url) base_path = Path(self.vcs_response.dest_dir) - vuln = base_path / "Content" - for file in vuln.glob("*.json"): + paths_for_vulnerabilities = list(vuln.glob("*.json")) + progress_bar_for_fetched_files = ChargingBar( + "\tFetching Vulnerabilities", max=len(paths_for_vulnerabilities) + ) + progress_bar_for_fetched_files.start() + for file in paths_for_vulnerabilities: advisory = self.process_file(file, base_path) if advisory: yield advisory + progress_bar_for_fetched_files.next() finally: + progress_bar_for_fetched_files.finish() if self.vcs_response: self.vcs_response.delete() diff --git a/vulnerabilities/importers/suse_scores.py b/vulnerabilities/importers/suse_scores.py index f43e69576..9f9eccbd7 100644 --- a/vulnerabilities/importers/suse_scores.py +++ b/vulnerabilities/importers/suse_scores.py @@ -9,6 +9,8 @@ from typing import Iterable +from progress.bar import ChargingBar + from vulnerabilities import severity_systems from 
vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer @@ -36,7 +38,8 @@ def to_advisory(self, score_data): "3": severity_systems.CVSSV3, "3.1": severity_systems.CVSSV31, } - + progress_bar_for_cve_fetch = ChargingBar("\tFetching CVEs", max=len(score_data or [])) + progress_bar_for_cve_fetch.start() for cve_id in score_data or []: severities = [] for cvss_score in score_data[cve_id].get("cvss") or []: @@ -52,7 +55,6 @@ def to_advisory(self, score_data): scoring_elements=vector, ) severities.append(score) - if not is_cve(cve_id): continue @@ -62,3 +64,5 @@ def to_advisory(self, score_data): references=[Reference(url=URL, severities=severities)], url=URL, ) + progress_bar_for_cve_fetch.next() + progress_bar_for_cve_fetch.finish() diff --git a/vulnerabilities/importers/ubuntu.py b/vulnerabilities/importers/ubuntu.py index 646b40028..54d64ec88 100644 --- a/vulnerabilities/importers/ubuntu.py +++ b/vulnerabilities/importers/ubuntu.py @@ -12,6 +12,7 @@ import xml.etree.ElementTree as ET import requests +from progress.bar import ChargingBar from vulnerabilities.importer import OvalImporter @@ -73,6 +74,8 @@ def __init__(self, *args, **kwargs): def _fetch(self): base_url = "https://people.canonical.com/~ubuntu-security/oval" releases = ["bionic", "trusty", "focal", "eoan", "xenial"] + progress_bar_for_package_fetch = ChargingBar("\tFetching Packages", max=len(releases)) + progress_bar_for_package_fetch.start() for release in releases: file_url = f"{base_url}/com.ubuntu.{release}.cve.oval.xml.bz2" # nopep8 self.data_url = file_url @@ -83,9 +86,10 @@ def _fetch(self): f"Failed to fetch Ubuntu Oval: HTTP {response.status_code} : {file_url}" ) continue - extracted = bz2.decompress(response.content) yield ( {"type": "deb", "namespace": "ubuntu"}, ET.ElementTree(ET.fromstring(extracted.decode("utf-8"))), ) + progress_bar_for_package_fetch.next() + progress_bar_for_package_fetch.finish() diff --git a/vulnerabilities/importers/ubuntu_usn.py 
b/vulnerabilities/importers/ubuntu_usn.py index 6bef117f9..4672794d4 100644 --- a/vulnerabilities/importers/ubuntu_usn.py +++ b/vulnerabilities/importers/ubuntu_usn.py @@ -11,6 +11,7 @@ import json import requests +from progress.bar import ChargingBar from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer @@ -69,6 +70,8 @@ def advisory_data(self): yield from self.to_advisories(usn_db=usn_db) def to_advisories(self, usn_db): + progress_bar_for_advisory_fetch = ChargingBar("\tFetching Advisories", max=len(usn_db)) + progress_bar_for_advisory_fetch.start() for usn in usn_db: usn_data = usn_db[usn] usn_reference = get_usn_reference(usn_data.get("id")) @@ -88,6 +91,9 @@ def to_advisories(self, usn_db): references=usn_references, url=usn_reference.url or self.db_url, ) + progress_bar_for_advisory_fetch.next() + + progress_bar_for_advisory_fetch.finish() def get_usn_reference(usn_id): diff --git a/vulnerabilities/importers/xen.py b/vulnerabilities/importers/xen.py index 2b28a9771..884230ff4 100644 --- a/vulnerabilities/importers/xen.py +++ b/vulnerabilities/importers/xen.py @@ -6,6 +6,7 @@ # See https://github.com/nexB/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# +from progress.bar import ChargingBar from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer @@ -64,8 +65,14 @@ def advisory_data(self): if not data: return [] xsas = data[0]["xsas"] + progress_bar_for_advisory_fetch = ChargingBar("\tFetching Advisories", max=len(xsas)) + progress_bar_for_advisory_fetch.start() + for xsa in xsas: yield from self.to_advisories(xsa) + progress_bar_for_advisory_fetch.next() + + progress_bar_for_advisory_fetch.finish() def to_advisories(self, xsa): xsa_id = xsa.get("xsa") diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index 5ae885299..1d44ab162 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -8,8 +8,10 @@ # import traceback +import progress from django.core.management.base import BaseCommand from django.core.management.base import CommandError +from progress.bar import IncrementalBar from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importers import IMPORTERS_REGISTRY @@ -54,9 +56,10 @@ def import_data(self, importers): names for the importers. """ failed_importers = [] - + progress_bar_for_import = IncrementalBar("Fetching Data from Databases", max=len(importers)) + progress_bar_for_import.start() for importer in importers: - self.stdout.write(f"Importing data using {importer.qualified_name}") + self.stdout.write(f"\nImporting data using {importer.qualified_name}") try: ImportRunner(importer).run() self.stdout.write( @@ -72,6 +75,9 @@ def import_data(self, importers): f"Failed to run importer {importer.qualified_name}. Continuing..." ) ) + finally: + progress_bar_for_import.next() + progress_bar_for_import.finish() if failed_importers: raise CommandError(f"{len(failed_importers)} failed!: {','.join(failed_importers)}")