From 89a8aee972ccdb4d474b771ac9d02aff1e0a9fd2 Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 8 Jul 2021 10:37:49 -0700 Subject: [PATCH 01/45] Initialize data binding using roundtrip --- hatchet/external/boxplot.py | 161 ++++++++++++++++++++++++ hatchet/external/roundtrip/roundtrip.py | 48 ++++--- 2 files changed, 191 insertions(+), 18 deletions(-) create mode 100644 hatchet/external/boxplot.py diff --git a/hatchet/external/boxplot.py b/hatchet/external/boxplot.py new file mode 100644 index 00000000..7a1fab5c --- /dev/null +++ b/hatchet/external/boxplot.py @@ -0,0 +1,161 @@ +# Copyright 2017-2021 Lawrence Livermore National Security, LLC and other +# CallFlow Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT +# ------------------------------------------------------------------------------ + +import numpy as np +import hatchet as ht +from scipy.stats import kurtosis, skew + +class BoxPlot: + """ + Boxplot computation for a dataframe segment + """ + + def __init__(self, tgt_gf, bkg_gf=None, callsite=[], iqr_scale=1.5): + """ + Boxplot for callsite or module + + :param tgt_gf: (ht.GraphFrame) Target GraphFrame + :param bkg_gf: (ht.GraphFrame) Relative supergraph + :param callsite: (str) Callsite name + :param iqr_scale: (float) IQR range for outliers. + """ + assert isinstance(tgt_gf, ht.GraphFrame) + assert isinstance(callsite, list) + assert isinstance(iqr_scale, float) + + assert 0 + + self.box_types = ["tgt"] + if relative_gf is not None: + self.box_types = ["tgt", "bkg"] + + self.nid = gf.get_idx(name, ntype) + node = {"id": self.nid, "type": ntype, "name": name} + + # TODO: Avoid this. + self.c_path = None + self.rel_c_path = None + + if ntype == "callsite": + df = sg.callsite_aux_dict[name] + if 'component_path' in sg.dataframe.columns: + self.c_path = sg.get_component_path(node) + + if relative_sg is not None: + rel_df = relative_sg.callsite_aux_dict[name] + + if 'component_path' in relative_sg.dataframe.columns: + self.rel_c_path = sg.get_component_path(node) + + elif ntype == "module": + df = sg.module_aux_dict[self.nid] + if relative_sg is not None: + rel_df = relative_sg.module_aux_dict[self.nid] + + if relative_sg is not None and "dataset" in rel_df.columns: + self.ndataset = df_count(rel_df, 'dataset') + + self.time_columns = [proxy_columns.get(_, _) for _ in TIME_COLUMNS] + self.result = {} + self.ntype = ntype + self.iqr_scale = iqr_scale + + self.result["name"] = name + if ntype == "callsite": + self.result["module"] = sg.get_module(sg.get_idx(name, ntype)) + + if relative_sg is not None: + self.result["bkg"] = self.compute(rel_df) + self.result["tgt"] = self.compute(df) + + def compute(self, df): + """ + Compute boxplot related information. + + :param df: Dataframe to calculate the boxplot information. + :return: + """ + + ret = {_: {} for _ in TIME_COLUMNS} + for tk, tv in zip(TIME_COLUMNS, self.time_columns): + q = np.percentile(df[tv], [0.0, 25.0, 50.0, 75.0, 100.0]) + mask = outliers(df[tv], scale=self.iqr_scale) + mask = np.where(mask)[0] + + if 'rank' in df.columns: + rank = df['rank'].to_numpy()[mask] + else: + rank = np.zeros(mask.shape[0], dtype=int) + + _data = df[tv].to_numpy() + _min, _mean, _max = _data.min(), _data.mean(), _data.max() + _var = _data.var() if _data.shape[0] > 0 else 0.0 + _imb = (_max - _mean) / _mean if not np.isclose(_mean, 0.0) else _max + _skew = skew(_data) + _kurt = kurtosis(_data) + + ret[tk] = { + "q": q, + "oval": df[tv].to_numpy()[mask], + "orank": rank, + "d": _data, + "rng": (_min, _max), + "uv": (_mean, _var), + "imb": _imb, + "ks": (_kurt, _skew), + "nid": self.nid, + } + if 'dataset' in df.columns: + ret[tk]['odset'] = df['dataset'].to_numpy()[mask] + + # TODO: Find a better way to send the component_path from data. + if self.c_path is not None: + ret[tk]['cpath'] = self.c_path + + if self.rel_c_path is not None: + ret[tk]['rel_cpath'] = self.rel_c_path + + return ret + + def unpack(self): + """ + Unpack the boxplot data into JSON format. + """ + result = {} + for box_type in self.box_types: + result[box_type] = {} + for metric in self.time_columns: + box = self.result[box_type][metric] + result[box_type][metric] = { + "q": box["q"].tolist(), + "outliers": { + "values": box["oval"].tolist(), + "ranks": box["orank"].tolist() + }, + "min": box["rng"][0], + "max": box["rng"][1], + "mean": box["uv"][0], + "var": box["uv"][1], + "imb": box["imb"], + "kurt": box["ks"][0], + "skew": box["ks"][1], + "nid": box["nid"], + "name": self.result["name"], + } + result["name"] = self.result["name"] + + if 'odset' in box: + result[box_type][metric]['odset'] = box['odset'].tolist() + + if 'cpath' in box: + result[box_type][metric]['cpath'] = box['cpath'] + + if 'rel_cpath' in box: + result[box_type][metric]['rel_cpath'] = box['rel_cpath'] + + return result + +# ------------------------------------------------------------------------------ diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index 1726ad25..81c15fcc 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -1,6 +1,7 @@ from __future__ import print_function from IPython.core.magic import Magics, magics_class, line_magic from IPython.display import HTML, Javascript, display +import os """ File: roundtrip.py @@ -47,12 +48,12 @@ def loadVisualization(self, line): # Path is a variable from the nb namespace path = self.shell.user_ns[args[0]] - fileAndPath = "" - if path[-1] == "/": - fileAndPath = path + "roundtripTree.js" - else: - fileAndPath = path + "/roundtripTree.js" + visToFileMapping = { + "literal_tree": "roundtripTree.js", + "boxplot": "boxplot.js" + } + fileAndPath = os.path.join(path, visToFileMapping[args[1]]) javascriptFile = open(fileAndPath).read() # Source input files @@ -61,11 +62,30 @@ def loadVisualization(self, line): displayObj = display(HTML(argList), display_id=True) - args[1] = self.shell.user_ns[args[1]] - displayObj.update(Javascript('argList.push("' + str(args[1]) + '")')) + data = self.shell.user_ns[args[1]] + displayObj.update(Javascript('argList.push("' + str(data) + '")')) + + dataValidation = { + "literal_tree": self._validate_literal_tree, + "boxplot": self._validate_boxplot + } + + dataValidation[args[1]](data) + + # Get curent cell id + self.codeMap[name] = javascriptFile + + preRun = """ + // Grab current context + elementTop = element.get(0);""" + displayObj.update(Javascript(preRun)) + + self.runVis(name, javascriptFile) + self.id_number += 1 + def _validate_literal_tree(self, data): # Check that users provided a tree literal - if not isinstance(args[1], list): + if not isinstance(data, list): print( """The argument is not a tree literal or it is not a valid Python list. Please check that you have provided a list of nodes and nested children of the following form to loadVisualization: literal_tree = [{ @@ -82,16 +102,8 @@ def loadVisualization(self, line): ) raise Exception("Bad argument") - # Get curent cell id - self.codeMap[name] = javascriptFile - - preRun = """ - // Grab current context - elementTop = element.get(0);""" - displayObj.update(Javascript(preRun)) - - self.runVis(name, javascriptFile) - self.id_number += 1 + def _validate_boxplot(self, data): + pass def runVis(self, name, javascriptFile): name = "roundtripTreeVis" + str(self.id_number) From 3fc0f31484961bd9ad70cb2eab0497ff486e7d79 Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 8 Jul 2021 14:03:50 -0700 Subject: [PATCH 02/45] Create a module for d3-utils --- hatchet/external/roundtrip/boxplot.js | 32 +++++++++ hatchet/external/roundtrip/lib/d3_utils.js | 84 ++++++++++++++++++++++ hatchet/external/roundtrip/roundtrip.py | 2 + 3 files changed, 118 insertions(+) create mode 100644 hatchet/external/roundtrip/boxplot.js create mode 100644 hatchet/external/roundtrip/lib/d3_utils.js diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js new file mode 100644 index 00000000..8315ffe7 --- /dev/null +++ b/hatchet/external/roundtrip/boxplot.js @@ -0,0 +1,32 @@ +//d3.v4 +(function (element) { + const [path, data_string] = argList; + requirejs.config({ + baseUrl: path, + paths: { + d3src: 'https://d3js.org', + lib: 'lib', + }, + map: { + '*': { + 'd3': 'd3src/d3.v6.min', + 'd3-color': 'd3src/d3-color.v1.min', + 'd3-interpolate': 'd3src/d3-interpolate.v1.min', + 'd3-scale-chromatic': 'd3src/d3-scale-chromatic.v1.min', + 'd3-utils': 'lib/d3_utils', + } + } + }); + require(['d3', 'd3-utils'], function (d3, d3_utils) { + const data = JSON.parse(data_string.replace(/'/g, '"')); + + const callsites = Object.keys(data); + const metrics = Object.keys(data[callsites[0]]); + + console.log(callsites, d3_utils); + d3_utils.selectionDropDown(element, metrics, "metricSelect"); + + // document.getElementById("metricSelect").style.margin = "10px 10px 10px 0px"; + + }) +})(element); \ No newline at end of file diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js new file mode 100644 index 00000000..97d34639 --- /dev/null +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -0,0 +1,84 @@ +define(function (require) { + const d3 = require('d3'); + return { + calcContainerWidth : name => +d3.select(name).style('width').slice(0, -2), + calcContainerHeight : name => +d3.select(name).style('height').slice(0, -2), + calcCellWidth : (width, colNames) => width / colNames.length, + calcCellHeight : (height, rowNames) => height / rowNames.length, + calcCellSize : (width, height, colNames, rowNames, widthMax, heightMax) => [Math.min(calcCellWidth(width, colNames), widthMax), Math.min(calcCellHeight(height, rowNames), heightMax)], + prepareSvgArea : (windowWidth, windowHeight, margin) => { + return { + width: windowWidth - margin.left - margin.right, + height: windowHeight - margin.top - margin.bottom, + margin: margin + } + }, + prepareSvg : (id, svgArea) => { + d3.select(id).selectAll('*').remove(); + const svg = d3.select(id) + .append('svg') + .attr('width', svgArea.width + svgArea.margin.left + svgArea.margin.right) + .attr('height', svgArea.height + svgArea.margin.top + svgArea.margin.bottom) + .append('g') + .attr('transform', + 'translate(' + svgArea.margin.left + ',' + svgArea.margin.top + ')'); + + return svg; + }, + initSvgInfo : (targetView, margin) => { + const sd = targetView.svgData; + const domId = targetView.domId; + + sd.svgArea = prepareSvgArea( + calcContainerWidth(`#${domId}`), + calcContainerHeight(`#${domId}`), margin || { + top: 0, + right: 0, + bottom: 0, + left: 0 + }) + sd.svg = prepareSvg(`#${domId}`, sd.svgArea); + sd.domId = targetView.domId; + }, + + // Axes, Scaling + genX : (data, svgArea, domain = null, scaler = d3.scaleLinear()) => { + if (domain === null) { + domain = d3.extent(data); + } + return scaler.domain(domain).range([0, svgArea.width]); + }, + genInvX : (data, svgArea, domain = null, scaler = d3.scaleLinear()) => { + if (domain === null) { + domain = d3.extent(data); + } + return scaler.domain([0, svgArea.width]).range(domain); + }, + genY : (data, svgArea, domain = null, scaler = d3.scaleLinear(), goUp = true) => { + if (domain === null) { + domain = d3.extent(data); + } + return goUp ? + scaler.domain(domain).range([svgArea.height, 0]) : + scaler.domain(domain).range([0, svgArea.height]); + }, + genInvY : (data, svgArea, domain = null, scaler = d3.scaleLinear()) => { + if (domain === null) { + domain = d3.extent(data); + } + return scaler.domain([svgArea.height, 0]).range(domain); + }, + + // UI Components + selectionDropDown : (element, data, id) => { + return d3.select(element).append("select") + .attr("id", id) + .selectAll('option') + .data(data) + .enter() + .append('option') + .text(d => d) + .attr('value', d => d); + } + } +}); diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index 81c15fcc..73fced2e 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -63,8 +63,10 @@ def loadVisualization(self, line): displayObj = display(HTML(argList), display_id=True) data = self.shell.user_ns[args[1]] + displayObj.update(Javascript('argList.push("' + str(path) + '")')) displayObj.update(Javascript('argList.push("' + str(data) + '")')) + dataValidation = { "literal_tree": self._validate_literal_tree, "boxplot": self._validate_boxplot From 1f35d1bb44d15f75f51686cf08f3866ff0b7b276 Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 8 Jul 2021 18:18:36 -0700 Subject: [PATCH 03/45] Get boxplot data from graphframe, Parse and sort the callsites by selected attribute --- hatchet/external/boxplot.py | 215 +++++++++++---------- hatchet/external/roundtrip/boxplot.js | 62 +++++- hatchet/external/roundtrip/lib/d3_utils.js | 9 + hatchet/external/roundtrip/roundtrip.py | 10 +- 4 files changed, 190 insertions(+), 106 deletions(-) diff --git a/hatchet/external/boxplot.py b/hatchet/external/boxplot.py index 7a1fab5c..7e92a821 100644 --- a/hatchet/external/boxplot.py +++ b/hatchet/external/boxplot.py @@ -1,75 +1,113 @@ -# Copyright 2017-2021 Lawrence Livermore National Security, LLC and other -# CallFlow Project Developers. See the top-level LICENSE file for details. -# -# SPDX-License-Identifier: MIT -# ------------------------------------------------------------------------------ - -import numpy as np import hatchet as ht -from scipy.stats import kurtosis, skew +import numpy as np +import pandas as pd +from scipy import stats class BoxPlot: """ Boxplot computation for a dataframe segment """ - def __init__(self, tgt_gf, bkg_gf=None, callsite=[], iqr_scale=1.5): + def __init__(self, tgt_gf, bkg_gf=None, callsites=[], metrics=["time", "time (inc)"], iqr_scale=1.5): """ - Boxplot for callsite or module + Boxplot for callsite :param tgt_gf: (ht.GraphFrame) Target GraphFrame :param bkg_gf: (ht.GraphFrame) Relative supergraph - :param callsite: (str) Callsite name + :param callsites: (list) Callsite name + :param metrics: (list) Runtime metrics :param iqr_scale: (float) IQR range for outliers. """ assert isinstance(tgt_gf, ht.GraphFrame) - assert isinstance(callsite, list) + assert isinstance(callsites, list) assert isinstance(iqr_scale, float) - - assert 0 + + self.metrics = metrics + self.iqr_scale = iqr_scale + self.callsites = callsites + + tgt_gf.dataframe.reset_index(inplace=True) + tgt_dict = BoxPlot.df_bi_level_group(tgt_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=["rank"], apply_func=lambda _: _.mean()) + + if bkg_gf is not None: + bkg_gf.dataframe.reset_index(inplace=True) + bkg_dict = BoxPlot.df_bi_level_group(bkg_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=["rank"], apply_func=lambda _: _.mean()) + + self.result = {} self.box_types = ["tgt"] - if relative_gf is not None: + if bkg_gf is not None: self.box_types = ["tgt", "bkg"] - self.nid = gf.get_idx(name, ntype) - node = {"id": self.nid, "type": ntype, "name": name} - - # TODO: Avoid this. - self.c_path = None - self.rel_c_path = None + for callsite in self.callsites: + ret = {} + tgt_df = tgt_dict[callsite] + ret["tgt"] = self.compute(tgt_df) - if ntype == "callsite": - df = sg.callsite_aux_dict[name] - if 'component_path' in sg.dataframe.columns: - self.c_path = sg.get_component_path(node) + if bkg_gf is not None: + bkg_df = bkg_dict[callsite] + ret["bkg"] = self.compute(bkg_df) - if relative_sg is not None: - rel_df = relative_sg.callsite_aux_dict[name] - - if 'component_path' in relative_sg.dataframe.columns: - self.rel_c_path = sg.get_component_path(node) - - elif ntype == "module": - df = sg.module_aux_dict[self.nid] - if relative_sg is not None: - rel_df = relative_sg.module_aux_dict[self.nid] - - if relative_sg is not None and "dataset" in rel_df.columns: - self.ndataset = df_count(rel_df, 'dataset') - - self.time_columns = [proxy_columns.get(_, _) for _ in TIME_COLUMNS] - self.result = {} - self.ntype = ntype - self.iqr_scale = iqr_scale - - self.result["name"] = name - if ntype == "callsite": - self.result["module"] = sg.get_module(sg.get_idx(name, ntype)) + self.result[callsite] = ret + + @staticmethod + def df_bi_level_group(df, frst_group_attr, scnd_group_attr, cols, group_by, apply_func, proxy={}): + _cols = cols + group_by + + # If there is only one attribute to group by, we use the 1st index. + if len(group_by) == 1: + group_by = group_by[0] + + # Find the grouping + if scnd_group_attr is not None: + _groups = [frst_group_attr, scnd_group_attr] + else: + _groups = [frst_group_attr] + + # Set the df.index as the _groups + _df = df.set_index(_groups) + _levels = _df.index.unique().tolist() + + # If "rank" is present in the columns, we will group by "rank". + if "rank" in _df.columns and len(df["rank"].unique().tolist()) > 1: + if scnd_group_attr is not None: + if len(group_by) == 0: + _cols = _cols + ["rank"] + return { _ : _df.xs(_)[_cols] for (_, __) in _levels } + return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for (_, __) in _levels } + else: + if len(group_by) == 0: + _cols = _cols + ["rank"] + return { _ : _df.xs(_)[_cols] for _ in _levels } + return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for _ in _levels } + else: + return { _ : _df.xs(_)[_cols] for _ in _levels} + + @staticmethod + def outliers(data, scale=1.5, side="both"): + assert isinstance(data, (pd.Series, np.ndarray)) + assert len(data.shape) == 1 + assert isinstance(scale, float) + assert side in ["gt", "lt", "both"] + + d_q13 = np.percentile(data, [25.0, 75.0]) + iqr_distance = np.multiply(stats.iqr(data), scale) + + if side in ["gt", "both"]: + upper_range = d_q13[1] + iqr_distance + upper_outlier = np.greater(data - upper_range.reshape(1), 0) + + if side in ["lt", "both"]: + lower_range = d_q13[0] - iqr_distance + lower_outlier = np.less(data - lower_range.reshape(1), 0) + + if side == "gt": + return upper_outlier + if side == "lt": + return lower_outlier + if side == "both": + return np.logical_or(upper_outlier, lower_outlier) - if relative_sg is not None: - self.result["bkg"] = self.compute(rel_df) - self.result["tgt"] = self.compute(df) def compute(self, df): """ @@ -79,10 +117,10 @@ def compute(self, df): :return: """ - ret = {_: {} for _ in TIME_COLUMNS} - for tk, tv in zip(TIME_COLUMNS, self.time_columns): + ret = {_: {} for _ in self.metrics} + for tk, tv in zip(self.metrics, self.metrics): q = np.percentile(df[tv], [0.0, 25.0, 50.0, 75.0, 100.0]) - mask = outliers(df[tv], scale=self.iqr_scale) + mask = BoxPlot.outliers(df[tv], scale=self.iqr_scale) mask = np.where(mask)[0] if 'rank' in df.columns: @@ -94,8 +132,8 @@ def compute(self, df): _min, _mean, _max = _data.min(), _data.mean(), _data.max() _var = _data.var() if _data.shape[0] > 0 else 0.0 _imb = (_max - _mean) / _mean if not np.isclose(_mean, 0.0) else _max - _skew = skew(_data) - _kurt = kurtosis(_data) + _skew = stats.skew(_data) + _kurt = stats.kurtosis(_data) ret[tk] = { "q": q, @@ -106,18 +144,10 @@ def compute(self, df): "uv": (_mean, _var), "imb": _imb, "ks": (_kurt, _skew), - "nid": self.nid, } if 'dataset' in df.columns: ret[tk]['odset'] = df['dataset'].to_numpy()[mask] - # TODO: Find a better way to send the component_path from data. - if self.c_path is not None: - ret[tk]['cpath'] = self.c_path - - if self.rel_c_path is not None: - ret[tk]['rel_cpath'] = self.rel_c_path - return ret def unpack(self): @@ -125,37 +155,28 @@ def unpack(self): Unpack the boxplot data into JSON format. """ result = {} - for box_type in self.box_types: - result[box_type] = {} - for metric in self.time_columns: - box = self.result[box_type][metric] - result[box_type][metric] = { - "q": box["q"].tolist(), - "outliers": { - "values": box["oval"].tolist(), - "ranks": box["orank"].tolist() - }, - "min": box["rng"][0], - "max": box["rng"][1], - "mean": box["uv"][0], - "var": box["uv"][1], - "imb": box["imb"], - "kurt": box["ks"][0], - "skew": box["ks"][1], - "nid": box["nid"], - "name": self.result["name"], - } - result["name"] = self.result["name"] - - if 'odset' in box: - result[box_type][metric]['odset'] = box['odset'].tolist() - - if 'cpath' in box: - result[box_type][metric]['cpath'] = box['cpath'] - - if 'rel_cpath' in box: - result[box_type][metric]['rel_cpath'] = box['rel_cpath'] - - return result - -# ------------------------------------------------------------------------------ + for callsite in self.callsites: + result[callsite] = {} + for box_type in self.box_types: + result[callsite][box_type] = {} + for metric in self.metrics: + box = self.result[callsite][box_type][metric] + result[callsite][box_type][metric] = { + "q": box["q"].tolist(), + "outliers": { + "values": box["oval"].tolist(), + "ranks": box["orank"].tolist() + }, + "min": box["rng"][0], + "max": box["rng"][1], + "mean": box["uv"][0], + "var": box["uv"][1], + "imb": box["imb"], + "kurt": box["ks"][0], + "skew": box["ks"][1], + } + + if 'odset' in box: + result[callsite][box_type][metric]['odset'] = box['odset'].tolist() + + return result \ No newline at end of file diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 8315ffe7..cab53bfc 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -20,13 +20,69 @@ require(['d3', 'd3-utils'], function (d3, d3_utils) { const data = JSON.parse(data_string.replace(/'/g, '"')); + const margin = {top: 20, right: 20, bottom: 80, left: 20}, + containerHeight = 400, + width = element.clientWidth - margin.right - margin.left, + height = containerHeight - margin.top - margin.bottom; + + const svgArea = d3_utils.prepareSvgArea(width, height, margin); + const svg = d3_utils.prepareSvg(element, svgArea); + + /** + * Sort the callsite ordering based on the attribute. + * + * @param {Array} callsites - Callsites as a list. + * @param {Stirng} metric - Metric (e.g., time or time (inc)) + * @param {String} attribute - Attribute to sort by. + */ + const sortByAttribute = (callsites, metric, attribute, boxplot_type) => { + let items = Object.keys(callsites).map(function (key) { + return [key, callsites[key][boxplot_type]]; + }); + + items = items.sort( (first, second) => { + return second[1][metric][attribute] - first[1][metric][attribute]; + }); + + return items.reduce(function (map, obj) { + map[obj[0]] = obj[1][metric]; + return map; + }, {}); + } + const callsites = Object.keys(data); - const metrics = Object.keys(data[callsites[0]]); - console.log(callsites, d3_utils); + // Selection dropdown for metrics. + const metrics = Object.keys(data[callsites[0]]["tgt"]); + const selected_metric = metrics[0] d3_utils.selectionDropDown(element, metrics, "metricSelect"); + + // Selection dropdown for attributes. + const attributes = ["min", "max", "mean", "var", "imb", "kurt", "skew"]; + const selected_attribute = "mean"; + d3_utils.selectionDropDown(element, attributes, "attributeSelect"); + + const sort_callsites = sortByAttribute(data, selected_metric, selected_attribute, "tgt"); + console.log(sort_callsites); - // document.getElementById("metricSelect").style.margin = "10px 10px 10px 0px"; + let stats = {}; + let boxplot = {}; + for (let callsite in sort_callsites) { + d = sort_callsites[callsite]; + // Set the dictionaries for metadata information. + stats[callsite] = { + "min": d3_utils.formatRuntime(d["min"]), + "max": d3_utils.formatRuntime(d["max"]), + "mean": d3_utils.formatRuntime(d["mean"]), + "var": d3_utils.formatRuntime(d["var"]), + "imb": d3_utils.formatRuntime(d["imb"]), + "kurt": d3_utils.formatRuntime(d["kurt"]), + "skew": d3_utils.formatRuntime(d["skew"]), + }; + + // Set the data for the boxplot. + boxplot[callsite] = {"q": d["q"], "outliers": d["outliers"]}; + } }) })(element); \ No newline at end of file diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index 97d34639..32c87562 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -79,6 +79,15 @@ define(function (require) { .append('option') .text(d => d) .attr('value', d => d); + }, + + // Formatting numbers + formatRuntime: (val) => { + if (val == 0) { + return val; } + let format = d3.format(".3"); + return format(val); + }, } }); diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index 73fced2e..3c0bd102 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -52,6 +52,10 @@ def loadVisualization(self, line): "literal_tree": "roundtripTree.js", "boxplot": "boxplot.js" } + dataValidation = { + "literal_tree": self._validate_literal_tree, + "boxplot": self._validate_boxplot + } fileAndPath = os.path.join(path, visToFileMapping[args[1]]) javascriptFile = open(fileAndPath).read() @@ -66,12 +70,6 @@ def loadVisualization(self, line): displayObj.update(Javascript('argList.push("' + str(path) + '")')) displayObj.update(Javascript('argList.push("' + str(data) + '")')) - - dataValidation = { - "literal_tree": self._validate_literal_tree, - "boxplot": self._validate_boxplot - } - dataValidation[args[1]](data) # Get curent cell id From fa29afd3404975aaee648c28eee4d3c0364154d6 Mon Sep 17 00:00:00 2001 From: jarusified Date: Fri, 9 Jul 2021 15:36:47 -0700 Subject: [PATCH 04/45] Implement version 1 for the performance boxplots --- .../performance_variability_boxplots.ipynb | 488 ++++++++++++++++++ hatchet/external/roundtrip/boxplot.js | 154 ++++-- hatchet/external/roundtrip/lib/d3_utils.js | 79 ++- 3 files changed, 657 insertions(+), 64 deletions(-) create mode 100644 docs/examples/tutorial/performance_variability_boxplots.ipynb diff --git a/docs/examples/tutorial/performance_variability_boxplots.ipynb b/docs/examples/tutorial/performance_variability_boxplots.ipynb new file mode 100644 index 00000000..751f11a4 --- /dev/null +++ b/docs/examples/tutorial/performance_variability_boxplots.ipynb @@ -0,0 +1,488 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Performance Variability Boxplots\n", + "\n", + "\n", + "### Load roundtrip" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import os, sys\n", + "from IPython.display import HTML, display\n", + "\n", + "import hatchet as ht\n", + "\n", + "# This is the relative path from the notebook to Roundtrip files in hatchet/external/roundtrip/\n", + "roundtrip_path = '../../../hatchet/external/roundtrip/'\n", + "hatchet_path = \".\"\n", + "\n", + "# Add the path so that the notebook can find the Roundtrip extension\n", + "module_path = os.path.abspath(os.path.join(roundtrip_path)) \n", + "if module_path not in sys.path:\n", + " sys.path.append(module_path)\n", + " sys.path.append(hatchet_path)\n", + "\n", + " \n", + "# Uncomment this line to widen the cells to handle large trees \n", + "#display(HTML(\"\"))\n", + "\n", + "# Load the Roundtrip extension. This only needs to be loaded once.\n", + "%load_ext roundtrip" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np \n", + "import pandas as pd\n", + "from scipy import stats\n", + "\n", + "class BoxPlot:\n", + " \"\"\"\n", + " Boxplot computation for a dataframe segment\n", + " \"\"\"\n", + "\n", + " def __init__(self, tgt_gf, bkg_gf=None, callsites=[], metrics=[\"time\", \"time (inc)\"], iqr_scale=1.5):\n", + " \"\"\"\n", + " Boxplot for callsite \n", + " \n", + " :param tgt_gf: (ht.GraphFrame) Target GraphFrame \n", + " :param bkg_gf: (ht.GraphFrame) Relative supergraph\n", + " :param callsite: (str) Callsite name\n", + " :param iqr_scale: (float) IQR range for outliers.\n", + " \"\"\"\n", + " assert isinstance(tgt_gf, ht.GraphFrame)\n", + " assert isinstance(callsites, list)\n", + " assert isinstance(iqr_scale, float)\n", + " \n", + " self.metrics = metrics\n", + " self.iqr_scale = iqr_scale\n", + " self.callsites = callsites\n", + " \n", + " tgt_gf.dataframe.reset_index(inplace=True)\n", + " tgt_dict = BoxPlot.df_bi_level_group(tgt_gf.dataframe, \"name\", None, cols=metrics + [\"nid\"], group_by=[\"rank\"], apply_func=lambda _: _.mean())\n", + " \n", + " if bkg_gf is not None:\n", + " bkg_gf.dataframe.reset_index(inplace=True)\n", + " bkg_dict = BoxPlot.df_bi_level_group(bkg_gf.dataframe, \"name\", None, cols=metrics + [\"nid\"], group_by=[\"rank\"], apply_func=lambda _: _.mean())\n", + " \n", + " self.result = {}\n", + "\n", + " self.box_types = [\"tgt\"] \n", + " if bkg_gf is not None:\n", + " self.box_types = [\"tgt\", \"bkg\"]\n", + "\n", + " for callsite in self.callsites:\n", + " ret = {}\n", + " tgt_df = tgt_dict[callsite]\n", + " ret[\"tgt\"] = self.compute(tgt_df)\n", + "\n", + " if bkg_gf is not None:\n", + " bkg_df = bkg_dict[callsite]\n", + " ret[\"bkg\"] = self.compute(bkg_df)\n", + " \n", + " self.result[callsite] = ret\n", + " \n", + " @staticmethod\n", + " def df_bi_level_group(df, frst_group_attr, scnd_group_attr, cols, group_by, apply_func, proxy={}):\n", + " _cols = cols + group_by\n", + "\n", + " # If there is only one attribute to group by, we use the 1st index.\n", + " if len(group_by) == 1:\n", + " group_by = group_by[0]\n", + "\n", + " # Find the grouping\n", + " if scnd_group_attr is not None:\n", + " _groups = [frst_group_attr, scnd_group_attr]\n", + " else:\n", + " _groups = [frst_group_attr]\n", + "\n", + " # Set the df.index as the _groups\n", + " _df = df.set_index(_groups)\n", + " _levels = _df.index.unique().tolist()\n", + "\n", + " # If \"rank\" is present in the columns, group by \"rank\".\n", + " if \"rank\" in _df.columns and len(df[\"rank\"].unique().tolist()) > 1:\n", + " if scnd_group_attr is not None:\n", + " if len(group_by) == 0:\n", + " _cols = _cols + [\"rank\"]\n", + " return { _ : _df.xs(_)[_cols] for (_, __) in _levels }\n", + " return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for (_, __) in _levels }\n", + " else:\n", + " if len(group_by) == 0:\n", + " _cols = _cols + [\"rank\"]\n", + " return { _ : _df.xs(_)[_cols] for _ in _levels }\n", + " return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for _ in _levels }\n", + " else: \n", + " return { _ : _df.xs(_)[_cols] for _ in _levels}\n", + " \n", + " @staticmethod\n", + " def outliers(data, scale=1.5, side=\"both\"):\n", + " assert isinstance(data, (pd.Series, np.ndarray))\n", + " assert len(data.shape) == 1\n", + " assert isinstance(scale, float)\n", + " assert side in [\"gt\", \"lt\", \"both\"]\n", + "\n", + " d_q13 = np.percentile(data, [25.0, 75.0])\n", + " iqr_distance = np.multiply(stats.iqr(data), scale)\n", + "\n", + " if side in [\"gt\", \"both\"]:\n", + " upper_range = d_q13[1] + iqr_distance\n", + " upper_outlier = np.greater(data - upper_range.reshape(1), 0)\n", + "\n", + " if side in [\"lt\", \"both\"]:\n", + " lower_range = d_q13[0] - iqr_distance\n", + " lower_outlier = np.less(data - lower_range.reshape(1), 0)\n", + "\n", + " if side == \"gt\":\n", + " return upper_outlier\n", + " if side == \"lt\":\n", + " return lower_outlier\n", + " if side == \"both\":\n", + " return np.logical_or(upper_outlier, lower_outlier)\n", + "\n", + " \n", + " def compute(self, df):\n", + " \"\"\"\n", + " Compute boxplot related information.\n", + "\n", + " :param df: Dataframe to calculate the boxplot information.\n", + " :return:\n", + " \"\"\"\n", + "\n", + " ret = {_: {} for _ in self.metrics}\n", + " for tk, tv in zip(self.metrics, self.metrics):\n", + " q = np.percentile(df[tv], [0.0, 25.0, 50.0, 75.0, 100.0])\n", + " mask = BoxPlot.outliers(df[tv], scale=self.iqr_scale)\n", + " mask = np.where(mask)[0]\n", + "\n", + " if 'rank' in df.columns:\n", + " rank = df['rank'].to_numpy()[mask]\n", + " else:\n", + " rank = np.zeros(mask.shape[0], dtype=int)\n", + "\n", + " _data = df[tv].to_numpy()\n", + " _min, _mean, _max = _data.min(), _data.mean(), _data.max()\n", + " _var = _data.var() if _data.shape[0] > 0 else 0.0\n", + " _imb = (_max - _mean) / _mean if not np.isclose(_mean, 0.0) else _max\n", + " _skew = stats.skew(_data)\n", + " _kurt = stats.kurtosis(_data)\n", + "\n", + " ret[tk] = {\n", + " \"q\": q,\n", + " \"oval\": df[tv].to_numpy()[mask],\n", + " \"orank\": rank,\n", + " \"d\": _data,\n", + " \"rng\": (_min, _max),\n", + " \"uv\": (_mean, _var),\n", + " \"imb\": _imb,\n", + " \"ks\": (_kurt, _skew),\n", + " }\n", + " if 'dataset' in df.columns:\n", + " ret[tk]['odset'] = df['dataset'].to_numpy()[mask]\n", + "\n", + " return ret\n", + " \n", + " def unpack(self):\n", + " \"\"\"\n", + " Unpack the boxplot data into JSON format.\n", + " \"\"\"\n", + " result = {}\n", + " for callsite in self.callsites:\n", + " result[callsite] = {}\n", + " for box_type in self.box_types:\n", + " result[callsite][box_type] = {}\n", + " for metric in self.metrics:\n", + " box = self.result[callsite][box_type][metric]\n", + " result[callsite][box_type][metric] = {\n", + " \"q\": box[\"q\"].tolist(),\n", + " \"outliers\": {\n", + " \"values\": box[\"oval\"].tolist(),\n", + " \"ranks\": box[\"orank\"].tolist()\n", + " },\n", + " \"min\": box[\"rng\"][0],\n", + " \"max\": box[\"rng\"][1],\n", + " \"mean\": box[\"uv\"][0],\n", + " \"var\": box[\"uv\"][1],\n", + " \"imb\": box[\"imb\"],\n", + " \"kurt\": box[\"ks\"][0],\n", + " \"skew\": box[\"ks\"][1],\n", + " }\n", + "\n", + " if 'odset' in box:\n", + " result[callsite][box_type][metric]['odset'] = box['odset'].tolist()\n", + "\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "data_dir = \"/Users/jarus/Work/llnl/hatchet/hatchet/tests/data\"\n", + "data_path = os.path.join(data_dir, \"caliper-cpi-json/cpi-callpath-profile.json\")\n", + "gf = ht.GraphFrame.from_caliper_json(data_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "callsites = gf.dataframe.name.unique().tolist()\n", + "bp = BoxPlot(tgt_gf=gf, bkg_gf=None, callsites=callsites, metrics=[\"time\"])\n", + "boxplot = bp.unpack()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'tgt': {'time': {'q': [309432.0, 309537.75, 309663.5, 318511.0, 344782.0], 'outliers': {'values': [344782.0], 'ranks': [3]}, 'min': 309432.0, 'max': 344782.0, 'mean': 318385.25, 'var': 232275830.6875, 'imb': 0.08290820633179458, 'kurt': -0.6668163878655595, 'skew': 1.1545063317709627}}}\n" + ] + } + ], + "source": [ + "print(boxplot[\"main\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " // Grab current context\n", + " elementTop = element.get(0);" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%loadVisualization roundtrip_path boxplot" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.6 64-bit", + "language": "python", + "name": "python37664bit3a5637fa2c7f4443bca7a2894d18d23d" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index cab53bfc..4852f390 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -3,52 +3,41 @@ const [path, data_string] = argList; requirejs.config({ baseUrl: path, - paths: { + paths: { d3src: 'https://d3js.org', lib: 'lib', }, map: { '*': { 'd3': 'd3src/d3.v6.min', - 'd3-color': 'd3src/d3-color.v1.min', - 'd3-interpolate': 'd3src/d3-interpolate.v1.min', - 'd3-scale-chromatic': 'd3src/d3-scale-chromatic.v1.min', 'd3-utils': 'lib/d3_utils', } } }); - require(['d3', 'd3-utils'], function (d3, d3_utils) { + require(['d3', 'd3-utils'], (d3, d3_utils) => { const data = JSON.parse(data_string.replace(/'/g, '"')); - const margin = {top: 20, right: 20, bottom: 80, left: 20}, - containerHeight = 400, - width = element.clientWidth - margin.right - margin.left, - height = containerHeight - margin.top - margin.bottom; + /** + * Sort the callsite ordering based on the attribute. + * + * @param {Array} callsites - Callsites as a list. + * @param {Stirng} metric - Metric (e.g., time or time (inc)) + * @param {String} attribute - Attribute to sort by. + */ + const sortByAttribute = (callsites, metric, attribute, boxplot_type) => { + let items = Object.keys(callsites).map(function (key) { + return [key, callsites[key][boxplot_type]]; + }); - const svgArea = d3_utils.prepareSvgArea(width, height, margin); - const svg = d3_utils.prepareSvg(element, svgArea); + items = items.sort( (first, second) => { + return second[1][metric][attribute] - first[1][metric][attribute]; + }); - /** - * Sort the callsite ordering based on the attribute. - * - * @param {Array} callsites - Callsites as a list. - * @param {Stirng} metric - Metric (e.g., time or time (inc)) - * @param {String} attribute - Attribute to sort by. - */ - const sortByAttribute = (callsites, metric, attribute, boxplot_type) => { - let items = Object.keys(callsites).map(function (key) { - return [key, callsites[key][boxplot_type]]; - }); - - items = items.sort( (first, second) => { - return second[1][metric][attribute] - first[1][metric][attribute]; - }); - - return items.reduce(function (map, obj) { - map[obj[0]] = obj[1][metric]; - return map; - }, {}); - } + return items.reduce(function (map, obj) { + map[obj[0]] = obj[1][metric]; + return map; + }, {}); + } const callsites = Object.keys(data); @@ -63,26 +52,91 @@ d3_utils.selectionDropDown(element, attributes, "attributeSelect"); const sort_callsites = sortByAttribute(data, selected_metric, selected_attribute, "tgt"); - console.log(sort_callsites); + + const margin = {top: 20, right: 20, bottom: 0, left: 20}, + containerHeight = 100 * Object.keys(sort_callsites).length, + width = element.clientWidth - margin.right - margin.left, + height = containerHeight - margin.top - margin.bottom; + + const svgArea = d3_utils.prepareSvgArea(width, height, margin); + const svg = d3_utils.prepareSvg(element, svgArea); - let stats = {}; - let boxplot = {}; - for (let callsite in sort_callsites) { - d = sort_callsites[callsite]; - - // Set the dictionaries for metadata information. - stats[callsite] = { - "min": d3_utils.formatRuntime(d["min"]), - "max": d3_utils.formatRuntime(d["max"]), - "mean": d3_utils.formatRuntime(d["mean"]), - "var": d3_utils.formatRuntime(d["var"]), - "imb": d3_utils.formatRuntime(d["imb"]), - "kurt": d3_utils.formatRuntime(d["kurt"]), - "skew": d3_utils.formatRuntime(d["skew"]), + let idx = 0; + for (let [callsite, d] of Object.entries(sort_callsites)) { + const stats = { + "min": d3_utils.formatRuntime(d.min), + "max": d3_utils.formatRuntime(d.max), + "mean": d3_utils.formatRuntime(d.mean), + "var": d3_utils.formatRuntime(d.var), + "imb": d3_utils.formatRuntime(d.imb), + "kurt": d3_utils.formatRuntime(d.kurt), + "skew": d3_utils.formatRuntime(d.skew), }; - // Set the data for the boxplot. - boxplot[callsite] = {"q": d["q"], "outliers": d["outliers"]}; + const boxWidth = 0.6 * width; + const xScale = d3.scaleLinear() + .domain([d.min, d.max]) + .range([0.05 * boxWidth, boxWidth - 0.05 * boxWidth]); + + const gId = "box-" + idx; + const gYOffset = 200; + const g = svg.append("g") + .attr("id", gId) + .attr("width", boxWidth) + .attr("transform", "translate(0, " + gYOffset * idx + ")"); + d3_utils.drawText(element, gId, "callsite: " + callsite, 10, 0); + + let statIdx = 1; + for( let [stat, val] of Object.entries(stats)) { + d3_utils.drawText(element, gId, `${stat}: ${val}`, 1.1 * boxWidth, 15, statIdx); + statIdx += 1; + } + + // const tooltip = element; + + // const mouseover = (data) => tooltip.render(data); + // const mouseout = (data) => tooltip.clear(); + // const click = (data) => tooltip.render(data); + + const boxHeight = 80; + const boxYOffset = 30; + const fillColor = "#d9d9d9"; + const strokeColor = "#202020"; + const strokeWidth = 1; + + // Centerline + d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset + boxHeight/2, xScale(d.q[4]), boxYOffset + boxHeight/2, strokeColor); + + // Box + const box = d3_utils.drawRect(g, { + "class": "rect", + "x": xScale(d.q[1]), + "y": boxYOffset, + "height": boxHeight, + "fill": fillColor, + "width": xScale(d.q[3]) - xScale(d.q[1]), + "stroke": strokeColor, + "stroke-width": strokeWidth + }); + + // Markers + d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset, xScale(d.q[0]), boxYOffset + boxHeight, strokeColor); + d3_utils.drawLine(g, xScale(d.q[4]), boxYOffset, xScale(d.q[4]), boxYOffset + boxHeight, strokeColor); + + // Outliers + const outlierRadius = 4; + let outliers = [] + for (let idx = 0; idx < d.outliers["values"].length; idx += 1) { + outliers.push({ + x: xScale(d.outliers["values"][idx]), + value: d.outliers["values"][idx], + rank: d.outliers["ranks"][idx], + // dataset: d.dataset + }) + } + d3_utils.drawCircle(g, outliers, outlierRadius, boxYOffset, fillColor); + + idx += 1 } - }) + }); })(element); \ No newline at end of file diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index 32c87562..16fdd229 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -1,19 +1,20 @@ define(function (require) { - const d3 = require('d3'); + const d3 = require("d3"); + return { - calcContainerWidth : name => +d3.select(name).style('width').slice(0, -2), - calcContainerHeight : name => +d3.select(name).style('height').slice(0, -2), - calcCellWidth : (width, colNames) => width / colNames.length, - calcCellHeight : (height, rowNames) => height / rowNames.length, - calcCellSize : (width, height, colNames, rowNames, widthMax, heightMax) => [Math.min(calcCellWidth(width, colNames), widthMax), Math.min(calcCellHeight(height, rowNames), heightMax)], - prepareSvgArea : (windowWidth, windowHeight, margin) => { + calcContainerWidth: name => +d3.select(name).style('width').slice(0, -2), + calcContainerHeight: name => +d3.select(name).style('height').slice(0, -2), + calcCellWidth: (width, colNames) => width / colNames.length, + calcCellHeight: (height, rowNames) => height / rowNames.length, + calcCellSize: (width, height, colNames, rowNames, widthMax, heightMax) => [Math.min(calcCellWidth(width, colNames), widthMax), Math.min(calcCellHeight(height, rowNames), heightMax)], + prepareSvgArea: (windowWidth, windowHeight, margin) => { return { width: windowWidth - margin.left - margin.right, height: windowHeight - margin.top - margin.bottom, margin: margin } }, - prepareSvg : (id, svgArea) => { + prepareSvg: (id, svgArea) => { d3.select(id).selectAll('*').remove(); const svg = d3.select(id) .append('svg') @@ -25,7 +26,7 @@ define(function (require) { return svg; }, - initSvgInfo : (targetView, margin) => { + initSvgInfo: (targetView, margin) => { const sd = targetView.svgData; const domId = targetView.domId; @@ -42,19 +43,19 @@ define(function (require) { }, // Axes, Scaling - genX : (data, svgArea, domain = null, scaler = d3.scaleLinear()) => { + genX: (data, svgArea, domain = null, scaler = d3.scaleLinear()) => { if (domain === null) { domain = d3.extent(data); } return scaler.domain(domain).range([0, svgArea.width]); }, - genInvX : (data, svgArea, domain = null, scaler = d3.scaleLinear()) => { + genInvX: (data, svgArea, domain = null, scaler = d3.scaleLinear()) => { if (domain === null) { domain = d3.extent(data); } return scaler.domain([0, svgArea.width]).range(domain); }, - genY : (data, svgArea, domain = null, scaler = d3.scaleLinear(), goUp = true) => { + genY: (data, svgArea, domain = null, scaler = d3.scaleLinear(), goUp = true) => { if (domain === null) { domain = d3.extent(data); } @@ -62,7 +63,7 @@ define(function (require) { scaler.domain(domain).range([svgArea.height, 0]) : scaler.domain(domain).range([0, svgArea.height]); }, - genInvY : (data, svgArea, domain = null, scaler = d3.scaleLinear()) => { + genInvY: (data, svgArea, domain = null, scaler = d3.scaleLinear()) => { if (domain === null) { domain = d3.extent(data); } @@ -70,7 +71,7 @@ define(function (require) { }, // UI Components - selectionDropDown : (element, data, id) => { + selectionDropDown: (element, data, id) => { return d3.select(element).append("select") .attr("id", id) .selectAll('option') @@ -80,6 +81,7 @@ define(function (require) { .text(d => d) .attr('value', d => d); }, + // Formatting numbers formatRuntime: (val) => { @@ -89,5 +91,54 @@ define(function (require) { let format = d3.format(".3"); return format(val); }, + + // SVG elements + drawRect: (element, attrDict, click = () => { }, mouseover = () => { }, mouseout = () => { }) => { + return element.append("rect") + .attr("x", attrDict["x"]) + .attr("y", attrDict["y"]) + .attr("height", attrDict["height"]) + .attr("width", attrDict["width"]) + .attr("fill", attrDict["fill"]) + .attr("stroke", attrDict["stroke"]) + .attr("stroke-width", attrDict["stroke-width"]) + .on("click", click) + .on("mouseover", mouseover) + .on("mouseout", mouseout); + }, + drawText: (element, forId, text, xOffset, yOffset, yOffsetIdx) => { + return d3.select(element) + .select('#' + forId) + .append('text') + .attr("x", xOffset) + .attr("y", yOffset * yOffsetIdx) + .attr('for', forId) + .text(text); + }, + drawLine: (element, x1, y1, x2, y2, strokeColor) => { + return element + .append("line") + .attr("class", "line") + .attr("x1", x1) + .attr("y1", y1) + .attr("x2", x2) + .attr("y2", y2) + .attr("stroke", strokeColor) + .style("stroke-width", "1.5"); + }, + drawCircle: (element, data, radius, yOffset, fillColor, click = () => { }, mouseover = () => { }, mouseout = () => { }) => { + return element + .selectAll(".circle") + .data(data) + .join("circle") + .attr("r", radius) + .attr("cx", (d) => d.x) + .attr("cy", (d) => d.y + yOffset) + .attr("class", "circle") + .style("fill", fillColor) + .on("click", (d) => click(d)) + .on("mouseover", (d) => mouseover(d)) + .on("mouseout", (d) => mouseout(d)); + } } }); From 55dacdbe6210f9e7dd529d4b00e9409e3df947fc Mon Sep 17 00:00:00 2001 From: jarusified Date: Sat, 10 Jul 2021 14:22:33 -0700 Subject: [PATCH 05/45] Improve validation of the arguments --- .../performance_variability_boxplots.ipynb | 139 ++++++++++++++++-- hatchet/external/roundtrip/boxplot.js | 29 +++- hatchet/external/roundtrip/roundtrip.py | 57 ++++--- 3 files changed, 183 insertions(+), 42 deletions(-) diff --git a/docs/examples/tutorial/performance_variability_boxplots.ipynb b/docs/examples/tutorial/performance_variability_boxplots.ipynb index 751f11a4..71c35b50 100644 --- a/docs/examples/tutorial/performance_variability_boxplots.ipynb +++ b/docs/examples/tutorial/performance_variability_boxplots.ipynb @@ -6,6 +6,16 @@ "source": [ "# Performance Variability Boxplots\n", "\n", + "Boxplots provide an insight into the runtime distribution among its MPI ranks. We provide 3 modes to visualize the performance variability of a GraphFrame:\n", + "\n", + "1) Single GraphFrame.\n", + "\n", + "2) Compare two GraphFrame.\n", + "\n", + "3) Compare GraphFrame against an unified GraphFrame.\n", + "\n", + "\n", + "Boxplots are calculated to represent the range of the distribution and outliers (dots) correspond to the ranks which are beyond the 1.5*IQR. Additionally, several statistical measures like mean, variance, kurtosis, skewness across the MPI ranks are also provided.\"\n", "\n", "### Load roundtrip" ] @@ -52,6 +62,13 @@ "%load_ext roundtrip" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Boxplot - High-level function API on top of the GraphFrame" + ] + }, { "cell_type": "code", "execution_count": 2, @@ -73,7 +90,8 @@ " \n", " :param tgt_gf: (ht.GraphFrame) Target GraphFrame \n", " :param bkg_gf: (ht.GraphFrame) Relative supergraph\n", - " :param callsite: (str) Callsite name\n", + " :param callsite: (list) List of callsites\n", + " :param metrics: (list) List of metrics to compute.\n", " :param iqr_scale: (float) IQR range for outliers.\n", " \"\"\"\n", " assert isinstance(tgt_gf, ht.GraphFrame)\n", @@ -240,14 +258,21 @@ " return result" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Boxplots for target GraphFrame" + ] + }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "data_dir = \"/Users/jarus/Work/llnl/hatchet/hatchet/tests/data\"\n", - "data_path = os.path.join(data_dir, \"caliper-cpi-json/cpi-callpath-profile.json\")\n", + "data_dir = os.path.realpath(\"../../../hatchet/tests/data\")\n", + "data_path = os.path.join(data_dir, \"caliper-lulesh-json/lulesh-annotation-profile.json\")\n", "gf = ht.GraphFrame.from_caliper_json(data_path)" ] }, @@ -271,7 +296,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'tgt': {'time': {'q': [309432.0, 309537.75, 309663.5, 318511.0, 344782.0], 'outliers': {'values': [344782.0], 'ranks': [3]}, 'min': 309432.0, 'max': 344782.0, 'mean': 318385.25, 'var': 232275830.6875, 'imb': 0.08290820633179458, 'kurt': -0.6668163878655595, 'skew': 1.1545063317709627}}}\n" + "{'tgt': {'time': {'q': [105528.0, 113072.25, 116494.0, 124430.75, 137098.0], 'outliers': {'values': [], 'ranks': []}, 'min': 105528.0, 'max': 137098.0, 'mean': 119373.5, 'var': 104497970.25, 'imb': 0.14847935262013764, 'kurt': -0.9421848873183336, 'skew': 0.5436725364039101}}}\n" ] } ], @@ -286,6 +311,15 @@ "scrolled": true }, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "roundtrip_path\n", + "\"boxplot\"\n", + "here\n" + ] + }, { "data": { "application/javascript": [ @@ -307,14 +341,22 @@ "
\n", " " ], @@ -549,27 +567,216 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "roundtrip_path\n", - "boxplot_comparison\n" - ] + "data": { + "application/javascript": [ + "\n", + " // Grab current context\n", + " elementTop = element.get(0);" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" }, { - "ename": "IndexError", - "evalue": "list index out of range", - "output_type": "error", - "traceback": [ - "\u001b[0;31m-------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'loadVisualization'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'roundtrip_path boxplot_comparison'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/dev/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mrun_line_magic\u001b[0;34m(self, magic_name, line, _stack_depth)\u001b[0m\n\u001b[1;32m 2325\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'local_ns'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_local_scope\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstack_depth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2326\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuiltin_trap\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2327\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2328\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2329\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mloadVisualization\u001b[0;34m(self, line)\u001b[0m\n", - "\u001b[0;32m~/dev/IPython/core/magic.py\u001b[0m in \u001b[0;36m\u001b[0;34m(f, *a, **k)\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;31m# but it's overkill for just that one bit of state.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmagic_deco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 187\u001b[0;31m \u001b[0mcall\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 188\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Work/llnl/hatchet/hatchet/external/roundtrip/roundtrip.py\u001b[0m in \u001b[0;36mloadVisualization\u001b[0;34m(self, line)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0mpath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcleanLineArgument\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0mvisType\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcleanLineArgument\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 66\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshell\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muser_ns\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 67\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mvisType\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mVIS_TO_FILE\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mIndexError\u001b[0m: list index out of range" - ] + "data": { + "text/html": [ + "\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index d5400e86..faeba5af 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -1,5 +1,6 @@ // TODO: Adopt MVC pattern for this module. (function (element) { + const BOXPLOT_TYPES = ["tgt", "bkg"]; const [path, visType, variableString] = cleanInputs(argList); // Quit if visType is not boxplot. @@ -59,9 +60,13 @@ require(['d3', 'd3-utils'], (d3, d3_utils) => { const data = JSON.parse(variableString.replace(/'/g, '"')); - const BOXPLOT_TYPES = ["tgt", "bkg"]; const callsites = Object.keys(data); + const MODE = Object.keys(data[callsites[0]]).length == 2 ? "COMPARISON" : "NORMAL"; + + // Assign an index to the callsites. + const idxToNameMap = Object.assign({}, callsites.map((callsite) => (callsite))); + const nameToIdxMap = Object.entries(idxToNameMap).reduce((acc, [key, value]) => (acc[value] = key, acc), {}) // Selection dropdown for metrics. const metrics = Object.keys(data[callsites[0]]["tgt"]); @@ -78,91 +83,101 @@ // Setup VIS area. const margin = {top: 20, right: 20, bottom: 0, left: 20}, - containerHeight = 100 * Object.keys(sortedCallsites).length, + containerHeight = 100 * Object.keys(callsites).length, width = element.clientWidth - margin.right - margin.left, height = containerHeight - margin.top - margin.bottom; const svgArea = d3_utils.prepareSvgArea(width, height, margin); const svg = d3_utils.prepareSvg(element, svgArea); - - // TODO: Remove idx variable from here. - let idx = 0; - for (let [callsite, d] of Object.entries(sortedCallsites)) { - const stats = { - "min": d3_utils.formatRuntime(d.min), - "max": d3_utils.formatRuntime(d.max), - "mean": d3_utils.formatRuntime(d.mean), - "var": d3_utils.formatRuntime(d.var), - "imb": d3_utils.formatRuntime(d.imb), - "kurt": d3_utils.formatRuntime(d.kurt), - "skew": d3_utils.formatRuntime(d.skew), - }; - - const boxWidth = 0.6 * width; - const xScale = d3.scaleLinear() - .domain([d.min, d.max]) - .range([0.05 * boxWidth, boxWidth - 0.05 * boxWidth]); - - const gId = "box-" + idx; - const gYOffset = 200; - const g = svg.append("g") - .attr("id", gId) - .attr("width", boxWidth) - .attr("transform", "translate(0, " + gYOffset * idx + ")"); - - // Text for callsite name - d3_utils.drawText(element, gId, "callsite: " + callsite, 10, 0); - - // Text for statistics - let statIdx = 1; - for( let [stat, val] of Object.entries(stats)) { - d3_utils.drawText(element, gId, `${stat}: ${val}`, 1.1 * boxWidth, 15, statIdx); - statIdx += 1; - } - // const tooltip = element; - // const mouseover = (data) => tooltip.render(data); - // const mouseout = (data) => tooltip.clear(); - // const click = (data) => tooltip.render(data); - - const boxHeight = 80; - const boxYOffset = 30; - const fillColor = "#d9d9d9"; - const strokeColor = "#202020"; - const strokeWidth = 1; - - // Centerline - d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset + boxHeight/2, xScale(d.q[4]), boxYOffset + boxHeight/2, strokeColor); - - // Box - const box = d3_utils.drawRect(g, { - "class": "rect", - "x": xScale(d.q[1]), - "y": boxYOffset, - "height": boxHeight, - "fill": fillColor, - "width": xScale(d.q[3]) - xScale(d.q[1]), - "stroke": strokeColor, - "stroke-width": strokeWidth - }); - - // Markers - d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset, xScale(d.q[0]), boxYOffset + boxHeight, strokeColor); - d3_utils.drawLine(g, xScale(d.q[4]), boxYOffset, xScale(d.q[4]), boxYOffset + boxHeight, strokeColor); - - // Outliers - const outlierRadius = 4; - let outliers = [] - for (let idx = 0; idx < d.outliers["values"].length; idx += 1) { - outliers.push({ - x: xScale(d.outliers["values"][idx]), - value: d.outliers["values"][idx], - rank: d.outliers["ranks"][idx], - // dataset: d.dataset # TODO: pass dataset to differentiate. - }) + visualize(sortedCallsites, nameToIdxMap, "tgt"); + if (MODE == "COMPARISON") { + const sortedBkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "bkg"); + visualize(sortedBkgCallsites, nameToIdxMap, "bkg"); + } + + function visualize(callsites, idxMap, mode) { + for (let [callsite, d] of Object.entries(callsites)) { + const stats = { + "min": d3_utils.formatRuntime(d.min), + "max": d3_utils.formatRuntime(d.max), + "mean": d3_utils.formatRuntime(d.mean), + "var": d3_utils.formatRuntime(d.var), + "imb": d3_utils.formatRuntime(d.imb), + "kurt": d3_utils.formatRuntime(d.kurt), + "skew": d3_utils.formatRuntime(d.skew), + }; + + const boxWidth = 0.6 * width; + const xScale = d3.scaleLinear() + .domain([d.min, d.max]) + .range([0.05 * boxWidth, boxWidth - 0.05 * boxWidth]); + + const idx = idxMap[callsite]; + const gId = "box-" + idx; + const gYOffset = 200; + const g = svg.append("g") + .attr("id", gId) + .attr("width", boxWidth) + .attr("transform", "translate(0, " + gYOffset * idx + ")"); + + // Text for callsite name + d3_utils.drawText(element, gId, "callsite: " + callsite, 10, 0); + + const yOffset = mode === "tgt" ? 1.1 * boxWidth : 1.4 * boxWidth; + const textColor = mode === "tgt" ? "#4DAF4A": "#202020"; + d3_utils.drawText(element, gId, mode, yOffset, 15, 0, textColor); + + // Text for statistics + let statIdx = 1; + for( let [stat, val] of Object.entries(stats)) { + d3_utils.drawText(element, gId, `${stat}: ${val}`, yOffset, 15, statIdx, textColor); + statIdx += 1; + } + + // const tooltip = element; + // const mouseover = (data) => tooltip.render(data); + // const mouseout = (data) => tooltip.clear(); + // const click = (data) => tooltip.render(data); + + const boxHeight = 80; + const boxYOffset = 30; + const fillColor = mode === "tgt" ? "#d9d9d9": "#4DAF4A"; + const strokeColor = "#202020"; + const strokeWidth = 1; + + // Centerline + d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset + boxHeight/2, xScale(d.q[4]), boxYOffset + boxHeight/2, strokeColor); + + // Box + const box = d3_utils.drawRect(g, { + "class": "rect", + "x": xScale(d.q[1]), + "y": boxYOffset, + "height": boxHeight, + "fill": fillColor, + "width": xScale(d.q[3]) - xScale(d.q[1]), + "stroke": strokeColor, + "stroke-width": strokeWidth + }); + + // Markers + d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset, xScale(d.q[0]), boxYOffset + boxHeight, strokeColor); + d3_utils.drawLine(g, xScale(d.q[4]), boxYOffset, xScale(d.q[4]), boxYOffset + boxHeight, strokeColor); + + // Outliers + const outlierRadius = 4; + let outliers = [] + for (let idx = 0; idx < d.outliers["values"].length; idx += 1) { + outliers.push({ + x: xScale(d.outliers["values"][idx]), + value: d.outliers["values"][idx], + rank: d.outliers["ranks"][idx], + // dataset: d.dataset # TODO: pass dataset to differentiate. + }) + } + d3_utils.drawCircle(g, outliers, outlierRadius, boxYOffset, fillColor); } - d3_utils.drawCircle(g, outliers, outlierRadius, boxYOffset, fillColor); - - idx += 1 } + }); })(element); \ No newline at end of file diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index 16fdd229..363f8c91 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -106,12 +106,13 @@ define(function (require) { .on("mouseover", mouseover) .on("mouseout", mouseout); }, - drawText: (element, forId, text, xOffset, yOffset, yOffsetIdx) => { + drawText: (element, forId, text, xOffset, yOffset, yOffsetIdx, textColor) => { return d3.select(element) .select('#' + forId) .append('text') .attr("x", xOffset) .attr("y", yOffset * yOffsetIdx) + .attr("stroke", textColor) .attr('for', forId) .text(text); }, From 3e1d1e4a5ae3e1da0ab6336f981ddc608a868b26 Mon Sep 17 00:00:00 2001 From: jarusified Date: Mon, 12 Jul 2021 11:38:23 -0700 Subject: [PATCH 09/45] Add color to text --- .../performance_variability_boxplots.ipynb | 27 +++++++++++++------ hatchet/external/roundtrip/boxplot.js | 12 +++++---- hatchet/external/roundtrip/lib/d3_utils.js | 2 +- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/docs/examples/tutorial/performance_variability_boxplots.ipynb b/docs/examples/tutorial/performance_variability_boxplots.ipynb index 48922a95..b63ebebb 100644 --- a/docs/examples/tutorial/performance_variability_boxplots.ipynb +++ b/docs/examples/tutorial/performance_variability_boxplots.ipynb @@ -423,13 +423,13 @@ " const svgArea = d3_utils.prepareSvgArea(width, height, margin);\n", " const svg = d3_utils.prepareSvg(element, svgArea);\n", "\n", - " visualize(sortedCallsites, nameToIdxMap, \"tgt\");\n", + " visualize(sortedCallsites, nameToIdxMap, \"tgt\", true);\n", " if (MODE == \"COMPARISON\") {\n", " const sortedBkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, \"bkg\");\n", - " visualize(sortedBkgCallsites, nameToIdxMap, \"bkg\");\n", + " visualize(sortedBkgCallsites, nameToIdxMap, \"bkg\", false);\n", " }\n", " \n", - " function visualize(callsites, idxMap, mode) {\n", + " function visualize(callsites, idxMap, mode, drawCenterLine) {\n", " for (let [callsite, d] of Object.entries(callsites)) {\n", " const stats = { \n", " \"min\": d3_utils.formatRuntime(d.min),\n", @@ -480,7 +480,9 @@ " const strokeWidth = 1;\n", "\n", " // Centerline\n", - " d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset + boxHeight/2, xScale(d.q[4]), boxYOffset + boxHeight/2, strokeColor);\n", + " if (drawCenterLine) {\n", + " d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset + boxHeight/2, xScale(d.q[4]), boxYOffset + boxHeight/2, strokeColor);\n", + " }\n", "\n", " // Box\n", " const box = d3_utils.drawRect(g, {\n", @@ -678,13 +680,13 @@ " const svgArea = d3_utils.prepareSvgArea(width, height, margin);\n", " const svg = d3_utils.prepareSvg(element, svgArea);\n", "\n", - " visualize(sortedCallsites, nameToIdxMap, \"tgt\");\n", + " visualize(sortedCallsites, nameToIdxMap, \"tgt\", true);\n", " if (MODE == \"COMPARISON\") {\n", " const sortedBkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, \"bkg\");\n", - " visualize(sortedBkgCallsites, nameToIdxMap, \"bkg\");\n", + " visualize(sortedBkgCallsites, nameToIdxMap, \"bkg\", false);\n", " }\n", " \n", - " function visualize(callsites, idxMap, mode) {\n", + " function visualize(callsites, idxMap, mode, drawCenterLine) {\n", " for (let [callsite, d] of Object.entries(callsites)) {\n", " const stats = { \n", " \"min\": d3_utils.formatRuntime(d.min),\n", @@ -735,7 +737,9 @@ " const strokeWidth = 1;\n", "\n", " // Centerline\n", - " d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset + boxHeight/2, xScale(d.q[4]), boxYOffset + boxHeight/2, strokeColor);\n", + " if (drawCenterLine) {\n", + " d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset + boxHeight/2, xScale(d.q[4]), boxYOffset + boxHeight/2, strokeColor);\n", + " }\n", "\n", " // Box\n", " const box = d3_utils.drawRect(g, {\n", @@ -782,6 +786,13 @@ "source": [ "%loadVisualization roundtrip_path \"boxplot\" boxplot_comparison" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index faeba5af..d9b3d0a2 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -89,13 +89,13 @@ const svgArea = d3_utils.prepareSvgArea(width, height, margin); const svg = d3_utils.prepareSvg(element, svgArea); - visualize(sortedCallsites, nameToIdxMap, "tgt"); + visualize(sortedCallsites, nameToIdxMap, "tgt", true); if (MODE == "COMPARISON") { const sortedBkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "bkg"); - visualize(sortedBkgCallsites, nameToIdxMap, "bkg"); + visualize(sortedBkgCallsites, nameToIdxMap, "bkg", false); } - function visualize(callsites, idxMap, mode) { + function visualize(callsites, idxMap, mode, drawCenterLine) { for (let [callsite, d] of Object.entries(callsites)) { const stats = { "min": d3_utils.formatRuntime(d.min), @@ -141,12 +141,14 @@ const boxHeight = 80; const boxYOffset = 30; - const fillColor = mode === "tgt" ? "#d9d9d9": "#4DAF4A"; + const fillColor = mode === "tgt" ? "#4DAF4A": "#D9D9D9"; const strokeColor = "#202020"; const strokeWidth = 1; // Centerline - d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset + boxHeight/2, xScale(d.q[4]), boxYOffset + boxHeight/2, strokeColor); + if (drawCenterLine) { + d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset + boxHeight/2, xScale(d.q[4]), boxYOffset + boxHeight/2, strokeColor); + } // Box const box = d3_utils.drawRect(g, { diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index 363f8c91..1029afef 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -112,7 +112,7 @@ define(function (require) { .append('text') .attr("x", xOffset) .attr("y", yOffset * yOffsetIdx) - .attr("stroke", textColor) + .attr("fill", textColor) .attr('for', forId) .text(text); }, From 961af3609b2c548c4f6bbf2121f0cee7a024530e Mon Sep 17 00:00:00 2001 From: jarusified Date: Mon, 12 Jul 2021 14:15:17 -0700 Subject: [PATCH 10/45] Add xAxis to the boxplots --- hatchet/external/roundtrip/boxplot.js | 20 +++++++++++----- hatchet/external/roundtrip/lib/d3_utils.js | 27 ++++++++++++++++++++++ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index d9b3d0a2..f7520d50 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -89,6 +89,7 @@ const svgArea = d3_utils.prepareSvgArea(width, height, margin); const svg = d3_utils.prepareSvg(element, svgArea); + // Visualize the boxplots. visualize(sortedCallsites, nameToIdxMap, "tgt", true); if (MODE == "COMPARISON") { const sortedBkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "bkg"); @@ -96,6 +97,12 @@ } function visualize(callsites, idxMap, mode, drawCenterLine) { + const boxHeight = 80; + const boxYOffset = 30; + const fillColor = mode === "tgt" ? "#4DAF4A": "#D9D9D9"; + const strokeColor = "#202020"; + const strokeWidth = 1; + for (let [callsite, d] of Object.entries(callsites)) { const stats = { "min": d3_utils.formatRuntime(d.min), @@ -120,9 +127,13 @@ .attr("width", boxWidth) .attr("transform", "translate(0, " + gYOffset * idx + ")"); - // Text for callsite name + const axisOffset = boxHeight * 1.5; + d3_utils.drawXAxis(g, xScale, 5, d3_utils.formatRuntime, 0, axisOffset, "black"); + + // Text for callsite name. d3_utils.drawText(element, gId, "callsite: " + callsite, 10, 0); + // Text fpr statistics title. const yOffset = mode === "tgt" ? 1.1 * boxWidth : 1.4 * boxWidth; const textColor = mode === "tgt" ? "#4DAF4A": "#202020"; d3_utils.drawText(element, gId, mode, yOffset, 15, 0, textColor); @@ -139,11 +150,7 @@ // const mouseout = (data) => tooltip.clear(); // const click = (data) => tooltip.render(data); - const boxHeight = 80; - const boxYOffset = 30; - const fillColor = mode === "tgt" ? "#4DAF4A": "#D9D9D9"; - const strokeColor = "#202020"; - const strokeWidth = 1; + // Centerline if (drawCenterLine) { @@ -178,6 +185,7 @@ }) } d3_utils.drawCircle(g, outliers, outlierRadius, boxYOffset, fillColor); + } } diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index 1029afef..69e735c4 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -140,6 +140,33 @@ define(function (require) { .on("click", (d) => click(d)) .on("mouseover", (d) => mouseover(d)) .on("mouseout", (d) => mouseout(d)); + }, + drawXAxis: (element, xScale, numOfTicks, tickFormatFn, xOffset, yOffset, strokeColor) => { + const axis = d3.axisBottom(xScale) + .ticks(numOfTicks) + .tickFormat(tickFormatFn); + + const line = element.append("g") + .attr("class", "xAxis") + .attr("transform", `translate(${xOffset}, ${yOffset})`) + .call(axis); + + line.selectAll("path") + .style("fill", "none") + .style("stroke", strokeColor) + .style("stroke-width", "1px"); + + line.selectAll("line") + .style("fill", "none") + .style("stroke", strokeColor) + .style("stroke-width", "1px"); + + line.selectAll("text") + .style("font-size", "12px") + .style("font-family", "sans-serif") + .style("font-weight", "lighter"); + + return line; } } }); From 5cafe08995e7cbd5019971b5f89cae9be41a2355 Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 13 Jul 2021 11:02:27 -0700 Subject: [PATCH 11/45] Refactor the code to fix the xAxis --- hatchet/external/roundtrip/boxplot.js | 212 +++++++++++++-------- hatchet/external/roundtrip/lib/d3_utils.js | 4 +- 2 files changed, 135 insertions(+), 81 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index f7520d50..428c6a63 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -44,22 +44,37 @@ if (!BOXPLOT_TYPES.includes(boxplotType)) { console.error("Invalid boxplot type. Use either 'tgt' or 'bkg'") } + + // Sanity check to see if the boxplotType is present in the callsites. + let _is_empty = false; + Object.keys(callsites).map(function (key) { + if(callsites[key][boxplotType] === undefined) { + _is_empty = true; + } + }) + let items = Object.keys(callsites).map(function (key) { return [key, callsites[key][boxplotType]]; }); - - items = items.sort( (first, second) => { - return second[1][metric][attribute] - first[1][metric][attribute]; - }); + + if(!_is_empty) { + items = items.sort( (first, second) => { + return second[1][metric][attribute] - first[1][metric][attribute]; + }); + } return items.reduce(function (map, obj) { - map[obj[0]] = obj[1][metric]; + if (obj[1] !== undefined) { + map[obj[0]] = obj[1][metric]; + } else { + map[obj[0]] = obj[1]; + } return map; }, {}); } require(['d3', 'd3-utils'], (d3, d3_utils) => { - const data = JSON.parse(variableString.replace(/'/g, '"')); + const data = JSON.parse(variableString); const callsites = Object.keys(data); const MODE = Object.keys(data[callsites[0]]).length == 2 ? "COMPARISON" : "NORMAL"; @@ -79,7 +94,8 @@ d3_utils.selectionDropDown(element, attributes, "attributeSelect"); // Sort the callsites by the selected attribute and metric. - const sortedCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "tgt"); + const sortedTgtCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "tgt"); + const sortedBkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "bkg"); // Setup VIS area. const margin = {top: 20, right: 20, bottom: 0, left: 20}, @@ -89,36 +105,113 @@ const svgArea = d3_utils.prepareSvgArea(width, height, margin); const svg = d3_utils.prepareSvg(element, svgArea); - // Visualize the boxplots. - visualize(sortedCallsites, nameToIdxMap, "tgt", true); - if (MODE == "COMPARISON") { - const sortedBkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "bkg"); - visualize(sortedBkgCallsites, nameToIdxMap, "bkg", false); + visualize(sortedTgtCallsites, sortedBkgCallsites, nameToIdxMap, false); + + function _format(d) { + return { + "min": d3_utils.formatRuntime(d.min), + "max": d3_utils.formatRuntime(d.max), + "mean": d3_utils.formatRuntime(d.mean), + "var": d3_utils.formatRuntime(d.var), + "imb": d3_utils.formatRuntime(d.imb), + "kurt": d3_utils.formatRuntime(d.kurt), + "skew": d3_utils.formatRuntime(d.skew), + }; } - - function visualize(callsites, idxMap, mode, drawCenterLine) { - const boxHeight = 80; + + function visualizeStats (d, mode, gId, boxWidth) { + const stats = _format(d); + + // Text fpr statistics title. + const xOffset = mode === "tgt" ? 1.1 * boxWidth : 1.4 * boxWidth; + const textColor = mode === "tgt" ? "#4DAF4A": "#202020"; + // d3_utils.drawText(element, gId, mode, xOffset, 15, 0, textColor); + + // Text for statistics + let statIdx = 1; + for( let [stat, val] of Object.entries(stats)) { + d3_utils.drawText(element, gId, `${stat}: ${val}`, xOffset, 15, statIdx, textColor); + statIdx += 1; + } + } + + function visualizeBoxplot(g, d, type, xScale, drawCenterLine) { + const fillColor = { + "tgt": "#4DAF4A", + "bkg": "#D9D9D9" + }; + const strokeWidth = 1; const boxYOffset = 30; - const fillColor = mode === "tgt" ? "#4DAF4A": "#D9D9D9"; const strokeColor = "#202020"; - const strokeWidth = 1; + const boxHeight = 80; + + // Centerline + if (drawCenterLine) { + const [min, max] = xScale.domain(); + d3_utils.drawLine(g, xScale(min), boxYOffset + boxHeight/2, xScale(max), boxYOffset + boxHeight/2, strokeColor); + } + + // Box + d3_utils.drawRect(g, { + "class": "rect", + "x": xScale(d.q[1]), + "y": boxYOffset, + "height": boxHeight, + "fill": fillColor[type], + "width": xScale(d.q[3]) - xScale(d.q[1]), + "stroke": strokeColor, + "stroke-width": strokeWidth + }); + + // Markers + const markerStrokeWidth = 3; + d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset, xScale(d.q[0]), boxYOffset + boxHeight, fillColor[type], markerStrokeWidth); + d3_utils.drawLine(g, xScale(d.q[4]), boxYOffset, xScale(d.q[4]), boxYOffset + boxHeight, fillColor[type], markerStrokeWidth); + + // Outliers + const outlierRadius = 4; + const outlierYOffset = 20; + let outliers = [] + for (let idx = 0; idx < d.outliers["values"].length; idx += 1) { + outliers.push({ + x: xScale(d.outliers["values"][idx]), + value: d.outliers["values"][idx], + rank: d.outliers["ranks"][idx], + // dataset: d.dataset # TODO: pass dataset to differentiate. + }) + } + d3_utils.drawCircle(g, outliers, outlierRadius, outlierYOffset, fillColor[type]); + } + + function visualize(tgtCallsites, bkgCallsites, idxMap) { + const boxWidth = 0.6 * width; + const allCallsites = [...new Set([...Object.keys(tgtCallsites), ...Object.keys(bkgCallsites)])]; + + for (let callsite of allCallsites) { + let tgt = null; + if (callsite in tgtCallsites) { + tgt = tgtCallsites[callsite]; + } + + let bkg = null; + if (callsite in bkgCallsites) { + bkg = bkgCallsites[callsite]; + } - for (let [callsite, d] of Object.entries(callsites)) { - const stats = { - "min": d3_utils.formatRuntime(d.min), - "max": d3_utils.formatRuntime(d.max), - "mean": d3_utils.formatRuntime(d.mean), - "var": d3_utils.formatRuntime(d.var), - "imb": d3_utils.formatRuntime(d.imb), - "kurt": d3_utils.formatRuntime(d.kurt), - "skew": d3_utils.formatRuntime(d.skew), - }; - - const boxWidth = 0.6 * width; + // Set the min and max for xScale. + let min = 0, max = 0; + if (bkg === undefined) { + min = tgt.min; + max = tgt.max; + } else { + min = Math.min(tgt.min, bkg.min); + max = Math.max(tgt.max, bkg.max); + } const xScale = d3.scaleLinear() - .domain([d.min, d.max]) - .range([0.05 * boxWidth, boxWidth - 0.05 * boxWidth]); + .domain([min, max]) + .range([0.05 * boxWidth, boxWidth - 0.05 * boxWidth]); + // Set up a g container const idx = idxMap[callsite]; const gId = "box-" + idx; const gYOffset = 200; @@ -127,22 +220,15 @@ .attr("width", boxWidth) .attr("transform", "translate(0, " + gYOffset * idx + ")"); - const axisOffset = boxHeight * 1.5; + const axisOffset = gYOffset * 0.6; d3_utils.drawXAxis(g, xScale, 5, d3_utils.formatRuntime, 0, axisOffset, "black"); // Text for callsite name. - d3_utils.drawText(element, gId, "callsite: " + callsite, 10, 0); - - // Text fpr statistics title. - const yOffset = mode === "tgt" ? 1.1 * boxWidth : 1.4 * boxWidth; - const textColor = mode === "tgt" ? "#4DAF4A": "#202020"; - d3_utils.drawText(element, gId, mode, yOffset, 15, 0, textColor); - - // Text for statistics - let statIdx = 1; - for( let [stat, val] of Object.entries(stats)) { - d3_utils.drawText(element, gId, `${stat}: ${val}`, yOffset, 15, statIdx, textColor); - statIdx += 1; + d3_utils.drawText(element, gId, "callsite: " + callsite, 0, 0); + + visualizeStats(tgt, "tgt", gId, boxWidth); + if (bkg !== undefined) { + visualizeStats(bkg, "bkg", gId, boxWidth); } // const tooltip = element; @@ -150,42 +236,10 @@ // const mouseout = (data) => tooltip.clear(); // const click = (data) => tooltip.render(data); - - - // Centerline - if (drawCenterLine) { - d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset + boxHeight/2, xScale(d.q[4]), boxYOffset + boxHeight/2, strokeColor); - } - - // Box - const box = d3_utils.drawRect(g, { - "class": "rect", - "x": xScale(d.q[1]), - "y": boxYOffset, - "height": boxHeight, - "fill": fillColor, - "width": xScale(d.q[3]) - xScale(d.q[1]), - "stroke": strokeColor, - "stroke-width": strokeWidth - }); - - // Markers - d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset, xScale(d.q[0]), boxYOffset + boxHeight, strokeColor); - d3_utils.drawLine(g, xScale(d.q[4]), boxYOffset, xScale(d.q[4]), boxYOffset + boxHeight, strokeColor); - - // Outliers - const outlierRadius = 4; - let outliers = [] - for (let idx = 0; idx < d.outliers["values"].length; idx += 1) { - outliers.push({ - x: xScale(d.outliers["values"][idx]), - value: d.outliers["values"][idx], - rank: d.outliers["ranks"][idx], - // dataset: d.dataset # TODO: pass dataset to differentiate. - }) + visualizeBoxplot(g, tgt, "tgt", xScale, true); + if (bkg !== undefined) { + visualizeBoxplot(g, bkg, "bkg", xScale, false); } - d3_utils.drawCircle(g, outliers, outlierRadius, boxYOffset, fillColor); - } } diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index 69e735c4..6003e263 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -116,7 +116,7 @@ define(function (require) { .attr('for', forId) .text(text); }, - drawLine: (element, x1, y1, x2, y2, strokeColor) => { + drawLine: (element, x1, y1, x2, y2, strokeColor, strokeWidth) => { return element .append("line") .attr("class", "line") @@ -125,7 +125,7 @@ define(function (require) { .attr("x2", x2) .attr("y2", y2) .attr("stroke", strokeColor) - .style("stroke-width", "1.5"); + .style("stroke-width", strokeWidth); }, drawCircle: (element, data, radius, yOffset, fillColor, click = () => { }, mouseover = () => { }, mouseout = () => { }) => { return element From 0c275692ef8d2dc1b6460dda272db3fe40b02558 Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 13 Jul 2021 15:25:15 -0700 Subject: [PATCH 12/45] Add tooltip (not working); Factor stats data --- hatchet/external/roundtrip/boxplot.js | 39 ++++++++++++++-------- hatchet/external/roundtrip/lib/d3_utils.js | 36 +++++++++++++++++--- 2 files changed, 57 insertions(+), 18 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 428c6a63..934743f3 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -119,18 +119,26 @@ }; } - function visualizeStats (d, mode, gId, boxWidth) { + function visualizeStats (g, d, type, boxWidth) { const stats = _format(d); + const TYPE_TEXTS = { + "tgt": "Target", + "bkg": "Background" + }; // Text fpr statistics title. - const xOffset = mode === "tgt" ? 1.1 * boxWidth : 1.4 * boxWidth; - const textColor = mode === "tgt" ? "#4DAF4A": "#202020"; - // d3_utils.drawText(element, gId, mode, xOffset, 15, 0, textColor); + const xOffset = type === "tgt" ? 1.1 * boxWidth : 1.4 * boxWidth; + const textColor = type === "tgt" ? "#4DAF4A": "#202020"; + + const statsG = g.append("g") + .attr("class", "stats"); + + d3_utils.drawText(statsG, TYPE_TEXTS[type], xOffset, 15, 0, textColor, "underline"); // Text for statistics let statIdx = 1; for( let [stat, val] of Object.entries(stats)) { - d3_utils.drawText(element, gId, `${stat}: ${val}`, xOffset, 15, statIdx, textColor); + d3_utils.drawText(statsG, `${stat}: ${val}`, xOffset, 15, statIdx, textColor); statIdx += 1; } } @@ -151,6 +159,14 @@ d3_utils.drawLine(g, xScale(min), boxYOffset + boxHeight/2, xScale(max), boxYOffset + boxHeight/2, strokeColor); } + // Tooltip + const tooltipWidth = 100; + const tooltipHeight = 30; + const tooltipText = `q1: ${d3_utils.formatRuntime(d.q[1])}, q3: ${d3_utils.formatRuntime(d.q[3])}`; + const mouseover = (event) => d3_utils.drawToolTip(g, event, tooltipText, tooltipWidth, tooltipHeight); + const mouseout = (event) => d3_utils.clearToolTip(g, event); + const click = (event) => d3_utils.drawToolTip(g, event, tooltipText, tooltipWidth, tooltipHeight); + // Box d3_utils.drawRect(g, { "class": "rect", @@ -161,7 +177,7 @@ "width": xScale(d.q[3]) - xScale(d.q[1]), "stroke": strokeColor, "stroke-width": strokeWidth - }); + }, click, mouseover, mouseout); // Markers const markerStrokeWidth = 3; @@ -224,18 +240,13 @@ d3_utils.drawXAxis(g, xScale, 5, d3_utils.formatRuntime, 0, axisOffset, "black"); // Text for callsite name. - d3_utils.drawText(element, gId, "callsite: " + callsite, 0, 0); + d3_utils.drawText(g, "callsite: " + callsite, 0, 0); - visualizeStats(tgt, "tgt", gId, boxWidth); + visualizeStats(g, tgt, "tgt", boxWidth); if (bkg !== undefined) { - visualizeStats(bkg, "bkg", gId, boxWidth); + visualizeStats(g, bkg, "bkg", boxWidth); } - // const tooltip = element; - // const mouseover = (data) => tooltip.render(data); - // const mouseout = (data) => tooltip.clear(); - // const click = (data) => tooltip.render(data); - visualizeBoxplot(g, tgt, "tgt", xScale, true); if (bkg !== undefined) { visualizeBoxplot(g, bkg, "bkg", xScale, false); diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index 6003e263..b141cb62 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -106,14 +106,13 @@ define(function (require) { .on("mouseover", mouseover) .on("mouseout", mouseout); }, - drawText: (element, forId, text, xOffset, yOffset, yOffsetIdx, textColor) => { - return d3.select(element) - .select('#' + forId) + drawText: (element, text, xOffset, yOffset, yOffsetIdx, textColor, textDecoration) => { + return element .append('text') .attr("x", xOffset) .attr("y", yOffset * yOffsetIdx) .attr("fill", textColor) - .attr('for', forId) + .attr("text-decoration", textDecoration) .text(text); }, drawLine: (element, x1, y1, x2, y2, strokeColor, strokeWidth) => { @@ -167,6 +166,35 @@ define(function (require) { .style("font-weight", "lighter"); return line; + }, + drawToolTip: (element, event, text, width, height) => { + const [ mousePosX, mousePosY] = d3.pointer(event, element.node()); + console.log(mousePosX, mousePosY); + const toolTipG = element + .append("g") + .attr("class", "tooltip") + .attr("transform", `translate(${mousePosX}, ${mousePosY})`) + + toolTipG.append("rect") + .attr("fill", "#fff") + .attr("stroke", "#000") + .attr("rx", "10px") + .attr("width", width) + .attr("height", height); + + toolTipG.append("text") + .style("font-family", "sans-serif") + .style("font-size", "12px") + .attr("fill", "#000") + .attr("class", "tooltip-content") + .text(text); + + // return toolTipG; + + }, + clearToolTip: (element) => { + element.selectAll(".tooltip").remove(); + element.selectAll(".tooltip-content").remove(); } } }); From fca42534bb087ae596bc8e1a91663ba613f01cd5 Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 13 Jul 2021 15:50:18 -0700 Subject: [PATCH 13/45] segment the boxplot elements into g --- hatchet/external/roundtrip/boxplot.js | 25 ++++++++++++---------- hatchet/external/roundtrip/lib/d3_utils.js | 15 +++++-------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 934743f3..c074bddf 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -153,22 +153,25 @@ const strokeColor = "#202020"; const boxHeight = 80; + const boxG = g.append("g").attr("class", "box"); + // Centerline if (drawCenterLine) { const [min, max] = xScale.domain(); - d3_utils.drawLine(g, xScale(min), boxYOffset + boxHeight/2, xScale(max), boxYOffset + boxHeight/2, strokeColor); + d3_utils.drawLine(boxG, xScale(min), boxYOffset + boxHeight/2, xScale(max), boxYOffset + boxHeight/2, strokeColor); } + // Tooltip const tooltipWidth = 100; const tooltipHeight = 30; const tooltipText = `q1: ${d3_utils.formatRuntime(d.q[1])}, q3: ${d3_utils.formatRuntime(d.q[3])}`; - const mouseover = (event) => d3_utils.drawToolTip(g, event, tooltipText, tooltipWidth, tooltipHeight); - const mouseout = (event) => d3_utils.clearToolTip(g, event); - const click = (event) => d3_utils.drawToolTip(g, event, tooltipText, tooltipWidth, tooltipHeight); + const mouseover = (event) => d3_utils.drawToolTip(boxG, event, tooltipText, tooltipWidth, tooltipHeight); + const mouseout = (event) => d3_utils.clearToolTip(boxG, event); + const click = (event) => d3_utils.drawToolTip(boxG, event, tooltipText, tooltipWidth, tooltipHeight); // Box - d3_utils.drawRect(g, { + d3_utils.drawRect(boxG, { "class": "rect", "x": xScale(d.q[1]), "y": boxYOffset, @@ -181,22 +184,22 @@ // Markers const markerStrokeWidth = 3; - d3_utils.drawLine(g, xScale(d.q[0]), boxYOffset, xScale(d.q[0]), boxYOffset + boxHeight, fillColor[type], markerStrokeWidth); - d3_utils.drawLine(g, xScale(d.q[4]), boxYOffset, xScale(d.q[4]), boxYOffset + boxHeight, fillColor[type], markerStrokeWidth); + d3_utils.drawLine(boxG, xScale(d.q[0]), boxYOffset, xScale(d.q[0]), boxYOffset + boxHeight, fillColor[type], markerStrokeWidth); + d3_utils.drawLine(boxG, xScale(d.q[4]), boxYOffset, xScale(d.q[4]), boxYOffset + boxHeight, fillColor[type], markerStrokeWidth); // Outliers - const outlierRadius = 4; + const outlierRadius = 4; const outlierYOffset = 20; - let outliers = [] + let outliers = []; for (let idx = 0; idx < d.outliers["values"].length; idx += 1) { outliers.push({ x: xScale(d.outliers["values"][idx]), value: d.outliers["values"][idx], rank: d.outliers["ranks"][idx], // dataset: d.dataset # TODO: pass dataset to differentiate. - }) + }); } - d3_utils.drawCircle(g, outliers, outlierRadius, outlierYOffset, fillColor[type]); + d3_utils.drawCircle(boxG, outliers, outlierRadius, outlierYOffset, fillColor[type]); } function visualize(tgtCallsites, bkgCallsites, idxMap) { diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index b141cb62..e35db955 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -169,32 +169,27 @@ define(function (require) { }, drawToolTip: (element, event, text, width, height) => { const [ mousePosX, mousePosY] = d3.pointer(event, element.node()); - console.log(mousePosX, mousePosY); const toolTipG = element .append("g") .attr("class", "tooltip") .attr("transform", `translate(${mousePosX}, ${mousePosY})`) toolTipG.append("rect") - .attr("fill", "#fff") - .attr("stroke", "#000") - .attr("rx", "10px") + .attr("class", "tooltip-area") .attr("width", width) - .attr("height", height); + .attr("height", height) + .attr("fill", "#fff") + .attr("stroke", "#000"); toolTipG.append("text") + .attr("class", "tooltip-content") .style("font-family", "sans-serif") .style("font-size", "12px") .attr("fill", "#000") - .attr("class", "tooltip-content") .text(text); - - // return toolTipG; - }, clearToolTip: (element) => { element.selectAll(".tooltip").remove(); - element.selectAll(".tooltip-content").remove(); } } }); From f25196f1e03575bc39e0c8fb506b8f5504de27d5 Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 13 Jul 2021 16:13:17 -0700 Subject: [PATCH 14/45] More documentation and factoring --- hatchet/external/roundtrip/boxplot.js | 75 +++++++++++++++------------ 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index c074bddf..3304fc03 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -9,10 +9,13 @@ return; } + // -------------------------------------------------------------------------------- + // RequireJS setup. + // -------------------------------------------------------------------------------- // Setup the requireJS config to get required libraries. requirejs.config({ baseUrl: path, - paths: { + paths: { d3src: 'https://d3js.org', lib: 'lib', }, @@ -29,7 +32,7 @@ // -------------------------------------------------------------------------------- // TODO: Move this to a common utils folder. function cleanInputs(strings) { - return strings.map( (_) => _.replace(/'/g, '"')); + return strings.map((_) => _.replace(/'/g, '"')); } /** @@ -40,7 +43,7 @@ * @param {String} attribute - Attribute to sort by. * @param {String} boxplotType - boxplot type - for options, refer BOXPLOT_TYPES. */ - function sortByAttribute (callsites, metric, attribute, boxplotType) { + function sortByAttribute(callsites, metric, attribute, boxplotType) { if (!BOXPLOT_TYPES.includes(boxplotType)) { console.error("Invalid boxplot type. Use either 'tgt' or 'bkg'") } @@ -48,7 +51,7 @@ // Sanity check to see if the boxplotType is present in the callsites. let _is_empty = false; Object.keys(callsites).map(function (key) { - if(callsites[key][boxplotType] === undefined) { + if (callsites[key][boxplotType] === undefined) { _is_empty = true; } }) @@ -56,12 +59,12 @@ let items = Object.keys(callsites).map(function (key) { return [key, callsites[key][boxplotType]]; }); - - if(!_is_empty) { - items = items.sort( (first, second) => { + + if (!_is_empty) { + items = items.sort((first, second) => { return second[1][metric][attribute] - first[1][metric][attribute]; }); - } + } return items.reduce(function (map, obj) { if (obj[1] !== undefined) { @@ -74,14 +77,17 @@ } require(['d3', 'd3-utils'], (d3, d3_utils) => { + // -------------------------------------------------------------------------------- + // Main logic. + // -------------------------------------------------------------------------------- + const data = JSON.parse(variableString); const callsites = Object.keys(data); - const MODE = Object.keys(data[callsites[0]]).length == 2 ? "COMPARISON" : "NORMAL"; - + // Assign an index to the callsites. const idxToNameMap = Object.assign({}, callsites.map((callsite) => (callsite))); - const nameToIdxMap = Object.entries(idxToNameMap).reduce((acc, [key, value]) => (acc[value] = key, acc), {}) + const nameToIdxMap = Object.entries(idxToNameMap).reduce((acc, [key, value]) => (acc[value] = key, acc), {}); // Selection dropdown for metrics. const metrics = Object.keys(data[callsites[0]]["tgt"]); @@ -97,18 +103,13 @@ const sortedTgtCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "tgt"); const sortedBkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "bkg"); - // Setup VIS area. - const margin = {top: 20, right: 20, bottom: 0, left: 20}, - containerHeight = 100 * Object.keys(callsites).length, - width = element.clientWidth - margin.right - margin.left, - height = containerHeight - margin.top - margin.bottom; - const svgArea = d3_utils.prepareSvgArea(width, height, margin); - const svg = d3_utils.prepareSvg(element, svgArea); + visualize(callsites, sortedTgtCallsites, sortedBkgCallsites, nameToIdxMap); - visualize(sortedTgtCallsites, sortedBkgCallsites, nameToIdxMap, false); - + // -------------------------------------------------------------------------------- + // Visualization functions. + // -------------------------------------------------------------------------------- function _format(d) { - return { + return { "min": d3_utils.formatRuntime(d.min), "max": d3_utils.formatRuntime(d.max), "mean": d3_utils.formatRuntime(d.mean), @@ -119,7 +120,7 @@ }; } - function visualizeStats (g, d, type, boxWidth) { + function visualizeStats(g, d, type, boxWidth) { const stats = _format(d); const TYPE_TEXTS = { "tgt": "Target", @@ -128,7 +129,7 @@ // Text fpr statistics title. const xOffset = type === "tgt" ? 1.1 * boxWidth : 1.4 * boxWidth; - const textColor = type === "tgt" ? "#4DAF4A": "#202020"; + const textColor = type === "tgt" ? "#4DAF4A" : "#202020"; const statsG = g.append("g") .attr("class", "stats"); @@ -137,14 +138,14 @@ // Text for statistics let statIdx = 1; - for( let [stat, val] of Object.entries(stats)) { + for (let [stat, val] of Object.entries(stats)) { d3_utils.drawText(statsG, `${stat}: ${val}`, xOffset, 15, statIdx, textColor); statIdx += 1; } } function visualizeBoxplot(g, d, type, xScale, drawCenterLine) { - const fillColor = { + const fillColor = { "tgt": "#4DAF4A", "bkg": "#D9D9D9" }; @@ -158,10 +159,9 @@ // Centerline if (drawCenterLine) { const [min, max] = xScale.domain(); - d3_utils.drawLine(boxG, xScale(min), boxYOffset + boxHeight/2, xScale(max), boxYOffset + boxHeight/2, strokeColor); + d3_utils.drawLine(boxG, xScale(min), boxYOffset + boxHeight / 2, xScale(max), boxYOffset + boxHeight / 2, strokeColor); } - // Tooltip const tooltipWidth = 100; const tooltipHeight = 30; @@ -172,7 +172,7 @@ // Box d3_utils.drawRect(boxG, { - "class": "rect", + "class": "rect", "x": xScale(d.q[1]), "y": boxYOffset, "height": boxHeight, @@ -201,8 +201,16 @@ } d3_utils.drawCircle(boxG, outliers, outlierRadius, outlierYOffset, fillColor[type]); } - - function visualize(tgtCallsites, bkgCallsites, idxMap) { + + function visualize(callsites, tgtCallsites, bkgCallsites, idxMap) { + // Setup VIS area. + const margin = { top: 20, right: 20, bottom: 0, left: 20 }, + containerHeight = 150 * Object.keys(callsites).length, + width = element.clientWidth - margin.right - margin.left, + height = containerHeight - margin.top - margin.bottom; + const svgArea = d3_utils.prepareSvgArea(width, height, margin); + const svg = d3_utils.prepareSvg(element, svgArea); + const boxWidth = 0.6 * width; const allCallsites = [...new Set([...Object.keys(tgtCallsites), ...Object.keys(bkgCallsites)])]; @@ -227,8 +235,8 @@ max = Math.max(tgt.max, bkg.max); } const xScale = d3.scaleLinear() - .domain([min, max]) - .range([0.05 * boxWidth, boxWidth - 0.05 * boxWidth]); + .domain([min, max]) + .range([0.05 * boxWidth, boxWidth - 0.05 * boxWidth]); // Set up a g container const idx = idxMap[callsite]; @@ -237,7 +245,7 @@ const g = svg.append("g") .attr("id", gId) .attr("width", boxWidth) - .attr("transform", "translate(0, " + gYOffset * idx + ")"); + .attr("transform", "translate(20, " + gYOffset * idx + ")"); const axisOffset = gYOffset * 0.6; d3_utils.drawXAxis(g, xScale, 5, d3_utils.formatRuntime, 0, axisOffset, "black"); @@ -256,6 +264,5 @@ } } } - }); })(element); \ No newline at end of file From 0bdf399b382e951849125ccad656fbafd12fe7fb Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 13 Jul 2021 16:21:46 -0700 Subject: [PATCH 15/45] More aesthetics --- hatchet/external/roundtrip/boxplot.js | 9 ++++----- hatchet/external/roundtrip/lib/d3_utils.js | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 3304fc03..da7f96f0 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -48,7 +48,7 @@ console.error("Invalid boxplot type. Use either 'tgt' or 'bkg'") } - // Sanity check to see if the boxplotType is present in the callsites. + // Sanity check to see if the boxplotType (i.e., "tgt", "bkg") is present in the callsites. let _is_empty = false; Object.keys(callsites).map(function (key) { if (callsites[key][boxplotType] === undefined) { @@ -189,17 +189,16 @@ // Outliers const outlierRadius = 4; - const outlierYOffset = 20; let outliers = []; for (let idx = 0; idx < d.outliers["values"].length; idx += 1) { outliers.push({ x: xScale(d.outliers["values"][idx]), value: d.outliers["values"][idx], rank: d.outliers["ranks"][idx], - // dataset: d.dataset # TODO: pass dataset to differentiate. + y: 10 }); } - d3_utils.drawCircle(boxG, outliers, outlierRadius, outlierYOffset, fillColor[type]); + d3_utils.drawCircle(boxG, outliers, outlierRadius, fillColor[type]); } function visualize(callsites, tgtCallsites, bkgCallsites, idxMap) { @@ -251,7 +250,7 @@ d3_utils.drawXAxis(g, xScale, 5, d3_utils.formatRuntime, 0, axisOffset, "black"); // Text for callsite name. - d3_utils.drawText(g, "callsite: " + callsite, 0, 0); + d3_utils.drawText(g, "Callsite: " + callsite, 0, 0, 0, "#000", "underline"); visualizeStats(g, tgt, "tgt", boxWidth); if (bkg !== undefined) { diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index e35db955..14c3766f 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -126,14 +126,14 @@ define(function (require) { .attr("stroke", strokeColor) .style("stroke-width", strokeWidth); }, - drawCircle: (element, data, radius, yOffset, fillColor, click = () => { }, mouseover = () => { }, mouseout = () => { }) => { + drawCircle: (element, data, radius, fillColor, click = () => { }, mouseover = () => { }, mouseout = () => { }) => { return element .selectAll(".circle") .data(data) .join("circle") .attr("r", radius) .attr("cx", (d) => d.x) - .attr("cy", (d) => d.y + yOffset) + .attr("cy", (d) => d.y) .attr("class", "circle") .style("fill", fillColor) .on("click", (d) => click(d)) From 5df544d10ef80443af9d95040695357244cb079b Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 13 Jul 2021 16:48:58 -0700 Subject: [PATCH 16/45] Bring back menu and style them --- hatchet/external/roundtrip/boxplot.js | 30 +++++++++++++--------- hatchet/external/roundtrip/lib/d3_utils.js | 8 +++--- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index da7f96f0..28657ea2 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -80,24 +80,14 @@ // -------------------------------------------------------------------------------- // Main logic. // -------------------------------------------------------------------------------- - const data = JSON.parse(variableString); - const callsites = Object.keys(data); // Assign an index to the callsites. const idxToNameMap = Object.assign({}, callsites.map((callsite) => (callsite))); const nameToIdxMap = Object.entries(idxToNameMap).reduce((acc, [key, value]) => (acc[value] = key, acc), {}); - - // Selection dropdown for metrics. - const metrics = Object.keys(data[callsites[0]]["tgt"]); - const selectedMetric = metrics[0] - d3_utils.selectionDropDown(element, metrics, "metricSelect"); - - // Selection dropdown for attributes. - const attributes = ["min", "max", "mean", "var", "imb", "kurt", "skew"]; - const selectedAttribute = "mean"; - d3_utils.selectionDropDown(element, attributes, "attributeSelect"); + + const { selectedAttribute, selectedMetric } = menu(); // Sort the callsites by the selected attribute and metric. const sortedTgtCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "tgt"); @@ -120,6 +110,22 @@ }; } + function menu() { + // Selection dropdown for metrics. + const metrics = Object.keys(data[callsites[0]]["tgt"]); + const selectedMetric = metrics[0] + const metricSelectTitle = "Metric: "; + d3_utils.selectionDropDown(element, metrics, "metricSelect", metricSelectTitle); + + // Selection dropdown for attributes. + const attributes = ["min", "max", "mean", "var", "imb", "kurt", "skew"]; + const selectedAttribute = "mean"; + const attributeSelectTitle = "Sort by: "; + d3_utils.selectionDropDown(element, attributes, "attributeSelect", attributeSelectTitle); + + return { selectedAttribute, selectedMetric } + } + function visualizeStats(g, d, type, boxWidth) { const stats = _format(d); const TYPE_TEXTS = { diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index 14c3766f..f78f2a52 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -15,7 +15,7 @@ define(function (require) { } }, prepareSvg: (id, svgArea) => { - d3.select(id).selectAll('*').remove(); + // d3.select(id).selectAll('*').remove(); const svg = d3.select(id) .append('svg') .attr('width', svgArea.width + svgArea.margin.left + svgArea.margin.right) @@ -71,9 +71,11 @@ define(function (require) { }, // UI Components - selectionDropDown: (element, data, id) => { - return d3.select(element).append("select") + selectionDropDown: (element, data, id, title) => { + d3.select(element).append('label').attr('for', id).text(title); + d3.select(element).append("select") .attr("id", id) + .style("margin", "10px 10px 10px 0px") .selectAll('option') .data(data) .enter() From 8da477c88ee3c15639a11c1eee33863fca277ca8 Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 13 Jul 2021 18:41:14 -0700 Subject: [PATCH 17/45] POC: v1 --- hatchet/external/roundtrip/boxplot.js | 82 ++++++++++++++-------- hatchet/external/roundtrip/lib/d3_utils.js | 22 +++--- 2 files changed, 66 insertions(+), 38 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 28657ea2..1d3997b3 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -83,17 +83,19 @@ const data = JSON.parse(variableString); const callsites = Object.keys(data); - // Assign an index to the callsites. - const idxToNameMap = Object.assign({}, callsites.map((callsite) => (callsite))); - const nameToIdxMap = Object.entries(idxToNameMap).reduce((acc, [key, value]) => (acc[value] = key, acc), {}); - - const { selectedAttribute, selectedMetric } = menu(); - - // Sort the callsites by the selected attribute and metric. - const sortedTgtCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "tgt"); - const sortedBkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "bkg"); + const globals = Object.freeze({ + "id": "boxplot-vis", + "attributes": ["mean", "min", "max", "var", "imb", "kurt", "skew"] + }) - visualize(callsites, sortedTgtCallsites, sortedBkgCallsites, nameToIdxMap); + // State for the module. + const state = { + selectedMetric: null, + selectedAttribute: null, + }; + + menu(data); + visualize(data); // -------------------------------------------------------------------------------- // Visualization functions. @@ -110,20 +112,27 @@ }; } - function menu() { + function menu(data) { // Selection dropdown for metrics. const metrics = Object.keys(data[callsites[0]]["tgt"]); - const selectedMetric = metrics[0] + if (state.selectedMetric == null) state.selectedMetric = metrics[0] const metricSelectTitle = "Metric: "; - d3_utils.selectionDropDown(element, metrics, "metricSelect", metricSelectTitle); + const metricSelectId = "metricSelect"; + const metricOnChange = (d) => { + state.selectedMetric = d.target.value; + reset(); + }; + d3_utils.selectionDropDown(element, metrics, metricSelectId, metricSelectTitle, metricOnChange); // Selection dropdown for attributes. - const attributes = ["min", "max", "mean", "var", "imb", "kurt", "skew"]; - const selectedAttribute = "mean"; + if (state.selectedAttribute == null) state.selectedAttribute = globals.attributes[0]; const attributeSelectTitle = "Sort by: "; - d3_utils.selectionDropDown(element, attributes, "attributeSelect", attributeSelectTitle); - - return { selectedAttribute, selectedMetric } + const attributeSelectId = "attributeSelect"; + const attributeOnChange = (d) => { + state.selectedAttribute = d.target.value; + reset(); + }; + d3_utils.selectionDropDown(element, globals.attributes, attributeSelectId, attributeSelectTitle, attributeOnChange); } function visualizeStats(g, d, type, boxWidth) { @@ -207,28 +216,36 @@ d3_utils.drawCircle(boxG, outliers, outlierRadius, fillColor[type]); } - function visualize(callsites, tgtCallsites, bkgCallsites, idxMap) { + function visualize(data) { + const { selectedAttribute, selectedMetric } = state; + console.debug(`Selected metric: ${selectedAttribute}`); + console.debug(`Selected Attribute: ${selectedMetric}`); + + // Sort the callsites by the selected attribute and metric. + const tgtCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "tgt"); + const bkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "bkg"); + + const callsites = [...new Set([...Object.keys(tgtCallsites), ...Object.keys(bkgCallsites)])]; + + // Assign an index to the callsites. + const idxToNameMap = Object.assign({}, callsites.map((callsite) => (callsite))); + const nameToIdxMap = Object.entries(idxToNameMap).reduce((acc, [key, value]) => (acc[value] = key, acc), {}); + // Setup VIS area. const margin = { top: 20, right: 20, bottom: 0, left: 20 }, containerHeight = 150 * Object.keys(callsites).length, width = element.clientWidth - margin.right - margin.left, height = containerHeight - margin.top - margin.bottom; - const svgArea = d3_utils.prepareSvgArea(width, height, margin); + const svgArea = d3_utils.prepareSvgArea(width, height, margin, globals.id); const svg = d3_utils.prepareSvg(element, svgArea); const boxWidth = 0.6 * width; - const allCallsites = [...new Set([...Object.keys(tgtCallsites), ...Object.keys(bkgCallsites)])]; - - for (let callsite of allCallsites) { + for (let callsite of callsites) { let tgt = null; - if (callsite in tgtCallsites) { - tgt = tgtCallsites[callsite]; - } + if (callsite in tgtCallsites) tgt = tgtCallsites[callsite]; let bkg = null; - if (callsite in bkgCallsites) { - bkg = bkgCallsites[callsite]; - } + if (callsite in bkgCallsites) bkg = bkgCallsites[callsite]; // Set the min and max for xScale. let min = 0, max = 0; @@ -244,7 +261,7 @@ .range([0.05 * boxWidth, boxWidth - 0.05 * boxWidth]); // Set up a g container - const idx = idxMap[callsite]; + const idx = nameToIdxMap[callsite]; const gId = "box-" + idx; const gYOffset = 200; const g = svg.append("g") @@ -269,5 +286,10 @@ } } } + + function reset() { + d3_utils.clearSvg('svg'); + visualize(data); + } }); })(element); \ No newline at end of file diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index f78f2a52..d42cdcf2 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -7,15 +7,15 @@ define(function (require) { calcCellWidth: (width, colNames) => width / colNames.length, calcCellHeight: (height, rowNames) => height / rowNames.length, calcCellSize: (width, height, colNames, rowNames, widthMax, heightMax) => [Math.min(calcCellWidth(width, colNames), widthMax), Math.min(calcCellHeight(height, rowNames), heightMax)], - prepareSvgArea: (windowWidth, windowHeight, margin) => { + prepareSvgArea: (windowWidth, windowHeight, margin, id) => { return { width: windowWidth - margin.left - margin.right, height: windowHeight - margin.top - margin.bottom, - margin: margin + margin: margin, + id: id } }, prepareSvg: (id, svgArea) => { - // d3.select(id).selectAll('*').remove(); const svg = d3.select(id) .append('svg') .attr('width', svgArea.width + svgArea.margin.left + svgArea.margin.right) @@ -26,6 +26,9 @@ define(function (require) { return svg; }, + clearSvg: (id) => { + d3.select('svg').remove(); + }, initSvgInfo: (targetView, margin) => { const sd = targetView.svgData; const domId = targetView.domId; @@ -71,16 +74,19 @@ define(function (require) { }, // UI Components - selectionDropDown: (element, data, id, title) => { + selectionDropDown: (element, data, id, title, onChange) => { d3.select(element).append('label').attr('for', id).text(title); - d3.select(element).append("select") + const dropdown = d3.select(element).append("select") .attr("id", id) .style("margin", "10px 10px 10px 0px") - .selectAll('option') + .on('change', onChange); + + const options = dropdown.selectAll('option') .data(data) .enter() - .append('option') - .text(d => d) + .append('option'); + + options.text(d => d) .attr('value', d => d); }, From 5639503271db0b2cc5bd2f217850543b8d2d2c0a Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 13 Jul 2021 20:18:50 -0700 Subject: [PATCH 18/45] Add sorting in desc and asc --- hatchet/external/roundtrip/boxplot.js | 33 +++++++++++++++++++++------ 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 1d3997b3..42e10d74 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -43,7 +43,12 @@ * @param {String} attribute - Attribute to sort by. * @param {String} boxplotType - boxplot type - for options, refer BOXPLOT_TYPES. */ - function sortByAttribute(callsites, metric, attribute, boxplotType) { + function sortByAttribute(callsites, metric, attribute, sortOrder, boxplotType) { + const SORT_MULTIPLIER = { + "inc": -1, + "desc": 1 + } + if (!BOXPLOT_TYPES.includes(boxplotType)) { console.error("Invalid boxplot type. Use either 'tgt' or 'bkg'") } @@ -62,7 +67,7 @@ if (!_is_empty) { items = items.sort((first, second) => { - return second[1][metric][attribute] - first[1][metric][attribute]; + return SORT_MULTIPLIER[sortOrder] * (second[1][metric][attribute] - first[1][metric][attribute]); }); } @@ -85,13 +90,15 @@ const globals = Object.freeze({ "id": "boxplot-vis", - "attributes": ["mean", "min", "max", "var", "imb", "kurt", "skew"] + "attributes": ["mean", "min", "max", "var", "imb", "kurt", "skew"], + "sortOrders": ["desc", "inc"], }) // State for the module. const state = { selectedMetric: null, selectedAttribute: null, + selectedSortOrder: 'desc', }; menu(data); @@ -129,10 +136,21 @@ const attributeSelectTitle = "Sort by: "; const attributeSelectId = "attributeSelect"; const attributeOnChange = (d) => { - state.selectedAttribute = d.target.value; + state.selectedSortOrder = d.target.value; reset(); }; d3_utils.selectionDropDown(element, globals.attributes, attributeSelectId, attributeSelectTitle, attributeOnChange); + + // Selection dropdown for sortrder. + if (state.selectedAttribute == null) state.selectedAttribute = globals.attributes[0]; + const sortOrderSelectTitle = "Sort order: "; + const sortOrderSelectId = "sortingSelect"; + const sortOrderOnChange = (d) => { + state.selectedSortOrder = d.target.value; + reset(); + }; + d3_utils.selectionDropDown(element, globals.sortOrders, sortOrderSelectId, sortOrderSelectTitle, sortOrderOnChange); + } function visualizeStats(g, d, type, boxWidth) { @@ -217,13 +235,14 @@ } function visualize(data) { - const { selectedAttribute, selectedMetric } = state; + const { selectedAttribute, selectedMetric, selectedSortOrder } = state; console.debug(`Selected metric: ${selectedAttribute}`); console.debug(`Selected Attribute: ${selectedMetric}`); + console.debug(`Selected Attribute: ${selectedSortOrder}`) // Sort the callsites by the selected attribute and metric. - const tgtCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "tgt"); - const bkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, "bkg"); + const tgtCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, selectedSortOrder, "tgt"); + const bkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, selectedSortOrder, "bkg"); const callsites = [...new Set([...Object.keys(tgtCallsites), ...Object.keys(bkgCallsites)])]; From e0e9ea36e383672995b2356fd5d2c5eeff2d3d74 Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 13 Jul 2021 20:25:04 -0700 Subject: [PATCH 19/45] Add number of callsites --- hatchet/external/roundtrip/boxplot.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 42e10d74..8ce7282b 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -258,6 +258,8 @@ const svgArea = d3_utils.prepareSvgArea(width, height, margin, globals.id); const svg = d3_utils.prepareSvg(element, svgArea); + d3_utils.drawText(svg, "Total number of callsites: " + callsites.length, 0, 0, 0, "#000", "underline"); + const boxWidth = 0.6 * width; for (let callsite of callsites) { let tgt = null; @@ -286,7 +288,7 @@ const g = svg.append("g") .attr("id", gId) .attr("width", boxWidth) - .attr("transform", "translate(20, " + gYOffset * idx + ")"); + .attr("transform", "translate(0, " + ((gYOffset * idx) + 30) + ")"); const axisOffset = gYOffset * 0.6; d3_utils.drawXAxis(g, xScale, 5, d3_utils.formatRuntime, 0, axisOffset, "black"); From 47e48f5e4808bd8b89317df6d59596d03687875d Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 13 Jul 2021 21:39:21 -0700 Subject: [PATCH 20/45] Add top N callsites selection --- hatchet/external/roundtrip/boxplot.js | 35 ++++++++++++++++++++------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 8ce7282b..082b49e5 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -92,6 +92,7 @@ "id": "boxplot-vis", "attributes": ["mean", "min", "max", "var", "imb", "kurt", "skew"], "sortOrders": ["desc", "inc"], + "topNCallsites": [5, 10, 25, 100, "all"], }) // State for the module. @@ -99,6 +100,7 @@ selectedMetric: null, selectedAttribute: null, selectedSortOrder: 'desc', + selectedTopNCallsites: 5, }; menu(data); @@ -141,8 +143,7 @@ }; d3_utils.selectionDropDown(element, globals.attributes, attributeSelectId, attributeSelectTitle, attributeOnChange); - // Selection dropdown for sortrder. - if (state.selectedAttribute == null) state.selectedAttribute = globals.attributes[0]; + // Selection dropdown for sortOrder. const sortOrderSelectTitle = "Sort order: "; const sortOrderSelectId = "sortingSelect"; const sortOrderOnChange = (d) => { @@ -151,6 +152,15 @@ }; d3_utils.selectionDropDown(element, globals.sortOrders, sortOrderSelectId, sortOrderSelectTitle, sortOrderOnChange); + // Selection dropdown for topNCallsites. + const topNCallsitesSelectTitle = "Top N callsites: "; + const topNCallsitesSelectId = "topNCallsitesSelect"; + const topNCallsitesOnChange = (d) => { + state.selectedTopNCallsites = d.target.value; + reset(); + }; + d3_utils.selectionDropDown(element, globals.topNCallsites, topNCallsitesSelectId, topNCallsitesSelectTitle, topNCallsitesOnChange); + } function visualizeStats(g, d, type, boxWidth) { @@ -235,24 +245,30 @@ } function visualize(data) { - const { selectedAttribute, selectedMetric, selectedSortOrder } = state; + const { selectedAttribute, selectedMetric, selectedSortOrder, selectedTopNCallsites } = state; console.debug(`Selected metric: ${selectedAttribute}`); console.debug(`Selected Attribute: ${selectedMetric}`); - console.debug(`Selected Attribute: ${selectedSortOrder}`) + console.debug(`Selected SortOrder: ${selectedSortOrder}`) + console.debug(`Selected Top N callsites: ${selectedTopNCallsites}`) // Sort the callsites by the selected attribute and metric. const tgtCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, selectedSortOrder, "tgt"); const bkgCallsites = sortByAttribute(data, selectedMetric, selectedAttribute, selectedSortOrder, "bkg"); const callsites = [...new Set([...Object.keys(tgtCallsites), ...Object.keys(bkgCallsites)])]; + + let topNCallsites = callsites; + if(selectedTopNCallsites !== "all" && selectedTopNCallsites < callsites.length) { + topNCallsites = callsites.slice(0, selectedTopNCallsites); + } // Assign an index to the callsites. - const idxToNameMap = Object.assign({}, callsites.map((callsite) => (callsite))); + const idxToNameMap = Object.assign({}, topNCallsites.map((callsite) => (callsite))); const nameToIdxMap = Object.entries(idxToNameMap).reduce((acc, [key, value]) => (acc[value] = key, acc), {}); // Setup VIS area. - const margin = { top: 20, right: 20, bottom: 0, left: 20 }, - containerHeight = 150 * Object.keys(callsites).length, + const margin = { top: 30, right: 0, bottom: 0, left: 0 }, + containerHeight = 200 * Object.keys(topNCallsites).length + 2 * margin.top, width = element.clientWidth - margin.right - margin.left, height = containerHeight - margin.top - margin.bottom; const svgArea = d3_utils.prepareSvgArea(width, height, margin, globals.id); @@ -261,7 +277,7 @@ d3_utils.drawText(svg, "Total number of callsites: " + callsites.length, 0, 0, 0, "#000", "underline"); const boxWidth = 0.6 * width; - for (let callsite of callsites) { + for (let callsite of topNCallsites) { let tgt = null; if (callsite in tgtCallsites) tgt = tgtCallsites[callsite]; @@ -294,7 +310,8 @@ d3_utils.drawXAxis(g, xScale, 5, d3_utils.formatRuntime, 0, axisOffset, "black"); // Text for callsite name. - d3_utils.drawText(g, "Callsite: " + callsite, 0, 0, 0, "#000", "underline"); + const callsiteIndex = parseInt(idx) + 1 + d3_utils.drawText(g, `(${callsiteIndex}) Callsite : ` + callsite, 0, 0, 0, "#000"); visualizeStats(g, tgt, "tgt", boxWidth); if (bkg !== undefined) { From 96107331f3fb3fb3ceaae039fe9864dd8fef25d7 Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 13 Jul 2021 22:22:30 -0700 Subject: [PATCH 21/45] Cleaning the svg based on the id --- hatchet/external/roundtrip/boxplot.js | 13 +++++++++---- hatchet/external/roundtrip/lib/d3_utils.js | 5 ++++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 082b49e5..2961804b 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -88,11 +88,16 @@ const data = JSON.parse(variableString); const callsites = Object.keys(data); + // We add a random number to avoid deleting an existing boxplot in the + // jupyter cell. + // TODO: use the parent's id instead of random number. const globals = Object.freeze({ - "id": "boxplot-vis", + "id": "boxplot-vis-" + Math.ceil(Math.random() * 100), "attributes": ["mean", "min", "max", "var", "imb", "kurt", "skew"], "sortOrders": ["desc", "inc"], "topNCallsites": [5, 10, 25, 100, "all"], + "tickCount": 5, + "boxContainerHeight": 200, }) // State for the module. @@ -268,7 +273,7 @@ // Setup VIS area. const margin = { top: 30, right: 0, bottom: 0, left: 0 }, - containerHeight = 200 * Object.keys(topNCallsites).length + 2 * margin.top, + containerHeight = globals.boxContainerHeight * Object.keys(topNCallsites).length + 2 * margin.top, width = element.clientWidth - margin.right - margin.left, height = containerHeight - margin.top - margin.bottom; const svgArea = d3_utils.prepareSvgArea(width, height, margin, globals.id); @@ -307,7 +312,7 @@ .attr("transform", "translate(0, " + ((gYOffset * idx) + 30) + ")"); const axisOffset = gYOffset * 0.6; - d3_utils.drawXAxis(g, xScale, 5, d3_utils.formatRuntime, 0, axisOffset, "black"); + d3_utils.drawXAxis(g, xScale, globals.tickCount, d3_utils.formatRuntime, 0, axisOffset, "black"); // Text for callsite name. const callsiteIndex = parseInt(idx) + 1 @@ -326,7 +331,7 @@ } function reset() { - d3_utils.clearSvg('svg'); + d3_utils.clearSvg(globals.id); visualize(data); } }); diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index d42cdcf2..366bfc84 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -7,6 +7,8 @@ define(function (require) { calcCellWidth: (width, colNames) => width / colNames.length, calcCellHeight: (height, rowNames) => height / rowNames.length, calcCellSize: (width, height, colNames, rowNames, widthMax, heightMax) => [Math.min(calcCellWidth(width, colNames), widthMax), Math.min(calcCellHeight(height, rowNames), heightMax)], + + // SVG init. prepareSvgArea: (windowWidth, windowHeight, margin, id) => { return { width: windowWidth - margin.left - margin.right, @@ -18,6 +20,7 @@ define(function (require) { prepareSvg: (id, svgArea) => { const svg = d3.select(id) .append('svg') + .attr("id", svgArea.id) .attr('width', svgArea.width + svgArea.margin.left + svgArea.margin.right) .attr('height', svgArea.height + svgArea.margin.top + svgArea.margin.bottom) .append('g') @@ -27,7 +30,7 @@ define(function (require) { return svg; }, clearSvg: (id) => { - d3.select('svg').remove(); + d3.selectAll("#" + id).remove(); }, initSvgInfo: (targetView, margin) => { const sd = targetView.svgData; From 7817234ad25dd10da889967d072f6b5fa7c0852f Mon Sep 17 00:00:00 2001 From: jarusified Date: Wed, 14 Jul 2021 15:04:52 -0700 Subject: [PATCH 22/45] remove rank from data --- hatchet/external/roundtrip/boxplot.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 2961804b..228e7438 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -242,7 +242,7 @@ outliers.push({ x: xScale(d.outliers["values"][idx]), value: d.outliers["values"][idx], - rank: d.outliers["ranks"][idx], + // rank: d.outliers["ranks"][idx], y: 10 }); } From 69e2dc93e32736bb8e30be4f43676daba7194ee6 Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 15 Jul 2021 10:27:37 -0700 Subject: [PATCH 23/45] Add mapping VIS_TO_DATA --- hatchet/external/roundtrip/boxplot.js | 12 +++++++--- hatchet/external/roundtrip/roundtrip.py | 32 ++++++++++++++++--------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 228e7438..5e29a2ad 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -1,6 +1,7 @@ // TODO: Adopt MVC pattern for this module. (function (element) { const BOXPLOT_TYPES = ["tgt", "bkg"]; + const SORTORDER_TYPES = ["asc", "desc"]; const [path, visType, variableString] = cleanInputs(argList); // Quit if visType is not boxplot. @@ -41,16 +42,21 @@ * @param {Array} callsites - Callsites as a list. * @param {Stirng} metric - Metric (e.g., time or time (inc)). * @param {String} attribute - Attribute to sort by. + * @param {String} sortOrder - Sorting order * @param {String} boxplotType - boxplot type - for options, refer BOXPLOT_TYPES. */ function sortByAttribute(callsites, metric, attribute, sortOrder, boxplotType) { const SORT_MULTIPLIER = { - "inc": -1, + "asc": -1, "desc": 1 } + if (!SORTORDER_TYPES.includes(sortOrder)) { + console.error("Invalid sortOrder. Use either 'asc' or 'desc'"); + } + if (!BOXPLOT_TYPES.includes(boxplotType)) { - console.error("Invalid boxplot type. Use either 'tgt' or 'bkg'") + console.error("Invalid boxplot type. Use either 'tgt' or 'bkg'"); } // Sanity check to see if the boxplotType (i.e., "tgt", "bkg") is present in the callsites. @@ -94,7 +100,7 @@ const globals = Object.freeze({ "id": "boxplot-vis-" + Math.ceil(Math.random() * 100), "attributes": ["mean", "min", "max", "var", "imb", "kurt", "skew"], - "sortOrders": ["desc", "inc"], + "sortOrders": ["desc", "asc"], "topNCallsites": [5, 10, 25, 100, "all"], "tickCount": 5, "boxContainerHeight": 200, diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index 1213191e..591f09ca 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -18,16 +18,21 @@ class Roundtrip(Magics): # Note to self: Custom magic classes MUST call parent's constructor def __init__(self, shell): super(Roundtrip, self).__init__(shell) - global VIS_TO_FILE, DATA_TO_VALIDATION + global VIS_TO_FILE, VIS_TO_VALIDATION, VIS_TO_DATA VIS_TO_FILE = { "literal_tree": "roundtripTree.js", "boxplot": "boxplot.js" } - DATA_TO_VALIDATION = { + VIS_TO_VALIDATION = { "literal_tree": self._validate_literal_tree, "boxplot": self._validate_boxplot } + VIS_TO_DATA = { + "literal_tree": "jsNodeSelected", + "boxplot": "variance_df" + } + self.id_number = 0 # Clean up namespace function display( @@ -83,7 +88,7 @@ def loadVisualization(self, line): displayObj.update(Javascript('argList.push("' + str(visType) + '")')) displayObj.update(Javascript('argList.push("' + str(data) + '")')) - DATA_TO_VALIDATION[visType](data) + VIS_TO_VALIDATION[visType](data) # Get curent cell id. self.codeMap[name] = javascriptFile @@ -137,20 +142,25 @@ def runVis(self, name, javascriptFile): display(HTML(header + javascriptFile + footer)) @line_magic - def fetchData(self, dest): + def fetchData(self, line): # added eval() to 'execute' the JS list-as-string as a Python list + # Get command line args for loading the vis. + args = line.split(" ") + visType = self.cleanLineArgument(args[0]) + dest = self.cleanLineArgument(args[1]) + hook = ( """ - var holder = jsNodeSelected; + var holder = '""" + str(VIS_TO_DATA[visType]) + """'; holder = '"' + holder + '"'; IPython.notebook.kernel.execute('""" - + str(dest) - + """ = '+ eval(holder)); - //console.log('""" - + str(dest) - + """ = '+ holder); - """ + + str(dest) + + """ = '+ eval(holder)); + //console.log('""" + + str(dest) + + """ = '+ holder); + """ ) display(Javascript(hook)) From 40dd64f747a6d08e47808d32eb6c7b57109525e1 Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 15 Jul 2021 12:00:40 -0700 Subject: [PATCH 24/45] Add method to convert a dict to dataframe (csv format). --- hatchet/external/roundtrip/boxplot.js | 47 +++++++++++++++++++++++-- hatchet/external/roundtrip/roundtrip.py | 6 ++-- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 5e29a2ad..e23c35bb 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -4,6 +4,7 @@ const SORTORDER_TYPES = ["asc", "desc"]; const [path, visType, variableString] = cleanInputs(argList); + // Quit if visType is not boxplot. if (visType !== "boxplot") { console.error("Incorrect visualization type passed.") @@ -19,11 +20,13 @@ paths: { d3src: 'https://d3js.org', lib: 'lib', + jsdelivr: 'https://cdn.jsdelivr.net/npm', }, map: { '*': { 'd3': 'd3src/d3.v6.min', 'd3-utils': 'lib/d3_utils', + 'data-forge': 'jsdelivr/data-forge@1.8.17/build/index.min' } } }); @@ -87,7 +90,7 @@ }, {}); } - require(['d3', 'd3-utils'], (d3, d3_utils) => { + require(['d3', 'd3-utils', 'data-forge'], (d3, d3_utils, dataForge) => { // -------------------------------------------------------------------------------- // Main logic. // -------------------------------------------------------------------------------- @@ -115,7 +118,9 @@ }; menu(data); - visualize(data); + const variance_dict = visualize(data); + variance_df = dict_to_df(variance_dict, "tgt"); + console.log(variance_df); // -------------------------------------------------------------------------------- // Visualization functions. @@ -132,6 +137,38 @@ }; } + /** + * + * @param {Object} dict + * @return {dataForge.DataFrame} + */ + function dict_to_df(dict, boxplotType) { + const callsites = Object.keys(dict); + const stats = Object.keys(dict[callsites[0]][boxplotType]); + let string = `name,` + stats.join(","); + + for (let callsite of callsites){ + const d = dict[callsite][boxplotType]; + + let statsString = `${callsite},`; + for (let stat of stats) { + // console.log(stat, d[stat]); + if (stat === "q") { + statsString += "[" + d[stat].join(",") + "],"; + } + else if (stat === "outliers") { + statsString += " ,"; + } + else { + statsString += d[stat] + ","; + } + } + string += statsString + "\n"; + } + + return string; + } + function menu(data) { // Selection dropdown for metrics. const metrics = Object.keys(data[callsites[0]]["tgt"]); @@ -256,6 +293,8 @@ } function visualize(data) { + const variance_dict = {} + const { selectedAttribute, selectedMetric, selectedSortOrder, selectedTopNCallsites } = state; console.debug(`Selected metric: ${selectedAttribute}`); console.debug(`Selected Attribute: ${selectedMetric}`); @@ -333,7 +372,11 @@ if (bkg !== undefined) { visualizeBoxplot(g, bkg, "bkg", xScale, false); } + + variance_dict[callsite] = { tgt, bkg }; } + + return variance_dict } function reset() { diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index 591f09ca..7f7087e0 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -148,16 +148,16 @@ def fetchData(self, line): # Get command line args for loading the vis. args = line.split(" ") visType = self.cleanLineArgument(args[0]) - dest = self.cleanLineArgument(args[1]) + dest = args[1] hook = ( """ - var holder = '""" + str(VIS_TO_DATA[visType]) + """'; + var holder = """ + VIS_TO_DATA[visType] + """; holder = '"' + holder + '"'; IPython.notebook.kernel.execute('""" + str(dest) + """ = '+ eval(holder)); - //console.log('""" + console.log('""" + str(dest) + """ = '+ holder); """ From c1f32441a604f246f681fbd35967cb9725cfbc3a Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 15 Jul 2021 13:52:13 -0700 Subject: [PATCH 25/45] Pass data back to the jupyter interface --- hatchet/external/roundtrip/boxplot.js | 22 ++++++++++------------ hatchet/external/roundtrip/roundtrip.py | 19 ++++++++++++------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index e23c35bb..9da80421 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -20,13 +20,11 @@ paths: { d3src: 'https://d3js.org', lib: 'lib', - jsdelivr: 'https://cdn.jsdelivr.net/npm', }, map: { '*': { 'd3': 'd3src/d3.v6.min', 'd3-utils': 'lib/d3_utils', - 'data-forge': 'jsdelivr/data-forge@1.8.17/build/index.min' } } }); @@ -90,7 +88,7 @@ }, {}); } - require(['d3', 'd3-utils', 'data-forge'], (d3, d3_utils, dataForge) => { + require(['d3', 'd3-utils'], (d3, d3_utils) => { // -------------------------------------------------------------------------------- // Main logic. // -------------------------------------------------------------------------------- @@ -119,8 +117,7 @@ menu(data); const variance_dict = visualize(data); - variance_df = dict_to_df(variance_dict, "tgt"); - console.log(variance_df); + variance_df = "'" + dict_to_df(variance_dict, "tgt") + "'"; // -------------------------------------------------------------------------------- // Visualization functions. @@ -144,29 +141,30 @@ */ function dict_to_df(dict, boxplotType) { const callsites = Object.keys(dict); + const columns = "name,min,max,mean,var,imb,kurt,skew" const stats = Object.keys(dict[callsites[0]][boxplotType]); - let string = `name,` + stats.join(","); + // let string = `name,` + stats.join(",") + ";"; + let string = columns + ";"; for (let callsite of callsites){ const d = dict[callsite][boxplotType]; let statsString = `${callsite},`; for (let stat of stats) { - // console.log(stat, d[stat]); if (stat === "q") { - statsString += "[" + d[stat].join(",") + "],"; + // statsString += "[" + d[stat].join(",") + "],"; + continue; } else if (stat === "outliers") { - statsString += " ,"; + continue; } else { statsString += d[stat] + ","; } } - string += statsString + "\n"; + string += statsString + ";"; } - - return string; + return string.substring(0, string.length - 1); } function menu(data) { diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index 7f7087e0..65dd2f72 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -98,7 +98,7 @@ def loadVisualization(self, line): elementTop = element.get(0);""" displayObj.update(Javascript(preRun)) - self.runVis(name, javascriptFile) + self.runVis(name, javascriptFile, visType) self.id_number += 1 def _validate_literal_tree(self, data): @@ -123,7 +123,7 @@ def _validate_literal_tree(self, data): def _validate_boxplot(self, data): pass - def runVis(self, name, javascriptFile): + def runVis(self, name, javascriptFile, visType): name = "roundtripTreeVis" + str(self.id_number) header = ( """ @@ -137,6 +137,10 @@ def runVis(self, name, javascriptFile): element = document.getElementById('""" + str(name) + """');""" + + """var """ + + VIS_TO_DATA[visType] + + """ = {};""" + ) footer = """""" display(HTML(header + javascriptFile + footer)) @@ -152,14 +156,15 @@ def fetchData(self, line): hook = ( """ - var holder = """ + VIS_TO_DATA[visType] + """; + var holder = variance_df; holder = '"' + holder + '"'; - IPython.notebook.kernel.execute('""" - + str(dest) - + """ = '+ eval(holder)); - console.log('""" + console.log('""" + str(dest) + """ = '+ holder); + IPython.notebook.kernel.execute('""" + + str(dest) + + """ = '+ eval(holder)); + """ ) From c37e6830a3d57889652de2fdf1d22bd578a4f300 Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 15 Jul 2021 14:37:40 -0700 Subject: [PATCH 26/45] Fix reset action and remove unnecessary columns. --- hatchet/external/roundtrip/boxplot.js | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 9da80421..1542aed0 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -141,16 +141,16 @@ */ function dict_to_df(dict, boxplotType) { const callsites = Object.keys(dict); - const columns = "name,min,max,mean,var,imb,kurt,skew" - const stats = Object.keys(dict[callsites[0]][boxplotType]); + const stat_columns = ["min", "max", "mean", "var", "imb", "kurt", "skew"] + // const stats = Object.keys(dict[callsites[0]][boxplotType]); // let string = `name,` + stats.join(",") + ";"; - let string = columns + ";"; + let string = 'name,' + stat_columns.join(",") + ";"; for (let callsite of callsites){ const d = dict[callsite][boxplotType]; let statsString = `${callsite},`; - for (let stat of stats) { + for (let stat of stat_columns) { if (stat === "q") { // statsString += "[" + d[stat].join(",") + "],"; continue; @@ -162,7 +162,7 @@ statsString += d[stat] + ","; } } - string += statsString + ";"; + string += statsString.substring(0, statsString.length - 1) + ";"; } return string.substring(0, string.length - 1); } @@ -379,7 +379,9 @@ function reset() { d3_utils.clearSvg(globals.id); - visualize(data); + const variance_dict = visualize(data); + variance_df = "'" + dict_to_df(variance_dict, "tgt") + "'"; + } }); })(element); \ No newline at end of file From d87128785e5e552f66bb60ec71c5a545be0182bd Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 15 Jul 2021 15:10:39 -0700 Subject: [PATCH 27/45] Fix the sorting --- hatchet/external/roundtrip/boxplot.js | 2 +- hatchet/external/roundtrip/roundtrip.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 1542aed0..796a0a75 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -184,7 +184,7 @@ const attributeSelectTitle = "Sort by: "; const attributeSelectId = "attributeSelect"; const attributeOnChange = (d) => { - state.selectedSortOrder = d.target.value; + state.selectedAttribute = d.target.value; reset(); }; d3_utils.selectionDropDown(element, globals.attributes, attributeSelectId, attributeSelectTitle, attributeOnChange); diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index 65dd2f72..2393f1c9 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -158,7 +158,7 @@ def fetchData(self, line): """ var holder = variance_df; holder = '"' + holder + '"'; - console.log('""" + console.debug('""" + str(dest) + """ = '+ holder); IPython.notebook.kernel.execute('""" From 49bf08e7e5992d54b27dff7e3effa219a6cccc10 Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 15 Jul 2021 15:30:06 -0700 Subject: [PATCH 28/45] Add assertions to check csv dump and columns --- hatchet/external/roundtrip/boxplot.js | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 796a0a75..7ba1784f 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -142,8 +142,6 @@ function dict_to_df(dict, boxplotType) { const callsites = Object.keys(dict); const stat_columns = ["min", "max", "mean", "var", "imb", "kurt", "skew"] - // const stats = Object.keys(dict[callsites[0]][boxplotType]); - // let string = `name,` + stats.join(",") + ";"; let string = 'name,' + stat_columns.join(",") + ";"; for (let callsite of callsites){ @@ -151,20 +149,26 @@ let statsString = `${callsite},`; for (let stat of stat_columns) { - if (stat === "q") { - // statsString += "[" + d[stat].join(",") + "],"; - continue; - } - else if (stat === "outliers") { - continue; - } - else { + if (Object.keys(d).includes(stat)) { statsString += d[stat] + ","; } } string += statsString.substring(0, statsString.length - 1) + ";"; } - return string.substring(0, string.length - 1); + + const result = string.substring(0, string.length - 1) + + // Assertions to check if the right number of columns are being + // passed. + for (let str of result.split(";")) { + if (str.split(",").length !== stat_columns.length + 1){ + console.error("Mismatch in the number of stats metrics and data"); + console.debug("Columns: ", result.split(";")[0]); + console.debug("Data: ", str); + } + } + + return result; } function menu(data) { From bd18d94af2681b78c0d3eae3dbf0300f7ddc5c4b Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 15 Jul 2021 23:03:45 -0700 Subject: [PATCH 29/45] Add detailed documentation --- hatchet/external/roundtrip/boxplot.js | 104 ++++++++++++++++++++++---- 1 file changed, 89 insertions(+), 15 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 7ba1784f..4fed867d 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -4,7 +4,6 @@ const SORTORDER_TYPES = ["asc", "desc"]; const [path, visType, variableString] = cleanInputs(argList); - // Quit if visType is not boxplot. if (visType !== "boxplot") { console.error("Incorrect visualization type passed.") @@ -33,6 +32,12 @@ // Utility functions. // -------------------------------------------------------------------------------- // TODO: Move this to a common utils folder. + /** + * Utility to remove single quotes. + * + * @param {String} strings strings with single quotes. + * @returns {String} strings without single quotes. + */ function cleanInputs(strings) { return strings.map((_) => _.replace(/'/g, '"')); } @@ -41,10 +46,10 @@ * Sort the callsite ordering based on the attribute. * * @param {Array} callsites - Callsites as a list. - * @param {Stirng} metric - Metric (e.g., time or time (inc)). + * @param {String} metric - Metric passed by user (e.g., time or time (inc)). * @param {String} attribute - Attribute to sort by. - * @param {String} sortOrder - Sorting order - * @param {String} boxplotType - boxplot type - for options, refer BOXPLOT_TYPES. + * @param {String} sortOrder - Sorting order - for options, refer SORTORDER_TYPES. + * @param {String} boxplotType - boxplot type - for options, refer BOXPLOT_TYPES. */ function sortByAttribute(callsites, metric, attribute, sortOrder, boxplotType) { const SORT_MULTIPLIER = { @@ -117,11 +122,18 @@ menu(data); const variance_dict = visualize(data); - variance_df = "'" + dict_to_df(variance_dict, "tgt") + "'"; + variance_df = "'" + dict_to_csv(variance_dict, "tgt") + "'"; // -------------------------------------------------------------------------------- // Visualization functions. // -------------------------------------------------------------------------------- + /** + * Format the statistics runtime. We use the mantessa and exponent + * format. For more info, refer d3_utils.formatRuntime. + * + * @param {Object} d Statistics object + * @returns {Object} Formatted statistics object. + */ function _format(d) { return { "min": d3_utils.formatRuntime(d.min), @@ -135,11 +147,13 @@ } /** + * Convert the stats dictionary to a csv. * - * @param {Object} dict - * @return {dataForge.DataFrame} + * @param {Object} dict Statistics Object + * @param {Object} boxplotType - boxplot type - for options, refer BOXPLOT_TYPES. + * @return {String} result dictionary reformatted as a string (csv format) */ - function dict_to_df(dict, boxplotType) { + function dict_to_csv(dict, boxplotType) { const callsites = Object.keys(dict); const stat_columns = ["min", "max", "mean", "var", "imb", "kurt", "skew"] let string = 'name,' + stat_columns.join(",") + ";"; @@ -171,6 +185,12 @@ return result; } + /** + * Renders menu view for selecting metric, attribute, sortOrder and + * callsites. + * + * @param {Object} data + */ function menu(data) { // Selection dropdown for metrics. const metrics = Object.keys(data[callsites[0]]["tgt"]); @@ -210,10 +230,19 @@ reset(); }; d3_utils.selectionDropDown(element, globals.topNCallsites, topNCallsitesSelectId, topNCallsitesSelectTitle, topNCallsitesOnChange); - } - function visualizeStats(g, d, type, boxWidth) { + /** + * Renders the statistics as rows. + * + * @param {svg.g} g HTML element. + * @param {Object} d Data + * @param {String} boxplotType boxplot type - for options, refer BOXPLOT_TYPES. + * @param {Number} boxWidth Width of the boxplot view. + * + * d - format : {"tgt": stats, "bkg": stats } + */ + function visualizeStats(g, d, boxplotType, boxWidth) { const stats = _format(d); const TYPE_TEXTS = { "tgt": "Target", @@ -221,13 +250,13 @@ }; // Text fpr statistics title. - const xOffset = type === "tgt" ? 1.1 * boxWidth : 1.4 * boxWidth; - const textColor = type === "tgt" ? "#4DAF4A" : "#202020"; + const xOffset = boxplotType === "tgt" ? 1.1 * boxWidth : 1.4 * boxWidth; + const textColor = boxplotType === "tgt" ? "#4DAF4A" : "#202020"; const statsG = g.append("g") .attr("class", "stats"); - d3_utils.drawText(statsG, TYPE_TEXTS[type], xOffset, 15, 0, textColor, "underline"); + d3_utils.drawText(statsG, TYPE_TEXTS[boxplotType], xOffset, 15, 0, textColor, "underline"); // Text for statistics let statIdx = 1; @@ -237,6 +266,15 @@ } } + /** + * Renders boxplots for the callsites. + * + * @param {svg.g} g HTML element. + * @param {Object} d Data + * @param {String} boxplotType boxplot type - for options, refer BOXPLOT_TYPES. + * @param {d3.scale} xScale Scale for layouting the boxplot. + * @param {Boolean} drawCenterLine draws center line, if true. + */ function visualizeBoxplot(g, d, type, xScale, drawCenterLine) { const fillColor = { "tgt": "#4DAF4A", @@ -294,6 +332,39 @@ d3_utils.drawCircle(boxG, outliers, outlierRadius, fillColor[type]); } + /** + * Renders the vis for the provided callsites object. + * + * @param {Object} data + * @returns {Object} variance_dict = { "tgt": stats, "bkg": stats } + * + * data = { + * "callsite_name": { + * "tgt": { + * "metric1": stats, + * "metric2": stats, + * }, + * "bkg": { + * "metric1": stats, + * "metric2": stats, + * } + * } + * } + * + * stats = { + * "min": {float}, + * "max": {float}, + * "mean": {float}, + * "imb": {float}, + * "kurt": {float}, + * "skew": {float}, + * "q": {Array} = [q0, q1, q2, q3, q4], + * "outliers": {Object} = { + * "values": {Array}, + * "keys": {Array} + * } + * } + */ function visualize(data) { const variance_dict = {} @@ -381,11 +452,14 @@ return variance_dict } + /** + * Clears the view and resets the view. + * + */ function reset() { d3_utils.clearSvg(globals.id); const variance_dict = visualize(data); - variance_df = "'" + dict_to_df(variance_dict, "tgt") + "'"; - + variance_df = "'" + dict_to_csv(variance_dict, "tgt") + "'"; } }); })(element); \ No newline at end of file From 8a9b99a1bc73db5d5d8d98fbf31269bee80fdff3 Mon Sep 17 00:00:00 2001 From: jarusified Date: Fri, 16 Jul 2021 10:09:11 -0700 Subject: [PATCH 30/45] Improve validation for roundtrip:boxplot --- hatchet/external/roundtrip/roundtrip.py | 69 ++++++++++++++++++++++++- requirements.txt | 1 + 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index 2393f1c9..6ca955d7 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -2,6 +2,7 @@ from IPython.core.magic import Magics, magics_class, line_magic from IPython.display import HTML, Javascript, display import os +import jsonschema """ File: roundtrip.py @@ -121,7 +122,71 @@ def _validate_literal_tree(self, data): raise Exception("Bad argument") def _validate_boxplot(self, data): - pass + STATS_SCHEMA = { + "type": "object", + "properties": { + "min": {"type": "number"}, + "max": {"type": "number"}, + "mean": {"type": "number"}, + "imb": {"type": "number"}, + "var": {"type": "number"}, + "kurt": {"type": "number"}, + "skew": {"type": "number"}, + "q": {"type": "array"}, + "outliers": {"type": "object"}, + } + } + + if isinstance(data, dict): + callsites = data.keys() + for cs in callsites: + if isinstance(data[cs], dict): + boxplotTypes = data[cs].keys() + for boxplotType in boxplotTypes: + if boxplotType in ["tgt", "bgk"]: + for metric in data[cs][boxplotType]: + jsonschema.validate(instance=data[cs][boxplotType][metric], schema=STATS_SCHEMA) + else: + self._print_exception_boxplot() + raise Exception("Incorrect boxplot type key provided. Use 'tgt' or 'bgk'.") + else: + self._print_exception_boxplot() + raise Exception("Bad argument.") + else: + self._print_exception_boxplot() + raise Exception("Bad argument.") + + def _print_exception_boxplot(): + print( + """The argument is not a valid boxplot dictionary. Please check that + you have provided the data in the following form to + loadVisualization: + boxplot = { + "tgt" : { + "metric1": { + "min": number, + "max": number, + "mean": number, + "imb": number, + "kurt": number, + "skew": number, + "q": [q0, q1, q2, q3, q4], + "outliers: { + "values": array, + "keys": array + } + }, + "metric2": { + ... + } + }, + "bkg": { + // Refer "tgt" key. + } + } + + """ + ) def runVis(self, name, javascriptFile, visType): name = "roundtripTreeVis" + str(self.id_number) @@ -156,7 +221,7 @@ def fetchData(self, line): hook = ( """ - var holder = variance_df; + var holder = """ + VIS_TO_DATA[visType] + """; holder = '"' + holder + '"'; console.debug('""" + str(dest) diff --git a/requirements.txt b/requirements.txt index 652d4121..509062ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ PyYAML cython multiprocess textX +jsonschema \ No newline at end of file From 2d6b813f7f2e7045917b12936d73a5f06b3f66b7 Mon Sep 17 00:00:00 2001 From: jarusified Date: Fri, 16 Jul 2021 11:58:21 -0700 Subject: [PATCH 31/45] Fix flake8 and black formatting --- hatchet/external/boxplot.py | 95 ++++++++++++++++--------- hatchet/external/roundtrip/roundtrip.py | 51 +++++++------ 2 files changed, 87 insertions(+), 59 deletions(-) diff --git a/hatchet/external/boxplot.py b/hatchet/external/boxplot.py index 7e92a821..a90fe31e 100644 --- a/hatchet/external/boxplot.py +++ b/hatchet/external/boxplot.py @@ -1,18 +1,26 @@ import hatchet as ht -import numpy as np +import numpy as np import pandas as pd from scipy import stats + class BoxPlot: """ Boxplot computation for a dataframe segment """ - def __init__(self, tgt_gf, bkg_gf=None, callsites=[], metrics=["time", "time (inc)"], iqr_scale=1.5): + def __init__( + self, + tgt_gf, + bkg_gf=None, + callsites=[], + metrics=["time", "time (inc)"], + iqr_scale=1.5, + ): """ - Boxplot for callsite - - :param tgt_gf: (ht.GraphFrame) Target GraphFrame + Boxplot for callsite + + :param tgt_gf: (ht.GraphFrame) Target GraphFrame :param bkg_gf: (ht.GraphFrame) Relative supergraph :param callsites: (list) Callsite name :param metrics: (list) Runtime metrics @@ -21,21 +29,35 @@ def __init__(self, tgt_gf, bkg_gf=None, callsites=[], metrics=["time", "time (in assert isinstance(tgt_gf, ht.GraphFrame) assert isinstance(callsites, list) assert isinstance(iqr_scale, float) - + self.metrics = metrics self.iqr_scale = iqr_scale self.callsites = callsites - + tgt_gf.dataframe.reset_index(inplace=True) - tgt_dict = BoxPlot.df_bi_level_group(tgt_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=["rank"], apply_func=lambda _: _.mean()) - + tgt_dict = BoxPlot.df_bi_level_group( + tgt_gf.dataframe, + "name", + None, + cols=metrics + ["nid"], + group_by=["rank"], + apply_func=lambda _: _.mean(), + ) + if bkg_gf is not None: bkg_gf.dataframe.reset_index(inplace=True) - bkg_dict = BoxPlot.df_bi_level_group(bkg_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=["rank"], apply_func=lambda _: _.mean()) - + bkg_dict = BoxPlot.df_bi_level_group( + bkg_gf.dataframe, + "name", + None, + cols=metrics + ["nid"], + group_by=["rank"], + apply_func=lambda _: _.mean(), + ) + self.result = {} - self.box_types = ["tgt"] + self.box_types = ["tgt"] if bkg_gf is not None: self.box_types = ["tgt", "bkg"] @@ -47,11 +69,13 @@ def __init__(self, tgt_gf, bkg_gf=None, callsites=[], metrics=["time", "time (in if bkg_gf is not None: bkg_df = bkg_dict[callsite] ret["bkg"] = self.compute(bkg_df) - + self.result[callsite] = ret - + @staticmethod - def df_bi_level_group(df, frst_group_attr, scnd_group_attr, cols, group_by, apply_func, proxy={}): + def df_bi_level_group( + df, frst_group_attr, scnd_group_attr, cols, group_by, apply_func, proxy={} + ): _cols = cols + group_by # If there is only one attribute to group by, we use the 1st index. @@ -73,16 +97,22 @@ def df_bi_level_group(df, frst_group_attr, scnd_group_attr, cols, group_by, appl if scnd_group_attr is not None: if len(group_by) == 0: _cols = _cols + ["rank"] - return { _ : _df.xs(_)[_cols] for (_, __) in _levels } - return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for (_, __) in _levels } + return {_: _df.xs(_)[_cols] for (_, __) in _levels} + return { + _: (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() + for (_, __) in _levels + } else: if len(group_by) == 0: _cols = _cols + ["rank"] - return { _ : _df.xs(_)[_cols] for _ in _levels } - return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for _ in _levels } - else: - return { _ : _df.xs(_)[_cols] for _ in _levels} - + return {_: _df.xs(_)[_cols] for _ in _levels} + return { + _: (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() + for _ in _levels + } + else: + return {_: _df.xs(_)[_cols] for _ in _levels} + @staticmethod def outliers(data, scale=1.5, side="both"): assert isinstance(data, (pd.Series, np.ndarray)) @@ -108,7 +138,6 @@ def outliers(data, scale=1.5, side="both"): if side == "both": return np.logical_or(upper_outlier, lower_outlier) - def compute(self, df): """ Compute boxplot related information. @@ -123,8 +152,8 @@ def compute(self, df): mask = BoxPlot.outliers(df[tv], scale=self.iqr_scale) mask = np.where(mask)[0] - if 'rank' in df.columns: - rank = df['rank'].to_numpy()[mask] + if "rank" in df.columns: + rank = df["rank"].to_numpy()[mask] else: rank = np.zeros(mask.shape[0], dtype=int) @@ -145,11 +174,11 @@ def compute(self, df): "imb": _imb, "ks": (_kurt, _skew), } - if 'dataset' in df.columns: - ret[tk]['odset'] = df['dataset'].to_numpy()[mask] + if "dataset" in df.columns: + ret[tk]["odset"] = df["dataset"].to_numpy()[mask] return ret - + def unpack(self): """ Unpack the boxplot data into JSON format. @@ -165,7 +194,7 @@ def unpack(self): "q": box["q"].tolist(), "outliers": { "values": box["oval"].tolist(), - "ranks": box["orank"].tolist() + "ranks": box["orank"].tolist(), }, "min": box["rng"][0], "max": box["rng"][1], @@ -176,7 +205,9 @@ def unpack(self): "skew": box["ks"][1], } - if 'odset' in box: - result[callsite][box_type][metric]['odset'] = box['odset'].tolist() + if "odset" in box: + result[callsite][box_type][metric]["odset"] = box[ + "odset" + ].tolist() - return result \ No newline at end of file + return result diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index 6ca955d7..c762ad71 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -14,25 +14,18 @@ @magics_class class Roundtrip(Magics): - # Note to self: Custom magic classes MUST call parent's constructor def __init__(self, shell): super(Roundtrip, self).__init__(shell) global VIS_TO_FILE, VIS_TO_VALIDATION, VIS_TO_DATA - VIS_TO_FILE = { - "literal_tree": "roundtripTree.js", - "boxplot": "boxplot.js" - } + VIS_TO_FILE = {"literal_tree": "roundtripTree.js", "boxplot": "boxplot.js"} VIS_TO_VALIDATION = { "literal_tree": self._validate_literal_tree, - "boxplot": self._validate_boxplot - } - VIS_TO_DATA = { - "literal_tree": "jsNodeSelected", - "boxplot": "variance_df" + "boxplot": self._validate_boxplot, } + VIS_TO_DATA = {"literal_tree": "jsNodeSelected", "boxplot": "variance_df"} self.id_number = 0 # Clean up namespace function @@ -59,7 +52,7 @@ def cleanLineArgument(self, arg): else: # Path is a variable from the nb namespace return self.shell.user_ns[arg] - + @line_magic def loadVisualization(self, line): # Get command line args for loading the vis. @@ -70,11 +63,11 @@ def loadVisualization(self, line): data = self.shell.user_ns[args[2]] if visType not in VIS_TO_FILE.keys(): - assert(f"Invalid visualization type provided. Valid types include {''.join(VIS_TO_FILE.keys())}") + assert f"Invalid visualization type provided. Valid types include {''.join(VIS_TO_FILE.keys())}" # Set a name to visualization cell. name = "roundtripTreeVis" + str(self.id_number) - + # Read the appropriate JS file. fileAndPath = os.path.join(path, VIS_TO_FILE[visType]) javascriptFile = open(fileAndPath).read() @@ -134,7 +127,7 @@ def _validate_boxplot(self, data): "skew": {"type": "number"}, "q": {"type": "array"}, "outliers": {"type": "object"}, - } + }, } if isinstance(data, dict): @@ -145,10 +138,15 @@ def _validate_boxplot(self, data): for boxplotType in boxplotTypes: if boxplotType in ["tgt", "bgk"]: for metric in data[cs][boxplotType]: - jsonschema.validate(instance=data[cs][boxplotType][metric], schema=STATS_SCHEMA) + jsonschema.validate( + instance=data[cs][boxplotType][metric], + schema=STATS_SCHEMA, + ) else: self._print_exception_boxplot() - raise Exception("Incorrect boxplot type key provided. Use 'tgt' or 'bgk'.") + raise Exception( + "Incorrect boxplot type key provided. Use 'tgt' or 'bgk'." + ) else: self._print_exception_boxplot() raise Exception("Bad argument.") @@ -181,10 +179,9 @@ def _print_exception_boxplot(): } }, "bkg": { - // Refer "tgt" key. + // Refer "tgt" key. } } - """ ) @@ -202,10 +199,9 @@ def runVis(self, name, javascriptFile, visType): element = document.getElementById('""" + str(name) + """');""" - + """var """ + + """var """ + VIS_TO_DATA[visType] + """ = {};""" - ) footer = """""" display(HTML(header + javascriptFile + footer)) @@ -221,15 +217,16 @@ def fetchData(self, line): hook = ( """ - var holder = """ + VIS_TO_DATA[visType] + """; + var holder = """ + + VIS_TO_DATA[visType] + + """; holder = '"' + holder + '"'; console.debug('""" - + str(dest) - + """ = '+ holder); - IPython.notebook.kernel.execute('""" - + str(dest) - + """ = '+ eval(holder)); - + + str(dest) + + """ = '+ holder); + IPython.notebook.kernel.execute('""" + + str(dest) + + """ = '+ eval(holder)); """ ) From 279dd4ce5f15e4077720782958360efa5dd37442 Mon Sep 17 00:00:00 2001 From: jarusified Date: Fri, 16 Jul 2021 13:50:30 -0700 Subject: [PATCH 32/45] Fix the typo --- hatchet/external/roundtrip/roundtrip.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index c762ad71..06d88dd1 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -142,11 +142,11 @@ def _validate_boxplot(self, data): instance=data[cs][boxplotType][metric], schema=STATS_SCHEMA, ) - else: - self._print_exception_boxplot() - raise Exception( - "Incorrect boxplot type key provided. Use 'tgt' or 'bgk'." - ) + else: + self._print_exception_boxplot() + raise Exception( + "Incorrect boxplot type key provided. Use 'tgt' or 'bgk'." + ) else: self._print_exception_boxplot() raise Exception("Bad argument.") @@ -154,7 +154,7 @@ def _validate_boxplot(self, data): self._print_exception_boxplot() raise Exception("Bad argument.") - def _print_exception_boxplot(): + def _print_exception_boxplot(self): print( """The argument is not a valid boxplot dictionary. Please check that you have provided the data in the following form to From 18eaecbfdee862a80f9ed31f9e537d35f2ffd26e Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 20 Jul 2021 17:07:44 -0700 Subject: [PATCH 33/45] Add boxplot.py to external.scripts --- .../performance_variability_boxplots.ipynb | 989 ++++++++---------- hatchet/external/scripts/__init__.py | 5 + hatchet/external/{ => scripts}/boxplot.py | 104 +- requirements.txt | 3 +- setup.py | 1 + 5 files changed, 489 insertions(+), 613 deletions(-) create mode 100644 hatchet/external/scripts/__init__.py rename hatchet/external/{ => scripts}/boxplot.py (68%) diff --git a/docs/examples/tutorial/performance_variability_boxplots.ipynb b/docs/examples/tutorial/performance_variability_boxplots.ipynb index b63ebebb..b88b14fa 100644 --- a/docs/examples/tutorial/performance_variability_boxplots.ipynb +++ b/docs/examples/tutorial/performance_variability_boxplots.ipynb @@ -6,14 +6,7 @@ "source": [ "# Performance Variability Boxplots\n", "\n", - "Boxplots provide an insight into the runtime distribution among its MPI ranks. We provide 3 modes to visualize the performance variability of a GraphFrame:\n", - "\n", - "1) Single GraphFrame.\n", - "\n", - "2) Compare two GraphFrame.\n", - "\n", - "3) Compare GraphFrame against an unified GraphFrame.\n", - "\n", + "Boxplots provide an insight into the runtime distribution among its MPI ranks. We provide 3 modes to visualize the performance variability of a GraphFrame.\n", "\n", "Boxplots are calculated to represent the range of the distribution and outliers (dots) correspond to the ranks which are beyond the 1.5*IQR. Additionally, several statistical measures like mean, variance, kurtosis, skewness across the MPI ranks are also provided.\"\n", "\n", @@ -62,200 +55,13 @@ "%load_ext roundtrip" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Boxplot - High-level function API on top of the GraphFrame" - ] - }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "import numpy as np \n", - "import pandas as pd\n", - "from scipy import stats\n", - "\n", - "class BoxPlot:\n", - " \"\"\"\n", - " Boxplot computation for a dataframe segment\n", - " \"\"\"\n", - "\n", - " def __init__(self, tgt_gf, bkg_gf=None, callsites=[], metrics=[\"time\", \"time (inc)\"], iqr_scale=1.5):\n", - " \"\"\"\n", - " Boxplot for callsite \n", - " \n", - " :param tgt_gf: (ht.GraphFrame) Target GraphFrame \n", - " :param bkg_gf: (ht.GraphFrame) Relative supergraph\n", - " :param callsite: (list) List of callsites\n", - " :param metrics: (list) List of metrics to compute.\n", - " :param iqr_scale: (float) IQR range for outliers.\n", - " \"\"\"\n", - " assert isinstance(tgt_gf, ht.GraphFrame)\n", - " assert isinstance(callsites, list)\n", - " assert isinstance(iqr_scale, float)\n", - " \n", - " self.metrics = metrics\n", - " self.iqr_scale = iqr_scale\n", - " self.callsites = callsites\n", - " \n", - " tgt_gf.dataframe.reset_index(inplace=True)\n", - " tgt_dict = BoxPlot.df_bi_level_group(tgt_gf.dataframe, \"name\", None, cols=metrics + [\"nid\"], group_by=[\"rank\"], apply_func=lambda _: _.mean())\n", - " \n", - " if bkg_gf is not None:\n", - " bkg_gf.dataframe.reset_index(inplace=True)\n", - " bkg_dict = BoxPlot.df_bi_level_group(bkg_gf.dataframe, \"name\", None, cols=metrics + [\"nid\"], group_by=[\"rank\"], apply_func=lambda _: _.mean())\n", - " \n", - " self.result = {}\n", - "\n", - " self.box_types = [\"tgt\"] \n", - " if bkg_gf is not None:\n", - " self.box_types = [\"tgt\", \"bkg\"]\n", - "\n", - " for callsite in self.callsites:\n", - " ret = {}\n", - " tgt_df = tgt_dict[callsite]\n", - " ret[\"tgt\"] = self.compute(tgt_df)\n", - "\n", - " if bkg_gf is not None:\n", - " bkg_df = bkg_dict[callsite]\n", - " ret[\"bkg\"] = self.compute(bkg_df)\n", - " \n", - " self.result[callsite] = ret\n", - " \n", - " @staticmethod\n", - " def df_bi_level_group(df, frst_group_attr, scnd_group_attr, cols, group_by, apply_func, proxy={}):\n", - " _cols = cols + group_by\n", - "\n", - " # If there is only one attribute to group by, we use the 1st index.\n", - " if len(group_by) == 1:\n", - " group_by = group_by[0]\n", - "\n", - " # Find the grouping\n", - " if scnd_group_attr is not None:\n", - " _groups = [frst_group_attr, scnd_group_attr]\n", - " else:\n", - " _groups = [frst_group_attr]\n", - "\n", - " # Set the df.index as the _groups\n", - " _df = df.set_index(_groups)\n", - " _levels = _df.index.unique().tolist()\n", - "\n", - " # If \"rank\" is present in the columns, group by \"rank\".\n", - " if \"rank\" in _df.columns and len(df[\"rank\"].unique().tolist()) > 1:\n", - " if scnd_group_attr is not None:\n", - " if len(group_by) == 0:\n", - " _cols = _cols + [\"rank\"]\n", - " return { _ : _df.xs(_)[_cols] for (_, __) in _levels }\n", - " return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for (_, __) in _levels }\n", - " else:\n", - " if len(group_by) == 0:\n", - " _cols = _cols + [\"rank\"]\n", - " return { _ : _df.xs(_)[_cols] for _ in _levels }\n", - " return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for _ in _levels }\n", - " else: \n", - " return { _ : _df.xs(_)[_cols] for _ in _levels}\n", - " \n", - " @staticmethod\n", - " def outliers(data, scale=1.5, side=\"both\"):\n", - " assert isinstance(data, (pd.Series, np.ndarray))\n", - " assert len(data.shape) == 1\n", - " assert isinstance(scale, float)\n", - " assert side in [\"gt\", \"lt\", \"both\"]\n", - "\n", - " d_q13 = np.percentile(data, [25.0, 75.0])\n", - " iqr_distance = np.multiply(stats.iqr(data), scale)\n", - "\n", - " if side in [\"gt\", \"both\"]:\n", - " upper_range = d_q13[1] + iqr_distance\n", - " upper_outlier = np.greater(data - upper_range.reshape(1), 0)\n", - "\n", - " if side in [\"lt\", \"both\"]:\n", - " lower_range = d_q13[0] - iqr_distance\n", - " lower_outlier = np.less(data - lower_range.reshape(1), 0)\n", - "\n", - " if side == \"gt\":\n", - " return upper_outlier\n", - " if side == \"lt\":\n", - " return lower_outlier\n", - " if side == \"both\":\n", - " return np.logical_or(upper_outlier, lower_outlier)\n", - "\n", - " \n", - " def compute(self, df):\n", - " \"\"\"\n", - " Compute boxplot related information.\n", - "\n", - " :param df: Dataframe to calculate the boxplot information.\n", - " :return:\n", - " \"\"\"\n", - "\n", - " ret = {_: {} for _ in self.metrics}\n", - " for tk, tv in zip(self.metrics, self.metrics):\n", - " q = np.percentile(df[tv], [0.0, 25.0, 50.0, 75.0, 100.0])\n", - " mask = BoxPlot.outliers(df[tv], scale=self.iqr_scale)\n", - " mask = np.where(mask)[0]\n", - "\n", - " if 'rank' in df.columns:\n", - " rank = df['rank'].to_numpy()[mask]\n", - " else:\n", - " rank = np.zeros(mask.shape[0], dtype=int)\n", - "\n", - " _data = df[tv].to_numpy()\n", - " _min, _mean, _max = _data.min(), _data.mean(), _data.max()\n", - " _var = _data.var() if _data.shape[0] > 0 else 0.0\n", - " _imb = (_max - _mean) / _mean if not np.isclose(_mean, 0.0) else _max\n", - " _skew = stats.skew(_data)\n", - " _kurt = stats.kurtosis(_data)\n", - "\n", - " ret[tk] = {\n", - " \"q\": q,\n", - " \"oval\": df[tv].to_numpy()[mask],\n", - " \"orank\": rank,\n", - " \"d\": _data,\n", - " \"rng\": (_min, _max),\n", - " \"uv\": (_mean, _var),\n", - " \"imb\": _imb,\n", - " \"ks\": (_kurt, _skew),\n", - " }\n", - " if 'dataset' in df.columns:\n", - " ret[tk]['odset'] = df['dataset'].to_numpy()[mask]\n", - "\n", - " return ret\n", - " \n", - " def unpack(self):\n", - " \"\"\"\n", - " Unpack the boxplot data into JSON format.\n", - " \"\"\"\n", - " result = {}\n", - " for callsite in self.callsites:\n", - " result[callsite] = {}\n", - " for box_type in self.box_types:\n", - " result[callsite][box_type] = {}\n", - " for metric in self.metrics:\n", - " box = self.result[callsite][box_type][metric]\n", - " result[callsite][box_type][metric] = {\n", - " \"q\": box[\"q\"].tolist(),\n", - " \"outliers\": {\n", - " \"values\": box[\"oval\"].tolist(),\n", - " \"ranks\": box[\"orank\"].tolist()\n", - " },\n", - " \"min\": box[\"rng\"][0],\n", - " \"max\": box[\"rng\"][1],\n", - " \"mean\": box[\"uv\"][0],\n", - " \"var\": box[\"uv\"][1],\n", - " \"imb\": box[\"imb\"],\n", - " \"kurt\": box[\"ks\"][0],\n", - " \"skew\": box[\"ks\"][1],\n", - " }\n", - "\n", - " if 'odset' in box:\n", - " result[callsite][box_type][metric]['odset'] = box['odset'].tolist()\n", - "\n", - " return result" + "from hatchet.external.scripts import BoxPlot" ] }, { @@ -267,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -278,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -289,26 +95,26 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'tgt': {'time': {'q': [105528.0, 113072.25, 116494.0, 124430.75, 137098.0], 'outliers': {'values': [], 'ranks': []}, 'min': 105528.0, 'max': 137098.0, 'mean': 119373.5, 'var': 104497970.25, 'imb': 0.14847935262013764, 'kurt': -0.9421848873183336, 'skew': 0.5436725364039101}}}\n" + "{'tgt': {'time': {'q': [105528.0, 113072.25, 116494.0, 124430.75, 137098.0], 'outliers': {'values': []}, 'min': 105528.0, 'max': 137098.0, 'mean': 119373.5, 'var': 104497970.25, 'imb': 0.14847935262013764, 'kurt': -0.9421848873183336, 'skew': 0.5436725364039101}}}\n" ] } ], "source": [ - "print(boxplot[\"main\"])" + "print(boxplot['main'])" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": { - "scrolled": true + "scrolled": false }, "outputs": [ { @@ -332,9 +138,10 @@ "
\n", " " ], @@ -530,50 +616,20 @@ "%loadVisualization roundtrip_path \"boxplot\" boxplot" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Comparing a target GraphFrame against a background GraphFrame." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "data_dir = os.path.realpath(\"../../../../callflow_data/lulesh-8-runs\")\n", - "lulesh_data_path_1 = os.path.join(data_dir, \"27-cores.json\")\n", - "lulesh_data_path_2 = os.path.join(data_dir, \"64-cores.json\")\n", - "gf1 = ht.GraphFrame.from_caliper_json(lulesh_data_path_1)\n", - "gf2 = ht.GraphFrame.from_caliper_json(lulesh_data_path_2)" - ] - }, { "cell_type": "code", "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [ - "c1 = gf1.dataframe.name.unique().tolist()\n", - "c2 = gf2.dataframe.name.unique().tolist()\n", - "callsites = list(set(c1) | set(c2))\n", - "bp = BoxPlot(tgt_gf=gf1, bkg_gf=gf2, callsites=callsites, metrics=[\"time\"])\n", - "boxplot_comparison = bp.unpack()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, "outputs": [ { "data": { "application/javascript": [ "\n", - " // Grab current context\n", - " elementTop = element.get(0);" + " var holder = variance_df;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('variance_df = '+ holder);\n", + " IPython.notebook.kernel.execute('variance_df = '+ eval(holder));\n", + " " ], "text/plain": [ "" @@ -584,199 +640,16 @@ }, { "data": { - "text/html": [ - "\n", - "
\n", - " " + " var holder = variance_df;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('variance_df = '+ holder);\n", + " IPython.notebook.kernel.execute('variance_df = '+ eval(holder));\n", + " " ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -784,7 +657,39 @@ } ], "source": [ - "%loadVisualization roundtrip_path \"boxplot\" boxplot_comparison" + "%fetchData \"boxplot\" variance_df" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'variance_df' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariance_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'variance_df' is not defined" + ] + } + ], + "source": [ + "print(variance_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "columns = variance_df.split(';')[0].split(',')\n", + "data = [x.split(',') for x in variance_df.split(';')[1:]]\n", + "df = pd.DataFrame(data, columns=columns).set_index('name')" ] }, { @@ -792,7 +697,9 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "df" + ] } ], "metadata": { diff --git a/hatchet/external/scripts/__init__.py b/hatchet/external/scripts/__init__.py new file mode 100644 index 00000000..2328c084 --- /dev/null +++ b/hatchet/external/scripts/__init__.py @@ -0,0 +1,5 @@ +# Copyright 2017-2021 Lawrence Livermore National Security, LLC and other +# Hatchet Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT +from .boxplot import BoxPlot \ No newline at end of file diff --git a/hatchet/external/boxplot.py b/hatchet/external/scripts/boxplot.py similarity index 68% rename from hatchet/external/boxplot.py rename to hatchet/external/scripts/boxplot.py index a90fe31e..d37c02d3 100644 --- a/hatchet/external/boxplot.py +++ b/hatchet/external/scripts/boxplot.py @@ -1,63 +1,41 @@ -import hatchet as ht -import numpy as np +import numpy as np import pandas as pd from scipy import stats - +import hatchet as ht class BoxPlot: """ Boxplot computation for a dataframe segment """ - def __init__( - self, - tgt_gf, - bkg_gf=None, - callsites=[], - metrics=["time", "time (inc)"], - iqr_scale=1.5, - ): + def __init__(self, tgt_gf, bkg_gf=None, callsites=[], metrics=["time", "time (inc)"], column='rank', iqr_scale=1.5): """ - Boxplot for callsite - - :param tgt_gf: (ht.GraphFrame) Target GraphFrame + Boxplot for callsite + + :param tgt_gf: (ht.GraphFrame) Target GraphFrame :param bkg_gf: (ht.GraphFrame) Relative supergraph - :param callsites: (list) Callsite name - :param metrics: (list) Runtime metrics + :param callsite: (list) List of callsites + :param metrics: (list) List of metrics to compute. :param iqr_scale: (float) IQR range for outliers. """ assert isinstance(tgt_gf, ht.GraphFrame) assert isinstance(callsites, list) assert isinstance(iqr_scale, float) - + self.metrics = metrics self.iqr_scale = iqr_scale self.callsites = callsites - + tgt_gf.dataframe.reset_index(inplace=True) - tgt_dict = BoxPlot.df_bi_level_group( - tgt_gf.dataframe, - "name", - None, - cols=metrics + ["nid"], - group_by=["rank"], - apply_func=lambda _: _.mean(), - ) - + tgt_dict = BoxPlot.df_bi_level_group(tgt_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=[column], apply_func=lambda _: _.mean()) + if bkg_gf is not None: bkg_gf.dataframe.reset_index(inplace=True) - bkg_dict = BoxPlot.df_bi_level_group( - bkg_gf.dataframe, - "name", - None, - cols=metrics + ["nid"], - group_by=["rank"], - apply_func=lambda _: _.mean(), - ) - + bkg_dict = BoxPlot.df_bi_level_group(bkg_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=[column], apply_func=lambda _: _.mean()) + self.result = {} - self.box_types = ["tgt"] + self.box_types = ["tgt"] if bkg_gf is not None: self.box_types = ["tgt", "bkg"] @@ -69,13 +47,11 @@ def __init__( if bkg_gf is not None: bkg_df = bkg_dict[callsite] ret["bkg"] = self.compute(bkg_df) - + self.result[callsite] = ret - + @staticmethod - def df_bi_level_group( - df, frst_group_attr, scnd_group_attr, cols, group_by, apply_func, proxy={} - ): + def df_bi_level_group(df, frst_group_attr, scnd_group_attr, cols, group_by, apply_func, proxy={}): _cols = cols + group_by # If there is only one attribute to group by, we use the 1st index. @@ -92,27 +68,21 @@ def df_bi_level_group( _df = df.set_index(_groups) _levels = _df.index.unique().tolist() - # If "rank" is present in the columns, we will group by "rank". + # If "rank" is present in the columns, group by "rank". if "rank" in _df.columns and len(df["rank"].unique().tolist()) > 1: if scnd_group_attr is not None: if len(group_by) == 0: _cols = _cols + ["rank"] - return {_: _df.xs(_)[_cols] for (_, __) in _levels} - return { - _: (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() - for (_, __) in _levels - } + return { _ : _df.xs(_)[_cols] for (_, __) in _levels } + return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for (_, __) in _levels } else: if len(group_by) == 0: _cols = _cols + ["rank"] - return {_: _df.xs(_)[_cols] for _ in _levels} - return { - _: (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() - for _ in _levels - } - else: - return {_: _df.xs(_)[_cols] for _ in _levels} - + return { _ : _df.xs(_)[_cols] for _ in _levels } + return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for _ in _levels } + else: + return { _ : _df.xs(_)[_cols] for _ in _levels} + @staticmethod def outliers(data, scale=1.5, side="both"): assert isinstance(data, (pd.Series, np.ndarray)) @@ -138,6 +108,7 @@ def outliers(data, scale=1.5, side="both"): if side == "both": return np.logical_or(upper_outlier, lower_outlier) + def compute(self, df): """ Compute boxplot related information. @@ -152,11 +123,6 @@ def compute(self, df): mask = BoxPlot.outliers(df[tv], scale=self.iqr_scale) mask = np.where(mask)[0] - if "rank" in df.columns: - rank = df["rank"].to_numpy()[mask] - else: - rank = np.zeros(mask.shape[0], dtype=int) - _data = df[tv].to_numpy() _min, _mean, _max = _data.min(), _data.mean(), _data.max() _var = _data.var() if _data.shape[0] > 0 else 0.0 @@ -167,18 +133,17 @@ def compute(self, df): ret[tk] = { "q": q, "oval": df[tv].to_numpy()[mask], - "orank": rank, "d": _data, "rng": (_min, _max), "uv": (_mean, _var), "imb": _imb, "ks": (_kurt, _skew), } - if "dataset" in df.columns: - ret[tk]["odset"] = df["dataset"].to_numpy()[mask] + if 'dataset' in df.columns: + ret[tk]['odset'] = df['dataset'].to_numpy()[mask] return ret - + def unpack(self): """ Unpack the boxplot data into JSON format. @@ -194,7 +159,6 @@ def unpack(self): "q": box["q"].tolist(), "outliers": { "values": box["oval"].tolist(), - "ranks": box["orank"].tolist(), }, "min": box["rng"][0], "max": box["rng"][1], @@ -205,9 +169,7 @@ def unpack(self): "skew": box["ks"][1], } - if "odset" in box: - result[callsite][box_type][metric]["odset"] = box[ - "odset" - ].tolist() + if 'odset' in box: + result[callsite][box_type][metric]['odset'] = box['odset'].tolist() - return result + return result \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 509062ae..43e509c3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,5 @@ PyYAML cython multiprocess textX -jsonschema \ No newline at end of file +jsonschema +scipy \ No newline at end of file diff --git a/setup.py b/setup.py index 9035692e..f02324b0 100644 --- a/setup.py +++ b/setup.py @@ -39,6 +39,7 @@ "hatchet.readers", "hatchet.util", "hatchet.external", + "hatchet.external.scripts", "hatchet.tests", "hatchet.cython_modules.libs", ], From 485c1f8242e78c47cd1bb1353381313ad6c111b8 Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 20 Jul 2021 17:57:31 -0700 Subject: [PATCH 34/45] Fix breaking changes to the roundtrip tree_literal interface --- docs/examples/tutorial/roundtrip-demo.ipynb | 2006 ++++++++++++++++++- hatchet/external/roundtrip/roundtripTree.js | 43 +- 2 files changed, 2021 insertions(+), 28 deletions(-) diff --git a/docs/examples/tutorial/roundtrip-demo.ipynb b/docs/examples/tutorial/roundtrip-demo.ipynb index ee0442c0..0ccc2d1c 100644 --- a/docs/examples/tutorial/roundtrip-demo.ipynb +++ b/docs/examples/tutorial/roundtrip-demo.ipynb @@ -15,9 +15,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "import os, sys\n", "from IPython.display import HTML, display\n", @@ -48,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -151,13 +164,897 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " // Grab current context\n", + " elementTop = element.get(0);" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "%loadVisualization roundtrip_path literal_tree" + "%loadVisualization roundtrip_path \"literal_tree\" literal_tree" ] }, { @@ -172,19 +1069,62 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " var holder = jsNodeSelected;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('myQuery = '+ holder);\n", + " IPython.notebook.kernel.execute('myQuery = '+ eval(holder));\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " var holder = jsNodeSelected;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('myQuery = '+ holder);\n", + " IPython.notebook.kernel.execute('myQuery = '+ eval(holder));\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Execute this cell first to populate myQuery with your selection\n", - "%fetchData myQuery" + "%fetchData \"literal_tree\" myQuery" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['*']\n" + ] + } + ], "source": [ "# Now myQuery is loaded and can be used\n", "# If no nodes are clicked/selected, the default behavior is to return a query that shows all nodes.\n", @@ -196,9 +1136,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of input nodes: 12\n", + "Number of output nodes: 12\n", + " __ __ __ __ \n", + " / /_ ____ _/ /______/ /_ ___ / /_\n", + " / __ \\/ __ `/ __/ ___/ __ \\/ _ \\/ __/\n", + " / / / / /_/ / /_/ /__/ / / / __/ /_ \n", + "/_/ /_/\\__,_/\\__/\\___/_/ /_/\\___/\\__/ v1.3.1a0\n", + "\n", + "\u001b[38;5;22m0.000\u001b[0m foo\u001b[0m\n", + "├─ \u001b[38;5;46m5.000\u001b[0m bar\u001b[0m\n", + "│ ├─ \u001b[38;5;46m5.000\u001b[0m baz\u001b[0m\n", + "│ └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + "└─ \u001b[38;5;22m0.000\u001b[0m qux\u001b[0m\n", + " └─ \u001b[38;5;46m5.000\u001b[0m quux\u001b[0m\n", + " └─ \u001b[38;5;220m10.000\u001b[0m corge\u001b[0m\n", + " ├─ \u001b[38;5;46m5.000\u001b[0m bar\u001b[0m\n", + " │ ├─ \u001b[38;5;46m5.000\u001b[0m baz\u001b[0m\n", + " │ └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + " ├─ \u001b[38;5;196m15.000\u001b[0m garply\u001b[0m\n", + " └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + "\n", + "\u001b[4mLegend\u001b[0m (Metric: time Min: 0.00 Max: 15.00)\n", + "\u001b[38;5;196m█ \u001b[0m13.50 - 15.00\n", + "\u001b[38;5;208m█ \u001b[0m10.50 - 13.50\n", + "\u001b[38;5;220m█ \u001b[0m7.50 - 10.50\n", + "\u001b[38;5;46m█ \u001b[0m4.50 - 7.50\n", + "\u001b[38;5;34m█ \u001b[0m1.50 - 4.50\n", + "\u001b[38;5;22m█ \u001b[0m0.00 - 1.50\n", + "\n", + "name\u001b[0m User code \u001b[38;5;160m◀ \u001b[0m Only in left graph \u001b[38;5;28m▶ \u001b[0m Only in right graph\n", + "\n" + ] + } + ], "source": [ "# Load the string-literal tree defined above into a GraphFrame\n", "gf = ht.GraphFrame.from_literal(literal_tree)\n", @@ -228,7 +1206,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -393,30 +1371,957 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " // Grab current context\n", + " elementTop = element.get(0);" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "%loadVisualization roundtrip_path multiple_roots" + "%loadVisualization roundtrip_path \"literal_tree\" multiple_roots" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " var holder = jsNodeSelected;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('anotherQuery = '+ holder);\n", + " IPython.notebook.kernel.execute('anotherQuery = '+ eval(holder));\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " var holder = jsNodeSelected;\n", + " holder = '\"' + holder + '\"';\n", + " console.debug('anotherQuery = '+ holder);\n", + " IPython.notebook.kernel.execute('anotherQuery = '+ eval(holder));\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Execute this cell first to load anotherQuery\n", - "%fetchData anotherQuery" + "%fetchData \"literal_tree\" anotherQuery" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['*']\n" + ] + } + ], "source": [ "# Now anotherQuery is loaded and can be used (after %fetchData executes)\n", "# If no nodes are clicked/selected, the default behavior is to return a query\n", @@ -429,9 +2334,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of input nodes: 25\n", + "Number of output nodes: 25\n", + " __ __ __ __ \n", + " / /_ ____ _/ /______/ /_ ___ / /_\n", + " / __ \\/ __ `/ __/ ___/ __ \\/ _ \\/ __/\n", + " / / / / /_/ / /_/ /__/ / / / __/ /_ \n", + "/_/ /_/\\__,_/\\__/\\___/_/ /_/\\___/\\__/ v1.3.1a0\n", + "\n", + "\u001b[38;5;22m0.000\u001b[0m foo\u001b[0m\n", + "├─ \u001b[38;5;46m5.000\u001b[0m bar\u001b[0m\n", + "│ ├─ \u001b[38;5;46m5.000\u001b[0m baz\u001b[0m\n", + "│ └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + "├─ \u001b[38;5;22m0.000\u001b[0m qux\u001b[0m\n", + "│ └─ \u001b[38;5;46m5.000\u001b[0m quux\u001b[0m\n", + "│ └─ \u001b[38;5;220m10.000\u001b[0m corge\u001b[0m\n", + "│ ├─ \u001b[38;5;46m5.000\u001b[0m bar\u001b[0m\n", + "│ │ ├─ \u001b[38;5;46m5.000\u001b[0m baz\u001b[0m\n", + "│ │ └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + "│ ├─ \u001b[38;5;196m15.000\u001b[0m garply\u001b[0m\n", + "│ └─ \u001b[38;5;220m10.000\u001b[0m grault\u001b[0m\n", + "└─ \u001b[38;5;22m0.000\u001b[0m waldo\u001b[0m\n", + " ├─ \u001b[38;5;46m5.000\u001b[0m fred\u001b[0m\n", + " │ ├─ \u001b[38;5;46m5.000\u001b[0m plugh\u001b[0m\n", + " │ └─ \u001b[38;5;46m5.000\u001b[0m xyzzy\u001b[0m\n", + " │ └─ \u001b[38;5;46m5.000\u001b[0m thud\u001b[0m\n", + " │ ├─ \u001b[38;5;46m5.000\u001b[0m baz\u001b[0m\n", + " │ └─ \u001b[38;5;196m15.000\u001b[0m garply\u001b[0m\n", + " └─ \u001b[38;5;196m15.000\u001b[0m garply\u001b[0m\n", + "\u001b[38;5;22m0.000\u001b[0m kap\u001b[0m\n", + "├─ \u001b[38;5;196m15.000\u001b[0m kat\u001b[0m\n", + "└─ \u001b[38;5;46m5.000\u001b[0m kow\u001b[0m\n", + " ├─ \u001b[38;5;46m5.000\u001b[0m kraze\u001b[0m\n", + " └─ \u001b[38;5;46m5.000\u001b[0m krazy\u001b[0m\n", + "\n", + "\u001b[4mLegend\u001b[0m (Metric: time Min: 0.00 Max: 15.00)\n", + "\u001b[38;5;196m█ \u001b[0m13.50 - 15.00\n", + "\u001b[38;5;208m█ \u001b[0m10.50 - 13.50\n", + "\u001b[38;5;220m█ \u001b[0m7.50 - 10.50\n", + "\u001b[38;5;46m█ \u001b[0m4.50 - 7.50\n", + "\u001b[38;5;34m█ \u001b[0m1.50 - 4.50\n", + "\u001b[38;5;22m█ \u001b[0m0.00 - 1.50\n", + "\n", + "name\u001b[0m User code \u001b[38;5;160m◀ \u001b[0m Only in left graph \u001b[38;5;28m▶ \u001b[0m Only in right graph\n", + "\n" + ] + } + ], "source": [ "# Load the string-literal multi-rooted tree defined above into a GraphFrame\n", "multi_gf = ht.GraphFrame.from_literal(multiple_roots)\n", @@ -467,7 +2423,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.7.7" } }, "nbformat": 4, diff --git a/hatchet/external/roundtrip/roundtripTree.js b/hatchet/external/roundtrip/roundtripTree.js index a5b53e67..efb45d10 100644 --- a/hatchet/external/roundtrip/roundtripTree.js +++ b/hatchet/external/roundtrip/roundtripTree.js @@ -1,9 +1,46 @@ //d3.v4 (function (element) { - require(['https://d3js.org/d3.v4.min.js'], function (d3) { - var cleanTree = argList[0].replace(/'/g, '"'); + const [roundtrip_path, visType, variableString] = cleanInputs(argList); + + // Quit if visType is not literal_tree. + if (visType !== "literal_tree") { + console.error("Incorrect visualization type passed.") + return; + } + + // -------------------------------------------------------------------------------- + // RequireJS setup. + // -------------------------------------------------------------------------------- + // Setup the requireJS config to get required libraries. + requirejs.config({ + baseUrl: roundtrip_path, + paths: { + d3src: 'https://d3js.org', + lib: 'lib', + }, + map: { + '*': { + 'd3': 'd3src/d3.v4.min', + } + } + }); - var forestData = JSON.parse(cleanTree); + // -------------------------------------------------------------------------------- + // Utility functions. + // -------------------------------------------------------------------------------- + // TODO: Move this to a common utils folder. + /** + * Utility to remove single quotes. + * + * @param {String} strings strings with single quotes. + * @returns {String} strings without single quotes. + */ + function cleanInputs(strings) { + return strings.map((_) => _.replace(/'/g, '"')); + } + + require(['d3'], function (d3) { + var forestData = JSON.parse(variableString); var rootNodeNames = []; var numberOfTrees = forestData.length; From fda30c45e19b8781252b8c3b650c05ea2f46e448 Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 20 Jul 2021 18:14:19 -0700 Subject: [PATCH 35/45] Clean the BoxPlot class --- hatchet/external/scripts/boxplot.py | 43 ++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/hatchet/external/scripts/boxplot.py b/hatchet/external/scripts/boxplot.py index d37c02d3..63b19d25 100644 --- a/hatchet/external/scripts/boxplot.py +++ b/hatchet/external/scripts/boxplot.py @@ -4,34 +4,47 @@ import hatchet as ht class BoxPlot: - """ - Boxplot computation for a dataframe segment - """ - - def __init__(self, tgt_gf, bkg_gf=None, callsites=[], metrics=["time", "time (inc)"], column='rank', iqr_scale=1.5): + def __init__(self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=["time", "time (inc)"], iqr_scale=1.5): """ - Boxplot for callsite + Boxplot computation for callsites. The data can be computed for two use + cases: + 1. Examining runtime distributions of a single GraphFrame. + 2. Comparing runtime distributions of a target GraphFrame against a + background GraphFrame. - :param tgt_gf: (ht.GraphFrame) Target GraphFrame - :param bkg_gf: (ht.GraphFrame) Relative supergraph - :param callsite: (list) List of callsites + :param cat_column: (string) Categorical column to aggregate the boxplot computation. + :param tgt_gf: (ht.GraphFrame) Target GraphFrame. + :param bkg_gf: (ht.GraphFrame) Background GraphFrame. + :param callsite: (list) List of callsites. :param metrics: (list) List of metrics to compute. :param iqr_scale: (float) IQR range for outliers. """ assert isinstance(tgt_gf, ht.GraphFrame) assert isinstance(callsites, list) + assert isinstance(metrics, list) assert isinstance(iqr_scale, float) + + if bkg_gf is not None: + assert isinstance(bkg_gf, ht.GraphFrame) + assert cat_column in bkg_gf.dataframe.column + + if cat_column not in tgt_gf.dataframe.columns: + raise Exception(f"{cat_column} not found in tgt_gf.") + + if cat_column not in bkg_gf.dataframe.columns: + raise Exception(f"{cat_column} not found in bkg_gf.") self.metrics = metrics self.iqr_scale = iqr_scale self.callsites = callsites + self.cat_column = cat_column tgt_gf.dataframe.reset_index(inplace=True) - tgt_dict = BoxPlot.df_bi_level_group(tgt_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=[column], apply_func=lambda _: _.mean()) + tgt_dict = BoxPlot.df_bi_level_group(tgt_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=[cat_column], apply_func=lambda _: _.mean()) if bkg_gf is not None: bkg_gf.dataframe.reset_index(inplace=True) - bkg_dict = BoxPlot.df_bi_level_group(bkg_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=[column], apply_func=lambda _: _.mean()) + bkg_dict = BoxPlot.df_bi_level_group(bkg_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=[cat_column], apply_func=lambda _: _.mean()) self.result = {} @@ -51,7 +64,9 @@ def __init__(self, tgt_gf, bkg_gf=None, callsites=[], metrics=["time", "time (in self.result[callsite] = ret @staticmethod - def df_bi_level_group(df, frst_group_attr, scnd_group_attr, cols, group_by, apply_func, proxy={}): + def df_bi_level_group(df, frst_group_attr, scnd_group_attr, cols, group_by): + """ + """ _cols = cols + group_by # If there is only one attribute to group by, we use the 1st index. @@ -85,6 +100,9 @@ def df_bi_level_group(df, frst_group_attr, scnd_group_attr, cols, group_by, appl @staticmethod def outliers(data, scale=1.5, side="both"): + """ + + """ assert isinstance(data, (pd.Series, np.ndarray)) assert len(data.shape) == 1 assert isinstance(scale, float) @@ -108,7 +126,6 @@ def outliers(data, scale=1.5, side="both"): if side == "both": return np.logical_or(upper_outlier, lower_outlier) - def compute(self, df): """ Compute boxplot related information. From 8e101a44eeea7343713490969d4eed1dabe0906c Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 20 Jul 2021 22:32:07 -0700 Subject: [PATCH 36/45] Clean up boxplots and documentation --- hatchet/external/roundtrip/boxplot.js | 6 +- hatchet/external/scripts/boxplot.py | 142 ++++++++++++++++---------- 2 files changed, 90 insertions(+), 58 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 4fed867d..058acbfa 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -321,10 +321,10 @@ // Outliers const outlierRadius = 4; let outliers = []; - for (let idx = 0; idx < d.outliers["values"].length; idx += 1) { + for (let idx = 0; idx < d.ometric.length; idx += 1) { outliers.push({ - x: xScale(d.outliers["values"][idx]), - value: d.outliers["values"][idx], + x: xScale(d.ometric[idx]), + value: d.ometric[idx], // rank: d.outliers["ranks"][idx], y: 10 }); diff --git a/hatchet/external/scripts/boxplot.py b/hatchet/external/scripts/boxplot.py index 63b19d25..5e898919 100644 --- a/hatchet/external/scripts/boxplot.py +++ b/hatchet/external/scripts/boxplot.py @@ -4,7 +4,7 @@ import hatchet as ht class BoxPlot: - def __init__(self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=["time", "time (inc)"], iqr_scale=1.5): + def __init__(self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=[], iqr_scale=1.5): """ Boxplot computation for callsites. The data can be computed for two use cases: @@ -12,12 +12,15 @@ def __init__(self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=["time 2. Comparing runtime distributions of a target GraphFrame against a background GraphFrame. - :param cat_column: (string) Categorical column to aggregate the boxplot computation. - :param tgt_gf: (ht.GraphFrame) Target GraphFrame. - :param bkg_gf: (ht.GraphFrame) Background GraphFrame. - :param callsite: (list) List of callsites. - :param metrics: (list) List of metrics to compute. - :param iqr_scale: (float) IQR range for outliers. + Arguments: + cat_column: (string) Categorical column to aggregate the boxplot computation. + tgt_gf: (ht.GraphFrame) Target GraphFrame. + bkg_gf: (ht.GraphFrame) Background GraphFrame. + callsite: (list) List of callsites. + metrics: (list) List of metrics to compute. + iqr_scale: (float) IQR range for outliers. + + Return: None """ assert isinstance(tgt_gf, ht.GraphFrame) assert isinstance(callsites, list) @@ -27,24 +30,32 @@ def __init__(self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=["time if bkg_gf is not None: assert isinstance(bkg_gf, ht.GraphFrame) assert cat_column in bkg_gf.dataframe.column - + + tgt_gf.dataframe = tgt_gf.dataframe.reset_index() if cat_column not in tgt_gf.dataframe.columns: raise Exception(f"{cat_column} not found in tgt_gf.") - - if cat_column not in bkg_gf.dataframe.columns: - raise Exception(f"{cat_column} not found in bkg_gf.") + + if bkg_gf is not None: + bkg_gf.dataframe = bkg_gf.dataframe.reset_index() + if cat_column not in bkg_gf.dataframe.columns: + raise Exception(f"{cat_column} not found in bkg_gf.") self.metrics = metrics self.iqr_scale = iqr_scale self.callsites = callsites self.cat_column = cat_column - + + if len(metrics) == 0: + self.metrics = tgt_gf.inc_metrics + tgt_gf.exc_metrics + + hatchet_cols = ["nid", "node"] + tgt_gf.dataframe.reset_index(inplace=True) - tgt_dict = BoxPlot.df_bi_level_group(tgt_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=[cat_column], apply_func=lambda _: _.mean()) + tgt_dict = BoxPlot.df_groupby(tgt_gf.dataframe, groupby="name", cols=self.metrics + hatchet_cols + [self.cat_column]) if bkg_gf is not None: bkg_gf.dataframe.reset_index(inplace=True) - bkg_dict = BoxPlot.df_bi_level_group(bkg_gf.dataframe, "name", None, cols=metrics + ["nid"], group_by=[cat_column], apply_func=lambda _: _.mean()) + bkg_dict = BoxPlot.df_groupby(bkg_gf.dataframe, groupby="name", cols=self.metrics + hatchet_cols + [self.cat_column]) self.result = {} @@ -64,44 +75,36 @@ def __init__(self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=["time self.result[callsite] = ret @staticmethod - def df_bi_level_group(df, frst_group_attr, scnd_group_attr, cols, group_by): - """ + def df_groupby(df, groupby, cols): """ - _cols = cols + group_by + Group the dataframe by groupby column. - # If there is only one attribute to group by, we use the 1st index. - if len(group_by) == 1: - group_by = group_by[0] + Arguments: + df (graphframe): self's graphframe + groupby: groupby columns on dataframe + cols: columns from the dataframe - # Find the grouping - if scnd_group_attr is not None: - _groups = [frst_group_attr, scnd_group_attr] - else: - _groups = [frst_group_attr] - - # Set the df.index as the _groups - _df = df.set_index(_groups) + Return: + (dict): A dictionary of dataframes (columns) keyed by groups. + """ + _df = df.set_index([groupby]) _levels = _df.index.unique().tolist() - # If "rank" is present in the columns, group by "rank". - if "rank" in _df.columns and len(df["rank"].unique().tolist()) > 1: - if scnd_group_attr is not None: - if len(group_by) == 0: - _cols = _cols + ["rank"] - return { _ : _df.xs(_)[_cols] for (_, __) in _levels } - return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for (_, __) in _levels } - else: - if len(group_by) == 0: - _cols = _cols + ["rank"] - return { _ : _df.xs(_)[_cols] for _ in _levels } - return { _ : (_df.xs(_)[_cols].groupby(group_by).mean()).reset_index() for _ in _levels } - else: - return { _ : _df.xs(_)[_cols] for _ in _levels} + return { _ : _df.xs(_)[cols] for _ in _levels} @staticmethod def outliers(data, scale=1.5, side="both"): """ - + Calculate outliers from the data. + + Arguments: + data (np.ndarray or pd.Series): Array of values. + scale (float): IQR range for outliers. + side (str): directions for calculating the outliers, i.e., left, + right, both. + + Return: + outliers (np.ndarray): Array of outlier values. """ assert isinstance(data, (pd.Series, np.ndarray)) assert len(data.shape) == 1 @@ -128,10 +131,24 @@ def outliers(data, scale=1.5, side="both"): def compute(self, df): """ - Compute boxplot related information. + Compute boxplot quartiles and statistics. - :param df: Dataframe to calculate the boxplot information. - :return: + Arguments: + df: Dataframe to calculate the boxplot information. + + Return: + ret (dict): { + "metric1": { + "q": (array) quartiles (i.e., [q0, q1, q2, q3, q4]), + "ometric": (array) outlier from metric, + "ocat": (array) outlier from cat_column, + "d": (array) metric values, + "rng": (tuple) (min, max), + "uv": (tuple) (mean, variance), + "imb": (number) imbalance, + "ks": (tuple) (kurtosis, skewness) + } + } """ ret = {_: {} for _ in self.metrics} @@ -149,21 +166,40 @@ def compute(self, df): ret[tk] = { "q": q, - "oval": df[tv].to_numpy()[mask], + "ometric": df[tv].to_numpy()[mask], + "ocat": df[self.cat_column].to_numpy()[mask], "d": _data, "rng": (_min, _max), "uv": (_mean, _var), "imb": _imb, "ks": (_kurt, _skew), } - if 'dataset' in df.columns: - ret[tk]['odset'] = df['dataset'].to_numpy()[mask] return ret def unpack(self): """ Unpack the boxplot data into JSON format. + + Arguments: + + Return: + result (dict): { + "callsite1": { + "metric1": { + "q": (array) quartiles (i.e., [q0, q1, q2, q3, q4]), + "ocat": (array) outlier from cat_column, + "ometric": (array) outlier from metri, + "min": (number) minimum, + "max": (number) maximum, + "mean": (number) mean, + "var": (number) variance, + "imb": (number) imbalance, + "kurt": (number) kurtosis, + "skew": (number) skewness, + }, ... + }, ... + } """ result = {} for callsite in self.callsites: @@ -174,9 +210,8 @@ def unpack(self): box = self.result[callsite][box_type][metric] result[callsite][box_type][metric] = { "q": box["q"].tolist(), - "outliers": { - "values": box["oval"].tolist(), - }, + "ocat": box["ocat"].tolist(), + "ometric": box["ometric"].tolist(), "min": box["rng"][0], "max": box["rng"][1], "mean": box["uv"][0], @@ -186,7 +221,4 @@ def unpack(self): "skew": box["ks"][1], } - if 'odset' in box: - result[callsite][box_type][metric]['odset'] = box['odset'].tolist() - return result \ No newline at end of file From 5db15b94b860cb28300f0298d350a9aeb1a787f8 Mon Sep 17 00:00:00 2001 From: jarusified Date: Tue, 20 Jul 2021 23:21:25 -0700 Subject: [PATCH 37/45] Add node, nid to the columns in dataframe --- hatchet/external/roundtrip/boxplot.js | 8 ++++++-- hatchet/external/roundtrip/roundtrip.py | 5 ++++- hatchet/external/scripts/boxplot.py | 15 +++++++++++---- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index 058acbfa..bf432e1b 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -143,6 +143,8 @@ "imb": d3_utils.formatRuntime(d.imb), "kurt": d3_utils.formatRuntime(d.kurt), "skew": d3_utils.formatRuntime(d.skew), + "node": d.node, + "nid": d.nid }; } @@ -156,12 +158,14 @@ function dict_to_csv(dict, boxplotType) { const callsites = Object.keys(dict); const stat_columns = ["min", "max", "mean", "var", "imb", "kurt", "skew"] - let string = 'name,' + stat_columns.join(",") + ";"; + let string = 'name,node,nid,' + stat_columns.join(",") + ";"; for (let callsite of callsites){ const d = dict[callsite][boxplotType]; + const node = dict[callsite][boxplotType]["node"]; + const nid = dict[callsite][boxplotType]["nid"]; - let statsString = `${callsite},`; + let statsString = `${callsite},${node},${nid},`; for (let stat of stat_columns) { if (Object.keys(d).includes(stat)) { statsString += d[stat] + ","; diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index 06d88dd1..faa35ee1 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -126,7 +126,10 @@ def _validate_boxplot(self, data): "kurt": {"type": "number"}, "skew": {"type": "number"}, "q": {"type": "array"}, - "outliers": {"type": "object"}, + "ocat": {"type": "array"}, + "ometric": {"type": "array"}, + "nid": {"type": "string"}, + "node": {"type": "string"}, }, } diff --git a/hatchet/external/scripts/boxplot.py b/hatchet/external/scripts/boxplot.py index 5e898919..ca822e6a 100644 --- a/hatchet/external/scripts/boxplot.py +++ b/hatchet/external/scripts/boxplot.py @@ -48,14 +48,15 @@ def __init__(self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=[], iq if len(metrics) == 0: self.metrics = tgt_gf.inc_metrics + tgt_gf.exc_metrics - hatchet_cols = ["nid", "node"] + self.hatchet_cols = ["nid"] + # self.hatchet_cols = ["nid", "node"] tgt_gf.dataframe.reset_index(inplace=True) - tgt_dict = BoxPlot.df_groupby(tgt_gf.dataframe, groupby="name", cols=self.metrics + hatchet_cols + [self.cat_column]) + tgt_dict = BoxPlot.df_groupby(tgt_gf.dataframe, groupby="name", cols=self.metrics + self.hatchet_cols + [self.cat_column]) if bkg_gf is not None: bkg_gf.dataframe.reset_index(inplace=True) - bkg_dict = BoxPlot.df_groupby(bkg_gf.dataframe, groupby="name", cols=self.metrics + hatchet_cols + [self.cat_column]) + bkg_dict = BoxPlot.df_groupby(bkg_gf.dataframe, groupby="name", cols=self.metrics + self.hatchet_cols + [self.cat_column]) self.result = {} @@ -175,6 +176,9 @@ def compute(self, df): "ks": (_kurt, _skew), } + for col in self.hatchet_cols: + ret[tk][col] = df[col].unique()[0] + return ret def unpack(self): @@ -220,5 +224,8 @@ def unpack(self): "kurt": box["ks"][0], "skew": box["ks"][1], } - + + for col in self.hatchet_cols: + result[callsite][box_type][metric][col] = str(self.result[callsite][box_type][metric][col]) + return result \ No newline at end of file From ada2421b7af6874990b544f132ca1d45454f32ee Mon Sep 17 00:00:00 2001 From: jarusified Date: Fri, 23 Jul 2021 13:25:45 -0700 Subject: [PATCH 38/45] Fix flake and black errors --- hatchet/external/scripts/__init__.py | 6 ++- hatchet/external/scripts/boxplot.py | 55 +++++++++++++++++----------- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/hatchet/external/scripts/__init__.py b/hatchet/external/scripts/__init__.py index 2328c084..bde29b6a 100644 --- a/hatchet/external/scripts/__init__.py +++ b/hatchet/external/scripts/__init__.py @@ -2,4 +2,8 @@ # Hatchet Project Developers. See the top-level LICENSE file for details. # # SPDX-License-Identifier: MIT -from .boxplot import BoxPlot \ No newline at end of file + +# make flake8 unused names in this file. +# flake8: noqa: F401 + +from .boxplot import BoxPlot diff --git a/hatchet/external/scripts/boxplot.py b/hatchet/external/scripts/boxplot.py index ca822e6a..f0ad2afa 100644 --- a/hatchet/external/scripts/boxplot.py +++ b/hatchet/external/scripts/boxplot.py @@ -1,17 +1,20 @@ -import numpy as np +import numpy as np import pandas as pd from scipy import stats import hatchet as ht + class BoxPlot: - def __init__(self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=[], iqr_scale=1.5): + def __init__( + self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=[], iqr_scale=1.5 + ): """ Boxplot computation for callsites. The data can be computed for two use cases: 1. Examining runtime distributions of a single GraphFrame. 2. Comparing runtime distributions of a target GraphFrame against a - background GraphFrame. - + background GraphFrame. + Arguments: cat_column: (string) Categorical column to aggregate the boxplot computation. tgt_gf: (ht.GraphFrame) Target GraphFrame. @@ -39,7 +42,7 @@ def __init__(self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=[], iq bkg_gf.dataframe = bkg_gf.dataframe.reset_index() if cat_column not in bkg_gf.dataframe.columns: raise Exception(f"{cat_column} not found in bkg_gf.") - + self.metrics = metrics self.iqr_scale = iqr_scale self.callsites = callsites @@ -52,15 +55,23 @@ def __init__(self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=[], iq # self.hatchet_cols = ["nid", "node"] tgt_gf.dataframe.reset_index(inplace=True) - tgt_dict = BoxPlot.df_groupby(tgt_gf.dataframe, groupby="name", cols=self.metrics + self.hatchet_cols + [self.cat_column]) - + tgt_dict = BoxPlot.df_groupby( + tgt_gf.dataframe, + groupby="name", + cols=self.metrics + self.hatchet_cols + [self.cat_column], + ) + if bkg_gf is not None: bkg_gf.dataframe.reset_index(inplace=True) - bkg_dict = BoxPlot.df_groupby(bkg_gf.dataframe, groupby="name", cols=self.metrics + self.hatchet_cols + [self.cat_column]) - + bkg_dict = BoxPlot.df_groupby( + bkg_gf.dataframe, + groupby="name", + cols=self.metrics + self.hatchet_cols + [self.cat_column], + ) + self.result = {} - self.box_types = ["tgt"] + self.box_types = ["tgt"] if bkg_gf is not None: self.box_types = ["tgt", "bkg"] @@ -72,9 +83,9 @@ def __init__(self, cat_column, tgt_gf, bkg_gf=None, callsites=[], metrics=[], iq if bkg_gf is not None: bkg_df = bkg_dict[callsite] ret["bkg"] = self.compute(bkg_df) - + self.result[callsite] = ret - + @staticmethod def df_groupby(df, groupby, cols): """ @@ -91,8 +102,8 @@ def df_groupby(df, groupby, cols): _df = df.set_index([groupby]) _levels = _df.index.unique().tolist() - return { _ : _df.xs(_)[cols] for _ in _levels} - + return {_: _df.xs(_)[cols] for _ in _levels} + @staticmethod def outliers(data, scale=1.5, side="both"): """ @@ -136,7 +147,7 @@ def compute(self, df): Arguments: df: Dataframe to calculate the boxplot information. - + Return: ret (dict): { "metric1": { @@ -180,7 +191,7 @@ def compute(self, df): ret[tk][col] = df[col].unique()[0] return ret - + def unpack(self): """ Unpack the boxplot data into JSON format. @@ -214,7 +225,7 @@ def unpack(self): box = self.result[callsite][box_type][metric] result[callsite][box_type][metric] = { "q": box["q"].tolist(), - "ocat": box["ocat"].tolist(), + "ocat": box["ocat"].tolist(), "ometric": box["ometric"].tolist(), "min": box["rng"][0], "max": box["rng"][1], @@ -224,8 +235,10 @@ def unpack(self): "kurt": box["ks"][0], "skew": box["ks"][1], } - + for col in self.hatchet_cols: - result[callsite][box_type][metric][col] = str(self.result[callsite][box_type][metric][col]) - - return result \ No newline at end of file + result[callsite][box_type][metric][col] = str( + self.result[callsite][box_type][metric][col] + ) + + return result From 1dafca872d1cb9ff3a42e23ec4e13d41aefc811a Mon Sep 17 00:00:00 2001 From: jarusified Date: Fri, 23 Jul 2021 13:43:16 -0700 Subject: [PATCH 39/45] Format the d3_utils.js file --- hatchet/external/roundtrip/lib/d3_utils.js | 66 +++++++++++----------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/hatchet/external/roundtrip/lib/d3_utils.js b/hatchet/external/roundtrip/lib/d3_utils.js index 366bfc84..78af0d19 100644 --- a/hatchet/external/roundtrip/lib/d3_utils.js +++ b/hatchet/external/roundtrip/lib/d3_utils.js @@ -7,7 +7,7 @@ define(function (require) { calcCellWidth: (width, colNames) => width / colNames.length, calcCellHeight: (height, rowNames) => height / rowNames.length, calcCellSize: (width, height, colNames, rowNames, widthMax, heightMax) => [Math.min(calcCellWidth(width, colNames), widthMax), Math.min(calcCellHeight(height, rowNames), heightMax)], - + // SVG init. prepareSvgArea: (windowWidth, windowHeight, margin, id) => { return { @@ -92,7 +92,7 @@ define(function (require) { options.text(d => d) .attr('value', d => d); }, - + // Formatting numbers formatRuntime: (val) => { @@ -128,28 +128,28 @@ define(function (require) { }, drawLine: (element, x1, y1, x2, y2, strokeColor, strokeWidth) => { return element - .append("line") - .attr("class", "line") - .attr("x1", x1) + .append("line") + .attr("class", "line") + .attr("x1", x1) .attr("y1", y1) .attr("x2", x2) .attr("y2", y2) .attr("stroke", strokeColor) - .style("stroke-width", strokeWidth); + .style("stroke-width", strokeWidth); }, drawCircle: (element, data, radius, fillColor, click = () => { }, mouseover = () => { }, mouseout = () => { }) => { return element - .selectAll(".circle") - .data(data) - .join("circle") - .attr("r", radius) + .selectAll(".circle") + .data(data) + .join("circle") + .attr("r", radius) .attr("cx", (d) => d.x) .attr("cy", (d) => d.y) - .attr("class", "circle") - .style("fill", fillColor) - .on("click", (d) => click(d)) - .on("mouseover", (d) => mouseover(d)) - .on("mouseout", (d) => mouseout(d)); + .attr("class", "circle") + .style("fill", fillColor) + .on("click", (d) => click(d)) + .on("mouseover", (d) => mouseover(d)) + .on("mouseout", (d) => mouseout(d)); }, drawXAxis: (element, xScale, numOfTicks, tickFormatFn, xOffset, yOffset, strokeColor) => { const axis = d3.axisBottom(xScale) @@ -160,26 +160,26 @@ define(function (require) { .attr("class", "xAxis") .attr("transform", `translate(${xOffset}, ${yOffset})`) .call(axis); - + line.selectAll("path") - .style("fill", "none") - .style("stroke", strokeColor) - .style("stroke-width", "1px"); - - line.selectAll("line") - .style("fill", "none") - .style("stroke", strokeColor) - .style("stroke-width", "1px"); - - line.selectAll("text") - .style("font-size", "12px") - .style("font-family", "sans-serif") - .style("font-weight", "lighter"); - + .style("fill", "none") + .style("stroke", strokeColor) + .style("stroke-width", "1px"); + + line.selectAll("line") + .style("fill", "none") + .style("stroke", strokeColor) + .style("stroke-width", "1px"); + + line.selectAll("text") + .style("font-size", "12px") + .style("font-family", "sans-serif") + .style("font-weight", "lighter"); + return line; }, drawToolTip: (element, event, text, width, height) => { - const [ mousePosX, mousePosY] = d3.pointer(event, element.node()); + const [mousePosX, mousePosY] = d3.pointer(event, element.node()); const toolTipG = element .append("g") .attr("class", "tooltip") @@ -194,8 +194,8 @@ define(function (require) { toolTipG.append("text") .attr("class", "tooltip-content") - .style("font-family", "sans-serif") - .style("font-size", "12px") + .style("font-family", "sans-serif") + .style("font-size", "12px") .attr("fill", "#000") .text(text); }, From e7b5694eea1a497e311ff0ab9558e908855fcaed Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 2 Sep 2021 09:48:58 -0700 Subject: [PATCH 40/45] Remove dependence of nid column --- .../performance_variability_boxplots.ipynb | 225 +++++++++++++++--- hatchet/external/scripts/boxplot.py | 14 +- 2 files changed, 195 insertions(+), 44 deletions(-) diff --git a/docs/examples/tutorial/performance_variability_boxplots.ipynb b/docs/examples/tutorial/performance_variability_boxplots.ipynb index b88b14fa..be31e0d4 100644 --- a/docs/examples/tutorial/performance_variability_boxplots.ipynb +++ b/docs/examples/tutorial/performance_variability_boxplots.ipynb @@ -73,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -84,25 +84,25 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "callsites = gf.dataframe.name.unique().tolist()\n", - "bp = BoxPlot(tgt_gf=gf, bkg_gf=None, callsites=callsites, metrics=[\"time\"])\n", + "bp = BoxPlot(cat_column='rank', tgt_gf=gf, bkg_gf=None, callsites=callsites, metrics=[\"time\"])\n", "boxplot = bp.unpack()" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'tgt': {'time': {'q': [105528.0, 113072.25, 116494.0, 124430.75, 137098.0], 'outliers': {'values': []}, 'min': 105528.0, 'max': 137098.0, 'mean': 119373.5, 'var': 104497970.25, 'imb': 0.14847935262013764, 'kurt': -0.9421848873183336, 'skew': 0.5436725364039101}}}\n" + "{'tgt': {'time': {'q': [105528.0, 113072.25, 116494.0, 124430.75, 137098.0], 'ocat': [], 'ometric': [], 'min': 105528.0, 'max': 137098.0, 'mean': 119373.5, 'var': 104497970.25, 'imb': 0.14847935262013764, 'kurt': -0.9421848873183336, 'skew': 0.5436725364039101}}}\n" ] } ], @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "scrolled": false }, @@ -135,10 +135,12 @@ "data": { "text/html": [ "\n", - "
\n", - " " + "})(element);\n", + " \n", + " " ], "text/plain": [ "" @@ -618,7 +626,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -662,18 +670,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'variance_df' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvariance_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mNameError\u001b[0m: name 'variance_df' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "name,node,nid,min,max,mean,var,imb,kurt,skew;CalcFBHourglassForceForElems,undefined,undefined,1088315,1276744,1197360.375,3561043884.734375,0.066298857601664,-0.8618185329919692,-0.336770351062538;CalcKinematicsForElems,undefined,undefined,493338,907675,740734,20585329027.5,0.22537240088884808,-1.323030118573988,-0.3042530153918946;IntegrateStressForElems,undefined,undefined,448597,987804,725254.375,29868514054.234375,0.3620103980758475,-1.2658383358291696,-0.1038366357478744;CalcHourglassControlForElems,undefined,undefined,494580,599077,574309,982583388.75,0.04312660954294639,2.322254192176139,-1.930747431397297;CalcMonotonicQGradientsForElems,undefined,undefined,326522,448753,393558.125,1927822359.609375,0.140245802319543,-1.5265491924225043,-0.08914394549811265\n" ] } ], @@ -683,10 +687,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ + "import pandas as pd\n", + "\n", "columns = variance_df.split(';')[0].split(',')\n", "data = [x.split(',') for x in variance_df.split(';')[1:]]\n", "df = pd.DataFrame(data, columns=columns).set_index('name')" @@ -694,19 +700,174 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nodenidminmaxmeanvarimbkurtskew
name
CalcFBHourglassForceForElemsundefinedundefined108831512767441197360.3753561043884.7343750.066298857601664-0.8618185329919692-0.336770351062538
CalcKinematicsForElemsundefinedundefined49333890767574073420585329027.50.22537240088884808-1.323030118573988-0.3042530153918946
IntegrateStressForElemsundefinedundefined448597987804725254.37529868514054.2343750.3620103980758475-1.2658383358291696-0.1038366357478744
CalcHourglassControlForElemsundefinedundefined494580599077574309982583388.750.043126609542946392.322254192176139-1.930747431397297
CalcMonotonicQGradientsForElemsundefinedundefined326522448753393558.1251927822359.6093750.140245802319543-1.5265491924225043-0.08914394549811265
\n", + "
" + ], + "text/plain": [ + " node nid min max \\\n", + "name \n", + "CalcFBHourglassForceForElems undefined undefined 1088315 1276744 \n", + "CalcKinematicsForElems undefined undefined 493338 907675 \n", + "IntegrateStressForElems undefined undefined 448597 987804 \n", + "CalcHourglassControlForElems undefined undefined 494580 599077 \n", + "CalcMonotonicQGradientsForElems undefined undefined 326522 448753 \n", + "\n", + " mean var \\\n", + "name \n", + "CalcFBHourglassForceForElems 1197360.375 3561043884.734375 \n", + "CalcKinematicsForElems 740734 20585329027.5 \n", + "IntegrateStressForElems 725254.375 29868514054.234375 \n", + "CalcHourglassControlForElems 574309 982583388.75 \n", + "CalcMonotonicQGradientsForElems 393558.125 1927822359.609375 \n", + "\n", + " imb kurt \\\n", + "name \n", + "CalcFBHourglassForceForElems 0.066298857601664 -0.8618185329919692 \n", + "CalcKinematicsForElems 0.22537240088884808 -1.323030118573988 \n", + "IntegrateStressForElems 0.3620103980758475 -1.2658383358291696 \n", + "CalcHourglassControlForElems 0.04312660954294639 2.322254192176139 \n", + "CalcMonotonicQGradientsForElems 0.140245802319543 -1.5265491924225043 \n", + "\n", + " skew \n", + "name \n", + "CalcFBHourglassForceForElems -0.336770351062538 \n", + "CalcKinematicsForElems -0.3042530153918946 \n", + "IntegrateStressForElems -0.1038366357478744 \n", + "CalcHourglassControlForElems -1.930747431397297 \n", + "CalcMonotonicQGradientsForElems -0.08914394549811265 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3.7.6 64-bit", + "display_name": "python-3.9", "language": "python", - "name": "python37664bit3a5637fa2c7f4443bca7a2894d18d23d" + "name": "python-3.9" }, "language_info": { "codemirror_mode": { @@ -718,7 +879,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.7" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/hatchet/external/scripts/boxplot.py b/hatchet/external/scripts/boxplot.py index f0ad2afa..316228d2 100644 --- a/hatchet/external/scripts/boxplot.py +++ b/hatchet/external/scripts/boxplot.py @@ -51,14 +51,11 @@ def __init__( if len(metrics) == 0: self.metrics = tgt_gf.inc_metrics + tgt_gf.exc_metrics - self.hatchet_cols = ["nid"] - # self.hatchet_cols = ["nid", "node"] - tgt_gf.dataframe.reset_index(inplace=True) tgt_dict = BoxPlot.df_groupby( tgt_gf.dataframe, groupby="name", - cols=self.metrics + self.hatchet_cols + [self.cat_column], + cols=self.metrics + [self.cat_column], ) if bkg_gf is not None: @@ -66,7 +63,7 @@ def __init__( bkg_dict = BoxPlot.df_groupby( bkg_gf.dataframe, groupby="name", - cols=self.metrics + self.hatchet_cols + [self.cat_column], + cols=self.metrics + [self.cat_column], ) self.result = {} @@ -187,9 +184,6 @@ def compute(self, df): "ks": (_kurt, _skew), } - for col in self.hatchet_cols: - ret[tk][col] = df[col].unique()[0] - return ret def unpack(self): @@ -236,9 +230,5 @@ def unpack(self): "skew": box["ks"][1], } - for col in self.hatchet_cols: - result[callsite][box_type][metric][col] = str( - self.result[callsite][box_type][metric][col] - ) return result From cc9caaab0c3c1a4d6fd33cfd65ef6edf49430809 Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 2 Sep 2021 10:16:24 -0700 Subject: [PATCH 41/45] Remove nid in the javascript --- .../performance_variability_boxplots.ipynb | 88 +++++++------------ hatchet/external/roundtrip/boxplot.js | 10 +-- 2 files changed, 34 insertions(+), 64 deletions(-) diff --git a/docs/examples/tutorial/performance_variability_boxplots.ipynb b/docs/examples/tutorial/performance_variability_boxplots.ipynb index be31e0d4..695388e7 100644 --- a/docs/examples/tutorial/performance_variability_boxplots.ipynb +++ b/docs/examples/tutorial/performance_variability_boxplots.ipynb @@ -284,9 +284,7 @@ " \"var\": d3_utils.formatRuntime(d.var),\n", " \"imb\": d3_utils.formatRuntime(d.imb),\n", " \"kurt\": d3_utils.formatRuntime(d.kurt),\n", - " \"skew\": d3_utils.formatRuntime(d.skew),\n", - " \"node\": d.node,\n", - " \"nid\": d.nid\n", + " \"skew\": d3_utils.formatRuntime(d.skew)\n", " };\n", " }\n", "\n", @@ -300,14 +298,12 @@ " function dict_to_csv(dict, boxplotType) {\n", " const callsites = Object.keys(dict);\n", " const stat_columns = [\"min\", \"max\", \"mean\", \"var\", \"imb\", \"kurt\", \"skew\"]\n", - " let string = 'name,node,nid,' + stat_columns.join(\",\") + \";\";\n", + " let string = 'name,' + stat_columns.join(\",\") + \";\";\n", "\n", " for (let callsite of callsites){\n", " const d = dict[callsite][boxplotType];\n", - " const node = dict[callsite][boxplotType][\"node\"];\n", - " const nid = dict[callsite][boxplotType][\"nid\"];\n", "\n", - " let statsString = `${callsite},${node},${nid},`;\n", + " let statsString = `${callsite},`;\n", " for (let stat of stat_columns) {\n", " if (Object.keys(d).includes(stat)) {\n", " statsString += d[stat] + \",\";\n", @@ -626,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -670,14 +666,14 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "name,node,nid,min,max,mean,var,imb,kurt,skew;CalcFBHourglassForceForElems,undefined,undefined,1088315,1276744,1197360.375,3561043884.734375,0.066298857601664,-0.8618185329919692,-0.336770351062538;CalcKinematicsForElems,undefined,undefined,493338,907675,740734,20585329027.5,0.22537240088884808,-1.323030118573988,-0.3042530153918946;IntegrateStressForElems,undefined,undefined,448597,987804,725254.375,29868514054.234375,0.3620103980758475,-1.2658383358291696,-0.1038366357478744;CalcHourglassControlForElems,undefined,undefined,494580,599077,574309,982583388.75,0.04312660954294639,2.322254192176139,-1.930747431397297;CalcMonotonicQGradientsForElems,undefined,undefined,326522,448753,393558.125,1927822359.609375,0.140245802319543,-1.5265491924225043,-0.08914394549811265\n" + "name,min,max,mean,var,imb,kurt,skew;CalcFBHourglassForceForElems,1088315,1276744,1197360.375,3561043884.734375,0.066298857601664,-0.8618185329919692,-0.336770351062538;CalcKinematicsForElems,493338,907675,740734,20585329027.5,0.22537240088884808,-1.323030118573988,-0.3042530153918946;IntegrateStressForElems,448597,987804,725254.375,29868514054.234375,0.3620103980758475,-1.2658383358291696,-0.1038366357478744;CalcHourglassControlForElems,494580,599077,574309,982583388.75,0.04312660954294639,2.322254192176139,-1.930747431397297;CalcMonotonicQGradientsForElems,326522,448753,393558.125,1927822359.609375,0.140245802319543,-1.5265491924225043,-0.08914394549811265\n" ] } ], @@ -687,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -700,7 +696,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -724,8 +720,6 @@ " \n", " \n", " \n", - " node\n", - " nid\n", " min\n", " max\n", " mean\n", @@ -743,15 +737,11 @@ " \n", " \n", " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", " CalcFBHourglassForceForElems\n", - " undefined\n", - " undefined\n", " 1088315\n", " 1276744\n", " 1197360.375\n", @@ -762,8 +752,6 @@ " \n", " \n", " CalcKinematicsForElems\n", - " undefined\n", - " undefined\n", " 493338\n", " 907675\n", " 740734\n", @@ -774,8 +762,6 @@ " \n", " \n", " IntegrateStressForElems\n", - " undefined\n", - " undefined\n", " 448597\n", " 987804\n", " 725254.375\n", @@ -786,8 +772,6 @@ " \n", " \n", " CalcHourglassControlForElems\n", - " undefined\n", - " undefined\n", " 494580\n", " 599077\n", " 574309\n", @@ -798,8 +782,6 @@ " \n", " \n", " CalcMonotonicQGradientsForElems\n", - " undefined\n", - " undefined\n", " 326522\n", " 448753\n", " 393558.125\n", @@ -813,40 +795,32 @@ "" ], "text/plain": [ - " node nid min max \\\n", - "name \n", - "CalcFBHourglassForceForElems undefined undefined 1088315 1276744 \n", - "CalcKinematicsForElems undefined undefined 493338 907675 \n", - "IntegrateStressForElems undefined undefined 448597 987804 \n", - "CalcHourglassControlForElems undefined undefined 494580 599077 \n", - "CalcMonotonicQGradientsForElems undefined undefined 326522 448753 \n", - "\n", - " mean var \\\n", - "name \n", - "CalcFBHourglassForceForElems 1197360.375 3561043884.734375 \n", - "CalcKinematicsForElems 740734 20585329027.5 \n", - "IntegrateStressForElems 725254.375 29868514054.234375 \n", - "CalcHourglassControlForElems 574309 982583388.75 \n", - "CalcMonotonicQGradientsForElems 393558.125 1927822359.609375 \n", - "\n", - " imb kurt \\\n", + " min max mean \\\n", + "name \n", + "CalcFBHourglassForceForElems 1088315 1276744 1197360.375 \n", + "CalcKinematicsForElems 493338 907675 740734 \n", + "IntegrateStressForElems 448597 987804 725254.375 \n", + "CalcHourglassControlForElems 494580 599077 574309 \n", + "CalcMonotonicQGradientsForElems 326522 448753 393558.125 \n", + "\n", + " var imb \\\n", + "name \n", + "CalcFBHourglassForceForElems 3561043884.734375 0.066298857601664 \n", + "CalcKinematicsForElems 20585329027.5 0.22537240088884808 \n", + "IntegrateStressForElems 29868514054.234375 0.3620103980758475 \n", + "CalcHourglassControlForElems 982583388.75 0.04312660954294639 \n", + "CalcMonotonicQGradientsForElems 1927822359.609375 0.140245802319543 \n", + "\n", + " kurt skew \n", "name \n", - "CalcFBHourglassForceForElems 0.066298857601664 -0.8618185329919692 \n", - "CalcKinematicsForElems 0.22537240088884808 -1.323030118573988 \n", - "IntegrateStressForElems 0.3620103980758475 -1.2658383358291696 \n", - "CalcHourglassControlForElems 0.04312660954294639 2.322254192176139 \n", - "CalcMonotonicQGradientsForElems 0.140245802319543 -1.5265491924225043 \n", - "\n", - " skew \n", - "name \n", - "CalcFBHourglassForceForElems -0.336770351062538 \n", - "CalcKinematicsForElems -0.3042530153918946 \n", - "IntegrateStressForElems -0.1038366357478744 \n", - "CalcHourglassControlForElems -1.930747431397297 \n", - "CalcMonotonicQGradientsForElems -0.08914394549811265 " + "CalcFBHourglassForceForElems -0.8618185329919692 -0.336770351062538 \n", + "CalcKinematicsForElems -1.323030118573988 -0.3042530153918946 \n", + "IntegrateStressForElems -1.2658383358291696 -0.1038366357478744 \n", + "CalcHourglassControlForElems 2.322254192176139 -1.930747431397297 \n", + "CalcMonotonicQGradientsForElems -1.5265491924225043 -0.08914394549811265 " ] }, - "execution_count": 15, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } diff --git a/hatchet/external/roundtrip/boxplot.js b/hatchet/external/roundtrip/boxplot.js index bf432e1b..800adf25 100644 --- a/hatchet/external/roundtrip/boxplot.js +++ b/hatchet/external/roundtrip/boxplot.js @@ -142,9 +142,7 @@ "var": d3_utils.formatRuntime(d.var), "imb": d3_utils.formatRuntime(d.imb), "kurt": d3_utils.formatRuntime(d.kurt), - "skew": d3_utils.formatRuntime(d.skew), - "node": d.node, - "nid": d.nid + "skew": d3_utils.formatRuntime(d.skew) }; } @@ -158,14 +156,12 @@ function dict_to_csv(dict, boxplotType) { const callsites = Object.keys(dict); const stat_columns = ["min", "max", "mean", "var", "imb", "kurt", "skew"] - let string = 'name,node,nid,' + stat_columns.join(",") + ";"; + let string = 'name,' + stat_columns.join(",") + ";"; for (let callsite of callsites){ const d = dict[callsite][boxplotType]; - const node = dict[callsite][boxplotType]["node"]; - const nid = dict[callsite][boxplotType]["nid"]; - let statsString = `${callsite},${node},${nid},`; + let statsString = `${callsite},`; for (let stat of stat_columns) { if (Object.keys(d).includes(stat)) { statsString += d[stat] + ","; From 04b6fcb9f40cd1d73f210b91760f83023a538601 Mon Sep 17 00:00:00 2001 From: jarusified Date: Wed, 8 Sep 2021 23:27:15 -0700 Subject: [PATCH 42/45] Add to_json and to_gf methods --- .../performance_variability_boxplots.ipynb | 60 +++++-- hatchet/external/roundtrip/roundtrip.py | 2 +- hatchet/external/scripts/boxplot.py | 159 +++++++++++++----- 3 files changed, 159 insertions(+), 62 deletions(-) diff --git a/docs/examples/tutorial/performance_variability_boxplots.ipynb b/docs/examples/tutorial/performance_variability_boxplots.ipynb index 695388e7..c7835d20 100644 --- a/docs/examples/tutorial/performance_variability_boxplots.ipynb +++ b/docs/examples/tutorial/performance_variability_boxplots.ipynb @@ -86,17 +86,54 @@ "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'time': }\n" + ] + } + ], "source": [ "callsites = gf.dataframe.name.unique().tolist()\n", "bp = BoxPlot(cat_column='rank', tgt_gf=gf, bkg_gf=None, callsites=callsites, metrics=[\"time\"])\n", - "boxplot = bp.unpack()" + "print(bp.tgt_gf)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'time': }" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bp.tgt_gf" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "boxplot = bp.to_json()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -112,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": { "scrolled": false }, @@ -622,7 +659,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -666,7 +703,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -683,7 +720,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -696,7 +733,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -820,7 +857,7 @@ "CalcMonotonicQGradientsForElems -1.5265491924225043 -0.08914394549811265 " ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -828,13 +865,6 @@ "source": [ "df" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/hatchet/external/roundtrip/roundtrip.py b/hatchet/external/roundtrip/roundtrip.py index 7c3215ab..5f07d867 100644 --- a/hatchet/external/roundtrip/roundtrip.py +++ b/hatchet/external/roundtrip/roundtrip.py @@ -129,7 +129,7 @@ def _validate_boxplot(self, data): "ocat": {"type": "array"}, "ometric": {"type": "array"}, "nid": {"type": "string"}, - "node": {"type": "string"}, + "node": {"type": "object"}, }, } diff --git a/hatchet/external/scripts/boxplot.py b/hatchet/external/scripts/boxplot.py index 316228d2..eeb46acb 100644 --- a/hatchet/external/scripts/boxplot.py +++ b/hatchet/external/scripts/boxplot.py @@ -30,6 +30,8 @@ def __init__( assert isinstance(metrics, list) assert isinstance(iqr_scale, float) + self.df_index = ["node"] + if bkg_gf is not None: assert isinstance(bkg_gf, ht.GraphFrame) assert cat_column in bkg_gf.dataframe.column @@ -51,27 +53,22 @@ def __init__( if len(metrics) == 0: self.metrics = tgt_gf.inc_metrics + tgt_gf.exc_metrics - tgt_gf.dataframe.reset_index(inplace=True) tgt_dict = BoxPlot.df_groupby( tgt_gf.dataframe, groupby="name", - cols=self.metrics + [self.cat_column], + cols=self.metrics + [self.cat_column] + self.df_index, ) + self.box_types = ["tgt"] if bkg_gf is not None: - bkg_gf.dataframe.reset_index(inplace=True) bkg_dict = BoxPlot.df_groupby( bkg_gf.dataframe, groupby="name", - cols=self.metrics + [self.cat_column], + cols=self.metrics + [self.cat_column] + self.df_index, ) - - self.result = {} - - self.box_types = ["tgt"] - if bkg_gf is not None: self.box_types = ["tgt", "bkg"] + self.result = {} for callsite in self.callsites: ret = {} tgt_df = tgt_dict[callsite] @@ -83,6 +80,10 @@ def __init__( self.result[callsite] = ret + self.tgt_gf = self.to_gf(tgt_gf, "tgt") + if bkg_gf is not None: + self.bkg_gf = self.to_gf(bkg_gf, "bkg") + @staticmethod def df_groupby(df, groupby, cols): """ @@ -159,7 +160,6 @@ def compute(self, df): } } """ - ret = {_: {} for _ in self.metrics} for tk, tv in zip(self.metrics, self.metrics): q = np.percentile(df[tv], [0.0, 25.0, 50.0, 75.0, 100.0]) @@ -182,11 +182,12 @@ def compute(self, df): "uv": (_mean, _var), "imb": _imb, "ks": (_kurt, _skew), + "node": df["node"].unique().tolist()[0], } return ret - def unpack(self): + def to_json(self): """ Unpack the boxplot data into JSON format. @@ -195,40 +196,106 @@ def unpack(self): Return: result (dict): { "callsite1": { - "metric1": { - "q": (array) quartiles (i.e., [q0, q1, q2, q3, q4]), - "ocat": (array) outlier from cat_column, - "ometric": (array) outlier from metri, - "min": (number) minimum, - "max": (number) maximum, - "mean": (number) mean, - "var": (number) variance, - "imb": (number) imbalance, - "kurt": (number) kurtosis, - "skew": (number) skewness, - }, ... - }, ... + "tgt": self._unpack_callsite, + "bkg": self._unpack_callsite + }, } """ - result = {} - for callsite in self.callsites: - result[callsite] = {} - for box_type in self.box_types: - result[callsite][box_type] = {} - for metric in self.metrics: - box = self.result[callsite][box_type][metric] - result[callsite][box_type][metric] = { - "q": box["q"].tolist(), - "ocat": box["ocat"].tolist(), - "ometric": box["ometric"].tolist(), - "min": box["rng"][0], - "max": box["rng"][1], - "mean": box["uv"][0], - "var": box["uv"][1], - "imb": box["imb"], - "kurt": box["ks"][0], - "skew": box["ks"][1], - } - - - return result + return { + callsite: { + _type: self._unpack_callsite(callsite, _type) + for _type in self.box_types + } + for callsite in self.callsites + } + + def _unpack_callsite(self, callsite, box_type, with_htnode=False): + """ + Helper function to unpack the data by callsite. + + Arguments: + callsite: Callsite's name + box_type: (string) Boxplot type (i.e., "tgt" or "bkg") + with_htnode: (bool) An option to add hatchet.Node to the dict. + + Return: + ret (dict): { + "metric": { + "q": (array) quartiles (i.e., [q0, q1, q2, q3, q4]), + "ocat": (array) outlier from cat_column, + "ometric": (array) outlier from metri, + "min": (number) minimum, + "max": (number) maximum, + "mean": (number) mean, + "var": (number) variance, + "imb": (number) imbalance, + "kurt": (number) kurtosis, + "skew": (number) skewness, + } + } + """ + ret = {} + for metric in self.metrics: + box = self.result[callsite][box_type][metric] + ret[metric] = { + "q": box["q"].tolist(), + "ocat": box["ocat"].tolist(), + "ometric": box["ometric"].tolist(), + "min": box["rng"][0], + "max": box["rng"][1], + "mean": box["uv"][0], + "var": box["uv"][1], + "imb": box["imb"], + "kurt": box["ks"][0], + "skew": box["ks"][1], + } + if with_htnode: + ret[metric]["node"] = box["node"] + + return ret + + def _to_gf_by_metric(self, gf, box_type, metric): + """ + Wrapper function to unpack the boxplot data into Hatchet.GraphFrame by + respective metric. + + Argument: + gf: (hatchet.GraphFrame) GraphFrame + box_type: (string) Boxplot type (i.e., "tgt" or "bkg") + metric: (string) Metric + + Return: + hatchet.GraphFrame with boxplot information as columns. + + """ + _dict = { + callsite: self._unpack_callsite(callsite, box_type, with_htnode=True)[ + metric + ] + for callsite in self.callsites + } + tmp_df = pd.DataFrame.from_dict(data=_dict).T + tmp_df.set_index(self.df_index, inplace=True) + + return ht.GraphFrame(gf.graph, tmp_df, gf.exc_metrics, gf.inc_metrics) + + def to_gf(self, gf, box_type): + """ + Unpack the boxplot data into GraphFrame object. + + Note: In this case, only the hatchet.dataframe will be updated, with + hatchet.Graph being the same as the input gf. + + Arguments: + gf: (hatchet.GraphFrame) GraphFrame + box_type: (string) Boxplot type (i.e., "tgt" or "bkg") + + Return: + (dict) : { + "metric": hatchet.GraphFrame, ... + } + """ + return { + metric: self._to_gf_by_metric(gf, box_type, metric) + for metric in self.metrics + } From 2545f2401bb7d5fe27a5d9da5abe3aeedf1b342d Mon Sep 17 00:00:00 2001 From: jarusified Date: Thu, 9 Sep 2021 11:09:06 -0700 Subject: [PATCH 43/45] Incorporate suggestions from the standup meeting --- .../performance_variability_boxplots.ipynb | 213 +++++++++++------- hatchet/external/scripts/boxplot.py | 4 +- 2 files changed, 131 insertions(+), 86 deletions(-) diff --git a/docs/examples/tutorial/performance_variability_boxplots.ipynb b/docs/examples/tutorial/performance_variability_boxplots.ipynb index c7835d20..4eb250c4 100644 --- a/docs/examples/tutorial/performance_variability_boxplots.ipynb +++ b/docs/examples/tutorial/performance_variability_boxplots.ipynb @@ -6,61 +6,20 @@ "source": [ "# Performance Variability Boxplots\n", "\n", - "Boxplots provide an insight into the runtime distribution among its MPI ranks. We provide 3 modes to visualize the performance variability of a GraphFrame.\n", - "\n", - "Boxplots are calculated to represent the range of the distribution and outliers (dots) correspond to the ranks which are beyond the 1.5*IQR. Additionally, several statistical measures like mean, variance, kurtosis, skewness across the MPI ranks are also provided.\"\n", - "\n", - "### Load roundtrip" + "Performance variability boxplots provide an insight into the runtime distribution and its varibility across callsites. Boxplots are calculated to represent the range of the distribution and outliers (dots) correspond which are beyond the 1.5*IQR. Additionally, several statistical measures like mean, variance, kurtosis, skewness are also provided." ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import os, sys\n", "from IPython.display import HTML, display\n", "\n", + "# Hatchet imports\n", "import hatchet as ht\n", - "\n", - "# This is the relative path from the notebook to Roundtrip files in hatchet/external/roundtrip/\n", - "roundtrip_path = '../../../hatchet/external/roundtrip/'\n", - "hatchet_path = \".\"\n", - "\n", - "# Add the path so that the notebook can find the Roundtrip extension\n", - "module_path = os.path.abspath(os.path.join(roundtrip_path)) \n", - "if module_path not in sys.path:\n", - " sys.path.append(module_path)\n", - " sys.path.append(hatchet_path)\n", - "\n", - " \n", - "# Uncomment this line to widen the cells to handle large trees \n", - "#display(HTML(\"\"))\n", - "\n", - "# Load the Roundtrip extension. This only needs to be loaded once.\n", - "%load_ext roundtrip" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ "from hatchet.external.scripts import BoxPlot" ] }, @@ -68,7 +27,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Boxplots for target GraphFrame" + "First, we will construct a **hatchet.GraphFrame** using a sample dataset in our repository, **caliper-lulesh-json**. " ] }, { @@ -82,57 +41,116 @@ "gf = ht.GraphFrame.from_caliper_json(data_path)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, using the **hatchet.GraphFrame**, we can calculate the data required for performance variability boxplot using an exposed hatchet API, **Boxplot**.\n", + "\n", + "The interface excepts the following attributes:\n", + "1. `tgt_gf` - Target hatchet.GraphFrame \n", + "2. `bkg_gf` - Background hatchet.GraphFrame (optional)\n", + "3. `callsites` - List of callsite names for which we want to compute/visualize the boxplots.\n", + "4. `metrics` - Runtime metrics for which we need to calculate the boxplots.\n", + "5. `iqr_scale` - Interquartile range scale (by default = 1.5)" + ] + }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'time': }\n" - ] - } - ], + "outputs": [], "source": [ "callsites = gf.dataframe.name.unique().tolist()\n", - "bp = BoxPlot(cat_column='rank', tgt_gf=gf, bkg_gf=None, callsites=callsites, metrics=[\"time\"])\n", - "print(bp.tgt_gf)" + "bp = BoxPlot(cat_column='rank', tgt_gf=gf, bkg_gf=None, callsites=callsites, metrics=[\"time\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Boxplot** API calculates the results and stores as a GraphFrames in a dictionary (i.e., `tgt` and `bkg`). " ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'time': }" + "{'time': }" ] }, - "execution_count": 5, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "bp.tgt_gf" + "bp.tgt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the **roundtrip** interface, we can then visualize the compute boxplot information. Below, we load the roundtrip interface that allows users to visualize plots on jupyter notebook cells directly. " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The roundtrip extension is already loaded. To reload it, use:\n", + " %reload_ext roundtrip\n" + ] + } + ], + "source": [ + "# This is the relative path from the notebook to Roundtrip files in hatchet/external/roundtrip/\n", + "roundtrip_path = '../../../hatchet/external/roundtrip/'\n", + "hatchet_path = \".\"\n", + "\n", + "# Add the path so that the notebook can find the Roundtrip extension\n", + "module_path = os.path.abspath(os.path.join(roundtrip_path)) \n", + "if module_path not in sys.path:\n", + " sys.path.append(module_path)\n", + " sys.path.append(hatchet_path)\n", + "\n", + " \n", + "# Uncomment this line to widen the cells to handle large trees \n", + "#display(HTML(\"\"))\n", + "\n", + "# Load the Roundtrip extension. This only needs to be loaded once.\n", + "%load_ext roundtrip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since **roundtrip** excepts the data in JSON format, **Boxplot** API exposes a method, `to_json()` which will dump the boxplot's graphframes (i.e., `tgt` and `bkg`) in JSON." ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ - "boxplot = bp.to_json()" + "bp_json = bp.to_json()" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -144,12 +162,25 @@ } ], "source": [ - "print(boxplot['main'])" + "print(bp_json['main'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we can trigger the visualization using **roundtrip** magic command, `%loadVisualization`. `%loadVisualization` expects the `roundtrip_path` (path in which roundtrip resides), `\"boxplot\"` (identifier to the visualization type) and variable containing the data for the boxplots (here it is bp_json).\n", + "\n", + "Interactions on the boxplot visualization:\n", + "1. Users can select the metric of interest to visualize the corresponding runtime information.\n", + "2. Users can sort the callsites by their statistical attributes (i.e., mean, min, max, variance, imbalance, kurtosis and skewness).\n", + "3. Users can select the sorting order (i.e., ascending or descending).\n", + "4. Users can select the number of callsites that would be visualized." ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 20, "metadata": { "scrolled": false }, @@ -172,11 +203,11 @@ "data": { "text/html": [ "\n", - "
\n", + "
\n", "
\n", "