10000 Fix #475 - add flowchart by mattroumaya · Pull Request #545 · Gilead-BioStats/gsm · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Fix #475 - add flowchart #545

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Jul 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Encoding: UTF-8
Imports:
broom,
cli,
DiagrammeR,
dplyr,
fontawesome,
glue,
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,18 @@ export(Summarize)
export(Transform_EventCount)
export(Visualize_Count)
export(Visualize_Scatter)
export(Visualize_Workflow)
export(generate_md_table)
export(is_mapping_valid)
export(parse_data_mapping)
export(parse_data_spec)
export(rank_chg)
import(dplyr)
import(ggplot2)
import(purrr)
importFrom(DiagrammeR,create_graph)
importFrom(DiagrammeR,create_node_df)
importFrom(DiagrammeR,render_graph)
importFrom(broom,augment)
importFrom(broom,glance)
importFrom(cli,cli_alert_danger)
Expand Down Expand Up @@ -95,4 +100,5 @@ importFrom(tidyr,replace_na)
importFrom(tidyr,spread)
importFrom(tidyr,unnest)
importFrom(utils,hasName)
importFrom(utils,head)
importFrom(yaml,read_yaml)
18 changes: 9 additions & 9 deletions R/Study_Assess.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,16 @@ Study_Assess <- function(
if(hasName(lAssessment,"group")){
StratifiedAssessment <- MakeStratifiedAssessment(
lData = lData,
lAssessment = lAssessment,
lAssessment = lAssessment,
lMapping = lMapping,
bQuiet=bQuiet
)
)

# replace original assessment with stratified assessment list
lAssessments[[lAssessment$name]]<-NULL
lAssessment 10000 s[[lAssessment$name]]<-NULL
lAssessments <- c(lAssessments, StratifiedAssessment)
}
}
}

# Filter data$dfSUBJ based on lSubjFilters --------------------------------
if (!is.null(lSubjFilters)) {
Expand All @@ -103,10 +103,10 @@ Study_Assess <- function(
### --- Attempt to run each assessment --- ###
lAssessments <- lAssessments %>% map(
~ gsm::RunAssessment(
.x,
lData = lData,
lMapping = lMapping,
lTags = lTags,
.x,
lData = lData,
lMapping = lMapping,
lTags = lTags,
bQuiet = bQuiet
)
)
Expand Down
181 changes: 181 additions & 0 deletions R/Visualize_Workflow.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#' Flowchart visualization of data pipeline steps from filtering to summary data for an assessment workflow.
#'
#' @param lAssessments `list` A list of assessment-specific metadata.
#'
#' @return A flowchart of type `grViz`/`htmlwidget`.
#'
#' @examples
#' lAssessments <- MakeAssessmentList()
#' lData <- list(
#' dfSUBJ = clindata::rawplus_subj,
#' dfAE = clindata::rawplus_ae,
#' dfPD = clindata::rawplus_pd,
#' dfCONSENT = clindata::rawplus_consent,
#' dfIE = clindata::rawplus_ie
#' )
#' lTags <- list(
#' Study = "myStudy"
#' )
#' lMapping <- clindata::mapping_rawplus
#'
#' ae_assessment <- RunAssessment(lAssessments$ae, lData = lData, lMapping = lMapping, lTags = lTags)
#'
#' Visualize_Workflow(list(ae = ae_assessment))
#'
#' @importFrom DiagrammeR create_node_df create_graph render_graph
#' @importFrom utils head
#' @import purrr
#'
#' @export

Visualize_Workflow <- function(lAssessments) {

if(!is.null(lAssessments[[1]][["workflow"]])) {

dfFlowchart <- map(lAssessments, function(studyObject) {

name <- studyObject[["name"]]
checks <- studyObject[["checks"]]
workflow <- studyObject[["workflow"]]

# rename workflow when checks are missing
diff <- length(workflow) - length(checks)
vec <- c(names(checks), rep("", diff))

workflow <- workflow %>%
imap(~ append(., list(n_step = .y))) %>%
set_names(nm = vec)

# make checks and workflow the same length so map2_dfr below doesn't fail.
# empty lists will result in NA and will be accounted for to show domains that were not checked.
if(diff > 0) {
checks <- append(checks, vector(mode = "list", length = diff))
}

preAssessment <- map2_dfr(checks, workflow, function(checks, workflow) {

domains <- workflow$inputs
map_df(domains, function(x) {
tibble(
assessment = name,
name = workflow[["name"]],
inputs = x,
n_row = checks[[x]][["dim"]][1],
n_col = checks[[x]][["dim"]][2],
checks = checks[[x]][["status"]],
n_step = workflow[["n_step"]]
)
})
})

if(nrow(preAssessment) > 1){
preAssessment <- preAssessment %>%
slice(1:(n()-1))
}

preAssessment <- preAssessment %>%
mutate(
from = row_number()
) %>%
group_by(.data$n_step) %>%
mutate(
step = n(),
to = .data$n_step + .data$step
) %>%
select(-.data$step) %>%
ungroup()


pipeline <- studyObject$lResults[grep("df", names(studyObject$lResults))] %>%
purrr::imap_dfr(
~ tibble(
assessment = name,
name = .y,
inputs = .y,
n_row = nrow(.x),
n_col = ncol(.x),
checks = TRUE
)
) %>%
mutate(n_step = max(preAssessment$n_step) + row_number())

bind_rows(preAssessment, pipeline) %>%
mutate(
from = ifelse(is.na(.data$from), row_number(), from),
to = ifelse(is.na(.data$to), row_number() + 1, to)
)
})

# create_node_df for flowchart
# add custom labels/tooltips
flowchart <- map(dfFlowchart, function(assessment) {
df <- DiagrammeR::create_node_df(
n = nrow(assessment),
type = "a",
label = assessment$inputs,
value = assessment$name,
style = "filled",
color = "Black",
fontcolor = "Black",
fillcolor = "Honeydew",
shape = "rectangle",
n_row = assessment$n_row,
n_col = assessment$n_col,
checks = assessment$checks,
fixedsize = "false"
)

df <- replace(df, is.na(df), "")

node_df <- df %>%
mutate(
label = ifelse(.data$n_row != "", paste0(.data$label, "\n", .data$n_col, " x ", .data$n_row), .data$label),
tooltip = paste0("Data dimensions: \n", .data$label),
label = ifelse(
substr(.data$value, 1, 2) != "df",
paste0("[", .data$value, "]\n\n", .data$label),
.data$label
),
fillcolor = case_when(.data$checks == FALSE ~ "Tomato",
.data$checks == "" ~ "LightSlateGray",
TRUE ~ fillcolor),
tooltip = ifelse(checks == "", paste0(tooltip, "\nCheck Not Run"), tooltip)
)

if (FALSE %in% node_df$checks) {
node_df <- node_df %>%
add_row(
id = max(node_df$id) + 1,
type = "a",
label = "Error!",
value = "Error!",
style = "filled",
color = "Black",
fontcolor = "Black",
fillcolor = "Tomato",
shape = "rectangle",
fixedsize = "false",
tooltip = "Error in preceeding step(s). Check all workflow steps highlighted in red."
)
}

edge_df <- assessment %>%
filter(.data$to <= nrow(node_df)) %>%
select(.data$from, .data$to) %>%
as.data.frame()

DiagrammeR::create_graph(
nodes_df = node_df,
edges_df = edge_df,
attr_theme = "lr"
) %>%
DiagrammeR::render_graph()
})


return(flowchart)
} else {
return(list(lAssessments$name))
}

}
1 change: 1 addition & 0 deletions R/util-FilterDomain.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ FilterDomain <- function(
}
}


if (bReturnChecks) {
return(list(df = df, lChecks = checks))
} else {
Expand Down
9 changes: 8 additions & 1 deletion R/util-RunAssessment.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#' @importFrom cli cli_alert_success cli_alert_warning cli_h1 cli_h2 cli_text
#' @importFrom stringr str_detect
#' @importFrom yaml read_yaml
#' @importFrom purrr map_df
#'
#' @export

Expand All @@ -41,6 +42,7 @@ RunAssessment <- function(lAssessment, lData, lMapping, lTags = NULL, bQuiet = F
lAssessment$bStatus <- TRUE
if(exists("workflow", where = lAssessment)) {
# Run through each step in lAssessment$workflow

stepCount <- 1
for (step in lAssessment$workflow) {
if (!bQuiet) cli::cli_h2(paste0("Workflow Step ", stepCount, " of ", length(lAssessment$workflow), ": `", step$name, "`"))
Expand Down Expand Up @@ -72,13 +74,18 @@ RunAssessment <- function(lAssessment, lData, lMapping, lTags = NULL, bQuiet = F
} else {
if(!bQuiet) cli::cli_text("Skipping {.fn {step$name}} ...")
}

stepCount <- stepCount + 1
}
} else {
if(!bQuiet) cli::cli_alert_warning("Workflow not found for {lAssessment$name} assessment - Skipping remaining steps")
lAssessment$bStatus <- FALSE
}


lAssessment$lChecks$flowchart <- Visualize_Workflow(list(temp_name = lAssessment)) %>%
set_names(nm = lAssessment$name)
if(!bQuiet) cli::cli_alert_success("{.fn Visualize_Workflow} created a flowchart.")


return(lAssessment)
}
5 changes: 4 additions & 1 deletion R/util-is_mapping_valid.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,10 @@ is_mapping_valid <- function(df, mapping, spec, bQuiet = TRUE) {
if (!is.data.frame(df)) {
tests_if$is_data_frame$status <- FALSE
tests_if$is_data_frame$warning <- "df is not a data.frame()"
dim <- NA
} else {
tests_if$is_data_frame$status <- TRUE
dim <- dim(df)
}

# basic `mapping` checks
Expand Down Expand Up @@ -193,7 +195,8 @@ is_mapping_valid <- function(df, mapping, spec, bQuiet = TRUE) {
# if not, FALSE
is_valid <- list(
status = all(map_lgl(tests_if, ~ .$status)),
tests_if = tests_if
tests_if = tests_if,
dim = dim
)

return(is_valid)
Expand Down
36 changes: 36 additions & 0 deletions man/Visualize_Workflow.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
0