From 75079bebea762e414b9f292285d7edacdc234f9c Mon Sep 17 00:00:00 2001 From: Oswaldo Lorenzo-Betancor Date: Mon, 25 Sep 2017 15:59:16 -0700 Subject: [PATCH] Add files via upload The modified code allows downloading the Table and the Plot that are generated by the subRVIS tool. The table data will be downloaded in .csv format and it will be called subRVIS table.csv The plot will be downloaded in .svg format and it will be called subRVIS plot.svg. --- server.R | 292 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ ui.R | 102 +++++++++++++++++++ 2 files changed, 394 insertions(+) create mode 100644 server.R create mode 100644 ui.R diff --git a/server.R b/server.R new file mode 100644 index 0000000..e76df13 --- /dev/null +++ b/server.R @@ -0,0 +1,292 @@ +library(shiny) +source("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/src/geneplots.R") + +## functions +Scores2Percentiles <- function(scores) { + stopifnot(is.numeric(scores)) + return((rank(scores) / length(scores)) * 100) +} + +# for domains +dmn.data <- ReadSubrgnData("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/domains_table.txt") +dmn.data$perc <- Scores2Percentiles(dmn.data$subRVIS) +dmn.bed <- ReadSubrgnBed("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/domains.bed") +dmn.map <- ReadDomainMap("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/cdd_table.txt") +dmn.pvals <- ReadGenePvals("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/domains_gene_pvals.txt") +dmn.sds <- ReadGeneSds("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/domains_gene_sds.txt") + +# for exons +exn.data <- ReadSubrgnData("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/exons_table.txt") +exn.data$perc <- Scores2Percentiles(exn.data$subRVIS) +exn.bed <- ReadSubrgnBed("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/exons.bed") +exn.map <- NULL +exn.pvals <- ReadGenePvals("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/exons_gene_pvals.txt") +exn.sds <- ReadGeneSds("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/exons_gene_sds.txt") + +# for all +strand.map <- ReadStrandData("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/strand_data.txt") + +# Define server logic +shinyServer(function(input, output) { + + # Expression that generates the plot. The expression is + # wrapped in a call to renderPlot to indicate that: + # + # 1) It is "reactive" and therefore should re-execute automatically + # when inputs change + # 2) Its output type is a plot + + GetGene <- eventReactive(input$plot, { + input$gene + }) + + GetRgn <- eventReactive(input$plot, { + input$rgn + }) + + GetScoreType <- eventReactive(input$plot, { + input$score.type + }) + + + GetVariants <- eventReactive(input$plot, { + + if (is.null(input$variants) || input$variants == "") { + return(NULL) + } + + + variants <- tryCatch({ + read.table(textConnection(input$variants), header=F) + }, error=function(e) { + stop("Unrecognized format for variants.") + }) + + if (ncol(variants) != 2) { + stop("Variants should have two columns: chromosome and position.") + } + + + colnames(variants) <- c("chrom", "pos") + if (!is.numeric(variants$pos)){ + stop("Variant position should be numeric.") + } + + return(variants) + }) + + GetPlotData <- eventReactive(input$plot, { + rgn <- GetRgn() + score.type <- GetScoreType() + + if (rgn == "Domains") { + subrgn.data <- dmn.data + subrgn.bed <- dmn.bed + subrgn.map <- dmn.map + gene.pvals <- dmn.pvals + gene.sds <- dmn.sds + } else { + subrgn.data <- exn.data + subrgn.bed <- exn.bed + subrgn.map <- exn.map + gene.pvals <- exn.pvals + gene.sds <- exn.sds + } + + gene <- GetGene() + if (!(toupper(gene) %in% toupper(subrgn.data$gene))) { + err.msg <- paste("Gene", gene, "cannot be found.") + stop(err.msg) + } + + strand <- strand.map[toupper(rownames(strand.map)) == toupper(gene), ] + if((length(strand) != 1) || (!(strand %in% c("+", "-")))) { + err.msg <- paste("Gene", gene, "has no strand data.") + stop(err.msg) + } + + + variants <- GetVariants() + + gene.subrgns <- GetGeneSubrgn(subrgn.data, subrgn.bed, subrgn.map, gene, rgn, score.type) + gene.name <- unique(gene.subrgns$gene) + stopifnot(length(gene.name)==1) + + p.val <- gene.pvals[toupper(gene.name), ] + gene.sd <- gene.sds[toupper(gene.name), "percentile"] + gene.bed <- subrgn.bed[toupper(subrgn.bed$gene) == toupper(gene), ] + + return(list(gene.subrgns = gene.subrgns, + subrgn.bed = subrgn.bed, + gene.bed = gene.bed, + gene.name = gene.name, + p.val = p.val, + gene.sd = gene.sd, + variants = variants, + rgn = rgn, + score.type = score.type, + strand = strand)) + }) + + + output$genePlot <- renderPlot({ + + plot.data <- GetPlotData() + + gene.plot <- PlotGeneSkel(plot.data$gene.subrgns, plot.data$gene.name, plot.data$rgn, plot.data$score.type) + + gene.plot <- PlotConnectors(gene.plot, plot.data$gene.subrgns, plot.data$strand) + if (plot.data$strand == "-") { + gene.plot <- gene.plot + scale_x_reverse(label=function(x) -(x-max(plot.data$gene.subrgns$end))) + } + + if (!is.null(plot.data$variants)) { + gene.plot <- AddVariants(gene.plot, plot.data$gene.subrgns, plot.data$subrgn.bed, plot.data$variants, plot.data$strand) + } + + ylab.txt <- ifelse(plot.data$score.type=="Percentiles", "subRVIS Percentile", "subRVIS Raw Score") + plot(gene.plot + labs(x=paste0("Gene Position\n", + "subRVIS P-value for known pathogenic mutations: ", + round(plot.data$p.val, digits=3), + "\nsubRVIS SDP: ", + round(plot.data$gene.sd, digits=1), + "%"), + y=ylab.txt)) + }) + + output$var.table = renderDataTable({ + plot.data <- GetPlotData() + + plot.table <- plot.data$gene.subrgns + plot.table$var.count <- rep(0, nrow(plot.table)) + + gene.chrom <- unique(plot.data$gene.bed$chrom) + stopifnot(length(gene.chrom) == 1) + + variants <- plot.data$variants + variants <- variants[variants$chrom == gene.chrom, ] + + positions <- variants$pos - 1 # convert to 0-based BED coords + + for (pos in positions) { + subrgn.name <- plot.data$gene.bed[pos >= plot.data$gene.bed$start & pos < plot.data$gene.bed$end, "subrgn"] + stopifnot(length(subrgn.name) <= 1) + + if(length(subrgn.name) == 1) { + plot.table[subrgn.name, "var.count"] <- plot.table[subrgn.name, "var.count"] + 1 + } + + } + + plot.table$name <- rownames(plot.table) + + cols.to.plot <- c("name", "start", "end", "size", "coverage_percent", "subRVIS", plot.data$rgn, "var.count") + + plot.table <- plot.table[, cols.to.plot] + plot.table$start <- plot.table$start + 1 + + if((plot.data$strand == "-") && (nrow(plot.table) != 1)) { + # Need to reverse coordinates + plot.table <- plot.table[order(-plot.table$start), ] + plot.table$start[1] <- 1 + plot.table$end[1] <- plot.table$size[1] + for (i in 2:nrow(plot.table)) { + plot.table[i, "start"] <- plot.table$end[i-1] + 1 + plot.table$end[i] <- plot.table$size[i] + plot.table$start[i] - 1 + } + } + + colnames(plot.table) <- c("name", "start", "end", "size", + "percent_covered", "score", + "sub-region", "variant_count") + + return(plot.table) + }) + + output$downloadData <- downloadHandler( + filename = function() { + paste('subRVIS table','.csv', sep='') + }, + content = function(file){ + plot.data <- GetPlotData() + + plot.table <- plot.data$gene.subrgns + plot.table$var.count <- rep(0, nrow(plot.table)) + + gene.chrom <- unique(plot.data$gene.bed$chrom) + stopifnot(length(gene.chrom) == 1) + + variants <- plot.data$variants + variants <- variants[variants$chrom == gene.chrom, ] + + positions <- variants$pos - 1 # convert to 0-based BED coords + + for (pos in positions) { + subrgn.name <- plot.data$gene.bed[pos >= plot.data$gene.bed$start & pos < plot.data$gene.bed$end, "subrgn"] + stopifnot(length(subrgn.name) <= 1) + + if(length(subrgn.name) == 1) { + plot.table[subrgn.name, "var.count"] <- plot.table[subrgn.name, "var.count"] + 1 + } + + } + + plot.table$name <- rownames(plot.table) + + cols.to.plot <- c("name", "start", "end", "size", "coverage_percent", "subRVIS", plot.data$rgn, "var.count") + + plot.table <- plot.table[, cols.to.plot] + plot.table$start <- plot.table$start + 1 + + if((plot.data$strand == "-") && (nrow(plot.table) != 1)) { + # Need to reverse coordinates + plot.table <- plot.table[order(-plot.table$start), ] + plot.table$start[1] <- 1 + plot.table$end[1] <- plot.table$size[1] + for (i in 2:nrow(plot.table)) { + plot.table[i, "start"] <- plot.table$end[i-1] + 1 + plot.table$end[i] <- plot.table$size[i] + plot.table$start[i] - 1 + } + } + + colnames(plot.table) <- c("name", "start", "end", "size", + "percent_covered", "score", + "sub-region", "variant_count") + + table_data <- plot.table + + write.csv(table_data, file) + }) + + + output$downloadPlot <- downloadHandler( + filename = function() { + paste("subRVIS plot", ".svg", sep='') + }, + content = function(file) { + plot.data <- GetPlotData() + + gene.plot <- PlotGeneSkel(plot.data$gene.subrgns, plot.data$gene.name, plot.data$rgn, plot.data$score.type) + + gene.plot <- PlotConnectors(gene.plot, plot.data$gene.subrgns, plot.data$strand) + if (plot.data$strand == "-") { + gene.plot <- gene.plot + scale_x_reverse(label=function(x) -(x-max(plot.data$gene.subrgns$end))) + } + + if (!is.null(plot.data$variants)) { + gene.plot <- AddVariants(gene.plot, plot.data$gene.subrgns, plot.data$subrgn.bed, plot.data$variants, plot.data$strand) + } + + ylab.txt <- ifelse(plot.data$score.type=="Percentiles", "subRVIS Percentile", "subRVIS Raw Score") + + plot(gene.plot + labs(x=paste0("Gene Position\n", + "subRVIS P-value for known pathogenic mutations: ", + round(plot.data$p.val, digits=3), + "\nsubRVIS SDP: ", + round(plot.data$gene.sd, digits=1), + "%"), + y=ylab.txt)) + + ggsave(file, plot = last_plot (), dpi = 300, width = 12, height = 6, units = "in") + }) +}) \ No newline at end of file diff --git a/ui.R b/ui.R new file mode 100644 index 0000000..afd5e1e --- /dev/null +++ b/ui.R @@ -0,0 +1,102 @@ +library(shiny) + +# Define UI +shinyUI(fluidPage( + + # Google Analytics + tags$head(includeScript("google-analytics.js")), + + # Application title + titlePanel("Plot Variants Across subRVIS Regions"), + + # Sidebar + sidebarLayout( + sidebarPanel( + textInput("gene", label = h3("Gene Name"), + value = "MAPT"), + + tags$style(type="text/css", "textarea {width:100%}"), + tags$textarea(id = 'variants', + placeholder = 'Variants (build 37), e.g. chr1 10003', + rows = 8, ""), + radioButtons("rgn", NULL, + choices=c("Domains", "Exons"), + selected = NULL, + inline = T, + width = NULL), + radioButtons("score.type", NULL, + choices=c("Raw", "Percentiles"), + selected = NULL, + inline = T, + width = NULL), + actionButton("plot", label = "Plot"), + downloadButton('downloadData', 'Download Table'), + downloadButton('downloadPlot', 'Download Plot') + ), + + # Show a plot of the generated distribution + mainPanel( + tabsetPanel( + tabPanel('Plot', + plotOutput("genePlot")), + tabPanel('Table', + dataTableOutput("var.table")) + ) + )), + p("This online tool can be used to plot the domain or exon subRVIS scores of a given gene."), + p("To the right of the plot is a legend. For domains, the legend denotes each aligned + conserved domain, with the CDD ID in parentheses next to the domain name. For exons, + the legend denotes the exon number. Note that genes that are on the minus strand + are plotted in reverse, i.e. regardless of its strand the first base of the gene + is at position zero at the leftmost point of the plot."), + p("There is the option to plot either the raw subRVIS scores or the subRVIS percentiles. The raw subRVIS + scores are the scores obtained from directly applying the subRVIS methodology (see ", a("Gussow et al", href="http://www.genomebiology.com/2016/17/1/9", target="_blank"), "). + The subRVIS percentiles are a scaled version of the score, where the raw scores were converted to + percentiles across the genome. Though the raw scores offer superior resolution, the percentiles + are easier to interpret as they are put into the context of the rest of the sub regions + in the genome. Lower subRVIS scores and percentiles correspond to more intolerant regions. + We have found that 35% and lower is a reasonable threshold for the subRVIS percentiles - in other words, we are more inclined to believe + that a mutation is likely to cause disease when it falls in a sub region in the lower 35th percentile of + subRVIS scores. However, by no means does that mean that mutations falling in sub regions below this percentile + are necessarily pathogenic, nor does it mean that mutations falling in sub regions above this percentile + are necessarily benign."), + p("Underneath the plot are two subRVIS related measures, per gene, intended to aid in the + interpretation of the results. The first, labelled + \"subRVIS P-value for known pathogenic mutations\", + assesses the relationship between subRVIS scores and the distribution of known + pathogenic mutations (see ", a("Gussow et al", href="http://www.genomebiology.com/2016/17/1/9", target="_blank"), "). The subRVIS p-value for known pathogenic + mutations obviously is only reported for known disease causing genes."), + p("The second measure, labelled \"subRVIS SDP\", is intended as a general score + to help assess whether the location of a mutation in a gene is an important factor + in determining whether the mutation is pathogenic. Unlike the p-value for + pathogenic mutations, this score can be constructed for both genes already + implicated in diseases and for genes not currently implicated. + This score is the standard deviation of the gene's domain or exon raw subRVIS scores, converted to percentiles. + A higher SDP indicates a higher degree of variation in the intolerance scores across the + gene's regions. Though these scores can be useful in predicting whether we expect + pathogenic mutations to cluster in specific sub regions within a given gene, + currently there is not a relationship between these scores and whether known + pathogenic mutations actually do so."), + h3("About"), + p("SubRVIS is a gene sub region based score intended to help in the interpretation of human + sequence data. The sub regions are defined as the regions within the gene that aligned + to the Conserved Domain Database (http://www.ncbi.nlm.nih.gov/cdd/) and the unaligned regions + between each conserved domain alignment."), + p("In its current form, subRVIS is based upon allele frequency as represented + in whole exome sequence data from the NHLBI-ESP6500 data set. The score is designed to + rank the genic sub regions based on their intolerance to functional variation. A lower score + indicates a more intolerant sub region, while a higher score indicates a more tolerant sub region."), + p("The paper can be found ", a("here.", href="http://www.genomebiology.com/2016/17/1/9", target="_blank")), + h3("Terms of use"), + p("The content of the subRVIS site is intended strictly for educational + and research purposes. The data derived from this website may not be used + for any commercial purpose. The data from this website may not be + replicated on any other website without written consent."), + h3("Citation"), + p("Gussow AB, Petrovski S, Wang Q, Allen AS, Goldstein DB. + The intolerance to functional genetic variation of protein domains predicts the localization of pathogenic mutations within genes. + Genome Biology 2016, 17:9. doi:10.1186/s13059-016-0869-4."), + h3("Contact"), + p("For bug reports, please contact Ayal Gussow (abg2164@columbia.edu).") +)) +