From 2da41568ccaf67e9fc7de1c3b1476ea1ac260a3c Mon Sep 17 00:00:00 2001 From: Michael Langan Date: Tue, 28 Feb 2017 10:52:03 -0800 Subject: [PATCH 01/69] Setting version to 1.4.11-SNAPSHOT --- version.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sbt b/version.sbt index 5d79750..a2b7cd7 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -version in ThisBuild := "1.4.10" \ No newline at end of file +version in ThisBuild := "1.4.11-SNAPSHOT" \ No newline at end of file From c99d0edc708f83c819ac759a844d00da9ed6cdda Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Mon, 9 Sep 2019 17:19:10 -0700 Subject: [PATCH 02/69] WIP on cross-compiling --- build.sbt | 104 +++++++++++++++--------- core/src/test/resources/log4j2-test.xml | 13 +++ project/Dependencies.scala | 38 +++++++-- project/build.properties | 2 +- project/plugins.sbt | 2 +- webapp/build.sbt | 10 +-- 6 files changed, 116 insertions(+), 53 deletions(-) create mode 100644 core/src/test/resources/log4j2-test.xml diff --git a/build.sbt b/build.sbt index fc666e3..f16c8d6 100644 --- a/build.sbt +++ b/build.sbt @@ -1,68 +1,94 @@ +import Dependencies._ + +lazy val scala211 = "2.11.12" +lazy val scala212 = "2.12.9" +lazy val scala213 = "2.13.0" +lazy val supportedScalaVersions = List(scala211) + +ThisBuild / organization := "org.allenai.common" +ThisBuild / version := "1.4.11-SNAPSHOT" +ThisBuild / scalaVersion := scala211 + +lazy val common = (project in file(".")) + .aggregate(cache, + core, + guice, + indexing, + testkit, + webapp) + .settings( + crossScalaVersions := Nil, + publish / skip := true, + buildSettings + ) + lazy val buildSettings = Seq( organization := "org.allenai.common", - crossScalaVersions := Seq(Dependencies.defaultScalaVersion), - scalaVersion <<= crossScalaVersions { (vs: Seq[String]) => vs.head }, publishMavenStyle := true, publishArtifact in Test := false, pomIncludeRepository := { _ => false }, licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")), homepage := Some(url("https://github.com/allenai/common")), apiURL := Some(url("https://allenai.github.io/common/")), - scmInfo := Some(ScmInfo( - url("https://github.com/allenai/common"), - "https://github.com/allenai/common.git")), + scmInfo := Some( + ScmInfo( + url("https://github.com/allenai/common"), + "https://github.com/allenai/common.git" + ) + ), pomExtra := ( - - - allenai-dev-role - Allen Institute for Artificial Intelligence - dev-role@allenai.org - - ), + + + allenai-dev-role + Allen Institute for Artificial Intelligence + dev-role@allenai.org + + ), bintrayPackage := s"${organization.value}:${name.value}_${scalaBinaryVersion.value}" ) lazy val cache = Project(id = "cache", base = file("cache")) - .settings(buildSettings) - .enablePlugins(LibraryPlugin) + .settings( + crossScalaVersions := supportedScalaVersions, + buildSettings + ) .dependsOn(core, testkit % "test->compile") lazy val core = Project(id = "core", base = file("core")) - .settings(buildSettings) - .enablePlugins(LibraryPlugin) + .settings( + crossScalaVersions := supportedScalaVersions, + buildSettings + ) .dependsOn(testkit % "test->compile") lazy val guice = Project(id = "guice", base = file("guice")) - .settings(buildSettings) - .enablePlugins(LibraryPlugin) + .settings( + crossScalaVersions := supportedScalaVersions, + buildSettings + ) .dependsOn(core, testkit % "test->compile") lazy val indexing = Project(id = "indexing", base = file("indexing")) - .settings(buildSettings) - .enablePlugins(LibraryPlugin) + .settings( + crossScalaVersions := supportedScalaVersions, + buildSettings + ) .dependsOn(core, testkit % "test->compile") lazy val testkit = Project(id = "testkit", base = file("testkit")) - .settings(buildSettings) - .enablePlugins(LibraryPlugin) + .settings( + crossScalaVersions := supportedScalaVersions, + buildSettings + ) lazy val webapp = Project(id = "webapp", base = file("webapp")) - .settings(buildSettings) - .enablePlugins(LibraryPlugin) + .settings( + crossScalaVersions := supportedScalaVersions, + buildSettings, + libraryDependencies ++= Seq( + "org.scala-lang" % "scala-reflect" % scalaVersion.value + ) + ) .dependsOn(core, testkit % "test->compile") -lazy val common = Project(id = "common", base = file(".")).settings( - // Don't publish a jar for the root project. - publishArtifact := false, - publishTo := Some("dummy" at "nowhere"), - publish := { }, - publishLocal := { }, - scaladocGenGitRemoteRepo := "git@github.com:allenai/common.git" -).aggregate( - cache, - core, - guice, - indexing, - testkit, - webapp -).enablePlugins(LibraryPlugin, ScaladocGenPlugin) + diff --git a/core/src/test/resources/log4j2-test.xml b/core/src/test/resources/log4j2-test.xml new file mode 100644 index 0000000..9f905c9 --- /dev/null +++ b/core/src/test/resources/log4j2-test.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 662a6ec..c408548 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -1,9 +1,7 @@ import sbt._ -import org.allenai.plugins.CoreDependencies - /** Object holding the dependencies Common has, plus resolvers and overrides. */ -object Dependencies extends CoreDependencies { +object Dependencies { val apacheLang3 = "org.apache.commons" % "commons-lang3" % "3.4" @@ -34,7 +32,37 @@ object Dependencies extends CoreDependencies { val scalaCheck = "org.scalacheck" %% "scalacheck" % "1.11.4" - def scalaReflection(scalaVersion: String) = "org.scala-lang" % "scala-reflect" % scalaVersion - val scalaTest = "org.scalatest" %% "scalatest" % "2.2.1" + + val defaultAkkaVersion = "2.4.10" + + def akkaModule(id: String, version: String = defaultAkkaVersion): ModuleID = + "com.typesafe.akka" %% s"akka-$id" % version + + val akkaActor = akkaModule("actor") exclude ("com.typesafe", "config") + val akkaTestkit = akkaModule("testkit") + + val sprayVersion = "1.3.3" + def sprayModule(id: String): ModuleID = "io.spray" %% s"spray-$id" % sprayVersion + val sprayRouting = sprayModule("routing") + val sprayClient = sprayModule("client") + val sprayTestkit = sprayModule("testkit") + + // Spray json (separate from Spray toolkit) + val sprayJson = "io.spray" %% "spray-json" % "1.3.2" + + val typesafeConfig = "com.typesafe" % "config" % "1.2.1" + + val scopt = "com.github.scopt" %% "scopt" % "3.3.0" + + object Logging { + val slf4jVersion = "1.7.10" + val logbackVersion = "1.1.2" + // The logging API to use. This should be the only logging dependency of any API artifact + // (anything that's going to be depended on outside of this SBT project). + val slf4jApi = "org.slf4j" % "slf4j-api" % slf4jVersion + val logbackCore = "ch.qos.logback" % "logback-core" % logbackVersion + val logbackClassic = "ch.qos.logback" % "logback-classic" % logbackVersion + } + } diff --git a/project/build.properties b/project/build.properties index 43b8278..080a737 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=0.13.11 +sbt.version=1.3.0 diff --git a/project/plugins.sbt b/project/plugins.sbt index 08f6bd4..945dd61 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1 +1 @@ -addSbtPlugin("org.allenai.plugins" % "allenai-sbt-plugins" % "1.5.0") +addSbtPlugin("org.foundweekends" % "sbt-bintray" % "0.5.4") diff --git a/webapp/build.sbt b/webapp/build.sbt index 93cc606..072e6d3 100644 --- a/webapp/build.sbt +++ b/webapp/build.sbt @@ -12,10 +12,6 @@ libraryDependencies ++= Seq( sprayTestkit % Test ) -dependencyOverrides ++= Set( - // Override needed because spray testkit declares dependency on an older version of akka - akkaTestkit, - pegdown, - scalaCheck, - scalaReflection(defaultScalaVersion) -) +dependencyOverrides += akkaTestkit +dependencyOverrides += pegdown +dependencyOverrides += scalaCheck From ce7bebfb5bf2375b48bfd2ff1a8db86ee29a90dd Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:31:46 -0700 Subject: [PATCH 03/69] Remove shared code for indexing module --- indexing/README.md | 87 ---- indexing/build.sbt | 12 - .../org/allenai/common/indexing/indexing.conf | 391 ------------------ .../common/indexing/mappingHelpers.conf | 14 - .../indexing/BarronsDocumentReader.scala | 42 -- .../common/indexing/BuildCorpusIndex.scala | 387 ----------------- .../indexing/BuildCorpusIndexRunner.scala | 42 -- .../indexing/BulkProcessorUtility.scala | 94 ----- .../ElasticSearchTransportClientUtil.scala | 40 -- .../common/indexing/ParsingUtils.scala | 58 --- .../common/indexing/SegmentedDocument.scala | 77 ---- .../indexing/WaterlooSegmentScript.scala | 102 ----- .../indexing/BarronsDocumentReaderSpec.scala | 52 --- .../indexing/BuildCorpusIndexSpec.scala | 155 ------- .../indexing/SegmentedDocumentSpec.scala | 71 ---- 15 files changed, 1624 deletions(-) delete mode 100644 indexing/README.md delete mode 100644 indexing/build.sbt delete mode 100644 indexing/src/main/resources/org/allenai/common/indexing/indexing.conf delete mode 100644 indexing/src/main/resources/org/allenai/common/indexing/mappingHelpers.conf delete mode 100644 indexing/src/main/scala/org/allenai/common/indexing/BarronsDocumentReader.scala delete mode 100644 indexing/src/main/scala/org/allenai/common/indexing/BuildCorpusIndex.scala delete mode 100644 indexing/src/main/scala/org/allenai/common/indexing/BuildCorpusIndexRunner.scala delete mode 100644 indexing/src/main/scala/org/allenai/common/indexing/BulkProcessorUtility.scala delete mode 100644 indexing/src/main/scala/org/allenai/common/indexing/ElasticSearchTransportClientUtil.scala delete mode 100644 indexing/src/main/scala/org/allenai/common/indexing/ParsingUtils.scala delete mode 100644 indexing/src/main/scala/org/allenai/common/indexing/SegmentedDocument.scala delete mode 100644 indexing/src/main/scala/org/allenai/common/indexing/WaterlooSegmentScript.scala delete mode 100644 indexing/src/test/scala/org/allenai/common/indexing/BarronsDocumentReaderSpec.scala delete mode 100644 indexing/src/test/scala/org/allenai/common/indexing/BuildCorpusIndexSpec.scala delete mode 100644 indexing/src/test/scala/org/allenai/common/indexing/SegmentedDocumentSpec.scala diff --git a/indexing/README.md b/indexing/README.md deleted file mode 100644 index 5b9c7ce..0000000 --- a/indexing/README.md +++ /dev/null @@ -1,87 +0,0 @@ -`indexing` -============= - -**Boss**: Roie - -Builds an elasticsearch index on an existing ElasticSearch instance, using one of the configurations defined in the indexing.conf file in resources/org/allenai/ari/indexing. - -To use this, you need to have a running instance of ElasticSearch (can be local or remote). As of this writing, the latest version is `1.7.2`. To install: Refer to http://joelabrahamsson.com/elasticsearch-101/ to get started, or use `brew install elasticsearch`. NOTE: you need to have the _same_ version of elastic search installed as the code is using, or you will get abstruse errors. `brew install elasticsearch` installed version 2.1.0 as of 12/2015, and the code is currently using version 1.7.1. You need to use the command `brew install homebrew/versions/elasticsearch17` to install the correct version of elasticsearch. -Once you have ElasticSearch, go to the `bin` directory and run: `./elasticsearch` - -Configurations are of the form: - -``` -org.allenai.common.indexing.[NAME_OF_INDEX] - - elasticSearch { - clusterName: [CLUSTER_NAME] - hostAddress: "127.0.0.1" - indexName: [NAME_OF_INDEX] - indexType: "sentence" - - mapping = ${org.allenai.common.indexing.standardMapping} - } - - buildIndexOptions { - // If set to true will throw an exception if the index already exists - // NOTE: if set to false, and a document already exists in the index, will create a duplicate document - // This was an intentional design decision (otherwise elasticsearch would have to first issue an exists query). - buildFromScratch = true - // specifies where to dump serialized failed bulk index requests - dumpFolder: "[PATH_TO_DUMP]" - } - - // Template list of corpora - corpora: [ - { - // Specifies the format of the corpus, either "waterloo" or "datastore". Defaults to "datastore" - corpusType: "waterloo" - directory: "[PATH_TO_WATERLOO_FORMAT]" - }, - { - corpusType: "datastore" - group: [DATASTORE_GROUP] - directory: [DATASTORE_FOLDER_PATH] - version: [VERSION_NO] - file: [FILE_NAME] - // specifies whether to use public or private datastore - privacy: "public" - }, - ] - } -``` -Configuration notes: - -1. Do not change the mapping field unless you know what you are doing, otherwise existing solvers will not be able to query the index. -2. If using a waterloo format corpus, will attempt to split documents in folder based on `...` tags. -3. If using a datastore corpus: if a file is specified with no directory, will attempt to find the file in the default location. If a directory is specified with no file, will automatically walk the entire file tree and add index files that do not begin with "." -4. The `clusterName` of an instance is set in the `elasticsearch.yml` in `$ES_HOME/config`, which can be located by submitting the curl request: `curl -XGET 'localhost:9200/_nodes/settings'`. It is recommended you change this from `elasticsearch`, since elasticsearch has an autodiscovery feature that will automatically join your machine as a node to any clusters on the network with the same name. You will need to restart elasticsearch for configuration changes to take effect. - -### Running instructions - -To run, specify which configuration you which to use as the argument to BuildCorpusIndex in BuildCorpusIndexRunner.scala, and run BuildCorpusIndexRunner.scala. Running notes: - -1. Make sure the ip address and port in the configuration correspond to those of the machine you which to build the index on. -2. If the buildFromScratch flag is set to true will not throw an exception if the index already exists, and add to the current index. However, if a document already exists in the index, will create a duplicate document. This was an intentional design decision (otherwise elasticsearch would have to first issue an exists query). -3. After executing all requests, will dump failed queries to a dump folder, and retry failed queries once. - -Sample Command lines: - -With Overrides to one or more index-building config parameters: - -``` -sbt "indexing/runMain org.allenai.common.indexing.BuildCorpusIndexRunner --index-name barrons --config-overrides-file /path/to/overrides.conf" -``` - -Sample overrides.conf: -``` -{ - elasticSearch.clusterName: "solvercorpora" -} -``` - -Without Config Overrides: - -``` -sbt "indexing/runMain org.allenai.common.indexing.BuildCorpusIndexRunner --index-name barrons" -``` diff --git a/indexing/build.sbt b/indexing/build.sbt deleted file mode 100644 index 83227b0..0000000 --- a/indexing/build.sbt +++ /dev/null @@ -1,12 +0,0 @@ -import Dependencies._ - -name := "common-indexing" - -libraryDependencies ++= Seq( - elasticSearch, - scopt, - sprayClient, - typesafeConfig, - datastore, - nlpstack("segment") -) diff --git a/indexing/src/main/resources/org/allenai/common/indexing/indexing.conf b/indexing/src/main/resources/org/allenai/common/indexing/indexing.conf deleted file mode 100644 index ba5b1f6..0000000 --- a/indexing/src/main/resources/org/allenai/common/indexing/indexing.conf +++ /dev/null @@ -1,391 +0,0 @@ -include "mappingHelpers.conf" - -org.allenai.common.indexing.stoplist { - group: "org.allenai.nlp.resources" - name: "smart_stopwords_2.txt" - version: 1 -} - -org.allenai.common.indexing.base { - elasticSearch { - hostAddress: "127.0.0.1" - - // Your cluster name can be obtained with the following command. - // curl -XGET 'http://:9200/_cluster/health?pretty=true'. - clusterName: "elasticsearch" - indexName: "lucene" - - // Aristo should override the above values with the following: - //clusterName: "aristo-es" - //hostAddress: "aristo-es.dev.ai2" - } - - buildIndexOptions { - buildFromScratch: true - dumpFolder: "common/src/main/resources/org/allenai/ari/indexing/dump" - } -} - -org.allenai.common.indexing.sentence_base: ${org.allenai.common.indexing.base} { - elasticSearch { - indexType: "sentence" - mapping { - sentence { - dynamic: false, - properties { - text = ${stemmedTextField} - source = ${rawTextField} - } - } - } - } -} - -org.allenai.common.indexing.paragraph_base: ${org.allenai.common.indexing.base} { - elasticSearch { - indexType: "paragraph" - mapping { - paragraph { - dynamic: false, - properties { - text = ${stemmedTextField} - source = ${rawTextField} - } - } - } - } -} - -org.allenai.common.indexing.qa_base: ${org.allenai.common.indexing.base} { - elasticSearch { - indexType: "question-answer" - mapping { - question-answer { - dynamic: false, - properties { - question = ${stemmedTextField} - answer = ${stemmedTextField} - source = ${rawTextField} - } - } - } - } -} - -org.allenai.common.indexing.termdef_base: ${org.allenai.common.indexing.base} { - elasticSearch { - indexType: "term-definition" - mapping { - term-definition { - dynamic: false, - properties { - term = ${stemmedTextField} - definition = ${stemmedTextField} - source = ${rawTextField} - } - } - } - } -} - -org.allenai.common.indexing.waterloo: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "waterloo" - corpora: [ - { - pathIsLocal: true - documentFormat: "waterloo" - directory: "/mnt/wumpus/Waterloo/CorpusSegmented" - } - ] -} - -org.allenai.common.indexing.waterloofiltered1: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "waterloofiltered1" - corpora: [ - { - group: "org.allenai.aristo.corpora.derivative" - directory: "WaterlooFiltered" - version: 2 - file: "waterloo-filtered-0.6-1-confidence.txt" - } - ] -} - -org.allenai.common.indexing.waterloofiltered2: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "waterloofiltered2" - corpora: [ - { - group: "org.allenai.aristo.corpora.derivative" - directory: "WaterlooFiltered" - version: 2 - file: "waterloo-filtered-0.47-0.6-confidence.txt" - } - ] -} - -org.allenai.common.indexing.simplewikipedia: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "simplewikipedia" - corpora: [ - { - group: "org.allenai.corpora.wikipedia" - version: 1 - file: "SimpleWikipedia-all.txt" - privacy: "public" - } - ] -} - -org.allenai.common.indexing.simplewikipedia_paragraph: ${org.allenai.common.indexing.paragraph_base} { - elasticSearch.indexName: "simplewikipedia-paragraph" - corpora: [ - { - documentFormat: "simple wikipedia" - group: "org.allenai.corpora.wikipedia" - file: "SimpleWikipedia-all.txt" - version: 1 - privacy: "public" - } - ] -} - -org.allenai.common.indexing.wikipedia: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "wikipedia" - corpora: [ - { - group: "org.allenai.corpora.wikipedia" - directory: "Wikipedia-all" - version: 1 - privacy: "public" - } - ] -} - -org.allenai.common.indexing.barrons: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "barrons" - corpora: [ - { - group: "org.allenai.aristo.corpora.derivative" - directory: "Barrons-4thGrade.sentences" - version: 1 - file: "Barrons.sentences.txt" - } - ] -} - -org.allenai.common.indexing.websentences: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "websentences" - corpora: [ - { - group: "org.allenai.aristo.corpora.source" - directory: "WebSentences-allSources" - version: 1 - file:"CurrentWebCorpus-allSources.txt" - } - ] -} - -org.allenai.common.indexing.ck12biov44: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "ck12biov44" - corpora: [ - { - group: "org.allenai.aristo.corpora.derivative" - directory: "CK-12-Biology-v44.sentences" - version: 1 - file: "CK-12-Biology_b_v44_fhp.txt.sentences.txt" - } - ] -} - -org.allenai.common.indexing.barrons_paragraph: ${org.allenai.common.indexing.paragraph_base} { - elasticSearch.indexName: "barrons-paragraph" - corpora: [ - { - documentFormat: "barrons" - group: "org.allenai.aristo.corpora.source" - directory: "Barrons-4th-Grade" - version: 1 - file: "Barrons.txt" - } - ] -} - -org.allenai.common.indexing.quizlet_qna: ${org.allenai.common.indexing.qa_base} { - elasticSearch.indexName: "quizlet-qna" - corpora: [ - { - documentFormat: "question-answer" - group: "org.allenai.aristo.corpora.derivative" - directory: "QuizletQnA" - version: 1 - file: "QuizletQnA.txt" - } - ] -} - -org.allenai.common.indexing.quizlet_termdef: ${org.allenai.common.indexing.termdef_base} { - elasticSearch.indexName: "quizlet-termdef" - corpora: [ - { - documentFormat: "term-definition" - group: "org.allenai.aristo.corpora.derivative" - directory: "QuizletTermDefinitions" - version: 1 - file: "QuizletTermDefinitions.txt" - } - ] -} - -org.allenai.common.indexing.studystack_qna: ${org.allenai.common.indexing.qa_base} { - elasticSearch.indexName: "studystack-qna" - corpora: [ - { - documentFormat: "question-answer" - group: "org.allenai.aristo.corpora.derivative" - directory: "StudyStackQnA" - version: 1 - file: "StudyStackQnA.txt" - } - ] -} - -org.allenai.common.indexing.studystack_termdef: ${org.allenai.common.indexing.termdef_base} { - elasticSearch.indexName: "studystack-termdef" - corpora: [ - { - documentFormat: "term-definition" - group: "org.allenai.aristo.corpora.derivative" - directory: "StudyStackTermDefinitions" - version: 1 - file: "StudyStackTermDefinitions.txt" - } - ] -} - -org.allenai.common.indexing.virginiaflashcards: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "virginiaflashcard-sentences" - corpora: [ - { - group: "org.allenai.aristo.corpora.derivative" - directory: "UArizonaScienceCorpora" - version: 1 - file: "virginia_SOL_flashcards-science5.filtered.txt" - } - ] -} - -org.allenai.common.indexing.ck12_flexbook_gr3_sentences: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "ck12-flexbook-gr3-sentences" - corpora: [ - { - documentFormat: "sentence per line" - group: "org.allenai.aristo.corpora.derivative" - directory: "CK-12-Flexbooks" - version: 1 - file: "ck12-gr3-sentences.txt" - } - ] -} - -org.allenai.common.indexing.ck12_flexbook_gr4_sentences: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "ck12-flexbook-gr4-sentences" - corpora: [ - { - documentFormat: "sentence per line" - group: "org.allenai.aristo.corpora.derivative" - directory: "CK-12-Flexbooks" - version: 1 - file: "ck12-gr4-sentences.txt" - } - ] -} - -org.allenai.common.indexing.ck12_flexbook_gr5_sentences: ${org.allenai.common.indexing.sentence_base} { - elasticSearch.indexName: "ck12-flexbook-gr5-sentences" - corpora: [ - { - documentFormat: "sentence per line" - group: "org.allenai.aristo.corpora.derivative" - directory: "CK-12-Flexbooks" - version: 1 - file: "ck12-gr5-sentences.txt" - } - ] -} - -org.allenai.common.indexing.ck12_flexbook_gr3_qna: ${org.allenai.common.indexing.qa_base} { - elasticSearch.indexName: "ck12-flexbook-gr3-qna" - corpora: [ - { - documentFormat: "question-answer" - group: "org.allenai.aristo.corpora.derivative" - directory: "CK-12-Flexbooks" - version: 1 - file: "ck12-gr3-qna.txt" - } - ] -} - -org.allenai.common.indexing.ck12_flexbook_gr4_qna: ${org.allenai.common.indexing.qa_base} { - elasticSearch.indexName: "ck12-flexbook-gr4-qna" - corpora: [ - { - documentFormat: "question-answer" - group: "org.allenai.aristo.corpora.derivative" - directory: "CK-12-Flexbooks" - version: 1 - file: "ck12-gr4-qna.txt" - } - ] -} - -org.allenai.common.indexing.ck12_flexbook_gr5_qna: ${org.allenai.common.indexing.qa_base} { - elasticSearch.indexName: "ck12-flexbook-gr5-qna" - corpora: [ - { - documentFormat: "question-answer" - group: "org.allenai.aristo.corpora.derivative" - directory: "CK-12-Flexbooks" - version: 1 - file: "ck12-gr5-qna.txt" - } - ] -} - -org.allenai.common.indexing.ck12_flexbook_gr3_termdef: ${org.allenai.common.indexing.termdef_base} { - elasticSearch.indexName: "ck12-flexbook-gr3-termdef" - corpora: [ - { - documentFormat: "term-definition" - group: "org.allenai.aristo.corpora.derivative" - directory: "CK-12-Flexbooks" - version: 1 - file: "ck12-gr3-termdef.txt" - } - ] -} - -org.allenai.common.indexing.ck12_flexbook_gr4_termdef: ${org.allenai.common.indexing.termdef_base} { - elasticSearch.indexName: "ck12-flexbook-gr4-termdef" - corpora: [ - { - documentFormat: "term-definition" - group: "org.allenai.aristo.corpora.derivative" - directory: "CK-12-Flexbooks" - version: 1 - file: "ck12-gr4-termdef.txt" - } - ] -} - -org.allenai.common.indexing.ck12_flexbook_gr5_termdef: ${org.allenai.common.indexing.termdef_base} { - elasticSearch.indexName: "ck12-flexbook-gr5-termdef" - corpora: [ - { - documentFormat: "term-definition" - group: "org.allenai.aristo.corpora.derivative" - directory: "CK-12-Flexbooks" - version: 1 - file: "ck12-gr5-termdef.txt" - } - ] -} diff --git a/indexing/src/main/resources/org/allenai/common/indexing/mappingHelpers.conf b/indexing/src/main/resources/org/allenai/common/indexing/mappingHelpers.conf deleted file mode 100644 index 55d7b78..0000000 --- a/indexing/src/main/resources/org/allenai/common/indexing/mappingHelpers.conf +++ /dev/null @@ -1,14 +0,0 @@ -// Format of all raw (non-analyzed) text fields: -rawTextField { - type = "string" - index = "not_analyzed" -} - -// Format of all stemmed text fields, including a raw interpretation: -stemmedTextField { - type = "string" - analyzer = "snowball" - fields { - raw = ${rawTextField} - } -} diff --git a/indexing/src/main/scala/org/allenai/common/indexing/BarronsDocumentReader.scala b/indexing/src/main/scala/org/allenai/common/indexing/BarronsDocumentReader.scala deleted file mode 100644 index 69da679..0000000 --- a/indexing/src/main/scala/org/allenai/common/indexing/BarronsDocumentReader.scala +++ /dev/null @@ -1,42 +0,0 @@ -package org.allenai.common.indexing - -import java.io.File -import scala.io.Codec -import scala.io.Source - -class BarronsDocumentReader(file: File, codec: Codec) { - def read(): SegmentedDocument = { - // Barron's isn't all that large, and this makes testing this a lot easier. - val lines = Source.fromFile(file)(codec).getLines.toSeq - _readLines(lines) - } - - def _readLines(lines: Seq[String]): SegmentedDocument = { - // At this point, I'm just going to worry about getting paragraph structure out. Sections and - // chapters will have to wait for another time, if someone cares about doing that some day. - // So, this is a really simple parsing algorithm that will just group consecutive sentences - // into paragraphs, where "consecutive" is defined by the sentence numbers in the original - // file. - var prevKey = "" - var prevSentence = "" - val builder = new SegmentedDocumentBuilder(lines.mkString("\n")) - for (line <- lines) { - val fields = line.split("\t") - val longNumber = fields(0) - val sentenceText = fields(1) - val (key, sentenceNumber) = longNumber.splitAt(longNumber.lastIndexOf(".")) - if (key != prevKey) { - if (prevKey != "") { - builder.finishNonTerminalSegment() - } - prevKey = key - builder.startNewNonTerminalSegment("paragraph") - } - // I could check that the sentence number increments by one here, but I don't think that's - // actually necessary. Just checking the key should be enough. - builder.addTerminalSegment("sentence", sentenceText) - } - builder.finishNonTerminalSegment() - builder.build() - } -} diff --git a/indexing/src/main/scala/org/allenai/common/indexing/BuildCorpusIndex.scala b/indexing/src/main/scala/org/allenai/common/indexing/BuildCorpusIndex.scala deleted file mode 100644 index f190688..0000000 --- a/indexing/src/main/scala/org/allenai/common/indexing/BuildCorpusIndex.scala +++ /dev/null @@ -1,387 +0,0 @@ -package org.allenai.common.indexing - -import org.allenai.common.Config._ -import org.allenai.common.Logging -import org.allenai.common.ParIterator._ -import org.allenai.datastore.Datastore -import org.allenai.nlpstack.segment.defaultSegmenter - -import com.typesafe.config.{ ConfigRenderOptions, ConfigFactory, ConfigObject, Config } -import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder -import org.elasticsearch.action.bulk.BulkProcessor -import org.elasticsearch.action.index.IndexRequest -import org.elasticsearch.client.transport.TransportClient -import org.elasticsearch.common.settings.Settings -import org.elasticsearch.common.xcontent.XContentFactory._ -import org.elasticsearch.index.query.QueryBuilders - -import scala.concurrent.ExecutionContext.Implicits.global -import scala.concurrent.duration.Duration -import scala.concurrent.{ Await, Future } -import scala.io.{ Source, Codec } -import scala.collection.JavaConverters._ -import scala.util.{ Failure, Success } -import java.io.File -import java.nio.file.{ Files, Path, Paths } -import java.text.SimpleDateFormat -import java.util.Calendar -import java.util.concurrent.TimeUnit - -/** CLI to build an Elastic Search index on Aristo corpora. - * In order to build the index, you need to have elasticsearch running. - * Download latest version of elasticsearch, go to the 'bin' folder and run it: - * ./elasticsearch - * Refer http://joelabrahamsson.com/elasticsearch-101/ to get started. - * Takes in Config object containing corpus and other information necessary to build the index. - */ -class BuildCorpusIndex(config: Config) extends Logging { - - /** Get Index Name and Index Type. */ - val esConfig: Config = config[Config]("elasticSearch") - val indexName: String = { - // The index name must be appended with the current date. - val name = esConfig[String]("indexName") - val dateFormat = new SimpleDateFormat("yyyy-MM-dd") - name + "-" + dateFormat.format(Calendar.getInstance().getTime()) - } - - val indexType: String = esConfig[String]("indexType") - - val buildFromScratch = config.get[Boolean]("buildIndexOptions.buildFromScratch").getOrElse(true) - - val nThreads = config.get[Int]("buildIndexOptions.nThreads") getOrElse { - Runtime.getRuntime.availableProcessors() - } - - /** On failure, dump serialized requests to this path. */ - val dumpFolderPath = config[String]("buildIndexOptions.dumpFolder") - val bulkProcessorUtility = new BulkProcessorUtility - - /** Regex used to split sentences in waterloo corpus. */ - val sentenceSplitRegex = """""".r.unanchored - - /** Build an index in ElasticSearch using the corpora specified in config. */ - def buildElasticSearchIndex(): Unit = { - if (buildFromScratch) { - - // Get Transport Client. - val esClient = ElasticSearchTransportClientUtil.ConstructTransportClientFromESconfig(esConfig) - val createIndexRequestBuilder: CreateIndexRequestBuilder = - esClient.admin().indices().prepareCreate(indexName) - - createIndexRequestBuilder.setSettings(Settings.settingsBuilder()) - - val indexSetting = esConfig.get[ConfigObject]("setting").getOrElse(ConfigFactory.empty.root) - val indexMapping = esConfig.get[ConfigObject]("mapping").getOrElse(ConfigFactory.empty.root) - - if (!indexSetting.isEmpty) { - // Add custom settings to index - val indexSettingString = indexSetting.render(ConfigRenderOptions.concise()) - createIndexRequestBuilder.setSettings(indexSettingString) - } - - if (!indexMapping.isEmpty) { - // Add mapping to index - val indexMappingString = indexMapping.render(ConfigRenderOptions.concise()) - createIndexRequestBuilder.addMapping(indexType, indexMappingString) - } - - createIndexRequestBuilder.execute().actionGet() - esClient.close() - } - - val corpusConfigs = config.get[Seq[Config]]("corpora").getOrElse(Seq.empty[Config]) - val parsedConfigs = corpusConfigs.map(parseCorpusConfig) - - val results: Future[Seq[Unit]] = Future.sequence(parsedConfigs.flatMap(corpus => { - if (corpus.isDirectory) { - val iterator = Files.walk(corpus.path).iterator().asScala - addTreeToIndex(iterator, corpus.encoding, corpus.documentFormat) - } else { - addTreeToIndex(Seq(corpus.path).iterator, corpus.encoding, corpus.documentFormat) - } - })) - - results onComplete { - case Success(l) => - logger.debug(s"Done creating index ${indexName}, type: ${indexType}!") - case Failure(l) => - logger.error(s"Unable to create index: ${l.printStackTrace()}") - } - - Await.result(results, Duration.Inf) - - val failedRequests = bulkProcessorUtility.getFailedRequests() - - if (failedRequests.length > 0) { - - // Retry failed requests - logger.debug("Retrying failed requests") - - val esClient = ElasticSearchTransportClientUtil.ConstructTransportClientFromESconfig(esConfig) - for (bulkRequest <- failedRequests; request <- bulkRequest.requests().asScala) { - BuildCorpusIndex.indexWithoutDuplicate( - request.asInstanceOf[IndexRequest], esClient, indexName - ) - } - esClient.close - } else { - logger.debug("No failed requests") - } - } - - /** Index a file tree into the elasticSearch instance. Divides work into nThreads*4 Futures. Each - * future syncs on currentFile which is a logging variable, and then grabs the next file from the - * stream if it is not empty. - * @param fileTree file stream to be indexed - * @return a sequence of Futures each representing the work done by a thread on this file tree. - */ - def addTreeToIndex( - fileTree: Iterator[Path], - codec: Codec, - documentFormat: String - ): Seq[Future[Unit]] = { - for (i <- 0 until nThreads * 4) yield { - Future { - val esClient = - ElasticSearchTransportClientUtil.ConstructTransportClientFromESconfig(esConfig) - val bulkProcessor: BulkProcessor = - bulkProcessorUtility.buildDumpOnErrorBulkProcessor(esClient, dumpFolderPath) - - // Implicit conversion here to ParIteratorEnrichment - fileTree parForeach (path => { - val file = path.toFile - // ignore .DS_STORE and any other hidden surprises that should not be indexed - if (!file.isDirectory && !file.isHidden) { - addFileToIndex(file, bulkProcessor, codec, documentFormat) - } - }) - - bulkProcessor.flush() - bulkProcessor.awaitClose(Integer.MAX_VALUE, TimeUnit.DAYS) - esClient.close() - } - } - } - - /** Index a single file into elasticsearch. - * @param file to be indexed - * @param bulkProcessor to communicate with the elasticsearch instance - */ - def addFileToIndex( - file: File, - bulkProcessor: BulkProcessor, - codec: Codec, - documentFormat: String - ): Unit = { - if (documentFormat == "waterloo") { - addWaterlooFileToIndex(file, documentFormat, bulkProcessor, codec) - } else { - val segments = segmentFile(file, codec, documentFormat) - segments.zipWithIndex.foreach { - case (segment, segmentIndex) => { - addSegmentToIndex(segment, documentFormat, file.getName, segmentIndex, bulkProcessor) - } - } - } - } - - /** Index a file into the elasticsearch instance, following the convention of the waterloo corpus. - * Sentences are encapsulated by ... tags. - * @param inputFile path to the input directory - * @param bulkProcessor to communicate with the elasticsearch instace - */ - def addWaterlooFileToIndex(inputFile: File, documentFormat: String, bulkProcessor: BulkProcessor, codec: Codec): Unit = { - var filePositionCounter = 0 - - def segmentFunction(segment: String): Unit = { - addSegmentToIndex(segment, documentFormat, inputFile.getName, filePositionCounter, bulkProcessor) - filePositionCounter += 1 - } - ParsingUtils.splitOnTag( - inputFile = inputFile, - splitString = "DOC", - splitRegex = sentenceSplitRegex, - segmentFunction = segmentFunction, - bufferSize = 16384, - codec - ) - } - - def segmentFile(file: File, codec: Codec, documentFormat: String): Iterator[String] = { - documentFormat match { - case "plain text" => segmentPlainTextFile(file, codec) - case "sentence per line" | "question-answer" | "term-definition" => getFileLines(file, codec) - case "barrons" => getSegmentsFromDocument(new BarronsDocumentReader(file, codec).read()) - case "simple wikipedia" => segmentWikipediaFile(file, codec) - case "waterloo" => throw new IllegalStateException("you shouldn't have gotten here") - case _ => throw new IllegalStateException("Unrecognized document format") - } - } - - def getSegmentsFromDocument(document: SegmentedDocument): Iterator[String] = { - val segments = document.getSegmentsOfType(indexType) - segments.map(_.getTextSegments.mkString(" ")).iterator - } - - def getFileLines(file: File, codec: Codec): Iterator[String] = { - val bufSource = Source.fromFile(file, 8192)(codec) - bufSource.getLines - } - - def segmentPlainTextFile(file: File, codec: Codec): Iterator[String] = { - (getFileLines(file, codec) flatMap { defaultSegmenter.segmentTexts }) - } - - def segmentWikipediaFile(file: File, codec: Codec): Iterator[String] = { - indexType match { - case "sentence" => segmentPlainTextFile(file, codec) - case "paragraph" => { - val bufSource = Source.fromFile(file, 8192)(codec) - val lines = bufSource.getLines - lines.flatMap(line => if (line.trim.isEmpty) Seq[String]() else Seq[String](line)) - } - case _ => throw new IllegalStateException("unrecognized index type") - } - } - - /** Index a single segment into elasticsearch. - * @param segment to be indexed - * @param documentFormat also describes the format of the segment - * @param source name of source for reference - * @param segmentIndex index of segment in file (for deduplication) - * @param bulkProcessor to communicate with the elasticsearch instance - */ - def addSegmentToIndex( - segment: String, - documentFormat: String, - source: String, - segmentIndex: Int, - bulkProcessor: BulkProcessor - ): Unit = { - // Helper Function - def breakQAline(line: String): Option[(String, String)] = { - line.split(""":\|:""").map(_.trim) match { - case Array(lhs, rhs, _*) if (!lhs.isEmpty() && !rhs.isEmpty()) => - Some((lhs, rhs)) - case _ => None - } - } - - val requestOption = documentFormat match { - case "question-answer" => - breakQAline(segment.trim) match { - case Some((lhs, rhs)) => - Some(new IndexRequest(indexName, indexType).source(jsonBuilder().startObject() - .field("question", lhs) - .field("answer", rhs) - .field("source", source + "_" + segmentIndex.toString) - .endObject())) - case _ => None - } - case "term-definition" => - breakQAline(segment.trim) match { - case Some((lhs, rhs)) => - Some(new IndexRequest(indexName, indexType).source(jsonBuilder().startObject() - .field("term", lhs) - .field("definition", rhs) - .field("source", source + "_" + segmentIndex.toString) - .endObject())) - case _ => None - } - case _ => - Some(new IndexRequest(indexName, indexType).source(jsonBuilder().startObject() - .field("text", segment.trim) - .field("source", source + "_" + segmentIndex.toString) - .endObject())) - } - requestOption match { - case Some(request) => bulkProcessor.add(request) - case _ => - } - } - - /** Take the config for a corpus, resolve paths, and return a simple object containing information - * about the corpus. - */ - def parseCorpusConfig(corpusConfig: Config): ParsedConfig = { - val documentFormat = corpusConfig.get[String]("documentFormat").getOrElse("plain text") - val encoding = corpusConfig.get[String]("encoding").getOrElse("UTF-8") - // We could be a little smarter at detecting whether the intent was a local path, but this will - // do for now. - val pathIsLocal = corpusConfig.get[Boolean]("pathIsLocal").getOrElse(false) - val (path, isDirectory) = pathIsLocal match { - case true => getLocalPathFromConfig(corpusConfig) - case false => getDatastorePathFromConfig(corpusConfig) - } - ParsedConfig(path, isDirectory, encoding, documentFormat) - } - - def getLocalPathFromConfig(corpusConfig: Config): (Path, Boolean) = { - val directory = corpusConfig.get[String]("directory") - val file = corpusConfig.get[String]("file") - file match { - case Some(f) => { - directory match { - case Some(d) => (Paths.get(d, f), false) - case None => (Paths.get(f), false) - } - } - case None => (Paths.get(directory.get), true) - } - } - - def getDatastorePathFromConfig(corpusConfig: Config): (Path, Boolean) = { - val directory = corpusConfig.get[String]("directory") - val file = corpusConfig.get[String]("file") - val privacy = corpusConfig.get[String]("privacy").getOrElse("private") - val group = corpusConfig[String]("group") - val version = corpusConfig[Int]("version") - file match { - case Some(f) => (getFileFromDatastore(privacy, group, directory, f, version), false) - case None => (getDirectoryFromDatastore(privacy, group, directory.get, version), true) - } - } - - def getFileFromDatastore( - privacy: String, - group: String, - directory: Option[String], - file: String, - version: Int - ): Path = { - directory match { - case Some(d) => Datastore(privacy).directoryPath(group, d, version).resolve(file) - case None => Datastore(privacy).filePath(group, file, version) - } - } - - def getDirectoryFromDatastore( - privacy: String, - group: String, - directory: String, - version: Int - ): Path = { - Datastore(privacy).directoryPath(group, directory, version) - } -} - -case class ParsedConfig(path: Path, isDirectory: Boolean, encoding: String, documentFormat: String) - -object BuildCorpusIndex { - /** Execute a given index request if the document is not already in the index. */ - def indexWithoutDuplicate( - request: IndexRequest, - esClient: TransportClient, - indexName: String - ): Unit = { - val source = request.sourceAsMap().asScala("source") - val result = esClient.prepareSearch(indexName) - .setQuery(QueryBuilders.termQuery("source", source)) - .execute() - .actionGet() - if (result.getHits.getTotalHits == 0) { - esClient.index(request).actionGet() - } - } -} diff --git a/indexing/src/main/scala/org/allenai/common/indexing/BuildCorpusIndexRunner.scala b/indexing/src/main/scala/org/allenai/common/indexing/BuildCorpusIndexRunner.scala deleted file mode 100644 index cdb8125..0000000 --- a/indexing/src/main/scala/org/allenai/common/indexing/BuildCorpusIndexRunner.scala +++ /dev/null @@ -1,42 +0,0 @@ -package org.allenai.common.indexing - -import com.typesafe.config.ConfigFactory - -import java.io.File - -/** Indexing main object. Configuration specified in indexing.conf in org.allenai.common.indexing. - * See common/Readme for details. - */ -object BuildCorpusIndexRunner extends App { - - case class IndexConfig(indexConfigName: String = null, configOverrideFile: Option[File] = None) - - val parser = new scopt.OptionParser[IndexConfig]("BuildCorpusIndexRunner") { - head("BuildCorpusIndexRunner") - note("See common/Readme for details on how to use this.\n") - help("help") text ("prints this usage text") - opt[String]("index-name") required () valueName ("") action { (x, c) => - c.copy(indexConfigName = x) - } text ("name of configuration to use") - opt[File]("config-overrides-file") valueName ("") action { (x, c) => - c.copy(configOverrideFile = Some(x)) - } text ("Config file with overrides if any") - } - - parser.parse(args, IndexConfig()).foreach(config => { - val rootConfig = ConfigFactory.parseResources(getClass, "indexing.conf").resolve() - - val originalConfig = - rootConfig.getConfig(s"org.allenai.common.indexing.${config.indexConfigName}").resolve() - val configOverrides = config.configOverrideFile map { f => ConfigFactory.parseFile(f) } - - /** Get merged Config object from applying requested overrides to original config. */ - val buildIndexConfig = configOverrides match { - case Some(overrides) => overrides.withFallback(originalConfig) - case None => originalConfig - } - - new BuildCorpusIndex(buildIndexConfig).buildElasticSearchIndex() - }) - -} diff --git a/indexing/src/main/scala/org/allenai/common/indexing/BulkProcessorUtility.scala b/indexing/src/main/scala/org/allenai/common/indexing/BulkProcessorUtility.scala deleted file mode 100644 index 793cd3b..0000000 --- a/indexing/src/main/scala/org/allenai/common/indexing/BulkProcessorUtility.scala +++ /dev/null @@ -1,94 +0,0 @@ -package org.allenai.common.indexing - -import org.allenai.common.Logging -import BuildCorpusIndex.indexWithoutDuplicate - -import com.typesafe.config.Config -import org.elasticsearch.action.bulk.{ BulkResponse, BulkProcessor, BulkRequest } -import org.elasticsearch.action.index.IndexRequest -import org.elasticsearch.client.transport.TransportClient -import org.elasticsearch.common.io.stream.{ InputStreamStreamInput, OutputStreamStreamOutput } -import org.elasticsearch.common.unit.{ TimeValue, ByteSizeUnit, ByteSizeValue } - -import scala.collection.JavaConverters._ -import scala.util.Random -import java.io.{ FileInputStream, FileOutputStream } -import java.util.concurrent.ConcurrentLinkedQueue - -/** Factory for elasticsearch BulkProcessor. - */ -class BulkProcessorUtility extends Logging { - - private val failedRequests: ConcurrentLinkedQueue[BulkRequest] = - new ConcurrentLinkedQueue[BulkRequest]() - - /** Builds a bulk processor, with listener configured with nice behavior on fail: dump to fail - * and save failed requests to a sequence to be retried later. - * @param esClient TransportClient with which to send requests to the elasticsearch server - * @param dumpFilePath path to dump files to - * @return The contructed bulk processor - */ - def buildDumpOnErrorBulkProcessor( - esClient: TransportClient, - dumpFilePath: String - ): BulkProcessor = { - BulkProcessor.builder( - esClient, - new BulkProcessor.Listener() { - override def afterBulk(executionId: Long, request: BulkRequest, - failure: Throwable): Unit = { - logger.warn(s"${failure.getMessage}, retrying later") - dumpRequestToFile(request, dumpFilePath) - failedRequests.add(request) - } - override def beforeBulk(l: Long, bulkRequest: BulkRequest): Unit = {} - override def afterBulk(l: Long, bulkRequest: BulkRequest, - bulkResponse: BulkResponse): Unit = {} - } - ) - .setBulkActions(10000) - .setBulkSize(new ByteSizeValue(1, ByteSizeUnit.GB)) - .setFlushInterval(TimeValue.timeValueSeconds(600)) - .setConcurrentRequests(1) - .build() - } - - /** Serialize a request and save to file. - * @param request failed bulk request - * @param dumpFilePath path to dump folder - */ - def dumpRequestToFile(request: BulkRequest, dumpFilePath: String): Unit = { - val outStream = new OutputStreamStreamOutput(new FileOutputStream( - dumpFilePath + "/" + randomRequestFileName() - )) - request.writeTo(outStream) - outStream.close() - } - - /** Method for retrying failed request. - * @param dumpFilePath path to dumped request - * @param esConfig configuration of elasticsearch index for which request should be executed - */ - def retryDumpedRequest(dumpFilePath: String, esConfig: Config, indexName: String): Unit = { - val bulkRequest = new BulkRequest - bulkRequest.readFrom(new InputStreamStreamInput(new FileInputStream(dumpFilePath))) - val esClient = ElasticSearchTransportClientUtil.ConstructTransportClientFromESconfig(esConfig) - for (request <- bulkRequest.requests().asScala) { - indexWithoutDuplicate(request.asInstanceOf[IndexRequest], esClient, indexName) - } - esClient.close() - } - - /** Utility method for generating random file name. - * @return random file name - */ - def randomRequestFileName(): String = { - "request_" + Random.alphanumeric.take(10).mkString + ".txt" - } - - /** Get list of bulk requests that failed.*/ - def getFailedRequests(): List[BulkRequest] = { - failedRequests.asScala.toList - } - -} diff --git a/indexing/src/main/scala/org/allenai/common/indexing/ElasticSearchTransportClientUtil.scala b/indexing/src/main/scala/org/allenai/common/indexing/ElasticSearchTransportClientUtil.scala deleted file mode 100644 index 348ddca..0000000 --- a/indexing/src/main/scala/org/allenai/common/indexing/ElasticSearchTransportClientUtil.scala +++ /dev/null @@ -1,40 +0,0 @@ -package org.allenai.common.indexing - -import org.allenai.common.Logging - -import com.typesafe.config.Config -import org.elasticsearch.client.transport.TransportClient -import org.elasticsearch.common.settings.Settings -import org.elasticsearch.common.transport.InetSocketTransportAddress - -import java.net.InetSocketAddress - -/** Utility object that takes config parameters from application config file and constructs a - * transport client to talk to ElasticSearch. - */ -object ElasticSearchTransportClientUtil extends Logging { - - /** Build the Transport client from the config. - * @param esConfig config with the address/port/name of the target cluster - * @param sniffMode flag that specifies whether to auto-detects other nodes of the cluster on - * connection fault. - * See: "https://www.elastic.co/guide/en/elasticsearch/client/java-api/current/transport-client.html" - * @return the constructed TransportClient - */ - def ConstructTransportClientFromESconfig( - esConfig: Config, - sniffMode: Boolean = false - ): TransportClient = { - val settings = Settings.builder() - .put("cluster.name", esConfig.getString("clusterName")) - .put("client.transport.sniff", sniffMode) - .put("sniffOnConnectionFault", sniffMode) - .build() - val host = esConfig.getString("hostAddress") - val address = new InetSocketTransportAddress(new InetSocketAddress(host, 9300)) - - logger.debug(s"Created Elastic Search Client in cluster ${esConfig.getString("clusterName")}") - val clientBuilder = TransportClient.builder().settings(settings) - clientBuilder.build().addTransportAddress(address) - } -} diff --git a/indexing/src/main/scala/org/allenai/common/indexing/ParsingUtils.scala b/indexing/src/main/scala/org/allenai/common/indexing/ParsingUtils.scala deleted file mode 100644 index bf907f8..0000000 --- a/indexing/src/main/scala/org/allenai/common/indexing/ParsingUtils.scala +++ /dev/null @@ -1,58 +0,0 @@ -package org.allenai.common.indexing - -import scala.io.{ Source, Codec } -import scala.util.matching.Regex -import java.io.File - -object ParsingUtils { - - /** Splits a file based on tags of the form <$splitString> ... and performs a - * function on each segment. If a tag is missing for whatever reason will treat the next tag - * (whether it is <$splitString> or as the delimiter of this segemnt. - * @param inputFile file to be segmented - * @param splitString string that defines doc delimiting tags - * @param splitRegex passed in so that the regex does not have to be built with each call (should - * look like """""") - * @param segmentFunction function to be called on each segment - * @param bufferSize size of readingBuffer - */ - def splitOnTag(inputFile: File, splitString: String, splitRegex: Regex, - segmentFunction: String => Unit, bufferSize: Int, codec: Codec): Unit = { - val lines = Source.fromFile(inputFile, bufferSize = bufferSize)(codec).getLines() - val endOfLastLine = new StringBuilder("") - var inDocFlag = false - for (currentLine <- lines if !currentLine.trim.equals("")) { - val docs = splitRegex.split(currentLine) - if (docs.nonEmpty) { - if (docs.length == 1) { - endOfLastLine.append("\n" + docs.head) - } else { - processIfValid(endOfLastLine.append("\n" + docs.head).toString()) - endOfLastLine.setLength(0) - if (docs.tail.length >= 2) { - docs.tail.init.foreach(doc => processIfValid(doc)) - } - if (!(currentLine.endsWith(s"<$splitString>") | - currentLine.endsWith(s""))) { - endOfLastLine.setLength(0) - endOfLastLine.append(docs.last) - inDocFlag = true - } else { - processIfValid(docs.last) - } - } - } - } - - /** Ignores tag fragments created by parser above.*/ - def processIfValid(input: String): Unit = { - val trimmed = input.trim - if (!trimmed.equals("") && !trimmed.equals(s"""<$splitString>""") && - !trimmed.equals(s"""<$splitString>""") && !trimmed.equals(s"""""")) { - segmentFunction(input) - } - } - - } - -} diff --git a/indexing/src/main/scala/org/allenai/common/indexing/SegmentedDocument.scala b/indexing/src/main/scala/org/allenai/common/indexing/SegmentedDocument.scala deleted file mode 100644 index d6e00af..0000000 --- a/indexing/src/main/scala/org/allenai/common/indexing/SegmentedDocument.scala +++ /dev/null @@ -1,77 +0,0 @@ -package org.allenai.common.indexing - -import org.allenai.nlpstack.core.repr.Document - -import scala.collection.mutable - -/** A document that has been broken up into (potentially nested) segments. Note that there's a - * notion of a segment and segmenter in the nlpstack, but those are used exclusively for sentences. - * This class aims to capture higher-level document structure than sentences. - */ -class SegmentedDocument(text: String, val segments: Seq[Segment]) extends Document(text) { - def getSegmentsOfType(segmentType: String): Seq[Segment] = { - segments.flatMap(_.getSegmentsOfType(segmentType)) - } - - override def equals(that: Any) = that match { - case that: SegmentedDocument => { that.text == this.text && that.segments == this.segments } - case _ => false - } - - override def hashCode() = text.hashCode * 41 + segments.hashCode -} - -class SegmentedDocumentBuilder(text: String) { - val finishedTopLevelSegments = new mutable.ListBuffer[Segment] - val segmentStack = new mutable.Stack[(String, mutable.ListBuffer[Segment])] - - def startNewNonTerminalSegment(segmentType: String) { - segmentStack.push((segmentType, new mutable.ListBuffer[Segment])) - } - - def finishNonTerminalSegment() { - val segmentToFinish = segmentStack.pop() - val finishedSegment = NonTerminalSegment(segmentToFinish._1, segmentToFinish._2.toSeq) - if (segmentStack.size == 0) { - finishedTopLevelSegments.append(finishedSegment) - } else { - segmentStack.top._2.append(finishedSegment) - } - } - - def addTerminalSegment(segmentType: String, text: String) { - segmentStack.top._2.append(TerminalSegment(segmentType, text)) - } - - def build() = new SegmentedDocument(text, finishedTopLevelSegments.toSeq) -} - -sealed abstract class Segment(segmentType: String) { - def getSegmentsOfType(requestedType: String): Seq[Segment] = { - this match { - case NonTerminalSegment(sType, segments) => { - val matchingSegmentsBelowMe = segments.flatMap(_.getSegmentsOfType(requestedType)) - if (requestedType == sType) { - Seq(this) ++ matchingSegmentsBelowMe - } else { - matchingSegmentsBelowMe - } - } - case TerminalSegment(sType, text) => { - if (requestedType == sType) Seq(this) else Seq.empty[Segment] - } - } - } - - def getTextSegments(): Seq[String] = { - this match { - case NonTerminalSegment(sType, segments) => segments.flatMap(_.getTextSegments) - case TerminalSegment(sType, text) => Seq(text) - } - } -} - -case class NonTerminalSegment(segmentType: String, segments: Seq[Segment]) - extends Segment(segmentType) - -case class TerminalSegment(segmentType: String, text: String) extends Segment(segmentType) diff --git a/indexing/src/main/scala/org/allenai/common/indexing/WaterlooSegmentScript.scala b/indexing/src/main/scala/org/allenai/common/indexing/WaterlooSegmentScript.scala deleted file mode 100644 index cea1640..0000000 --- a/indexing/src/main/scala/org/allenai/common/indexing/WaterlooSegmentScript.scala +++ /dev/null @@ -1,102 +0,0 @@ -package org.allenai.common.indexing - -import org.allenai.common.Logging -import org.allenai.datastore.Datastore -import org.allenai.nlpstack.segment.defaultSegmenter - -import com.typesafe.config.ConfigFactory - -import scala.concurrent.ExecutionContext.Implicits.global -import scala.concurrent.duration.Duration -import scala.concurrent.{ Await, Future } -import scala.io.{ Codec, Source } -import java.io.{ FileWriter, BufferedWriter, File } - -/** Script used to segment waterloo corpus on a sentence level. - * Splits docs based on ... tags, determines whether the doc is in "English" by - * counting the fraction of stop words, and throws out the doc if it is not. Sentence segments the - * doc using nlp stack, wraps each sentence in ... tags, and then rewrites the - * entire doc to file. - */ -object WaterlooSegmentScript extends App with Logging { - - val englishThreshold = 0.2 - - val rootConfig = ConfigFactory.systemProperties.withFallback(ConfigFactory.load) - - val config = ConfigFactory.parseResources(getClass, "application.conf").resolve(). - getConfig("org.allenai.common.indexing.waterloo-lucene") - - // Get Index Name and Index Type - val esConfig = config.getConfig("elasticSearch") - val indexName = esConfig.getString("indexName") - val splitString = "DOC" - val splitRegex = """""".r.unanchored - - val stopWordsConfig = config.getConfig("stoplist") - - val stopWords = Source.fromFile(Datastore("public"). - filePath(stopWordsConfig.getString("group"), stopWordsConfig.getString("name"), - stopWordsConfig.getInt("version")).toFile) - .getLines().toVector.toSet - - val corpusConfig = config.getConfig("CorpusIOConfig") - val indirPath = corpusConfig.getString("inputFolder") - val outdirPath = corpusConfig.getString("outputFolder") - - segmentDirectory(indirPath, outdirPath) - - def segmentDirectory(inputDirectoryName: String, outputDirectoryName: String): Unit = { - - val indir = new File(inputDirectoryName) - val outdir = new File(outputDirectoryName) - - if (!outdir.exists()) outdir.mkdir() - - val results: Seq[Future[Unit]] = for ( - file <- indir.listFiles; - if !file.getName.startsWith(".") - ) yield { - Future { - logger.debug("Now segmenting: " + file.getName) - segmentIntoDocs(file, new File(outdir.getAbsolutePath + "/" + file.getName)) - logger.debug("Done segmenting: " + file.getName) - } - } - Await.result(Future.sequence(results), Duration.Inf) - logger.debug("Done segmenting!") - } - - def segmentIntoDocs(inputFile: File, outputFile: File): Unit = { - - outputFile.delete() - val writer = new BufferedWriter(new FileWriter(outputFile, false)) - - ParsingUtils.splitOnTag(inputFile, splitString, splitRegex, dealWithDocHelper, 8192, Codec.UTF8) - - writer.flush() - writer.close() - - def dealWithDocHelper(input: String): Unit = { - dealWithDoc(input, writer, inputFile.getName) - } - } - - def dealWithDoc(input: String, bufferedWriter: BufferedWriter, source: String): Unit = { - if (!input.trim.equals("") && isEnglish(input)) { - bufferedWriter.write("") - val sentences = defaultSegmenter.segmentTexts(input). - map(sentence => s"$sentence") - sentences.foreach(s => bufferedWriter.write(s)) - bufferedWriter.write("") - } - } - - def isEnglish(input: String): Boolean = { - val arr = input.split(" ") - val total = arr.length - val count = arr.foldLeft(0)((x: Int, y: String) => x + (if (stopWords.contains(y)) 1 else 0)) - .toDouble - count / total > englishThreshold - } -} diff --git a/indexing/src/test/scala/org/allenai/common/indexing/BarronsDocumentReaderSpec.scala b/indexing/src/test/scala/org/allenai/common/indexing/BarronsDocumentReaderSpec.scala deleted file mode 100644 index 68e1fc7..0000000 --- a/indexing/src/test/scala/org/allenai/common/indexing/BarronsDocumentReaderSpec.scala +++ /dev/null @@ -1,52 +0,0 @@ -package org.allenai.common.indexing - -import org.allenai.common.testkit.UnitSpec - -class BarronsDocumentReaderSpec extends UnitSpec { - - val sentences = (0 to 20).map("sentence " + _) - val sampleLines = Seq( - s"5.1.1.1.1\t${sentences(0)}", - s"5.1.1.1.2\t${sentences(1)}", - s"5.1.1.1.3\t${sentences(2)}", - s"5.1.1.2.1\t${sentences(3)}", - s"5.1.1.2.2\t${sentences(4)}", - s"5.1.1.2.3\t${sentences(5)}", - s"5.1.1.1.1.1\t${sentences(6)}", - s"5.1.1.1.2.1\t${sentences(7)}", - s"5.1.1.1.2.2\t${sentences(8)}", - s"5.1.1.1.2.3\t${sentences(9)}", - s"5.1.1.1.3.1\t${sentences(10)}", - s"5.1.1.1.3.2\t${sentences(11)}" - ) - - "read" should "get paragraphs out" in { - val readDocument = new BarronsDocumentReader(null, "UTF-8")._readLines(sampleLines) - val expectedDocument = new SegmentedDocument(sampleLines.mkString("\n"), Seq( - NonTerminalSegment("paragraph", Seq( - TerminalSegment("sentence", sentences(0)), - TerminalSegment("sentence", sentences(1)), - TerminalSegment("sentence", sentences(2)) - )), - NonTerminalSegment("paragraph", Seq( - TerminalSegment("sentence", sentences(3)), - TerminalSegment("sentence", sentences(4)), - TerminalSegment("sentence", sentences(5)) - )), - NonTerminalSegment("paragraph", Seq( - TerminalSegment("sentence", sentences(6)) - )), - NonTerminalSegment("paragraph", Seq( - TerminalSegment("sentence", sentences(7)), - TerminalSegment("sentence", sentences(8)), - TerminalSegment("sentence", sentences(9)) - )), - NonTerminalSegment("paragraph", Seq( - TerminalSegment("sentence", sentences(10)), - TerminalSegment("sentence", sentences(11)) - )) - )) - readDocument should be(expectedDocument) - } -} - diff --git a/indexing/src/test/scala/org/allenai/common/indexing/BuildCorpusIndexSpec.scala b/indexing/src/test/scala/org/allenai/common/indexing/BuildCorpusIndexSpec.scala deleted file mode 100644 index 08dfee2..0000000 --- a/indexing/src/test/scala/org/allenai/common/indexing/BuildCorpusIndexSpec.scala +++ /dev/null @@ -1,155 +0,0 @@ -package org.allenai.common.indexing - -import org.allenai.common.testkit.UnitSpec - -import com.typesafe.config.{ ConfigFactory, Config } -import java.nio.file.{ Path, Paths } - -class BuildCorpusIndexSpec extends UnitSpec { - - val baseConfig = ConfigFactory.parseString(""" - elasticSearch: { - indexName: "dummy name" - indexType: "dummy type" - } - buildIndexOptions: { - dumpFolder: "dummy folder" - } - """) - val buildCorpusIndex = new BuildCorpusIndex(baseConfig) - - // It's unfortunate that I need to write these methods, but this was the only way I found to - // reasonably test the datastore stuff. It would be better to just be able to pass in a fake - // datastore, but the calls to Datastore are static... - val bciWithMockedDatastore = new BuildCorpusIndex(baseConfig) { - override def getFileFromDatastore( - privacy: String, - group: String, - directory: Option[String], - file: String, - version: Int - ): Path = { - val d = directory match { case Some(d) => d + s"-d${version}"; case None => "" } - val f = directory match { - case Some(d) => s"/${file}" - case None => { - val (base, extension) = file.splitAt(file.lastIndexOf(".")) - s"${base}-v${version}${extension}" - } - } - Paths.get(s"/fake/cache/dir/org.allenai.datastore/${privacy}/${group}/${d}${f}") - } - override def getDirectoryFromDatastore( - privacy: String, - group: String, - directory: String, - version: Int - ): Path = { - Paths.get(s"/fake/dir/org.allenai.datastore/${privacy}/${group}/${directory}-d${version}") - } - } - val dir1 = "/test/path/dir1/" - val file1 = "test/file1" - - /** Test an given parse result against expected results. We need to test like this when we're - * accessing something from the Datastore, because we have to check a suffix on the file path, - * instead of just checking for object equality. - */ - def expectParse( - parsedConfig: ParsedConfig, - pathSuffix: String, - isDirectory: Boolean, - encoding: String, - documentFormat: String - ) { - parsedConfig.path.toString should endWith(pathSuffix) - parsedConfig.isDirectory should be(isDirectory) - parsedConfig.encoding should be(encoding) - parsedConfig.documentFormat should be(documentFormat) - } - - "parseCorpusConfig" should "parse a local directory" in { - val corpusConfig = ConfigFactory.parseString(s"""{ - |pathIsLocal: true - |documentFormat: "waterloo" - |directory: "${dir1}" - |}""".stripMargin) - val expectedParse = ParsedConfig(Paths.get(dir1), true, "UTF-8", "waterloo") - buildCorpusIndex.parseCorpusConfig(corpusConfig) should be(expectedParse) - } - - it should "parse a local file with directory" in { - val corpusConfig = ConfigFactory.parseString(s"""{ - |pathIsLocal: true - |documentFormat: "waterloo" - |directory: "${dir1}" - |file: "${file1}" - |}""".stripMargin) - val expectedConfig = ParsedConfig(Paths.get(dir1, file1), false, "UTF-8", "waterloo") - buildCorpusIndex.parseCorpusConfig(corpusConfig) should be(expectedConfig) - } - - it should "parse a local file without a directory" in { - val corpusConfig = ConfigFactory.parseString(s"""{ - |pathIsLocal: true - |documentFormat: "waterloo" - |file: "${file1}" - |}""".stripMargin) - val expectedConfig = ParsedConfig(Paths.get(file1), false, "UTF-8", "waterloo") - buildCorpusIndex.parseCorpusConfig(corpusConfig) should be(expectedConfig) - } - - it should "parse a datastore file" in { - val corpusConfig = ConfigFactory.parseString(s"""{ - |group: "org.allenai.corpora.wikipedia" - |file: "simple_wikipedia_first_few_articles.txt" - |version: 1 - |privacy: "public" - |}""".stripMargin) - val parsed = bciWithMockedDatastore.parseCorpusConfig(corpusConfig) - expectParse( - parsed, - "org.allenai.datastore/public/org.allenai.corpora.wikipedia/" + - "simple_wikipedia_first_few_articles-v1.txt", - false, - "UTF-8", - "plain text" - ) - } - - it should "parse a datastore directory" in { - val corpusConfig = ConfigFactory.parseString(s"""{ - |group: "org.allenai.aristo.corpora.derivative" - |directory: "Barrons-4thGrade.sentences" - |version: 1 - |}""".stripMargin) - val parsed = bciWithMockedDatastore.parseCorpusConfig(corpusConfig) - expectParse( - parsed, - "org.allenai.datastore/private/org.allenai.aristo.corpora.derivative/" + - "Barrons-4thGrade.sentences-d1", - true, - "UTF-8", - "plain text" - ) - } - - it should "parse a datastore file with a directory" in { - val corpusConfig = ConfigFactory.parseString(s"""{ - |group: "org.allenai.aristo.corpora.derivative" - |directory: "Barrons-4thGrade.sentences" - |file: "Barrons-5.sentences.txt" - |encoding: "fake encoding" - |version: 1 - |}""".stripMargin) - val parsed = bciWithMockedDatastore.parseCorpusConfig(corpusConfig) - expectParse( - parsed, - "org.allenai.datastore/private/org.allenai.aristo.corpora.derivative/" + - "Barrons-4thGrade.sentences-d1/Barrons-5.sentences.txt", - false, - "fake encoding", - "plain text" - ) - } -} diff --git a/indexing/src/test/scala/org/allenai/common/indexing/SegmentedDocumentSpec.scala b/indexing/src/test/scala/org/allenai/common/indexing/SegmentedDocumentSpec.scala deleted file mode 100644 index 35624ed..0000000 --- a/indexing/src/test/scala/org/allenai/common/indexing/SegmentedDocumentSpec.scala +++ /dev/null @@ -1,71 +0,0 @@ -package org.allenai.common.indexing - -import org.allenai.common.testkit.UnitSpec - -class SegmentedDocumentSpec extends UnitSpec { - - val sentence1 = TerminalSegment("sentence", "sentence1") - val sentence2 = TerminalSegment("sentence", "sentence2") - val sentence3 = TerminalSegment("sentence", "sentence3") - val sentence4 = TerminalSegment("sentence", "sentence4") - val paragraph1 = NonTerminalSegment("paragraph", Seq(sentence1, sentence2)) - val paragraph2 = NonTerminalSegment("paragraph", Seq(sentence3)) - val paragraph3 = NonTerminalSegment("paragraph", Seq(sentence4)) - val section1 = NonTerminalSegment("section", Seq(paragraph1)) - val section2 = NonTerminalSegment("section", Seq(paragraph2, paragraph3)) - val chapter1 = NonTerminalSegment("chapter", Seq(section1, section2)) - val document = new SegmentedDocument("dummy text", Seq(chapter1)) - - "Segment.getSegmentsOfType" should "recursively get all segments of the correct type" in { - chapter1.getSegmentsOfType("chapter") should be(Seq(chapter1)) - chapter1.getSegmentsOfType("section") should be(Seq(section1, section2)) - chapter1.getSegmentsOfType("paragraph") should be(Seq(paragraph1, paragraph2, paragraph3)) - chapter1.getSegmentsOfType("sentence") should be( - Seq(sentence1, sentence2, sentence3, sentence4) - ) - } - - "Segment.getTextSegments" should "recursively get text from all terminal segments" in { - chapter1.getTextSegments() should be(Seq("sentence1", "sentence2", "sentence3", "sentence4")) - section1.getTextSegments() should be(Seq("sentence1", "sentence2")) - section2.getTextSegments() should be(Seq("sentence3", "sentence4")) - paragraph1.getTextSegments() should be(Seq("sentence1", "sentence2")) - paragraph2.getTextSegments() should be(Seq("sentence3")) - paragraph3.getTextSegments() should be(Seq("sentence4")) - sentence1.getTextSegments() should be(Seq("sentence1")) - sentence2.getTextSegments() should be(Seq("sentence2")) - sentence3.getTextSegments() should be(Seq("sentence3")) - sentence4.getTextSegments() should be(Seq("sentence4")) - } - - "SegmentedDocument.getSegmentsOfType" should "get all segments of the correct type" in { - document.getSegmentsOfType("chapter") should be(Seq(chapter1)) - document.getSegmentsOfType("section") should be(Seq(section1, section2)) - document.getSegmentsOfType("paragraph") should be(Seq(paragraph1, paragraph2, paragraph3)) - document.getSegmentsOfType("sentence") should be( - Seq(sentence1, sentence2, sentence3, sentence4) - ) - } - - "SegmentedDocumentBuilder" should "correctly build a segmented document" in { - val builder = new SegmentedDocumentBuilder("dummy text") - builder.startNewNonTerminalSegment("chapter") - builder.startNewNonTerminalSegment("section") - builder.startNewNonTerminalSegment("paragraph") - builder.addTerminalSegment("sentence", "sentence1") - builder.addTerminalSegment("sentence", "sentence2") - builder.finishNonTerminalSegment() - builder.finishNonTerminalSegment() - builder.startNewNonTerminalSegment("section") - builder.startNewNonTerminalSegment("paragraph") - builder.addTerminalSegment("sentence", "sentence3") - builder.finishNonTerminalSegment() - builder.startNewNonTerminalSegment("paragraph") - builder.addTerminalSegment("sentence", "sentence4") - builder.finishNonTerminalSegment() - builder.finishNonTerminalSegment() - builder.finishNonTerminalSegment() - - builder.build() should be(document) - } -} From ddde2929d41df5ae5e0c7a08f87b575038d4b960 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:32:08 -0700 Subject: [PATCH 04/69] Remove shared code for webapp module --- webapp/build.sbt | 17 -- .../allenai/common/webapp/DummyServer.scala | 37 ----- .../org/allenai/common/webapp/Protocol.scala | 13 -- .../webapp/SprayClientHelpersSpec.scala | 114 ------------- .../allenai/common/webapp/BingClient.scala | 108 ------------ .../allenai/common/webapp/Directives.scala | 47 ------ .../org/allenai/common/webapp/Headers.scala | 24 --- .../org/allenai/common/webapp/InfoRoute.scala | 70 -------- .../common/webapp/SprayClientHelpers.scala | 155 ------------------ .../common/webapp/BingClientSpec.scala | 40 ----- .../common/webapp/DirectivesSpec.scala | 111 ------------- 11 files changed, 736 deletions(-) delete mode 100644 webapp/build.sbt delete mode 100644 webapp/src/it/scala/org/allenai/common/webapp/DummyServer.scala delete mode 100644 webapp/src/it/scala/org/allenai/common/webapp/Protocol.scala delete mode 100644 webapp/src/it/scala/org/allenai/common/webapp/SprayClientHelpersSpec.scala delete mode 100644 webapp/src/main/scala/org/allenai/common/webapp/BingClient.scala delete mode 100644 webapp/src/main/scala/org/allenai/common/webapp/Directives.scala delete mode 100644 webapp/src/main/scala/org/allenai/common/webapp/Headers.scala delete mode 100644 webapp/src/main/scala/org/allenai/common/webapp/InfoRoute.scala delete mode 100644 webapp/src/main/scala/org/allenai/common/webapp/SprayClientHelpers.scala delete mode 100644 webapp/src/test/scala/org/allenai/common/webapp/BingClientSpec.scala delete mode 100644 webapp/src/test/scala/org/allenai/common/webapp/DirectivesSpec.scala diff --git a/webapp/build.sbt b/webapp/build.sbt deleted file mode 100644 index 072e6d3..0000000 --- a/webapp/build.sbt +++ /dev/null @@ -1,17 +0,0 @@ -import Dependencies._ - -name := "common-webapp" - -libraryDependencies ++= Seq( - akkaActor, - okHttp, - sprayClient, - sprayJson, - sprayRouting, - typesafeConfig, - sprayTestkit % Test -) - -dependencyOverrides += akkaTestkit -dependencyOverrides += pegdown -dependencyOverrides += scalaCheck diff --git a/webapp/src/it/scala/org/allenai/common/webapp/DummyServer.scala b/webapp/src/it/scala/org/allenai/common/webapp/DummyServer.scala deleted file mode 100644 index 3ca7e72..0000000 --- a/webapp/src/it/scala/org/allenai/common/webapp/DummyServer.scala +++ /dev/null @@ -1,37 +0,0 @@ -package org.allenai.common.webapp - -import akka.actor.ActorSystem -import akka.pattern.after -import spray.can.Http -import spray.httpx.SprayJsonSupport -import spray.routing.{ Route, SimpleRoutingApp } - -import scala.concurrent.Future -import scala.concurrent.duration._ - -class DummyServer(implicit actorSystem: ActorSystem) extends SimpleRoutingApp with SprayJsonSupport { - import actorSystem.dispatcher - - // format: OFF - def route: Route = { - get { - path("hello") { complete("hi!") } ~ - path("addOne") { - parameter('number.as[Int]) { number => complete((number + 1).toString()) } - } ~ - path("sleep" / Segment) { sleepMillis => - complete { - after(sleepMillis.toInt.millis, actorSystem.scheduler) { - Future.successful("done") - } - } - } - } ~ - post { - entity(as[Ping]) { ping => complete(Pong(ping.message)) } - } - } - // format: ON - - def start(port: Int): Future[Http.Bound] = startServer("0.0.0.0", port)(route) -} diff --git a/webapp/src/it/scala/org/allenai/common/webapp/Protocol.scala b/webapp/src/it/scala/org/allenai/common/webapp/Protocol.scala deleted file mode 100644 index 2fe5ef6..0000000 --- a/webapp/src/it/scala/org/allenai/common/webapp/Protocol.scala +++ /dev/null @@ -1,13 +0,0 @@ -package org.allenai.common.webapp - -import spray.json.DefaultJsonProtocol._ - -case class Ping(message: String) -object Ping { - implicit val pingJsonFormat = jsonFormat1(Ping.apply) -} - -case class Pong(message: String) -object Pong { - implicit val pongJsonFormat = jsonFormat1(Pong.apply) -} diff --git a/webapp/src/it/scala/org/allenai/common/webapp/SprayClientHelpersSpec.scala b/webapp/src/it/scala/org/allenai/common/webapp/SprayClientHelpersSpec.scala deleted file mode 100644 index c215dde..0000000 --- a/webapp/src/it/scala/org/allenai/common/webapp/SprayClientHelpersSpec.scala +++ /dev/null @@ -1,114 +0,0 @@ -package org.allenai.common.webapp - -import org.allenai.common.testkit.ActorSpec - -import akka.actor.ActorSystem -import spray.client.pipelining._ -import spray.httpx.SprayJsonSupport - -import scala.concurrent.{ Await, Future, TimeoutException } -import scala.concurrent.duration._ - -class SprayClientHelpersSpec extends ActorSpec(ActorSystem("SprayClientHelpersSpec")) - with SprayJsonSupport { - - import system.dispatcher - - // Set up a dummy server to test sending requests and parsing responses. - val server = new DummyServer() - val testHost = "localhost" - val testPort = 6000 - val connectionTimeout = 250.millis - val requestTimeout = 500.millis - - val connector = SprayClientHelpers.getConnectionSetup( - testHost, - testPort, - connectionTimeout, - requestTimeout, - 1 - ) - - override def beforeAll(): Unit = Await.result(server.start(testPort), 2.seconds) - - "SprayClientHelpers" should "support receiving raw string responses" in { - val request = SprayClientHelpers.sendRequest(Get("/hello"), connector) { response => - response ~> unmarshal[String] - } - - Await.result(request, 2 * requestTimeout) shouldBe "hi!" - } - - it should "support transforming unmarshalled values within its response-parser" in { - val request = SprayClientHelpers.sendRequest(Get("/addOne?number=5"), connector) { response => - (response ~> unmarshal[String]).toInt - 7 - } - - Await.result(request, 2 * requestTimeout) shouldBe (5 + 1 - 7) - } - - it should "support unmarshalling to more than just strings" in { - val message = "hello json" - val request = SprayClientHelpers.sendRequest(Post("/", Ping(message)), connector) { response => - response ~> unmarshal[Pong] - } - - Await.result(request, 2 * requestTimeout) shouldBe Pong(message) - } - - it should "allow for a variable number of requests to run in parallel" in { - val connector2 = SprayClientHelpers.getConnectionSetup( - testHost, - testPort, - connectionTimeout, - requestTimeout, - 2 - ) - - val requests2 = Seq.fill(2) { - SprayClientHelpers.sendRequest(Get(s"/sleep/${requestTimeout.toMillis}"), connector2)(identity) - } - - // All we care about here is that the two requests run in parallel. - Await.result(Future.sequence(requests2), requestTimeout * 1.1) - - val requests3 = Seq.fill(3) { - SprayClientHelpers.sendRequest(Get(s"/sleep/${requestTimeout.toMillis}"), connector2)(identity) - } - - // Waiting with the same timeout on 3 requests should fail, indicating there are only 2 - // connectors. - intercept[TimeoutException] { - Await.result(Future.sequence(requests3), requestTimeout * 1.1) - } - } - - it should "support sending requests to a single host via separate connectors" in { - val connectorA = SprayClientHelpers.getConnectionSetup( - testHost, - testPort, - connectionTimeout, - requestTimeout, - 1, - Some("A") - ) - - val connectorB = SprayClientHelpers.getConnectionSetup( - testHost, - testPort, - connectionTimeout, - requestTimeout, - 1, - Some("B") - ) - - val requests = Seq( - SprayClientHelpers.sendRequest(Get(s"/sleep/${requestTimeout.toMillis}"), connectorA)(identity), - SprayClientHelpers.sendRequest(Get(s"/sleep/${requestTimeout.toMillis}"), connectorB)(identity) - ) - - // Again, we only care that the two requests run in parallel even though each host setup only - // has one connector. - Await.result(Future.sequence(requests), requestTimeout * 1.1) - } -} diff --git a/webapp/src/main/scala/org/allenai/common/webapp/BingClient.scala b/webapp/src/main/scala/org/allenai/common/webapp/BingClient.scala deleted file mode 100644 index 226f830..0000000 --- a/webapp/src/main/scala/org/allenai/common/webapp/BingClient.scala +++ /dev/null @@ -1,108 +0,0 @@ -package org.allenai.common.webapp - -import okhttp3._ -import spray.json._ - -import java.net.URLEncoder - -/** A simple case class representing one of the "blue links" from a Bing API query. - */ -case class BingResult( - query: String, - pos: Int, - id: String, - url: String, - title: String, - description: String -) - -/** A client that wraps calls to the Bing API. - * @param apiKey the Azure key - */ -class BingClient(apiKey: String) { - import scala.concurrent.ExecutionContext.Implicits.global - - val client = new OkHttpClient() - - def closeConnection() = { - val req = new Request.Builder() - .url("https://api.cognitive.microsoft.com") - .header("Connection", "close") - .get() - .build(); - client.newCall(req).execute() - } - - /** The new v5 Bing API returns URLs as bing redirects, with the original url in the query - * string as the r= parameter. This extracts that parameter. - * - * @param redirectUrl the Bing redirect url - * @return the original url - */ - def extractUrlFromBingRedirect(redirectUrl: String): Option[String] = { - new java.net.URI(redirectUrl) - .getQuery() - .split('&') - // Find the query param that looks like r=.... - .flatMap("(?s)^r=(.*)$".r.findFirstMatchIn) - // There should be exactly one, but use .headOption to be safe - .headOption - .map(_.group(1)) - } - - /** Synchronously issues a query to the Bing API. - * @param query what to search for - * @param responseFilter ... - * @param top number of desired results, defaults to 10 - * @return all valid results as a sequence - */ - def query(query: String, responseFilter: String = "webpages", top: Int = 10): Seq[BingResult] = { - // Create the URI representing the query - val encodedQuery = "%27" + URLEncoder.encode(query, "UTF-8") + "%27" - val filter = if (responseFilter.isEmpty) "" else s"&responseFilter=${responseFilter}" - - val uri = s"https://api.cognitive.microsoft.com/bing/v5.0/" + - s"search?q=${encodedQuery}&count=${top}${filter}" - - val request = new Request.Builder() - .url(uri) - .header("Ocp-Apim-Subscription-Key", apiKey) - .build() - - val response = client.newCall(request).execute() - val rawData = response.body.string - val json = JsonParser(rawData).asJsObject - - // The results we want are an array at json["webPages"]["value"]. - val rawResults = json - .getFields("webPages").head.asJsObject - .getFields("value").head.asInstanceOf[JsArray] - - // Extract the results from the JsArray and map them to our case class. - rawResults match { - case JsArray(elements) => elements.zipWithIndex.flatMap { - case (jsValue, pos) => - val jsMap = jsValue.asJsObject.fields - for { - id <- getString("id", jsMap) - redirectUrl <- getString("url", jsMap) - url = extractUrlFromBingRedirect(redirectUrl).getOrElse(redirectUrl) - title <- getString("name", jsMap) - description <- getString("snippet", jsMap) - } yield BingResult(query, pos, id, url, title, description) - } - } - } - - /** A helper function to get a string out of a JsObject - * @param key name of the field - * @param jsMap the map of Json fields - * @return the string, or None if unavailable - */ - private def getString(key: String, jsMap: Map[String, JsValue]): Option[String] = { - jsMap.get(key).flatMap { - case JsString(s) => Some(s) - case _ => None - } - } -} diff --git a/webapp/src/main/scala/org/allenai/common/webapp/Directives.scala b/webapp/src/main/scala/org/allenai/common/webapp/Directives.scala deleted file mode 100644 index 379d84f..0000000 --- a/webapp/src/main/scala/org/allenai/common/webapp/Directives.scala +++ /dev/null @@ -1,47 +0,0 @@ -package org.allenai.common.webapp - -import spray.http.{ HttpHeaders, HttpOrigin, SomeOrigins } -import spray.routing.Directive0 -import spray.routing.Directives._ - -/** Helper spray directives. */ -trait Directives { - /** Directive providing CORS header support. This should be included in any application serving - * a REST API that's queried cross-origin (from a different host than the one serving the API). - * See http://www.w3.org/TR/cors/ for full specification. - * @param allowedHostnames the set of hosts that are allowed to query the API. These should - * not include the scheme or port; they're matched only against the hostname of the Origin - * header. - */ - def allowHosts(allowedHostnames: Set[String]): Directive0 = mapInnerRoute { innerRoute => - // Conditionally responds with "allowed" CORS headers, if the request origin's host is in the - // allowed set, or if the request doesn't have an origin. - optionalHeaderValueByType[HttpHeaders.Origin]() { originOption => - // If Origin is set and the host is in our allowed set, add CORS headers and pass through. - originOption flatMap { - case HttpHeaders.Origin(list) => list.find { - case HttpOrigin(_, HttpHeaders.Host(hostname, _)) => allowedHostnames.contains(hostname) - } - } map { goodOrigin => - respondWithHeaders( - Headers.AccessControlAllowHeadersAll, - Headers.AccessControlAllowMethodsAll, - HttpHeaders.`Access-Control-Allow-Origin`(SomeOrigins(Seq(goodOrigin))) - ) { - options { - complete { - "" - } - } ~ - innerRoute - } - } getOrElse { - // Else, pass through without headers. - innerRoute - } - } - } - - def allowHosts(allowedHostnames: String*): Directive0 = allowHosts(allowedHostnames.toSet) -} -object Directives extends Directives diff --git a/webapp/src/main/scala/org/allenai/common/webapp/Headers.scala b/webapp/src/main/scala/org/allenai/common/webapp/Headers.scala deleted file mode 100644 index c0a2dbb..0000000 --- a/webapp/src/main/scala/org/allenai/common/webapp/Headers.scala +++ /dev/null @@ -1,24 +0,0 @@ -package org.allenai.common.webapp - -import spray.http.{ HttpHeaders, HttpMethods } - -/** Helpers for setting HTTP headers. */ -object Headers { - /** Allows any reasonable header to be sent cross-site. */ - val AccessControlAllowHeadersAll = HttpHeaders.`Access-Control-Allow-Headers`( - Seq("Origin", "X-Requested-With", "Content-Type", "Accept") - ) - val AccessControlAllowMethodsAll = HttpHeaders.`Access-Control-Allow-Methods`( - Seq( - HttpMethods.CONNECT, - HttpMethods.DELETE, - HttpMethods.GET, - HttpMethods.HEAD, - HttpMethods.OPTIONS, - HttpMethods.PATCH, - HttpMethods.POST, - HttpMethods.PUT, - HttpMethods.TRACE - ) - ) -} diff --git a/webapp/src/main/scala/org/allenai/common/webapp/InfoRoute.scala b/webapp/src/main/scala/org/allenai/common/webapp/InfoRoute.scala deleted file mode 100644 index db18d96..0000000 --- a/webapp/src/main/scala/org/allenai/common/webapp/InfoRoute.scala +++ /dev/null @@ -1,70 +0,0 @@ -package org.allenai.common.webapp - -import org.allenai.common.Version - -import spray.http.{ MediaTypes, StatusCodes } -import spray.json._ -import spray.json.DefaultJsonProtocol._ -import spray.routing.Directives._ -import spray.routing.Route - -/** Class providing a spray route with common information, handling requests to the /info path. - * Requests to the root of the path return a string with all the info keys separated by newlines, - * while requests to subpaths return the value of the given key, or a 404 for invalid keys. - * - * @param info the info to serve - */ -class InfoRoute(val info: Map[String, String] = Map.empty) { - def withVersion(version: Version): InfoRoute = { - new InfoRoute( - info ++ - Map( - "artifactVersion" -> version.artifactVersion, - "gitVersion" -> version.git.sha1, - "gitDate" -> version.git.commitDate.toString, - "gitDatePretty" -> version.git.prettyCommitDate - ) ++ - version.git.repoUrl.map("gitRepoUrl" -> _) ++ - version.git.commitUrl.map("gitCommitUrl" -> _) ++ - version.cacheKey.map("cacheKey" -> _) - ) - } - - def withName(name: String): InfoRoute = new InfoRoute(info + ("name" -> name)) - - def withStartupTime(startupTime: Long = System.currentTimeMillis()): InfoRoute = - new InfoRoute(info + ("startupTime" -> startupTime.toString)) - - def withCpuCount: InfoRoute = - new InfoRoute(info + ("cpuCount" -> Runtime.getRuntime.availableProcessors().toString)) - - // format: OFF - def route: Route = get { - pathPrefix("info") { - pathEndOrSingleSlash { - respondWithMediaType(MediaTypes.`application/json`) { - complete { - info.toJson.prettyPrint - } - } - } - } ~ - path("info" / Segment) { key => - complete { - info.get(key) match { - case Some(key) => key - case None => (StatusCodes.NotFound, "Could not find info: " + key) - } - } - } ~ - pathPrefix("ichooseyou") { - pathEndOrSingleSlash { - complete { - val name = info.getOrElse("name", "component").toUpperCase - s"A wild $name appeared!" - } - } - } - } - // format: ON -} diff --git a/webapp/src/main/scala/org/allenai/common/webapp/SprayClientHelpers.scala b/webapp/src/main/scala/org/allenai/common/webapp/SprayClientHelpers.scala deleted file mode 100644 index d347ed1..0000000 --- a/webapp/src/main/scala/org/allenai/common/webapp/SprayClientHelpers.scala +++ /dev/null @@ -1,155 +0,0 @@ -package org.allenai.common.webapp - -import akka.actor.ActorSystem -import akka.io.IO -import akka.pattern.ask -import akka.util.Timeout -import spray.can.Http -import spray.can.Http.HostConnectorSetup -import spray.can.client.{ ClientConnectionSettings, HostConnectorSettings } -import spray.http.HttpHeaders.`User-Agent` -import spray.http.{ HttpRequest, HttpResponse } - -import scala.concurrent.Future -import scala.concurrent.duration._ - -/** Utility methods for sending HTTP requests through spray without being tripped-up by the - * nastiness of spray's API / underlying implementation. The two methods in this object are - * intended for use together. - * - * Example: Making quick GET requests with little internal buffering. - * format: OFF - * {{{ - * import SprayClientHelpers._ - * - * // Define parameters for sending requests to the foo service. - * // This can be done once per web client instance as part of initialization, if all of the - * // fields are constant. - * val quickConnectorSetup = getConnectionSetup( - * host = "foo.com", - * port = 1234, - * connectionTimeout = 500.millis, - * requestTimeout = 1.second, - * maxConnections = 100 - * ) - * - * // Define a function that requests a Foo object from the remote service and parses the - * // response JSON. - * def getFoo: Future[Foo] = sendRequest(Get("/foo"), quickConnectorSetup) { response => - * response ~> unmarshal[Foo] - * } - * - * // Completes in ~ 1 second, either with a Seq of 100 Foos or spray's RequestTimeoutException. - * Future.sequence(Seq.fill(100) { getFoo }) - * }}} - * format: ON - * - * Example: Making slow, CPU-intensive POST requests with internal rate-limiting. - * format: OFF - * {{{ - * import SprayClientHelpers._ - * - * val slowConnectorSetup = getConnectorSetup( - * host = "bar.com", - * port = 9876, - * connectionTimeout = 1.second, - * requestTimeout = 10.seconds, - * // Limit the number of in-flight requests at any one time to 4, to avoid overloading the - * // remote service. - * maxConnections = 4 - * ) - * - * def fooToBar(foo: Foo): Future[Bar] = { - * sendRequest(Post("/bar", foo), slowConnectorSetup) { response => - * response ~> unmarshal[Bar] - * } - * } - * - * // Takes longer than 10 seconds! Only 4 requests will be sent over the wire at a time, and - * // the 10-second request timeout doesn't apply until a request gets sent out. - * Future.sequence(Seq.fill(16) { fooToBar(someFoo) }) - * }}} - * format: ON - */ -object SprayClientHelpers { - /** Send an HTTP request through spray using a dedicated `HostConnectorSetup`, and process the - * response using the given function. This gives you much more control over how and when your - * request is sent over the wire / when it times out than does use of `sendReceive`. - * Caveat: this function is designed to practically prevent you from needing to worry about - * catching `AskTimeoutExceptions` when using spray, but it's not actually possible to - * guarantee. If you somehow send so many requests that one is internally buffered for more than - * `Int.MaxValue.millis`, you'll see an `AskTimeoutException` thrown. - * @param request the `HttpRequest` object to send over the wire - * @param connectorSetup the `HostConnectorSetup` object defining connection and timeout - * information for your request. See `getConnectionSetup` for a method of building these - * connectors. - * @param parseResponse a function from `HttpResponse` to a generic value `T` you want to - * extract from your request's response - */ - def sendRequest[T]( - request: HttpRequest, - connectorSetup: HostConnectorSetup - )(parseResponse: HttpResponse => T)(implicit actorSystem: ActorSystem): Future[T] = { - import actorSystem.dispatcher - - // Spray requires a top-level timeout for sending requests into its infrastructure even - // though it has internal timeouts built-in. We set this ridiculously high timeout here so we - // never have to deal with uninformative `AskTimeoutExceptions`. - implicit val askTimeout = Timeout(Int.MaxValue.millis) - - IO(Http).ask((request, connectorSetup)) map { - case response: HttpResponse => parseResponse(response) - } - } - - /** Override spray's default settings for sending requests with the given timeouts. - * Note this automatically sets two different idle timeouts to be `2 * requestTimeout`: - * format: OFF - * 1. The time after which an idle HTTP connection will be automatically closed. - * 2. The time after which idle `HttpHostConnector` actors (internal to spray) without open - * connections will automatically terminate themselves. - * format: ON - * @param host the name of the remote host you want to communicate with - * @param port the port the remote host is listening on - * @param connectionTimeout the timeout to use when establishing a remote connection to the - * remote host - * @param requestTimeout the timeout to use when waiting for a response from the remote host - * @param maxConnections the maximum number of connections to the remote host that will be held - * open at a time by the returned connector - * @param connectorId an optional unique ID to use as the User-Agent header for requests to - * this host. If you want to maintain multiple host connector pools to a single remote host - * (for example, to prevent long-running requests from interfering with quick requests), - * setting this field to different values for each will prevent spray from sharing a connector - * between the different types of requests. - */ - def getConnectionSetup( - host: String, - port: Int, - connectionTimeout: FiniteDuration, - requestTimeout: FiniteDuration, - maxConnections: Int, - connectorId: Option[String] = None - )(implicit system: ActorSystem): HostConnectorSetup = { - val clientConnectionSettings = ClientConnectionSettings(system).copy( - requestTimeout = requestTimeout, - // Amount of time an idle HTTP connection will be held open before being closed. - idleTimeout = requestTimeout * 2, - connectingTimeout = connectionTimeout, - userAgentHeader = connectorId map { `User-Agent`(_) } - ) - - HostConnectorSetup( - host = host, - port = port, - settings = Some( - HostConnectorSettings(system).copy( - // Amount of time one of spray's HostConnector actors will sit idle before terminating - // itself. - idleTimeout = requestTimeout * 2, - maxConnections = maxConnections, - connectionSettings = clientConnectionSettings - ) - ) - ) - } -} diff --git a/webapp/src/test/scala/org/allenai/common/webapp/BingClientSpec.scala b/webapp/src/test/scala/org/allenai/common/webapp/BingClientSpec.scala deleted file mode 100644 index 438ba84..0000000 --- a/webapp/src/test/scala/org/allenai/common/webapp/BingClientSpec.scala +++ /dev/null @@ -1,40 +0,0 @@ -package org.allenai.common.webapp - -import org.allenai.common.testkit.UnitSpec - -import java.util.NoSuchElementException -import scala.concurrent.ExecutionContext.Implicits.global -import scala.util.{ Failure, Success } - -import java.net.URISyntaxException - -class BingClientSpec extends UnitSpec { - // Not perfect, I got it from http://stackoverflow.com/a/20039133/1076346 - def isValidUri(maybeUri: String): Boolean = { - try { - val uri = new java.net.URI(maybeUri) - uri.getHost() != null - } catch { - case e: URISyntaxException => false - } - } - - // If the Azure auth key isn't defined, just skip these tests. - val apiKey = try { - Some(sys.env("AZURE_AUTH_KEY")) - } catch { - case e: NoSuchElementException => None - } - - val bingClient = apiKey.map(new BingClient(_)) - - "bingClient" should "execute a single query" in { - if (bingClient.isDefined) { - val results = bingClient.get.query("aardvark") - assert(results.nonEmpty) - assert(results.map(_.url).forall(isValidUri)) - } else { - cancel("AZURE_AUTH_KEY not defined, skipping test") - } - } -} diff --git a/webapp/src/test/scala/org/allenai/common/webapp/DirectivesSpec.scala b/webapp/src/test/scala/org/allenai/common/webapp/DirectivesSpec.scala deleted file mode 100644 index f0177fd..0000000 --- a/webapp/src/test/scala/org/allenai/common/webapp/DirectivesSpec.scala +++ /dev/null @@ -1,111 +0,0 @@ -package org.allenai.common.webapp - -import org.allenai.common.testkit.UnitSpec - -import spray.http.{ HttpHeader, HttpHeaders, HttpOrigin, SomeOrigins } -import spray.routing.HttpService -import spray.testkit.ScalatestRouteTest - -/** Tests for our custom directives. */ -class DirectivesSpec extends UnitSpec with ScalatestRouteTest with HttpService { - def actorRefFactory = system - - // Test route. Has API and non-API routes. - // format: OFF - val testRoute = - get { path("foo") { complete { "foo" } } } ~ - Directives.allowHosts("localhost", "ari.dev.allenai.org", "ari.prod.allenai.org") { - get { path("api") { complete { "api" } } } - } ~ - Directives.allowHosts("localhost2") { - get{ path("api2") { complete { "api2" } } } - } ~ - get { path("bar") { complete { "bar" } } } - // format: ON - - def allowOriginHeader(hostname: String): HttpHeader = { - HttpHeaders.`Access-Control-Allow-Origin`( - SomeOrigins(Seq(HttpOrigin("http", HttpHeaders.Host(hostname)))) - ) - } - - def addOriginHeader(origin: String): RequestTransformer = { - addHeader(HttpHeaders.Origin(Seq(HttpOrigin("http", HttpHeaders.Host(origin))))) - } - - "jsonApi" should "complete without CORS headers by default" in { - Get("/api") ~> testRoute ~> check { - header[HttpHeaders.`Access-Control-Allow-Origin`] should be(None) - header[HttpHeaders.`Access-Control-Allow-Headers`] should be(None) - responseAs[String] should be("api") - } - } - it should "complete directives before the api directive" in { - Get("/foo") ~> addOriginHeader("localhost") ~> testRoute ~> check { - header[HttpHeaders.`Access-Control-Allow-Origin`] should be(None) - header[HttpHeaders.`Access-Control-Allow-Headers`] should be(None) - responseAs[String] should be("foo") - } - } - it should "complete directives after the api directive" in { - Get("/bar") ~> addOriginHeader("localhost") ~> testRoute ~> check { - header[HttpHeaders.`Access-Control-Allow-Origin`] should be(None) - header[HttpHeaders.`Access-Control-Allow-Headers`] should be(None) - responseAs[String] should be("bar") - } - } - it should "complete with CORS headers when given a matching origin" in { - Get("/api") ~> addOriginHeader("localhost") ~> testRoute ~> check { - header[HttpHeaders.`Access-Control-Allow-Origin`] should be( - Some(allowOriginHeader("localhost")) - ) - header[HttpHeaders.`Access-Control-Allow-Headers`] should be( - Some(Headers.AccessControlAllowHeadersAll) - ) - responseAs[String] should be("api") - } - } - it should "ignore ports and non-HTTP schemes" in { - val origin = HttpOrigin("https", HttpHeaders.Host("ari.dev.allenai.org", 8081)) - Get("/api") ~> addHeader(HttpHeaders.Origin(Seq(origin))) ~> testRoute ~> check { - header[HttpHeaders.`Access-Control-Allow-Origin`] should be( - Some(HttpHeaders.`Access-Control-Allow-Origin`(SomeOrigins(Seq(origin)))) - ) - header[HttpHeaders.`Access-Control-Allow-Headers`] should be( - Some(Headers.AccessControlAllowHeadersAll) - ) - responseAs[String] should be("api") - } - } - it should "complete an OPTIONS request" in { - Options("/api") ~> addOriginHeader("localhost") ~> testRoute ~> check { - header[HttpHeaders.`Access-Control-Allow-Origin`] should be( - Some(allowOriginHeader("localhost")) - ) - header[HttpHeaders.`Access-Control-Allow-Headers`] should be( - Some(Headers.AccessControlAllowHeadersAll) - ) - } - } - it should "complete properly to a secondary api" in { - Get("/api2") ~> addOriginHeader("localhost2") ~> testRoute ~> check { - header[HttpHeaders.`Access-Control-Allow-Origin`] should be( - Some(allowOriginHeader("localhost2")) - ) - header[HttpHeaders.`Access-Control-Allow-Headers`] should be( - Some(Headers.AccessControlAllowHeadersAll) - ) - responseAs[String] should be("api2") - } - } - it should "complete an OPTIONS request to a seconary api" in { - Options("/api2") ~> addOriginHeader("localhost2") ~> testRoute ~> check { - header[HttpHeaders.`Access-Control-Allow-Origin`] should be( - Some(allowOriginHeader("localhost2")) - ) - header[HttpHeaders.`Access-Control-Allow-Headers`] should be( - Some(Headers.AccessControlAllowHeadersAll) - ) - } - } -} From 5091e05704fb12ef53c34c51efaad1d76f826cf4 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:32:41 -0700 Subject: [PATCH 05/69] Remove testkit items that require Akka --- .../allenai/common/testkit/ActorSpec.scala | 27 -------------- .../common/testkit/ActorSpecSpec.scala | 37 ------------------- 2 files changed, 64 deletions(-) delete mode 100644 testkit/src/main/scala/org/allenai/common/testkit/ActorSpec.scala delete mode 100644 testkit/src/test/scala/org/allenai/common/testkit/ActorSpecSpec.scala diff --git a/testkit/src/main/scala/org/allenai/common/testkit/ActorSpec.scala b/testkit/src/main/scala/org/allenai/common/testkit/ActorSpec.scala deleted file mode 100644 index 0d1122c..0000000 --- a/testkit/src/main/scala/org/allenai/common/testkit/ActorSpec.scala +++ /dev/null @@ -1,27 +0,0 @@ -package org.allenai.common.testkit - -import akka.actor.ActorSystem -import akka.testkit.ImplicitSender -import akka.testkit.TestKit - -import org.scalatest._ - -/** Base class for Akka Actor integration specs - * - * By extending akka.testkit.TestKit and akka.testkit.ImplicitSender, - * we get many helpers for testing Actors against a live actor system. - * - * For more information on Akka TestKit, see: http://goo.gl/3SE4zz - */ -abstract class ActorSpec(actorSystem: ActorSystem) - extends TestKit(actorSystem) - with AllenAiBaseSpec - with ImplicitSender - with FutureHelpers - with BeforeAndAfterAll { - - /** Ensure the actor system is shutdown once all tests are complete or have failed */ - override def afterAll { - TestKit.shutdownActorSystem(system) - } -} diff --git a/testkit/src/test/scala/org/allenai/common/testkit/ActorSpecSpec.scala b/testkit/src/test/scala/org/allenai/common/testkit/ActorSpecSpec.scala deleted file mode 100644 index bdec763..0000000 --- a/testkit/src/test/scala/org/allenai/common/testkit/ActorSpecSpec.scala +++ /dev/null @@ -1,37 +0,0 @@ -package org.allenai.common.testkit - -import akka.actor._ - -object ActorSpecSpec { - case class Message(value: String, replies: Int = 1) - - class EchoActor extends Actor { - override def receive = { - case Message("ping", replies) => (1 to replies) foreach { _ => sender ! "pong" } - case Message(x, replies) => (1 to replies) foreach { _ => sender ! x } - } - } -} - -class ActorSpecSpec(actorSystem: ActorSystem) extends ActorSpec(actorSystem) { - import ActorSpecSpec._ - - def this() = this(ActorSystem("ActorSpecSpec")) - - "ActorSpec" should "test for expected message" in { - val echo = system.actorOf(Props[EchoActor]) - echo ! Message("ping") - - // expectMsg is a helper provided by the Akka TestKit - expectMsg("pong") - } - - it should "test for N expected messages" in { - val echo = system.actorOf(Props[EchoActor]) - val messageCount = 10 - echo ! Message("hi", messageCount) - - // receiveN is a helper provided by the Akka TestKit - receiveN(messageCount) - } -} From c15141acae272c7b584ee0594598251a902d9a33 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:32:49 -0700 Subject: [PATCH 06/69] Remove Akka from testkit --- testkit/build.sbt | 3 --- 1 file changed, 3 deletions(-) diff --git a/testkit/build.sbt b/testkit/build.sbt index ec58de1..9a060cb 100644 --- a/testkit/build.sbt +++ b/testkit/build.sbt @@ -3,9 +3,6 @@ import Dependencies._ name := "common-testkit" libraryDependencies ++= Seq( - akkaModule("actor") % Provided, - akkaModule("actor") % Test, - akkaModule("testkit"), scalaCheck, scalaTest, pegdown From 84d9008b969ec73daa1051618dcd9c691bffa674 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:33:18 -0700 Subject: [PATCH 07/69] Remove Version + ComponentId as these are coupled to sbt-plugins --- .../org/allenai/common/ComponentId.scala | 13 -- .../scala/org/allenai/common/Version.scala | 142 ------------------ .../org/allenai/common/VersionSpec.scala | 72 --------- 3 files changed, 227 deletions(-) delete mode 100644 core/src/main/scala/org/allenai/common/ComponentId.scala delete mode 100644 core/src/main/scala/org/allenai/common/Version.scala delete mode 100644 core/src/test/scala/org/allenai/common/VersionSpec.scala diff --git a/core/src/main/scala/org/allenai/common/ComponentId.scala b/core/src/main/scala/org/allenai/common/ComponentId.scala deleted file mode 100644 index c1527e1..0000000 --- a/core/src/main/scala/org/allenai/common/ComponentId.scala +++ /dev/null @@ -1,13 +0,0 @@ -package org.allenai.common - -/** A specific version of a component in AI2. This fully-describes a running system - it contains - * enough information to re-deploy the version that generated the ComponentId instance. - * @param name a human-readable name of the component - * @param version the version of the component that was running - */ -case class ComponentId(name: String, version: Version) - -object ComponentId { - import spray.json.DefaultJsonProtocol._ - implicit val componentIdJsonFormat = jsonFormat2(ComponentId.apply) -} diff --git a/core/src/main/scala/org/allenai/common/Version.scala b/core/src/main/scala/org/allenai/common/Version.scala deleted file mode 100644 index b296376..0000000 --- a/core/src/main/scala/org/allenai/common/Version.scala +++ /dev/null @@ -1,142 +0,0 @@ -package org.allenai.common - -import org.allenai.common.Config._ -import org.allenai.common.json._ - -import com.typesafe.config.ConfigFactory -import spray.json.{ JsNumber, JsObject, JsString, JsValue, RootJsonFormat } - -import scala.collection.JavaConverters._ - -import java.util.Date - -/** Represents a git version. - * @param sha1 the output of `git sha1` in the repository - * @param commitDate commit date in milliseconds - * @param repoUrl the url of the git repo - */ -case class GitVersion(sha1: String, commitDate: Long, repoUrl: Option[String]) { - /** A URL pointing to the specific commit on GitHub. */ - def commitUrl: Option[String] = { - repoUrl.map { base => - base + "/commit/" + sha1 - } - } - - /** @return a formatted date string */ - def prettyCommitDate: String = { - String.format("%1$tF %1$tT GMT%1$tz", new Date(commitDate)) - } -} - -object GitVersion { - import spray.json.DefaultJsonProtocol._ - implicit val gitVersionFormat = jsonFormat3(GitVersion.apply) - - /** The GitHub project URL. - * - * The remotes are searched for one with user "allenai" and then it's transformed into a valid - * GitHub project URL. - * @return a URL to a GitHub repo, or None if no allenai remotes exist - */ - def projectUrl(remotes: Seq[String], user: String): Option[String] = { - val sshRegex = """git@github.com:([\w-]+)/([\w-]+).git""".r - val httpsRegex = """https://github.com/([\w-]+)/([\w-]+).git""".r - - remotes.collect { - case sshRegex(u, repo) if u == user => s"http://github.com/$user/$repo" - case httpsRegex(u, repo) if u == user => s"http://github.com/$user/$repo" - }.headOption - } - - def create(sha1: String, commitDate: Long, remotes: Seq[String]) = { - GitVersion(sha1, commitDate, projectUrl(remotes, "allenai")) - } -} - -/** Represents the version of this component. Should be built with the `fromResources` method on the - * companion object. - * @param git the git version (commit information) of the build. - * @param artifactVersion the version of the artifact in the build. - * @param cacheKey a cacheKey of the project. Changes on git commits to src of project and - * dependency changes. - */ -case class Version( - git: GitVersion, - artifactVersion: String, - cacheKey: Option[String] -) { - @deprecated("Use artifactVersion instead.", "2014.09.09-1-SNAPSHOT") - def artifact = artifactVersion -} - -object Version { - /** Load a Version instance from the resources injected by the - * [[https://git.io/vzdZl Version injector sbt plugin]]. - * This attempts to load using [[Version]]'s class loader. - * @param org the value of the sbt key `organization` to find - * @param name the value of the sbt key `name` to find - */ - def fromResources(org: String, name: String): Version = { - fromResources(org, name, this.getClass.getClassLoader) - } - - /** Load a Version instance from the resources injected by the - * [[https://git.io/vzdZl Version injector sbt plugin]]. - * This attempts to load using the given class loader. - * @param org the value of the sbt key `organization` to find - * @param name the value of the sbt key `name` to find - * @param classLoader the class loader to use - */ - def fromResources(org: String, name: String, classLoader: ClassLoader): Version = { - val prefix = s"$org/${name.replaceAll("-", "")}" - - val artifactConfPath = s"$prefix/artifact.conf" - val gitConfPath = s"$prefix/git.conf" - - val artifactConfUrl = classLoader.getResource(artifactConfPath) - val gitConfUrl = classLoader.getResource(gitConfPath) - - require(artifactConfUrl != null, s"Could not find $artifactConfPath") - require(gitConfUrl != null, s"Could not find $gitConfPath") - - val artifactConf = ConfigFactory.parseURL(artifactConfUrl) - val gitConf = ConfigFactory.parseURL(gitConfUrl) - val artifactVersion = artifactConf[String]("version") - val sha1 = gitConf[String]("sha1") - val commitDate = gitConf[Long]("date") - val remotes = gitConf.getStringList("remotes").asScala - val cacheKey = Option(System.getProperty("application.cacheKey")) - Version(GitVersion.create(sha1, commitDate, remotes), artifactVersion, cacheKey) - } - - /** Custom JSON serialization for backwards-compatibility. */ - implicit val versionJsonFormat = new RootJsonFormat[Version] { - import spray.json.DefaultJsonProtocol._ - override def write(version: Version): JsValue = { - val baseJson = JsObject( - "git" -> JsString(version.git.sha1), - "commitDate" -> JsNumber(version.git.commitDate), - "artifact" -> JsString(version.artifactVersion) - ) - version.git.repoUrl match { - case Some(repoUrl) => baseJson.pack("repoUrl" -> repoUrl) - case _ => baseJson - } - version.cacheKey match { - case Some(cacheKey) => baseJson.pack("cacheKey" -> cacheKey) - case _ => baseJson - } - } - - override def read(json: JsValue): Version = { - val jsObject = json.asJsObject - val gitSha1 = jsObject.apply[String]("git") - val commitDate = jsObject.apply[Long]("commitDate") - val artifactVersion = jsObject.apply[String]("artifact") - val repoUrl = jsObject.get[String]("repoUrl") - val cacheKey = jsObject.get[String]("cacheKey") - Version(GitVersion(gitSha1, commitDate, repoUrl), artifactVersion, cacheKey) - } - } -} diff --git a/core/src/test/scala/org/allenai/common/VersionSpec.scala b/core/src/test/scala/org/allenai/common/VersionSpec.scala deleted file mode 100644 index d9a44aa..0000000 --- a/core/src/test/scala/org/allenai/common/VersionSpec.scala +++ /dev/null @@ -1,72 +0,0 @@ -package org.allenai.common - -import org.allenai.common.JsonFormats._ -import org.allenai.common.testkit.UnitSpec - -import spray.json._ -import spray.json.DefaultJsonProtocol._ - -import java.net.URLClassLoader -import java.nio.file.Paths - -import scala.util.{ Try, Success, Failure } - -class GitVersionSpec extends UnitSpec { - "create" should "find the correct GitHub project URL (ssh)" in { - val version = GitVersion.create("gitSha", 1234, Seq( - "https://github.com/schmmd/parsers.git", - "git@github.com:allenai/common.git" - )) - version.repoUrl shouldBe Some("http://github.com/allenai/common") - } - - it should "find the correct GitHub project URL (https)" in { - val version = GitVersion.create("gitSha", 1234, Seq( - "https://github.com/allenai/ari-datastore.git", - "git@github.com:schmmd/common.git" - )) - version.repoUrl shouldBe Some("http://github.com/allenai/ari-datastore") - } - - it should "find the correct GitHub commit URL" in { - val version = GitVersion.create("e0d972e185bd12b94dedd38834fea150a68f064e", 1234, - Seq("https://github.com/allenai/parsers.git", "git@github.com:schmmd/common.git")) - version.commitUrl shouldBe - Some("http://github.com/allenai/parsers/commit/e0d972e185bd12b94dedd38834fea150a68f064e") - } -} - -class VersionSpec extends UnitSpec { - "Version" should "be backwards compatible for reading" in { - val json = """{ - "git":"0144af4325992689cf5fd6d0e3c2d744b25935d6", - "artifact":"2014.07.21-0-SNAPSHOT","commitDate":1412094251000 - }""" - json.parseJson.convertTo[Version] shouldBe - Version( - GitVersion("0144af4325992689cf5fd6d0e3c2d744b25935d6", 1412094251000L, None), - "2014.07.21-0-SNAPSHOT", - None - ) - } - - "fromResources" should "find common-core's resources" in { - val version = Version.fromResources("org.allenai.common", "common-core") - // No asserts; this will throw an exception if it's unfound. - } - - it should "find a resource using a class loader" in { - val expectedVersion = Version( - GitVersion("sha123", 123456789L, None), - "1.0.0", - None - ) - val classpath = Paths.get("src/test/resources/fakejar").toAbsolutePath.toUri.toURL - val version = Version.fromResources( - "org.fakeorg", - "project-name", - new URLClassLoader(Array(classpath)) - ) - version shouldBe expectedVersion - } -} From 012ee4107e5344bd020960c2b88c252b42230492 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:33:55 -0700 Subject: [PATCH 08/69] Remove Akka from Guice module --- guice/build.sbt | 1 - .../common/guice/ActorSystemModule.scala | 29 ------------------- 2 files changed, 30 deletions(-) delete mode 100644 guice/src/main/scala/org/allenai/common/guice/ActorSystemModule.scala diff --git a/guice/build.sbt b/guice/build.sbt index 3c1d272..7488373 100644 --- a/guice/build.sbt +++ b/guice/build.sbt @@ -3,7 +3,6 @@ import Dependencies._ name := "common-guice" libraryDependencies ++= Seq( - akkaActor, scalaGuice, typesafeConfig ) diff --git a/guice/src/main/scala/org/allenai/common/guice/ActorSystemModule.scala b/guice/src/main/scala/org/allenai/common/guice/ActorSystemModule.scala deleted file mode 100644 index 2f1e489..0000000 --- a/guice/src/main/scala/org/allenai/common/guice/ActorSystemModule.scala +++ /dev/null @@ -1,29 +0,0 @@ -package org.allenai.common.guice - -import akka.actor.{ ActorRefFactory, ActorSystem } -import net.codingwell.scalaguice.ScalaModule - -import scala.concurrent.ExecutionContext - -/** Module that binds ActorSystem and its associated ExecutionContext. The ActorSystem will also be - * bound to ActorRefFactory. - * @param bindingName optional name to create the bindings under - * @param actorSystem the actor system to bind - */ -class ActorSystemModule(bindingName: Option[String] = None)(implicit actorSystem: ActorSystem) - extends ScalaModule { - override def configure(): Unit = { - bindingName match { - case Some(name) => { - bind[ActorRefFactory].annotatedWithName(name).toInstance(actorSystem) - bind[ActorSystem].annotatedWithName(name).toInstance(actorSystem) - bind[ExecutionContext].annotatedWithName(name).toInstance(actorSystem.dispatcher) - } - case None => { - bind[ActorRefFactory].toInstance(actorSystem) - bind[ActorSystem].toInstance(actorSystem) - bind[ExecutionContext].toInstance(actorSystem.dispatcher) - } - } - } -} From 1447581b2d7e776a7e0396fb79d96eada27f7724 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:34:24 -0700 Subject: [PATCH 09/69] Copy Dependencies and update minimally where required for 2.12 --- project/Dependencies.scala | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index c408548..c724aab 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -10,9 +10,6 @@ object Dependencies { val commonsIO = "commons-io" % "commons-io" % "2.4" - // TODO(jkinkead) Break circular dependency between common and datastore - val datastore = "org.allenai" %% "datastore" % "1.0.0" - val elasticSearch = "org.elasticsearch" % "elasticsearch" % "2.3.3" val jedis = "redis.clients" % "jedis" % "2.7.2" @@ -28,41 +25,32 @@ object Dependencies { val pegdown = "org.pegdown" % "pegdown" % "1.4.2" - val scalaGuice = "net.codingwell" %% "scala-guice" % "4.0.1" - - val scalaCheck = "org.scalacheck" %% "scalacheck" % "1.11.4" - - val scalaTest = "org.scalatest" %% "scalatest" % "2.2.1" + val scalaGuice = "net.codingwell" %% "scala-guice" % "4.2.6" - val defaultAkkaVersion = "2.4.10" + val scalaCheck = "org.scalacheck" %% "scalacheck" % "1.14.0" - def akkaModule(id: String, version: String = defaultAkkaVersion): ModuleID = - "com.typesafe.akka" %% s"akka-$id" % version + val scalaTest = "org.scalatest" %% "scalatest" % "3.0.8" - val akkaActor = akkaModule("actor") exclude ("com.typesafe", "config") - val akkaTestkit = akkaModule("testkit") - - val sprayVersion = "1.3.3" - def sprayModule(id: String): ModuleID = "io.spray" %% s"spray-$id" % sprayVersion - val sprayRouting = sprayModule("routing") - val sprayClient = sprayModule("client") - val sprayTestkit = sprayModule("testkit") - - // Spray json (separate from Spray toolkit) - val sprayJson = "io.spray" %% "spray-json" % "1.3.2" + val sprayJson = "io.spray" %% "spray-json" % "1.3.5" val typesafeConfig = "com.typesafe" % "config" % "1.2.1" - val scopt = "com.github.scopt" %% "scopt" % "3.3.0" + val scopt = "com.github.scopt" %% "scopt" % "3.7.1" object Logging { - val slf4jVersion = "1.7.10" - val logbackVersion = "1.1.2" + val slf4jVersion = "1.7.28" + val logbackVersion = "1.2.3" // The logging API to use. This should be the only logging dependency of any API artifact // (anything that's going to be depended on outside of this SBT project). val slf4jApi = "org.slf4j" % "slf4j-api" % slf4jVersion val logbackCore = "ch.qos.logback" % "logback-core" % logbackVersion val logbackClassic = "ch.qos.logback" % "logback-classic" % logbackVersion + + val loggingDependencyOverrides = Seq( + Logging.slf4jApi, + Logging.logbackCore, + Logging.logbackClassic + ) } } From 991d9e4759983674024744c1a80f4b5161c350bb Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:35:08 -0700 Subject: [PATCH 10/69] Explicit cast to avoid compiler failure --- core/src/main/scala/org/allenai/common/json/package.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/allenai/common/json/package.scala b/core/src/main/scala/org/allenai/common/json/package.scala index 369fe3f..ce26c68 100644 --- a/core/src/main/scala/org/allenai/common/json/package.scala +++ b/core/src/main/scala/org/allenai/common/json/package.scala @@ -39,7 +39,9 @@ package object json { * @param packedFormats */ def unpackOptWith[T](packedFormats: PackedJsonFormat[_ <: T] *): Option[T] = { - val unpacks: Seq[PartialFunction[JsValue, T]] = packedFormats map (_.unpack) + val unpacks: Seq[PartialFunction[JsValue, T]] = packedFormats + .map(_.asInstanceOf[PackedJsonFormat[T]]) + .map(_.unpack) val combinedUnpack = unpacks reduce (_ orElse _) combinedUnpack.lift(jsObj) } From e037502a5cecea8bfcd00439ec05607400392a90 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:35:45 -0700 Subject: [PATCH 11/69] HTML is escaped in logger (within table rows) --- .../src/test/scala/org/allenai/common/LoggingConfigSpec.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala b/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala index 016b6b8..de8a15f 100644 --- a/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala +++ b/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala @@ -44,10 +44,12 @@ class LoggingConfigSpec extends UnitSpec with Logging { ) .setLevel(Level.INFO) + // Tags will be escaped for rendering inside table cell of HTML logger logger.info("html") assert( - Source.fromFile(path.toString).mkString.contains("html") + Source.fromFile(path.toString).mkString.contains( + "<i>html</i>") ) } } From dce06ea6703e5ee726e9446822da392855addc4b Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:36:03 -0700 Subject: [PATCH 12/69] Fix strange compiler issue returning Tuple2 from Timing.time --- core/src/test/scala/org/allenai/common/ParIteratorSpec.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala b/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala index d7223ae..15384ec 100644 --- a/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala +++ b/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala @@ -81,7 +81,7 @@ class ParIteratorSpec extends UnitSpec { val values = Range(0, max).reverse val iter = values.toIterator val expected = values.map { i => s"$i" } - val time = Timing.time { + val time: Duration = Timing.time { val result = iter.parMap { i => Thread.sleep(i * 100) s"$i" @@ -89,7 +89,8 @@ class ParIteratorSpec extends UnitSpec { assert(expected === result.toSeq) } - assert(time < ((max * 100) millis) + (50 millis)) + val limit: Duration = ((max * 100) millis) + (50 millis) + assert(time < limit) } it should "map lots of things concurrently" in { From 48c7304d1d1f5ce184b05e9a4ca25595a61a48d3 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:36:27 -0700 Subject: [PATCH 13/69] SBT 1.2.8 + bump version to 2.0-SNAPSHOT --- project/build.properties | 2 +- version.sbt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/project/build.properties b/project/build.properties index 080a737..c0bab04 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.3.0 +sbt.version=1.2.8 diff --git a/version.sbt b/version.sbt index a2b7cd7..a25483b 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -version in ThisBuild := "1.4.11-SNAPSHOT" \ No newline at end of file +version in ThisBuild := "2.0.0-SNAPSHOT" \ No newline at end of file From c87e94e27217602d432616cc6ba58c53ce4d8e71 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:37:36 -0700 Subject: [PATCH 14/69] Welcome to 2019 build.sbt... * Removes webapp and indexing modules coupled to EOL spray framework * Moves settings from shared sbt-plugin (as few as possible) * Adds cross-version building! --- build.sbt | 62 +++++++++++++++++++------------------------------------ 1 file changed, 21 insertions(+), 41 deletions(-) diff --git a/build.sbt b/build.sbt index f16c8d6..af403f2 100644 --- a/build.sbt +++ b/build.sbt @@ -3,26 +3,37 @@ import Dependencies._ lazy val scala211 = "2.11.12" lazy val scala212 = "2.12.9" lazy val scala213 = "2.13.0" -lazy val supportedScalaVersions = List(scala211) +lazy val supportedScalaVersions = List(scala212, scala211) ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "1.4.11-SNAPSHOT" -ThisBuild / scalaVersion := scala211 +ThisBuild / version := "2.0.0-SNAPSHOT" +ThisBuild / scalaVersion := scala213 lazy val common = (project in file(".")) .aggregate(cache, core, guice, - indexing, - testkit, - webapp) + testkit) .settings( crossScalaVersions := Nil, publish / skip := true, buildSettings ) +lazy val spray = "spray" at "http://repo.spray.io/" +lazy val typesafeReleases = "Typesafe Releases" at "http://repo.typesafe.com/typesafe/releases/" + +lazy val projectSettings = Seq( + fork := true, + javaOptions += s"-Dlogback.appname=${name.value}", + scalacOptions ++= Seq("-target:jvm-1.8", "-Xlint", "-deprecation", "-feature"), + javacOptions ++= Seq("-source", "1.8", "-target", "1.8"), + resolvers ++= Seq(spray, Resolver.jcenterRepo, typesafeReleases), + dependencyOverrides ++= Logging.loggingDependencyOverrides +) + lazy val buildSettings = Seq( + crossScalaVersions := supportedScalaVersions, organization := "org.allenai.common", publishMavenStyle := true, publishArtifact in Test := false, @@ -48,47 +59,16 @@ lazy val buildSettings = Seq( ) lazy val cache = Project(id = "cache", base = file("cache")) - .settings( - crossScalaVersions := supportedScalaVersions, - buildSettings - ) + .settings(buildSettings) .dependsOn(core, testkit % "test->compile") lazy val core = Project(id = "core", base = file("core")) - .settings( - crossScalaVersions := supportedScalaVersions, - buildSettings - ) + .settings(buildSettings) .dependsOn(testkit % "test->compile") lazy val guice = Project(id = "guice", base = file("guice")) - .settings( - crossScalaVersions := supportedScalaVersions, - buildSettings - ) - .dependsOn(core, testkit % "test->compile") - -lazy val indexing = Project(id = "indexing", base = file("indexing")) - .settings( - crossScalaVersions := supportedScalaVersions, - buildSettings - ) + .settings(buildSettings) .dependsOn(core, testkit % "test->compile") lazy val testkit = Project(id = "testkit", base = file("testkit")) - .settings( - crossScalaVersions := supportedScalaVersions, - buildSettings - ) - -lazy val webapp = Project(id = "webapp", base = file("webapp")) - .settings( - crossScalaVersions := supportedScalaVersions, - buildSettings, - libraryDependencies ++= Seq( - "org.scala-lang" % "scala-reflect" % scalaVersion.value - ) - ) - .dependsOn(core, testkit % "test->compile") - - + .settings(buildSettings) From 7701dee9e2f742010336783cf0e798cbbbc035fc Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:39:31 -0700 Subject: [PATCH 15/69] Clarify that 2.13 is not supported yet --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index af403f2..4f4cb70 100644 --- a/build.sbt +++ b/build.sbt @@ -2,7 +2,7 @@ import Dependencies._ lazy val scala211 = "2.11.12" lazy val scala212 = "2.12.9" -lazy val scala213 = "2.13.0" +lazy val scala213 = "2.13.0" // Not supported yet (collections changes required) lazy val supportedScalaVersions = List(scala212, scala211) ThisBuild / organization := "org.allenai.common" From c5a71f2e7266862cb339bbc1c91930db25e0e4c0 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Tue, 10 Sep 2019 13:52:50 -0700 Subject: [PATCH 16/69] Remove testing 2.13 setup since not planning to support immediately --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 4f4cb70..bf04d8a 100644 --- a/build.sbt +++ b/build.sbt @@ -7,7 +7,7 @@ lazy val supportedScalaVersions = List(scala212, scala211) ThisBuild / organization := "org.allenai.common" ThisBuild / version := "2.0.0-SNAPSHOT" -ThisBuild / scalaVersion := scala213 +ThisBuild / scalaVersion := scala212 lazy val common = (project in file(".")) .aggregate(cache, From 05a35d247e53cd315fa63588a0691a8cc040c4c9 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Wed, 11 Sep 2019 13:17:19 -0700 Subject: [PATCH 17/69] Add new CI per recommendation of Beaker team --- .circleci/config.yml | 40 ++++++++++++++++++++++++++++++++ build.sbt | 25 ++++++++++++++++++-- project/plugins.sbt | 4 ++++ publish.sh | 54 -------------------------------------------- shippable.yml | 22 ------------------ 5 files changed, 67 insertions(+), 78 deletions(-) create mode 100644 .circleci/config.yml delete mode 100644 publish.sh delete mode 100644 shippable.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..dd97d71 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,40 @@ +version: 2 +jobs: + build: + working_directory: ~/common + docker: + - image: openjdk:8 + environment: + SBT_VERSION: 1.2.8 + steps: + - run: echo 'export ARTIFACT_BUILD=$CIRCLE_PROJECT_REPONAME-$CIRCLE_BUILD_NUM.zip' >> $BASH_ENV + - run: + name: Get sbt binary + command: | + apt update && apt install -y curl + curl -L -o sbt-$SBT_VERSION.deb https://dl.bintray.com/sbt/debian/sbt-$SBT_VERSION.deb + dpkg -i sbt-$SBT_VERSION.deb + rm sbt-$SBT_VERSION.deb + apt-get update && apt-get clean && apt-get autoclean + - checkout + - restore_cache: + # Read about caching dependencies: https://circleci.com/docs/2.0/caching/ + key: sbt-cache + - run: + name: Clean package + command: cat /dev/null | sbt clean + - run: + name: Test package + command: cat /dev/null | sbt +test + - run: + name: Check scalastyle + command: cat /dev/null | sbt scalastyle test:scalastyle + - store_artifacts: # for display in Artifacts: https://circleci.com/docs/2.0/artifacts/ + path: target/universal/common.zip + destination: common + - save_cache: + key: sbt-cache + paths: + - "~/.ivy2/cache" + - "~/.sbt" + - "~/.m2" diff --git a/build.sbt b/build.sbt index bf04d8a..b51c8f0 100644 --- a/build.sbt +++ b/build.sbt @@ -1,4 +1,5 @@ import Dependencies._ +import scalariform.formatter.preferences._ lazy val scala211 = "2.11.12" lazy val scala212 = "2.12.9" @@ -10,10 +11,12 @@ ThisBuild / version := "2.0.0-SNAPSHOT" ThisBuild / scalaVersion := scala212 lazy val common = (project in file(".")) - .aggregate(cache, + .aggregate( + cache, core, guice, - testkit) + testkit + ) .settings( crossScalaVersions := Nil, publish / skip := true, @@ -32,7 +35,25 @@ lazy val projectSettings = Seq( dependencyOverrides ++= Logging.loggingDependencyOverrides ) +lazy val scalastyleUrl = Some(url("https://raw.githubusercontent.com/allenai/sbt-plugins/master/src/main/resources/allenai-style-config.xml")) + lazy val buildSettings = Seq( + scalastyleConfigUrl := scalastyleUrl, + scalastyleConfigRefreshHours := 24, + scalastyleFailOnError := true, + + (scalastyleConfigUrl in Test) := scalastyleUrl, + (scalastyleConfigRefreshHours in Test) := 24, + (scalastyleFailOnError in Test) := true, + + scalariformAutoformat := false, + scalariformPreferences := scalariformPreferences.value + .setPreference(MultilineScaladocCommentsStartOnFirstLine, true) + .setPreference(PlaceScaladocAsterisksBeneathSecondAsterisk, true) + .setPreference(DanglingCloseParenthesis, Preserve) + .setPreference(DoubleIndentMethodDeclaration, true) + .setPreference(NewlineAtEndOfFile, true), + crossScalaVersions := supportedScalaVersions, organization := "org.allenai.common", publishMavenStyle := true, diff --git a/project/plugins.sbt b/project/plugins.sbt index 945dd61..6f6df7b 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1 +1,5 @@ addSbtPlugin("org.foundweekends" % "sbt-bintray" % "0.5.4") + +addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.3") + +addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") diff --git a/publish.sh b/publish.sh deleted file mode 100644 index 1b9eee0..0000000 --- a/publish.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -set -e - -# Publish to BinTray if the HEAD commit is tagged with a version number. -if [ "$PULL_REQUEST_NUMBER" ]; then - echo "Semaphore is building a pull request, not publishing." - echo "PULL_REQUEST_NUMBER is equal to $PULL_REQUEST_NUMBER" - exit 0 -fi - -if [ "$BRANCH_NAME" != "master" ]; then - echo "Semaphore is building on branch $BRANCH_NAME, not publishing." - echo "BRANCH_NAME is equal to $BRANCH_NAME" - exit 0 -fi - -numParents=`git log --pretty=%P -n 1 | wc -w | xargs` -if [ $numParents -ne 2 ]; then - echo "$numParents parent commits of HEAD when exactly 2 expected, not publishing." - exit 0 -fi - -# One build is run for the merge to master, so we need to list all tags from the merged commits. -firstMergedCommit=`git rev-list HEAD^2 --not HEAD^1 | tail -n 1` -echo "First merged commit: $firstMergedCommit" - -tags=$(git tag --contains $firstMergedCommit) - -if [ `echo "$tags" | wc -l` -eq 0 ]; then - echo "No tags found in merged commits, not publishing." - exit 0 -fi - -if [ `echo "$tags" | wc -l` -gt 1 ]; then - echo "Multiple tags found in merged commits, not publishing." - echo "$tags" - exit 0 -fi - -tag=$tags - -echo "Merged commits contain tag: $tag" - -if [[ $tag =~ ^v[0-9]+\..* ]]; then - echo "Going to release from tag $tag" - version=$(echo $tag | sed -e s/^v//) - - git checkout $tag - sbt publish - echo "Successfully published artifact." - - exit 0 -fi diff --git a/shippable.yml b/shippable.yml deleted file mode 100644 index 20e52fc..0000000 --- a/shippable.yml +++ /dev/null @@ -1,22 +0,0 @@ -language: scala -scala: -- 2.11.5 -jdk: -- oraclejdk8 -cache: true -before_install: -# Update dependencies in before_install so they are accounted for separately -# when timing the build. transitiveUpdate does not try to update dependencies -# that are already cachedo n the filesystem. -- sbt transitiveUpdate -script: -- sbt test formatCheckStrict styleCheckStrict -after_script: -- admin/create-bintray-credentials.sh -- admin/publish-release.sh -- mkdir -p shippable/testresults -- cp */target/test-reports/*.xml shippable/testresults || true -# Credentials for publishing. -env: -- secure: tx0Cd+p3iKJLJr2M7JoFU3J5Z7OqR87MqDiMg34j7RD6aSD2XD6TTZxyr0WRyqa0w0m3o+g9eKCERd2s17b5UEIcFCB+TwligpPZHVsxW2q77Y4AuRhbRizR86GJmdinW3BRGpWeNEumbB7dcqpuViykJvr/yMQSAHRcaMLNqcfeGAPJHbRnpoZUb1+N6kmbM6LA7rlGyQSQxVXURgjTy7GvnLKyT43gXHpe65gXzFR+2mNlwEXL1tebpj+z6xterz1auKoGhGN7NPRe9GUz/Kc7t9MmgXGQibdleebfN2KoFfZbMTxR6Og01q+9l7UPKhJUAc+o2rFh/HosDrKxXw== - From 69530fd97c1c21f03d91026089fd0aa3efac13e5 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Wed, 11 Sep 2019 13:57:49 -0700 Subject: [PATCH 18/69] Use scalafmt + some settings + update CI with change --- .circleci/config.yml | 4 ++-- .scalafmt.conf | 10 ++++++++++ build.sbt | 23 ++++------------------- project/plugins.sbt | 4 +--- 4 files changed, 17 insertions(+), 24 deletions(-) create mode 100644 .scalafmt.conf diff --git a/.circleci/config.yml b/.circleci/config.yml index dd97d71..0ea2ecb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,8 +27,8 @@ jobs: name: Test package command: cat /dev/null | sbt +test - run: - name: Check scalastyle - command: cat /dev/null | sbt scalastyle test:scalastyle + name: Check formatting + command: cat /dev/null | sbt scalafmtCheckAll - store_artifacts: # for display in Artifacts: https://circleci.com/docs/2.0/artifacts/ path: target/universal/common.zip destination: common diff --git a/.scalafmt.conf b/.scalafmt.conf new file mode 100644 index 0000000..cd2666f --- /dev/null +++ b/.scalafmt.conf @@ -0,0 +1,10 @@ +version = 2.0.0 +maxColumn = 100 +docstrings = ScalaDoc +align = none +align.tokens = [] +rewrite.rules = [SortImports, SortModifiers] +rewrite.sortModifiers.order = [ +"implicit", "final", "sealed", "abstract", +"override", "private", "protected", "lazy" +] diff --git a/build.sbt b/build.sbt index b51c8f0..a783366 100644 --- a/build.sbt +++ b/build.sbt @@ -1,5 +1,4 @@ import Dependencies._ -import scalariform.formatter.preferences._ lazy val scala211 = "2.11.12" lazy val scala212 = "2.12.9" @@ -17,7 +16,9 @@ lazy val common = (project in file(".")) guice, testkit ) + .configs(IntegrationTest) .settings( + Defaults.itSettings, crossScalaVersions := Nil, publish / skip := true, buildSettings @@ -35,25 +36,7 @@ lazy val projectSettings = Seq( dependencyOverrides ++= Logging.loggingDependencyOverrides ) -lazy val scalastyleUrl = Some(url("https://raw.githubusercontent.com/allenai/sbt-plugins/master/src/main/resources/allenai-style-config.xml")) - lazy val buildSettings = Seq( - scalastyleConfigUrl := scalastyleUrl, - scalastyleConfigRefreshHours := 24, - scalastyleFailOnError := true, - - (scalastyleConfigUrl in Test) := scalastyleUrl, - (scalastyleConfigRefreshHours in Test) := 24, - (scalastyleFailOnError in Test) := true, - - scalariformAutoformat := false, - scalariformPreferences := scalariformPreferences.value - .setPreference(MultilineScaladocCommentsStartOnFirstLine, true) - .setPreference(PlaceScaladocAsterisksBeneathSecondAsterisk, true) - .setPreference(DanglingCloseParenthesis, Preserve) - .setPreference(DoubleIndentMethodDeclaration, true) - .setPreference(NewlineAtEndOfFile, true), - crossScalaVersions := supportedScalaVersions, organization := "org.allenai.common", publishMavenStyle := true, @@ -79,6 +62,8 @@ lazy val buildSettings = Seq( bintrayPackage := s"${organization.value}:${name.value}_${scalaBinaryVersion.value}" ) +inConfig(IntegrationTest)(org.scalafmt.sbt.ScalafmtPlugin.scalafmtConfigSettings) + lazy val cache = Project(id = "cache", base = file("cache")) .settings(buildSettings) .dependsOn(core, testkit % "test->compile") diff --git a/project/plugins.sbt b/project/plugins.sbt index 6f6df7b..90267a8 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,5 +1,3 @@ addSbtPlugin("org.foundweekends" % "sbt-bintray" % "0.5.4") -addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.3") - -addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") +addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.0.4") From 5e487624b453218977b7ed7f9b56e24b75144a9b Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Wed, 11 Sep 2019 13:57:58 -0700 Subject: [PATCH 19/69] Readme edits to reflect updates --- README.md | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 0ca7879..efba828 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,24 @@ -common [![Build Status](https://semaphoreci.com/api/v1/allenai/common/branches/master/badge.svg)](https://semaphoreci.com/allenai/common) +common ====== -**Boss**: Michael +[![CircleCI](https://circleci.com/gh/allenai/common/tree/master.svg?style=svg)](https://circleci.com/gh/allenai/common/tree/master) -A collection of useful utility classes and functions. +A collection of useful utility classes and functions. Slowly on the path to deprecation. [API Docs](http://allenai.github.io/common/latest/api/#package). `testkit` - Unit test classes and utilities. -`webapp` - Spray- and web-specific tools. - `guice` - Guice-specific libraries. -`core` - Catchall collection of utilities, with smaller dependency footprint than `webapp`. +`core` - Catchall collection of utilities. Using this project as a library ------------------ `common` is published to [JCenter](https://bintray.com/bintray/jcenter) (an -alternative to Maven Central) via [BinTray](https://bintray.com/) at https://bintray.com/allenai/maven. If you have -the latest version of our [core plugins](https://github.com/allenai/sbt-plugins) the [BinTray resolver will be included -by default](https://github.com/allenai/sbt-plugins/blob/master/src/main/scala/org/allenai/plugins/CoreRepositories.scala#L24). -Otherwise you will need to include [a resolver for the JCenter +alternative to Maven Central) via [BinTray](https://bintray.com/) at https://bintray.com/allenai/maven. +You will need to include [a resolver for the JCenter repo](https://github.com/softprops/bintray-sbt#resolving-bintray-artifacts) using the `sbt-bintray` plugin to find this artifact. @@ -32,18 +28,18 @@ Releasing new versions This project releases to BinTray. To make a release, follow our standard release process. +[WIP PENDING VERIFICATION (rreas@)] + 1. Make sure you are on a branch of the main repository other than `master`. **You cannot use a branch of your fork for releases.** If you do, your tag will not make it to the main repository. -2. Set the upstream repository (`git push --set-upstream origin $branch_name`). -3. Kick of the release with `sbt release`. -4. Create a pull request and make sure Semaphore builds it OK. -5. When the pull request is merged, Semaphore will kick off a build and ultimately publish the artifact. -6. You can verify that it was published by [looking on BinTray.com](https://bintray.com/allenai/maven)! +1. Set the upstream repository (`git push --set-upstream origin $branch_name`). +1. Kick of the release with `sbt release`. +1. Create a pull request and make sure CI passes. +1. You can verify that it was published by [looking on BinTray.com](https://bintray.com/allenai/maven)! You are done! - Guideline for Contributing to `common` --------------------------- @@ -83,4 +79,3 @@ reference the issue number in your TODO comment: ### Have Two Code Reviewers to `common` Pull Requests ### Try and always have at least two reviewers for a pull request to `common` - From 8361e702890a7889ad506fdb6f65c225e86bd0bf Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Wed, 11 Sep 2019 13:58:10 -0700 Subject: [PATCH 20/69] Formatting changes per scalafmt --- .../allenai/common/cache/JsonQueryCache.scala | 12 +- .../allenai/common/cache/QueryCaches.scala | 6 +- .../scala/org/allenai/common/Config.scala | 39 +++-- .../main/scala/org/allenai/common/Enum.scala | 3 +- .../scala/org/allenai/common/FileUtils.scala | 8 +- .../org/allenai/common/JsonFormats.scala | 4 +- .../scala/org/allenai/common/JsonIo.scala | 7 +- .../scala/org/allenai/common/Logging.scala | 8 +- .../scala/org/allenai/common/MathUtils.scala | 6 +- .../org/allenai/common/ParIterator.scala | 14 +- .../scala/org/allenai/common/Resource.scala | 4 +- .../org/allenai/common/SeekableSource.scala | 2 + .../allenai/common/SourceInputStream.scala | 7 +- .../org/allenai/common/StringUtils.scala | 135 +++++++++++++++--- .../scala/org/allenai/common/Timing.scala | 1 + .../scala/org/allenai/common/UrlUtil.scala | 2 +- .../allenai/common/immutable/Interval.scala | 30 ++-- .../org/allenai/common/json/package.scala | 22 +-- .../scala/org/allenai/common/ConfigSpec.scala | 20 ++- .../org/allenai/common/JsonFormatsSpec.scala | 2 +- .../scala/org/allenai/common/JsonIoSpec.scala | 2 +- .../allenai/common/LoggingConfigSpec.scala | 12 +- .../org/allenai/common/ParIteratorSpec.scala | 16 ++- .../allenai/common/SeekableSourceSpec.scala | 2 +- .../common/SourceInputStreamSpec.scala | 2 +- .../common/immutable/IntervalSpec.scala | 16 ++- .../common/json/PackedJsonFormatSpec.scala | 2 +- .../common/json/RichJsObjectSpec.scala | 22 +-- .../allenai/common/guice/ConfigModule.scala | 33 +++-- .../common/guice/ConfigModuleSpec.scala | 76 +++++----- 30 files changed, 337 insertions(+), 178 deletions(-) diff --git a/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala b/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala index aaf1630..8818e99 100644 --- a/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala +++ b/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala @@ -3,11 +3,12 @@ package org.allenai.common.cache import org.allenai.common.Config._ import com.typesafe.config.Config -import redis.clients.jedis.{ Jedis, JedisPool, JedisPoolConfig, Protocol } +import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig, Protocol} import spray.json._ import scala.collection.JavaConverters._ object JsonQueryCache { + /** Factory method for creating a cache instance from config. * The config must have keys for `hostname` and `clientPrefix`. It may also optionally have * keys for `port` and `timeoutMillis`; if not given, these values are set to the Jedis defaults. @@ -25,10 +26,10 @@ object JsonQueryCache { } def apply[V]( - clientPrefix: String, - hostname: String, - port: Int = Protocol.DEFAULT_PORT, - timeoutMillis: Int = Protocol.DEFAULT_TIMEOUT + clientPrefix: String, + hostname: String, + port: Int = Protocol.DEFAULT_PORT, + timeoutMillis: Int = Protocol.DEFAULT_TIMEOUT )(implicit jsonFormat: JsonFormat[V]): JsonQueryCache[V] = { new JsonQueryCache[V]( clientPrefix, @@ -100,4 +101,3 @@ class JsonQueryCache[V: JsonFormat] protected[cache] (clientPrefix: String, pool client.keys(keyForQuery(pattern)).asScala } } - diff --git a/cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala b/cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala index 064572c..e2ae6b0 100644 --- a/cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala +++ b/cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala @@ -41,8 +41,8 @@ class QueryCaches( def allThereAndEq(): Boolean = { stringQueryCache.get(stringKey).exists(_.equals(stringValue)) && - intQueryCache.get(intKey).exists(_.equals(intValue)) && - seqStringQueryCache.get(seqStringKey).exists(_.equals(seqStringValue)) && - fooQueryCache.get(fooKey).exists(_.equals(fooValue)) + intQueryCache.get(intKey).exists(_.equals(intValue)) && + seqStringQueryCache.get(seqStringKey).exists(_.equals(seqStringValue)) && + fooQueryCache.get(fooKey).exists(_.equals(fooValue)) } } diff --git a/core/src/main/scala/org/allenai/common/Config.scala b/core/src/main/scala/org/allenai/common/Config.scala index c870d96..5033dda 100644 --- a/core/src/main/scala/org/allenai/common/Config.scala +++ b/core/src/main/scala/org/allenai/common/Config.scala @@ -1,6 +1,6 @@ package org.allenai.common -import com.typesafe.config.{ Config => TypesafeConfig, _ } +import com.typesafe.config.{Config => TypesafeConfig, _} import spray.json._ import java.net.URI @@ -37,6 +37,7 @@ object Config { /** Type class that defines method for reading a value of type T from a Typesafe Config key */ trait ConfigReader[T] { + /** Returns Some[T] if key is present, None if key is missing */ def read(config: TypesafeConfig, key: String): T @@ -54,18 +55,31 @@ object Config { } object ConfigReader { + /** Factory for creating a new ConfigReader[T] type class instance */ def apply[T](f: (TypesafeConfig, String) => T): ConfigReader[T] = new ConfigReader[T] { def read(config: TypesafeConfig, key: String): T = f(config, key) } // ConfigReader wrappers for built-in Typesafe Config extractors that may return null - implicit val stringReader = apply[String] { (config, key) => config.getString(key) } - implicit val intReader = apply[Int] { (config, key) => config.getInt(key) } - implicit val longReader = apply[Long] { (config, key) => config.getLong(key) } - implicit val doubleReader = apply[Double] { (config, key) => config.getDouble(key) } - implicit val boolReader = apply[Boolean] { (config, key) => config.getBoolean(key) } - implicit val configValueReader = apply[ConfigValue] { (config, key) => config.getValue(key) } + implicit val stringReader = apply[String] { (config, key) => + config.getString(key) + } + implicit val intReader = apply[Int] { (config, key) => + config.getInt(key) + } + implicit val longReader = apply[Long] { (config, key) => + config.getLong(key) + } + implicit val doubleReader = apply[Double] { (config, key) => + config.getDouble(key) + } + implicit val boolReader = apply[Boolean] { (config, key) => + config.getBoolean(key) + } + implicit val configValueReader = apply[ConfigValue] { (config, key) => + config.getValue(key) + } implicit val stringListReader = apply[Seq[String]] { (config, key) => config.getStringList(key).asScala @@ -123,11 +137,12 @@ object Config { * }}} */ implicit class EnhancedConfig(config: TypesafeConfig) { - private def optional[T](f: => T) = try { - Some(f) - } catch { - case e: ConfigException.Missing => None - } + private def optional[T](f: => T) = + try { + Some(f) + } catch { + case e: ConfigException.Missing => None + } /** Required value extraction. * @throws com.typesafe.config.ConfigException diff --git a/core/src/main/scala/org/allenai/common/Enum.scala b/core/src/main/scala/org/allenai/common/Enum.scala index 92e384d..270ce7a 100644 --- a/core/src/main/scala/org/allenai/common/Enum.scala +++ b/core/src/main/scala/org/allenai/common/Enum.scala @@ -1,6 +1,6 @@ package org.allenai.common -import spray.json.{ deserializationError, JsString, JsValue, RootJsonFormat } +import spray.json.{deserializationError, JsString, JsValue, RootJsonFormat} /** Enumeration implementation that supports automatic Spray JSON serialization of a case object as * a JsString, or using java native serialization for Spark jobs. @@ -24,6 +24,7 @@ import spray.json.{ deserializationError, JsString, JsValue, RootJsonFormat } * (format: ON) */ abstract class Enum[E <: Enum[E]] extends Serializable { + /** The serialization string. By default, use the toString implementation. For a case object, this * uses the object name. */ diff --git a/core/src/main/scala/org/allenai/common/FileUtils.scala b/core/src/main/scala/org/allenai/common/FileUtils.scala index 18d6689..a508327 100644 --- a/core/src/main/scala/org/allenai/common/FileUtils.scala +++ b/core/src/main/scala/org/allenai/common/FileUtils.scala @@ -2,10 +2,10 @@ package org.allenai.common import au.com.bytecode.opencsv.CSVReader -import java.io.{ BufferedInputStream, BufferedReader, File, FileInputStream, InputStreamReader } +import java.io.{BufferedInputStream, BufferedReader, File, FileInputStream, InputStreamReader} import scala.collection.JavaConverters._ -import scala.io.{ BufferedSource, Codec, Source } +import scala.io.{BufferedSource, Codec, Source} /** Various convenient utilities for reading files and resources. */ object FileUtils extends Logging { @@ -58,8 +58,8 @@ object FileUtils extends Logging { * of strings. */ def getCSVContentFromResource( - clazz: Class[_], - name: String + clazz: Class[_], + name: String )(implicit codec: Codec): Seq[Seq[String]] = { logger.debug(s"Loading CSV resource $name") val csvReader = new CSVReader(getResourceAsReader(clazz, name)(codec)) diff --git a/core/src/main/scala/org/allenai/common/JsonFormats.scala b/core/src/main/scala/org/allenai/common/JsonFormats.scala index 964ce3a..d64933e 100644 --- a/core/src/main/scala/org/allenai/common/JsonFormats.scala +++ b/core/src/main/scala/org/allenai/common/JsonFormats.scala @@ -1,13 +1,13 @@ package org.allenai.common -import com.typesafe.config.{ Config => TypesafeConfig } +import com.typesafe.config.{Config => TypesafeConfig} import spray.json.SerializationException import spray.json._ import spray.json.DefaultJsonProtocol._ import java.io.PrintWriter import java.io.StringWriter -import scala.util.{ Try, Success, Failure } +import scala.util.{Failure, Success, Try} /** Common spray.json.JsonFormats, spray.json.JsonReaders, and spray.json.JsonWriters */ object JsonFormats { diff --git a/core/src/main/scala/org/allenai/common/JsonIo.scala b/core/src/main/scala/org/allenai/common/JsonIo.scala index 22e8cf9..9918c04 100644 --- a/core/src/main/scala/org/allenai/common/JsonIo.scala +++ b/core/src/main/scala/org/allenai/common/JsonIo.scala @@ -4,10 +4,11 @@ import spray.json._ import scala.io.Source -import java.io.{ OutputStream, PrintWriter, Writer } +import java.io.{OutputStream, PrintWriter, Writer} /** Helpers for streaming lists of JSON objects to and from disk. */ object JsonIo { + /** Reads single-lines from a given Source, and streams the JSON parsed from them to the caller. * @return a stream of objects of type T */ @@ -25,8 +26,8 @@ object JsonIo { /** Writes the given objects to the given output stream, as one-per-line JSON values. */ def writeJson[T]( - values: Iterable[T], - outputStream: OutputStream + values: Iterable[T], + outputStream: OutputStream )(implicit format: JsonFormat[T]): Unit = { val writer = new PrintWriter(outputStream) writeJson(values, writer) diff --git a/core/src/main/scala/org/allenai/common/Logging.scala b/core/src/main/scala/org/allenai/common/Logging.scala index 8bb7bee..4fe0609 100644 --- a/core/src/main/scala/org/allenai/common/Logging.scala +++ b/core/src/main/scala/org/allenai/common/Logging.scala @@ -1,11 +1,11 @@ package org.allenai.common -import ch.qos.logback.classic.{ Level, Logger } +import ch.qos.logback.classic.{Level, Logger} import ch.qos.logback.classic.encoder.PatternLayoutEncoder import ch.qos.logback.classic.html.HTMLLayout import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.core._ -import ch.qos.logback.core.encoder.{ Encoder, LayoutWrappingEncoder } +import ch.qos.logback.core.encoder.{Encoder, LayoutWrappingEncoder} import org.slf4j.LoggerFactory /** This trait is meant to be mixed into a class to provide logging and logging configuration. @@ -103,8 +103,8 @@ trait Logging { * */ def addAppender( - encoder: Encoder[ILoggingEvent], - appender: OutputStreamAppender[ILoggingEvent] + encoder: Encoder[ILoggingEvent], + appender: OutputStreamAppender[ILoggingEvent] ): Logger = { val loggerContext = logger.getLoggerContext encoder.setContext(loggerContext) diff --git a/core/src/main/scala/org/allenai/common/MathUtils.scala b/core/src/main/scala/org/allenai/common/MathUtils.scala index 4430b1d..6304b10 100644 --- a/core/src/main/scala/org/allenai/common/MathUtils.scala +++ b/core/src/main/scala/org/allenai/common/MathUtils.scala @@ -7,9 +7,9 @@ object MathUtils { /** Round a Double to k decimal digits; by default, 0.5 rounds upwards. */ def round( - double: Double, - precision: Int, - roundingMode: RoundingMode.Value = RoundingMode.HALF_UP + double: Double, + precision: Int, + roundingMode: RoundingMode.Value = RoundingMode.HALF_UP ): Double = { BigDecimal(double).setScale(precision, roundingMode).toDouble } diff --git a/core/src/main/scala/org/allenai/common/ParIterator.scala b/core/src/main/scala/org/allenai/common/ParIterator.scala index 587f9a9..c7a29ab 100644 --- a/core/src/main/scala/org/allenai/common/ParIterator.scala +++ b/core/src/main/scala/org/allenai/common/ParIterator.scala @@ -1,6 +1,6 @@ package org.allenai.common -import java.util.concurrent.{ TimeUnit, Semaphore } +import java.util.concurrent.{Semaphore, TimeUnit} import java.util.concurrent.atomic.AtomicReference import scala.concurrent._ @@ -27,8 +27,8 @@ object ParIterator { * @param ec the execution context to run the function executions in */ def parForeach( - f: T => Unit, - queueLimit: Int = defaultQueueLimit + f: T => Unit, + queueLimit: Int = defaultQueueLimit )(implicit ec: ExecutionContext): Unit = { // If there are a billion items in the iterator, we don't want to create a billion futures, // so we limit the number of futures we create with this semaphore. @@ -69,7 +69,9 @@ object ParIterator { } // throw first exception if there is one - firstException.get().foreach { e => throw e } + firstException.get().foreach { e => + throw e + } } /** Maps an iterator to another iterator, performing the maps on the elements in parallel. @@ -89,8 +91,8 @@ object ParIterator { * @return a new iterator with the mapped values from the old iterator */ def parMap[O]( - f: T => O, - queueLimit: Int = defaultQueueLimit + f: T => O, + queueLimit: Int = defaultQueueLimit )(implicit ec: ExecutionContext): Iterator[O] = new Iterator[O] { private val inner = input.toIterator private val q = new scala.collection.mutable.Queue[Future[O]]() diff --git a/core/src/main/scala/org/allenai/common/Resource.scala b/core/src/main/scala/org/allenai/common/Resource.scala index 7a953a6..34fab59 100644 --- a/core/src/main/scala/org/allenai/common/Resource.scala +++ b/core/src/main/scala/org/allenai/common/Resource.scala @@ -29,8 +29,8 @@ object Resource { } def using2[A1 <: Closeable, A2 <: Closeable, B]( - resource1: A1, - resource2: A2 + resource1: A1, + resource2: A2 )(f: (A1, A2) => B): B = { require(resource1 != null, "The supplied resource was null.") require(resource2 != null, "The supplied resource was null.") diff --git a/core/src/main/scala/org/allenai/common/SeekableSource.scala b/core/src/main/scala/org/allenai/common/SeekableSource.scala index 564e328..afe7082 100644 --- a/core/src/main/scala/org/allenai/common/SeekableSource.scala +++ b/core/src/main/scala/org/allenai/common/SeekableSource.scala @@ -229,8 +229,10 @@ class SeekableSource(inFile: FileChannel, bufferSize: Int = 8 << 20)(implicit co /** The buffer to read the file into, and create strings out of. */ private[common] val lineBuffer: Array[Byte] = new Array(bufferSize) + /** The current index into lineBuffer. Kept in sync with inBuffer.position. */ private[common] var index = 0 + /** The index of the last valid byte in lineBuffer. Kept in sync with inBuffer.limit. */ private[common] var limit = 0 diff --git a/core/src/main/scala/org/allenai/common/SourceInputStream.scala b/core/src/main/scala/org/allenai/common/SourceInputStream.scala index 390047e..1195a18 100644 --- a/core/src/main/scala/org/allenai/common/SourceInputStream.scala +++ b/core/src/main/scala/org/allenai/common/SourceInputStream.scala @@ -1,19 +1,22 @@ package org.allenai.common import scala.collection.Iterator -import scala.io.{ Codec, Source } +import scala.io.{Codec, Source} import java.io.InputStream -import java.nio.{ ByteBuffer, CharBuffer } +import java.nio.{ByteBuffer, CharBuffer} /** Input stream wrapping a Source object, using the codec to convert characters to bytes. Not * thread-safe. */ class SourceInputStream(val source: Source)(implicit codec: Codec) extends InputStream { + /** Buffer to write (potentially multi-byte) character encodings to. */ private val outputBuffer = ByteBuffer.allocate(codec.encoder.maxBytesPerChar.ceil.toInt) + /** Number of bytes left in our output buffer. */ private var availableBytes = 0 + /** Buffer to re-use when passing characters to our encoder. */ private val charBuffer = Array[Char](1) diff --git a/core/src/main/scala/org/allenai/common/StringUtils.scala b/core/src/main/scala/org/allenai/common/StringUtils.scala index b98f674..ad0c871 100644 --- a/core/src/main/scala/org/allenai/common/StringUtils.scala +++ b/core/src/main/scala/org/allenai/common/StringUtils.scala @@ -1,6 +1,6 @@ package org.allenai.common -import org.apache.commons.lang3.{ StringUtils => ApacheStringUtils } +import org.apache.commons.lang3.{StringUtils => ApacheStringUtils} object StringUtils { val whiteSpaceRegex = """\s+""".r @@ -13,20 +13,111 @@ object StringUtils { val articles = Set("a", "an", "the") val simplePrepositions = Set( - "a", "abaft", "aboard", "about", "above", "absent", "across", "afore", - "after", "against", "along", "alongside", "amid", "amidst", "among", - "amongst", "an", "apropos", "around", "as", "aside", "astride", "at", - "athwart", "atop", "barring", "before", "behind", "below", "beneath", - "beside", "besides", "between", "betwixt", "beyond", "but", "by", "circa", - "concerning", "despite", "down", "during", "except", "excluding", - "failing", "following", "for", "from", "given", "in", "including", - "inside", "into", "lest", "like", "mid", "midst", "minus", "modulo", - "near", "next", "notwithstanding", "of", "off", "on", "onto", "opposite", - "out", "outside", "over", "pace", "past", "per", "plus", "pro", "qua", - "regarding", "round", "sans", "save", "since", "than", "through", - "thru", "throughout", "thruout", "till", "times", "to", "toward", - "towards", "under", "underneath", "unlike", "until", "up", "upon", - "versus", "vs.", "v.", "via", "vice", "with", "within", "without", + "a", + "abaft", + "aboard", + "about", + "above", + "absent", + "across", + "afore", + "after", + "against", + "along", + "alongside", + "amid", + "amidst", + "among", + "amongst", + "an", + "apropos", + "around", + "as", + "aside", + "astride", + "at", + "athwart", + "atop", + "barring", + "before", + "behind", + "below", + "beneath", + "beside", + "besides", + "between", + "betwixt", + "beyond", + "but", + "by", + "circa", + "concerning", + "despite", + "down", + "during", + "except", + "excluding", + "failing", + "following", + "for", + "from", + "given", + "in", + "including", + "inside", + "into", + "lest", + "like", + "mid", + "midst", + "minus", + "modulo", + "near", + "next", + "notwithstanding", + "of", + "off", + "on", + "onto", + "opposite", + "out", + "outside", + "over", + "pace", + "past", + "per", + "plus", + "pro", + "qua", + "regarding", + "round", + "sans", + "save", + "since", + "than", + "through", + "thru", + "throughout", + "thruout", + "till", + "times", + "to", + "toward", + "towards", + "under", + "underneath", + "unlike", + "until", + "up", + "upon", + "versus", + "vs.", + "v.", + "via", + "vice", + "with", + "within", + "without", "worth" ) @@ -149,6 +240,7 @@ object StringUtils { * format: ON */ trait StringExtras extends Any { + /** value that is transformed by extension methods. * Must be declared as the constructor argument for the implementing extension method class * (see trait scaladoc) @@ -158,7 +250,8 @@ object StringUtils { /** @return Trim white spaces, lower case, then strip the accents. */ def normalize: String = whiteSpaceRegex.replaceAllIn( - ApacheStringUtils.stripAccents(str.toLowerCase.trim), " " + ApacheStringUtils.stripAccents(str.toLowerCase.trim), + " " ) def removeNonAlpha: String = @@ -191,8 +284,8 @@ object StringUtils { /** @return Trim non-letter chars from the beginning and end */ - def trimNonAlphabetic(): String = str.dropWhile(c => - !Character.isAlphabetic(c)).trimRight(c => !Character.isAlphabetic(c)) + def trimNonAlphabetic(): String = + str.dropWhile(c => !Character.isAlphabetic(c)).trimRight(c => !Character.isAlphabetic(c)) /** @param chars String containing the blacklist chars. * @return Trim characters from the right that belongs to a blacklist. @@ -213,8 +306,8 @@ object StringUtils { words.update(i, ApacheStringUtils.capitalize(word)) } // Capitalize words that are not simple prepositions else if (!articles(word) && - !simplePrepositions(word) && - !coordinatingConjunction(word)) { + !simplePrepositions(word) && + !coordinatingConjunction(word)) { words.update(i, ApacheStringUtils.capitalize(word)) } // Otherwise, leave the word as lowercase else { @@ -234,7 +327,7 @@ object StringUtils { } def unescaped: String = { - import org.apache.commons.lang3.StringEscapeUtils.{ unescapeXml, unescapeHtml4 } + import org.apache.commons.lang3.StringEscapeUtils.{unescapeHtml4, unescapeXml} unescapeHtml4(unescapeXml(str)) } } diff --git a/core/src/main/scala/org/allenai/common/Timing.scala b/core/src/main/scala/org/allenai/common/Timing.scala index 8dc8d8c..3b485f5 100644 --- a/core/src/main/scala/org/allenai/common/Timing.scala +++ b/core/src/main/scala/org/allenai/common/Timing.scala @@ -4,6 +4,7 @@ import scala.concurrent.duration._ /** Methods for measuring the amount of time a method takes. */ object Timing { + /** Time a unit block and return the duration. */ def time(block: => Unit): Duration = { val start = System.nanoTime() diff --git a/core/src/main/scala/org/allenai/common/UrlUtil.scala b/core/src/main/scala/org/allenai/common/UrlUtil.scala index e6d9b16..0a6e1d0 100644 --- a/core/src/main/scala/org/allenai/common/UrlUtil.scala +++ b/core/src/main/scala/org/allenai/common/UrlUtil.scala @@ -6,8 +6,8 @@ import scala.util.control.Exception._ /** Util methods for java.net.URL. */ object UrlUtil { + /** Return the URL, or None if it cannot be parsed. */ def parse(url: String): Option[URL] = catching(classOf[MalformedURLException]) opt new URL(url) } - diff --git a/core/src/main/scala/org/allenai/common/immutable/Interval.scala b/core/src/main/scala/org/allenai/common/immutable/Interval.scala index 66a8174..a68fc3f 100644 --- a/core/src/main/scala/org/allenai/common/immutable/Interval.scala +++ b/core/src/main/scala/org/allenai/common/immutable/Interval.scala @@ -31,7 +31,6 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ - package org.allenai.common.immutable import Interval.empty @@ -50,7 +49,8 @@ import scala.util.matching.Regex * @author Michael Schmitz */ sealed class Interval private (val start: Int, val end: Int) - extends IndexedSeq[Int] with Ordered[Interval] { + extends IndexedSeq[Int] + with Ordered[Interval] { import Interval._ require(start <= end, "start must be <= end: " + start + ">" + end) @@ -312,6 +312,7 @@ sealed class Interval private (val start: Int, val end: Int) } object Interval { + /** The empty interval. */ val empty: Interval = Empty @@ -439,20 +440,23 @@ object Interval { */ def minimal(intervals: Iterable[Interval]): List[Interval] = { val set = collection.immutable.SortedSet.empty[Int] ++ intervals.flatten - set.foldLeft(List.empty[Interval]) { - case (list, i) => - val singleton = Interval.singleton(i) - list match { - case Nil => List(singleton) - case x :: xs if x borders i => (x union singleton) :: xs - case xs => singleton :: xs - } - }.reverse + set + .foldLeft(List.empty[Interval]) { + case (list, i) => + val singleton = Interval.singleton(i) + list match { + case Nil => List(singleton) + case x :: xs if x borders i => (x union singleton) :: xs + case xs => singleton :: xs + } + } + .reverse } // implementations object Open { + /** Match exposing the bounds as an open interval */ def unapply(interval: Interval): Option[(Int, Int)] = interval match { case `empty` => None @@ -473,13 +477,13 @@ object Interval { /** An interval that includes only a single index. * All intervals with a single element will always extend Singleton. */ - sealed abstract class Singleton private[Interval] (elem: Int) - extends Interval(elem, elem + 1) { + sealed abstract class Singleton private[Interval] (elem: Int) extends Interval(elem, elem + 1) { def index: Int = this.start override def toString: String = "{" + elem + "}" } object Singleton { + /** Match exposing the bounds as a singleton */ def unapply(interval: Interval): Option[Int] = interval match { case singleton: Singleton => Some(singleton.index) diff --git a/core/src/main/scala/org/allenai/common/json/package.scala b/core/src/main/scala/org/allenai/common/json/package.scala index ce26c68..90b069f 100644 --- a/core/src/main/scala/org/allenai/common/json/package.scala +++ b/core/src/main/scala/org/allenai/common/json/package.scala @@ -29,7 +29,7 @@ package object json { } /** Create a new JsObject with an additional field */ - def pack[A : JsonWriter](newField: (String, A)): JsObject = { + def pack[A: JsonWriter](newField: (String, A)): JsObject = { val aJsValue = implicitly[JsonWriter[A]].write(newField._2) pack(newField._1 -> aJsValue) } @@ -38,10 +38,10 @@ package object json { * * @param packedFormats */ - def unpackOptWith[T](packedFormats: PackedJsonFormat[_ <: T] *): Option[T] = { + def unpackOptWith[T](packedFormats: PackedJsonFormat[_ <: T]*): Option[T] = { val unpacks: Seq[PartialFunction[JsValue, T]] = packedFormats - .map(_.asInstanceOf[PackedJsonFormat[T]]) - .map(_.unpack) + .map(_.asInstanceOf[PackedJsonFormat[T]]) + .map(_.unpack) val combinedUnpack = unpacks reduce (_ orElse _) combinedUnpack.lift(jsObj) } @@ -51,10 +51,11 @@ package object json { * @param packedFormats * @throws spray.json.DeserializationException */ - def unpackWith[T](packedFormats: PackedJsonFormat[_ <: T] *): T = { + def unpackWith[T](packedFormats: PackedJsonFormat[_ <: T]*): T = { unpackOptWith[T](packedFormats: _*) getOrElse { deserializationError( - s"Invalid JSON. Expected a JsObject with a valid packed field, but got ${jsObj.toString}") + s"Invalid JSON. Expected a JsObject with a valid packed field, but got ${jsObj.toString}" + ) } } @@ -67,15 +68,16 @@ package object json { def unpackAs[T](implicit unpackers: Seq[PackedJsonFormat[_ <: T]]): T = { unpackOptWith[T](unpackers: _*) getOrElse { deserializationError( - s"Invalid JSON. Expected a JsObject with a valid packed field, but got ${jsObj.toString}") + s"Invalid JSON. Expected a JsObject with a valid packed field, but got ${jsObj.toString}" + ) } } /** Extract a value of type A by the given key */ - def apply[A : JsonReader](key: String): A = jsObj.fields(key).convertTo[A] + def apply[A: JsonReader](key: String): A = jsObj.fields(key).convertTo[A] /** Extract a value of type A by the given key */ - def get[A : JsonReader](key: String): Option[A] = jsObj.fields.get(key) map (_.convertTo[A]) + def get[A: JsonReader](key: String): Option[A] = jsObj.fields.get(key) map (_.convertTo[A]) } implicit class RichJsonFormat[T](val jsFormat: JsonFormat[T]) { @@ -97,7 +99,7 @@ package object json { * @param packField the key -> value pair to pack into the JsObject * on write. Typically, this would be a type indicator. */ - def pack[A : JsonWriter](packField: (String, A)): PackedJsonFormat[T] = + def pack[A: JsonWriter](packField: (String, A)): PackedJsonFormat[T] = pack(packField._1 -> packField._2.toJson) } } diff --git a/core/src/test/scala/org/allenai/common/ConfigSpec.scala b/core/src/test/scala/org/allenai/common/ConfigSpec.scala index 936334e..ca0a0c8 100644 --- a/core/src/test/scala/org/allenai/common/ConfigSpec.scala +++ b/core/src/test/scala/org/allenai/common/ConfigSpec.scala @@ -3,7 +3,7 @@ package org.allenai.common import org.allenai.common.testkit.UnitSpec import org.allenai.common.Config._ -import com.typesafe.config.{ Config => TypesafeConfig, _ } +import com.typesafe.config.{Config => TypesafeConfig, _} import spray.json._ import spray.json.DefaultJsonProtocol._ @@ -40,7 +40,9 @@ class ConfigSpec extends UnitSpec { "ConfigReader.map[A]" should "generate a new ConfigReader[A]" in { case class Stringy(value: String) - implicit val stringyConfigReader = ConfigReader.stringReader map { value => Stringy(value) } + implicit val stringyConfigReader = ConfigReader.stringReader map { value => + Stringy(value) + } val stringy = testConfig.get[Stringy]("string") assert(stringy === Some(Stringy("Hello world"))) } @@ -90,8 +92,10 @@ class ConfigSpec extends UnitSpec { } it should "work for Seq[ConfigValue]" in { - assert((testConfig.get[Seq[ConfigValue]]("intList") map { _ map { _.unwrapped } }) === - Some(Seq(1, 2, 3, 4))) + assert( + (testConfig.get[Seq[ConfigValue]]("intList") map { _ map { _.unwrapped } }) === + Some(Seq(1, 2, 3, 4)) + ) } it should "work for URI" in { @@ -103,9 +107,11 @@ class ConfigSpec extends UnitSpec { } it should "work for Seq[com.typesafe.config.Config]" in { - assert(testConfig.get[Seq[TypesafeConfig]]("objectList") === Some( - Seq(createConfig(Map("foo" -> "bar")), createConfig(Map("one" -> "two"))) - )) + assert( + testConfig.get[Seq[TypesafeConfig]]("objectList") === Some( + Seq(createConfig(Map("foo" -> "bar")), createConfig(Map("one" -> "two"))) + ) + ) } // non-happy path cases diff --git a/core/src/test/scala/org/allenai/common/JsonFormatsSpec.scala b/core/src/test/scala/org/allenai/common/JsonFormatsSpec.scala index cdba06c..60929fd 100644 --- a/core/src/test/scala/org/allenai/common/JsonFormatsSpec.scala +++ b/core/src/test/scala/org/allenai/common/JsonFormatsSpec.scala @@ -6,7 +6,7 @@ import org.allenai.common.testkit.UnitSpec import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.util.{ Try, Success, Failure } +import scala.util.{Failure, Success, Try} class JsonFormatsSpec extends UnitSpec { diff --git a/core/src/test/scala/org/allenai/common/JsonIoSpec.scala b/core/src/test/scala/org/allenai/common/JsonIoSpec.scala index 2b822c2..64138e9 100644 --- a/core/src/test/scala/org/allenai/common/JsonIoSpec.scala +++ b/core/src/test/scala/org/allenai/common/JsonIoSpec.scala @@ -5,7 +5,7 @@ import org.allenai.common.testkit.UnitSpec import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.io.{ Codec, Source } +import scala.io.{Codec, Source} import java.io.ByteArrayOutputStream diff --git a/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala b/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala index de8a15f..bebfac1 100644 --- a/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala +++ b/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala @@ -12,7 +12,8 @@ class LoggingConfigSpec extends UnitSpec with Logging { val path = Files.createTempFile("nio-temp", ".tmp") path.toFile().deleteOnExit() - loggerConfig.Logger("org.allenai.common") + loggerConfig + .Logger("org.allenai.common") .reset() .addAppender( loggerConfig.newPatternLayoutEncoder("%-5level: %message%n"), @@ -36,7 +37,8 @@ class LoggingConfigSpec extends UnitSpec with Logging { val path = Files.createTempFile("nio-temp2", ".tmp") path.toFile().deleteOnExit() - loggerConfig.Logger("org.allenai.common") + loggerConfig + .Logger("org.allenai.common") .reset() .addAppender( loggerConfig.newHtmlLayoutEncoder("%relative%thread%level%logger%msg"), @@ -48,8 +50,10 @@ class LoggingConfigSpec extends UnitSpec with Logging { logger.info("html") assert( - Source.fromFile(path.toString).mkString.contains( - "<i>html</i>") + Source + .fromFile(path.toString) + .mkString + .contains("<i>html</i>") ) } } diff --git a/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala b/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala index 15384ec..933f81d 100644 --- a/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala +++ b/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala @@ -80,7 +80,9 @@ class ParIteratorSpec extends UnitSpec { val max = 5 val values = Range(0, max).reverse val iter = values.toIterator - val expected = values.map { i => s"$i" } + val expected = values.map { i => + s"$i" + } val time: Duration = Timing.time { val result = iter.parMap { i => Thread.sleep(i * 100) @@ -97,15 +99,21 @@ class ParIteratorSpec extends UnitSpec { val max = 50000 val values = Range(0, max).reverse val iter = values.toIterator - val expected = values.map { i => s"$i" } - val result = iter.parMap { i => s"$i" } + val expected = values.map { i => + s"$i" + } + val result = iter.parMap { i => + s"$i" + } assert(expected === result.toSeq) } it should "return exceptions from foreach functions" in { val successes = synchronized(collection.mutable.Set[Int]()) intercept[ArithmeticException] { - Range(-20, 20).toIterator.parForeach { i => successes.add(10000 / i) } + Range(-20, 20).toIterator.parForeach { i => + successes.add(10000 / i) + } } } diff --git a/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala b/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala index 01e986e..69e92fc 100644 --- a/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala +++ b/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala @@ -4,7 +4,7 @@ import org.allenai.common.testkit.UnitSpec import java.nio.ByteBuffer import java.nio.channels.FileChannel -import java.nio.file.{ Files, StandardOpenOption } +import java.nio.file.{Files, StandardOpenOption} import scala.io.Codec diff --git a/core/src/test/scala/org/allenai/common/SourceInputStreamSpec.scala b/core/src/test/scala/org/allenai/common/SourceInputStreamSpec.scala index 43d4003..1ea4b19 100644 --- a/core/src/test/scala/org/allenai/common/SourceInputStreamSpec.scala +++ b/core/src/test/scala/org/allenai/common/SourceInputStreamSpec.scala @@ -3,7 +3,7 @@ package org.allenai.common import org.allenai.common.testkit.UnitSpec import scala.collection.mutable -import scala.io.{ Codec, Source } +import scala.io.{Codec, Source} class SourceInputStreamSpec extends UnitSpec { "SourceInputStream" should "handle ASCII (single-byte) characters correctly" in { diff --git a/core/src/test/scala/org/allenai/common/immutable/IntervalSpec.scala b/core/src/test/scala/org/allenai/common/immutable/IntervalSpec.scala index 17f3e83..2de983a 100644 --- a/core/src/test/scala/org/allenai/common/immutable/IntervalSpec.scala +++ b/core/src/test/scala/org/allenai/common/immutable/IntervalSpec.scala @@ -31,7 +31,6 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ - package org.allenai.common.immutable import org.allenai.common.testkit.UnitSpec @@ -148,7 +147,9 @@ class IntervalSpec extends UnitSpec with Checkers { // for all points in one of the new intervals // no other interval may contain the same point min.forall { interval => - !min.exists { other => !(other eq interval) && (other intersects interval) } + !min.exists { other => + !(other eq interval) && (other intersects interval) + } } // result is sorted @@ -224,11 +225,12 @@ class IntervalSpec extends UnitSpec with Checkers { def roundtripJson(x: Interval): Interval = (x.toJson).convertTo[Interval] - def roundtripsJsonOk(x: Interval): Unit = assert({ - val rtrip = roundtripJson(x) - /* check string form as well to distinguish open/closed intervals */ - rtrip == x && rtrip.toString == x.toString - }) + def roundtripsJsonOk(x: Interval): Unit = + assert({ + val rtrip = roundtripJson(x) + /* check string form as well to distinguish open/closed intervals */ + rtrip == x && rtrip.toString == x.toString + }) "Json serialization for intervals" should "work" in { roundtripsJsonOk(Interval.empty) diff --git a/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala b/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala index ea7b7bc..fff55c8 100644 --- a/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala +++ b/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala @@ -5,7 +5,7 @@ import org.allenai.common.testkit.UnitSpec import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.util.{ Try, Success, Failure } +import scala.util.{Failure, Success, Try} class PackedJsonFormatSpec extends UnitSpec { diff --git a/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala b/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala index a668053..86f3680 100644 --- a/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala +++ b/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala @@ -5,7 +5,7 @@ import org.allenai.common.testkit.UnitSpec import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.util.{ Try, Success, Failure } +import scala.util.{Failure, Success, Try} // scalastyle:off magic.number class RichJsObjectSpec extends UnitSpec { @@ -18,10 +18,12 @@ class RichJsObjectSpec extends UnitSpec { val json = foo.toJson val jsonObj = json.asJsObject val packed = jsonObj.pack("age" -> 10.toJson) - assert(packed === JsObject( - "name" -> JsString("John"), - "age" -> JsNumber(10) - )) + assert( + packed === JsObject( + "name" -> JsString("John"), + "age" -> JsNumber(10) + ) + ) } it should "handle types that are not JsValue but have a JsonWriter" in { @@ -29,9 +31,11 @@ class RichJsObjectSpec extends UnitSpec { val json = foo.toJson val jsonObj = json.asJsObject val packed = jsonObj.pack("age" -> 10) - assert(packed === JsObject( - "name" -> JsString("John"), - "age" -> JsNumber(10) - )) + assert( + packed === JsObject( + "name" -> JsString("John"), + "age" -> JsNumber(10) + ) + ) } } diff --git a/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala b/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala index af6f57d..e30ae3f 100644 --- a/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala +++ b/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala @@ -59,6 +59,7 @@ import scala.util.Try * @param config the runtime config to use containing all values to bind */ class ConfigModule(config: Config) extends ScalaModule with Logging { + /** The actual config to bind. */ private lazy val fullConfig = { val resolvedConfig = config.withFallback(defaultConfig).resolve() @@ -84,9 +85,10 @@ class ConfigModule(config: Config) extends ScalaModule with Logging { /** The config to use as a fallback. This is where keys will be looked up if they aren't present * in the provided config. */ - def defaultConfig: Config = configName map { name => - ConfigFactory.parseResources(getClass, name) - } getOrElse ConfigFactory.empty + def defaultConfig: Config = + configName map { name => + ConfigFactory.parseResources(getClass, name) + } getOrElse ConfigFactory.empty /** Configure method for implementing classes to override if they wish to create additional * bindings, or bindings based on config values. @@ -104,7 +106,7 @@ class ConfigModule(config: Config) extends ScalaModule with Logging { /** Internal helper to bind the config key `key` to the given type `T`. */ private def bindConfigKey[T]( - key: String + key: String )(implicit manifest: Manifest[T], configReader: ConfigReader[T]): Unit = { try { fullConfig.get[T](key) match { @@ -112,13 +114,17 @@ class ConfigModule(config: Config) extends ScalaModule with Logging { bind[T].annotatedWithName(key).toInstance(value) bind[Option[T]].annotatedWithName(key).toInstance(Some(value)) case None => - addError(s"Config in ${getClass.getSimpleName} missing key '$key' with expected type " + - s"'${manifest.runtimeClass.getSimpleName}'") + addError( + s"Config in ${getClass.getSimpleName} missing key '$key' with expected type " + + s"'${manifest.runtimeClass.getSimpleName}'" + ) } } catch { case _: ConfigException.WrongType => - addError(s"Config in ${getClass.getSimpleName} has bad type for key '$key'; expected " + - s"value of type '${manifest.runtimeClass.getSimpleName}'") + addError( + s"Config in ${getClass.getSimpleName} has bad type for key '$key'; expected " + + s"value of type '${manifest.runtimeClass.getSimpleName}'" + ) } } @@ -181,13 +187,18 @@ class ConfigModule(config: Config) extends ScalaModule with Logging { // Lazily apply the first method that works. val success = methods.iterator.map(method => Try(method())).exists(_.isSuccess) if (!success) { - logger.warn(s"Could not find list type for key '$fullPath' in in " + - s"${getClass.getSimpleName}. No value will be bound to '$fullPath'.") + logger.warn( + s"Could not find list type for key '$fullPath' in in " + + s"${getClass.getSimpleName}. No value will be bound to '$fullPath'." + ) } case ConfigValueType.OBJECT => bindConfigKey[Config](fullPath) // Recurse. - bindConfigObject(config.toConfig()[Config](ConfigUtil.quoteString(key)).root, fullPathElements) + bindConfigObject( + config.toConfig()[Config](ConfigUtil.quoteString(key)).root, + fullPathElements + ) case other => // Shouldn't happen - but warn if it does. logger.warn(s"Unhandled config value type [$other] for key $fullPath") diff --git a/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala b/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala index 6661149..8c9c3b4 100644 --- a/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala +++ b/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala @@ -1,62 +1,62 @@ package org.allenai.common.guice -import com.google.inject.{ Guice, Inject } +import com.google.inject.{Guice, Inject} import com.google.inject.name.Named -import com.typesafe.config.{ Config, ConfigFactory } +import com.typesafe.config.{Config, ConfigFactory} import org.allenai.common.testkit.UnitSpec case class CaseClass(a: String) // Test class, defined in a way that's injectable by Guice (outside of a wrapping class). -case class AnnotatedClass @Inject() ( - @Named("fooString") foo: String, - // This string has a default value in the module.conf file. - @Named("hasDefault") hasDefault: String, - unannotated: Set[String], - @Named("boolbool") boolean: Boolean, - @Named("barNum") bar: Int, - @Named("barNum") barLong: Long, - @Named("barNum") barDouble: Double, - @Named("unsupported") unsupported: CaseClass +case class AnnotatedClass @Inject()( + @Named("fooString") foo: String, + // This string has a default value in the module.conf file. + @Named("hasDefault") hasDefault: String, + unannotated: Set[String], + @Named("boolbool") boolean: Boolean, + @Named("barNum") bar: Int, + @Named("barNum") barLong: Long, + @Named("barNum") barDouble: Double, + @Named("unsupported") unsupported: CaseClass ) -case class OptionalParamClass @Inject() ( - @Named("presentString") present: String, - @Named("presentString") presentOption: Option[String], - @Named("missingString") missingOption: Option[String] +case class OptionalParamClass @Inject()( + @Named("presentString") present: String, + @Named("presentString") presentOption: Option[String], + @Named("missingString") missingOption: Option[String] ) // Test class with nested Config objects. -case class NestedConfig @Inject() ( - @Named("root") root: Config, - @Named("root.nested") nested: Config, - @Named("nested") nestedNone: Option[Config], - @Named("root.string") rootString: String, - @Named("root.nested.string") nestedString: String +case class NestedConfig @Inject()( + @Named("root") root: Config, + @Named("root.nested") nested: Config, + @Named("nested") nestedNone: Option[Config], + @Named("root.string") rootString: String, + @Named("root.nested.string") nestedString: String ) // Test class, using namespaced values. -case class PrefixClass @Inject() ( - @Named("prefix.fooString") foo: String, - // This string has a default value in the module.conf file. - @Named("prefix.hasDefault") hasDefault: String, - @Named("prefix.boolbool") boolean: Boolean, - @Named("prefix.nested.bool") nestedBool: Boolean, - // This doesn't begin with the right prefix, so it shouldn't get a binding. - @Named("ignored_no_prefix") bar: Int +case class PrefixClass @Inject()( + @Named("prefix.fooString") foo: String, + // This string has a default value in the module.conf file. + @Named("prefix.hasDefault") hasDefault: String, + @Named("prefix.boolbool") boolean: Boolean, + @Named("prefix.nested.bool") nestedBool: Boolean, + // This doesn't begin with the right prefix, so it shouldn't get a binding. + @Named("ignored_no_prefix") bar: Int ) // Test class with dotted keys. -case class DottedKeys @Inject() ( - @Named("\"i.have\".dots") dots: String, - @Named("\"i.have.more.dots\".bar") bar: Int +case class DottedKeys @Inject()( + @Named("\"i.have\".dots") dots: String, + @Named("\"i.have.more.dots\".bar") bar: Int ) // Test class with Seq values. -case class SeqValues @Inject() ( - @Named("seq.ofConfig") configs: Seq[Config], - @Named("seq.ofString") strings: Seq[String], - @Named("seq.ofBool") booleans: Seq[Boolean], - @Named("seq.ofDouble") doubles: Seq[Double] +case class SeqValues @Inject()( + @Named("seq.ofConfig") configs: Seq[Config], + @Named("seq.ofString") strings: Seq[String], + @Named("seq.ofBool") booleans: Seq[Boolean], + @Named("seq.ofDouble") doubles: Seq[Double] ) class ConfigModuleSpec extends UnitSpec { From 7ae2ac522db6ab08b131fa8200e4c0580a6c34fd Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Wed, 11 Sep 2019 14:08:18 -0700 Subject: [PATCH 21/69] Update bintray settings and re-organize project vs. build --- build.sbt | 68 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/build.sbt b/build.sbt index a783366..269b371 100644 --- a/build.sbt +++ b/build.sbt @@ -9,36 +9,17 @@ ThisBuild / organization := "org.allenai.common" ThisBuild / version := "2.0.0-SNAPSHOT" ThisBuild / scalaVersion := scala212 -lazy val common = (project in file(".")) - .aggregate( - cache, - core, - guice, - testkit - ) - .configs(IntegrationTest) - .settings( - Defaults.itSettings, - crossScalaVersions := Nil, - publish / skip := true, - buildSettings - ) - lazy val spray = "spray" at "http://repo.spray.io/" lazy val typesafeReleases = "Typesafe Releases" at "http://repo.typesafe.com/typesafe/releases/" lazy val projectSettings = Seq( - fork := true, - javaOptions += s"-Dlogback.appname=${name.value}", - scalacOptions ++= Seq("-target:jvm-1.8", "-Xlint", "-deprecation", "-feature"), - javacOptions ++= Seq("-source", "1.8", "-target", "1.8"), - resolvers ++= Seq(spray, Resolver.jcenterRepo, typesafeReleases), - dependencyOverrides ++= Logging.loggingDependencyOverrides -) - -lazy val buildSettings = Seq( - crossScalaVersions := supportedScalaVersions, - organization := "org.allenai.common", + resolvers ++= Seq( + Resolver.bintrayRepo("allenai", "maven"), + spray, + Resolver.jcenterRepo, + typesafeReleases + ), + dependencyOverrides ++= Logging.loggingDependencyOverrides, publishMavenStyle := true, publishArtifact in Test := false, pomIncludeRepository := { _ => false }, @@ -59,22 +40,47 @@ lazy val buildSettings = Seq( dev-role@allenai.org ), - bintrayPackage := s"${organization.value}:${name.value}_${scalaBinaryVersion.value}" + bintrayPackage := s"${organization.value}:${name.value}_${scalaBinaryVersion.value}", + bintrayOrganization := Some("allenai"), + bintrayRepository := "maven" ) +lazy val buildSettings = Seq( + javaOptions += s"-Dlogback.appname=${name.value}", + scalacOptions ++= Seq("-target:jvm-1.8", "-Xlint", "-deprecation", "-feature"), + javacOptions ++= Seq("-source", "1.8", "-target", "1.8"), + crossScalaVersions := supportedScalaVersions, +) + +// Not necessary for this repository but here as an example inConfig(IntegrationTest)(org.scalafmt.sbt.ScalafmtPlugin.scalafmtConfigSettings) +lazy val common = (project in file(".")) + .aggregate( + cache, + core, + guice, + testkit + ) + .configs(IntegrationTest) + .settings( + Defaults.itSettings, + crossScalaVersions := Nil, + publish / skip := true, + buildSettings + ) + lazy val cache = Project(id = "cache", base = file("cache")) - .settings(buildSettings) + .settings(projectSettings, buildSettings) .dependsOn(core, testkit % "test->compile") lazy val core = Project(id = "core", base = file("core")) - .settings(buildSettings) + .settings(projectSettings, buildSettings) .dependsOn(testkit % "test->compile") lazy val guice = Project(id = "guice", base = file("guice")) - .settings(buildSettings) + .settings(projectSettings, buildSettings) .dependsOn(core, testkit % "test->compile") lazy val testkit = Project(id = "testkit", base = file("testkit")) - .settings(buildSettings) + .settings(projectSettings, buildSettings) From abda6b0776794bc369797e98a5e6af0176dfc70f Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Wed, 11 Sep 2019 14:25:16 -0700 Subject: [PATCH 22/69] Use spaces.inImportCurlyBraces = true --- .scalafmt.conf | 1 + .../main/scala/org/allenai/common/cache/JsonQueryCache.scala | 2 +- core/src/main/scala/org/allenai/common/Config.scala | 2 +- core/src/main/scala/org/allenai/common/Enum.scala | 2 +- core/src/main/scala/org/allenai/common/FileUtils.scala | 4 ++-- core/src/main/scala/org/allenai/common/JsonFormats.scala | 4 ++-- core/src/main/scala/org/allenai/common/JsonIo.scala | 2 +- core/src/main/scala/org/allenai/common/Logging.scala | 4 ++-- core/src/main/scala/org/allenai/common/ParIterator.scala | 2 +- .../src/main/scala/org/allenai/common/SourceInputStream.scala | 4 ++-- core/src/main/scala/org/allenai/common/StringUtils.scala | 4 ++-- core/src/test/scala/org/allenai/common/ConfigSpec.scala | 2 +- core/src/test/scala/org/allenai/common/JsonFormatsSpec.scala | 2 +- core/src/test/scala/org/allenai/common/JsonIoSpec.scala | 2 +- .../test/scala/org/allenai/common/SeekableSourceSpec.scala | 2 +- .../test/scala/org/allenai/common/SourceInputStreamSpec.scala | 2 +- .../scala/org/allenai/common/json/PackedJsonFormatSpec.scala | 2 +- .../test/scala/org/allenai/common/json/RichJsObjectSpec.scala | 2 +- .../scala/org/allenai/common/guice/ConfigModuleSpec.scala | 4 ++-- 19 files changed, 25 insertions(+), 24 deletions(-) diff --git a/.scalafmt.conf b/.scalafmt.conf index cd2666f..999d802 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -8,3 +8,4 @@ rewrite.sortModifiers.order = [ "implicit", "final", "sealed", "abstract", "override", "private", "protected", "lazy" ] +spaces.inImportCurlyBraces = true diff --git a/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala b/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala index 8818e99..de01513 100644 --- a/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala +++ b/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala @@ -3,7 +3,7 @@ package org.allenai.common.cache import org.allenai.common.Config._ import com.typesafe.config.Config -import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig, Protocol} +import redis.clients.jedis.{ Jedis, JedisPool, JedisPoolConfig, Protocol } import spray.json._ import scala.collection.JavaConverters._ diff --git a/core/src/main/scala/org/allenai/common/Config.scala b/core/src/main/scala/org/allenai/common/Config.scala index 5033dda..0f37553 100644 --- a/core/src/main/scala/org/allenai/common/Config.scala +++ b/core/src/main/scala/org/allenai/common/Config.scala @@ -1,6 +1,6 @@ package org.allenai.common -import com.typesafe.config.{Config => TypesafeConfig, _} +import com.typesafe.config.{ Config => TypesafeConfig, _ } import spray.json._ import java.net.URI diff --git a/core/src/main/scala/org/allenai/common/Enum.scala b/core/src/main/scala/org/allenai/common/Enum.scala index 270ce7a..7d28204 100644 --- a/core/src/main/scala/org/allenai/common/Enum.scala +++ b/core/src/main/scala/org/allenai/common/Enum.scala @@ -1,6 +1,6 @@ package org.allenai.common -import spray.json.{deserializationError, JsString, JsValue, RootJsonFormat} +import spray.json.{ deserializationError, JsString, JsValue, RootJsonFormat } /** Enumeration implementation that supports automatic Spray JSON serialization of a case object as * a JsString, or using java native serialization for Spark jobs. diff --git a/core/src/main/scala/org/allenai/common/FileUtils.scala b/core/src/main/scala/org/allenai/common/FileUtils.scala index a508327..784f584 100644 --- a/core/src/main/scala/org/allenai/common/FileUtils.scala +++ b/core/src/main/scala/org/allenai/common/FileUtils.scala @@ -2,10 +2,10 @@ package org.allenai.common import au.com.bytecode.opencsv.CSVReader -import java.io.{BufferedInputStream, BufferedReader, File, FileInputStream, InputStreamReader} +import java.io.{ BufferedInputStream, BufferedReader, File, FileInputStream, InputStreamReader } import scala.collection.JavaConverters._ -import scala.io.{BufferedSource, Codec, Source} +import scala.io.{ BufferedSource, Codec, Source } /** Various convenient utilities for reading files and resources. */ object FileUtils extends Logging { diff --git a/core/src/main/scala/org/allenai/common/JsonFormats.scala b/core/src/main/scala/org/allenai/common/JsonFormats.scala index d64933e..72f9831 100644 --- a/core/src/main/scala/org/allenai/common/JsonFormats.scala +++ b/core/src/main/scala/org/allenai/common/JsonFormats.scala @@ -1,13 +1,13 @@ package org.allenai.common -import com.typesafe.config.{Config => TypesafeConfig} +import com.typesafe.config.{ Config => TypesafeConfig } import spray.json.SerializationException import spray.json._ import spray.json.DefaultJsonProtocol._ import java.io.PrintWriter import java.io.StringWriter -import scala.util.{Failure, Success, Try} +import scala.util.{ Failure, Success, Try } /** Common spray.json.JsonFormats, spray.json.JsonReaders, and spray.json.JsonWriters */ object JsonFormats { diff --git a/core/src/main/scala/org/allenai/common/JsonIo.scala b/core/src/main/scala/org/allenai/common/JsonIo.scala index 9918c04..06c54c6 100644 --- a/core/src/main/scala/org/allenai/common/JsonIo.scala +++ b/core/src/main/scala/org/allenai/common/JsonIo.scala @@ -4,7 +4,7 @@ import spray.json._ import scala.io.Source -import java.io.{OutputStream, PrintWriter, Writer} +import java.io.{ OutputStream, PrintWriter, Writer } /** Helpers for streaming lists of JSON objects to and from disk. */ object JsonIo { diff --git a/core/src/main/scala/org/allenai/common/Logging.scala b/core/src/main/scala/org/allenai/common/Logging.scala index 4fe0609..08f4b65 100644 --- a/core/src/main/scala/org/allenai/common/Logging.scala +++ b/core/src/main/scala/org/allenai/common/Logging.scala @@ -1,11 +1,11 @@ package org.allenai.common -import ch.qos.logback.classic.{Level, Logger} +import ch.qos.logback.classic.{ Level, Logger } import ch.qos.logback.classic.encoder.PatternLayoutEncoder import ch.qos.logback.classic.html.HTMLLayout import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.core._ -import ch.qos.logback.core.encoder.{Encoder, LayoutWrappingEncoder} +import ch.qos.logback.core.encoder.{ Encoder, LayoutWrappingEncoder } import org.slf4j.LoggerFactory /** This trait is meant to be mixed into a class to provide logging and logging configuration. diff --git a/core/src/main/scala/org/allenai/common/ParIterator.scala b/core/src/main/scala/org/allenai/common/ParIterator.scala index c7a29ab..d233430 100644 --- a/core/src/main/scala/org/allenai/common/ParIterator.scala +++ b/core/src/main/scala/org/allenai/common/ParIterator.scala @@ -1,6 +1,6 @@ package org.allenai.common -import java.util.concurrent.{Semaphore, TimeUnit} +import java.util.concurrent.{ Semaphore, TimeUnit } import java.util.concurrent.atomic.AtomicReference import scala.concurrent._ diff --git a/core/src/main/scala/org/allenai/common/SourceInputStream.scala b/core/src/main/scala/org/allenai/common/SourceInputStream.scala index 1195a18..7ae97fe 100644 --- a/core/src/main/scala/org/allenai/common/SourceInputStream.scala +++ b/core/src/main/scala/org/allenai/common/SourceInputStream.scala @@ -1,10 +1,10 @@ package org.allenai.common import scala.collection.Iterator -import scala.io.{Codec, Source} +import scala.io.{ Codec, Source } import java.io.InputStream -import java.nio.{ByteBuffer, CharBuffer} +import java.nio.{ ByteBuffer, CharBuffer } /** Input stream wrapping a Source object, using the codec to convert characters to bytes. Not * thread-safe. diff --git a/core/src/main/scala/org/allenai/common/StringUtils.scala b/core/src/main/scala/org/allenai/common/StringUtils.scala index ad0c871..27ae860 100644 --- a/core/src/main/scala/org/allenai/common/StringUtils.scala +++ b/core/src/main/scala/org/allenai/common/StringUtils.scala @@ -1,6 +1,6 @@ package org.allenai.common -import org.apache.commons.lang3.{StringUtils => ApacheStringUtils} +import org.apache.commons.lang3.{ StringUtils => ApacheStringUtils } object StringUtils { val whiteSpaceRegex = """\s+""".r @@ -327,7 +327,7 @@ object StringUtils { } def unescaped: String = { - import org.apache.commons.lang3.StringEscapeUtils.{unescapeHtml4, unescapeXml} + import org.apache.commons.lang3.StringEscapeUtils.{ unescapeHtml4, unescapeXml } unescapeHtml4(unescapeXml(str)) } } diff --git a/core/src/test/scala/org/allenai/common/ConfigSpec.scala b/core/src/test/scala/org/allenai/common/ConfigSpec.scala index ca0a0c8..b2b1653 100644 --- a/core/src/test/scala/org/allenai/common/ConfigSpec.scala +++ b/core/src/test/scala/org/allenai/common/ConfigSpec.scala @@ -3,7 +3,7 @@ package org.allenai.common import org.allenai.common.testkit.UnitSpec import org.allenai.common.Config._ -import com.typesafe.config.{Config => TypesafeConfig, _} +import com.typesafe.config.{ Config => TypesafeConfig, _ } import spray.json._ import spray.json.DefaultJsonProtocol._ diff --git a/core/src/test/scala/org/allenai/common/JsonFormatsSpec.scala b/core/src/test/scala/org/allenai/common/JsonFormatsSpec.scala index 60929fd..0d56763 100644 --- a/core/src/test/scala/org/allenai/common/JsonFormatsSpec.scala +++ b/core/src/test/scala/org/allenai/common/JsonFormatsSpec.scala @@ -6,7 +6,7 @@ import org.allenai.common.testkit.UnitSpec import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.util.{Failure, Success, Try} +import scala.util.{ Failure, Success, Try } class JsonFormatsSpec extends UnitSpec { diff --git a/core/src/test/scala/org/allenai/common/JsonIoSpec.scala b/core/src/test/scala/org/allenai/common/JsonIoSpec.scala index 64138e9..2b822c2 100644 --- a/core/src/test/scala/org/allenai/common/JsonIoSpec.scala +++ b/core/src/test/scala/org/allenai/common/JsonIoSpec.scala @@ -5,7 +5,7 @@ import org.allenai.common.testkit.UnitSpec import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.io.{Codec, Source} +import scala.io.{ Codec, Source } import java.io.ByteArrayOutputStream diff --git a/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala b/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala index 69e92fc..01e986e 100644 --- a/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala +++ b/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala @@ -4,7 +4,7 @@ import org.allenai.common.testkit.UnitSpec import java.nio.ByteBuffer import java.nio.channels.FileChannel -import java.nio.file.{Files, StandardOpenOption} +import java.nio.file.{ Files, StandardOpenOption } import scala.io.Codec diff --git a/core/src/test/scala/org/allenai/common/SourceInputStreamSpec.scala b/core/src/test/scala/org/allenai/common/SourceInputStreamSpec.scala index 1ea4b19..43d4003 100644 --- a/core/src/test/scala/org/allenai/common/SourceInputStreamSpec.scala +++ b/core/src/test/scala/org/allenai/common/SourceInputStreamSpec.scala @@ -3,7 +3,7 @@ package org.allenai.common import org.allenai.common.testkit.UnitSpec import scala.collection.mutable -import scala.io.{Codec, Source} +import scala.io.{ Codec, Source } class SourceInputStreamSpec extends UnitSpec { "SourceInputStream" should "handle ASCII (single-byte) characters correctly" in { diff --git a/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala b/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala index fff55c8..96feb04 100644 --- a/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala +++ b/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala @@ -5,7 +5,7 @@ import org.allenai.common.testkit.UnitSpec import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.util.{Failure, Success, Try} +import scala.util.{ Failure, Success, Try } class PackedJsonFormatSpec extends UnitSpec { diff --git a/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala b/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala index 86f3680..dc5fa10 100644 --- a/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala +++ b/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala @@ -5,7 +5,7 @@ import org.allenai.common.testkit.UnitSpec import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.util.{Failure, Success, Try} +import scala.util.{ Failure, Success, Try } // scalastyle:off magic.number class RichJsObjectSpec extends UnitSpec { diff --git a/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala b/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala index 8c9c3b4..b3a0a87 100644 --- a/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala +++ b/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala @@ -1,8 +1,8 @@ package org.allenai.common.guice -import com.google.inject.{Guice, Inject} +import com.google.inject.{ Guice, Inject } import com.google.inject.name.Named -import com.typesafe.config.{Config, ConfigFactory} +import com.typesafe.config.{ Config, ConfigFactory } import org.allenai.common.testkit.UnitSpec case class CaseClass(a: String) From 4215048aa9701f46d9560639856ca145d58b65f0 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Wed, 11 Sep 2019 14:28:09 -0700 Subject: [PATCH 23/69] Use continuationIndent.defnSite = 4 --- .scalafmt.conf | 1 + .../allenai/common/cache/JsonQueryCache.scala | 8 +-- .../allenai/common/cache/QueryCaches.scala | 8 +-- .../scala/org/allenai/common/FileUtils.scala | 4 +- .../scala/org/allenai/common/JsonIo.scala | 4 +- .../scala/org/allenai/common/Logging.scala | 4 +- .../scala/org/allenai/common/MathUtils.scala | 6 +- .../org/allenai/common/ParIterator.scala | 8 +-- .../scala/org/allenai/common/Resource.scala | 4 +- .../allenai/common/guice/ConfigModule.scala | 2 +- .../common/guice/ConfigModuleSpec.scala | 60 +++++++++---------- 11 files changed, 55 insertions(+), 54 deletions(-) diff --git a/.scalafmt.conf b/.scalafmt.conf index 999d802..25047a3 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -9,3 +9,4 @@ rewrite.sortModifiers.order = [ "override", "private", "protected", "lazy" ] spaces.inImportCurlyBraces = true +continuationIndent.defnSite = 2 diff --git a/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala b/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala index de01513..c772b28 100644 --- a/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala +++ b/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala @@ -26,10 +26,10 @@ object JsonQueryCache { } def apply[V]( - clientPrefix: String, - hostname: String, - port: Int = Protocol.DEFAULT_PORT, - timeoutMillis: Int = Protocol.DEFAULT_TIMEOUT + clientPrefix: String, + hostname: String, + port: Int = Protocol.DEFAULT_PORT, + timeoutMillis: Int = Protocol.DEFAULT_TIMEOUT )(implicit jsonFormat: JsonFormat[V]): JsonQueryCache[V] = { new JsonQueryCache[V]( clientPrefix, diff --git a/cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala b/cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala index e2ae6b0..cb2bece 100644 --- a/cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala +++ b/cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala @@ -1,10 +1,10 @@ package org.allenai.common.cache class QueryCaches( - stringQueryCache: JsonQueryCache[String], - intQueryCache: JsonQueryCache[Int], - seqStringQueryCache: JsonQueryCache[Seq[String]], - fooQueryCache: JsonQueryCache[Foo] + stringQueryCache: JsonQueryCache[String], + intQueryCache: JsonQueryCache[Int], + seqStringQueryCache: JsonQueryCache[Seq[String]], + fooQueryCache: JsonQueryCache[Foo] ) { val stringKey = "stringKey" val stringValue = "stringValue" diff --git a/core/src/main/scala/org/allenai/common/FileUtils.scala b/core/src/main/scala/org/allenai/common/FileUtils.scala index 784f584..18d6689 100644 --- a/core/src/main/scala/org/allenai/common/FileUtils.scala +++ b/core/src/main/scala/org/allenai/common/FileUtils.scala @@ -58,8 +58,8 @@ object FileUtils extends Logging { * of strings. */ def getCSVContentFromResource( - clazz: Class[_], - name: String + clazz: Class[_], + name: String )(implicit codec: Codec): Seq[Seq[String]] = { logger.debug(s"Loading CSV resource $name") val csvReader = new CSVReader(getResourceAsReader(clazz, name)(codec)) diff --git a/core/src/main/scala/org/allenai/common/JsonIo.scala b/core/src/main/scala/org/allenai/common/JsonIo.scala index 06c54c6..2c360ea 100644 --- a/core/src/main/scala/org/allenai/common/JsonIo.scala +++ b/core/src/main/scala/org/allenai/common/JsonIo.scala @@ -26,8 +26,8 @@ object JsonIo { /** Writes the given objects to the given output stream, as one-per-line JSON values. */ def writeJson[T]( - values: Iterable[T], - outputStream: OutputStream + values: Iterable[T], + outputStream: OutputStream )(implicit format: JsonFormat[T]): Unit = { val writer = new PrintWriter(outputStream) writeJson(values, writer) diff --git a/core/src/main/scala/org/allenai/common/Logging.scala b/core/src/main/scala/org/allenai/common/Logging.scala index 08f4b65..8bb7bee 100644 --- a/core/src/main/scala/org/allenai/common/Logging.scala +++ b/core/src/main/scala/org/allenai/common/Logging.scala @@ -103,8 +103,8 @@ trait Logging { * */ def addAppender( - encoder: Encoder[ILoggingEvent], - appender: OutputStreamAppender[ILoggingEvent] + encoder: Encoder[ILoggingEvent], + appender: OutputStreamAppender[ILoggingEvent] ): Logger = { val loggerContext = logger.getLoggerContext encoder.setContext(loggerContext) diff --git a/core/src/main/scala/org/allenai/common/MathUtils.scala b/core/src/main/scala/org/allenai/common/MathUtils.scala index 6304b10..4430b1d 100644 --- a/core/src/main/scala/org/allenai/common/MathUtils.scala +++ b/core/src/main/scala/org/allenai/common/MathUtils.scala @@ -7,9 +7,9 @@ object MathUtils { /** Round a Double to k decimal digits; by default, 0.5 rounds upwards. */ def round( - double: Double, - precision: Int, - roundingMode: RoundingMode.Value = RoundingMode.HALF_UP + double: Double, + precision: Int, + roundingMode: RoundingMode.Value = RoundingMode.HALF_UP ): Double = { BigDecimal(double).setScale(precision, roundingMode).toDouble } diff --git a/core/src/main/scala/org/allenai/common/ParIterator.scala b/core/src/main/scala/org/allenai/common/ParIterator.scala index d233430..b92d87a 100644 --- a/core/src/main/scala/org/allenai/common/ParIterator.scala +++ b/core/src/main/scala/org/allenai/common/ParIterator.scala @@ -27,8 +27,8 @@ object ParIterator { * @param ec the execution context to run the function executions in */ def parForeach( - f: T => Unit, - queueLimit: Int = defaultQueueLimit + f: T => Unit, + queueLimit: Int = defaultQueueLimit )(implicit ec: ExecutionContext): Unit = { // If there are a billion items in the iterator, we don't want to create a billion futures, // so we limit the number of futures we create with this semaphore. @@ -91,8 +91,8 @@ object ParIterator { * @return a new iterator with the mapped values from the old iterator */ def parMap[O]( - f: T => O, - queueLimit: Int = defaultQueueLimit + f: T => O, + queueLimit: Int = defaultQueueLimit )(implicit ec: ExecutionContext): Iterator[O] = new Iterator[O] { private val inner = input.toIterator private val q = new scala.collection.mutable.Queue[Future[O]]() diff --git a/core/src/main/scala/org/allenai/common/Resource.scala b/core/src/main/scala/org/allenai/common/Resource.scala index 34fab59..7a953a6 100644 --- a/core/src/main/scala/org/allenai/common/Resource.scala +++ b/core/src/main/scala/org/allenai/common/Resource.scala @@ -29,8 +29,8 @@ object Resource { } def using2[A1 <: Closeable, A2 <: Closeable, B]( - resource1: A1, - resource2: A2 + resource1: A1, + resource2: A2 )(f: (A1, A2) => B): B = { require(resource1 != null, "The supplied resource was null.") require(resource2 != null, "The supplied resource was null.") diff --git a/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala b/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala index e30ae3f..4dccd80 100644 --- a/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala +++ b/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala @@ -106,7 +106,7 @@ class ConfigModule(config: Config) extends ScalaModule with Logging { /** Internal helper to bind the config key `key` to the given type `T`. */ private def bindConfigKey[T]( - key: String + key: String )(implicit manifest: Manifest[T], configReader: ConfigReader[T]): Unit = { try { fullConfig.get[T](key) match { diff --git a/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala b/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala index b3a0a87..89ae89d 100644 --- a/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala +++ b/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala @@ -8,55 +8,55 @@ import org.allenai.common.testkit.UnitSpec case class CaseClass(a: String) // Test class, defined in a way that's injectable by Guice (outside of a wrapping class). case class AnnotatedClass @Inject()( - @Named("fooString") foo: String, - // This string has a default value in the module.conf file. - @Named("hasDefault") hasDefault: String, - unannotated: Set[String], - @Named("boolbool") boolean: Boolean, - @Named("barNum") bar: Int, - @Named("barNum") barLong: Long, - @Named("barNum") barDouble: Double, - @Named("unsupported") unsupported: CaseClass + @Named("fooString") foo: String, + // This string has a default value in the module.conf file. + @Named("hasDefault") hasDefault: String, + unannotated: Set[String], + @Named("boolbool") boolean: Boolean, + @Named("barNum") bar: Int, + @Named("barNum") barLong: Long, + @Named("barNum") barDouble: Double, + @Named("unsupported") unsupported: CaseClass ) case class OptionalParamClass @Inject()( - @Named("presentString") present: String, - @Named("presentString") presentOption: Option[String], - @Named("missingString") missingOption: Option[String] + @Named("presentString") present: String, + @Named("presentString") presentOption: Option[String], + @Named("missingString") missingOption: Option[String] ) // Test class with nested Config objects. case class NestedConfig @Inject()( - @Named("root") root: Config, - @Named("root.nested") nested: Config, - @Named("nested") nestedNone: Option[Config], - @Named("root.string") rootString: String, - @Named("root.nested.string") nestedString: String + @Named("root") root: Config, + @Named("root.nested") nested: Config, + @Named("nested") nestedNone: Option[Config], + @Named("root.string") rootString: String, + @Named("root.nested.string") nestedString: String ) // Test class, using namespaced values. case class PrefixClass @Inject()( - @Named("prefix.fooString") foo: String, - // This string has a default value in the module.conf file. - @Named("prefix.hasDefault") hasDefault: String, - @Named("prefix.boolbool") boolean: Boolean, - @Named("prefix.nested.bool") nestedBool: Boolean, - // This doesn't begin with the right prefix, so it shouldn't get a binding. - @Named("ignored_no_prefix") bar: Int + @Named("prefix.fooString") foo: String, + // This string has a default value in the module.conf file. + @Named("prefix.hasDefault") hasDefault: String, + @Named("prefix.boolbool") boolean: Boolean, + @Named("prefix.nested.bool") nestedBool: Boolean, + // This doesn't begin with the right prefix, so it shouldn't get a binding. + @Named("ignored_no_prefix") bar: Int ) // Test class with dotted keys. case class DottedKeys @Inject()( - @Named("\"i.have\".dots") dots: String, - @Named("\"i.have.more.dots\".bar") bar: Int + @Named("\"i.have\".dots") dots: String, + @Named("\"i.have.more.dots\".bar") bar: Int ) // Test class with Seq values. case class SeqValues @Inject()( - @Named("seq.ofConfig") configs: Seq[Config], - @Named("seq.ofString") strings: Seq[String], - @Named("seq.ofBool") booleans: Seq[Boolean], - @Named("seq.ofDouble") doubles: Seq[Double] + @Named("seq.ofConfig") configs: Seq[Config], + @Named("seq.ofString") strings: Seq[String], + @Named("seq.ofBool") booleans: Seq[Boolean], + @Named("seq.ofDouble") doubles: Seq[Double] ) class ConfigModuleSpec extends UnitSpec { From 7e5f3ff1634a1706d3afb1b9fa20936ba724b0e1 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Wed, 11 Sep 2019 14:31:57 -0700 Subject: [PATCH 24/69] Also remove cache subproject --- README.md | 2 - build.sbt | 5 - cache/build.sbt | 10 -- .../allenai/common/cache/JsonQueryCache.scala | 103 ------------------ .../scala/org/allenai/common/cache/Foo.scala | 10 -- .../common/cache/JsonQueryCacheSpec.scala | 38 ------- .../allenai/common/cache/QueryCaches.scala | 48 -------- 7 files changed, 216 deletions(-) delete mode 100644 cache/build.sbt delete mode 100644 cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala delete mode 100644 cache/src/test/scala/org/allenai/common/cache/Foo.scala delete mode 100644 cache/src/test/scala/org/allenai/common/cache/JsonQueryCacheSpec.scala delete mode 100644 cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala diff --git a/README.md b/README.md index efba828..4ad3fc9 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,6 @@ common A collection of useful utility classes and functions. Slowly on the path to deprecation. -[API Docs](http://allenai.github.io/common/latest/api/#package). - `testkit` - Unit test classes and utilities. `guice` - Guice-specific libraries. diff --git a/build.sbt b/build.sbt index 269b371..069b777 100644 --- a/build.sbt +++ b/build.sbt @@ -57,7 +57,6 @@ inConfig(IntegrationTest)(org.scalafmt.sbt.ScalafmtPlugin.scalafmtConfigSettings lazy val common = (project in file(".")) .aggregate( - cache, core, guice, testkit @@ -70,10 +69,6 @@ lazy val common = (project in file(".")) buildSettings ) -lazy val cache = Project(id = "cache", base = file("cache")) - .settings(projectSettings, buildSettings) - .dependsOn(core, testkit % "test->compile") - lazy val core = Project(id = "core", base = file("core")) .settings(projectSettings, buildSettings) .dependsOn(testkit % "test->compile") diff --git a/cache/build.sbt b/cache/build.sbt deleted file mode 100644 index 41ddb3f..0000000 --- a/cache/build.sbt +++ /dev/null @@ -1,10 +0,0 @@ -import Dependencies._ - -name := "common-cache" - -libraryDependencies ++= Seq( - jedis, - mockJedis % Test, - sprayJson, - typesafeConfig -) diff --git a/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala b/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala deleted file mode 100644 index c772b28..0000000 --- a/cache/src/main/scala/org/allenai/common/cache/JsonQueryCache.scala +++ /dev/null @@ -1,103 +0,0 @@ -package org.allenai.common.cache - -import org.allenai.common.Config._ - -import com.typesafe.config.Config -import redis.clients.jedis.{ Jedis, JedisPool, JedisPoolConfig, Protocol } -import spray.json._ -import scala.collection.JavaConverters._ - -object JsonQueryCache { - - /** Factory method for creating a cache instance from config. - * The config must have keys for `hostname` and `clientPrefix`. It may also optionally have - * keys for `port` and `timeoutMillis`; if not given, these values are set to the Jedis defaults. - */ - def fromConfig[V](config: Config)(implicit jsonFormat: JsonFormat[V]): JsonQueryCache[V] = { - // Required fields. - val hostname: String = config[String]("hostname") - val clientPrefix: String = config[String]("clientPrefix") - - // Optional overrides for Jedis defaults. - val port: Int = config.get[Int]("port") getOrElse Protocol.DEFAULT_PORT - val timeoutMillis: Int = config.get[Int]("timeoutMillis") getOrElse Protocol.DEFAULT_TIMEOUT - - apply[V](clientPrefix, hostname, port, timeoutMillis) - } - - def apply[V]( - clientPrefix: String, - hostname: String, - port: Int = Protocol.DEFAULT_PORT, - timeoutMillis: Int = Protocol.DEFAULT_TIMEOUT - )(implicit jsonFormat: JsonFormat[V]): JsonQueryCache[V] = { - new JsonQueryCache[V]( - clientPrefix, - new JedisPool(new JedisPoolConfig, hostname, port, timeoutMillis) - ) - } - -} - -/** Class holding a Redis cache of query results. This is meant to store any value `T` where - * `T : spray.json.JsonFormat` (any `T` with a json serialization as per spray json), keyed on - * string query. Multiple cache instances (instances pointing to different Redis caches) need to be - * configured to have different JedisPools. - * @param clientPrefix an identifier for the client using this caching mechanism, which will become - * part of the cache key (prepended to the actual query) - * @param pool the JedisPool that the client should use to serve requests - */ -class JsonQueryCache[V: JsonFormat] protected[cache] (clientPrefix: String, pool: JedisPool) { - - /** @return the cache key for the query, with client prefix prepended */ - protected def keyForQuery(query: String): String = s"${clientPrefix}_$query" - - /** Retrieves the value for a passed key. - * @param query key for stored value (not including client prefix) - * @return Option containing value, None if not found or timed out (async) - */ - def get(query: String): Option[V] = { - withResource[Option[V]] { client: Jedis => - Option(client.get(keyForQuery(query))) map { response: String => - response.parseJson.convertTo[V] - } - } - } - - /** Puts a key->value pair in the cache. - * @param query key for value (not including client prefix) - * @param response Value you want stored in cache - */ - def put(query: String, response: V): Unit = withResource[Unit] { client: Jedis => - client.set(keyForQuery(query), response.toJson.compactPrint) - } - - /** Deletes a key->value pair from the cache. - * @param query key for value you want to delete (not including client prefix) - */ - def del(query: String): Unit = withResource[Unit] { client: Jedis => - client.del(keyForQuery(query)) - } - - /** Runs the given operation, handling fetching and closing the Jedis connection. */ - private def withResource[T](operation: (Jedis => T)): T = { - var resource: Jedis = null - try { - resource = pool.getResource - operation(resource) - } finally { - if (resource != null) { - resource.close() - } - } - } - - /** Returns all the keys matching the glob-style pattern as space separated strings. The time complexity is O(n), - * with n being the number of keys in the DB, and assuming keys and pattern. - * of limited length) - * @param pattern Glob style pattern; examples are "h*llo", "h?llo", h[ea]llo - */ - def keys(pattern: String): Iterable[String] = withResource[Iterable[String]] { client: Jedis => - client.keys(keyForQuery(pattern)).asScala - } -} diff --git a/cache/src/test/scala/org/allenai/common/cache/Foo.scala b/cache/src/test/scala/org/allenai/common/cache/Foo.scala deleted file mode 100644 index e70310f..0000000 --- a/cache/src/test/scala/org/allenai/common/cache/Foo.scala +++ /dev/null @@ -1,10 +0,0 @@ -package org.allenai.common.cache - -import spray.json.DefaultJsonProtocol._ - -/** Simple class for testing the cache can handle object structures. */ -case class Foo(stringVar: String, intVar: Int) - -object Foo { - implicit val fooFormat = jsonFormat2(Foo.apply) -} diff --git a/cache/src/test/scala/org/allenai/common/cache/JsonQueryCacheSpec.scala b/cache/src/test/scala/org/allenai/common/cache/JsonQueryCacheSpec.scala deleted file mode 100644 index d1a5518..0000000 --- a/cache/src/test/scala/org/allenai/common/cache/JsonQueryCacheSpec.scala +++ /dev/null @@ -1,38 +0,0 @@ -package org.allenai.common.cache - -import org.allenai.common.testkit.UnitSpec - -import com.fiftyonred.mock_jedis.MockJedisPool -import redis.clients.jedis.JedisPoolConfig -import spray.json.DefaultJsonProtocol._ - -class JsonQueryCacheSpec extends UnitSpec { - import Foo._ - - val mockJedisPool = new MockJedisPool(new JedisPoolConfig, "localhost") - val stringQueryCache = new JsonQueryCache[String]("test_string", mockJedisPool) - val intQueryCache = new JsonQueryCache[Int]("test_int", mockJedisPool) - val seqStringQueryCache = new JsonQueryCache[Seq[String]]("test_seq", mockJedisPool) - val fooQueryCache = new JsonQueryCache[Foo]("test_foo", mockJedisPool) - - val queryCaches = new QueryCaches( - stringQueryCache, - intQueryCache, - seqStringQueryCache, - fooQueryCache - ) - - "JsonQueryCache" should "return None when items are not in cache" in { - assert(queryCaches.getAll().forall(_.isEmpty)) - } - - it should "put the items in properly and let us get them back" in { - queryCaches.putAll() - assert(queryCaches.allThereAndEq()) - } - - it should "delete the items properly" in { - queryCaches.delAll() - assert(queryCaches.getAll().forall(_.isEmpty)) - } -} diff --git a/cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala b/cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala deleted file mode 100644 index cb2bece..0000000 --- a/cache/src/test/scala/org/allenai/common/cache/QueryCaches.scala +++ /dev/null @@ -1,48 +0,0 @@ -package org.allenai.common.cache - -class QueryCaches( - stringQueryCache: JsonQueryCache[String], - intQueryCache: JsonQueryCache[Int], - seqStringQueryCache: JsonQueryCache[Seq[String]], - fooQueryCache: JsonQueryCache[Foo] -) { - val stringKey = "stringKey" - val stringValue = "stringValue" - - val intKey = "intKey" - val intValue = 32 - - val seqStringKey = "seqStringKey" - val seqStringValue = Seq("a string", "a second string", "third time's the charm") - - val fooKey = "fooKey" - val fooValue = new Foo("stringerino", 42) - - def getAll(): Seq[Option[Any]] = Seq( - stringQueryCache.get(stringKey), - intQueryCache.get(intKey), - seqStringQueryCache.get(seqStringKey), - fooQueryCache.get(fooKey) - ) - - def putAll(): Unit = { - stringQueryCache.put(stringKey, stringValue) - intQueryCache.put(intKey, intValue) - seqStringQueryCache.put(seqStringKey, seqStringValue) - fooQueryCache.put(fooKey, fooValue) - } - - def delAll(): Unit = { - stringQueryCache.del(stringKey) - intQueryCache.del(intKey) - seqStringQueryCache.del(seqStringKey) - fooQueryCache.del(fooKey) - } - - def allThereAndEq(): Boolean = { - stringQueryCache.get(stringKey).exists(_.equals(stringValue)) && - intQueryCache.get(intKey).exists(_.equals(intValue)) && - seqStringQueryCache.get(seqStringKey).exists(_.equals(seqStringValue)) && - fooQueryCache.get(fooKey).exists(_.equals(fooValue)) - } -} From 87ace8be2ca0d8d0f04bce0ae7191540770d1490 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Thu, 12 Sep 2019 13:35:54 -0700 Subject: [PATCH 25/69] Update release process notes + bump to new version --- README.md | 28 ++++++++++++++-------------- build.sbt | 2 +- version.sbt | 1 - 3 files changed, 15 insertions(+), 16 deletions(-) delete mode 100644 version.sbt diff --git a/README.md b/README.md index 4ad3fc9..3dd89d1 100644 --- a/README.md +++ b/README.md @@ -23,20 +23,20 @@ using the `sbt-bintray` plugin to find this artifact. Releasing new versions ---------------------- -This project releases to BinTray. To make a release, follow our standard -release process. - -[WIP PENDING VERIFICATION (rreas@)] - -1. Make sure you are on a branch of the main repository other than `master`. -**You cannot use a branch of your fork for releases.** If you do, your tag -will not make it to the main repository. -1. Set the upstream repository (`git push --set-upstream origin $branch_name`). -1. Kick of the release with `sbt release`. -1. Create a pull request and make sure CI passes. -1. You can verify that it was published by [looking on BinTray.com](https://bintray.com/allenai/maven)! - -You are done! +This project releases to BinTray. To make a release: + +1. Pull the latest code on the master branch that you want to release +1. Edit `build.sbt` to remove "-SNAPSHOT" from the current version +1. Create a pull request if desired or push to master if you are only changing the version +1. Tag the release `git tag -a vX.Y.Z -m "Release X.Y.Z"` replacing X.Y.Z with the correct version +1. Push the tag back to origin `git push origin vX.Y.Z` +1. Release the build on Bintray `sbt +publish` (the "+" is required to cross-compile) +1. Verify publication [on bintray.com](https://bintray.com/allenai/maven) +1. Bump the version in `build.sbt` on master (and push!) with X.Y.Z+1-SNAPSHOT (e.g., 2.5.1 +-SNAPSHOT after releasing 2.5.0) + +If you make a mistake you can rollback the release with `sbt bintrayUnpublish` and retag the + version to a different commit as necessary. Guideline for Contributing to `common` --------------------------- diff --git a/build.sbt b/build.sbt index 069b777..8b11791 100644 --- a/build.sbt +++ b/build.sbt @@ -6,7 +6,7 @@ lazy val scala213 = "2.13.0" // Not supported yet (collections changes required) lazy val supportedScalaVersions = List(scala212, scala211) ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "2.0.0-SNAPSHOT" +ThisBuild / version := "2.0.0" ThisBuild / scalaVersion := scala212 lazy val spray = "spray" at "http://repo.spray.io/" diff --git a/version.sbt b/version.sbt deleted file mode 100644 index a25483b..0000000 --- a/version.sbt +++ /dev/null @@ -1 +0,0 @@ -version in ThisBuild := "2.0.0-SNAPSHOT" \ No newline at end of file From 4055c55b99ca7babebb6ec4f2d39ca239c71ab40 Mon Sep 17 00:00:00 2001 From: Russell Reas Date: Thu, 12 Sep 2019 13:37:00 -0700 Subject: [PATCH 26/69] Update version to 2.0.1-SNAPSHOT --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 8b11791..5be2914 100644 --- a/build.sbt +++ b/build.sbt @@ -6,7 +6,7 @@ lazy val scala213 = "2.13.0" // Not supported yet (collections changes required) lazy val supportedScalaVersions = List(scala212, scala211) ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "2.0.0" +ThisBuild / version := "2.0.1-SNAPSHOT" ThisBuild / scalaVersion := scala212 lazy val spray = "spray" at "http://repo.spray.io/" From a4483985a5296e56788ce93fa0795a207839f679 Mon Sep 17 00:00:00 2001 From: bbstilson Date: Thu, 4 Jun 2020 21:02:10 -0700 Subject: [PATCH 27/69] cross publish with 2.13 --- .gitignore | 3 + build.sbt | 60 +++++++++++-------- .../org/allenai/common/ScalaUtils.scala | 0 .../allenai/common/immutable/Interval.scala | 4 +- .../org/allenai/common/ScalaUtils.scala | 3 + .../scala/org/allenai/common/Config.scala | 2 +- .../scala/org/allenai/common/FileUtils.scala | 4 +- .../org/allenai/common/JsonFormats.scala | 2 - .../scala/org/allenai/common/Logging.scala | 2 +- .../LoggingWithUncaughtExceptions.scala | 2 +- .../org/allenai/common/SeekableSource.scala | 2 +- .../allenai/common/SourceInputStream.scala | 1 - .../org/allenai/common/json/package.scala | 1 - .../org/allenai/common/ScalaUtilsSpec.scala | 0 .../common/immutable/IntervalSpec.scala | 0 .../common/testkit/ScratchDirectory.scala | 6 +- 16 files changed, 52 insertions(+), 40 deletions(-) rename core/src/main/{scala => scala-2.11-2.12}/org/allenai/common/ScalaUtils.scala (100%) rename core/src/main/{scala => scala-2.11-2.12}/org/allenai/common/immutable/Interval.scala (99%) create mode 100644 core/src/main/scala-2.13/org/allenai/common/ScalaUtils.scala rename core/src/test/{scala => scala-2.11-2.12}/org/allenai/common/ScalaUtilsSpec.scala (100%) rename core/src/test/{scala => scala-2.11-2.12}/org/allenai/common/immutable/IntervalSpec.scala (100%) diff --git a/.gitignore b/.gitignore index 5a05fe5..4fa0430 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ .settings bin/ target/ +**/.bloop/ +**/.metals/ +project/metals.sbt diff --git a/build.sbt b/build.sbt index 5be2914..dfb9c05 100644 --- a/build.sbt +++ b/build.sbt @@ -1,12 +1,12 @@ import Dependencies._ lazy val scala211 = "2.11.12" -lazy val scala212 = "2.12.9" -lazy val scala213 = "2.13.0" // Not supported yet (collections changes required) -lazy val supportedScalaVersions = List(scala212, scala211) +lazy val scala212 = "2.12.10" +lazy val scala213 = "2.13.2" +lazy val supportedScalaVersions = List(scala211, scala212, scala213) ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "2.0.1-SNAPSHOT" +ThisBuild / version := "2.0.1-SNAPSHOT" ThisBuild / scalaVersion := scala212 lazy val spray = "spray" at "http://repo.spray.io/" @@ -22,7 +22,9 @@ lazy val projectSettings = Seq( dependencyOverrides ++= Logging.loggingDependencyOverrides, publishMavenStyle := true, publishArtifact in Test := false, - pomIncludeRepository := { _ => false }, + pomIncludeRepository := { _ => + false + }, licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")), homepage := Some(url("https://github.com/allenai/common")), apiURL := Some(url("https://allenai.github.io/common/")), @@ -32,8 +34,7 @@ lazy val projectSettings = Seq( "https://github.com/allenai/common.git" ) ), - pomExtra := ( - + pomExtra := ( allenai-dev-role Allen Institute for Artificial Intelligence @@ -50,32 +51,43 @@ lazy val buildSettings = Seq( scalacOptions ++= Seq("-target:jvm-1.8", "-Xlint", "-deprecation", "-feature"), javacOptions ++= Seq("-source", "1.8", "-target", "1.8"), crossScalaVersions := supportedScalaVersions, + Compile / unmanagedSourceDirectories ++= { + CrossVersion.partialVersion(scalaVersion.value) match { + case Some((2, x)) if x == 11 || x == 12 => { + Seq(file(sourceDirectory.value.getPath + "/main/scala-2.11-2.12")) + } + case Some((2, x)) if x == 13 => { + Seq(file(sourceDirectory.value.getPath + "/main/scala-2.13")) + } + case _ => Seq.empty // dotty support would go here + } + } ) // Not necessary for this repository but here as an example inConfig(IntegrationTest)(org.scalafmt.sbt.ScalafmtPlugin.scalafmtConfigSettings) lazy val common = (project in file(".")) - .aggregate( - core, - guice, - testkit - ) - .configs(IntegrationTest) - .settings( - Defaults.itSettings, - crossScalaVersions := Nil, - publish / skip := true, - buildSettings - ) + .aggregate( + core, + guice, + testkit + ) + .configs(IntegrationTest) + .settings( + Defaults.itSettings, + crossScalaVersions := Nil, + publish / skip := true, + buildSettings + ) lazy val core = Project(id = "core", base = file("core")) - .settings(projectSettings, buildSettings) - .dependsOn(testkit % "test->compile") + .settings(projectSettings, buildSettings) + .dependsOn(testkit % "test->compile") lazy val guice = Project(id = "guice", base = file("guice")) - .settings(projectSettings, buildSettings) - .dependsOn(core, testkit % "test->compile") + .settings(projectSettings, buildSettings) + .dependsOn(core, testkit % "test->compile") lazy val testkit = Project(id = "testkit", base = file("testkit")) - .settings(projectSettings, buildSettings) + .settings(projectSettings, buildSettings) diff --git a/core/src/main/scala/org/allenai/common/ScalaUtils.scala b/core/src/main/scala-2.11-2.12/org/allenai/common/ScalaUtils.scala similarity index 100% rename from core/src/main/scala/org/allenai/common/ScalaUtils.scala rename to core/src/main/scala-2.11-2.12/org/allenai/common/ScalaUtils.scala diff --git a/core/src/main/scala/org/allenai/common/immutable/Interval.scala b/core/src/main/scala-2.11-2.12/org/allenai/common/immutable/Interval.scala similarity index 99% rename from core/src/main/scala/org/allenai/common/immutable/Interval.scala rename to core/src/main/scala-2.11-2.12/org/allenai/common/immutable/Interval.scala index a68fc3f..1975f0f 100644 --- a/core/src/main/scala/org/allenai/common/immutable/Interval.scala +++ b/core/src/main/scala-2.11-2.12/org/allenai/common/immutable/Interval.scala @@ -33,9 +33,7 @@ */ package org.allenai.common.immutable -import Interval.empty import spray.json._ -import spray.json.DefaultJsonProtocol._ import scala.util.matching.Regex @@ -469,7 +467,7 @@ object Interval { object Empty extends Interval(0, 0) { override def toString: String = "{}" def unapply(interval: Interval): Option[Unit] = interval match { - case `empty` => Some(Unit) + case `empty` => Some(()) case _ => None } } diff --git a/core/src/main/scala-2.13/org/allenai/common/ScalaUtils.scala b/core/src/main/scala-2.13/org/allenai/common/ScalaUtils.scala new file mode 100644 index 0000000..7ab935a --- /dev/null +++ b/core/src/main/scala-2.13/org/allenai/common/ScalaUtils.scala @@ -0,0 +1,3 @@ +package org.allenai.common + +object ScalaUtils {} diff --git a/core/src/main/scala/org/allenai/common/Config.scala b/core/src/main/scala/org/allenai/common/Config.scala index 0f37553..fb0da62 100644 --- a/core/src/main/scala/org/allenai/common/Config.scala +++ b/core/src/main/scala/org/allenai/common/Config.scala @@ -82,7 +82,7 @@ object Config { } implicit val stringListReader = apply[Seq[String]] { (config, key) => - config.getStringList(key).asScala + config.getStringList(key).asScala.toSeq } implicit val intListReader = apply[Seq[Int]] { (config, key) => config.getIntList(key).asScala.toList.map(_.intValue) diff --git a/core/src/main/scala/org/allenai/common/FileUtils.scala b/core/src/main/scala/org/allenai/common/FileUtils.scala index 18d6689..6917250 100644 --- a/core/src/main/scala/org/allenai/common/FileUtils.scala +++ b/core/src/main/scala/org/allenai/common/FileUtils.scala @@ -21,7 +21,7 @@ object FileUtils extends Logging { def getCSVContentFromFile(file: File)(implicit codec: Codec): Seq[Seq[String]] = { logger.debug(s"Loading CSV file ${file.getName}") val csvReader = new CSVReader(new InputStreamReader(new FileInputStream(file), codec.charSet)) - Resource.using(csvReader)(_.readAll.asScala.map(_.toVector)) + Resource.using(csvReader)(_.readAll.asScala.toSeq.map(_.toVector)) } /** Get a resource file for a given class as a Stream. Caller is responsible for closing this @@ -63,6 +63,6 @@ object FileUtils extends Logging { )(implicit codec: Codec): Seq[Seq[String]] = { logger.debug(s"Loading CSV resource $name") val csvReader = new CSVReader(getResourceAsReader(clazz, name)(codec)) - Resource.using(csvReader)(_.readAll.asScala.map(_.toVector)) + Resource.using(csvReader)(_.readAll.asScala.toSeq.map(_.toVector)) } } diff --git a/core/src/main/scala/org/allenai/common/JsonFormats.scala b/core/src/main/scala/org/allenai/common/JsonFormats.scala index 72f9831..024c411 100644 --- a/core/src/main/scala/org/allenai/common/JsonFormats.scala +++ b/core/src/main/scala/org/allenai/common/JsonFormats.scala @@ -1,9 +1,7 @@ package org.allenai.common import com.typesafe.config.{ Config => TypesafeConfig } -import spray.json.SerializationException import spray.json._ -import spray.json.DefaultJsonProtocol._ import java.io.PrintWriter import java.io.StringWriter diff --git a/core/src/main/scala/org/allenai/common/Logging.scala b/core/src/main/scala/org/allenai/common/Logging.scala index 8bb7bee..1c43821 100644 --- a/core/src/main/scala/org/allenai/common/Logging.scala +++ b/core/src/main/scala/org/allenai/common/Logging.scala @@ -1,6 +1,6 @@ package org.allenai.common -import ch.qos.logback.classic.{ Level, Logger } +import ch.qos.logback.classic.Level import ch.qos.logback.classic.encoder.PatternLayoutEncoder import ch.qos.logback.classic.html.HTMLLayout import ch.qos.logback.classic.spi.ILoggingEvent diff --git a/core/src/main/scala/org/allenai/common/LoggingWithUncaughtExceptions.scala b/core/src/main/scala/org/allenai/common/LoggingWithUncaughtExceptions.scala index 8dcac3c..c68f6d6 100644 --- a/core/src/main/scala/org/allenai/common/LoggingWithUncaughtExceptions.scala +++ b/core/src/main/scala/org/allenai/common/LoggingWithUncaughtExceptions.scala @@ -4,7 +4,7 @@ package org.allenai.common // Alternative: use a selftype of Logging, as jessek@allenai.org suggested. trait LoggingWithUncaughtExceptions extends Logging { Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { - def uncaughtException(t: Thread, e: Throwable) { + def uncaughtException(t: Thread, e: Throwable): Unit = { logger.error("Uncaught exception in thread: " + t.getName, e) } }) diff --git a/core/src/main/scala/org/allenai/common/SeekableSource.scala b/core/src/main/scala/org/allenai/common/SeekableSource.scala index afe7082..0e95db3 100644 --- a/core/src/main/scala/org/allenai/common/SeekableSource.scala +++ b/core/src/main/scala/org/allenai/common/SeekableSource.scala @@ -282,7 +282,7 @@ class SeekableSource(inFile: FileChannel, bufferSize: Int = 8 << 20)(implicit co var stringBuilder: StringBuilder = null var moreChars = true while (moreChars) { - var start = index + val start = index // Read chars until we find a newline or the end-of-buffer. var ch: Byte = 0 diff --git a/core/src/main/scala/org/allenai/common/SourceInputStream.scala b/core/src/main/scala/org/allenai/common/SourceInputStream.scala index 7ae97fe..6303b81 100644 --- a/core/src/main/scala/org/allenai/common/SourceInputStream.scala +++ b/core/src/main/scala/org/allenai/common/SourceInputStream.scala @@ -1,6 +1,5 @@ package org.allenai.common -import scala.collection.Iterator import scala.io.{ Codec, Source } import java.io.InputStream diff --git a/core/src/main/scala/org/allenai/common/json/package.scala b/core/src/main/scala/org/allenai/common/json/package.scala index 90b069f..a1705d4 100644 --- a/core/src/main/scala/org/allenai/common/json/package.scala +++ b/core/src/main/scala/org/allenai/common/json/package.scala @@ -19,7 +19,6 @@ package org.allenai.common package object json { import spray.json._ - import spray.json.DefaultJsonProtocol._ implicit class RichJsObject(val jsObj: JsObject) extends AnyVal { diff --git a/core/src/test/scala/org/allenai/common/ScalaUtilsSpec.scala b/core/src/test/scala-2.11-2.12/org/allenai/common/ScalaUtilsSpec.scala similarity index 100% rename from core/src/test/scala/org/allenai/common/ScalaUtilsSpec.scala rename to core/src/test/scala-2.11-2.12/org/allenai/common/ScalaUtilsSpec.scala diff --git a/core/src/test/scala/org/allenai/common/immutable/IntervalSpec.scala b/core/src/test/scala-2.11-2.12/org/allenai/common/immutable/IntervalSpec.scala similarity index 100% rename from core/src/test/scala/org/allenai/common/immutable/IntervalSpec.scala rename to core/src/test/scala-2.11-2.12/org/allenai/common/immutable/IntervalSpec.scala diff --git a/testkit/src/main/scala/org/allenai/common/testkit/ScratchDirectory.scala b/testkit/src/main/scala/org/allenai/common/testkit/ScratchDirectory.scala index 95324a0..4999639 100644 --- a/testkit/src/main/scala/org/allenai/common/testkit/ScratchDirectory.scala +++ b/testkit/src/main/scala/org/allenai/common/testkit/ScratchDirectory.scala @@ -15,14 +15,14 @@ trait ScratchDirectory extends BeforeAndAfterAll { dir } - override def beforeAll: Unit = require( + override def beforeAll(): Unit = require( scratchDir.exists && scratchDir.isDirectory, s"Unable to create scratch directory $scratchDir" ) - override def afterAll: Unit = delete(scratchDir) + override def afterAll(): Unit = delete(scratchDir) - private def delete(f: File) { + private def delete(f: File): Boolean = { if (f.isDirectory()) { f.listFiles.foreach(delete) } From 00327dafd36f3795a8873026688743457b019a7a Mon Sep 17 00:00:00 2001 From: bbstilson Date: Fri, 5 Jun 2020 06:55:54 -0700 Subject: [PATCH 28/69] bump build minor version --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index dfb9c05..8b7d498 100644 --- a/build.sbt +++ b/build.sbt @@ -6,7 +6,7 @@ lazy val scala213 = "2.13.2" lazy val supportedScalaVersions = List(scala211, scala212, scala213) ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "2.0.1-SNAPSHOT" +ThisBuild / version := "2.1.0" ThisBuild / scalaVersion := scala212 lazy val spray = "spray" at "http://repo.spray.io/" From 0acac5acbcd55a76358f0f8279ed8f32b15b3084 Mon Sep 17 00:00:00 2001 From: bbstilson Date: Fri, 5 Jun 2020 09:24:41 -0700 Subject: [PATCH 29/69] just delete interval --- build.sbt | 15 +- .../org/allenai/common/ScalaUtils.scala | 14 - .../allenai/common/immutable/Interval.scala | 493 ------------------ .../org/allenai/common/ScalaUtils.scala | 3 - .../org/allenai/common/ScalaUtilsSpec.scala | 13 - .../common/immutable/IntervalSpec.scala | 245 --------- 6 files changed, 2 insertions(+), 781 deletions(-) delete mode 100644 core/src/main/scala-2.11-2.12/org/allenai/common/ScalaUtils.scala delete mode 100644 core/src/main/scala-2.11-2.12/org/allenai/common/immutable/Interval.scala delete mode 100644 core/src/main/scala-2.13/org/allenai/common/ScalaUtils.scala delete mode 100644 core/src/test/scala-2.11-2.12/org/allenai/common/ScalaUtilsSpec.scala delete mode 100644 core/src/test/scala-2.11-2.12/org/allenai/common/immutable/IntervalSpec.scala diff --git a/build.sbt b/build.sbt index 8b7d498..bd7a51d 100644 --- a/build.sbt +++ b/build.sbt @@ -6,7 +6,7 @@ lazy val scala213 = "2.13.2" lazy val supportedScalaVersions = List(scala211, scala212, scala213) ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "2.1.0" +ThisBuild / version := "2.1.1-SNAPSHOT" ThisBuild / scalaVersion := scala212 lazy val spray = "spray" at "http://repo.spray.io/" @@ -50,18 +50,7 @@ lazy val buildSettings = Seq( javaOptions += s"-Dlogback.appname=${name.value}", scalacOptions ++= Seq("-target:jvm-1.8", "-Xlint", "-deprecation", "-feature"), javacOptions ++= Seq("-source", "1.8", "-target", "1.8"), - crossScalaVersions := supportedScalaVersions, - Compile / unmanagedSourceDirectories ++= { - CrossVersion.partialVersion(scalaVersion.value) match { - case Some((2, x)) if x == 11 || x == 12 => { - Seq(file(sourceDirectory.value.getPath + "/main/scala-2.11-2.12")) - } - case Some((2, x)) if x == 13 => { - Seq(file(sourceDirectory.value.getPath + "/main/scala-2.13")) - } - case _ => Seq.empty // dotty support would go here - } - } + crossScalaVersions := supportedScalaVersions ) // Not necessary for this repository but here as an example diff --git a/core/src/main/scala-2.11-2.12/org/allenai/common/ScalaUtils.scala b/core/src/main/scala-2.11-2.12/org/allenai/common/ScalaUtils.scala deleted file mode 100644 index e9937c5..0000000 --- a/core/src/main/scala-2.11-2.12/org/allenai/common/ScalaUtils.scala +++ /dev/null @@ -1,14 +0,0 @@ -package org.allenai.common - -/** Various convenient utilities for Scala constructs. */ -object ScalaUtils { - - /** A common use case for groupBy. Takes in a sequence of pairs, groups them by the first - * element, and returns a map from the group identifier to a sequence of second elements - * of the matching pairs. E.g., ((a,1), (b,2), (a,3), (b,4)) turns into {a -> (1,3), - * b -> (2,4)} - */ - def toMapUsingGroupByFirst[T1, T2](x: Seq[(T1, T2)]): Map[T1, Seq[T2]] = { - x.groupBy(_._1).mapValues(_.unzip._2) - } -} diff --git a/core/src/main/scala-2.11-2.12/org/allenai/common/immutable/Interval.scala b/core/src/main/scala-2.11-2.12/org/allenai/common/immutable/Interval.scala deleted file mode 100644 index 1975f0f..0000000 --- a/core/src/main/scala-2.11-2.12/org/allenai/common/immutable/Interval.scala +++ /dev/null @@ -1,493 +0,0 @@ -/** Adapted from BSD software developed by Michael Schmitz - * at the the University of Washington. - * - * https://github.com/knowitall/common-scala - * - * - * Copyright (c) 2012, University of Washington - * BSD 3-clause License / BSD Modified License / New BSD License - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * Neither the name of the University of Washington nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF WASHINGTON BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ -package org.allenai.common.immutable - -import spray.json._ - -import scala.util.matching.Regex - -/** Represents an open interval in the Integers. - * - * Intervals are created using the companion object. - * - * @param start the first item in this interval - * @param end one past the last item in this interval - * - * @author Michael Schmitz - */ -sealed class Interval private (val start: Int, val end: Int) - extends IndexedSeq[Int] - with Ordered[Interval] { - import Interval._ - require(start <= end, "start must be <= end: " + start + ">" + end) - - override def toString: String = "[" + start + ", " + end + ")" - override def equals(that: Any): Boolean = that match { - // fast comparison for Intervals - case that: Interval => that.canEqual(this) && that.start == this.start && that.end == this.end - // slower comparison for Seqs - case that: IndexedSeq[_] => super.equals(that) - case _ => false - } - override def hashCode: Int = start * 23 + end - override def canEqual(that: Any): Boolean = that.isInstanceOf[Interval] - override def compare(that: Interval): Int = - if (this.start > that.start) { - 1 - } else if (this.start < that.start) { - -1 - } else { - this.length - that.length - } - - /** Return the ith value of the interval. - * - * @param index the index to get - * @return the ith value of the interval - */ - override def apply(index: Int): Int = { - require(index >= 0, "index < 0: " + index) - require(index < length, "index >= length: " + index + " >= " + length) - - // a fast way of getting the ith index - min + index - } - - override def iterator: Iterator[Int] = { - new Iterator[Int] { - var index = start - - def hasNext: Boolean = index < end - def next(): Int = { - val result = index - index += 1 - result - } - } - } - - override def seq: IndexedSeq[Int] = this - - /** The length of the interval. */ - override def length: Int = end - start - - /** Tests whether this list contains a given value as an element. - * - * @param x the value to check - * @return true if this interval contains `x` - */ - def contains(x: Int): Boolean = x >= start && x < end - - /** Tests whether two intervals border but do not overlap. - * - * @param that the interval to check - * @return true if this interval borders the other interval - */ - def borders(that: Interval): Boolean = { - if (this == empty || that == empty) { - false - } else { - that.max == this.min - 1 || that.min == this.max + 1 - } - } - - /** Tests whether a point border an interval. - * - * @param that the point to check - * @return true if this interval borders the point - */ - def borders(that: Int): Boolean = { - if (this == empty) { - false - } else { - this.start - 1 == that || this.end == that - } - } - - /** Tests whether this interval is a superset of another interval. - * - * @param that the interval to check - * @return true if `this` is a superset of `that` - */ - def superset(that: Interval): Boolean = { - if (that == empty) { - true - } else if (this == empty) { - false - } else { - this.start <= that.start && this.end >= that.end - } - } - - /** Tests whether this interval is a subsert of another interval. - * - * @param that the interval to check - * @return true if `this` is a subset of `that` - */ - def subset(that: Interval): Boolean = { - if (that == empty) { - false - } else if (this == empty) { - true - } else { - this.start >= that.start && this.end <= that.end - } - } - - /** Tests whether another interval intersects this interval. - * - * @param that the interval to check - * @return true if `this` intersects `that` - */ - def intersects(that: Interval): Boolean = { - if (that == empty || this == empty) { - false - } else if (this == that) { - true - } else { - val left = this left that - val right = this right that - left.end > right.start - } - } - - /** Tests whether another interval is disjoint from this interval. - * This is the opposite of `intersects`. - * - * @param that the interval to check - * @return true if `this` is disjoint from `that` - */ - def disjoint(that: Interval): Boolean = !this.intersects(that) - - /** Measure the distance between two intervals. - * Bordering intervals have distance 1 and intersecting - * intervals have distance 0. The distance is always - * a positive number. - * - * @param that the interval to measure against - * @return the distance between two intervals. - */ - def distance(that: Interval): Int = { - require(that != empty && this != empty, "empty interval") - if (this intersects that) { - 0 - } else { - (this.min max that.min) - (this.max min that.max) - } - } - - /** Takes the union of two intervals. - * The two intervals must border or intersect each other. - */ - def union(that: Interval): Interval = { - if (that == empty) { - this - } else if (this == empty) { - that - } else { - require((this borders that) || (this intersects that), "intervals must border or intersect") - Interval.open(that.start min this.start, that.end max this.end) - } - } - - /** Takes the intersection of two intervals, or Interval.empty - * if they do not intersect. - */ - def intersect(that: Interval): Interval = { - if (that == empty || this == empty) { - Interval.empty - } else { - val start = this.start max that.start - val end = this.end min that.end - if (start < end) { - Interval.open(start, end) - } else { - Interval.empty - } - } - } - - /** Move the interval right. - * - * @param by the distance to move the interval - */ - def shift(by: Int): Interval = Interval.open(this.start + by, this.end + by) - - /** Returns true if this is left of that. - */ - def leftOf(that: Interval): Boolean = { - require(that != empty && this != empty, "empty interval") - this.end <= that.start - } - - /** Returns true if this is right of that. - */ - def rightOf(that: Interval): Boolean = { - require(that != empty && this != empty, "empty interval") - this.start >= that.end - } - - /** Determine whether this interval or the supplied interval is left. - * First compare based on the intervals' start, and secondly compare - * based on the intervals' length. - */ - def left(that: Interval): Interval = - if (that == empty) { - this - } else if (this == empty) { - that - } else if (that.start < this.start) { - that - } else if (that.start > this.start) { - this - } else if (that.length < this.length) { - that - } else { - this - } - - /** Determine whether this interval or the supplied interval is right. - * First compare based on the intervals' start, and secondly compare - * based on the intervals' length. - */ - def right(that: Interval): Interval = - if (that == empty) { - this - } else if (this == empty) { - that - } else if (that.start > this.start) { - that - } else if (that.start < this.start) { - this - } else if (that.length > this.length) { - that - } else { - this - } - - /** The minimum index in the interval. */ - def min: Int = { - require(this != empty, "empty interval") - start - } - - /** The maximum index in the interval. */ - def max: Int = { - require(this != empty, "empty interval") - end - 1 - } -} - -object Interval { - - /** The empty interval. */ - val empty: Interval = Empty - - /** Create a new singleton interval. */ - def singleton(x: Int): Singleton = new SingletonImpl(x) - - /** Create a new open interval. */ - def open(start: Int, end: Int): Interval = { - require(end >= start, "end < start: " + end + " < " + start) - if (start == end) { - Interval.empty - } else if (end - start == 1) { - Interval.singleton(start) - } else { - new Interval(start, end) - } - } - - /** Create a new closed interval. */ - def closed(start: Int, end: Int): Interval = { - require(end < Int.MaxValue, "end must be < Int.MaxValue") - require(end >= start, "end < start: " + end + " < " + start) - if (end == start) { - Interval.singleton(start) - } else { - new Interval(start, end + 1) - } - } - - /** Create an interval at the specified starting point of the specified length. */ - def ofLength(start: Int, length: Int): Interval = Interval.open(start, start + length) - - object Format { - val emptyRegex = new Regex("\\{\\}") - val singletonRegex = new Regex("\\{([+-]?\\d+)\\}") - val openIntervalRegex = new Regex("\\[([+-]?\\d+), ([+-]?\\d+)\\)") - val closedIntervalRegex = new Regex("\\[([+-]?\\d+), ([+-]?\\d+)\\]") - - def write(interval: Interval): String = interval.toString - - def read(pickled: String): Interval = { - pickled match { - case emptyRegex() => Interval.empty - case singletonRegex(value) => Interval.singleton(value.toInt) - case openIntervalRegex(a, b) => Interval.open(a.toInt, b.toInt) - case closedIntervalRegex(a, b) => Interval.closed(a.toInt, b.toInt) - } - } - } - - /* Simple Json (de-)serialization for intervals: - * Interval.open(3, 6) -> [3,6] - * Interval.empty -> [] - */ - implicit object IntervalJsonFormat extends RootJsonFormat[Interval] { - def write(i: Interval): JsValue = i match { - case Interval.empty => JsArray() - case _ => JsArray(JsNumber(i.start), JsNumber(i.end)) - } - - def read(value: JsValue): Interval = value match { - case JsArray(Vector()) => empty - case JsArray(Vector(JsNumber(start), JsNumber(end))) => Interval.open(start.toInt, end.toInt) - case _ => deserializationError("Interval expected") - } - } - - /** Create an open interval that includes all points between the two intervals. */ - def between(x: Interval, y: Interval): Interval = { - require(!(x intersects y), "intervals may not intersect") - Interval.open(x.end min y.end, x.start max y.start) - } - - /** create an interval from a sequence of `Int`s. - * - * @throws java.lang.IllegalArgumentException some x such that min < x < max is not in col - */ - def from(col: Seq[Int]): Interval = { - if (col.isEmpty) { - Interval.empty - } else { - val sorted = col.sorted - val min = sorted.head - - require( - sorted.zipWithIndex.forall { case (x, i) => x == min + i }, - "missing elements in collection: " + col - ) - - Interval.closed(min, sorted.last) - } - } - - /** create an interval from a collection of intervals. The intervals will be - * sorted and unioned. - * - * @throws java.lang.IllegalArgumentException gap in intervals - */ - def union(col: Seq[Interval]): Interval = { - val sorted = col.sorted - try { - sorted.reduceRight(_ union _) - } catch { - case _: IllegalArgumentException => - throw new IllegalArgumentException("gap in intervals: " + sorted) - } - } - - /** create the smallest interval that spans a collection of intervals. - * The intervals will be sorted and unioned. - * - * @throws java.lang.IllegalArgumentException gap in intervals - */ - def span(col: Iterable[Interval]): Interval = { - if (col.isEmpty) { - Interval.empty - } else { - Interval.open(col.map(_.min).min, col.map(_.max).max + 1) - } - } - - /** create a minimal spanning set of the supplied intervals. - * - * @return a sorted minimal spanning set - */ - def minimal(intervals: Iterable[Interval]): List[Interval] = { - val set = collection.immutable.SortedSet.empty[Int] ++ intervals.flatten - set - .foldLeft(List.empty[Interval]) { - case (list, i) => - val singleton = Interval.singleton(i) - list match { - case Nil => List(singleton) - case x :: xs if x borders i => (x union singleton) :: xs - case xs => singleton :: xs - } - } - .reverse - } - - // implementations - - object Open { - - /** Match exposing the bounds as an open interval */ - def unapply(interval: Interval): Option[(Int, Int)] = interval match { - case `empty` => None - case open: Interval => Some((open.start, open.end)) - } - } - - /** The empty interval. - */ - object Empty extends Interval(0, 0) { - override def toString: String = "{}" - def unapply(interval: Interval): Option[Unit] = interval match { - case `empty` => Some(()) - case _ => None - } - } - - /** An interval that includes only a single index. - * All intervals with a single element will always extend Singleton. - */ - sealed abstract class Singleton private[Interval] (elem: Int) extends Interval(elem, elem + 1) { - def index: Int = this.start - override def toString: String = "{" + elem + "}" - } - - object Singleton { - - /** Match exposing the bounds as a singleton */ - def unapply(interval: Interval): Option[Int] = interval match { - case singleton: Singleton => Some(singleton.index) - case _ => None - } - } - - private class SingletonImpl(elem: Int) extends Singleton(elem) -} diff --git a/core/src/main/scala-2.13/org/allenai/common/ScalaUtils.scala b/core/src/main/scala-2.13/org/allenai/common/ScalaUtils.scala deleted file mode 100644 index 7ab935a..0000000 --- a/core/src/main/scala-2.13/org/allenai/common/ScalaUtils.scala +++ /dev/null @@ -1,3 +0,0 @@ -package org.allenai.common - -object ScalaUtils {} diff --git a/core/src/test/scala-2.11-2.12/org/allenai/common/ScalaUtilsSpec.scala b/core/src/test/scala-2.11-2.12/org/allenai/common/ScalaUtilsSpec.scala deleted file mode 100644 index 9663b4b..0000000 --- a/core/src/test/scala-2.11-2.12/org/allenai/common/ScalaUtilsSpec.scala +++ /dev/null @@ -1,13 +0,0 @@ -package org.allenai.common - -import org.allenai.common.testkit.UnitSpec - -class ScalaUtilsSpec extends UnitSpec { - - "ScalaUtils" should "correctly perform Scala operations" in { - val inputPairs = Seq(("a", 1), ("b", 2), ("a", 3), ("b", 4)) - val outputMap = ScalaUtils.toMapUsingGroupByFirst(inputPairs) - outputMap("a") should be(List(1, 3)) - outputMap("b") should be(List(2, 4)) - } -} diff --git a/core/src/test/scala-2.11-2.12/org/allenai/common/immutable/IntervalSpec.scala b/core/src/test/scala-2.11-2.12/org/allenai/common/immutable/IntervalSpec.scala deleted file mode 100644 index 2de983a..0000000 --- a/core/src/test/scala-2.11-2.12/org/allenai/common/immutable/IntervalSpec.scala +++ /dev/null @@ -1,245 +0,0 @@ -/** Adapted from BSD software developed by Michael Schmitz - * at the the University of Washington. - * - * https://github.com/knowitall/common-scala - * - * - * Copyright (c) 2012, University of Washington - * BSD 3-clause License / BSD Modified License / New BSD License - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * Neither the name of the University of Washington nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ -package org.allenai.common.immutable - -import org.allenai.common.testkit.UnitSpec - -import org.scalacheck.Arbitrary -import org.scalacheck.Gen -import org.scalacheck.Prop.forAll -import org.scalacheck.Prop.propBoolean -import org.scalatest.prop.Checkers -import spray.json._ - -// scalastyle:off magic.number -class IntervalSpec extends UnitSpec with Checkers { - they should "border each other" in { - assert((Interval.open(0, 4) borders Interval.open(4, 8)) == true) - assert((Interval.open(4, 8) borders Interval.open(0, 4)) == true) - assert((Interval.open(0, 3) borders Interval.open(4, 8)) == false) - assert((Interval.open(4, 8) borders Interval.open(0, 3)) == false) - assert((Interval.empty borders Interval.open(4, 8)) == false) - } - - they should "union properly" in { - assert((Interval.open(0, 4) union Interval.open(4, 8)) == (Interval.open(0, 8))) - intercept[IllegalArgumentException] { - (Interval.open(0, 4) union Interval.open(6, 8)) - } - } - - they should "intersect properly" in { - assert((Interval.open(0, 4) intersect Interval.open(4, 8)) == (Interval.empty)) - assert((Interval.open(0, 4) intersect Interval.open(6, 8)) == (Interval.empty)) - assert((Interval.open(0, 4) intersect Interval.open(2, 6)) == (Interval.open(2, 4))) - } - - they should "contain properly" in { - assert((Interval.open(2, 3) contains 0) == false) - assert((Interval.open(2, 3) contains 1) == false) - assert((Interval.open(2, 3) contains 2) == true) - assert((Interval.open(2, 3) contains 3) == false) - } - - they should "shift ok" in { - assert((Interval.open(2, 4) shift 2) == Interval.open(4, 6)) - assert((Interval.open(2, 4) shift -2) == Interval.open(0, 2)) - } - - "the correct left interval" should "be determined" in { - assert((Interval.open(0, 4) left Interval.open(4, 8)) == (Interval.open(0, 4))) - assert((Interval.open(0, 4) left Interval.open(2, 6)) == (Interval.open(0, 4))) - assert((Interval.open(4, 8) left Interval.open(0, 4)) == (Interval.open(0, 4))) - assert((Interval.open(2, 6) left Interval.open(0, 4)) == (Interval.open(0, 4))) - } - - "the correct right interval" should "be determined" in { - assert((Interval.open(0, 4) right Interval.open(4, 8)) == (Interval.open(4, 8))) - assert((Interval.open(0, 4) right Interval.open(2, 6)) == (Interval.open(2, 6))) - assert((Interval.open(4, 8) right Interval.open(0, 4)) == (Interval.open(4, 8))) - assert((Interval.open(2, 6) right Interval.open(0, 4)) == (Interval.open(2, 6))) - } - - "leftOf" should "work" in { - assert((Interval.open(0, 4) leftOf Interval.open(4, 8)) == true) - assert((Interval.open(0, 4) leftOf Interval.open(2, 6)) == false) - assert((Interval.open(4, 8) leftOf Interval.open(0, 4)) == false) - assert((Interval.open(2, 6) leftOf Interval.open(0, 4)) == false) - } - - "rightOf" should "work" in { - assert((Interval.open(0, 4) rightOf Interval.open(4, 8)) == false) - assert((Interval.open(0, 4) rightOf Interval.open(2, 6)) == false) - assert((Interval.open(4, 8) rightOf Interval.open(0, 4)) == true) - assert((Interval.open(2, 6) rightOf Interval.open(0, 4)) == false) - } - - "overlapping intervals" should "have distance 0" in { - assert((Interval.open(0, 4) distance Interval.open(2, 6)) == (0)) - assert((Interval.open(2, 6) distance Interval.open(0, 3)) == (0)) - } - - they should "have the correct distance" in { - assert((Interval.open(0, 2) distance Interval.open(2, 5)) == (1)) - assert((Interval.open(0, 2) distance Interval.open(3, 5)) == (2)) - assert((Interval.open(0, 2) distance Interval.open(4, 6)) == (3)) - } - - "adjacent intervals" should "have the empty set between them" in { - assert(Interval.between(Interval.open(0, 2), Interval.open(2, 3)) == (Interval.empty)) - } - - "between" should "work properly" in { - assert(Interval.between(Interval.open(0, 2), Interval.open(3, 10)) == (Interval.open(2, 3))) - assert(Interval.between(Interval.open(0, 2), Interval.open(6, 10)) == (Interval.open(2, 6))) - } - - val intervalGen = for { - n <- Gen.choose(0, 100) - m <- Gen.choose(n, 100) - } yield Interval.open(n, m) - - "Interval.minimal" should "work properly" in { - implicit def arbInterval: Arbitrary[List[Interval]] = { - Arbitrary { - Gen.listOf(intervalGen) - } - } - - forAll { (intervals: List[Interval]) => - val min = Interval.minimal(intervals) - - // for all points in the original intervals - // that point must be in the new intervals - intervals.forall(i => min.exists(_.contains(i))) - - // for all points in one of the new intervals - // no other interval may contain the same point - min.forall { interval => - !min.exists { other => - !(other eq interval) && (other intersects interval) - } - } - - // result is sorted - min.sorted == min - } - } - - "empty" should "work properly" in { - assert((Interval.empty union Interval.open(2, 4)) == (Interval.open(2, 4))) - assert((Interval.empty intersect Interval.open(2, 4)) == (Interval.empty)) - - assert((Interval.empty left Interval.open(2, 4)) == (Interval.open(2, 4))) - assert((Interval.open(2, 4) left Interval.empty) == (Interval.open(2, 4))) - assert((Interval.empty right Interval.open(2, 4)) == (Interval.open(2, 4))) - assert((Interval.open(2, 4) right Interval.empty) == (Interval.open(2, 4))) - - assert((Interval.open(2, 4) subset Interval.empty) == false) - assert((Interval.empty subset Interval.open(2, 4)) == true) - - assert((Interval.open(2, 4) superset Interval.empty) == true) - assert((Interval.empty superset Interval.open(2, 4)) == false) - - intercept[IllegalArgumentException] { Interval.empty.min } - intercept[IllegalArgumentException] { Interval.empty.max } - intercept[IllegalArgumentException] { Interval.empty leftOf Interval.open(2, 4) } - intercept[IllegalArgumentException] { Interval.open(2, 4) rightOf Interval.empty } - - assert(Interval.empty.shift(5) == Interval.empty) - } - - def roundtripString(s: String): String = Interval.Format.write(Interval.Format.read(s)) - def roundtrip(x: Interval): Interval = Interval.Format.read(Interval.Format.write(x)) - - def roundtripsOkString(s: String): Unit = assert(roundtripString(s) == s) - def roundtripsOk(x: Interval): Unit = assert(roundtrip(x) == x) - "empty" should "round trip through serialization" in { - roundtripsOk(Interval.empty) - } - - "singleton intervals" should "round trip through serialization" in { - check { (x: Int) => - (x < Int.MaxValue) ==> { - val interval: Interval = Interval.singleton(x) - Interval.closed(x, x) == interval - } - } - - check { (x: Int) => - (x < Int.MaxValue) ==> { - val interval: Interval = Interval.singleton(x) - roundtrip(interval) == interval - } - } - } - - "open intervals" should "round trip through serialization" in { - forAll { (a: Int, b: Int) => - (a < b) ==> { - val interval = Interval.open(a, b) - roundtrip(interval) == interval - } - } - } - - "closed intervals" should "round trip through serialization" in { - check { (a: Int, b: Int) => - (a <= b && b < Int.MaxValue) ==> { - val interval = Interval.closed(a, b) - roundtrip(interval) == interval - } - } - } - - def roundtripJson(x: Interval): Interval = (x.toJson).convertTo[Interval] - - def roundtripsJsonOk(x: Interval): Unit = - assert({ - val rtrip = roundtripJson(x) - /* check string form as well to distinguish open/closed intervals */ - rtrip == x && rtrip.toString == x.toString - }) - - "Json serialization for intervals" should "work" in { - roundtripsJsonOk(Interval.empty) - roundtripsJsonOk(Interval.open(3, 4)) - roundtripsJsonOk(Interval.open(2, 4)) - roundtripsJsonOk(Interval.closed(3, 6)) - assert(Interval.empty.toJson.toString == "[]") - assert(Interval.open(3, 3).toJson.toString == "[]") - assert(Interval.open(3, 4).toJson.toString == "[3,4]") - assert(Interval.closed(3, 4).toJson.toString == "[3,5]") - } -} From a2e8af35160ac9abd6d588fd2e4391d2f94b8f2b Mon Sep 17 00:00:00 2001 From: bbstilson Date: Fri, 5 Jun 2020 09:40:44 -0700 Subject: [PATCH 30/69] more dedotated wam --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0ea2ecb..791ad52 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,6 +1,7 @@ version: 2 jobs: build: + resource_class: large # need more dedotated wam working_directory: ~/common docker: - image: openjdk:8 From 1113ec21909cf518a40458192772fc3c8e074309 Mon Sep 17 00:00:00 2001 From: bbstilson Date: Fri, 5 Jun 2020 09:44:27 -0700 Subject: [PATCH 31/69] even more dedotated wam --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 791ad52..6b53243 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,7 +1,7 @@ version: 2 jobs: build: - resource_class: large # need more dedotated wam + resource_class: xlarge # need more dedotated wam working_directory: ~/common docker: - image: openjdk:8 From 305aa6ef5d798044c84cfad22b08117a32ea76bb Mon Sep 17 00:00:00 2001 From: bbstilson Date: Fri, 5 Jun 2020 09:45:57 -0700 Subject: [PATCH 32/69] decrease box size but add some sbt opts --- .circleci/config.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 6b53243..8b9fe7f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,7 +1,7 @@ version: 2 jobs: build: - resource_class: xlarge # need more dedotated wam + resource_class: large # need more dedotated wam working_directory: ~/common docker: - image: openjdk:8 @@ -9,6 +9,7 @@ jobs: SBT_VERSION: 1.2.8 steps: - run: echo 'export ARTIFACT_BUILD=$CIRCLE_PROJECT_REPONAME-$CIRCLE_BUILD_NUM.zip' >> $BASH_ENV + - run: echo 'export SBT_OPTS="-XX:+CMSClassUnloadingEnabled -Xmx4G"' - run: name: Get sbt binary command: | From f046aaafc5a095aecdd802313f5fa88844ce4d9c Mon Sep 17 00:00:00 2001 From: bbstilson Date: Fri, 5 Jun 2020 09:50:58 -0700 Subject: [PATCH 33/69] maybe these settings? --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8b9fe7f..30c49e3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9,7 +9,7 @@ jobs: SBT_VERSION: 1.2.8 steps: - run: echo 'export ARTIFACT_BUILD=$CIRCLE_PROJECT_REPONAME-$CIRCLE_BUILD_NUM.zip' >> $BASH_ENV - - run: echo 'export SBT_OPTS="-XX:+CMSClassUnloadingEnabled -Xmx4G"' + - run: echo 'export SBT_OPTS="-XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=2G -Xmx2G"' - run: name: Get sbt binary command: | From b94337c7ecc91f3a6cef3d71c00e6bd1685f8184 Mon Sep 17 00:00:00 2001 From: bbstilson Date: Fri, 5 Jun 2020 09:58:07 -0700 Subject: [PATCH 34/69] split tests to different commands and clean in-between --- .circleci/config.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 30c49e3..5caa420 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -26,8 +26,14 @@ jobs: name: Clean package command: cat /dev/null | sbt clean - run: - name: Test package - command: cat /dev/null | sbt +test + name: Test package - 2.11 + command: cat /dev/null | sbt "clean" "2.11.12" "test" + - run: + name: Test package - 2.12 + command: cat /dev/null | sbt "clean" "2.12.10" "test" + - run: + name: Test package - 2.13 + command: cat /dev/null | sbt "clean" "2.13.2" "test" - run: name: Check formatting command: cat /dev/null | sbt scalafmtCheckAll From 9bc58752c98198fcaf78c1e01b4519c01672fb9b Mon Sep 17 00:00:00 2001 From: bbstilson Date: Fri, 5 Jun 2020 09:59:25 -0700 Subject: [PATCH 35/69] forgot the pluses --- .circleci/config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5caa420..b0a2db3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,13 +27,13 @@ jobs: command: cat /dev/null | sbt clean - run: name: Test package - 2.11 - command: cat /dev/null | sbt "clean" "2.11.12" "test" + command: cat /dev/null | sbt "clean" "++2.11.12" "test" - run: name: Test package - 2.12 - command: cat /dev/null | sbt "clean" "2.12.10" "test" + command: cat /dev/null | sbt "clean" "++2.12.10" "test" - run: name: Test package - 2.13 - command: cat /dev/null | sbt "clean" "2.13.2" "test" + command: cat /dev/null | sbt "clean" "++2.13.2" "test" - run: name: Check formatting command: cat /dev/null | sbt scalafmtCheckAll From fb3267dd57a0d27dc9cbdab3e3b7253efc14cb9d Mon Sep 17 00:00:00 2001 From: bbstilson Date: Fri, 5 Jun 2020 10:14:47 -0700 Subject: [PATCH 36/69] remove snapshot for 2.1.0 --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index bd7a51d..8375b5d 100644 --- a/build.sbt +++ b/build.sbt @@ -6,7 +6,7 @@ lazy val scala213 = "2.13.2" lazy val supportedScalaVersions = List(scala211, scala212, scala213) ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "2.1.1-SNAPSHOT" +ThisBuild / version := "2.1.0" ThisBuild / scalaVersion := scala212 lazy val spray = "spray" at "http://repo.spray.io/" From 74c4d7654f4941966febcd0ec40a35f0da119680 Mon Sep 17 00:00:00 2001 From: bbstilson Date: Fri, 5 Jun 2020 10:22:39 -0700 Subject: [PATCH 37/69] Update version to 2.1.1-SNAPSHOT --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 8375b5d..bd7a51d 100644 --- a/build.sbt +++ b/build.sbt @@ -6,7 +6,7 @@ lazy val scala213 = "2.13.2" lazy val supportedScalaVersions = List(scala211, scala212, scala213) ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "2.1.0" +ThisBuild / version := "2.1.1-SNAPSHOT" ThisBuild / scalaVersion := scala212 lazy val spray = "spray" at "http://repo.spray.io/" From ddde362ba86118dc0f85045a79382fded3277531 Mon Sep 17 00:00:00 2001 From: bbstilson Date: Sun, 7 Jun 2020 10:30:38 -0700 Subject: [PATCH 38/69] bring back version and test --- .../scala/org/allenai/common/Version.scala | 144 ++++++++++++++++++ .../scala/org/allenai/common/JsonIoSpec.scala | 3 +- .../org/allenai/common/VersionSpec.scala | 79 ++++++++++ 3 files changed, 224 insertions(+), 2 deletions(-) create mode 100644 core/src/main/scala/org/allenai/common/Version.scala create mode 100644 core/src/test/scala/org/allenai/common/VersionSpec.scala diff --git a/core/src/main/scala/org/allenai/common/Version.scala b/core/src/main/scala/org/allenai/common/Version.scala new file mode 100644 index 0000000..7a4e8ab --- /dev/null +++ b/core/src/main/scala/org/allenai/common/Version.scala @@ -0,0 +1,144 @@ +package org.allenai.common + +import org.allenai.common.Config._ +import org.allenai.common.json._ + +import com.typesafe.config.ConfigFactory +import spray.json.{ JsNumber, JsObject, JsString, JsValue, RootJsonFormat } + +import scala.collection.JavaConverters._ + +import java.util.Date + +/** Represents a git version. + * @param sha1 the output of `git sha1` in the repository + * @param commitDate commit date in milliseconds + * @param repoUrl the url of the git repo + */ +case class GitVersion(sha1: String, commitDate: Long, repoUrl: Option[String]) { + + /** A URL pointing to the specific commit on GitHub. */ + def commitUrl: Option[String] = { + repoUrl.map { base => + base + "/commit/" + sha1 + } + } + + /** @return a formatted date string */ + def prettyCommitDate: String = { + String.format("%1$tF %1$tT GMT%1$tz", new Date(commitDate)) + } +} + +object GitVersion { + import spray.json.DefaultJsonProtocol._ + implicit val gitVersionFormat = jsonFormat3(GitVersion.apply) + + /** The GitHub project URL. + * + * The remotes are searched for one with user "allenai" and then it's transformed into a valid + * GitHub project URL. + * @return a URL to a GitHub repo, or None if no allenai remotes exist + */ + def projectUrl(remotes: Seq[String], user: String): Option[String] = { + val sshRegex = """git@github.com:([\w-]+)/([\w-]+).git""".r + val httpsRegex = """https://github.com/([\w-]+)/([\w-]+).git""".r + + remotes.collect { + case sshRegex(u, repo) if u == user => s"http://github.com/$user/$repo" + case httpsRegex(u, repo) if u == user => s"http://github.com/$user/$repo" + }.headOption + } + + def create(sha1: String, commitDate: Long, remotes: Seq[String]) = { + GitVersion(sha1, commitDate, projectUrl(remotes, "allenai")) + } +} + +/** Represents the version of this component. Should be built with the `fromResources` method on the + * companion object. + * @param git the git version (commit information) of the build. + * @param artifactVersion the version of the artifact in the build. + * @param cacheKey a cacheKey of the project. Changes on git commits to src of project and + * dependency changes. + */ +case class Version( + git: GitVersion, + artifactVersion: String, + cacheKey: Option[String] +) { + @deprecated("Use artifactVersion instead.", "2014.09.09-1-SNAPSHOT") + def artifact = artifactVersion +} + +object Version { + + /** Load a Version instance from the resources injected by the + * [[https://git.io/vzdZl Version injector sbt plugin]]. + * This attempts to load using [[Version]]'s class loader. + * @param org the value of the sbt key `organization` to find + * @param name the value of the sbt key `name` to find + */ + def fromResources(org: String, name: String): Version = { + fromResources(org, name, this.getClass.getClassLoader) + } + + /** Load a Version instance from the resources injected by the + * [[https://git.io/vzdZl Version injector sbt plugin]]. + * This attempts to load using the given class loader. + * @param org the value of the sbt key `organization` to find + * @param name the value of the sbt key `name` to find + * @param classLoader the class loader to use + */ + def fromResources(org: String, name: String, classLoader: ClassLoader): Version = { + val prefix = s"$org/${name.replaceAll("-", "")}" + + val artifactConfPath = s"$prefix/artifact.conf" + val gitConfPath = s"$prefix/git.conf" + + val artifactConfUrl = classLoader.getResource(artifactConfPath) + val gitConfUrl = classLoader.getResource(gitConfPath) + + require(artifactConfUrl != null, s"Could not find $artifactConfPath") + require(gitConfUrl != null, s"Could not find $gitConfPath") + + val artifactConf = ConfigFactory.parseURL(artifactConfUrl) + val gitConf = ConfigFactory.parseURL(gitConfUrl) + val artifactVersion = artifactConf[String]("version") + val sha1 = gitConf[String]("sha1") + val commitDate = gitConf[Long]("date") + val remotes = gitConf.getStringList("remotes").asScala + val cacheKey = Option(System.getProperty("application.cacheKey")) + Version(GitVersion.create(sha1, commitDate, remotes), artifactVersion, cacheKey) + } + + /** Custom JSON serialization for backwards-compatibility. */ + implicit val versionJsonFormat = new RootJsonFormat[Version] { + import spray.json.DefaultJsonProtocol._ + override def write(version: Version): JsValue = { + val baseJson = JsObject( + "git" -> JsString(version.git.sha1), + "commitDate" -> JsNumber(version.git.commitDate), + "artifact" -> JsString(version.artifactVersion) + ) + version.git.repoUrl match { + case Some(repoUrl) => baseJson.pack("repoUrl" -> repoUrl) + case _ => baseJson + } + version.cacheKey match { + case Some(cacheKey) => baseJson.pack("cacheKey" -> cacheKey) + case _ => baseJson + } + } + + override def read(json: JsValue): Version = { + val jsObject = json.asJsObject + val gitSha1 = jsObject.apply[String]("git") + val commitDate = jsObject.apply[Long]("commitDate") + val artifactVersion = jsObject.apply[String]("artifact") + val repoUrl = jsObject.get[String]("repoUrl") + val cacheKey = jsObject.get[String]("cacheKey") + Version(GitVersion(gitSha1, commitDate, repoUrl), artifactVersion, cacheKey) + } + } +} diff --git a/core/src/test/scala/org/allenai/common/JsonIoSpec.scala b/core/src/test/scala/org/allenai/common/JsonIoSpec.scala index 2b822c2..ce684d5 100644 --- a/core/src/test/scala/org/allenai/common/JsonIoSpec.scala +++ b/core/src/test/scala/org/allenai/common/JsonIoSpec.scala @@ -2,10 +2,9 @@ package org.allenai.common import org.allenai.common.testkit.UnitSpec -import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.io.{ Codec, Source } +import scala.io.Source import java.io.ByteArrayOutputStream diff --git a/core/src/test/scala/org/allenai/common/VersionSpec.scala b/core/src/test/scala/org/allenai/common/VersionSpec.scala new file mode 100644 index 0000000..2bb1b88 --- /dev/null +++ b/core/src/test/scala/org/allenai/common/VersionSpec.scala @@ -0,0 +1,79 @@ +package org.allenai.common + +import org.allenai.common.testkit.UnitSpec + +import spray.json._ + +import java.net.URLClassLoader +import java.nio.file.Paths + +class GitVersionSpec extends UnitSpec { + "create" should "find the correct GitHub project URL (ssh)" in { + val version = GitVersion.create( + "gitSha", + 1234, + Seq( + "https://github.com/schmmd/parsers.git", + "git@github.com:allenai/common.git" + ) + ) + version.repoUrl shouldBe Some("http://github.com/allenai/common") + } + + it should "find the correct GitHub project URL (https)" in { + val version = GitVersion.create( + "gitSha", + 1234, + Seq( + "https://github.com/allenai/ari-datastore.git", + "git@github.com:schmmd/common.git" + ) + ) + version.repoUrl shouldBe Some("http://github.com/allenai/ari-datastore") + } + + it should "find the correct GitHub commit URL" in { + val version = GitVersion.create( + "e0d972e185bd12b94dedd38834fea150a68f064e", + 1234, + Seq("https://github.com/allenai/parsers.git", "git@github.com:schmmd/common.git") + ) + version.commitUrl shouldBe + Some("http://github.com/allenai/parsers/commit/e0d972e185bd12b94dedd38834fea150a68f064e") + } +} + +class VersionSpec extends UnitSpec { + "Version" should "be backwards compatible for reading" in { + val json = """{ + "git":"0144af4325992689cf5fd6d0e3c2d744b25935d6", + "artifact":"2014.07.21-0-SNAPSHOT","commitDate":1412094251000 + }""" + json.parseJson.convertTo[Version] shouldBe + Version( + GitVersion("0144af4325992689cf5fd6d0e3c2d744b25935d6", 1412094251000L, None), + "2014.07.21-0-SNAPSHOT", + None + ) + } + + "fromResources" should "find common-core's resources" in { + // No asserts; this will throw an exception if it's unfound. + Version.fromResources("org.allenai.common", "common-core") + } + + it should "find a resource using a class loader" in { + val expectedVersion = Version( + GitVersion("sha123", 123456789L, None), + "1.0.0", + None + ) + val classpath = Paths.get("src/test/resources/fakejar").toAbsolutePath.toUri.toURL + val version = Version.fromResources( + "org.fakeorg", + "project-name", + new URLClassLoader(Array(classpath)) + ) + version shouldBe expectedVersion + } +} From f478bb95d4d4952407d85d6bdb8ff64c5346ae4b Mon Sep 17 00:00:00 2001 From: bbstilson Date: Mon, 8 Jun 2020 21:44:00 -0700 Subject: [PATCH 39/69] need sbt-plugin sadly --- project/plugins.sbt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/project/plugins.sbt b/project/plugins.sbt index 90267a8..42f8fad 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,3 +1,5 @@ addSbtPlugin("org.foundweekends" % "sbt-bintray" % "0.5.4") addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.0.4") + +addSbtPlugin("org.allenai.plugins" % "allenai-sbt-plugins" % "3.0.0") From e6e00388f1177446d338459a67a9a1fe63479aae Mon Sep 17 00:00:00 2001 From: bbstilson Date: Mon, 8 Jun 2020 21:47:44 -0700 Subject: [PATCH 40/69] enable fatal warnings --- build.sbt | 8 +++++++- .../org/allenai/common/json/PackedJsonFormatSpec.scala | 2 -- .../scala/org/allenai/common/json/RichJsObjectSpec.scala | 2 -- .../scala/org/allenai/common/guice/ConfigModuleSpec.scala | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/build.sbt b/build.sbt index bd7a51d..36b506e 100644 --- a/build.sbt +++ b/build.sbt @@ -48,7 +48,13 @@ lazy val projectSettings = Seq( lazy val buildSettings = Seq( javaOptions += s"-Dlogback.appname=${name.value}", - scalacOptions ++= Seq("-target:jvm-1.8", "-Xlint", "-deprecation", "-feature"), + scalacOptions ++= Seq( + "-target:jvm-1.8", + "-Xlint", + "-deprecation", + "-feature", + "-Xfatal-warnings" + ), javacOptions ++= Seq("-source", "1.8", "-target", "1.8"), crossScalaVersions := supportedScalaVersions ) diff --git a/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala b/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala index 96feb04..1b68056 100644 --- a/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala +++ b/core/src/test/scala/org/allenai/common/json/PackedJsonFormatSpec.scala @@ -5,8 +5,6 @@ import org.allenai.common.testkit.UnitSpec import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.util.{ Failure, Success, Try } - class PackedJsonFormatSpec extends UnitSpec { sealed trait Super diff --git a/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala b/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala index dc5fa10..d43c9df 100644 --- a/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala +++ b/core/src/test/scala/org/allenai/common/json/RichJsObjectSpec.scala @@ -5,8 +5,6 @@ import org.allenai.common.testkit.UnitSpec import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.util.{ Failure, Success, Try } - // scalastyle:off magic.number class RichJsObjectSpec extends UnitSpec { diff --git a/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala b/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala index 89ae89d..941ddf6 100644 --- a/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala +++ b/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala @@ -229,7 +229,7 @@ class ConfigModuleSpec extends UnitSpec { val injector = Guice.createInjector(testModule) - val instance = injector.getInstance(classOf[DottedKeys]) + injector.getInstance(classOf[DottedKeys]) } it should "handle sequences" in { From ebca58a97b665dc7009c2ad7d77e5cf657aab15e Mon Sep 17 00:00:00 2001 From: bbstilson Date: Mon, 8 Jun 2020 22:11:00 -0700 Subject: [PATCH 41/69] fix compat issues --- .../org/allenai/common/Compat.scala | 14 ++++++++ .../org/allenai/common/Compat.scala | 14 ++++++++ .../scala/org/allenai/common/Config.scala | 2 +- .../scala/org/allenai/common/FileUtils.scala | 3 +- .../scala/org/allenai/common/JsonIo.scala | 7 ++-- .../org/allenai/common/ParIterator.scala | 2 +- .../scala/org/allenai/common/Version.scala | 5 ++- .../scala/org/allenai/common/ConfigSpec.scala | 2 +- .../scala/org/allenai/common/JsonIoSpec.scala | 6 ++-- .../org/allenai/common/ParIteratorSpec.scala | 36 +++++++++---------- 10 files changed, 58 insertions(+), 33 deletions(-) create mode 100644 core/src/main/scala-2.11-2.12/org/allenai/common/Compat.scala create mode 100644 core/src/main/scala-2.13/org/allenai/common/Compat.scala diff --git a/core/src/main/scala-2.11-2.12/org/allenai/common/Compat.scala b/core/src/main/scala-2.11-2.12/org/allenai/common/Compat.scala new file mode 100644 index 0000000..1d61add --- /dev/null +++ b/core/src/main/scala-2.11-2.12/org/allenai/common/Compat.scala @@ -0,0 +1,14 @@ +package org.allenai.common + +import scala.collection.convert._ + +object Compat { + object JavaConverters extends DecorateAsJava with DecorateAsScala + + object IterableOps { + implicit class IterableOpsImplicits[A](iter: Iterable[A]) { + def toStreamCompat: Stream[A] = iter.toStream + def toIteratorCompat: Iterator[A] = iter.toIterator + } + } +} diff --git a/core/src/main/scala-2.13/org/allenai/common/Compat.scala b/core/src/main/scala-2.13/org/allenai/common/Compat.scala new file mode 100644 index 0000000..f08cebf --- /dev/null +++ b/core/src/main/scala-2.13/org/allenai/common/Compat.scala @@ -0,0 +1,14 @@ +package org.allenai.common + +import scala.collection.convert._ + +object Compat { + object JavaConverters extends AsJavaExtensions with AsScalaExtensions + + object IterableOps { + implicit class IterableOpsImplicits[A](iter: Iterable[A]) { + def toStreamCompat: LazyList[A] = iter.to(LazyList) + def toIteratorCompat: Iterator[A] = iter.iterator + } + } +} diff --git a/core/src/main/scala/org/allenai/common/Config.scala b/core/src/main/scala/org/allenai/common/Config.scala index fb0da62..8a5be8b 100644 --- a/core/src/main/scala/org/allenai/common/Config.scala +++ b/core/src/main/scala/org/allenai/common/Config.scala @@ -1,10 +1,10 @@ package org.allenai.common +import Compat.JavaConverters._ import com.typesafe.config.{ Config => TypesafeConfig, _ } import spray.json._ import java.net.URI -import scala.collection.JavaConverters._ import scala.concurrent.duration._ /** Import to provide enhancements via implicit class conversion for making working diff --git a/core/src/main/scala/org/allenai/common/FileUtils.scala b/core/src/main/scala/org/allenai/common/FileUtils.scala index 6917250..62f255c 100644 --- a/core/src/main/scala/org/allenai/common/FileUtils.scala +++ b/core/src/main/scala/org/allenai/common/FileUtils.scala @@ -1,10 +1,9 @@ package org.allenai.common import au.com.bytecode.opencsv.CSVReader +import Compat.JavaConverters._ import java.io.{ BufferedInputStream, BufferedReader, File, FileInputStream, InputStreamReader } - -import scala.collection.JavaConverters._ import scala.io.{ BufferedSource, Codec, Source } /** Various convenient utilities for reading files and resources. */ diff --git a/core/src/main/scala/org/allenai/common/JsonIo.scala b/core/src/main/scala/org/allenai/common/JsonIo.scala index 2c360ea..138c305 100644 --- a/core/src/main/scala/org/allenai/common/JsonIo.scala +++ b/core/src/main/scala/org/allenai/common/JsonIo.scala @@ -2,9 +2,8 @@ package org.allenai.common import spray.json._ -import scala.io.Source - import java.io.{ OutputStream, PrintWriter, Writer } +import scala.io.Source /** Helpers for streaming lists of JSON objects to and from disk. */ object JsonIo { @@ -12,8 +11,8 @@ object JsonIo { /** Reads single-lines from a given Source, and streams the JSON parsed from them to the caller. * @return a stream of objects of type T */ - def parseJson[T](source: Source)(implicit format: JsonFormat[T]): Stream[T] = { - for (line <- source.getLines().toStream) yield line.parseJson.convertTo[T] + def parseJson[T](source: Source)(implicit format: JsonFormat[T]): Iterator[T] = { + for (line <- source.getLines) yield line.parseJson.convertTo[T] } /** Writes the given objects to the given writer, as one-per-line JSON values. */ diff --git a/core/src/main/scala/org/allenai/common/ParIterator.scala b/core/src/main/scala/org/allenai/common/ParIterator.scala index b92d87a..2d2c5d8 100644 --- a/core/src/main/scala/org/allenai/common/ParIterator.scala +++ b/core/src/main/scala/org/allenai/common/ParIterator.scala @@ -94,7 +94,7 @@ object ParIterator { f: T => O, queueLimit: Int = defaultQueueLimit )(implicit ec: ExecutionContext): Iterator[O] = new Iterator[O] { - private val inner = input.toIterator + private val inner = input private val q = new scala.collection.mutable.Queue[Future[O]]() private def fillQueue(): Unit = { diff --git a/core/src/main/scala/org/allenai/common/Version.scala b/core/src/main/scala/org/allenai/common/Version.scala index 7a4e8ab..00d5a11 100644 --- a/core/src/main/scala/org/allenai/common/Version.scala +++ b/core/src/main/scala/org/allenai/common/Version.scala @@ -1,13 +1,12 @@ package org.allenai.common +import Compat.JavaConverters._ import org.allenai.common.Config._ import org.allenai.common.json._ import com.typesafe.config.ConfigFactory import spray.json.{ JsNumber, JsObject, JsString, JsValue, RootJsonFormat } -import scala.collection.JavaConverters._ - import java.util.Date /** Represents a git version. @@ -107,7 +106,7 @@ object Version { val artifactVersion = artifactConf[String]("version") val sha1 = gitConf[String]("sha1") val commitDate = gitConf[Long]("date") - val remotes = gitConf.getStringList("remotes").asScala + val remotes = gitConf.getStringList("remotes").asScala.toSeq val cacheKey = Option(System.getProperty("application.cacheKey")) Version(GitVersion.create(sha1, commitDate, remotes), artifactVersion, cacheKey) } diff --git a/core/src/test/scala/org/allenai/common/ConfigSpec.scala b/core/src/test/scala/org/allenai/common/ConfigSpec.scala index b2b1653..81e1005 100644 --- a/core/src/test/scala/org/allenai/common/ConfigSpec.scala +++ b/core/src/test/scala/org/allenai/common/ConfigSpec.scala @@ -1,5 +1,6 @@ package org.allenai.common +import Compat.JavaConverters._ import org.allenai.common.testkit.UnitSpec import org.allenai.common.Config._ @@ -7,7 +8,6 @@ import com.typesafe.config.{ Config => TypesafeConfig, _ } import spray.json._ import spray.json.DefaultJsonProtocol._ -import scala.collection.JavaConverters._ import scala.concurrent.duration._ import java.net.URI diff --git a/core/src/test/scala/org/allenai/common/JsonIoSpec.scala b/core/src/test/scala/org/allenai/common/JsonIoSpec.scala index ce684d5..a24be0b 100644 --- a/core/src/test/scala/org/allenai/common/JsonIoSpec.scala +++ b/core/src/test/scala/org/allenai/common/JsonIoSpec.scala @@ -34,18 +34,18 @@ class JsonIoSpec extends UnitSpec { "parseJson and writeJson" should "pipe correctly to each other" in { // Input. We'll pipe through writeJson & toJson twice (testing both directions). - val input = Seq(Foo("a"), Foo("b"), Foo("c"), Foo("d")) + val input = List(Foo("a"), Foo("b"), Foo("c"), Foo("d")) // Intermediary: Test that write -> read works. val buffer = new ByteArrayOutputStream() JsonIo.writeJson(input, buffer) val intermediaryOutput = JsonIo.parseJson[Foo](Source.fromString(buffer.toString("UTF8"))) - intermediaryOutput should be(input) + intermediaryOutput.toList should be(input) // Final: Test that read -> write works (with a bonus read). buffer.reset() JsonIo.writeJson(input, buffer) val finalOutput = JsonIo.parseJson[Foo](Source.fromString(buffer.toString("UTF8"))) - finalOutput should be(input) + finalOutput.toList should be(input) } } diff --git a/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala b/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala index 933f81d..18cfde8 100644 --- a/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala +++ b/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala @@ -1,15 +1,15 @@ package org.allenai.common -import java.util.concurrent.ConcurrentSkipListSet -import java.util.concurrent.atomic.AtomicInteger - +import Compat.JavaConverters._ +import Compat.IterableOps.IterableOpsImplicits import org.allenai.common.testkit.UnitSpec import org.allenai.common.ParIterator.ParIteratorEnrichment +import java.util.concurrent.ConcurrentSkipListSet +import java.util.concurrent.atomic.AtomicInteger import scala.concurrent.duration._ import scala.concurrent.ExecutionContext.Implicits.global import scala.language.postfixOps -import scala.collection.JavaConverters._ class ParIteratorSpec extends UnitSpec { // With small values (<1000) for scale, this test is unreliable, and with large ones it takes @@ -36,31 +36,31 @@ class ParIteratorSpec extends UnitSpec { val successes = new ConcurrentSkipListSet[Int]() val max = 2000 - val iter = Range(0, max).toIterator + val iter = Range(0, max).toIteratorCompat iter.parForeach { i => Thread.sleep((max - i) % 10) successes.add(i) } val expected = Range(0, max).toSet - assert((successes.asScala -- expected) === Set.empty) - assert((expected -- successes.asScala) === Set.empty) + assert((successes.asScala.toSet -- expected) === Set.empty) + assert((expected -- successes.asScala.toSet) === Set.empty) Thread.sleep(1000) - assert((successes.asScala -- expected) === Set.empty) - assert((expected -- successes.asScala) === Set.empty) + assert((successes.asScala.toSet -- expected) === Set.empty) + assert((expected -- successes.asScala.toSet) === Set.empty) } } it should "nest properly" in { val count = new AtomicInteger() val max = 13 - Range(0, max).toIterator.parForeach { _ => - Range(0, max).toIterator.parForeach { _ => + Range(0, max).toIteratorCompat.parForeach { _ => + Range(0, max).toIteratorCompat.parForeach { _ => val successes = new ConcurrentSkipListSet[Int]() - val iter = Range(0, max).toIterator + val iter = Range(0, max).toIteratorCompat iter.parForeach { i => Thread.sleep((i * max * max) % 10) successes.add(i) @@ -68,8 +68,8 @@ class ParIteratorSpec extends UnitSpec { } val expected = Range(0, max).toSet - assert((successes.asScala -- expected) === Set.empty) - assert((expected -- successes.asScala) === Set.empty) + assert((successes.asScala.toSet -- expected) === Set.empty) + assert((expected -- successes.asScala.toSet) === Set.empty) } } @@ -79,7 +79,7 @@ class ParIteratorSpec extends UnitSpec { it should "map things concurrently" in { val max = 5 val values = Range(0, max).reverse - val iter = values.toIterator + val iter = values.toIteratorCompat val expected = values.map { i => s"$i" } @@ -98,7 +98,7 @@ class ParIteratorSpec extends UnitSpec { it should "map lots of things concurrently" in { val max = 50000 val values = Range(0, max).reverse - val iter = values.toIterator + val iter = values.toIteratorCompat val expected = values.map { i => s"$i" } @@ -111,7 +111,7 @@ class ParIteratorSpec extends UnitSpec { it should "return exceptions from foreach functions" in { val successes = synchronized(collection.mutable.Set[Int]()) intercept[ArithmeticException] { - Range(-20, 20).toIterator.parForeach { i => + Range(-20, 20).toIteratorCompat.parForeach { i => successes.add(10000 / i) } } @@ -129,7 +129,7 @@ class ParIteratorSpec extends UnitSpec { it should "return exceptions from map" in { intercept[ArithmeticException] { - Range(-20, 20).toIterator.parMap(10000 / _).toList + Range(-20, 20).toIteratorCompat.parMap(10000 / _).toList } } } From 2c50feddb9a70c87e981cc7d6791f71149955f08 Mon Sep 17 00:00:00 2001 From: bbstilson Date: Mon, 8 Jun 2020 22:15:57 -0700 Subject: [PATCH 42/69] handle fancy src dirs --- build.sbt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 36b506e..0b710f9 100644 --- a/build.sbt +++ b/build.sbt @@ -56,7 +56,15 @@ lazy val buildSettings = Seq( "-Xfatal-warnings" ), javacOptions ++= Seq("-source", "1.8", "-target", "1.8"), - crossScalaVersions := supportedScalaVersions + crossScalaVersions := supportedScalaVersions, + unmanagedSourceDirectories.in(Compile) ++= { + CrossVersion.partialVersion(scalaVersion.value) match { + case Some((2, x)) if x == 11 || x == 12 => + Seq(file(sourceDirectory.value.getPath + "/main/scala-2.11-2.12")) + case Some((2, x)) if x == 13 => Seq(file(sourceDirectory.value.getPath + "/main/scala-2.13")) + case _ => Seq.empty // dotty support would go here + } + } ) // Not necessary for this repository but here as an example From 7990a0b3c1f283ba8650129e46638972acdb26d5 Mon Sep 17 00:00:00 2001 From: bbstilson Date: Mon, 8 Jun 2020 22:29:34 -0700 Subject: [PATCH 43/69] guicy compat --- .../src/main/scala/org/allenai/common/guice/ConfigModule.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala b/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala index 4dccd80..84d72ce 100644 --- a/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala +++ b/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala @@ -1,5 +1,6 @@ package org.allenai.common.guice +import org.allenai.common.Compat.JavaConverters._ import org.allenai.common.Logging import org.allenai.common.Config._ @@ -14,7 +15,6 @@ import com.typesafe.config.{ } import net.codingwell.scalaguice.ScalaModule -import scala.collection.JavaConverters._ import scala.util.Try /** Parent class for modules which use a typesafe config for values. This automatically binds all From fbc6b603957e2f9cc536cca04927984170b70894 Mon Sep 17 00:00:00 2001 From: bbstilson Date: Tue, 9 Jun 2020 11:06:24 -0700 Subject: [PATCH 44/69] 2.2.0 --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 0b710f9..0e618b9 100644 --- a/build.sbt +++ b/build.sbt @@ -6,7 +6,7 @@ lazy val scala213 = "2.13.2" lazy val supportedScalaVersions = List(scala211, scala212, scala213) ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "2.1.1-SNAPSHOT" +ThisBuild / version := "2.2.0" ThisBuild / scalaVersion := scala212 lazy val spray = "spray" at "http://repo.spray.io/" From eadcf189fd4ec6b60b8f09e8657871a2bb22069d Mon Sep 17 00:00:00 2001 From: bbstilson Date: Tue, 9 Jun 2020 11:20:50 -0700 Subject: [PATCH 45/69] default to 2.13 --- build.sbt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build.sbt b/build.sbt index 0e618b9..dacd28b 100644 --- a/build.sbt +++ b/build.sbt @@ -7,10 +7,10 @@ lazy val supportedScalaVersions = List(scala211, scala212, scala213) ThisBuild / organization := "org.allenai.common" ThisBuild / version := "2.2.0" -ThisBuild / scalaVersion := scala212 +ThisBuild / scalaVersion := scala213 -lazy val spray = "spray" at "http://repo.spray.io/" -lazy val typesafeReleases = "Typesafe Releases" at "http://repo.typesafe.com/typesafe/releases/" +lazy val spray = "spray" at "https://repo.spray.io/" +lazy val typesafeReleases = "Typesafe Releases" at "https://repo.typesafe.com/typesafe/releases/" lazy val projectSettings = Seq( resolvers ++= Seq( From 7cf2c256590c0904f483c10fb517a1f8c6817ab9 Mon Sep 17 00:00:00 2001 From: bbstilson Date: Tue, 9 Jun 2020 11:21:28 -0700 Subject: [PATCH 46/69] bump minor version --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index dacd28b..9234c0a 100644 --- a/build.sbt +++ b/build.sbt @@ -6,7 +6,7 @@ lazy val scala213 = "2.13.2" lazy val supportedScalaVersions = List(scala211, scala212, scala213) ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "2.2.0" +ThisBuild / version := "2.2.1" ThisBuild / scalaVersion := scala213 lazy val spray = "spray" at "https://repo.spray.io/" From 279bc9e5f011fda17073341dd57895328bf439b9 Mon Sep 17 00:00:00 2001 From: bbstilson Date: Tue, 9 Jun 2020 11:39:42 -0700 Subject: [PATCH 47/69] remove scalacOptions for docs --- build.sbt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.sbt b/build.sbt index 9234c0a..59f341c 100644 --- a/build.sbt +++ b/build.sbt @@ -67,6 +67,8 @@ lazy val buildSettings = Seq( } ) +Compile / doc / scalacOptions ~= { _.filterNot(_ == "-Xfatal-warnings") } + // Not necessary for this repository but here as an example inConfig(IntegrationTest)(org.scalafmt.sbt.ScalafmtPlugin.scalafmtConfigSettings) From afea807ebb38bbd956702b09d16d797962e2d8cf Mon Sep 17 00:00:00 2001 From: bbstilson Date: Tue, 9 Jun 2020 11:40:31 -0700 Subject: [PATCH 48/69] Setting version to 2.2.1 --- version.sbt | 1 + 1 file changed, 1 insertion(+) create mode 100644 version.sbt diff --git a/version.sbt b/version.sbt new file mode 100644 index 0000000..c0504fe --- /dev/null +++ b/version.sbt @@ -0,0 +1 @@ +version in ThisBuild := "2.2.1" From 78c6b45aaf982344f7e113c2ec839ae6c91691ee Mon Sep 17 00:00:00 2001 From: bbstilson Date: Tue, 9 Jun 2020 11:51:32 -0700 Subject: [PATCH 49/69] don't generate docs --- build.sbt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/build.sbt b/build.sbt index 59f341c..7b11ad5 100644 --- a/build.sbt +++ b/build.sbt @@ -22,6 +22,7 @@ lazy val projectSettings = Seq( dependencyOverrides ++= Logging.loggingDependencyOverrides, publishMavenStyle := true, publishArtifact in Test := false, + publishArtifact in (Compile, packageDoc) := false, pomIncludeRepository := { _ => false }, @@ -67,8 +68,6 @@ lazy val buildSettings = Seq( } ) -Compile / doc / scalacOptions ~= { _.filterNot(_ == "-Xfatal-warnings") } - // Not necessary for this repository but here as an example inConfig(IntegrationTest)(org.scalafmt.sbt.ScalafmtPlugin.scalafmtConfigSettings) From 4edd25422b9c3f7c82ab05fc335fbc8a10275eed Mon Sep 17 00:00:00 2001 From: bbstilson Date: Tue, 9 Jun 2020 11:53:06 -0700 Subject: [PATCH 50/69] v2.2.2 --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 7b11ad5..337064d 100644 --- a/build.sbt +++ b/build.sbt @@ -6,7 +6,7 @@ lazy val scala213 = "2.13.2" lazy val supportedScalaVersions = List(scala211, scala212, scala213) ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "2.2.1" +ThisBuild / version := "2.2.2" ThisBuild / scalaVersion := scala213 lazy val spray = "spray" at "https://repo.spray.io/" From 6c50ec96e95f536c32396e4a50ee918def23a5b4 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Fri, 12 Mar 2021 19:16:21 -0800 Subject: [PATCH 51/69] upgrade sbt to 1.4.9 --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index c0bab04..dbae93b 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.2.8 +sbt.version=1.4.9 From feeba26f5a49bb4ac9bdee1a61226bc243edeae7 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Fri, 12 Mar 2021 19:17:09 -0800 Subject: [PATCH 52/69] clean up and centralize shared config in a global plugin --- build.sbt | 94 +++++-------------------------------- project/GlobalPlugin.scala | 46 ++++++++++++++++++ project/ScalaVersions.scala | 6 +++ 3 files changed, 64 insertions(+), 82 deletions(-) create mode 100644 project/GlobalPlugin.scala create mode 100644 project/ScalaVersions.scala diff --git a/build.sbt b/build.sbt index 337064d..c25939c 100644 --- a/build.sbt +++ b/build.sbt @@ -1,77 +1,5 @@ -import Dependencies._ - -lazy val scala211 = "2.11.12" -lazy val scala212 = "2.12.10" -lazy val scala213 = "2.13.2" -lazy val supportedScalaVersions = List(scala211, scala212, scala213) - -ThisBuild / organization := "org.allenai.common" -ThisBuild / version := "2.2.2" -ThisBuild / scalaVersion := scala213 - -lazy val spray = "spray" at "https://repo.spray.io/" -lazy val typesafeReleases = "Typesafe Releases" at "https://repo.typesafe.com/typesafe/releases/" - -lazy val projectSettings = Seq( - resolvers ++= Seq( - Resolver.bintrayRepo("allenai", "maven"), - spray, - Resolver.jcenterRepo, - typesafeReleases - ), - dependencyOverrides ++= Logging.loggingDependencyOverrides, - publishMavenStyle := true, - publishArtifact in Test := false, - publishArtifact in (Compile, packageDoc) := false, - pomIncludeRepository := { _ => - false - }, - licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")), - homepage := Some(url("https://github.com/allenai/common")), - apiURL := Some(url("https://allenai.github.io/common/")), - scmInfo := Some( - ScmInfo( - url("https://github.com/allenai/common"), - "https://github.com/allenai/common.git" - ) - ), - pomExtra := ( - - allenai-dev-role - Allen Institute for Artificial Intelligence - dev-role@allenai.org - - ), - bintrayPackage := s"${organization.value}:${name.value}_${scalaBinaryVersion.value}", - bintrayOrganization := Some("allenai"), - bintrayRepository := "maven" -) - -lazy val buildSettings = Seq( - javaOptions += s"-Dlogback.appname=${name.value}", - scalacOptions ++= Seq( - "-target:jvm-1.8", - "-Xlint", - "-deprecation", - "-feature", - "-Xfatal-warnings" - ), - javacOptions ++= Seq("-source", "1.8", "-target", "1.8"), - crossScalaVersions := supportedScalaVersions, - unmanagedSourceDirectories.in(Compile) ++= { - CrossVersion.partialVersion(scalaVersion.value) match { - case Some((2, x)) if x == 11 || x == 12 => - Seq(file(sourceDirectory.value.getPath + "/main/scala-2.11-2.12")) - case Some((2, x)) if x == 13 => Seq(file(sourceDirectory.value.getPath + "/main/scala-2.13")) - case _ => Seq.empty // dotty support would go here - } - } -) - -// Not necessary for this repository but here as an example -inConfig(IntegrationTest)(org.scalafmt.sbt.ScalafmtPlugin.scalafmtConfigSettings) - -lazy val common = (project in file(".")) +lazy val common = project + .in(file(".")) .aggregate( core, guice, @@ -81,17 +9,19 @@ lazy val common = (project in file(".")) .settings( Defaults.itSettings, crossScalaVersions := Nil, - publish / skip := true, - buildSettings + // crossScalaVersions must be set to Nil on the aggregating project + // in order to avoid double publishing. + // See: https://www.scala-sbt.org/1.x/docs/Cross-Build.html#Cross+building+a+project+statefully + publish / skip := true ) -lazy val core = Project(id = "core", base = file("core")) - .settings(projectSettings, buildSettings) +lazy val core = project + .in(file("core")) .dependsOn(testkit % "test->compile") -lazy val guice = Project(id = "guice", base = file("guice")) - .settings(projectSettings, buildSettings) +lazy val guice = project + .in(file("guice")) .dependsOn(core, testkit % "test->compile") -lazy val testkit = Project(id = "testkit", base = file("testkit")) - .settings(projectSettings, buildSettings) +lazy val testkit = project + .in(file("testkit")) diff --git a/project/GlobalPlugin.scala b/project/GlobalPlugin.scala new file mode 100644 index 0000000..3e26fb5 --- /dev/null +++ b/project/GlobalPlugin.scala @@ -0,0 +1,46 @@ +import Dependencies._ +import ScalaVersions._ + +import sbt._ +import sbt.Keys._ +import codeartifact.CodeArtifactKeys + +// Applies common settings to all subprojects +object GlobalPlugin extends AutoPlugin { + + override def trigger = allRequirements + + override def projectSettings = + Seq( + scalaVersion := SCALA_213, + organization := "org.allenai.common", + CodeArtifactKeys.codeArtifactUrl := "https://org-allenai-s2-896129387501.d.codeartifact.us-west-2.amazonaws.com/maven/private", + dependencyOverrides ++= Logging.loggingDependencyOverrides, + publishArtifact in Test := false, + publishArtifact in (Compile, packageDoc) := false, + pomIncludeRepository := { _ => + false + }, + licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")), + homepage := Some(url("https://github.com/allenai/common")), + apiURL := Some(url("https://allenai.github.io/common/")), + scmInfo := Some( + ScmInfo( + url("https://github.com/allenai/common"), + "https://github.com/allenai/common.git" + ) + ), + javaOptions += s"-Dlogback.appname=${name.value}", + javacOptions ++= Seq("-source", "1.8", "-target", "1.8"), + crossScalaVersions := SUPPORTED_SCALA_VERSIONS, + unmanagedSourceDirectories.in(Compile) ++= { + CrossVersion.partialVersion(scalaVersion.value) match { + case Some((2, x)) if x == 11 || x == 12 => + Seq(file(sourceDirectory.value.getPath + "/main/scala-2.11-2.12")) + case Some((2, x)) if x == 13 => + Seq(file(sourceDirectory.value.getPath + "/main/scala-2.13")) + case _ => Seq.empty // dotty support would go here + } + } + ) +} diff --git a/project/ScalaVersions.scala b/project/ScalaVersions.scala new file mode 100644 index 0000000..65b4411 --- /dev/null +++ b/project/ScalaVersions.scala @@ -0,0 +1,6 @@ +object ScalaVersions { + val SCALA_211 = "2.11.12" + val SCALA_212 = "2.12.10" + val SCALA_213 = "2.13.5" + val SUPPORTED_SCALA_VERSIONS = List(SCALA_211, SCALA_212, SCALA_213) +} From a335d6adff66779db7b5a4bfd59c6f08d6c43f9f Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Fri, 12 Mar 2021 19:24:15 -0800 Subject: [PATCH 53/69] update plugins. add tpolecat. modify source to meet new strictness --- .../scala/org/allenai/common/Config.scala | 21 ++++++++-- .../allenai/common/SourceInputStream.scala | 3 +- .../org/allenai/common/StringUtils.scala | 17 +++++---- .../org/allenai/common/ParIteratorSpec.scala | 16 +++++--- .../allenai/common/SeekableSourceSpec.scala | 14 +++---- .../allenai/common/guice/ConfigModule.scala | 14 ++++--- .../common/guice/ConfigModuleSpec.scala | 38 ++++++++++++------- project/plugins.sbt | 6 ++- .../common/testkit/ScratchDirectory.scala | 10 ++++- 9 files changed, 91 insertions(+), 48 deletions(-) diff --git a/core/src/main/scala/org/allenai/common/Config.scala b/core/src/main/scala/org/allenai/common/Config.scala index 8a5be8b..e8a070b 100644 --- a/core/src/main/scala/org/allenai/common/Config.scala +++ b/core/src/main/scala/org/allenai/common/Config.scala @@ -20,7 +20,7 @@ object Config { override def read(jsValue: JsValue): TypesafeConfig = jsValue match { case obj: JsObject => ConfigFactory.parseString(obj.compactPrint, ParseOptions) - case _ => deserializationError("Expected JsObject for Config deserialization") + case _ => deserializationError("Expected JsObject for Config deserialization") } override def write(config: TypesafeConfig): JsValue = @@ -65,18 +65,23 @@ object Config { implicit val stringReader = apply[String] { (config, key) => config.getString(key) } + implicit val intReader = apply[Int] { (config, key) => config.getInt(key) } + implicit val longReader = apply[Long] { (config, key) => config.getLong(key) } + implicit val doubleReader = apply[Double] { (config, key) => config.getDouble(key) } + implicit val boolReader = apply[Boolean] { (config, key) => config.getBoolean(key) } + implicit val configValueReader = apply[ConfigValue] { (config, key) => config.getValue(key) } @@ -84,18 +89,23 @@ object Config { implicit val stringListReader = apply[Seq[String]] { (config, key) => config.getStringList(key).asScala.toSeq } + implicit val intListReader = apply[Seq[Int]] { (config, key) => config.getIntList(key).asScala.toList.map(_.intValue) } + implicit val longListReader = apply[Seq[Long]] { (config, key) => config.getLongList(key).asScala.toList.map(_.longValue) } + implicit val boolListReader = apply[Seq[Boolean]] { (config, key) => config.getBooleanList(key).asScala.toList.map(_.booleanValue) } + implicit val doubleListReader = apply[Seq[Double]] { (config, key) => config.getDoubleList(key).asScala.toList.map(_.doubleValue) } + implicit val configValueListReader = apply[Seq[ConfigValue]] { (config, key) => config.getList(key).asScala.toSeq } @@ -103,9 +113,11 @@ object Config { implicit val configObjReader = apply[ConfigObject] { (config, key) => config.getObject(key) } + implicit val typesafeConfigReader = apply[TypesafeConfig] { (config, key) => config.getConfig(key) } + implicit val typesafeConfigListReader = apply[Seq[TypesafeConfig]] { (config, key) => config.getConfigList(key).asScala.toSeq } @@ -115,13 +127,13 @@ object Config { /** In addition to com.typesafe.config.ConfigException, * will potentially throw java.net.URISyntaxException */ - implicit val uriReader: ConfigReader[URI] = stringReader map { URI.create(_) } + implicit val uriReader: ConfigReader[URI] = stringReader.map { URI.create(_) } // convert config object to a JsValue // this is useful for doing two-step conversion from config value to some class that already has // a JsFormat available (and therefore the user doesn't have to also define a ConfigReader) // Note: any exceptions due to JSON parse (such as DeserializationException) will not be caught. - val jsonReader: ConfigReader[JsValue] = configObjReader map { _.toConfig.toJson } + val jsonReader: ConfigReader[JsValue] = configObjReader.map { _.toConfig.toJson } } /** Adds Scala-friendly methods to a [[com.typesafe.config.Config]] instance: @@ -137,11 +149,12 @@ object Config { * }}} */ implicit class EnhancedConfig(config: TypesafeConfig) { + private def optional[T](f: => T) = try { Some(f) } catch { - case e: ConfigException.Missing => None + case _: ConfigException.Missing => None } /** Required value extraction. diff --git a/core/src/main/scala/org/allenai/common/SourceInputStream.scala b/core/src/main/scala/org/allenai/common/SourceInputStream.scala index 6303b81..f13726f 100644 --- a/core/src/main/scala/org/allenai/common/SourceInputStream.scala +++ b/core/src/main/scala/org/allenai/common/SourceInputStream.scala @@ -30,7 +30,7 @@ class SourceInputStream(val source: Source)(implicit codec: Codec) extends Input -1 } else { availableBytes -= 1 - outputBuffer.get() + outputBuffer.get().toInt } } @@ -53,5 +53,6 @@ class SourceInputStream(val source: Source)(implicit codec: Codec) extends Input // Set the availble bytes & reset the buffer for read. availableBytes = outputBuffer.position outputBuffer.rewind() + () } } diff --git a/core/src/main/scala/org/allenai/common/StringUtils.scala b/core/src/main/scala/org/allenai/common/StringUtils.scala index 27ae860..52f5949 100644 --- a/core/src/main/scala/org/allenai/common/StringUtils.scala +++ b/core/src/main/scala/org/allenai/common/StringUtils.scala @@ -266,9 +266,8 @@ object StringUtils { def removeUnprintable: String = unprintableRegex.replaceAllIn(str, "") def replaceFancyUnicodeChars: String = - fancyChar2simpleChar.foldLeft(str) { - case (s, (unicodeChar, replacement)) => - s.replace(unicodeChar.toString, replacement) + fancyChar2simpleChar.foldLeft(str) { case (s, (unicodeChar, replacement)) => + s.replace(unicodeChar.toString, replacement) } /** @param filter Determine if a character is blacklisted and should be trimmed. @@ -285,7 +284,9 @@ object StringUtils { /** @return Trim non-letter chars from the beginning and end */ def trimNonAlphabetic(): String = - str.dropWhile(c => !Character.isAlphabetic(c)).trimRight(c => !Character.isAlphabetic(c)) + str + .dropWhile(c => !Character.isAlphabetic(c.toInt)) + .trimRight(c => !Character.isAlphabetic(c.toInt)) /** @param chars String containing the blacklist chars. * @return Trim characters from the right that belongs to a blacklist. @@ -305,9 +306,11 @@ object StringUtils { if (i == 0 || i == words.size - 1) { words.update(i, ApacheStringUtils.capitalize(word)) } // Capitalize words that are not simple prepositions - else if (!articles(word) && - !simplePrepositions(word) && - !coordinatingConjunction(word)) { + else if ( + !articles(word) && + !simplePrepositions(word) && + !coordinatingConjunction(word) + ) { words.update(i, ApacheStringUtils.capitalize(word)) } // Otherwise, leave the word as lowercase else { diff --git a/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala b/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala index 18cfde8..87d8d5e 100644 --- a/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala +++ b/core/src/test/scala/org/allenai/common/ParIteratorSpec.scala @@ -22,8 +22,9 @@ class ParIteratorSpec extends UnitSpec { val iter = Iterator(3, 1, 2) val time = Timing.time { iter.parForeach { i => - Thread.sleep(i * scale) + Thread.sleep((i * scale).toLong) successes.add(i) + () } } @@ -38,8 +39,9 @@ class ParIteratorSpec extends UnitSpec { val max = 2000 val iter = Range(0, max).toIteratorCompat iter.parForeach { i => - Thread.sleep((max - i) % 10) + Thread.sleep(((max - i) % 10).toLong) successes.add(i) + () } val expected = Range(0, max).toSet @@ -62,14 +64,16 @@ class ParIteratorSpec extends UnitSpec { val iter = Range(0, max).toIteratorCompat iter.parForeach { i => - Thread.sleep((i * max * max) % 10) + Thread.sleep(((i * max * max) % 10).toLong) successes.add(i) count.incrementAndGet() + () } val expected = Range(0, max).toSet assert((successes.asScala.toSet -- expected) === Set.empty) assert((expected -- successes.asScala.toSet) === Set.empty) + () } } @@ -85,10 +89,11 @@ class ParIteratorSpec extends UnitSpec { } val time: Duration = Timing.time { val result = iter.parMap { i => - Thread.sleep(i * 100) + Thread.sleep((i * 100).toLong) s"$i" } assert(expected === result.toSeq) + () } val limit: Duration = ((max * 100) millis) + (50 millis) @@ -113,6 +118,7 @@ class ParIteratorSpec extends UnitSpec { intercept[ArithmeticException] { Range(-20, 20).toIteratorCompat.parForeach { i => successes.add(10000 / i) + () } } } @@ -121,7 +127,7 @@ class ParIteratorSpec extends UnitSpec { intercept[ArithmeticException] { Iterator(new NotImplementedError(), new ArithmeticException()).zipWithIndex.parForeach { case (e, index) => - Thread.sleep((1 - index) * 1000) + Thread.sleep(((1 - index) * 1000).toLong) throw e } } diff --git a/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala b/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala index 01e986e..b4109e6 100644 --- a/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala +++ b/core/src/test/scala/org/allenai/common/SeekableSourceSpec.scala @@ -13,11 +13,11 @@ class SeekableSourceSpec extends UnitSpec { implicit val codec = Codec.UTF8 /** Stores fü入, in UTF-8. */ - val foomlaut: Array[Byte] = Array('f'.toByte, 0xc3, 0xbc, 0xe5, 0x85, 0xa5) map { _.toByte } + val foomlaut: Array[Byte] = Array('f'.toInt, 0xc3, 0xbc, 0xe5, 0x85, 0xa5).map { _.toByte } /** @return a channel open to a new temp file containing the given chars */ def newFileWithChars(chars: Iterable[Char]): FileChannel = { - newFileWithBytes(chars.toArray map { _.toByte }) + newFileWithBytes(chars.toArray.map { _.toByte }) } /** @return a channel open to a new temp file containing the given bytes */ @@ -198,7 +198,7 @@ class SeekableSourceSpec extends UnitSpec { } it should "handle four-byte unicode characters" in { - val thumbsUp = newFileWithBytes(Array('u', 0xf0, 0x9f, 0x91, 0x8d, 'p') map { _.toByte }) + val thumbsUp = newFileWithBytes(Array('u', 0xf0, 0x9f, 0x91, 0x8d, 'p').map { _.toByte }) val source = new SeekableSource(thumbsUp) source.next() should be('u') @@ -215,7 +215,7 @@ class SeekableSourceSpec extends UnitSpec { it should "handle malformed input correctly" in { // Valid letter, invalid start, bad three-byte char, valid letter. - val badChars = newFileWithBytes(Array('a', 0xff, 0xe0, 0x03, 0x8f, 'b') map { _.toByte }) + val badChars = newFileWithBytes(Array('a', 0xff, 0xe0, 0x03, 0x8f, 'b').map { _.toByte }) val source = new SeekableSource(badChars) source.next() should be('a') @@ -227,7 +227,7 @@ class SeekableSourceSpec extends UnitSpec { } it should "handle partial two-byte characters at the end of a stream" in { - val earlyEnd = newFileWithBytes(Array('a', 0xc3) map { _.toByte }) + val earlyEnd = newFileWithBytes(Array('a', 0xc3).map { _.toByte }) val source = new SeekableSource(earlyEnd) source.next() should be('a') @@ -235,7 +235,7 @@ class SeekableSourceSpec extends UnitSpec { } it should "handle partial three-byte characters at the end of a stream" in { - val earlyEnd = newFileWithBytes(Array('a', 0xe5, 0x85) map { _.toByte }) + val earlyEnd = newFileWithBytes(Array('a', 0xe5, 0x85).map { _.toByte }) val source = new SeekableSource(earlyEnd) source.next() should be('a') @@ -243,7 +243,7 @@ class SeekableSourceSpec extends UnitSpec { } it should "handle partial four-byte characters at the end of a stream" in { - val earlyEnd = newFileWithBytes(Array('a', 0xf0, 0x9f, 0x91) map { _.toByte }) + val earlyEnd = newFileWithBytes(Array('a', 0xf0, 0x9f, 0x91).map { _.toByte }) val source = new SeekableSource(earlyEnd) source.next() should be('a') diff --git a/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala b/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala index 84d72ce..0404893 100644 --- a/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala +++ b/guice/src/main/scala/org/allenai/common/guice/ConfigModule.scala @@ -58,12 +58,12 @@ import scala.util.Try * format: ON * @param config the runtime config to use containing all values to bind */ -class ConfigModule(config: Config) extends ScalaModule with Logging { +abstract class ConfigModule(config: Config) extends ScalaModule with Logging { /** The actual config to bind. */ private lazy val fullConfig = { val resolvedConfig = config.withFallback(defaultConfig).resolve() - bindingPrefix map { resolvedConfig.atPath } getOrElse { resolvedConfig } + bindingPrefix.map { resolvedConfig.atPath }.getOrElse { resolvedConfig } } /** An optional filename pointing to a file containing default config values. @@ -86,15 +86,17 @@ class ConfigModule(config: Config) extends ScalaModule with Logging { * in the provided config. */ def defaultConfig: Config = - configName map { name => - ConfigFactory.parseResources(getClass, name) - } getOrElse ConfigFactory.empty + configName + .map { name => + ConfigFactory.parseResources(getClass, name) + } + .getOrElse(ConfigFactory.empty) /** Configure method for implementing classes to override if they wish to create additional * bindings, or bindings based on config values. * @param config the fully-initilized config object */ - def configureWithConfig(config: Config): Unit = {} + def configureWithConfig(config: Config): Unit /** Binds the config provided in the constructor, plus any default config found, and calls * configureWithConfig with the resultant config object. diff --git a/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala b/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala index 941ddf6..e5e61c7 100644 --- a/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala +++ b/guice/src/test/scala/org/allenai/common/guice/ConfigModuleSpec.scala @@ -6,8 +6,9 @@ import com.typesafe.config.{ Config, ConfigFactory } import org.allenai.common.testkit.UnitSpec case class CaseClass(a: String) + // Test class, defined in a way that's injectable by Guice (outside of a wrapping class). -case class AnnotatedClass @Inject()( +case class AnnotatedClass @Inject() ( @Named("fooString") foo: String, // This string has a default value in the module.conf file. @Named("hasDefault") hasDefault: String, @@ -19,14 +20,14 @@ case class AnnotatedClass @Inject()( @Named("unsupported") unsupported: CaseClass ) -case class OptionalParamClass @Inject()( +case class OptionalParamClass @Inject() ( @Named("presentString") present: String, @Named("presentString") presentOption: Option[String], @Named("missingString") missingOption: Option[String] ) // Test class with nested Config objects. -case class NestedConfig @Inject()( +case class NestedConfig @Inject() ( @Named("root") root: Config, @Named("root.nested") nested: Config, @Named("nested") nestedNone: Option[Config], @@ -35,7 +36,7 @@ case class NestedConfig @Inject()( ) // Test class, using namespaced values. -case class PrefixClass @Inject()( +case class PrefixClass @Inject() ( @Named("prefix.fooString") foo: String, // This string has a default value in the module.conf file. @Named("prefix.hasDefault") hasDefault: String, @@ -46,13 +47,13 @@ case class PrefixClass @Inject()( ) // Test class with dotted keys. -case class DottedKeys @Inject()( +case class DottedKeys @Inject() ( @Named("\"i.have\".dots") dots: String, @Named("\"i.have.more.dots\".bar") bar: Int ) // Test class with Seq values. -case class SeqValues @Inject()( +case class SeqValues @Inject() ( @Named("seq.ofConfig") configs: Seq[Config], @Named("seq.ofString") strings: Seq[String], @Named("seq.ofBool") booleans: Seq[Boolean], @@ -60,6 +61,8 @@ case class SeqValues @Inject()( ) class ConfigModuleSpec extends UnitSpec { + import ConfigModuleSpec._ + "bindConfig" should "bind config values to appropriate @Named bindings" in { // Config with an entry for all of the bindable values except the one with a default. val testConfig = ConfigFactory.parseString(""" @@ -68,7 +71,7 @@ class ConfigModuleSpec extends UnitSpec { barNum = 42 boolbool = true """) - val testModule = new ConfigModule(testConfig) { + val testModule = new TestConfig(testConfig) { override def configureWithConfig(c: Config): Unit = { // Manually bind things missing from the config. bind[Set[String]].toInstance(Set("unannotated")) @@ -94,7 +97,7 @@ class ConfigModuleSpec extends UnitSpec { barNum = 42 boolbool = true """) - val testModule = new ConfigModule(testConfig) { + val testModule = new TestConfig(testConfig) { override def configName: Option[String] = Some("test_default.conf") override def configureWithConfig(c: Config): Unit = { @@ -131,7 +134,7 @@ class ConfigModuleSpec extends UnitSpec { barNum = 42 boolbool = true """) - val testModule = new ConfigModule(testConfig) { + val testModule = new TestConfig(testConfig) { override def configName: Option[String] = Some("test_default.conf") override def configureWithConfig(c: Config): Unit = { @@ -160,7 +163,7 @@ class ConfigModuleSpec extends UnitSpec { nested.bool = true ignored_no_prefix = "Should be ignored" """) - val testModule = new ConfigModule(testConfig) { + val testModule = new TestConfig(testConfig) { override def bindingPrefix: Option[String] = Some("prefix") override def configureWithConfig(c: Config): Unit = { bind[Int].annotatedWithName("ignored_no_prefix").toInstance(33) @@ -181,7 +184,7 @@ class ConfigModuleSpec extends UnitSpec { presentString = "here" // missingString = "missing" """) - val testModule = new ConfigModule(testConfig) + val testModule = new TestConfig(testConfig) val injector = Guice.createInjector(testModule) @@ -202,7 +205,7 @@ class ConfigModuleSpec extends UnitSpec { } """) - val testModule = new ConfigModule(testConfig) + val testModule = new TestConfig(testConfig) val injector = Guice.createInjector(testModule) @@ -225,7 +228,7 @@ class ConfigModuleSpec extends UnitSpec { bar = 123 } """) - val testModule = new ConfigModule(testConfig) + val testModule = new TestConfig(testConfig) val injector = Guice.createInjector(testModule) @@ -239,7 +242,7 @@ class ConfigModuleSpec extends UnitSpec { seq.ofBool = [ true, false, true ] seq.ofDouble = [ 1, 2 ] """) - val testModule = new ConfigModule(testConfig) + val testModule = new TestConfig(testConfig) val injector = Guice.createInjector(testModule) @@ -249,3 +252,10 @@ class ConfigModuleSpec extends UnitSpec { instance.doubles shouldBe Seq(1.0, 2.0) } } + +object ConfigModuleSpec { + + class TestConfig(config: Config) extends ConfigModule(config) { + def configureWithConfig(config: Config): Unit = () + } +} diff --git a/project/plugins.sbt b/project/plugins.sbt index 42f8fad..0450882 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,5 +1,7 @@ addSbtPlugin("org.foundweekends" % "sbt-bintray" % "0.5.4") +addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2") -addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.0.4") +addSbtPlugin("org.allenai" % "allenai-sbt-plugins" % "3.2.0") -addSbtPlugin("org.allenai.plugins" % "allenai-sbt-plugins" % "3.0.0") + +addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.16") diff --git a/testkit/src/main/scala/org/allenai/common/testkit/ScratchDirectory.scala b/testkit/src/main/scala/org/allenai/common/testkit/ScratchDirectory.scala index 4999639..bd18dd1 100644 --- a/testkit/src/main/scala/org/allenai/common/testkit/ScratchDirectory.scala +++ b/testkit/src/main/scala/org/allenai/common/testkit/ScratchDirectory.scala @@ -11,7 +11,10 @@ trait ScratchDirectory extends BeforeAndAfterAll { val scratchDir: File = { val dir = Files.createTempDirectory(this.getClass.getSimpleName).toFile - sys.addShutdownHook(delete(dir)) + sys.addShutdownHook { + delete(dir) + () + } dir } @@ -20,7 +23,10 @@ trait ScratchDirectory extends BeforeAndAfterAll { s"Unable to create scratch directory $scratchDir" ) - override def afterAll(): Unit = delete(scratchDir) + override def afterAll(): Unit = { + delete(scratchDir) + () + } private def delete(f: File): Boolean = { if (f.isDirectory()) { From c13267a73c27de081ff0fd8a767363f605bafafc Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Fri, 12 Mar 2021 19:24:31 -0800 Subject: [PATCH 54/69] sync scalafmt config with scholar/others --- .scalafmt.conf | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/.scalafmt.conf b/.scalafmt.conf index 25047a3..f453dbb 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -1,12 +1,9 @@ -version = 2.0.0 +version = "2.7.4" maxColumn = 100 -docstrings = ScalaDoc -align = none -align.tokens = [] -rewrite.rules = [SortImports, SortModifiers] -rewrite.sortModifiers.order = [ -"implicit", "final", "sealed", "abstract", -"override", "private", "protected", "lazy" -] -spaces.inImportCurlyBraces = true +docstrings = "ScalaDoc" +assumeStandardLibraryStripMargin = true +continuationIndent.callSite = 2 continuationIndent.defnSite = 2 +newlines.alwaysBeforeTopLevelStatements = true +rewrite.rules = [AvoidInfix, PreferCurlyFors, SortImports, SortModifiers] +spaces.inImportCurlyBraces = true From ab733129c772752fec11e3b4c877e5112bc4e2c6 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Fri, 12 Mar 2021 19:24:50 -0800 Subject: [PATCH 55/69] add codeartifact. drop bintray --- .gitignore | 3 ++- project/build.sbt | 1 + project/plugins.sbt | 3 ++- project/project/plugins.sbt | 2 ++ 4 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 project/build.sbt create mode 100644 project/project/plugins.sbt diff --git a/.gitignore b/.gitignore index 4fa0430..0634f98 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ bin/ target/ **/.bloop/ **/.metals/ -project/metals.sbt +**/*/metals.sbt +.bsp/ diff --git a/project/build.sbt b/project/build.sbt new file mode 100644 index 0000000..abbd81d --- /dev/null +++ b/project/build.sbt @@ -0,0 +1 @@ +codeArtifactUrl := "https://org-allenai-s2-896129387501.d.codeartifact.us-west-2.amazonaws.com/maven/private", diff --git a/project/plugins.sbt b/project/plugins.sbt index 0450882..2b2ebc7 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,7 +1,8 @@ -addSbtPlugin("org.foundweekends" % "sbt-bintray" % "0.5.4") addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2") addSbtPlugin("org.allenai" % "allenai-sbt-plugins" % "3.2.0") +// This should match the version in the meta-build. +addSbtPlugin("io.github.bbstilson" % "sbt-codeartifact" % "0.1.2") addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.16") diff --git a/project/project/plugins.sbt b/project/project/plugins.sbt new file mode 100644 index 0000000..e0770d5 --- /dev/null +++ b/project/project/plugins.sbt @@ -0,0 +1,2 @@ +// This should match the version in the child build. +addSbtPlugin("io.github.bbstilson" % "sbt-codeartifact" % "0.1.2") From 32a073230127cb6bf32216c0cb06fb1587870112 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Fri, 12 Mar 2021 20:04:07 -0800 Subject: [PATCH 56/69] fix tests --- core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala b/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala index bebfac1..2b72ada 100644 --- a/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala +++ b/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala @@ -53,7 +53,7 @@ class LoggingConfigSpec extends UnitSpec with Logging { Source .fromFile(path.toString) .mkString - .contains("<i>html</i>") + .contains("html") ) } } From 9721b5541739fc0780a8a10044165c748fe536d7 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Fri, 12 Mar 2021 20:50:12 -0800 Subject: [PATCH 57/69] simplify release process with sbt-plugin --- README.md | 44 ++++++++++++++----------------------------- project/Release.scala | 19 +++++++++++++++++++ project/plugins.sbt | 2 ++ 3 files changed, 35 insertions(+), 30 deletions(-) create mode 100644 project/Release.scala diff --git a/README.md b/README.md index 3dd89d1..ca9a91c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -common -====== +# common [![CircleCI](https://circleci.com/gh/allenai/common/tree/master.svg?style=svg)](https://circleci.com/gh/allenai/common/tree/master) @@ -11,47 +10,32 @@ A collection of useful utility classes and functions. Slowly on the path to depr `core` - Catchall collection of utilities. -Using this project as a library ------------------- +## Using this project as a library -`common` is published to [JCenter](https://bintray.com/bintray/jcenter) (an -alternative to Maven Central) via [BinTray](https://bintray.com/) at https://bintray.com/allenai/maven. -You will need to include [a resolver for the JCenter -repo](https://github.com/softprops/bintray-sbt#resolving-bintray-artifacts) -using the `sbt-bintray` plugin to find this artifact. +`common` is published to [CodeArtifact](https://us-west-2.console.aws.amazon.com/codesuite/codeartifact/d/896129387501/org-allenai-s2/r/private?region=us-west-2). +You will need to add a resolver via the [`sbt-codeartifact`](https://github.com/bbstilson/sbt-codeartifact/) plugin to use these libraries. -Releasing new versions ----------------------- +## Releasing new versions -This project releases to BinTray. To make a release: +To make a release: -1. Pull the latest code on the master branch that you want to release -1. Edit `build.sbt` to remove "-SNAPSHOT" from the current version -1. Create a pull request if desired or push to master if you are only changing the version -1. Tag the release `git tag -a vX.Y.Z -m "Release X.Y.Z"` replacing X.Y.Z with the correct version -1. Push the tag back to origin `git push origin vX.Y.Z` -1. Release the build on Bintray `sbt +publish` (the "+" is required to cross-compile) -1. Verify publication [on bintray.com](https://bintray.com/allenai/maven) -1. Bump the version in `build.sbt` on master (and push!) with X.Y.Z+1-SNAPSHOT (e.g., 2.5.1 --SNAPSHOT after releasing 2.5.0) +```sbt +> release +``` -If you make a mistake you can rollback the release with `sbt bintrayUnpublish` and retag the - version to a different commit as necessary. - -Guideline for Contributing to `common` ---------------------------- +## Guideline for Contributing to `common` There is no strict process for contributing to `common`. However, following are some general guidelines. -### Discuss in Pull Request Code Reviews ### +### Discuss in Pull Request Code Reviews If you have implemented something in a repository other than `common` and that you think could be a candidate to be migrated into `common`, ask reviewers for feedback when issuing your pull request. -### Create a GitHub Issue ### +### Create a GitHub Issue Feel free create a GitHub issue in the `common` project to provide traceability and a forum for discussion. -### Use TODO Comments ### +### Use TODO Comments While working on a task, go ahead and implement the functionality that you think would be a good fit for `common`, and comment the implementation with a TODO suggesting it belongs in `common`. An example: @@ -74,6 +58,6 @@ reference the issue number in your TODO comment: // TODO(mygithubusername): migrate to common. See https://github.com/allenai/common/issues/123 ... -### Have Two Code Reviewers to `common` Pull Requests ### +### Have Two Code Reviewers to `common` Pull Requests Try and always have at least two reviewers for a pull request to `common` diff --git a/project/Release.scala b/project/Release.scala new file mode 100644 index 0000000..5fa1b60 --- /dev/null +++ b/project/Release.scala @@ -0,0 +1,19 @@ +import sbtrelease.ReleaseStateTransformations._ +import sbtrelease.ReleasePlugin.autoImport._ + +object Release { + + def settings: Seq[ReleaseStep] = Seq( + checkSnapshotDependencies, + inquireVersions, + runClean, + releaseStepCommandAndRemaining("+test"), + setReleaseVersion, + // commitReleaseVersion, + // tagRelease, + releaseStepCommandAndRemaining("+codeArtifactPublish") + // setNextVersion, + // commitNextVersion, + // pushChanges + ) +} diff --git a/project/plugins.sbt b/project/plugins.sbt index 2b2ebc7..4fb9f4c 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -6,3 +6,5 @@ addSbtPlugin("org.allenai" % "allenai-sbt-plugins" % "3.2.0") addSbtPlugin("io.github.bbstilson" % "sbt-codeartifact" % "0.1.2") addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.16") + +addSbtPlugin("com.github.sbt" % "sbt-release" % "1.0.15") From 96fe9cccb1a62be34ca13563767ee270a81f1320 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Fri, 12 Mar 2021 21:06:34 -0800 Subject: [PATCH 58/69] override default release process --- project/GlobalPlugin.scala | 29 ++--------------------------- project/Release.scala | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/project/GlobalPlugin.scala b/project/GlobalPlugin.scala index 3e26fb5..294114f 100644 --- a/project/GlobalPlugin.scala +++ b/project/GlobalPlugin.scala @@ -13,34 +13,9 @@ object GlobalPlugin extends AutoPlugin { override def projectSettings = Seq( scalaVersion := SCALA_213, - organization := "org.allenai.common", CodeArtifactKeys.codeArtifactUrl := "https://org-allenai-s2-896129387501.d.codeartifact.us-west-2.amazonaws.com/maven/private", dependencyOverrides ++= Logging.loggingDependencyOverrides, - publishArtifact in Test := false, - publishArtifact in (Compile, packageDoc) := false, - pomIncludeRepository := { _ => - false - }, - licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")), - homepage := Some(url("https://github.com/allenai/common")), - apiURL := Some(url("https://allenai.github.io/common/")), - scmInfo := Some( - ScmInfo( - url("https://github.com/allenai/common"), - "https://github.com/allenai/common.git" - ) - ), javaOptions += s"-Dlogback.appname=${name.value}", - javacOptions ++= Seq("-source", "1.8", "-target", "1.8"), - crossScalaVersions := SUPPORTED_SCALA_VERSIONS, - unmanagedSourceDirectories.in(Compile) ++= { - CrossVersion.partialVersion(scalaVersion.value) match { - case Some((2, x)) if x == 11 || x == 12 => - Seq(file(sourceDirectory.value.getPath + "/main/scala-2.11-2.12")) - case Some((2, x)) if x == 13 => - Seq(file(sourceDirectory.value.getPath + "/main/scala-2.13")) - case _ => Seq.empty // dotty support would go here - } - } - ) + javacOptions ++= Seq("-source", "1.8", "-target", "1.8") + ) ++ Release.settings } diff --git a/project/Release.scala b/project/Release.scala index 5fa1b60..0e7fd8e 100644 --- a/project/Release.scala +++ b/project/Release.scala @@ -1,9 +1,42 @@ +import ScalaVersions._ + import sbtrelease.ReleaseStateTransformations._ import sbtrelease.ReleasePlugin.autoImport._ +import sbt._ +import sbt.Keys._ object Release { - def settings: Seq[ReleaseStep] = Seq( + def settings: Seq[Setting[_]] = Seq( + organization := "org.allenai.common", + crossScalaVersions := SUPPORTED_SCALA_VERSIONS, + releaseProcess := releaseSteps, + unmanagedSourceDirectories.in(Compile) ++= { + CrossVersion.partialVersion(scalaVersion.value) match { + case Some((2, x)) if x == 11 || x == 12 => + Seq(file(sourceDirectory.value.getPath + "/main/scala-2.11-2.12")) + case Some((2, x)) if x == 13 => + Seq(file(sourceDirectory.value.getPath + "/main/scala-2.13")) + case _ => Seq.empty // dotty support would go here + } + }, + publishArtifact in Test := false, + publishArtifact in (Compile, packageDoc) := false, + pomIncludeRepository := { _ => + false + }, + licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")), + homepage := Some(url("https://github.com/allenai/common")), + apiURL := Some(url("https://allenai.github.io/common/")), + scmInfo := Some( + ScmInfo( + url("https://github.com/allenai/common"), + "https://github.com/allenai/common.git" + ) + ) + ) + + def releaseSteps: Seq[ReleaseStep] = Seq( checkSnapshotDependencies, inquireVersions, runClean, From 696acc4e0844d5355397ab548f9616f65b486dc1 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Fri, 12 Mar 2021 22:30:46 -0800 Subject: [PATCH 59/69] remove sbt-plugins and enable forking --- project/GlobalPlugin.scala | 1 + project/ScalaVersions.scala | 2 +- project/plugins.sbt | 2 -- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/project/GlobalPlugin.scala b/project/GlobalPlugin.scala index 294114f..9f129d2 100644 --- a/project/GlobalPlugin.scala +++ b/project/GlobalPlugin.scala @@ -14,6 +14,7 @@ object GlobalPlugin extends AutoPlugin { Seq( scalaVersion := SCALA_213, CodeArtifactKeys.codeArtifactUrl := "https://org-allenai-s2-896129387501.d.codeartifact.us-west-2.amazonaws.com/maven/private", + fork := true, dependencyOverrides ++= Logging.loggingDependencyOverrides, javaOptions += s"-Dlogback.appname=${name.value}", javacOptions ++= Seq("-source", "1.8", "-target", "1.8") diff --git a/project/ScalaVersions.scala b/project/ScalaVersions.scala index 65b4411..eb05b9d 100644 --- a/project/ScalaVersions.scala +++ b/project/ScalaVersions.scala @@ -1,6 +1,6 @@ object ScalaVersions { val SCALA_211 = "2.11.12" - val SCALA_212 = "2.12.10" + val SCALA_212 = "2.12.11" val SCALA_213 = "2.13.5" val SUPPORTED_SCALA_VERSIONS = List(SCALA_211, SCALA_212, SCALA_213) } diff --git a/project/plugins.sbt b/project/plugins.sbt index 4fb9f4c..4da3b59 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,7 +1,5 @@ addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2") -addSbtPlugin("org.allenai" % "allenai-sbt-plugins" % "3.2.0") - // This should match the version in the meta-build. addSbtPlugin("io.github.bbstilson" % "sbt-codeartifact" % "0.1.2") From 93a1faada2188a1df1a4c106ee90cc72b72379b7 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Fri, 12 Mar 2021 23:41:41 -0800 Subject: [PATCH 60/69] tweak build to apply release settings last --- build.sbt | 5 +++- .../scala/org/allenai/common/JsonIo.scala | 2 +- .../allenai/common/LoggingConfigSpec.scala | 2 +- .../org/allenai/common/VersionSpec.scala | 4 ++- project/GlobalPlugin.scala | 2 +- project/Release.scala | 27 +++++++++---------- 6 files changed, 23 insertions(+), 19 deletions(-) diff --git a/build.sbt b/build.sbt index c25939c..8bb17ba 100644 --- a/build.sbt +++ b/build.sbt @@ -8,20 +8,23 @@ lazy val common = project .configs(IntegrationTest) .settings( Defaults.itSettings, - crossScalaVersions := Nil, // crossScalaVersions must be set to Nil on the aggregating project // in order to avoid double publishing. // See: https://www.scala-sbt.org/1.x/docs/Cross-Build.html#Cross+building+a+project+statefully + crossScalaVersions := Nil, publish / skip := true ) lazy val core = project .in(file("core")) .dependsOn(testkit % "test->compile") + .settings(Release.settings) lazy val guice = project .in(file("guice")) .dependsOn(core, testkit % "test->compile") + .settings(Release.settings) lazy val testkit = project .in(file("testkit")) + .settings(Release.settings) diff --git a/core/src/main/scala/org/allenai/common/JsonIo.scala b/core/src/main/scala/org/allenai/common/JsonIo.scala index 138c305..b6de241 100644 --- a/core/src/main/scala/org/allenai/common/JsonIo.scala +++ b/core/src/main/scala/org/allenai/common/JsonIo.scala @@ -12,7 +12,7 @@ object JsonIo { * @return a stream of objects of type T */ def parseJson[T](source: Source)(implicit format: JsonFormat[T]): Iterator[T] = { - for (line <- source.getLines) yield line.parseJson.convertTo[T] + for (line <- source.getLines()) yield line.parseJson.convertTo[T] } /** Writes the given objects to the given writer, as one-per-line JSON values. */ diff --git a/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala b/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala index 2b72ada..bebfac1 100644 --- a/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala +++ b/core/src/test/scala/org/allenai/common/LoggingConfigSpec.scala @@ -53,7 +53,7 @@ class LoggingConfigSpec extends UnitSpec with Logging { Source .fromFile(path.toString) .mkString - .contains("html") + .contains("<i>html</i>") ) } } diff --git a/core/src/test/scala/org/allenai/common/VersionSpec.scala b/core/src/test/scala/org/allenai/common/VersionSpec.scala index 2bb1b88..884fe58 100644 --- a/core/src/test/scala/org/allenai/common/VersionSpec.scala +++ b/core/src/test/scala/org/allenai/common/VersionSpec.scala @@ -57,7 +57,9 @@ class VersionSpec extends UnitSpec { ) } - "fromResources" should "find common-core's resources" in { + // TODO (bbstilson): Move this test to sbt-plugins where it belongs. + // See: https://github.com/allenai/common/issues/230 + "fromResources" should "find common-core's resources" ignore { // No asserts; this will throw an exception if it's unfound. Version.fromResources("org.allenai.common", "common-core") } diff --git a/project/GlobalPlugin.scala b/project/GlobalPlugin.scala index 9f129d2..3e201c3 100644 --- a/project/GlobalPlugin.scala +++ b/project/GlobalPlugin.scala @@ -18,5 +18,5 @@ object GlobalPlugin extends AutoPlugin { dependencyOverrides ++= Logging.loggingDependencyOverrides, javaOptions += s"-Dlogback.appname=${name.value}", javacOptions ++= Seq("-source", "1.8", "-target", "1.8") - ) ++ Release.settings + ) } diff --git a/project/Release.scala b/project/Release.scala index 0e7fd8e..f49250e 100644 --- a/project/Release.scala +++ b/project/Release.scala @@ -1,17 +1,18 @@ import ScalaVersions._ -import sbtrelease.ReleaseStateTransformations._ import sbtrelease.ReleasePlugin.autoImport._ +import sbtrelease.ReleaseStateTransformations._ + import sbt._ import sbt.Keys._ object Release { - def settings: Seq[Setting[_]] = Seq( + def settings = Seq( organization := "org.allenai.common", crossScalaVersions := SUPPORTED_SCALA_VERSIONS, releaseProcess := releaseSteps, - unmanagedSourceDirectories.in(Compile) ++= { + Compile / unmanagedSourceDirectories ++= { CrossVersion.partialVersion(scalaVersion.value) match { case Some((2, x)) if x == 11 || x == 12 => Seq(file(sourceDirectory.value.getPath + "/main/scala-2.11-2.12")) @@ -20,11 +21,9 @@ object Release { case _ => Seq.empty // dotty support would go here } }, - publishArtifact in Test := false, - publishArtifact in (Compile, packageDoc) := false, - pomIncludeRepository := { _ => - false - }, + Test / publishArtifact := false, + Compile / packageDoc / publishArtifact := false, + pomIncludeRepository := { _ => false }, licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0.html")), homepage := Some(url("https://github.com/allenai/common")), apiURL := Some(url("https://allenai.github.io/common/")), @@ -42,11 +41,11 @@ object Release { runClean, releaseStepCommandAndRemaining("+test"), setReleaseVersion, - // commitReleaseVersion, - // tagRelease, - releaseStepCommandAndRemaining("+codeArtifactPublish") - // setNextVersion, - // commitNextVersion, - // pushChanges + commitReleaseVersion, + tagRelease, + releaseStepCommandAndRemaining("+codeArtifactPublish"), + setNextVersion, + commitNextVersion, + pushChanges ) } From 2c20a62323e19c5fa29cf5e396b25b7d6af6c05a Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Mon, 15 Mar 2021 11:47:02 -0700 Subject: [PATCH 61/69] fix release settings --- build.sbt | 10 +--------- project/Release.scala | 9 +++++++++ version.sbt | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/build.sbt b/build.sbt index 8bb17ba..fc6d125 100644 --- a/build.sbt +++ b/build.sbt @@ -5,15 +5,7 @@ lazy val common = project guice, testkit ) - .configs(IntegrationTest) - .settings( - Defaults.itSettings, - // crossScalaVersions must be set to Nil on the aggregating project - // in order to avoid double publishing. - // See: https://www.scala-sbt.org/1.x/docs/Cross-Build.html#Cross+building+a+project+statefully - crossScalaVersions := Nil, - publish / skip := true - ) + .settings(Release.noPublish) lazy val core = project .in(file("core")) diff --git a/project/Release.scala b/project/Release.scala index f49250e..35d8cfd 100644 --- a/project/Release.scala +++ b/project/Release.scala @@ -8,6 +8,15 @@ import sbt.Keys._ object Release { + def noPublish = Seq( + // crossScalaVersions must be set to Nil on the aggregating project + // in order to avoid double publishing. + // See: https://www.scala-sbt.org/1.x/docs/Cross-Build.html#Cross+building+a+project+statefully + crossScalaVersions := Nil, + releaseProcess := releaseSteps, + publish / skip := true + ) + def settings = Seq( organization := "org.allenai.common", crossScalaVersions := SUPPORTED_SCALA_VERSIONS, diff --git a/version.sbt b/version.sbt index c0504fe..730503b 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -version in ThisBuild := "2.2.1" +ThisBuild / version := "2.3.0" From 25fa06ddcc99ccdf6d7df126180b92f7f3ffef81 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Mon, 15 Mar 2021 14:52:45 -0700 Subject: [PATCH 62/69] add mockito to base trait --- project/Dependencies.scala | 7 ++----- testkit/build.sbt | 6 +----- .../scala/org/allenai/common/testkit/AllenAiBaseSpec.scala | 5 ++++- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index c724aab..117d854 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -23,13 +23,10 @@ object Dependencies { val openCsv = "net.sf.opencsv" % "opencsv" % "2.1" - val pegdown = "org.pegdown" % "pegdown" % "1.4.2" - val scalaGuice = "net.codingwell" %% "scala-guice" % "4.2.6" - val scalaCheck = "org.scalacheck" %% "scalacheck" % "1.14.0" - - val scalaTest = "org.scalatest" %% "scalatest" % "3.0.8" + val scalaTest = "org.scalatest" %% "scalatest" % "3.2.5" + val scalaTestMockito = "org.scalatestplus" %% "mockito-3-4" % "3.2.5.0" val sprayJson = "io.spray" %% "spray-json" % "1.3.5" diff --git a/testkit/build.sbt b/testkit/build.sbt index 9a060cb..97710eb 100644 --- a/testkit/build.sbt +++ b/testkit/build.sbt @@ -2,8 +2,4 @@ import Dependencies._ name := "common-testkit" -libraryDependencies ++= Seq( - scalaCheck, - scalaTest, - pegdown -) +libraryDependencies ++= Seq(scalaTest, scalaTestMockito) diff --git a/testkit/src/main/scala/org/allenai/common/testkit/AllenAiBaseSpec.scala b/testkit/src/main/scala/org/allenai/common/testkit/AllenAiBaseSpec.scala index 4712db2..59b440c 100644 --- a/testkit/src/main/scala/org/allenai/common/testkit/AllenAiBaseSpec.scala +++ b/testkit/src/main/scala/org/allenai/common/testkit/AllenAiBaseSpec.scala @@ -1,5 +1,8 @@ package org.allenai.common.testkit import org.scalatest._ +import org.scalatestplus.mockito.MockitoSugar +import flatspec._ +import matchers._ -trait AllenAiBaseSpec extends FlatSpecLike with Matchers +trait AllenAiBaseSpec extends AnyFlatSpec with should.Matchers with MockitoSugar From 3a9092ab6bb13ff4902990b26b2b8b8b4e47fef6 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Mon, 15 Mar 2021 15:22:50 -0700 Subject: [PATCH 63/69] ignore vscode dir --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0634f98..77585f7 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ target/ **/.metals/ **/*/metals.sbt .bsp/ +.vscode/ From d86d8849718c10d339e39460f93ae36fd846a7d5 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Mon, 15 Mar 2021 15:25:30 -0700 Subject: [PATCH 64/69] Setting version to 2.3.1-SNAPSHOT --- version.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sbt b/version.sbt index 730503b..28229e7 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -ThisBuild / version := "2.3.0" +ThisBuild / version := "2.3.1-SNAPSHOT" From 0e13af40aefdffdb05577841ec2ab633ee4f2c88 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Mon, 15 Mar 2021 15:38:42 -0700 Subject: [PATCH 65/69] upgrade sbt-codeartifact --- project/plugins.sbt | 2 +- project/project/plugins.sbt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/project/plugins.sbt b/project/plugins.sbt index 4da3b59..2bb8a58 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,7 +1,7 @@ addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2") // This should match the version in the meta-build. -addSbtPlugin("io.github.bbstilson" % "sbt-codeartifact" % "0.1.2") +addSbtPlugin("io.github.bbstilson" % "sbt-codeartifact" % "0.1.3") addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.16") diff --git a/project/project/plugins.sbt b/project/project/plugins.sbt index e0770d5..3bdec05 100644 --- a/project/project/plugins.sbt +++ b/project/project/plugins.sbt @@ -1,2 +1,2 @@ // This should match the version in the child build. -addSbtPlugin("io.github.bbstilson" % "sbt-codeartifact" % "0.1.2") +addSbtPlugin("io.github.bbstilson" % "sbt-codeartifact" % "0.1.3") From 83dafd224d115cf96cc80fc234e505f475cfd8fd Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Mon, 15 Mar 2021 16:20:46 -0700 Subject: [PATCH 66/69] Setting version to 2.3.1 --- version.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sbt b/version.sbt index 28229e7..32bb971 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -ThisBuild / version := "2.3.1-SNAPSHOT" +ThisBuild / version := "2.3.1" From 8e1fe7500dfbb2413e25cdbb87592d6bca240b3b Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Mon, 15 Mar 2021 16:21:09 -0700 Subject: [PATCH 67/69] Setting version to 2.3.2-SNAPSHOT --- version.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.sbt b/version.sbt index 32bb971..d1d9390 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -ThisBuild / version := "2.3.1" +ThisBuild / version := "2.3.2-SNAPSHOT" From 1ac14e6b38b5d39ac87fa816a9fd1f39253e69ea Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Mon, 12 Apr 2021 11:00:19 -0700 Subject: [PATCH 68/69] drop spray patch --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 117d854..bcbe4ce 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -28,7 +28,7 @@ object Dependencies { val scalaTest = "org.scalatest" %% "scalatest" % "3.2.5" val scalaTestMockito = "org.scalatestplus" %% "mockito-3-4" % "3.2.5.0" - val sprayJson = "io.spray" %% "spray-json" % "1.3.5" + val sprayJson = "io.spray" %% "spray-json" % "1.3.2" val typesafeConfig = "com.typesafe" % "config" % "1.2.1" From ee9a5795d675bd75fca6e2dbe8e001c465316998 Mon Sep 17 00:00:00 2001 From: Brandon Stilson Date: Mon, 12 Apr 2021 11:45:48 -0700 Subject: [PATCH 69/69] Revert "drop spray patch" This reverts commit 1ac14e6b38b5d39ac87fa816a9fd1f39253e69ea. --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index bcbe4ce..117d854 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -28,7 +28,7 @@ object Dependencies { val scalaTest = "org.scalatest" %% "scalatest" % "3.2.5" val scalaTestMockito = "org.scalatestplus" %% "mockito-3-4" % "3.2.5.0" - val sprayJson = "io.spray" %% "spray-json" % "1.3.2" + val sprayJson = "io.spray" %% "spray-json" % "1.3.5" val typesafeConfig = "com.typesafe" % "config" % "1.2.1"