8000 release config explorer by joelgrus · Pull Request #2118 · allenai/allennlp · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content
This repository was archived by the owner on Dec 16, 2022. It is now read-only.

release config explorer #2118

Merged
merged 11 commits into from
Dec 3, 2018
66 changes: 29 additions & 37 deletions allennlp/commands/configure.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,53 @@
"""
The ``configure`` subcommand generates a stub configuration for
the specified class (or for the top level configuration if no class specified).
The ``configure`` subcommand launches a webapp that helps you
generate an AllenNLP configuration file.

.. code-block:: bash

$ allennlp configure --help
usage: allennlp configure [-h] [class]

Generate a configuration stub for a specific class (or for config as a whole if [class] is omitted).
usage: allennlp configure [-h] [--port PORT]
[--include-package INCLUDE_PACKAGE]

positional arguments:
class
Run the configuration wizard

optional arguments:
-h, --help show this help message and exit
--port PORT port to serve the wizard on
--include-package INCLUDE_PACKAGE
additional packages to include
"""

import argparse

from flask_cors import CORS
from gevent.pywsgi import WSGIServer

from allennlp.commands.subcommand import Subcommand
from allennlp.common.configuration import configure, Config, render_config
from allennlp.service.config_explorer import make_app


class Configure(Subcommand):
    """
    ``allennlp configure`` subcommand: registers the CLI arguments for the
    configuration wizard and wires them to ``_run_wizard``.
    """

    def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argparse.ArgumentParser:
        # pylint: disable=protected-access
        description = '''Run the configuration wizard'''
        subparser = parser.add_parser(
                name, description=description, help='Run the configuration wizard.')

        # Port the wizard webapp is served on.
        subparser.add_argument('--port', type=int, default=8123, help='port to serve the wizard on')
        # May be repeated; each occurrence appends another package name.
        subparser.add_argument('--include-package',
                               type=str,
                               action='append',
                               default=[],
                               help='additional packages to include')
        subparser.set_defaults(func=_run_wizard)

        return subparser

def _configure(args: argparse.Namespace) -> None:
    """
    Print a configuration stub for ``args.cla55`` (or for the top-level
    config when it is empty); for an abstract base class, list the
    registered subclasses instead.
    """
    cla55 = args.cla55
    # Split "path.to.Class" into module path and bare class name
    # (only used for the error messages below).
    module, _, class_name = cla55.rpartition(".")

    print()
    try:
        config = configure(cla55)
        if not isinstance(config, Config):
            # A non-Config result is an iterable of subclass names.
            print(f"{class_name} is an abstract base class, choose one of the following subclasses:\n")
            for subclass in config:
                print("\t", subclass)
        else:
            if cla55:
                print(f"configuration stub for {cla55}:\n")
            else:
                print(f"configuration stub for AllenNLP:\n")
            print(render_config(config))
    except ModuleNotFoundError:
        print(f"unable to load module {module}")
    except AttributeError:
        print(f"class {class_name} does not exist in module {module}")
    print()
def _run_wizard(args: argparse.Namespace) -> None:
    """Build the config-explorer webapp and serve it until interrupted."""
    wizard_app = make_app(args.include_package)
    # Allow cross-origin requests so the frontend can talk to this server.
    CORS(wizard_app)

    server = WSGIServer(('0.0.0.0', args.port), wizard_app)
    print(f"serving Config Explorer at http://localhost:{args.port}")
    server.serve_forever()
95 changes: 86 additions & 9 deletions allennlp/common/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
from allennlp.common import Registrable, JsonDict
from allennlp.data.dataset_readers import DatasetReader
from allennlp.data.iterators import DataIterator
from allennlp.data.vocabulary import Vocabulary
from allennlp.data.vocabulary import Vocabulary, DEFAULT_NON_PADDED_NAMESPACES
from allennlp.models.model import Model
from allennlp.modules.seq2seq_encoders import _Seq2SeqWrapper
from allennlp.modules.seq2vec_encoders import _Seq2VecWrapper
from allennlp.modules.token_embedders import Embedding
from allennlp.nn.activations import Activation
from allennlp.nn.initializers import Initializer
from allennlp.nn.regularizers import Regularizer
from allennlp.training.optimizers import Optimizer as AllenNLPOptimizer
Expand Down Expand Up @@ -66,6 +67,10 @@ def json_annotation(cla55: Optional[type]):
if cla55 is None:
return {'origin': '?'}

# Special case to handle activation functions, which can't be specified as JSON
if cla55 == Activation:
return {'origin': 'str'}

# Hack because e.g. typing.Union isn't a type.
if isinstance(cla55, type) and issubclass(cla55, Initializer) and cla55 != Initializer:
init_fn = cla55()._init_function
Expand Down Expand Up @@ -110,6 +115,9 @@ def to_json(self) -> JsonDict:
if is_configurable(self.annotation):
json_dict["configurable"] = True

if is_registrable(self.annotation):
json_dict["registrable"] = True

if self.default_value != _NO_DEFAULT:
try:
# Ugly check that default value is actually serializable
Expand Down Expand Up @@ -316,6 +324,23 @@ def _remove_optional(typ3: type) -> type:
else:
return typ3

def is_registrable(typ3: type) -> bool:
    """Return True if ``typ3`` (ignoring an ``Optional`` wrapper) is registrable."""
    plain_type = _remove_optional(typ3)

    # Regularizer is treated as registrable as a special case.
    if plain_type == Regularizer:
        return True

    try:
        return issubclass(plain_type, Registrable)
    except TypeError:
        # Some annotations (e.g. unions) are not classes and crash `issubclass`.
        # TODO: figure out a better way to deal with them
        return False


def is_configurable(typ3: type) -> bool:
# Throw out optional:
typ3 = _remove_optional(typ3)
Expand Down Expand Up @@ -375,7 +400,7 @@ def _render(item: ConfigItem, indent: str = "") -> str:
ConfigItem(name="evaluate_on_test",
annotation=bool,
default_value=False,
comment="whether to evaluate on the test dataset at the end of training (don't do it!"),
comment="whether to evaluate on the test dataset at the end of training (don't do it!)"),
ConfigItem(name="model",
annotation=Model,
default_value=_NO_DEFAULT,
Expand Down Expand Up @@ -404,7 +429,7 @@ def _valid_choices(cla55: type) -> Dict[str, str]:
Return a mapping {registered_name -> subclass_name}
for the registered subclasses of `cla55`.
"""
choices: Dict[str, str] = {}
valid_choices: Dict[str, str] = {}

if cla55 not in Registrable._registry:
raise ValueError(f"{cla55} is not a known Registrable class")
Expand All @@ -414,11 +439,20 @@ def _valid_choices(cla55: type) -> Dict[str, str]:
if isinstance(subclass, (_Seq2SeqWrapper, _Seq2VecWrapper)):
subclass = subclass._module_class

choices[name] = full_name(subclass)
valid_choices[name] = full_name(subclass)

return valid_choices

def choices(full_path: str = '') -> List[str]:
parts = full_path.split(".")
class_name = parts[-1]
module_name = ".".join(parts[:-1])
module = importlib.import_module(module_name)
cla55 = getattr(module, class_name)
return list(_valid_choices(cla55).values())

return choices

def configure(full_path: str = '') -> Union[Config, List[str]]:
def configure(full_path: str = '') -> Config:
if not full_path:
return BASE_CONFIG

Expand All @@ -427,8 +461,51 @@ def configure(full_path: str = '') -> Union[Config, List[str]]:
module_name = ".".join(parts[:-1])
module = importlib.import_module(module_name)
cla55 = getattr(module, class_name)

if Registrable in getattr(cla55, '__bases__', ()):
return list(_valid_choices(cla55).values())
if cla55 == Vocabulary:
return VOCAB_CONFIG
else:
return _auto_config(cla55)


# One-off, hand-written configuration for Vocabulary: `configure()` returns
# this instead of auto-generating a config for it (presumably because
# introspection doesn't produce a useful stub for Vocabulary -- TODO confirm).
VOCAB_CONFIG: Config = Config([
        ConfigItem(name="directory_path",
                   annotation=str,
                   default_value=None,
                   comment="path to an existing vocabulary (if you want to use one)"),
        ConfigItem(name="extend",
                   annotation=bool,
                   default_value=False,
                   comment="whether to extend the existing vocabulary (if you specified one)"),
        ConfigItem(name="min_count",
                   annotation=int,
                   default_value=None,
                   comment="only include tokens that occur at least this many times"),
        ConfigItem(name="max_vocab_size",
                   annotation=Union[int, Dict[str, int]],
                   default_value=None,
                   comment="used to cap the number of tokens in your vocabulary"),
        ConfigItem(name="non_padded_namespaces",
                   annotation=List[str],
                   default_value=DEFAULT_NON_PADDED_NAMESPACES,
                   comment="namespaces that don't get padding or OOV tokens"),
        ConfigItem(name="pretrained_files",
                   annotation=Dict[str, str],
                   default_value=None,
                   comment="pretrained embedding files for each namespace"),
        ConfigItem(name="min_pretrained_embeddings",
                   annotation=Dict[str, int],
                   default_value=None,
                   comment="specifies a number of lines to keep for each namespace, "
                           "even for words not appearing in the data"),
        ConfigItem(name="only_include_pretrained_words",
                   annotation=bool,
                   default_value=False,
                   comment=("if True, keeps only the words that appear in the pretrained set. "
                            "if False, also includes non-pretrained words that exceed min_count.")),
        ConfigItem(name="tokens_to_add",
                   annotation=Dict[str, List[str]],
                   default_value=None,
                   comment=("any tokens here will certainly be included in the keyed namespace, "
                            "regardless of your data"))
])
Loading
0