8000 release config explorer by joelgrus · Pull Request #2118 · allenai/allennlp · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content
This repository was archived by the owner on Dec 16, 2022. It is now read-only.

release config explorer #2118

Merged
merged 11 commits into from
Dec 3, 2018
66 changes: 29 additions & 37 deletions allennlp/commands/configure.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,53 @@
"""
The ``configure`` subcommand generates a stub configuration for
the specified class (or for the top level configuration if no class specified).
The ``configure`` subcommand launches a webapp that helps you
generate an AllenNLP configuration file.

.. code-block:: bash

$ allennlp configure --help
usage: allennlp configure [-h] [class]

Generate a configuration stub for a specific class (or for config as a whole if [class] is omitted).
usage: allennlp configure [-h] [--port PORT]
[--include-package INCLUDE_PACKAGE]

positional arguments:
class
Run the configuration wizard

optional arguments:
-h, --help show this help message and exit
--port PORT port to serve the wizard on
--include-package INCLUDE_PACKAGE
additional packages to include
"""

import argparse

from flask_cors import CORS
from gevent.pywsgi import WSGIServer

from allennlp.commands.subcommand import Subcommand
from allennlp.common.configuration import configure, Config, render_config
from allennlp.service.config_explorer import make_app


class Configure(Subcommand):
    """
    ``allennlp configure`` subcommand: registers the CLI arguments for the
    configuration wizard and wires them to ``_run_wizard``.
    """

    def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argparse.ArgumentParser:
        # pylint: disable=protected-access
        description = '''Run the configuration wizard'''
        subparser = parser.add_parser(
                name, description=description, help='Run the configuration wizard.')

        # Port the wizard webapp is served on.
        subparser.add_argument('--port', type=int, default=8123, help='port to serve the wizard on')
        # May be repeated; each occurrence appends another package name.
        subparser.add_argument('--include-package',
                               type=str,
                               action='append',
                               default=[],
                               help='additional packages to include')
        subparser.set_defaults(func=_run_wizard)

        return subparser

def _configure(args: argparse.Namespace) -> None:
    """
    Print a configuration stub for ``args.cla55`` (or for the top-level
    config when it is empty); for an abstract base class, list the
    registered subclasses instead.
    """
    cla55 = args.cla55
    # Split "path.to.Class" into module path and bare class name
    # (only used for the error messages below).
    module, _, class_name = cla55.rpartition(".")

    print()
    try:
        config = configure(cla55)
        if not isinstance(config, Config):
            # A non-Config result is an iterable of subclass names.
            print(f"{class_name} is an abstract base class, choose one of the following subclasses:\n")
            for subclass in config:
                print("\t", subclass)
        else:
            if cla55:
                print(f"configuration stub for {cla55}:\n")
            else:
                print(f"configuration stub for AllenNLP:\n")
            print(render_config(config))
    except ModuleNotFoundError:
        print(f"unable to load module {module}")
    except AttributeError:
        print(f"class {class_name} does not exist in module {module}")
    print()
def _run_wizard(args: argparse.Namespace) -> None:
    """Build the config-explorer webapp and serve it until interrupted."""
    wizard_app = make_app(args.include_package)
    # Allow cross-origin requests so the frontend can talk to this server.
    CORS(wizard_app)

    server = WSGIServer(('0.0.0.0', args.port), wizard_app)
    print(f"serving Config Explorer at http://localhost:{args.port}")
    server.serve_forever()
95 changes: 86 additions & 9 deletions allennlp/common/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
from allennlp.common import Registrable, JsonDict
from allennlp.data.dataset_readers import DatasetReader
from allennlp.data.iterators import DataIterator
from allennlp.data.vocabulary import Vocabulary
from allennlp.data.vocabulary import Vocabulary, DEFAULT_NON_PADDED_NAMESPACES
from allennlp.models.model import Model
from allennlp.modules.seq2seq_encoders import _Seq2SeqWrapper
from allennlp.modules.seq2vec_encoders import _Seq2VecWrapper
from allennlp.modules.token_embedders import Embedding
from allennlp.nn.activations import Activation
from allennlp.nn.initializers import Initializer
from allennlp.nn.regularizers import Regularizer
from allennlp.training.optimizers import Optimizer as AllenNLPOptimizer
Expand Down Expand Up @@ -66,6 +67,10 @@ def json_annotation(cla55: Optional[type]):
if cla55 is None:
return {'origin': '?'}

# Special case to handle activation functions, which can't be specified as JSON
if cla55 == Activation:
return {'origin': 'str'}

# Hack because e.g. typing.Union isn't a type.
if isinstance(cla55, type) and issubclass(cla55, Initializer) and cla55 != Initializer:
init_fn = cla55()._init_function
Expand Down Expand Up @@ -110,6 +115,9 @@ def to_json(self) -> JsonDict:
if is_configurable(self.annotation):
json_dict["configurable"] = True

if is_registrable(self.annotation):
json_dict["registrable"] = True

if self.default_value != _NO_DEFAULT:
try:
# Ugly check that default value is actually serializable
Expand Down Expand Up @@ -316,6 +324,23 @@ def _remove_optional(typ3: type) -> type:
else:
return typ3

def is_registrable(typ3: type) -> bool:
    """Return True if ``typ3`` (ignoring an ``Optional`` wrapper) is registrable."""
    plain_type = _remove_optional(typ3)

    # Regularizer is treated as registrable as a special case.
    if plain_type == Regularizer:
        return True

    try:
        return issubclass(plain_type, Registrable)
    except TypeError:
        # Some annotations (e.g. unions) are not classes and crash `issubclass`.
        # TODO: figure out a better way to deal with them
        return False


def is_configurable(typ3: type) -> bool:
# Throw out optional:
typ3 = _remove_optional(typ3)
Expand Down Expand Up @@ -375,7 +400,7 @@ def _render(item: ConfigItem, indent: str = "") -> str:
ConfigItem(name="evaluate_on_test",
annotation=bool,
default_value=False,
comment="whether to evaluate on the test dataset at the end of training (don't do it!"),
comment="whether to evaluate on the test dataset at the end of training (don't do it!)"),
ConfigItem(name="model",
annotation=Model,
default_value=_NO_DEFAULT,
Expand Down Expand Up @@ -404,7 +429,7 @@ def _valid_choices(cla55: type) -> Dict[str, str]:
Return a mapping {registered_name -> subclass_name}
for the registered subclasses of `cla55`.
"""
choices: Dict[str, str] = {}
valid_choices: Dict[str, str] = {}

if cla55 not in Registrable._registry:
raise ValueError(f"{cla55} is not a known Registrable class")
Expand All @@ -414,11 +439,20 @@ def _valid_choices(cla55: type) -> Dict[str, str]:
if isinstance(subclass, (_Seq2SeqWrapper, _Seq2VecWrapper)):
subclass = subclass._module_class

choices[name] = full_name(subclass)
valid_choices[name] = full_name(subclass)

return valid_choices

def choices(full_path: str = '') -> List[str]:
parts = full_path.split(".")
class_name = parts[-1]
module_name = ".".join(parts[:-1])
module = importlib.import_module(module_name)
cla55 = getattr(module, class_name)
return list(_valid_choices(cla55).values())

return choices

def configure(full_path: str = '') -> Union[Config, List[str]]:
def configure(full_path: str = '') -> Config:
if not full_path:
return BASE_CONFIG

Expand All @@ -427,8 +461,51 @@ def configure(full_path: str = '') -> Union[Config, List[str]]:
module_name = ".".join(parts[:-1])
module = importlib.import_module(module_name)
cla55 = getattr(module, class_name)

if Registrable in getattr(cla55, '__bases__', ()):
return list(_valid_choices(cla55).values())
if cla55 == Vocabulary:
return VOCAB_CONFIG
else:
return _auto_config(cla55)


# One-off, hand-written configuration for Vocabulary: `configure()` returns
# this instead of auto-generating a config for it (presumably because
# introspection doesn't produce a useful stub for Vocabulary -- TODO confirm).
VOCAB_CONFIG: Config = Config([
        ConfigItem(name="directory_path",
                   annotation=str,
                   default_value=None,
                   comment="path to an existing vocabulary (if you want to use one)"),
        ConfigItem(name="extend",
                   annotation=bool,
                   default_value=False,
                   comment="whether to extend the existing vocabulary (if you specified one)"),
        ConfigItem(name="min_count",
                   annotation=int,
                   default_value=None,
                   comment="only include tokens that occur at least this many times"),
        ConfigItem(name="max_vocab_size",
                   annotation=Union[int, Dict[str, int]],
                   default_value=None,
                   comment="used to cap the number of tokens in your vocabulary"),
        ConfigItem(name="non_padded_namespaces",
                   annotation=List[str],
                   default_value=DEFAULT_NON_PADDED_NAMESPACES,
                   comment="namespaces that don't get padding or OOV tokens"),
        ConfigItem(name="pretrained_files",
                   annotation=Dict[str, str],
                   default_value=None,
                   comment="pretrained embedding files for each namespace"),
        ConfigItem(name="min_pretrained_embeddings",
                   annotation=Dict[str, int],
                   default_value=None,
                   comment="specifies a number of lines to keep for each namespace, "
                           "even for words not appearing in the data"),
        ConfigItem(name="only_include_pretrained_words",
                   annotation=bool,
                   default_value=False,
                   comment=("if True, keeps only the words that appear in the pretrained set. "
                            "if False, also includes non-pretrained words that exceed min_count.")),
        ConfigItem(name="tokens_to_add",
                   annotation=Dict[str, List[str]],
                   default_value=None,
                   comment=("any tokens here will certainly be included in the keyed namespace, "
                            "regardless of your data"))
])
Loading
0