Add QuaRel semantic parser by OyvindTafjord · Pull Request #1857 · allenai/allennlp · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content
This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Add QuaRel semantic parser #1857

Merged
merged 43 commits into from
Oct 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
f82e54b
Fix buggy overlap feature
OyvindTafjord Sep 20, 2018
004134a
Move Predictor import
OyvindTafjord Sep 20, 2018
6dcc7be
Fix predictor import
OyvindTafjord Sep 20, 2018
10d46e2
Add QuaRel data/parse/predict code
OyvindTafjord Sep 20, 2018
87171c7
Fix bugs in entity bits code
OyvindTafjord Sep 21, 2018
58d72be
Update test to match fix to span overlap
OyvindTafjord Sep 24, 2018
77d0060
Remove some non-training variables
OyvindTafjord Sep 24, 2018
932792b
Basic tests and fixtures for QuaRel parser
OyvindTafjord Sep 24, 2018
8727f82
Fix definition ordering issue
OyvindTafjord Sep 24, 2018
da34c6d
Update tagger
OyvindTafjord Sep 25, 2018
f0c69d6
Merge branch 'master' into quarel
OyvindTafjord Sep 25, 2018
e0e991e
Add min_pretrained_embeddings
OyvindTafjord Sep 25, 2018
7845d5b
Move vocab save to after model setup
OyvindTafjord Sep 25, 2018
768dd32
Merge branch 'fix-vocab-save' into quarel
OyvindTafjord Sep 25, 2018
266f440
Revert "Model can store extra pretrained embeddings (#1817)"
OyvindTafjord Sep 26, 2018
1bcf563
Support adding to vocabulary from embedding file
OyvindTafjord Sep 26, 2018
e023fa7
Merge branch 'vocab-min-pretrained' into quarel
OyvindTafjord Sep 26, 2018
621d227
Merge remote-tracking branch 'upstream/master' into quarel
OyvindTafjord Sep 27, 2018
358fc8a
Update some paths
OyvindTafjord Sep 27, 2018
d722a83
Fix/patch various mypy/pylint complaints
OyvindTafjord Sep 27, 2018
9fb340f
Fix incorrect type hint
OyvindTafjord Sep 27, 2018
14bd584
Make lower case variable names to satisfy pylint
OyvindTafjord Sep 27, 2018
c6d75a4
Merge remote-tracking branch 'upstream/master' into quarel
OyvindTafjord Sep 27, 2018
261e4a1
Remove currently unused entity encoder code
OyvindTafjord Sep 28, 2018
bf5af18
Add test for model with entity bits
OyvindTafjord Sep 28, 2018
b91f22e
Some robustifying tweaks
OyvindTafjord Sep 28, 2018
6a1fdd8
More quarel tests
OyvindTafjord Sep 28, 2018
0b95de9
Merge remote-tracking branch 'upstream/master' into quarel
OyvindTafjord Sep 28, 2018
e738342
Some cleanup for denotation_only mode
OyvindTafjord Oct 1, 2018
9d8a007
Added more testing
OyvindTafjord Oct 1, 2018
e886080
Improve test code coverage
OyvindTafjord Oct 1, 2018
56b6d01
Merge remote-tracking branch 'upstream/master' into quarel
OyvindTafjord Oct 1, 2018
303aa9d
Merge remote-tracking branch 'upstream/master' into quarel
OyvindTafjord Oct 2, 2018
9b8490a
Remove accidentally-committed files
matt-gardner Oct 2, 2018
6dc7f3e
Merge remote-tracking branch 'upstream/master' into quarel
OyvindTafjord Oct 3, 2018
8232690
Remove deprecated GrammarStatelet args
OyvindTafjord Oct 3, 2018
66d39c8
Merge remote-tracking branch 'origin/quarel' into quarel
OyvindTafjord Oct 3, 2018
d2bae28
Fix spurious double code
OyvindTafjord Oct 4, 2018
9f6c4e4
Support URLs for dataset paths
OyvindTafjord Oct 4, 2018
ab62440
Update some paths
OyvindTafjord Oct 4, 2018
b77f2ac
Merge remote-tracking branch 'upstream/master' into quarel
OyvindTafjord Oct 4, 2018
f68d146
Fix docs
matt-gardner Oct 4, 2018
a663d19
Merge branch 'master' into quarel
OyvindTafjord Oct 4, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion allennlp/commands/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,11 @@ def train_model(params: Params,
if key in datasets_for_vocab_creation)
)

model = Model.from_params(vocab=vocab, params=params.pop('model'))

# Initializing the model can have side effect of expanding the vocabulary
vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

model = Model.from_params(vocab=vocab, params=params.pop('model'))
iterator = DataIterator.from_params(params.pop("iterator"))
iterator.index_with(vocab)
validation_iterator_params = params.pop("validation_iterator", None)
Expand Down
1 change: 1 addition & 0 deletions allennlp/data/dataset_readers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@
from allennlp.data.dataset_readers.quora_paraphrase import QuoraParaphraseDatasetReader
from allennlp.data.dataset_readers.semantic_parsing import (
WikiTablesDatasetReader, AtisDatasetReader, NlvrDatasetReader, TemplateText2SqlDatasetReader)
from allennlp.data.dataset_readers.semantic_parsing.quarel import QuarelDatasetReader
508 changes: 508 additions & 0 deletions allennlp/data/dataset_readers/semantic_parsing/quarel.py

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions allennlp/data/fields/knowledge_graph_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,9 +399,13 @@ def _span_overlap_fraction(self,
# Some tables have empty cells.
return 0
seen_entity_words = set()
token_index_left = token_index
while token_index < len(tokens) and tokens[token_index].text in entity_words:
seen_entity_words.add(tokens[token_index].text)
token_index += 1
while token_index_left >= 0 and tokens[token_index_left].text in entity_words:
seen_entity_words.add(tokens[token_index_left].text)
token_index_left -= 1
return len(seen_entity_words) / len(entity_words)

def _span_lemma_overlap_fraction(self,
Expand All @@ -415,9 +419,13 @@ def _span_lemma_overlap_fraction(self,
# Some tables have empty cells.
return 0
seen_entity_lemmas = set()
token_index_left = token_index
while token_index < len(tokens) and tokens[token_index].lemma_ in entity_lemmas:
seen_entity_lemmas.add(tokens[token_index].lemma_)
token_index += 1
while token_index_left >= 0 and tokens[token_index_left].lemma_ in entity_lemmas:
seen_entity_lemmas.add(tokens[token_index_left].lemma_)
token_index_left -= 1
return len(seen_entity_lemmas) / len(entity_lemmas)

# pylint: enable=unused-argument,no-self-use
1 change: 1 addition & 0 deletions allennlp/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from allennlp.models.reading_comprehension.bidaf import BidirectionalAttentionFlow
from allennlp.models.semantic_parsing.nlvr.nlvr_coverage_semantic_parser import NlvrCoverageSemanticParser
from allennlp.models.semantic_parsing.nlvr.nlvr_direct_semantic_parser import NlvrDirectSemanticParser
from allennlp.models.semantic_parsing.quarel.quarel_semantic_parser import QuarelSemanticParser
from allennlp.models.semantic_parsing.wikitables.wikitables_mml_semantic_parser import WikiTablesMmlSemanticParser
from allennlp.models.semantic_parsing.wikitables.wikitables_erm_semantic_parser import WikiTablesErmSemanticParser
from allennlp.models.semantic_parsing.atis.atis_semantic_parser import AtisSemanticParser
Expand Down
Empty file.
749 changes: 749 additions & 0 deletions allennlp/models/semantic_parsing/quarel/quarel_semantic_parser.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions allennlp/predictors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from allennlp.predictors.event2mind import Event2MindPredictor
from allennlp.predictors.nlvr_parser import NlvrParserPredictor
from allennlp.predictors.open_information_extraction import OpenIePredictor
from allennlp.predictors.quarel_parser import QuarelParserPredictor
from allennlp.predictors.semantic_role_labeler import SemanticRoleLabelerPredictor
from allennlp.predictors.sentence_tagger import SentenceTaggerPredictor
from allennlp.predictors.simple_seq2seq import SimpleSeq2SeqPredictor
Expand Down
2 changes: 1 addition & 1 deletion allennlp/predictors/open_information_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from allennlp.data import DatasetReader, Instance
from allennlp.data.tokenizers import WordTokenizer
from allennlp.models import Model
from allennlp.service.predictors.predictor import Predictor
from allennlp.predictors.predictor import Predictor
from allennlp.data.tokenizers.word_splitter import SpacyWordSplitter
from allennlp.data.tokenizers import Token

Expand Down
3 changes: 2 additions & 1 deletion allennlp/predictors/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
'dialog_qa': 'dialog_qa',
'event2mind': 'event2mind',
'simple_tagger': 'sentence-tagger',
'srl': 'semantic-role-labeling'
'srl': 'semantic-role-labeling',
'quarel_parser': 'quarel-parser'
}

class Predictor(Registrable):
Expand Down
100 changes: 100 additions & 0 deletions allennlp/predictors/quarel_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from typing import cast, Tuple

from overrides import overrides

from allennlp.common.util import JsonDict, sanitize
from allennlp.data import Instance
from allennlp.data.dataset_readers.semantic_parsing.quarel import QuarelDatasetReader
from allennlp.predictors.predictor import Predictor
from allennlp.semparse.contexts.quarel_utils import get_explanation, from_qr_spec_string
from allennlp.semparse.contexts.quarel_utils import words_from_entity_string, from_entity_cues_string


@Predictor.register('quarel-parser')
class QuarelParserPredictor(Predictor):
    """
    Wrapper for the quarel_semantic_parser model.
    """
    def _my_json_to_instance(self, json_dict: JsonDict) -> Tuple[Instance, JsonDict]:
        """
        Build an ``Instance`` from a JSON request, along with a dict of extra
        presentation info (original question, its tokens, extracted worlds).
        """
        # Cast so mypy sees the QuaRel-specific reader API.
        reader = cast(QuarelDatasetReader, self._dataset_reader)

        # TODO: Fix protected access usage
        question_data = reader.preprocess(json_dict, predict=True)[0]

        qr_spec_override = None
        dynamic_entities = None

        # Optional per-request override of entity cue words.
        if 'entitycues' in json_dict:
            cue_map = from_entity_cues_string(json_dict['entitycues'])
            dynamic_entities = reader._dynamic_entities.copy()  # pylint: disable=protected-access
            for entity_name, cue_words in cue_map.items():
                strings = [words_from_entity_string(entity_name).lower()] + cue_words
                dynamic_entities["a:" + entity_name] = " ".join(strings)

        # Optional per-request override of the qualitative-relation spec;
        # restrict dynamic entities to those mentioned in the spec.
        if 'qrspec' in json_dict:
            qr_spec_override = from_qr_spec_string(json_dict['qrspec'])
            base_entities = dynamic_entities
            if base_entities is None:
                base_entities = reader._dynamic_entities.copy()  # pylint: disable=protected-access
            dynamic_entities = {}
            for quantity_set in qr_spec_override:
                for entity_name in quantity_set:
                    key = "a:" + entity_name
                    dynamic_entities[key] = base_entities.get(
                            key, words_from_entity_string(entity_name).lower())

        question = question_data['question']
        question_tokens = reader._tokenizer.tokenize(question.lower())  # pylint: disable=protected-access
        world_extractions = question_data.get('world_extractions')

        instance = reader.text_to_instance(question,
                                           world_extractions=world_extractions,
                                           qr_spec_override=qr_spec_override,
                                           dynamic_entities_override=dynamic_entities)

        # Default placeholders when no worlds were extracted.
        worlds_out = {"world1": "N/A", "world2": "N/A"}
        if world_extractions is not None:
            worlds_out.update(world_extractions)

        extra_info = {'question': json_dict['question'],
                      'question_tokens': question_tokens,
                      "world_extractions": worlds_out}
        return instance, extra_info

    @overrides
    def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        return self._my_json_to_instance(json_dict)[0]

    @overrides
    def predict_json(self, inputs: JsonDict) -> JsonDict:
        """
        Run the model on one request; attach the answer ("A"/"B"/"None") and a
        human-readable explanation of the chosen logical form to the output.
        """
        instance, return_dict = self._my_json_to_instance(inputs)
        world = instance.fields['world'].metadata  # type: ignore
        outputs = self._model.forward_on_instance(instance)

        answer_index = outputs['answer_index']
        # Map the predicted index to a letter; anything else means no answer.
        answer = {0: "A", 1: "B"}.get(answer_index, "None")
        outputs['answer'] = answer

        return_dict.update(outputs)

        if answer == "None":
            explanation = [{"header": "No consistent interpretation found!", "content": []}]
        else:
            explanation = get_explanation(return_dict['logical_form'],
                                          return_dict['world_extractions'],
                                          answer_index,
                                          world)

        return_dict['explanation'] = explanation
        return sanitize(return_dict)
Loading
0