Closed
Description
It appears that under the latest nokogiri (v1.16.2), the `misc` tags of meanings in JMdict are no longer being parsed correctly.
(This was last known to work under nokogiri v1.13.3.)
Take this meaning from JMdict entry #1224880, よろしい. It should have the tag "uk", which indicates that the word is usually written in kana:
#<Eiwa::Tag::Meaning:0x0000000126f3b908
@antonyms=[],
@attrs={},
@characters="",
@comments=[],
@cross_references=[],
@definitions=
[#<Eiwa::Tag::Definition:0x00000001239d4760
@attrs={"xml:lang"=>"eng"},
@characters="good",
@gender=nil,
@language="eng",
@parent=#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>,
@tag_name="gloss",
@text="good",
@type=nil>,
#<Eiwa::Tag::Definition:0x00000001239d4440
@attrs={"xml:lang"=>"eng"},
@characters="OK",
@gender=nil,
@language="eng",
@parent=#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>,
@tag_name="gloss",
@text="OK",
@type=nil>,
#<Eiwa::Tag::Definition:0x00000001239d2f50
@attrs={"xml:lang"=>"eng"},
@characters="all right",
@gender=nil,
@language="eng",
@parent=#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>,
@tag_name="gloss",
@text="all right",
@type=nil>,
#<Eiwa::Tag::Definition:0x00000001239d2b40
@attrs={"xml:lang"=>"eng"},
@characters="fine",
@gender=nil,
@language="eng",
@parent=#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>,
@tag_name="gloss",
@text="fine",
@type=nil>,
#<Eiwa::Tag::Definition:0x00000001239d2870
@attrs={"xml:lang"=>"eng"},
@characters="very well",
@gender=nil,
@language="eng",
@parent=#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>,
@tag_name="gloss",
@text="very well",
@type=nil>,
#<Eiwa::Tag::Definition:0x00000001239d1a10
@attrs={"xml:lang"=>"eng"},
@characters="will do",
@gender=nil,
@language="eng",
@parent=#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>,
@tag_name="gloss",
@text="will do",
@type=nil>,
#<Eiwa::Tag::Definition:0x00000001239d0610
@attrs={"xml:lang"=>"eng"},
@characters="may",
@gender=nil,
@language="eng",
@parent=#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>,
@tag_name="gloss",
@text="may",
@type=nil>,
#<Eiwa::Tag::Definition:0x00000001239d04d0
@attrs={"xml:lang"=>"eng"},
@characters="can",
@gender=nil,
@language="eng",
@parent=#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>,
@tag_name="gloss",
@text="can",
@type=nil>],
@dialects=[],
@fields=[],
@misc_tags=
[#<Eiwa::Tag::Entity:0x00000001239d49e0
@attrs={},
@code=nil,
@parent=#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>,
@tag_name="misc",
@text=nil>,
#<Eiwa::Tag::Entity:0x00000001239d48f0
@attrs={},
@code=nil,
@parent=#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>,
@tag_name="misc",
@text=nil>],
@parent=
#<Eiwa::Tag::Entry:0x00000001239d4ee0
@attrs={},
@characters="",
@id=1224880,
@meanings=[#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>],
@parent=#<Eiwa::Tag::Other:0x000000012331aaf0 @attrs={}, @characters="", @parent=nil, @tag_name="JMdict">,
@readings=
[#<Eiwa::Tag::Reading:0x00000001239d4cb0
@attrs={},
@characters="",
@frequency_tags=[:ichi1],
@imprecise_reading=false,
@info_tags=[],
@parent=#<Eiwa::Tag::Entry:0x00000001239d4ee0 ...>,
@tag_name="r_ele",
@text="よろしい">],
@spellings=
[#<Eiwa::Tag::Spelling:0x00000001239d4df0
@attrs={},
@characters="",
@frequency_tags=[:ichi1],
@info_tags=[],
@parent=#<Eiwa::Tag::Entry:0x00000001239d4ee0 ...>,
@tag_name="k_ele",
@text="宜しい">],
@tag_name="entry">,
@parts_of_speech=
[#<Eiwa::Tag::Entity:0x00000001239d4b20
@attrs={},
@code=nil,
@parent=#<Eiwa::Tag::Meaning:0x0000000126f3b908 ...>,
@tag_name="pos",
@text=nil>],
@restricted_to_readings=[],
@restricted_to_spellings=[],
@source_languages=[],
@tag_name="sense">
Instead, as the printout of its sole meaning above shows, the `code` of both misc tags now comes back as nil (the `pos` entity in the same printout likewise has a nil `code` and `text`):
> entry.meanings.first.misc_tags.map(&:code)
=> [nil, nil]
Metadata
Metadata
Assignees
Labels
No labels