feat(corp/data-import): parse lemmas from OpenCorpora dump

Change-Id: I1e4efcfc8e555f61578b563411d5e6ed9590d8e8
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7860
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
This commit is contained in:
Vincent Ambo 2023-01-18 03:22:53 +03:00 committed by tazjin
parent ee7616d956
commit 485c3cc912
2 changed files with 135 additions and 14 deletions

View file

@@ -80,11 +80,11 @@ fn main() {
let mut out = BufWriter::new(std::io::stdout().lock());
while let Some(elem) = parser.next_element() {
match elem {
oc_parser::OcElement::Grammeme(g) => {
writeln!(out, "{:?}", g).ensure("writing element failed")
if let oc_parser::OcElement::Lemma(lemma) = elem {
if lemma.lemma.word == "тяжёлый" {
writeln!(out, "{:?}", lemma).ensure("writing output failed");
break;
}
oc_parser::OcElement::Lemma(_) => continue,
}
}