feat(corp/data-import): parse lemmas from OpenCorpora dump
Change-Id: I1e4efcfc8e555f61578b563411d5e6ed9590d8e8
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7860
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
This commit is contained in:
parent
ee7616d956
commit
485c3cc912
2 changed files with 135 additions and 14 deletions
|
|
@@ -80,11 +80,11 @@ fn main() {
|
|||
let mut out = BufWriter::new(std::io::stdout().lock());
|
||||
|
||||
while let Some(elem) = parser.next_element() {
|
||||
match elem {
|
||||
oc_parser::OcElement::Grammeme(g) => {
|
||||
writeln!(out, "{:?}", g).ensure("writing element failed")
|
||||
if let oc_parser::OcElement::Lemma(lemma) = elem {
|
||||
if lemma.lemma.word == "тяжёлый" {
|
||||
writeln!(out, "{:?}", lemma).ensure("writing output failed");
|
||||
break;
|
||||
}
|
||||
oc_parser::OcElement::Lemma(_) => continue,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue