Source code for loglan_core.addons.word_sourcer

"""
This module contains an addon for the basic Word model
that makes it possible to work with a word's sources.
"""

from __future__ import annotations

import re
from typing import Iterable

from sqlalchemy import select, or_
from sqlalchemy.sql.selectable import Select

from ..type import BaseType
from ..word import BaseWord


class WordSource:
    """Word Source from Word.origin for Prims

    One source is a string such as ``'3/5R mesto'``: two integers separated
    by a slash, one word character, and then a free-text transcription.

    Attributes:
        coincidence (int): the integer before the slash
        length (int): the integer after the slash
        language (str): the single character following the second integer
        transcription (str | None): stripped text found after the first
            space in the source, or None when there is none
    """

    PATTERN_SOURCE = r"\d+\/\d+\w"

    # One-letter codes mapped to language names.
    LANGUAGES = {
        "E": "English",
        "C": "Chinese",
        "H": "Hindi",
        "R": "Russian",
        "S": "Spanish",
        "F": "French",
        "J": "Japanese",
        "G": "German",
    }

    def __init__(self, source):
        """
        Args:
            source (str): one source entry, for example '3/5R mesto'

        Raises:
            ValueError: if ``source`` contains nothing matching PATTERN_SOURCE
        """
        compatibility_search = re.search(self.PATTERN_SOURCE, source)
        self.coincidence, self.length, self.language = self.parse_source(
            compatibility_search
        )

        transcription_search = re.search(rf"(?!{self.PATTERN_SOURCE}) .+", source)
        self.transcription = (
            str(transcription_search[0]).strip() if transcription_search else None
        )

    def __str__(self):
        """
        Returns:
            str: class name together with ``as_string``,
                for example '<WordSource 3/5R mesto>'
        """
        return f"<{self.__class__.__name__} {self.as_string}>"

    @staticmethod
    def parse_source(
        compatibility_search,
    ) -> tuple[int, int, str]:
        """Split a matched '<int>/<int><char>' token into its three parts.

        Args:
            compatibility_search: result of searching a source with
                PATTERN_SOURCE (a match object), or None when nothing matched

        Returns:
            tuple[int, int, str]: coincidence, length and language

        Raises:
            ValueError: if ``compatibility_search`` is empty / None
        """
        if not compatibility_search:
            raise ValueError("No compatible source found")

        token = compatibility_search[0]
        # Everything except the last character is '<int>/<int>';
        # the last character is the language code.
        numbers = token[:-1].split("/")
        return int(numbers[0]), int(numbers[1]), token[-1:]

    @property
    def as_string(self) -> str:
        """
        Format WordSource as string, for example, '3/5R mesto'

        Returns:
            str: the formatted source, or an empty string when any part
                of it is missing
        """
        # The numeric parts are compared with None rather than tested for
        # truthiness: 0 is a valid parsed value and must not count as missing.
        if self.coincidence is None or self.length is None:
            return ""
        if not self.language or not self.transcription:
            return ""
        return f"{self.coincidence}/{self.length}{self.language} {self.transcription}"
class WordSourcer:
    """Collection of helpers that work out which words a word was made from.

    All methods are class or static methods; the class keeps no state.
    Methods that look words up return a ``select()`` statement and do not
    execute it themselves.
    """

    # these prims have switched djifoas like 'flo' for 'folma'
    switch_prims = [
        "canli",
        "farfu",
        "folma",
        "forli",
        "kutla",
        "marka",
        "mordu",
        "sanca",
        "sordi",
        "suksi",
        "surna",
    ]

    @classmethod
    def get_sources_prim(cls, word: BaseWord) -> list[WordSource] | str | None:
        """Describe the sources of a Prim word.

        Args:
            word (BaseWord): word to inspect

        Returns:
            None if the word's type group is not 'Prim';
            a list of WordSource for a 'C-Prim';
            otherwise a string '<name>: <origin>' with ' < <origin_x>'
            appended when origin_x is set.
        """
        # Author's note: existing prim types are C, D, I, L, N, O, S.
        if word.type.group != "Prim":
            return None

        if word.type.type_ == "C-Prim":
            return cls._get_sources_c_prim(word)

        origin_x = f" < {word.origin_x}" if word.origin_x else ""
        return f"{word.name}: {word.origin}{origin_x}"

    @staticmethod
    def _get_sources_c_prim(word: BaseWord) -> list[WordSource] | None:
        """Parse every ' | '-separated entry of a C-Prim's origin.

        Args:
            word (BaseWord): word to inspect

        Returns:
            list[WordSource] | None: one WordSource per entry,
                or None if the word is not a 'C-Prim'

        Raises:
            ValueError: raised by WordSource when an entry cannot be parsed
        """
        if word.type.type_ != "C-Prim":
            return None

        return [WordSource(source) for source in str(word.origin).split(" | ")]

    @classmethod
    def get_sources_cpx(
        cls, word: BaseWord, as_str: bool = False
    ) -> Select[tuple[BaseWord]] | list[str]:
        """Extract source words from the word's origin field

        Args:
            word (BaseWord): word to inspect
            as_str (bool): return plain source names if True, otherwise
                a select statement for the matching words
                (Default value = False)

        Example:
            'foldjacea' > ['forli', 'djano', 'cenja']

        Returns:
            An empty list if the word's type group is not 'Cpx'; otherwise
            the list of source names (as_str=True) or a select statement
            for the words with those names (as_str=False).
        """
        if word.type.group != "Cpx":
            return []

        sources = cls._prepare_sources_cpx(word)
        return sources if as_str else cls.words_from_source_cpx(sources)

    @classmethod
    def words_from_source_cpx(cls, sources: list[str]) -> Select[tuple[BaseWord]]:
        """Build a statement selecting the words a complex was made from.

        Args:
            sources (list[str]): names of the source words

        Returns:
            Select statement for words whose name is in ``sources`` and
            whose type is not one of 'LW' / 'Cpd'.
        """
        exclude_ids = cls.get_type_ids(types=("LW", "Cpd"))
        return (
            select(BaseWord)
            .filter(BaseWord.name.in_(sources))
            .filter(BaseWord.type_id.notin_(exclude_ids))
        )

    @classmethod
    def get_type_ids(cls, types: Iterable[str]):
        """
        Get ids of specific types from provided list

        Args:
            types (Iterable[str]): type names to look for; they are compared
                against the type_, type_x and group columns. A single
                string is treated as one type name.

        Returns:
            Scalar subquery selecting the ids of the matching types
        """
        # ``types`` feeds three IN clauses, so materialise it once: a one-shot
        # iterable (e.g. a generator) would be exhausted after the first one.
        wanted = (types,) if isinstance(types, str) else tuple(types)
        return (
            select(BaseType.id)
            .filter(
                or_(
                    BaseType.type_.in_(wanted),
                    BaseType.type_x.in_(wanted),
                    BaseType.group.in_(wanted),
                )
            )
            .scalar_subquery()
        )

    @staticmethod
    def _prepare_sources_cpx(word: BaseWord) -> list[str]:
        """Turn a complex's origin into a list of source names.

        Brackets and slashes are removed, the origin is split on '+',
        the standalone fragments 'y', 'r' and 'n' are dropped and a
        trailing 'r' or 'h' is cut from the remaining fragments.

        Args:
            word (BaseWord): word to inspect

        Returns:
            list[str]: source names; empty if the word has no origin
        """
        if not word.origin:
            return []

        sources = []
        for fragment in re.sub(r"[()/]", "", word.origin).split("+"):
            # Strip first so that padded fragments such as ' y ' are still
            # recognised, and blank fragments never reach the result.
            fragment = fragment.strip()
            if not fragment or fragment in ("y", "r", "n"):
                continue
            if fragment.endswith(("r", "h")):
                fragment = fragment[:-1]
            if fragment:
                sources.append(fragment)
        return sources

    @classmethod
    def get_sources_cpd(
        cls, word: BaseWord, as_str: bool = False
    ) -> Select[tuple[BaseWord]] | list[str]:
        """Extract source words from the word's origin field

        Args:
            word (BaseWord): word to inspect
            as_str (bool): return plain source names if True, otherwise
                a select statement for the matching words
                (Default value = False)

        Returns:
            An empty list if the word's type is not 'Cpd'; otherwise
            the list of source names (as_str=True) or a select statement
            for the words with those names (as_str=False).
        """
        if word.type.type_ != "Cpd":
            return []

        sources = cls._prepare_sources_cpd(word)
        return sources if as_str else cls.words_from_source_cpd(sources)

    @staticmethod
    def _prepare_sources_cpd(word: BaseWord) -> list[str]:
        """Turn a compound's origin into a list of source names.

        Brackets, slashes and dashes are removed and the origin is
        split on '+'.

        Args:
            word (BaseWord): word to inspect

        Returns:
            list[str]: source names; empty if the word has no origin
        """
        if not word.origin:
            return []

        fragments = re.sub(r"[()/-]", "", word.origin).split("+")
        # Strip before filtering, otherwise a whitespace-only fragment
        # would end up in the result as an empty string.
        stripped = (fragment.strip() for fragment in fragments)
        return [fragment for fragment in stripped if fragment]

    @classmethod
    def words_from_source_cpd(cls, sources: list[str]) -> Select[tuple[BaseWord]]:
        """Build a statement selecting the words a compound was made from.

        Args:
            sources (list[str]): names of the source words

        Returns:
            Select statement for words whose name is in ``sources`` and
            whose type is one of 'LW' / 'Cpd'.
        """
        type_ids = cls.get_type_ids(types=("LW", "Cpd"))
        return (
            select(BaseWord)
            .filter(BaseWord.name.in_(sources))
            .filter(BaseWord.type_id.in_(type_ids))
        )

    @staticmethod
    def prepare_origin(origin: str) -> str:
        """
        Remove text in parentheses, swap the characters around each slash,
        remove slashes.

        Examples:
            zav(lo)+da(n)z(a)+fo/l(ma) => zav+daz+flo
            be(rt)i+n+(t)rac(i)+ve(sl)o => bei+n+rac+veo

        Args:
            origin (str): origin of a word

        Returns:
            str: the cleaned origin
        """
        chars = list(re.sub(r"\([^)]*\)", "", origin))
        last = len(chars) - 1

        for index, char in enumerate(chars):
            # A slash at either end has only one neighbour: nothing to swap.
            if char == "/" and 0 < index < last:
                chars[index - 1], chars[index + 1] = (
                    chars[index + 1],
                    chars[index - 1],
                )

        return "".join(chars).replace("/", "")

    @staticmethod
    def get_parent_complex(origin: str) -> str:
        """Drop the first part of an origin and join what is left.

        The origin is cleaned with ``prepare_origin`` and split on '+'.
        The first part is removed, and so is a 'y', 'r' or 'n' part
        directly following it.

        Example:
            zavdazflo -> zav(lo)+da(n)z(a)+fo/l(ma) => dazflo
            zanynurkokmio -> za(v)n(o)+y+nur+kok(fa)+mi(tr)o => nurkokmio
            cabsrusia -> cab(ro)+su/r(na)+si(tf)a => srusia
            beinracveo -> be(rt)i+n+(t)rac(i)+ve(sl)o => racveo

        Args:
            origin (str): origin of a word

        Returns:
            str: the remaining parts joined together; empty when the
                origin has a single part
        """
        parts = WordSourcer.prepare_origin(origin).split("+")[1:]
        if parts and parts[0] in ("y", "r", "n"):
            parts = parts[1:]
        return "".join(parts)
class OriginParser:  # pylint: disable=too-few-public-methods
    """
    Test class: it only stores the word it was created with.
    """

    def __init__(self, word: BaseWord):
        """
        Args:
            word (BaseWord): word kept on the instance as ``self.word``
        """
        self.word = word