"""
This module contains an addon for basic Word Model,
which makes it possible to work with word's sources
"""
from __future__ import annotations
import re
from typing import Iterable
from sqlalchemy import select, or_
from sqlalchemy.sql.selectable import Select
from ..type import BaseType
from ..word import BaseWord
[docs]
class WordSource:
    """Single source entry parsed from ``Word.origin`` of a Prim.

    A source string has the form ``'3/5R mesto'``: two integers separated
    by a slash, immediately followed by a one-character language code, and
    then (after a space) the transcription of the source word.

    Attributes:
        coincidence (int): the integer before the slash.
        length (int): the integer after the slash.
        language (str): the single character following ``length``;
            ``LANGUAGES`` maps the known codes to language names.
        transcription (str | None): the text following the first space of
            the source string, or ``None`` when there is none.
    """

    # "<digits>/<digits><one word character>", e.g. "3/5R"
    PATTERN_SOURCE = r"\d+\/\d+\w"
    LANGUAGES = {
        "E": "English",
        "C": "Chinese",
        "H": "Hindi",
        "R": "Russian",
        "S": "Spanish",
        "F": "French",
        "J": "Japanese",
        "G": "German",
    }

    def __init__(self, source: str):
        """
        Args:
            source (str): one source entry, e.g. ``'3/5R mesto'``.

        Raises:
            ValueError: if ``source`` contains nothing that matches
                ``PATTERN_SOURCE``.
        """
        compatibility_search = re.search(self.PATTERN_SOURCE, source)
        self.coincidence, self.length, self.language = self.parse_source(
            compatibility_search
        )

        # Everything from the first space onwards is the transcription.
        transcription_search = re.search(rf"(?!{self.PATTERN_SOURCE}) .+", source)
        self.transcription = (
            str(transcription_search[0]).strip() if transcription_search else None
        )

    def __str__(self):
        """
        Returns:
            str: ``'<WordSource 3/5R mesto>'``-style representation built
            from the class name and :attr:`as_string`.
        """
        return f"<{self.__class__.__name__} {self.as_string}>"

    @staticmethod
    def parse_source(
        compatibility_search: re.Match[str] | None,
    ) -> tuple[int, int, str]:
        """Split a ``PATTERN_SOURCE`` match into its three components.

        Args:
            compatibility_search: result of searching a source string for
                ``PATTERN_SOURCE``; ``None`` when nothing matched.

        Returns:
            tuple[int, int, str]: ``(coincidence, length, language)``.

        Raises:
            ValueError: if ``compatibility_search`` is empty / ``None``.
        """
        if not compatibility_search:
            raise ValueError("No compatible source found")

        matched = compatibility_search[0]
        # The last character is the language code; the rest is "<int>/<int>".
        coincidence, length = matched[:-1].split("/")
        return int(coincidence), int(length), matched[-1:]

    @property
    def as_string(self) -> str:
        """
        Format WordSource as string, for example, '3/5R mesto'

        A numeric part equal to ``0`` is a regular value and is rendered
        as is; only a missing field produces the empty string.

        Returns:
            str: the formatted source, or ``''`` when any part is missing.
        """
        numbers_missing = self.coincidence is None or self.length is None
        if numbers_missing or not self.language or not self.transcription:
            return ""
        return f"{self.coincidence}/{self.length}{self.language} {self.transcription}"
[docs]
class WordSourcer:
    """Helpers that extract source words from a word's ``origin`` field."""

    # these prims have switched djifoas like 'flo' for 'folma'
    switch_prims = [
        "canli",
        "farfu",
        "folma",
        "forli",
        "kutla",
        "marka",
        "mordu",
        "sanca",
        "sordi",
        "suksi",
        "surna",
    ]

    # Fragments that are skipped when they make up a whole "+"-separated
    # part of an origin string.
    _LINKING_PARTS = ("y", "r", "n")
    # Characters dropped from an origin before it is split on "+".
    _CPX_NOISE = str.maketrans("", "", "()/")
    _CPD_NOISE = str.maketrans("", "", "()/-")

    @classmethod
    def get_sources_prim(cls, word: BaseWord) -> list[WordSource] | str | None:
        """Describe the sources of a Prim word.

        Args:
            word (BaseWord): the word to inspect.

        Returns:
            ``None`` if the word's type group is not ``"Prim"``; the result
            of :meth:`_get_sources_c_prim` for a ``"C-Prim"``; otherwise a
            string ``"<name>: <origin>"`` with `` < <origin_x>`` appended
            when ``origin_x`` is set.
        """
        # NOTE: existing Prim types listed by the original author:
        # "C", "D", "I", "L", "N", "O", "S"
        if word.type.group != "Prim":
            return None

        if word.type.type_ == "C-Prim":
            return cls._get_sources_c_prim(word)

        origin_x = " < " + word.origin_x if word.origin_x else ""
        return f"{word.name}: {word.origin}{origin_x}"

    @staticmethod
    def _get_sources_c_prim(word: BaseWord) -> list[WordSource] | None:
        """Parse every `` | ``-separated entry of a C-Prim's origin.

        Args:
            word (BaseWord): the word to inspect.

        Returns:
            list[WordSource] | None: one ``WordSource`` per entry, or
            ``None`` if the word's type is not ``"C-Prim"``.
        """
        if word.type.type_ != "C-Prim":
            return None

        return [WordSource(source) for source in str(word.origin).split(" | ")]

    @classmethod
    def get_sources_cpx(
        cls, word: BaseWord, as_str: bool = False
    ) -> Select[tuple[BaseWord]] | list[str]:
        """Extract the source words of a complex from its origin field.

        Args:
            word (BaseWord): the complex word to inspect.
            as_str (bool): return plain source names if True, otherwise a
                select statement for the matching words.
                (Default value = False)

        Example:
            'foldjacea' > ['forli', 'djano', 'cenja']

        Returns:
            The words from which ``word.name`` was created, or an empty
            list if the word's type group is not ``"Cpx"``.
        """
        if word.type.group != "Cpx":
            return []

        sources = cls._prepare_sources_cpx(word)
        return sources if as_str else cls.words_from_source_cpx(sources)

    @classmethod
    def words_from_source_cpx(cls, sources: list[str]) -> Select[tuple[BaseWord]]:
        """Build a select for the words a complex was made of.

        Args:
            sources (list[str]): names of the source words.

        Returns:
            Select of ``BaseWord`` rows whose name is in ``sources`` and
            whose type is not one of the "LW" / "Cpd" types.
        """
        excluded_type_ids = cls.get_type_ids(types=("LW", "Cpd"))
        return (
            select(BaseWord)
            .filter(BaseWord.name.in_(sources))
            .filter(BaseWord.type_id.notin_(excluded_type_ids))
        )

    @classmethod
    def get_type_ids(cls, types: Iterable[str]):
        """
        Get ids of specific types from provided list

        Args:
            types (Iterable[str]): values matched against the ``type_``,
                ``type_x`` and ``group`` columns of ``BaseType``.

        Returns:
            Scalar subquery selecting the ids of the matching types.
        """
        matches_any_column = or_(
            BaseType.type_.in_(types),
            BaseType.type_x.in_(types),
            BaseType.group.in_(types),
        )
        return select(BaseType.id).filter(matches_any_column).scalar_subquery()

    @staticmethod
    def _prepare_sources_cpx(word: BaseWord) -> list[str]:
        """Turn a complex's origin into the list of its source names.

        Parentheses and slashes are removed, the string is split on "+",
        fragments equal to "y", "r" or "n" are skipped, and a trailing
        "r" or "h" is cut off the remaining fragments.

        Args:
            word (BaseWord): the complex word to inspect.

        Returns:
            list[str]: source names; empty if the word has no origin.
        """
        if not word.origin:
            return []

        fragments = word.origin.translate(WordSourcer._CPX_NOISE).split("+")
        return [
            fragment[:-1] if fragment.endswith(("r", "h")) else fragment
            for fragment in fragments
            if fragment not in WordSourcer._LINKING_PARTS
        ]

    @classmethod
    def get_sources_cpd(
        cls, word: BaseWord, as_str: bool = False
    ) -> Select[tuple[BaseWord]] | list[str]:
        """Extract the source words of a compound from its origin field.

        Args:
            word (BaseWord): the compound word to inspect.
            as_str (bool): return plain source names if True, otherwise a
                select statement for the matching words.
                (Default value = False)

        Returns:
            The words from which ``word.name`` was created, or an empty
            list if the word's type is not ``"Cpd"``.
        """
        if word.type.type_ != "Cpd":
            return []

        sources = cls._prepare_sources_cpd(word)
        return sources if as_str else cls.words_from_source_cpd(sources)

    @staticmethod
    def _prepare_sources_cpd(word: BaseWord) -> list[str]:
        """Turn a compound's origin into the list of its source names.

        Parentheses, slashes and hyphens are removed, the string is split
        on "+" and every fragment is stripped of surrounding whitespace.

        Args:
            word (BaseWord): the compound word to inspect.

        Returns:
            list[str]: non-empty source names; empty if the word has no
            origin.
        """
        if not word.origin:
            return []

        fragments = word.origin.translate(WordSourcer._CPD_NOISE).split("+")
        # Strip first, then filter: a whitespace-only fragment must not
        # end up in the result as an empty name.
        stripped = (fragment.strip() for fragment in fragments)
        return [fragment for fragment in stripped if fragment]

    @classmethod
    def words_from_source_cpd(cls, sources: list[str]) -> Select[tuple[BaseWord]]:
        """Build a select for the words a compound was made of.

        Args:
            sources (list[str]): names of the source words.

        Returns:
            Select of ``BaseWord`` rows whose name is in ``sources`` and
            whose type is one of the "LW" / "Cpd" types.
        """
        type_ids = cls.get_type_ids(types=("LW", "Cpd"))
        return (
            select(BaseWord)
            .filter(BaseWord.name.in_(sources))
            .filter(BaseWord.type_id.in_(type_ids))
        )

    @staticmethod
    def prepare_origin(origin: str) -> str:
        """
        Remove text in parentheses, reverse characters between slash, remove slash.

        Examples:
            zav(lo)+da(n)z(a)+fo/l(ma) => zav+daz+flo
            be(rt)i+n+(t)rac(i)+ve(sl)o => bei+n+rac+veo

        Args:
            origin (str): origin string of a complex.

        Returns:
            str: the "+"-separated parts as they appear in the word.
        """
        chars = list(re.sub(r"\([^)]*\)", "", origin))

        # A slash with a character on both sides marks a switched pair:
        # swap its neighbours. A leading or trailing slash is only removed.
        for index in range(1, len(chars) - 1):
            if chars[index] == "/":
                chars[index - 1], chars[index + 1] = (
                    chars[index + 1],
                    chars[index - 1],
                )

        return "".join(chars).replace("/", "")

    @staticmethod
    def get_parent_complex(origin: str) -> str:
        """Drop the first part of an origin and join what is left.

        If the part that follows the dropped one is "y", "r" or "n",
        it is dropped as well.

        Args:
            origin (str): origin string of a complex.

        Example:
            zavdazflo -> zav(lo)+da(n)z(a)+fo/l(ma) => dazflo
            zanynurkokmio -> za(v)n(o)+y+nur+kok(fa)+mi(tr)o => nurkokmio
            cabsrusia -> cab(ro)+su/r(na)+si(tf)a => srusia
            beinracveo -> be(rt)i+n+(t)rac(i)+ve(sl)o => racveo

        Returns:
            str: the remaining parts joined together; ``''`` when the
            origin has a single part.
        """
        remaining = WordSourcer.prepare_origin(origin).split("+")[1:]
        if remaining and remaining[0] in WordSourcer._LINKING_PARTS:
            remaining = remaining[1:]
        return "".join(remaining)
[docs]
class OriginParser:  # pylint: disable=too-few-public-methods
    """
    Test Class

    Currently it only keeps a reference to the word it was created with.
    """

    def __init__(self, word: BaseWord):
        """
        Args:
            word (BaseWord): the word whose origin is to be parsed.
        """
        self.word = word