Source code for conlang.utils
from typing import Container, Dict, List, Optional

from .phonemes import PHONEME_SET, COMMON_PHONEME_SET
[docs]
def parse_phonemes(
    word: str,
    *,
    phoneme_set: Optional[Container[str]] = None,
    max_length: int = 3,
) -> List[str]:
    """
    Splits a word string into phonemes using greedy longest-match lookup.

    At each position the longest candidate (up to ``max_length`` characters)
    that is present in the phoneme inventory is consumed. When no candidate
    matches, the single character at that position is emitted as-is so that
    no part of the word is ever dropped.

    Args:
        word (str): The word to split into phonemes as a string.
        phoneme_set (Optional[Container[str]]): The inventory to match
            against. Defaults to the module-level PHONEME_SET when omitted.
        max_length (int): The longest phoneme, in characters, to try at each
            position. Defaults to 3.

    Returns:
        List[str]: A list of phonemes, in word order.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    # Resolve the default lazily so callers can supply their own inventory.
    inventory = PHONEME_SET if phoneme_set is None else phoneme_set

    phonemes: List[str] = []
    i = 0
    while i < len(word):
        # Try the widest window first so multi-character phonemes win over
        # their single-character prefixes.
        for size in range(max_length, 0, -1):
            chunk = word[i:i + size]
            if chunk in inventory:
                phonemes.append(chunk)
                # Near the end of the word the slice may be shorter than
                # `size`; advance by what was actually consumed.
                i += len(chunk)
                break
        else:
            # No window matched: keep the unknown character verbatim.
            phonemes.append(word[i])
            i += 1
    return phonemes
[docs]
def process_phonemes(phonemes: Dict[str, List[str]]) -> Dict[str, List[str]]:
    """
    Weights phoneme lists so that common phonemes appear more often.

    Every entry is split with parse_phonemes; when all of the resulting
    parts belong to COMMON_PHONEME_SET the entry is listed twice, otherwise
    once. The input dictionary and its lists are left untouched.

    Args:
        phonemes (Dict[str, List[str]]): A dictionary of phoneme categories and their respective
            phonemes.

    Returns:
        Dict[str, List[str]]: A new dictionary with the same categories, in which common
            phonemes are duplicated.
    """
    weighted: Dict[str, List[str]] = {}
    for category, members in phonemes.items():
        bucket: List[str] = []
        for phoneme in members:
            is_common = all(
                part in COMMON_PHONEME_SET for part in parse_phonemes(phoneme)
            )
            # Two copies for common entries, one for everything else.
            bucket.extend([phoneme] * (2 if is_common else 1))
        weighted[category] = bucket
    return weighted
[docs]
def process_patterns(patterns: List[str], phonemes: Dict[str, List[str]]) -> List[str]:
    """
    Weights and orders patterns so that simpler ones come first and more often.

    A pattern is read as a sequence of category keys into ``phonemes``. It
    counts as "clustered" when any category it references holds at least one
    entry that is not in PHONEME_SET. Cluster-free patterns are listed twice,
    clustered ones once, and the result is ordered by pattern length
    (shortest first, original relative order kept among equal lengths).

    Args:
        patterns (List[str]): A list of patterns to process.
        phonemes (Dict[str, List[str]]): A dictionary of phoneme categories and their respective
            phonemes.

    Returns:
        List[str]: A new list of patterns where simpler patterns are duplicated and
            shorter patterns come first.

    Raises:
        KeyError: If a pattern references a category missing from ``phonemes``.
    """
    def contains_cluster(pattern: str) -> bool:
        """
        Reports whether any category used by the pattern holds a cluster.

        Args:
            pattern (str): The pattern to check.

        Returns:
            bool: True if the pattern contains clusters, False otherwise.
        """
        # Categories are visited in pattern order; stops at the first cluster.
        return any(
            member not in PHONEME_SET
            for symbol in pattern
            for member in phonemes[symbol]
        )

    weighted: List[str] = []
    for pattern in patterns:
        # Cluster-free patterns get a second copy.
        copies = 1 if contains_cluster(pattern) else 2
        weighted.extend([pattern] * copies)

    # Stable sort: shorter patterns first, ties keep their insertion order.
    weighted.sort(key=len)
    return weighted