python_rag_system/guess.py
2025-10-07 11:04:10 +03:00

34 lines
980 B
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import itertools
def all_combinations(alphabet, max_length):
    """Return every string over *alphabet* of length 1 through *max_length*.

    Combinations are grouped by length (all length-1 strings first, then
    length-2, and so on). Within one length they follow the order produced
    by ``itertools.product``, i.e. the rightmost position varies fastest.

    Args:
        alphabet: Iterable of string symbols to combine (for example a
            ``str``); each symbol is joined with ``''.join``.
        max_length: Largest combination length to generate, inclusive.

    Returns:
        A list of strings. The list is empty when ``max_length`` is less
        than 1, because the length range is then empty.
    """
    all_combos = []
    for length in range(1, max_length + 1):
        # Feed the joined tuples straight into extend() instead of building
        # a temporary list for every length.
        all_combos.extend(
            map(''.join, itertools.product(alphabet, repeat=length))
        )
    return all_combos
# Lowercase Russian alphabet (33 letters, including 'ё'); passed to
# all_combinations() as the symbol set further down in this script.
russian_lower = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'
def vectorize(word, combinations):
    """Encode *word* as the indices of its adjacent character pairs.

    Every pair of neighbouring characters (``word[i] + word[i + 1]``) is
    looked up in *combinations* and replaced by its position there.

    Args:
        word: String to encode.
        combinations: Sequence of strings providing ``.index()``, such as
            the list returned by ``all_combinations``.

    Returns:
        A list of ``len(word) - 1`` indices, an empty list for an empty
        word, or ``None`` when *word* consists of exactly one character.

    Raises:
        ValueError: Propagated from ``combinations.index`` when a pair is
            not present in *combinations*.
    """
    if len(word) == 1:
        # Existing contract: a single character has no pairs -> None.
        return None
    # zip(word, word[1:]) yields (previous, current) for each adjacent pair,
    # so the "previous" character advances on every step. The earlier loop
    # never advanced it and paired every character with word[0].
    return [
        combinations.index(prev + curr)
        for prev, curr in zip(word, word[1:])
    ]
# Demo run: build the table of all 1- and 2-letter strings over the Russian
# alphabet, then encode a sample word against that table.
combinations = all_combinations(russian_lower, 2)
word = 'маме'
result = vectorize(word, combinations)

# Label and table go on separate lines, exactly as two print() calls would.
print("Все комбинации длиной 1-2:", combinations, sep="\n")
print("Вектор:", result)