# Easier to run in JupyterLab; cells are separated with #
#################
import os
import sys
import pickle

import spacy
import torch
import torchaudio
import torchaudio.transforms as T
import soundfile as sf
import ipywidgets as widgets
from IPython.display import display

#################
def load_model():
    # Silero TTS model (Russian, v3_1)
    device = torch.device('cpu')
    torch.set_num_threads(8)
    local_file = './data/model.pt'
    if not os.path.isfile(local_file):
        torch.hub.download_url_to_file('https://models.silero.ai/models/tts/ru/v3_1_ru.pt',
                                       local_file)
    model = torch.package.PackageImporter(local_file).load_pickle("tts_models", "model")
    model.to(device)
    return model


def load_ready_samples(path='./temp/ready_samples.pickle'):
    # cache of already synthesized sentences
    ready_samples = {}
    if os.path.isfile(path):
        with open(path, 'rb') as handle:
            ready_samples = pickle.load(handle)
    return ready_samples


def print_ready_samples(ready_samples):
    for key in sorted(ready_samples):
        print(key)


def save_ready_samples(ready_samples, path='./temp/ready_samples.pickle'):
    with open(path, 'wb') as handle:
        pickle.dump(ready_samples, handle, protocol=pickle.HIGHEST_PROTOCOL)


def transform_48000(t):
    # resample a (waveform, sample_rate) pair returned by torchaudio.load to 48 kHz
    wf, sr = t
    resample_transform = T.Resample(sr, 48000)
    wf_resampled = resample_transform(wf)
    return wf_resampled


def load_additional_sounds(additional_sounds={}):
    # Additional sounds; ready-made triggers can be inserted the same way
    # (see the sketch after this function).
    additional_sounds["|"] = transform_48000(torchaudio.load('./data/snap.wav'))
    additional_sounds["*"] = transform_48000(torchaudio.load('./data/chestclosed.wav'))
    additional_sounds["@"] = transform_48000(torchaudio.load('./data/pop.wav'))
    additional_sounds["^"] = transform_48000(torchaudio.load('./data/explosion.wav'))
    additional_sounds["$"] = transform_48000(torchaudio.load('./data/breathout.wav'))
    additional_sounds["%"] = transform_48000(torchaudio.load('./data/windchime.wav'))
    return additional_sounds
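
# A minimal sketch of registering an extra trigger sound in the same way; the
# helper name, the marker character and the './data/bell.wav' path are
# hypothetical examples, not part of the original pipeline.
def add_trigger_sound(additional_sounds, marker, wav_path):
    # resample the clip to 48 kHz and bind it to a single-character marker
    additional_sounds[marker] = transform_48000(torchaudio.load(wav_path))
    return additional_sounds

# usage (assumed file): add_trigger_sound(additional_sounds, '&', './data/bell.wav')
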
def add_wf(audio, ch, sample, wf):
    # The sample (text) must be longer than the inserted clip!!!
    # If it is shorter, the audio is overwritten by the clip and the total duration grows.
    # For every occurrence of the marker character ch in sample, mix wf into audio
    # at the corresponding position.
    while ch in sample:
        pos = sample.find(ch)
        # map the character position to a sample position in the audio
        temp_pos_before = pos * len(audio) // len(sample)
        temp_pos_after = pos * len(audio) // len(sample) + len(wf[0])
        if temp_pos_after <= len(audio):
            a1 = audio[:temp_pos_before]                                   # before
            a2 = torch.add(audio[temp_pos_before:temp_pos_after], wf[0])   # mixed segment
            a3 = audio[temp_pos_after:]                                    # after
            audio = torch.cat([a1, a2, a3], dim=0)
        else:
            audio = wf[0]
        # remove the processed marker from sample
        sample = sample[:pos] + sample[pos + 1:]
    return audio


def tts(text_sample, additional_sounds={}, ready_samples={}):
    # uses the globals model, wordforms and lemmas defined below
    # split the text into sentences, one per line
    text_sample = text_sample.replace('.', '.\n')
    text_sample = text_sample.replace('?', '?\n')
    samples = text_sample.split('\n')
    samples = [s for s in samples if len(s) > 1]
    # synthesis parameters
    sample_rate = 48000
    speaker = 'xenia'  # alternatively 'baya'
    put_accent = True
    put_yo = True
    # accumulator for concatenation, starting with a 1 s pause
    res = torch.zeros(int(sample_rate * 1.000))
    # progress bar
    w = widgets.IntProgress(
        value=0,
        min=0,
        max=len(samples) - 1,
        description='Loading:',
        bar_style='info',  # 'success', 'info', 'warning', 'danger' or ''
        style={'bar_color': 'maroon'},
        orientation='horizontal'
    )
    display(w)
    # for every sentence
    for i, sample in enumerate(samples):
        w.value = i
        # strip surrounding whitespace and lowercase
        sample = sample.strip().lower()
        # add stress marks
        sample = accentuate_plus_custom(sample)
        sample = accentuate_plus(accentuate(sample, wordforms, lemmas))
        if sample not in ready_samples:
            # wrap the sentence in SSML <speak> tags for apply_tts
            ssml_sample = f'<speak>{sample}</speak>'
            audio = model.apply_tts(ssml_text=ssml_sample,
                                    speaker=speaker,
                                    sample_rate=sample_rate,
                                    put_accent=put_accent,
                                    put_yo=put_yo)
            # cut the click at the end (most likely an end-of-string bug)
            audio = audio[:-int(sample_rate * 0.342)]
            # add a pause after the sentence
            audio = torch.cat([audio, torch.zeros(int(sample_rate * 0.742))], dim=0)
            # the sentence must be long enough for the inserted clip to fit!!!
            for ch, wf in additional_sounds.items():
                if ch in sample:
                    audio = add_wf(audio, ch, sample, wf)
            # cache the result
            ready_samples[sample] = audio
        # append the sentence to the overall audio
        res = torch.cat([res, ready_samples[sample]], dim=0)
    return res, sample_rate
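
# A quick self-check of add_wf with synthetic tensors (not part of the pipeline):
# one second of silence at 48 kHz plus a 0.1 s fake marker clip shaped like the
# [channels, frames] output of torchaudio.load, placed at the start of the text.
_audio = torch.zeros(48000)
_marker = (torch.rand(1, 4800) - 0.5) * 0.1
_mixed = add_wf(_audio, '@', '@ test sample', _marker)
assert _mixed.shape[0] == _audio.shape[0]  # marker is mixed in, total length unchanged
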
def check_files(i, o):
    # skip inputs whose output already exists and carries the input's mtime
    if os.path.exists(o) and os.stat(i).st_mtime == os.stat(o).st_mtime:
        print('Conversion already done.')
        return False
    return True


def save_wav(i, o, audio, sample_rate):
    sf.write(o, audio, sample_rate)
    # copy the input's mtime onto the output so check_files can detect it later
    time = os.stat(i).st_mtime
    os.utime(o, (time, time))


def load():
    with open(file="./data/lemmas.dat", mode='rb') as f:
        lemmas = pickle.loads(f.read())
    with open(file="./data/wordforms.dat", mode='rb') as f:
        wordforms = pickle.loads(f.read())
    return lemmas, wordforms


##########
# download the data files from:
# https://github.com/einhornus/russian_accentuation
def introduce_special_cases_from_dictionary(dictionary):
    for word in dictionary:
        if (" " in word) or ("-" in word):
            if len(dictionary[word]) == 1:
                try:
                    ru_nlp.tokenizer.add_special_case(word, [{"ORTH": dictionary[word][0]["accentuated"]}])
                    ru_nlp.tokenizer.add_special_case(word.capitalize(), [{"ORTH": dictionary[word][0]["accentuated"].capitalize()}])
                except Exception:
                    pass


def compatible(interpretation, lemma, tag, lemmas):
    if lemma in lemmas:
        pos_exists = False
        possible_poses = lemmas[lemma]["pos"]
        for i in range(len(possible_poses)):
            if possible_poses[i] in tag:
                pos_exists = True
                break
        if not pos_exists:
            return False
    if interpretation == "canonical":
        return True
    if "plural" in interpretation and not ("Number=Plur" in tag):
        return False
    if "singular" in interpretation and not ("Number=Sing" in tag):
        return False
    if not ("nominative" in interpretation) and ("Case=Nom" in tag):
        return False
    if not ("genitive" in interpretation) and ("Case=Gen" in tag):
        return False
    if not ("dative" in interpretation) and ("Case=Dat" in tag):
        return False
    if not ("accusative" in interpretation) and ("Case=Acc" in tag):
        adj = False
        if "ADJ" in tag and "Animacy=Inan" in tag:
            adj = True
        if not adj:
            return False
    if not ("instrumental" in interpretation) and ("Case=Ins" in tag):
        return False
    if not ("prepositional" in interpretation) and not ("locative" in interpretation) and ("Case=Loc" in tag):
        return False
    if (("present" in interpretation) or ("future" in interpretation)) and ("Tense=Past" in tag):
        return False
    if (("past" in interpretation) or ("future" in interpretation)) and ("Tense=Pres" in tag):
        return False
    if (("past" in interpretation) or ("present" in interpretation)) and ("Tense=Fut" in tag):
        return False
    return True


def derive_single_accentuation(interpretations):
    if len(interpretations) == 0:
        return None
    res = interpretations[0]["accentuated"]
    for i in range(1, len(interpretations)):
        if interpretations[i]["accentuated"] != res:
            return None
    return res
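
# A quick self-check of derive_single_accentuation with hypothetical interpretation
# dicts: the accentuated form is returned only when every interpretation agrees.
assert derive_single_accentuation([{"accentuated": "зам+ок"}, {"accentuated": "зам+ок"}]) == "зам+ок"
assert derive_single_accentuation([{"accentuated": "зам+ок"}, {"accentuated": "з+амок"}]) is None
assert derive_single_accentuation([]) is None
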
def accentuate_word(word, lemmas):
    if ("tag" in word) and ("PROPN" in word["tag"]):
        # leave proper nouns untouched
        return word["token"]
    if word["is_punctuation"] or ("interpretations" not in word):
        return word["token"]
    else:
        res = derive_single_accentuation(word["interpretations"])
        if res is not None:
            return res
        else:
            # keep only the interpretations compatible with the spacy tag
            compatible_interpretations = []
            for i in range(len(word["interpretations"])):
                if compatible(word["interpretations"][i]["form"],
                              word["interpretations"][i]["lemma"],
                              word["tag"], lemmas):
                    compatible_interpretations.append(word["interpretations"][i])
            res = derive_single_accentuation(compatible_interpretations)
            if res is not None:
                return res
            else:
                # fall back to interpretations whose lemma matches spacy's lemma
                new_compatible_interpretations = []
                for i in range(len(compatible_interpretations)):
                    if compatible_interpretations[i]["lemma"] == word["lemma"]:
                        new_compatible_interpretations.append(compatible_interpretations[i])
                res = derive_single_accentuation(new_compatible_interpretations)
                if res is not None:
                    return res
                else:
                    return word["token"]


def tokenize(text, wordforms):
    res = []
    doc = ru_nlp(text)
    for token in doc:
        if token.pos_ != 'PUNCT':
            word = {"token": token.text, "tag": token.tag_}
            if word["token"] in wordforms:
                word["interpretations"] = wordforms[word["token"]]
            if word["token"].lower() in wordforms:
                word["interpretations"] = wordforms[word["token"].lower()]
            word["lemma"] = token.lemma_
            word["is_punctuation"] = False
            word["uppercase"] = word["token"].upper() == word["token"]
            word["starts_with_a_capital_letter"] = word["token"][0].upper() == word["token"][0]
        else:
            word = {"token": token.text, "is_punctuation": True}
        word["whitespace"] = token.whitespace_
        res.append(word)
    return res


def accentuate(text, wordforms, lemmas):
    res = ""
    words = tokenize(text, wordforms)
    for i in range(len(words)):
        accentuated = accentuate_word(words[i], lemmas)
        if "starts_with_a_capital_letter" in words[i] and words[i]["starts_with_a_capital_letter"]:
            accentuated = accentuated.capitalize()
        if "uppercase" in words[i] and words[i]["uppercase"]:
            accentuated = accentuated.upper()
        res += accentuated
        res += words[i]["whitespace"]
    return res


##########
def accentuate_plus_custom(s):
    # manual stress overrides applied before the dictionary-based accentuation
    spec_acc = {
        'бэмби': 'б+эмби',
        # 'скрещенны': 'скр+ещенны',
        # 'голоса': 'г+олоса',
        # 'головы': 'голов+ы',
        # 'находитесь': 'нах+одитесь',
        # 'отпустите': 'отпуст+ите',
        # 'примете': 'пр+имете',
        # 'вы цените': 'вы ц+ените',
        # 'вы так цените': 'вы так ц+ените',
        # 'цвета': 'цвет+а',
        # 'научитесь': 'науч+итесь',
        # 'ходите': 'х+одите',
        # 'уходите': 'ух+одите',
        # 'перегружена': 'перег+ружена',
    }
    for key, value in spec_acc.items():
        s = s.replace(key, value)
    return s


def accentuate_plus(s):
    # rewrite a combining acute accent (U+0301) as Silero's '+' before the stressed vowel
    while '\u0301' in s:
        pos = s.find('\u0301')
        s = s[:pos - 1] + '+' + s[pos - 1] + s[pos + 1:]
    return s
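
# A quick self-check of accentuate_plus: a combining acute accent after a vowel is
# rewritten as '+' before that vowel ('приве\u0301т' is just an illustrative word).
assert accentuate_plus('приве\u0301т') == 'прив+ет'
assert accentuate_plus('без ударения') == 'без ударения'  # no accent mark, unchanged
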
##########
# accentuation (stress marks)
ru_nlp = spacy.load('ru_core_news_lg')
lemmas, wordforms = load()
introduce_special_cases_from_dictionary(wordforms)

##########
# Silero
model = load_model()
ready_samples = load_ready_samples()
additional_sounds = load_additional_sounds()

##########
# folder = 'BS 2023-04-02'
folder = 'scripts/BS RU'
names = [
    # "BBB 01 INDUCTION",
    # "BBB 02 GOOD GIRL",
    # "BBB 03 BIMBO MIND",
    # "BBB 04 BIMBO TRAINED",
    # "BBB 05 BIMBO BRAIN BREAK",
    # "BBB 07 AMNESIA",
    # "BBB 08 BIMBO VIRUS",
    # "BBB 09 BJ LOOP",
    # "BBB 10 COMPLY LOOP",
    # "BBB 12 MUSCLE MOMMY",
    # "BBB 13 BIMBO SQUEEZE",
    # "BBB 14 BIMBO CUM SLUT",
    # "BBB 15 BIMBO CHEER",
    # "BBB 16 Advanced Induction Mind Wipe",
    # "BBB 17 Advanced Bimbo Mind and Body (Patreon Exclusive)",
    # "BBB 18 Advanced Good Girl For Cock (Patreon Exclusive)",
    # "BBB RU 01 INDUCTION",
    # "BBB RU 02 GOOD GIRL",
    # "BBB RU 03 BIMBO MIND",
    # "BBB RU 04 BIMBO TRAINED",
    # "BBB RU 05 BIMBO BRAIN BREAK",
    # "BBB RU 07 AMNESIA",
    # "BBB RU 08 BIMBO VIRUS",
    # "BBB RU 09 BJ LOOP",
    # "BBB RU 10 COMPLY LOOP",
    # "BBB RU 13 BIMBO SQUEEZE",
    # "BBB RU 15 BIMBO CHEER",
    # "01 Bubble Induction",
    # "02 Bubble Acceptance",
    # "03 Bambi Named and Drained",
    # "BS 04 Bambi IQ Lock",
    # "BS 05 Bambi Body Lock",
    # "BS 06 Bambi Attitude Lock",
    # "BS 07 Bambi Uniformed",
    # "BS 08 Bambi Takeover",
    # "BS 09 Bambi Cockslut",
    # "10 Bambi Awakens",
    "50 Harmony 1",
    "51 Harmony 2",
    # "BS RU 01 Bubble Induction",
]
files = [folder + '/' + file_path + '.txt' for file_path in names]
print(files)

##########
for name in names:
    i = folder + '/' + name + '.txt'
    o = folder + '/' + name + '.wav'
    with open(i, 'rt', encoding='utf-8') as file:
        text_sample = file.read()
    print(i)
    if check_files(i, o):
        audio, sample_rate = tts(text_sample, additional_sounds, ready_samples)
        save_wav(i, o, audio, sample_rate)
        save_ready_samples(ready_samples)

#############
# for a one-off request
from IPython.display import Audio

text_sample = '''
Когда я вытягиваю руку и лопаю Ваш пузырик.@.
Хорошая девочка.
Закрываю ларец.*.
Бэмби спи.$.
'''
audio, sample_rate = tts(text_sample, additional_sounds, ready_samples={})
sf.write('temp/test111.wav', audio, sample_rate)
audio, sample_rate = sf.read('temp/test111.wav')
display(Audio(audio, rate=sample_rate))
# torchaudio.save(f'тест1.wav', audio[0], sample_rate)
###############