diff --git "a/TMIDIX.py" "b/TMIDIX.py" --- "a/TMIDIX.py" +++ "b/TMIDIX.py" @@ -5,9 +5,8 @@ r'''############################################################################ # # # Tegridy MIDI X Module (TMIDI X / tee-midi eks) -# Version 1.0 # -# NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1437 +# NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1450 # # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au # @@ -26,7 +25,7 @@ r'''############################################################################ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -47,9 +46,23 @@ r'''############################################################################ # Copyright 2020 Peter Billam # ################################################################################### -###################################################################################''' +################################################################################### +''' + +################################################################################### + +__version__ = "25.9.22" + +print('=' * 70) +print('TMIDIX Python module') +print('Version:', __version__) +print('=' * 70) +print('Loading module...') + +################################################################################### import sys, struct, copy + Version = '6.7' VersionDate = '20201120' @@ -1439,7 +1452,6 @@ def _encode(events_lol, unknown_callback=None, never_add_eot=False, ################################################################################### # # Tegridy MIDI X Module (TMIDI X / tee-midi eks) -# Version 1.0 # # Based upon and includes the amazing MIDI.py module v.6.7. 
by Peter Billam # pjb.com.au @@ -1473,9 +1485,13 @@ import multiprocessing from itertools import zip_longest from itertools import groupby +from itertools import cycle +from itertools import product from collections import Counter from collections import defaultdict +from collections import OrderedDict +from collections import deque from operator import itemgetter @@ -1485,6 +1501,9 @@ from difflib import SequenceMatcher as SM import statistics import math +from math import gcd + +from functools import reduce import matplotlib.pyplot as plt @@ -1492,6 +1511,17 @@ import psutil import json +from pathlib import Path + +import shutil + +import hashlib + +from array import array + +from pathlib import Path +from fnmatch import fnmatch + ################################################################################### # # Original TMIDI Tegridy helper functions @@ -3700,19 +3730,52 @@ def validate_pitches(chord, channel_to_check = 0, return_sorted = True): chord.sort(key = lambda x: x[4], reverse=True) return chord -def adjust_score_velocities(score, max_velocity): +def adjust_score_velocities(score, + max_velocity, + adj_per_channel=False, + adj_in_place=True + ): + + if adj_in_place: + buf = score + + else: + buf = copy.deepcopy(score) + + notes = [evt for evt in buf if evt[0] == 'note'] + + if not notes: + return buf + + if adj_per_channel: + channel_max = {} + + for _, _, _, ch, _, vel, _ in notes: + channel_max[ch] = max(channel_max.get(ch, 0), vel) + + channel_factor = { + ch: (max_velocity / vmax if vmax > 0 else 1.0) + for ch, vmax in channel_max.items() + } - min_velocity = min([c[5] for c in score]) - max_velocity_all_channels = max([c[5] for c in score]) - min_velocity_ratio = min_velocity / max_velocity_all_channels + for evt in buf: + if evt[0] == 'note': + ch = evt[3] + factor = channel_factor.get(ch, 1.0) + new_vel = int(evt[5] * factor) + evt[5] = max(1, min(127, new_vel)) - max_channel_velocity = max([c[5] for c in score]) - if max_channel_velocity < 
min_velocity: - factor = max_velocity / min_velocity else: - factor = max_velocity / max_channel_velocity - for i in range(len(score)): - score[i][5] = int(score[i][5] * factor) + global_max = max(vel for _, _, _, _, _, vel, _ in notes) + factor = max_velocity / global_max if global_max > 0 else 1.0 + + for evt in buf: + if evt[0] == 'note': + new_vel = int(evt[5] * factor) + evt[5] = max(1, min(127, new_vel)) + + if not adj_in_place: + return buf def chordify_score(score, return_choridfied_score=True, @@ -3844,7 +3907,11 @@ def chordify_score(score, else: return None -def fix_monophonic_score_durations(monophonic_score): +def fix_monophonic_score_durations(monophonic_score, + min_notes_gap=1, + min_notes_dur=1, + extend_durs=False + ): fixed_score = [] @@ -3856,15 +3923,21 @@ def fix_monophonic_score_durations(monophonic_score): nmt = monophonic_score[i+1][1] if note[1]+note[2] >= nmt: - note_dur = nmt-note[1]-1 + note_dur = max(1, nmt-note[1]-min_notes_gap) else: - note_dur = note[2] - - new_note = [note[0], note[1], note_dur] + note[3:] + if extend_durs: + note_dur = max(1, nmt-note[1]-min_notes_gap) - fixed_score.append(new_note) + else: + note_dur = note[2] - fixed_score.append(monophonic_score[-1]) + new_note = [note[0], note[1], note_dur] + note[3:] + + if new_note[2] >= min_notes_dur: + fixed_score.append(new_note) + + if monophonic_score[-1][2] >= min_notes_dur: + fixed_score.append(monophonic_score[-1]) elif type(monophonic_score[0][0]) == int: @@ -3874,22 +3947,26 @@ def fix_monophonic_score_durations(monophonic_score): nmt = monophonic_score[i+1][0] if note[0]+note[1] >= nmt: - note_dur = nmt-note[0]-1 + note_dur = max(1, nmt-note[0]-min_notes_gap) else: - note_dur = note[1] + if extend_durs: + note_dur = max(1, nmt-note[0]-min_notes_gap) + else: + note_dur = note[1] + new_note = [note[0], note_dur] + note[2:] - - fixed_score.append(new_note) - - fixed_score.append(monophonic_score[-1]) + + if new_note[1] >= min_notes_dur: + fixed_score.append(new_note) 
+ + if monophonic_score[-1][1] >= min_notes_dur: + fixed_score.append(monophonic_score[-1]) return fixed_score ################################################################################### -from itertools import product - ALL_CHORDS = [[0], [7], [5], [9], [2], [4], [11], [10], [8], [6], [3], [1], [0, 9], [2, 5], [4, 7], [7, 10], [2, 11], [0, 3], [6, 9], [1, 4], [8, 11], [5, 8], [1, 10], [3, 6], [0, 4], [5, 9], [7, 11], [0, 7], [0, 5], [2, 10], [2, 7], [2, 9], @@ -4144,15 +4221,16 @@ def tones_chord_to_pitches(tones_chord, base_pitch=60): ################################################################################### def advanced_score_processor(raw_score, - patches_to_analyze=list(range(129)), - return_score_analysis=False, - return_enhanced_score=False, - return_enhanced_score_notes=False, - return_enhanced_monophonic_melody=False, - return_chordified_enhanced_score=False, - return_chordified_enhanced_score_with_lyrics=False, - return_score_tones_chords=False, - return_text_and_lyric_events=False + patches_to_analyze=list(range(129)), + return_score_analysis=False, + return_enhanced_score=False, + return_enhanced_score_notes=False, + return_enhanced_monophonic_melody=False, + return_chordified_enhanced_score=False, + return_chordified_enhanced_score_with_lyrics=False, + return_score_tones_chords=False, + return_text_and_lyric_events=False, + apply_sustain=False ): '''TMIDIX Advanced Score Processor''' @@ -4192,6 +4270,9 @@ def advanced_score_processor(raw_score, e[2] = e[2] % 16 e[3] = e[3] % 128 + if apply_sustain: + apply_sustain_to_ms_score([1000, basic_single_track_score]) + basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True) basic_single_track_score.sort(key=lambda x: x[1]) @@ -4706,7 +4787,8 @@ def augment_enhanced_score_notes(enhanced_score_notes, ceil_timings=False, round_timings=False, legacy_timings=True, - sort_drums_last=False + sort_drums_last=False, + even_timings=False ): esn = 
copy.deepcopy(enhanced_score_notes) @@ -4749,6 +4831,16 @@ def augment_enhanced_score_notes(enhanced_score_notes, e[4] = max(1, min(127, e[4] + pitch_shift)) pe = enhanced_score_notes[i] + + + if even_timings: + + for e in esn: + if e[1] % 2 != 0: + e[1] += 1 + + if e[2] % 2 != 0: + e[2] += 1 if full_sorting: @@ -4983,54 +5075,101 @@ def patch_list_from_enhanced_score_notes(enhanced_score_notes, ################################################################################### -def patch_enhanced_score_notes(enhanced_score_notes, - default_patch=0, - drums_patch=9, - verbose=False - ): +def patch_enhanced_score_notes(escore_notes, + default_patch=0, + reserved_patch=-1, + reserved_patch_channel=-1, + drums_patch=9, + verbose=False + ): - #=========================================================================== + #=========================================================================== + + enhanced_score_notes = copy.deepcopy(escore_notes) + + #=========================================================================== enhanced_score_notes_with_patch_changes = [] patches = [-1] * 16 + if -1 < reserved_patch < 128 and -1 < reserved_patch_channel < 128: + patches[reserved_patch_channel] = reserved_patch + overflow_idx = -1 for idx, e in enumerate(enhanced_score_notes): - if e[0] == 'note': - if e[3] != 9: - if patches[e[3]] == -1: - patches[e[3]] = e[6] - else: - if patches[e[3]] != e[6]: - if e[6] in patches: - e[3] = patches.index(e[6]) - else: - if -1 in patches: - patches[patches.index(-1)] = e[6] - else: - overflow_idx = idx - break - - enhanced_score_notes_with_patch_changes.append(e) + if e[0] == 'note': + if e[3] != 9: + if -1 < reserved_patch < 128 and -1 < reserved_patch_channel < 128: + if e[6] == reserved_patch: + e[3] = reserved_patch_channel + + if patches[e[3]] == -1: + patches[e[3]] = e[6] + + else: + if patches[e[3]] != e[6]: + if e[6] in patches: + e[3] = patches.index(e[6]) + + else: + if -1 in patches: + patches[patches.index(-1)] = e[6] + + 
else: + overflow_idx = idx + break + + enhanced_score_notes_with_patch_changes.append(e) #=========================================================================== overflow_patches = [] + overflow_channels = [-1] * 16 + overflow_channels[9] = drums_patch + + if -1 < reserved_patch < 128 and -1 < reserved_patch_channel < 128: + overflow_channels[reserved_patch_channel] = reserved_patch if overflow_idx != -1: - for idx, e in enumerate(enhanced_score_notes[overflow_idx:]): - if e[0] == 'note': - if e[3] != 9: - if e[6] not in patches: - if e[6] not in overflow_patches: - overflow_patches.append(e[6]) - enhanced_score_notes_with_patch_changes.append(['patch_change', e[1], e[3], e[6]]) - else: - e[3] = patches.index(e[6]) + for idx, e in enumerate(enhanced_score_notes[overflow_idx:]): + if e[0] == 'note': + if e[3] != 9: + if e[6] not in overflow_channels: + + if -1 in overflow_channels: + free_chan = overflow_channels.index(-1) + overflow_channels[free_chan] = e[6] + e[3] = free_chan + + enhanced_score_notes_with_patch_changes.append(['patch_change', e[1], e[3], e[6]]) + + overflow_patches.append(e[6]) + + else: + overflow_channels = [-1] * 16 + overflow_channels[9] = drums_patch + + if -1 < reserved_patch < 128 and -1 < reserved_patch_channel < 128: + overflow_channels[reserved_patch_channel] = reserved_patch + e[3] = reserved_patch_channel + + if e[6] != reserved_patch: + + free_chan = overflow_channels.index(-1) + e[3] = free_chan + + overflow_channels[e[3]] = e[6] + + enhanced_score_notes_with_patch_changes.append(['patch_change', e[1], e[3], e[6]]) + + overflow_patches.append(e[6]) - enhanced_score_notes_with_patch_changes.append(e) + else: + e[3] = overflow_channels.index(e[6]) + + enhanced_score_notes_with_patch_changes.append(e) #=========================================================================== @@ -5040,9 +5179,13 @@ def patch_enhanced_score_notes(enhanced_score_notes, #=========================================================================== + 
overflow_patches = ordered_set(overflow_patches) + + #=========================================================================== + if verbose: print('=' * 70) - print('Composition patches') + print('Main composition patches') print('=' * 70) for c, p in enumerate(patches): print('Cha', str(c).zfill(2), '---', str(p).zfill(3), Number2patch[p]) @@ -5055,6 +5198,8 @@ def patch_enhanced_score_notes(enhanced_score_notes, print(str(p).zfill(3), Number2patch[p]) print('=' * 70) + #=========================================================================== + return enhanced_score_notes_with_patch_changes, patches, overflow_patches ################################################################################### @@ -6689,12 +6834,23 @@ def find_next_bar(escore_notes, bar_time, start_note_idx, cur_bar): def align_escore_notes_to_bars(escore_notes, bar_time=4000, trim_durations=False, - split_durations=False + split_durations=False, + even_timings=False ): #============================================================================= + + escore = copy.deepcopy(escore_notes) + + if even_timings: + for e in escore: + if e[1] % 2 != 0: + e[1] += 1 + + if e[2] % 2 != 0: + e[2] += 1 - aligned_escore_notes = copy.deepcopy(escore_notes) + aligned_escore_notes = copy.deepcopy(escore) abs_time = 0 nidx = 0 @@ -6706,13 +6862,13 @@ def align_escore_notes_to_bars(escore_notes, while next_bar: - next_bar = find_next_bar(escore_notes, bar_time, nidx, bcount) + next_bar = find_next_bar(escore, bar_time, nidx, bcount) if next_bar: - - gescore_notes = escore_notes[nidx:next_bar[1]] + gescore_notes = escore[nidx:next_bar[1]] + else: - gescore_notes = escore_notes[nidx:] + gescore_notes = escore[nidx:] original_timings = [delta] + [(b[1]-a[1]) for a, b in zip(gescore_notes[:-1], gescore_notes[1:])] adj_timings = adjust_numbers_to_sum(original_timings, bar_time) @@ -6727,7 +6883,8 @@ def align_escore_notes_to_bars(escore_notes, nidx += 1 if next_bar: - delta = 
escore_notes[next_bar[1]][1]-escore_notes[next_bar[1]-1][1] + delta = escore[next_bar[1]][1]-escore[next_bar[1]-1][1] + bcount += 1 #============================================================================= @@ -6984,7 +7141,8 @@ def escore_notes_to_binary_matrix(escore_notes, channel=0, patch=0, flip_matrix=False, - reverse_matrix=False + reverse_matrix=False, + encode_velocities=False ): escore = [e for e in escore_notes if e[3] == channel and e[6] == patch] @@ -7008,14 +7166,17 @@ def escore_notes_to_binary_matrix(escore_notes, duration = max(1, duration) chan = max(0, min(15, chan)) pitch = max(0, min(127, pitch)) - velocity = max(0, min(127, velocity)) + velocity = max(1, min(127, velocity)) pat = max(0, min(128, pat)) if channel == chan and patch == pat: for t in range(time, min(time + duration, time_range)): - - escore_matrix[t][pitch] = 1 + if encode_velocities: + escore_matrix[t][pitch] = velocity + + else: + escore_matrix[t][pitch] = 1 if flip_matrix: @@ -7039,7 +7200,8 @@ def escore_notes_to_binary_matrix(escore_notes, def binary_matrix_to_original_escore_notes(binary_matrix, channel=0, patch=0, - velocity=-1 + velocity=-1, + decode_velocities=False ): result = [] @@ -7078,8 +7240,11 @@ def binary_matrix_to_original_escore_notes(binary_matrix, for r in result: - if velocity == -1: - vel = max(40, r[2]) + if velocity == -1 and not decode_velocities: + vel = max(40, r[2]) + + if decode_velocities: + vel = r[3] original_escore_notes.append(['note', r[0], r[1], channel, r[2], vel, patch]) @@ -7904,7 +8069,7 @@ def solo_piano_escore_notes(escore_notes, keep_drums=False, ): - cscore = chordify_score([1000, escore_notes]) + cscore = chordify_score([1000, copy.deepcopy(escore_notes)]) sp_escore_notes = [] @@ -9576,7 +9741,14 @@ def escore_notes_to_text_description(escore_notes, song_name='', artist_name='', timings_divider=16, + return_feat_dict=False, + return_feat_dict_vals=False ): + + 
#============================================================================== + + feat_dict = {} + feat_dict_vals = {} #============================================================================== @@ -9590,6 +9762,9 @@ def escore_notes_to_text_description(escore_notes, elif song_time_min >= 2.5: song_length = 'long' + + feat_dict['song_len'] = song_length.capitalize() + feat_dict_vals['song_len'] = song_time_min #============================================================================== @@ -9601,18 +9776,25 @@ def escore_notes_to_text_description(escore_notes, if len(escore_times) == len(set(escore_times)): comp_type = 'monophonic melody' ctype = 'melody' + ctv = 0 elif len(escore_times) >= len(set(escore_times)) and 1 in Counter(escore_times).values(): comp_type = 'melody and accompaniment' ctype = 'song' + ctv = 1 elif len(escore_times) >= len(set(escore_times)) and 1 not in Counter(escore_times).values(): comp_type = 'accompaniment' ctype = 'song' + ctv = 2 else: comp_type = 'drum track' ctype = 'drum track' + ctv = 3 + + feat_dict['song_type'] = comp_type.capitalize() + feat_dict_vals['song_type'] = ctv #============================================================================== @@ -9627,6 +9809,13 @@ def escore_notes_to_text_description(escore_notes, nd_patches_counts = Counter([p for p in all_patches if p < 128]).most_common() dominant_instrument = alpha_str(Number2patch[nd_patches_counts[0][0]]) + + feat_dict['most_com_instr'] = instruments + feat_dict_vals['most_com_instr'] = [p for p in patches if p < 128] + + else: + feat_dict['most_com_instr'] = None + feat_dict_vals['most_com_instr'] = [] if 128 in patches: drums_present = True @@ -9634,9 +9823,16 @@ def escore_notes_to_text_description(escore_notes, drums_pitches = [e[4] for e in escore_notes if e[3] == 9] most_common_drums = [alpha_str(Notenum2percussion[p[0]]) for p in Counter(drums_pitches).most_common(3) if p[0] in Notenum2percussion] + + feat_dict['most_com_drums'] = most_common_drums + 
feat_dict_vals['most_com_drums'] = [p[0] for p in Counter(drums_pitches).most_common(3)] else: drums_present = False + + feat_dict['most_com_drums'] = None + + feat_dict_vals['most_com_drums'] = [] #============================================================================== @@ -9646,60 +9842,111 @@ def escore_notes_to_text_description(escore_notes, if pitches: key = SEMITONES[statistics.mode(pitches) % 12] + + feat_dict['key'] = key.title() + feat_dict_vals['key'] = statistics.mode(pitches) % 12 + + else: + feat_dict['key'] = None + feat_dict_vals['key'] = -1 #============================================================================== scale = '' mood = '' + feat_dict['scale'] = None + feat_dict['mood'] = None + feat_dict_vals['scale'] = -1 + feat_dict_vals['mood'] = -1 + if pitches: result = escore_notes_scale(escore_notes) scale = result[0] mood = result[1].split(' ')[0].lower() + + feat_dict['scale'] = scale.title() + feat_dict['mood'] = mood.title() + + res = escore_notes_scale(escore_notes, return_scale_indexes=True) + feat_dict_vals['scale'] = res[0] + feat_dict_vals['mood'] = res[1] #============================================================================== - + + feat_dict['rythm'] = None + feat_dict['tempo'] = None + feat_dict['tone'] = None + feat_dict['dynamics'] = None + + feat_dict_vals['rythm'] = -1 + feat_dict_vals['tempo'] = -1 + feat_dict_vals['tone'] = -1 + feat_dict_vals['dynamics'] = -1 + if pitches: escore_averages = escore_notes_averages(escore_notes, return_ptcs_and_vels=True) if escore_averages[0] < (128 / timings_divider): rythm = 'fast' + ryv = 0 elif (128 / timings_divider) <= escore_averages[0] <= (192 / timings_divider): rythm = 'average' + ryv = 1 elif escore_averages[0] > (192 / timings_divider): rythm = 'slow' + ryv = 2 if escore_averages[1] < (256 / timings_divider): tempo = 'fast' + tev = 0 elif (256 / timings_divider) <= escore_averages[1] <= (384 / timings_divider): tempo = 'average' + tev = 1 elif escore_averages[1] > 
(384 / timings_divider): tempo = 'slow' + tev = 2 if escore_averages[2] < 50: tone = 'bass' + tov = 0 elif 50 <= escore_averages[2] <= 70: tone = 'midrange' + tov = 1 elif escore_averages[2] > 70: tone = 'treble' + tov = 2 if escore_averages[3] < 64: dynamics = 'quiet' + dyn = 0 elif 64 <= escore_averages[3] <= 96: dynamics = 'average' + dyn = 1 elif escore_averages[3] > 96: dynamics = 'loud' + dyn = 2 + + feat_dict['rythm'] = rythm.title() + feat_dict['tempo'] = tempo.title() + feat_dict['tone'] = tone.title() + feat_dict['dynamics'] = dynamics.title() + + feat_dict_vals['rythm'] = ryv + feat_dict_vals['tempo'] = tev + feat_dict_vals['tone'] = tov + feat_dict_vals['dynamics'] = dyn #============================================================================== @@ -9707,6 +9954,12 @@ def escore_notes_to_text_description(escore_notes, lead_melodies = [] base_melodies = [] + + feat_dict['lead_mono_mels'] = None + feat_dict['base_mono_mels'] = None + + feat_dict_vals['lead_mono_mels'] = [] + feat_dict_vals['base_mono_mels'] = [] if mono_melodies: @@ -9716,15 +9969,19 @@ def escore_notes_to_text_description(escore_notes, if mel[0] in LEAD_INSTRUMENTS and escore_avgs[3] > 60: lead_melodies.append([Number2patch[mel[0]], mel[1]]) + feat_dict_vals['lead_mono_mels'].append(mel[0]) elif mel[0] in BASE_INSTRUMENTS and escore_avgs[3] <= 60: base_melodies.append([Number2patch[mel[0]], mel[1]]) + feat_dict_vals['base_mono_mels'].append(mel[0]) if lead_melodies: lead_melodies.sort(key=lambda x: x[1], reverse=True) + feat_dict['lead_mono_mels'] = lead_melodies if base_melodies: base_melodies.sort(key=lambda x: x[1], reverse=True) + feat_dict['base_mono_mels'] = base_melodies #============================================================================== @@ -9911,8 +10168,20 @@ def escore_notes_to_text_description(escore_notes, description += '\n' #============================================================================== - - return description + + final_feat_dict = [] + + if 
return_feat_dict: + final_feat_dict.append(feat_dict) + + if return_feat_dict_vals: + final_feat_dict.append(feat_dict_vals) + + if return_feat_dict or return_feat_dict_vals: + return final_feat_dict + + else: + return description ################################################################################### @@ -11138,13 +11407,17 @@ def escore_notes_core(escore_notes, core_len=128): ################################################################################### -def multiprocessing_wrapper(function, data_list): +def multiprocessing_wrapper(function, data_list, verbose=True): with multiprocessing.Pool() as pool: results = [] - for result in tqdm.tqdm(pool.imap_unordered(function, data_list), total=len(data_list)): + for result in tqdm.tqdm(pool.imap(function, data_list), + total=len(data_list), + disable=not verbose + ): + results.append(result) return results @@ -11293,28 +11566,83 @@ def system_memory_utilization(return_dict=False): ################################################################################### +def system_cpus_utilization(return_dict=False): + + if return_dict: + return {'num_cpus': psutil.cpu_count(), + 'cpus_util': psutil.cpu_percent() + } + + else: + print('Number of CPUs:', psutil.cpu_count()) + print('CPUs utilization:', psutil.cpu_percent()) + +################################################################################### + def create_files_list(datasets_paths=['./'], files_exts=['.mid', '.midi', '.kar', '.MID', '.MIDI', '.KAR'], + max_num_files_per_dir=-1, + randomize_dir_files=False, + max_total_files=-1, randomize_files_list=True, + check_for_dupes=False, + use_md5_hashes=False, + return_dupes=False, verbose=True ): + if verbose: print('=' * 70) print('Searching for files...') print('This may take a while on a large dataset in particular...') print('=' * 70) - filez_set = defaultdict(None) - files_exts = tuple(files_exts) - for dataset_addr in tqdm.tqdm(datasets_paths, disable=not verbose): - for dirpath, dirnames, 
filenames in os.walk(dataset_addr): - for file in filenames: - if file not in filez_set and file.endswith(files_exts): - filez_set[os.path.join(dirpath, file)] = None - - filez = list(filez_set.keys()) + filez_set = defaultdict(None) + dupes_list = [] + + for dataset_addr in datasets_paths: + + print('=' * 70) + print('Processing', dataset_addr) + print('=' * 70) + + for dirpath, dirnames, filenames in tqdm.tqdm(os.walk(dataset_addr), disable=not verbose): + + if randomize_dir_files: + random.shuffle(filenames) + + if max_num_files_per_dir > 0: + max_num_files = max_num_files_per_dir + + else: + max_num_files = len(filenames) + + for file in filenames[:max_num_files]: + if file.endswith(files_exts): + if check_for_dupes: + + if use_md5_hashes: + md5_hash = hashlib.md5(open(os.path.join(dirpath, file), 'rb').read()).hexdigest() + + if md5_hash not in filez_set: + filez_set[md5_hash] = os.path.join(dirpath, file) + + else: + dupes_list.append(os.path.join(dirpath, file)) + + else: + if file not in filez_set: + filez_set[file] = os.path.join(dirpath, file) + + else: + dupes_list.append(os.path.join(dirpath, file)) + else: + fpath = os.path.join(dirpath, file) + filez_set[fpath] = fpath + + filez = list(filez_set.values()) if verbose: print('Done!') @@ -11334,6 +11662,7 @@ def create_files_list(datasets_paths=['./'], if verbose: print('Found', len(filez), 'files.') + print('Skipped', len(dupes_list), 'duplicate files.') print('=' * 70) else: @@ -11341,8 +11670,20 @@ def create_files_list(datasets_paths=['./'], print('Could not find any files...') print('Please check dataset dirs and files extensions...') print('=' * 70) + + if max_total_files > 0: + if return_dupes: + return filez[:max_total_files], dupes_list + + else: + return filez[:max_total_files] + + else: + if return_dupes: + return filez, dupes_list - return filez + else: + return filez ################################################################################### @@ -12163,8 +12504,16 @@ def 
escore_notes_pitches_chords_signature(escore_notes, sort_by_counts=False, use_full_chords=False ): + + if use_full_chords: + CHORDS = ALL_CHORDS_FULL + + else: + CHORDS = ALL_CHORDS_SORTED + + max_patch = max(0, min(128, max_patch)) - escore_notes = [e for e in escore_notes if e[6] <= max_patch % 129] + escore_notes = [e for e in escore_notes if e[6] <= max_patch] if escore_notes: @@ -12173,7 +12522,7 @@ def escore_notes_pitches_chords_signature(escore_notes, sig = [] dsig = [] - drums_offset = 321 + 128 + drums_offset = len(CHORDS) + 128 bad_chords_counter = 0 @@ -12190,10 +12539,10 @@ def escore_notes_pitches_chords_signature(escore_notes, tones_chord = sorted(set([p % 12 for p in pitches])) try: - sig_token = ALL_CHORDS_SORTED.index(tones_chord) + 128 + sig_token = CHORDS.index(tones_chord) + 128 except: checked_tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=use_full_chords) - sig_token = ALL_CHORDS_SORTED.index(checked_tones_chord) + 128 + sig_token = CHORDS.index(checked_tones_chord) + 128 bad_chords_counter += 1 elif len(pitches) == 1: @@ -12226,6 +12575,2463 @@ def escore_notes_pitches_chords_signature(escore_notes, else: return [] +################################################################################### + +def compute_sustain_intervals(events): + + intervals = [] + pedal_on = False + current_start = None + + for t, cc in events: + if not pedal_on and cc >= 64: + + pedal_on = True + current_start = t + elif pedal_on and cc < 64: + + pedal_on = False + intervals.append((current_start, t)) + current_start = None + + if pedal_on: + intervals.append((current_start, float('inf'))) + + merged = [] + + for interval in intervals: + if merged and interval[0] <= merged[-1][1]: + merged[-1] = (merged[-1][0], max(merged[-1][1], interval[1])) + else: + merged.append(interval) + return merged + +################################################################################### + +def apply_sustain_to_ms_score(score): + + 
sustain_by_channel = {} + + for track in score[1:]: + for event in track: + if event[0] == 'control_change' and event[3] == 64: + channel = event[2] + sustain_by_channel.setdefault(channel, []).append((event[1], event[4])) + + sustain_intervals_by_channel = {} + + for channel, events in sustain_by_channel.items(): + events.sort(key=lambda x: x[0]) + sustain_intervals_by_channel[channel] = compute_sustain_intervals(events) + + global_max_off = 0 + + for track in score[1:]: + for event in track: + if event[0] == 'note': + global_max_off = max(global_max_off, event[1] + event[2]) + + for channel, intervals in sustain_intervals_by_channel.items(): + updated_intervals = [] + for start, end in intervals: + if end == float('inf'): + end = global_max_off + updated_intervals.append((start, end)) + sustain_intervals_by_channel[channel] = updated_intervals + + if sustain_intervals_by_channel: + + for track in score[1:]: + for event in track: + if event[0] == 'note': + start = event[1] + nominal_dur = event[2] + nominal_off = start + nominal_dur + channel = event[3] + + intervals = sustain_intervals_by_channel.get(channel, []) + effective_off = nominal_off + + for intv_start, intv_end in intervals: + if intv_start < nominal_off < intv_end: + effective_off = intv_end + break + + effective_dur = effective_off - start + + event[2] = effective_dur + + return score + +################################################################################### + +def copy_file(src_file: str, trg_dir: str, add_subdir: bool = False, verbose: bool = False): + + src_path = Path(src_file) + target_directory = Path(trg_dir) + + if not src_path.is_file(): + if verbose: + print("Source file does not exist or is not a file.") + + return None + + target_directory.mkdir(parents=True, exist_ok=True) + + if add_subdir: + first_letter = src_path.name[0] + target_directory = target_directory / first_letter + target_directory.mkdir(parents=True, exist_ok=True) + + destination = target_directory / 
src_path.name + + try: + shutil.copy2(src_path, destination) + + except: + if verbose: + print('File could not be copied!') + + return None + + if verbose: + print('File copied!') + + return None + +################################################################################### + +def escore_notes_even_timings(escore_notes, in_place=True): + + if in_place: + for e in escore_notes: + if e[1] % 2 != 0: + e[1] += 1 + + if e[2] % 2 != 0: + e[2] += 1 + + return [] + + else: + escore = copy.deepcopy(escore_notes) + + for e in escore: + if e[1] % 2 != 0: + e[1] += 1 + + if e[2] % 2 != 0: + e[2] += 1 + + return escore + +################################################################################### + +def both_chords(chord1, chord2, merge_threshold=2): + + if len(chord1) > 1 and len(chord2) > 0 and chord2[0][1]-chord1[0][1] <= merge_threshold: + return True + + elif len(chord1) > 0 and len(chord2) > 1 and chord2[0][1]-chord1[0][1] <= merge_threshold: + return True + + else: + return False + +def merge_chords(chord1, chord2, sort_drums_last=False): + + mchord = chord1 + + seen = [] + + for e in chord2: + if tuple([e[4], e[6]]) not in seen: + mchord.append(e) + seen.append(tuple([e[4], e[6]])) + + for e in mchord[1:]: + e[1] = mchord[0][1] + + if sort_drums_last: + mchord.sort(key=lambda x: (-x[4], x[6]) if x[6] != 128 else (x[6], -x[4])) + + else: + mchord.sort(key=lambda x: (-x[4], x[6])) + + return mchord + +def merge_escore_notes(escore_notes, merge_threshold=2, sort_drums_last=False): + + cscore = chordify_score([1000, escore_notes]) + + merged_chords = [] + merged_chord = cscore[0] + + for i in range(1, len(cscore)): + + cchord = cscore[i] + + if both_chords(merged_chord, cchord, merge_threshold=merge_threshold): + merged_chord = merge_chords(merged_chord, cchord, sort_drums_last=sort_drums_last) + + else: + merged_chords.append(merged_chord) + merged_chord = cchord + + return flatten(merged_chords) + 
###################################################################################

def solo_piano_escore_notes_tokenized(escore_notes,
                                      compress_start_times=True,
                                      encode_velocities=False,
                                      verbose=False
                                      ):

    """Tokenize an enhanced score into a flat solo-piano token sequence.

    Token bands: [0..127] delta start times, [128..255] durations (+128),
    [256..383] pitches (+256), [384..511] velocities (+384, optional).
    Relies on sibling helpers (solo_piano_escore_notes, recalculate_score_timings,
    delta_score_notes) defined elsewhere in this module.
    """

    if verbose:
        print('=' * 70)
        print('Encoding MIDI...')

    sp_escore_notes = solo_piano_escore_notes(escore_notes)
    zscore = recalculate_score_timings(sp_escore_notes)
    dscore = delta_score_notes(zscore, timings_clip_value=127)

    score = []

    notes_counter = 0
    chords_counter = 1

    for i, e in enumerate(dscore):

        dtime = e[1]
        dur = e[2]
        ptc = e[4]
        vel = e[5]

        if compress_start_times:

            if i == 0:
                # First note always emits an explicit (zero) start time.
                score.extend([0, dur+128, ptc+256])

                if encode_velocities:
                    score.append(vel+384)

            else:
                # Zero deltas (same chord) omit the time token entirely.
                if dtime == 0:
                    score.extend([dur+128, ptc+256])

                else:
                    score.extend([dtime, dur+128, ptc+256])

                if encode_velocities:
                    score.append(vel+384)

                if dtime != 0:
                    chords_counter += 1

        else:
            score.extend([dtime, dur+128, ptc+256])

            if encode_velocities:
                score.append(vel+384)

            if dtime != 0:
                chords_counter += 1

        notes_counter += 1

    if verbose:
        print('Done!')
        print('=' * 70)

        print('Source MIDI composition has', len(zscore), 'notes')
        print('Source MIDI composition has', len([d[1] for d in dscore if d[1] != 0])+1, 'chords')
        print('-' * 70)
        print('Encoded sequence has', notes_counter, 'pitches')
        print('Encoded sequence has', chords_counter, 'chords')
        print('-' * 70)
        print('Final encoded sequence has', len(score), 'tokens')
        print('=' * 70)

    return score

###################################################################################

def equalize_closest_elements_dynamic(seq,
                                      min_val=128,
                                      max_val=256,
                                      splitting_factor=1.5,
                                      tightness_threshold=0.15
                                      ):

    """Replace the largest tight cluster of values in [min_val, max_val]
    with their integer mean; return a copy of seq otherwise unchanged.

    Clusters are split where the gap between sorted candidate values exceeds
    splitting_factor * median(gaps); a cluster is only equalized when its
    spread is at most tightness_threshold of the allowed range width.
    """

    candidates = [(i, x) for i, x in enumerate(seq) if min_val <= x <= max_val]

    if len(candidates) < 2:
        return seq.copy()

    sorted_candidates = sorted(candidates, key=lambda pair: pair[1])
    candidate_values = [val for _, val in sorted_candidates]

    differences = [candidate_values[i+1] - candidate_values[i] for i in range(len(candidate_values)-1)]

    def median(lst):
        # Local helper: plain median without pulling in the statistics module.
        n = len(lst)
        sorted_lst = sorted(lst)
        mid = n // 2

        if n % 2 == 0:
            return (sorted_lst[mid - 1] + sorted_lst[mid]) / 2.0

        else:
            return sorted_lst[mid]

    med_diff = median(differences)

    split_indices = [i for i, diff in enumerate(differences) if diff > splitting_factor * med_diff]

    clusters = []

    if split_indices:
        start = 0
        for split_index in split_indices:
            clusters.append(sorted_candidates[start:split_index+1])
            start = split_index + 1
        clusters.append(sorted_candidates[start:])

    else:
        clusters = [sorted_candidates]

    valid_clusters = [cluster for cluster in clusters if len(cluster) >= 2]
    if not valid_clusters:
        return seq.copy()

    def cluster_spread(cluster):
        # Spread of a cluster's values (max - min).
        values = [val for (_, val) in cluster]
        return max(values) - min(values)

    # Prefer the biggest cluster; break ties by the tightest spread.
    valid_clusters.sort(key=lambda cluster: (len(cluster), -cluster_spread(cluster)), reverse=True)
    selected_cluster = valid_clusters[0]

    allowed_range_width = max_val - min_val
    spread = cluster_spread(selected_cluster)
    ratio = spread / allowed_range_width

    if ratio > tightness_threshold:
        return seq.copy()

    cluster_values = [val for (_, val) in selected_cluster]
    equal_value = sum(cluster_values) // len(cluster_values)

    result = list(seq)
    for idx, _ in selected_cluster:
        result[idx] = equal_value

    return result

###################################################################################

def chunk_list(lst, chunk_size):

    """Split lst into consecutive chunks of at most chunk_size elements."""

    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]

###################################################################################

def compress_tokens_sequence(seq,
                             min_val=128,
                             max_val=256,
                             group_size=2,
                             splitting_factor=1.5,
                             tightness_threshold=0.15
                             ):

    """Equalize near-identical values in seq, then regroup token pairs by
    their leading value. Uses the module-level flatten() helper.
    """

    comp_seq = equalize_closest_elements_dynamic(seq,
                                                 min_val,
                                                 max_val,
                                                 splitting_factor=splitting_factor,
                                                 tightness_threshold=tightness_threshold
                                                 )

    seq_split = sorted(chunk_list(comp_seq, group_size), key=lambda x: (-x[0], -x[1]))

    # groupby requires the pre-sort above by the same leading-value key.
    seq_grouped = [[[k]] + [vv[1:] for vv in v] for k, v in groupby(seq_split, key=lambda x: x[0])]

    return flatten(flatten(sorted(seq_grouped, key=lambda x: -x[1][0])))

###################################################################################

def merge_adjacent_pairs(values_counts):

    """Merge (value, count) pairs whose values differ by exactly 1.

    The surviving value is the one with the larger count; counts are summed.
    One left-to-right pass; expects pairs sorted by value.
    """

    merged = []
    i = 0

    while i < len(values_counts):

        if i < len(values_counts) - 1:
            value1, count1 = values_counts[i]
            value2, count2 = values_counts[i + 1]

            if value2 - value1 == 1:
                if count2 > count1:
                    merged_value = value2

                else:
                    merged_value = value1

                merged_count = count1 + count2
                merged.append((merged_value, merged_count))

                i += 2

                continue

        merged.append(values_counts[i])

        i += 1

    return merged

###################################################################################

def merge_escore_notes_start_times(escore_notes, num_merges=1):

    """Quantize note start times by repeatedly merging adjacent delta-time
    bins (num_merges rounds), snapping each delta to the closest survivor.
    """

    new_dscore = delta_score_notes(escore_notes)

    times = [e[1] for e in new_dscore if e[1] != 0]
    times_counts = sorted(Counter(times).most_common())

    prev_counts = []
    new_times_counts = times_counts

    mcount = 0

    # Iterate until a fixed point or until num_merges rounds are done.
    while prev_counts != new_times_counts:
        prev_counts = new_times_counts
        new_times_counts = merge_adjacent_pairs(new_times_counts)

        mcount += 1

        if mcount == num_merges:
            break

    gtimes = [r[0] for r in new_times_counts]

    for e in new_dscore:
        if e[1] > 0:
            e[1] = find_closest_value(gtimes, e[1])[0]
            e[2] -= num_merges

    return delta_score_to_abs_score(new_dscore)

###################################################################################

def multi_instrumental_escore_notes_tokenized(escore_notes, compress_seq=False):

    """Tokenize a multi-instrumental enhanced score.

    Token bands: [0..255] delta times, [256..2303] duration*8+velocity (+256),
    [2304..] patch*129+pitch (+2304). Channel 9 maps to the drums patch 128.
    """

    melody_chords = []

    pe = escore_notes[0]

    for i, e in enumerate(escore_notes):

        dtime = max(0, min(255, e[1]-pe[1]))

        dur = max(0, min(255, e[2]))

        cha = max(0, min(15, e[3]))

        if cha == 9:
            pat = 128

        else:
            pat = max(0, min(127, e[6]))

        ptc = max(0, min(127, e[4]))

        # Velocity is bucketed into 8 levels (0..7).
        vel = max(8, min(127, e[5]))
        velocity = round(vel / 15)-1

        dur_vel = (8 * dur) + velocity
        pat_ptc = (129 * pat) + ptc

        if compress_seq:
            # Omit zero delta-time tokens except for the very first note.
            if dtime != 0 or i == 0:
                melody_chords.extend([dtime, dur_vel+256, pat_ptc+2304])

            else:
                melody_chords.extend([dur_vel+256, pat_ptc+2304])

        else:
            melody_chords.extend([dtime, dur_vel+256, pat_ptc+2304])

        pe = e

    return melody_chords

###################################################################################

def merge_counts(data, return_lists=True):

    """Sum counts of duplicate values in an iterable of (value, count) pairs."""

    merged = defaultdict(int)

    for value, count in data:
        merged[value] += count

    if return_lists:
        return [[k, v] for k, v in merged.items()]

    else:
        return list(merged.items())

###################################################################################

def convert_escore_notes_pitches_chords_signature(signature, convert_to_full_chords=True):

    """Convert a pitches/chords signature between the ALL_CHORDS_SORTED and
    ALL_CHORDS_FULL chord vocabularies, re-indexing chords and drums counts.

    NOTE(review): assumes the signature contains a [-1, count] bad-chords
    entry; bad_chords_count[0] raises IndexError otherwise -- confirm with
    the signature producer.
    """

    if convert_to_full_chords:
        SRC_CHORDS = ALL_CHORDS_SORTED
        TRG_CHORDS = ALL_CHORDS_FULL

    else:
        SRC_CHORDS = ALL_CHORDS_FULL
        TRG_CHORDS = ALL_CHORDS_SORTED

    cdiff = len(TRG_CHORDS) - len(SRC_CHORDS)

    pitches_counts = [c for c in signature if -1 < c[0] < 128]
    chords_counts = [c for c in signature if 127 < c[0] < len(SRC_CHORDS)+128]
    drums_counts = [[c[0]+cdiff, c[1]] for c in signature if len(SRC_CHORDS)+127 < c[0] < len(SRC_CHORDS)+256]
    bad_chords_count = [c for c in signature if c[0] == -1]

    new_chords_counts = []

    for c in chords_counts:
        tones_chord = SRC_CHORDS[c[0]-128]

        if tones_chord not in TRG_CHORDS:
            tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=convert_to_full_chords)
            bad_chords_count[0][1] += 1

        new_chords_counts.append([TRG_CHORDS.index(tones_chord)+128, c[1]])

    return pitches_counts + merge_counts(new_chords_counts) + drums_counts + bad_chords_count

###################################################################################

def convert_bytes_in_nested_list(lst, encoding='utf-8', errors='ignore'):

    """Recursively decode bytes objects in a nested list to str.

    Fix: the recursive call now forwards encoding/errors (previously nested
    levels silently decoded with the defaults).
    NOTE: superseded by the extended redefinition later in this module.
    """

    new_list = []

    for item in lst:
        if isinstance(item, list):
            new_list.append(convert_bytes_in_nested_list(item, encoding, errors))

        elif isinstance(item, bytes):
            new_list.append(item.decode(encoding, errors=errors))

        else:
            new_list.append(item)

    return new_list

###################################################################################

def mult_pitches(pitches, min_oct=4, max_oct=6):

    """Project the tone classes of pitches into octaves [min_oct, max_oct)."""

    tones_chord = sorted(set([p % 12 for p in pitches]))

    mult_ptcs = []

    for t in tones_chord:
        for i in range(min_oct, max_oct):
            mult_ptcs.append((i*12)+t)

    return mult_ptcs

###################################################################################

def find_next(pitches, cur_ptc):

    """Return the index of the first element of pitches != cur_ptc
    (or the last index if all elements match; 0 for an empty list).
    """

    i = 0

    for i, p in enumerate(pitches):
        if p != cur_ptc:
            break

    return i

###################################################################################

def ordered_groups_unsorted(data, key_index):

    """Group consecutive sublists of data by data[key_index] (no pre-sort:
    non-adjacent equal keys form separate groups).
    """

    def keyfunc(sublist):
        return sublist[key_index]

    groups = []

    for key, group in groupby(data, key=keyfunc):
        groups.append((key, list(group)))

    return groups

###################################################################################

def ordered_groups(data, ptc_idx, pat_idx):

    """Group sublists by (pitch, patch) key, preserving first-seen order."""

    groups = OrderedDict()

    for sublist in data:
        key = tuple([sublist[ptc_idx], sublist[pat_idx]])

        if key not in groups:
            groups[key] = []

        groups[key].append(sublist)

    return list(groups.items())

###################################################################################

def merge_melody_notes(escore_notes, pitches_idx=4, max_dur=255, last_dur=128):

    """Collapse runs of repeated melody pitches into single sustained notes.

    Each multi-note run becomes one note lasting until the next run starts
    (capped at max_dur); the final run always gets last_dur.
    """

    groups = ordered_groups_unsorted(escore_notes, pitches_idx)

    merged_melody_notes = []

    for i, (k, g) in enumerate(groups[:-1]):

        if len(g) == 1:
            merged_melody_notes.extend(g)

        else:
            dur = min(max_dur, groups[i+1][1][0][1] - g[0][1])

            merged_melody_notes.append(['note',
                                        g[0][1],
                                        dur,
                                        g[0][3],
                                        g[0][4],
                                        g[0][5],
                                        g[0][6]
                                        ])

    merged_melody_notes.append(['note',
                                groups[-1][1][0][1],
                                last_dur,
                                groups[-1][1][0][3],
                                groups[-1][1][0][4],
                                groups[-1][1][0][5],
                                groups[-1][1][0][6]
                                ])

    return merged_melody_notes

###################################################################################

def add_expressive_melody_to_enhanced_score_notes(escore_notes,
                                                  melody_start_chord=0,
                                                  melody_prime_pitch=60,
                                                  melody_step=1,
                                                  melody_channel=3,
                                                  melody_patch=40,
                                                  melody_notes_max_duration=255,
                                                  melody_last_note_dur=128,
                                                  melody_clip_max_min_durs=[],
                                                  melody_max_velocity=120,
                                                  acc_max_velocity=90,
                                                  return_melody=False
                                                  ):

    """Synthesize an expressive melody line over an enhanced score.

    Picks, every melody_step chords, the multiplied pitch closest to the
    previous melody pitch, renders it on melody_channel/melody_patch and
    mixes it into the (velocity-rescaled) accompaniment.
    melody_clip_max_min_durs=[] is a read-only default (never mutated).
    Returns the combined score, or only the raw melody if return_melody.
    """

    score = copy.deepcopy(escore_notes)

    adjust_score_velocities(score, acc_max_velocity)

    cscore = chordify_score([1000, score])

    melody_pitches = [melody_prime_pitch]

    # Pass 1: choose melody pitches by closeness to the previous one.
    for i, c in enumerate(cscore[melody_start_chord:]):

        if i % melody_step == 0:

            pitches = [e[4] for e in c if e[3] != 9]

            if pitches:
                cptc = find_closest_value(mult_pitches(pitches), melody_pitches[-1])[0]
                melody_pitches.append(cptc)

    song_f = []
    mel_f = []

    idx = 1

    # Pass 2: emit melody notes alongside the accompaniment chords.
    for i, c in enumerate(cscore[:-melody_step]):
        pitches = [e[4] for e in c if e[3] != 9]

        if pitches and i >= melody_start_chord and i % melody_step == 0:
            dur = min(cscore[i+melody_step][0][1] - c[0][1], melody_notes_max_duration)

            mel_f.append(['note',
                          c[0][1],
                          dur,
                          melody_channel,
                          60+(melody_pitches[idx] % 24),
                          100 + ((melody_pitches[idx] % 12) * 2),
                          melody_patch
                          ])
            idx += 1

        song_f.extend(c)

    song_f.extend(flatten(cscore[-melody_step:]))

    # Optional clipping: [threshold, replacement] pair.
    if len(melody_clip_max_min_durs) == 2:
        for e in mel_f:
            if e[2] >= melody_clip_max_min_durs[0]:
                e[2] = melody_clip_max_min_durs[1]

    adjust_score_velocities(mel_f, melody_max_velocity)

    merged_melody_notes = merge_melody_notes(mel_f,
                                             max_dur=melody_notes_max_duration,
                                             last_dur=melody_last_note_dur
                                             )

    song_f = sorted(merged_melody_notes + song_f,
                    key=lambda x: x[1]
                    )

    if return_melody:
        return mel_f

    else:
        return song_f

###################################################################################

def list_md5_hash(ints_list):

    """MD5 hex digest of ints_list packed as unsigned 16-bit values.

    NOTE: array('H') requires every value in [0, 65535].
    """

    arr = array('H', ints_list)
    binary_data = arr.tobytes()

    return hashlib.md5(binary_data).hexdigest()

###################################################################################

def fix_escore_notes_durations(escore_notes,
                               min_notes_gap=1,
                               min_notes_dur=1,
                               times_idx=1,
                               durs_idx=2,
                               channels_idx=3,
                               pitches_idx=4,
                               patches_idx=6
                               ):

    """Trim overlapping durations per (pitch, patch) voice; drums (channel 9)
    pass through untouched. Groups of 3+ notes are delegated to
    fix_monophonic_score_durations; pairs are trimmed in place here.
    """

    notes = [e for e in escore_notes if e[channels_idx] != 9]
    drums = [e for e in escore_notes if e[channels_idx] == 9]

    escore_groups = ordered_groups(notes, pitches_idx, patches_idx)

    merged_score = []

    for k, g in escore_groups:
        if len(g) > 2:
            fg = fix_monophonic_score_durations(g,
                                                min_notes_gap=min_notes_gap,
                                                min_notes_dur=min_notes_dur
                                                )
            merged_score.extend(fg)

        elif len(g) == 2:

            if g[0][times_idx]+g[0][durs_idx] >= g[1][times_idx]:
                g[0][durs_idx] = max(1, g[1][times_idx] - g[0][times_idx] - min_notes_gap)

            merged_score.extend(g)

        else:
            merged_score.extend(g)

    return sorted(merged_score + drums, key=lambda x: x[times_idx])

###################################################################################

def create_nested_chords_tree(chords_list):

    """Build a trie of chords keyed by semitone; complete chords are stored
    under the sentinel key -1 at their terminal node.
    """

    tree = {}

    for chord in chords_list:

        node = tree

        for semitone in chord:
            if semitone not in node:
                node[semitone] = {}

            node = node[semitone]

        node.setdefault(-1, []).append(chord)

    return tree

###################################################################################

def get_chords_with_prefix(nested_chords_tree, prefix):

    """Return all chords in the trie that start with the given semitone prefix."""

    node = nested_chords_tree

    for semitone in prefix:
        if semitone in node:
            node = node[semitone]

        else:
            return []

    collected_chords = []

    def recursive_collect(subnode):
        # -1 marks complete chords stored at this node.
        if -1 in subnode:
            collected_chords.extend(subnode[-1])

        for key, child in subnode.items():
            if key != -1:
                recursive_collect(child)

    recursive_collect(node)

    return collected_chords

###################################################################################

def get_chords_by_semitones(chords_list, chord_semitones):

    """Return (deduplicated, sorted) chords containing all chord_semitones."""

    query_set = set(chord_semitones)
    results = []

    for chord in chords_list:

        chord_set = set(chord)

        if query_set.issubset(chord_set):
            results.append(sorted(set(chord)))

    return results

###################################################################################

def remove_duplicate_pitches_from_escore_notes(escore_notes,
                                               pitches_idx=4,
                                               patches_idx=6,
                                               return_dupes_count=False
                                               ):

    """Drop repeated (pitch, patch) notes within each chord; optionally
    return only the number of duplicates found.
    """

    cscore = chordify_score([1000, escore_notes])

    new_escore = []

    bp_count = 0

    for c in cscore:

        cho = []
        seen = []

        for cc in c:
            if [cc[pitches_idx], cc[patches_idx]] not in seen:
                cho.append(cc)
                seen.append([cc[pitches_idx], cc[patches_idx]])

            else:
                bp_count += 1

        new_escore.extend(cho)

    if return_dupes_count:
        return bp_count

    else:
        return new_escore

###################################################################################

def chunks_shuffle(lst,
                   min_len=1,
                   max_len=3,
                   seed=None
                   ):

    """Shuffle lst in random-sized contiguous chunks (sizes in
    [min_len, max_len]); deterministic for a given seed.
    """

    rnd = random.Random(seed)
    chunks = []
    i, n = 0, len(lst)

    while i < n:
        size = rnd.randint(min_len, max_len)
        size = min(size, n - i)
        chunks.append(lst[i : i + size])
        i += size

    rnd.shuffle(chunks)

    flattened = []
    for chunk in chunks:
        flattened.extend(chunk)

    return flattened

###################################################################################

def convert_bytes_in_nested_list(lst,
                                 encoding='utf-8',
                                 errors='ignore',
                                 return_changed_events_count=False
                                 ):

    """Recursively decode bytes objects in a nested list to str.

    Fixes: the recursive call now forwards encoding/errors, and decode
    counts from nested lists are accumulated instead of being discarded.
    Returns (new_list, count) when return_changed_events_count is True.
    """

    new_list = []

    ce_count = 0

    for item in lst:
        if isinstance(item, list):
            sub_list, sub_count = convert_bytes_in_nested_list(item,
                                                               encoding,
                                                               errors,
                                                               return_changed_events_count=True
                                                               )
            new_list.append(sub_list)
            ce_count += sub_count

        elif isinstance(item, bytes):
            new_list.append(item.decode(encoding, errors=errors))
            ce_count += 1

        else:
            new_list.append(item)

    if return_changed_events_count:
        return new_list, ce_count

    else:
        return new_list

###################################################################################

def find_deepest_midi_dirs(roots,
                           marker_file="midi_score.mid",
                           suffixes=None,
                           randomize=False,
                           seed=None,
                           verbose=False
                           ):

    """Collect MIDI files from leaf directories (no subdirectories) under
    one or more roots, optionally requiring a marker-file pattern match.

    Returns {leaf_dir_path: [file_paths]}; ordering is sorted, or shuffled
    deterministically by seed when randomize is True.
    """

    # Accept a single path or any iterable of paths.
    try:
        iter(roots)
        if isinstance(roots, (str, Path)):
            root_list = [roots]
        else:
            root_list = list(roots)

    except TypeError:
        root_list = [roots]

    if isinstance(marker_file, (list, tuple)):
        patterns = [p.lower() for p in marker_file if p]

    else:
        patterns = [marker_file.lower()] if marker_file else []

    allowed = {s.lower() for s in (suffixes or ['.mid', '.midi', '.kar'])}

    if verbose:
        print("Settings:")
        print(" Roots:", [str(r) for r in root_list])
        print(" Marker patterns:", patterns or "")
        print(" Allowed suffixes:", allowed)
        print(f" Randomize={randomize}, Seed={seed}")

    results = defaultdict(list)
    rng = random.Random(seed)

    for root in root_list:

        root_path = Path(root)

        if not root_path.is_dir():
            print(f"Warning: '{root_path}' is not a valid directory, skipping.")
            continue

        if verbose:
            print(f"\nScanning root: {str(root_path)}")

        all_dirs = list(root_path.rglob("*"))
        dirs_iter = tqdm.tqdm(all_dirs, desc=f"Dirs in {root_path.name}", disable=not verbose)

        for dirpath in dirs_iter:
            if not dirpath.is_dir():
                continue

            # Only leaf directories (no child directories) are considered.
            children = list(dirpath.iterdir())
            if any(child.is_dir() for child in children):
                if verbose:
                    print(f"Skipping non-leaf: {str(dirpath)}")
                continue

            files = [f for f in children if f.is_file()]
            names = [f.name.lower() for f in files]

            if patterns:
                matched = any(fnmatch(name, pat) for name in names for pat in patterns)
                if not matched:
                    if verbose:
                        print(f"No marker in: {str(dirpath)}")
                    continue

                if verbose:
                    print(f"Marker found in: {str(dirpath)}")

            else:
                if verbose:
                    print(f"Including leaf (no marker): {str(dirpath)}")

            for f in files:
                if f.suffix.lower() in allowed:
                    results[str(dirpath)].append(str(f))

                    if verbose:
                        print(f" Collected: {f.name}")

    all_leaves = list(results.keys())
    if randomize:
        if verbose:
            print("\nShuffling leaf directories")

        rng.shuffle(all_leaves)

    else:
        all_leaves.sort()

    final_dict = {}

    for leaf in all_leaves:
        file_list = results[leaf][:]
        if randomize:
            if verbose:
                print(f"Shuffling files in: {leaf}")

            rng.shuffle(file_list)

        else:
            file_list.sort()

        final_dict[leaf] = file_list

    if verbose:
        print("\nScan complete. Found directories:")
        for d, fl in final_dict.items():
            print(f" {d} -> {len(fl)} files")

    return final_dict

###################################################################################

# General MIDI percussion key numbers grouped by instrument family.
PERCUSSION_GROUPS = {

    1: { # Bass Drums
        35: 'Acoustic Bass Drum',
        36: 'Bass Drum 1',
    },
    2: { # Stick
        37: 'Side Stick',
    },
    3: { # Snares
        38: 'Acoustic Snare',
        40: 'Electric Snare',
    },
    4: { # Claps
        39: 'Hand Clap',
    },
    5: { # Floor Toms
        41: 'Low Floor Tom',
        43: 'High Floor Tom',
    },
    6: { # Hi-Hats
        42: 'Closed Hi-Hat',
        44: 'Pedal Hi-Hat',
        46: 'Open Hi-Hat',
    },
    7: { # Toms
        45: 'Low Tom',
        47: 'Low-Mid Tom',
        48: 'Hi-Mid Tom',
        50: 'High Tom',
    },
    8: { # Cymbals
        49: 'Crash Cymbal 1',
        51: 'Ride Cymbal 1',
        52: 'Chinese Cymbal',
        55: 'Splash Cymbal',
        57: 'Crash Cymbal 2',
        59: 'Ride Cymbal 2',
    },
    9: { # Bells
        53: 'Ride Bell',
    },
    10: { # Tambourine
        54: 'Tambourine',
    },
    11: { # Cowbell
        56: 'Cowbell',
    },
    12: { # Vibraslap
        58: 'Vibraslap',
    },
    13: { # Bongos
        60: 'Hi Bongo',
        61: 'Low Bongo',
    },
    14: { # Congas
        62: 'Mute Hi Conga',
        63: 'Open Hi Conga',
        64: 'Low Conga',
    },
    15: { # Timbales
        65: 'High Timbale',
        66: 'Low Timbale',
    },
    16: { # Agogô
        67: 'High Agogo',
        68: 'Low Agogo',
    },
    17: { # Cabasa
        69: 'Cabasa',
    },
    18: { # Maracas
        70: 'Maracas',
    },
    19: { # Whistles
        71: 'Short Whistle',
        72: 'Long Whistle',
    },
    20: { # Guiros
        73: 'Short Guiro',
        74: 'Long Guiro',
    },
    21: { # Claves
        75: 'Claves',
    },
    22: { # Wood Blocks
        76: 'Hi Wood Block',
        77: 'Low Wood Block',
    },
    23: { # Cuica
        78: 'Mute Cuica',
        79: 'Open Cuica',
    },
    24: { # Triangles
        80: 'Mute Triangle',
        81: 'Open Triangle',
    },
}

###################################################################################

def escore_notes_to_expanded_binary_matrix(escore_notes,
                                           channel=0,
                                           patch=0,
                                           flip_matrix=False,
                                           reverse_matrix=False,
                                           encode_velocities=True
                                           ):

    """Render one (channel, patch) voice as a time x 128 matrix of
    (velocity-or-1, elapsed) tuples; (0, 0) marks silence.

    Fix: removed a dead 'escore_matrix = []' assignment that was immediately
    overwritten. Returns None when no notes match the channel/patch.
    """

    escore = [e for e in escore_notes if e[3] == channel and e[6] == patch]

    if escore:
        last_time = escore[-1][1]
        last_notes = [e for e in escore if e[1] == last_time]
        max_last_dur = max([e[2] for e in last_notes])

        time_range = last_time+max_last_dur

        escore_matrix = [[(0, 0)] * 128 for _ in range(time_range)]

        for note in escore:

            etype, time, duration, chan, pitch, velocity, pat = note

            # Clamp all fields to their valid MIDI ranges.
            time = max(0, time)
            duration = max(1, duration)
            chan = max(0, min(15, chan))
            pitch = max(0, min(127, pitch))
            velocity = max(1, min(127, velocity))
            pat = max(0, min(128, pat))

            if channel == chan and patch == pat:

                count = 0

                for t in range(time, min(time + duration, time_range)):
                    if encode_velocities:
                        escore_matrix[t][pitch] = velocity, count

                    else:
                        escore_matrix[t][pitch] = 1, count

                    count += 1

        if flip_matrix:

            temp_matrix = []

            for m in escore_matrix:
                temp_matrix.append(m[::-1])

            escore_matrix = temp_matrix

        if reverse_matrix:
            escore_matrix = escore_matrix[::-1]

        return escore_matrix

    else:
        return None

###################################################################################

def transpose_list(lst):

    """Transpose a 2D list (rows become columns)."""

    return [list(row) for row in zip(*lst)]

###################################################################################

def chunk_list(lst, size):

    """Split lst into consecutive chunks of at most size elements."""

    return [lst[i:i + size] for i in range(0, len(lst), size)]

###################################################################################

def flip_list_rows(lst):

    """Reverse each row of a 2D list."""

    return [row[::-1] for row in lst]

###################################################################################

def flip_list_columns(lst):

    """Reverse the order of rows of a 2D list."""

    return lst[::-1]

###################################################################################

def exists(sub, lst):

    """Return True if sub occurs in lst as a contiguous sub-sequence."""

    sub_len = len(sub)
    return any(lst[i:i + sub_len] == sub for i in range(len(lst) - sub_len + 1))

###################################################################################

def exists_noncontig(sub, lst):

    """Return True if sub occurs in lst as an in-order (possibly
    non-contiguous) subsequence; consumes the iterator left to right.
    """

    it = iter(lst)
    return all(x in it for x in sub)

###################################################################################

def exists_ratio(sub, lst, ratio):

    """Return True if at least ratio of sub's elements appear in lst.

    Fix: the membership set is built once up front; previously set(lst)
    was rebuilt for every element of sub (accidental O(len(sub)*len(lst))).
    """

    lst_set = set(lst)
    matches = sum(x in lst_set for x in sub)

    return matches / len(sub) >= ratio

###################################################################################

def top_k_list_value(lst, k, reverse=True):

    """Return the k-th value of lst sorted (descending by default)."""

    return sorted(lst, reverse=reverse)[k]

###################################################################################

def top_k_list_values(lst, k, reverse=True):

    """Return the first k values of lst sorted (descending by default)."""

    return sorted(lst, reverse=reverse)[:k]

###################################################################################

def concat_rows(lst_A, lst_B):

    """Concatenate corresponding rows of two 2D lists."""

    return [a + b for a, b in zip(lst_A, lst_B)]

###################################################################################

def concat_cols(lst_A, lst_B):

    """Element-wise concatenate (via +) corresponding cells of two 2D lists."""

    return [[ra + rb for ra, rb in zip(a, b)] for a, b in zip(lst_A, lst_B)]

+################################################################################### + +def chunk_by_threshold_mode(nums, threshold=0, normalize=False): + + if not nums: + return [] + + chunks = [] + chunk = [] + freq = defaultdict(int) + max_freq = 0 + mode_val = None + + def try_add_and_validate(value): + + nonlocal max_freq, mode_val + + chunk.append(value) + freq[value] += 1 + new_max_freq = max_freq + candidate_mode = mode_val + + if freq[value] > new_max_freq: + new_max_freq = freq[value] + candidate_mode = value + + mode = candidate_mode + valid = True + + for v in chunk: + if abs(v - mode) > threshold: + valid = False + break + + if not valid: + + chunk.pop() + freq[value] -= 1 + if freq[value] == 0: + del freq[value] + + return False + + max_freq = new_max_freq + mode_val = mode + return True + + for num in nums: + if not chunk: + chunk.append(num) + freq[num] = 1 + mode_val = num + max_freq = 1 + + else: + if not try_add_and_validate(num): + if normalize: + normalized_chunk = [mode_val] * len(chunk) + chunks.append(normalized_chunk) + + else: + chunks.append(chunk[:]) + + chunk.clear() + freq.clear() + + chunk.append(num) + freq[num] = 1 + mode_val = num + max_freq = 1 + + if chunk: + if normalize: + normalized_chunk = [mode_val] * len(chunk) + chunks.append(normalized_chunk) + + else: + chunks.append(chunk) + + return chunks + +################################################################################### + +def proportional_adjust(values, target_sum, threshold): + + n = len(values) + if n == 0: + return [] + + locked_idx = [i for i, v in enumerate(values) if v < threshold] + adj_idx = [i for i in range(n) if i not in locked_idx] + + locked_sum = sum(values[i] for i in locked_idx) + adj_original_sum = sum(values[i] for i in adj_idx) + adj_target_sum = target_sum - locked_sum + + def _proportional_scale(idxs, original, target): + + scaled_vals = {i: original[i] * (target / sum(original[i] for i in idxs)) + if sum(original[i] for i in idxs) > 0 else 0 
+ for i in idxs} + + floored = {i: math.floor(scaled_vals[i]) for i in idxs} + rem = target - sum(floored.values()) + + fracs = sorted( + ((scaled_vals[i] - floored[i], i) for i in idxs), + key=lambda x: (x[0], -x[1]), + reverse=True + ) + + for _, idx in fracs[:rem]: + floored[idx] += 1 + + result = original.copy() + + for i in idxs: + result[i] = floored[i] + + return result + + if not adj_idx: + if locked_sum == target_sum: + return values.copy() + + return _proportional_scale(locked_idx, values, target_sum) + + if adj_target_sum < 0: + return _proportional_scale(range(n), values, target_sum) + + if adj_original_sum == 0: + base = adj_target_sum // len(adj_idx) + rem = adj_target_sum - base * len(adj_idx) + result = values.copy() + + for j, idx in enumerate(sorted(adj_idx)): + increment = base + (1 if j < rem else 0) + result[idx] = values[idx] + increment + + return result + + result = values.copy() + scaled = {i: values[i] * (adj_target_sum / adj_original_sum) for i in adj_idx} + floored = {i: math.floor(scaled[i]) for i in adj_idx} + floor_sum = sum(floored.values()) + rem = adj_target_sum - floor_sum + + fracs = sorted( + ((scaled[i] - floored[i], i) for i in adj_idx), + key=lambda x: (x[0], -x[1]), + reverse=True + ) + + for _, idx in fracs[:rem]: + floored[idx] += 1 + + for i in adj_idx: + result[i] = floored[i] + + return result + +################################################################################### + +def advanced_align_escore_notes_to_bars(escore_notes, + bar_dtime=200, + dtimes_adj_thresh=4, + min_dur_gap=0 + ): + + #======================================================== + + escore_notes = recalculate_score_timings(escore_notes) + + cscore = chordify_score([1000, escore_notes]) + + #======================================================== + + dtimes = [0] + [min(199, b[1]-a[1]) for a, b in zip(escore_notes[:-1], escore_notes[1:]) if b[1]-a[1] != 0] + + score_times = sorted(set([e[1] for e in escore_notes])) + + 
#======================================================== + + dtimes_chunks = [] + + time = 0 + dtime = [] + + for i, dt in enumerate(dtimes): + time += dt + dtime.append(dt) + + if time >= bar_dtime: + dtimes_chunks.append(dtime) + + time = 0 + dtime = [] + + dtimes_chunks.append(dtime) + + #======================================================== + + fixed_times = [] + + time = 0 + + for i, dt in enumerate(dtimes_chunks): + + adj_dt = proportional_adjust(dt, + bar_dtime, + dtimes_adj_thresh + ) + + for t in adj_dt: + + time += t + + fixed_times.append(time) + + #======================================================== + + output_score = [] + + for i, c in enumerate(cscore): + + cc = copy.deepcopy(c) + time = fixed_times[i] + + for e in cc: + e[1] = time + + output_score.append(e) + + #======================================================== + + output_score = fix_escore_notes_durations(output_score, + min_notes_gap=min_dur_gap + ) + + #======================================================== + + return output_score + +################################################################################### + +def check_monophonic_melody(escore_notes, + times_idx=1, + durs_idx=2 + ): + + bcount = 0 + + for i in range(len(escore_notes)-1): + if escore_notes[i][times_idx]+escore_notes[i][durs_idx] > escore_notes[i+1][times_idx]: + bcount += 1 + + return bcount / len(escore_notes) + +################################################################################### + +def longest_common_chunk(list1, list2): + + base, mod = 257, 10**9 + 7 + max_len = min(len(list1), len(list2)) + + def get_hashes(seq, size): + + h, power = 0, 1 + hashes = set() + + for i in range(size): + h = (h * base + seq[i]) % mod + power = (power * base) % mod + + hashes.add(h) + + for i in range(size, len(seq)): + h = (h * base - seq[i - size] * power + seq[i]) % mod + hashes.add(h) + + return hashes + + def find_match(size): + + hashes2 = get_hashes(list2, size) + h, power = 0, 1 + + for i in 
range(size): + h = (h * base + list1[i]) % mod + power = (power * base) % mod + + if h in hashes2: + return list1[:size] + + for i in range(size, len(list1)): + h = (h * base - list1[i - size] * power + list1[i]) % mod + if h in hashes2: + return list1[i - size + 1:i + 1] + + return [] + + left, right = 0, max_len + result = [] + + while left <= right: + mid = (left + right) // 2 + chunk = find_match(mid) + + if chunk: + result = chunk + left = mid + 1 + else: + + right = mid - 1 + + return result + +################################################################################### + +def detect_plateaus(data, min_len=2, tol=0.0): + + plateaus = [] + n = len(data) + if n < min_len: + return plateaus + + min_deque = deque() + max_deque = deque() + + start = 0 + idx = 0 + + while idx < n: + v = data[idx] + + if not isinstance(v, (int, float)) or math.isnan(v): + + if idx - start >= min_len: + plateaus.append(data[start:idx]) + + idx += 1 + start = idx + min_deque.clear() + max_deque.clear() + + continue + + while max_deque and data[max_deque[-1]] <= v: + max_deque.pop() + + max_deque.append(idx) + + while min_deque and data[min_deque[-1]] >= v: + min_deque.pop() + + min_deque.append(idx) + + if data[max_deque[0]] - data[min_deque[0]] > tol: + + if idx - start >= min_len: + plateaus.append(data[start:idx]) + + start = idx + + min_deque.clear() + max_deque.clear() + + max_deque.append(idx) + min_deque.append(idx) + + idx += 1 + + if n - start >= min_len: + plateaus.append(data[start:n]) + + return plateaus + +################################################################################### + +def alpha_str_to_toks(s, shift=0, add_seos=False): + + tokens = [] + + if add_seos: + tokens = [53+shift] + + for char in s: + if char == ' ': + tokens.append(52+shift) + + elif char.isalpha(): + base = 0 if char.isupper() else 26 + offset = ord(char.upper()) - ord('A') + token = (base + offset + shift) % 52 # wrap A–Z/a–z + tokens.append(token) + + if add_seos: + 
tokens.append(53+shift) + + return tokens + +################################################################################### + +def toks_to_alpha_str(tokens, shift=0, sep=''): + + chars = [] + + for token in tokens: + if token == 53+shift: + continue + + elif token == 52+shift: + chars.append(' ') + + elif 0 <= token <= 25: + original = (token - shift) % 52 + chars.append(chr(ord('A') + original)) + + elif 26 <= token <= 51: + original = (token - shift) % 52 + chars.append(chr(ord('a') + (original - 26))) + + return sep.join(chars) + +################################################################################### + +def insert_caps_newlines(text): + + if bool(re.search(r'\b[A-Z][a-z]+\b', text)): + pattern = re.compile(r'\s+(?=[A-Z])') + + return pattern.sub('\n', text) + +################################################################################### + +def insert_newlines(text, every=4): + + count = 0 + result = [] + + for char in text: + result.append(char) + + if char == '\n': + count += 1 + + if count % every == 0: + result.append('\n') + + return ''.join(result) + +################################################################################### + +def symmetric_match_ratio(list_a, list_b, threshold=0): + + a_sorted = sorted(list_a) + b_sorted = sorted(list_b) + + i, j = 0, 0 + matches = 0 + + used_a = set() + used_b = set() + + while i < len(a_sorted) and j < len(b_sorted): + diff = abs(a_sorted[i] - b_sorted[j]) + + if diff <= threshold: + matches += 1 + used_a.add(i) + used_b.add(j) + i += 1 + j += 1 + + elif a_sorted[i] < b_sorted[j]: + i += 1 + + else: + j += 1 + + avg_len = (len(list_a) + len(list_b)) / 2 + + return matches / avg_len if avg_len > 0 else 0.0 + +################################################################################### + +def escore_notes_to_chords(escore_notes, + use_full_chords=False, + repair_bad_chords=True, + skip_pitches=False, + convert_pitches=True, + shift_chords=False, + return_tones_chords=False + ): + + 
if use_full_chords: + CHORDS = ALL_CHORDS_FULL + + else: + CHORDS = ALL_CHORDS_SORTED + + sp_score = solo_piano_escore_notes(escore_notes) + + cscore = chordify_score([1000, sp_score]) + + chords = [] + + for c in cscore: + pitches = sorted(set([e[4] for e in c])) + + tones_chord = sorted(set([p % 12 for p in pitches])) + + if repair_bad_chords: + if tones_chord not in CHORDS: + tones_chord = check_and_fix_tones_chord(tones_chord, + use_full_chords=use_full_chords + ) + + if return_tones_chords: + if convert_pitches: + chords.append(tones_chord) + + else: + if len(pitches) > 1: + chords.append(tones_chord) + + else: + chords.append([-pitches[0]]) + + else: + if skip_pitches: + if tones_chord in CHORDS: + cho_tok = CHORDS.index(tones_chord) + + else: + cho_tok = -1 + + if len(pitches) > 1: + chords.append(cho_tok) + + else: + if tones_chord in CHORDS: + cho_tok = CHORDS.index(tones_chord) + + else: + cho_tok = -1 + + if cho_tok != -1: + if convert_pitches: + if shift_chords: + if len(pitches) > 1: + chords.append(cho_tok+12) + + else: + chords.append(pitches[0] % 12) + + else: + chords.append(cho_tok) + + else: + if len(pitches) > 1: + chords.append(cho_tok+128) + + else: + chords.append(pitches[0]) + + return chords + +################################################################################### + +def replace_chords_in_escore_notes(escore_notes, + chords_list=[-1], + use_full_chords=False, + use_shifted_chords=False + ): + + if use_full_chords: + CHORDS = ALL_CHORDS_FULL + + else: + CHORDS = ALL_CHORDS_SORTED + + if use_shifted_chords: + shift = 12 + + else: + shift = 0 + + if min(chords_list) >= 0 and max(chords_list) <= len(CHORDS)+shift: + + chords_list_iter = cycle(chords_list) + + nd_score = [e for e in escore_notes if e[3] != 9] + d_score = [e for e in escore_notes if e[3] == 9] + + cscore = chordify_score([1000, nd_score]) + + new_score = [] + + for i, c in enumerate(cscore): + + cur_chord = next(chords_list_iter) + + cc = copy.deepcopy(c) + + if 
use_shifted_chords: + if cur_chord < 12: + sub_tones_chord = [cur_chord] + + else: + sub_tones_chord = CHORDS[cur_chord-12] + else: + sub_tones_chord = CHORDS[cur_chord] + + stcho = cycle(sub_tones_chord) + + if len(sub_tones_chord) > len(c): + cc = [copy.deepcopy(e) for e in cc for _ in range(len(sub_tones_chord))] + + pseen = [] + + for e in cc: + st = next(stcho) + new_pitch = ((e[4] // 12) * 12) + st + + if [new_pitch, e[6]] not in pseen: + e[4] = new_pitch + + new_score.append(e) + pseen.append([new_pitch, e[6]]) + + final_score = sorted(new_score+d_score, key=lambda x: x[1]) + + return final_score + + else: + return [] + +################################################################################### + +class Cell: + def __init__(self, cost, segments, gaps, prev_dir): + self.cost = cost + self.segments = segments + self.gaps = gaps + self.prev_dir = prev_dir + +def align_integer_lists(seq1, seq2): + + n, m = len(seq1), len(seq2) + + if n == 0: + return [None]*m, seq2.copy(), sum(abs(x) for x in seq2) + if m == 0: + return seq1.copy(), [None]*n, sum(abs(x) for x in seq1) + + priority = {'diag': 0, 'up': 1, 'left': 2} + + dp = [ + [Cell(cost=math.inf, segments=math.inf, gaps=math.inf, prev_dir='') for _ in range(m+1)] + for _ in range(n+1) + ] + dp[0][0] = Cell(cost=0, segments=0, gaps=0, prev_dir='') + + for i in range(1, n+1): + prev = dp[i-1][0] + new_cost = prev.cost + abs(seq1[i-1]) + new_seg = prev.segments + (1 if prev.prev_dir != 'up' else 0) + new_gaps = prev.gaps + 1 + dp[i][0] = Cell(new_cost, new_seg, new_gaps, 'up') + + for j in range(1, m+1): + prev = dp[0][j-1] + new_cost = prev.cost + abs(seq2[j-1]) + new_seg = prev.segments + (1 if prev.prev_dir != 'left' else 0) + new_gaps = prev.gaps + 1 + dp[0][j] = Cell(new_cost, new_seg, new_gaps, 'left') + + for i in range(1, n+1): + for j in range(1, m+1): + a, b = seq1[i-1], seq2[j-1] + + c0 = dp[i-1][j-1] + cand_diag = Cell( + cost = c0.cost + abs(a - b), + segments = c0.segments, + gaps = c0.gaps, 
+ prev_dir = 'diag' + ) + + c1 = dp[i-1][j] + seg1 = c1.segments + (1 if c1.prev_dir != 'up' else 0) + cand_up = Cell( + cost = c1.cost + abs(a), + segments = seg1, + gaps = c1.gaps + 1, + prev_dir = 'up' + ) + + c2 = dp[i][j-1] + seg2 = c2.segments + (1 if c2.prev_dir != 'left' else 0) + cand_left = Cell( + cost = c2.cost + abs(b), + segments = seg2, + gaps = c2.gaps + 1, + prev_dir = 'left' + ) + + best = min( + (cand_diag, cand_up, cand_left), + key=lambda c: (c.cost, c.segments, c.gaps, priority[c.prev_dir]) + ) + dp[i][j] = best + + aligned1 = [] + aligned2 = [] + i, j = n, m + + while i > 0 or j > 0: + cell = dp[i][j] + + if cell.prev_dir == 'diag': + aligned1.append(seq1[i-1]) + aligned2.append(seq2[j-1]) + i, j = i-1, j-1 + + elif cell.prev_dir == 'up': + aligned1.append(seq1[i-1]) + aligned2.append(None) + i -= 1 + + else: + aligned1.append(None) + aligned2.append(seq2[j-1]) + j -= 1 + + aligned1.reverse() + aligned2.reverse() + + total_cost = int(dp[n][m].cost) + + return aligned1, aligned2, total_cost + +################################################################################### + +def most_common_delta_time(escore_notes): + + dscore = delta_score_notes(escore_notes) + + dtimes = [t[1] for t in dscore if t[1] != 0] + + cdtime, count = Counter(dtimes).most_common(1)[0] + + return [cdtime, count / len(dtimes)] + +################################################################################### + +def delta_tones(escore_notes, + ptcs_idx=4 + ): + + pitches = [p[ptcs_idx] for p in escore_notes] + tones = [p % 12 for p in pitches] + + return [b-a for a, b in zip(tones[:-1], tones[1:])] + +################################################################################### + +def find_divisors(val, + reverse=False + ): + + if val == 0: + return [] + + n = abs(val) + divisors = set() + + for i in range(1, int(n**0.5) + 1): + if n % i == 0: + divisors.add(i) + divisors.add(n // i) + + return sorted(divisors, reverse=reverse) + 
###################################################################################

def find_common_divisors(values,
                         reverse=False
                         ):

    """Return every common divisor of the non-zero entries of values.

    The result is the sorted divisor set of the overall GCD (descending when
    reverse is True); [] when values is empty or all-zero.
    """

    if not values:
        return []

    magnitudes = [abs(v) for v in values if v != 0]
    if not magnitudes:
        return []

    overall = reduce(gcd, magnitudes)

    found = set()

    step = 1
    while step * step <= overall:
        if overall % step == 0:
            found.add(step)
            found.add(overall // step)
        step += 1

    return sorted(found, reverse=reverse)

###################################################################################

def strings_dict(list_of_strings,
                 verbose=False
                 ):

    """Build character<->index vocabularies over all characters seen.

    Returns (char_to_index, index_to_char) dicts keyed over the sorted set of
    characters; a tqdm progress bar is shown when verbose is True.
    """

    seen = set()

    for text in tqdm.tqdm(list_of_strings, disable=not verbose):
        seen.update(text)

    ordered = sorted(seen)

    forward = {ch: idx for idx, ch in enumerate(ordered)}
    backward = {idx: ch for idx, ch in enumerate(ordered)}

    return forward, backward

###################################################################################

def chords_common_tones_chain(chords,
                              use_full_chords=False
                              ):

    """For each chord index, return the lowest tone shared with its immediate
    neighbors (-1 when no tone is common).

    Out-of-range indices are filtered out first; fewer than two surviving
    chords are returned as-is (as tones chords).
    """

    chord_table = ALL_CHORDS_FULL if use_full_chords else ALL_CHORDS_SORTED

    tones_chords = [chord_table[c] for c in chords if 0 <= c < len(chord_table)]

    count = len(tones_chords)

    if not tones_chords:
        return []

    if count < 2:
        return tones_chords

    chain = []

    for idx in range(count):
        # window = previous, current and next chord (clamped at the ends)
        lo = max(idx - 1, 0)
        hi = min(idx + 1, count - 1)

        common = set(tones_chords[lo])
        for k in range(lo + 1, hi + 1):
            common &= set(tones_chords[k])

        chain.append(min(common) if common else -1)

    return chain

###################################################################################

def tones_chord_to_int(tones_chord,
                       reverse_bits=True
                       ):

    """Pack a tones chord into its 12-bit integer representation."""

    chord_bits = tones_chord_to_bits(tones_chord,
                                     reverse=reverse_bits
                                     )

    return bits_to_int(chord_bits)
###################################################################################

def int_to_tones_chord(integer,
                       reverse_bits=True
                       ):

    """Unpack a 12-bit integer (taken mod 4096) into a tones chord."""

    bits = int_to_bits(integer % 4096)

    if reverse_bits:
        bits = bits[::-1]

    return bits_to_tones_chord(bits)

###################################################################################

def fix_bad_chords_in_escore_notes(escore_notes,
                                   use_full_chords=False,
                                   return_bad_chords_count=False
                                   ):

    """Repair unrecognized chords in a score.

    Each chordified slice is split per channel; non-drum groups whose tone
    set is not in the chord table are repaired via check_and_fix_tones_chord
    and notes outside the repaired set are dropped.  Drum notes and
    single-note slices pass through untouched.  Returns the fixed score
    sorted by (time, -pitch); with return_bad_chords_count=True, also the
    number of repaired groups.
    """

    chord_table = ALL_CHORDS_FULL if use_full_chords else ALL_CHORDS_SORTED

    repaired_count = 0

    if not escore_notes:
        # nothing to fix; echo the input back
        if return_bad_chords_count:
            return escore_notes, repaired_count

        return escore_notes

    output = []

    for chord in chordify_score([1000, escore_notes]):
        # groupby needs the slice pre-sorted by channel
        chord.sort(key=lambda note: note[3])

        if len(chord) < 2:
            output.extend(chord)
            continue

        for channel, grp in groupby(chord, key=lambda note: note[3]):
            grp = list(grp)

            if channel == 9:
                # drums are never chord-checked
                output.extend(grp)
                continue

            tones_chord = sorted(set(note[4] % 12 for note in grp))

            if tones_chord not in chord_table:
                tones_chord = check_and_fix_tones_chord(tones_chord,
                                                        use_full_chords=use_full_chords
                                                        )
                repaired_count += 1

            # keep only the notes whose tone survived the repair
            output.extend(note for note in grp if note[4] % 12 in tones_chord)

    output.sort(key=lambda note: (note[1], -note[4]))

    if return_bad_chords_count:
        return output, repaired_count

    return output

###################################################################################

def remove_events_from_escore_notes(escore_notes,
                                    ele_idx=2,
                                    ele_vals=[1],
                                    chan_idx=3,
                                    skip_drums=True
                                    ):

    """Drop events whose element at ele_idx is listed in ele_vals.

    With skip_drums=True (default), events on channel 9 are always kept
    regardless of their ele_idx value.
    """

    if skip_drums:
        return [evt for evt in escore_notes
                if evt[ele_idx] not in ele_vals or evt[chan_idx] == 9]

    return [evt for evt in escore_notes if evt[ele_idx] not in ele_vals]
###################################################################################

def flatten_spikes(arr):

    """Clamp single-sample spikes/dips to the median of each value and its
    two immediate neighbors; returns a new list."""

    if len(arr) < 3:
        return arr[:]

    smoothed = arr[:]

    for idx in range(1, len(arr) - 1):
        left, mid, right = arr[idx - 1], arr[idx], arr[idx + 1]

        spike_up = left <= right and mid > left and mid > right
        spike_down = left >= right and mid < left and mid < right

        if spike_up or spike_down:
            # median-of-three clamp
            smoothed[idx] = max(min(left, right), min(max(left, right), mid))

    return smoothed

###################################################################################

def flatten_spikes_advanced(arr, window=1):

    """Replace values that stick out above or below the averages of their
    `window`-sized neighborhoods with the neighborhood median; returns a
    new list."""

    if len(arr) < 3:
        return arr[:]

    smoothed = arr[:]
    total = len(arr)

    def _is_spike(idx):
        # compare against the mean of each side of the window
        left = arr[idx - window:idx]
        right = arr[idx + 1:idx + 1 + window]

        if not left or not right:
            return False

        mean_left = sum(left) / len(left)
        mean_right = sum(right) / len(right)

        above = arr[idx] > mean_left and arr[idx] > mean_right
        below = arr[idx] < mean_left and arr[idx] < mean_right

        return above or below

    for idx in range(window, total - window):
        if _is_spike(idx):
            neighbors = arr[idx - window:idx] + arr[idx + 1:idx + 1 + window]
            smoothed[idx] = int(sorted(neighbors)[len(neighbors) // 2])

    return smoothed

###################################################################################

def add_smooth_melody_to_enhanced_score_notes(escore_notes,
                                              melody_channel=3,
                                              melody_patch=40,
                                              melody_start_chord=0,
                                              min_notes_gap=0,
                                              exclude_durs=[1],
                                              adv_flattening=True,
                                              extend_durs=True,
                                              max_mel_vels=127,
                                              max_acc_vels=80,
                                              return_melody=False
                                              ):

    """Derive a pitch-smoothed expressive melody from a score and overlay it.

    The score is de-duplicated, duration-fixed, chord-repaired and filtered
    of excluded durations; an expressive melody line is extracted, its pitch
    contour is de-spiked, and velocities of melody and accompaniment are
    rescaled.  Returns the melody alone (return_melody=True) or the merged
    score, sorted by (time, -pitch).
    """

    deduped = remove_duplicate_pitches_from_escore_notes(escore_notes)

    regapped = fix_escore_notes_durations(deduped,
                                          min_notes_gap=min_notes_gap
                                          )

    rechorded = fix_bad_chords_in_escore_notes(regapped)

    accompaniment = remove_events_from_escore_notes(rechorded,
                                                    ele_vals=exclude_durs
                                                    )

    raw_melody = add_expressive_melody_to_enhanced_score_notes(accompaniment,
                                                               melody_channel=melody_channel,
                                                               melody_patch=melody_patch,
                                                               melody_start_chord=melody_start_chord,
                                                               return_melody=True,
                                                               )

    mel_score = remove_events_from_escore_notes(raw_melody,
                                                ele_vals=exclude_durs
                                                )

    contour = [note[4] for note in mel_score]

    # smooth the melodic pitch contour
    if adv_flattening:
        flattened = flatten_spikes_advanced(contour)
    else:
        flattened = flatten_spikes(contour)

    melody = copy.deepcopy(mel_score)

    for note, pitch in zip(melody, flattened):
        note[4] = pitch

    melody = fix_monophonic_score_durations(merge_melody_notes(melody),
                                            extend_durs=extend_durs
                                            )

    # velocity rescaling is done in place
    adjust_score_velocities(melody, max_mel_vels)
    adjust_score_velocities(accompaniment, max_acc_vels)

    if return_melody:
        return sorted(melody, key=lambda note: (note[1], -note[4]))

    return sorted(melody + accompaniment, key=lambda note: (note[1], -note[4]))

###################################################################################

def sorted_chords_to_full_chords(chords):

    """Translate ALL_CHORDS_SORTED indices into ALL_CHORDS_FULL indices,
    repairing chords that have no direct counterpart."""

    converted = []

    for idx in chords:
        tones_chord = ALL_CHORDS_SORTED[idx]

        if tones_chord not in ALL_CHORDS_FULL:
            tones_chord = check_and_fix_tones_chord(tones_chord)

        converted.append(ALL_CHORDS_FULL.index(tones_chord))

    return converted

###################################################################################

def full_chords_to_sorted_chords(chords):

    """Translate ALL_CHORDS_FULL indices into ALL_CHORDS_SORTED indices,
    repairing chords that have no direct counterpart."""

    converted = []

    for idx in chords:
        tones_chord = ALL_CHORDS_FULL[idx]

        if tones_chord not in ALL_CHORDS_SORTED:
            tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=False)

        converted.append(ALL_CHORDS_SORTED.index(tones_chord))

    return converted

###################################################################################

def chords_to_escore_notes(chords,
                           use_full_chords=False,
                           chords_dtime=500,
                           add_melody=True,
                           add_texture=True,
                           ):

    """Render a sequence of chord-table indices as enhanced score notes.

    Each chord occupies one chords_dtime slot: chord tones on channel 0 one
    octave up, an optional root-doubling melody two octaves up on channel 3
    (patch 40), and optional random octave-texture notes.
    """

    chord_table = ALL_CHORDS_FULL if use_full_chords else ALL_CHORDS_SORTED

    score = []

    start_time = 0
    duration = chords_dtime

    for cidx in chords:

        tones_chord = chord_table[cidx]

        if add_melody:
            score.append(['note', start_time, duration, 3, tones_chord[0]+72, 115+tones_chord[0], 40])

        for tone in tones_chord:
            score.append(['note', start_time, duration, 0, tone+48, 30+tone+48, 0])

            # NOTE: randint is drawn before checking add_texture on purpose,
            # so the RNG stream is identical whether or not texture is added
            if random.randint(0, 1) and add_texture:
                score.append(['note', start_time, duration, 0, tone+60, 20+tone+60, 0])

        start_time += chords_dtime

    return score

###################################################################################

print('Module loaded!')
print('=' * 70)
print('Enjoy! :)')
print('=' * 70)

###################################################################################
# This is the end of the TMIDI X Python module
###################################################################################