ya sdelal'
This commit is contained in:
123
docs/generate_plots.py
Normal file
123
docs/generate_plots.py
Normal file
@@ -0,0 +1,123 @@
|
||||
import os
|
||||
import re
|
||||
from math import floor, ceil
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
|
||||
# Absolute path of the directory that contains this script.
HERE = os.path.dirname(os.path.abspath(__file__))
# Directory holding the training logs, expressed relative to HERE.
# NOTE(review): the path climbs two levels before descending into
# docs/logs/ -- confirm this matches where the script actually lives.
LOGS = os.path.join(HERE, '../../docs/logs/')
|
||||
|
||||
|
||||
# Per-dataset plot configuration, keyed by the dataset prefix of a log file.
#   idx    -- index of the subplot (axis) the dataset is drawn on
#   name   -- subplot title
#   target -- validation-loss threshold drawn as a horizontal line and used
#             to measure how many windows a run needs to get below it
#   lim    -- (min, max) x-axis limits of the subplot
datasets = {
    'moby': dict(
        idx=0,
        name='Moby Dick (~200k words)',
        target=8.4,
        lim=(16000, 320000),
    ),
    'wiki': dict(
        name='English Wikipedia (~90M words)',
        idx=1,
        target=8.3,
        lim=(16000, 360000),
    ),
}
|
||||
|
||||
|
||||
def s(n):
    """Return the English plural suffix for a count.

    Args:
        n: the quantity being described.

    Returns:
        '' when ``n`` is exactly 1, otherwise 's' -- so zero is plural
        as well ("0 Learners", "1 Learner", "2 Learners").
    """
    return '' if n == 1 else 's'
|
||||
|
||||
|
||||
def idx_of(l, cond=lambda x: x):
    """Return the position of the first element of ``l`` accepted by ``cond``.

    Args:
        l: iterable to scan from the start.
        cond: predicate called on each element; defaults to the element's
            own truthiness.

    Returns:
        The zero-based index of the first element for which ``cond`` is
        truthy, or -1 when no element qualifies. Callers must check for
        -1 before indexing with the result, since -1 is itself a valid
        sequence index.
    """
    for position, element in enumerate(l):
        if cond(element):
            return position
    return -1
|
||||
|
||||
|
||||
def meta_from_fn(fn):
    """Extract run metadata from a log file name.

    The name must contain ``<name>_<learners>_learner_<pipelines>_pp``,
    e.g. ``wiki_4_learner_2_pp.log``.

    Args:
        fn: file name (or path) of a run log.

    Returns:
        A ``(name, learners, pipelines)`` tuple where ``name`` is the text
        captured in front of the learner count and the two counts are ints.

    Raises:
        ValueError: if ``fn`` does not contain the expected pattern.
    """
    m = re.search(r'(.+)_(\d+)_learner_(\d+)_pp', fn)
    if m is None:
        # Fail with a message that names the offending file instead of an
        # opaque "'NoneType' object has no attribute 'group'".
        raise ValueError(
            f'{fn!r} does not match '
            f"'<name>_<learners>_learner_<pipelines>_pp'"
        )
    name, learners, pipelines = m.groups()
    return name, int(learners), int(pipelines)
|
||||
|
||||
|
||||
|
||||
# One "windows <int> validation loss <float>" record per evaluated checkpoint.
_LOSS_RE = re.compile(r'windows (\d+) validation loss (\d+\.\d+)')


def _warn(message):
    """Report a recoverable problem on stderr without aborting the run."""
    import sys  # local import: this script's header does not import sys
    print(f'warning: {message}', file=sys.stderr)


def _list_runs(log_dir):
    """Return ``(file name, (name, learners, pipelines))`` for each run log.

    Files whose names cannot be parsed by ``meta_from_fn`` are skipped with
    a warning, so a stray file in the log directory does not abort plotting.
    """
    runs = []
    for fn in os.listdir(log_dir):
        try:
            meta = meta_from_fn(fn)
        except (AttributeError, ValueError):
            # meta_from_fn fails on names that lack the
            # '<name>_<learners>_learner_<pipelines>_pp' pattern.
            _warn(f'skipping {fn!r}: not a run log')
            continue
        runs.append((fn, meta))
    # Stable sort by learner count, so curves are drawn (and therefore
    # listed in the legend) in ascending learner order.
    runs.sort(key=lambda run: run[1][1])
    return runs


def _load_curve(path):
    """Parse one log into parallel ``(windows, loss)`` tuples.

    Returns two empty tuples when the log has no validation-loss lines.
    """
    with open(path) as f:
        found = (_LOSS_RE.search(line) for line in f)
        points = [(int(m.group(1)), float(m.group(2)))
                  for m in found if m is not None]
    if not points:
        return (), ()
    windows, loss = zip(*points)
    return windows, loss


def speedup_plot(zipped):
    """Plot speedup over the smallest factor on the current pyplot axes.

    Args:
        zipped: iterable of ``(factor, time_to_target)`` pairs. After
            sorting, the first pair is the baseline and every time is
            plotted as ``baseline_time / time``.
    """
    pairs = sorted(zipped)
    if not pairs:
        _warn('no runs reached the target; speedup plot left empty')
        return
    factors, time = zip(*pairs)
    time = np.asarray(time)
    speedup = time[0] / time
    print(factors, time)
    plt.plot(factors, speedup)
    plt.xlim(min(factors), max(factors))
    plt.ylim(min(speedup), max(speedup))
    plt.xticks([*range(min(factors), max(factors) + 1)])
    plt.yticks([*range(floor(min(speedup)), ceil(max(speedup)) + 1)])
    plt.grid()


def main():
    """Render the loss-curve figure and the speedup figure from the logs."""
    fig = plt.figure(figsize=(10, 4))
    fig.subplots_adjust(left=0.06, right=0.99, top=0.91, wspace=0.18)
    axs = fig.subplots(1, len(datasets))
    pp_speedup = []
    l_speedup = []
    # First plotted window of the most recently drawn curve; it becomes an
    # extra x tick on every subplot.
    first_tick = None

    for fn, (name, learners, pipelines) in _list_runs(LOGS):
        if learners == 16:
            continue
        windows, loss = _load_curve(os.path.join(LOGS, fn))
        if not windows:
            _warn(f'skipping {fn!r}: no validation loss lines found')
            continue
        dataset = datasets[name]
        # The first logged point is left out of the curve.
        axs[dataset['idx']].plot(
            windows[1:], loss[1:],
            # Dashed for multi-pipeline runs, solid otherwise.
            linestyle='--' if pipelines > 1 else '-',
            color=f'C{learners // 2}',
            label=f'{learners} Learner{s(learners)},'
                  f' {pipelines} Pipeline{s(pipelines)}'
        )
        if len(windows) > 1:
            first_tick = windows[1]

        # Speedups are only evaluated on the wiki dataset.
        if name != 'wiki':
            continue
        hit = idx_of(loss, lambda value: value < dataset['target'])
        if hit < 0:
            # idx_of signals "never reached" with -1; indexing windows with
            # it would silently report the last window as time-to-target.
            _warn(f'{fn!r} never reaches target {dataset["target"]}; '
                  f'left out of the speedup plots')
            continue
        ttt = windows[hit]
        # Pipeline scaling: the single-learner baseline plus every
        # multi-pipeline run.
        if pipelines > 1 or learners == 1:
            pp_speedup.append((pipelines, ttt))
        # Learner scaling: single-pipeline runs only.
        if pipelines == 1:
            l_speedup.append((learners, ttt))

    ticks = [*range(0, 300001, 100000)]
    if first_tick is not None:
        ticks = [first_tick] + ticks
    for d in datasets.values():
        a = axs[d['idx']]
        a.set_xlabel('Context Windows per Learner')
        a.set_ylabel('Validation Loss')
        a.set_xticks(ticks)
        a.set_xlim(*d['lim'])
        a.set_title(d['name'])
        a.legend()
        a.axhline(d['target'], color='k', linestyle=':')

    # Make sure the output directory exists before saving into it.
    os.makedirs(os.path.join(HERE, 'fig'), exist_ok=True)
    fig.savefig(os.path.join(HERE, 'fig/datasets.pdf'))

    fig = plt.figure(figsize=(10, 4))
    fig.subplots_adjust(left=0.06, right=0.99, top=0.91, wspace=0.18)
    target_label = f'Speedup to Target {datasets["wiki"]["target"]}'

    plt.subplot(121)
    speedup_plot(l_speedup)
    plt.title('Single Pipeline')
    plt.xlabel('Number of Learners')
    plt.ylabel(target_label)

    plt.subplot(122)
    speedup_plot(pp_speedup)
    plt.title('Multiple Pipelines')
    plt.xlabel('Number of Pipelines')
    plt.ylabel(target_label)

    plt.savefig(os.path.join(HERE, 'fig/speedups.pdf'))
    plt.show()


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user