mainly faster startup thanks to 'lazy' loading

2019-12-12 20:02:03 -08:00
parent d954a9832b
commit 966bbc904c
4 changed files with 35 additions and 28 deletions

.gitignore

@@ -6,4 +6,4 @@ compile_commands.json
 build/
 cythoned/
 __pycache__/
-data/
+data_*/


@@ -1,11 +1,6 @@
 import os
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-import numpy as np
-import tensorflow as tf
-tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)  # STFU!
-tf.random.set_random_seed(42)
 from mynet import onehot
@@ -27,6 +22,7 @@ def word_tokenize(s: str):
 def create_test_dataset(win):
+    import numpy as np
     test_dataset = np.vectorize(vocab.get)(np.genfromtxt(TEST, dtype=str))
     assert test_dataset.shape[1] == 2*win + 1
     X_test = test_dataset[:, [*range(0, win), *range(win+1, win+win+1)]]
@@ -35,6 +31,10 @@ def create_test_dataset(win):
 def create_mnist_network():
+    import tensorflow as tf
+    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)  # STFU!
+    tf.random.set_random_seed(42)
     model = tf.keras.models.Sequential([
         tf.keras.layers.Dense(30, input_shape=(784,), activation='relu'),
         tf.keras.layers.Dense(10, activation='softmax')
@@ -45,6 +45,10 @@ def create_mnist_network():
 def create_cbow_network(win, embed):
+    import tensorflow as tf
+    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)  # STFU!
+    tf.random.set_random_seed(42)
     ctxt = tf.keras.layers.Input(shape=[2*win])
     ed = tf.keras.layers.Embedding(len(vocab), embed, input_length=2*win)(ctxt)
     cbow = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=1))(ed)
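The gist of the 'lazy' loading above: importing TensorFlow at module scope makes every MPI rank pay the multi-second import cost at startup, even ranks that never build a network. A minimal sketch of the pattern (illustrative only, not the project's actual module; create_network is a hypothetical name):

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # must be set before TensorFlow is first imported

def create_network():
    # Deferred import: the cost is paid only on the first call, and only in
    # processes that actually build a network; later calls hit the module
    # already cached in sys.modules.
    import tensorflow as tf
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    tf.random.set_random_seed(42)
    return tf.keras.models.Sequential([
        tf.keras.layers.Dense(30, input_shape=(784,), activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])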

main.c

@@ -17,8 +17,9 @@
 #define TAG_INSTR 9
 #define TAG_TERMT 10
-#define COMM 25
-#define ITER 100
+#define COMM 50
+#define ITER 250
+#define TARGET 8.40
 #define BS 32
 #define EMB 32
 #define WIN 2
@@ -213,8 +214,8 @@ void tokenizer(const char* source) {
 void filterer() {
   INFO_PRINTF("Starting filterer %d\n", getpid());
   int rid = my_role_id(FILTERER);
-  int prev = mpi_id_from_role_id(TOKENIZER, rid);
-  int next = mpi_id_from_role_id(BATCHER, rid);
+  int tokenizer = mpi_id_from_role_id(TOKENIZER, rid);
+  int batcher = mpi_id_from_role_id(BATCHER, rid);
   Word w = {0, NULL};
   const size_t window_size = 2 * WIN + 1;
@@ -223,7 +224,7 @@ void filterer() {
   while (1) {
     while (have != window_size) {
-      recv_word(&w, prev);
+      recv_word(&w, tokenizer);
       if (!strlen(w.data)) break;
@@ -234,10 +235,10 @@ void filterer() {
     if (!strlen(w.data)) break;
     have = 0;
-    send_window(window, window_size, next);
+    send_window(window, window_size, batcher);
   }
   window[0] = -1;
-  send_window(window, window_size, next);
+  send_window(window, window_size, batcher);
   free_word(&w);
   free(window);
   INFO_PRINTF("Finishing filterer %d\n", getpid());
@@ -246,7 +247,7 @@ void filterer() {
 void batcher() {
   INFO_PRINTF("Starting batcher %d\n", getpid());
   int rid = my_role_id(BATCHER);
-  int prev = mpi_id_from_role_id(FILTERER, rid);
+  int tokenizer = mpi_id_from_role_id(FILTERER, rid);
   int learner_mpi_id = 0;
   const size_t window_size = 2 * WIN + 1;
@@ -256,7 +257,7 @@ void batcher() {
   while (1) {
     for in_range(r, BS) {
-      recv_window(l_wid, window_size, prev);
+      recv_window(l_wid, window_size, tokenizer);
       if (l_wid[0] == -1) break;
@@ -381,7 +382,8 @@ void dispatcher() {
   float crt_loss = first_loss;
   float min_loss = crt_loss;
   time_t start = time(NULL);
-  for in_range(i, COMM) {
+  size_t rounds = 0;
+  while (crt_loss > TARGET) {
     randidx(round, number_of(LEARNER), lpr);
     for in_range(k, lpr) {
       // Instruct learners to learn
@@ -397,8 +399,10 @@ void dispatcher() {
     set_net_weights(frank, &wl);
     crt_loss = eval_net(frank);
     min_loss = crt_loss < min_loss ? crt_loss : min_loss;
-    INFO_PRINTF("Round %ld, validation loss %f\n", i, crt_loss);
+    INFO_PRINTF("Round %ld, validation loss %f\n", rounds, crt_loss);
+    rounds++;
   }
+  time_t finish = time(NULL);
   go = -1;
   for in_range(t, number_of(TOKENIZER)) {
@@ -410,14 +414,17 @@ void dispatcher() {
               TAG_INSTR, MPI_COMM_WORLD);
   }
-  time_t finish = time(NULL);
   float delta_t = finish - start;
   float delta_l = first_loss - crt_loss;
   INFO_PRINTF(
-      "Laptop MPI adam consecutive_batch W%d E%d "
-      "BS%d R%d bpe%d LPR%d pp%d,"
-      "%f,%f,%f\n", WIN, EMB, BS, COMM, ITER, lpr, g_argc - 1,
-      delta_l / COMM, delta_l / delta_t, min_loss);
+      "Laptop MPI adam consecutive_batch "
+      "W%d E%d BS%d bpe%d LPR%d pp%lu,"
+      "%f,%f,%f,%f,"
+      "%lu,%.0f,%lu\n",
+      WIN, EMB, BS, ITER, lpr, number_of(TOKENIZER),
+      delta_l / rounds, delta_l / delta_t, min_loss, TARGET,
+      rounds, delta_t, BS * ITER * rounds);
   Py_DECREF(frank);
   free_weightlist(&wl);
   for in_range(i, lpr) free_weightlist(wls + i);
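The dispatcher change above swaps a fixed budget of COMM rounds for a run-to-target loop: train until validation loss drops below TARGET, then report loss gained per round and per wall-clock second (which is why finish is now taken before the shutdown messages go out). A rough Python rendering of the new control flow, with hypothetical run_round/eval_net stand-ins for the MPI and embedded-CPython calls:

import time

TARGET = 8.40  # same convergence target as the new #define

def dispatch(first_loss, run_round, eval_net):
    crt_loss = min_loss = first_loss
    start = time.time()
    rounds = 0
    while crt_loss > TARGET:       # was: a fixed count of COMM rounds
        run_round()                # pick learners, average their weights
        crt_loss = eval_net()
        min_loss = min(min_loss, crt_loss)
        rounds += 1
    delta_t = time.time() - start  # timed before learners are torn down
    delta_l = first_loss - crt_loss
    # loss improvement per round, per second, plus the convergence stats
    return delta_l / rounds, delta_l / delta_t, min_loss, rounds, delta_t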
@@ -440,9 +447,9 @@ int main (int argc, const char **argv) {
     MPI_Abort(MPI_COMM_WORLD, 1);
   }
   int pipelines = argc - 1;
-  int min_nodes = 3 * pipelines + 2;
+  int min_nodes = 4 * pipelines + 1;
   if (world_size() < min_nodes) {
-    INFO_PRINTF("You requested %d pipelines "
+    INFO_PRINTF("You requested %d pipeline(s) "
                 "but only provided %d procs "
                 "(%d required)\n",
                 pipelines, world_size(), min_nodes);
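The new minimum of 4 * pipelines + 1 presumably counts one tokenizer, filterer, batcher, and learner per pipeline plus the single dispatcher (role names taken from the code above; the exact layout is an inference, not stated in the commit). A quick sanity check of the arithmetic:

def min_nodes(pipelines: int) -> int:
    # assumed: 4 per-pipeline roles (tokenizer, filterer, batcher, learner)
    # plus one shared dispatcher
    return 4 * pipelines + 1

assert min_nodes(1) == 5   # a single pipeline needs five MPI ranks
assert min_nodes(3) == 13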

meson.build

@@ -22,7 +22,3 @@ executable('fedavg_mpi',
   dependencies: [mpi, python],
   include_directories: numpy_header,
   link_args: '-Wl,-w')
-run_command('cp',
-  meson.current_build_dir() + 'compile_commands.json',
-  meson.current_source_dir())