stop refactoring and get some stuff huggin' done
.gitignore (vendored): 3 changed lines
@@ -2,8 +2,7 @@
 .DS_Store
 run
 compile_commands.json
-*.txt
+cfg.json
 build/
-cythoned/
 __pycache__/
 data_*/
bridge.pyx: 60 changed lines
@@ -11,8 +11,6 @@ import flask
 
 
 tokenizers = {}
-X_test = None
-y_test = None
 
 
 cdef extern from "numpy/arrayobject.h":
@@ -49,6 +47,30 @@ cdef public void serve():
     nn.app.run(port=8448)
 
 
+cdef public size_t getwin():
+    return nn.WIN
+
+
+cdef public size_t getemb():
+    return nn.EMB
+
+
+cdef public size_t getbs():
+    return nn.CFG['bs']
+
+
+cdef public size_t getbpe():
+    return nn.CFG['bpe']
+
+
+cdef public float gettarget():
+    return nn.CFG['target']
+
+
+cdef public float getflpc():
+    return nn.CFG['flpc']
+
+
 cdef public int get_tokens(WordList* wl, const char *filename):
     fnu = filename.decode('utf-8')
     if fnu not in tokenizers:
@@ -82,10 +104,8 @@ cdef public void _dbg_print(object o):
     eprint(o)
 
 
-cdef public void _dbg_print_cbow_batch(
-        object net, float* batch, size_t bs
-):
-    X_np, y_np = cbow_batch(net, batch, bs)
+cdef public void _dbg_print_cbow_batch(float* batch, size_t bs):
+    X_np, y_np = cbow_batch(batch, bs)
     eprint(X_np)
     eprint(y_np)
 
@@ -95,9 +115,9 @@ cdef public void randidx(int* idx, size_t l, size_t how_much):
     memcpy(idx, PyArray_DATA(i_np), how_much * sizeof(int))
 
 
-cdef public object create_network(int win, int embed):
+cdef public object create_network():
     try:
-        net = nn.create_cbow_network(win, embed)
+        net = nn.create_cbow_network()
         eprint(net)
         return net
     except Exception as e:
@@ -111,7 +131,7 @@ cdef public void set_net_weights(object net, WeightList* wl):
 cdef public void step_net(
         object net, float* batch, size_t bs
 ):
-    X_train, y_train = cbow_batch(net, batch, bs)
+    X_train, y_train = cbow_batch(batch, bs)
     net.train_on_batch(X_train, y_train)
 
 
@@ -120,10 +140,7 @@ cdef public size_t out_size(object net):
 
 
 cdef public float eval_net(object net):
-    try:
-        return net.evaluate(X_test, y_test, verbose=False)
-    except Exception as e:
-        eprint(e)
+    return nn.eval_network(net)
 
 
 cdef public void init_weightlist_like(WeightList* wl, object net):
@@ -162,14 +179,8 @@ cdef public void combo_weights(
         wf += alpha * ww
 
 
-cdef public void create_test_dataset(size_t win):
-    _create_test_dataset(win)
-
-
-cdef tuple cbow_batch(
-        object net, float* batch, size_t bs
-):
-    win = net.input_shape[1] // 2
+cdef tuple cbow_batch(float* batch, size_t bs):
+    win = nn.WIN
     batch_np = np.asarray(<float[:bs,:2*win+1]>batch)
     X_np = batch_np[:, [*range(win), *range(win+1, win+win+1)]]
     y_np = nn.onehot(batch_np[:, win], nc=len(nn.vocab))
@@ -177,6 +188,7 @@ cdef tuple cbow_batch(
 
 
 cdef list wrap_weight_list(WeightList* wl):
+    """Thinly wraps a WeightList struct into a NumPy array."""
     weights = []
     for i in range(wl.n_weights):
         w_shape = <long[:wl.weights[i].dims]>wl.weights[i].shape
@@ -220,9 +232,3 @@ def ensure_contiguous(a):
 
 def eprint(*args, **kwargs):
     return print(*args, flush=True, **kwargs)
-
-
-def _create_test_dataset(win):
-    global X_test, y_test
-    if X_test is None or y_test is None:
-        X_test, y_test = nn.create_test_dataset(win)
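Note on the `cbow_batch` refactor above: the batch layout itself is unchanged. Each row holds `2*win + 1` token ids; the columns either side of the centre are the context (X) and the centre word, one-hot encoded, is the target (y). A standalone NumPy sketch of that slicing (the `onehot` stub and the sizes here are illustrative, not from this repo):

```python
import numpy as np

WIN = 2          # mirrors the new module-level constant in library.py
NC = 10          # illustrative vocabulary size

def onehot(ids, nc):
    # stub with the same shape behaviour as mynet.onehot
    out = np.zeros((len(ids), nc), dtype=np.float32)
    out[np.arange(len(ids)), ids.astype(int)] = 1.0
    return out

# three fake windows of 2*WIN + 1 token ids each
batch_np = np.arange(3 * (2*WIN + 1), dtype=np.float32).reshape(3, -1) % NC
# context = every column except the centre one at index WIN
X_np = batch_np[:, [*range(WIN), *range(WIN+1, WIN+WIN+1)]]
# target = the centre word, one-hot over the vocabulary
y_np = onehot(batch_np[:, WIN], nc=NC)
print(X_np.shape, y_np.shape)  # (3, 4) (3, 10)
```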
library.py: 54 changed lines
@@ -1,19 +1,38 @@
 import os
+import json
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 
 from mynet import onehot
 
 
+WIN = 2
+EMB = 32
+
 HERE = os.path.abspath(os.path.dirname(__file__))
-DATA = os.path.join(HERE, 'data')
+
+
+def read_cfg():
+    with open(os.path.join(HERE, 'cfg.json')) as f:
+        return json.load(f)
+
+
+CFG = read_cfg()
+DATA = os.path.join(HERE, CFG['data'])
 CORPUS = os.path.join(DATA, 'corpus.txt')
 VOCAB = os.path.join(DATA, 'vocab.txt')
 TEST = os.path.join(DATA, 'test.txt')
 
-vocab = {
-    w: i for i, w in enumerate(open(VOCAB).read().splitlines(keepends=False))
-}
-inv_vocab = sorted(vocab, key=vocab.get)
+
+def read_vocab_list():
+    with open(VOCAB) as f:
+        return f.read().split()
+
+
+inv_vocab = read_vocab_list()
+vocab = {w: i for i, w in enumerate(inv_vocab)}
+
+X_test = None
+y_test = None
 
 
 def word_tokenize(s: str):
@@ -21,13 +40,14 @@ def word_tokenize(s: str):
     return l.split()
 
 
-def create_test_dataset(win):
+def create_test_dataset():
     import numpy as np
     test_dataset = np.vectorize(vocab.get)(np.genfromtxt(TEST, dtype=str))
-    assert test_dataset.shape[1] == 2*win + 1
-    X_test = test_dataset[:, [*range(0, win), *range(win+1, win+win+1)]]
-    y_test = onehot(test_dataset[:, win], nc=len(vocab))
-    return X_test, y_test
+    assert test_dataset.shape[1] == 2*WIN + 1
+
+    global X_test, y_test
+    X_test = test_dataset[:, [*range(0, WIN), *range(WIN+1, WIN+WIN+1)]]
+    y_test = onehot(test_dataset[:, WIN], nc=len(vocab))
 
 
 def create_mnist_network():
@@ -44,13 +64,13 @@ def create_mnist_network():
     return model
 
 
-def create_cbow_network(win, embed):
+def create_cbow_network():
     import tensorflow as tf
     tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) # STFU!
     tf.random.set_random_seed(42)
 
-    ctxt = tf.keras.layers.Input(shape=[2*win])
-    ed = tf.keras.layers.Embedding(len(vocab), embed, input_length=2*win)(ctxt)
+    ctxt = tf.keras.layers.Input(shape=[2*WIN])
+    ed = tf.keras.layers.Embedding(len(vocab), EMB, input_length=2*WIN)(ctxt)
     cbow = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=1))(ed)
     blowup = tf.keras.layers.Dense(len(vocab), activation='softmax')(cbow)
     mod = tf.keras.Model(inputs=ctxt, outputs=blowup)
@@ -61,9 +81,15 @@ def create_cbow_network(win, embed):
     return mod
 
 
+def eval_network(net):
+    if X_test is None or y_test is None:
+        create_test_dataset()
+    return net.evaluate(X_test, y_test, verbose=False)
+
+
 def token_generator(filename):
     with open(filename) as f:
-        for i, l in enumerate(f.readlines()):
+        for l in f:
             if not l.isspace():
                 tok = word_tokenize(l)
                 if tok:
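The `read_cfg` added above expects a `cfg.json` beside `library.py` carrying at least the keys this commit reads: `data` (used in library.py) plus `bs`, `bpe`, `target`, and `flpc` (used by the bridge.pyx getters). The file itself is not committed, and this commit gitignores it, so the following is only a plausible sketch, with values mirroring the #defines removed from main.c:

```json
{
    "data": "data",
    "bs": 32,
    "bpe": 250,
    "target": 8.40,
    "flpc": 1
}
```

WIN and EMB stay hard-coded in library.py rather than moving into the config.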
main.c: 48 changed lines
@@ -17,14 +17,6 @@
 #define TAG_INSTR 9
 #define TAG_TERMT 10
 
-#define COMM 50
-#define ITER 250
-#define TARGET 8.40
-#define BS 32
-#define EMB 32
-#define WIN 2
-#define FLPC 1
-
 #define in_range(i, x) (size_t i = 0; i < (x); i++)
 // I am honestly VERY sorry for this
 // but the power of macros corrupts even the best of us
@@ -218,7 +210,7 @@ void filterer() {
     int batcher = mpi_id_from_role_id(BATCHER, rid);
 
     Word w = {0, NULL};
-    const size_t window_size = 2 * WIN + 1;
+    const size_t window_size = 2 * getwin() + 1;
     long* window = malloc(window_size * sizeof(long));
     size_t have = 0;
 
@@ -248,15 +240,16 @@ void batcher() {
     INFO_PRINTF("Starting batcher %d\n", getpid());
     int rid = my_role_id(BATCHER);
     int tokenizer = mpi_id_from_role_id(FILTERER, rid);
+    int bs = getbs();
 
     int learner_mpi_id = 0;
-    const size_t window_size = 2 * WIN + 1;
-    const size_t bufsize = BS * window_size;
+    const size_t window_size = 2 * getwin() + 1;
+    const size_t bufsize = bs * window_size;
     float* batch = malloc(bufsize * sizeof(float));
     long* l_wid = malloc(window_size * sizeof(long));
 
     while (1) {
-        for in_range(r, BS) {
+        for in_range(r, bs) {
             recv_window(l_wid, window_size, tokenizer);
 
             if (l_wid[0] == -1) break;
@@ -327,13 +320,15 @@ void learner() {
     int dispatcher = mpi_id_from_role_id(DISPATCHER, 0);
     INFO_PRINTF("Learner %d (pid %d) is assigned to pipeline %d\n", rid,
                 getpid(), my_batcher_rid);
+    size_t bs = getbs();
+    size_t bpe = getbpe();
 
-    PyObject* net = create_network(WIN, EMB);
+    PyObject* net = create_network();
     WeightList wl;
     init_weightlist_like(&wl, net);
 
-    size_t window_size = (2*WIN + 1);
-    size_t bufsize = BS * window_size;
+    size_t window_size = 2 * getwin() + 1;
+    size_t bufsize = bs * window_size;
     float* batch = malloc(bufsize * sizeof(float));
 
     int go;
@@ -343,11 +338,11 @@ void learner() {
     while (go != -1) {
         recv_weights(&wl, dispatcher);
         set_net_weights(net, &wl);
-        for in_range(k, ITER) {
+        for in_range(k, bpe) {
             MPI_Send(&me, 1, MPI_INT, batcher, TAG_READY, MPI_COMM_WORLD);
             MPI_Recv(batch, bufsize, MPI_FLOAT, batcher, TAG_BATCH,
                      MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-            step_net(net, batch, BS);
+            step_net(net, batch, bs);
         }
         update_weightlist(&wl, net);
         send_weights(&wl, dispatcher);
@@ -364,14 +359,17 @@ void learner() {
 void dispatcher() {
     INFO_PRINTF("Starting dispatcher %d\n", getpid());
     int go = 1;
+    size_t bs = getbs();
+    size_t bpe = getbpe();
+    float target = gettarget();
+    float flpc = getflpc();
 
-    PyObject* frank = create_network(WIN, EMB);
-    create_test_dataset(WIN);
+    PyObject* frank = create_network();
     WeightList wl;
     init_weightlist_like(&wl, frank);
     update_weightlist(&wl, frank);
 
-    int lpr = number_of(LEARNER) * FLPC; // Learners per round
+    int lpr = number_of(LEARNER) * flpc; // Learners per round
     WeightList *wls = malloc(sizeof(WeightList) * lpr);
     for in_range(i, lpr) {
         init_weightlist_like(wls + i, frank);
@@ -383,7 +381,7 @@ void dispatcher() {
     float min_loss = crt_loss;
     time_t start = time(NULL);
     size_t rounds = 0;
-    while (crt_loss > TARGET) {
+    while (crt_loss > target) {
         randidx(round, number_of(LEARNER), lpr);
         for in_range(k, lpr) {
             // Instruct learners to learn
@@ -418,12 +416,12 @@ void dispatcher() {
     float delta_l = first_loss - crt_loss;
     INFO_PRINTF(
         "Laptop MPI adam consecutive_batch "
-        "W%d E%d BS%d bpe%d LPR%d pp%lu,"
+        "W%lu E%lu BS%lu bpe%lu LPR%d pp%lu,"
         "%f,%f,%f,%f,"
        "%lu,%.0f,%lu\n",
-        WIN, EMB, BS, ITER, lpr, number_of(TOKENIZER),
-        delta_l/rounds, delta_l/delta_t, min_loss, TARGET,
-        rounds, delta_t,BS*ITER*rounds
+        getwin(), getemb(), bs, bpe, lpr, number_of(TOKENIZER),
+        delta_l/rounds, delta_l/delta_t, min_loss, target,
+        rounds, delta_t,bs*bpe*rounds
    );
     Py_DECREF(frank);
     free_weightlist(&wl);
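For orientation: the six new `cdef public` accessors in bridge.pyx are what let main.c drop its compile-time #defines. Cython emits a public header for the module (here presumably `bridge.h`) declaring roughly the following, which main.c can call once the embedded interpreter and module are initialised (a sketch only; the generated header wraps these in its own import/export macros):

```c
size_t getwin(void);
size_t getemb(void);
size_t getbs(void);
size_t getbpe(void);
float gettarget(void);
float getflpc(void);
```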