now it kinda learns again and code is kinda clean

This commit is contained in:
2019-12-01 15:44:34 -08:00
parent 5d14171631
commit bc6d34e253
3 changed files with 39 additions and 53 deletions

View File

@@ -75,24 +75,15 @@ cdef public void f_idx_list_to_print(float* f_idxs, size_t num):
# return retval
cdef public void cbow_batch(
float* batch, size_t bs, size_t win
):
batch_np = np.asarray(<float[:bs,:2*win+1]>batch)
# Deal with X
X_np = np.concatenate([batch_np[:, :win], batch_np[:, win+1:]], axis=1)
y_np = nn.onehot(batch_np[:, win], nc=len(nn.vocab))
eprint(batch_np)
eprint(X_np)
eprint(np.argmax(y_np, axis=1))
# Debug helper declared `cdef public` so it can be called from the C side:
# hands the given Python object straight to eprint.
cdef public void debug_print(object o):
eprint(o)
cdef public object create_network(int win, int embed):
return nn.create_cbow_network(win, embed)
try:
return nn.create_cbow_network(win, embed)
except Exception as e:
eprint(e)
cdef public void set_net_weights(object net, WeightList* wl):
@@ -102,9 +93,7 @@ cdef public void set_net_weights(object net, WeightList* wl):
cdef public void step_net(
object net, float* batch, size_t bs
):
# X_train, y_train = cbow_batch(net, batch, bs)
X_train = None
y_train = None
X_train, y_train = cbow_batch(net, batch, bs)
net.train_on_batch(X_train, y_train)
@@ -113,7 +102,10 @@ cdef public size_t out_size(object net):
cdef public float eval_net(object net):
return net.evaluate(X_test, y_test, verbose=False)
try:
return net.evaluate(X_test, y_test, verbose=False)
except Exception as e:
eprint(e)
cdef public void init_weightlist_like(WeightList* wl, object net):
@@ -156,6 +148,16 @@ cdef public void create_test_dataset(size_t win):
_create_test_dataset(win)
# Split one flat batch buffer into CBOW training inputs and targets.
#   net   - network object; only net.input_shape[1] is read here
#   batch - pointer to the raw floats, viewed as bs rows of 2*win+1 values
#   bs    - number of rows in the batch
# Returns the tuple (X_np, y_np).
cdef tuple cbow_batch(
object net, float* batch, size_t bs
):
# Window size is half of the network's second input dimension
# (presumably that dimension holds the 2*win context slots -- TODO confirm
# against nn.create_cbow_network).
win = net.input_shape[1] // 2
# View the C buffer as a (bs, 2*win+1) array.
batch_np = np.asarray(<float[:bs,:2*win+1]>batch)
# Inputs: every column except the middle one (index win).
X_np = np.concatenate([batch_np[:, :win], batch_np[:, win+1:]], axis=1)
# Targets: the middle column passed through nn.onehot with nc=len(nn.vocab).
y_np = nn.onehot(batch_np[:, win], nc=len(nn.vocab))
return X_np, y_np
cdef list wrap_weight_list(WeightList* wl):
weights = []
for i in range(wl.n_weights):

View File

@@ -29,8 +29,6 @@ def create_test_dataset(win):
ds = np.array([vocab[w] for w in word_tokenize(f.read())
if w in vocab])
idx = np.random.choice(np.arange(win, len(ds) - win), S)
oh_store = np.zeros((S, len(vocab)), dtype=np.float32)
onehot(oh_store, ds[idx])
return (
# X
np.stack([
@@ -39,7 +37,7 @@ def create_test_dataset(win):
], axis=0).astype(np.float32),
#y
oh_store
onehot(ds[idx], nc=len(vocab))
)
def create_mnist_network():
@@ -67,7 +65,7 @@ def create_cbow_network(win, embed):
def token_generator(filename):
with open(filename) as f:
for i, l in enumerate(f.readlines(1000)):
for i, l in enumerate(f.readlines()):
if not l.isspace():
tok = word_tokenize(l)
if tok:

50
main.c
View File

@@ -15,9 +15,9 @@
#define TAG_SWORD 7
#define TAG_IWORD 8
#define COMM 1
#define ITER 1000
#define BS 10
#define COMM 10
#define ITER 100
#define BS 32
#define EMB 20
#define WIN 2
#define FSPC 1
@@ -36,7 +36,7 @@ typedef enum{
TOKENIZER,
FILTERER,
BATCHER,
SLAVE,
LEARNER,
MASTER
} Role;
@@ -60,7 +60,7 @@ size_t number_of(Role what) {
return 1;
case BATCHER:
return 1;
case SLAVE:
case LEARNER:
return world_size()
- number_of(TOKENIZER)
- number_of(FILTERER)
@@ -189,11 +189,10 @@ void batcher() {
}
}
if (l_wid[0] == -1) break;
cbow_batch(batch, BS, WIN);
// MPI_Recv(&s, 1, MPI_INT, MPI_ANY_SOURCE, TAG_READY, MPI_COMM_WORLD,
// MPI_STATUS_IGNORE);
// MPI_Send(batch, bufsize, MPI_FLOAT, s, TAG_BATCH, MPI_COMM_WORLD);
MPI_Recv(&s, 1, MPI_INT, MPI_ANY_SOURCE, TAG_READY, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
MPI_Send(batch, bufsize, MPI_FLOAT, s, TAG_BATCH, MPI_COMM_WORLD);
}
free(l_wid);
free(batch);
@@ -233,7 +232,7 @@ void recv_weights(WeightList* wl, int src, int tag) {
}
}
void slave_node() {
void learner() {
// 0. Announce readiness?
// 1. Receive weights from master ([ ] has to know its master)
// 2. Request batch from reader ([ ] has to choose a reader)
@@ -246,35 +245,22 @@ void slave_node() {
create_test_dataset(WIN);
WeightList wl;
init_weightlist_like(&wl, net);
size_t entry_size = (2*WIN + 1);
size_t bufsize = BS * entry_size;
size_t vocab = out_size(net);
size_t n_words = (BS + WIN + WIN);
size_t X_numel = BS * (WIN + WIN);
size_t y_numel = BS * vocab;
float* X = malloc(X_numel * sizeof(float));
float* y = malloc(y_numel * sizeof(float));
float* f_widx = malloc(n_words * sizeof(float));
float* batch = malloc(bufsize * sizeof(float));
for in_range(i, COMM) {
// MPI_Send(&me, 1, MPI_INT, mpi_id_from_role_id(MASTER, 0),
// TAG_READY, MPI_COMM_WORLD);
// recv_weights(&wl, mpi_id_from_role_id(MASTER, 0), TAG_WEIGH);
// set_net_weights(net, &wl);
for in_range(k, ITER) {
MPI_Send(&me, 1, MPI_INT, mpi_id_from_role_id(BATCHER, 0),
TAG_READY, MPI_COMM_WORLD);
MPI_Recv(f_widx, n_words, MPI_FLOAT,
MPI_Recv(batch, bufsize, MPI_FLOAT,
mpi_id_from_role_id(BATCHER, 0), TAG_BATCH, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
// cbow_batch(X, y, f_widx, BS, WIN);
step_net(net, X, BS);
#warning "fix this"
INFO_PRINTLN(".");
step_net(net, batch, BS);
}
printf("%d net: %f\n", my_mpi_id(), eval_net(net));
update_weightlist(&wl, net);
// send_weights(&wl, mpi_id_from_role_id(MASTER, 0), TAG_WEIGH);
}
Py_DECREF(net);
free_weightlist(&wl);
@@ -293,7 +279,7 @@ void master_node() {
init_weightlist_like(&wl, frank);
update_weightlist(&wl, frank);
int spr = number_of(SLAVE) * FSPC; // Slaves per round
int spr = number_of(LEARNER) * FSPC; // Learners per round
int s;
WeightList *wls = malloc(sizeof(WeightList) * spr);
@@ -350,9 +336,9 @@ int main (int argc, const char **argv) {
case BATCHER:
batcher();
break;
// case SLAVE:
// slave_node();
// break;
case LEARNER:
learner();
break;
default:
INFO_PRINTLN("DYING HORRIBLY!");
// case SLAVE: slave_node(); break;