diff --git a/library.pyx b/library.pyx index 6a1ea16..03c3f70 100644 --- a/library.pyx +++ b/library.pyx @@ -3,6 +3,7 @@ import numpy as np import mynet as mn from libc.stdlib cimport malloc +from libc.string cimport memcpy ctr = [] @@ -11,9 +12,6 @@ opt = mn.SGDOptimizer(lr=0.1) cdef extern from "numpy/arrayobject.h": - object PyArray_SimpleNewFromData( - int nd, long* dims, int typenum, void* data - ) void *PyArray_DATA(np.ndarray arr) @@ -25,7 +23,7 @@ ctypedef public struct Dense: ctypedef public struct Network: - Py_ssize_t n_layers; + size_t n_layers; Dense* layers; @@ -34,90 +32,86 @@ cdef public char * greeting(): cdef public void debug_print(object o): - print(o.flags) + print(o) -cdef public np.ndarray[np.float32_t, ndim=2, mode='c'] predict( - object net, - np.ndarray[np.float32_t, ndim=2, mode='c'] X +cdef public void predict( + Network* net, + float* X, + size_t batch_size ): - try: - return net(X) - except Exception as e: - print(e) - - -cdef public object create_network(): - return mn.Network((784, 10), mn.relu, mn.sigmoid, mn.bin_x_entropy) - - -cdef public object combo_net(list nets): - return mn.combo_net(nets) - - -cdef public object make_like(object neta, object netb): - netb.be_like(neta) + pass + # try: + # return net(X) + # except Exception as e: + # print(e) cdef public void step_net( - object net, + Network* c_net, float* batch_data, - Py_ssize_t batch_size + size_t batch_size ): - cdef Py_ssize_t in_dim = net.geometry[0] - cdef Py_ssize_t out_dim = net.geometry[-1] + net = wrap_c_network(c_net) + cdef size_t in_dim = net.geometry[0] + cdef size_t out_dim = net.geometry[-1] batch = np.asarray(batch_data) net.step(batch[:, :in_dim], batch[:, in_dim:], opt) -cdef public float eval_net( - object net -): +cdef public float eval_net(Network* c_net): + net = wrap_c_network(c_net) return net.evaluate(X_test, y_test, 'cls') -cdef public np.ndarray[np.float32_t, ndim=2, mode='c'] mnist_batch( - Py_ssize_t bs -): +cdef public void mnist_batch(float* batch, size_t bs): idx = np.random.choice(len(X_train), bs, replace=False) arr = np.concatenate([X_train[idx], y_train[idx]], axis=1) - return arr + memcpy(batch, PyArray_DATA(arr), arr.size*sizeof(float)) -cdef public void inspect_array( +cdef public void create_c_network(Network* c_net): + net = _create_network() + c_net.n_layers = len(net.layers) + c_net.layers = malloc(sizeof(Dense) * c_net.n_layers) + for i, l in enumerate(net.layers): + d0, d1 = l.W.shape + c_net.layers[i].shape[0] = d0 + c_net.layers[i].shape[1] = d1 + c_net.layers[i].W = malloc(sizeof(float) * d0 * d1) + c_net.layers[i].b = malloc(sizeof(float) * d1) + memcpy(c_net.layers[i].W, PyArray_DATA(l.W), sizeof(float) * d0 * d1) + memcpy(c_net.layers[i].b, PyArray_DATA(l.b), sizeof(float) * d1) + c_net.layers[i].ownmem = 1 + + +cdef public void be_like(Network* c_dst, Network* c_src): + dst = wrap_c_network(c_dst) + src = wrap_c_network(c_src) + dst.be_like(src) + + +cdef object wrap_c_network(Network* c_net): + net = _create_network(init=False) + for i, l in enumerate(net.layers): + d0, d1 = l.W.shape + l.W = np.asarray(c_net.layers[i].W) + l.b = np.asarray(c_net.layers[i].b) + return net + + +cdef void inspect_array( np.ndarray[np.float32_t, ndim=2, mode='c'] a ): - print(a.flags) - print(a.dtype) - print(a.sum()) + print(a.flags, flush=True) + print(a.dtype, flush=True) + print(a.sum(), flush=True) -cdef public void be_like_cified( - object net, - Network* c_net -): - """WARNING this function makes an assumption that `net` and `c_net` - have the same shape and hopefully is going to crash horribly otherwise.""" - for i, l in enumerate(net.layers): - w1, w2 = l.W.shape - l.W[:] = c_net.layers[i].W - l.b[:] = c_net.layers[i].b +def _create_network(init=True): + return mn.Network((784, 10), mn.relu, mn.sigmoid, mn.bin_x_entropy, + initialize=init) -cdef public void cify_network( - object net, Network* c_net -): - """WARNING `c_net` is valid as long as `net` is - - Whoever has `c_net` is responsible for freeing c_net.layers list - Layers themselves don't need any de-init. - """ - c_net.n_layers = len(net.layers) - c_net.layers = malloc(len(net.layers) * sizeof(Dense)) - for i, l in enumerate(net.layers): - w1, w2 = l.W.shape - c_net.layers[i].shape[0] = w1 - c_net.layers[i].shape[1] = w2 - c_net.layers[i].W = PyArray_DATA(l.W) - c_net.layers[i].b = PyArray_DATA(l.b) - c_net.layers[i].ownmem = 0 +def combo_net(nets): + return mn.combo_net(nets) diff --git a/main.c b/main.c index 6196c0a..025bc71 100644 --- a/main.c +++ b/main.c @@ -1,17 +1,15 @@ #include #include +#include #include -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION -#include - #include "library.h" #define P_READER 0 #define P_SLAVE 1 #define P_MASTER 2 -#define COMM 50 +#define COMM 500 #define ITER 32 #define BS 32 @@ -24,24 +22,20 @@ typedef enum{ // Reads some data and converts it to 2D float array void data_reader() { + size_t batch_numel = (784 + 10) * BS; + float* batch = malloc(batch_numel * sizeof(float)); while (1) { - PyArrayObject* batch = mnist_batch(BS); - - long* shape = PyArray_SHAPE(batch); - MPI_Send(shape, 2, MPI_LONG, P_SLAVE, 0, MPI_COMM_WORLD); - MPI_Send(PyArray_DATA(batch), PyArray_SIZE(batch), MPI_FLOAT, - P_SLAVE, 0, MPI_COMM_WORLD); - Py_DECREF(batch); + mnist_batch(batch, BS); + MPI_Send(batch, batch_numel, MPI_FLOAT, P_SLAVE, 0, MPI_COMM_WORLD); } } -void send_network(Network* c_net, int dest, int tag) { - Py_ssize_t n_layers = c_net->n_layers; +void send_network(const Network* c_net, int dest, int tag) { + size_t n_layers = c_net->n_layers; MPI_Send(&n_layers, 1, MPI_LONG, dest, tag, MPI_COMM_WORLD); - for (Py_ssize_t i = 0; i < n_layers; i++) { + for (size_t i = 0; i < n_layers; i++) { long d0 = c_net->layers[i].shape[0]; long d1 = c_net->layers[i].shape[1]; - MPI_Send(c_net->layers[i].shape, 2, MPI_LONG, dest, tag, MPI_COMM_WORLD); MPI_Send(c_net->layers[i].W, d0 * d1, MPI_FLOAT, dest, tag, @@ -52,10 +46,11 @@ void send_network(Network* c_net, int dest, int tag) { } void recv_network(Network* c_net, int src, int tag) { + // Creates a new network at c_net MPI_Recv(&c_net->n_layers, 1, MPI_LONG, src, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); c_net->layers = malloc(sizeof(Dense) * c_net->n_layers); - for (Py_ssize_t i = 0; i < c_net->n_layers; i++) { + for (size_t i = 0; i < c_net->n_layers; i++) { MPI_Recv(&c_net->layers[i].shape, 2, MPI_LONG, src, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE); long d0 = c_net->layers[i].shape[0]; @@ -71,7 +66,7 @@ void recv_network(Network* c_net, int src, int tag) { } void free_network_contents(Network* c_net) { - for (Py_ssize_t i = 0; i < c_net->n_layers; i++) { + for (size_t i = 0; i < c_net->n_layers; i++) { if (c_net->layers[i].ownmem) { free(c_net->layers[i].b); free(c_net->layers[i].W); @@ -82,43 +77,43 @@ void free_network_contents(Network* c_net) { // Receives weight updates and trains, sends learned weights back to master void slave_node() { - PyObject* net = create_network(); + Network net; + create_c_network(&net); + + size_t batch_numel = (784 + 10) * BS; + float* batch = malloc(batch_numel * sizeof(float)); + for (int i = 0; i < COMM; i++) { char go; MPI_Recv(&go, 1, MPI_CHAR, P_MASTER, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); for (int k = 0; k < ITER; k++) { - long shape[2]; - MPI_Recv(shape, 2, MPI_LONG, P_READER, MPI_ANY_TAG, MPI_COMM_WORLD, - MPI_STATUS_IGNORE); - long size = shape[0] * shape[1]; - float* batch = malloc(shape[0] * shape[1] * sizeof(float)); - MPI_Recv(batch, size, MPI_FLOAT, P_READER, MPI_ANY_TAG, + MPI_Recv(batch, batch_numel, MPI_FLOAT, P_READER, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - step_net(net, batch, BS); - free(batch); + step_net(&net, batch, BS); } - Network c_net; - cify_network(net, &c_net); - send_network(&c_net, P_MASTER, 0); - free_network_contents(&c_net); + printf("Net: %f\n", eval_net(&net)); + send_network(&net, P_MASTER, 0); } - Py_DECREF(net); + + free(batch); + free_network_contents(&net); } // Stores most up-to-date model, sends it to slaves for training void master_node() { - PyObject* frank = create_network(); + Network frank; + create_c_network(&frank); for (int i = 0; i < COMM; i++) { char go; MPI_Send(&go, 1, MPI_CHAR, P_SLAVE, 0, MPI_COMM_WORLD); - Network c_net; - recv_network(&c_net, P_SLAVE, MPI_ANY_TAG); - be_like_cified(frank, &c_net); - free_network_contents(&c_net); - printf("Frank: %f\n", eval_net(frank)); + Network net; + recv_network(&net, P_SLAVE, MPI_ANY_TAG); + be_like(&frank, &net); + free_network_contents(&net); + printf("Frank: %f\n", eval_net(&frank)); } - Py_DECREF(frank); + free_network_contents(&frank); } Role map_node() { @@ -136,7 +131,7 @@ int main (int argc, const char **argv) { // Cython Boilerplate PyImport_AppendInittab("library", PyInit_library); Py_Initialize(); - import_array(); + // import_array(); PyRun_SimpleString("import sys\nsys.path.insert(0,'')"); PyObject* library_module = PyImport_ImportModule("library");