A great piece of work has been done here.
Now there is no mention of Python in the C code (except for the boilerplate at the beginning; the rest is nice, clean C). All the bridging is done in Cython, where it belongs. And there are no memory leaks, so there's that!
This commit is contained in:
126
library.pyx
126
library.pyx
@@ -3,6 +3,7 @@ import numpy as np
|
|||||||
import mynet as mn
|
import mynet as mn
|
||||||
|
|
||||||
from libc.stdlib cimport malloc
|
from libc.stdlib cimport malloc
|
||||||
|
from libc.string cimport memcpy
|
||||||
|
|
||||||
|
|
||||||
ctr = []
|
ctr = []
|
||||||
@@ -11,9 +12,6 @@ opt = mn.SGDOptimizer(lr=0.1)
|
|||||||
|
|
||||||
|
|
||||||
cdef extern from "numpy/arrayobject.h":
|
cdef extern from "numpy/arrayobject.h":
|
||||||
object PyArray_SimpleNewFromData(
|
|
||||||
int nd, long* dims, int typenum, void* data
|
|
||||||
)
|
|
||||||
void *PyArray_DATA(np.ndarray arr)
|
void *PyArray_DATA(np.ndarray arr)
|
||||||
|
|
||||||
|
|
||||||
@@ -25,7 +23,7 @@ ctypedef public struct Dense:
|
|||||||
|
|
||||||
|
|
||||||
ctypedef public struct Network:
|
ctypedef public struct Network:
|
||||||
Py_ssize_t n_layers;
|
size_t n_layers;
|
||||||
Dense* layers;
|
Dense* layers;
|
||||||
|
|
||||||
|
|
||||||
@@ -34,90 +32,86 @@ cdef public char * greeting():
|
|||||||
|
|
||||||
|
|
||||||
cdef public void debug_print(object o):
|
cdef public void debug_print(object o):
|
||||||
print(o.flags)
|
print(o)
|
||||||
|
|
||||||
|
|
||||||
cdef public np.ndarray[np.float32_t, ndim=2, mode='c'] predict(
|
cdef public void predict(
|
||||||
object net,
|
Network* net,
|
||||||
np.ndarray[np.float32_t, ndim=2, mode='c'] X
|
float* X,
|
||||||
|
size_t batch_size
|
||||||
):
|
):
|
||||||
try:
|
pass
|
||||||
return net(X)
|
# try:
|
||||||
except Exception as e:
|
# return net(X)
|
||||||
print(e)
|
# except Exception as e:
|
||||||
|
# print(e)
|
||||||
|
|
||||||
cdef public object create_network():
|
|
||||||
return mn.Network((784, 10), mn.relu, mn.sigmoid, mn.bin_x_entropy)
|
|
||||||
|
|
||||||
|
|
||||||
cdef public object combo_net(list nets):
|
|
||||||
return mn.combo_net(nets)
|
|
||||||
|
|
||||||
|
|
||||||
cdef public object make_like(object neta, object netb):
|
|
||||||
netb.be_like(neta)
|
|
||||||
|
|
||||||
|
|
||||||
cdef public void step_net(
|
cdef public void step_net(
|
||||||
object net,
|
Network* c_net,
|
||||||
float* batch_data,
|
float* batch_data,
|
||||||
Py_ssize_t batch_size
|
size_t batch_size
|
||||||
):
|
):
|
||||||
cdef Py_ssize_t in_dim = net.geometry[0]
|
net = wrap_c_network(c_net)
|
||||||
cdef Py_ssize_t out_dim = net.geometry[-1]
|
cdef size_t in_dim = net.geometry[0]
|
||||||
|
cdef size_t out_dim = net.geometry[-1]
|
||||||
batch = np.asarray(<float[:batch_size,:in_dim+out_dim]>batch_data)
|
batch = np.asarray(<float[:batch_size,:in_dim+out_dim]>batch_data)
|
||||||
net.step(batch[:, :in_dim], batch[:, in_dim:], opt)
|
net.step(batch[:, :in_dim], batch[:, in_dim:], opt)
|
||||||
|
|
||||||
|
|
||||||
cdef public float eval_net(
|
cdef public float eval_net(Network* c_net):
|
||||||
object net
|
net = wrap_c_network(c_net)
|
||||||
):
|
|
||||||
return net.evaluate(X_test, y_test, 'cls')
|
return net.evaluate(X_test, y_test, 'cls')
|
||||||
|
|
||||||
|
|
||||||
cdef public np.ndarray[np.float32_t, ndim=2, mode='c'] mnist_batch(
|
cdef public void mnist_batch(float* batch, size_t bs):
|
||||||
Py_ssize_t bs
|
|
||||||
):
|
|
||||||
idx = np.random.choice(len(X_train), bs, replace=False)
|
idx = np.random.choice(len(X_train), bs, replace=False)
|
||||||
arr = np.concatenate([X_train[idx], y_train[idx]], axis=1)
|
arr = np.concatenate([X_train[idx], y_train[idx]], axis=1)
|
||||||
return arr
|
memcpy(batch, <float*>PyArray_DATA(arr), arr.size*sizeof(float))
|
||||||
|
|
||||||
|
|
||||||
cdef public void inspect_array(
|
cdef public void create_c_network(Network* c_net):
|
||||||
|
net = _create_network()
|
||||||
|
c_net.n_layers = len(net.layers)
|
||||||
|
c_net.layers = <Dense*>malloc(sizeof(Dense) * c_net.n_layers)
|
||||||
|
for i, l in enumerate(net.layers):
|
||||||
|
d0, d1 = l.W.shape
|
||||||
|
c_net.layers[i].shape[0] = d0
|
||||||
|
c_net.layers[i].shape[1] = d1
|
||||||
|
c_net.layers[i].W = <float*>malloc(sizeof(float) * d0 * d1)
|
||||||
|
c_net.layers[i].b = <float*>malloc(sizeof(float) * d1)
|
||||||
|
memcpy(c_net.layers[i].W, PyArray_DATA(l.W), sizeof(float) * d0 * d1)
|
||||||
|
memcpy(c_net.layers[i].b, PyArray_DATA(l.b), sizeof(float) * d1)
|
||||||
|
c_net.layers[i].ownmem = 1
|
||||||
|
|
||||||
|
|
||||||
|
cdef public void be_like(Network* c_dst, Network* c_src):
|
||||||
|
dst = wrap_c_network(c_dst)
|
||||||
|
src = wrap_c_network(c_src)
|
||||||
|
dst.be_like(src)
|
||||||
|
|
||||||
|
|
||||||
|
cdef object wrap_c_network(Network* c_net):
|
||||||
|
net = _create_network(init=False)
|
||||||
|
for i, l in enumerate(net.layers):
|
||||||
|
d0, d1 = l.W.shape
|
||||||
|
l.W = np.asarray(<float[:d0,:d1]>c_net.layers[i].W)
|
||||||
|
l.b = np.asarray(<float[:d1]>c_net.layers[i].b)
|
||||||
|
return net
|
||||||
|
|
||||||
|
|
||||||
|
cdef void inspect_array(
|
||||||
np.ndarray[np.float32_t, ndim=2, mode='c'] a
|
np.ndarray[np.float32_t, ndim=2, mode='c'] a
|
||||||
):
|
):
|
||||||
print(a.flags)
|
print(a.flags, flush=True)
|
||||||
print(a.dtype)
|
print(a.dtype, flush=True)
|
||||||
print(a.sum())
|
print(a.sum(), flush=True)
|
||||||
|
|
||||||
|
|
||||||
cdef public void be_like_cified(
|
def _create_network(init=True):
|
||||||
object net,
|
return mn.Network((784, 10), mn.relu, mn.sigmoid, mn.bin_x_entropy,
|
||||||
Network* c_net
|
initialize=init)
|
||||||
):
|
|
||||||
"""WARNING this function makes an assumption that `net` and `c_net`
|
|
||||||
have the same shape and hopefully is going to crash horribly otherwise."""
|
|
||||||
for i, l in enumerate(net.layers):
|
|
||||||
w1, w2 = l.W.shape
|
|
||||||
l.W[:] = <float[:w1,:w2]>c_net.layers[i].W
|
|
||||||
l.b[:] = <float[:w2]>c_net.layers[i].b
|
|
||||||
|
|
||||||
|
|
||||||
cdef public void cify_network(
|
def combo_net(nets):
|
||||||
object net, Network* c_net
|
return mn.combo_net(nets)
|
||||||
):
|
|
||||||
"""WARNING `c_net` is valid as long as `net` is
|
|
||||||
|
|
||||||
Whoever has `c_net` is responsible for freeing c_net.layers list
|
|
||||||
Layers themselves don't need any de-init.
|
|
||||||
"""
|
|
||||||
c_net.n_layers = len(net.layers)
|
|
||||||
c_net.layers = <Dense*>malloc(len(net.layers) * sizeof(Dense))
|
|
||||||
for i, l in enumerate(net.layers):
|
|
||||||
w1, w2 = l.W.shape
|
|
||||||
c_net.layers[i].shape[0] = w1
|
|
||||||
c_net.layers[i].shape[1] = w2
|
|
||||||
c_net.layers[i].W = <float*>PyArray_DATA(l.W)
|
|
||||||
c_net.layers[i].b = <float*>PyArray_DATA(l.b)
|
|
||||||
c_net.layers[i].ownmem = 0
|
|
||||||
|
|||||||
73
main.c
73
main.c
@@ -1,17 +1,15 @@
|
|||||||
#include <Python.h>
|
#include <Python.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
|
|
||||||
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
|
|
||||||
#include <numpy/arrayobject.h>
|
|
||||||
|
|
||||||
#include "library.h"
|
#include "library.h"
|
||||||
|
|
||||||
#define P_READER 0
|
#define P_READER 0
|
||||||
#define P_SLAVE 1
|
#define P_SLAVE 1
|
||||||
#define P_MASTER 2
|
#define P_MASTER 2
|
||||||
|
|
||||||
#define COMM 50
|
#define COMM 500
|
||||||
#define ITER 32
|
#define ITER 32
|
||||||
#define BS 32
|
#define BS 32
|
||||||
|
|
||||||
@@ -24,24 +22,20 @@ typedef enum{
|
|||||||
|
|
||||||
// Reads some data and converts it to 2D float array
|
// Reads some data and converts it to 2D float array
|
||||||
void data_reader() {
|
void data_reader() {
|
||||||
|
size_t batch_numel = (784 + 10) * BS;
|
||||||
|
float* batch = malloc(batch_numel * sizeof(float));
|
||||||
while (1) {
|
while (1) {
|
||||||
PyArrayObject* batch = mnist_batch(BS);
|
mnist_batch(batch, BS);
|
||||||
|
MPI_Send(batch, batch_numel, MPI_FLOAT, P_SLAVE, 0, MPI_COMM_WORLD);
|
||||||
long* shape = PyArray_SHAPE(batch);
|
|
||||||
MPI_Send(shape, 2, MPI_LONG, P_SLAVE, 0, MPI_COMM_WORLD);
|
|
||||||
MPI_Send(PyArray_DATA(batch), PyArray_SIZE(batch), MPI_FLOAT,
|
|
||||||
P_SLAVE, 0, MPI_COMM_WORLD);
|
|
||||||
Py_DECREF(batch);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void send_network(Network* c_net, int dest, int tag) {
|
void send_network(const Network* c_net, int dest, int tag) {
|
||||||
Py_ssize_t n_layers = c_net->n_layers;
|
size_t n_layers = c_net->n_layers;
|
||||||
MPI_Send(&n_layers, 1, MPI_LONG, dest, tag, MPI_COMM_WORLD);
|
MPI_Send(&n_layers, 1, MPI_LONG, dest, tag, MPI_COMM_WORLD);
|
||||||
for (Py_ssize_t i = 0; i < n_layers; i++) {
|
for (size_t i = 0; i < n_layers; i++) {
|
||||||
long d0 = c_net->layers[i].shape[0];
|
long d0 = c_net->layers[i].shape[0];
|
||||||
long d1 = c_net->layers[i].shape[1];
|
long d1 = c_net->layers[i].shape[1];
|
||||||
|
|
||||||
MPI_Send(c_net->layers[i].shape, 2, MPI_LONG, dest, tag,
|
MPI_Send(c_net->layers[i].shape, 2, MPI_LONG, dest, tag,
|
||||||
MPI_COMM_WORLD);
|
MPI_COMM_WORLD);
|
||||||
MPI_Send(c_net->layers[i].W, d0 * d1, MPI_FLOAT, dest, tag,
|
MPI_Send(c_net->layers[i].W, d0 * d1, MPI_FLOAT, dest, tag,
|
||||||
@@ -52,10 +46,11 @@ void send_network(Network* c_net, int dest, int tag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void recv_network(Network* c_net, int src, int tag) {
|
void recv_network(Network* c_net, int src, int tag) {
|
||||||
|
// Creates a new network at c_net
|
||||||
MPI_Recv(&c_net->n_layers, 1, MPI_LONG, src, tag, MPI_COMM_WORLD,
|
MPI_Recv(&c_net->n_layers, 1, MPI_LONG, src, tag, MPI_COMM_WORLD,
|
||||||
MPI_STATUS_IGNORE);
|
MPI_STATUS_IGNORE);
|
||||||
c_net->layers = malloc(sizeof(Dense) * c_net->n_layers);
|
c_net->layers = malloc(sizeof(Dense) * c_net->n_layers);
|
||||||
for (Py_ssize_t i = 0; i < c_net->n_layers; i++) {
|
for (size_t i = 0; i < c_net->n_layers; i++) {
|
||||||
MPI_Recv(&c_net->layers[i].shape, 2, MPI_LONG, src, tag,
|
MPI_Recv(&c_net->layers[i].shape, 2, MPI_LONG, src, tag,
|
||||||
MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||||
long d0 = c_net->layers[i].shape[0];
|
long d0 = c_net->layers[i].shape[0];
|
||||||
@@ -71,7 +66,7 @@ void recv_network(Network* c_net, int src, int tag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void free_network_contents(Network* c_net) {
|
void free_network_contents(Network* c_net) {
|
||||||
for (Py_ssize_t i = 0; i < c_net->n_layers; i++) {
|
for (size_t i = 0; i < c_net->n_layers; i++) {
|
||||||
if (c_net->layers[i].ownmem) {
|
if (c_net->layers[i].ownmem) {
|
||||||
free(c_net->layers[i].b);
|
free(c_net->layers[i].b);
|
||||||
free(c_net->layers[i].W);
|
free(c_net->layers[i].W);
|
||||||
@@ -82,43 +77,43 @@ void free_network_contents(Network* c_net) {
|
|||||||
|
|
||||||
// Receives weight updates and trains, sends learned weights back to master
|
// Receives weight updates and trains, sends learned weights back to master
|
||||||
void slave_node() {
|
void slave_node() {
|
||||||
PyObject* net = create_network();
|
Network net;
|
||||||
|
create_c_network(&net);
|
||||||
|
|
||||||
|
size_t batch_numel = (784 + 10) * BS;
|
||||||
|
float* batch = malloc(batch_numel * sizeof(float));
|
||||||
|
|
||||||
for (int i = 0; i < COMM; i++) {
|
for (int i = 0; i < COMM; i++) {
|
||||||
char go;
|
char go;
|
||||||
MPI_Recv(&go, 1, MPI_CHAR, P_MASTER, MPI_ANY_TAG, MPI_COMM_WORLD,
|
MPI_Recv(&go, 1, MPI_CHAR, P_MASTER, MPI_ANY_TAG, MPI_COMM_WORLD,
|
||||||
MPI_STATUS_IGNORE);
|
MPI_STATUS_IGNORE);
|
||||||
for (int k = 0; k < ITER; k++) {
|
for (int k = 0; k < ITER; k++) {
|
||||||
long shape[2];
|
MPI_Recv(batch, batch_numel, MPI_FLOAT, P_READER, MPI_ANY_TAG,
|
||||||
MPI_Recv(shape, 2, MPI_LONG, P_READER, MPI_ANY_TAG, MPI_COMM_WORLD,
|
|
||||||
MPI_STATUS_IGNORE);
|
|
||||||
long size = shape[0] * shape[1];
|
|
||||||
float* batch = malloc(shape[0] * shape[1] * sizeof(float));
|
|
||||||
MPI_Recv(batch, size, MPI_FLOAT, P_READER, MPI_ANY_TAG,
|
|
||||||
MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||||
step_net(net, batch, BS);
|
step_net(&net, batch, BS);
|
||||||
free(batch);
|
|
||||||
}
|
}
|
||||||
Network c_net;
|
printf("Net: %f\n", eval_net(&net));
|
||||||
cify_network(net, &c_net);
|
send_network(&net, P_MASTER, 0);
|
||||||
send_network(&c_net, P_MASTER, 0);
|
|
||||||
free_network_contents(&c_net);
|
|
||||||
}
|
}
|
||||||
Py_DECREF(net);
|
|
||||||
|
free(batch);
|
||||||
|
free_network_contents(&net);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stores most up-to-date model, sends it to slaves for training
|
// Stores most up-to-date model, sends it to slaves for training
|
||||||
void master_node() {
|
void master_node() {
|
||||||
PyObject* frank = create_network();
|
Network frank;
|
||||||
|
create_c_network(&frank);
|
||||||
for (int i = 0; i < COMM; i++) {
|
for (int i = 0; i < COMM; i++) {
|
||||||
char go;
|
char go;
|
||||||
MPI_Send(&go, 1, MPI_CHAR, P_SLAVE, 0, MPI_COMM_WORLD);
|
MPI_Send(&go, 1, MPI_CHAR, P_SLAVE, 0, MPI_COMM_WORLD);
|
||||||
Network c_net;
|
Network net;
|
||||||
recv_network(&c_net, P_SLAVE, MPI_ANY_TAG);
|
recv_network(&net, P_SLAVE, MPI_ANY_TAG);
|
||||||
be_like_cified(frank, &c_net);
|
be_like(&frank, &net);
|
||||||
free_network_contents(&c_net);
|
free_network_contents(&net);
|
||||||
printf("Frank: %f\n", eval_net(frank));
|
printf("Frank: %f\n", eval_net(&frank));
|
||||||
}
|
}
|
||||||
Py_DECREF(frank);
|
free_network_contents(&frank);
|
||||||
}
|
}
|
||||||
|
|
||||||
Role map_node() {
|
Role map_node() {
|
||||||
@@ -136,7 +131,7 @@ int main (int argc, const char **argv) {
|
|||||||
// Cython Boilerplate
|
// Cython Boilerplate
|
||||||
PyImport_AppendInittab("library", PyInit_library);
|
PyImport_AppendInittab("library", PyInit_library);
|
||||||
Py_Initialize();
|
Py_Initialize();
|
||||||
import_array();
|
// import_array();
|
||||||
PyRun_SimpleString("import sys\nsys.path.insert(0,'')");
|
PyRun_SimpleString("import sys\nsys.path.insert(0,'')");
|
||||||
PyObject* library_module = PyImport_ImportModule("library");
|
PyObject* library_module = PyImport_ImportModule("library");
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user