diff --git a/Dockerfile b/Dockerfile
index 69110e3..d42e889 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,9 +9,9 @@ RUN pip3 install --no-cache \
     meson==0.51.2 \
     numpy==1.16.4 \
     tensorflow==1.14.0 \
-    Flask==1.0.2 \
     cython==0.29.14
+
 RUN mkdir /workspace
-COPY bridge.pyx library.py server.py meson.build main.c /workspace/
+COPY bridge.pyx library.py meson.build main.c /workspace/
 RUN cd /workspace && meson build && cd build && ninja
 WORKDIR /workspace
diff --git a/bridge.pyx b/bridge.pyx
index 1a0d6b9..163cde3 100644
--- a/bridge.pyx
+++ b/bridge.pyx
@@ -7,7 +7,6 @@
 from libc.stdlib cimport malloc, realloc
 from libc.string cimport memcpy
 import library as nn
-import server as srv
 
 
 tokenizers = {}
@@ -43,16 +42,6 @@
 cdef public char *greeting():
     return f'The value is {3**3**3}'.encode('utf-8')
 
 
-cdef public void serve():
-    srv.serve()
-
-
-cdef public void server_update(float *emb):
-    embeddings = np.asarray(emb)
-    low_dim = nn.calc_TSNE(embeddings)
-    srv.emb_map = dict(zip(nn.inv_vocab, low_dim))
-
-
 cdef public size_t getwin():
     return nn.WIN
diff --git a/docs/report.latex b/docs/report.latex
index 72f00b8..6d5ac5a 100644
--- a/docs/report.latex
+++ b/docs/report.latex
@@ -244,14 +244,14 @@ for the whole application. Therefore, the formula for the minimum number of
 processes to be requested from \verb|mpiexec| looks like the following:
 
 \begin{lstlisting}
-NUM_PROC >= (4 * num_text_files) + 2
+NUM_PROC >= (4 * num_text_files) + 1
 \end{lstlisting}
 
 To figure out how many Learners will be created, the following formula can be
 used:
 
 \begin{lstlisting}
-num_learners = NUM_PROC - 2 - (3 * num_text_files)
+num_learners = NUM_PROC - 1 - (3 * num_text_files)
 \end{lstlisting}
 
 During running, the program will create the folder \verb|trained| in the
@@ -367,7 +367,8 @@ statistics and exit.
 \section{Evaluation}
 
 The main focus of evaluation was to determine if executing several neural
-network training nodes in parallel can speed-up the training process. The
+network training nodes in parallel can speed up the training process.
+The
 employed approach was to define a \textit{target loss} that the network has to
 achieve and then to measure \textit{the number of context windows} that each
 Learner node has to process and, secondarily, the time it takes for the system
@@ -457,6 +458,14 @@ computationally viable not to store the data as one big file but rather have
 it split across multiple nodes, this mode of operation should be investigated
 further and possibly preferred for large-scale training.
 
+As a last note, the learned embeddings themselves were not of central
+importance to the evaluation: it is well known that obtaining high-quality
+embeddings requires far more data (a dataset of \mbox{$>$ 100B words}) and
+computation time than was feasible within this project. However, the
+learning outcomes were evaluated empirically, and it was found that even
+relatively short training runs produced networks that captured some
+meaningful relationships between the vocabulary words.
+
 \begin{figure}
 \centering
 \includegraphics[width=\linewidth]{fig/datasets.pdf}
diff --git a/main.c b/main.c
index d983ac6..8e1a611 100644
--- a/main.c
+++ b/main.c
@@ -37,7 +37,6 @@
 typedef enum {
     FILTER,
     BATCHER,
     LEARNER,
-    VISUALIZER,
     DISPATCHER
 } Role;
@@ -66,10 +65,7 @@ size_t number_of(Role what) {
                - number_of(TOKENIZER)
                - number_of(FILTER)
                - number_of(BATCHER)
-               - number_of(DISPATCHER)
-               - number_of(VISUALIZER);
-    case VISUALIZER:
-        return 0;
+               - number_of(DISPATCHER);
     case DISPATCHER:
         return 1;
     }
@@ -364,7 +360,6 @@
 void dispatcher() {
     INFO_PRINTF("Starting dispatcher %d\n", getpid());
     int go = 1;
-    // int visualizer = mpi_id_from_role_id(VISUALIZER, 0);
     size_t bs = getbs();
     size_t bpe = getbpe();
     float target = gettarget();
@@ -404,9 +399,6 @@
         crt_loss = eval_net(frank);
         min_loss = crt_loss < min_loss ? crt_loss : min_loss;
         INFO_PRINTF("Round %ld, validation loss %f\n", rounds, crt_loss);
-        // MPI_Send(&go, 1, MPI_INT, visualizer, TAG_INSTR, MPI_COMM_WORLD);
-        // MPI_Send(wl.weights[0].W, emb_mat_size, MPI_FLOAT,
-        //          visualizer, TAG_EMBED, MPI_COMM_WORLD);
 
         ckpt_net(frank);
 
@@ -423,9 +415,6 @@
         MPI_Send(&go, 1, MPI_INT, mpi_id_from_role_id(LEARNER, l),
                  TAG_INSTR, MPI_COMM_WORLD);
     }
-    // MPI_Send(&go, 1, MPI_INT, mpi_id_from_role_id(VISUALIZER, 0),
-    //          TAG_INSTR, MPI_COMM_WORLD);
-
     save_emb(frank);
 
     float delta_t = finish - start;
@@ -445,32 +434,6 @@
     free(wls);
     free(round);
     INFO_PRINTF("Finishing dispatcher %d\n", getpid());
-    // sleep(4);
-    // INFO_PRINTLN("Visualization server is still running on port 8448\n"
-    //              "To terminate, press Ctrl-C");
-}
-
-void visualizer() {
-    INFO_PRINTF("Starting visualizer %d\n", getpid());
-    serve();
-
-    int dispatcher = mpi_id_from_role_id(DISPATCHER, 0);
-    int go_on = 1;
-
-    size_t emb_mat_size = getvocsize() * getemb();
-    float* embeddings = malloc(emb_mat_size * sizeof(float));
-
-    MPI_Recv(&go_on, 1, MPI_INT, dispatcher, TAG_INSTR, MPI_COMM_WORLD,
-             MPI_STATUS_IGNORE);
-
-    while(go_on != -1) {
-        MPI_Recv(embeddings, emb_mat_size, MPI_FLOAT, dispatcher, TAG_EMBED,
-                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-        server_update(embeddings);
-        MPI_Recv(&go_on, 1, MPI_INT, dispatcher, TAG_INSTR, MPI_COMM_WORLD,
-                 MPI_STATUS_IGNORE);
-    }
-    INFO_PRINTF("Exiting visualizer node %d\n", getpid());
 }
 
 int main (int argc, const char **argv) {
@@ -483,7 +446,7 @@
         MPI_Abort(MPI_COMM_WORLD, 1);
     }
     int pipelines = argc - 1;
-    int min_nodes = 4 * pipelines + 2;
+    int min_nodes = 4 * pipelines + 1;
     if (world_size() < min_nodes) {
         INFO_PRINTF("You requested %d pipeline(s) "
                     "but only provided %d procs "
@@ -519,9 +482,6 @@
     case DISPATCHER:
         dispatcher();
         break;
-    case VISUALIZER:
-        visualizer();
-        break;
     default:
         INFO_PRINTLN("DYING HORRIBLY!");
     }
diff --git a/server.py b/server.py
deleted file mode 100644
index 90c8502..0000000
--- a/server.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from threading import Thread
-
-import flask
-
-
-t = None
-app = flask.Flask(__name__)
-emb_map = None
-
-
-
-import logging
-log = logging.getLogger('werkzeug')
-log.setLevel(logging.ERROR)
-app.logger.setLevel(logging.ERROR)
-
-
-@app.route('/')
-def main():
-    if emb_map is None:
-        return 'Hello World!'
-    else:
-        return '\n'.join(f'{w}: {vec}' for w, vec in emb_map.items())
-
-
-def serve():
-    global t
-    if t is None:
-        t = Thread(target=app.run, kwargs={'port': 8448})
-        t.start()
-
-
-if __name__ == '__main__':
-    serve()
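
With the VISUALIZER role removed, the dispatcher is the only process outside
the per-file pipelines, which is why both report formulas and the min_nodes
check in main.c drop from "+ 2" to "+ 1". As a worked example of the updated
arithmetic (the binary path ./build/main and the corpus file names are
illustrative assumptions, not taken from this patch), training on two text
files now needs at least 4 * 2 + 1 = 9 processes:

    # Hypothetical launch: two pipelines at the minimum process count.
    # Of the 9 processes, 9 - 1 - (3 * 2) = 2 become Learners.
    mpiexec -n 9 ./build/main corpus1.txt corpus2.txt

Requesting more than the minimum is fine: each extra process becomes an
additional Learner, per the num_learners formula in the report.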