purged the visualizer

2019-12-15 22:17:46 -08:00
parent 24ca380cbf
commit 27a1beff70
5 changed files with 16 additions and 92 deletions

Dockerfile

@@ -9,9 +9,9 @@ RUN pip3 install --no-cache \
     meson==0.51.2 \
     numpy==1.16.4 \
     tensorflow==1.14.0 \
-    Flask==1.0.2 \
     cython==0.29.14
 RUN mkdir /workspace
-COPY bridge.pyx library.py server.py meson.build main.c /workspace/
+COPY bridge.pyx library.py meson.build main.c /workspace/
 RUN cd /workspace && meson build && cd build && ninja
 WORKDIR /workspace

bridge.pyx

@@ -7,7 +7,6 @@ from libc.stdlib cimport malloc, realloc
 from libc.string cimport memcpy
 import library as nn
-import server as srv

 tokenizers = {}
@@ -43,16 +42,6 @@ cdef public char *greeting():
     return f'The value is {3**3**3}'.encode('utf-8')

-cdef public void serve():
-    srv.serve()
-
-cdef public void server_update(float *emb):
-    embeddings = np.asarray(<float[:getvocsize(),:getemb()]>emb)
-    low_dim = nn.calc_TSNE(embeddings)
-    srv.emb_map = dict(zip(nn.inv_vocab, low_dim))
-
 cdef public size_t getwin():
     return nn.WIN

*.tex (project report; filename not preserved)

@@ -244,14 +244,14 @@ for the whole application. Therefore, the formula for the minimum number of
 processes to be requested from \verb|mpiexec| looks like the following:
 \begin{lstlisting}
-NUM_PROC >= (4 * num_text_files) + 2
+NUM_PROC >= (4 * num_text_files) + 1
 \end{lstlisting}
 To figure out how many Learners will be created, the following formula can be
 used:
 \begin{lstlisting}
-num_learners = NUM_PROC - 2 - (3 * num_text_files)
+num_learners = NUM_PROC - 1 - (3 * num_text_files)
 \end{lstlisting}
 During running, the program will create the folder \verb|trained| in the
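
A quick worked check of the updated constants (illustrative numbers, not taken from the report): with two input text files the minimum is (4 * 2) + 1 = 9 processes, and launching with NUM_PROC = 12 leaves 12 - 1 - (3 * 2) = 5 Learner nodes. The constant drops from 2 to 1 because the dedicated visualizer rank no longer exists; only the Dispatcher remains outside the per-file pipelines.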
@@ -367,7 +367,8 @@ statistics and exit.
 \section{Evaluation}
 The main focus of evaluation was to determine if executing several neural
-network training nodes in parallel can speed-up the training process. The
+network training nodes in parallel can speed-up the training process.
+The
 employed approach was to define a \textit{target loss} that the network has to
 achieve and then to measure \textit{the number of context windows} that each
 Learner node has to process and, secondarily, the time it takes for the system
@@ -457,6 +458,14 @@ computationally viable not to store the data as one big file but rather have it
 split across multiple nodes, this mode of operation should be investigated
 further and possibly preferred for large-scale training.

+As a last note, the learned embeddings themselves were not of high importance
+for the evaluation, since it is known that in order to obtain high quality
+embeddings a much higher amount of data (a dataset of \mbox{$>$ 100B words})
+and computation time is needed than it was feasible to do as a part of the
+project. However, the learning outcomes were empirically evaluated and it was
+found that even with relatively short training runs the networks could capture
+some meaningful relationships between the vocabulary words.
+
 \begin{figure}
 \centering
 \includegraphics[width=\linewidth]{fig/datasets.pdf}

main.c

@@ -37,7 +37,6 @@ typedef enum {
     FILTER,
     BATCHER,
     LEARNER,
-    VISUALIZER,
     DISPATCHER
 } Role;
@@ -66,10 +65,7 @@ size_t number_of(Role what) {
             - number_of(TOKENIZER)
             - number_of(FILTER)
             - number_of(BATCHER)
-            - number_of(DISPATCHER)
-            - number_of(VISUALIZER);
-        case VISUALIZER:
-            return 0;
+            - number_of(DISPATCHER);
         case DISPATCHER:
             return 1;
     }
@@ -364,7 +360,6 @@ void learner() {
 void dispatcher() {
     INFO_PRINTF("Starting dispatcher %d\n", getpid());
     int go = 1;
-    // int visualizer = mpi_id_from_role_id(VISUALIZER, 0);
     size_t bs = getbs();
     size_t bpe = getbpe();
     float target = gettarget();
@@ -404,9 +399,6 @@ void dispatcher() {
         crt_loss = eval_net(frank);
         min_loss = crt_loss < min_loss ? crt_loss : min_loss;
         INFO_PRINTF("Round %ld, validation loss %f\n", rounds, crt_loss);
-        // MPI_Send(&go, 1, MPI_INT, visualizer, TAG_INSTR, MPI_COMM_WORLD);
-        // MPI_Send(wl.weights[0].W, emb_mat_size, MPI_FLOAT,
-        //          visualizer, TAG_EMBED, MPI_COMM_WORLD);
         ckpt_net(frank);
@@ -423,9 +415,6 @@ void dispatcher() {
         MPI_Send(&go, 1, MPI_INT, mpi_id_from_role_id(LEARNER, l),
                  TAG_INSTR, MPI_COMM_WORLD);
     }
-    // MPI_Send(&go, 1, MPI_INT, mpi_id_from_role_id(VISUALIZER, 0),
-    //          TAG_INSTR, MPI_COMM_WORLD);
     save_emb(frank);

     float delta_t = finish - start;
@@ -445,32 +434,6 @@ void dispatcher() {
     free(wls);
     free(round);
     INFO_PRINTF("Finishing dispatcher %d\n", getpid());
-    // sleep(4);
-    // INFO_PRINTLN("Visualization server is still running on port 8448\n"
-    //              "To terminate, press Ctrl-C");
-}
-
-void visualizer() {
-    INFO_PRINTF("Starting visualizer %d\n", getpid());
-    serve();
-    int dispatcher = mpi_id_from_role_id(DISPATCHER, 0);
-    int go_on = 1;
-    size_t emb_mat_size = getvocsize() * getemb();
-    float* embeddings = malloc(emb_mat_size * sizeof(float));
-    MPI_Recv(&go_on, 1, MPI_INT, dispatcher, TAG_INSTR, MPI_COMM_WORLD,
-             MPI_STATUS_IGNORE);
-    while(go_on != -1) {
-        MPI_Recv(embeddings, emb_mat_size, MPI_FLOAT, dispatcher, TAG_EMBED,
-                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
-        server_update(embeddings);
-        MPI_Recv(&go_on, 1, MPI_INT, dispatcher, TAG_INSTR, MPI_COMM_WORLD,
-                 MPI_STATUS_IGNORE);
-    }
-    INFO_PRINTF("Exiting visualizer node %d\n", getpid());
 }

 int main (int argc, const char **argv) {
@@ -483,7 +446,7 @@ int main (int argc, const char **argv) {
         MPI_Abort(MPI_COMM_WORLD, 1);
     }
     int pipelines = argc - 1;
-    int min_nodes = 4 * pipelines + 2;
+    int min_nodes = 4 * pipelines + 1;
     if (world_size() < min_nodes) {
         INFO_PRINTF("You requested %d pipeline(s) "
                     "but only provided %d procs "
@@ -519,9 +482,6 @@ int main (int argc, const char **argv) {
         case DISPATCHER:
             dispatcher();
             break;
-        case VISUALIZER:
-            visualizer();
-            break;
         default:
             INFO_PRINTLN("DYING HORRIBLY!");
     }

server.py

@@ -1,34 +0,0 @@
-from threading import Thread
-import flask
-
-t = None
-app = flask.Flask(__name__)
-emb_map = None
-
-import logging
-log = logging.getLogger('werkzeug')
-log.setLevel(logging.ERROR)
-app.logger.setLevel(logging.ERROR)
-
-@app.route('/')
-def main():
-    if emb_map is None:
-        return 'Hello World!'
-    else:
-        return '\n'.join(f'{w}: {vec}' for w, vec in emb_map.items())
-
-def serve():
-    global t
-    if t is None:
-        t = Thread(target=app.run, kwargs={'port': 8448})
-        t.start()
-
-if __name__ == '__main__':
-    serve()
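
For reference, the deleted module was self-contained: running python3 server.py started the Flask app on port 8448 in a background thread, and http://localhost:8448/ answered 'Hello World!' until the bridge's server_update() populated emb_map with word-to-low-dimensional-coordinate pairs produced by nn.calc_TSNE.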