purged the visualizer
This commit is contained in:
@@ -9,9 +9,9 @@ RUN pip3 install --no-cache \
|
||||
meson==0.51.2 \
|
||||
numpy==1.16.4 \
|
||||
tensorflow==1.14.0 \
|
||||
Flask==1.0.2 \
|
||||
cython==0.29.14
|
||||
|
||||
RUN mkdir /workspace
|
||||
COPY bridge.pyx library.py server.py meson.build main.c /workspace/
|
||||
COPY bridge.pyx library.py meson.build main.c /workspace/
|
||||
RUN cd /workspace && meson build && cd build && ninja
|
||||
WORKDIR /workspace
|
||||
|
||||
11
bridge.pyx
11
bridge.pyx
@@ -7,7 +7,6 @@ from libc.stdlib cimport malloc, realloc
|
||||
from libc.string cimport memcpy
|
||||
|
||||
import library as nn
|
||||
import server as srv
|
||||
|
||||
|
||||
tokenizers = {}
|
||||
@@ -43,16 +42,6 @@ cdef public char *greeting():
|
||||
return f'The value is {3**3**3}'.encode('utf-8')
|
||||
|
||||
|
||||
cdef public void serve():
|
||||
srv.serve()
|
||||
|
||||
|
||||
cdef public void server_update(float *emb):
|
||||
embeddings = np.asarray(<float[:getvocsize(),:getemb()]>emb)
|
||||
low_dim = nn.calc_TSNE(embeddings)
|
||||
srv.emb_map = dict(zip(nn.inv_vocab, low_dim))
|
||||
|
||||
|
||||
cdef public size_t getwin():
|
||||
return nn.WIN
|
||||
|
||||
|
||||
@@ -244,14 +244,14 @@ for the whole application. Therefore, the formula for the minimum number of
|
||||
processes to be requested from \verb|mpiexec| looks like the following:
|
||||
|
||||
\begin{lstlisting}
|
||||
NUM_PROC >= (4 * num_text_files) + 2
|
||||
NUM_PROC >= (4 * num_text_files) + 1
|
||||
\end{lstlisting}
|
||||
|
||||
To figure out how many Learners will be created, the following formula can be
|
||||
used:
|
||||
|
||||
\begin{lstlisting}
|
||||
num_learners = NUM_PROC - 2 - (3 * num_text_files)
|
||||
num_learners = NUM_PROC - 1 - (3 * num_text_files)
|
||||
\end{lstlisting}
|
||||
|
||||
During running, the program will create the folder \verb|trained| in the
|
||||
@@ -367,7 +367,8 @@ statistics and exit.
|
||||
\section{Evaluation}
|
||||
|
||||
The main focus of evaluation was to determine if executing several neural
|
||||
network training nodes in parallel can speed-up the training process. The
|
||||
network training nodes in parallel can speed-up the training process.
|
||||
The
|
||||
employed approach was to define a \textit{target loss} that the network has to
|
||||
achieve and then to measure \textit{the number of context windows} that each
|
||||
Learner node has to process and, secondarily, the time it takes for the system
|
||||
@@ -457,6 +458,14 @@ computationally viable not to store the data as one big file but rather have it
|
||||
split across multiple nodes, this mode of operation should be investigated
|
||||
further and possibly preferred for large-scale training.
|
||||
|
||||
As a last note, the learned embeddings themselves were not of high importance
|
||||
for the evaluation, since it is known that in order to obtain high quality
|
||||
embeddings a much higher amount of data (a dataset of \mbox{$>$ 100B words})
|
||||
and computation time is needed than it was feasible to do as a part of the
|
||||
project. However, the learning outcomes were empirically evaluated and it was
|
||||
found that even with relatively short training runs the networks could capture
|
||||
some meaningful relationships between the vocabulary words.
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics[width=\linewidth]{fig/datasets.pdf}
|
||||
|
||||
44
main.c
44
main.c
@@ -37,7 +37,6 @@ typedef enum {
|
||||
FILTER,
|
||||
BATCHER,
|
||||
LEARNER,
|
||||
VISUALIZER,
|
||||
DISPATCHER
|
||||
} Role;
|
||||
|
||||
@@ -66,10 +65,7 @@ size_t number_of(Role what) {
|
||||
- number_of(TOKENIZER)
|
||||
- number_of(FILTER)
|
||||
- number_of(BATCHER)
|
||||
- number_of(DISPATCHER)
|
||||
- number_of(VISUALIZER);
|
||||
case VISUALIZER:
|
||||
return 0;
|
||||
- number_of(DISPATCHER);
|
||||
case DISPATCHER:
|
||||
return 1;
|
||||
}
|
||||
@@ -364,7 +360,6 @@ void learner() {
|
||||
void dispatcher() {
|
||||
INFO_PRINTF("Starting dispatcher %d\n", getpid());
|
||||
int go = 1;
|
||||
// int visualizer = mpi_id_from_role_id(VISUALIZER, 0);
|
||||
size_t bs = getbs();
|
||||
size_t bpe = getbpe();
|
||||
float target = gettarget();
|
||||
@@ -404,9 +399,6 @@ void dispatcher() {
|
||||
crt_loss = eval_net(frank);
|
||||
min_loss = crt_loss < min_loss ? crt_loss : min_loss;
|
||||
INFO_PRINTF("Round %ld, validation loss %f\n", rounds, crt_loss);
|
||||
// MPI_Send(&go, 1, MPI_INT, visualizer, TAG_INSTR, MPI_COMM_WORLD);
|
||||
// MPI_Send(wl.weights[0].W, emb_mat_size, MPI_FLOAT,
|
||||
// visualizer, TAG_EMBED, MPI_COMM_WORLD);
|
||||
|
||||
ckpt_net(frank);
|
||||
|
||||
@@ -423,9 +415,6 @@ void dispatcher() {
|
||||
MPI_Send(&go, 1, MPI_INT, mpi_id_from_role_id(LEARNER, l),
|
||||
TAG_INSTR, MPI_COMM_WORLD);
|
||||
}
|
||||
// MPI_Send(&go, 1, MPI_INT, mpi_id_from_role_id(VISUALIZER, 0),
|
||||
// TAG_INSTR, MPI_COMM_WORLD);
|
||||
|
||||
save_emb(frank);
|
||||
|
||||
float delta_t = finish - start;
|
||||
@@ -445,32 +434,6 @@ void dispatcher() {
|
||||
free(wls);
|
||||
free(round);
|
||||
INFO_PRINTF("Finishing dispatcher %d\n", getpid());
|
||||
// sleep(4);
|
||||
// INFO_PRINTLN("Visualization server is still running on port 8448\n"
|
||||
// "To terminate, press Ctrl-C");
|
||||
}
|
||||
|
||||
void visualizer() {
|
||||
INFO_PRINTF("Starting visualizer %d\n", getpid());
|
||||
serve();
|
||||
|
||||
int dispatcher = mpi_id_from_role_id(DISPATCHER, 0);
|
||||
int go_on = 1;
|
||||
|
||||
size_t emb_mat_size = getvocsize() * getemb();
|
||||
float* embeddings = malloc(emb_mat_size * sizeof(float));
|
||||
|
||||
MPI_Recv(&go_on, 1, MPI_INT, dispatcher, TAG_INSTR, MPI_COMM_WORLD,
|
||||
MPI_STATUS_IGNORE);
|
||||
|
||||
while(go_on != -1) {
|
||||
MPI_Recv(embeddings, emb_mat_size, MPI_FLOAT, dispatcher, TAG_EMBED,
|
||||
MPI_COMM_WORLD, MPI_STATUS_IGNORE);
|
||||
server_update(embeddings);
|
||||
MPI_Recv(&go_on, 1, MPI_INT, dispatcher, TAG_INSTR, MPI_COMM_WORLD,
|
||||
MPI_STATUS_IGNORE);
|
||||
}
|
||||
INFO_PRINTF("Exiting visualizer node %d\n", getpid());
|
||||
}
|
||||
|
||||
int main (int argc, const char **argv) {
|
||||
@@ -483,7 +446,7 @@ int main (int argc, const char **argv) {
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
int pipelines = argc - 1;
|
||||
int min_nodes = 4 * pipelines + 2;
|
||||
int min_nodes = 4 * pipelines + 1;
|
||||
if (world_size() < min_nodes) {
|
||||
INFO_PRINTF("You requested %d pipeline(s) "
|
||||
"but only provided %d procs "
|
||||
@@ -519,9 +482,6 @@ int main (int argc, const char **argv) {
|
||||
case DISPATCHER:
|
||||
dispatcher();
|
||||
break;
|
||||
case VISUALIZER:
|
||||
visualizer();
|
||||
break;
|
||||
default:
|
||||
INFO_PRINTLN("DYING HORRIBLY!");
|
||||
}
|
||||
|
||||
34
server.py
34
server.py
@@ -1,34 +0,0 @@
|
||||
from threading import Thread
|
||||
|
||||
import flask
|
||||
|
||||
|
||||
t = None
|
||||
app = flask.Flask(__name__)
|
||||
emb_map = None
|
||||
|
||||
|
||||
|
||||
import logging
|
||||
log = logging.getLogger('werkzeug')
|
||||
log.setLevel(logging.ERROR)
|
||||
app.logger.setLevel(logging.ERROR)
|
||||
|
||||
|
||||
@app.route('/')
|
||||
def main():
|
||||
if emb_map is None:
|
||||
return 'Hello World!'
|
||||
else:
|
||||
return '\n'.join(f'{w}: {vec}' for w, vec in emb_map.items())
|
||||
|
||||
|
||||
def serve():
|
||||
global t
|
||||
if t is None:
|
||||
t = Thread(target=app.run, kwargs={'port': 8448})
|
||||
t.start()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
serve()
|
||||
Reference in New Issue
Block a user