network trains and tests but slow? -- investigate
This commit is contained in:
44
library.py
44
library.py
@@ -1,16 +1,9 @@
|
||||
import os
|
||||
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) # STFU!
|
||||
# from nltk.corpus import stopwords
|
||||
# from nltk.tokenize import word_tokenize
|
||||
from mynet import load_mnist, onehot
|
||||
|
||||
|
||||
def word_tokenize(s: str) -> list:
    """Split *s* into a list of lowercase alphabetic words.

    Every character for which ``str.isalpha()`` is false (digits,
    punctuation, whitespace, ...) acts as a word separator, so
    ``"don't"`` becomes ``["don", "t"]``.

    Args:
        s: The text to tokenize.

    Returns:
        The lowercase words in order of appearance; empty if *s* contains
        no alphabetic characters.
    """
    # Turn separators into spaces first so that one whitespace split also
    # collapses runs of consecutive separators.
    cleaned = ''.join(c.lower() if c.isalpha() else ' ' for c in s)
    return cleaned.split()
|
||||
|
||||
|
||||
HERE = os.path.abspath(os.path.dirname(__file__))
|
||||
@@ -20,10 +13,38 @@ VOCAB = os.path.join(HERE, 'vocab.txt')
|
||||
# Map each vocabulary word to the 0-based number of the line it occupies in
# the vocab file.  The file is read inside a ``with`` block so the handle is
# closed deterministically instead of being left to the garbage collector.
with open(VOCAB) as _vocab_file:
    vocab = {
        w: i
        for i, w in enumerate(_vocab_file.read().splitlines(keepends=False))
    }

# Inverse lookup: the vocabulary words ordered by their index in ``vocab``.
# NOTE(review): ``inv_vocab[i]`` is the word with index ``i`` only if the
# vocab file contains no duplicate lines -- confirm for the shipped file.
inv_vocab = sorted(vocab, key=vocab.get)
|
||||
|
||||
|
||||
def onehot(oh_store, idx):
    """Write a one-hot encoding of *idx* into *oh_store*, in place.

    Args:
        oh_store: 2-D NumPy array indexed as ``[row, class]``.  It is
            cleared to zero and then row ``r`` gets a 1 in column
            ``idx[r]``.
        idx: 1-D sequence of class indices, one per row to fill.  Values
            are converted to integers, so float arrays holding whole
            numbers are accepted.

    Returns:
        None; the result is stored in *oh_store*.
    """
    oh_store[:] = 0
    # ``np.int`` was only an alias for the builtin ``int`` and has been
    # removed from NumPy (1.24+); the builtin gives the same dtype.
    cols = np.asarray(idx).astype(int)
    oh_store[np.arange(len(cols)), cols] = 1
|
||||
|
||||
|
||||
def word_tokenize(s: str) -> list:
    """Split *s* into a list of lowercase alphabetic words.

    Every character for which ``str.isalpha()`` is false (digits,
    punctuation, whitespace, ...) acts as a word separator, so
    ``"don't"`` becomes ``["don", "t"]``.

    Args:
        s: The text to tokenize.

    Returns:
        The lowercase words in order of appearance; empty if *s* contains
        no alphabetic characters.
    """
    # Turn separators into spaces first so that one whitespace split also
    # collapses runs of consecutive separators.
    cleaned = ''.join(c.lower() if c.isalpha() else ' ' for c in s)
    return cleaned.split()
|
||||
|
||||
|
||||
def create_test_dataset(win, size=1000):
    """Sample a random set of (context, target) pairs from the corpus file.

    The corpus is tokenized, words missing from ``vocab`` are dropped, and
    the remaining words are replaced by their vocabulary indices.  Centre
    positions are then drawn at random (with replacement) from those that
    have ``win`` words available on both sides.

    Args:
        win: Number of context words taken on each side of the target.
        size: Number of samples to draw (an int); defaults to 1000.

    Returns:
        A tuple ``(X, y)``:
        ``X`` is a float32 array of shape ``(size, 2 * win)`` holding the
        vocabulary indices of the ``win`` words before and the ``win`` words
        after each target; ``y`` is a float32 array of shape
        ``(size, len(vocab))`` with the one-hot encoded target words.

    Raises:
        ValueError: If the corpus has too few in-vocabulary words to fit a
            single window of ``2 * win + 1`` words.
    """
    with open(CORPUS) as f:
        ds = np.array([vocab[w] for w in word_tokenize(f.read())
                       if w in vocab])

    # Fail with a readable message instead of NumPy's "a must be non-empty".
    if len(ds) <= 2 * win:
        raise ValueError(
            'corpus has %d in-vocabulary words, need more than %d for win=%d'
            % (len(ds), 2 * win, win)
        )

    # Centre positions that leave room for a full window on either side.
    idx = np.random.choice(np.arange(win, len(ds) - win), size)

    oh_store = np.zeros((size, len(vocab)), dtype=np.float32)
    onehot(oh_store, ds[idx])

    # Offsets -win..-1 and +1..+win relative to each centre; broadcasting
    # them against the centres gathers every context window in one indexing
    # step instead of slicing once per sample in Python.
    offsets = np.concatenate([np.arange(-win, 0), np.arange(1, win + 1)])
    contexts = ds[idx[:, None] + offsets]

    return (
        # X
        contexts.astype(np.float32),
        # y
        oh_store,
    )
|
||||
|
||||
def create_mnist_network():
|
||||
model = tf.keras.models.Sequential([
|
||||
tf.keras.layers.Dense(30, input_shape=(784,), activation='relu'),
|
||||
@@ -35,8 +56,8 @@ def create_mnist_network():
|
||||
|
||||
|
||||
def create_cbow_network(win, embed):
|
||||
ctxt = tf.keras.layers.Input(shape=[win])
|
||||
ed = tf.keras.layers.Embedding(len(vocab), embed, input_length=win)(ctxt)
|
||||
ctxt = tf.keras.layers.Input(shape=[2*win])
|
||||
ed = tf.keras.layers.Embedding(len(vocab), embed, input_length=2*win)(ctxt)
|
||||
cbow = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=1))(ed)
|
||||
blowup = tf.keras.layers.Dense(len(vocab), activation='softmax')(cbow)
|
||||
mod = tf.keras.Model(inputs=ctxt, outputs=blowup)
|
||||
@@ -44,7 +65,6 @@ def create_cbow_network(win, embed):
|
||||
optimizer='sgd',
|
||||
loss='categorical_crossentropy',
|
||||
)
|
||||
print(mod, flush=True)
|
||||
return mod
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user