Commit 075be1c0 by Paktalin

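Refactored the code: extracted the shared `create_2_layers_model` and `train_model` helpers, and removed the per-model checkpoint-name functions, the older validation-loss training loops and `print_lstm_30_history`.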

parent 20504167
Showing with 30 additions and 119 deletions
...@@ -14,142 +14,52 @@ def get_train_test_val(sequences): ...@@ -14,142 +14,52 @@ def get_train_test_val(sequences):
x_test, x_validate, y_test, y_validate = train_test_split(x_test, y_test, test_size=0.2) x_test, x_validate, y_test, y_validate = train_test_split(x_test, y_test, test_size=0.2)
return x_train, y_train, x_test, y_test, x_validate, y_validate return x_train, y_train, x_test, y_test, x_validate, y_validate
def create_2_layers_model(layer1, layer2, hidden_units, seq_length=None):
    """Build and compile a model with two stacked recurrent layers.

    The layers are added in this order: Embedding(VOCAB_SIZE, 50), layer1,
    layer2, Dense(hidden_units, relu), Dense(VOCAB_SIZE, softmax). The model
    is compiled with categorical cross-entropy, the Adam optimizer and the
    accuracy metric.

    Args:
        layer1: First layer placed after the embedding. The factories in this
            file build it with return_sequences=True.
        layer2: Second layer, placed after layer1.
        hidden_units: Number of units of the intermediate Dense layer.
        seq_length: Value forwarded to the Embedding layer as input_length.
            Defaults to None so existing three-argument calls keep working.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    # seq_length used to be read here without being a parameter of this
    # function, which raised NameError; it is now an explicit argument.
    model.add(Embedding(VOCAB_SIZE, 50, input_length=seq_length))
    model.add(layer1)
    model.add(layer2)
    model.add(Dense(hidden_units, activation='relu'))
    model.add(Dense(VOCAB_SIZE, activation='softmax'))
    # NOTE(review): the diff shows a one-sided `print(model.summary())` row at
    # this position; it is treated as removed by the commit - confirm.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def create_LSTM_2_layers(seq_length, hidden_units=100):
    """Create the two-layer LSTM variant of the model.

    Args:
        seq_length: Forwarded to the Embedding layer as input_length.
        hidden_units: Units of each LSTM layer and of the intermediate Dense
            layer.

    Returns:
        The compiled model produced by create_2_layers_model.
    """
    layer1 = LSTM(hidden_units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)
    layer2 = LSTM(hidden_units, dropout=0.3, recurrent_dropout=0.3)
    # layer1 used to be passed for both positions, so layer2 (the 0.3-dropout
    # layer without return_sequences) was never part of the model.
    return create_2_layers_model(layer1, layer2, hidden_units, seq_length=seq_length)
def last_word_lstm_model_name(epoch, hidden_units=None):
    """Return the checkpoint file name of the LSTM model for ``epoch``.

    ``hidden_units`` is accepted but does not influence the name.
    """
    pieces = ('lstm_test_validation_', str(epoch), '_epochs.h5')
    return ''.join(pieces)
def last_word_rnn_model_name(epoch, units):
    """Return the checkpoint file name of the RNN model with ``units`` units at ``epoch``."""
    pieces = ('rnn_', str(units), '_units_', str(epoch), '.h5')
    return ''.join(pieces)
def last_word_lstm_2_layers_model_name(epoch):
    """Return the checkpoint file name of the two-layer LSTM model at ``epoch``."""
    pieces = ('lstm_2_layers_', str(epoch), '.h5')
    return ''.join(pieces)
def last_word_lstm150_2_layers_model_name(epoch):
    """Return the checkpoint file name carrying the ``lstm150_2_layers_`` prefix for ``epoch``."""
    pieces = ('lstm150_2_layers_', str(epoch), '.h5')
    return ''.join(pieces)
def last_word_lstm50_2_layers_model_name(epoch):
    """Return the checkpoint file name carrying the ``lstm50_2_layers_`` prefix for ``epoch``."""
    pieces = ('lstm50_2_layers_', str(epoch), '.h5')
    return ''.join(pieces)
def last_word_lstm_n_2_layers_model_name(epoch, units):
    """Return the checkpoint file name of the two-layer LSTM with ``units`` units at ``epoch``."""
    pieces = ('lstm_', str(units), '_2_layers_', str(epoch), '.h5')
    return ''.join(pieces)
def train_model_with_decreasing_val_loss(model, x_train, y_train, x_test, y_test, x_validate, y_validate, model_name, epoch, best_loss, hidden_units):
    """Train for one epoch, print the test evaluation and save a checkpoint.

    As written there is no comparison against ``best_loss``: the
    "improved" message is printed and the model is saved on every call.

    Args:
        model: Object providing ``fit``, ``evaluate`` and ``save``.
        x_train, y_train: Data passed to ``model.fit``.
        x_test, y_test: Data passed to ``model.evaluate``.
        x_validate, y_validate: Passed to ``fit`` as ``validation_data``.
        model_name: Callable ``(epoch, hidden_units) -> file name`` used for
            the checkpoint.
        epoch: Current epoch counter.
        best_loss: Not read by this function.
        hidden_units: Forwarded to ``model_name``.

    Returns:
        Tuple ``(epoch + 1, val_loss)``, where ``val_loss`` is the last
        ``'val_loss'`` entry reported by ``fit``.
    """
    print('Epoch %i' % epoch)
    fit_result = model.fit(
        x_train,
        y_train,
        epochs=1,
        batch_size=800,
        validation_data=(x_validate, y_validate),
    )
    latest_val_loss = fit_result.history['val_loss'][-1]
    print('____Improved the loss_____')
    print(model.evaluate(x_test, y_test))
    next_epoch = epoch + 1
    # The checkpoint is named after the incremented epoch counter.
    model.save(model_name(next_epoch, hidden_units))
    return next_epoch, latest_val_loss
def train_last_word_lstm_model():
    """Resume LSTM training from the epoch-49 checkpoint and keep training.

    The data is read from 'sequences.csv'. The loop has no exit condition,
    so the function runs until it is interrupted or an error is raised.
    After every epoch the model is reloaded from the checkpoint that was
    just written.
    """
    # Order of the tuple: x_train, y_train, x_test, y_test, x_validate, y_validate.
    splits = get_train_test_val(read_array('sequences.csv'))
    epoch, val_loss = 49, 1.5351
    model = load_model(last_word_lstm_model_name(epoch))
    while True:
        epoch, val_loss = train_model_with_decreasing_val_loss(
            model, *splits, last_word_lstm_model_name, epoch, val_loss, None)
        model = load_model(last_word_lstm_model_name(epoch))
def train_last_word_rnn_model(epoch=None, hidden_units=100):
    """Train the RNN model indefinitely, starting fresh or from a checkpoint.

    The data is read from 'sequences.csv'. The loop has no exit condition,
    so the function runs until it is interrupted or an error is raised.
    After every epoch the validation loss is appended to a list that is
    printed, and the model is reloaded from the checkpoint just written.

    Args:
        epoch: Epoch of the checkpoint to resume from. When None a new model
            is created and the counter starts at 0.
        hidden_units: Used when creating a new model and in checkpoint file
            names.
    """
    x_train, y_train, x_test, y_test, x_validate, y_validate = get_train_test_val(read_array('sequences.csv'))
    val_loss = 2.1231
    losses = []
    # Identity check: None is a singleton and `==` can be overridden.
    if epoch is None:
        model = create_RNN_model(x_train.shape[1], hidden_units)
        epoch = 0
    else:
        model = load_model(last_word_rnn_model_name(epoch, hidden_units))
    while True:
        epoch, val_loss = train_model_with_decreasing_val_loss(model, x_train, y_train, x_test, y_test, x_validate, y_validate, last_word_rnn_model_name, epoch, val_loss, hidden_units)
        losses.append(val_loss)
        print(losses)
        model = load_model(last_word_rnn_model_name(epoch, hidden_units))
def print_rnn_model_val_loss():
    """Load the checkpoint named by ``last_word_lstm_n_2_layers_model_name(11, 100)``.

    NOTE(review): despite the function name, nothing is printed or returned
    here; the loaded model is only bound to a local variable.
    """
    checkpoint_name = last_word_lstm_n_2_layers_model_name(11, 100)
    model = load_model(checkpoint_name)
def create_RNN_model(seq_length, hidden_units=100):
    """Create the two-layer SimpleRNN variant of the model.

    Args:
        seq_length: NOTE(review): not used in this function body - confirm
            whether it should reach the embedding layer.
        hidden_units: Units of each SimpleRNN layer; also passed on to
            create_2_layers_model.

    Returns:
        Whatever create_2_layers_model returns for the two layers.
    """
    first_rnn = SimpleRNN(
        hidden_units,
        dropout=0.2,
        recurrent_dropout=0.2,
        return_sequences=True,
    )
    second_rnn = SimpleRNN(hidden_units, dropout=0.3, recurrent_dropout=0.3)
    return create_2_layers_model(first_rnn, second_rnn, hidden_units)
def train_model(input_file, model, dir_name, epochs=70, epochs_start=0, batch_size=32, history=None):
    """Train ``model`` one epoch at a time, checkpointing after every epoch.

    After each epoch the model is saved as ``dir_name + 'model_<epoch>.h5'``
    and the accumulated history as ``dir_name + 'history_<epoch>'``.

    Args:
        input_file: Path handed to read_array to load the sequences.
        model: Model providing ``fit`` and ``save``.
        dir_name: Prefix for output files. It is concatenated as-is, so it
            must already end with a path separator.
        epochs: Exclusive upper bound of the epoch counter.
        epochs_start: First epoch number, used when resuming a run.
        batch_size: Batch size passed to ``model.fit``.
        history: Mapping of metric name to list of per-epoch values to extend
            (mutated in place). A fresh mapping is created when None.
    """
    x_train, y_train, _x_test, _y_test, x_validate, y_validate = get_train_test_val(read_array(input_file))
    # Identity check: None is a singleton and `==` can be overridden.
    if history is None:
        history = {'loss': [], 'acc': [], 'val_loss': [], 'val_acc': []}
    for epoch in range(epochs_start, epochs):
        print('Epoch', epoch)
        train_history = model.fit(x_train, y_train, epochs=1, batch_size=batch_size, validation_data=(x_validate, y_validate))
        model.save(dir_name + 'model_' + str(epoch) + '.h5')
        for key, values in train_history.history.items():
            # setdefault keeps a metric key that was not pre-seeded (for
            # example a differently named accuracy key) from raising KeyError.
            history.setdefault(key, []).append(values[0])
        save_list(history, dir_name + 'history_' + str(epoch))
def train_lstm_nonstop():
    """Resume the LSTM run stored in 'lstm_30_epochs_nonstop_dropout/'.

    Loads the model and history saved for epoch 22 and continues training
    on 'sequences.csv' from epoch 23 via train_model.
    """
    checkpoint_dir = 'lstm_30_epochs_nonstop_dropout/'
    resumed_model = load_model(checkpoint_dir + 'model_22.h5')
    past_history = read_list(checkpoint_dir + 'history_22')
    train_model(
        'sequences.csv',
        resumed_model,
        checkpoint_dir,
        history=past_history,
        epochs_start=23,
    )
def train_lstm_50_units_nonstop():
    """Train a new 50-unit two-layer LSTM model on 'sequences.csv'.

    Checkpoints and history files are written by train_model under
    'lstm_70_epochs_50_units_nonstop_dropout/'.
    """
    dir_name = 'lstm_70_epochs_50_units_nonstop_dropout/'
    # x_train used to be read here without being defined in this function
    # (NameError). get_train_test_val returns x_train first; it is loaded
    # only to pass its second dimension as seq_length. train_model loads and
    # splits the data again for the actual training.
    x_train = get_train_test_val(read_array('sequences.csv'))[0]
    model = create_LSTM_2_layers(x_train.shape[1], 50)
    train_model('sequences.csv', model, dir_name)
def train_rnn_nonstop():
    """Train a new 100-unit two-layer SimpleRNN model on 'sequences.csv'.

    Checkpoints and history files are written by train_model under
    'rnn_50_epochs_nonstop_dropout/'.

    NOTE(review): the directory name says 50 epochs, but train_model is
    called with its default epoch count - confirm which is intended.
    """
    dir_name = 'rnn_50_epochs_nonstop_dropout/'
    # x_train used to be read here without being defined in this function
    # (NameError). get_train_test_val returns x_train first; it is loaded
    # only to pass its second dimension as seq_length. train_model loads and
    # splits the data again for the actual training.
    x_train = get_train_test_val(read_array('sequences.csv'))[0]
    model = create_RNN_model(x_train.shape[1], 100)
    train_model('sequences.csv', model, dir_name)
def print_lstm_30_history():
    """Insert six hard-coded values per metric into the saved LSTM history.

    The history stored at 'lstm_30_epochs_nonstop_dropout/history_22' is
    read, the values are inserted at index 17 of each metric list, the
    result is saved back to the same path, and the file is re-read and
    printed. Because the file is overwritten, calling this again inserts
    the values a second time.
    """
    history_path = 'lstm_30_epochs_nonstop_dropout/history_22'
    hist = read_list(history_path)
    insert_at = 17
    missed_values = (
        ('loss', [2.1326, 2.1257, 2.1175, 2.1137, 2.1104, 2.1042]),
        ('val_loss', [2.0993, 2.1017, 2.1083, 2.1216, 2.1224, 2.1213]),
        ('acc', [0.4142, 0.4156, 0.4170, 0.4169, 0.4180, 0.4197]),
        ('val_acc', [0.4226, 0.4172, 0.4221, 0.4234, 0.4230, 0.4258]),
    )
    for metric, values in missed_values:
        recorded = hist[metric]
        hist[metric] = recorded[:insert_at] + values + recorded[insert_at:]
    save_list(hist, history_path)
    print(read_list(history_path))
train_lstm_50_units_nonstop() train_rnn_nonstop()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment