Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
likorn
/
estonian-lstm
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
075be1c0
authored
Jan 14, 2019
by
Paktalin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactored the code
parent
20504167
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
30 additions
and
119 deletions
main.py
main.py
View file @
075be1c0
...
@@ -14,142 +14,52 @@ def get_train_test_val(sequences):
...
@@ -14,142 +14,52 @@ def get_train_test_val(sequences):
x_test
,
x_validate
,
y_test
,
y_validate
=
train_test_split
(
x_test
,
y_test
,
test_size
=
0.2
)
x_test
,
x_validate
,
y_test
,
y_validate
=
train_test_split
(
x_test
,
y_test
,
test_size
=
0.2
)
return
x_train
,
y_train
,
x_test
,
y_test
,
x_validate
,
y_validate
return
x_train
,
y_train
,
x_test
,
y_test
,
x_validate
,
y_validate
def
create_
RNN_model
(
seq_length
,
hidden_units
=
100
):
def
create_
2_layers_model
(
layer1
,
layer2
,
hidden_units
):
model
=
Sequential
()
model
=
Sequential
()
model
.
add
(
Embedding
(
VOCAB_SIZE
,
50
,
input_length
=
seq_length
))
model
.
add
(
Embedding
(
VOCAB_SIZE
,
50
,
input_length
=
seq_length
))
model
.
add
(
SimpleRNN
(
hidden_units
,
return_sequences
=
True
)
)
model
.
add
(
layer1
)
model
.
add
(
SimpleRNN
(
hidden_units
)
)
model
.
add
(
layer2
)
model
.
add
(
Dense
(
hidden_units
,
activation
=
'relu'
))
model
.
add
(
Dense
(
hidden_units
,
activation
=
'relu'
))
model
.
add
(
Dense
(
VOCAB_SIZE
,
activation
=
'softmax'
))
model
.
add
(
Dense
(
VOCAB_SIZE
,
activation
=
'softmax'
))
print
(
model
.
summary
())
model
.
compile
(
loss
=
'categorical_crossentropy'
,
optimizer
=
'adam'
,
metrics
=
[
'accuracy'
])
model
.
compile
(
loss
=
'categorical_crossentropy'
,
optimizer
=
'adam'
,
metrics
=
[
'accuracy'
])
return
model
return
model
def
create_LSTM_2_layers
(
seq_length
,
hidden_units
=
100
):
def
create_LSTM_2_layers
(
seq_length
,
hidden_units
=
100
):
model
=
Sequential
()
layer1
=
LSTM
(
hidden_units
,
dropout
=
0.2
,
recurrent_dropout
=
0.2
,
return_sequences
=
True
)
model
.
add
(
Embedding
(
VOCAB_SIZE
,
50
,
input_length
=
seq_length
))
layer2
=
LSTM
(
hidden_units
,
dropout
=
0.3
,
recurrent_dropout
=
0.3
)
model
.
add
(
LSTM
(
hidden_units
,
dropout
=
0.2
,
recurrent_dropout
=
0.2
,
return_sequences
=
True
))
return
create_2_layers_model
(
layer1
,
layer1
,
hidden_units
)
model
.
add
(
LSTM
(
hidden_units
,
dropout
=
0.3
,
recurrent_dropout
=
0.3
))
model
.
add
(
Dense
(
hidden_units
,
activation
=
'relu'
))
model
.
add
(
Dense
(
VOCAB_SIZE
,
activation
=
'softmax'
))
model
.
compile
(
loss
=
'categorical_crossentropy'
,
optimizer
=
'adam'
,
metrics
=
[
'accuracy'
])
return
model
def
last_word_lstm_model_name
(
epoch
,
hidden_units
=
None
):
return
'lstm_test_validation_'
+
str
(
epoch
)
+
'_epochs.h5'
def
last_word_rnn_model_name
(
epoch
,
units
):
return
'rnn_'
+
str
(
units
)
+
'_units_'
+
str
(
epoch
)
+
'.h5'
def
last_word_lstm_2_layers_model_name
(
epoch
):
return
'lstm_2_layers_'
+
str
(
epoch
)
+
'.h5'
def
last_word_lstm150_2_layers_model_name
(
epoch
):
return
'lstm150_2_layers_'
+
str
(
epoch
)
+
'.h5'
def
last_word_lstm50_2_layers_model_name
(
epoch
):
return
'lstm50_2_layers_'
+
str
(
epoch
)
+
'.h5'
def
last_word_lstm_n_2_layers_model_name
(
epoch
,
units
):
return
'lstm_'
+
str
(
units
)
+
'_2_layers_'
+
str
(
epoch
)
+
'.h5'
def
train_model_with_decreasing_val_loss
(
model
,
x_train
,
y_train
,
x_test
,
y_test
,
x_validate
,
y_validate
,
model_name
,
epoch
,
best_loss
,
hidden_units
):
print
(
'Epoch
%
i'
%
epoch
)
train_history
=
model
.
fit
(
x_train
,
y_train
,
epochs
=
1
,
batch_size
=
800
,
validation_data
=
(
x_validate
,
y_validate
))
val_loss
=
train_history
.
history
[
'val_loss'
][
-
1
]
print
(
'____Improved the loss_____'
,)
print
(
model
.
evaluate
(
x_test
,
y_test
))
epoch
+=
1
model
.
save
(
model_name
(
epoch
,
hidden_units
))
best_loss
=
val_loss
return
epoch
,
best_loss
def
train_last_word_lstm_model
():
x_train
,
y_train
,
x_test
,
y_test
,
x_validate
,
y_validate
=
get_train_test_val
(
read_array
(
'sequences.csv'
))
epoch
=
49
val_loss
=
1.5351
model
=
load_model
(
last_word_lstm_model_name
(
epoch
))
while
(
True
):
epoch
,
val_loss
=
train_model_with_decreasing_val_loss
(
model
,
x_train
,
y_train
,
x_test
,
y_test
,
x_validate
,
y_validate
,
last_word_lstm_model_name
,
epoch
,
val_loss
,
None
)
model
=
load_model
(
last_word_lstm_model_name
(
epoch
))
def
train_last_word_rnn_model
(
epoch
=
None
,
hidden_units
=
100
):
x_train
,
y_train
,
x_test
,
y_test
,
x_validate
,
y_validate
=
get_train_test_val
(
read_array
(
'sequences.csv'
))
val_loss
=
2.1231
losses
=
[]
if
epoch
==
None
:
model
=
create_RNN_model
(
x_train
.
shape
[
1
],
hidden_units
)
epoch
=
0
else
:
model
=
load_model
(
last_word_rnn_model_name
(
epoch
,
hidden_units
))
while
(
True
):
epoch
,
val_loss
=
train_model_with_decreasing_val_loss
(
model
,
x_train
,
y_train
,
x_test
,
y_test
,
x_validate
,
y_validate
,
last_word_rnn_model_name
,
epoch
,
val_loss
,
hidden_units
)
losses
.
append
(
val_loss
)
print
(
losses
)
model
=
load_model
(
last_word_rnn_model_name
(
epoch
,
hidden_units
))
def
print_rnn_model_val_loss
():
model
=
load_model
(
last_word_lstm_n_2_layers_model_name
(
11
,
100
))
def
create_RNN_model
(
seq_length
,
hidden_units
=
100
):
layer1
=
SimpleRNN
(
hidden_units
,
dropout
=
0.2
,
recurrent_dropout
=
0.2
,
return_sequences
=
True
)
layer2
=
SimpleRNN
(
hidden_units
,
dropout
=
0.3
,
recurrent_dropout
=
0.3
)
return
create_2_layers_model
(
layer1
,
layer2
,
hidden_units
)
def
train_lstm_nonstop
():
def
train_model
(
input_file
,
model
,
dir_name
,
epochs
=
70
,
epochs_start
=
0
,
batch_size
=
32
,
history
=
None
):
x_train
,
y_train
,
x_test
,
y_test
,
x_validate
,
y_validate
=
get_train_test_val
(
read_array
(
'sequences.csv'
))
x_train
,
y_train
,
x_test
,
y_test
,
x_validate
,
y_validate
=
get_train_test_val
(
read_array
(
input_file
))
# model = create_LSTM_2_layers(x_train.shape[1], 100)
if
history
==
None
:
model
=
load_model
(
'lstm_30_epochs_nonstop_dropout/model_22.h5'
)
history
=
{
'loss'
:
[],
'acc'
:
[],
'val_loss'
:
[],
'val_acc'
:
[]}
epochs
=
70
for
epoch
in
range
(
epochs_start
,
epochs
):
history
=
read_list
(
'lstm_30_epochs_nonstop_dropout/history_22'
)
for
epoch
in
range
(
23
,
epochs
):
print
(
'Epoch'
,
epoch
)
print
(
'Epoch'
,
epoch
)
train_history
=
model
.
fit
(
x_train
,
y_train
,
epochs
=
1
,
batch_size
=
32
,
validation_data
=
(
x_validate
,
y_validate
))
train_history
=
model
.
fit
(
x_train
,
y_train
,
epochs
=
1
,
batch_size
=
batch_size
,
validation_data
=
(
x_validate
,
y_validate
))
model
.
save
(
'lstm_30_epochs_nonstop_dropout/model_'
+
str
(
epoch
)
+
'.h5'
)
model
.
save
(
dir_name
+
'model_'
+
str
(
epoch
)
+
'.h5'
)
for
key
in
train_history
.
history
.
keys
():
for
key
in
train_history
.
history
.
keys
():
history
[
key
]
.
append
(
train_history
.
history
[
key
][
0
])
history
[
key
]
.
append
(
train_history
.
history
[
key
][
0
])
save_list
(
history
,
'lstm_30_epochs_nonstop_dropout/history_'
+
str
(
epoch
))
save_list
(
history
,
dir_name
+
'history_'
+
str
(
epoch
))
def
train_lstm_nonstop
():
dir_name
=
'lstm_30_epochs_nonstop_dropout/'
model
=
load_model
(
dir_name
+
'model_22.h5'
)
history
=
read_list
(
dir_name
+
'history_22'
)
train_model
(
'sequences.csv'
,
model
,
dir_name
,
epochs_start
=
23
,
history
=
history
)
def
train_lstm_50_units_nonstop
():
def
train_lstm_50_units_nonstop
():
x_train
,
y_train
,
x_test
,
y_test
,
x_validate
,
y_validate
=
get_train_test_val
(
read_array
(
'sequences.csv'
))
dir_name
=
'lstm_70_epochs_50_units_nonstop_dropout/'
model
=
create_LSTM_2_layers
(
x_train
.
shape
[
1
],
50
)
model
=
create_LSTM_2_layers
(
x_train
.
shape
[
1
],
50
)
epochs
=
70
train_model
(
'sequences.csv'
,
model
,
dir_name
)
directory
=
'lstm_70_epochs_50_units_nonstop_dropout/'
history
=
{
'loss'
:
[],
'acc'
:
[],
'val_loss'
:
[],
'val_acc'
:
[]}
for
epoch
in
range
(
epochs
):
print
(
'Epoch'
,
epoch
)
train_history
=
model
.
fit
(
x_train
,
y_train
,
epochs
=
1
,
batch_size
=
32
,
validation_data
=
(
x_validate
,
y_validate
))
model
.
save
(
directory
+
'model_'
+
str
(
epoch
)
+
'.h5'
)
for
key
in
train_history
.
history
.
keys
():
history
[
key
]
.
append
(
train_history
.
history
[
key
][
0
])
save_list
(
history
,
directory
+
'history_'
+
str
(
epoch
))
def
train_rnn_nonstop
():
def
train_rnn_nonstop
():
x_train
,
y_train
,
x_test
,
y_test
,
x_validate
,
y_validate
=
get_train_test_val
(
read_array
(
'sequences.csv'
))
dir_name
=
'rnn_50_epochs_nonstop_dropout/'
model
=
create_RNN_model
(
x_train
.
shape
[
1
],
100
)
model
=
create_RNN_model
(
x_train
.
shape
[
1
],
100
)
directory
=
'rnn_50_epochs_nonstop_dropout/'
train_model
(
'sequences.csv'
,
model
,
dir_name
)
epochs
=
70
history
=
{
'loss'
:
[],
'acc'
:
[],
'val_loss'
:
[],
'val_acc'
:
[]}
for
epoch
in
range
(
epochs
):
print
(
'Epoch'
,
epoch
)
train_history
=
model
.
fit
(
x_train
,
y_train
,
epochs
=
1
,
batch_size
=
32
,
validation_data
=
(
x_validate
,
y_validate
))
model
.
save
(
directory
+
'model_'
+
str
(
epoch
)
+
'.h5'
)
for
key
in
train_history
.
history
.
keys
():
history
[
key
]
.
append
(
train_history
.
history
[
key
][
0
])
save_list
(
history
,
directory
+
'history_'
+
str
(
epoch
))
def
print_lstm_30_history
():
hist
=
read_list
(
'lstm_30_epochs_nonstop_dropout/history_22'
)
loss_missed
=
[
2.1326
,
2.1257
,
2.1175
,
2.1137
,
2.1104
,
2.1042
]
acc_missed
=
[
0.4142
,
0.4156
,
0.4170
,
0.4169
,
0.4180
,
0.4197
]
val_loss_missed
=
[
2.0993
,
2.1017
,
2.1083
,
2.1216
,
2.1224
,
2.1213
]
val_acc_missed
=
[
0.4226
,
0.4172
,
0.4221
,
0.4234
,
0.4230
,
0.4258
]
hist
[
'loss'
]
=
hist
[
'loss'
][:
17
]
+
loss_missed
+
hist
[
'loss'
][
17
:]
hist
[
'val_loss'
]
=
hist
[
'val_loss'
][:
17
]
+
val_loss_missed
+
hist
[
'val_loss'
][
17
:]
hist
[
'acc'
]
=
hist
[
'acc'
][:
17
]
+
acc_missed
+
hist
[
'acc'
][
17
:]
hist
[
'val_acc'
]
=
hist
[
'val_acc'
][:
17
]
+
val_acc_missed
+
hist
[
'val_acc'
][
17
:]
save_list
(
hist
,
'lstm_30_epochs_nonstop_dropout/history_22'
)
print
(
read_list
(
'lstm_30_epochs_nonstop_dropout/history_22'
))
train_
lstm_50_units
_nonstop
()
train_
rnn
_nonstop
()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment