from sklearn.datasets import fetch_20newsgroups
twenty_train = fetch_20newsgroups(subset='train') #, remove=('headers', 'footers', 'quotes'))
print("Catergories")
print(twenty_train.target_names)
print("-------------")
print("First dataset's sample")
print("\n".join(twenty_train.data[0].split("\n")))
print("------------")
print("First dataset's sample category: ",twenty_train.target[0])
Categories
['alt.atheism', 'comp.graphics', 'comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware', 'comp.sys.mac.hardware', 'comp.windows.x', 'misc.forsale', 'rec.autos', 'rec.motorcycles', 'rec.sport.baseball', 'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med', 'sci.space', 'soc.religion.christian', 'talk.politics.guns', 'talk.politics.mideast', 'talk.politics.misc', 'talk.religion.misc']
-------------
First training sample
From: lerxst@wam.umd.edu (where's my thing)
Subject: WHAT car is this!?
Nntp-Posting-Host: rac3.wam.umd.edu
Organization: University of Maryland, College Park
Lines: 15

I was wondering if anyone out there could enlighten me on this car I saw
the other day. It was a 2-door sports car, looked to be from the late 60s/
early 70s. It was called a Bricklin. The doors were really small. In addition,
the front bumper was separate from the rest of the body. This is all I know.
If anyone can tellme a model name, engine specs, years of production, where
this car is made, history, or whatever info you have on this funky looking
car, please e-mail.

Thanks,
- IL
---- brought to you by your neighborhood Lerxst ----
------------
First training sample's category: 7
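# The commented-out `remove` argument in the cell above strips newsgroup headers,
# footers and quoted replies, so the classifier cannot key on metadata such as
# sender addresses. A minimal sketch of that variant (the name
# `twenty_train_clean` is illustrative, not part of the original notebook):
from sklearn.datasets import fetch_20newsgroups

twenty_train_clean = fetch_20newsgroups(
    subset='train',
    remove=('headers', 'footers', 'quotes'),
)
print(twenty_train_clean.data[0][:200])  # body text only, without "From:"/"Subject:" lines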
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(twenty_train.data, twenty_train.target, test_size=0.3, random_state=12547392)
twenty_test = fetch_20newsgroups(subset='test')
X_test, y_test = twenty_test.data[:1000], twenty_test.target[:1000]
print('Train samples: {}'.format(len(X_train)))
print('Val samples: {}'.format(len(X_val)))
Train samples: 7919
Val samples: 3395
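# The split above is purely random. Since the 20 classes are not perfectly
# balanced, a stratified split keeps class proportions identical in train and
# validation; a sketch of that alternative (only `stratify` is new, the
# `_s`-suffixed names are illustrative):
from sklearn.model_selection import train_test_split

X_train_s, X_val_s, y_train_s, y_val_s = train_test_split(
    twenty_train.data, twenty_train.target,
    test_size=0.3, random_state=12547392,
    stratify=twenty_train.target,
)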
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from tqdm import tqdm
nlp = spacy.load('en_core_web_sm',disable=["tagger", "parser","ner"])
nlp.add_pipe('sentencizer')
def tokenize_samples(samples):
    tokenized_samples = []
    for i in tqdm(range(len(samples))):
        doc = nlp(samples[i])  # Process the sample (the sentencizer adds sentence boundaries)
        tokens = []
        for sent in doc.sents:
            for tok in sent:  # Iterate over the tokens of the sentence
                if '\n' in tok.text or "\t" in tok.text or "--" in tok.text or "*" in tok.text or tok.text.lower() in STOP_WORDS:
                    continue
                if tok.text.strip():
                    tokens.append(tok.text.replace('"', "'").strip())
        tokenized_samples.append(tokens)
    return tokenized_samples
X_train_tokenized = tokenize_samples(X_train)
X_val_tokenized = tokenize_samples(X_val)
X_test_tokenized = tokenize_samples(X_test)
/usr/local/lib/python3.10/dist-packages/spacy/pipeline/lemmatizer.py:211: UserWarning: [W108] The rule-based lemmatizer did not find POS annotation for one or more tokens. Check that your pipeline includes components that assign token.pos, typically 'tagger'+'attribute_ruler' or 'morphologizer'.
  warnings.warn(Warnings.W108)
100%|██████████| 7919/7919 [04:14<00:00, 31.07it/s]
100%|██████████| 3395/3395 [01:59<00:00, 28.49it/s]
100%|██████████| 1000/1000 [00:25<00:00, 39.15it/s]
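# The W108 warning above is emitted because the lemmatizer stays in the pipeline
# while the tagger is disabled, and calling nlp() one document at a time is slow.
# A possible variant (a sketch, not what produced the output above) also disables
# the lemmatizer and streams documents through nlp.pipe in batches:
nlp_fast = spacy.load('en_core_web_sm', disable=["tagger", "parser", "ner", "lemmatizer"])
nlp_fast.add_pipe('sentencizer')

def tokenize_samples_batched(samples, batch_size=64):
    tokenized = []
    # nlp.pipe processes the texts as a stream, which is much faster than per-sample calls
    for doc in tqdm(nlp_fast.pipe(samples, batch_size=batch_size), total=len(samples)):
        tokens = [tok.text.replace('"', "'").strip() for tok in doc
                  if tok.text.strip()
                  and not any(c in tok.text for c in ('\n', '\t', '--', '*'))
                  and tok.text.lower() not in STOP_WORDS]
        tokenized.append(tokens)
    return tokenized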
import numpy as np
# Get mean and std for length on training set
print('Average length of samples: {}'.format(np.mean([len(x) for x in X_train_tokenized])))
print('Std length of samples: {}'.format(np.std([len(x) for x in X_train_tokenized])))
print('#Samples with length > 1000: {} \n'.format(np.sum([len(x) > 1000 for x in X_train_tokenized])))
print('X_example: {}'.format(X_train_tokenized[0]))
Average length of samples: 240.10670539209497
Std length of samples: 457.42933996960267
#Samples with length > 1000: 178 

X_example: [':', 'kastle@wpi', '.', 'WPI.EDU', '(', 'Jacques', 'W', 'Brouillette', ')', 'Subject', ':', ':', 'WARNING', '.....', '(please', 'read', ')', '...', 'Organization', ':', 'Worcester', 'Polytechnic', 'Institute', 'Lines', ':', '8', 'Distribution', ':', 'world', 'NNTP', '-', 'Posting', '-', 'Host', ':', 'wpi.wpi.edu', 'Keywords', ':', 'BRICK', ',', 'TRUCK', ',', 'DANGER', 'plase', 'cease', 'discussion', '.', 'fail', 'people', 'feel', 'need', 'expound', 'issue', 'days', 'days', 'end', '.', 'areas', 'meant', 'type', 'discussion', '.', 'feel', 'need', 'things', ',', 'thought', '.', 'Thanks', '.', ':', 'want', 'things', 'world', ',', '58', 'Plymouth', 'small', ':', ':', 'OPEC', 'nation', 'fuel', '.', 'good', ':', ':', 'thing', '.', 'Car', 'Smashers', 'home', 'sulk', '.', ':', ':', 'Jacques', 'Brouillette', 'Manufacturing', 'Engineering', ':']
from sklearn.preprocessing import LabelBinarizer
lb = LabelBinarizer()
target_list = twenty_train.target_names
y_train_1_hot = lb.fit_transform([target_list[x] for x in y_train])
y_val_1_hot = lb.transform([target_list[x] for x in y_val])
y_test_1_hot = lb.transform([target_list[x] for x in y_test])
print('Y_example: {}'.format(y_train_1_hot[0]))
Y_example: [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
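# Sanity check: LabelBinarizer.inverse_transform maps a one-hot row back to its
# class name, which should agree with indexing target_list by the integer label.
print(lb.inverse_transform(y_train_1_hot[:1]))  # e.g. ['rec.autos'] for the vector above
print(target_list[y_train[0]])                  # same category name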
import tensorflow as tf
from sklearn.metrics import f1_score, recall_score, precision_score
import numpy as np
import os
class Metrics(tf.keras.callbacks.Callback):
    def __init__(self, valid_data):
        super(Metrics, self).__init__()
        self.validation_data = valid_data

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        val_predict = np.argmax(self.model.predict(self.validation_data[0]), -1)
        val_targ = self.validation_data[1]
        if len(val_targ.shape) == 2 and val_targ.shape[1] != 1:
            val_targ = np.argmax(val_targ, -1)
        val_targ = tf.cast(val_targ, dtype=tf.float32)
        _val_f1 = f1_score(val_targ, val_predict, average="weighted")
        _val_recall = recall_score(val_targ, val_predict, average="weighted")
        _val_precision = precision_score(val_targ, val_predict, average="weighted")
        logs['val_f1'] = _val_f1
        logs['val_recall'] = _val_recall
        logs['val_precision'] = _val_precision
        print(" — val_f1: %f — val_precision: %f — val_recall: %f" % (_val_f1, _val_precision, _val_recall))
        return
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
MAX_SEQUENCE_LENGTH = 250
MAX_WORDS = 100000
# Init tokenizer
tokenizer = Tokenizer(num_words=MAX_WORDS)
# num_words: the maximum number of words to keep, based on word frequency.
# oov_token: will be used to replace OOV WORDS
# Fit tokenizer (Updates internal vocabulary based on a list of texts.)
tokenizer.fit_on_texts([" ".join(x) for x in X_train_tokenized])
# Converts text to sequences of IDs
train_seqs = tokenizer.texts_to_sequences([" ".join(x) for x in X_train_tokenized])
val_seqs = tokenizer.texts_to_sequences([" ".join(x) for x in X_val_tokenized])
test_seqs = tokenizer.texts_to_sequences([" ".join(x) for x in X_test_tokenized])
train_data = pad_sequences(train_seqs, maxlen=MAX_SEQUENCE_LENGTH, padding='post')
val_data = pad_sequences(val_seqs, maxlen=MAX_SEQUENCE_LENGTH, padding='post')
test_data = pad_sequences(test_seqs, maxlen=MAX_SEQUENCE_LENGTH, padding='post')
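# Quick check that tokenization and padding behave as expected: map the word IDs
# of the first padded training sequence back to words via tokenizer.index_word
# (ID 0 is the padding value and has no entry in index_word).
decoded_tokens = [tokenizer.index_word[idx] for idx in train_data[0] if idx != 0]
print(train_data[0][:20])   # first 20 word IDs (post-padded to MAX_SEQUENCE_LENGTH)
print(decoded_tokens[:20])  # the corresponding words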
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, GRU, Embedding
from tensorflow.keras.optimizers import Adam
BATCH_SIZE=256
EPOCHS=30
GRU_SIZE = 64
DENSE = 32
MAX_WORDS = 100000
EMBEDDING_DIM = 100
MAX_SEQUENCE_LENGTH = 250
# create an empty sequential model
model = Sequential()
# Add an embedding layer
model.add(Embedding(input_dim=MAX_WORDS+2, output_dim=EMBEDDING_DIM,  # weights=[embedding_matrix],
                    input_length=MAX_SEQUENCE_LENGTH, mask_zero=True, trainable=True))  # trainable=False
# Add a bidirectional GRU layer with 0.33 variational (recurrent) dropout
model.add(Bidirectional(GRU(GRU_SIZE, input_shape=(MAX_SEQUENCE_LENGTH, EMBEDDING_DIM), return_sequences=False, recurrent_dropout=0.33)))
# return_sequences=False: return only the last output of the sequence instead of the full sequence
# Add a hidden MLP layer
model.add(Dropout(0.33))
model.add(Dense(DENSE, activation='relu'))
# Add the output MLP layer
model.add(Dropout(0.33))
model.add(Dense(len(twenty_train.target_names), activation='softmax'))
# Multi-class classification -> use softmax over all possible classes
# model.build((None, EMBEDDING_DIM, VECTOR_DIMENSION))
print(model.summary())
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(learning_rate=0.001),
              metrics=["accuracy"])
# Save model weights after each epoch with ModelCheckpoint
# IF I WANTED TO USE GDRIVE
# '/content/gdrive/My Drive/checkpoints'
if not os.path.exists('/content/checkpoints'):
    os.makedirs('/content/checkpoints')
# '/content/gdrive/My Drive/checkpoints/BiGRUMLP.hdf5'
checkpoint = ModelCheckpoint('/content/checkpoints/BiGRUMLP.hdf5',
                             monitor='val_accuracy',
                             mode='max', verbose=2,
                             save_best_only=True,
                             save_weights_only=True)
history = model.fit(train_data,
                    y_train_1_hot,
                    validation_data=(val_data, y_val_1_hot),
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    shuffle=True,
                    callbacks=[Metrics(valid_data=(val_data, y_val_1_hot)),
                               checkpoint])
WARNING:tensorflow:Layer gru_2 will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.
Model: "sequential_3" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= embedding_3 (Embedding) (None, 250, 100) 10000200 bidirectional_2 (Bidirecti (None, 128) 63744 onal) dropout_4 (Dropout) (None, 128) 0 dense_4 (Dense) (None, 32) 4128 dropout_5 (Dropout) (None, 32) 0 dense_5 (Dense) (None, 20) 660 ================================================================= Total params: 10068732 (38.41 MB) Trainable params: 10068732 (38.41 MB) Non-trainable params: 0 (0.00 Byte) _________________________________________________________________ None Epoch 1/30 107/107 [==============================] - 17s 149ms/step — val_f1: 0.030140 — val_precision: 0.120475 — val_recall: 0.068630 Epoch 1: val_accuracy improved from -inf to 0.06863, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 111s 3s/step - loss: 2.9862 - accuracy: 0.0659 - val_loss: 2.9669 - val_accuracy: 0.0686 - val_f1: 0.0301 - val_recall: 0.0686 - val_precision: 0.1205 Epoch 2/30
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
107/107 [==============================] - 15s 144ms/step — val_f1: 0.160939 — val_precision: 0.379353 — val_recall: 0.192636 Epoch 2: val_accuracy improved from 0.06863 to 0.19264, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 88s 3s/step - loss: 2.8937 - accuracy: 0.1137 - val_loss: 2.7587 - val_accuracy: 0.1926 - val_f1: 0.1609 - val_recall: 0.1926 - val_precision: 0.3794 Epoch 3/30
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
107/107 [==============================] - 16s 146ms/step — val_f1: 0.321508 — val_precision: 0.447378 — val_recall: 0.368189 Epoch 3: val_accuracy improved from 0.19264 to 0.36819, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 83s 3s/step - loss: 2.4954 - accuracy: 0.2279 - val_loss: 2.2604 - val_accuracy: 0.3682 - val_f1: 0.3215 - val_recall: 0.3682 - val_precision: 0.4474 Epoch 4/30 107/107 [==============================] - 15s 143ms/step — val_f1: 0.509427 — val_precision: 0.545646 — val_recall: 0.520471 Epoch 4: val_accuracy improved from 0.36819 to 0.52047, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 86s 3s/step - loss: 1.8926 - accuracy: 0.4070 - val_loss: 1.7155 - val_accuracy: 0.5205 - val_f1: 0.5094 - val_recall: 0.5205 - val_precision: 0.5456 Epoch 5/30 107/107 [==============================] - 16s 147ms/step — val_f1: 0.628164 — val_precision: 0.642008 — val_recall: 0.624153 Epoch 5: val_accuracy improved from 0.52047 to 0.62415, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 84s 3s/step - loss: 1.2737 - accuracy: 0.5839 - val_loss: 1.3646 - val_accuracy: 0.6242 - val_f1: 0.6282 - val_recall: 0.6242 - val_precision: 0.6420 Epoch 6/30 107/107 [==============================] - 15s 139ms/step — val_f1: 0.685028 — val_precision: 0.691521 — val_recall: 0.683652 Epoch 6: val_accuracy improved from 0.62415 to 0.68365, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 83s 3s/step - loss: 0.8138 - accuracy: 0.7429 - val_loss: 1.1135 - val_accuracy: 0.6837 - val_f1: 0.6850 - val_recall: 0.6837 - val_precision: 0.6915 Epoch 7/30 107/107 [==============================] - 15s 138ms/step — val_f1: 0.709699 — val_precision: 0.717580 — val_recall: 0.708395 Epoch 7: val_accuracy improved from 0.68365 to 0.70839, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 82s 3s/step - loss: 0.5517 - accuracy: 0.8327 - val_loss: 1.0663 - val_accuracy: 0.7084 - val_f1: 0.7097 - val_recall: 0.7084 - val_precision: 0.7176 Epoch 8/30 107/107 [==============================] - 16s 147ms/step — val_f1: 0.735699 — val_precision: 0.744867 — val_recall: 0.734610 Epoch 8: val_accuracy improved from 0.70839 to 0.73461, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 78s 3s/step - loss: 0.4017 - accuracy: 0.8759 - val_loss: 0.9914 - val_accuracy: 0.7346 - val_f1: 0.7357 - val_recall: 0.7346 - val_precision: 0.7449 Epoch 9/30 107/107 [==============================] - 15s 141ms/step — val_f1: 0.738030 — val_precision: 0.744813 — val_recall: 0.737555 Epoch 9: val_accuracy improved from 0.73461 to 0.73756, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 77s 3s/step - loss: 0.2943 - accuracy: 0.9156 - val_loss: 1.0110 - val_accuracy: 0.7376 - val_f1: 0.7380 - val_recall: 0.7376 - val_precision: 0.7448 Epoch 10/30 107/107 [==============================] - 15s 139ms/step — val_f1: 0.744511 — val_precision: 0.751991 — val_recall: 0.744330 Epoch 10: val_accuracy improved from 0.73756 to 0.74433, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 83s 3s/step - loss: 0.2451 - accuracy: 0.9309 - val_loss: 1.0448 - val_accuracy: 0.7443 - val_f1: 0.7445 - val_recall: 0.7443 - val_precision: 0.7520 Epoch 11/30 107/107 [==============================] - 15s 142ms/step — val_f1: 
0.756774 — val_precision: 0.762311 — val_recall: 0.756996 Epoch 11: val_accuracy improved from 0.74433 to 0.75700, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 81s 3s/step - loss: 0.1945 - accuracy: 0.9403 - val_loss: 1.0450 - val_accuracy: 0.7570 - val_f1: 0.7568 - val_recall: 0.7570 - val_precision: 0.7623 Epoch 12/30 107/107 [==============================] - 15s 140ms/step — val_f1: 0.750578 — val_precision: 0.756629 — val_recall: 0.750515 Epoch 12: val_accuracy did not improve from 0.75700 31/31 [==============================] - 78s 3s/step - loss: 0.1742 - accuracy: 0.9499 - val_loss: 1.0930 - val_accuracy: 0.7505 - val_f1: 0.7506 - val_recall: 0.7505 - val_precision: 0.7566 Epoch 13/30 107/107 [==============================] - 16s 147ms/step — val_f1: 0.749980 — val_precision: 0.756777 — val_recall: 0.749926 Epoch 13: val_accuracy did not improve from 0.75700 31/31 [==============================] - 82s 3s/step - loss: 0.1530 - accuracy: 0.9544 - val_loss: 1.1329 - val_accuracy: 0.7499 - val_f1: 0.7500 - val_recall: 0.7499 - val_precision: 0.7568 Epoch 14/30 107/107 [==============================] - 15s 139ms/step — val_f1: 0.756778 — val_precision: 0.763451 — val_recall: 0.756701 Epoch 14: val_accuracy did not improve from 0.75700 31/31 [==============================] - 82s 3s/step - loss: 0.1350 - accuracy: 0.9615 - val_loss: 1.1292 - val_accuracy: 0.7567 - val_f1: 0.7568 - val_recall: 0.7567 - val_precision: 0.7635 Epoch 15/30 107/107 [==============================] - 15s 138ms/step — val_f1: 0.746010 — val_precision: 0.765309 — val_recall: 0.744919 Epoch 15: val_accuracy did not improve from 0.75700 31/31 [==============================] - 81s 3s/step - loss: 0.1330 - accuracy: 0.9620 - val_loss: 1.2302 - val_accuracy: 0.7449 - val_f1: 0.7460 - val_recall: 0.7449 - val_precision: 0.7653 Epoch 16/30 107/107 [==============================] - 15s 142ms/step — val_f1: 0.768306 — val_precision: 0.772954 — val_recall: 0.768483 Epoch 16: val_accuracy improved from 0.75700 to 0.76848, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 82s 3s/step - loss: 0.1152 - accuracy: 0.9670 - val_loss: 1.1630 - val_accuracy: 0.7685 - val_f1: 0.7683 - val_recall: 0.7685 - val_precision: 0.7730 Epoch 17/30 107/107 [==============================] - 15s 140ms/step — val_f1: 0.768201 — val_precision: 0.776478 — val_recall: 0.767894 Epoch 17: val_accuracy did not improve from 0.76848 31/31 [==============================] - 82s 3s/step - loss: 0.0932 - accuracy: 0.9741 - val_loss: 1.2059 - val_accuracy: 0.7679 - val_f1: 0.7682 - val_recall: 0.7679 - val_precision: 0.7765 Epoch 18/30 107/107 [==============================] - 15s 139ms/step — val_f1: 0.766647 — val_precision: 0.775616 — val_recall: 0.765538 Epoch 18: val_accuracy did not improve from 0.76848 31/31 [==============================] - 82s 3s/step - loss: 0.0916 - accuracy: 0.9737 - val_loss: 1.2364 - val_accuracy: 0.7655 - val_f1: 0.7666 - val_recall: 0.7655 - val_precision: 0.7756 Epoch 19/30 107/107 [==============================] - 15s 144ms/step — val_f1: 0.765953 — val_precision: 0.773043 — val_recall: 0.766127 Epoch 19: val_accuracy did not improve from 0.76848 31/31 [==============================] - 81s 3s/step - loss: 0.0811 - accuracy: 0.9779 - val_loss: 1.2450 - val_accuracy: 0.7661 - val_f1: 0.7660 - val_recall: 0.7661 - val_precision: 0.7730 Epoch 20/30 107/107 [==============================] - 15s 138ms/step — val_f1: 0.775256 — 
val_precision: 0.778580 — val_recall: 0.775258 Epoch 20: val_accuracy improved from 0.76848 to 0.77526, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 75s 2s/step - loss: 0.0821 - accuracy: 0.9768 - val_loss: 1.2009 - val_accuracy: 0.7753 - val_f1: 0.7753 - val_recall: 0.7753 - val_precision: 0.7786 Epoch 21/30 107/107 [==============================] - 15s 140ms/step — val_f1: 0.772409 — val_precision: 0.777390 — val_recall: 0.772312 Epoch 21: val_accuracy did not improve from 0.77526 31/31 [==============================] - 75s 2s/step - loss: 0.0812 - accuracy: 0.9764 - val_loss: 1.2900 - val_accuracy: 0.7723 - val_f1: 0.7724 - val_recall: 0.7723 - val_precision: 0.7774 Epoch 22/30 107/107 [==============================] - 15s 137ms/step — val_f1: 0.778782 — val_precision: 0.782813 — val_recall: 0.778792 Epoch 22: val_accuracy improved from 0.77526 to 0.77879, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 73s 2s/step - loss: 0.0648 - accuracy: 0.9821 - val_loss: 1.2483 - val_accuracy: 0.7788 - val_f1: 0.7788 - val_recall: 0.7788 - val_precision: 0.7828 Epoch 23/30 107/107 [==============================] - 15s 138ms/step — val_f1: 0.782058 — val_precision: 0.790398 — val_recall: 0.781149 Epoch 23: val_accuracy improved from 0.77879 to 0.78115, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 76s 2s/step - loss: 0.0627 - accuracy: 0.9802 - val_loss: 1.3588 - val_accuracy: 0.7811 - val_f1: 0.7821 - val_recall: 0.7811 - val_precision: 0.7904 Epoch 24/30 107/107 [==============================] - 15s 137ms/step — val_f1: 0.773286 — val_precision: 0.780003 — val_recall: 0.772018 Epoch 24: val_accuracy did not improve from 0.78115 31/31 [==============================] - 81s 3s/step - loss: 0.0627 - accuracy: 0.9817 - val_loss: 1.3648 - val_accuracy: 0.7720 - val_f1: 0.7733 - val_recall: 0.7720 - val_precision: 0.7800 Epoch 25/30 107/107 [==============================] - 15s 138ms/step — val_f1: 0.777916 — val_precision: 0.785783 — val_recall: 0.777320 Epoch 25: val_accuracy did not improve from 0.78115 31/31 [==============================] - 81s 3s/step - loss: 0.0576 - accuracy: 0.9835 - val_loss: 1.3201 - val_accuracy: 0.7773 - val_f1: 0.7779 - val_recall: 0.7773 - val_precision: 0.7858 Epoch 26/30 107/107 [==============================] - 15s 141ms/step — val_f1: 0.775969 — val_precision: 0.782348 — val_recall: 0.775258 Epoch 26: val_accuracy did not improve from 0.78115 31/31 [==============================] - 81s 3s/step - loss: 0.0562 - accuracy: 0.9846 - val_loss: 1.3614 - val_accuracy: 0.7753 - val_f1: 0.7760 - val_recall: 0.7753 - val_precision: 0.7823 Epoch 27/30 107/107 [==============================] - 15s 137ms/step — val_f1: 0.769021 — val_precision: 0.779069 — val_recall: 0.768778 Epoch 27: val_accuracy did not improve from 0.78115 31/31 [==============================] - 80s 3s/step - loss: 0.0591 - accuracy: 0.9818 - val_loss: 1.4699 - val_accuracy: 0.7688 - val_f1: 0.7690 - val_recall: 0.7688 - val_precision: 0.7791 Epoch 28/30 107/107 [==============================] - 15s 144ms/step — val_f1: 0.776310 — val_precision: 0.781976 — val_recall: 0.776730 Epoch 28: val_accuracy did not improve from 0.78115 31/31 [==============================] - 76s 2s/step - loss: 0.0524 - accuracy: 0.9854 - val_loss: 1.4153 - val_accuracy: 0.7767 - val_f1: 0.7763 - val_recall: 0.7767 - val_precision: 0.7820 Epoch 29/30 107/107 
[==============================] - 15s 143ms/step — val_f1: 0.784216 — val_precision: 0.791574 — val_recall: 0.784094 Epoch 29: val_accuracy improved from 0.78115 to 0.78409, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 80s 3s/step - loss: 0.0473 - accuracy: 0.9852 - val_loss: 1.4511 - val_accuracy: 0.7841 - val_f1: 0.7842 - val_recall: 0.7841 - val_precision: 0.7916 Epoch 30/30 107/107 [==============================] - 15s 136ms/step — val_f1: 0.787878 — val_precision: 0.793807 — val_recall: 0.787629 Epoch 30: val_accuracy improved from 0.78409 to 0.78763, saving model to /content/checkpoints/BiGRUMLP.hdf5 31/31 [==============================] - 81s 3s/step - loss: 0.0460 - accuracy: 0.9874 - val_loss: 1.4352 - val_accuracy: 0.7876 - val_f1: 0.7879 - val_recall: 0.7876 - val_precision: 0.7938
%matplotlib inline
import matplotlib.pyplot as plt
# summarize history for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'dev'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'dev'], loc='upper right')
plt.show()
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, GRU, Embedding
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report
GRU_SIZE = 64
DENSE = 32
with tf.device('/device:GPU:0'):
    model = Sequential()
    model.add(Embedding(input_dim=MAX_WORDS+2, output_dim=EMBEDDING_DIM,  # weights=[embedding_matrix],
                        input_length=MAX_SEQUENCE_LENGTH, mask_zero=True, trainable=True))  # trainable=False
    model.add(Bidirectional(GRU(GRU_SIZE, return_sequences=False, recurrent_dropout=0.33)))
    model.add(Dense(DENSE, activation='relu'))
    model.add(Dense(len(twenty_train.target_names), activation='softmax'))
    # Load weights from the pre-trained model
    model.load_weights("/content/checkpoints/BiGRUMLP.hdf5")
    # model.load_weights("/content/gdrive/My Drive/checkpoints/BiGRUMLP.hdf5")
predictions = np.argmax(model.predict(val_data), -1)
print(classification_report(y_val, predictions, target_names=twenty_train.target_names))
WARNING:tensorflow:Layer gru_5 will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.
107/107 [==============================] - 23s 203ms/step
                          precision    recall  f1-score   support

             alt.atheism       0.88      0.83      0.85       160
           comp.graphics       0.73      0.68      0.71       165
 comp.os.ms-windows.misc       0.73      0.78      0.76       189
comp.sys.ibm.pc.hardware       0.70      0.63      0.66       168
   comp.sys.mac.hardware       0.61      0.80      0.69       182
          comp.windows.x       0.80      0.66      0.73       168
            misc.forsale       0.71      0.65      0.68       182
               rec.autos       0.83      0.74      0.78       181
         rec.motorcycles       0.82      0.81      0.81       184
      rec.sport.baseball       0.92      0.79      0.85       169
        rec.sport.hockey       0.80      0.90      0.85       175
               sci.crypt       0.91      0.93      0.92       177
         sci.electronics       0.84      0.72      0.78       173
                 sci.med       0.82      0.86      0.84       181
               sci.space       0.76      0.90      0.82       181
  soc.religion.christian       0.80      0.78      0.79       177
      talk.politics.guns       0.83      0.85      0.84       177
   talk.politics.mideast       0.96      0.85      0.90       170
      talk.politics.misc       0.74      0.84      0.79       135
      talk.religion.misc       0.64      0.74      0.69       101

                accuracy                           0.79      3395
               macro avg       0.79      0.79      0.79      3395
            weighted avg       0.79      0.79      0.79      3395
from sklearn.metrics import accuracy_score
predictions = np.argmax(model.predict(val_data), -1)
print(f'Validation Accuracy: {accuracy_score(y_val, predictions)*100:.2f}%')
predictions = np.argmax(model.predict(test_data), -1)
print(f'Test Accuracy: {accuracy_score(y_test, predictions)*100:.2f}%')
107/107 [==============================] - 16s 144ms/step
Validation Accuracy: 78.76%
32/32 [==============================] - 4s 132ms/step
Test Accuracy: 67.70%
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import initializers, regularizers, constraints
from tensorflow.keras.layers import Layer
import numpy as np
def dot_product(x, kernel):
    """
    Wrapper for the dot product operation, in order to be compatible with both
    Theano and TensorFlow backends.
    Args:
        x: input tensor
        kernel: weight vector
    Returns:
        the dot product of x and kernel
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
class LinearAttention(Layer):
    def __init__(self,
                 kernel_regularizer=None, bias_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True,
                 return_attention=False,
                 **kwargs):
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')
        # Regularizers apply penalties on layer parameters or layer activity during optimization.
        # These penalties are summed into the loss function that the network optimizes.
        self.W_regularizer = regularizers.get(kernel_regularizer)
        self.b_regularizer = regularizers.get(bias_regularizer)
        # Constraints (e.g. non-negativity) restrict model parameters during training.
        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)
        self.bias = bias
        self.return_attention = return_attention
        super(LinearAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3
        # input_shape[-1] = encoder output size (600 for the 2x300 BiLSTM used below)
        self.W = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight(shape=(1,),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None
        self.built = True

    def compute_mask(self, inputs, mask=None):
        # do not pass the mask to the next layers
        if self.return_attention:
            return [None, None]
        return None

    def call(self, x, mask=None):
        # eij = Wx + b
        eij = dot_product(x, self.W)
        if self.bias:
            eij += self.b
        # Apply mask
        if mask is not None:
            eij *= K.cast(mask, K.floatx())
        # a = softmax(eij), one weight per time step (MAX_SEQUENCE_LENGTH = 250 here)
        a = K.expand_dims(K.softmax(eij, axis=-1))
        # position-wise multiplication -> shape = (timesteps, encoder size)
        weighted_input = x * a
        # sum over time steps -> shape = (encoder size,)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, a]
        return result

    def compute_output_shape(self, input_shape):
        if self.return_attention:
            # input_shape[0] -> batch size, input_shape[1] -> timesteps, input_shape[-1] -> encoder size
            return [(input_shape[0], input_shape[-1]),
                    (input_shape[0], input_shape[1])]
        else:
            return input_shape[0], input_shape[-1]
class DeepAttention(Layer):
    def __init__(self,
                 kernel_regularizer=None, u_regularizer=None, bias_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True,
                 return_attention=False,
                 **kwargs):
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')
        self.W_regularizer = regularizers.get(kernel_regularizer)
        self.u_regularizer = regularizers.get(u_regularizer)
        self.b1_regularizer = regularizers.get(bias_regularizer)
        self.b2_regularizer = regularizers.get(bias_regularizer)
        self.W_constraint = constraints.get(W_constraint)
        self.u_constraint = constraints.get(u_constraint)
        self.b1_constraint = constraints.get(b_constraint)
        self.b2_constraint = constraints.get(b_constraint)
        self.bias = bias
        self.return_attention = return_attention
        super(DeepAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3
        self.W = self.add_weight(shape=(input_shape[-1], input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b1 = self.add_weight(shape=(input_shape[-1],),
                                      initializer='zero',
                                      name='{}_b1'.format(self.name),
                                      regularizer=self.b1_regularizer,
                                      constraint=self.b1_constraint)
            self.b2 = self.add_weight(shape=(1,),
                                      initializer='zero',
                                      name='{}_b2'.format(self.name),
                                      regularizer=self.b2_regularizer,
                                      constraint=self.b2_constraint)
        else:
            self.b1 = None
            self.b2 = None
        self.u = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_u'.format(self.name),
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)
        self.built = True

    def compute_mask(self, inputs, mask=None):
        # do not pass the mask to the next layers
        if self.return_attention:
            return [None, None]
        return None

    def call(self, x, mask=None):
        # uit = tanh(Wx + b1)
        uit = dot_product(x, self.W)
        if self.bias:
            uit += self.b1
        uit = K.tanh(uit)
        # eij = u . uit (+ b2), then a = softmax(eij)
        eij = dot_product(uit, self.u)
        if self.bias:
            eij += self.b2
        # Apply mask
        if mask is not None:
            eij *= K.cast(mask, K.floatx())
        a = K.expand_dims(K.softmax(eij, axis=-1))
        weighted_input = x * a
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, a]
        return result

    def compute_output_shape(self, input_shape):
        if self.return_attention:
            return [(input_shape[0], input_shape[-1]),
                    (input_shape[0], input_shape[1])]
        else:
            return input_shape[0], input_shape[-1]
!pip install keras-self-attention
Collecting keras-self-attention Downloading keras-self-attention-0.51.0.tar.gz (11 kB) Preparing metadata (setup.py) ... done Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from keras-self-attention) (1.23.5) Building wheels for collected packages: keras-self-attention Building wheel for keras-self-attention (setup.py) ... done Created wheel for keras-self-attention: filename=keras_self_attention-0.51.0-py3-none-any.whl size=18895 sha256=b05f34a5aa9893dda77dfe7899801c2b2281880eff229aac13edd33dd0722c36 Stored in directory: /root/.cache/pip/wheels/b8/f7/24/607b483144fb9c47b4ba2c5fba6b68e54aeee2d5bf6c05302e Successfully built keras-self-attention Installing collected packages: keras-self-attention Successfully installed keras-self-attention-0.51.0
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, LSTM, Embedding, Input
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K
from tensorflow.keras import Model
from keras_self_attention import SeqSelfAttention
LSTM_SIZE = 300
DENSE = 1000
with tf.device('/device:GPU:0'):
    inputs = Input((MAX_SEQUENCE_LENGTH,))
    # Define the Embedding layer (optionally initialized with pre-trained fastText weights)
    embeddings = Embedding(input_dim=MAX_WORDS+2, output_dim=EMBEDDING_DIM,  # weights=[embedding_matrix],
                           input_length=MAX_SEQUENCE_LENGTH, mask_zero=True, trainable=True)(inputs)  # trainable=False
    drop_emb = Dropout(0.33)(embeddings)
    # Define a (bidirectional) RNN with LSTM cells
    bilstm = Bidirectional(LSTM(units=LSTM_SIZE, return_sequences=True, recurrent_dropout=0.33))(drop_emb)
    drop_encodings = Dropout(0.33)(bilstm)
    # Pass the encodings through an attention layer
    x, attn = DeepAttention(return_attention=True)(drop_encodings)
    # x, attn = LinearAttention(return_attention=True)(drop_encodings)
    # Alternatively, use the keras-self-attention package:
    # x, attn = SeqSelfAttention(return_attention=True)(drop_encodings)
    # Apply dropout to the encoding produced by the attention mechanism
    drop_x = Dropout(0.33)(x)
    # Pass through a Dense layer
    hidden = Dense(units=DENSE, activation="relu")(drop_x)
    # Apply dropout to the output of the Dense layer
    drop_out = Dropout(0.33)(hidden)
    # Final Dense layer with softmax activation to produce a probability distribution over classes
    out = Dense(units=len(twenty_train.target_names), activation="softmax")(drop_out)
    # Wrap the model --> Functional API
    model2 = Model(inputs=inputs, outputs=out)

print(model2.summary())
model2.compile(loss='categorical_crossentropy',
               optimizer=Adam(learning_rate=0.001),
               metrics=["accuracy"])
if not os.path.exists('/content/checkpoints'):
    os.makedirs('/content/checkpoints')
checkpoint = ModelCheckpoint('/content/checkpoints/BiLSTM_attn.hdf5',
                             monitor='val_accuracy',
                             mode='max', verbose=2,
                             save_best_only=True,
                             save_weights_only=True)
history2 = model2.fit(train_data, y_train_1_hot,
                      validation_data=(val_data, y_val_1_hot),
                      batch_size=128,
                      epochs=30,
                      shuffle=True,
                      callbacks=[Metrics(valid_data=(val_data, y_val_1_hot)),
                                 checkpoint])
WARNING:tensorflow:Layer lstm will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.
/usr/local/lib/python3.10/dist-packages/keras/src/initializers/initializers.py:120: UserWarning: The initializer GlorotUniform is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initializer instance more than once.
  warnings.warn(
Model: "model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, 250)] 0 embedding_6 (Embedding) (None, 250, 100) 10000200 dropout_6 (Dropout) (None, 250, 100) 0 bidirectional_6 (Bidirecti (None, 250, 600) 962400 onal) dropout_7 (Dropout) (None, 250, 600) 0 deep_attention (DeepAttent [(None, 600), 361201 ion) (None, 250, 1)] dropout_8 (Dropout) (None, 600) 0 dense_12 (Dense) (None, 1000) 601000 dropout_9 (Dropout) (None, 1000) 0 dense_13 (Dense) (None, 20) 20020 ================================================================= Total params: 11944821 (45.57 MB) Trainable params: 11944821 (45.57 MB) Non-trainable params: 0 (0.00 Byte) _________________________________________________________________
WARNING:absl:`lr` is deprecated in Keras optimizer, please use `learning_rate` or use the legacy optimizer, e.g.,tf.keras.optimizers.legacy.Adam.
None Epoch 1/30 107/107 [==============================] - 20s 183ms/step — val_f1: 0.122530 — val_precision: 0.150319 — val_recall: 0.209720 Epoch 1: val_accuracy improved from -inf to 0.20972, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 239s 3s/step - loss: 2.7477 - accuracy: 0.1183 - val_loss: 2.1294 - val_accuracy: 0.2097 - val_f1: 0.1225 - val_recall: 0.2097 - val_precision: 0.1503 Epoch 2/30
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
107/107 [==============================] - 19s 176ms/step — val_f1: 0.495426 — val_precision: 0.514294 — val_recall: 0.527246 Epoch 2: val_accuracy improved from 0.20972 to 0.52725, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 172s 3s/step - loss: 1.6186 - accuracy: 0.4012 - val_loss: 1.3446 - val_accuracy: 0.5272 - val_f1: 0.4954 - val_recall: 0.5272 - val_precision: 0.5143 Epoch 3/30
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
107/107 [==============================] - 17s 162ms/step — val_f1: 0.711097 — val_precision: 0.737267 — val_recall: 0.710457 Epoch 3: val_accuracy improved from 0.52725 to 0.71046, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 175s 3s/step - loss: 0.7983 - accuracy: 0.7082 - val_loss: 0.9146 - val_accuracy: 0.7105 - val_f1: 0.7111 - val_recall: 0.7105 - val_precision: 0.7373 Epoch 4/30 107/107 [==============================] - 18s 164ms/step — val_f1: 0.778107 — val_precision: 0.788423 — val_recall: 0.776436 Epoch 4: val_accuracy improved from 0.71046 to 0.77644, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 165s 3s/step - loss: 0.3040 - accuracy: 0.9063 - val_loss: 0.7610 - val_accuracy: 0.7764 - val_f1: 0.7781 - val_recall: 0.7764 - val_precision: 0.7884 Epoch 5/30 107/107 [==============================] - 18s 171ms/step — val_f1: 0.776559 — val_precision: 0.791935 — val_recall: 0.773196 Epoch 5: val_accuracy did not improve from 0.77644 62/62 [==============================] - 165s 3s/step - loss: 0.1214 - accuracy: 0.9646 - val_loss: 0.9098 - val_accuracy: 0.7732 - val_f1: 0.7766 - val_recall: 0.7732 - val_precision: 0.7919 Epoch 6/30 107/107 [==============================] - 18s 169ms/step — val_f1: 0.777996 — val_precision: 0.796160 — val_recall: 0.773196 Epoch 6: val_accuracy did not improve from 0.77644 62/62 [==============================] - 164s 3s/step - loss: 0.0703 - accuracy: 0.9821 - val_loss: 1.0266 - val_accuracy: 0.7732 - val_f1: 0.7780 - val_recall: 0.7732 - val_precision: 0.7962 Epoch 7/30 107/107 [==============================] - 17s 163ms/step — val_f1: 0.803463 — val_precision: 0.816127 — val_recall: 0.799705 Epoch 7: val_accuracy improved from 0.77644 to 0.79971, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 160s 3s/step - loss: 0.0387 - accuracy: 0.9904 - val_loss: 0.9099 - val_accuracy: 0.7997 - val_f1: 0.8035 - val_recall: 0.7997 - val_precision: 0.8161 Epoch 8/30 107/107 [==============================] - 18s 164ms/step — val_f1: 0.810377 — val_precision: 0.823782 — val_recall: 0.807069 Epoch 8: val_accuracy improved from 0.79971 to 0.80707, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 167s 3s/step - loss: 0.0354 - accuracy: 0.9925 - val_loss: 0.8670 - val_accuracy: 0.8071 - val_f1: 0.8104 - val_recall: 0.8071 - val_precision: 0.8238 Epoch 9/30 107/107 [==============================] - 17s 162ms/step — val_f1: 0.801430 — val_precision: 0.814549 — val_recall: 0.797644 Epoch 9: val_accuracy did not improve from 0.80707 62/62 [==============================] - 157s 3s/step - loss: 0.0167 - accuracy: 0.9960 - val_loss: 0.9553 - val_accuracy: 0.7976 - val_f1: 0.8014 - val_recall: 0.7976 - val_precision: 0.8145 Epoch 10/30 107/107 [==============================] - 19s 175ms/step — val_f1: 0.810513 — val_precision: 0.819751 — val_recall: 0.808542 Epoch 10: val_accuracy improved from 0.80707 to 0.80854, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 160s 3s/step - loss: 0.0136 - accuracy: 0.9967 - val_loss: 0.9660 - val_accuracy: 0.8085 - val_f1: 0.8105 - val_recall: 0.8085 - val_precision: 0.8198 Epoch 11/30 107/107 [==============================] - 18s 164ms/step — val_f1: 0.815993 — val_precision: 0.826887 — val_recall: 0.812666 Epoch 11: val_accuracy improved from 0.80854 to 0.81267, saving model to 
/content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 158s 3s/step - loss: 0.0132 - accuracy: 0.9973 - val_loss: 0.9424 - val_accuracy: 0.8127 - val_f1: 0.8160 - val_recall: 0.8127 - val_precision: 0.8269 Epoch 12/30 107/107 [==============================] - 18s 164ms/step — val_f1: 0.809659 — val_precision: 0.822246 — val_recall: 0.807069 Epoch 12: val_accuracy did not improve from 0.81267 62/62 [==============================] - 160s 3s/step - loss: 0.0179 - accuracy: 0.9960 - val_loss: 0.9250 - val_accuracy: 0.8071 - val_f1: 0.8097 - val_recall: 0.8071 - val_precision: 0.8222 Epoch 13/30 107/107 [==============================] - 19s 174ms/step — val_f1: 0.806898 — val_precision: 0.817460 — val_recall: 0.803829 Epoch 13: val_accuracy did not improve from 0.81267 62/62 [==============================] - 161s 3s/step - loss: 0.0166 - accuracy: 0.9962 - val_loss: 0.9572 - val_accuracy: 0.8038 - val_f1: 0.8069 - val_recall: 0.8038 - val_precision: 0.8175 Epoch 14/30 107/107 [==============================] - 18s 164ms/step — val_f1: 0.811613 — val_precision: 0.823340 — val_recall: 0.808542 Epoch 14: val_accuracy did not improve from 0.81267 62/62 [==============================] - 160s 3s/step - loss: 0.0154 - accuracy: 0.9967 - val_loss: 0.8783 - val_accuracy: 0.8085 - val_f1: 0.8116 - val_recall: 0.8085 - val_precision: 0.8233 Epoch 15/30 107/107 [==============================] - 17s 162ms/step — val_f1: 0.807784 — val_precision: 0.819169 — val_recall: 0.805891 Epoch 15: val_accuracy did not improve from 0.81267 62/62 [==============================] - 162s 3s/step - loss: 0.0140 - accuracy: 0.9973 - val_loss: 0.9599 - val_accuracy: 0.8059 - val_f1: 0.8078 - val_recall: 0.8059 - val_precision: 0.8192 Epoch 16/30 107/107 [==============================] - 19s 174ms/step — val_f1: 0.807657 — val_precision: 0.820539 — val_recall: 0.805891 Epoch 16: val_accuracy did not improve from 0.81267 62/62 [==============================] - 159s 3s/step - loss: 0.0147 - accuracy: 0.9962 - val_loss: 0.9441 - val_accuracy: 0.8059 - val_f1: 0.8077 - val_recall: 0.8059 - val_precision: 0.8205 Epoch 17/30 107/107 [==============================] - 18s 164ms/step — val_f1: 0.821950 — val_precision: 0.828624 — val_recall: 0.819735 Epoch 17: val_accuracy improved from 0.81267 to 0.81973, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 158s 3s/step - loss: 0.0122 - accuracy: 0.9973 - val_loss: 0.8409 - val_accuracy: 0.8197 - val_f1: 0.8219 - val_recall: 0.8197 - val_precision: 0.8286 Epoch 18/30 107/107 [==============================] - 18s 168ms/step — val_f1: 0.811753 — val_precision: 0.823568 — val_recall: 0.809131 Epoch 18: val_accuracy did not improve from 0.81973 62/62 [==============================] - 162s 3s/step - loss: 0.0083 - accuracy: 0.9981 - val_loss: 1.0236 - val_accuracy: 0.8091 - val_f1: 0.8118 - val_recall: 0.8091 - val_precision: 0.8236 Epoch 19/30 107/107 [==============================] - 19s 174ms/step — val_f1: 0.820830 — val_precision: 0.832321 — val_recall: 0.817673 Epoch 19: val_accuracy did not improve from 0.81973 62/62 [==============================] - 162s 3s/step - loss: 0.0116 - accuracy: 0.9967 - val_loss: 0.9014 - val_accuracy: 0.8177 - val_f1: 0.8208 - val_recall: 0.8177 - val_precision: 0.8323 Epoch 20/30 107/107 [==============================] - 18s 164ms/step — val_f1: 0.812182 — val_precision: 0.821446 — val_recall: 0.810898 Epoch 20: val_accuracy did not improve from 0.81973 62/62 
[==============================] - 160s 3s/step - loss: 0.0119 - accuracy: 0.9966 - val_loss: 0.9161 - val_accuracy: 0.8109 - val_f1: 0.8122 - val_recall: 0.8109 - val_precision: 0.8214 Epoch 21/30 107/107 [==============================] - 17s 163ms/step — val_f1: 0.822124 — val_precision: 0.833710 — val_recall: 0.819146 Epoch 21: val_accuracy did not improve from 0.81973 62/62 [==============================] - 163s 3s/step - loss: 0.0127 - accuracy: 0.9973 - val_loss: 0.9170 - val_accuracy: 0.8191 - val_f1: 0.8221 - val_recall: 0.8191 - val_precision: 0.8337 Epoch 22/30 107/107 [==============================] - 19s 174ms/step — val_f1: 0.818140 — val_precision: 0.827724 — val_recall: 0.816200 Epoch 22: val_accuracy did not improve from 0.81973 62/62 [==============================] - 160s 3s/step - loss: 0.0096 - accuracy: 0.9975 - val_loss: 0.9062 - val_accuracy: 0.8162 - val_f1: 0.8181 - val_recall: 0.8162 - val_precision: 0.8277 Epoch 23/30 107/107 [==============================] - 18s 165ms/step — val_f1: 0.795488 — val_precision: 0.808043 — val_recall: 0.793520 Epoch 23: val_accuracy did not improve from 0.81973 62/62 [==============================] - 160s 3s/step - loss: 0.0158 - accuracy: 0.9960 - val_loss: 1.0718 - val_accuracy: 0.7935 - val_f1: 0.7955 - val_recall: 0.7935 - val_precision: 0.8080 Epoch 24/30 107/107 [==============================] - 17s 163ms/step — val_f1: 0.822440 — val_precision: 0.832766 — val_recall: 0.820029 Epoch 24: val_accuracy improved from 0.81973 to 0.82003, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 162s 3s/step - loss: 0.0147 - accuracy: 0.9967 - val_loss: 0.8964 - val_accuracy: 0.8200 - val_f1: 0.8224 - val_recall: 0.8200 - val_precision: 0.8328 Epoch 25/30 107/107 [==============================] - 18s 167ms/step — val_f1: 0.827384 — val_precision: 0.838000 — val_recall: 0.824742 Epoch 25: val_accuracy improved from 0.82003 to 0.82474, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 157s 3s/step - loss: 0.0114 - accuracy: 0.9976 - val_loss: 0.8981 - val_accuracy: 0.8247 - val_f1: 0.8274 - val_recall: 0.8247 - val_precision: 0.8380 Epoch 26/30 107/107 [==============================] - 18s 167ms/step — val_f1: 0.836116 — val_precision: 0.844064 — val_recall: 0.834168 Epoch 26: val_accuracy improved from 0.82474 to 0.83417, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 162s 3s/step - loss: 0.0073 - accuracy: 0.9985 - val_loss: 0.8789 - val_accuracy: 0.8342 - val_f1: 0.8361 - val_recall: 0.8342 - val_precision: 0.8441 Epoch 27/30 107/107 [==============================] - 18s 165ms/step — val_f1: 0.837743 — val_precision: 0.843811 — val_recall: 0.836230 Epoch 27: val_accuracy improved from 0.83417 to 0.83623, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 160s 3s/step - loss: 0.0060 - accuracy: 0.9989 - val_loss: 0.8408 - val_accuracy: 0.8362 - val_f1: 0.8377 - val_recall: 0.8362 - val_precision: 0.8438 Epoch 28/30 107/107 [==============================] - 18s 167ms/step — val_f1: 0.831491 — val_precision: 0.843039 — val_recall: 0.828277 Epoch 28: val_accuracy did not improve from 0.83623 62/62 [==============================] - 157s 3s/step - loss: 0.0021 - accuracy: 0.9996 - val_loss: 0.9481 - val_accuracy: 0.8283 - val_f1: 0.8315 - val_recall: 0.8283 - val_precision: 0.8430 Epoch 29/30 107/107 [==============================] - 18s 169ms/step — 
val_f1: 0.838640 — val_precision: 0.842258 — val_recall: 0.837408 Epoch 29: val_accuracy improved from 0.83623 to 0.83741, saving model to /content/checkpoints/BiLSTM_attn.hdf5 62/62 [==============================] - 159s 3s/step - loss: 0.0045 - accuracy: 0.9991 - val_loss: 0.8874 - val_accuracy: 0.8374 - val_f1: 0.8386 - val_recall: 0.8374 - val_precision: 0.8423 Epoch 30/30 107/107 [==============================] - 18s 168ms/step — val_f1: 0.826925 — val_precision: 0.834585 — val_recall: 0.825037 Epoch 30: val_accuracy did not improve from 0.83741 62/62 [==============================] - 158s 3s/step - loss: 0.0033 - accuracy: 0.9992 - val_loss: 0.9261 - val_accuracy: 0.8250 - val_f1: 0.8269 - val_recall: 0.8250 - val_precision: 0.8346
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout, Bidirectional, LSTM, Embedding, Input
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K
from tensorflow.keras import Model
from sklearn.metrics import classification_report
LSTM_SIZE = 300
DENSE = 1000
with tf.device('/device:GPU:0'):
    inputs = Input((MAX_SEQUENCE_LENGTH,))
    embeddings = Embedding(MAX_WORDS+2, EMBEDDING_DIM,  # weights=[embedding_matrix],
                           input_length=MAX_SEQUENCE_LENGTH, mask_zero=True, trainable=True)(inputs)  # trainable=False
    bilstm = Bidirectional(LSTM(units=LSTM_SIZE, return_sequences=True, recurrent_dropout=0.33))(embeddings)
    x, attn = DeepAttention(return_attention=True)(bilstm)
    hidden = Dense(units=DENSE, activation="relu")(x)
    out = Dense(units=len(twenty_train.target_names), activation="softmax")(hidden)
    model2 = Model(inputs, out)
    # Load weights from the pre-trained model
    model2.load_weights("/content/checkpoints/BiLSTM_attn.hdf5")
print("Dev set performance")
predictions_val = np.argmax(model2.predict(val_data), -1)
print(classification_report(y_val, predictions_val, target_names=twenty_train.target_names))
print()
print("Training set performance") # Overfitting on training set
predictions_train = np.argmax(model2.predict(train_data), -1)
print(classification_report(y_train, predictions_train, target_names=twenty_train.target_names))
WARNING:tensorflow:Layer lstm_2 will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.
Dev set performance
107/107 [==============================] - 21s 191ms/step
                          precision    recall  f1-score   support

             alt.atheism       0.90      0.87      0.89       160
           comp.graphics       0.69      0.79      0.74       165
 comp.os.ms-windows.misc       0.83      0.85      0.84       189
comp.sys.ibm.pc.hardware       0.63      0.72      0.67       168
   comp.sys.mac.hardware       0.87      0.75      0.80       182
          comp.windows.x       0.94      0.88      0.90       168
            misc.forsale       0.72      0.71      0.71       182
               rec.autos       0.80      0.78      0.79       181
         rec.motorcycles       0.85      0.86      0.86       184
      rec.sport.baseball       0.93      0.91      0.92       169
        rec.sport.hockey       0.96      0.90      0.93       175
               sci.crypt       0.93      0.94      0.94       177
         sci.electronics       0.70      0.79      0.74       173
                 sci.med       0.92      0.81      0.86       181
               sci.space       0.88      0.88      0.88       181
  soc.religion.christian       0.77      0.88      0.82       177
      talk.politics.guns       0.88      0.93      0.90       177
   talk.politics.mideast       0.95      0.87      0.91       170
      talk.politics.misc       0.83      0.78      0.80       135
      talk.religion.misc       0.76      0.71      0.73       101

                accuracy                           0.83      3395
               macro avg       0.84      0.83      0.83      3395
            weighted avg       0.84      0.83      0.83      3395

Training set performance
248/248 [==============================] - 42s 170ms/step
                          precision    recall  f1-score   support

             alt.atheism       1.00      1.00      1.00       320
           comp.graphics       1.00      1.00      1.00       419
 comp.os.ms-windows.misc       1.00      1.00      1.00       402
comp.sys.ibm.pc.hardware       1.00      1.00      1.00       422
   comp.sys.mac.hardware       1.00      1.00      1.00       396
          comp.windows.x       1.00      1.00      1.00       425
            misc.forsale       1.00      1.00      1.00       403
               rec.autos       1.00      1.00      1.00       413
         rec.motorcycles       1.00      1.00      1.00       414
      rec.sport.baseball       1.00      1.00      1.00       428
        rec.sport.hockey       1.00      1.00      1.00       425
               sci.crypt       1.00      1.00      1.00       418
         sci.electronics       1.00      1.00      1.00       418
                 sci.med       1.00      1.00      1.00       413
               sci.space       1.00      1.00      1.00       412
  soc.religion.christian       0.96      1.00      0.98       422
      talk.politics.guns       1.00      1.00      1.00       369
   talk.politics.mideast       1.00      0.96      0.98       394
      talk.politics.misc       1.00      1.00      1.00       330
      talk.religion.misc       1.00      1.00      1.00       276

                accuracy                           1.00      7919
               macro avg       1.00      1.00      1.00      7919
            weighted avg       1.00      1.00      1.00      7919
#from sklearn.metrics import accuracy_score
predictions = np.argmax(model2.predict(val_data), -1)
print(f'Validation Accuracy: {accuracy_score(y_val, predictions)*100:.2f}%')
predictions = np.argmax(model2.predict(test_data), -1)
print(f'Test Accuracy: {accuracy_score(y_test, predictions)*100:.2f}%')
107/107 [==============================] - 18s 169ms/step
Validation Accuracy: 83.33%
32/32 [==============================] - 6s 171ms/step
Test Accuracy: 71.70%
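# Because DeepAttention was built with return_attention=True, the `attn` tensor
# defined above exposes one weight per time step, and a second functional model
# can show which tokens the classifier attends to. A sketch, assuming the
# `inputs`/`attn` tensors and the fitted `tokenizer` from the cells above are still in scope:
attn_model = Model(inputs, attn)                          # padded sequence -> attention weights
weights = attn_model.predict(val_data[:1])[0].squeeze()   # shape: (MAX_SEQUENCE_LENGTH,)

# Pair each non-padding token with its weight and show the most attended ones.
tokens = [tokenizer.index_word[idx] for idx in val_data[0] if idx != 0]
scored = sorted(zip(tokens, weights[:len(tokens)]), key=lambda tw: tw[1], reverse=True)
print(scored[:10])  # the ten most heavily attended tokens of the first validation document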