I'm building a neural network to recognize handwritten Cyrillic letters, and I noticed that, with the tanh activation function, it works far better in PyTorch than in Keras.
Keras code:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Lambda, BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

a_param = 0.5
activation_type = 'tanh'
# helper defined elsewhere in my code; builds the activation from a_param and activation_type
custom_activation = create_activation_function(a_param, activation_type)

input_shape = (100, 100, 4)
model = Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=input_shape))
model.add(Flatten())
model.add(Dense(784))
model.add(BatchNormalization())
model.add(Lambda(custom_activation))
model.add(Dense(30))
model.add(BatchNormalization())
model.add(Lambda(custom_activation))
model.add(Dense(10))
model.add(Lambda(custom_activation))

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

epochs = 30
batch_size = 128

lr_schedule = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6
)
early_stopping = EarlyStopping(
    monitor='val_loss', patience=3, restore_best_weights=True
)

history = model.fit(
    X_train, y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
    shuffle=True,
    callbacks=[lr_schedule, early_stopping]
)
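create_activation_function isn't shown above; it is just a small helper that returns the elementwise activation used in the Lambda layers. A minimal sketch of what I mean (the exact use of a_param here is only illustrative, my real helper may differ slightly):

def create_activation_function(a_param, activation_type):
    # returns an elementwise activation for use inside a Lambda layer
    if activation_type == 'tanh':
        return lambda x: tf.math.tanh(a_param * x)  # scaled tanh (scaling is illustrative)
    raise ValueError(f"unknown activation_type: {activation_type}")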
PyTorch code:
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 100x100 images with 4 channels (RGBA)
        self.fc1 = nn.Linear(100 * 100 * 4, 128)  # input layer for 100x100 images with 4 channels
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        # flatten the tensor into a vector of length 100 * 100 * 4
        x = x.view(-1, 100 * 100 * 4)
        x = torch.tanh(self.fc1(x))  # tanh activation
        x = torch.tanh(self.fc2(x))
        x = self.fc3(x)
        return torch.tanh(x)  # tanh applied to the output as well
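The PyTorch training loop isn't shown; it is the usual pattern, roughly like the sketch below (simplified: train_loader is just my DataLoader over the same data, and the loss/optimizer choices mirror the Keras setup above):

model = Net()
criterion = nn.CrossEntropyLoss()  # labels as class indices (not one-hot)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(30):
    for images, labels in train_loader:  # train_loader: DataLoader over the training set
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()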
With Keras the accuracy is around 10%, while with PyTorch it is way higher, up to 90%. What could be the cause? Did I do something wrong, or is it supposed to work this way?