Data Science Asked by ebrahimi on August 4, 2021
Could you please let me know how to set `class_weight` for imbalanced classes in `KerasClassifier` when it is used inside `GridSearchCV`?
# Use scikit-learn to grid search the batch size and epochs
from collections import Counter
from sklearn.model_selection import train_test_split,StratifiedKFold,learning_curve,validation_curve,GridSearchCV
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.metrics import classification_report
import pandas as pd
from sklearn.pipeline import Pipeline
# Function to create model, required for KerasClassifier
def create_model(input_dim=20):
    """Build and compile the binary classifier wrapped by KerasClassifier.

    The network has one hidden ReLU layer of 12 units followed by a single
    sigmoid output unit, and is compiled with binary cross-entropy loss,
    the Adam optimizer and accuracy as the reported metric.

    Args:
        input_dim: Number of input features. Defaults to 20, the feature
            count of the dataset generated in this script.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(12, input_dim=input_dim, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
# Fix the NumPy random seed for reproducibility.
seed = 7
np.random.seed(seed)

# Generate a synthetic two-class dataset with class proportions 0.95 / 0.05.
X, y = make_classification(
    n_classes=2,
    class_sep=2,
    weights=[0.95, 0.05],
    n_informative=3,
    n_redundant=2,
    flip_y=0,
    n_features=20,
    n_clusters_per_class=1,
    n_samples=1000,
    random_state=10,
)
print('Original dataset shape {}'.format(Counter(y)))
ln = X.shape  # NOTE(review): not used anywhere in the code shown here
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Put the scaler and the Keras classifier in one pipeline so the grid search
# refits the scaler on each training fold instead of on the full training set.
st = StandardScaler()
model = KerasClassifier(build_fn=create_model, verbose=0)
pipeline = Pipeline(steps=[('scaler', st), ('clf', model)])

# Grid of fit parameters; the `clf__` prefix routes them to the 'clf' step.
batch_size = [20, 40, 60, 80, 100]
epochs = [50, 100]
param_grid = dict(clf__batch_size=batch_size, clf__epochs=epochs)

# random_state only has an effect when shuffle=True; recent scikit-learn
# versions raise a ValueError if random_state is set while shuffle is False.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=cv, scoring="f1")
grid_result = grid.fit(X_train, y_train)

# Summarize the best cross-validated score and the parameters that produced it.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Report performance on the training split, then on the held-out test split.
ypred = grid_result.predict(X_train)
print(classification_report(y_train, ypred))
print('######################')
ypred2 = grid_result.predict(X_test)
print(classification_report(y_test, ypred2))
# Pass class_weight through fit(); the `clf__` prefix routes it to the 'clf'
# pipeline step. The rare class (1) gets the larger weight so that its
# samples contribute more to the loss than those of the common class (0).
grid_result = grid.fit(X_train, y_train, clf__class_weight={0: 0.05, 1: 0.95})
FYI, per the docs, `fit_params` should no longer be passed to the `GridSearchCV` constructor as a dict; instead, pass the parameters directly to `fit`, as shown above.
http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
Correct answer by Bert Kellerman on August 4, 2021
Get help from others!
Recent Questions
Recent Answers
© 2024 TransWikia.com. All rights reserved. Sites we Love: PCI Database, UKBizDB, Menu Kuliner, Sharing RPP