Lösungsskelett
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Candidate classifiers, keyed by the label that is passed on as the model
# name when the results are collected. All estimators except KNN are
# constructed with random_state=13.
MODELS = {
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'DecisionTree': DecisionTreeClassifier(random_state=13),
    'RandomForest': RandomForestClassifier(random_state=13),
    'LogReg': LogisticRegression(max_iter=1000, random_state=13),
    'SVM': SVC(random_state=13),
    # optional: 'GradientBoost': GradientBoostingClassifier(random_state=13),
}
def _row(name, scaling, model, X_tr, y_tr, X_va, y_va):
    """Fit ``model`` on the training split and return its metrics as one row.

    Args:
        name: Label stored under the ``'model'`` key.
        scaling: Label stored under the ``'scaling'`` key.
        model: Object exposing ``fit(X, y)`` and ``predict(X)``; ``fit`` is
            called on the object that is passed in.
        X_tr: Training features.
        y_tr: Training labels.
        X_va: Validation features.
        y_va: Validation labels.

    Returns:
        A dict holding the two labels plus accuracy, precision, recall and
        F1 for the training and the validation split. Precision, recall
        and F1 are computed with ``average='macro'`` and ``zero_division=0``.
    """
    model.fit(X_tr, y_tr)
    pred_train = model.predict(X_tr)
    pred_val = model.predict(X_va)

    # Keyword arguments shared by every metric except accuracy.
    macro = dict(average='macro', zero_division=0)

    # Keys are inserted train-before-val per metric so the resulting
    # column order stays acc, prec, rec, f1.
    row = {'model': name, 'scaling': scaling}
    row['train_acc'] = accuracy_score(y_tr, pred_train)
    row['val_acc'] = accuracy_score(y_va, pred_val)
    row['train_prec'] = precision_score(y_tr, pred_train, **macro)
    row['val_prec'] = precision_score(y_va, pred_val, **macro)
    row['train_rec'] = recall_score(y_tr, pred_train, **macro)
    row['val_rec'] = recall_score(y_va, pred_val, **macro)
    row['train_f1'] = f1_score(y_tr, pred_train, **macro)
    row['val_f1'] = f1_score(y_va, pred_val, **macro)
    return row
# Evaluate every model on the raw and on the scaled features.
# X_train / X_val, X_train_scaled / X_val_scaled, y_train and y_val are not
# defined in this snippet and must already exist at this point.
results = []
for name, model in MODELS.items():
    # The same estimator object is handed to both calls, so it is fit on the
    # unscaled data first and then fit again on the scaled data.
    results.extend((
        _row(name, 'unscaled', model, X_train, y_train, X_val, y_val),
        _row(name, 'scaled', model, X_train_scaled, y_train, X_val_scaled, y_val),
    ))

# One DataFrame row per (model, scaling) combination.
df = pd.DataFrame(results)