import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
# Univariate feature selection: score each candidate feature against the
# target with the chi-squared test and keep the k best.
# FIX: the original used typographic quotes (“ ”), which are a SyntaxError.
df = pd.read_csv("FeatureSelection.csv")
X = df.iloc[:, 0:5]   # candidate features: first five columns
X.head()
y = df.iloc[:, -1]    # target label: last column
y.head()
bestfeat = SelectKBest(score_func=chi2, k=4)
fit = bestfeat.fit(X, y)
# Pair each column name with its chi2 score in one small table for inspection.
datascore = pd.DataFrame(fit.scores_)
datacol = pd.DataFrame(X.columns)
fscore = pd.concat([datacol, datascore], axis=1)
fscore.columns = ["best", "score"]
# Tree-based feature importance: an ExtraTreesClassifier assigns each
# feature an impurity-based importance score (scores sum to 1).
from sklearn.ensemble import ExtraTreesClassifier
import matplotlib.pyplot as pl

model = ExtraTreesClassifier()
model.fit(X, y)
# FIX: removed pasted REPL echoes — a bare `ExtraTreesClassifier()` line,
# a bare `model.feature_importances_` line, and a literal
# `array([...])` output line that is a NameError when run as a script.
# Also replaced the typographic quotes around 'bar' (SyntaxError).
featimport = pd.Series(model.feature_importances_, index=X.columns)
featimport.nlargest(5).plot(kind="bar")
pl.show()
# Seaborn -----> Heatmap ---->
import seaborn as sns

# Correlation heatmap of all features; annot=True prints the coefficient
# inside each cell.
corrmat = df.corr()
pl.figure(figsize=(20, 20))
# FIX: the original referenced an undefined name `corrfeat`
# (df[corrfeat].corr() is a NameError) and used typographic quotes.
# Plot the already-computed correlation matrix directly instead.
a = sns.heatmap(corrmat, annot=True, cmap="Blues")
# Re-load the data and demonstrate simple feature scaling.
# FIX: typographic quotes in the filename and in kind='bar' were
# SyntaxErrors; redundant re-imports of numpy/pandas/matplotlib removed
# (they are already imported at the top of the file).
ds = pd.read_csv("FeatureSelection.csv")
ds.head()
# Simple feature scaling: divide each column by its max absolute value,
# mapping every value into [-1, 1].
for column in ds.columns:
    ds[column] = ds[column] / ds[column].abs().max()
ds.plot(kind="bar")
# Min-max normalization: x' = (x - min) / (max - min), which rescales
# each column onto the [0, 1] interval.
ds1 = ds.copy()
for col in ds1.columns:
    col_min = ds1[col].min()
    col_max = ds1[col].max()
    ds1[col] = (ds1[col] - col_min) / (col_max - col_min)
ds1.head()
# Standardization (z-score / zero-mean): subtract the column mean and
# divide by the column standard deviation.
ds2 = ds.copy()
for col in ds2.columns:
    col_mean = ds2[col].mean()
    col_std = ds2[col].std()
    ds2[col] = (ds2[col] - col_mean) / col_std
ds2.head()