In [34]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
In [35]:
ds = pd.read_csv('../dataset/placement_clean.csv')
ds.head()
Out[35]:
Unnamed: 0 gender 10th p 10th b 12th p 12th b stream ug p ug t work exp pg t pg p status
0 0 M 67.00 Others 91.00 Others Commerce 58.00 Sci&Tech No Mkt&HR 58.80 Placed
1 1 M 79.33 Central 78.33 Others Science 77.48 Sci&Tech Yes Mkt&Fin 66.28 Placed
2 2 M 65.00 Central 68.00 Central Arts 64.00 Comm&Mgmt No Mkt&Fin 57.80 Placed
3 3 M 56.00 Central 52.00 Central Science 52.00 Sci&Tech No Mkt&HR 59.43 Not Placed
4 4 M 85.80 Central 73.60 Central Commerce 73.30 Comm&Mgmt No Mkt&Fin 55.50 Placed
In [36]:
X = ds.iloc[:, 1:-1].values
y = ds.iloc[:, -1].values
In [37]:
print(X)
[['M' 67.0 'Others' ... 'No' 'Mkt&HR' 58.8]
 ['M' 79.33 'Central' ... 'Yes' 'Mkt&Fin' 66.28]
 ['M' 65.0 'Central' ... 'No' 'Mkt&Fin' 57.8]
 ...
 ['M' 67.0 'Others' ... 'Yes' 'Mkt&Fin' 69.72]
 ['F' 74.0 'Others' ... 'No' 'Mkt&HR' 60.23]
 ['M' 62.0 'Central' ... 'No' 'Mkt&HR' 60.22]]
In [38]:
print(y)
['Placed' 'Placed' 'Placed' 'Not Placed' 'Placed' 'Not Placed'
 'Not Placed' 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed'
 'Not Placed' 'Placed' 'Not Placed' 'Placed' 'Placed' 'Not Placed'
 'Not Placed' 'Placed' 'Placed' 'Placed' 'Placed' 'Placed' 'Placed'
 'Not Placed' 'Placed' 'Placed' 'Placed' 'Not Placed' 'Placed'
 'Not Placed' 'Placed' 'Placed' 'Not Placed' 'Placed' 'Not Placed'
 'Placed' 'Placed' 'Placed' 'Placed' 'Not Placed' 'Not Placed' 'Placed'
 'Placed' 'Not Placed' 'Not Placed' 'Placed' 'Placed' 'Not Placed'
 'Placed' 'Not Placed' 'Not Placed' 'Placed' 'Placed' 'Placed' 'Placed'
 'Placed' 'Placed' 'Placed' 'Placed' 'Placed' 'Placed' 'Not Placed'
 'Placed' 'Not Placed' 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed'
 'Placed' 'Placed' 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed'
 'Placed' 'Not Placed' 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed'
 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed' 'Placed' 'Not Placed'
 'Placed' 'Not Placed' 'Placed' 'Placed' 'Placed' 'Not Placed' 'Placed'
 'Not Placed' 'Not Placed' 'Placed' 'Placed' 'Placed' 'Placed'
 'Not Placed' 'Not Placed' 'Placed' 'Placed' 'Not Placed' 'Placed'
 'Not Placed' 'Placed' 'Placed' 'Placed' 'Placed' 'Placed' 'Placed'
 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed' 'Placed' 'Placed'
 'Placed' 'Placed' 'Placed' 'Placed' 'Placed' 'Not Placed' 'Placed'
 'Placed' 'Placed' 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed'
 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed' 'Not Placed' 'Placed'
 'Placed' 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed' 'Placed'
 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed' 'Not Placed'
 'Not Placed' 'Placed' 'Not Placed' 'Placed' 'Placed' 'Placed'
 'Not Placed' 'Placed' 'Not Placed' 'Not Placed' 'Not Placed' 'Not Placed'
 'Placed' 'Placed' 'Not Placed' 'Placed' 'Not Placed' 'Placed' 'Placed'
 'Placed' 'Not Placed' 'Placed' 'Not Placed' 'Not Placed' 'Placed'
 'Not Placed' 'Placed' 'Not Placed' 'Placed' 'Not Placed' 'Not Placed'
 'Not Placed' 'Placed' 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed'
 'Placed' 'Not Placed' 'Placed' 'Placed' 'Not Placed' 'Placed' 'Placed'
 'Placed' 'Placed' 'Not Placed' 'Placed' 'Not Placed' 'Placed' 'Placed'
 'Placed' 'Placed' 'Placed' 'Not Placed']
In [39]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0, 2, 4, 5, 7, 8, 9])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
In [40]:
print(X)
[[0.0 1.0 0.0 ... 91.0 58.0 58.8]
 [0.0 1.0 1.0 ... 78.33 77.48 66.28]
 [0.0 1.0 1.0 ... 68.0 64.0 57.8]
 ...
 [0.0 1.0 0.0 ... 67.0 73.0 69.72]
 [1.0 0.0 0.0 ... 66.0 58.0 60.23]
 [0.0 1.0 1.0 ... 58.0 53.0 60.22]]
In [41]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)
In [42]:
print(y)
[1 1 1 0 1 0 0 1 1 0 1 1 0 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 0 1 0
 1 1 1 1 0 0 1 1 0 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 1
 1 0 1 1 1 0 1 1 0 1 1 1 1 0 1 1 1 0 1 0 1 1 1 0 1 0 0 1 1 1 1 0 0 1 1 0 1
 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 1 1
 1 0 1 1 1 1 1 0 1 1 0 0 1 0 1 1 1 0 1 0 0 0 0 1 1 0 1 0 1 1 1 0 1 0 0 1 0
 1 0 1 0 0 0 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 0]
In [43]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)
In [44]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
In [45]:
print(X_train)
[[-0.76980036  0.76980036  0.9545214  ... -0.57006221 -1.30846321
  -1.31487558]
 [-0.76980036  0.76980036  0.9545214  ... -0.29435751 -0.05053648
  -0.35062408]
 [-0.76980036  0.76980036 -1.04764544 ...  0.07324875 -0.05053648
  -0.77031628]
 ...
 [-0.76980036  0.76980036 -1.04764544 ...  0.07324875  1.62669918
   1.1096273 ]
 [-0.76980036  0.76980036  0.9545214  ... -0.12893469  0.43865726
  -0.90038618]
 [ 1.29903811 -1.29903811  0.9545214  ...  0.90036285 -0.10644433
  -1.15532317]]
In [46]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)
Out[46]:
DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')
In [47]:
y_pred = classifier.predict(X_test)
In [48]:
from sklearn.metrics import precision_score
precision_score(y_test, y_pred, average='weighted')
Out[48]:
0.762432915921288
In [49]:
from sklearn.metrics import f1_score
f1_score(y_test, y_pred, average='weighted')
Out[49]:
0.7631458357200139
In [50]:
from sklearn.metrics import recall_score
recall_score(y_test, y_pred, average='weighted')
Out[50]:
0.7674418604651163
In [ ]:
 
In [ ]: