# Implementation of GDM (gestational diabetes mellitus) data preprocessing and modelling

import pandas as pd
import numpy as np
# Load the raw GDM dataset from Excel.
# NOTE(review): path follows a Kaggle-style '../input/...' layout — confirm it exists
# in the environment this script actually runs in.
df = pd.read_excel('../input/gdgdgdm/GDM.xlsx')
# Build the binary GDM target from the four glucose measurements.
# Any reading >= 7.8 mmol/L labels the row '1' (GDM); all four readings below
# 7.8 labels it '0'; rows with missing readings get no label and are dropped
# (these pregnancies were transferred or miscarried).
#
# Fixes vs. the original:
#  - the 'choices'/'np.select' lines were fused onto one line (SyntaxError);
#  - the '0' condition used OR (any reading < 7.8) where AND (all readings
#    < 7.8) is required;
#  - comparing numeric columns to '' never detects NaN — NaN compares False
#    against any threshold, so unlabeled rows now simply fall through to the
#    default.
glucose_cols = ['1h glucose', 'OGTT 0h value', 'OGTT 1h value', 'OGTT 2h value']
gdm_positive = (df[glucose_cols] >= 7.8).any(axis=1)
gdm_negative = (df[glucose_cols] < 7.8).all(axis=1)
# Keep string labels ('1'/'0') and the 'nan' sentinel so downstream plots and
# the drop below behave exactly as before.
df['GDM'] = np.select([gdm_positive, gdm_negative], ['1', '0'], default='nan')
# Dropping rows which dont have values as these were transferred or miscarried.
df.drop(df[df['GDM'] == 'nan'].index, inplace=True)
import seaborn as sns
# Visualise missingness: highlighted cells mark NaN entries per column.
sns.heatmap(df.isnull(),yticklabels=False,cbar=False,cmap='viridis')
# Figure: missing-value heatmap (image omitted — "Image by Author")
# Class balance of the GDM target, then the same counts split by the
# "Age >30 10" indicator column.
sns.set_style('whitegrid')
sns.countplot(data=df, x='GDM')
sns.set_style('whitegrid')
sns.countplot(data=df, x='GDM', hue='Age >30 10', palette='RdBu_r')
# Figure: GDM counts by age group (image omitted — "Image by Author")
# GDM counts split by smoking status ("Smoking 123").
sns.set_style('whitegrid')
sns.countplot(data=df, x='GDM', hue='Smoking 123', palette='RdBu_r')
# Figure: GDM counts by smoking status (image omitted — "Image by Author")
# GDM counts split by overweight status ("Overweight 123").
sns.set_style('whitegrid')
sns.countplot(data=df, x='GDM', hue='Overweight 123', palette='RdBu_r')
# Figure: GDM counts by overweight status (image omitted — "Image by Author")
# Binary flag: 1 when first-visit diastolic BP is at or below 60 mmHg.
# (NaN compares False against the threshold, so missing readings become 0 —
# same as the original np.where form.)
df['LowBP'] = (df['diastolic BP (mmHg) V1'] <= 60).astype(int)

sns.set_style('whitegrid')
sns.countplot(data=df, x='GDM', hue='LowBP', palette='RdBu_r')
# Binary flag: 1 when first-visit HbA1c is 30 mmol/mol or higher.
df['HbA1c'] = (df['V1 HbA1c (mmol/mol)'] >= 30).astype(int)
sns.set_style('whitegrid')
sns.countplot(data=df, x='GDM', hue='HbA1c', palette='RdBu_r')
# Figure: GDM counts by HbA1c flag (image omitted — "Image by Author")
# Binary flag: 1 when first-visit BMI is 25 kg/m^2 or above (overweight cutoff).
df['BMI'] = (df['BMI (kg/m2) V1'] >= 25).astype(int)
sns.set_style('whitegrid')
sns.countplot(data=df, x='GDM', hue='BMI', palette='RdBu_r')
# Figure: GDM counts by BMI flag (image omitted — "Image by Author")
import xgboost as xgb
# NOTE(review): mean_squared_error is imported but never used in this section.
from sklearn.metrics import mean_squared_error
# Baseline gradient-boosted classifier with default hyperparameters.
xgb_classifier = xgb.XGBClassifier()
print(xgb_classifier)
from sklearn.model_selection import train_test_split
# NOTE(review): X and y are never defined anywhere in this file — presumably
# the feature matrix and target were built in a notebook cell lost during
# export. As written this line raises NameError; confirm where X and y come from.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)
# Fitting the model with train data.
xgb_classifier.fit(X_train, y_train)

# Predicting the test data
y_pred = xgb_classifier.predict(X_test)
print(y_pred)

# Evaluate predictions
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
# Figure: XGBoost output/accuracy (image omitted — "Image by Author")
# --- Random forest on MICE-imputed data -------------------------------------
from sklearn.experimental import enable_iterative_imputer  # noqa: F401 (required to expose IterativeImputer)
from sklearn.impute import IterativeImputer
import numpy as np
from sklearn.linear_model import LinearRegression

# MICE-style imputer: each feature's missing values are modelled from the other
# features with linear regression, iterated up to 20 rounds.
lr = LinearRegression()
imp = IterativeImputer(estimator=lr, missing_values=np.nan, max_iter=20)

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

clf = RandomForestClassifier(n_estimators=170)

# Split FIRST, then impute: the original ran X = imp.fit_transform(X) on the
# full matrix before splitting, which leaks test-set statistics into the
# imputation model. Fit on the training rows only, then transform the test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
X_train = imp.fit_transform(X_train)
X_test = imp.transform(X_test)

# Train the random forest.
clf = clf.fit(X_train, y_train)

# Predict the held-out rows and report accuracy.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
# --- Random forest on KNN-imputed data --------------------------------------
from sklearn.impute import KNNImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# KNN imputer: each missing value is filled from the 5 nearest rows;
# add_indicator appends a missing-value flag column for each feature that had
# missing values at fit time.
knn = KNNImputer(n_neighbors=5, add_indicator=True)
clf = RandomForestClassifier(n_estimators=170)

# Split FIRST, then impute: the original ran X = knn.fit_transform(X) on the
# full matrix before splitting, leaking test rows into the imputer's neighbour
# search. Fit on training rows only, then transform the test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
X_train = knn.fit_transform(X_train)
X_test = knn.transform(X_test)

# Train the random forest and report held-out accuracy.
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
# --- Decision tree on KNN-imputed data --------------------------------------
from sklearn.tree import DecisionTreeClassifier

# Create the decision-tree classifier (default hyperparameters).
clf = DecisionTreeClassifier()

# Split FIRST, then impute each side separately. The original re-ran
# knn.fit_transform on the full (already-imputed) matrix, which is both
# redundant and leaks test-set information into the imputer.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
X_train = knn.fit_transform(X_train)
X_test = knn.transform(X_test)

# Train the decision tree and report held-out accuracy.
clf = clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
# Import Decision Tree Classifier
# NOTE(review): this section is an exact duplicate of the decision-tree run
# above (same imputer, same split seed, same model) — candidate for deletion.
from sklearn.tree import DecisionTreeClassifier
# Create Decision Tree classifer object
clf = DecisionTreeClassifier()
#knn Imputer
# NOTE(review): X was already KNN-imputed earlier; re-fitting the imputer on
# the full matrix is redundant and lets test rows influence the fill values.
X=knn.fit_transform(X)
#Splitting test and train data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
# Train Decision Tree Classifer
clf = clf.fit(X_train,y_train)
#Predict the response for test dataset
y_pred = clf.predict(X_test)
# Model Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
# --- Linear-kernel SVM on KNN-imputed data ----------------------------------
from sklearn import svm

# Support-vector classifier with a linear kernel.
clf = svm.SVC(kernel='linear')

# Re-apply the KNN imputer to the full feature matrix.
# NOTE(review): fitting the imputer before the split lets test rows influence
# the fill values — consider splitting first and fitting on train only.
X = knn.fit_transform(X)

# 80/20 split with the seed used throughout the script.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Fit, predict, and report held-out accuracy.
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
# --- Linear-kernel SVM on MICE-imputed data ---------------------------------
from sklearn import svm

# Fresh linear-kernel SVM for the MICE-imputed variant.
clf = svm.SVC(kernel='linear')

# Re-apply the iterative (MICE) imputer to the full feature matrix.
# NOTE(review): as above, imputing before the split leaks test-set statistics
# into the imputation model — consider splitting first.
X = imp.fit_transform(X)

# Same 80/20 split and seed as the other experiments.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

# Fit, predict, and report held-out accuracy.
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

# (End of notebook export — Medium article footer removed.)