#ARTIFICIAL NEURAL NETWORK

from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from keras.layers import Activation
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
import matplotlib.patches as mpl_patches
from sklearn.metrics import mean_absolute_error
from math import sqrt
plt.rcParams["font.family"] = "Cambria"
from matplotlib.lines import Line2D
import time
t = time.process_time()


def mean_absolute_percentage_error(y_test, y_predicted):
    """Return the mean absolute percentage error (MAPE), in percent.

    Both inputs are flattened to 1-D before they are compared.  Without
    that, a target vector of shape (n,) combined with a column of
    predictions of shape (n, 1) broadcasts to an (n, n) matrix, and the
    mean is then taken over every target/prediction pairing instead of
    over the matching pairs only.

    Parameters
    ----------
    y_test : array-like
        Observed values.  They are used as the denominator, so a zero
        entry produces inf/nan in the result.
    y_predicted : array-like
        Predicted values, expected to hold as many elements as ``y_test``.

    Returns
    -------
    float
        ``mean(|(y_test - y_predicted) / y_test|) * 100``.
    """
    y_test = np.asarray(y_test, dtype=float).ravel()
    y_predicted = np.asarray(y_predicted, dtype=float).ravel()
    return np.mean(np.abs((y_test - y_predicted) / y_test)) * 100

# Read the ANN data set: columns 0-4 are the inputs, column 8 is the target.
dataset = loadtxt("ANN V4.csv", delimiter=",")
X, y = dataset[:, :5], dataset[:, 8]

# Rescale every input column with MinMaxScaler (default 0-1 range).
# NOTE(review): the scaler is fitted on all rows before the split below, so
# validation/test rows influence the scaling -- confirm this is intended.
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit(X).transform(X)

# First hold out 30 % of the rows, then cut that hold-out in half to obtain
# the validation and test sets (70 / 15 / 15 overall).
X_train, X_val_and_test, y_train, y_val_and_test = train_test_split(
    X_scale, y, test_size=0.3, random_state=3
)
X_val, X_test, y_val, y_test = train_test_split(
    X_val_and_test, y_val_and_test, test_size=0.5, random_state=3
)
print(
    X_train.shape, X_val.shape, X_test.shape,
    y_train.shape, y_val.shape, y_test.shape,
)

# Lists that collect the metrics computed after prediction.
rsqrddata, msedata, maedata, mapedata = [], [], [], []

# Network: 5 inputs -> Dense(32, relu) -> linear Activation layer
# -> Dense(20, sigmoid) -> Dense(1, relu).
model = Sequential(
    [
        Dense(32, input_dim=5, activation='relu'),
        Activation("linear"),
        Dense(20, activation='sigmoid'),
        Dense(1, activation='relu'),
    ]
)
model.summary()

# Train on mean squared error with Adam; MAE, MAPE and MSE are tracked too.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae', 'mape', 'mse'],
)

# NOTE(review): the NumPy seed is set only after the layers were created --
# confirm whether it is meant to make the weight initialisation reproducible.
np.random.seed(3)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=2,
    verbose=0,
)

# Score the trained model on the test split (the return value is not stored).
model.evaluate(X_test, y_test)

#Predict
# The single-unit output layer yields predictions of shape (n, 1) while
# y_test has shape (n,).  Flatten the predictions so that element-wise
# metrics compare matching pairs instead of broadcasting to an (n, n) matrix
# (which previously corrupted the MAPE value).
y_predicted = model.predict(X_test).ravel()

#Metrics
Rsquared = r2_score(y_test, y_predicted)
print("Rsquared : %f" % Rsquared)
mse = mean_squared_error(y_test, y_predicted)
print("MSE : %f" % mse)
mae = mean_absolute_error(y_test, y_predicted)
print("MAE: %f" % mae)
mape = mean_absolute_percentage_error(y_test, y_predicted)
print("MAPE: %f" % mape)
rmse = sqrt(mse)
print("RMSE: %f" % rmse)

# Record this run's metrics in the accumulator lists.
rsqrddata.append(Rsquared)
msedata.append(mse)
maedata.append(mae)
mapedata.append(mape)

#GRAPHS
# Parity plot: predicted vs. actual values for the test split.
fig, ax = plt.subplots()
ax.scatter(y_test, y_predicted, s=10, color='mediumseagreen', linewidths=1)
# Black 1:1 reference line spanning the range of the full target column.
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k-', lw=1.75)
ax.set_xlabel('Actual (%)', fontsize='x-large')
ax.set_ylabel('Predicted (%)', fontsize='x-large')
# LinearRegression needs 2-D inputs, hence the reshape to column vectors.
y_test, y_predicted = y_test.reshape(-1, 1), y_predicted.reshape(-1, 1)
# Red line: least-squares fit of the predictions against the actual values.
ax.plot(y_test, LinearRegression().fit(y_test, y_predicted).predict(y_test), color="red", lw=1.75)
ax.set_title('CH4 Conversion')

# Legend text: the metrics computed earlier in the script.
labels = [
    "R² = {0:.3g}".format(Rsquared),
    "RMSE = {0:.2g}".format(rmse),
    "MAE = {0:.2g}".format(mae),
]
# Exactly one handle per label: a red line for the first entry and an
# invisible rectangle for the others, so only the text shows.  The handle
# count is derived from the labels so the two lists can never disagree.
handles = [Line2D([0], [0], color='red', lw=3)] + [
    mpl_patches.Rectangle((0, 0), 1, 1, fc="white", ec="white", lw=0, alpha=0)
] * (len(labels) - 1)
ax.legend(handles, labels, loc='best', fontsize='xx-large',
          fancybox=True, framealpha=0.7)
plt.show()


#RANDOM FOREST

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.patches as mpl_patches
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error
plt.rcParams["font.family"] = "Cambria"
from matplotlib.lines import Line2D
plt.rcParams["font.family"] = "Cambria"

# Load the Random Forest data set.  The discarded notebook-style preview
# expressions (head/describe) were removed: in a script their results are
# thrown away and nothing is displayed.
data = pd.read_csv('RF V4.csv')
print  ('The shape of our data is:', data.shape)

# One-hot encode any categorical columns.
data = pd.get_dummies(data)
# Target: the 'H2' column.  Every remaining column is used as a feature.
labels = np.array(data['H2'])
data = data.drop('H2', axis=1)
# Keep the feature names; the baseline and the importance report look
# columns up by name after `data` becomes a plain array.
data_list = list(data.columns)
data = np.array(data)

from sklearn.model_selection import train_test_split
# Hold out 20 % of the rows for testing.
train_data, test_data, train_labels, test_labels = train_test_split(
    data, labels, test_size=0.2, random_state=42
)

# Report the shape of each split.
for caption, split in (
    ('Training Data Shape:', train_data),
    ('Training Labels Shape:', train_labels),
    ('Testing Data Shape:', test_data),
    ('Testing Labels Shape:', test_labels),
):
    print(caption, split.shape)

# Baseline: use the 'CaO/C' feature column itself as the prediction and
# report its mean absolute deviation from the test labels.
cao_c_column = data_list.index('CaO/C')
baseline_preds = test_data[:, cao_c_column]
baseline_errors = abs(baseline_preds - test_labels)
print('Average baseline error: ', round(np.mean(baseline_errors), 4))

from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor(n_estimators = 2000, min_samples_leaf=3, max_features=2, max_depth=8, random_state = 60)
rf.fit(train_data, train_labels)

# Predict on the held-out test rows.  This assignment was missing, so every
# later use of `predictions` raised NameError.
predictions = rf.predict(test_data)

#Metrics
Rsquared = r2_score(test_labels, predictions)
print("Rsquared : %f" % Rsquared)
_mse = mean_squared_error(test_labels, predictions)
print("MSE : %f" % _mse)
mae = mean_absolute_error(test_labels, predictions)
print("MAE: %f" % mae)
rmse = sqrt(_mse)
print("RMSE: %f" % rmse)

# Absolute error per test sample, and its mean.
errors = abs(predictions - test_labels)
print('Mean Absolute Error:', round(np.mean(errors), 6))

# Percentage error per sample; "accuracy" is reported as 100 - MAPE.
mape = 100 * (errors / test_labels)
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 4), '%.')
print('MAPE:', round(np.mean(mape), 4), '%.')

# Pair every feature name with its importance (rounded to 4 decimals) and
# list them from most to least important.
importances = list(rf.feature_importances_)
feature_importances_ = [(feature, round(importance, 4)) for feature, importance in zip(data_list, importances)]
feature_importances_ = sorted(feature_importances_, key = lambda x: x[1], reverse = True)
# Plain loop: printing is a side effect, so no throwaway list is built.
for pair in feature_importances_:
    print('Variable: {:20} Importance: {}'.format(*pair))


#Graph
# Parity plot: Random Forest predictions vs. actual test labels.
fig, ax = plt.subplots()
ax.scatter(test_labels, predictions, s=10, color='MEDIUMSEAGREEN', linewidths=1)
# Black 1:1 reference line spanning the range of the full label array.
ax.plot([labels.min(), labels.max()], [labels.min(), labels.max()], 'k-', lw=1.75)
ax.set_xlabel('Actual (%)', fontsize='x-large')
ax.set_ylabel('Predicted (%)', fontsize='x-large')
# LinearRegression needs 2-D inputs, hence the reshape to column vectors.
test_labels, predictions = test_labels.reshape(-1, 1), predictions.reshape(-1, 1)
# Red line: least-squares fit of the predictions against the actual values.
ax.plot(test_labels, LinearRegression().fit(test_labels, predictions).predict(test_labels), color="red", lw=1.75)

# NOTE(review): the target column is named 'H2' but the title says 'H2O' --
# confirm which one is intended.
ax.set_title('H2O Reformer (%): Actual vs Predicted')

# From here on `labels` holds the legend text instead of the target array
# (same rebinding as before; the array is no longer needed at this point).
labels = [
    "R² = {0:.3g}".format(Rsquared),
    "RMSE = {0:.2g}".format(rmse),
    "MAE = {0:.2g}".format(mae),
]
# Exactly one handle per label: a red line for the first entry and an
# invisible rectangle for the others, so only the text shows.  The handle
# count is derived from the labels so the two lists can never disagree.
handles = [Line2D([0], [0], color='red', lw=3)] + [
    mpl_patches.Rectangle((0, 0), 1, 1, fc="white", ec="white", lw=0, alpha=0)
] * (len(labels) - 1)
ax.legend(handles, labels, loc='best', fontsize='xx-large',
          fancybox=True, framealpha=0.7)

plt.show()