#!/usr/bin/python

#-------------------------------------------------------------------------------
#License GPL v3.0
#Author: Alexandre Manhaes Savio <alexsavio@gmail.com>
#Grupo de Inteligencia Computational <www.ehu.es/ccwintco>
#Universidad del Pais Vasco UPV/EHU
#Use this at your own risk!
#-------------------------------------------------------------------------------

import os
import sys
import time
import numpy as np
import scipy.io as sio
import scipy as sci

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from sklearn import svm
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier

from IPython.core.debugger import Tracer; debug_here = Tracer()

#Directory holding the Salinas scene and its ground-truth labelling
rootdir = '/home/alexandre/Dropbox/Documents/phd/work/salinas'

#Full paths of the two MATLAB files
dataf = os.path.join(rootdir, 'Salinas_corrected.mat')
trutf = os.path.join(rootdir, 'Salinas_gt.mat')

#loadmat returns a dict keyed by MATLAB variable name; pick the one array needed
truth = sio.loadmat(trutf)['salinas_gt']
data  = sio.loadmat(dataf)['salinas_corrected']

#Distinct labels found in the ground truth and how many of them there are
targets = np.unique(truth)
nclass  = len(targets)

#Number of pixels per label, in the same order as targets.
#Counted label by label: a histogram with nclass equal-width bins only lines
#up with the labels when they are contiguous, evenly spaced integers.
sizes   = np.array([np.sum(truth == t) for t in targets])

#Per-pixel prediction map and fold-assignment map; -1 means "not set yet"
preds = np.ones(truth.shape) * -1
cv    = np.ones(truth.shape) * -1

#folding using the ground truth (cheating? could we use ICA?)
#Each pixel receives a fold index in [0, nfolds), written into cv.
#Folds are stratified: every class is spread over the folds separately.
nfolds = 10
for c in np.arange(nclass):
    #pixels of the current class, and how many there are
    #(counted from the mask so the number of fold labels always matches)
    cmask = (truth == targets[c])
    csize = int(np.count_nonzero(cmask))

    #uniform random distribution
    #fc = np.random.randint (0, nfolds, csize)

    #stratified distribution
    #floor division: a float repeat count is rejected by np.repeat on Python 3
    nsamps   = csize // nfolds
    rem      = csize % nfolds #random distribution of remainders
    samps    = np.repeat(np.arange(nfolds), nsamps)
    #randint excludes the upper bound, so this draws from 0..nfolds-1 exactly
    #like the deprecated random_integers(0, nfolds-1, rem)
    remsamps = np.random.randint(0, nfolds, rem)
    samps    = np.hstack((samps,remsamps))

    #shuffle so fold membership does not follow pixel order
    np.random.shuffle(samps)

    cv[cmask] = samps

#classify
#For every fold: train on the pixels outside the fold, predict the held-out
#pixels, store the fold accuracy in accs and the predictions in preds.
accs = np.zeros(nfolds)
for f in np.arange(nfolds):
    print ('Fold: ' + str(f))

    #boolean pixel masks: held-out fold and everything else
    test_mask  = (cv == f)
    train_mask = (cv != f)

    #train
    rdata = data [train_mask,:]
    rtarg = truth[train_mask]

    time0 = time.time()

    #1 (takes forever)
    #OnevsRest SVM
    #classif = OneVsRestClassifier(svm.SVC(kernel='linear'))

    #2
    #Random Forest
    #classif = RandomForestClassifier(n_estimators=50, max_depth=None, min_samples_split=1, random_state=None)

    #3
    #Decision Tree
    classif = tree.DecisionTreeClassifier()
    classif.fit(rdata, rtarg)

    time1 = time.time()
    print('Training time: ' + str(time1 - time0) + 's')

    #test
    sdata = data [test_mask,:]
    starg = truth[test_mask]
    sn    = len(starg)

    #take the timestamps right around predict() so that the reported test
    #time covers the prediction itself and nothing else
    time2 = time.time()
    pre   = classif.predict(sdata)
    time3 = time.time()

    print('Test time: ' + str(time3 - time2) + 's')

    #astype returns a new array, so the converted result must be kept
    pre = pre.astype(starg.dtype)
    accs[f] = float(np.sum(starg == pre))/sn
    preds[test_mask] = pre

    print ('Fold accuracy: ' + str(accs[f]) + '\n')

#Summary over all folds: mean accuracy followed by its standard deviation in
#parentheses, both with two decimals
mean_acc   = accs.mean()
std_acc    = accs.std()
result_str = '%.2f (%.2f)' % (mean_acc, std_acc)
print ('Mean Accuracy and StdDev: %s' % result_str)

#Side-by-side figure: ground truth on the left, predictions on the right
panels = ((truth, 'GROUND TRUTH'),
          (preds, 'PREDICTION Accuracy: ' + result_str ))

for idx, (img, caption) in enumerate(panels, 1):
    plt.subplot(1, 2, idx)
    plt.imshow(img, interpolation='nearest')
    plt.title(caption)

plt.show()

#RESIZE data
#Collapse the first two axes into one, so X has one row per pixel and
#data.shape[2] columns
dim1 = data.shape[0] * data.shape[1]
X = data.reshape((dim1, data.shape[2]))

##PCA and Kernel PCA
from sklearn.decomposition import PCA, KernelPCA

##PCA
#default-constructed PCA, fitted on X and applied to it in one call
pca   = PCA()
X_pca = pca.fit_transform(X)

##Kernel PCA
#RBF kernel; fit_inverse_transform=True is what makes inverse_transform usable
#NOTE(review): kernel PCA works on an n_samples x n_samples kernel matrix;
#with one row per pixel this is presumably very large - confirm it fits in memory
kpca = KernelPCA(
    kernel="rbf",
    gamma=10,
    fit_inverse_transform=True,
)
X_kpca = kpca.fit_transform(X)
#map the projected samples back to the input space
X_back = kpca.inverse_transform(X_kpca)

##NMF, similar to PCA, but with only positive values in data
from sklearn.decomposition import NMF

#n_components was never assigned before this point, so the constructor call
#raised NameError. None leaves the choice to the estimator, in line with the
#default-constructed PCA() and FastICA() used elsewhere in this script.
#TODO(review): set the number of components actually wanted.
n_components = None

#No trailing comma after the call: it used to wrap the estimator in a 1-tuple,
#so nmf.fit_transform failed with AttributeError.
#NOTE(review): beta and sparseness are presumably only accepted by older
#scikit-learn releases - confirm against the installed version.
nmf = NMF(n_components=n_components, init='nndsvda', beta=5.0, tol=5e-3, sparseness='components')

X_nmf = nmf.fit_transform(X)


##FastICA
from sklearn.decomposition import FastICA

fica = FastICA()

#S_: X transformed by the fitted model (the estimated sources)
S_ = fica.fit(X).transform(X)

#A_: estimated mixing matrix.
#Prefer the fitted mixing_ attribute; get_mixing_matrix() is kept only as a
#fallback for scikit-learn releases that do not expose that attribute.
A_ = fica.mixing_ if hasattr(fica, 'mixing_') else fica.get_mixing_matrix()

##GMM model selection through BIC
#from sklearn import mixture

##RESIZE data
#dim1 = data.shape[0] * data.shape[1]
#X = np.reshape(data, (dim1, data.shape[2]))


#lowest_bic = np.infty
#bic = []
#gmms = []
#n_components_range = np.arange(1, 30)

##cv_types = ['spherical', 'tied', 'diag', 'full']
#cv_types = ['diag', 'full']
#for cv_type in cv_types:
#    for n_components in n_components_range:
#        print ('GMM ' + cv_type + ' ' + str(n_components))

#        # Fit a mixture of gaussians with EM
#        time0 = time.time()

#        gmm = mixture.GMM(n_components=n_components, covariance_type=cv_type)
#        gmm.fit(X)

#        gmms.append(gmm)
#        bic.append(gmm.bic(X))

#        time1 = time.time()

#        print("Training time: " + "%.2f" % ((time1 - time0)/60) + " min")
#        print("BIC: " + str(bic[-1]))

#        if bic[-1] < lowest_bic:
#            lowest_bic = bic[-1]
#            best_gmm = gmm

#    np.save(rootdir + os.path.sep + cv_type + 'gmm.npy', gmm)
#    np.save(rootdir + os.path.sep + cv_type + 'gmm_bic.npy', bic)

