#include<iostream>
#include<fstream>
#include<vector>
#include<cmath>
#include"../headers/rand.h"
#include<cfloat>
#include"../headers/indexes.h"
#include<sstream>
#include<iomanip>
#include<map>
#include<sys/resource.h>

#define PRINTSAMPLES

using namespace std;

#include"../headers/rnn.h"
#include"../headers/lrnn.h"
#include"../headers/model.h"

#ifdef RTRL
#include"../headers/rtrl.h"
#elif defined(RKN)
#include"../headers/rankn.h"
#elif defined(BPTT)
#ifndef TRUNCATE
#define TRUNCATE 15
#endif
#include"../headers/bptt.h"
#elif defined(PRKN)
#include"../headers/prankn.h"
#else
#include"../headers/rankone.h"
#endif

//Selection of the network class used by the experiment.
//NETWORK expands to the chosen class name; `layered` tells the run-time code
//whether one of the layered architectures was selected.
//The tests below are applied in order, so when several switches are defined
//the last matching one wins.
extern const bool layered;

//Default choice when no switch is given.
#define NETWORK RNN
#ifdef USE_RNN
#undef NETWORK
#define NETWORK RNN
#endif
#ifdef USE_LRNN
#undef NETWORK
#define NETWORK LRNN
#endif
//Layered architectures: these also set the `layered` flag.
#if defined(LAYERED)||defined(LLAYERED)||defined(NPLAYERED)||defined(NPLLAYERED)
#undef NETWORK
#ifdef LAYERED
#define NETWORK LayeredRNN
#elif defined(LLAYERED)
#define NETWORK LayeredLRNN
#elif defined(NPLAYERED)
#define NETWORK NPLayeredRNN
#elif defined(NPLLAYERED)
#define NETWORK NPLayeredLRNN
#endif
const bool layered=true;
#else
const bool layered=false;
#endif
#ifdef USE_LMBN
#undef NETWORK
#define NETWORK LMBN
#endif
#ifdef USE_CNN
#undef NETWORK
#define NETWORK CNN
#endif
#ifdef USE_PCNN
#undef NETWORK
#define NETWORK PCNN
#endif
#ifdef USE_LSTM
#undef NETWORK
#define NETWORK LSTM
#endif
#ifdef USE_ERFPCNN
//NETWORK is always defined at this point: it must be undefined first,
//otherwise this is an incompatible macro redefinition.
#undef NETWORK
#define NETWORK ERFPCNN
#endif

//Selection of the optimizer class, exposed as the typedef `Optimizer`.
//The algorithm is chosen by RTRL / RKN / BPTT / GRK1 / PRKN (first match wins,
//rank-one online optimizer otherwise). RANK, when defined, sets the rank of the
//RKN and PRKN variants (1 otherwise). With USE_METRIC the optimizer is
//additionally parameterized by METRIC (QDMetric unless METRIC is predefined).
#ifndef USE_METRIC
 #ifdef RTRL
typedef RTRLOptim<> Optimizer;
 #elif defined(RKN)
  #ifdef RANK
typedef OnlineOptim<RANK> Optimizer;
  #else
typedef OnlineOptim<1> Optimizer;
  #endif
 #elif defined(BPTT)
typedef BPTTOptim<TRUNCATE,false> Optimizer;
 #elif defined(GRK1)
typedef GOnlineRk1Optim<> Optimizer;
 #elif defined(PRKN)
  #ifdef RANK
typedef OnlineRkNOptimParam<RANK> Optimizer;
  #else
//NOTE(review): uses the same template name as the three other PRKN branches
//(this one previously read OnlineOptimRkNOptimParams) - confirm against prankn.h.
typedef OnlineRkNOptimParam<1> Optimizer;
  #endif
 #else
typedef OnlineRk1Optim<> Optimizer;
 #endif
#else
 #ifndef METRIC
  #define METRIC QDMetric
 #endif
 #ifdef RTRL
typedef RTRLOptim<METRIC> Optimizer;
 #elif defined(RKN)
  #ifdef RANK
   typedef OnlineOptim<RANK,METRIC> Optimizer;
  #else
   typedef OnlineOptim<1,METRIC> Optimizer;
  #endif
 #elif defined(BPTT)
  typedef BPTTOptim<TRUNCATE,false,METRIC> Optimizer;
 #elif defined(GRK1)
  typedef GOnlineRk1Optim<METRIC> Optimizer;
 #elif defined(PRKN)
  #ifdef RANK
   typedef OnlineRkNOptimParam<RANK,METRIC> Optimizer;
  #else
   typedef OnlineRkNOptimParam<1,METRIC> Optimizer;
  #endif
 #else
  typedef OnlineRk1Optim<METRIC> Optimizer;
 #endif
#endif
//
//Compile-time switch: remove this definition to silence the progress output.
#define VERBOSE

#ifdef VERBOSE
const bool verbose=true;
#else
const bool verbose=false;
#endif

//Stopping budget, filled in by launch_exp from the command line:
//maxiter is used when stopbyiter is true, maxtime otherwise.
int maxtime;int maxiter;
//Run-time mirrors of the compile-time switches STOPBYITER, UNRANDOMIZE and PRINTSAMPLES.
extern const bool stopbyiter;
extern const bool unrandomize;
extern const bool printsamples;
#ifdef STOPBYITER
const bool stopbyiter=true;
#else
const bool stopbyiter=false;
#endif
#ifdef UNRANDOMIZE
const bool unrandomize=true;
#else
const bool unrandomize=false;
#endif
#ifdef PRINTSAMPLES
const bool printsamples=true;
#else
//Without this fallback the flag would be declared but never defined
//as soon as PRINTSAMPLES is not set.
const bool printsamples=false;
#endif
const long double epsilon=10*LDBL_EPSILON;//to avoid divisions by 0

//Alphabet size, i.e. how many distinct symbols occur in the data.
long alph;
//Training data exactly as read (raw bytes).
vector<unsigned char> rawdatum;
//Validation data exactly as read (raw bytes).
vector<unsigned char> rawvdatum;
//Training data transcribed as integers.
vector<long> datum;
//Validation data transcribed as integers.
vector<long> vdatum;
//Dictionary translating between raw symbols and their integer representation.
MyDict<unsigned char> dict;

//Reads data, prints results, etc.
#include"io.cpp"

//Tells whether the symbol at position pos of datum counts as a prediction target.
//When built with XOR, only the symbol immediately following an '=' qualifies;
//in every other build all positions qualify.
bool isToPredict(const vector<long>&datum,long pos)
{
#ifndef XOR
 return true;
#else
 //for the XOR problem: the first position has no predecessor, hence never qualifies
 if(pos<=0)return false;
 return datum[pos-1]==dict.index['='];
#endif
}

//Draws an index in [0,alph) at random, index i being chosen with probability
//proportional to prob[i]. prob must hold at least alph entries; it does not
//need to be normalized since the draw is scaled by the sum of the entries.
long samplefrom(const vector<long double>&prob)
{
 //Total mass of the (possibly unnormalized) distribution.
 long double total=0;
 for(long i=0;i<alph;i++)total+=prob[i];

 //Random threshold in the range spanned by the total mass.
 long double remaining=total*alea();

 //Consume the mass entry by entry until the threshold is crossed.
 long picked=0;
 while(picked<alph&&remaining>=0){
  remaining-=prob[picked];
  picked++;
 }
 picked--;//the loop stepped one past the selected entry

 return picked<0?0:picked;//negative only if the loop never ran; should never happen
}

//Runs one training experiment with the given network and optimizer classes.
//Defined further down in this file; returns 0 on success, non-zero on error.
template<class Network,class Optimizer>
int launch_exp(int argc,char**argv);
//#include"modelsetup.cpp" //Builds the links between the optimizer and the network, provides the functions computing network derivatives

//Program entry point: runs the experiment with the network and optimizer
//selected at compile time, and reports its status as the process exit code
//(so that a usage error is visible to the calling shell).
int main(int argc,char**argv)
{
 return launch_exp<NETWORK,Optimizer>(argc,argv);
}

template <class Network,class Optimizer>
int launch_exp(int argc,char**argv){
 long n,lays,conn,exposuretime=1;
 int nlay;
 vector<int> lan;
 if(!layered){
	 if(argc<5){cerr<<"Usage: <command> <network_size> <network_connectivity> <maxtime_or_maxiter> <train_file> [valid_file]"<<endl;return -1;}
 }
 else{
	 if(argc<5){cerr<<"Usage: <command> <number_of_layers> <layers_size> <maxtime_or_maxiter> <train_file> [valid_file]"<<endl;return -1;}
 }
 if(!layered){
 	n=atoi(argv[1]);conn=atoi(argv[2]); //negative conn has special meaning, see below
 }
 else{
	nlay=atoi(argv[1]);
	lays=atoi(argv[2]);
	exposuretime=nlay;
	lan=vector<int>(nlay,lays);
 }

 if(stopbyiter)maxiter=atoi(argv[3]);
 else{maxtime=atoi(argv[3]);maxiter=-1;}

 if(!unrandomize)randomize();
 bool valid=false;if(argc>=6)valid=true;
 if(valid){
	 ifstream f2(argv[5]);getAsciiData(f2,rawvdatum,vdatum);f2.close();
 }
 ifstream f(argv[4]);getAsciiData(f,rawdatum,datum);f.close();

 Network network;

#if defined(LAYERED)||defined(LLAYERED)||defined(NPLAYERED)||defined(NPLLAYERED)
 network.preSetup(nlay,lan);
#endif

 network.alph=alph;

#if !defined(LAYERED)&&!defined(LLAYERED)&&!defined(NPLAYERED)&&!defined(NPLLAYERED)
 if(conn<=0)conn=network.suggestedConn();
 if(conn>=n)conn=n;
 if(verbose)cout<<"Using graph with "<<n<<" nodes and "<<conn<<" edges per node (including a loop for every node)."<<endl<<endl;
 network.buildErdosRenyi_loops(n,conn);
#endif

 network.Setup();
 network.initWeights();

 long t;
 Optimizer optimizer;

 connectModelToOptimizer(network,optimizer);
 long double transrate=10.l/(n+0.l);
 long double outrate=10.l/(n+0.l);

 pair<long double,long>llinfo;
 long double ll=0,valid_ll=-INFINITY,validllmax=-INFINITY,cumll=0;
 long class_err=0,valid_class_err=0;
 long double regul;
 network.setToStartAct();//TODO: should reset internal gradient of the optimizer to 0, or else not reset to StartAct
 for(int iter=0;iter!=maxiter;iter++){
	 ll=0;class_err=0;
	 //network.setToStartAct();//TODO: should reset internal gradient of the optimizer to 0, or else not reset to StartAct
	 for(t=0;t<datum.size();t++){
		 long double tottime=iter*datum.size()+t+1.;
		 //optimizer.transrate=transrate;
		 //optimizer.outrate=outrate;
#ifdef USE_METRIC
		 optimizer.transrate=1.;
		 optimizer.outrate=1.;
#else
		 optimizer.transrate=0.03;
		 optimizer.outrate=0.03;
#endif
		 optimizer.outmetric_gamma=1. - pow(1.-1./sqrt(tottime),1./exposuretime);
		 optimizer.transmetric_gamma=1.-pow(1.-1./sqrt(tottime),1./exposuretime);
		 network.computePred();
		 long c=datum[t];long double p=network.pred[c];
		 network.observe(c);
		 regul=1./(t+2.);
		 ll+=logl(p*(1.-regul)+regul/alph);
		 cumll+=logl(p*(1.-regul)+regul/alph);
		 if(alea()<100./tottime){//logging with decreasing frequency to reduce log size...
			 if(printsamples)printSample(&network,499,exposuretime);
			 cerr<<tottime<<" "<<cumll/log(2)<<" "<<cumll/log(2)/tottime<<endl;
		 }

		 for(long y=0;y<alph;y++)if(y!=c&&network.pred[y]>=p){class_err++;break;}
		 optimizer.MakeGradStep(true);
		 optimizer.PrepareTransition();
		 network.computeNextAct();
		 for(long et=1;et<exposuretime;et++){
		 	optimizer.MakeGradStep(false);
		 	optimizer.PrepareTransition();
		 	network.computeNextAct();
		 }

		 if(!stopbyiter)if(elapsedtime()>maxtime)return 0;//maxtime exceeded
	 }
 if(verbose)printSample(&network,500);
 cout<<endl<<"Iter: "<<iter<<endl;
 cout<<"LL (bits): "<<ll/logl(2.)<<endl;
 cout<<"class err: "<<class_err<<endl;
 cout<<endl<<endl;
 }


 return 0;
}
