
//..
//..DGEMM Module ..
//..developed by GTS..
//..
//..Intel(R) Corporation (C) 2018
//..

//..includes
#include "Include/Main.h"
#include "mkl.h"
#include <immintrin.h>
//#include <pmmintrin.h>
#include <iostream>
#include <vector>
#include <bitset>
#include <array>
#include <string>
//#include <conio.h>
#include <memory>

//..defines
#ifdef __WIN_OS__
#include <Windows.h>
#endif
#ifdef __LIN_OS__
#include <stdio.h>
#include <malloc.h>
#include <stdlib.h>
#include <ivec.h>
#include <fvec.h>
#include <dvec.h>
#include <math.h>
#include <string.h>
#include <iostream>
#include <sstream>
#include <fstream>
#include <string>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <stdlib.h>
#include <dlfcn.h>

#endif

#ifdef __LIN_OS__

// Strips leading and trailing space characters from str and returns the result.
// Only ' ' is treated as padding; tabs and newlines are left untouched.
// An empty string, or one made up solely of spaces, yields an empty string.
string trim(const string& str)
{
	const size_t first = str.find_first_not_of(' ');
	if (string::npos == first)
	{
		// Nothing but padding: the trimmed result is empty, not the original text.
		return string();
	}
	const size_t last = str.find_last_not_of(' ');
	return str.substr(first, (last - first + 1));
}

#endif

#ifdef __LIN_OS__
// Entry points of the detection shared library. Run_AVX() fills each pointer
// with dlsym() using the symbol of the same name and calls it straight away.
std::string (*OutputDLLVersion)(void);
bool(*bISFeature_OS_Supported)(void);
int(*max_avx_supported)(void);
bool(*bCheckAVX)(void);
bool(*bCheckAVX2)(void);
bool(*bCheckAVX_512BW)(void);
bool(*bCheckAVX_512CD)(void);
bool(*bCheckAVX_512DQ)(void);
bool(*bCheckAVX_512ER)(void);
bool(*bCheckAVX_512F)(void);
bool(*bCheckAVX_512IFMA52)(void);
bool(*bCheckAVX_512PF)(void);
bool(*bCheckAVX_512VBMI)(void);
bool(*bCheckAVX_512VL)(void); 
#endif

#ifdef __WIN_OS__

// Signatures of the DetectUtils DLL exports that Run_AVX() resolves with
// GetProcAddress(): version string, OS-support query and max AVX level.
typedef char *(*pOutputDLLVersion)(void);
typedef bool(*pISFeature_OS_Supported)(void);
typedef int(*pMaxAVX_Support)(void);
#endif


// Result of the detection library's "is feature OS supported" query; set by
// Run_AVX() and reported there as whether the operating system supports AVX.
bool isAVX_OS_Supported;

//Max AVX Detected
// 0 = No AVX is supported
// 1 = AVX is max supported
// 2 = AVX2 is max supported
// 3 = AVX512 is max supported
int AVX_Max_Supp;


void Run_AVX()
{
	std::string sResult = "Fail --- DGEMM Stress Testing complete ---";


#ifdef __WIN_OS__
	HINSTANCE hInstanceLoadDLL;


#ifdef __WIN_32__
	if (!(hInstanceLoadDLL = LoadLibraryA("c:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool\\DetectUtils.dll")))
#elif defined __WIN_64__
	if (!(hInstanceLoadDLL = LoadLibraryA("c:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool 64bit\\DetectUtils64.dll")))
#endif
	{
#ifdef __WIN_32__
		cout << ".." << endl << "..could not load DetectUtils.dll " << endl << ".." << endl;
#elif defined __WIN_64__
		cout << ".." << endl << "..could not load DetectUtils64.dll " << endl << ".." << endl;
#endif
		exit(1);
	}

#endif

#ifdef __WIN_OS__

	pOutputDLLVersion pODV = (pOutputDLLVersion)GetProcAddress(hInstanceLoadDLL, "DUdll_OutputDLLVersion");
	pISFeature_OS_Supported pISFOSS = (pISFeature_OS_Supported)GetProcAddress(hInstanceLoadDLL, "DUdll_bISFeature_OS_Supported");
	pMaxAVX_Support pMaxAVX = (pMaxAVX_Support)GetProcAddress(hInstanceLoadDLL, "DUdll_Max_AVX_Supported");
	
	if (!pODV)
	{
#ifdef __WIN_32__
		cout << ".." << endl << "..could not load OutputDLLVersion from DetectUtils.dll " << endl << ".." << endl;
#elif defined __WIN_64__
		cout << ".." << endl << "..could not load OutputDLLVersion from DetectUtils64.dll " << endl << ".." << endl;
#endif
		exit(1);
	}
	else
	{
		//output DLL version
		char* cDLLversion = pODV();
		std::string sDLLversion = pODV();
#ifdef __WIN_32__
		cout << "..DetectUtils DLL Version - " << cDLLversion << endl;
		strPFOut.append("\nDetectUtils DLL Version - " + sDLLversion + "\n");
#elif defined __WIN_64__
		cout << "..DetectUtils64 DLL Version - " << cDLLversion << endl;
		strPFOut.append("\nDetectUtils64 DLL Version - " + sDLLversion + "\n");
#endif
	}


	//Output AVX Features Detection Section to Screen
	std::cout << std::endl << " --- CPU Features Detection --- " << std::endl;
	strPFOut.append("\n  --- CPU Features Detection --- \n");


	if (!pISFOSS)
	{
#ifdef __WIN_32__
		cout << ".." << endl << "..could not load ISFeature_OS_Supported from DetectUtils dll " << endl << ".." << endl;
#elif defined __WIN_64__
		cout << ".." << endl << "..could not load ISFeature_OS_Supported from DetectUtils64 dll " << endl << ".." << endl;
#endif
		exit(1);
	}
	else
	{
		isAVX_OS_Supported = pISFOSS();
	}

	if (!pMaxAVX)
	{
#ifdef __WIN_32__
		cout << ".." << endl << "..could not load Max_AVX_Supported from DetectUtils dll " << endl << ".." << endl;
#elif defined __WIN_64__
		cout << ".." << endl << "..could not load Max_AVX_Supported from DetectUtils64 dll " << endl << ".." << endl;
#endif
		exit(1);
	}
	else
	{
		AVX_Max_Supp = pMaxAVX();
	}

	//unload DLL
	FreeLibrary(hInstanceLoadDLL);

#endif

#ifdef __LIN_OS__

	void *handle;
	char *error;
	int x, y, z;

	handle = dlopen("./libDetectUtils64.so.1.0", RTLD_LAZY);
	if (!handle) {
		fputs(dlerror(), stderr);
		exit(1);
	}

	//DLLVersion
	OutputDLLVersion = (std::string(*)(void))dlsym(handle, "OutputDLLVersion");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	std::cout << "..DetectUtils.so Version.. --> " << trim(OutputDLLVersion()) << endl;
	strPFOut.append("\n..DetectUtils.so Version.. --> " + trim(OutputDLLVersion()) + "\n");

	//Output AVX Features Detection Section to Screen
	std::cout << std::endl << " --- CPU Features Detection --- " << std::endl;
	strPFOut.append("\n  --- CPU Features Detection --- \n");

	//Is AVX supported by OS
	bISFeature_OS_Supported = (bool(*)(void))dlsym(handle, "bISFeature_OS_Supported");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	isAVX_OS_Supported = bISFeature_OS_Supported();

	//Max AVX Supported
	max_avx_supported = (int(*)(void))dlsym(handle, "max_avx_supported");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX_Max_Supp = max_avx_supported();

	//AVX
	bCheckAVX = (bool(*)(void))dlsym(handle, "bCheckAVX");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX_HW = bCheckAVX();

	//AVX2
	bCheckAVX2 = (bool(*)(void))dlsym(handle, "bCheckAVX2");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX2_HW = bCheckAVX2();

	//AVX_512BW
	bCheckAVX_512BW = (bool(*)(void))dlsym(handle, "bCheckAVX_512BW");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX512BW_Supported = bCheckAVX_512BW();

	//AVX_512CD
	bCheckAVX_512CD = (bool(*)(void))dlsym(handle, "bCheckAVX_512CD");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX512CD_Supported = bCheckAVX_512CD();

	//AVX_512DQ
	bCheckAVX_512DQ = (bool(*)(void))dlsym(handle, "bCheckAVX_512DQ");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX512DQ_Supported = bCheckAVX_512DQ();

	//AVX_512ER
	bCheckAVX_512ER = (bool(*)(void))dlsym(handle, "bCheckAVX_512ER");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX512ER_Supported = bCheckAVX_512ER();

	//AVX_512F
	bCheckAVX_512F = (bool(*)(void))dlsym(handle, "bCheckAVX_512F");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX512F_Supported = bCheckAVX_512F();

	//AVX_512IFMA52
	bCheckAVX_512IFMA52 = (bool(*)(void))dlsym(handle, "bCheckAVX_512IFMA52");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX512IFMA52_Supported = bCheckAVX_512IFMA52();

	//AVX_512PF
	bCheckAVX_512PF = (bool(*)(void))dlsym(handle, "bCheckAVX_512PF");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX512PF_Supported = bCheckAVX_512PF();

	//AVX_512VBMI
	bCheckAVX_512VBMI = (bool(*)(void))dlsym(handle, "bCheckAVX_512VBMI");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX512VBMI_Supported = bCheckAVX_512VBMI();

	//AVX_512VL
	bCheckAVX_512VL = (bool(*)(void))dlsym(handle, "bCheckAVX_512VL");
	if ((error = dlerror()) != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	AVX512VL_Supported = bCheckAVX_512VL();

	dlclose(handle);

#endif


	bool AVX_TestResult_Flag = true;
	bool DGEMM_TestResult_Flag = false;


	//Initialise for Results file output
	std::string sOut;
	
	//Output MMX & SSE Features Detection Section to Screen
	std::cout << (isAVX_OS_Supported ? "..AVX is supported by this Operating System..\n" : "..AVX is NOT supported by this Operating System..\n");
	std::cout << "\n";

	//Output MMXSSE Features Detection Section to Test Results File	
	strPFOut.append(isAVX_OS_Supported ? "..AVX is supported by this Operating System..\n" : "..AVX is NOT supported by this Operating System..\n");
	strPFOut.append("\n");


	if (AVX_Max_Supp == 0)
	{
		std::cout << ("Most Advanced AVX Feature Detected.. No AVX features found\n");
		strPFOut.append("Most Advanced AVX Feature Detected.. No AVX features found\n");
	}

	if (AVX_Max_Supp == 1)
	{
		std::cout << ("Most Advanced AVX Feature Detected.. AVX\n");
		strPFOut.append("Most Advanced AVX Feature Detected.. AVX\n");
	}

	if (AVX_Max_Supp == 2)
	{
		std::cout << ("Most Advanced AVX Feature Detected.. AVX2\n");
		strPFOut.append("Most Advanced AVX Feature Detected.. AVX2\n");
	}

	if (AVX_Max_Supp == 3)
	{
		std::cout << ("Most Advanced AVX Feature Detected.. AVX512\n");
		strPFOut.append("Most Advanced AVX Feature Detected.. AVX512\n");
	}


	
	if (isAVX_OS_Supported)
	{
		if (AVX_Max_Supp == 3)
		{
			int maxMatrixSize = 0;
			int minMatrixSize = 0;
			ReadLocalConfig();
			maxMatrixSize = igAVX512maxMatrixSize;
			std::cout << std::endl << "maxMatrixSize = " << maxMatrixSize << std::endl;
			minMatrixSize = igAVX512minMatrixSize;
			std::cout << std::endl << "minMatrixSize = " << minMatrixSize << std::endl;
			std::cout << "\nTesting Most Advanced AVX Feature - AVX512....\n";
			if (testdgemm(maxMatrixSize, minMatrixSize))
			{
				DGEMM_TestResult_Flag = true;
				std::cout << "DGEMM AVX512 Test Result --- PASS\n" << std::endl;
				strPFOut.append("DGEMM AVX512 Test Result --- PASS\n");
			}
			else
			{
				DGEMM_TestResult_Flag = false;
				std::cout << "DGEMM AVX512 Test Result --- FAIL\n" << std::endl;
				strPFOut.append("DGEMM AVX512 Test Result --- FAIL\n");
			}
		}
		if (AVX_Max_Supp == 2)
		{
			int maxMatrixSize = 0;
			int minMatrixSize = 0;
			ReadLocalConfig();
			maxMatrixSize = igAVX2maxMatrixSize;
			std::cout << std::endl << "maxMatrixSize = " << maxMatrixSize << std::endl;
			minMatrixSize = igAVX2minMatrixSize;
			std::cout << std::endl << "minMatrixSize = " << minMatrixSize << std::endl;
			std::cout << "\nTesting Most Advanced AVX Feature - AVX2....\n";
			if (testdgemm(maxMatrixSize, minMatrixSize))
			{
				DGEMM_TestResult_Flag = true;
				std::cout << "DGEMM AVX2 Test Result --- PASS\n" << std::endl;
				strPFOut.append("DGEMM AVX2 Test Result --- PASS\n");
			}
			else
			{
				DGEMM_TestResult_Flag = false;
				std::cout << "DGEMM AVX2 Test Result --- FAIL\n" << std::endl;
				strPFOut.append("DGEMM AVX2 Test Result --- FAIL\n");
			}
		}
		if (AVX_Max_Supp == 1)
		{
			int maxMatrixSize = 0;
			int minMatrixSize = 0;
			ReadLocalConfig();
			maxMatrixSize = igAVXmaxMatrixSize;
			std::cout << std::endl << "maxMatrixSize = " << maxMatrixSize << std::endl;
			minMatrixSize = igAVXminMatrixSize;
			std::cout << std::endl << "minMatrixSize = " << minMatrixSize << std::endl;
			std::cout << "\nTesting Most Advanced AVX Feature - AVX....\n";
			if (testdgemm(maxMatrixSize, minMatrixSize))
			{
				DGEMM_TestResult_Flag = true;
				std::cout << "DGEMM AVX Test Result --- PASS\n" << std::endl;
				strPFOut.append("DGEMM AVX Test Result --- PASS\n");
			}
			else
			{
				DGEMM_TestResult_Flag = false;
				std::cout << "DGEMM AVX Test Result --- FAIL\n" << std::endl;
				strPFOut.append("DGEMM AVX Test Result --- FAIL\n");
			}
		}
		if (AVX_Max_Supp == 0)
		{
			int maxMatrixSize = 0;
			int minMatrixSize = 0;
			ReadLocalConfig();
			maxMatrixSize = igSSEmaxMatrixSize;
			std::cout << std::endl << "maxMatrixSize = " << maxMatrixSize << std::endl;
			minMatrixSize = igSSEminMatrixSize;
			std::cout << std::endl << "minMatrixSize = " << minMatrixSize << std::endl;
			std::cout << "\nTesting Most Advanced SSE Feature - SSE4_2....\n";
			if (testdgemm(maxMatrixSize, minMatrixSize))
			{
				DGEMM_TestResult_Flag = true;
				std::cout << "DGEMM SSE4_2 Test Result --- PASS\n" << std::endl;
				strPFOut.append("DGEMM SSE4_2 Test Result --- PASS\n");
			}
			else
			{
				DGEMM_TestResult_Flag = false;
				std::cout << "DGEMM SSE4_2 Test Result --- FAIL\n" << std::endl;
				strPFOut.append("DGEMM SSE4_2 Test Result --- FAIL\n");
			}
		}
	}
	else
	{
		int maxMatrixSize = 0;
		int minMatrixSize = 0;
		ReadLocalConfig();
		maxMatrixSize = igSSEmaxMatrixSize;
		std::cout << std::endl << "maxMatrixSize = " << maxMatrixSize << std::endl;
		minMatrixSize = igSSEminMatrixSize;
		std::cout << std::endl << "minMatrixSize = " << minMatrixSize << std::endl;
		std::cout << "\nTesting Most Advanced SSE Feature - SSE4_2....\n";
		if (testdgemm(maxMatrixSize, minMatrixSize))
			{
				DGEMM_TestResult_Flag = true;
				std::cout << "DGEMM SSE4_2 Test Result --- PASS\n" << std::endl;
				strPFOut.append("DGEMM SSE4_2 Test Result --- PASS\n");
			}
			else
			{
				DGEMM_TestResult_Flag = false;
				std::cout << "DGEMM SSE4_2 Test Result --- FAIL\n" << std::endl;
				strPFOut.append("DGEMM SSE4_2 Test Result --- FAIL\n");
			}
	
	}

	if (DGEMM_TestResult_Flag == false)
	{
		igPassFailStatus = 1;
	}
	else
	{
		igPassFailStatus = 0;
	}


}

// Prototypes of the DGEMM helpers defined further down in this file.
bool testdgemm(int maxMatrixSize, int minMatrixSize);
///void print_arr(int N, char * name, double* array);
void init_arr(int N, double* a);
void Dgemm_multiply(double* a, double*  b, double*  c, int N);

// Multiplies two N x N row-major matrices of doubles with cblas_dgemm and
// stores the product in c.
// Note the operand order: b is passed as the left-hand matrix and a as the
// right-hand one, so the result is c = b * a. beta is 0, so the call is
// issued as a plain product with no contribution requested from c's old values.
void Dgemm_multiply(double* a, double*  b, double*  c, int N)
{
	const double alpha = 1.0;
	const double beta = 0.0;
	cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, N, N, N, alpha, b, N, a, N, beta, c, N);
}

// Fills the N x N row-major matrix `a` with the deterministic pattern
// a[i*N + j] = i + j + 1 (not random: every call produces the same data).
// Does nothing when N <= 0; otherwise `a` must hold at least N*N doubles.
void init_arr(int N, double* a)
{
	if (N <= 0)
	{
		return;
	}
	// Index with size_t: i*N + j overflows int once N exceeds 46340.
	const size_t n = static_cast<size_t>(N);
	for (size_t i = 0; i < n; i++) {
		double* row = a + i * n;
		for (size_t j = 0; j < n; j++) {
			row[j] = static_cast<double>(i + j + 1);
		}
	}
}

// Repeats a DGEMM workload until iAVXtimer seconds have elapsed.
//
// Each pass allocates three N x N matrices, fills a and b with init_arr(),
// calls Dgemm_multiply() N times from an OpenMP parallel loop and compares
// c_last / a_last with c_last / b_last (last elements of each matrix).
// N starts at maxMatrixSize and switches to minMatrixSize once the remaining
// time is smaller than the elapsed time.
//
// Returns true when every pass that ran passed the check. Returns false on
// the first failing pass, on a non-positive matrix size, on allocation
// failure, or when no pass ran at all (iAVXtimer <= 0).
bool testdgemm(int maxMatrixSize, int minMatrixSize)
{
	int N = maxMatrixSize;
	long dif = 0;
	time_t start, end;
	time(&start);
	bool DGEMM_TestResult_Flag = false;

	while (dif < iAVXtimer)
	{
		if (N <= 0)
		{
			// A non-positive size would make the "last element" index negative.
			return false;
		}

		const size_t n = static_cast<size_t>(N);
		const size_t kMaxCount = static_cast<size_t>(-1) / sizeof(double);
		if (n > kMaxCount / n)
		{
			// n*n doubles cannot be expressed in size_t on this platform.
			return false;
		}
		const size_t count = n * n;

		double* a = (double*)malloc(sizeof(double) * count);
		double* b = (double*)malloc(sizeof(double) * count);
		double* c = (double*)malloc(sizeof(double) * count);
		if (!a || !b || !c)
		{
			// free(NULL) is a no-op, so releasing all three is safe here.
			free(a);
			free(b);
			free(c);
			return false;
		}

		init_arr(N, a);
		init_arr(N, b);

		//DGEMM Multiply
		// NOTE(review): every iteration writes the same product into the shared
		// c from its own thread; kept as-is because this loop is the load generator.
		#pragma omp parallel for
		for (int i = 0; i < N; i++)
		{
			Dgemm_multiply(a, b, c, N);
		}

		// N*(N - 1) + (N - 1) == N*N - 1, computed in size_t to avoid int overflow.
		const size_t lastIdx = count - 1;
		const double a_last = a[lastIdx];
		const double b_last = b[lastIdx];
		const double c_last = c[lastIdx];
		const double ac_chk = (c_last / a_last);
		const double bc_chk = (c_last / b_last);
		if (iDebugFlag)
		{
			std::cout << std::endl << "a_last = " << a_last << ", b_last = " << b_last << ", c_last = " << c_last << std::endl;
			std::cout << std::endl << "a = " << *a << ",b = " << *b << ",c = " << *c << ",N = " << N << std::endl;
			std::cout << std::endl << "ac_chk = " << ac_chk << " ,bc_chk = " << bc_chk << std::endl;
		}

		// Re-evaluate on every pass: a sticky "true" from an earlier pass would
		// hide a failure in a later one.
		DGEMM_TestResult_Flag = (ac_chk == bc_chk);

		free(a);
		free(b);
		free(c);

		time(&end);
		dif = static_cast<long>(difftime(end, start));
		if (!DGEMM_TestResult_Flag)
		{
			return false;
		}

		if (iDebugFlag)
		{
			std::cout << std::endl << "dif = " << dif << ",iAVXtimer = " << iAVXtimer << ",N = " << N << std::endl;
		}
		// Less time left than already spent: continue with the smaller matrix.
		const long endtime = (iAVXtimer - dif);
		if (endtime < dif)
		{
			N = minMatrixSize;
		}
	}

	return DGEMM_TestResult_Flag;
}


// ---------------------------------------------------------------------------
//  PopulateData -- A routine to populate configuration variables
// ---------------------------------------------------------------------------

void ReadLocalConfig(void)
{
	XMLParser localXMLObj;
	try
	{
		
		//Local config
		localXMLObj.LoadXMLFile("DGEMM_LocalConfig.xml");

		// Set up the root element
		localXMLObj.SetRootElem("DGEMM_W_LocalConfig");

		string SSEmaxMatrixSize = localXMLObj.ReadNodeKeyAsString("@SSEmaxMatrixSize", "");
		igSSEmaxMatrixSize = CustomAsciiToInteger(SSEmaxMatrixSize.c_str());
		string SSEminMatrixSize = localXMLObj.ReadNodeKeyAsString("@SSEminMatrixSize", "");
		igSSEminMatrixSize = CustomAsciiToInteger(SSEminMatrixSize.c_str());
		string AVXmaxMatrixSize = localXMLObj.ReadNodeKeyAsString("@AVXmaxMatrixSize", "");
		igAVXmaxMatrixSize = CustomAsciiToInteger(AVXmaxMatrixSize.c_str());
		string AVXminMatrixSize = localXMLObj.ReadNodeKeyAsString("@AVXminMatrixSize", "");
		igAVXminMatrixSize = CustomAsciiToInteger(AVXminMatrixSize.c_str());
		string AVX2maxMatrixSize = localXMLObj.ReadNodeKeyAsString("@AVX2maxMatrixSize", "");
		igAVX2maxMatrixSize = CustomAsciiToInteger(AVX2maxMatrixSize.c_str());
		string AVX2minMatrixSize = localXMLObj.ReadNodeKeyAsString("@AVX2minMatrixSize", "");
		igAVX2minMatrixSize = CustomAsciiToInteger(AVX2minMatrixSize.c_str());
		string AVX512maxMatrixSize = localXMLObj.ReadNodeKeyAsString("@AVX512maxMatrixSize", "");
		igAVX512maxMatrixSize = CustomAsciiToInteger(AVX512maxMatrixSize.c_str());
		string AVX512minMatrixSize = localXMLObj.ReadNodeKeyAsString("@AVX512minMatrixSize", "");
		igAVX512minMatrixSize = CustomAsciiToInteger(AVX512minMatrixSize.c_str());
		//if (iDebugFlag)
		//{
			//cout << "igMatrixSize = " << igMatrixSize << endl;
		//}
		
	}
	catch (...)
	{
		std::string sException;
		sException.append("\n-------------------------------\n");
		sException.append("Cannot locate config file: 'DGEMM_LocalConfig.xml'! \n");
		sException.append("Application terminated! \n");
		sException.append("\n-------------------------------\n");
		throw(sException);
	}
	return;
}
// Puts the module's global state into its start-of-run defaults: records the
// start time, marks the result as Success and resets the timing, option and
// print flags.
void Init(void)
{
	// Start-of-run timestamp (echoed when the debug flag is set).
	tStartTime = std::time(nullptr);
	if (iDebugFlag)
	{
		const std::string sStart = std::asctime(std::localtime(&tStartTime));
		std::cout << "tStartTime = " << sStart << std::endl;
	}

	// Result status: starts out as Success; the other values used in this
	// file are 1 = Fail and 2 = Indeterminate (run interrupted).
	igPassFailStatus = ReturnValueDef::Success;

	// Requested run time components.
	iHour = 0;
	iMin = 0;
	iSec = 0;

	// Argument-parsing state.
	iArgIndex = 0;
	iOptionValid = 0;
	iColorText = 0;

	// By default the test runs and both the version and the outcome are printed.
	iRun = 1;
	iPrintVersionFlag = 1;
	iPrintSuccessFlag = 1;
}

// Prints the module version (sgGoldModuleVersion) framed by "..." lines.
void PrintVersion(void)
{
	std::cout << std::endl << "..." << std::endl;
	std::cout << "Version: " << sgGoldModuleVersion << std::endl;
	std::cout << "..." << std::endl;
}

void SignalFun(int iSigNum)
{
	// If you want to print out the signal, do the following
	cout << "Signal is:" << iSigNum << endl;

	// Depending on your routine, you may want to have some sort of exit message ... however you may also want to comment these out
	cout << endl << "Do the soft clean up here" << endl;
	
	// Do cleanup and close up stuff here 
	CleanUp();

	// Exit the program with one of the accepted error levels. 
	//0 = Success
	//1 = Fail
	//2 = Indeterminate 
	igPassFailStatus = ReturnValueDef::Indeterminate;

	// if you get to this logic, most likely your answer should always be a 2
	std::string sTemp = "Error: " + UtilConvert(igPassFailStatus) + " Indeterminate!";
	PrintColorMsg(sTemp, TextColor::Yellow);
	exit(igPassFailStatus);

}

// Removes the results file named by sgGoldModuleResultsFile via td.RemoveFile().
// Called from main() before a run and from the SIGINT handler.
void CleanUp()
{
	td.RemoveFile(sgGoldModuleResultsFile);
}

// Writes the results file: title, module version, start time, the overall
// verdict, the accumulated strPFOut text, end time and total run time.
// iPassFailStatus: 0 = pass, 1 = fail, 2 = indeterminate; any other value
// writes no verdict line and leaves igPassFailStatus untouched.
void WriteResultsFile(int iPassFailStatus)  
{
	// Header: title, version and start time.
	td.WriteToFile(sgGoldModuleResultsFile, "DGEMM Stress Test");
	std::string sLine = "Module Version: " + sgGoldModuleVersion;
	td.WriteToFile(sgGoldModuleResultsFile, sLine);
	sLine = "Start Time: ";
	sLine += std::asctime(std::localtime(&tStartTime));
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);

	// Verdict line; the global status is synchronised with the value written.
	switch (iPassFailStatus)
	{
	case 0: // Success
		igPassFailStatus = 0;
		td.WriteToFile(sgGoldModuleResultsFile, "Test Result - PASS");
		break;
	case 1: // Fail
		igPassFailStatus = 1;
		td.WriteToFile(sgGoldModuleResultsFile, "Test Result - FAIL");
		break;
	case 2: // Indeterminate
		igPassFailStatus = 2;
		td.WriteToFile(sgGoldModuleResultsFile, "Test Result - INDETERMINATE");
		break;
	default:
		break;
	}

	// Detail text collected while the test ran.
	td.WriteToFile(sgGoldModuleResultsFile, strPFOut);

	// Footer: end time and elapsed seconds.
	tEndTime = std::time(nullptr);
	sLine = "End Time: ";
	sLine += std::asctime(std::localtime(&tEndTime));
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);

	double seconds = difftime(tEndTime, tStartTime);
	sLine = "Total Time: ";
	sLine += " seconds: ";
	sLine += UtilConvert(seconds);
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);
}

// Prints the help text: what the test does, the supported command-line
// options with an example for each (prefixed "./" on Linux), the runtime
// dependencies for the current build flavour and the copyright line.
void HelpUseage(void)
{
	// Prints one example command line for the platform(s) this build targets,
	// followed by a blank line.
	const auto printExample = [](const char *pIndent, const char *pArgs)
	{
		(void)pIndent;
		(void)pArgs;
#ifdef __WIN_OS__
		std::cout << pIndent << "dgemm " << pArgs << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
		std::cout << pIndent << "./dgemm " << pArgs << std::endl << std::endl;
#endif
	};

	std::cout << std::endl << "Test Information:" << std::endl;
	std::cout << " The purpose of the DGEMM module is to stress test your processor using " << std::endl;
	std::cout << " Intel Math Kernel Library (MKL) for Matrix Multiply. The test will detect " << std::endl;
	std::cout << " the presence and utilize the highest level of the following features on " << std::endl;
	std::cout << " your Intel processor : " << std::endl;
	std::cout << "        AVX512 (Advanced Vector Extensions 512)" << std::endl;
	std::cout << "        AVX2 (Advanced Vector Extensions 2)" << std::endl;
	std::cout << "        AVX (Advanced Vector Extensions)" << std::endl;
	std::cout << "        SSE4.2 (Streaming SIMD Extensions 4.2)." << std::endl;
	std::cout << " Note: AVX capability is first detected on your Intel Processor and then your " << std::endl;
	std::cout << " Operating System is checked to see if it supports running AVX. " << std::endl;
	std::cout << " Minimum Operating System versions that support AVX: " << std::endl;
	std::cout << "        Windows 7 SP1, Windows Server 2008 R2 SP1, " << std::endl;
	std::cout << "        Linux since kernel version 2.6.30." << std::endl;
	std::cout << " " << std::endl;
	std::cout << "Possible options are as follows:" << std::endl;

	std::cout << " -h  = Help or Usage (this message)" << std::endl;
	std::cout << "       Example:" << std::endl;
	printExample("       ", "-h");

	std::cout << " -info = Information switch that publishes parallel information" << std::endl;
	std::cout << "         using the following scheme:" << std::endl;
	std::cout << "         \"parallel:yes|socket:yes|core:yes\" " << std::endl;
	std::cout << "         Example:" << std::endl;
	printExample("         ", "-info");

	std::cout << " -resultName = The resultName switch provides a way to name " << std::endl;
	std::cout << "               the results file as desired." << std::endl;
	std::cout << "               Example :" << std::endl;
	printExample("               ", "-resultName AVX_results_0001.txt ");

	std::cout << " -pa = pause app option" << std::endl;
	std::cout << "       Example:" << std::endl;
	printExample("       ", "-pa");

	std::cout << " -c = This option will display text in various colors" << std::endl;
	std::cout << "      Example:" << std::endl;
	printExample("      ", "-c");

	std::cout << " -hrs = hours option ..." << std::endl;
	std::cout << "        Indicating the number of hours dgemm should run" << std::endl;
	std::cout << "        Example:" << std::endl;
	printExample("        ", "-hrs 2");

	std::cout << " -m = minutes option ..." << std::endl;
	std::cout << "      Indicating the number of minutes dgemm should run" << std::endl;
	std::cout << "      Example:" << std::endl;
	printExample("      ", "-m 5");

	std::cout << " -s = seconds option ..." << std::endl;
	std::cout << "      Indicating the number of seconds dgemm should run" << std::endl;
	std::cout << "      Example:" << std::endl;
	// The two platforms show different sample values; both are kept as they were.
#ifdef __WIN_OS__
	printExample("      ", "-s 40");
#endif
#ifdef __LIN_OS__
	printExample("      ", "-s 10");
#endif

#ifdef __WIN_32__
	std::cout << "Dependencies:" << std::endl;
	std::cout << "      libmmd.dll " << std::endl;
	std::cout << "      libiomp5md.dll " << std::endl;
	std::cout << "      C:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool\\DetectUtils.dll " << std::endl;
#endif
#ifdef __WIN_64__
	std::cout << "Dependencies:" << std::endl;
	std::cout << "      libmmd.dll " << std::endl;
	std::cout << "      libiomp5md.dll " << std::endl;
	std::cout << "      C:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool 64bit\\DetectUtils64.dll " << std::endl;
#endif
#ifdef __LIN_32__
	std::cout << "Dependencies:" << std::endl;
	std::cout << "      libDetectUtils.so.1.0 " << std::endl;
#endif
#ifdef __LIN_64__
	std::cout << "Dependencies:" << std::endl;
	std::cout << "      libDetectUtils64.so.1.0 " << std::endl;
#endif

	std::cout << std::endl << " Copyright (C) 2019, Intel Corporation" << std::endl;
}
// Holds the console open: prompts the user and waits until a token has been
// read from standard input. The text that was entered is not examined.
void PauseWQuit(void)
{
	std::string sIgnoredInput;
	std::cout << "Please press 'q' <ENTER> to quit: ";
	std::cin >> sIgnoredInput;
}

// Prints sMsg on its own line (preceded by a blank line).
// When colour output is enabled (iColorText != 0) and iColor is Red, Green or
// Yellow, the message is shown in that colour and the console is restored
// afterwards. In every other case - colour disabled, a different colour
// value, or a build without a known console type - the message is printed
// plainly, so no message is ever dropped.
void PrintColorMsg(std::string sMsg, TextColor iColor)
{
	const bool bRed = (iColor == TextColor::Red);
	const bool bGreen = (iColor == TextColor::Green);
	const bool bYellow = (iColor == TextColor::Yellow);
	bool bPrinted = false;

	if (iColorText != 0 && (bRed || bGreen || bYellow))
	{
#ifdef __WIN_OS__
		// Yellow is red + green on the Windows console.
		const WORD wColor = bRed ? FOREGROUND_RED
			: (bGreen ? FOREGROUND_GREEN : (FOREGROUND_RED | FOREGROUND_GREEN));
		HANDLE hCommandWindowTextColor = GetStdHandle(STD_OUTPUT_HANDLE);
		SetConsoleTextAttribute(hCommandWindowTextColor, (wColor | FOREGROUND_INTENSITY));
		std::cout << std::endl << sMsg << std::endl;
		// Back to the default white-on-black attribute.
		SetConsoleTextAttribute(hCommandWindowTextColor, (FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN));
		bPrinted = true;
#endif
#ifdef __LIN_OS__
		// ANSI escape sequences: 31 = red, 32 = green, 33 = yellow, 0 = reset.
		const char *pAnsiColor = bRed ? "\033[0;31m" : (bGreen ? "\033[0;32m" : "\033[0;33m");
		std::cout << pAnsiColor;
		std::cout << std::endl << sMsg << std::endl;
		std::cout << "\033[0m";
		bPrinted = true;
#endif
	}

	if (!bPrinted)
	{
		std::cout << std::endl << sMsg << std::endl;
	}
}

// Returns true when s is a non-empty sequence of the decimal digits '0'-'9'.
// Signs, whitespace, decimal points and the empty string are rejected.
// A plain scan is used rather than building a std::regex on every call.
bool is_integer(const std::string & s){
	if (s.empty())
	{
		return false;
	}
	for (const char ch : s)
	{
		if (ch < '0' || ch > '9')
		{
			return false;
		}
	}
	return true;
}


// Checks that the option at argvLocal[iArgLocal] is followed by a usable value.
// The value must exist (index within argcLocal, non-NULL, non-empty) and, when
// vt is VarType::vtINT, must satisfy is_integer().
// Returns true when the value is acceptable; otherwise prints the reason in
// red through PrintColorMsg() and returns false.
bool isOptionValid(int iArgLocal, int argcLocal, char *argvLocal[], VarType vt)
{
	const std::string sOption = argvLocal[iArgLocal];
	const int iValueIndex = iArgLocal + 1;

	// Evaluated left to right so argvLocal is never read past argcLocal.
	const bool bValuePresent = (iValueIndex < argcLocal)
		&& (argvLocal[iValueIndex] != NULL)
		&& (argvLocal[iValueIndex][0] != '\0');

	bool bValid = true;
	std::string sProblem = "";

	if (!bValuePresent)
	{
		bValid = false;
		sProblem = "No argument for '" + sOption + "' detected!";
	}
	else if (vt == VarType::vtINT && !is_integer(argvLocal[iValueIndex]))
	{
		bValid = false;
		sProblem = "Wrong argument type format for argument: '" + sOption + "'!";
	}

	if (!bValid)
	{
		PrintColorMsg(sProblem, TextColor::Red);
	}
	return bValid;
}

void handleArgs(int argc, char *argv[])
{

	// Optional args
	// Make sure the options can only be run one time
	// Load up all the possible args
	std::vector <string> sArgs_v;
	sArgs_v.push_back("-h");
	sArgs_v.push_back("-info");
	sArgs_v.push_back("-resultName");
	sArgs_v.push_back("-hrs");
	sArgs_v.push_back("-m");
	sArgs_v.push_back("-s");
	sArgs_v.push_back("-d");
	//sArgs_v.push_back("-p");
	//sArgs_v.push_back("-plx");
	//sArgs_v.push_back("-plt");
	sArgs_v.push_back("-pa");
	sArgs_v.push_back("-c");
	//sArgs_v.push_back("-r");

	if (argc > 1)
	{
		for (int iArg = 1; iArg < argc; iArg++)
		{
			for (int iVec = 0; iVec < sArgs_v.size(); iVec++)
			{
				if (argv[iArg] == sArgs_v[iVec])
				{
					// Only the options that need to be processed before all the other ones need to be in this list
					if (sArgs_v[iVec] == std::string("-c"))
					{
						iColorText = 1;
					}
				}
			}
		}
		for (int iArg = 1; iArg < argc; iArg++)
		{
			iArgIndex = iArg;
			iOptionValid = 0;
			for (int iVec = 0; iVec < sArgs_v.size(); iVec++)
			{
				if (argv[iArg] == sArgs_v[iVec])
				{
					iOptionValid = 1;
					if (sArgs_v[iVec] == std::string("-h"))
					{
						iRun = 0;
						std::cout << std::endl << " --- DGEMM Stress Test --- ";
						PrintVersion();
						HelpUseage();
						iPrintVersionFlag = 0;
						iPrintSuccessFlag = 0;
					}
					if (sArgs_v[iVec] == std::string("-info"))
					{
						iRun = 0;
						iPrintVersionFlag = 0;
						iPrintSuccessFlag = 0;
						std::cout << "\"parallel:yes|socket:yes|core:yes\"" << std::endl;
					}
					if (sArgs_v[iVec] == std::string("-resultName"))
					{
						sgGoldModuleResultsFile = argv[++iArg];
					}
					if (sArgs_v[iVec] == std::string("-hrs"))
					{
						// Hours
						if (isOptionValid(iArg, argc, argv, VarType::vtINT))
						{
							iHour = CustomAsciiToInteger(argv[++iArg]);
							iAVXtimer = iHour * 60 * 60;
						}
						else
						{
							iRun = 0;
							iOptionValid = 0;
						}

					}
					if (sArgs_v[iVec] == std::string("-m"))
					{
						// Minutes
						if (isOptionValid(iArg, argc, argv, VarType::vtINT))
						{
							iMin = CustomAsciiToInteger(argv[++iArg]);
							iAVXtimer = iMin * 60;
						}
						else
						{
							iRun = 0;
							iOptionValid = 0;
						}
					}
					if (sArgs_v[iVec] == std::string("-s"))
					{
						// Seconds
						if (isOptionValid(iArg, argc, argv, VarType::vtINT))
						{
							iSec = CustomAsciiToInteger(argv[++iArg]);
							iAVXtimer = iSec;
						}
						else
						{
							iRun = 0;
							iOptionValid = 0;
						}
					}
					if (sArgs_v[iVec] == std::string("-d"))
					{
						// debug
						iDebugFlag = 1;
					}
					if (sArgs_v[iVec] == std::string("-p"))
					{
						std::cout << std::endl << "Entering print opt" << std::endl;
						igPrintConfig = 1;
					}
					if (sArgs_v[iVec] == std::string("-pa"))
					{
						std::cout << std::endl << "Entering pause app opt" << std::endl;
						igPauseApp = 1;
					}
					if (sArgs_v[iVec] == std::string("-c"))
					{
						std::cout << std::endl << "Entering color text opt" << std::endl;
#ifdef __WIN_OS__
						HANDLE hCommandWindowTextColor = GetStdHandle(STD_OUTPUT_HANDLE);
						SetConsoleTextAttribute(hCommandWindowTextColor, (FOREGROUND_GREEN | FOREGROUND_INTENSITY));
						//std::cout << "Some green text ..." << std::endl << std::endl;
						SetConsoleTextAttribute(hCommandWindowTextColor, (FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN));
#endif
#ifdef __LIN_OS__
						std::cout << "\033[1;32m";
						//std::cout << std::endl << ".." << std::endl << "Some green text ..." << std::endl << std::endl;
						std::cout << "\033[0m";
#endif
					}


					// Erase current vector so the option doesn't repeat
					sArgs_v.erase(sArgs_v.begin() + (iVec));
				}

			}
			if (iDebugFlag)
			{
				std::cout << std::endl << "iOptionValid = " << iOptionValid << std::endl;
			}
			if (iOptionValid == 0)
			{
				break;
			}
		}
		if (iOptionValid == 0)
		{
			std::string sArgTemp = argv[iArgIndex];
			std::string sTempMsg = "Option " + sArgTemp + " invalid!";
			PrintColorMsg(sTempMsg, TextColor::Red);
			iRun = 0;
			igPassFailStatus = ReturnValueDef::InvalidArgs;
			HelpUseage();

		}
	}
	else
	{
		// Default message if no arguments are used  ... it may be the useage message or it may be OK that no args are used
		//std::cout << "No args used!!" << std::endl;
	}

}



// Prints the final outcome that corresponds to igPassFailStatus and, for any
// status other than Success, the numeric return status as well.
void PrintSuccess(void)
{
	// Prints pText in the given colour when the status comparison matched.
	const auto announceIf = [](bool bMatches, const char *pText, TextColor color)
	{
		if (bMatches)
		{
			PrintColorMsg(pText, color);
		}
	};

	announceIf(igPassFailStatus == ReturnValueDef::Success, "DGEMM Module Success!", TextColor::Green);
	announceIf(igPassFailStatus == ReturnValueDef::Fail, "DGEMM Module Fail!", TextColor::Red);
	announceIf(igPassFailStatus == ReturnValueDef::Indeterminate, "DGEMM Module Interrupted!", TextColor::Yellow);
	announceIf(igPassFailStatus == ReturnValueDef::InvalidArgs, "DGEMM Module has invalid arguments!", TextColor::Red);
	announceIf(igPassFailStatus == ReturnValueDef::ConfigMismatch, "DGEMM Module has a configuration mismatch!", TextColor::Yellow);

	if (igPassFailStatus != ReturnValueDef::Success)
	{
		std::cout << "Return Status = " << igPassFailStatus << std::endl;
	}
}

//..main
int main(int argc, char *argv[])
{
	

#if defined __WIN_64__ 
	sgGoldModuleVersion = "1.0.7.64b.W";
#endif

#if defined __WIN_32__ 
	sgGoldModuleVersion = "1.0.7.32b.W";
#endif

#if defined __LIN_64__
	sgGoldModuleVersion = "1.0.1.64b.L";
#endif
#if defined __LIN_32__
	sgGoldModuleVersion = "1.0.1.32b.L";
#endif


	// Need to include signal for Softkill functions
	// If this thread is sent a signal SIGINT, we need to send it to the signal function 
	signal(SIGINT, SignalFun);


	// Parse both text local config style and xml style


	Init();
	handleArgs(argc, argv);

	// Print out version
	if (iPrintVersionFlag != 0)
	{
		std::cout << std::endl << " --- DGEMM Stress Test --- ";
		PrintVersion();
	}


	//..clean up result files ... Clean file before another one is written
	CleanUp();
		
	if (iRun)
	{
		//ReadLocalConfig();
		//iMatrixSize = igMatrixSize;
		//std::cout << std::endl << "iMatrixSize = " << iMatrixSize << std::endl;
		Run_AVX();
		WriteResultsFile(igPassFailStatus);
	}

	if (iPrintSuccessFlag != 0)
	{
		PrintSuccess();
	}

	if (igPauseApp)
	{
		PauseWQuit();
	}


	return igPassFailStatus;

}
