
//..
//..AVX Module ..
//..developed by GTS..
//..
//..Intel(R) Corporation (C) 2016
//..

//..includes
#include "Include/Main.h"
#include <immintrin.h>
//#include <pmmintrin.h>

//..defines
#ifdef __WIN_OS__
#include <Windows.h>
#endif
#ifdef __LIN_OS__
#include <stdio.h>
#include <malloc.h>
#include <stdlib.h>
#include <ivec.h>
#include <fvec.h>
#include <dvec.h>
#include <math.h>
#include <string.h>
#include <iostream>
#include <sstream>
#include <fstream>
#include <string>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <stdlib.h>
#include <dlfcn.h>

#endif

#ifdef __LIN_OS__

// Returns a copy of `str` with leading and trailing space characters (' ')
// removed. Only the space character is stripped; tabs and newlines are kept.
// An empty or all-space input yields an empty string.
std::string trim(const std::string& str)
{
	const std::string::size_type first = str.find_first_not_of(' ');
	if (first == std::string::npos)
	{
		// Nothing but spaces (or nothing at all): the trimmed result is empty.
		return std::string();
	}
	// A non-space character exists, so find_last_not_of cannot fail here.
	const std::string::size_type last = str.find_last_not_of(' ');
	return str.substr(first, last - first + 1);
}

#endif

#ifdef __LIN_OS__
// Entry points of the DetectUtils shared library. Each pointer is filled in
// by Run_AVX() with dlsym() using the symbol of the same name, and is called
// immediately afterwards. Run_AVX() dlclose()s the library before returning,
// so these pointers must not be called after Run_AVX() has finished.
std::string (*OutputDLLVersion)(void);	// library version text (printed after trim())
bool(*bISFeature_OS_Supported)(void);	// result stored in isAVX_OS_Supported
int(*max_avx_supported)(void);		// result stored in AVX_Max_Supp
bool(*bCheckAVX)(void);			// result stored in AVX_HW
bool(*bCheckAVX2)(void);		// result stored in AVX2_HW
bool(*bCheckAVX_512BW)(void);		// result stored in AVX512BW_Supported
bool(*bCheckAVX_512CD)(void);		// result stored in AVX512CD_Supported
bool(*bCheckAVX_512DQ)(void);		// result stored in AVX512DQ_Supported
bool(*bCheckAVX_512ER)(void);		// result stored in AVX512ER_Supported
bool(*bCheckAVX_512F)(void);		// result stored in AVX512F_Supported
bool(*bCheckAVX_512IFMA52)(void);	// result stored in AVX512IFMA52_Supported
bool(*bCheckAVX_512PF)(void);		// result stored in AVX512PF_Supported
bool(*bCheckAVX_512VBMI)(void);		// result stored in AVX512VBMI_Supported
bool(*bCheckAVX_512VL)(void); 		// result stored in AVX512VL_Supported
bool(*bCheckAES)(void);			// result stored in AES_Supported
bool(*bCheckPCLMULQDQ)(void);		// result stored in PCLMULQDQ_Supported
#endif

#ifdef __WIN_OS__

// Function-pointer types for the DUdll_* exports of the DetectUtils DLL.
// Run_AVX() casts the result of GetProcAddress() to these types.
typedef char *(*pOutputDLLVersion)(void);	// "DUdll_OutputDLLVersion": version text
typedef bool(*pISFeature_OS_Supported)(void);	// "DUdll_bISFeature_OS_Supported"
typedef int(*pMaxAVX_Support)(void);		// "DUdll_Max_AVX_Supported": value stored in AVX_Max_Supp
typedef bool(*pCheckAVX)(void);			// "DUdll_bCheckAVX"
typedef bool(*pCheckAVX2)(void);		// "DUdll_bCheckAVX2"
typedef bool(*pCheckAES)(void);			// "DUdll_bCheckAES"
typedef bool(*pCheckPCLMULQDQ)(void);		// "DUdll_bCheckPCLMULQDQ"
typedef bool(*pCheckAVX512BW)(void);		// "DUdll_bCheck512BW"
typedef bool(*pCheckAVX512CD)(void);		// "DUdll_bCheck512CD"
typedef bool(*pCheckAVX512DQ)(void);		// "DUdll_bCheck512DQ"
typedef bool(*pCheckAVX512ER)(void);		// "DUdll_bCheck512ER"
typedef bool(*pCheckAVX512F)(void);		// "DUdll_bCheck512F"
typedef bool(*pCheckAVX512IFMA52)(void);	// "DUdll_bCheck512IFMA52"
typedef bool(*pCheckAVX512PF)(void);		// "DUdll_bCheck512PF"
typedef bool(*pCheckAVX512VBMI)(void);		// "DUdll_bCheck512VBMI"
typedef bool(*pCheckAVX512VL)(void);		// "DUdll_bCheck512VL"
#endif


// Detection results. All of these are file-scope globals that Run_AVX()
// overwrites with the values reported by the DetectUtils library.

// True when the detection library reports that the operating system supports
// the feature; Run_AVX() only runs the AVX/AVX2/AVX512 tests when this is set.
bool isAVX_OS_Supported;

//Max AVX Detected
// 0 = No AVX is supported
// 1 = AVX is max supported
// 2 = AVX2 is max supported
// 3 = AVX512 is max supported
int AVX_Max_Supp;

// Per-feature hardware flags, one per detection-library query.
bool AVX_HW;
bool AVX2_HW;
bool AVX512_HW;	// NOTE(review): never assigned in the visible part of this file
bool AVX512BW_Supported;
bool AVX512CD_Supported;
bool AVX512DQ_Supported;
bool AVX512ER_Supported;
bool AVX512F_Supported;
bool AVX512IFMA52_Supported;
bool AVX512PF_Supported;
bool AVX512VBMI_Supported;
bool AVX512VL_Supported;
bool AES_Supported;		// gates the testaes() run in Run_AVX()
bool PCLMULQDQ_Supported;	// gates the testpclmulqdq() run in Run_AVX()



// Define loop for AVX test
// NOTE(review): not referenced in the visible part of this file; presumably
// an iteration count used by tests further down - confirm before changing.
#define AVX_LOOP 1000000


//Initialise AVX Test variables
// NOTE(review): none of the variables below are referenced in the visible
// part of this file; their roles are inferred from their names only.
int miBufferWidth = 1024;
double* DllBuf;
double* FIRBuf;
double refVal[2], outVal[2], outVal_avx[2];


// ---------------------------------------------------------------------------
// Private helpers for Run_AVX()
// ---------------------------------------------------------------------------

#ifdef __WIN_OS__

// Name and install path of the detection DLL for this build's bitness.
#ifdef __WIN_32__
static const char* const kWinDetectName = "DetectUtils";
static const char* const kWinDetectPath = "c:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool\\DetectUtils.dll";
#elif defined __WIN_64__
static const char* const kWinDetectName = "DetectUtils64";
static const char* const kWinDetectPath = "c:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool 64bit\\DetectUtils64.dll";
#else
#error "Run_AVX: define __WIN_32__ or __WIN_64__ together with __WIN_OS__"
#endif

// Looks up `exportName` in `lib`. On failure prints
// "..could not load <shownName> from <dll name><libSuffix> " and exits with
// status 1, so the returned pointer is never null.
static FARPROC WinResolveOrExit(HINSTANCE lib, const char* exportName, const char* shownName, const char* libSuffix)
{
	FARPROC proc = GetProcAddress(lib, exportName);
	if (!proc)
	{
		std::cout << ".." << std::endl << "..could not load " << shownName << " from " << kWinDetectName << libSuffix << " " << std::endl << ".." << std::endl;
		exit(1);
	}
	return proc;
}

// Resolves a `bool f(void)` export and returns the result of calling it.
static bool WinQueryFlagOrExit(HINSTANCE lib, const char* exportName, const char* shownName)
{
	typedef bool(*FlagFn)(void);
	FlagFn query = reinterpret_cast<FlagFn>(WinResolveOrExit(lib, exportName, shownName, " dll"));
	return query();
}

#endif

#ifdef __LIN_OS__

// Name and path of the detection shared library for this build's bitness.
#ifdef __LIN_32__
static const char* const kLinDetectName = "DetectUtils";
static const char* const kLinDetectPath = "./libDetectUtils.so.1.1";
#elif defined __LIN_64__
static const char* const kLinDetectName = "DetectUtils64";
static const char* const kLinDetectPath = "./libDetectUtils64.so.1.1";
#else
#error "Run_AVX: define __LIN_32__ or __LIN_64__ together with __LIN_OS__"
#endif

// Looks up `symbol` in `handle`. If dlerror() reports a failure the message
// is written to stderr and the process exits with status 1.
static void* LinResolveOrExit(void* handle, const char* symbol)
{
	dlerror();	// discard any stale error so the check below reflects this lookup only
	void* address = dlsym(handle, symbol);
	const char* error = dlerror();
	if (error != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	return address;
}

// Resolves a `bool f(void)` symbol, stores it in the file-level pointer
// `slot`, and returns the result of calling it.
static bool LinQueryFlagOrExit(void* handle, const char* symbol, bool(*&slot)(void))
{
	slot = reinterpret_cast<bool(*)(void)>(LinResolveOrExit(handle, symbol));
	return slot();
}

#endif

// Writes `line` to the console and appends the same text to strPFOut.
static void AvxReportLine(const std::string& line)
{
	std::cout << line;
	strPFOut.append(line);
}

// Reports "..<feature> is [NOT ]supported by this CPU..".
static void AvxReportCpuFeature(const char* feature, bool supported)
{
	std::string line = "..";
	line += feature;
	line += supported ? " is supported by this CPU..\n" : " is NOT supported by this CPU..\n";
	AvxReportLine(line);
}

// Reports "<testName> Test Result --- <outcome>". The console copy is
// followed by an extra blank line; the strPFOut copy is not.
static void AvxReportTestResult(const char* testName, const char* outcome)
{
	const std::string line = std::string(testName) + " Test Result --- " + outcome + "\n";
	std::cout << line << std::endl;
	strPFOut.append(line);
}

// Reports PASS/FAIL for `testName` and hands `passed` back to the caller.
static bool AvxReportPassFail(const char* testName, bool passed)
{
	AvxReportTestResult(testName, passed ? "PASS" : "FAIL");
	return passed;
}

// Reports that none of the AVX/AVX2/AVX512 tests were run. When
// `trailingBlankLine` is set an extra empty line follows in both outputs.
static void AvxReportAvxNotTested(bool trailingBlankLine)
{
	std::cout << "AVX Test - Not Tested" << std::endl;
	std::cout << "AVX2 Test - Not Tested" << std::endl;
	std::cout << "AVX512 Test - Not Tested" << std::endl;
	if (trailingBlankLine)
	{
		std::cout << std::endl;
	}
	strPFOut.append("AVX Test - Not Tested\n");
	strPFOut.append("AVX2 Test - Not Tested\n");
	strPFOut.append("AVX512 Test - Not Tested\n");
	if (trailingBlankLine)
	{
		strPFOut.append("\n");
	}
}

// Detects AVX / AES / PCLMULQDQ support through the DetectUtils library,
// reports the findings to the console and to strPFOut, runs the matching
// tests and records the overall verdict.
//
// Steps:
//   1. Load the DetectUtils library and report its version.
//   2. Query every feature flag into the file-level detection globals
//      (isAVX_OS_Supported, AVX_Max_Supp, AVX_HW, ...), then unload the library.
//   3. Report each flag and the most advanced AVX level.
//   4. If the OS supports AVX, run the test for the most advanced level
//      (testavx512 / testavx2 / testavx1); run testaes and testpclmulqdq when
//      the CPU supports them.
//   5. Set igPassFailStatus to 1 if any executed test failed, otherwise 0.
//
// The process exits with status 1 if the library or any required entry point
// cannot be loaded.
void Run_AVX()
{
#ifdef __WIN_OS__

	HINSTANCE hInstanceLoadDLL = LoadLibraryA(kWinDetectPath);
	if (!hInstanceLoadDLL)
	{
		std::cout << ".." << std::endl << "..could not load " << kWinDetectName << ".dll " << std::endl << ".." << std::endl;
		exit(1);
	}

	//output DLL version (queried once and reused for both outputs)
	pOutputDLLVersion pODV = reinterpret_cast<pOutputDLLVersion>(WinResolveOrExit(hInstanceLoadDLL, "DUdll_OutputDLLVersion", "OutputDLLVersion", ".dll"));
	const char* cDLLversion = pODV();
	const std::string sDLLversion = cDLLversion ? cDLLversion : "";
	std::cout << ".." << kWinDetectName << " DLL Version - " << sDLLversion << std::endl;
	strPFOut.append("\n" + std::string(kWinDetectName) + " DLL Version - " + sDLLversion + "\n");

	//Output CPU Features Detection Section header
	std::cout << std::endl << " --- CPU Features Detection --- " << std::endl;
	strPFOut.append("\n  --- CPU Features Detection --- \n");

	isAVX_OS_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bISFeature_OS_Supported", "ISFeature_OS_Supported");

	pMaxAVX_Support pMaxAVX = reinterpret_cast<pMaxAVX_Support>(WinResolveOrExit(hInstanceLoadDLL, "DUdll_Max_AVX_Supported", "Max_AVX_Supported", " dll"));
	AVX_Max_Supp = pMaxAVX();

	AVX_HW = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheckAVX", "checkAVX");
	AVX2_HW = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheckAVX2", "checkAVX2");
	AVX512BW_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheck512BW", "checkAVX512BW");
	AVX512CD_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheck512CD", "checkAVX512CD");
	AVX512DQ_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheck512DQ", "checkAVX512DQ");
	AVX512ER_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheck512ER", "checkAVX512ER");
	AVX512F_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheck512F", "checkAVX512F");
	AVX512IFMA52_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheck512IFMA52", "checkAVX512IFMA52");
	AVX512PF_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheck512PF", "checkAVX512PF");
	AVX512VBMI_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheck512VBMI", "checkAVX512VBMI");
	AVX512VL_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheck512VL", "checkAVX512VL");
	AES_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheckAES", "checkAES");
	PCLMULQDQ_Supported = WinQueryFlagOrExit(hInstanceLoadDLL, "DUdll_bCheckPCLMULQDQ", "checkPCLMULQDQ");

	//unload DLL
	FreeLibrary(hInstanceLoadDLL);

#endif

#ifdef __LIN_OS__

	void* handle = dlopen(kLinDetectPath, RTLD_LAZY);
	if (!handle)
	{
		std::cout << ".." << std::endl << "..could not load lib" << kLinDetectName << " shared library " << std::endl << ".." << std::endl;
		const char* loadError = dlerror();
		if (loadError != NULL)
		{
			fputs(loadError, stderr);
		}
		exit(1);
	}

	//DLLVersion (queried once and reused for both outputs)
	OutputDLLVersion = reinterpret_cast<std::string(*)(void)>(LinResolveOrExit(handle, "OutputDLLVersion"));
	const std::string soVersion = trim(OutputDLLVersion());
	std::cout << ".." << kLinDetectName << ".so Version.. --> " << soVersion << std::endl;
	strPFOut.append("\n.." + std::string(kLinDetectName) + ".so Version.. --> " + soVersion + "\n");

	//Output CPU Features Detection Section header
	std::cout << std::endl << " --- CPU Features Detection --- " << std::endl;
	strPFOut.append("\n  --- CPU Features Detection --- \n");

	//Is AVX supported by OS
	isAVX_OS_Supported = LinQueryFlagOrExit(handle, "bISFeature_OS_Supported", bISFeature_OS_Supported);

	//Max AVX Supported
	max_avx_supported = reinterpret_cast<int(*)(void)>(LinResolveOrExit(handle, "max_avx_supported"));
	AVX_Max_Supp = max_avx_supported();

	AVX_HW = LinQueryFlagOrExit(handle, "bCheckAVX", bCheckAVX);
	AVX2_HW = LinQueryFlagOrExit(handle, "bCheckAVX2", bCheckAVX2);
	AES_Supported = LinQueryFlagOrExit(handle, "bCheckAES", bCheckAES);
	PCLMULQDQ_Supported = LinQueryFlagOrExit(handle, "bCheckPCLMULQDQ", bCheckPCLMULQDQ);
	AVX512BW_Supported = LinQueryFlagOrExit(handle, "bCheckAVX_512BW", bCheckAVX_512BW);
	AVX512CD_Supported = LinQueryFlagOrExit(handle, "bCheckAVX_512CD", bCheckAVX_512CD);
	AVX512DQ_Supported = LinQueryFlagOrExit(handle, "bCheckAVX_512DQ", bCheckAVX_512DQ);
	AVX512ER_Supported = LinQueryFlagOrExit(handle, "bCheckAVX_512ER", bCheckAVX_512ER);
	AVX512F_Supported = LinQueryFlagOrExit(handle, "bCheckAVX_512F", bCheckAVX_512F);
	AVX512IFMA52_Supported = LinQueryFlagOrExit(handle, "bCheckAVX_512IFMA52", bCheckAVX_512IFMA52);
	AVX512PF_Supported = LinQueryFlagOrExit(handle, "bCheckAVX_512PF", bCheckAVX_512PF);
	AVX512VBMI_Supported = LinQueryFlagOrExit(handle, "bCheckAVX_512VBMI", bCheckAVX_512VBMI);
	AVX512VL_Supported = LinQueryFlagOrExit(handle, "bCheckAVX_512VL", bCheckAVX_512VL);

	// The file-level function pointers assigned above dangle from here on.
	dlclose(handle);

#endif

	//Output AVX / AES / PCLMULQDQ Features Detection Section to screen and results file
	AvxReportCpuFeature("AVX", AVX_HW);
	AvxReportCpuFeature("AVX2", AVX2_HW);
	AvxReportCpuFeature("AVX512BW", AVX512BW_Supported);
	AvxReportCpuFeature("AVX512CD", AVX512CD_Supported);
	AvxReportCpuFeature("AVX512DQ", AVX512DQ_Supported);
	AvxReportCpuFeature("AVX512ER", AVX512ER_Supported);
	AvxReportCpuFeature("AVX512F", AVX512F_Supported);
	AvxReportCpuFeature("AVX512IFMA52", AVX512IFMA52_Supported);
	AvxReportCpuFeature("AVX512PF", AVX512PF_Supported);
	AvxReportCpuFeature("AVX512VBMI", AVX512VBMI_Supported);
	AvxReportCpuFeature("AVX512VL", AVX512VL_Supported);
	AvxReportCpuFeature("AES", AES_Supported);
	AvxReportCpuFeature("PCLMULQDQ", PCLMULQDQ_Supported);
	AvxReportLine("\n");
	AvxReportLine(isAVX_OS_Supported ? "..AVX is supported by this Operating System..\n" : "..AVX is NOT supported by this Operating System..\n");
	AvxReportLine("\n");

	// Most advanced AVX level; values outside 0..3 are not reported.
	switch (AVX_Max_Supp)
	{
	case 0:
		AvxReportLine("Most Advanced AVX Feature Detected.. No AVX features found\n");
		break;
	case 1:
		AvxReportLine("Most Advanced AVX Feature Detected.. AVX\n");
		break;
	case 2:
		AvxReportLine("Most Advanced AVX Feature Detected.. AVX2\n");
		break;
	case 3:
		AvxReportLine("Most Advanced AVX Feature Detected.. AVX512\n");
		break;
	default:
		break;
	}

	// A test that is not run counts as passed.
	bool AVX_TestResult_Flag = true;
	bool AES_TestResult_Flag = true;
	bool PCLMULQDQ_TestResult_Flag = true;

	if (isAVX_OS_Supported)
	{
		// Only the most advanced supported AVX level is exercised.
		switch (AVX_Max_Supp)
		{
		case 3:
			std::cout << "\nTesting Most Advanced AVX Feature - AVX512....\n";
			AVX_TestResult_Flag = AvxReportPassFail("AVX512", testavx512());
			break;
		case 2:
			std::cout << "\nTesting Most Advanced AVX Feature - AVX2....\n";
			AVX_TestResult_Flag = AvxReportPassFail("AVX2", testavx2());
			break;
		case 1:
			std::cout << "\nTesting Most Advanced AVX Feature - AVX....\n";
			AVX_TestResult_Flag = AvxReportPassFail("AVX", testavx1());
			break;
		case 0:
			AvxReportAvxNotTested(false);
			break;
		default:
			break;
		}
	}
	else
	{
		AvxReportAvxNotTested(true);

		std::cout << "Minimum Operating System requirements are " << std::endl;
#ifdef __WIN_OS__
		std::cout << "Windows 10,8.1,8,7 SP1, Windows Server 2008 R2 SP1" << std::endl;
#endif
#ifdef __LIN_OS__
		std::cout << "Linux kernel version 2.6.30" << std::endl << std::endl;
#endif
		strPFOut.append("Minimum Operating System requirements are \n");
#ifdef __WIN_OS__
		strPFOut.append("Windows 10,8.1,8,7 SP1, Windows Server 2008 R2 SP1\n");
#endif
#ifdef __LIN_OS__
		strPFOut.append("Linux kernel version 2.6.30\n\n");
#endif
	}

	if (AES_Supported)
	{
		AES_TestResult_Flag = AvxReportPassFail("AES", testaes());
	}
	else
	{
		AvxReportTestResult("AES", "Not Tested");
	}

	if (PCLMULQDQ_Supported)
	{
		PCLMULQDQ_TestResult_Flag = AvxReportPassFail("PCLMULQDQ", testpclmulqdq());
	}
	else
	{
		AvxReportTestResult("PCLMULQDQ", "Not Tested");
	}

	// 1 = at least one executed test failed, 0 = everything passed or was skipped.
	const bool allPassed = AVX_TestResult_Flag && AES_TestResult_Flag && PCLMULQDQ_TestResult_Flag;
	igPassFailStatus = allPassed ? 0 : 1;
}

// Unsigned integer type used by testavx1() for its bit mask and pass counter.
typedef unsigned long long uint64; 

bool testavx1()
{
	int OMPnum = 0;
	bool AVX_Test_Flag = true;
	
	const unsigned int loopsize = 19000000;
	const unsigned int loopstart = 18990000;

	float *a = 0;
	float *b = 0;
	float *c = 0;
	float *a2 = 0;
	float *b2 = 0;
	float *c2 = 0;

	long dif = 0;
	time_t start, end;

	time(&start);

	while (dif <= iAVXtimer)
	{
		a = new float[loopsize];
		b = new float[loopsize];
		c = new float[loopsize];
		a2 = new float[loopsize];
		b2 = new float[loopsize];
		c2 = new float[loopsize];

		register __m256d r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, rA, rB, rC, rD, rE, rF;

		#pragma omp parallel for schedule(dynamic) reduction(+:OMPnum)
		for (int i = loopstart; i < loopsize; i++)
		{
			OMPnum++;
			a[i] = i*exp(exp(exp(i)));
			b[i] = i*exp(exp(exp(i)));
			c[i];
			a2[i] = i*exp(exp(exp(i)));
			b2[i] = i*exp(exp(exp(i)));
			c2[i];

			__m256i a_ = _mm256_loadu_si256((__m256i*)a);
			__m256i b_ = _mm256_loadu_si256((__m256i*)b);
			__m256i c_ = _mm256_castps_si256(
				_mm256_or_ps(_mm256_castsi256_ps(a_), _mm256_castsi256_ps(b_)));
			_mm256_storeu_si256((__m256i*)c, c_);
			__m256i a2_ = _mm256_loadu_si256((__m256i*)a2);
			__m256i b2_ = _mm256_loadu_si256((__m256i*)b2);
			__m256i c2_ = _mm256_castps_si256(
				_mm256_or_ps(_mm256_castsi256_ps(a2_), _mm256_castsi256_ps(b2_)));
			_mm256_storeu_si256((__m256i*)c2, c2_);

			if (c[i] != c2[i])
			{
				//Set Fail Flag
				AVX_Test_Flag = false;
			}


			//  Arithmetic
			r0 = _mm256_set1_pd(i);
			r1 = _mm256_set1_pd(i);

			r8 = _mm256_set1_pd(-0.0);

			r2 = _mm256_xor_pd(r0, r8);
			r3 = _mm256_or_pd(r0, r8);
			r4 = _mm256_andnot_pd(r8, r0);
			r5 = _mm256_mul_pd(r1, _mm256_set1_pd(0.37796447300922722721));
			r6 = _mm256_mul_pd(r1, _mm256_set1_pd(0.24253562503633297352));
			r7 = _mm256_mul_pd(r1, _mm256_set1_pd(4.1231056256176605498));
			r8 = _mm256_add_pd(r0, _mm256_set1_pd(0.37796447300922722721));
			r9 = _mm256_add_pd(r1, _mm256_set1_pd(0.24253562503633297352));
			rA = _mm256_sub_pd(r0, _mm256_set1_pd(4.1231056256176605498));
			rB = _mm256_sub_pd(r1, _mm256_set1_pd(4.1231056256176605498));

			rC = _mm256_set1_pd(1.4142135623730950488);
			rD = _mm256_set1_pd(1.7320508075688772935);
			rE = _mm256_set1_pd(0.57735026918962576451);
			rF = _mm256_set1_pd(0.70710678118654752440);

			uint64 iMASK = 0x800fffffffffffffull;
			__m256d MASK = _mm256_set1_pd(*(double*)&iMASK);
			__m256d vONE = _mm256_set1_pd(1.0);
			uint64 c = 0;
			while (c < 40) {
				size_t i2 = 0;
				while (i2 < 40) {
					r0 = _mm256_mul_pd(r0, rC);
					r1 = _mm256_add_pd(r1, rD);
					r2 = _mm256_mul_pd(r2, rE);
					r3 = _mm256_sub_pd(r3, rF);
					r4 = _mm256_mul_pd(r4, rC);
					r5 = _mm256_add_pd(r5, rD);
					r6 = _mm256_mul_pd(r6, rE);
					r7 = _mm256_sub_pd(r7, rF);
					r8 = _mm256_mul_pd(r8, rC);
					r9 = _mm256_add_pd(r9, rD);
					rA = _mm256_mul_pd(rA, rE);
					rB = _mm256_sub_pd(rB, rF);

					r0 = _mm256_add_pd(r0, rF);
					r1 = _mm256_mul_pd(r1, rE);
					r2 = _mm256_sub_pd(r2, rD);
					r3 = _mm256_mul_pd(r3, rC);
					r4 = _mm256_add_pd(r4, rF);
					r5 = _mm256_mul_pd(r5, rE);
					r6 = _mm256_sub_pd(r6, rD);
					r7 = _mm256_mul_pd(r7, rC);
					r8 = _mm256_add_pd(r8, rF);
					r9 = _mm256_mul_pd(r9, rE);
					rA = _mm256_sub_pd(rA, rD);
					rB = _mm256_mul_pd(rB, rC);

					r0 = _mm256_mul_pd(r0, rC);
					r1 = _mm256_add_pd(r1, rD);
					r2 = _mm256_mul_pd(r2, rE);
					r3 = _mm256_sub_pd(r3, rF);
					r4 = _mm256_mul_pd(r4, rC);
					r5 = _mm256_add_pd(r5, rD);
					r6 = _mm256_mul_pd(r6, rE);
					r7 = _mm256_sub_pd(r7, rF);
					r8 = _mm256_mul_pd(r8, rC);
					r9 = _mm256_add_pd(r9, rD);
					rA = _mm256_mul_pd(rA, rE);
					rB = _mm256_sub_pd(rB, rF);

					r0 = _mm256_add_pd(r0, rF);
					r1 = _mm256_mul_pd(r1, rE);
					r2 = _mm256_sub_pd(r2, rD);
					r3 = _mm256_mul_pd(r3, rC);
					r4 = _mm256_add_pd(r4, rF);
					r5 = _mm256_mul_pd(r5, rE);
					r6 = _mm256_sub_pd(r6, rD);
					r7 = _mm256_mul_pd(r7, rC);
					r8 = _mm256_add_pd(r8, rF);
					r9 = _mm256_mul_pd(r9, rE);
					rA = _mm256_sub_pd(rA, rD);
					rB = _mm256_mul_pd(rB, rC);
					i2++;
				}
				//  Need to renormalize to prevent denormal/overflow.
				r0 = _mm256_and_pd(r0, MASK);
				r1 = _mm256_and_pd(r1, MASK);
				r2 = _mm256_and_pd(r2, MASK);
				r3 = _mm256_and_pd(r3, MASK);
				r4 = _mm256_and_pd(r4, MASK);
				r5 = _mm256_and_pd(r5, MASK);
				r6 = _mm256_and_pd(r6, MASK);
				r7 = _mm256_and_pd(r7, MASK);
				r8 = _mm256_and_pd(r8, MASK);
				r9 = _mm256_and_pd(r9, MASK);
				rA = _mm256_and_pd(rA, MASK);
				rB = _mm256_and_pd(rB, MASK);
				r0 = _mm256_or_pd(r0, vONE);
				r1 = _mm256_or_pd(r1, vONE);
				r2 = _mm256_or_pd(r2, vONE);
				r3 = _mm256_or_pd(r3, vONE);
				r4 = _mm256_or_pd(r4, vONE);
				r5 = _mm256_or_pd(r5, vONE);
				r6 = _mm256_or_pd(r6, vONE);
				r7 = _mm256_or_pd(r7, vONE);
				r8 = _mm256_or_pd(r8, vONE);
				r9 = _mm256_or_pd(r9, vONE);
				rA = _mm256_or_pd(rA, vONE);
				rB = _mm256_or_pd(rB, vONE);
				c++;
			}
			r0 = _mm256_add_pd(r0, r1);
			r2 = _mm256_add_pd(r2, r3);
			r4 = _mm256_add_pd(r4, r5);
			r6 = _mm256_add_pd(r6, r7);
			r8 = _mm256_add_pd(r8, r9);
			rA = _mm256_add_pd(rA, rB);

			r0 = _mm256_add_pd(r0, r2);
			r4 = _mm256_add_pd(r4, r6);
			r8 = _mm256_add_pd(r8, rA);

			r0 = _mm256_add_pd(r0, r4);
			r0 = _mm256_add_pd(r0, r8);

			//  Prevent Dead Code Elimination
			double out = 0;
			__m256d temp = r0;
			out += ((double*)&temp)[0];
			out += ((double*)&temp)[1];
			out += ((double*)&temp)[2];
			out += ((double*)&temp)[3];

		}

		delete[] a;
		delete[] b;
		delete[] c;
		delete[] a2;
		delete[] b2;
		delete[] c2;
		time(&end);
		dif = difftime(end, start);

	}

	if (AVX_Test_Flag)
	{
		return true;
	}
	else
	{
		return false;
	}
}

// testavx2 -- timed stress/consistency loop built from 256-bit integer intrinsics.
//
// Until the elapsed wall-clock seconds exceed iAVXtimer, each pass:
//   1. allocates six float arrays of `loopsize` elements,
//   2. runs an OpenMP-parallel loop over i in [loopstart, loopsize) that fills
//      a/b/a2/b2 at index i, ORs the first 32 bytes of a|b into c and of a2|b2
//      into c2, compares c[i] with c2[i], and then executes a long chain of
//      packed-integer mul/add/sub/and/or operations,
//   3. frees the arrays and refreshes the elapsed time.
//
// Returns true unless some iteration observed c[i] != c2[i].
//
// NOTE(review): every floating-point literal handed to _mm256_set1_epi32 below is
// implicitly converted to int first (the intrinsic takes an int), so e.g. 0.377...
// becomes 0 and 1.414... becomes 1 -- confirm this is intended.
bool testavx2()
{
	// Iteration counter used only as the OpenMP reduction variable; it is not read afterwards.
	int OMPnum = 0;
	bool AVX_Test_Flag = true;

	// Only the last loopsize - loopstart (= 10000) indices are visited per pass.
	const unsigned int loopsize = 19000000;
	const unsigned int loopstart = 18990000;

	float *a = 0;
	float *b = 0;
	float *c = 0;
	float *a2 = 0;
	float *b2 = 0;
	float *c2 = 0;

	// Elapsed seconds since `start`; difftime returns double, so the value is truncated to long.
	long dif = 0;
	time_t start, end;

	time(&start);

	while (dif <= iAVXtimer)
	{
		// Six full-size arrays are allocated on every pass and released at the bottom of the pass.
		a = new float[loopsize];
		b = new float[loopsize];
		c = new float[loopsize];
		a2 = new float[loopsize];
		b2 = new float[loopsize];
		c2 = new float[loopsize];

		// NOTE(review): these are declared outside the parallel loop, so under OpenMP's default
		// data-sharing rules they are shared between threads rather than private -- confirm.
		register __m256i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, rA, rB, rC, rD, rE, rF;

		#pragma omp parallel for schedule(dynamic) reduction(+:OMPnum)
		for (int i = loopstart; i < loopsize; i++)
		{
			OMPnum++;
			// NOTE(review): for i in this range exp(i) presumably overflows, making these
			// stores +infinity -- confirm. `c[i];` / `c2[i];` are expression statements with no effect.
			a[i] = i*exp(exp(exp(i)));
			b[i] = i*exp(exp(exp(i)));
			c[i];
			a2[i] = i*exp(exp(exp(i)));
			b2[i] = i*exp(exp(exp(i)));
			c2[i];

			// The loads/stores use the array base pointers, i.e. the first 32 bytes (elements 0..7),
			// not element i. NOTE(review): those elements are never written by this function.
			__m256i a_ = _mm256_loadu_si256((__m256i*)a);
			__m256i b_ = _mm256_loadu_si256((__m256i*)b);
			__m256i c_ = _mm256_castps_si256(
				_mm256_or_ps(_mm256_castsi256_ps(a_), _mm256_castsi256_ps(b_)));
			_mm256_storeu_si256((__m256i*)c, c_);
			__m256i a2_ = _mm256_loadu_si256((__m256i*)a2);
			__m256i b2_ = _mm256_loadu_si256((__m256i*)b2);
			__m256i c2_ = _mm256_castps_si256(
				_mm256_or_ps(_mm256_castsi256_ps(a2_), _mm256_castsi256_ps(b2_)));
			_mm256_storeu_si256((__m256i*)c2, c2_);

			// NOTE(review): c[i] and c2[i] (i >= loopstart) are not written anywhere above, so this
			// compares whatever the allocator returned; the flag is also written by several threads
			// without synchronization -- confirm both are acceptable.
			if (c[i] != c2[i])
			{
				//Set Fail Flag
				AVX_Test_Flag = false;
			}


			//  Arithmetic
			// Seed the twelve working registers r0..rB from the loop index and the constants.
			r0 = _mm256_set1_epi32(i);
			r1 = _mm256_set1_epi32(i);

			r8 = _mm256_set1_epi32(-0.0);

			r2 = _mm256_xor_si256(r0, r8);
			r3 = _mm256_or_si256(r0, r8);
			r4 = _mm256_andnot_si256(r8, r0);
			// NOTE(review): per Intel's intrinsics reference _mm256_mul_epi32 multiplies only the low
			// 32 bits of each 64-bit lane and yields 64-bit products -- confirm that is the intended op.
			r5 = _mm256_mul_epi32(r1, _mm256_set1_epi32(0.37796447300922722721));
			r6 = _mm256_mul_epi32(r1, _mm256_set1_epi32(0.24253562503633297352));
			r7 = _mm256_mul_epi32(r1, _mm256_set1_epi32(4.1231056256176605498));
			r8 = _mm256_add_epi32(r0, _mm256_set1_epi32(0.37796447300922722721));
			r9 = _mm256_add_epi32(r1, _mm256_set1_epi32(0.24253562503633297352));
			rA = _mm256_sub_epi32(r0, _mm256_set1_epi32(4.1231056256176605498));
			rB = _mm256_sub_epi32(r1, _mm256_set1_epi32(4.1231056256176605498));

			// Constant operands used by every step of the chain below.
			rC = _mm256_set1_epi32(1.4142135623730950488);
			rD = _mm256_set1_epi32(1.7320508075688772935);
			rE = _mm256_set1_epi32(0.57735026918962576451);
			rF = _mm256_set1_epi32(0.70710678118654752440);

			// NOTE(review): the 64-bit pattern is reinterpreted as a double and then converted to the
			// int argument of _mm256_set1_epi32; presumably that conversion yields 0 -- confirm.
			uint64 iMASK = 0x800fffffffffffffull;
			__m256i MASK = _mm256_set1_epi32(*(double*)&iMASK);
			__m256i vONE = _mm256_set1_epi32(1.0);
			// This counter shadows the outer float* c for the rest of the loop body.
			uint64 c = 0;
			// 40 outer rounds x 40 inner rounds; each inner round issues 48 packed-integer operations.
			while (c < 40) {
				size_t i2 = 0;
				while (i2 < 40) {
					r0 = _mm256_mul_epi32(r0, rC);
					r1 = _mm256_add_epi32(r1, rD);
					r2 = _mm256_mul_epi32(r2, rE);
					r3 = _mm256_sub_epi32(r3, rF);
					r4 = _mm256_mul_epi32(r4, rC);
					r5 = _mm256_add_epi32(r5, rD);
					r6 = _mm256_mul_epi32(r6, rE);
					r7 = _mm256_sub_epi32(r7, rF);
					r8 = _mm256_mul_epi32(r8, rC);
					r9 = _mm256_add_epi32(r9, rD);
					rA = _mm256_mul_epi32(rA, rE);
					rB = _mm256_sub_epi32(rB, rF);

					r0 = _mm256_add_epi32(r0, rF);
					r1 = _mm256_mul_epi32(r1, rE);
					r2 = _mm256_sub_epi32(r2, rD);
					r3 = _mm256_mul_epi32(r3, rC);
					r4 = _mm256_add_epi32(r4, rF);
					r5 = _mm256_mul_epi32(r5, rE);
					r6 = _mm256_sub_epi32(r6, rD);
					r7 = _mm256_mul_epi32(r7, rC);
					r8 = _mm256_add_epi32(r8, rF);
					r9 = _mm256_mul_epi32(r9, rE);
					rA = _mm256_sub_epi32(rA, rD);
					rB = _mm256_mul_epi32(rB, rC);

					r0 = _mm256_mul_epi32(r0, rC);
					r1 = _mm256_add_epi32(r1, rD);
					r2 = _mm256_mul_epi32(r2, rE);
					r3 = _mm256_sub_epi32(r3, rF);
					r4 = _mm256_mul_epi32(r4, rC);
					r5 = _mm256_add_epi32(r5, rD);
					r6 = _mm256_mul_epi32(r6, rE);
					r7 = _mm256_sub_epi32(r7, rF);
					r8 = _mm256_mul_epi32(r8, rC);
					r9 = _mm256_add_epi32(r9, rD);
					rA = _mm256_mul_epi32(rA, rE);
					rB = _mm256_sub_epi32(rB, rF);

					r0 = _mm256_add_epi32(r0, rF);
					r1 = _mm256_mul_epi32(r1, rE);
					r2 = _mm256_sub_epi32(r2, rD);
					r3 = _mm256_mul_epi32(r3, rC);
					r4 = _mm256_add_epi32(r4, rF);
					r5 = _mm256_mul_epi32(r5, rE);
					r6 = _mm256_sub_epi32(r6, rD);
					r7 = _mm256_mul_epi32(r7, rC);
					r8 = _mm256_add_epi32(r8, rF);
					r9 = _mm256_mul_epi32(r9, rE);
					rA = _mm256_sub_epi32(rA, rD);
					rB = _mm256_mul_epi32(rB, rC);
					i2++;
				}
				//  Need to renormalize to prevent denormal/overflow.
				// After each inner block every working register is ANDed with MASK and ORed with vONE.
				r0 = _mm256_and_si256(r0, MASK);
				r1 = _mm256_and_si256(r1, MASK);
				r2 = _mm256_and_si256(r2, MASK);
				r3 = _mm256_and_si256(r3, MASK);
				r4 = _mm256_and_si256(r4, MASK);
				r5 = _mm256_and_si256(r5, MASK);
				r6 = _mm256_and_si256(r6, MASK);
				r7 = _mm256_and_si256(r7, MASK);
				r8 = _mm256_and_si256(r8, MASK);
				r9 = _mm256_and_si256(r9, MASK);
				rA = _mm256_and_si256(rA, MASK);
				rB = _mm256_and_si256(rB, MASK);
				r0 = _mm256_or_si256(r0, vONE);
				r1 = _mm256_or_si256(r1, vONE);
				r2 = _mm256_or_si256(r2, vONE);
				r3 = _mm256_or_si256(r3, vONE);
				r4 = _mm256_or_si256(r4, vONE);
				r5 = _mm256_or_si256(r5, vONE);
				r6 = _mm256_or_si256(r6, vONE);
				r7 = _mm256_or_si256(r7, vONE);
				r8 = _mm256_or_si256(r8, vONE);
				r9 = _mm256_or_si256(r9, vONE);
				rA = _mm256_or_si256(rA, vONE);
				rB = _mm256_or_si256(rB, vONE);
				c++;
			}
			// Fold the twelve working registers pairwise into r0.
			r0 = _mm256_add_epi32(r0, r1);
			r2 = _mm256_add_epi32(r2, r3);
			r4 = _mm256_add_epi32(r4, r5);
			r6 = _mm256_add_epi32(r6, r7);
			r8 = _mm256_add_epi32(r8, r9);
			rA = _mm256_add_epi32(rA, rB);

			r0 = _mm256_add_epi32(r0, r2);
			r4 = _mm256_add_epi32(r4, r6);
			r8 = _mm256_add_epi32(r8, rA);

			r0 = _mm256_add_epi32(r0, r4);
			r0 = _mm256_add_epi32(r0, r8);

			//  Prevent Dead Code Elimination
			// The 256-bit result is read back as four doubles and summed. NOTE(review): `out` is
			// never read afterwards, so whether this keeps the chain alive is compiler dependent.
			double out = 0;
			__m256i temp = r0;
			out += ((double*)&temp)[0];
			out += ((double*)&temp)[1];
			out += ((double*)&temp)[2];
			out += ((double*)&temp)[3];

		}
		delete[] a;
		delete[] b;
		delete[] c;
		delete[] a2;
		delete[] b2;
		delete[] c2;
		// Refresh the elapsed time that drives the outer while condition.
		time(&end);
		dif = difftime(end, start);

	}

	if (AVX_Test_Flag)
	{
		return true;
	}
	else
	{
		return false;
	}
}

// testavx512 -- timed stress/consistency loop built from 512-bit integer intrinsics.
//
// Until the elapsed wall-clock seconds exceed iAVXtimer, each pass:
//   1. allocates six float arrays of `loopsize` elements,
//   2. runs an OpenMP-parallel loop over i in [loopstart, loopsize) that fills
//      a/b/a2/b2 at index i, ORs the first 64 bytes of a|b into c and of a2|b2
//      into c2, compares c[i] with c2[i], and then drives two banks of working
//      registers (r0..rB and r10..r1B) through mul/add/sub/and/or chains,
//   3. frees the arrays and refreshes the elapsed time.
//
// Returns true unless some iteration observed c[i] != c2[i].
//
// NOTE(review): the floating-point literals handed to _mm512_set1_epi64 below are
// implicitly converted to an integer argument first, so e.g. 0.377... becomes 0 and
// 1.414... becomes 1 -- confirm this is intended.
bool testavx512()
{
	// Iteration counter used only as the OpenMP reduction variable; it is not read afterwards.
	int OMPnum = 0;
	bool AVX_Test_Flag = true;

	// Only the last loopsize - loopstart (= 10000) indices are visited per pass.
	const unsigned int loopsize = 19000000;
	const unsigned int loopstart = 18990000;

	float *a = 0;
	float *b = 0;
	float *c = 0;
	float *a2 = 0;
	float *b2 = 0;
	float *c2 = 0;

	// Elapsed seconds since `start`; difftime returns double, so the value is truncated to long.
	long dif = 0;
	time_t start, end;

	time(&start);

	while (dif <= iAVXtimer)
	{
		// Six full-size arrays are allocated on every pass and released at the bottom of the pass.
		a = new float[loopsize];
		b = new float[loopsize];
		c = new float[loopsize];
		a2 = new float[loopsize];
		b2 = new float[loopsize];
		c2 = new float[loopsize];

		// NOTE(review): these are declared outside the parallel loop, so under OpenMP's default
		// data-sharing rules they are shared between threads rather than private -- confirm.
		register __m512i r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, rA, rB, rC, rD, rE, rF, r10, r11, r12, r13, r14, r15, r16, r17, r18, r19, r1A, r1B, r1C, r1D, r1E, r1F;

		#pragma omp parallel for schedule(dynamic) reduction(+:OMPnum)
		for (int i = loopstart; i < loopsize; i++)
		{
			OMPnum++;
			// NOTE(review): for i in this range exp(i) presumably overflows, making these
			// stores +infinity -- confirm. `c[i];` / `c2[i];` are expression statements with no effect.
			a[i] = i*exp(exp(exp(i)));
			b[i] = i*exp(exp(exp(i)));
			c[i];
			a2[i] = i*exp(exp(exp(i)));
			b2[i] = i*exp(exp(exp(i)));
			c2[i];

			// The loads/stores use the array base pointers, i.e. the first 64 bytes (elements 0..15),
			// not element i. NOTE(review): those elements are never written by this function.
			__m512i a_ = _mm512_loadu_si512((__m512i*)a);
			__m512i b_ = _mm512_loadu_si512((__m512i*)b);
			__m512i c_ = _mm512_or_epi64(a_, b_);
			_mm512_storeu_si512((__m512i*)c, c_);			
			__m512i a2_ = _mm512_loadu_si512((__m512i*)a2);
			__m512i b2_ = _mm512_loadu_si512((__m512i*)b2);
			__m512i c2_ = _mm512_or_epi64(a2_, b2_);
			_mm512_storeu_si512((__m512i*)c2, c2_);

			// NOTE(review): c[i] and c2[i] (i >= loopstart) are not written anywhere above, so this
			// compares whatever the allocator returned; the flag is also written by several threads
			// without synchronization -- confirm both are acceptable.
			if (c[i] != c2[i])
			{
				//Set Fail Flag
				AVX_Test_Flag = false;
			}


			//  Arithmetic
			// First bank: seed r0..rB from the loop index, rC..rF hold the constant operands.
			r0 = _mm512_set1_epi64(i);
			r1 = _mm512_set1_epi64(i);

			r8 = _mm512_set1_epi64(-0.0);

			r2 = _mm512_xor_epi64(r0, r8);
			r3 = _mm512_or_epi64(r0, r8);
			r4 = _mm512_abs_epi64(r8);
			r5 = _mm512_mullox_epi64(r1, _mm512_set1_epi64(0.37796447300922722721));
			r6 = _mm512_mullox_epi64(r1, _mm512_set1_epi64(0.24253562503633297352));
			r7 = _mm512_mullox_epi64(r1, _mm512_set1_epi64(4.1231056256176605498));
			r8 = _mm512_add_epi64(r0, _mm512_set1_epi64(0.37796447300922722721));
			r9 = _mm512_add_epi64(r1, _mm512_set1_epi64(0.24253562503633297352));
			rA = _mm512_sub_epi64(r0, _mm512_set1_epi64(4.1231056256176605498));
			rB = _mm512_sub_epi64(r1, _mm512_set1_epi64(4.1231056256176605498));

			rC = _mm512_set1_epi64(1.4142135623730950488);
			rD = _mm512_set1_epi64(1.7320508075688772935);
			rE = _mm512_set1_epi64(0.57735026918962576451);
			rF = _mm512_set1_epi64(0.70710678118654752440);
			
			// Second bank: r10..r1B and r1C..r1F are seeded exactly like the first bank.
			r10 = _mm512_set1_epi64(i);
			r11 = _mm512_set1_epi64(i);

			r18 = _mm512_set1_epi64(-0.0);

			r12 = _mm512_xor_epi64(r10, r18);
			r13 = _mm512_or_epi64(r10, r18);
			r14 = _mm512_abs_epi64(r18);
			r15 = _mm512_mullox_epi64(r11, _mm512_set1_epi64(0.37796447300922722721));
			r16 = _mm512_mullox_epi64(r11, _mm512_set1_epi64(0.24253562503633297352));
			r17 = _mm512_mullox_epi64(r11, _mm512_set1_epi64(4.1231056256176605498));
			r18 = _mm512_add_epi64(r10, _mm512_set1_epi64(0.37796447300922722721));
			r19 = _mm512_add_epi64(r11, _mm512_set1_epi64(0.24253562503633297352));
			r1A = _mm512_sub_epi64(r10, _mm512_set1_epi64(4.1231056256176605498));
			r1B = _mm512_sub_epi64(r11, _mm512_set1_epi64(4.1231056256176605498));

			r1C = _mm512_set1_epi64(1.4142135623730950488);
			r1D = _mm512_set1_epi64(1.7320508075688772935);
			r1E = _mm512_set1_epi64(0.57735026918962576451);
			r1F = _mm512_set1_epi64(0.70710678118654752440);

			// NOTE(review): the 64-bit pattern is reinterpreted as a double and then converted to the
			// integer argument of _mm512_set1_epi64; presumably that conversion yields 0 -- confirm.
			uint64 iMASK = 0x800fffffffffffffull;
			__m512i MASK = _mm512_set1_epi64(*(double*)&iMASK);
			__m512i vONE = _mm512_set1_epi64(1.0);
			// This counter shadows the outer float* c for the rest of the loop body.
			uint64 c = 0;
			// 20 outer rounds x 20 inner rounds. Each inner round runs six 12-op steps on the
			// first bank (r0..rB) followed by two 12-op steps on the second bank (r10..r1B).
			while (c < 20) {
				size_t i2 = 0;
				while (i2 < 20) {
					r0 = _mm512_mullox_epi64(r0, rC);
					r1 = _mm512_add_epi64(r1, rD);
					r2 = _mm512_mullox_epi64(r2, rE);
					r3 = _mm512_sub_epi64(r3, rF);
					r4 = _mm512_mullox_epi64(r4, rC);
					r5 = _mm512_add_epi64(r5, rD);
					r6 = _mm512_mullox_epi64(r6, rE);
					r7 = _mm512_sub_epi64(r7, rF);
					r8 = _mm512_mullox_epi64(r8, rC);
					r9 = _mm512_add_epi64(r9, rD);
					rA = _mm512_mullox_epi64(rA, rE);
					rB = _mm512_sub_epi64(rB, rF);

					r0 = _mm512_add_epi64(r0, rF);
					r1 = _mm512_mullox_epi64(r1, rE);
					r2 = _mm512_sub_epi64(r2, rD);
					r3 = _mm512_mullox_epi64(r3, rC);
					r4 = _mm512_add_epi64(r4, rF);
					r5 = _mm512_mullox_epi64(r5, rE);
					r6 = _mm512_sub_epi64(r6, rD);
					r7 = _mm512_mullox_epi64(r7, rC);
					r8 = _mm512_add_epi64(r8, rF);
					r9 = _mm512_mullox_epi64(r9, rE);
					rA = _mm512_sub_epi64(rA, rD);
					rB = _mm512_mullox_epi64(rB, rC);

					r0 = _mm512_mullox_epi64(r0, rC);
					r1 = _mm512_add_epi64(r1, rD);
					r2 = _mm512_mullox_epi64(r2, rE);
					r3 = _mm512_sub_epi64(r3, rF);
					r4 = _mm512_mullox_epi64(r4, rC);
					r5 = _mm512_add_epi64(r5, rD);
					r6 = _mm512_mullox_epi64(r6, rE);
					r7 = _mm512_sub_epi64(r7, rF);
					r8 = _mm512_mullox_epi64(r8, rC);
					r9 = _mm512_add_epi64(r9, rD);
					rA = _mm512_mullox_epi64(rA, rE);
					rB = _mm512_sub_epi64(rB, rF);

					r0 = _mm512_add_epi64(r0, rF);
					r1 = _mm512_mullox_epi64(r1, rE);
					r2 = _mm512_sub_epi64(r2, rD);
					r3 = _mm512_mullox_epi64(r3, rC);
					r4 = _mm512_add_epi64(r4, rF);
					r5 = _mm512_mullox_epi64(r5, rE);
					r6 = _mm512_sub_epi64(r6, rD);
					r7 = _mm512_mullox_epi64(r7, rC);
					r8 = _mm512_add_epi64(r8, rF);
					r9 = _mm512_mullox_epi64(r9, rE);
					rA = _mm512_sub_epi64(rA, rD);
					rB = _mm512_mullox_epi64(rB, rC);
					
					r0 = _mm512_mullox_epi64(r0, rC);
					r1 = _mm512_add_epi64(r1, rD);
					r2 = _mm512_mullox_epi64(r2, rE);
					r3 = _mm512_sub_epi64(r3, rF);
					r4 = _mm512_mullox_epi64(r4, rC);
					r5 = _mm512_add_epi64(r5, rD);
					r6 = _mm512_mullox_epi64(r6, rE);
					r7 = _mm512_sub_epi64(r7, rF);
					r8 = _mm512_mullox_epi64(r8, rC);
					r9 = _mm512_add_epi64(r9, rD);
					rA = _mm512_mullox_epi64(rA, rE);
					rB = _mm512_sub_epi64(rB, rF);

					r0 = _mm512_add_epi64(r0, rF);
					r1 = _mm512_mullox_epi64(r1, rE);
					r2 = _mm512_sub_epi64(r2, rD);
					r3 = _mm512_mullox_epi64(r3, rC);
					r4 = _mm512_add_epi64(r4, rF);
					r5 = _mm512_mullox_epi64(r5, rE);
					r6 = _mm512_sub_epi64(r6, rD);
					r7 = _mm512_mullox_epi64(r7, rC);
					r8 = _mm512_add_epi64(r8, rF);
					r9 = _mm512_mullox_epi64(r9, rE);
					rA = _mm512_sub_epi64(rA, rD);
					rB = _mm512_mullox_epi64(rB, rC);

					r10 = _mm512_mullox_epi64(r10, r1C);
					r11 = _mm512_add_epi64(r11, r1D);
					r12 = _mm512_mullox_epi64(r12, r1E);
					r13 = _mm512_sub_epi64(r13, r1F);
					r14 = _mm512_mullox_epi64(r14, r1C);
					r15 = _mm512_add_epi64(r15, r1D);
					r16 = _mm512_mullox_epi64(r16, r1E);
					r17 = _mm512_sub_epi64(r17, r1F);
					r18 = _mm512_mullox_epi64(r18, r1C);
					r19 = _mm512_add_epi64(r19, r1D);
					r1A = _mm512_mullox_epi64(r1A, r1E);
					r1B = _mm512_sub_epi64(r1B, r1F);

					r10 = _mm512_add_epi64(r10, r1F);
					r11 = _mm512_mullox_epi64(r11, r1E);
					r12 = _mm512_sub_epi64(r12, r1D);
					r13 = _mm512_mullox_epi64(r13, r1C);
					r14 = _mm512_add_epi64(r14, r1F);
					r15 = _mm512_mullox_epi64(r15, r1E);
					r16 = _mm512_sub_epi64(r16, r1D);
					r17 = _mm512_mullox_epi64(r17, r1C);
					r18 = _mm512_add_epi64(r18, r1F);
					r19 = _mm512_mullox_epi64(r19, r1E);
					r1A = _mm512_sub_epi64(r1A, r1D);
					r1B = _mm512_mullox_epi64(r1B, r1C);
					i2++;
				}
				//  Need to renormalize to prevent denormal/overflow.
				// After each inner block every working register of both banks is ANDed with MASK
				// and ORed with vONE.
				r0 = _mm512_and_epi64(r0, MASK);
				r1 = _mm512_and_epi64(r1, MASK);
				r2 = _mm512_and_epi64(r2, MASK);
				r3 = _mm512_and_epi64(r3, MASK);
				r4 = _mm512_and_epi64(r4, MASK);
				r5 = _mm512_and_epi64(r5, MASK);
				r6 = _mm512_and_epi64(r6, MASK);
				r7 = _mm512_and_epi64(r7, MASK);
				r8 = _mm512_and_epi64(r8, MASK);
				r9 = _mm512_and_epi64(r9, MASK);
				rA = _mm512_and_epi64(rA, MASK);
				rB = _mm512_and_epi64(rB, MASK);
				r0 = _mm512_or_epi64(r0, vONE);
				r1 = _mm512_or_epi64(r1, vONE);
				r2 = _mm512_or_epi64(r2, vONE);
				r3 = _mm512_or_epi64(r3, vONE);
				r4 = _mm512_or_epi64(r4, vONE);
				r5 = _mm512_or_epi64(r5, vONE);
				r6 = _mm512_or_epi64(r6, vONE);
				r7 = _mm512_or_epi64(r7, vONE);
				r8 = _mm512_or_epi64(r8, vONE);
				r9 = _mm512_or_epi64(r9, vONE);
				rA = _mm512_or_epi64(rA, vONE);
				rB = _mm512_or_epi64(rB, vONE);
				
				r10 = _mm512_and_epi64(r10, MASK);
				r11 = _mm512_and_epi64(r11, MASK);
				r12 = _mm512_and_epi64(r12, MASK);
				r13 = _mm512_and_epi64(r13, MASK);
				r14 = _mm512_and_epi64(r14, MASK);
				r15 = _mm512_and_epi64(r15, MASK);
				r16 = _mm512_and_epi64(r16, MASK);
				r17 = _mm512_and_epi64(r17, MASK);
				r18 = _mm512_and_epi64(r18, MASK);
				r19 = _mm512_and_epi64(r19, MASK);
				r1A = _mm512_and_epi64(r1A, MASK);
				r1B = _mm512_and_epi64(r1B, MASK);
				r10 = _mm512_or_epi64(r10, vONE);
				r11 = _mm512_or_epi64(r11, vONE);
				r12 = _mm512_or_epi64(r12, vONE);
				r13 = _mm512_or_epi64(r13, vONE);
				r14 = _mm512_or_epi64(r14, vONE);
				r15 = _mm512_or_epi64(r15, vONE);
				r16 = _mm512_or_epi64(r16, vONE);
				r17 = _mm512_or_epi64(r17, vONE);
				r18 = _mm512_or_epi64(r18, vONE);
				r19 = _mm512_or_epi64(r19, vONE);
				r1A = _mm512_or_epi64(r1A, vONE);
				r1B = _mm512_or_epi64(r1B, vONE);
				c++;
			}
			// Fold the first bank pairwise into r0 ...
			r0 = _mm512_add_epi64(r0, r1);
			r2 = _mm512_add_epi64(r2, r3);
			r4 = _mm512_add_epi64(r4, r5);
			r6 = _mm512_add_epi64(r6, r7);
			r8 = _mm512_add_epi64(r8, r9);
			rA = _mm512_add_epi64(rA, rB);

			r0 = _mm512_add_epi64(r0, r2);
			r4 = _mm512_add_epi64(r4, r6);
			r8 = _mm512_add_epi64(r8, rA);

			r0 = _mm512_add_epi64(r0, r4);
			r0 = _mm512_add_epi64(r0, r8);
			
			// ... and the second bank pairwise into r10.
			r10 = _mm512_add_epi64(r10, r11);
			r12 = _mm512_add_epi64(r12, r13);
			r14 = _mm512_add_epi64(r14, r15);
			r16 = _mm512_add_epi64(r16, r17);
			r18 = _mm512_add_epi64(r18, r19);
			r1A = _mm512_add_epi64(r1A, r1B);

			r10 = _mm512_add_epi64(r10, r12);
			r14 = _mm512_add_epi64(r14, r16);
			r18 = _mm512_add_epi64(r18, r1A);

			r10 = _mm512_add_epi64(r10, r14);
			r10 = _mm512_add_epi64(r10, r18);

			//  Prevent Dead Code Elimination
			// Only the first four 64-bit lanes of each 512-bit result are read back (as doubles).
			// NOTE(review): `out` is never read afterwards, so whether this keeps the chain alive
			// is compiler dependent.
			double out = 0;
			__m512i temp = r0;
			__m512i temp1 = r10;
			out += ((double*)&temp)[0];
			out += ((double*)&temp)[1];
			out += ((double*)&temp)[2];
			out += ((double*)&temp)[3];
			out += ((double*)&temp1)[0];
			out += ((double*)&temp1)[1];
			out += ((double*)&temp1)[2];
			out += ((double*)&temp1)[3];

		}
		delete[] a;
		delete[] b;
		delete[] c;
		delete[] a2;
		delete[] b2;
		delete[] c2;
		// Refresh the elapsed time that drives the outer while condition.
		time(&end);
		dif = difftime(end, start);

	}

	if (AVX_Test_Flag)
	{
		return true;
	}
	else
	{
		return false;
	}
}

bool testpclmulqdq()
{
	// PCLMULQDQ Test will execute the multiply with carry less intrinsic _mm_clmulepi64_si128
	float pclmulqdqcalc_Time;
	bool PCLMULQDQFLAG = true;
	std::cout << "Testing PCLMULQDQ";
	std::cout << "\n";

	m128i_ a;
	m128i_  b;
	m128i_ result1;
	m128i_ result2;
	m128i_ result3;

	a.m128i_i64_[1] = 2;
	a.m128i_i64_[0] = -1284;
	b.m128i_i64_[1] = 25;
	b.m128i_i64_[0] = 65535;

	const int product1 = 0x11;
	const int product2 = 0x00;

	result1.m128i_a = _mm_clmulepi64_si128(a.m128i_a, b.m128i_a, product1);
	result2.m128i_a = _mm_clmulepi64_si128(a.m128i_a, b.m128i_a, product2);

	__int64 pclmulqdq_result1 = __int64(result1.m128i_i64_[0]);
	__int64 pclmulqdq_result2 = __int64(result2.m128i_i64_[0]);

	if (pclmulqdq_result1 != 50)
	{
		std::cout << "PCLMULQDQ Carry less test failed\n";
		std::cout << "2 times 25 without carry using immediate 0x11 = 50\n";
		std::cout << "Result = " << pclmulqdq_result1 << "\n";
		PCLMULQDQFLAG = false;
	}
	if (pclmulqdq_result2 != 50419284)
	{
		std::cout << "PCLMULQDQ Carry less test failed\n";
		std::cout << "-1284 times 65535 without carry using immediate 0x00 = 50419284\n";
		std::cout << "Result = " << pclmulqdq_result2 << "\n";
		PCLMULQDQFLAG = false;
	}

	if (PCLMULQDQFLAG)
	{
		return true;
	}
	else
	{
		return false;
	}
}

// Exercises the six AES-NI intrinsics on one fixed 128-bit state/key pair and
// compares both 64-bit halves of every result with a precomputed reference value:
//   _mm_aesdec_si128          - one round of AES decryption
//   _mm_aesdeclast_si128      - final round of AES decryption
//   _mm_aesenc_si128          - one round of AES encryption
//   _mm_aesenclast_si128      - final round of AES encryption
//   _mm_aesimc_si128          - InverseMixColumn transformation
//   _mm_aeskeygenassist_si128 - round-key generation assist
// Prints one line per failing intrinsic; returns true only when all six match.
//
// The reference values carry an explicit ULL suffix: several of them exceed the
// range of signed long long, and an unsuffixed decimal literal of that size is
// only accepted as a compiler extension (with a warning).
bool testaes()
{
	bool AESFLAG = true;
	std::cout << "Testing AES";
	std::cout << "\n";

	// Immediate passed to the key-generation assist instruction.
	const int round = 5;

	// Fixed input state and round key.
	m128i_ x;
	m128i_ cikey;
	x.m128i_u64_[1] = 0x8899AABBCCDDEEFF;
	x.m128i_u64_[0] = 0x0123456789ABCDEF;
	cikey.m128i_u64_[1] = 0x0022446688AACCEE;
	cikey.m128i_u64_[0] = 0x1133557799BBDDFF;

	m128i_ decresult;
	m128i_ encresult;
	m128i_ imcresult;
	m128i_ rndkeyresult;

	decresult.m128i_a = _mm_aesdec_si128(x.m128i_a, cikey.m128i_a);
	const unsigned long long AESDEC_Detected1 = decresult.m128i_u64_[1];
	const unsigned long long AESDEC_Detected0 = decresult.m128i_u64_[0];

	decresult.m128i_a = _mm_aesdeclast_si128(x.m128i_a, cikey.m128i_a);
	const unsigned long long AESDECLAST_Detected1 = decresult.m128i_u64_[1];
	const unsigned long long AESDECLAST_Detected0 = decresult.m128i_u64_[0];

	encresult.m128i_a = _mm_aesenc_si128(x.m128i_a, cikey.m128i_a);
	const unsigned long long AESENC_Detected1 = encresult.m128i_u64_[1];
	const unsigned long long AESENC_Detected0 = encresult.m128i_u64_[0];

	encresult.m128i_a = _mm_aesenclast_si128(x.m128i_a, cikey.m128i_a);
	const unsigned long long AESENCLAST_Detected1 = encresult.m128i_u64_[1];
	const unsigned long long AESENCLAST_Detected0 = encresult.m128i_u64_[0];

	imcresult.m128i_a = _mm_aesimc_si128(x.m128i_a);
	const unsigned long long AESIMC_Detected1 = imcresult.m128i_u64_[1];
	const unsigned long long AESIMC_Detected0 = imcresult.m128i_u64_[0];

	rndkeyresult.m128i_a = _mm_aeskeygenassist_si128(x.m128i_a, round);
	const unsigned long long AESRNDKEY_Detected1 = rndkeyresult.m128i_u64_[1];
	const unsigned long long AESRNDKEY_Detected0 = rndkeyresult.m128i_u64_[0];

	// Compare every result with its reference value (upper half first, then lower half).
	if ((AESDEC_Detected1 != 13078118669095680494ULL) || (AESDEC_Detected0 != 310272635642496143ULL))
	{
		std::cout << "AES _mm_aesdec instruction failed \n";
		AESFLAG = false;
	}
	if ((AESDECLAST_Detected1 != 17442685002241582227ULL) || (AESDECLAST_Detected0 != 3948202758614728606ULL))
	{
		std::cout << "AES _mm_aesdeclast instruction failed \n";
		AESFLAG = false;
	}
	if ((AESENC_Detected1 != 2946741845273625395ULL) || (AESENC_Detected0 != 1633415060431061741ULL))
	{
		std::cout << "AES _mm_aesenc instruction failed \n";
		AESFLAG = false;
	}
	if ((AESENCLAST_Detected1 != 5405719836548948216ULL) || (AESENCLAST_Detected0 != 13176826564659819296ULL))
	{
		std::cout << "AES _mm_aesenclast instruction failed \n";
		AESFLAG = false;
	}
	if ((AESIMC_Detected1 != 7364304656064249941ULL) || (AESIMC_Detected0 != 14297946026628352672ULL))
	{
		std::cout << "AES _mm_aesimc instruction failed \n";
		AESFLAG = false;
	}
	if ((AESRNDKEY_Detected1 != 16916908513231285482ULL) || (AESRNDKEY_Detected0 != 9618605147242786437ULL))
	{
		std::cout << "AES _mm_aeskeygenassist instruction failed \n";
		AESFLAG = false;
	}

	return AESFLAG;
}



// ---------------------------------------------------------------------------
//  Init -- A routine to initialize the run-state variables (start time, status, flags)
// ---------------------------------------------------------------------------

void Init(void)
{
	// Capture the start time of the run; echo it when debug output is enabled.
	tStartTime = std::time(nullptr);
	if (iDebugFlag)
	{
		const std::string sStartStamp = std::asctime(std::localtime(&tStartTime));
		std::cout << "tStartTime = " << sStartStamp << std::endl;
	}

	// The run starts out as a success; other code may later change the status
	// to fail or indeterminate (indeterminate means the test was interrupted).
	igPassFailStatus = ReturnValueDef::Success;

	// Requested run duration, all zero until set elsewhere.
	iHour = 0;
	iMin = 0;
	iSec = 0;

	// Option-handling state.
	iArgIndex = 0;
	iOptionValid = 0;
	iColorText = 0;
	iRun = 1;

	// Output switches default to on.
	iPrintVersionFlag = 1;
	iPrintSuccessFlag = 1;
}

// Prints the module version (sgGoldModuleVersion) framed by "..." lines.
void PrintVersion(void)
{
	std::cout << std::endl << "..." << std::endl;
	std::cout << "Version: " << sgGoldModuleVersion << std::endl;
	std::cout << "..." << std::endl;
}

void SignalFun(int iSigNum)
{
	// If you want to print out the signal, do the following
	cout << "Signal is:" << iSigNum << endl;

	// Depending on your routine, you may want to have some sort of exit message ... however you may also want to comment these out
	cout << endl << "Do the soft clean up here" << endl;
	
	// Do cleanup and close up stuff here 
	CleanUp();

	// Exit the program with one of the accepted error levels. 
	//0 = Success
	//1 = Fail
	//2 = Indeterminate 
	igPassFailStatus = ReturnValueDef::Indeterminate;

	// if you get to this logic, most likely your answer should always be a 2
	std::string sTemp = "Error: " + UtilConvert(igPassFailStatus) + " Indeterminate!";
	PrintColorMsg(sTemp, TextColor::Yellow);
	exit(igPassFailStatus);

}

// Removes the results file named by sgGoldModuleResultsFile through the td helper.
// SignalFun calls this before it terminates the process.
void CleanUp()
{
	td.RemoveFile(sgGoldModuleResultsFile);
}

// Writes the results file: title, module version, start time, the PASS / FAIL /
// INDETERMINATE verdict for iPassFailStatus (0 / 1 / 2), the strPFOut text,
// the end time and the total elapsed seconds. For a recognized status the
// global igPassFailStatus is updated to match; any other value writes no verdict.
void WriteResultsFile(int iPassFailStatus)
{
	// Header: title, version and start time.
	td.WriteToFile(sgGoldModuleResultsFile, "AVX Test");

	std::string sLine = "Module Version: " + sgGoldModuleVersion;
	td.WriteToFile(sgGoldModuleResultsFile, sLine);

	sLine = "Start Time: ";
	sLine.append(std::asctime(std::localtime(&tStartTime)));
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);

	// Verdict line.
	switch (iPassFailStatus)
	{
	case 0: // Success
		igPassFailStatus = 0;
		td.WriteToFile(sgGoldModuleResultsFile, "Test Result - PASS");
		break;
	case 1: // Fail
		igPassFailStatus = 1;
		td.WriteToFile(sgGoldModuleResultsFile, "Test Result - FAIL");
		break;
	case 2: // Indeterminate
		igPassFailStatus = 2;
		td.WriteToFile(sgGoldModuleResultsFile, "Test Result - INDETERMINATE");
		break;
	default:
		break;
	}

	td.WriteToFile(sgGoldModuleResultsFile, strPFOut);

	// Footer: end time and total duration.
	tEndTime = std::time(nullptr);
	sLine = "End Time: ";
	sLine.append(std::asctime(std::localtime(&tEndTime)));
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);

	const double dElapsed = difftime(tEndTime, tStartTime);
	sLine = "Total Time: ";
	sLine.append(" seconds: ");
	sLine.append(UtilConvert(dElapsed));
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);
}

// Prints the help / usage text: what the test covers, every command-line option
// with an example invocation for the platform selected at compile time, the
// runtime dependencies for that platform, and the copyright line.
// NOTE(review): the emitted text spells "Minumum"; it is reproduced unchanged here
// so the output stays byte-identical.
void HelpUseage(void)
{
	// Overview of the test.
	std::cout << std::endl << "Test Information:" << std::endl
		<< " The purpose of the Advanced Vector Extensions (AVX) test is to detect and " << std::endl
		<< " test the presence of the following features on your Intel processor : " << std::endl
		<< "        AVX (Advanced Vector Extensions)" << std::endl
		<< "        AVX2 (Advanced Vector Extensions 2)" << std::endl
		<< "        AVX512 (Advanced Vector Extensions 512)" << std::endl
		<< "        AES (Advanced Encryption Standard)" << std::endl
		<< "        PCLMULQDQ (Carry-Less Multiplication of two 64 bit operands)" << std::endl
		<< " Note: AVX capability is first detected on your Intel Processor and then your" << std::endl
		<< " Operating System is checked to see if it supports running AVX." << std::endl
		<< " Minumum Operating System versions that support AVX:" << std::endl
		<< " Windows 7 SP1, Windows Server 2008 R2 SP1," << std::endl
		<< " Linux since kernel version 2.6.30" << std::endl
		<< " " << std::endl;

	// -h
	std::cout << "Possible options are as follows:" << std::endl
		<< " -h  = Help or Usage (this message)" << std::endl
		<< "       Example:" << std::endl;
#ifdef __WIN_OS__
	std::cout << "       AVX -h" << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "       ./avx -h" << std::endl << std::endl;
#endif

	// -info
	std::cout << " -info = Information switch that publishes parallel information" << std::endl
		<< "         using the following scheme:" << std::endl
		<< "         \"parallel:yes|socket:yes|core:yes\" " << std::endl
		<< "         Example:" << std::endl;
#ifdef __WIN_OS__
	std::cout << "         AVX -info" << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "         ./avx -info" << std::endl << std::endl;
#endif

	// -resultName
	std::cout << " -resultName = The resultName switch provides a way to name " << std::endl
		<< "               the results file as desired." << std::endl
		<< "               Example :" << std::endl;
#ifdef __WIN_OS__
	std::cout << "               AVX -resultName AVX_results_0001.txt " << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "               ./avx -resultName AVX_results_0001.txt " << std::endl << std::endl;
#endif

	// -pa
	std::cout << " -pa = pause app option" << std::endl
		<< "       Example:" << std::endl;
#ifdef __WIN_OS__
	std::cout << "       AVX -pa" << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "       ./avx -pa" << std::endl << std::endl;
#endif

	// -c
	std::cout << " -c = This option will display text in various colors" << std::endl
		<< "      Example:" << std::endl;
#ifdef __WIN_OS__
	std::cout << "      AVX -c" << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "      ./avx -c" << std::endl << std::endl;
#endif

	// -hrs
	std::cout << " -hrs = hours option ..." << std::endl
		<< "        Indicating the number of hours AVX should run" << std::endl
		<< "        Example:" << std::endl;
#ifdef __WIN_OS__
	std::cout << "        AVX -hrs 2" << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "        ./avx -hrs 2" << std::endl << std::endl;
#endif

	// -m
	std::cout << " -m = minutes option ..." << std::endl
		<< "      Indicating the number of minutes AVX should run" << std::endl
		<< "      Example:" << std::endl;
#ifdef __WIN_OS__
	std::cout << "      AVX -m 5" << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "      ./avx -m 5" << std::endl << std::endl;
#endif

	// -s
	std::cout << " -s = seconds option ..." << std::endl
		<< "      Indicating the number of seconds AVX should run" << std::endl
		<< "      Example:" << std::endl;
#ifdef __WIN_OS__
	std::cout << "      AVX -s 10" << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "      ./avx -s 10" << std::endl << std::endl;
#endif

	// Runtime dependencies for the selected platform / bitness.
#ifdef __WIN_32__
	std::cout << "Dependencies:" << std::endl
		<< "      libmmd.dll " << std::endl
		<< "      libiomp5md.dll " << std::endl
		<< "      C:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool\\DetectUtils.dll " << std::endl;
#endif
#ifdef __WIN_64__
	std::cout << "Dependencies:" << std::endl
		<< "      libmmd.dll " << std::endl
		<< "      libiomp5md.dll " << std::endl
		<< "      C:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool 64bit\\DetectUtils64.dll " << std::endl;
#endif
#ifdef __LIN_32__
	std::cout << "Dependencies:" << std::endl
		<< "      libDetectUtils.so.1.1 " << std::endl;
#endif
#ifdef __LIN_64__
	std::cout << "Dependencies:" << std::endl
		<< "      libDetectUtils64.so.1.1 " << std::endl;
#endif

	std::cout << std::endl << " Copyright (C) 2017, Intel Corporation" << std::endl;
}
// Prompts the user and blocks until one whitespace-delimited token has been read
// from standard input. The token itself is discarded.
void PauseWQuit(void)
{
	std::string sDiscarded;
	std::cout << "Please press 'q' <ENTER> to quit: ";
	std::cin >> sDiscarded;
}

// Prints sMsg on its own line, preceded by a blank line.
// When iColorText is 0 the message is printed as-is. Otherwise it is printed in
// red, green or yellow according to iColor (console attributes on Windows, ANSI
// escape sequences on Linux) and the console is reset afterwards; any other
// color value prints nothing.
void PrintColorMsg(std::string sMsg, TextColor iColor)
{
	// Colored output is switched off: plain print and done.
	if (iColorText == 0)
	{
		std::cout << std::endl << sMsg << std::endl;
		return;
	}

	if (iColor == TextColor::Red)
	{
#ifdef __WIN_OS__
		HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
		SetConsoleTextAttribute(hConsole, (FOREGROUND_RED | FOREGROUND_INTENSITY));
		std::cout << std::endl << sMsg << std::endl;
		// Restore the default white-on-black text attribute.
		SetConsoleTextAttribute(hConsole, (FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN));
#endif
#ifdef __LIN_OS__
		std::cout << "\033[0;31m" << std::endl << sMsg << std::endl << "\033[0m";
#endif
	}
	else if (iColor == TextColor::Green)
	{
#ifdef __WIN_OS__
		HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
		SetConsoleTextAttribute(hConsole, (FOREGROUND_GREEN | FOREGROUND_INTENSITY));
		std::cout << std::endl << sMsg << std::endl;
		SetConsoleTextAttribute(hConsole, (FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN));
#endif
#ifdef __LIN_OS__
		std::cout << "\033[0;32m" << std::endl << sMsg << std::endl << "\033[0m";
#endif
	}
	else if (iColor == TextColor::Yellow)
	{
#ifdef __WIN_OS__
		HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
		SetConsoleTextAttribute(hConsole, (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_INTENSITY));
		std::cout << std::endl << sMsg << std::endl;
		SetConsoleTextAttribute(hConsole, (FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN));
#endif
#ifdef __LIN_OS__
		std::cout << "\033[0;33m" << std::endl << sMsg << std::endl << "\033[0m";
#endif
	}
}

// Returns true when s is non-empty and consists solely of the ASCII digits 0-9
// (no sign, no whitespace, no decimal point); false otherwise.
// A direct scan gives the same result as matching "[0-9]+" without compiling a
// regular expression on every call.
bool is_integer(const std::string & s){
	if (s.empty())
	{
		return false;
	}
	for (const char ch : s)
	{
		if (ch < '0' || ch > '9')
		{
			return false;
		}
	}
	return true;
}


// Checks that the option at argvLocal[iArgLocal] is followed by a usable value.
//
// iArgLocal - index of the option being checked.
// argcLocal - number of entries in argvLocal.
// argvLocal - argument vector.
// vt        - expected value type; for VarType::vtINT the value must pass
//             is_integer(). Other types only require a value to be present.
//
// Returns true when a non-null, non-empty value follows the option and it
// satisfies the type check. Otherwise prints a red error message through
// PrintColorMsg and returns false.
bool isOptionValid(int iArgLocal, int argcLocal, char *argvLocal[], VarType vt)
{
	const std::string sOptionName = argvLocal[iArgLocal];
	const int iValueIndex = iArgLocal + 1;

	// The value must exist, be non-null and hold at least one character.
	const bool bValueMissing = (iValueIndex >= argcLocal)
		|| (argvLocal[iValueIndex] == NULL)
		|| (argvLocal[iValueIndex][0] == '\0');
	if (bValueMissing)
	{
		PrintColorMsg("No argument for '" + sOptionName + "' detected!", TextColor::Red);
		return false;
	}

	// Integer options accept digits only.
	if ((vt == VarType::vtINT) && !is_integer(argvLocal[iValueIndex]))
	{
		PrintColorMsg("Wrong argument type format for argument: '" + sOptionName + "'!", TextColor::Red);
		return false;
	}

	return true;
}

void handleArgs(int argc, char *argv[])
{

	// Optional args
	// Make sure the options can only be run one time
	// Load up all the possible args
	std::vector <string> sArgs_v;
	sArgs_v.push_back("-h");
	sArgs_v.push_back("-info");
	sArgs_v.push_back("-resultName");
	sArgs_v.push_back("-hrs");
	sArgs_v.push_back("-m");
	sArgs_v.push_back("-s");
	//sArgs_v.push_back("-d");
	//sArgs_v.push_back("-p");
	//sArgs_v.push_back("-plx");
	//sArgs_v.push_back("-plt");
	sArgs_v.push_back("-pa");
	sArgs_v.push_back("-c");
	//sArgs_v.push_back("-r");

	if (argc > 1)
	{
		for (int iArg = 1; iArg < argc; iArg++)
		{
			for (int iVec = 0; iVec < sArgs_v.size(); iVec++)
			{
				if (argv[iArg] == sArgs_v[iVec])
				{
					// Only the options that need to be processed before all the other ones need to be in this list
					if (sArgs_v[iVec] == std::string("-c"))
					{
						iColorText = 1;
					}
				}
			}
		}
		for (int iArg = 1; iArg < argc; iArg++)
		{
			iArgIndex = iArg;
			iOptionValid = 0;
			for (int iVec = 0; iVec < sArgs_v.size(); iVec++)
			{
				if (argv[iArg] == sArgs_v[iVec])
				{
					iOptionValid = 1;
					if (sArgs_v[iVec] == std::string("-h"))
					{
						iRun = 0;
						std::cout << std::endl << " --- AVX Test --- ";
						PrintVersion();
						HelpUseage();
						iPrintVersionFlag = 0;
						iPrintSuccessFlag = 0;
					}
					if (sArgs_v[iVec] == std::string("-info"))
					{
						iRun = 0;
						iPrintVersionFlag = 0;
						iPrintSuccessFlag = 0;
						std::cout << "\"parallel:yes|socket:yes|core:yes\"" << std::endl;
					}
					if (sArgs_v[iVec] == std::string("-resultName"))
					{
						sgGoldModuleResultsFile = argv[++iArg];
					}
					if (sArgs_v[iVec] == std::string("-hrs"))
					{
						// Hours
						if (isOptionValid(iArg, argc, argv, VarType::vtINT))
						{
							iHour = CustomAsciiToInteger(argv[++iArg]);
							iAVXtimer = iHour * 60 * 60;
						}
						else
						{
							iRun = 0;
							iOptionValid = 0;
						}

					}
					if (sArgs_v[iVec] == std::string("-m"))
					{
						// Minutes
						if (isOptionValid(iArg, argc, argv, VarType::vtINT))
						{
							iMin = CustomAsciiToInteger(argv[++iArg]);
							iAVXtimer = iMin * 60;
						}
						else
						{
							iRun = 0;
							iOptionValid = 0;
						}
					}
					if (sArgs_v[iVec] == std::string("-s"))
					{
						// Seconds
						if (isOptionValid(iArg, argc, argv, VarType::vtINT))
						{
							iSec = CustomAsciiToInteger(argv[++iArg]);
							iAVXtimer = iSec;
						}
						else
						{
							iRun = 0;
							iOptionValid = 0;
						}
					}
					if (sArgs_v[iVec] == std::string("-d"))
					{
						// debug
						iDebugFlag = 1;
					}
					if (sArgs_v[iVec] == std::string("-p"))
					{
						std::cout << std::endl << "Entering print opt" << std::endl;
						igPrintConfig = 1;
					}
					if (sArgs_v[iVec] == std::string("-pa"))
					{
						std::cout << std::endl << "Entering pause app opt" << std::endl;
						igPauseApp = 1;
					}
					if (sArgs_v[iVec] == std::string("-c"))
					{
						std::cout << std::endl << "Entering color text opt" << std::endl;
#ifdef __WIN_OS__
						HANDLE hCommandWindowTextColor = GetStdHandle(STD_OUTPUT_HANDLE);
						SetConsoleTextAttribute(hCommandWindowTextColor, (FOREGROUND_GREEN | FOREGROUND_INTENSITY));
						//std::cout << "Some green text ..." << std::endl << std::endl;
						SetConsoleTextAttribute(hCommandWindowTextColor, (FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN));
#endif
#ifdef __LIN_OS__
						std::cout << "\033[1;32m";
						//std::cout << std::endl << ".." << std::endl << "Some green text ..." << std::endl << std::endl;
						std::cout << "\033[0m";
#endif
					}


					// Erase current vector so the option doesn't repeat
					sArgs_v.erase(sArgs_v.begin() + (iVec));
				}

			}
			if (iDebugFlag)
			{
				std::cout << std::endl << "iOptionValid = " << iOptionValid << std::endl;
			}
			if (iOptionValid == 0)
			{
				break;
			}
		}
		if (iOptionValid == 0)
		{
			std::string sArgTemp = argv[iArgIndex];
			std::string sTempMsg = "Option " + sArgTemp + " invalid!";
			PrintColorMsg(sTempMsg, TextColor::Red);
			iRun = 0;
			igPassFailStatus = ReturnValueDef::InvalidArgs;
			HelpUseage();

		}
	}
	else
	{
		// Default message if no arguments are used  ... it may be the useage message or it may be OK that no args are used
		//std::cout << "No args used!!" << std::endl;
	}

}



// Prints a colored one-line summary for the current igPassFailStatus.
// For every status other than ReturnValueDef::Success the numeric status is
// printed as well; a status without a dedicated message prints only that
// number.
void PrintSuccess(void)
{
	if (igPassFailStatus == ReturnValueDef::Success)
	{
		PrintColorMsg("AVX Module Success!", TextColor::Green);
		return;
	}

	// Non-success statuses: pick the matching message, if there is one.
	if (igPassFailStatus == ReturnValueDef::Fail)
	{
		PrintColorMsg("AVX Module Fail!", TextColor::Red);
	}
	else if (igPassFailStatus == ReturnValueDef::Indeterminate)
	{
		PrintColorMsg("AVX Module Interrupted!", TextColor::Yellow);
	}
	else if (igPassFailStatus == ReturnValueDef::InvalidArgs)
	{
		PrintColorMsg("AVX Module has invalid arguments!", TextColor::Red);
	}
	else if (igPassFailStatus == ReturnValueDef::ConfigMismatch)
	{
		PrintColorMsg("AVX Module has a configuration mismatch!", TextColor::Yellow);
	}

	std::cout << "Return Status = " << igPassFailStatus << std::endl;
}

//..main
// Program entry point.
// Sets the version string for the build target, installs the SIGINT handler,
// initializes the module, parses the command line, optionally runs the AVX
// test and writes its results file, then returns igPassFailStatus as the
// process exit code.
int main(int argc, char *argv[])
{
	// Version string for the platform this binary was built for.
#ifdef __WIN_64__
	sgGoldModuleVersion = "2.0.22.64b.W";
#endif
#ifdef __WIN_32__
	sgGoldModuleVersion = "2.0.22.32b.W";
#endif
#ifdef __LIN_64__
	sgGoldModuleVersion = "2.0.20.64b.L";
#endif
#ifdef __LIN_32__
	sgGoldModuleVersion = "2.0.20.32b.L";
#endif

	// Soft-kill support: a SIGINT sent to this process is routed to SignalFun.
	signal(SIGINT, SignalFun);

	Init();
	handleArgs(argc, argv);

	// Version banner, unless an option handler switched it off.
	if (iPrintVersionFlag != 0)
	{
		std::cout << std::endl << " --- AVX Test --- ";
		PrintVersion();
	}

	// Clean up old result files before a new one is written.
	CleanUp();

	if (iRun)
	{
		Run_AVX();
		WriteResultsFile(igPassFailStatus);
	}

	if (iPrintSuccessFlag != 0)
	{
		PrintSuccess();
	}

	if (igPauseApp)
	{
		PauseWQuit();
	}

	return igPassFailStatus;
}
