﻿
//..
//..Gold Module - Example template ..
//..developed by GTS..
//..
//..Intel(R) Corporation (C) 2015
//..

//..includes
#include "Include/Main.h"


//..defines
#ifdef __WIN_OS__
#include <Windows.h>

#endif
#ifdef __LIN_OS__
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/time.h>
#include <dlfcn.h>
#endif

// Linux: function pointers that RunFPTest() fills in with dlsym() from the
// libDetectUtils shared library before calling them.
#ifdef __LIN_OS__
std::string(*OutputDLLVersion)(void);
bool(*bISFeature_OS_Supported)(void);
int(*max_avx_supported)(void);
bool(*bCheckFMA3)(void);
#endif

// Windows: function-pointer types used by RunFPTest() to cast the results of
// GetProcAddress() on the DetectUtils DLL.
#ifdef __WIN_OS__
typedef char *(*pOutputDLLVersion)(void);
typedef bool(*pISFeature_OS_Supported)(void);
typedef int(*pMaxAVX_Support)(void);
typedef bool(*pCheckFMA3)(void);
#endif

// Raw text of the value supplied with the "-s" option (see handleArgs).
string sFPTimer;
// Last throughput figure computed by RunFPTest(); written to the results file.
double dMFLOPS = (double)0;
// Error count written on the "Error: " line of the results file.
long lQtyErr = 0;
// Set by the "-errstop" option; NOTE(review): presumably passed to RunFPTest as StopOnErr - confirm at the call site.
bool sStopOnError = false;
// Non-zero selects the "No Compare" result in WriteResultsFile().
int displayOnly = 0;  //nc
// Running count of comparison mismatches, incremented by the FP kernels.
long CycleErr = 0;
// Number of loop iterations each FP kernel performs per call.
long n = 2500;
// AVX level chosen by RunFPTest(): 1 = AVX, 2 = AVX2, 5 = AVX512, 0 = none detected.
int AVXnumber = 0;
// Results of the DetectUtils feature queries made in RunFPTest().
bool AVXOSsupported = false;
bool FMAOSsupported = false;
// Raw value returned by the DetectUtils "max AVX supported" query.
int AVX_Max_Supp;
// Version string and display name of the DetectUtils library, set in RunFPTest().
std::string sDLLversion;
std::string sDLLname;

// Returns a copy of str with leading and trailing space characters removed.
// Only ' ' is stripped; tabs and newlines are left in place.
// A string that is empty or consists solely of spaces yields an empty string
// (previously an all-space string was returned unchanged).
std::string trim(const std::string& str)
{
	const size_t first = str.find_first_not_of(' ');
	if (std::string::npos == first)
	{
		// Nothing but spaces (or empty): the trimmed result is empty.
		return std::string();
	}
	const size_t last = str.find_last_not_of(' ');
	return str.substr(first, (last - first + 1));
}

// Writes the test report to the results file named by sgGoldModuleResultsFile.
//
// iPassFailStatus: 0 = pass, 1 = fail. Any other value writes no result
// section (only the header, feature summary and timing lines).
// When displayOnly is non-zero ("-nc"), the "No Compare" result is written
// regardless of iPassFailStatus.
// Side effects: sets igPassFailStatus (when a result section is written) and
// tEndTime.
void WriteResultsFile(int iPassFailStatus)
{
	// ---- Header: title, module version, start time, DetectUtils version ----
	td.WriteToFile(sgGoldModuleResultsFile, "Floating Point Test");
	std::string sLine = "Module Version: " + sgTestModuleVersion;
	td.WriteToFile(sgGoldModuleResultsFile, sLine);

	// asctime() output already ends with '\n', hence the "NoEndL" writer.
	sLine = "Start Time: ";
	sLine.append(std::asctime(std::localtime(&tStartTime)));
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);

	sLine = sDLLname + " Version - " + sDLLversion;
	td.WriteToFile(sgGoldModuleResultsFile, sLine);

	// ---- Feature summary ----
	if (!AVXOSsupported)
	{
		td.WriteToFile(sgGoldModuleResultsFile, "AVX is NOT supported in your OS");
	}
	else
	{
		td.WriteToFile(sgGoldModuleResultsFile, "AVX is supported in your OS");
		switch (AVXnumber)
		{
		case 1:
			td.WriteToFile(sgGoldModuleResultsFile, "Max AVX supported = AVX");
			break;
		case 2:
			td.WriteToFile(sgGoldModuleResultsFile, "Max AVX supported = AVX2");
			break;
		case 5:
			td.WriteToFile(sgGoldModuleResultsFile, "Max AVX supported = AVX512");
			break;
		default:
			td.WriteToFile(sgGoldModuleResultsFile, "AVX, AVX2 or AVX512 not supported");
			break;
		}
	}

	if (!FMAOSsupported)
	{
		td.WriteToFile(sgGoldModuleResultsFile, "FMA3 not supported");
	}
	else
	{
		td.WriteToFile(sgGoldModuleResultsFile, "FMA3 supported");
	}

	// ---- Result section ----
	// Pick the headline/footer for the outcome first; the MFLOPS and error
	// lines in between are the same for every outcome.
	bool bHaveOutcome = true;
	std::string sHeadline;
	std::string sFooter;	// empty means "no footer line"
	if (displayOnly) //No Compare Option from argument -nc
	{
		igPassFailStatus = ReturnValueDef::NoCompare;
		sHeadline = "Test Result - No Compare Option Used";
	}
	else if (iPassFailStatus == 0)
	{
		igPassFailStatus = ReturnValueDef::Success;
		sHeadline = "Test Result - PASS";
		sFooter = "--- Floating Point Test Passed!!!---";
	}
	else if (iPassFailStatus == 1)
	{
		igPassFailStatus = ReturnValueDef::Fail;
		sHeadline = "Test Result - FAIL";
		sFooter = "--- Floating Point Test Failed!!!---";
	}
	else
	{
		bHaveOutcome = false;
	}

	if (bHaveOutcome)
	{
		td.WriteToFile(sgGoldModuleResultsFile, sHeadline);

		std::string sDetail = "Million Floating Points per Second, MFLOPS: ";
		sDetail.append(UtilConvert(dMFLOPS));
		td.WriteToFile(sgGoldModuleResultsFile, sDetail);

		sDetail = "Error: ";
		sDetail.append(UtilConvert(lQtyErr));
		td.WriteToFile(sgGoldModuleResultsFile, sDetail);

		if (!sFooter.empty())
		{
			td.WriteToFile(sgGoldModuleResultsFile, sFooter);
		}
	}

	// ---- Timing ----
	tEndTime = std::time(nullptr);
	sLine = "End Time: ";
	sLine.append(std::asctime(std::localtime(&tEndTime)));
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);

	const double dElapsedSec = difftime(tEndTime, tStartTime);
	sLine = "Total Time: ";
	sLine.append(" seconds: ");
	sLine.append(UtilConvert(dElapsedSec));
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);
}

//void ReadLocalTextConfig(void)
//{
//	TextData tdLocal;
//	std::list<std::string> lLineArr;
//	tdLocal.ReadFile("local_text_cfg.txt", lLineArr);
//	for (std::list<string>::iterator li = lLineArr.begin(); li != lLineArr.end(); ++li)
//	{
//		std::cout << ' ' << *li << endl;
//	}
//
//}
//
//void ReadLocalConfig(void)
//{
//	XMLParser localXMLObj;
//	try
//	{
//		
//		//Local config
//		localXMLObj.LoadXMLFile("LocalConfig.xml");
//
//		// Set up the root element
//		localXMLObj.SetRootElem("LocalModuleConfig");
//
//		string localStrItem = localXMLObj.ReadNodeKeyAsString("@LocalConfigItem", "");
//		cout << "LocalConfigItem = " << localStrItem << endl;
//		
//	}
//	catch (...)
//	{
//		std::string sException;
//		sException.append("\n-------------------------------\n");
//		sException.append("Cannot locate config file: 'LocalConfig.xml'! \n");
//		sException.append("Application terminated! \n");
//		sException.append("\n-------------------------------\n");
//		throw(sException);
//	}
//	return;
//}

// ---------------------------------------------------------------------------
//  PopulateData -- A routine to populate configuration variables
// ---------------------------------------------------------------------------

//void PopulateData(XMLParser& xd, GlobalConfig& globalConfig)
//{
//	try
//	{
//		//Global config
//		// Set up the root element
//		xd.SetRootElem("GlobalModuleConfig");
//
//		globalConfig.setGlobalConfigItem(xd.ReadNodeKeyAsString("@GlobalConfigItem", ""));
//	}
//	catch (...)
//	{
//		std::string sException;
//		sException.append("\n-------------------------------\n");
//		sException.append("Cannot locate config file: 'GlobalConfig.xml'! \n");
//		sException.append("Application terminated! \n");
//		sException.append("\n-------------------------------\n");
//		throw(sException);
//	}
//	return;
//}

// Resets the module's global state to its start-up defaults and records the
// test start time in tStartTime.
void Init(void)
{
	// Timestamp used later for the "Start Time" / "Total Time" report lines.
	tStartTime = std::time(nullptr);

	// Overall result starts out as success.
	igPassFailStatus = ReturnValueDef::Success;

	// Command-line parsing state.
	iArgIndex = 0;
	iOptionValid = 0;
	iColorText = 0;

	// Run/print switches all start enabled.
	iRun = 1;
	iPrintVersionFlag = 1;
	iPrintSuccessFlag = 1;
}

// Prints the test banner and the module version (sgTestModuleVersion) to stdout.
void PrintVersion(void)
{
	std::cout << std::endl;
	std::cout << "--- Floating Point Test ---" << std::endl;
	std::cout << "..." << std::endl;
	std::cout << "Version: " << sgTestModuleVersion << std::endl;
	std::cout << "..." << std::endl;
	std::cout << std::endl;
}

void SignalFun(int iSigNum)
{
	// If you want to print out the signal, do the following
	cout << "Signal is:" << iSigNum << endl;

	// Depending on your routine, you may want to have some sort of exit message ... however you may also want to comment these out
	//cout << endl << "Do the soft clean up here" << endl;
	
	// Do cleanup and close up stuff here 
	//CleanUp();

	// Exit the program with one of the accepted error levels. 
	//0 = Success
	//1 = Fail
	//2 = Indeterminate 
	igPassFailStatus = 2;

    // if you get to this logic, most likely your answer should always be a 2
	//cout << "Error is: " << igPassFailStatus << " Indeterminate!" << endl;
	std::string sTemp = "Error is: " + UtilConvert(igPassFailStatus) + " Indeterminate!";
	PrintColorMsg(sTemp, TextColor::Yellow);

	exit(igPassFailStatus);

}

// Removes the results file named by sgGoldModuleResultsFile via td.RemoveFile().
void CleanUp()
{
	td.RemoveFile(sgGoldModuleResultsFile);
}

// Prints the command-line help text (options, examples, run-time library
// dependencies and copyright) to stdout.
//
// Fixes relative to the previous text:
//  - the "-c" example read "Math_FP..exe" (stray dot);
//  - the 32-bit Windows dependency was listed as DetectUtils32.dll, while
//    RunFPTest() actually loads DetectUtils.dll.
void HelpUseage(void)
{
	std::cout << "Test Information:" << std::endl;
	std::cout << " Math_FP.exe performs floating points mathematic operation." << std::endl << std::endl;
	std::cout << "Possible options are as follows:" << std::endl;
	std::cout << " -h           = Help or Usage (this message)" << std::endl;
	std::cout << "                Example:" << std::endl;
	std::cout << "                Math_FP.exe -h" << std::endl << std::endl;
	std::cout << " -info        = Information switch publishes parallel information" << std::endl;
	std::cout << "                using the following scheme:" << std::endl;
	std::cout << "                \"parallel:yes/no|socket:yes/no|core:yes/no\"" << std::endl;
	std::cout << "                Example:" << std::endl;
	std::cout << "                Math_FP.exe -info" << std::endl << std::endl;
	std::cout << " -resultName  = The resultName switch provides a way to name " << std::endl;
	std::cout << "                the results file as desired." << std::endl;
	std::cout << "                This is for the convenience of the control program. " << std::endl;
	std::cout << "                Example:" << std::endl;
	std::cout << "                Math_FP.exe -resultName Math_FP_results_01.txt " << std::endl << std::endl;
	std::cout << " -c           = If present, this option will display text in various colors" << std::endl;
	std::cout << "                Example:" << std::endl;
	std::cout << "                Math_FP.exe -c" << std::endl << std::endl;
	std::cout << " -nc          = Skip the result, will display only." << std::endl;
	std::cout << "                Example:" << std::endl;
	std::cout << "                Math_FP.exe -nc " << std::endl << std::endl;
	std::cout << " -errstop     = Stop on error, default is continue on error." << std::endl;
	std::cout << "                Example:" << std::endl;
	std::cout << "                Math_FP.exe -errstop " << std::endl << std::endl;
	std::cout << " -s [d]       = Time in seconds to perform the test" << std::endl;
	std::cout << "                default is 2 seconds." << std::endl;
	std::cout << "                Example:" << std::endl;
	std::cout << "                Math_FP.exe -s 5 " << std::endl << std::endl;
	std::cout << "Dependencies:" << std::endl;
	// The dependency list is platform specific and must match what RunFPTest() loads.
#ifdef __WIN_OS__
	std::cout << "               libiomp5md.dll" << std::endl;
#ifdef __WIN_64__
	std::cout << "               C:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool 64bit\\DetectUtils64.dll" << std::endl;
#endif
#ifdef __WIN_32__
	std::cout << "               C:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool\\DetectUtils.dll" << std::endl;
#endif
#endif
#ifdef __LIN_OS__
	std::cout << "               libiomp5.so" << std::endl;
#ifdef __LIN_64__
	std::cout << "               libDetectUtils64.so.1.1" << std::endl;
#endif
#ifdef __LIN_32__
	std::cout << "               libDetectUtils.so.1.1" << std::endl;
#endif
#endif
	std::cout << std::endl << "Copyright (C) 2015, Intel Corporation" << std::endl;
}
// Prompts the user and blocks until one whitespace-delimited token has been
// read from stdin. The token itself is ignored (any input continues).
void PauseWQuit(void)
{
	std::string sDiscarded;
	std::cout << "Please press 'q' <ENTER> to quit: ";
	std::cin >> sDiscarded;
}

// Prints sMsg on its own line (preceded by a blank line) to stdout.
//
// When iColorText is 0 the message is printed as plain text. Otherwise it is
// printed in the requested colour (Red, Green or Yellow) using the console
// attribute API on Windows or ANSI escape sequences on Linux, and the default
// colour is restored afterwards. In colour mode, any other TextColor value
// prints nothing.
void PrintColorMsg(std::string sMsg, TextColor iColor)
{
	// Plain (uncoloured) output.
	if (iColorText == 0)
	{
		std::cout << std::endl << sMsg << std::endl;
		return;
	}

#ifdef __WIN_OS__
	{
		// Map the requested colour to a console text attribute.
		WORD wColorAttr = 0;
		bool bKnownColor = true;
		if (iColor == TextColor::Red)
		{
			wColorAttr = (FOREGROUND_RED | FOREGROUND_INTENSITY);
		}
		else if (iColor == TextColor::Green)
		{
			wColorAttr = (FOREGROUND_GREEN | FOREGROUND_INTENSITY);
		}
		else if (iColor == TextColor::Yellow)
		{
			wColorAttr = (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_INTENSITY);
		}
		else
		{
			bKnownColor = false;
		}

		if (bKnownColor)
		{
			HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
			SetConsoleTextAttribute(hConsole, wColorAttr);
			std::cout << std::endl << sMsg << std::endl;
			// Back to the normal grey-on-black text.
			SetConsoleTextAttribute(hConsole, (FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN));
		}
	}
#endif
#ifdef __LIN_OS__
	{
		// Map the requested colour to an ANSI escape sequence.
		const char* pszAnsiColor = nullptr;
		if (iColor == TextColor::Red)
		{
			pszAnsiColor = "\033[0;31m";
		}
		else if (iColor == TextColor::Green)
		{
			pszAnsiColor = "\033[0;32m";
		}
		else if (iColor == TextColor::Yellow)
		{
			pszAnsiColor = "\033[0;33m";
		}

		if (pszAnsiColor != nullptr)
		{
			std::cout << pszAnsiColor;
			std::cout << std::endl << sMsg << std::endl;
			std::cout << "\033[0m";
		}
	}
#endif
}

// Returns true when s is non-empty and consists solely of the ASCII digits
// '0'..'9' (no sign, no whitespace); false otherwise.
bool is_integer(const std::string & s){
	if (s.empty())
	{
		return false;
	}
	for (std::string::size_type pos = 0; pos < s.size(); ++pos)
	{
		const char ch = s[pos];
		if (ch < '0' || ch > '9')
		{
			return false;
		}
	}
	return true;
}

// Checks that the option at argvLocal[iArgLocal] is followed by a usable value.
//
// The value (argvLocal[iArgLocal + 1]) must exist, be non-null and non-empty.
// When vt is VarType::vtINT it must additionally pass is_integer().
// On failure an explanatory message is printed in red via PrintColorMsg().
// Returns true when the value is acceptable, false otherwise.
bool isOptionValid(int iArgLocal, int argcLocal, char *argvLocal[], VarType vt)
{
	const std::string sOption = argvLocal[iArgLocal];
	const int iValueIdx = iArgLocal + 1;

	// A value is "present" when the next slot exists and holds a non-empty string.
	const bool bValuePresent = (iValueIdx < argcLocal)
		&& (argvLocal[iValueIdx] != NULL)
		&& (argvLocal[iValueIdx][0] != '\0');

	std::string sProblem;
	if (!bValuePresent)
	{
		sProblem = "No argument for '" + sOption + "' detected!";
	}
	else if (vt == VarType::vtINT && !is_integer(argvLocal[iValueIdx]))
	{
		sProblem = "Wrong argument type format for argument: '" + sOption + "'!";
	}
	else
	{
		return true;
	}

	PrintColorMsg(sProblem, TextColor::Red);
	return false;
}

// Parses a long from the start of s using stream extraction (leading
// whitespace is skipped, trailing characters are ignored).
// Returns 0 when extraction fails.
long ConvertToLong(const std::string& s)
{
	long lParsed = 0l;
	std::istringstream reader(s);
	reader >> lParsed;
	return reader.fail() ? 0 : lParsed;
}

// Relative floating-point comparison: returns true when |a - b| is no larger
// than epsilon times the larger of |a| and |b|.
bool approximatelyEqual(double a, double b, double epsilon)
{
	const double dMagA = fabs(a);
	const double dMagB = fabs(b);
	const double dLargerMag = (dMagA < dMagB) ? dMagB : dMagA;
	return fabs(a - b) <= (dLargerMag * epsilon);
}

// 256-bit AVX floating-point consistency kernel.
//
// Runs n iterations; each iteration broadcasts three doubles into __m256d
// registers and checks, on lane 0, that algebraically equivalent sequences of
// add/mul/div agree within a relative tolerance of 1e-9. Every mismatch adds
// one to the global CycleErr.
//
// RangeNumber is accepted for interface compatibility and is not used.
//
// Thread safety fix: the loop is an OpenMP parallel-for. Previously the
// accumulators a/b/c and the global CycleErr were shared and modified by all
// threads without synchronisation (a data race). Now a/b/c are firstprivate
// (each thread advances its own copy) and mismatches are counted in a
// reduction variable that is merged into CycleErr once, after the loop.
void AVXFP(int RangeNumber)
{
	double a = (double)22345678.1231234567890;
	double b = (double)12234678.1231234567890;
	double c = (double)-12345.1231234567890;
	long lErrors = 0;	// mismatches found during this call

	#pragma omp parallel for schedule(dynamic) firstprivate(a, b, c) reduction(+:lErrors)
	for (long i = 0; i < n; i++)
	{
		// Vary the operands from iteration to iteration.
		a += i*992200999001234.567890;
		b += i*992200999001234.567890;
		c += i*992200999001234.567890;

		__m256d aIntrin = _mm256_set1_pd(a);
		__m256d bIntrin = _mm256_set1_pd(b);
		__m256d cIntrin = _mm256_set1_pd(c);

		// (a + b) + c  versus  (c + b) + a
		__m256d addIntrin1 = _mm256_add_pd(aIntrin, bIntrin);
		__m256d addIntrin2 = _mm256_add_pd(addIntrin1, cIntrin);
		double* eq1 = (double*)&addIntrin2;

		__m256d addIntrinRev1 = _mm256_add_pd(cIntrin, bIntrin);
		__m256d addIntrinRev2 = _mm256_add_pd(addIntrinRev1, aIntrin);
		double* eq2 = (double*)&addIntrinRev2;

		if (!approximatelyEqual(eq1[0], eq2[0], 0.000000001))
		{
			lErrors += 1;
		}

		// (a * b) * c  versus  (c * b) * a
		__m256d mulIntrin1 = _mm256_mul_pd(aIntrin, bIntrin);
		__m256d mulIntrin2 = _mm256_mul_pd(mulIntrin1, cIntrin);
		double* eqmul1 = (double*)&mulIntrin2;

		__m256d mulIntrinRev1 = _mm256_mul_pd(cIntrin, bIntrin);
		__m256d mulIntrinRev2 = _mm256_mul_pd(mulIntrinRev1, aIntrin);
		double* eqmul2 = (double*)&mulIntrinRev2;

		if (!approximatelyEqual(eqmul1[0], eqmul2[0], 0.000000001))
		{
			lErrors += 1;
		}

		// Multiplying by 1.0 in any position must not change a * b.
		__m256d dbl1 = _mm256_set1_pd((double)1.0);
		__m256d mul1Intrin1 = _mm256_mul_pd(dbl1, aIntrin);
		__m256d mul1Intrin2 = _mm256_mul_pd(mul1Intrin1, bIntrin);
		double* eq1mul1 = (double*)&mul1Intrin2;

		__m256d mul1IntrinRev1 = _mm256_mul_pd(aIntrin, dbl1);
		__m256d mul1IntrinRev2 = _mm256_mul_pd(mul1IntrinRev1, bIntrin);
		double* eq1mul2 = (double*)&mul1IntrinRev2;

		__m256d mul1Intrin3 = _mm256_mul_pd(aIntrin, bIntrin);
		__m256d mul1Intrin4 = _mm256_mul_pd(mul1Intrin3, dbl1);
		double* eq1mul3 = (double*)&mul1Intrin4;

		__m256d mul1Intrin5 = _mm256_mul_pd(aIntrin, bIntrin);
		double* eq1mul4 = (double*)&mul1Intrin5;

		// At most one error is counted for this group of comparisons.
		if (!approximatelyEqual(eq1mul1[0], eq1mul2[0], 0.000000001))
		{
			lErrors += 1;
		}
		else if (!approximatelyEqual(eq1mul2[0], eq1mul3[0], 0.000000001))
		{
			lErrors += 1;
		}
		else if (!approximatelyEqual(eq1mul3[0], eq1mul4[0], 0.000000001))
		{
			lErrors += 1;
		}

		// Dividing by 1.0 before or after the multiply must not change a * b.
		__m256d muldivIntrin1 = _mm256_div_pd(aIntrin, dbl1);
		__m256d muldivIntrin2 = _mm256_mul_pd(muldivIntrin1, bIntrin);
		double* eqmuldiv1 = (double*)&muldivIntrin2;

		__m256d muldivIntrin3 = _mm256_mul_pd(aIntrin, bIntrin);
		__m256d muldivIntrin4 = _mm256_div_pd(muldivIntrin3, dbl1);
		double* eqmuldiv2 = (double*)&muldivIntrin4;

		__m256d muldivIntrin5 = _mm256_mul_pd(aIntrin, bIntrin);
		double* eqmuldiv3 = (double*)&muldivIntrin5;

		if (!approximatelyEqual(eqmuldiv1[0], eqmuldiv2[0], 0.000000001))
		{
			lErrors += 1;
		}
		else if (!approximatelyEqual(eqmuldiv2[0], eqmuldiv3[0], 0.000000001))
		{
			lErrors += 1;
		}

		// 2 * a  versus  a + a
		__m256d dbl2 = _mm256_set1_pd((double)2.0);
		__m256d muladdIntrin1 = _mm256_mul_pd(dbl2, aIntrin);
		double* eqmuladd1 = (double*)&muladdIntrin1;

		__m256d muladdIntrin2 = _mm256_add_pd(aIntrin, aIntrin);
		double* eqmuladd2 = (double*)&muladdIntrin2;

		if (!approximatelyEqual(eqmuladd1[0], eqmuladd2[0], 0.000000001))
		{
			lErrors += 1;
		}
	}

	// Single, race-free update of the global error counter.
	CycleErr += lErrors;
}
// 512-bit AVX-512 floating-point consistency kernel.
//
// Same checks as the 256-bit kernel, performed with __m512d registers: n
// iterations, each comparing lane 0 of algebraically equivalent add/mul/div
// sequences within a relative tolerance of 1e-9. Every mismatch adds one to
// the global CycleErr.
//
// RangeNumber is accepted for interface compatibility and is not used.
//
// Thread safety fix: the accumulators a/b/c and the global CycleErr used to be
// modified by all OpenMP threads without synchronisation (a data race). a/b/c
// are now firstprivate and mismatches are counted through a reduction variable
// that is merged into CycleErr once, after the loop.
void AVX5FP(int RangeNumber)
{
	double a = (double)22345678.1231234567890;
	double b = (double)12234678.1231234567890;
	double c = (double)-12345.1231234567890;
	long lErrors = 0;	// mismatches found during this call

#pragma omp parallel for schedule(dynamic) firstprivate(a, b, c) reduction(+:lErrors)
	for (long i = 0; i < n; i++)
	{
		// Vary the operands from iteration to iteration.
		a += i*992200999001234.567890;
		b += i*992200999001234.567890;
		c += i*992200999001234.567890;

		__m512d aIntrin = _mm512_set1_pd(a);
		__m512d bIntrin = _mm512_set1_pd(b);
		__m512d cIntrin = _mm512_set1_pd(c);

		// (a + b) + c  versus  (c + b) + a
		__m512d addIntrin1 = _mm512_add_pd(aIntrin, bIntrin);
		__m512d addIntrin2 = _mm512_add_pd(addIntrin1, cIntrin);
		double* eq1 = (double*)&addIntrin2;

		__m512d addIntrinRev1 = _mm512_add_pd(cIntrin, bIntrin);
		__m512d addIntrinRev2 = _mm512_add_pd(addIntrinRev1, aIntrin);
		double* eq2 = (double*)&addIntrinRev2;

		if (!approximatelyEqual(eq1[0], eq2[0], 0.000000001))
		{
			lErrors += 1;
		}

		// (a * b) * c  versus  (c * b) * a
		__m512d mulIntrin1 = _mm512_mul_pd(aIntrin, bIntrin);
		__m512d mulIntrin2 = _mm512_mul_pd(mulIntrin1, cIntrin);
		double* eqmul1 = (double*)&mulIntrin2;

		__m512d mulIntrinRev1 = _mm512_mul_pd(cIntrin, bIntrin);
		__m512d mulIntrinRev2 = _mm512_mul_pd(mulIntrinRev1, aIntrin);
		double* eqmul2 = (double*)&mulIntrinRev2;

		if (!approximatelyEqual(eqmul1[0], eqmul2[0], 0.000000001))
		{
			lErrors += 1;
		}

		// Multiplying by 1.0 in any position must not change a * b.
		__m512d dbl1 = _mm512_set1_pd((double)1.0);
		__m512d mul1Intrin1 = _mm512_mul_pd(dbl1, aIntrin);
		__m512d mul1Intrin2 = _mm512_mul_pd(mul1Intrin1, bIntrin);
		double* eq1mul1 = (double*)&mul1Intrin2;

		__m512d mul1IntrinRev1 = _mm512_mul_pd(aIntrin, dbl1);
		__m512d mul1IntrinRev2 = _mm512_mul_pd(mul1IntrinRev1, bIntrin);
		double* eq1mul2 = (double*)&mul1IntrinRev2;

		__m512d mul1Intrin3 = _mm512_mul_pd(aIntrin, bIntrin);
		__m512d mul1Intrin4 = _mm512_mul_pd(mul1Intrin3, dbl1);
		double* eq1mul3 = (double*)&mul1Intrin4;

		__m512d mul1Intrin5 = _mm512_mul_pd(aIntrin, bIntrin);
		double* eq1mul4 = (double*)&mul1Intrin5;

		// At most one error is counted for this group of comparisons.
		if (!approximatelyEqual(eq1mul1[0], eq1mul2[0], 0.000000001))
		{
			lErrors += 1;
		}
		else if (!approximatelyEqual(eq1mul2[0], eq1mul3[0], 0.000000001))
		{
			lErrors += 1;
		}
		else if (!approximatelyEqual(eq1mul3[0], eq1mul4[0], 0.000000001))
		{
			lErrors += 1;
		}

		// Dividing by 1.0 before or after the multiply must not change a * b.
		__m512d muldivIntrin1 = _mm512_div_pd(aIntrin, dbl1);
		__m512d muldivIntrin2 = _mm512_mul_pd(muldivIntrin1, bIntrin);
		double* eqmuldiv1 = (double*)&muldivIntrin2;

		__m512d muldivIntrin3 = _mm512_mul_pd(aIntrin, bIntrin);
		__m512d muldivIntrin4 = _mm512_div_pd(muldivIntrin3, dbl1);
		double* eqmuldiv2 = (double*)&muldivIntrin4;

		__m512d muldivIntrin5 = _mm512_mul_pd(aIntrin, bIntrin);
		double* eqmuldiv3 = (double*)&muldivIntrin5;

		if (!approximatelyEqual(eqmuldiv1[0], eqmuldiv2[0], 0.000000001))
		{
			lErrors += 1;
		}
		else if (!approximatelyEqual(eqmuldiv2[0], eqmuldiv3[0], 0.000000001))
		{
			lErrors += 1;
		}

		// 2 * a  versus  a + a
		__m512d dbl2 = _mm512_set1_pd((double)2.0);
		__m512d muladdIntrin1 = _mm512_mul_pd(dbl2, aIntrin);
		double* eqmuladd1 = (double*)&muladdIntrin1;

		__m512d muladdIntrin2 = _mm512_add_pd(aIntrin, aIntrin);
		double* eqmuladd2 = (double*)&muladdIntrin2;

		if (!approximatelyEqual(eqmuladd1[0], eqmuladd2[0], 0.000000001))
		{
			lErrors += 1;
		}
	}

	// Single, race-free update of the global error counter.
	CycleErr += lErrors;
}
// FMA3 consistency kernel (256-bit).
//
// Runs n iterations; each iteration checks, on lane 0, that fused
// multiply-add / multiply-subtract give the same result when the two
// multiplicands are swapped, within a relative tolerance of 1e-9. Every
// mismatch adds one to the global CycleErr.
//
// RangeNumber is accepted for interface compatibility and is not used.
//
// Thread safety fix: the accumulators a/b/c and the global CycleErr used to be
// modified by all OpenMP threads without synchronisation (a data race). a/b/c
// are now firstprivate and mismatches are counted through a reduction variable
// that is merged into CycleErr once, after the loop.
void FMAFP(int RangeNumber)
{
	double a = (double)123.1231234567890;
	double b = (double)234.1231234567890;
	double c = (double)44.1231234567890;
	long lErrors = 0;	// mismatches found during this call

#pragma omp parallel for schedule(dynamic) firstprivate(a, b, c) reduction(+:lErrors)
	for (long i = 0; i < n; i++)
	{
		// Vary the operands from iteration to iteration.
		a += i*9870012.567890;
		b += i*9870012.567890;
		c += i*9870012.567890;

		__m256d aIntrin = _mm256_set1_pd(a);
		__m256d bIntrin = _mm256_set1_pd(b);
		__m256d cIntrin = _mm256_set1_pd(c);

		// a*b + c  versus  b*a + c
		__m256d addIntrin1 = _mm256_fmadd_pd(aIntrin, bIntrin, cIntrin);
		__m256d addIntrin2 = _mm256_fmadd_pd(bIntrin, aIntrin, cIntrin);
		double* eq1 = (double*)&addIntrin1;
		double* eq2 = (double*)&addIntrin2;
		if (!approximatelyEqual(eq1[0], eq2[0], 0.000000001))
		{
			lErrors += 1;
		}

		// a*b - c  versus  b*a - c
		__m256d subIntrin1 = _mm256_fmsub_pd(aIntrin, bIntrin, cIntrin);
		__m256d subIntrin2 = _mm256_fmsub_pd(bIntrin, aIntrin, cIntrin);
		double* eqfmasub1 = (double*)&subIntrin1;
		double* eqfmasub2 = (double*)&subIntrin2;

		if (!approximatelyEqual(eqfmasub1[0], eqfmasub2[0], 0.000000001))
		{
			lErrors += 1;
		}

		// a*1 + c  versus  1*a + c
		__m256d dbl1 = _mm256_set1_pd((double)1.0);
		__m256d mul1Intrin1 = _mm256_fmadd_pd(aIntrin, dbl1, cIntrin);
		__m256d mul1Intrin2 = _mm256_fmadd_pd(dbl1, aIntrin, cIntrin);
		double* eq1mul1 = (double*)&mul1Intrin1;
		double* eq1mul2 = (double*)&mul1Intrin2;
		if (!approximatelyEqual(eq1mul1[0], eq1mul2[0], 0.000000001))
		{
			lErrors += 1;
		}
	}

	// Single, race-free update of the global error counter.
	CycleErr += lErrors;
}
// Scalar floating-point consistency kernel (no SIMD intrinsics).
//
// Runs n iterations; each iteration checks that algebraically equivalent
// double-precision expressions (reordered add/mul, multiply/divide by 1.0,
// 2*a versus a+a) agree within a relative tolerance of 1e-9. Every mismatch
// adds one to the global CycleErr.
//
// RangeNumber is accepted for interface compatibility and is not used.
//
// Thread safety fix: the accumulators a/b/c and the global CycleErr used to be
// modified by all OpenMP threads without synchronisation (a data race). a/b/c
// are now firstprivate and mismatches are counted through a reduction variable
// that is merged into CycleErr once, after the loop.
void FPTest(int RangeNumber)
{
	double a = (double)22345678.1231234567890;
	double b = (double)12234678.1231234567890;
	double c = (double)-12345.1231234567890;
	long lErrors = 0;	// mismatches found during this call

#pragma omp parallel for schedule(dynamic) firstprivate(a, b, c) reduction(+:lErrors)
	for (long i = 0; i < n; i++)
	{
		// Vary the operands from iteration to iteration.
		a += i*992200999001234.567890;
		b += i*992200999001234.567890;
		c += i*992200999001234.567890;

		// Addition in forward and reverse order.
		double eq1 = a + b + c;
		double eq2 = c + b + a;
		if (!approximatelyEqual(eq1, eq2, 0.000000001))
		{
			lErrors += 1;
		}

		// Multiplication in forward and reverse order.
		double eqmul1 = a*b*c;
		double eqmul2 = c*b*a;
		if (!approximatelyEqual(eqmul1, eqmul2, 0.000000001))
		{
			lErrors += 1;
		}

		// Multiplying by 1.0 in any position must not change a * b.
		double eq1mul1 = (double)1.0 * a * b; 
		double eq1mul2 = a * (double)1.0 * b;
		double eq1mul3 = a * b  * (double)1.0;
		double eq1mul4 = a * b;

		// At most one error is counted for this group of comparisons.
		if (!approximatelyEqual(eq1mul1, eq1mul2, 0.000000001))
		{
			lErrors += 1;
		}
		else if (!approximatelyEqual(eq1mul2, eq1mul3, 0.000000001))
		{
			lErrors += 1;
		}
		else if (!approximatelyEqual(eq1mul3, eq1mul4, 0.000000001))
		{
			lErrors += 1;
		}

		// Dividing by 1.0 before or after the multiply must not change a * b.
		double eqmuldiv1 = a/(double)1.0 * b;
		double eqmuldiv2 = a * b / (double)1.0 ;
		double eqmuldiv3 = a * b;

		if (!approximatelyEqual(eqmuldiv1, eqmuldiv2, 0.000000001))
		{
			lErrors += 1;
		}
		else if (!approximatelyEqual(eqmuldiv2, eqmuldiv3, 0.000000001))
		{
			lErrors += 1;
		}

		// 2 * a  versus  a + a
		double eqmuladd1 = (double)2.0 * a;
		double eqmuladd2 = a + a;

		if (!approximatelyEqual(eqmuladd1, eqmuladd2, 0.000000001))
		{
			lErrors += 1;
		}

	}

	// Single, race-free update of the global error counter.
	CycleErr += lErrors;
}

// Detects CPU/OS floating-point features via the DetectUtils library, then
// runs the matching FP kernels in a loop and reports throughput.
//
// runTime   - requested test duration in seconds. The loop only stops on time
//             once more than 1 second has elapsed AND runTime has been reached.
// StopOnErr - when true, the loop also stops as soon as CycleErr is >= 1.
// Returns the global CycleErr (number of comparison mismatches so far).
//
// Side effects: sets sDLLversion, sDLLname, AVXOSsupported, AVX_Max_Supp,
// FMAOSsupported, AVXnumber and dMFLOPS; prints progress to stdout.
// Calls exit(1) if the DetectUtils library or one of its entry points cannot
// be loaded.
//
// Fixes relative to the previous version:
//  - Linux: OutputDLLVersion() was called before the dlsym() result had been
//    checked, so a missing symbol led to a call through a null pointer;
//  - Linux: dlerror() is now cleared before each dlsym(), so that a stale
//    error cannot be mistaken for a lookup failure;
//  - Windows: the DLL version function is called once instead of twice;
//  - unused locals and redundant self-assignments removed; the double
//    returned by difftime() is narrowed explicitly.
long RunFPTest(long runTime, bool StopOnErr)
{
	time_t start, end;
	long dif = 0;				// whole seconds elapsed since start
	long dif2 = 0;				// elapsed seconds at the last MFLOPS update
	long timeUpdateFLOPS = 0;	// seconds since the last MFLOPS update
	double dFLOPS = (double)0;	// operations counted since the last update
	dMFLOPS = (double)0;

#ifdef __WIN_OS__
	HINSTANCE hInstanceLoadDLL;
#ifdef __WIN_32__
	if (!(hInstanceLoadDLL = LoadLibraryA("c:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool\\DetectUtils.dll")))
#elif defined __WIN_64__
	if (!(hInstanceLoadDLL = LoadLibraryA("c:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool 64bit\\DetectUtils64.dll")))
#endif
	{
#ifdef __WIN_32__
		cout << ".." << endl << "..could not load DetectUtils.dll " << endl << ".." << endl;
#elif defined __WIN_64__
		cout << ".." << endl << "..could not load DetectUtils64.dll " << endl << ".." << endl;
#endif
		exit(1);
	}

#endif

#ifdef __WIN_OS__

	pOutputDLLVersion pODV = (pOutputDLLVersion)GetProcAddress(hInstanceLoadDLL, "DUdll_OutputDLLVersion");
	pISFeature_OS_Supported pISFOSS = (pISFeature_OS_Supported)GetProcAddress(hInstanceLoadDLL, "DUdll_bISFeature_OS_Supported");
	pMaxAVX_Support pMaxAVX = (pMaxAVX_Support)GetProcAddress(hInstanceLoadDLL, "DUdll_Max_AVX_Supported");
	pCheckFMA3 pCFMA3 = (pCheckFMA3)GetProcAddress(hInstanceLoadDLL, "DUdll_bCheckFMA3");

	if (!pODV)
	{
#ifdef __WIN_32__
		cout << ".." << endl << "..could not load OutputDLLVersion from DetectUtils.dll " << endl << ".." << endl;
#elif defined __WIN_64__
		cout << ".." << endl << "..could not load OutputDLLVersion from DetectUtils64.dll " << endl << ".." << endl;
#endif
		exit(1);
	}
	else
	{
		//output DLL version (queried once and reused)
		char* cDLLversion = pODV();
		sDLLversion = cDLLversion;
#ifdef __WIN_32__
		cout << "..DetectUtils DLL Version - " << cDLLversion << endl;
		sDLLname = "DetectUtils DLL";
#elif defined __WIN_64__
		cout << "..DetectUtils64 DLL Version - " << cDLLversion << endl;
		sDLLname = "DetectUtils64 DLL";
#endif
	}

	if (!pISFOSS)
	{
#ifdef __WIN_32__
		cout << ".." << endl << "..could not load ISFeature_OS_Supported from DetectUtils dll " << endl << ".." << endl;
#elif defined __WIN_64__
		cout << ".." << endl << "..could not load ISFeature_OS_Supported from DetectUtils64 dll " << endl << ".." << endl;
#endif
		exit(1);
	}
	else
	{
		AVXOSsupported = pISFOSS();
	}

	if (!pMaxAVX)
	{
#ifdef __WIN_32__
		cout << ".." << endl << "..could not load Max_AVX_Supported from DetectUtils dll " << endl << ".." << endl;
#elif defined __WIN_64__
		cout << ".." << endl << "..could not load Max_AVX_Supported from DetectUtils64 dll " << endl << ".." << endl;
#endif
		exit(1);
	}
	else
	{
		AVX_Max_Supp = pMaxAVX();
	}

	if (!pCFMA3)
	{
#ifdef __WIN_32__
		cout << ".." << endl << "..could not load checkFMA3 from DetectUtils dll " << endl << ".." << endl;
#elif defined __WIN_64__
		cout << ".." << endl << "..could not load checkFMA3 from DetectUtils64 dll " << endl << ".." << endl;
#endif
		exit(1);
	}
	else
	{
		FMAOSsupported = pCFMA3();
	}
	//unload DLL
	FreeLibrary(hInstanceLoadDLL);

#endif

#ifdef __LIN_OS__

	void *handle;
	char *error;

#ifdef __LIN_32__
	handle = dlopen("./libDetectUtils.so.1.1", RTLD_LAZY);
#elif defined __LIN_64__
	handle = dlopen("./libDetectUtils64.so.1.1", RTLD_LAZY);
#endif
	if (!handle) {
#ifdef __LIN_32__
		cout << ".." << endl << "..could not load libDetectUtils shared library " << endl << ".." << endl;
#elif defined __LIN_64__
		cout << ".." << endl << "..could not load libDetectUtils64 shared library " << endl << ".." << endl;
#endif
		fputs(dlerror(), stderr);
		exit(1);
	}

	//DLLVersion
	dlerror();	// clear any stale error state before the lookup
	OutputDLLVersion = (std::string(*)(void))dlsym(handle, "OutputDLLVersion");
	if ((error = dlerror()) != NULL)
	{
#ifdef __LIN_32__
		cout << ".." << endl << "..could not load OutputDLLVersion from libDetectUtils shared library " << endl << ".." << endl;
#elif defined __LIN_64__
		cout << ".." << endl << "..could not load OutputDLLVersion from libDetectUtils64 shared library" << endl << ".." << endl;
#endif
		fputs(error, stderr);
		exit(1);
	}
	// Only call through the pointer once the lookup is known to have succeeded.
	sDLLversion = OutputDLLVersion();
#ifdef __LIN_32__
	cout << ".libDetectUtils Shared Library Version - " << trim(sDLLversion) << endl;
	sDLLname = "libDetectUtils Shared Library";
#elif defined __LIN_64__
	cout << ".libDetectUtils64 Shared Library Version - " << trim(sDLLversion) << endl;
	sDLLname = "libDetectUtils64 Shared Library";
#endif

	//Is AVX supported by OS
	dlerror();	// clear any stale error state before the lookup
	bISFeature_OS_Supported = (bool(*)(void))dlsym(handle, "bISFeature_OS_Supported");
	if ((error = dlerror()) != NULL)
	{
#ifdef __LIN_32__
		cout << ".." << endl << "..could not load bISFeature_OS_Supported from libDetectUtils Shared Library" << endl << ".." << endl;
#elif defined __LIN_64__
		cout << ".." << endl << "..could not load bISFeature_OS_Supported from libDetectUtils64 Shared Library" << endl << ".." << endl;
#endif
		fputs(error, stderr);
		exit(1);
	}
	AVXOSsupported = bISFeature_OS_Supported();

	//Max AVX Supported
	dlerror();	// clear any stale error state before the lookup
	max_avx_supported = (int(*)(void))dlsym(handle, "max_avx_supported");
	if ((error = dlerror()) != NULL)
	{
#ifdef __LIN_32__
		cout << ".." << endl << "..could not load Max_AVX_Supported from libDetectUtils Shared Library " << endl << ".." << endl;
#elif defined __LIN_64__
		cout << ".." << endl << "..could not load Max_AVX_Supported from libDetectUtils64 Shared Library " << endl << ".." << endl;
#endif
		fputs(error, stderr);
		exit(1);
	}
	AVX_Max_Supp = max_avx_supported();


	//FMA3 Check
	dlerror();	// clear any stale error state before the lookup
	bCheckFMA3 = (bool(*)(void))dlsym(handle, "bCheckFMA3");
	if ((error = dlerror()) != NULL)
	{
#ifdef __LIN_32__
		cout << ".." << endl << "..could not load Check FMA3 from libDetectUtils Shared Library " << endl << ".." << endl;
#elif defined __LIN_64__
		cout << ".." << endl << "..could not load Check FMA3 from libDetectUtils64 Shared Library " << endl << ".." << endl;
#endif
		fputs(error, stderr);
		exit(1);
	}
	FMAOSsupported = bCheckFMA3();

	dlclose(handle);

#endif

	// Translate the detected AVX level into AVXnumber (1 = AVX, 2 = AVX2, 5 = AVX512).
	if (AVXOSsupported == true)
	{
		std::cout << "AVX is supported in your OS" << std::endl;
		if (AVX_Max_Supp == AVXLevelSupport::AVX_Supported)
		{
			AVXnumber = 1;
			std::cout << "Max AVX supported = AVX" << std::endl;
		}
		else if (AVX_Max_Supp == AVXLevelSupport::AVX2_Supported)
		{
			AVXnumber = 2;
			std::cout << "Max AVX supported = AVX2" << std::endl;
		}
		else if (AVX_Max_Supp == AVXLevelSupport::AVX512_Supported)
		{
			AVXnumber = 5;
			std::cout << "Max AVX supported = AVX512" << std::endl;
		}
		else
			std::cout << "AVX, AVX2 or AVX512 not supported" << std::endl;
	}
	else
	{
		std::cout << "AVX is NOT supported in your OS" << std::endl;
	}

	if (FMAOSsupported)
	{
		std::cout << "FMA3 supported" << std::endl;
	}
	else
	{
		std::cout << "FMA3 not supported" << std::endl;
	}

	time(&start);

	std::cout << "MFLOPS            CycleRun       Error       Time(sec)\n";
	for (long QtyCycle = 1; ; QtyCycle++)
	{
		// Pick the widest kernel the platform supports; the per-iteration
		// operation counts (13 / 20 / 6) feed the MFLOPS estimate.
		if (!AVXOSsupported && !FMAOSsupported)
		{
			FPTest(1);
			dFLOPS += n * 13;
		}
		else if (AVXOSsupported)
		{
			if (AVXnumber == 1 || AVXnumber == 2)
			{
				AVXFP(1);
				dFLOPS += n * 20;
			}

			else if (AVXnumber == 5)
			{
				AVX5FP(1);
				dFLOPS += n * 20;
			}			
			else
			{
				FPTest(1);
				dFLOPS += n * 13;
			}
		}
		else
		{
			FPTest(1);
			dFLOPS += n * 13;
		}
		if (FMAOSsupported)
		{
			FMAFP(1);
			dFLOPS += n * 6;
		}

		time(&end);

		dif = static_cast<long>(difftime(end, start));
		timeUpdateFLOPS = dif - dif2;

		// Refresh the progress line whenever at least one more second has elapsed.
		if (timeUpdateFLOPS >= 1)
		{
			dMFLOPS = dFLOPS / 1000000 / timeUpdateFLOPS;
			dif2 = dif;
			dFLOPS = 0;
			std::cout << "\r";
			std::cout.precision(7);
			std::cout << dMFLOPS << "              " << QtyCycle << "          " << CycleErr << "            " << dif;
			std::cout << "    " << "\b" << "    " << std::flush;
		}

		if (dif>(long)1 && dif >= runTime)
		{
			break;
		}
		else if (CycleErr >= 1 && StopOnErr)
		{
			std::cout << "CycleErr --> " << CycleErr <<std::endl;
			break;
		}
	}

	return CycleErr;	
}


void handleArgs(int argc, char *argv[])
{
	// int iHelpSelected = 0;
	// Optional args
	// Make sure the options can only be run one time
	// Load up all the possible args
	std::vector <string> sArgs_v;
	sArgs_v.push_back("-h");
	sArgs_v.push_back("-info");
	sArgs_v.push_back("-resultName");
	sArgs_v.push_back("-s");
	sArgs_v.push_back("-errstop");
	sArgs_v.push_back("-nc");
	sArgs_v.push_back("-c");

	if (argc > 1)
	{
		for (int iArg = 1; iArg < argc; iArg++)
		{
			for (int iVec = 0; iVec < sArgs_v.size(); iVec++)
			{
				if (argv[iArg] == sArgs_v[iVec])
				{
					// Only the options that need to be processed before all the other ones need to be in this list
					if (sArgs_v[iVec] == std::string("-c"))
					{
						iColorText = 1;
					}
				}
			}
		}
		for (int iArg = 1; iArg < argc; iArg++)
		{
			iArgIndex = iArg;
			iOptionValid = 0;
			for (int iVec = 0; iVec < sArgs_v.size(); iVec++)
			{
				if (argv[iArg] == sArgs_v[iVec])
				{
					iOptionValid = 1;

					if (sArgs_v[iVec] == std::string("-h"))
					{
						// Print out version
						if (iPrintVersionFlag != 0)
						{
							PrintVersion();
						}
						/*iHelpSelected = 1;*/
						iRun = 0;
						HelpUseage();
					}
					if (sArgs_v[iVec] == std::string("-info"))
					{
						iRun = 0;
						iPrintVersionFlag = 0;
						iPrintSuccessFlag = 0;
						std::cout << "\"parallel:yes|socket:yes|core:yes\"" << std::endl;
					}
					if (sArgs_v[iVec] == std::string("-resultName"))
					{
						sgGoldModuleResultsFile = argv[++iArg];
					}
					if (sArgs_v[iVec] == std::string("-s"))
					{
						if (isOptionValid(iArg, argc, argv, VarType::vtINT))
						{
							sFPTimer = argv[iArg + 1];
							iArg++;

						}
						else
						{
							iRun = 0;
							iOptionValid = 0;
						}
					}
					if (sArgs_v[iVec] == std::string("-errstop"))
					{
						sStopOnError = true;
					}
					if (sArgs_v[iVec] == std::string("-nc"))
					{
						// No Compare between expected and detected, display only
						// std::cout << std::endl << "Entering no compare opt" << std::endl;
						displayOnly = 1;
					}
					// Erase current vector so the option doesn't repeat
					sArgs_v.erase(sArgs_v.begin() + (iVec));
				}
			}
			if (iOptionValid == 0)
			{
				break;
			}
		}
		if (iOptionValid == 0)
		{
			std::string sArgTemp = argv[iArgIndex];
			std::string sTempMsg = "Option " + sArgTemp + " invalid!";
			PrintColorMsg(sTempMsg, TextColor::Red);
			iRun = 0;
			igPassFailStatus = ReturnValueDef::InvalidArgs;
			HelpUseage();
		}
	}
	else
	{
		// Default message if no arguments are used  ... it may be the useage message or it may be OK that no args are used
		//std::cout << "No args used!!" << std::endl;
	}
}

// Prints the final module status held in igPassFailStatus.
// Nothing is printed for ReturnValueDef::Success (the success banner is
// disabled); every other status prints its colored message, if it has one,
// followed by the numeric return status.
void PrintSuccess(void)
{
	if (igPassFailStatus == ReturnValueDef::Success)
	{
		//PrintColorMsg("Math_FP Success!", TextColor::Green);
		return;
	}

	if (igPassFailStatus == ReturnValueDef::Fail)
	{
		PrintColorMsg("Math_FP Fail!", TextColor::Red);
	}
	else if (igPassFailStatus == ReturnValueDef::Indeterminate)
	{
		PrintColorMsg("Math_FP Interrupted!", TextColor::Yellow);
	}
	else if (igPassFailStatus == ReturnValueDef::InvalidArgs)
	{
		PrintColorMsg("Math_FP has invalid arguments!", TextColor::Red);
	}
	else if (igPassFailStatus == ReturnValueDef::ConfigMismatch)
	{
		PrintColorMsg("Math_FP has a configuration mismatch!", TextColor::Yellow);
	}

	// Any non-success status is also reported numerically.
	cout << "Return Status = " << igPassFailStatus << endl;
}

//..main
// Entry point: selects the module version string for the build target,
// installs the SIGINT handler, parses the command line, runs the floating
// point test (unless an option disabled the run) and writes the results file.
// The process exit code is igPassFailStatus.
int main(int argc, char *argv[])
{
	// Version string per build target: ".W" = Windows, ".L" = Linux
#ifdef __WIN_64__
	sgTestModuleVersion = "1.0.19.64b.W";
#endif
#ifdef __WIN_32__
	sgTestModuleVersion = "1.0.19.32b.W";
#endif
#ifdef __LIN_64__
	sgTestModuleVersion = "1.0.18.64b.L";
#endif
#ifdef __LIN_32__
	sgTestModuleVersion = "1.0.18.32b.L";
#endif

	// Softkill support: route SIGINT to the module's signal function
	signal(SIGINT, SignalFun);

	Init();
	handleArgs(argc, argv);

	//..clean up result files ... Clean file before another one is written
	CleanUp();

	if (iRun)
	{
		// Print out version
		if (iPrintVersionFlag != 0)
		{
			PrintVersion();
		}

		if (displayOnly)
		{
			PrintColorMsg("No Compare Option Used.\n", TextColor::Yellow);
		}

		// Run for the duration given with -s, or 2 seconds by default
		if (sFPTimer.empty())
		{
			lQtyErr = RunFPTest(2, sStopOnError);
		}
		else
		{
			long lRunSeconds = ConvertToLong(sFPTimer);
			lQtyErr = RunFPTest(lRunSeconds, sStopOnError);
		}

		std::cout << std::endl << std::endl << "Million Floating Points per Second, MFLOPS --> " << dMFLOPS;
		std::cout << std::endl << "Error --> " << lQtyErr << std::endl;

		// Results file status: 0 = pass, 1 = fail
		const bool bTestPassed = (lQtyErr == 0);
		if (bTestPassed)
		{
			PrintColorMsg("Floating Point Test Passed!!!", TextColor::Green);
		}
		else
		{
			PrintColorMsg("Floating Point Test Failed!!!", TextColor::Red);
		}
		WriteResultsFile(bTestPassed ? 0 : 1);
	}

	if (iPrintSuccessFlag != 0)
	{
		PrintSuccess();
	}

	// Returns 0=pass, 1=fail, 2=indeterminate
	return igPassFailStatus;
}
