
//..
//..MMXSSE Module ..
//..developed by GTS..
//..
//..Intel(R) Corporation (C) 2015
//..

//..includes
#include <immintrin.h>
#include <pmmintrin.h>
#include "Include/Main.h"

//..platform-specific includes
#ifdef __WIN_OS__
#include <Windows.h>
#endif
#ifdef __LIN_OS__
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <stdlib.h>
#include <dlfcn.h>
#include <iostream>
#include <string>
#endif

// Tuning constants for the MMX/SSE tests in this file:
//   MMXSSE_LOOP - number of times each test repeats its computation and comparison.
//   SIZE        - element count of the input arrays used by the dot-product tests.
#define MMXSSE_LOOP 1000000
#define SIZE 12

#ifdef __LIN_OS__

// Returns a copy of str with leading and trailing space characters removed.
// Only the plain space (' ') is stripped; tabs and newlines are left untouched.
// An empty or all-space input yields an empty string.
std::string trim(const std::string& str)
{
    const std::string::size_type first = str.find_first_not_of(' ');
    if (first == std::string::npos)
    {
        // Nothing but spaces (or nothing at all): the trimmed result is empty.
        return std::string();
    }
    const std::string::size_type last = str.find_last_not_of(' ');
    return str.substr(first, last - first + 1);
}

#endif

#ifdef __WIN_OS__

// Function-pointer types for the entry points that Run_MMXSSE() resolves from the
// DetectUtils DLL with GetProcAddress().

// Entry point returning the DLL version as a C string.
typedef char *(*pOutputDLLVersion)(void);
// Entry points returning whether the named instruction-set extension is supported.
typedef bool(*pCheckMMX)(void);
typedef bool(*pCheckSSE)(void);
typedef bool(*pCheckSSE2)(void);
typedef bool(*pCheckSSE3)(void);
typedef bool(*pCheckSSSE3)(void);
typedef bool(*pCheckSSE4_1)(void);
typedef bool(*pCheckSSE4_2)(void);

#endif

#ifdef __LIN_OS__
// Function pointers filled in by Run_MMXSSE() with dlsym() from the DetectUtils
// shared library.

// Returns the shared-library version string.
std::string (*OutputDLLVersion)(void); 
// Each returns whether the named instruction-set extension is supported.
bool (*bCheckMMX)(void);
bool (*bCheckSSE)(void);
bool (*bCheckSSE2)(void);
bool (*bCheckSSE3)(void);
bool (*bCheckSSSE3)(void);
bool (*bCheckSSE4_1)(void);
bool (*bCheckSSE4_2)(void);
#endif


// Display names of the instruction-set extensions handled by Run_MMXSSE(), in the
// order in which they are detected, reported and tested.
static const char *const kMmxSseFeatureNames[] = { "MMX", "SSE", "SSE2", "SSE3", "SSSE3", "SSE4.1", "SSE4.2" };

// Number of entries in kMmxSseFeatureNames.
static const int kMmxSseFeatureCount = sizeof(kMmxSseFeatureNames) / sizeof(kMmxSseFeatureNames[0]);

#if defined(__WIN_OS__) || defined(__LIN_OS__)
// Suffixes of the detection entry points in the DetectUtils library
// ("DUdll_bCheck<suffix>" on Windows, "bCheck<suffix>" on Linux); same order as
// kMmxSseFeatureNames.
static const char *const kMmxSseCheckSuffixes[] = { "MMX", "SSE", "SSE2", "SSE3", "SSSE3", "SSE4_1", "SSE4_2" };
#endif

// Builds the "..<name> is supported on this CPU.." / "..is NOT supported.." line.
static std::string MmxSseSupportLine(const char *name, bool supported)
{
	return std::string("..") + name + (supported ? " is supported on this CPU..\n" : " is NOT supported on this CPU..\n");
}

// Runs one feature test when the feature is supported and records the outcome in
// strPFOut. Returns false only when the test ran and failed; an unsupported
// feature is logged as "Not Tested" and counts as a pass.
static bool MmxSseRunFeatureTest(bool supported, const char *name, bool (*testFn)(void))
{
	const std::string label = std::string(name) + " Test Result ";
	if (!supported)
	{
		strPFOut.append(label + "--- Not Tested\n");
		return true;
	}

	// The label is written before the test runs, the verdict afterwards.
	strPFOut.append(label);
	const bool passed = testFn();
	strPFOut.append(passed ? "--- PASS\n" : "--- FAIL\n");
	return passed;
}

#ifdef __WIN_OS__
// Resolves the DUdll_bCheck<suffix> export for the given feature index, calls it
// and returns its result. Prints a message and exits with status 1 when the
// export cannot be found.
static bool MmxSseQueryDllFeature(HINSTANCE hDll, const std::string &libName, int feature)
{
	typedef bool (*FeatureCheckFn)(void);

	const std::string exportName = std::string("DUdll_bCheck") + kMmxSseCheckSuffixes[feature];
	FeatureCheckFn check = (FeatureCheckFn)GetProcAddress(hDll, exportName.c_str());
	if (!check)
	{
		std::cout << ".." << std::endl << "..could not load check" << kMmxSseFeatureNames[feature] << " from " << libName << " dll " << std::endl << ".." << std::endl;
		exit(1);
	}
	return check();
}
#endif

#ifdef __LIN_OS__
// Looks up a symbol in the shared library and returns its address. Writes the
// dlerror() text (or a short message when the address is null without an error)
// to stderr and exits with status 1 on failure.
static void *MmxSseResolveOrExit(void *handle, const char *symbol)
{
	dlerror(); // discard any stale error so the check below reflects this lookup only
	void *address = dlsym(handle, symbol);
	const char *error = dlerror();
	if (error != NULL)
	{
		fputs(error, stderr);
		exit(1);
	}
	if (address == NULL)
	{
		// A null address would crash when called through the function pointer.
		fputs("..symbol resolved to a null address: ", stderr);
		fputs(symbol, stderr);
		exit(1);
	}
	return address;
}
#endif

// Entry point of the MMX/SSE module.
//
// 1. Loads the DetectUtils library (DLL on Windows, shared object on Linux),
//    reports its version and asks it which of MMX, SSE, SSE2, SSE3, SSSE3,
//    SSE4.1 and SSE4.2 are supported. Any load or lookup failure prints a
//    message and terminates the process with exit(1).
// 2. Writes the support summary to std::cout and to strPFOut.
// 3. Runs the matching test function for every supported feature and appends
//    PASS / FAIL / Not Tested to strPFOut.
// 4. Sets igPassFailStatus to 0 when no executed test failed, otherwise to 1.
void Run_MMXSSE()
{
	// Detection results, indexed like kMmxSseFeatureNames.
	bool supported[kMmxSseFeatureCount] = { false };

#ifdef __WIN_OS__
	{
#ifdef __WIN_32__
		const std::string libName = "DetectUtils";
		const char *const libPath = "c:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool\\DetectUtils.dll";
#elif defined __WIN_64__
		const std::string libName = "DetectUtils64";
		const char *const libPath = "c:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool 64bit\\DetectUtils64.dll";
#else
#error "__WIN_OS__ builds must also define __WIN_32__ or __WIN_64__"
#endif

		HINSTANCE hInstanceLoadDLL = LoadLibraryA(libPath);
		if (!hInstanceLoadDLL)
		{
			std::cout << ".." << std::endl << "..could not load " << libName << ".dll " << std::endl << ".." << std::endl;
			exit(1);
		}

		pOutputDLLVersion pODV = (pOutputDLLVersion)GetProcAddress(hInstanceLoadDLL, "DUdll_OutputDLLVersion");
		if (!pODV)
		{
			std::cout << ".." << std::endl << "..could not load OutputDLLVersion from " << libName << ".dll " << std::endl << ".." << std::endl;
			exit(1);
		}

		//output DLL version (queried once; a null result is treated as an empty string)
		const char *rawVersion = pODV();
		const std::string sDLLversion = (rawVersion != NULL) ? rawVersion : "";
		std::cout << ".." << libName << " DLL Version - " << sDLLversion << std::endl;
		strPFOut.append("\n" + libName + " DLL Version - " + sDLLversion + "\n");

		std::cout << std::endl << " --- Determining MMX - SSE capabilities --- " << std::endl;
		strPFOut.append("\n --- Determining MMX - SSE capabilities --- \n");

		for (int feature = 0; feature < kMmxSseFeatureCount; feature++)
		{
			supported[feature] = MmxSseQueryDllFeature(hInstanceLoadDLL, libName, feature);
		}

		//unload DLL
		FreeLibrary(hInstanceLoadDLL);
	}
#endif

#ifdef __LIN_OS__
	{
#ifdef __LIN_32__
		const std::string libName = "DetectUtils";
#elif defined __LIN_64__
		const std::string libName = "DetectUtils64";
#else
#error "__LIN_OS__ builds must also define __LIN_32__ or __LIN_64__"
#endif

		const std::string libPath = "./lib" + libName + ".so.1.1";
		void *handle = dlopen(libPath.c_str(), RTLD_LAZY);
		if (!handle)
		{
			std::cout << ".." << std::endl << "..could not load lib" << libName << " shared library " << std::endl << ".." << std::endl;
			const char *reason = dlerror();
			if (reason != NULL)
			{
				fputs(reason, stderr);
			}
			exit(1);
		}

		//DLLVersion (queried once and reused for both outputs)
		OutputDLLVersion = (std::string(*)(void))MmxSseResolveOrExit(handle, "OutputDLLVersion");
		const std::string version = trim(OutputDLLVersion());
		std::cout << ".." << libName << ".so Version.. --> " << version << std::endl;
		strPFOut.append("\n.." + libName + ".so Version.. --> " + version + "\n");

		std::cout << std::endl << " --- Determining MMX - SSE capabilities --- " << std::endl;
		strPFOut.append("\n --- Determining MMX - SSE capabilities --- \n");

		// The file-level function pointers are still populated, as before.
		typedef bool (*FeatureCheckFn)(void);
		FeatureCheckFn *const checkSlots[kMmxSseFeatureCount] = { &bCheckMMX, &bCheckSSE, &bCheckSSE2, &bCheckSSE3, &bCheckSSSE3, &bCheckSSE4_1, &bCheckSSE4_2 };
		for (int feature = 0; feature < kMmxSseFeatureCount; feature++)
		{
			const std::string symbol = std::string("bCheck") + kMmxSseCheckSuffixes[feature];
			*checkSlots[feature] = (FeatureCheckFn)MmxSseResolveOrExit(handle, symbol.c_str());
			supported[feature] = (*checkSlots[feature])();
		}

		dlclose(handle);
	}
#endif

	//Output MMX & SSE Features Detection Section to Screen
	for (int feature = 0; feature < kMmxSseFeatureCount; feature++)
	{
		std::cout << MmxSseSupportLine(kMmxSseFeatureNames[feature], supported[feature]);
	}
	std::cout << "\n";

	//Output MMXSSE Features Detection Section to Test Results File
	for (int feature = 0; feature < kMmxSseFeatureCount; feature++)
	{
		strPFOut.append(MmxSseSupportLine(kMmxSseFeatureNames[feature], supported[feature]));
	}
	strPFOut.append("\n");

	// Test routine for each feature, in the same order as kMmxSseFeatureNames.
	typedef bool (*FeatureTestFn)(void);
	const FeatureTestFn featureTests[kMmxSseFeatureCount] = { testmmx, testsse, testsse2, testsse3, testssse3, testsse41, testsse42 };

	// Every supported feature is tested even after an earlier failure.
	bool allPassed = true;
	for (int feature = 0; feature < kMmxSseFeatureCount; feature++)
	{
		if (!MmxSseRunFeatureTest(supported[feature], kMmxSseFeatureNames[feature], featureTests[feature]))
		{
			allPassed = false;
		}
	}

	// Populate Pass Fail Object
	igPassFailStatus = allPassed ? 0 : 1;
}





// Scalar reference complex multiplication: writes x * y into *z using
// (xr*yr - xi*yi) for the real part and (xi*yr + yi*xr) for the imaginary part.
void multiply_c(complex_num x, complex_num y, complex_num *z)
{
	complex_num &product = *z;

	product.real = (x.real * y.real) - (x.img * y.img);
	product.img = (x.img * y.real) + (y.img * x.real);
}

// Complex multiplication x * y using SSE2 double-precision intrinsics; the result
// is stored through z as two consecutive doubles (real part first).
void multiply_sse2(complex_num x, complex_num y, complex_num *z)
{
	const __m128d xRealBoth = _mm_load1_pd(&x.real);           // x.real in both lanes
	const __m128d yPair = _mm_set_pd(y.img, y.real);           // low = y.real, high = y.img
	const __m128d scaledByReal = _mm_mul_pd(yPair, xRealBoth); // [y.real*x.real, y.img*x.real]

	const __m128d xImgBoth = _mm_load1_pd(&x.img);             // x.img in both lanes
	const __m128d ySwapped = _mm_shuffle_pd(yPair, yPair, 1);  // low = y.img, high = y.real
	const __m128d scaledByImg = _mm_mul_pd(ySwapped, xImgBoth);// [y.img*x.img, y.real*x.img]

	const __m128d sums = _mm_add_pd(scaledByReal, scaledByImg);  // high lane holds the imaginary part
	const __m128d diffs = _mm_sub_pd(scaledByReal, scaledByImg); // low lane holds the real part

	// Take the low lane of the differences and the high lane of the sums.
	const __m128d product = _mm_shuffle_pd(diffs, sums, 2);
	_mm_storeu_pd((double *)z, product);
}

// Complex multiplication x * y using SSE3 intrinsics (_mm_loaddup_pd and
// _mm_addsub_pd); the result is stored through z as two consecutive doubles
// (real part first).
void multiply_sse3(complex_num x, complex_num y, complex_num *z)
{
	const __m128d xRealBoth = _mm_loaddup_pd(&x.real);         // x.real in both lanes
	const __m128d yPair = _mm_set_pd(y.img, y.real);           // low = y.real, high = y.img
	const __m128d scaledByReal = _mm_mul_pd(yPair, xRealBoth); // [y.real*x.real, y.img*x.real]

	const __m128d xImgBoth = _mm_loaddup_pd(&x.img);           // x.img in both lanes
	const __m128d ySwapped = _mm_shuffle_pd(yPair, yPair, 1);  // low = y.img, high = y.real
	const __m128d scaledByImg = _mm_mul_pd(ySwapped, xImgBoth);// [y.img*x.img, y.real*x.img]

	// addsub subtracts in the low lane (real part) and adds in the high lane (imaginary part).
	const __m128d product = _mm_addsub_pd(scaledByReal, scaledByImg);
	_mm_storeu_pd((double *)z, product);
}

// MMX test: repeatedly computes the dot product of two SIZE-element vectors
// holding 0..SIZE-1 with the C routine (float inputs) and with the MMX routine
// (short inputs) and checks that both agree. Stops at the first mismatch.
// Progress and results are written to std::cout.
// Returns true when the results matched, false otherwise.
bool testmmx()
{
	// Both routines are defined outside this function.
	float dot_product(float *a, float *b);			// Compute dot product using c
	short dot_product_MMX(short *a, short *b);		// Compute dot product using MMX intrinsics

	std::cout << "Testing MMX";
	std::cout << "\n";

	// The same values 0..SIZE-1 in the element type each routine expects.
	float x[SIZE], y[SIZE];
	short a[SIZE], b[SIZE];
	for (int i = 0; i < SIZE; i++)
	{
		x[i] = static_cast<float>(i);
		y[i] = static_cast<float>(i);
		a[i] = static_cast<short>(i);
		b[i] = static_cast<short>(i);
	}

	float c_product = 0;
	short mmx_product = 0;
	for (int i = 0; i < MMXSSE_LOOP; i++)
	{
		c_product = dot_product(x, y);
		mmx_product = dot_product_MMX(a, b);
		if (c_product != mmx_product)
		{
			// Keep the mismatching pair so it is the one reported below.
			break;
		}
	}
	std::cout << "Dot Product computed using C code = " << c_product << "\n";
	std::cout << "Dot Product computed using MMX intrinsics = " << mmx_product << "\n";

	// The last computed pair decides the verdict: the loop leaves early only on a mismatch.
	const bool failed = (c_product != mmx_product);
	if (failed)
	{
		std::cout << "MMX Dot Product Computation Test Failed" << "\n";
		std::cout << "Failed MMX Test\n";
	}
	else
	{
		std::cout << "MMX Dot Product Computation Test Passed" << "\n";
		std::cout << "Passed MMX Test\n";
	}
	std::cout << "\n";

	return !failed;
}

bool testsse()
{
	std::cout << "Testing SSE";
	std::cout << "\n";

	int FailFlag = 0;
	//Test Here
	float dot_product(float *a, float *b);			// Compute dot product using c
	float dot_product_SSE1(float *a, float *b);	// Compute dot product using SSE1 intrinsics

	float x[SIZE], y[SIZE];

	int i = 0;
	float c_product = 0;
	float sse1_product = 0;

	for (i = 0; i<SIZE; i++)
	{
		x[i] = i;
		y[i] = i;
	}

	for (i = 0; i<MMXSSE_LOOP; i++)
	{
		c_product = dot_product(x, y);
		sse1_product = dot_product_SSE1(x, y);
		if (c_product != sse1_product)
		{
			FailFlag = 1;
			break;
		}
	}
	std::cout << "Dot Product computed using C code = " << c_product << "\n";
	std::cout << "Dot Product computed using SSE intrinsics = " << sse1_product << "\n";

	if (c_product != sse1_product)
	{
		FailFlag = 1;
		std::cout << "SSE Dot Product Computation Test Failed" << "\n";
	}
	else
	{
		std::cout << "SSE Dot Product Computation Test Passed" << "\n";
	}


	if (FailFlag == 0)
	{
		std::cout << "Passed SSE Test\n";
		std::cout << "\n";
		return true;
	}
	else
	{
		std::cout << "Failed SSE Test\n";
		std::cout << "\n";
		return false;
	}

}

bool testsse2()
{
	std::cout << "Testing SSE2";
	std::cout << "\n";

	int i = 0;
	int FailFlag = 0;
	//Test Here
	complex_num a, b, c, d;
	a.real = 3;
	a.img = 2;
	b.real = 5;
	b.img = -4;

	for (i = 0; i<MMXSSE_LOOP; i++)
	{
		multiply_c(a, b, &c);
		multiply_sse2(a, b, &d);
		if (c.real != d.real || c.img != d.img)
		{
			FailFlag = 1;
			break;
		}
	}

	printf("Complex Product computed using C code: %2.2f+ %2.2fi\n", c.real, c.img);
	printf("Complex Product computed using SSE2 code: %2.2f+ %2.2fi\n", d.real, d.img);

	if (c.real != d.real || c.img != d.img)
	{
		FailFlag = 1;
		std::cout << "SSE2 Complex Product Computation Test Failed" << "\n";
	}
	else
	{
		std::cout << "SSE2 Complex Product Computation Test Passed" << "\n";
	}

	if (FailFlag == 0)
	{
		std::cout << "Passed SSE2 Test\n";
		std::cout << "\n";
		return true;
	}
	else
	{
		std::cout << "Failed SSE2 Test\n";
		std::cout << "\n";
		return false;
	}
}

bool testsse3()
{
	std::cout << "Testing SSE3";
	std::cout << "\n";

	int i = 0;
	int FailFlag = 0;
	//Test Here
	complex_num a, b, c, d;
	a.real = 3;
	a.img = 2;
	b.real = 5;
	b.img = -4;

	for (i = 0; i<MMXSSE_LOOP; i++)
	{
		multiply_c(a, b, &c);
		multiply_sse3(a, b, &d);
		if (c.real != d.real || c.img != d.img)
		{
			FailFlag = 1;
			break;
		}
	}

	printf("Complex Product using C code: %2.2f+ %2.2fi\n", c.real, c.img);
	printf("Complex Product using SSE3 code: %2.2f+ %2.2fi\n", d.real, d.img);

	if (c.real != d.real || c.img != d.img)
	{
		FailFlag = 1;
		std::cout << "SSE3 Complex Product Computation Test Failed" << "\n";
	}
	else
	{
		std::cout << "SSE3 Complex Product Computation Test Passed" << "\n";
	}

	if (FailFlag == 0)
	{
		std::cout << "Passed SSE3 Test\n";
		std::cout << "\n";
		return true;
	}
	else
	{
		std::cout << "Failed SSE3 Test\n";
		std::cout << "\n";
		return false;
	}
}

bool testssse3()
{
	std::cout << "Testing SSSE3";
	std::cout << "\n";
	int FailFlag = 0;

	//Test Here
	m128i_ a;
	m128i_ b;

	int ABS_Flag = 0;
	// Compute Absolute Value
	a.m128i_i8[0] = 127;
	a.m128i_i8[1] = -127;
	b.m128i_a = _mm_abs_epi8(a.m128i_a);
	__int8 abs_epi8_0 = __int8(b.m128i_i8_[0]); // Should be 127
	__int8 abs_epi8_1 = __int8(b.m128i_i8_[1]); // Should be 127
	if (abs_epi8_0 != 127 || abs_epi8_1 != 127)
	{
		FailFlag = 1;
		ABS_Flag = 1;
		std::cout << "_mm_abs_epi8 expected 127 = " << abs_epi8_0 << "\n";
		std::cout << "_mm_abs_epi8 expected 127 = " << abs_epi8_1 << "\n";
	}

	a.m128i_i16[0] = 128;
	a.m128i_i16[1] = -128;
	b.m128i_a = _mm_abs_epi16(a.m128i_a);
	__int16 abs_epi16_0 = __int16(b.m128i_i16_[0]); // Should be 128
	__int16 abs_epi16_1 = __int16(b.m128i_i16_[1]); // Should be 128
	if (abs_epi16_0 != 128 || abs_epi16_1 != 128)
	{
		FailFlag = 1;
		ABS_Flag = 1;
		std::cout << "_mm_abs_epi16 expected 128 = " << abs_epi16_0 << "\n";
		std::cout << "_mm_abs_epi16 expected 128 = " << abs_epi16_1 << "\n";
	}

	a.m128i_i32[0] = 123456789;
	a.m128i_i32[1] = -123456789;
	b.m128i_a = _mm_abs_epi32(a.m128i_a);
	__int32 abs_epi32_0 = __int32(b.m128i_i32_[0]); // Should be 123456789
	__int32 abs_epi32_1 = __int32(b.m128i_i32_[1]); // Should be 123456789
	if (abs_epi32_0 != 123456789 || abs_epi32_1 != 123456789)
	{
		FailFlag = 1;
		ABS_Flag = 1;
		std::cout << "_mm_abs_epi32 expected 123456789 = " << abs_epi32_0 << "\n";
		std::cout << "_mm_abs_epi32 expected 123456789 = " << abs_epi32_1 << "\n";
	}

	if (ABS_Flag == 0)
	{
		std::cout << "SSSE3 Absolute Value Tests Passed" << "\n";
	}
	else
	{
		std::cout << "SSSE3 Absolute Value Tests Failed" << "\n";
	}

	int ARITH3_Flag = 0;
	// Add and Subtract
	m128i_ resadd;
	m128i_ ressub;

	a.m128i_i16[0] = -1;
	a.m128i_i16[1] = 1;
	a.m128i_i16[2] = 0;
	a.m128i_i16[3] = 8;
	a.m128i_i16[4] = -8;
	a.m128i_i16[5] = 0;
	a.m128i_i16[6] = 2;
	a.m128i_i16[7] = 2;
	b.m128i_i16[0] = -2;
	b.m128i_i16[1] = -2;
	b.m128i_i16[2] = 1000;
	b.m128i_i16[3] = 2000;
	b.m128i_i16[4] = 128;
	b.m128i_i16[5] = 32;
	b.m128i_i16[6] = 81;
	b.m128i_i16[7] = -21;

	resadd.m128i_a = _mm_hadd_epi16(a.m128i_a, b.m128i_a);
	ressub.m128i_a = _mm_hsub_epi16(a.m128i_a, b.m128i_a);
	__int16 res_hadd_epi16_0 = __int16(resadd.m128i_i16_[0]);
	__int16 res_hadd_epi16_1 = __int16(resadd.m128i_i16_[1]);
	__int16 res_hadd_epi16_2 = __int16(resadd.m128i_i16_[2]);
	__int16 res_hadd_epi16_3 = __int16(resadd.m128i_i16_[3]);
	__int16 res_hadd_epi16_4 = __int16(resadd.m128i_i16_[4]);
	__int16 res_hadd_epi16_5 = __int16(resadd.m128i_i16_[5]);
	__int16 res_hadd_epi16_6 = __int16(resadd.m128i_i16_[6]);
	__int16 res_hadd_epi16_7 = __int16(resadd.m128i_i16_[7]);
	__int16 res_hsub_epi16_0 = __int16(ressub.m128i_i16_[0]);
	__int16 res_hsub_epi16_1 = __int16(ressub.m128i_i16_[1]);
	__int16 res_hsub_epi16_2 = __int16(ressub.m128i_i16_[2]);
	__int16 res_hsub_epi16_3 = __int16(ressub.m128i_i16_[3]);
	__int16 res_hsub_epi16_4 = __int16(ressub.m128i_i16_[4]);
	__int16 res_hsub_epi16_5 = __int16(ressub.m128i_i16_[5]);
	__int16 res_hsub_epi16_6 = __int16(ressub.m128i_i16_[6]);
	__int16 res_hsub_epi16_7 = __int16(ressub.m128i_i16_[7]);
	if (res_hadd_epi16_0 != 0 || res_hadd_epi16_1 != 8 || res_hadd_epi16_2 != -8 || res_hadd_epi16_3 != 4 || res_hadd_epi16_4 != -4 || res_hadd_epi16_5 != 3000 || res_hadd_epi16_6 != 160 || res_hadd_epi16_7 != 60 || res_hsub_epi16_0 != -2 || res_hsub_epi16_1 != -8 || res_hsub_epi16_2 != -8 || res_hsub_epi16_3 != 0 || res_hsub_epi16_4 != 0 || res_hsub_epi16_5 != -1000 || res_hsub_epi16_6 != 96 || res_hsub_epi16_7 != 102)
	{
		FailFlag = 1;
		ARITH3_Flag = 1;
		std::cout << "_mm_hadd_epi16 expected 0 = " << res_hadd_epi16_0 << "\n"; // 0
		std::cout << "_mm_hadd_epi16 expected 8 = " << res_hadd_epi16_1 << "\n"; // 8
		std::cout << "_mm_hadd_epi16 expected -8 = " << res_hadd_epi16_2 << "\n"; // -8
		std::cout << "_mm_hadd_epi16 expected 4 = " << res_hadd_epi16_3 << "\n"; // 4
		std::cout << "_mm_hadd_epi16 expected -4 = " << res_hadd_epi16_4 << "\n"; // -4
		std::cout << "_mm_hadd_epi16 expected 3000 = " << res_hadd_epi16_5 << "\n"; // 3000
		std::cout << "_mm_hadd_epi16 expected 160 = " << res_hadd_epi16_6 << "\n"; // 160
		std::cout << "_mm_hadd_epi16 expected 60 = " << res_hadd_epi16_7 << "\n"; // 60
		std::cout << "_mm_hsub_epi16 expected -2 = " << res_hsub_epi16_0 << "\n"; // -2 
		std::cout << "_mm_hsub_epi16 expected -8 = " << res_hsub_epi16_1 << "\n"; // -8
		std::cout << "_mm_hsub_epi16 expected -8 = " << res_hsub_epi16_2 << "\n"; // -8
		std::cout << "_mm_hsub_epi16 expected 0 = " << res_hsub_epi16_3 << "\n"; // 0
		std::cout << "_mm_hsub_epi16 expected 0 = " << res_hsub_epi16_4 << "\n"; // 0
		std::cout << "_mm_hsub_epi16 expected -1000 = " << res_hsub_epi16_5 << "\n"; // -1000
		std::cout << "_mm_hsub_epi16 expected 96 = " << res_hsub_epi16_6 << "\n"; // 96
		std::cout << "_mm_hsub_epi16 expected 102 = " << res_hsub_epi16_7 << "\n"; // 102
	}

	a.m128i_i32[0] = -1;
	a.m128i_i32[1] = 1;
	a.m128i_i32[2] = 0;
	a.m128i_i32[3] = 65535;
	b.m128i_i32[0] = -65535;
	b.m128i_i32[1] = 0;
	b.m128i_i32[2] = 128;
	b.m128i_i32[3] = -32;

	resadd.m128i_a = _mm_hadd_epi32(a.m128i_a, b.m128i_a);
	ressub.m128i_a = _mm_hsub_epi32(a.m128i_a, b.m128i_a);
	__int32 res_hadd_epi32_0 = __int32(resadd.m128i_i32_[0]);
	__int32 res_hadd_epi32_1 = __int32(resadd.m128i_i32_[1]);
	__int32 res_hadd_epi32_2 = __int32(resadd.m128i_i32_[2]);
	__int32 res_hadd_epi32_3 = __int32(resadd.m128i_i32_[3]);
	__int32 res_hsub_epi32_0 = __int32(ressub.m128i_i32_[0]);
	__int32 res_hsub_epi32_1 = __int32(ressub.m128i_i32_[1]);
	__int32 res_hsub_epi32_2 = __int32(ressub.m128i_i32_[2]);
	__int32 res_hsub_epi32_3 = __int32(ressub.m128i_i32_[3]);
	if (res_hadd_epi32_0 != 0 || res_hadd_epi32_1 != 65535 || res_hadd_epi32_2 != -65535 || res_hadd_epi32_3 != 96 || res_hsub_epi32_0 != -2 || res_hsub_epi32_1 != -65535 || res_hsub_epi32_2 != -65535 || res_hsub_epi32_3 != 160)
	{
		FailFlag = 1;
		ARITH3_Flag = 1;
		std::cout << "_mm_hadd_epi32 expected 0 = " << res_hadd_epi32_0 << "\n"; // 0
		std::cout << "_mm_hadd_epi32 expected 65535 = " << res_hadd_epi32_1 << "\n"; // 65535
		std::cout << "_mm_hadd_epi32 expected -65535 = " << res_hadd_epi32_2 << "\n"; // -65535
		std::cout << "_mm_hadd_epi32 expected 96 = " << res_hadd_epi32_3 << "\n"; // 96
		std::cout << "_mm_hsub_epi32 expected -2 = " << res_hsub_epi32_0 << "\n"; // -2
		std::cout << "_mm_hsub_epi32 expected -65535 = " << res_hsub_epi32_1 << "\n"; // -65535
		std::cout << "_mm_hsub_epi32 expected -65535 = " << res_hsub_epi32_2 << "\n"; // -65535
		std::cout << "_mm_hsub_epi32 expected 160 = " << res_hsub_epi32_3 << "\n"; // 160
	}

	//Multiply 2 sets of 16bit Integers
	m128i_ final, resmul;
	a.m128i_i16[0] = -0x5CEE;
	a.m128i_i16[1] = 0x0105;
	a.m128i_i16[2] = 0x3DA9;
	a.m128i_i16[3] = -0x7FFF;
	a.m128i_i16[4] = 0x7FFF;
	a.m128i_i16[5] = 0x1111;
	a.m128i_i16[6] = -0x219D;
	a.m128i_i16[7] = -0x1DBC;
	b.m128i_i16[0] = 0x4000;
	b.m128i_i16[1] = -0x510A;
	b.m128i_i16[2] = 0x209D;
	b.m128i_i16[3] = -0x7FFF;
	b.m128i_i16[4] = 0x0000;
	b.m128i_i16[5] = 0x2222;
	b.m128i_i16[6] = 0x1027;
	b.m128i_i16[7] = 0x7AEF;

	for (int index = 0; index < 8; index++)
	{
		final.m128i_i16_[index] = ((a.m128i_i16_[index] * b.m128i_i16_[index]) + 0x4000) >> 15;
	}

	resmul.m128i_a = _mm_mulhrs_epi16(a.m128i_a, b.m128i_a);
	__int16 res_mulhrs_epi16_0 = __int16(resmul.m128i_i16_[0]);
	__int16 res_mulhrs_epi16_1 = __int16(resmul.m128i_i16_[1]);
	__int16 res_mulhrs_epi16_2 = __int16(resmul.m128i_i16_[2]);
	__int16 res_mulhrs_epi16_3 = __int16(resmul.m128i_i16_[3]);
	__int16 res_mulhrs_epi16_4 = __int16(resmul.m128i_i16_[4]);
	__int16 res_mulhrs_epi16_5 = __int16(resmul.m128i_i16_[5]);
	__int16 res_mulhrs_epi16_6 = __int16(resmul.m128i_i16_[6]);
	__int16 res_mulhrs_epi16_7 = __int16(resmul.m128i_i16_[7]);
	if (res_mulhrs_epi16_0 != -11895 || res_mulhrs_epi16_1 != -165 || res_mulhrs_epi16_2 != 4022 || res_mulhrs_epi16_3 != 32766 || res_mulhrs_epi16_4 != 0 || res_mulhrs_epi16_5 != 1165 || res_mulhrs_epi16_6 != -1086 || res_mulhrs_epi16_7 != -7311)
	{
		FailFlag = 1;
		ARITH3_Flag = 1;
		std::cout << "_mm_mulhrs_epi16 expected -11895 = " << res_mulhrs_epi16_0 << "\n"; // -11895
		std::cout << "_mm_mulhrs_epi16 expected -165 = " << res_mulhrs_epi16_1 << "\n"; // -165
		std::cout << "_mm_mulhrs_epi16 expected 4022 = " << res_mulhrs_epi16_2 << "\n"; // 4022
		std::cout << "_mm_mulhrs_epi16 expected 32766 = " << res_mulhrs_epi16_3 << "\n"; // 32766
		std::cout << "_mm_mulhrs_epi16 expected 0 = " << res_mulhrs_epi16_4 << "\n"; // 0
		std::cout << "_mm_mulhrs_epi16 expected 1165 = " << res_mulhrs_epi16_5 << "\n"; // 1165
		std::cout << "_mm_mulhrs_epi16 expected -1086 = " << res_mulhrs_epi16_6 << "\n"; // -1086
		std::cout << "_mm_mulhrs_epi16 expected -7311 = " << res_mulhrs_epi16_7 << "\n"; // -7311
	}
	if (ARITH3_Flag == 0)
	{
		std::cout << "SSSE3 Arithmetic Tests Passed" << "\n";
	}
	else
	{
		std::cout << "SSSE3 Arithmetic Tests Failed" << "\n";
	}


	int DotProd_Flag = 0;
	// Dot Product Calc
	m128i_ final2, resaddubs;
	int temp;
	a.m128i_u8[0] = 14;
	b.m128i_i8[0] = 14;
	a.m128i_u8[1] = 15;
	b.m128i_i8[1] = 15;
	temp = (a.m128i_u8_[0] * b.m128i_i8_[0]) + (a.m128i_u8_[1] * b.m128i_i8_[1]);
	final2.m128i_i16_[0] = (temp > 32767) ? 32767 : (temp < -32768) ? -32768 : temp;

	resaddubs.m128i_a = _mm_maddubs_epi16(a.m128i_a, b.m128i_a);
	__int16 res_maddubs_epi16_0 = __int16(resaddubs.m128i_i16_[0]);
	if (res_maddubs_epi16_0 != 421)
	{
		FailFlag = 1;
		DotProd_Flag = 1;
		std::cout << "_mm_maddubs_epi16 expected 421 = " << res_maddubs_epi16_0 << "\n"; // 421
	}
	if (DotProd_Flag == 0)
	{
		std::cout << "SSSE3 Dot Product Test Passed" << "\n";
	}
	else
	{
		std::cout << "SSSE3 Dot Product Test Failed" << "\n";
	}



	if (FailFlag == 0)
	{
		std::cout << "Passed SSSE3 Test\n";
		std::cout << "\n";
		return true;
	}
	else
	{
		std::cout << "Failed SSSE3 Test\n";
		std::cout << "\n";
		return false;
	}
}

bool testsse41()
{
	std::cout << "Testing SSE4.1";
	std::cout << "\n";

	//Test Here
	m128i_ a;
	m128i_ b;

	m128i_ res1, res2, res3, res4, res5;
	m128d_ res1a;

	int FailFlag = 0;

	//Blend Test
	int Blend_Flag = 0;
	const int mask1 = 0xF0;
	a.m128i_u16[7] = 0xFFFF;
	a.m128i_u16[6] = 0xEEEE;
	a.m128i_u16[5] = 0xDDDD;
	a.m128i_u16[4] = 0xCCCC;
	a.m128i_u16[3] = 0xBBBB;
	a.m128i_u16[2] = 0xAAAA;
	a.m128i_u16[1] = 0x9999;
	a.m128i_u16[0] = 0x8888;
	b.m128i_u16[7] = 0xFEDC;
	b.m128i_u16[6] = 0xBA98;
	b.m128i_u16[5] = 0x7654;
	b.m128i_u16[4] = 0x3210;
	b.m128i_u16[3] = 0x0123;
	b.m128i_u16[2] = 0x4567;
	b.m128i_u16[1] = 0x89AB;
	b.m128i_u16[0] = 0xCDEF;
	res1.m128i_a = _mm_blend_epi16(a.m128i_a, b.m128i_a, mask1);
	unsigned __int64 res_blend_epil16_0 = res1.m128i_u64_[0];
	unsigned __int64 res_blend_epil16_1 = res1.m128i_u64_[1];
	if (res_blend_epil16_0 != 13527593555435554952 || res_blend_epil16_1 != 18364758544493064720)
	{
		FailFlag = 1;
		Blend_Flag = 1;
		std::cout << "_mm_blend_epi16_0 expected 13527593555435554952 = " << res_blend_epil16_0 << "\n";
		std::cout << "_mm_blend_epi16 1 expected 18364758544493064720 = " << res_blend_epil16_1 << "\n";
	}

	m128d_ c, d;
	const int mask2 = 2;
	c.m128d_f64[1] = -10.5;
	c.m128d_f64[0] = -3.14159;
	d.m128d_f64[1] = 500.25;
	d.m128d_f64[0] = 2.19;

	res1a.m128d_c = _mm_blend_pd(c.m128d_c, d.m128d_c, mask2);
	float res_blend_pd_0 = float(res1a.m128d_f64_[0]);
	float res_blend_pd_1 = float(res1a.m128d_f64_[1]);
	if (res_blend_pd_0 != -3.141590118408203 || res_blend_pd_1 != -10.5)
	{
		FailFlag = 1;
		Blend_Flag = 1;
		std::cout << "_mm_blend_pd_0 expected -3.141590118408203 = " << res_blend_pd_0 << "\n";
		std::cout << "_mm_blend_pd_1 expected -10.5 = " << res_blend_pd_1 << "\n";
	}
	if (Blend_Flag == 0)
	{
		std::cout << "SSE4.1 Blend Tests Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.1 Blend Tests Failed" << "\n";
	}

	int MinMax_Flag = 0;
	a.m128i_i8[0] = 1;
	b.m128i_i8[0] = 127;
	res1.m128i_a = _mm_max_epi8(a.m128i_a, b.m128i_a);
	res2.m128i_a = _mm_min_epi8(a.m128i_a, b.m128i_a);
	int resmax_epi8_0 = int(res1.m128i_i8_[0]); // Res = 127
	int resmin_epi8_0 = int(res2.m128i_i8_[0]); // Res = 1
	if (resmax_epi8_0 != 127 || resmin_epi8_0 != 1)
	{
		FailFlag = 1;
		MinMax_Flag = 1;
		std::cout << "_mm_max_epi8 expected 127 = " << resmax_epi8_0 << "\n";
		std::cout << "_mm_min_epi8 expected 1 = " << resmin_epi8_0 << "\n";
	}

	a.m128i_i32[0] = 32000;
	b.m128i_i32[0] = 2000;
	res1.m128i_a = _mm_max_epi32(a.m128i_a, b.m128i_a);
	res2.m128i_a = _mm_min_epi32(a.m128i_a, b.m128i_a);
	__int32 resmax_epi32_0 = __int32(res1.m128i_i32_[0]); // Res = 32000
	__int32 resmin_epi32_0 = __int32(res2.m128i_i32_[0]); // Res = 2000
	if (resmax_epi32_0 != 32000 || resmin_epi32_0 != 2000)
	{
		FailFlag = 1;
		MinMax_Flag = 1;
		std::cout << "_mm_max_epi32 expected 32000 = " << resmax_epi32_0 << "\n";
		std::cout << "_mm_min_epi32 expected 2000 = " << resmin_epi32_0 << "\n";
	}

	a.m128i_u16[0] = 65535;
	b.m128i_u16[0] = 65534;
	res1.m128i_a = _mm_max_epu16(a.m128i_a, b.m128i_a);
	res2.m128i_a = _mm_min_epu16(a.m128i_a, b.m128i_a);
	unsigned __int16 resmax_epu16_0 = res1.m128i_u16_[0]; // Res = 65535
	unsigned __int16 resmin_epu16_0 = res2.m128i_u16_[0]; // Res = 65534
	if (resmax_epu16_0 != 65535 || resmin_epu16_0 != 65534)
	{
		FailFlag = 1;
		MinMax_Flag = 1;
		std::cout << "_mm_max_epu16 expected 65535 = " << resmax_epu16_0 << "\n";
		std::cout << "_mm_min_epu16 expected 65534 = " << resmin_epu16_0 << "\n";
	}

	a.m128i_u32[0] = 65000;
	b.m128i_u32[0] = 70000;
	res1.m128i_a = _mm_max_epu32(a.m128i_a, b.m128i_a);
	res2.m128i_a = _mm_min_epu32(a.m128i_a, b.m128i_a);
	unsigned __int32 resmax_epu32_0 = res1.m128i_u32_[0]; // Res = 70000
	unsigned __int32 resmin_epu32_0 = res2.m128i_u32_[0]; // Res = 65000
	if (resmax_epu32_0 != 70000 || resmin_epu32_0 != 65000)
	{
		FailFlag = 1;
		MinMax_Flag = 1;
		std::cout << "_mm_max_epu32 expected 70000 = " << resmax_epu32_0 << "\n";
		std::cout << "_mm_min_epu32 expected 65000 = " << resmin_epu32_0 << "\n";
	}

	if (MinMax_Flag == 0)
	{
		std::cout << "SSE4.1 Min Max Tests Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.1 Min Max Tests Failed" << "\n";
	}

	// Insert Bit Tests
	int Insert_Flag = 0;
	m128i_ resinsert;
	int binsert16 = -32;
	const int ndx16 = 7;
	a.m128i_i8[0] = 0;
	a.m128i_i8[1] = 1;
	a.m128i_i8[2] = 2;
	a.m128i_i8[3] = 3;
	a.m128i_i8[4] = 4;
	a.m128i_i8[5] = 5;
	a.m128i_i8[6] = 6;
	a.m128i_i8[7] = 7;
	a.m128i_i8[8] = 8;
	a.m128i_i8[9] = 9;
	a.m128i_i8[10] = 10;
	a.m128i_i8[11] = 11;
	a.m128i_i8[12] = 12;
	a.m128i_i8[13] = 13;
	a.m128i_i8[14] = 14;
	a.m128i_i8[15] = 15;
	resinsert.m128i_a = _mm_insert_epi8(a.m128i_a, binsert16, ndx16);
	int resinsert_epi8_7 = int(resinsert.m128i_i8_[7]); // Res = -32
	if (resinsert_epi8_7 != -32)
	{
		FailFlag = 1;
		Insert_Flag = 1;
		std::cout << "_mm_insert_epi8 expected -32 = " << resinsert_epi8_7 << "\n";
	}

	int binsert32 = -65536;
	const int ndx32 = 2;
	a.m128i_i32[0] = 0;
	a.m128i_i32[1] = 11;
	a.m128i_i32[2] = 2222;
	a.m128i_i32[3] = 333333;
	resinsert.m128i_a = _mm_insert_epi32(a.m128i_a, binsert32, ndx32);
	__int32 resinsert_epi32_2 = __int32(resinsert.m128i_i32_[2]); // Res = -65536
	if (resinsert_epi32_2 != -65536)
	{
		FailFlag = 1;
		Insert_Flag = 1;
		std::cout << "_mm_insert_epi32 expected -65536 = " << resinsert_epi32_2 << "\n";
	}

	if (Insert_Flag == 0)
	{
		std::cout << "SSE4.1 Insert Bit Tests Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.1 Insert Bit Tests Failed" << "\n";
	}

	int Extract_Flag = 0;
	const int ndx1 = 4;
	const int ndx2 = 13;
	a.m128i_i8[0] = 1;
	a.m128i_i8[1] = 2;
	a.m128i_i8[2] = 4;
	a.m128i_i8[3] = 8;
	a.m128i_i8[4] = 16;
	a.m128i_i8[5] = 32;
	a.m128i_i8[6] = 64;
	a.m128i_i8[7] = 127;
	a.m128i_i8[8] = -1;
	a.m128i_i8[9] = -2;
	a.m128i_i8[10] = -4;
	a.m128i_i8[11] = -8;
	a.m128i_i8[12] = -16;
	a.m128i_i8[13] = -32;
	a.m128i_i8[14] = -64;
	a.m128i_i8[15] = -128;
	int resextract1 = _mm_extract_epi8(a.m128i_a, ndx1);
	int resextract2 = _mm_extract_epi8(a.m128i_a, ndx2);
	if (resextract1 != 16 || resextract2 != 224)
	{
		FailFlag = 1;
		Extract_Flag = 1;
		std::cout << "_mm_extract_epi8_1 expected 16 = " << resextract1 << "\n"; // 16
		std::cout << "_mm_extract_epi8_2 expected 224 = " << resextract2 << "\n"; // 224
	}

	const int ndx3 = 1;
	const int ndx4 = 2;
	a.m128i_i32[0] = 0;
	a.m128i_i32[1] = 65535;
	a.m128i_i32[2] = -320000000;
	a.m128i_i32[3] = 128;
	int resextract3 = _mm_extract_epi32(a.m128i_a, ndx3);
	int resextract4 = _mm_extract_epi32(a.m128i_a, ndx4);
	if (resextract3 != 65535 || resextract4 != -320000000)
	{
		FailFlag = 1;
		Extract_Flag = 1;
		std::cout << "_mm_extract_epi32_1 expected 65535 = " << resextract3 << "\n"; // 65535
		std::cout << "_mm_extract_epi32_2 expected -320000000 = " << resextract4 << "\n"; // -320000000
	}
	if (Extract_Flag == 0)
	{
		std::cout << "SSE4.1 Extract Bit Tests Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.1 Extract Bit Tests Failed" << "\n";
	}

	//Bitwise Comparison Tests
	int BitWise_Flag = 0;
	a.m128i_u64[0] = 0xAAAA55551111FFFF;
	b.m128i_u64[0] = 0xAAAA55551111FFFF;
	a.m128i_u64[1] = 0xFEDCBA9876543210;
	b.m128i_u64[1] = 0xFEDCBA9876543210;
	int restestc1 = _mm_testc_si128(a.m128i_a, b.m128i_a);
	if (restestc1 != 1)
	{
		FailFlag = 1;
		BitWise_Flag = 1;
		std::cout << "_mm_testc_si128 expected 1 = " << restestc1 << "\n"; // 1
	}
	a.m128i_u64[0] = 0xAAAA55551011FFFF;
	int restestc2 = _mm_testc_si128(a.m128i_a, b.m128i_a);
	if (restestc2 != 0)
	{
		FailFlag = 1;
		BitWise_Flag = 1;
		std::cout << "_mm_testc_si128 expected 0 = " << restestc2 << "\n"; // 0
	}

	a.m128i_u64[0] = 0x000000000000000;
	b.m128i_u64[0] = 0xFFFFFFFFFFFFFFF;
	a.m128i_u64[1] = 0x000000000000000;
	b.m128i_u64[1] = 0x000000000000000;
	int restestnzc1 = _mm_testnzc_si128(a.m128i_a, b.m128i_a);
	if (restestnzc1 != 0)
	{
		FailFlag = 1;
		BitWise_Flag = 1;
		std::cout << "_mm_testnzc_si128 expected 0 = " << restestnzc1 << "\n"; // 0
	}
	a.m128i_u64[0] = 0x000000000000001;
	int restestnzc2 = _mm_testnzc_si128(a.m128i_a, b.m128i_a);
	if (restestnzc2 != 1)
	{
		FailFlag = 1;
		BitWise_Flag = 1;
		std::cout << "_mm_testnzc_si128 expected 1 = " << restestnzc2 << "\n"; // 1
	}
	a.m128i_u64[0] = 0x55550000BBBB9999;
	b.m128i_u64[0] = 0xAAAAFFFF44446666;
	a.m128i_u64[1] = 0x0123456789ABCDEF;
	b.m128i_u64[1] = 0xFEDCBA9876543210;
	int restestz1 = _mm_testz_si128(a.m128i_a, b.m128i_a);
	if (restestz1 != 1)
	{
		FailFlag = 1;
		BitWise_Flag = 1;
		std::cout << "_mm_testz_si128_1 expected 1 = " << restestz1 << "\n"; // 0
	}
	a.m128i_u64[0] = 0x55550000BBCB9999;
	int restestz2 = _mm_testz_si128(a.m128i_a, b.m128i_a);
	if (restestz2 != 0)
	{
		FailFlag = 1;
		BitWise_Flag = 1;
		std::cout << "_mm_testz_si128_2 expected 0 = " << restestz2 << "\n"; // 0
	}

	if (BitWise_Flag == 0)
	{
		std::cout << "SSE4.1 Bitwise Comparison Tests Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.1 Bitwise Comparison Tests Failed" << "\n";
	}


	// Dot Product
	int DotProd_Flag = 0;
	m128_ a1, b1, resdpps;
	const int mask_dpps = 0x55;
	a1.m128_f32[0] = 1.5;
	a1.m128_f32[1] = 10.25;
	a1.m128_f32[2] = -11.0625;
	a1.m128_f32[3] = 81.0;
	b1.m128_f32[0] = -1.5;
	b1.m128_f32[1] = 3.125;
	b1.m128_f32[2] = -50.5;
	b1.m128_f32[3] = 100.0;
	resdpps.m128_d = _mm_dp_ps(a1.m128_d, b1.m128_d, mask_dpps);
	float resdotps0 = float(resdpps.m128_f32_[0]);
	float resdotps1 = float(resdpps.m128_f32_[1]);
	float resdotps2 = float(resdpps.m128_f32_[2]);
	float resdotps3 = float(resdpps.m128_f32_[3]);
	if (resdotps0 != 556.406250 || resdotps1 != 0 || resdotps2 != 556.406250 || resdotps3 != 0)
	{
		FailFlag = 1;
		DotProd_Flag = 1;
		std::cout << "_mm_dp_ps_0 = " << resdotps0 << "\n"; // 556.406250
		std::cout << "_mm_dp_ps_1 = " << resdotps1 << "\n"; // 0
		std::cout << "_mm_dp_ps_2 = " << resdotps2 << "\n"; // 556.406250
		std::cout << "_mm_dp_ps_3 = " << resdotps3 << "\n"; // 0
	}
	if (DotProd_Flag == 0)
	{
		std::cout << "SSE4.1 Dot Product Test Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.1 Dot Product Test Failed" << "\n";
	}

	//Arithmetic Tests
	int Arith_Flag = 0;
	const int maskadbw = 5;
	m128i_ resmpsadbw, finaladbw;
	a.m128i_u8[0] = 15;
	a.m128i_u8[1] = 60;
	a.m128i_u8[2] = 55;
	a.m128i_u8[3] = 31;
	a.m128i_u8[4] = 0;
	a.m128i_u8[5] = 1;
	a.m128i_u8[6] = 2;
	a.m128i_u8[7] = 4;
	a.m128i_u8[8] = 8;
	a.m128i_u8[9] = 16;
	a.m128i_u8[10] = 32;
	a.m128i_u8[11] = 64;
	a.m128i_u8[12] = 128;
	a.m128i_u8[13] = 255;
	a.m128i_u8[14] = 1;
	a.m128i_u8[15] = 17;
	b.m128i_u8[0] = 2;
	b.m128i_u8[1] = 4;
	b.m128i_u8[2] = 8;
	b.m128i_u8[3] = 64;
	b.m128i_u8[4] = 255;
	b.m128i_u8[5] = 0;
	b.m128i_u8[6] = 1;
	b.m128i_u8[7] = 16;
	b.m128i_u8[8] = 32;
	b.m128i_u8[9] = 64;
	b.m128i_u8[10] = 128;
	b.m128i_u8[11] = 255;
	b.m128i_u8[12] = 75;
	b.m128i_u8[13] = 31;
	b.m128i_u8[14] = 42;
	b.m128i_u8[15] = 11;

	resmpsadbw.m128i_a = _mm_mpsadbw_epu8(a.m128i_a, b.m128i_a, maskadbw);

	int temp1, temp2, temp3, temp4, index;
	for (index = 0; index < 8; index++)
	{
		temp1 = abs(a.m128i_u8[4 + index] - b.m128i_u8[4]);
		temp2 = abs(a.m128i_u8[4 + index + 1] - b.m128i_u8[4 + 1]);
		temp3 = abs(a.m128i_u8[4 + index + 2] - b.m128i_u8[4 + 2]);
		temp4 = abs(a.m128i_u8[4 + index + 3] - b.m128i_u8[4 + 3]);
		finaladbw.m128i_u16_[index] = temp1 + temp2 + temp3 + temp4;
	}

	unsigned __int16 finaladbw0 = finaladbw.m128i_u16_[0];
	unsigned __int16 resadbw0 = resmpsadbw.m128i_u16_[0];
	unsigned __int16 finaladbw1 = finaladbw.m128i_u16_[1];
	unsigned __int16 resadbw1 = resmpsadbw.m128i_u16_[1];
	unsigned __int16 finaladbw2 = finaladbw.m128i_u16_[2];
	unsigned __int16 resadbw2 = resmpsadbw.m128i_u16_[2];
	unsigned __int16 finaladbw3 = finaladbw.m128i_u16_[3];
	unsigned __int16 resadbw3 = resmpsadbw.m128i_u16_[3];
	unsigned __int16 finaladbw4 = finaladbw.m128i_u16_[4];
	unsigned __int16 resadbw4 = resmpsadbw.m128i_u16_[4];
	unsigned __int16 finaladbw5 = finaladbw.m128i_u16_[5];
	unsigned __int16 resadbw5 = resmpsadbw.m128i_u16_[5];
	unsigned __int16 finaladbw6 = finaladbw.m128i_u16_[6];
	unsigned __int16 resadbw6 = resmpsadbw.m128i_u16_[6];
	unsigned __int16 finaladbw7 = finaladbw.m128i_u16_[7];
	unsigned __int16 resadbw7 = resmpsadbw.m128i_u16_[7];
	if (finaladbw0 != resadbw0 || finaladbw1 != resadbw1 || finaladbw2 != resadbw2 || finaladbw3 != resadbw3 || finaladbw4 != resadbw4 || finaladbw5 != resadbw5 || finaladbw6 != resadbw6 || finaladbw7 != resadbw7)
	{
		FailFlag = 1;
		Arith_Flag = 1;
		std::cout << "finaladbw_0 expected 269 = " << finaladbw0 << "\n"; // 269
		std::cout << "_mm_mpsadbw_epu8_0 expected 269 = " << resadbw0 << "\n"; // 269
		std::cout << "finaladbw_1 expected 267 = " << finaladbw1 << "\n"; // 267
		std::cout << "_mm_mpsadbw_epu8_1 expected 267 = " << resadbw1 << "\n"; // 267
		std::cout << "finaladbw_2 expected 264= " << finaladbw2 << "\n"; // 264
		std::cout << "_mm_mpsadbw_epu8_2 expected 264 = " << resadbw2 << "\n"; // 264
		std::cout << "finaladbw_3 expected 290 = " << finaladbw3 << "\n"; // 290
		std::cout << "_mm_mpsadbw_epu8_3 expected 290 = " << resadbw3 << "\n"; // 290
		std::cout << "finaladbw_4 expected 342 = " << finaladbw4 << "\n"; // 342
		std::cout << "_mm_mpsadbw_epu8_4 expected 342 = " << resadbw4 << "\n"; // 342
		std::cout << "finaladbw_5 expected 446 = " << finaladbw5 << "\n"; // 446
		std::cout << "_mm_mpsadbw_epu8_5 expected 446 = " << resadbw5 << "\n"; // 446
		std::cout << "finaladbw_6 expected 653 = " << finaladbw6 << "\n"; // 653
		std::cout << "_mm_mpsadbw_epu8_6 expected 653 = " << resadbw6 << "\n"; // 653
		std::cout << "finaladbw_7 expected 588 = " << finaladbw7 << "\n"; // 588
		std::cout << "_mm_mpsadbw_epu8_7 expected 588 = " << resadbw7 << "\n"; // 588
	}

	//Multiply
	m128i_ resmul;
	a.m128i_i32[0] = 65000;
	a.m128i_i32[1] = 0;
	a.m128i_i32[2] = 24000000;
	a.m128i_i32[3] = 0;
	b.m128i_i32[0] = -320000;
	b.m128i_i32[1] = 0;
	b.m128i_i32[2] = 56400000;
	b.m128i_i32[3] = 0;

	resmul.m128i_a = _mm_mul_epi32(a.m128i_a, b.m128i_a);
	__int64 resmul0 = __int64(resmul.m128i_i64_[0]);
	__int64 resmul1 = __int64(resmul.m128i_i64_[1]);
	if (resmul0 != -20800000000 || resmul1 != 1353600000000000)
	{
		FailFlag = 1;
		Arith_Flag = 1;
		std::cout << "_mm_mul_epi32_0 expected -20800000000 = " << resmul0 << "\n"; // -20800000000
		std::cout << "_mm_mul_epi32_1 expected 1353600000000000 = " << resmul1 << "\n"; // 1353600000000000
	}


	a.m128i_i32[0] = 65535;
	a.m128i_i32[1] = -512;
	a.m128i_i32[2] = 77910;
	a.m128i_i32[3] = 0;
	b.m128i_i32[0] = 2;
	b.m128i_i32[1] = 4431;
	b.m128i_i32[2] = -7969;
	b.m128i_i32[3] = 240000000;
	resmul.m128i_a = _mm_mullo_epi32(a.m128i_a, b.m128i_a);
	__int32 resmullo_0 = __int32(resmul.m128i_i32_[0]);
	__int32 resmullo_1 = __int32(resmul.m128i_i32_[1]);
	__int32 resmullo_2 = __int32(resmul.m128i_i32_[2]);
	__int32 resmullo_3 = __int32(resmul.m128i_i32_[3]);
	if (resmullo_0 != 131070 || resmullo_1 != -2268672 || resmullo_2 != -620864790 || resmullo_3 != 0)
	{
		FailFlag = 1;
		Arith_Flag = 1;
		std::cout << "_mm_mullo_epi32_0 expected 131070 = " << resmullo_0 << "\n"; // 131070
		std::cout << "_mm_mullo_epi32_1 expected -2268672 = " << resmullo_1 << "\n"; //  -2268672
		std::cout << "_mm_mullo_epi32_2 expected -620864790 = " << resmullo_2 << "\n"; // -620864790
		std::cout << "_mm_mullo_epi32_3 expected 0 = " << resmullo_3 << "\n"; // 0
	}
	if (Arith_Flag == 0)
	{
		std::cout << "SSE4.1 Arithmetic Tests Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.1 Arithmetic Tests Failed" << "\n";
	}



	//Bit Conversion Tests
	int BitConv_Flag = 0;
	m128i_ a16, b16, rescvtepi8_16, rescvtepu8_16, rescvtepi8_32, rescvtepu8_32, rescvtepi16_32, rescvtepu16_32;
	a.m128i_i8[0] = 100;
	a.m128i_i8[1] = -100;
	b.m128i_i8[0] = 100;
	b.m128i_i8[1] = 200;
	a16.m128i_i16[0] = 32767;
	a16.m128i_i16[1] = -32768;
	b16.m128i_i16[0] = 32767;
	b16.m128i_i16[1] = 32768;

	rescvtepi8_16.m128i_a = _mm_cvtepi8_epi16(a.m128i_a);
	__int16 rescvtepi8_16_0 = __int16(rescvtepi8_16.m128i_i16_[0]);
	__int16 rescvtepi8_16_1 = __int16(rescvtepi8_16.m128i_i16_[1]);
	if (rescvtepi8_16_0 != 100 || rescvtepi8_16_1 != -100)
	{
		FailFlag = 1;
		BitConv_Flag = 1;
		std::cout << "_mm_cvtepi8_epi16_0 expected 100 = " << rescvtepi8_16_0 << "\n"; // 100
		std::cout << "_mm_cvtepi8_epi16_1 expected -100 = " << rescvtepi8_16_1 << "\n"; // -100
	}

	rescvtepu8_16.m128i_a = _mm_cvtepu8_epi16(b.m128i_a);
	__int16 rescvtepu8_16_0 = __int16(rescvtepu8_16.m128i_i16_[0]);
	__int16 rescvtepu8_16_1 = __int16(rescvtepu8_16.m128i_i16_[1]);
	if (rescvtepu8_16_0 != 100 || rescvtepu8_16_1 != 200)
	{
		FailFlag = 1;
		BitConv_Flag = 1;
		std::cout << "_mm_cvtepu8_epi16_0 expected 100 = " << rescvtepu8_16_0 << "\n"; // 100
		std::cout << "_mm_cvtepu8_epi16_1 expected 200 = " << rescvtepu8_16_1 << "\n"; // 200
	}

	rescvtepi8_32.m128i_a = _mm_cvtepi8_epi32(a.m128i_a);
	__int32 rescvtep8_32_0 = __int32(rescvtepi8_32.m128i_i32_[0]);
	__int32 rescvtep8_32_1 = __int32(rescvtepi8_32.m128i_i32_[1]);
	if (rescvtep8_32_0 != 100 || rescvtep8_32_1 != -100)
	{
		FailFlag = 1;
		BitConv_Flag = 1;
		std::cout << "_mm_cvtepi8_epi32_0 expected 100 = " << rescvtep8_32_0 << "\n"; // 100
		std::cout << "_mm_cvtepi8_epi32_1 expected -100 = " << rescvtep8_32_1 << "\n"; // -100
	}

	rescvtepu8_32.m128i_a = _mm_cvtepu8_epi32(b.m128i_a);
	__int32 rescvtepu8_32_0 = __int32(rescvtepu8_32.m128i_i32_[0]);
	__int32 rescvtepu8_32_1 = __int32(rescvtepu8_32.m128i_i32_[1]);
	if (rescvtepu8_32_0 != 100 || rescvtepu8_32_1 != 200)
	{
		FailFlag = 1;
		BitConv_Flag = 1;
		std::cout << "_mm_cvtepu8_epi32_0 expected 100 = " << rescvtepu8_32_0 << "\n"; // 100
		std::cout << "_mm_cvtepu8_epi32_1 expected 200 = " << rescvtepu8_32_1 << "\n"; // 200
	}

	rescvtepi16_32.m128i_a = _mm_cvtepi16_epi32(a16.m128i_a);
	__int32 rescvtepi16_32_0 = __int32(rescvtepi16_32.m128i_i32_[0]);
	__int32 rescvtepi16_32_1 = __int32(rescvtepi16_32.m128i_i32_[1]);
	if (rescvtepi16_32_0 != 32767 || rescvtepi16_32_1 != -32768)
	{
		FailFlag = 1;
		BitConv_Flag = 1;
		std::cout << "_mm_cvtepi16_epi32_0 expected 32767 = " << rescvtepi16_32_0 << "\n"; // 32767
		std::cout << "_mm_cvtepi16_epi32_1 expected -32768 = " << rescvtepi16_32_1 << "\n"; // -32768
	}

	rescvtepu16_32.m128i_a = _mm_cvtepu16_epi32(b16.m128i_a);
	__int32 rescvtepu16_32_0 = __int32(rescvtepu16_32.m128i_i32_[0]);
	__int32 rescvtepu16_32_1 = __int32(rescvtepu16_32.m128i_i32_[1]);
	if (rescvtepu16_32_0 != 32767 || rescvtepu16_32_1 != 32768)
	{
		FailFlag = 1;
		BitConv_Flag = 1;
		std::cout << "_mm_cvtepu16_epi32_0 expected 32767 = " << rescvtepu16_32_0 << "\n"; // 32767
		std::cout << "_mm_cvtepu16_epi32_1 expected 32768 = " << rescvtepu16_32_1 << "\n"; // 32768
	}


	m128i_ respackus;
	a.m128i_i32[0] = 0;
	a.m128i_i32[1] = -1;
	a.m128i_i32[2] = 70000;
	a.m128i_i32[3] = 128;
	b.m128i_i32[0] = -512;
	b.m128i_i32[1] = 5200;
	b.m128i_i32[2] = 32768;
	b.m128i_i32[3] = 65536;

	respackus.m128i_a = _mm_packus_epi32(a.m128i_a, b.m128i_a);
	unsigned __int16 respackus_0 = respackus.m128i_u16_[0];
	unsigned __int16 respackus_1 = respackus.m128i_u16_[1];
	unsigned __int16 respackus_2 = respackus.m128i_u16_[2];
	unsigned __int16 respackus_3 = respackus.m128i_u16_[3];
	unsigned __int16 respackus_4 = respackus.m128i_u16_[4];
	unsigned __int16 respackus_5 = respackus.m128i_u16_[5];
	unsigned __int16 respackus_6 = respackus.m128i_u16_[6];
	unsigned __int16 respackus_7 = respackus.m128i_u16_[7];
	if (respackus_0 != 0 || respackus_1 != 0 || respackus_2 != 65535 || respackus_3 != 128 || respackus_4 != 0 || respackus_5 != 5200 || respackus_6 != 32768 || respackus_7 != 65535)
	{
		FailFlag = 1;
		BitConv_Flag = 1;
		std::cout << "_mm_packus_epi32_0 expected 0 = " << respackus_0 << "\n"; // 0
		std::cout << "_mm_packus_epi32_1 expected 0 = " << respackus_1 << "\n"; // 0
		std::cout << "_mm_packus_epi32_2 expected 65535 = " << respackus_2 << "\n"; // 65535
		std::cout << "_mm_packus_epi32_3 expected 128  = " << respackus_3 << "\n"; // 128
		std::cout << "_mm_packus_epi32_4 expected 0 = " << respackus_4 << "\n"; // 0
		std::cout << "_mm_packus_epi32_5 expected 5200 = " << respackus_5 << "\n"; // 5200
		std::cout << "_mm_packus_epi32_6 expected 32768 = " << respackus_6 << "\n"; // 32768
		std::cout << "_mm_packus_epi32_7 expected 65535 = " << respackus_7 << "\n"; // 65535
	}

	if (BitConv_Flag == 0)
	{
		std::cout << "SSE4.1 Bit Conversion Tests Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.1 Bit Conversion Tests Failed" << "\n";
	}


	//Bit Compare Test
	int BitComp_Flag = 0;
	m128i_ rescmpeq;
	a.m128i_i64[1] = 0xFFFFFFFFFFFFFFFF;
	a.m128i_i64[0] = 0xA0A0A0A0A0A0A0A0;
	b.m128i_i64[1] = 0xFFFFFFFFFFFFFFFF;
	b.m128i_i64[0] = 0xA0A0A0A0A0A0A0A1;

	rescmpeq.m128i_a = _mm_cmpeq_epi64(a.m128i_a, b.m128i_a);
	__int64 rescmpeq_0 = __int64(rescmpeq.m128i_i64_[0]);
	__int64 rescmpeq_1 = __int64(rescmpeq.m128i_i64_[1]);
	if (rescmpeq_0 != 0 || rescmpeq_1 != -1)
	{
		FailFlag = 1;
		BitComp_Flag = 1;
		std::cout << "_mm_cmpeq_epi64_0 expected 0 = " << rescmpeq_0 << "\n"; // 0x0000000000000000   0
		std::cout << "_mm_cmpeq_epi64_1 expected -1 = " << rescmpeq_1 << "\n"; // 0xffffffffffffffff   -1
	}

	if (BitComp_Flag == 0)
	{
		std::cout << "SSE4.1 Bit Compare Test Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.1 Bit Compare Test Failed" << "\n";
	}

	if (FailFlag == 0)
	{
		std::cout << "Passed SSE4.1 Test\n";
		std::cout << "\n";
		return true;
	}
	else
	{
		std::cout << "Failed SSE4.1 Test\n";
		std::cout << "\n";
		return false;
	}
}

// Exercises SSE4.2 intrinsics against hard-coded expected values: the explicit
// and implicit length string compares (_mm_cmpestr* / _mm_cmpistr*), popcnt and
// the crc32 accumulators. The 64-bit popcnt and crc32 checks are compiled only
// when __WIN_64__ is defined.
// Every mismatch is printed to std::cout next to the value that was expected,
// and a Passed/Failed line is printed after each group of checks.
// NOTE: `a` and `b` are modified in place between checks, so the order of the
// statements below matters.
// Returns true when every check matched, false otherwise.
bool testsse42()
{
	std::cout << "Testing SSE4.2";
	std::cout << "\n";

	// Set to 1 by any failing check; decides the return value.
	int FailFlag = 0;

	//Test Here
	int CMP_Flag = 0;
	m128i_ a;
	m128i_ b;
	m128i_ res_cmpestrm;
	// NOTE: _SIDD_LEAST_SIGNIFICANT sets the same bit as _SIDD_BIT_MASK
	const int mode = _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_LEAST_SIGNIFICANT;

	// Start with no matching words: every word of a is 0xCCCC, every word of b is 0x3333.
	a.m128i_u16[7] = 0xCCCC;
	a.m128i_u16[6] = 0xCCCC;
	a.m128i_u16[5] = 0xCCCC;
	a.m128i_u16[4] = 0xCCCC;
	a.m128i_u16[3] = 0xCCCC;
	a.m128i_u16[2] = 0xCCCC;
	a.m128i_u16[1] = 0xCCCC;
	a.m128i_u16[0] = 0xCCCC;
	b.m128i_u16[7] = 0x3333;
	b.m128i_u16[6] = 0x3333;
	b.m128i_u16[5] = 0x3333;
	b.m128i_u16[4] = 0x3333;
	b.m128i_u16[3] = 0x3333;
	b.m128i_u16[2] = 0x3333;
	b.m128i_u16[1] = 0x3333;
	b.m128i_u16[0] = 0x3333;

	// Explicit-length compares (lengths are passed alongside each operand).
	int ereturnValue1 = _mm_cmpestra(a.m128i_a, 8, b.m128i_a, -8, mode); // Should be 1
	if (ereturnValue1 != 1)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpestra expected 1 = " << ereturnValue1 << "\n"; // 1
	}
	int ereturnValue2 = _mm_cmpestrc(a.m128i_a, 8, b.m128i_a, -8, mode); // Should be 0
	if (ereturnValue2 != 0)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpestrc expected 0 = " << ereturnValue2 << "\n"; // 0
	}
	// Make words 5 and 7 of a equal to b; the index check below expects 5.
	a.m128i_u16[7] = 0x3333;
	a.m128i_u16[5] = 0x3333;
	int ereturnValue3 = _mm_cmpestri(a.m128i_a, 8, b.m128i_a, -8, mode); // Should be 5
	if (ereturnValue3 != 5)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpestri expected 5 = " << ereturnValue3 << "\n"; // 5
	}

	// 160 == 0xA0: bits 5 and 7 set, matching the two equal words.
	res_cmpestrm.m128i_a = _mm_cmpestrm(a.m128i_a, 8, b.m128i_a, 8, mode);
	unsigned __int64 res_fullcmpestrm1 = res_cmpestrm.m128i_u64_[0];
	unsigned __int64 res_fullcmpestrm2 = res_cmpestrm.m128i_u64_[1];
	if (res_fullcmpestrm1 != 160 || res_fullcmpestrm2 != 0)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpestrm_0 expected 160 = " << res_fullcmpestrm1 << "\n"; // 160
		std::cout << "_mm_cmpestrm_1 expected 0 = " << res_fullcmpestrm2 << "\n"; // 0
	}

	int ereturnValue4 = _mm_cmpestro(a.m128i_a, 8, b.m128i_a, 8, mode); // Should be 0
	if (ereturnValue4 != 0)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpestro_1 expected 0 = " << ereturnValue4 << "\n"; // 0
	}
	// Make word 0 match as well; _mm_cmpestro is then expected to return 1.
	a.m128i_u16[0] = 0x3333;
	int ereturnValue5 = _mm_cmpestro(a.m128i_a, 8, b.m128i_a, 8, mode); // Should be 1
	if (ereturnValue5 != 1)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpestro_2 expected 1 = " << ereturnValue5 << "\n"; // 1
	}

	// _mm_cmpestrs: expected 0 with a length of 8 for a, 1 once that length is 7.
	int ereturnValue6 = _mm_cmpestrs(a.m128i_a, 8, b.m128i_a, 8, mode); // Should be 0
	if (ereturnValue6 != 0)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpestrs_1 expected 0 = " << ereturnValue6 << "\n"; // 0
	}
	int ereturnValue7 = _mm_cmpestrs(a.m128i_a, 7, b.m128i_a, 8, mode); // Should be 1
	if (ereturnValue7 != 1)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpestrs_2 expected 1 = " << ereturnValue7 << "\n"; // 1
	}

	// _mm_cmpestrz: expected 0 with a length of 8 for b, 1 once that length is 7.
	int ereturnValue8 = _mm_cmpestrz(a.m128i_a, 8, b.m128i_a, 8, mode); // Should be 0
	if (ereturnValue8 != 0)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpestrz_1 expected 0 = " << ereturnValue8 << "\n"; // 0
	}
	int ereturnValue9 = _mm_cmpestrz(a.m128i_a, 8, b.m128i_a, 7, mode); // Should be 1
	if (ereturnValue9 != 1)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpestrz_2 expected 1 = " << ereturnValue9 << "\n"; // 1
	}

	// Implicit-length compares: no lengths are passed, so the zero words written
	// into a and b further down are what change the expected results.
	m128i_ res_cmpistrm;
	a.m128i_u16[7] = 0xFFFF;
	a.m128i_u16[6] = 0xFFFF;
	a.m128i_u16[5] = 0xFFFF;
	a.m128i_u16[4] = 0xFFFF;
	a.m128i_u16[3] = 0xFFFF;
	a.m128i_u16[2] = 0xFFFF;
	a.m128i_u16[1] = 0xFFFF;
	a.m128i_u16[0] = 0xFFFF;
	b.m128i_u16[7] = 0x0001;
	b.m128i_u16[6] = 0x0001;
	b.m128i_u16[5] = 0x0001;
	b.m128i_u16[4] = 0x0001;
	b.m128i_u16[3] = 0x0001;
	b.m128i_u16[2] = 0x0001;
	b.m128i_u16[1] = 0x0001;
	b.m128i_u16[0] = 0x0001;

	int ireturnValue1 = _mm_cmpistra(a.m128i_a, b.m128i_a, mode); // 1
	if (ireturnValue1 != 1)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpistra_1 expected 1 = " << ireturnValue1 << "\n"; // 1
	}
	b.m128i_u16[4] = 0x0000;
	int ireturnValue2 = _mm_cmpistra(a.m128i_a, b.m128i_a, mode); //0
	if (ireturnValue2 != 0)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpistra_2 expected 0 = " << ireturnValue2 << "\n"; // 0
	}
	b.m128i_u16[5] = 0xFFFF;
	int ireturnValue3 = _mm_cmpistrc(a.m128i_a, b.m128i_a, mode); //0
	if (ireturnValue3 != 0)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpistrc_1 expected 0 = " << ireturnValue3 << "\n"; // 0
	}
	b.m128i_u16[4] = 0x0001;
	int ireturnValue4 = _mm_cmpistrc(a.m128i_a, b.m128i_a, mode); //1
	if (ireturnValue4 != 1)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpistrc_2 expected 1 = " << ireturnValue4 << "\n"; // 1
	}
	int ireturnValue5 = _mm_cmpistri(a.m128i_a, b.m128i_a, mode); //5
	if (ireturnValue5 != 5)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpistri expected 5 = " << ireturnValue5 << "\n"; // 5
	}
	// 33 == 0x21: bits 0 and 5 set, matching the two words of b that now equal a.
	b.m128i_u16[0] = 0xFFFF;
	res_cmpistrm.m128i_a = _mm_cmpistrm(a.m128i_a, b.m128i_a, mode);
	unsigned __int64 res_fullcmpistrm1 = res_cmpistrm.m128i_u64_[0];
	unsigned __int64 res_fullcmpistrm2 = res_cmpistrm.m128i_u64_[1];
	if (res_fullcmpistrm1 != 33 || res_fullcmpistrm2 != 0)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpistrm_0 expected 33 = " << res_fullcmpistrm1 << "\n"; // 33
		std::cout << "_mm_cmpistrm_1 expected 0 = " << res_fullcmpistrm2 << "\n"; // 0
	}
	int ireturnValue6 = _mm_cmpistro(a.m128i_a, b.m128i_a, mode); //1
	if (ireturnValue6 != 1)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpistro expected 1 = " << ireturnValue6 << "\n"; // 1
	}
	int ireturnValue7 = _mm_cmpistrs(a.m128i_a, b.m128i_a, mode); //0
	if (ireturnValue7 != 0)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpistrs_1 expected 0 = " << ireturnValue7 << "\n"; // 0
	}
	// A zero word in a is expected to flip _mm_cmpistrs to 1.
	a.m128i_u16[7] = 0x0000;
	int ireturnValue8 = _mm_cmpistrs(a.m128i_a, b.m128i_a, mode); //1
	if (ireturnValue8 != 1)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpistrs_2 expected 1 = " << ireturnValue8 << "\n"; // 1
	}
	int ireturnValue9 = _mm_cmpistrz(a.m128i_a, b.m128i_a, mode); //0
	if (ireturnValue9 != 0)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpistrz_1 expected 0 = " << ireturnValue9 << "\n"; // 0
	}
	// A zero word in b is expected to flip _mm_cmpistrz to 1.
	b.m128i_u16[7] = 0x0000;
	int ireturnValue10 = _mm_cmpistrz(a.m128i_a, b.m128i_a, mode); //1
	if (ireturnValue10 != 1)
	{
		FailFlag = 1;
		CMP_Flag = 1;
		std::cout << "_mm_cmpistrz_2 expected 1 = " << ireturnValue10 << "\n"; // 1
	}

	if (CMP_Flag == 0)
	{
		std::cout << "SSE4.2 Bit Compare Test Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.2 Bit Compare Test Failed" << "\n";
	}


	// Population count tests
	int POP_Flag = 0;
	unsigned int c = 0x2F63A150;
	int res6 = _mm_popcnt_u32(c); // res = 14
	if (res6 != 14)
	{
		FailFlag = 1;
		POP_Flag = 1;
		std::cout << "_mm_popcnt_u32 expected 14 = " << res6 << "\n";
	}

#if defined __WIN_64__
	unsigned __int64 d = 0x123456789ABCDEF0;
	int res7 = _mm_popcnt_u64(d); // res = 32
	// Check the 64-bit result itself (this used to re-test res6 against 14,
	// which left _mm_popcnt_u64 unverified).
	if (res7 != 32)
	{
		FailFlag = 1;
		POP_Flag = 1;
		std::cout << "_mm_popcnt_u64 expected 32 = " << res7 << "\n";
	}
#endif

	if (POP_Flag == 0)
	{
		std::cout << "SSE4.2 Calculate Bit Set to 1 Test Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.2 Calculate Bit Set to 1 Test Failed" << "\n";
	}


	// CRC32 accumulate tests: the 8/16/32-bit variants all start from crc = 1.
	int CRC_Flag = 0;
	unsigned int crc = 1;
	unsigned char input1 = 100;
	unsigned int res8 = _mm_crc32_u8(crc, input1); //res = 1412925310
	if (res8 != 1412925310)
	{
		FailFlag = 1;
		CRC_Flag = 1;
		std::cout << "_mm_crc32_u8 expected 1412925310 = " << res8 << "\n";
	}
	unsigned short input2 = 1000;
	unsigned int res9 = _mm_crc32_u16(crc, input2); // res = 3870914500
	if (res9 != 3870914500)
	{
		FailFlag = 1;
		CRC_Flag = 1;
		std::cout << "_mm_crc32_u16 expected 3870914500 = " << res9 << "\n";
	}
	unsigned int input3 = 50000;
	unsigned int res10 = _mm_crc32_u32(crc, input3); // res = 971731851
	if (res10 != 971731851)
	{
		FailFlag = 1;
		CRC_Flag = 1;
		std::cout << "_mm_crc32_u32 expected 971731851 = " << res10 << "\n";
	}

#if defined __WIN_64__
	unsigned __int64 crc2 = 0x000011115555AAAA;
	unsigned __int64 input4 = 0x88889999EEEE3333;
	unsigned __int64 res11 = _mm_crc32_u64(crc2, input4); // res = 385185313
	if (res11 != 385185313)
	{
		FailFlag = 1;
		CRC_Flag = 1;
		std::cout << "_mm_crc32_u64 expected 385185313 = " << res11 << "\n";
	}
#endif

	if (CRC_Flag == 0)
	{
		std::cout << "SSE4.2 CRC Test Passed" << "\n";
	}
	else
	{
		std::cout << "SSE4.2 CRC Test Failed" << "\n";
	}

	// Overall verdict for the whole SSE4.2 group.
	if (FailFlag == 0)
	{
		std::cout << "Passed SSE4.2 Test\n";
		std::cout << "\n";
		return true;
	}
	else
	{
		std::cout << "Failed SSE4.2 Test\n";
		std::cout << "\n";
		return false;
	}
}


// Scalar reference dot product over the first SIZE elements of a and b.
//   a, b : input arrays; each must hold at least SIZE floats.
// Returns the sum of a[i] * b[i] for i in [0, SIZE).
float dot_product(float *a, float *b)
{
	// Accumulate in float: an int accumulator would truncate every partial sum
	// and silently drop the fractional part of the products.
	float sum = 0.0f;
	for (int i = 0; i < SIZE; i++)
	{
		sum += a[i] * b[i];
	}
	return sum;
}


// MMX dot product over the first SIZE 16-bit elements of a and b, processed
// four elements per iteration.
//   a, b : input arrays; each must hold at least SIZE shorts.
// Returns the sum of the two accumulated halves, narrowed to short int.
short int dot_product_MMX(short *a, short *b)
{
	__m64 acc = _mm_setzero_si64();

	for (int offset = 0; offset < SIZE; offset += 4)
	{
		const __m64 *lhs = (__m64*)&a[offset];
		const __m64 *rhs = (__m64*)&b[offset];
		// Multiply-add the four 16-bit pairs, then fold the result into the
		// running total with a packed 16-bit add.
		acc = _m_paddw(acc, _m_pmaddwd(*lhs, *rhs));
	}

	// Read the low 32 bits, shift the upper 32 bits down and read those too;
	// both values are narrowed to short before being added.
	short int low = _m_to_int(acc);
	short int high = _m_to_int(_m_psrlqi(acc, 32));
	high = high + low;

	// Clear the MMX state before returning.
	_m_empty();
	return high;
}

// Packed single-precision dot product over the first SIZE elements of a and b,
// processed four floats per iteration with unaligned loads.
//   a, b : input arrays; each must hold at least SIZE floats.
// Returns the lowest element of the horizontally reduced accumulator.
float dot_product_SSE1(float *a, float *b)
{
	__m128 acc = _mm_setzero_ps();   // running sum, starts at zero

	for (int offset = 0; offset < SIZE; offset += 4)
	{
		__m128 prod = _mm_mul_ps(_mm_loadu_ps(a + offset), _mm_loadu_ps(b + offset));
		// Pairwise-add the four products before accumulating them.
		acc = _mm_add_ps(acc, _mm_hadd_ps(prod, prod));
	}

	// One more horizontal add leaves the full sum in the lowest element.
	float total = 0;
	_mm_store_ss(&total, _mm_hadd_ps(acc, acc));
	return total;
}


// Resets the module's global state at the start of a run: records the start
// time (echoing it when iDebugFlag is set), presets the pass/fail status to
// Success and restores the remaining counters and option flags.
void Init(void)
{
	// Start-of-run timestamp.
	tStartTime = std::time(nullptr);
	if (iDebugFlag)
	{
		const std::string sStamp(std::asctime(std::localtime(&tStartTime)));
		std::cout << "tStartTime = " << sStamp << std::endl;
	}

	// The status starts out as Success; the tests decide whether it stays that way.
	// The other accepted values are 1 (Fail) and 2 (Indeterminate, i.e. the
	// test was interrupted).
	igPassFailStatus = ReturnValueDef::Success;

	// Elapsed-time counters.
	iHour = 0;
	iMin = 0;
	iSec = 0;

	// Values that start at zero.
	iColorText = 0;
	iOptionValid = 0;
	iArgIndex = 0;

	// Flags that start enabled.
	iRun = 1;
	iPrintVersionFlag = 1;
	iPrintSuccessFlag = 1;
}

// Prints the module version (sgGoldModuleVersion) to std::cout, framed by
// "..." lines and preceded by an empty line.
void PrintVersion(void)
{
	std::cout << std::endl;
	std::cout << "..." << std::endl;
	std::cout << "Version: " << sgGoldModuleVersion << std::endl;
	std::cout << "..." << std::endl;
}

void SignalFun(int iSigNum)
{
	// If you want to print out the signal, do the following
	cout << "Signal is:" << iSigNum << endl;

	// Depending on your routine, you may want to have some sort of exit message ... however you may also want to comment these out
	cout << endl << "Do the soft clean up here" << endl;
	
	// Do cleanup and close up stuff here 
	CleanUp();

	// Exit the program with one of the accepted error levels. 
	//0 = Success
	//1 = Fail
	//2 = Indeterminate 
	igPassFailStatus = ReturnValueDef::Indeterminate;

    // if you get to this logic, most likely your answer should always be a 2
	std::string sTemp = "Error is: " + UtilConvert(igPassFailStatus) + " Indeterminate!";
	PrintColorMsg(sTemp, TextColor::Yellow);
	exit(igPassFailStatus);

}

// Removes the results file named by sgGoldModuleResultsFile through
// td.RemoveFile. Called from main before a new run and from SignalFun when
// the run is interrupted.
void CleanUp()
{
	td.RemoveFile(sgGoldModuleResultsFile);
}

// Writes the results file (sgGoldModuleResultsFile): title, module version,
// start time, result line, strPFOut, end time and elapsed seconds.
//
// iPassFailStatus : 0 = pass, 1 = fail, 2 = indeterminate. Any other value
//                   writes no "Test Result" line and leaves igPassFailStatus
//                   untouched.
void WriteResultsFile(int iPassFailStatus)
{
	// Header: title and version
	td.WriteToFile(sgGoldModuleResultsFile, "MMXSSE Test");
	std::string sLine = "Module Version: " + sgGoldModuleVersion;
	td.WriteToFile(sgGoldModuleResultsFile, sLine);

	// asctime() already terminates its text with a newline, hence NoEndL
	sLine = "Start Time: ";
	sLine += std::asctime(std::localtime(&tStartTime));
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);

	// Result line; the global status is synchronised with the argument
	switch (iPassFailStatus)
	{
	case 0:  // Success
		igPassFailStatus = 0;
		td.WriteToFile(sgGoldModuleResultsFile, "Test Result - PASS");
		break;
	case 1:  // Fail
		igPassFailStatus = 1;
		td.WriteToFile(sgGoldModuleResultsFile, "Test Result - FAIL");
		break;
	case 2:  // Indeterminate
		igPassFailStatus = 2;
		td.WriteToFile(sgGoldModuleResultsFile, "Test Result - INDETERMINATE");
		break;
	default:
		break;
	}

	td.WriteToFile(sgGoldModuleResultsFile, strPFOut);

	// Footer: end time and elapsed time
	tEndTime = std::time(nullptr);
	sLine = "End Time: ";
	sLine += std::asctime(std::localtime(&tEndTime));
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);

	double dElapsedSeconds = difftime(tEndTime, tStartTime);
	sLine = "Total Time: ";
	sLine += " seconds: ";
	sLine += UtilConvert(dElapsedSeconds);
	td.WriteToFileNoEndL(sgGoldModuleResultsFile, sLine);
}

// Prints the help/usage text: what the test does, every supported command
// line option with a platform-specific example, the library the build depends
// on, and the copyright line.
void HelpUseage(void)
{
	// --- Test description ---
	std::cout << std::endl << "Test Information:" << std::endl
		<< " This test detects which MMX & SSE, SSE2, SSE3, SSSE3, SSE4 instruction sets" << std::endl
		<< " are supported on the processor being tested." << std::endl
		<< " If the instruction set is supported then the test will execute all" << std::endl
		<< " MMX & SSE instructions for the supported instruction sets." << std::endl
		<< " The test will display which features were detected and what was tested." << std::endl
		<< " " << std::endl;

	// --- -h ---
	std::cout << "Possible options are as follows:" << std::endl
		<< " -h           = Help or Usage (this message)" << std::endl
		<< "                Example:" << std::endl;
#ifdef __WIN_OS__
	std::cout << "                MMXSSE.exe -h" << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "                ./mmxsse -h" << std::endl << std::endl;
#endif

	// --- -info ---
	std::cout << " -info        = Information switch that publishes parallel information" << std::endl
		<< "                using the following scheme:" << std::endl
		<< "               \"parallel:yes|socket:yes|core:yes\" " << std::endl
		<< "                Example:" << std::endl;
#ifdef __WIN_OS__
	std::cout << "                MMXSSE.exe -info" << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "                ./mmxsse -info" << std::endl << std::endl;
#endif

	// --- -resultName ---
	std::cout << " -resultName  = The resultName switch provides a way to name " << std::endl
		<< "                the results file as desired." << std::endl
		<< "                Example :" << std::endl;
#ifdef __WIN_OS__
	std::cout << "                MMXSSE.exe -resultName MMXSSE_results_0001.txt " << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "                ./mmxsse -resultName MMXSSE_results_0001.txt " << std::endl << std::endl;
#endif

	// --- -pa ---
	std::cout << " -pa          = pause app option" << std::endl
		<< "                Example:" << std::endl;
#ifdef __WIN_OS__
	std::cout << "                MMXSSE.exe -pa" << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "                ./mmxsse -pa" << std::endl << std::endl;
#endif

	// --- -c ---
	std::cout << " -c           = This option will display text in various colors" << std::endl
		<< "                Example:" << std::endl;
#ifdef __WIN_OS__
	std::cout << "                MMXSSE.exe -c" << std::endl << std::endl;
#endif
#ifdef __LIN_OS__
	std::cout << "                ./mmxsse -c" << std::endl << std::endl;
#endif

	// --- Detection library required by this build ---
#ifdef __WIN_32__
	std::cout << "Dependencies:" << std::endl
		<< "      C:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool\\DetectUtils.dll " << std::endl;
#endif
#ifdef __WIN_64__
	std::cout << "Dependencies:" << std::endl
		<< "      C:\\Program Files\\Intel Corporation\\Intel Processor Diagnostic Tool 64bit\\DetectUtils64.dll " << std::endl;
#endif
#ifdef __LIN_32__
	std::cout << "Dependencies:" << std::endl
		<< "      libDetectUtils.so.1.1 " << std::endl;
#endif
#ifdef __LIN_64__
	std::cout << "Dependencies:" << std::endl
		<< "      libDetectUtils64.so.1.1 " << std::endl;
#endif

	std::cout << std::endl << " Copyright (C) 2017, Intel Corporation" << std::endl;
}
// Holds the application open until the user enters a token on stdin.
// The prompt asks for 'q', but any whitespace-delimited input is accepted.
void PauseWQuit(void)
{
	std::string sReply;
	std::cout << "Please press 'q' <ENTER> to quit: ";
	std::cin >> sReply;
}

// Prints sMsg surrounded by blank-line breaks, optionally colored.
//
// sMsg   : text to print.
// iColor : requested color; Red, Green and Yellow are supported.
//
// When color output is disabled (iColorText == 0) the message is printed
// plainly. When it is enabled, the console color is switched for the message
// and restored afterwards (console attributes on Windows, ANSI escape
// sequences on Linux). A color without a mapping - or a build without a
// platform color implementation - falls back to plain output so the message
// is never silently dropped.
void PrintColorMsg(std::string sMsg, TextColor iColor)
{
	bool bPrinted = false;

	if (iColorText != 0)
	{
#ifdef __WIN_OS__
		// 0 means "no mapping for this color"
		WORD wColorAttr = 0;
		if (iColor == TextColor::Red)
		{
			wColorAttr = (FOREGROUND_RED | FOREGROUND_INTENSITY);
		}
		else if (iColor == TextColor::Green)
		{
			wColorAttr = (FOREGROUND_GREEN | FOREGROUND_INTENSITY);
		}
		else if (iColor == TextColor::Yellow)
		{
			wColorAttr = (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_INTENSITY);
		}

		if (wColorAttr != 0)
		{
			HANDLE hCommandWindowTextColor = GetStdHandle(STD_OUTPUT_HANDLE);
			SetConsoleTextAttribute(hCommandWindowTextColor, wColorAttr);
			std::cout << std::endl << sMsg << std::endl;
			// Back to the default grey-on-black text
			SetConsoleTextAttribute(hCommandWindowTextColor, (FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN));
			bPrinted = true;
		}
#endif
#ifdef __LIN_OS__
		// nullptr means "no mapping for this color"
		const char *szAnsiColor = nullptr;
		if (iColor == TextColor::Red)
		{
			szAnsiColor = "\033[0;31m";
		}
		else if (iColor == TextColor::Green)
		{
			szAnsiColor = "\033[0;32m";
		}
		else if (iColor == TextColor::Yellow)
		{
			szAnsiColor = "\033[0;33m";
		}

		if (szAnsiColor != nullptr)
		{
			std::cout << szAnsiColor;
			std::cout << std::endl << sMsg << std::endl;
			std::cout << "\033[0m";   // reset attributes
			bPrinted = true;
		}
#endif
	}

	if (!bPrinted)
	{
		std::cout << std::endl << sMsg << std::endl;
	}
}

// Returns true when s is a non-empty run of ASCII decimal digits ('0'-'9').
// Signs, whitespace, decimal points and empty strings are rejected.
// A plain scan replaces the former regex match, which compiled a new
// std::regex on every call.
bool is_integer(const std::string & s){
	if (s.empty())
	{
		return false;
	}
	for (const char ch : s)
	{
		if (ch < '0' || ch > '9')
		{
			return false;
		}
	}
	return true;
}

// Checks that the option at argvLocal[iArgLocal] is followed by a usable value.
//
// iArgLocal  : index of the option being validated.
// argcLocal  : number of entries in argvLocal.
// argvLocal  : argument vector.
// vt         : expected value type; for VarType::vtINT the value must be a
//              string of decimal digits.
// returns    : true when a non-empty value follows the option and matches the
//              expected type; otherwise false, after printing the reason in red.
bool isOptionValid(int iArgLocal, int argcLocal, char *argvLocal[], VarType vt)
{
	const std::string sOption = argvLocal[iArgLocal];
	const int iValueIdx = iArgLocal + 1;
	std::string sProblem = "";
	bool bValid = true;

	// A value exists when there is a next entry that is neither NULL nor empty
	const bool bHasValue = (iValueIdx < argcLocal)
		&& (argvLocal[iValueIdx] != NULL)
		&& (argvLocal[iValueIdx][0] != '\0');

	if (!bHasValue)
	{
		bValid = false;
		sProblem = "No argument for '" + sOption + "' detected!";
	}
	else if (vt == VarType::vtINT && !is_integer(argvLocal[iValueIdx]))
	{
		bValid = false;
		sProblem = "Wrong argument type format for argument: '" + sOption + "'!";
	}

	// Tell the user what was wrong
	if (!bValid)
	{
		PrintColorMsg(sProblem, TextColor::Red);
	}
	return bValid;
}


void handleArgs(int argc, char *argv[])
{

	// Optional args
	// Make sure the options can only be run one time
	// Load up all the possible args
	std::vector <string> sArgs_v;
	sArgs_v.push_back("-h");
	sArgs_v.push_back("-info");
	sArgs_v.push_back("-resultName");
	//sArgs_v.push_back("-hrs");
	//sArgs_v.push_back("-m");
	//sArgs_v.push_back("-s");
	//sArgs_v.push_back("-d");
	//sArgs_v.push_back("-p");
	//sArgs_v.push_back("-plx");
	//sArgs_v.push_back("-plt");
	sArgs_v.push_back("-pa");
	sArgs_v.push_back("-c");
	//sArgs_v.push_back("-r");

	if (argc > 1)
	{
		for (int iArg = 1; iArg < argc; iArg++)
		{
			for (int iVec = 0; iVec < sArgs_v.size(); iVec++)
			{
				if (argv[iArg] == sArgs_v[iVec])
				{
					// Only the options that need to be processed before all the other ones need to be in this list
					if (sArgs_v[iVec] == std::string("-c"))
					{
						iColorText = 1;
					}
				}
			}
		}
		for (int iArg = 1; iArg < argc; iArg++)
		{
			iArgIndex = iArg;
			iOptionValid = 0;
			for (int iVec = 0; iVec < sArgs_v.size(); iVec++)
			{
				if (argv[iArg] == sArgs_v[iVec])
				{
					iOptionValid = 1;
					if (sArgs_v[iVec] == std::string("-h"))
					{
						iRun = 0;
						std::cout << std::endl << " --- MMXSSE Test --- ";
						PrintVersion();
						HelpUseage();
						iPrintVersionFlag = 0;
						iPrintSuccessFlag = 0;
					}
					if (sArgs_v[iVec] == std::string("-info"))
					{
						iRun = 0;
						iPrintVersionFlag = 0;
						iPrintSuccessFlag = 0;
						std::cout << "\"parallel:yes|socket:yes|core:yes\"" << std::endl;
					}
					if (sArgs_v[iVec] == std::string("-resultName"))
					{
						sgGoldModuleResultsFile = argv[++iArg];
					}
					if (sArgs_v[iVec] == std::string("-hrs"))
					{
						// Hours
						if (isOptionValid(iArg, argc, argv, VarType::vtINT))
						{
							iHour = CustomAsciiToInteger(argv[++iArg]);
						}
						else
						{
							iRun = 0;
							iOptionValid = 0;
						}

					}
					if (sArgs_v[iVec] == std::string("-m"))
					{
						// Minutes
						if (isOptionValid(iArg, argc, argv, VarType::vtINT))
						{
							iMin = CustomAsciiToInteger(argv[++iArg]);
						}
						else
						{
							iRun = 0;
							iOptionValid = 0;
						}
					}
					if (sArgs_v[iVec] == std::string("-s"))
					{
						// Seconds
						if (isOptionValid(iArg, argc, argv, VarType::vtINT))
						{
							iSec = CustomAsciiToInteger(argv[++iArg]);
						}
						else
						{
							iRun = 0;
							iOptionValid = 0;
						}
					}
					if (sArgs_v[iVec] == std::string("-d"))
					{
						// debug
						iDebugFlag = 1;
					}
					if (sArgs_v[iVec] == std::string("-p"))
					{
						std::cout << std::endl << "Entering print opt" << std::endl;
						igPrintConfig = 1;
					}
					if (sArgs_v[iVec] == std::string("-pa"))
					{
						std::cout << std::endl << "Entering pause app opt" << std::endl;
						igPauseApp = 1;
					}
					if (sArgs_v[iVec] == std::string("-c"))
					{
						std::cout << std::endl << "Entering color text opt" << std::endl;
#ifdef __WIN_OS__
						HANDLE hCommandWindowTextColor = GetStdHandle(STD_OUTPUT_HANDLE);
						SetConsoleTextAttribute(hCommandWindowTextColor, (FOREGROUND_GREEN | FOREGROUND_INTENSITY));
						//std::cout << "Some green text ..." << std::endl << std::endl;
						SetConsoleTextAttribute(hCommandWindowTextColor, (FOREGROUND_RED | FOREGROUND_BLUE | FOREGROUND_GREEN));
#endif
#ifdef __LIN_OS__
						std::cout << "\033[1;32m";
						//std::cout << std::endl << ".." << std::endl << "Some green text ..." << std::endl << std::endl;
						std::cout << "\033[0m";
#endif
					}


					// Erase current vector so the option doesn't repeat
					sArgs_v.erase(sArgs_v.begin() + (iVec));
				}

			}
			if (iDebugFlag)
			{
				std::cout << std::endl << "iOptionValid = " << iOptionValid << std::endl;
			}
			if (iOptionValid == 0)
			{
				break;
			}
		}
		if (iOptionValid == 0)
		{
			std::string sArgTemp = argv[iArgIndex];
			std::string sTempMsg = "Option " + sArgTemp + " invalid!";
			PrintColorMsg(sTempMsg, TextColor::Red);
			iRun = 0;
			igPassFailStatus = ReturnValueDef::InvalidArgs;
			HelpUseage();

		}
	}
	else
	{
		// Default message if no arguments are used  ... it may be the useage message or it may be OK that no args are used
		//std::cout << "No args used!!" << std::endl;
	}

}

void PrintSuccess(void)
{
	if (igPassFailStatus == ReturnValueDef::Success)
	{
		PrintColorMsg("MMXSSE Module Success!", TextColor::Green);
	}
	if (igPassFailStatus == ReturnValueDef::Fail)
	{
		PrintColorMsg("MMXSSE Module Fail!", TextColor::Red);
	}
	if (igPassFailStatus == ReturnValueDef::Indeterminate)
	{
		PrintColorMsg("MMXSSE Module Interrupted!", TextColor::Yellow);
	}
	if (igPassFailStatus == ReturnValueDef::InvalidArgs)
	{
		PrintColorMsg("MMXSSE Module has invalid arguments!", TextColor::Red);
	}
	if (igPassFailStatus == ReturnValueDef::ConfigMismatch)
	{
		PrintColorMsg("MMXSSE Module has a configuration mismatch!", TextColor::Yellow);
	}	
	if (igPassFailStatus != ReturnValueDef::Success)
	{
		cout << "Return Status = " << igPassFailStatus << endl;
	}

}


//..main
// Program entry point: sets the version string for the build target, installs
// the SIGINT handler, parses the command line, runs the MMX/SSE test when
// requested and returns igPassFailStatus as the process exit code.
int main(int argc, char *argv[])
{
	// Version string per build target (.W = Windows, .L = Linux)
#ifdef __WIN_64__
	sgGoldModuleVersion = "1.0.22.64b.W";
#endif
#ifdef __WIN_32__
	sgGoldModuleVersion = "1.0.22.32b.W";
#endif
#ifdef __LIN_64__
	sgGoldModuleVersion = "1.0.21.64b.L";
#endif
#ifdef __LIN_32__
	sgGoldModuleVersion = "1.0.21.32b.L";
#endif

	// Soft-kill support: route SIGINT to SignalFun
	signal(SIGINT, SignalFun);

	// Reset global state, then apply the command line options
	Init();
	handleArgs(argc, argv);

	// Banner and version (suppressed by -h and -info)
	if (iPrintVersionFlag)
	{
		std::cout << std::endl << " --- MMXSSE Test --- ";
		PrintVersion();
	}

	// Remove any previous results file before a new one is written
	CleanUp();

	// Run the test and record the outcome
	if (iRun)
	{
		Run_MMXSSE();
		WriteResultsFile(igPassFailStatus);
	}

	if (iPrintSuccessFlag)
	{
		PrintSuccess();
	}

	// Optional pause before exit (-pa)
	if (igPauseApp)
	{
		PauseWQuit();
	}

	return igPassFailStatus;
}
