// Copyright 2025 RnD Center "ELVEES", JSC

#include "tests_tile_segmentation.hpp"

template <typename Ret_Type, typename Type, class create_func, class ref_func, class run_calc_ptr>
bool test_vecsumsq(Type* src, create_func create_vector, ref_func reference, run_calc_ptr run_calc, int size,
                   int* localmem) {
  create_vector(src, size, -5, 5);

  FLUSH_ALL_CACHES();
  uint32_t tic_count[2], instruction_count[2];
  count_tics(tic_count, instruction_count);
  Ret_Type ref_result = reference(src, size);
  count_tics(&tic_count[1], &instruction_count[1]);

  std::cout << "Ref func result (size = " << size << "): tic = " << tic_count[1] - tic_count[0]
            << " instr = " << instruction_count[1] - instruction_count[0] << std::endl;

  TileSegConfig config;
  CreateTileSegConfigVecSumSq(src, size, &config, (uint16_t*)localmem);

  FLUSH_ALL_CACHES();

  count_tics(tic_count, instruction_count);
  Ret_Type opt_result = run_calc(&config);
  count_tics(&tic_count[1], &instruction_count[1]);

  std::cout << "Opt func result (size = " << size << "): tic = " << tic_count[1] - tic_count[0]
            << " instr = " << instruction_count[1] - instruction_count[0] << std::endl;

  int ret = (ref_result != opt_result);
  return ret;
}

/// Test entry point: runs the sum-of-squares checks for int16, int32, float and double inputs
/// on vector lengths 1, 2, 4, ... up to SIZE.
///
/// disable_l2_cache() is called before the runs and enable_l2_cache(L2_CACHE_SIZE) afterwards.
/// When USE_REF_VER is defined the `ref_*` functions are used as the reference, otherwise the
/// non-prefixed ones are.
///
/// @return 0 when every check passed, non-zero when any check failed or the input buffer
///         could not be allocated.
int main() {
  disable_l2_cache();

  // One buffer is shared by all element types; it is sized for the widest one (8 bytes).
  void* src = memalign(64, SIZE * sizeof(int64_t));
  if (src == nullptr) {
    // Without this guard a failed allocation would hand a null pointer to the generators/kernels.
    std::cout << "Failed to allocate the source buffer" << std::endl;
    enable_l2_cache(L2_CACHE_SIZE);
    return 1;
  }

  int ret = 0;
  int test_status = 0;

  for (int i = 1; i <= SIZE; i *= 2) {
    // ---- 16-bit integer input, 32-bit result ----
    std::cout << "vecsumsq_s16" << std::endl;
#ifdef USE_REF_VER
    ret = test_vecsumsq<int32_t>(static_cast<int16_t*>(src), data_generator<int16_t>, ref_vecsumsq,
                                 RunCalculationVecSumSq16, i, &__local_mem);
#else
    ret = test_vecsumsq<int32_t>(static_cast<int16_t*>(src), data_generator<int16_t>, vecsumsq,
                                 RunCalculationVecSumSq16, i, &__local_mem);
#endif

    test_status |= ret;
    if (ret) std::cout << "vecsumsq16 error!\n";

    // ---- 32-bit integer input, 64-bit result ----
    std::cout << "vecsumsq_s32" << std::endl;
#ifdef USE_REF_VER
    ret = test_vecsumsq<int64_t>(static_cast<int32_t*>(src), data_generator<int32_t>, ref_vecsumsq32,
                                 RunCalculationVecSumSq32, i, &__local_mem);
#else
    ret = test_vecsumsq<int64_t>(static_cast<int32_t*>(src), data_generator<int32_t>, vecsumsq32,
                                 RunCalculationVecSumSq32, i, &__local_mem);
#endif

    test_status |= ret;
    if (ret) std::cout << "vecsumsq32 error!\n";

    // ---- single-precision floating point ----
    std::cout << "vecsumsq_fl" << std::endl;
#ifdef USE_REF_VER
    ret = test_vecsumsq<float>(static_cast<float*>(src), data_generator<float>, ref_vecsumsq_fl,
                               RunCalculationVecSumSqFl, i, &__local_mem);
#else
    ret = test_vecsumsq<float>(static_cast<float*>(src), data_generator<float>, vecsumsq_fl, RunCalculationVecSumSqFl,
                               i, &__local_mem);
#endif

    test_status |= ret;
    if (ret) std::cout << "vecsumsq_fl error!\n";

    // ---- double-precision floating point ----
    std::cout << "vecsumsq_db" << std::endl;
#ifdef USE_REF_VER
    ret = test_vecsumsq<double>(static_cast<double*>(src), data_generator<double>, ref_vecsumsq_db,
                                RunCalculationVecSumSqDb, i, &__local_mem);
#else
    ret = test_vecsumsq<double>(static_cast<double*>(src), data_generator<double>, vecsumsq_db,
                                RunCalculationVecSumSqDb, i, &__local_mem);
#endif

    test_status |= ret;
    if (ret) std::cout << "vecsumsq_db error!\n";

    // test_status is cumulative: once any size fails, every later iteration also prints "Test failed".
    if (!test_status)
      std::cout << "Test passed" << std::endl;
    else
      std::cout << "Test failed" << std::endl;
  }

  free(src);

  enable_l2_cache(L2_CACHE_SIZE);

  return test_status;
}
