#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <omp.h>
#include <time.h>


// Global variables (simulating shared memory access)
int* data = NULL;
size_t data_size = 0;

/**
 * @brief Merges two sorted sub-arrays (data[low...mid] and data[mid+1...high]).
 * NOTE: This function must manually manage temporary memory allocation and deallocation.
 */
void merge(int low, int mid, int high) {
    // Calculate sizes of the two halves
    int n1 = mid - low + 1;
    int n2 = high - mid;

    // Manual Memory Allocation for Temporary Arrays
    // We must allocate space for L and R.
    int* L = (int*)malloc(n1 * sizeof(int));
    int* R = (int*)malloc(n2 * sizeof(int));

    if (L == NULL || R == NULL) {
        perror("Failed to allocate temporary merge arrays");
        exit(EXIT_FAILURE);
    }

    // Copy data from the main array into the temporary arrays
    for (int i = 0; i < n1; i++) {
        L[i] = data[low + i];
    }
    for (int j = 0; j < n2; j++) {
        R[j] = data[mid + 1 + j];
    }

    // Standard sequential merging logic
    int i = 0; // Index for L
    int j = 0; // Index for R
    int k = low; // Index for the main data array

    while (i < n1 && j < n2) {
        if (L[i] <= R[j]) {
            data[k++] = L[i++];
        } else {
            data[k++] = R[j++];
        }
    }

    // Copy remaining elements of L
    while (i < n1) {
        data[k++] = L[i++];
    }

    // Copy remaining elements of R
    while (j < n2) {
        data[k++] = R[j++];
    }

    // Manual Memory Cleanup
    free(L);
    free(R);
}

/**
 * @brief Recursive function to perform parallel Merge Sort using OpenMP.
 */
void parallel_merge_sort(int low, int high) {
    if (low < high) {
        int mid = low + (high - low) / 2;

        // Use OpenMP sections to parallelize the recursive calls, runtime handles the threads assignment and synchronization.
        #pragma omp parallel sections
        {
            #pragma omp section
            {
                // Recurse on the left half
                parallel_merge_sort(low, mid);
            }
            #pragma omp section
            {
                // Recurse on the right half
                parallel_merge_sort(mid + 1, high);
            }
        }
        // Implicit barrier ensures merge only happens when both halves are sorted.
        // Combine the two sorted halves
        merge(low, mid, high);
    }
}


/**
 * @brief Reads all integers from the specified input file.
 * @return 1 on success, 0 on failure.
 */
int read_file(const char* filename) {
    FILE* infile = fopen(filename, "r");
    if (infile == NULL) {
        perror("Error opening input file");
        return 0;
    }

    // Initial allocation (guessing a reasonable maximum size)
    size_t capacity = 1024;
    data = (int*)malloc(capacity * sizeof(int));
    if (data == NULL) {
        perror("Memory allocation failed for data array");
        fclose(infile);
        return 0;
    }
    int count = 0;

    int num;
    while (fscanf(infile, "%d", &num) == 1) {
        // Check if realloc is necessary
        if (count == capacity - 1) {
            capacity *= 2;
            int* new_data = (int*)realloc(data, capacity * sizeof(int));
            if (new_data == NULL) {
                perror("Failed to reallocate memory for data array");
                free(data);
                fclose(infile);
                return 0;
            }
            data = new_data;
        }
        data[count++] = num;
    }

    fclose(infile);
    data_size = count;
    return 1;
}

/**
 * @brief Writes the sorted elements to the specified output file.
* @return 1 on success, 0 on failure.
*/
int write_file(const char* filename) {
    FILE* outfile = fopen(filename, "w");
    if (outfile == NULL) {
        perror("Error opening output file");
        return 0;
    }

    // Write the data sequentially
    for (size_t i = 0; i < data_size; ++i) {
        fprintf(outfile, "%d\n", data[i]);
    }

    fclose(outfile);
    return 1;
}


int main(int argc, char *argv[]) {
    // Handle Command Line Arguments
    int thread_count = -1;
    char *input_file = NULL;
    char *output_file = NULL;

    for (int i = 1; i < argc; ++i) {
        if (strcmp(argv[i], "-t") == 0 && i + 1 < argc) {
            // Get thread count from the next argument
            thread_count = atoi(argv[i+1]);
            i++; // Skip the argument after -t
        } else if (input_file == NULL) {
            input_file = argv[i];
        } else if (output_file == NULL) {
            output_file = argv[i];
        }
    }

    // Check for mandatory arguments and thread count
    if (thread_count <= 0 || input_file == NULL || output_file == NULL) {
        fprintf(stderr, "Usage: %s <input_file> <output_file> [-t <thread_count>]\n", argv[0]);
        return EXIT_FAILURE;
    }

    // Setup OpenMP Threads ---
    omp_set_num_threads(thread_count);
    printf("--- Starting Sort Process ---\n");
    printf("Threads allocated by OpenMP: %d\n", omp_get_max_threads());

    //  Read Data
    if (!read_file(input_file)) {
        fprintf(stderr, "Failed to read data.\n");
        // Clean up data memory before exit
        free(data);
        return EXIT_FAILURE;
    }
    printf("Successfully read %zu elements from %s\n", data_size, input_file);

    // ------------------------------- WALL CLOCK TIME ------------------------------------
    struct timespec start_sort_time;
    clock_gettime(CLOCK_MONOTONIC, &start_sort_time);
    if (data_size > 0) {
        // Execute the parallel sort
        parallel_merge_sort(0, data_size - 1);
        printf("Sorting completed successfully.\n");
    }
    struct timespec end_sort_time;
    clock_gettime(CLOCK_MONOTONIC, &end_sort_time);
    
    // determine actual wall clock time
    double elapsed_time = (end_sort_time.tv_sec - start_sort_time.tv_sec);
    elapsed_time += (end_sort_time.tv_nsec - start_sort_time.tv_nsec) / 1e9;
    // ------------------------------- END WALL CLOCK TIME ---------------------------------

    
    if (!write_file(output_file)) {
        fprintf(stderr, "Failed to write output data.\n");
        free(data);
        return EXIT_FAILURE;
    }
    printf("Sorted data written successfully to %s\n", output_file);
    printf("Total Elapsed Wall-Clock Time: %lf seconds\n", elapsed_time);

    // Cleanup
    free(data);
    return EXIT_SUCCESS;
}
