const int m_rows = 40000;
const int m_cols = 40000;
-const int threads_to_use = 8;
+const int threads_to_use = 4; /* worker-thread count that main passes to init_parallel_matrix */
_Atomic int hv = 0;
int **matrix;
int end;
} thread_args;
+/*
+ * Thread entry point: fills one band of columns of the global `matrix`.
+ *
+ * arg is a thread_args pointer; columns [start, end) of every row are
+ * written with the value row * m_cols + column. Always returns NULL.
+ */
+void* parallel_populate_matrix(void *arg){
+    const thread_args *slice = arg;
+    const int first_col = slice->start;
+    const int last_col = slice->end;
+
+    for (int row = 0; row < m_rows; ++row) {
+        int *cells = matrix[row];
+        const int row_base = row * m_cols;
+        for (int col = first_col; col < last_col; ++col) {
+            cells[col] = row_base + col;
+        }
+    }
+    return NULL;
+}
+
void* parallel_search_cols(void *arg){
thread_args *args = (thread_args *)arg;
int local_hv = 0;
free(args);
}
-void init_parallel_matrix(int num_threads){
- for(int r = 0; r < m_rows; r++){
- for(int c = 0; c < m_cols; c++){
- matrix[r][c] = r*m_cols+c;
+/*
+ * Point every row of the global `matrix` into the flat buffer `data`, then
+ * fill the matrix using `num_threads` worker threads.
+ *
+ * data        - flat buffer; row r is taken to start at data + r * m_cols, so
+ *               it must hold at least m_rows * m_cols ints.
+ * num_threads - number of workers; values below 1 are treated as 1.
+ *
+ * The columns are split into num_threads contiguous chunks and the last
+ * chunk absorbs the remainder of the division. The function returns only
+ * after every worker has been joined, and exits the process with status 1
+ * if allocation, thread creation or joining fails.
+ */
+void init_parallel_matrix(int *data,int num_threads){
+    /* A non-positive count would divide by zero when sizing the chunks. */
+    if (num_threads < 1) {
+        num_threads = 1;
+    }
+
+    pthread_t *threads = malloc((size_t)num_threads * sizeof *threads);
+    thread_args *args = malloc((size_t)num_threads * sizeof *args);
+    if (threads == NULL || args == NULL) {
+        perror("Failed to allocate memory for threads");
+        exit(1);
}
- }
+
+    for (int r = 0; r < m_rows; r++) {
+        /* Widen before multiplying so the row offset is not computed in int. */
+        matrix[r] = data + (size_t)r * m_cols;
+    }
+
+    int chunk_size = m_cols / num_threads;
+    for (int i = 0; i < num_threads; i++) {
+        args[i].start = i * chunk_size;
+        /* The last worker also takes the columns left over by the division. */
+        args[i].end = (i == num_threads - 1) ? m_cols : (i + 1) * chunk_size;
+
+        /* pthread_create reports failure through its return value, not errno. */
+        int rc = pthread_create(&threads[i], NULL, parallel_populate_matrix, &args[i]);
+        if (rc != 0) {
+            fprintf(stderr, "Failed to create thread: error %d\n", rc);
+            exit(1);
+        }
+    }
+
+    for (int i = 0; i < num_threads; i++) {
+        int rc = pthread_join(threads[i], NULL);
+        if (rc != 0) {
+            fprintf(stderr, "Failed to join thread: error %d\n", rc);
+            exit(1);
+        }
+    }
+    free(threads);
+    free(args);
}
+
int main(void){
printf("1st --> Allocate memory\n");
clock_t t_t1; t_t1 = clock();
printf("2nd --> Populate the matrix\n");
clock_t t_t2; t_t2 = clock();
- for(int r = 0; r<m_rows;r++){
- matrix[r] = data + r * m_cols;
- }
- init_parallel_matrix(threads_to_use);
+ init_parallel_matrix(data, threads_to_use);
t_t2 = clock() - t_t2;
double t2_ttaken = ((double)t_t2)/CLOCKS_PER_SEC;
printf(" %f sec\n",t2_ttaken);