]>
vgcfreebox.myrthtech.pt Git - ue-pp-squarematrixparallelisation.git/blob - BlocksParallelMatrixSearch.c
8 const int m_rows
= 40000;
9 const int m_cols
= m_rows
;
22 void* parallel_search_blocks(void *arg
) {
23 thread_args
*args
= (thread_args
*)arg
;
24 for (int r
= args
->start_r
; r
< args
->end_r
; r
++) {
25 for (int c
= args
->start_c
; c
< args
->end_c
; c
++) {
26 if (matrix
[r
][c
] > hv
){
34 void run_parallel_search(int num_threads
){
35 pthread_t
*threads
= malloc(num_threads
* sizeof(pthread_t
));
36 thread_args
*args
= malloc(num_threads
* sizeof(thread_args
));
38 if (threads
== NULL
|| args
== NULL
) {
39 perror("Failed to allocate memory for threads");
43 int blocks_per_side
= (int)sqrt(num_threads
);
44 int row_chunk
= m_rows
/ blocks_per_side
;
45 int col_chunk
= m_cols
/ blocks_per_side
;
48 for (int i
= 0; i
< blocks_per_side
; i
++) {
49 for (int j
= 0; j
< blocks_per_side
; j
++) {
50 args
[t_count
].start_r
= i
* row_chunk
;
51 args
[t_count
].end_r
= (i
== blocks_per_side
- 1) ? m_rows
: (i
+ 1) * row_chunk
;
53 args
[t_count
].start_c
= j
* col_chunk
;
54 args
[t_count
].end_c
= (j
== blocks_per_side
- 1) ? m_cols
: (j
+ 1) * col_chunk
;
56 if (pthread_create(&threads
[t_count
], NULL
, ¶llel_search_blocks
, &args
[t_count
]) != 0) {
57 perror("Failed to create thread");
64 for (int i
= 0; i
< t_count
; i
++) {
65 pthread_join(threads
[i
], NULL
);
73 printf("1st --> Allocate memory\n");
74 clock_t t_t1
= clock();
76 matrix
= malloc(m_rows
* sizeof(int*));
77 if (matrix
== NULL
) { perror("malloc failed"); return 1; }
80 size_t data_size
= (size_t)m_rows
* m_cols
* sizeof(int);
81 if (posix_memalign((void**)&data
, 64, data_size
) != 0) {
82 perror("alignment failed");
87 t_t1
= clock() - t_t1
;
88 printf(" %f sec\n", ((double)t_t1
) / CLOCKS_PER_SEC
);
90 printf("2nd --> Populate the matrix\n");
91 clock_t t_t2
= clock();
92 for (int r
= 0; r
< m_rows
; r
++) {
93 matrix
[r
] = data
+ r
* m_cols
;
95 for (int r
= 0; r
< m_rows
; r
++) {
96 for (int c
= 0; c
< m_cols
; c
++) {
97 matrix
[r
][c
] = r
* m_cols
+ c
;
100 t_t2
= clock() - t_t2
;
101 printf(" %f sec\n", ((double)t_t2
) / CLOCKS_PER_SEC
);
103 printf("3rd --> Search matrix (Block Split)\n");
104 clock_t t_t3
= clock();
106 int threads_to_use
= 8000;
107 run_parallel_search(threads_to_use
);
109 t_t3
= clock() - t_t3
;
110 printf(" %f sec\n", ((double)t_t3
) / CLOCKS_PER_SEC
);
112 printf("Done\n The biggest value found is %d\n", hv
);