You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							89 lines
						
					
					
						
							2.5 KiB
						
					
					
				
			
		
		
	
	
							89 lines
						
					
					
						
							2.5 KiB
						
					
					
				| // gcc -O2 waste.c -lpthread -owaste
 | |
| // gcc -O2 waste.c -lpthread -owaste -DMEM
 | |
| 
 | |
| #define _GNU_SOURCE
 | |
| #include <stdio.h>
 | |
| #include <math.h>
 | |
| #include <sched.h>
 | |
| #include <string.h>
 | |
| #include <stdlib.h>
 | |
| #include <unistd.h>
 | |
| #include <pthread.h>
 | |
| #include <arm_neon.h>
 | |
| #include <sys/sysinfo.h>
 | |
| #include "../common/timing.h"
 | |
| 
 | |
| int get_nprocs(void);
 | |
| double *ttime, *oout;
 | |
| 
 | |
| void waste(int pid) {
 | |
|   cpu_set_t my_set;
 | |
|   CPU_ZERO(&my_set);
 | |
|   CPU_SET(pid, &my_set);
 | |
|   int ret = sched_setaffinity(0, sizeof(cpu_set_t), &my_set);
 | |
|   printf("set affinity to %d: %d\n", pid, ret);
 | |
| 
 | |
|   // 128 MB
 | |
|   float32x4_t *tmp = (float32x4_t *)malloc(0x800000*sizeof(float32x4_t));
 | |
| 
 | |
|   // comment out the memset for CPU only and not RAM
 | |
|   // otherwise we need this to avoid the zero page
 | |
| #ifdef MEM
 | |
|   memset(tmp, 0xaa, 0x800000*sizeof(float32x4_t));
 | |
| #endif
 | |
| 
 | |
|   float32x4_t out;
 | |
| 
 | |
|   double sec = seconds_since_boot();
 | |
|   while (1) {
 | |
|     for (int i = 0; i < 0x10; i++) {
 | |
|       for (int j = 0; j < 0x800000; j+=0x20) {
 | |
|         out = vmlaq_f32(out, tmp[j+0], tmp[j+1]);
 | |
|         out = vmlaq_f32(out, tmp[j+2], tmp[j+3]);
 | |
|         out = vmlaq_f32(out, tmp[j+4], tmp[j+5]);
 | |
|         out = vmlaq_f32(out, tmp[j+6], tmp[j+7]);
 | |
|         out = vmlaq_f32(out, tmp[j+8], tmp[j+9]);
 | |
|         out = vmlaq_f32(out, tmp[j+10], tmp[j+11]);
 | |
|         out = vmlaq_f32(out, tmp[j+12], tmp[j+13]);
 | |
|         out = vmlaq_f32(out, tmp[j+14], tmp[j+15]);
 | |
|         out = vmlaq_f32(out, tmp[j+16], tmp[j+17]);
 | |
|         out = vmlaq_f32(out, tmp[j+18], tmp[j+19]);
 | |
|         out = vmlaq_f32(out, tmp[j+20], tmp[j+21]);
 | |
|         out = vmlaq_f32(out, tmp[j+22], tmp[j+23]);
 | |
|         out = vmlaq_f32(out, tmp[j+24], tmp[j+25]);
 | |
|         out = vmlaq_f32(out, tmp[j+26], tmp[j+27]);
 | |
|         out = vmlaq_f32(out, tmp[j+28], tmp[j+29]);
 | |
|         out = vmlaq_f32(out, tmp[j+30], tmp[j+31]);
 | |
|       }
 | |
|     }
 | |
|     double nsec = seconds_since_boot();
 | |
|     ttime[pid] = nsec-sec;
 | |
|     oout[pid] = out[0] + out[1] + out[2] + out[3];
 | |
|     sec = nsec;
 | |
|   }
 | |
| }
 | |
| 
 | |
| int main() {
 | |
|   int CORES = get_nprocs();
 | |
|   ttime = (double *)malloc(CORES*sizeof(double));
 | |
|   oout = (double *)malloc(CORES*sizeof(double));
 | |
| 
 | |
|   pthread_t waster[CORES];
 | |
|   for (long i = 0; i < CORES; i++) {
 | |
|     ttime[i] = NAN;
 | |
|     pthread_create(&waster[i], NULL, (void *(*)(void *))waste, (void*)i);
 | |
|   }
 | |
|   while (1) {
 | |
|     double avg = 0.0;
 | |
|     double iavg = 0.0;
 | |
|     for (int i = 0; i < CORES; i++) {
 | |
|       avg += ttime[i];
 | |
|       iavg += 1/ttime[i];
 | |
|       printf("%4.2f ", ttime[i]);
 | |
|     }
 | |
|     double mb_per_sec = (16.*0x800000/(1024*1024))*sizeof(float32x4_t)*iavg;
 | |
|     printf("-- %4.2f -- %.2f MB/s \n", avg/CORES, mb_per_sec);
 | |
|     sleep(1);
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 |