/* Affinity tests. Copyright (C) 2013-2014 Free Software Foundation, Inc. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ /* { dg-do run } */ /* { dg-set-target-env-var OMP_PROC_BIND "false" } */ /* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl" { target *-*-linux* } } */ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include "config.h" #include #include #include #include #include #ifdef DO_FORK #include #endif #ifdef HAVE_PTHREAD_AFFINITY_NP #include #include #ifdef INTERPOSE_GETAFFINITY #include #endif #endif struct place { int start, len; }; struct places { char name[40]; int count; struct place places[8]; } places_array[] = { { "", 1, { { -1, -1 } } }, { "{0}:8", 8, { { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }, { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } }, { "{7,6}:2:-3", 2, { { 6, 2 }, { 3, 2 } } }, { "{6,7}:4:-2,!{2,3}", 3, { { 6, 2 }, { 4, 2 }, { 0, 2 } } }, { "{1}:7:1", 7, { { 1, 1 }, { 2, 1 }, { 3, 1 }, { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } }, { "{0,1},{3,2,4},{6,5,!6},{6},{7:2:-1,!6}", 5, { { 0, 2 }, { 2, 3 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } } }; unsigned long contig_cpucount; unsigned long min_cpusetsize; #if defined (HAVE_PTHREAD_AFFINITY_NP) && defined (_SC_NPROCESSORS_CONF) \ && defined (CPU_ALLOC_SIZE) #if defined (RTLD_NEXT) && defined (INTERPOSE_GETAFFINITY) int (*orig_getaffinity_np) (pthread_t, size_t, cpu_set_t *); int pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset) { int ret; unsigned long i, max; if (orig_getaffinity_np == NULL) { orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *)) dlsym (RTLD_NEXT, "pthread_getaffinity_np"); if (orig_getaffinity_np == NULL) exit (0); } ret = orig_getaffinity_np (thread, cpusetsize, cpuset); if (ret != 0) return ret; if (contig_cpucount == 0) { max = 8 * cpusetsize; for (i = 0; i < max; i++) if (!CPU_ISSET_S (i, cpusetsize, cpuset)) break; contig_cpucount = i; min_cpusetsize = cpusetsize; } return ret; } #endif void print_affinity (struct place p) { static unsigned long size; if (size == 0) { if (min_cpusetsize) size = min_cpusetsize; else { size = sysconf (_SC_NPROCESSORS_CONF); size = CPU_ALLOC_SIZE (size); if (size < sizeof (cpu_set_t)) size = sizeof (cpu_set_t); } } cpu_set_t *cpusetp = (cpu_set_t *) __builtin_alloca (size); if (pthread_getaffinity_np (pthread_self (), size, cpusetp) == 0) { unsigned long i, len, max = 8 * size; int notfirst = 0, unexpected = 1; printf (" bound to {"); for (i = 0, len = 0; i < max; i++) if (CPU_ISSET_S (i, size, cpusetp)) { if (len == 0) { if (notfirst) { unexpected = 1; printf (","); } else if (i == (unsigned long) p.start) unexpected = 0; notfirst = 1; printf ("%lu", i); } ++len; } else { if (len && len != (unsigned long) p.len) unexpected = 1; if (len > 1) printf (":%lu", len); len = 0; } if (len && len != (unsigned long) p.len) unexpected = 1; if (len > 1) printf (":%lu", len); printf ("}"); if (p.start != -1 && unexpected) { printf (", expected {%d", p.start); if (p.len != 1) printf (":%d", p.len); printf ("} instead"); } else if (p.start != -1) printf (", verified"); } } #else void print_affinity (struct place p) { (void) p.start; (void) p.len; } #endif int main () { char *env_proc_bind = getenv ("OMP_PROC_BIND"); int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0; int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0; int test_spread_master_close = env_proc_bind && strcmp (env_proc_bind, "spread,master,close") == 0; char *env_places = getenv ("OMP_PLACES"); int test_places = 0; #ifdef DO_FORK if (env_places == NULL && contig_cpucount >= 8 && test_false && getenv ("GOMP_AFFINITY") == NULL) { int i, j, status; pid_t pid; for (j = 0; j < 2; j++) { if (setenv ("OMP_PROC_BIND", j ? "spread,master,close" : "true", 1) < 0) break; for (i = sizeof (places_array) / sizeof (places_array[0]) - 1; i; --i) { if (setenv ("OMP_PLACES", places_array[i].name, 1) < 0) break; pid = fork (); if (pid == -1) break; if (pid == 0) { execl ("/proc/self/exe", "affinity-1.exe", NULL); _exit (1); } if (waitpid (pid, &status, 0) < 0) break; if (WIFSIGNALED (status) && WTERMSIG (status) == SIGABRT) abort (); else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0) break; } if (i) break; } } #endif int first = 1; if (env_proc_bind) { printf ("OMP_PROC_BIND='%s'", env_proc_bind); first = 0; } if (env_places) printf ("%sOMP_PLACES='%s'", first ? "" : " ", env_places); printf ("\n"); if (env_places && contig_cpucount >= 8 && (test_true || test_spread_master_close)) { for (test_places = sizeof (places_array) / sizeof (places_array[0]) - 1; test_places; --test_places) if (strcmp (env_places, places_array[test_places].name) == 0) break; } #define verify(if_true, if_s_m_c) \ if (test_false && omp_get_proc_bind () != omp_proc_bind_false) \ abort (); \ if (test_true && omp_get_proc_bind () != if_true) \ abort (); \ if (test_spread_master_close && omp_get_proc_bind () != if_s_m_c) \ abort (); verify (omp_proc_bind_true, omp_proc_bind_spread); printf ("Initial thread"); print_affinity (places_array[test_places].places[0]); printf ("\n"); omp_set_nested (1); omp_set_dynamic (0); #pragma omp parallel if (0) { verify (omp_proc_bind_true, omp_proc_bind_master); #pragma omp parallel if (0) { verify (omp_proc_bind_true, omp_proc_bind_close); #pragma omp parallel if (0) { verify (omp_proc_bind_true, omp_proc_bind_close); } #pragma omp parallel if (0) proc_bind (spread) { verify (omp_proc_bind_spread, omp_proc_bind_spread); } } #pragma omp parallel if (0) proc_bind (master) { verify (omp_proc_bind_master, omp_proc_bind_close); #pragma omp parallel if (0) { verify (omp_proc_bind_master, omp_proc_bind_close); } #pragma omp parallel if (0) proc_bind (spread) { verify (omp_proc_bind_spread, omp_proc_bind_spread); } } } /* True/spread */ #pragma omp parallel num_threads (4) { verify (omp_proc_bind_true, omp_proc_bind_master); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#1 thread %d", thr); if (omp_get_num_threads () == 4 && test_spread_master_close) switch (places_array[test_places].count) { case 8: /* T = 4, P = 8, each subpartition has 2 places. */ case 7: /* T = 4, P = 7, each subpartition has 2 places, but last partition, which has just one place. */ p = places_array[test_places].places[2 * thr]; break; case 5: /* T = 4, P = 5, first subpartition has 2 places, the rest just one. */ p = places_array[test_places].places[thr ? 1 + thr : 0]; break; case 3: /* T = 4, P = 3, unit sized subpartitions, first gets thr0 and thr3, second thr1, third thr2. */ p = places_array[test_places].places[thr == 3 ? 0 : thr]; break; case 2: /* T = 4, P = 2, unit sized subpartitions, each with 2 threads. */ p = places_array[test_places].places[thr / 2]; break; } print_affinity (p); printf ("\n"); } #pragma omp barrier if (omp_get_thread_num () == 3) { /* True/spread, true/master. */ #pragma omp parallel num_threads (3) { verify (omp_proc_bind_true, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#1,#1 thread 3,%d", thr); if (omp_get_num_threads () == 3 && test_spread_master_close) /* Outer is spread, inner master, so just bind to the place or the master thread, which is thr 3 above. */ switch (places_array[test_places].count) { case 8: case 7: p = places_array[test_places].places[6]; break; case 5: p = places_array[test_places].places[4]; break; case 3: p = places_array[test_places].places[0]; break; case 2: p = places_array[test_places].places[1]; break; } print_affinity (p); printf ("\n"); } } /* True/spread, spread. */ #pragma omp parallel num_threads (5) proc_bind (spread) { verify (omp_proc_bind_spread, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#1,#2 thread 3,%d", thr); if (omp_get_num_threads () == 5 && test_spread_master_close) /* Outer is spread, inner spread. */ switch (places_array[test_places].count) { case 8: /* T = 5, P = 2, unit sized subpartitions. */ p = places_array[test_places].places[thr == 4 ? 6 : 6 + thr / 2]; break; /* The rest are T = 5, P = 1. */ case 7: p = places_array[test_places].places[6]; break; case 5: p = places_array[test_places].places[4]; break; case 3: p = places_array[test_places].places[0]; break; case 2: p = places_array[test_places].places[1]; break; } print_affinity (p); printf ("\n"); } #pragma omp barrier if (omp_get_thread_num () == 3) { /* True/spread, spread, close. */ #pragma omp parallel num_threads (5) proc_bind (close) { verify (omp_proc_bind_close, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#1,#2,#1 thread 3,3,%d", thr); if (omp_get_num_threads () == 5 && test_spread_master_close) /* Outer is spread, inner spread, innermost close. */ switch (places_array[test_places].count) { /* All are T = 5, P = 1. */ case 8: p = places_array[test_places].places[7]; break; case 7: p = places_array[test_places].places[6]; break; case 5: p = places_array[test_places].places[4]; break; case 3: p = places_array[test_places].places[0]; break; case 2: p = places_array[test_places].places[1]; break; } print_affinity (p); printf ("\n"); } } } } /* True/spread, master. */ #pragma omp parallel num_threads (4) proc_bind(master) { verify (omp_proc_bind_master, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#1,#3 thread 3,%d", thr); if (omp_get_num_threads () == 4 && test_spread_master_close) /* Outer is spread, inner master, so just bind to the place or the master thread, which is thr 3 above. */ switch (places_array[test_places].count) { case 8: case 7: p = places_array[test_places].places[6]; break; case 5: p = places_array[test_places].places[4]; break; case 3: p = places_array[test_places].places[0]; break; case 2: p = places_array[test_places].places[1]; break; } print_affinity (p); printf ("\n"); } } /* True/spread, close. */ #pragma omp parallel num_threads (6) proc_bind (close) { verify (omp_proc_bind_close, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#1,#4 thread 3,%d", thr); if (omp_get_num_threads () == 6 && test_spread_master_close) /* Outer is spread, inner close. */ switch (places_array[test_places].count) { case 8: /* T = 6, P = 2, unit sized subpartitions. */ p = places_array[test_places].places[6 + thr / 3]; break; /* The rest are T = 6, P = 1. */ case 7: p = places_array[test_places].places[6]; break; case 5: p = places_array[test_places].places[4]; break; case 3: p = places_array[test_places].places[0]; break; case 2: p = places_array[test_places].places[1]; break; } print_affinity (p); printf ("\n"); } } } } /* Spread. */ #pragma omp parallel num_threads (5) proc_bind(spread) { verify (omp_proc_bind_spread, omp_proc_bind_master); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#2 thread %d", thr); if (omp_get_num_threads () == 5 && (test_spread_master_close || test_true)) switch (places_array[test_places].count) { case 8: /* T = 5, P = 8, first 3 subpartitions have 2 places, last 2 one place. */ p = places_array[test_places].places[thr < 3 ? 2 * thr : 3 + thr]; break; case 7: /* T = 5, P = 7, first 2 subpartitions have 2 places, last 3 one place. */ p = places_array[test_places].places[thr < 2 ? 2 * thr : 2 + thr]; break; case 5: /* T = 5, P = 5, unit sized subpartitions, each one with one thread. */ p = places_array[test_places].places[thr]; break; case 3: /* T = 5, P = 3, unit sized subpartitions, first gets thr0 and thr3, second thr1 and thr4, third thr2. */ p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr]; break; case 2: /* T = 5, P = 2, unit sized subpartitions, first with thr{0,1,4} and second with thr{2,3}. */ p = places_array[test_places].places[thr == 4 ? 0 : thr / 2]; break; } print_affinity (p); printf ("\n"); } #pragma omp barrier if (omp_get_thread_num () == 3) { int pp = 0; switch (places_array[test_places].count) { case 8: pp = 6; break; case 7: pp = 5; break; case 5: pp = 3; break; case 2: pp = 1; break; } /* Spread, spread/master. */ #pragma omp parallel num_threads (3) firstprivate (pp) { verify (omp_proc_bind_spread, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#2,#1 thread 3,%d", thr); if (test_spread_master_close || test_true) /* Outer is spread, inner spread resp. master, bit we have just unit sized partitions. */ p = places_array[test_places].places[pp]; print_affinity (p); printf ("\n"); } } /* Spread, spread. */ #pragma omp parallel num_threads (5) proc_bind (spread) \ firstprivate (pp) { verify (omp_proc_bind_spread, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#2,#2 thread 3,%d", thr); if (test_spread_master_close || test_true) /* Outer is spread, inner spread, bit we have just unit sized partitions. */ p = places_array[test_places].places[pp]; print_affinity (p); printf ("\n"); } } /* Spread, master. */ #pragma omp parallel num_threads (4) proc_bind(master) \ firstprivate(pp) { verify (omp_proc_bind_master, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#2,#3 thread 3,%d", thr); if (test_spread_master_close || test_true) /* Outer is spread, inner master, bit we have just unit sized partitions. */ p = places_array[test_places].places[pp]; print_affinity (p); printf ("\n"); } } /* Spread, close. */ #pragma omp parallel num_threads (6) proc_bind (close) \ firstprivate (pp) { verify (omp_proc_bind_close, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#2,#4 thread 3,%d", thr); if (test_spread_master_close || test_true) /* Outer is spread, inner close, bit we have just unit sized partitions. */ p = places_array[test_places].places[pp]; print_affinity (p); printf ("\n"); } } } } /* Master. */ #pragma omp parallel num_threads (3) proc_bind(master) { verify (omp_proc_bind_master, omp_proc_bind_master); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#3 thread %d", thr); if (test_spread_master_close || test_true) p = places_array[test_places].places[0]; print_affinity (p); printf ("\n"); } #pragma omp barrier if (omp_get_thread_num () == 2) { /* Master, master. */ #pragma omp parallel num_threads (4) { verify (omp_proc_bind_master, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#3,#1 thread 2,%d", thr); if (test_spread_master_close || test_true) /* Outer is master, inner is master. */ p = places_array[test_places].places[0]; print_affinity (p); printf ("\n"); } } /* Master, spread. */ #pragma omp parallel num_threads (4) proc_bind (spread) { verify (omp_proc_bind_spread, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#3,#2 thread 2,%d", thr); if (omp_get_num_threads () == 4 && (test_spread_master_close || test_true)) /* Outer is master, inner is spread. */ switch (places_array[test_places].count) { case 8: /* T = 4, P = 8, each subpartition has 2 places. */ case 7: /* T = 4, P = 7, each subpartition has 2 places, but last partition, which has just one place. */ p = places_array[test_places].places[2 * thr]; break; case 5: /* T = 4, P = 5, first subpartition has 2 places, the rest just one. */ p = places_array[test_places].places[thr ? 1 + thr : 0]; break; case 3: /* T = 4, P = 3, unit sized subpartitions, first gets thr0 and thr3, second thr1, third thr2. */ p = places_array[test_places].places[thr == 3 ? 0 : thr]; break; case 2: /* T = 4, P = 2, unit sized subpartitions, each with 2 threads. */ p = places_array[test_places].places[thr / 2]; break; } print_affinity (p); printf ("\n"); } #pragma omp barrier if (omp_get_thread_num () == 0) { /* Master, spread, close. */ #pragma omp parallel num_threads (5) proc_bind (close) { verify (omp_proc_bind_close, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#3,#2,#1 thread 2,0,%d", thr); if (omp_get_num_threads () == 5 && (test_spread_master_close || test_true)) /* Outer is master, inner spread, innermost close. */ switch (places_array[test_places].count) { /* First 3 are T = 5, P = 2. */ case 8: case 7: case 5: p = places_array[test_places].places[(thr & 2) / 2]; break; /* All the rest are T = 5, P = 1. */ case 3: case 2: p = places_array[test_places].places[0]; break; } print_affinity (p); printf ("\n"); } } } #pragma omp barrier if (omp_get_thread_num () == 3) { /* Master, spread, close. */ #pragma omp parallel num_threads (5) proc_bind (close) { verify (omp_proc_bind_close, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#3,#2,#2 thread 2,3,%d", thr); if (omp_get_num_threads () == 5 && (test_spread_master_close || test_true)) /* Outer is master, inner spread, innermost close. */ switch (places_array[test_places].count) { case 8: /* T = 5, P = 2. */ p = places_array[test_places].places[6 + (thr & 2) / 2]; break; /* All the rest are T = 5, P = 1. */ case 7: p = places_array[test_places].places[6]; break; case 5: p = places_array[test_places].places[4]; break; case 3: p = places_array[test_places].places[0]; break; case 2: p = places_array[test_places].places[1]; break; } print_affinity (p); printf ("\n"); } } } } /* Master, master. */ #pragma omp parallel num_threads (4) proc_bind(master) { verify (omp_proc_bind_master, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#3,#3 thread 2,%d", thr); if (test_spread_master_close || test_true) /* Outer is master, inner master. */ p = places_array[test_places].places[0]; print_affinity (p); printf ("\n"); } } /* Master, close. */ #pragma omp parallel num_threads (6) proc_bind (close) { verify (omp_proc_bind_close, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#3,#4 thread 2,%d", thr); if (omp_get_num_threads () == 6 && (test_spread_master_close || test_true)) switch (places_array[test_places].count) { case 8: /* T = 6, P = 8. */ case 7: /* T = 6, P = 7. */ p = places_array[test_places].places[thr]; break; case 5: /* T = 6, P = 5. thr{0,5} go into the first place. */ p = places_array[test_places].places[thr == 5 ? 0 : thr]; break; case 3: /* T = 6, P = 3, two threads into each place. */ p = places_array[test_places].places[thr / 2]; break; case 2: /* T = 6, P = 2, 3 threads into each place. */ p = places_array[test_places].places[thr / 3]; break; } print_affinity (p); printf ("\n"); } } } } #pragma omp parallel num_threads (5) proc_bind(close) { verify (omp_proc_bind_close, omp_proc_bind_master); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#4 thread %d", thr); if (omp_get_num_threads () == 5 && (test_spread_master_close || test_true)) switch (places_array[test_places].count) { case 8: /* T = 5, P = 8. */ case 7: /* T = 5, P = 7. */ case 5: /* T = 5, P = 5. */ p = places_array[test_places].places[thr]; break; case 3: /* T = 5, P = 3, thr{0,3} in first place, thr{1,4} in second, thr2 in third. */ p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr]; break; case 2: /* T = 5, P = 2, thr{0,1,4} in first place, thr{2,3} in second. */ p = places_array[test_places].places[thr == 4 ? 0 : thr / 2]; break; } print_affinity (p); printf ("\n"); } #pragma omp barrier if (omp_get_thread_num () == 2) { int pp = 0; switch (places_array[test_places].count) { case 8: case 7: case 5: case 3: pp = 2; break; case 2: pp = 1; break; } /* Close, close/master. */ #pragma omp parallel num_threads (4) firstprivate (pp) { verify (omp_proc_bind_close, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#4,#1 thread 2,%d", thr); if (test_spread_master_close) /* Outer is close, inner is master. */ p = places_array[test_places].places[pp]; else if (omp_get_num_threads () == 4 && test_true) /* Outer is close, inner is close. */ switch (places_array[test_places].count) { case 8: /* T = 4, P = 8. */ case 7: /* T = 4, P = 7. */ p = places_array[test_places].places[2 + thr]; break; case 5: /* T = 4, P = 5. There is wrap-around for thr3. */ p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr]; break; case 3: /* T = 4, P = 3, thr{0,3} go into p2, thr1 into p0, thr2 into p1. */ p = places_array[test_places].places[(2 + thr) % 3]; break; case 2: /* T = 4, P = 2, 2 threads into each place. */ p = places_array[test_places].places[1 - thr / 2]; break; } print_affinity (p); printf ("\n"); } } /* Close, spread. */ #pragma omp parallel num_threads (4) proc_bind (spread) { verify (omp_proc_bind_spread, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#4,#2 thread 2,%d", thr); if (omp_get_num_threads () == 4 && (test_spread_master_close || test_true)) /* Outer is close, inner is spread. */ switch (places_array[test_places].count) { case 8: /* T = 4, P = 8, each subpartition has 2 places. */ case 7: /* T = 4, P = 7, each subpartition has 2 places, but last partition, which has just one place. */ p = places_array[test_places].places[thr == 3 ? 0 : 2 + 2 * thr]; break; case 5: /* T = 4, P = 5, first subpartition has 2 places, the rest just one. */ p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr]; break; case 3: /* T = 4, P = 3, unit sized subpartitions, third gets thr0 and thr3, first thr1, second thr2. */ p = places_array[test_places].places[thr == 0 ? 2 : thr - 1]; break; case 2: /* T = 4, P = 2, unit sized subpartitions, each with 2 threads. */ p = places_array[test_places].places[1 - thr / 2]; break; } print_affinity (p); printf ("\n"); } #pragma omp barrier if (omp_get_thread_num () == 0) { /* Close, spread, close. */ #pragma omp parallel num_threads (5) proc_bind (close) { verify (omp_proc_bind_close, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#4,#2,#1 thread 2,0,%d", thr); if (omp_get_num_threads () == 5 && (test_spread_master_close || test_true)) /* Outer is close, inner spread, innermost close. */ switch (places_array[test_places].count) { case 8: case 7: /* T = 5, P = 2. */ p = places_array[test_places].places[2 + (thr & 2) / 2]; break; /* All the rest are T = 5, P = 1. */ case 5: case 3: p = places_array[test_places].places[2]; break; case 2: p = places_array[test_places].places[1]; break; } print_affinity (p); printf ("\n"); } } } #pragma omp barrier if (omp_get_thread_num () == 2) { /* Close, spread, close. */ #pragma omp parallel num_threads (5) proc_bind (close) { verify (omp_proc_bind_close, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#4,#2,#2 thread 2,2,%d", thr); if (omp_get_num_threads () == 5 && (test_spread_master_close || test_true)) /* Outer is close, inner spread, innermost close. */ switch (places_array[test_places].count) { case 8: /* T = 5, P = 2. */ p = places_array[test_places].places[6 + (thr & 2) / 2]; break; /* All the rest are T = 5, P = 1. */ case 7: p = places_array[test_places].places[6]; break; case 5: p = places_array[test_places].places[4]; break; case 3: p = places_array[test_places].places[1]; break; case 2: p = places_array[test_places].places[0]; break; } print_affinity (p); printf ("\n"); } } } #pragma omp barrier if (omp_get_thread_num () == 3) { /* Close, spread, close. */ #pragma omp parallel num_threads (5) proc_bind (close) { verify (omp_proc_bind_close, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#4,#2,#3 thread 2,3,%d", thr); if (omp_get_num_threads () == 5 && (test_spread_master_close || test_true)) /* Outer is close, inner spread, innermost close. */ switch (places_array[test_places].count) { case 8: case 7: case 5: /* T = 5, P = 2. */ p = places_array[test_places].places[(thr & 2) / 2]; break; /* All the rest are T = 5, P = 1. */ case 3: p = places_array[test_places].places[2]; break; case 2: p = places_array[test_places].places[0]; break; } print_affinity (p); printf ("\n"); } } } } /* Close, master. */ #pragma omp parallel num_threads (4) proc_bind(master) \ firstprivate (pp) { verify (omp_proc_bind_master, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#4,#3 thread 2,%d", thr); if (test_spread_master_close || test_true) /* Outer is close, inner master. */ p = places_array[test_places].places[pp]; print_affinity (p); printf ("\n"); } } /* Close, close. */ #pragma omp parallel num_threads (6) proc_bind (close) { verify (omp_proc_bind_close, omp_proc_bind_close); #pragma omp critical { struct place p = places_array[0].places[0]; int thr = omp_get_thread_num (); printf ("#4,#4 thread 2,%d", thr); if (omp_get_num_threads () == 6 && (test_spread_master_close || test_true)) switch (places_array[test_places].count) { case 8: /* T = 6, P = 8. */ p = places_array[test_places].places[2 + thr]; break; case 7: /* T = 6, P = 7. */ p = places_array[test_places].places[thr == 5 ? 0 : 2 + thr]; break; case 5: /* T = 6, P = 5. thr{0,5} go into the third place. */ p = places_array[test_places].places[thr >= 3 ? thr - 3 : 2 + thr]; break; case 3: /* T = 6, P = 3, two threads into each place. */ p = places_array[test_places].places[thr < 2 ? 2 : thr / 2 - 1]; break; case 2: /* T = 6, P = 2, 3 threads into each place. */ p = places_array[test_places].places[1 - thr / 3]; break; } print_affinity (p); printf ("\n"); } } } } return 0; }