1/* 2 * Copyright 2017, Gustavo Romero, IBM Corp. 3 * Licensed under GPLv2. 4 * 5 * Check if thread endianness is flipped inadvertently to BE on trap 6 * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after 7 * load_fp and load_vec overflowed). 8 * 9 * The issue can be checked on LE machines simply by zeroing load_fp 10 * and load_vec and then causing a trap in TM. Since the endianness 11 * changes to BE on return from the signal handler, 'nop' is 12 * thread as an illegal instruction in following sequence: 13 * tbegin. 14 * beq 1f 15 * trap 16 * tend. 17 * 1: nop 18 * 19 * However, although the issue is also present on BE machines, it's a 20 * bit trickier to check it on BE machines because MSR.LE bit is set 21 * to zero which determines a BE endianness that is the native 22 * endianness on BE machines, so nothing notably critical happens, 23 * i.e. no illegal instruction is observed immediately after returning 24 * from the signal handler (as it happens on LE machines). Thus to test 25 * it on BE machines LE endianness is forced after a first trap and then 26 * the endianness is verified on subsequent traps to determine if the 27 * endianness "flipped back" to the native endianness (BE). 28 */ 29 30#define _GNU_SOURCE 31#include <error.h> 32#include <stdio.h> 33#include <stdlib.h> 34#include <unistd.h> 35#include <htmintrin.h> 36#include <inttypes.h> 37#include <pthread.h> 38#include <sched.h> 39#include <signal.h> 40#include <stdbool.h> 41 42#include "tm.h" 43#include "utils.h" 44 45#define pr_error(error_code, format, ...) \ 46 error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__) 47 48#define MSR_LE 1UL 49#define LE 1UL 50 51pthread_t t0_ping; 52pthread_t t1_pong; 53 54int exit_from_pong; 55 56int trap_event; 57int le; 58 59bool success; 60 61void trap_signal_handler(int signo, siginfo_t *si, void *uc) 62{ 63 ucontext_t *ucp = uc; 64 uint64_t thread_endianness; 65 66 /* Get thread endianness: extract bit LE from MSR */ 67 thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR]; 68 69 /*** 70 * Little-Endian Machine 71 */ 72 73 if (le) { 74 /* First trap event */ 75 if (trap_event == 0) { 76 /* Do nothing. Since it is returning from this trap 77 * event that endianness is flipped by the bug, so just 78 * let the process return from the signal handler and 79 * check on the second trap event if endianness is 80 * flipped or not. 81 */ 82 } 83 /* Second trap event */ 84 else if (trap_event == 1) { 85 /* 86 * Since trap was caught in TM on first trap event, if 87 * endianness was still LE (not flipped inadvertently) 88 * after returning from the signal handler instruction 89 * (1) is executed (basically a 'nop'), as it's located 90 * at address of tbegin. +4 (rollback addr). As (1) on 91 * LE endianness does in effect nothing, instruction (2) 92 * is then executed again as 'trap', generating a second 93 * trap event (note that in that case 'trap' is caught 94 * not in transacional mode). On te other hand, if after 95 * the return from the signal handler the endianness in- 96 * advertently flipped, instruction (1) is tread as a 97 * branch instruction, i.e. b .+8, hence instruction (3) 98 * and (4) are executed (tbegin.; trap;) and we get sim- 99 * ilaly on the trap signal handler, but now in TM mode. 100 * Either way, it's now possible to check the MSR LE bit 101 * once in the trap handler to verify if endianness was 102 * flipped or not after the return from the second trap 103 * event. If endianness is flipped, the bug is present. 104 * Finally, getting a trap in TM mode or not is just 105 * worth noting because it affects the math to determine 106 * the offset added to the NIP on return: the NIP for a 107 * trap caught in TM is the rollback address, i.e. the 108 * next instruction after 'tbegin.', whilst the NIP for 109 * a trap caught in non-transactional mode is the very 110 * same address of the 'trap' instruction that generated 111 * the trap event. 112 */ 113 114 if (thread_endianness == LE) { 115 /* Go to 'success', i.e. instruction (6) */ 116 ucp->uc_mcontext.gp_regs[PT_NIP] += 16; 117 } else { 118 /* 119 * Thread endianness is BE, so it flipped 120 * inadvertently. Thus we flip back to LE and 121 * set NIP to go to 'failure', instruction (5). 122 */ 123 ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL; 124 ucp->uc_mcontext.gp_regs[PT_NIP] += 4; 125 } 126 } 127 } 128 129 /*** 130 * Big-Endian Machine 131 */ 132 133 else { 134 /* First trap event */ 135 if (trap_event == 0) { 136 /* 137 * Force thread endianness to be LE. Instructions (1), 138 * (3), and (4) will be executed, generating a second 139 * trap in TM mode. 140 */ 141 ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL; 142 } 143 /* Second trap event */ 144 else if (trap_event == 1) { 145 /* 146 * Do nothing. If bug is present on return from this 147 * second trap event endianness will flip back "automat- 148 * ically" to BE, otherwise thread endianness will 149 * continue to be LE, just as it was set above. 150 */ 151 } 152 /* A third trap event */ 153 else { 154 /* 155 * Once here it means that after returning from the sec- 156 * ond trap event instruction (4) (trap) was executed 157 * as LE, generating a third trap event. In that case 158 * endianness is still LE as set on return from the 159 * first trap event, hence no bug. Otherwise, bug 160 * flipped back to BE on return from the second trap 161 * event and instruction (4) was executed as 'tdi' (so 162 * basically a 'nop') and branch to 'failure' in 163 * instruction (5) was taken to indicate failure and we 164 * never get here. 165 */ 166 167 /* 168 * Flip back to BE and go to instruction (6), i.e. go to 169 * 'success'. 170 */ 171 ucp->uc_mcontext.gp_regs[PT_MSR] &= ~1UL; 172 ucp->uc_mcontext.gp_regs[PT_NIP] += 8; 173 } 174 } 175 176 trap_event++; 177} 178 179void usr1_signal_handler(int signo, siginfo_t *si, void *not_used) 180{ 181 /* Got a USR1 signal from ping(), so just tell pong() to exit */ 182 exit_from_pong = 1; 183} 184 185void *ping(void *not_used) 186{ 187 uint64_t i; 188 189 trap_event = 0; 190 191 /* 192 * Wait an amount of context switches so load_fp and load_vec overflows 193 * and MSR_[FP|VEC|V] is 0. 194 */ 195 for (i = 0; i < 1024*1024*512; i++) 196 ; 197 198 asm goto( 199 /* 200 * [NA] means "Native Endianness", i.e. it tells how a 201 * instruction is executed on machine's native endianness (in 202 * other words, native endianness matches kernel endianness). 203 * [OP] means "Opposite Endianness", i.e. on a BE machine, it 204 * tells how a instruction is executed as a LE instruction; con- 205 * versely, on a LE machine, it tells how a instruction is 206 * executed as a BE instruction. When [NA] is omitted, it means 207 * that the native interpretation of a given instruction is not 208 * relevant for the test. Likewise when [OP] is omitted. 209 */ 210 211 " tbegin. ;" /* (0) tbegin. [NA] */ 212 " tdi 0, 0, 0x48;" /* (1) nop [NA]; b (3) [OP] */ 213 " trap ;" /* (2) trap [NA] */ 214 ".long 0x1D05007C;" /* (3) tbegin. [OP] */ 215 ".long 0x0800E07F;" /* (4) trap [OP]; nop [NA] */ 216 " b %l[failure] ;" /* (5) b [NA]; MSR.LE flipped (bug) */ 217 " b %l[success] ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/ 218 219 : : : : failure, success); 220 221failure: 222 success = false; 223 goto exit_from_ping; 224 225success: 226 success = true; 227 228exit_from_ping: 229 /* Tell pong() to exit before leaving */ 230 pthread_kill(t1_pong, SIGUSR1); 231 return NULL; 232} 233 234void *pong(void *not_used) 235{ 236 while (!exit_from_pong) 237 /* 238 * Induce context switches on ping() thread 239 * until ping() finishes its job and signs 240 * to exit from this loop. 241 */ 242 sched_yield(); 243 244 return NULL; 245} 246 247int tm_trap_test(void) 248{ 249 uint16_t k = 1; 250 251 int rc; 252 253 pthread_attr_t attr; 254 cpu_set_t cpuset; 255 256 struct sigaction trap_sa; 257 258 SKIP_IF(!have_htm()); 259 260 trap_sa.sa_flags = SA_SIGINFO; 261 trap_sa.sa_sigaction = trap_signal_handler; 262 sigaction(SIGTRAP, &trap_sa, NULL); 263 264 struct sigaction usr1_sa; 265 266 usr1_sa.sa_flags = SA_SIGINFO; 267 usr1_sa.sa_sigaction = usr1_signal_handler; 268 sigaction(SIGUSR1, &usr1_sa, NULL); 269 270 /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */ 271 CPU_ZERO(&cpuset); 272 CPU_SET(0, &cpuset); 273 274 /* Init pthread attribute */ 275 rc = pthread_attr_init(&attr); 276 if (rc) 277 pr_error(rc, "pthread_attr_init()"); 278 279 /* 280 * Bind thread ping() and pong() both to CPU 0 so they ping-pong and 281 * speed up context switches on ping() thread, speeding up the load_fp 282 * and load_vec overflow. 283 */ 284 rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); 285 if (rc) 286 pr_error(rc, "pthread_attr_setaffinity()"); 287 288 /* Figure out the machine endianness */ 289 le = (int) *(uint8_t *)&k; 290 291 printf("%s machine detected. Checking if endianness flips %s", 292 le ? "Little-Endian" : "Big-Endian", 293 "inadvertently on trap in TM... "); 294 295 rc = fflush(0); 296 if (rc) 297 pr_error(rc, "fflush()"); 298 299 /* Launch ping() */ 300 rc = pthread_create(&t0_ping, &attr, ping, NULL); 301 if (rc) 302 pr_error(rc, "pthread_create()"); 303 304 exit_from_pong = 0; 305 306 /* Launch pong() */ 307 rc = pthread_create(&t1_pong, &attr, pong, NULL); 308 if (rc) 309 pr_error(rc, "pthread_create()"); 310 311 rc = pthread_join(t0_ping, NULL); 312 if (rc) 313 pr_error(rc, "pthread_join()"); 314 315 rc = pthread_join(t1_pong, NULL); 316 if (rc) 317 pr_error(rc, "pthread_join()"); 318 319 if (success) { 320 printf("no.\n"); /* no, endianness did not flip inadvertently */ 321 return EXIT_SUCCESS; 322 } 323 324 printf("yes!\n"); /* yes, endianness did flip inadvertently */ 325 return EXIT_FAILURE; 326} 327 328int main(int argc, char **argv) 329{ 330 return test_harness(tm_trap_test, "tm_trap_test"); 331} 332