libeap/tests/hwsim/vm/parallel-vm.py

   1 #!/usr/bin/env python2
   2 #
   3 # Parallel VM test case executor
   4 # Copyright (c) 2014-2015, Jouni Malinen <j@w1.fi>
   5 #
   6 # This software may be distributed under the terms of the BSD license.
   7 # See README for more details.
   8
   9 import curses
  10 import fcntl
  11 import logging
  12 import os
  13 import subprocess
  14 import sys
  15 import time
  16
  17 logger = logging.getLogger()
  18
  19 # Test cases that take significantly longer time to execute than average.
  20 long_tests = [ "ap_roam_open",
  21                "wpas_mesh_password_mismatch_retry",
  22                "wpas_mesh_password_mismatch",
  23                "hostapd_oom_wpa2_psk_connect",
  24                "ap_hs20_fetch_osu_stop",
  25                "ap_roam_wpa2_psk",
  26                "ibss_wpa_none_ccmp",
  27                "nfc_wps_er_handover_pk_hash_mismatch_sta",
  28                "go_neg_peers_force_diff_freq",
  29                "p2p_cli_invite",
  30                "sta_ap_scan_2b",
  31                "ap_pmf_sta_unprot_deauth_burst",
  32                "ap_bss_add_remove_during_ht_scan",
  33                "wext_scan_hidden",
  34                "autoscan_exponential",
  35                "nfc_p2p_client",
  36                "wnm_bss_keep_alive",
  37                "ap_inactivity_disconnect",
  38                "scan_bss_expiration_age",
  39                "autoscan_periodic",
  40                "discovery_group_client",
  41                "concurrent_p2pcli",
  42                "ap_bss_add_remove",
  43                "wpas_ap_wps",
  44                "wext_pmksa_cache",
  45                "ibss_wpa_none",
  46                "ap_ht_40mhz_intolerant_ap",
  47                "ibss_rsn",
  48                "discovery_pd_retries",
  49                "ap_wps_setup_locked_timeout",
  50                "ap_vht160",
  51                "dfs_radar",
  52                "dfs",
  53                "dfs_ht40_minus",
  54                "grpform_cred_ready_timeout",
  55                "hostapd_oom_wpa2_eap_connect",
  56                "wpas_ap_dfs",
  57                "autogo_many",
  58                "hostapd_oom_wpa2_eap",
  59                "ibss_open",
  60                "proxyarp_open_ebtables",
  61                "radius_failover",
  62                "obss_scan_40_intolerant",
  63                "dbus_connect_oom",
  64                "proxyarp_open",
  65                "ap_wps_iteration",
  66                "ap_wps_iteration_error",
  67                "ap_wps_pbc_timeout",
  68                "ap_wps_http_timeout",
  69                "p2p_go_move_reg_change",
  70                "p2p_go_move_active",
  71                "p2p_go_move_scm",
  72                "p2p_go_move_scm_peer_supports",
  73                "p2p_go_move_scm_peer_does_not_support",
  74                "p2p_go_move_scm_multi" ]
  75
  76 def get_failed(vm):
  77     failed = []
  78     for i in range(num_servers):
  79         failed += vm[i]['failed']
  80     return failed
  81
  82 def vm_read_stdout(vm, i):
  83     global total_started, total_passed, total_failed, total_skipped
  84     global rerun_failures
  85
  86     ready = False
  87     try:
  88         out = vm['proc'].stdout.read()
  89     except:
  90         return False
  91     logger.debug("VM[%d] stdout.read[%s]" % (i, out))
  92     pending = vm['pending'] + out
  93     lines = []
  94     while True:
  95         pos = pending.find('\n')
  96         if pos < 0:
  97             break
  98         line = pending[0:pos].rstrip()
  99         pending = pending[(pos + 1):]
 100         logger.debug("VM[%d] stdout full line[%s]" % (i, line))
 101         if line.startswith("READY"):
 102             ready = True
 103         elif line.startswith("PASS"):
 104             ready = True
 105             total_passed += 1
 106         elif line.startswith("FAIL"):
 107             ready = True
 108             total_failed += 1
 109             vals = line.split(' ')
 110             if len(vals) < 2:
 111                 logger.info("VM[%d] incomplete FAIL line: %s" % (i, line))
 112                 name = line
 113             else:
 114                 name = vals[1]
 115             logger.debug("VM[%d] test case failed: %s" % (i, name))
 116             vm['failed'].append(name)
 117         elif line.startswith("NOT-FOUND"):
 118             ready = True
 119             total_failed += 1
 120             logger.info("VM[%d] test case not found" % i)
 121         elif line.startswith("SKIP"):
 122             ready = True
 123             total_skipped += 1
 124         elif line.startswith("START"):
 125             total_started += 1
 126             if len(vm['failed']) == 0:
 127                 vals = line.split(' ')
 128                 if len(vals) >= 2:
 129                     vm['fail_seq'].append(vals[1])
 130         vm['out'] += line + '\n'
 131         lines.append(line)
 132     vm['pending'] = pending
 133     return ready
 134
 135 def show_progress(scr):
 136     global num_servers
 137     global vm
 138     global dir
 139     global timestamp
 140     global tests
 141     global first_run_failures
 142     global total_started, total_passed, total_failed, total_skipped
 143
 144     total_tests = len(tests)
 145     logger.info("Total tests: %d" % total_tests)
 146
 147     scr.leaveok(1)
 148     scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
 149     for i in range(0, num_servers):
 150         scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
 151         scr.addstr(i + 1, 10, "starting VM")
 152     scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
 153     scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
 154     scr.refresh()
 155
 156     completed_first_pass = False
 157     rerun_tests = []
 158
 159     while True:
 160         running = False
 161         first_running = False
 162         updated = False
 163
 164         for i in range(0, num_servers):
 165             if completed_first_pass:
 166                 continue
 167             if vm[i]['first_run_done']:
 168                 continue
 169             if not vm[i]['proc']:
 170                 continue
 171             if vm[i]['proc'].poll() is not None:
 172                 vm[i]['proc'] = None
 173                 scr.move(i + 1, 10)
 174                 scr.clrtoeol()
 175                 log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
 176                 with open(log, 'r') as f:
 177                     if "Kernel panic" in f.read():
 178                         scr.addstr("kernel panic")
 179                         logger.info("VM[%d] kernel panic" % i)
 180                     else:
 181                         scr.addstr("unexpected exit")
 182                         logger.info("VM[%d] unexpected exit" % i)
 183                 updated = True
 184                 continue
 185
 186             running = True
 187             first_running = True
 188             try:
 189                 err = vm[i]['proc'].stderr.read()
 190                 vm[i]['err'] += err
 191                 logger.debug("VM[%d] stderr.read[%s]" % (i, err))
 192             except:
 193                 pass
 194
 195             if vm_read_stdout(vm[i], i):
 196                 scr.move(i + 1, 10)
 197                 scr.clrtoeol()
 198                 updated = True
 199                 if not tests:
 200                     vm[i]['first_run_done'] = True
 201                     scr.addstr("completed first round")
 202                     logger.info("VM[%d] completed first round" % i)
 203                     continue
 204                 else:
 205                     name = tests.pop(0)
 206                     vm[i]['proc'].stdin.write(name + '\n')
 207                     scr.addstr(name)
 208                     logger.debug("VM[%d] start test %s" % (i, name))
 209
 210         if not first_running and not completed_first_pass:
 211             logger.info("First round of testing completed")
 212             if tests:
 213                 logger.info("Unexpected test cases remaining from first round: " + str(tests))
 214                 raise Exception("Unexpected test cases remaining from first round")
 215             completed_first_pass = True
 216             for name in get_failed(vm):
 217                 if rerun_failures:
 218                     rerun_tests.append(name)
 219                 first_run_failures.append(name)
 220
 221         for i in range(num_servers):
 222             if not completed_first_pass:
 223                 continue
 224             if not vm[i]['proc']:
 225                 continue
 226             if vm[i]['proc'].poll() is not None:
 227                 vm[i]['proc'] = None
 228                 scr.move(i + 1, 10)
 229                 scr.clrtoeol()
 230                 log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
 231                 with open(log, 'r') as f:
 232                     if "Kernel panic" in f.read():
 233                         scr.addstr("kernel panic")
 234                         logger.info("VM[%d] kernel panic" % i)
 235                     else:
 236                         scr.addstr("completed run")
 237                         logger.info("VM[%d] completed run" % i)
 238                 updated = True
 239                 continue
 240
 241             running = True
 242             try:
 243                 err = vm[i]['proc'].stderr.read()
 244                 vm[i]['err'] += err
 245                 logger.debug("VM[%d] stderr.read[%s]" % (i, err))
 246             except:
 247                 pass
 248
 249             ready = False
 250             if vm[i]['first_run_done']:
 251                 vm[i]['first_run_done'] = False
 252                 ready = True
 253             else:
 254                 ready = vm_read_stdout(vm[i], i)
 255             if ready:
 256                 scr.move(i + 1, 10)
 257                 scr.clrtoeol()
 258                 updated = True
 259                 if not rerun_tests:
 260                     vm[i]['proc'].stdin.write('\n')
 261                     scr.addstr("shutting down")
 262                     logger.info("VM[%d] shutting down" % i)
 263                 else:
 264                     name = rerun_tests.pop(0)
 265                     vm[i]['proc'].stdin.write(name + '\n')
 266                     scr.addstr(name + "(*)")
 267                     logger.debug("VM[%d] start test %s (*)" % (i, name))
 268
 269         if not running:
 270             break
 271
 272         if updated:
 273             scr.move(num_servers + 1, 10)
 274             scr.clrtoeol()
 275             scr.addstr("{} %".format(int(100.0 * (total_passed + total_failed + total_skipped) / total_tests)))
 276             scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, total_started, total_passed, total_failed, total_skipped))
 277             failed = get_failed(vm)
 278             if len(failed) > 0:
 279                 scr.move(num_servers + 2, 0)
 280                 scr.clrtoeol()
 281                 scr.addstr("Failed test cases: ")
 282                 count = 0
 283                 for f in failed:
 284                     count += 1
 285                     if count > 30:
 286                         scr.addstr('...')
 287                         scr.clrtoeol()
 288                         break
 289                     scr.addstr(f)
 290                     scr.addstr(' ')
 291
 292             scr.move(0, 35)
 293             scr.clrtoeol()
 294             if rerun_tests:
 295                 scr.addstr("(RETRY FAILED %d)" % len(rerun_tests))
 296             elif rerun_failures:
 297                 pass
 298             elif first_run_failures:
 299                 scr.addstr("(RETRY FAILED)")
 300
 301             scr.refresh()
 302
 303         time.sleep(0.25)
 304
 305     scr.refresh()
 306     time.sleep(0.3)
 307
 308 def main():
 309     import argparse
 310     import os
 311     global num_servers
 312     global vm
 313     global dir
 314     global timestamp
 315     global tests
 316     global first_run_failures
 317     global total_started, total_passed, total_failed, total_skipped
 318     global rerun_failures
 319
 320     total_started = 0
 321     total_passed = 0
 322     total_failed = 0
 323     total_skipped = 0
 324
 325     debug_level = logging.INFO
 326     rerun_failures = True
 327     timestamp = int(time.time())
 328
 329     scriptsdir = os.path.dirname(os.path.realpath(sys.argv[0]))
 330
 331     p = argparse.ArgumentParser(description='run multiple testing VMs in parallel')
 332     p.add_argument('num_servers', metavar='number of VMs', type=int, choices=range(1, 100),
 333                    help="number of VMs to start")
 334     p.add_argument('-f', dest='testmodules', metavar='<test module>',
 335                    help='execute only tests from these test modules',
 336                    type=str, nargs='+')
 337     p.add_argument('-1', dest='no_retry', action='store_const', const=True, default=False,
 338                    help="don't retry failed tests automatically")
 339     p.add_argument('--debug', dest='debug', action='store_const', const=True, default=False,
 340                    help="enable debug logging")
 341     p.add_argument('--codecov', dest='codecov', action='store_const', const=True, default=False,
 342                    help="enable code coverage collection")
 343     p.add_argument('--shuffle-tests', dest='shuffle', action='store_const', const=True, default=False,
 344                    help="shuffle test cases to randomize order")
 345     p.add_argument('--short', dest='short', action='store_const', const=True,
 346                    default=False,
 347                    help="only run short-duration test cases")
 348     p.add_argument('--long', dest='long', action='store_const', const=True,
 349                    default=False,
 350                    help="include long-duration test cases")
 351     p.add_argument('--valgrind', dest='valgrind', action='store_const',
 352                    const=True, default=False,
 353                    help="run tests under valgrind")
 354     p.add_argument('params', nargs='*')
 355     args = p.parse_args()
 356
 357     dir = os.environ.get('HWSIM_TEST_LOG_DIR', '/tmp/hwsim-test-logs')
 358     try:
 359         os.makedirs(dir)
 360     except:
 361         pass
 362
 363     num_servers = args.num_servers
 364     rerun_failures = not args.no_retry
 365     if args.debug:
 366         debug_level = logging.DEBUG
 367     extra_args = []
 368     if args.valgrind:
 369         extra_args += [ '--valgrind' ]
 370     if args.long:
 371         extra_args += [ '--long' ]
 372     if args.codecov:
 373         print "Code coverage - build separate binaries"
 374         logdir = os.path.join(dir, str(timestamp))
 375         os.makedirs(logdir)
 376         subprocess.check_call([os.path.join(scriptsdir, 'build-codecov.sh'),
 377                                logdir])
 378         codecov_args = ['--codecov_dir', logdir]
 379         codecov = True
 380     else:
 381         codecov_args = []
 382         codecov = False
 383
 384     first_run_failures = []
 385     if args.params:
 386         tests = args.params
 387     else:
 388         tests = []
 389         cmd = [ os.path.join(os.path.dirname(scriptsdir), 'run-tests.py'),
 390                 '-L' ]
 391         if args.testmodules:
 392             cmd += [ "-f" ]
 393             cmd += args.testmodules
 394         lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
 395         for l in lst.stdout.readlines():
 396             name = l.split(' ')[0]
 397             tests.append(name)
 398     if len(tests) == 0:
 399         sys.exit("No test cases selected")
 400
 401     if args.shuffle:
 402         from random import shuffle
 403         shuffle(tests)
 404     elif num_servers > 2 and len(tests) > 100:
 405         # Move test cases with long duration to the beginning as an
 406         # optimization to avoid last part of the test execution running a long
 407         # duration test case on a single VM while all other VMs have already
 408         # completed their work.
 409         for l in long_tests:
 410             if l in tests:
 411                 tests.remove(l)
 412                 tests.insert(0, l)
 413     if args.short:
 414         tests = [t for t in tests if t not in long_tests]
 415
 416     logger.setLevel(debug_level)
 417     log_handler = logging.FileHandler('parallel-vm.log')
 418     log_handler.setLevel(debug_level)
 419     fmt = "%(asctime)s %(levelname)s %(message)s"
 420     log_formatter = logging.Formatter(fmt)
 421     log_handler.setFormatter(log_formatter)
 422     logger.addHandler(log_handler)
 423
 424     vm = {}
 425     for i in range(0, num_servers):
 426         print("\rStarting virtual machine {}/{}".format(i + 1, num_servers)),
 427         logger.info("Starting virtual machine {}/{}".format(i + 1, num_servers))
 428         cmd = [os.path.join(scriptsdir, 'vm-run.sh'), '--delay', str(i),
 429                '--timestamp', str(timestamp),
 430                '--ext', 'srv.%d' % (i + 1),
 431                '-i'] + codecov_args + extra_args
 432         vm[i] = {}
 433         vm[i]['first_run_done'] = False
 434         vm[i]['proc'] = subprocess.Popen(cmd,
 435                                          stdin=subprocess.PIPE,
 436                                          stdout=subprocess.PIPE,
 437                                          stderr=subprocess.PIPE)
 438         vm[i]['out'] = ""
 439         vm[i]['pending'] = ""
 440         vm[i]['err'] = ""
 441         vm[i]['failed'] = []
 442         vm[i]['fail_seq'] = []
 443         for stream in [ vm[i]['proc'].stdout, vm[i]['proc'].stderr ]:
 444             fd = stream.fileno()
 445             fl = fcntl.fcntl(fd, fcntl.F_GETFL)
 446             fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
 447     print
 448
 449     curses.wrapper(show_progress)
 450
 451     with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
 452         for i in range(0, num_servers):
 453             f.write('VM {}\n{}\n{}\n'.format(i, vm[i]['out'], vm[i]['err']))
 454
 455     failed = get_failed(vm)
 456
 457     if first_run_failures:
 458         print "To re-run same failure sequence(s):"
 459         for i in range(0, num_servers):
 460             if len(vm[i]['failed']) == 0:
 461                 continue
 462             print "./vm-run.sh",
 463             if args.long:
 464                 print "--long",
 465             skip = len(vm[i]['fail_seq'])
 466             skip -= min(skip, 30)
 467             for t in vm[i]['fail_seq']:
 468                 if skip > 0:
 469                     skip -= 1
 470                     continue
 471                 print t,
 472             print
 473         print "Failed test cases:"
 474         for f in first_run_failures:
 475             print f,
 476             logger.info("Failed: " + f)
 477         print
 478     double_failed = []
 479     for name in failed:
 480         double_failed.append(name)
 481     for test in first_run_failures:
 482         double_failed.remove(test)
 483     if not rerun_failures:
 484         pass
 485     elif failed and not double_failed:
 486         print "All failed cases passed on retry"
 487         logger.info("All failed cases passed on retry")
 488     elif double_failed:
 489         print "Failed even on retry:"
 490         for f in double_failed:
 491             print f,
 492             logger.info("Failed on retry: " + f)
 493         print
 494     res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(total_started,
 495                                                     total_passed,
 496                                                     total_failed,
 497                                                     total_skipped)
 498     print(res)
 499     logger.info(res)
 500     print "Logs: " + dir + '/' + str(timestamp)
 501     logger.info("Logs: " + dir + '/' + str(timestamp))
 502
 503     for i in range(0, num_servers):
 504         if len(vm[i]['pending']) > 0:
 505             logger.info("Unprocessed stdout from VM[%d]: '%s'" %
 506                         (i, vm[i]['pending']))
 507         log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
 508         with open(log, 'r') as f:
 509             if "Kernel panic" in f.read():
 510                 print "Kernel panic in " + log
 511                 logger.info("Kernel panic in " + log)
 512
 513     if codecov:
 514         print "Code coverage - preparing report"
 515         for i in range(num_servers):
 516             subprocess.check_call([os.path.join(scriptsdir,
 517                                                 'process-codecov.sh'),
 518                                    logdir + ".srv.%d" % (i + 1),
 519                                    str(i)])
 520         subprocess.check_call([os.path.join(scriptsdir, 'combine-codecov.sh'),
 521                                logdir])
 522         print "file://%s/index.html" % logdir
 523         logger.info("Code coverage report: file://%s/index.html" % logdir)
 524
 525     if double_failed or (failed and not rerun_failures):
 526         logger.info("Test run complete - failures found")
 527         sys.exit(2)
 528     if failed:
 529         logger.info("Test run complete - failures found on first run; passed on retry")
 530         sys.exit(1)
 531     logger.info("Test run complete - no failures")
 532     sys.exit(0)
 533
 534 if __name__ == "__main__":
 535     main()