Updated to hostap_2_6
[mech_eap.git] / libeap / tests / hwsim / vm / parallel-vm.py
1 #!/usr/bin/env python2
2 #
3 # Parallel VM test case executor
4 # Copyright (c) 2014-2015, Jouni Malinen <j@w1.fi>
5 #
6 # This software may be distributed under the terms of the BSD license.
7 # See README for more details.
8
9 import curses
10 import fcntl
11 import logging
12 import os
13 import subprocess
14 import sys
15 import time
16
17 logger = logging.getLogger()
18
19 # Test cases that take significantly longer time to execute than average.
20 long_tests = [ "ap_roam_open",
21                "wpas_mesh_password_mismatch_retry",
22                "wpas_mesh_password_mismatch",
23                "hostapd_oom_wpa2_psk_connect",
24                "ap_hs20_fetch_osu_stop",
25                "ap_roam_wpa2_psk",
26                "ibss_wpa_none_ccmp",
27                "nfc_wps_er_handover_pk_hash_mismatch_sta",
28                "go_neg_peers_force_diff_freq",
29                "p2p_cli_invite",
30                "sta_ap_scan_2b",
31                "ap_pmf_sta_unprot_deauth_burst",
32                "ap_bss_add_remove_during_ht_scan",
33                "wext_scan_hidden",
34                "autoscan_exponential",
35                "nfc_p2p_client",
36                "wnm_bss_keep_alive",
37                "ap_inactivity_disconnect",
38                "scan_bss_expiration_age",
39                "autoscan_periodic",
40                "discovery_group_client",
41                "concurrent_p2pcli",
42                "ap_bss_add_remove",
43                "wpas_ap_wps",
44                "wext_pmksa_cache",
45                "ibss_wpa_none",
46                "ap_ht_40mhz_intolerant_ap",
47                "ibss_rsn",
48                "discovery_pd_retries",
49                "ap_wps_setup_locked_timeout",
50                "ap_vht160",
51                "dfs_radar",
52                "dfs",
53                "dfs_ht40_minus",
54                "grpform_cred_ready_timeout",
55                "hostapd_oom_wpa2_eap_connect",
56                "wpas_ap_dfs",
57                "autogo_many",
58                "hostapd_oom_wpa2_eap",
59                "ibss_open",
60                "proxyarp_open_ebtables",
61                "radius_failover",
62                "obss_scan_40_intolerant",
63                "dbus_connect_oom",
64                "proxyarp_open",
65                "ap_wps_iteration",
66                "ap_wps_iteration_error",
67                "ap_wps_pbc_timeout",
68                "ap_wps_http_timeout",
69                "p2p_go_move_reg_change",
70                "p2p_go_move_active",
71                "p2p_go_move_scm",
72                "p2p_go_move_scm_peer_supports",
73                "p2p_go_move_scm_peer_does_not_support",
74                "p2p_go_move_scm_multi" ]
75
76 def get_failed(vm):
77     failed = []
78     for i in range(num_servers):
79         failed += vm[i]['failed']
80     return failed
81
82 def vm_read_stdout(vm, i):
83     global total_started, total_passed, total_failed, total_skipped
84     global rerun_failures
85
86     ready = False
87     try:
88         out = vm['proc'].stdout.read()
89     except:
90         return False
91     logger.debug("VM[%d] stdout.read[%s]" % (i, out))
92     pending = vm['pending'] + out
93     lines = []
94     while True:
95         pos = pending.find('\n')
96         if pos < 0:
97             break
98         line = pending[0:pos].rstrip()
99         pending = pending[(pos + 1):]
100         logger.debug("VM[%d] stdout full line[%s]" % (i, line))
101         if line.startswith("READY"):
102             ready = True
103         elif line.startswith("PASS"):
104             ready = True
105             total_passed += 1
106         elif line.startswith("FAIL"):
107             ready = True
108             total_failed += 1
109             vals = line.split(' ')
110             if len(vals) < 2:
111                 logger.info("VM[%d] incomplete FAIL line: %s" % (i, line))
112                 name = line
113             else:
114                 name = vals[1]
115             logger.debug("VM[%d] test case failed: %s" % (i, name))
116             vm['failed'].append(name)
117         elif line.startswith("NOT-FOUND"):
118             ready = True
119             total_failed += 1
120             logger.info("VM[%d] test case not found" % i)
121         elif line.startswith("SKIP"):
122             ready = True
123             total_skipped += 1
124         elif line.startswith("START"):
125             total_started += 1
126             if len(vm['failed']) == 0:
127                 vals = line.split(' ')
128                 if len(vals) >= 2:
129                     vm['fail_seq'].append(vals[1])
130         vm['out'] += line + '\n'
131         lines.append(line)
132     vm['pending'] = pending
133     return ready
134
135 def show_progress(scr):
136     global num_servers
137     global vm
138     global dir
139     global timestamp
140     global tests
141     global first_run_failures
142     global total_started, total_passed, total_failed, total_skipped
143
144     total_tests = len(tests)
145     logger.info("Total tests: %d" % total_tests)
146
147     scr.leaveok(1)
148     scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
149     for i in range(0, num_servers):
150         scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
151         scr.addstr(i + 1, 10, "starting VM")
152     scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
153     scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
154     scr.refresh()
155
156     completed_first_pass = False
157     rerun_tests = []
158
159     while True:
160         running = False
161         first_running = False
162         updated = False
163
164         for i in range(0, num_servers):
165             if completed_first_pass:
166                 continue
167             if vm[i]['first_run_done']:
168                 continue
169             if not vm[i]['proc']:
170                 continue
171             if vm[i]['proc'].poll() is not None:
172                 vm[i]['proc'] = None
173                 scr.move(i + 1, 10)
174                 scr.clrtoeol()
175                 log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
176                 with open(log, 'r') as f:
177                     if "Kernel panic" in f.read():
178                         scr.addstr("kernel panic")
179                         logger.info("VM[%d] kernel panic" % i)
180                     else:
181                         scr.addstr("unexpected exit")
182                         logger.info("VM[%d] unexpected exit" % i)
183                 updated = True
184                 continue
185
186             running = True
187             first_running = True
188             try:
189                 err = vm[i]['proc'].stderr.read()
190                 vm[i]['err'] += err
191                 logger.debug("VM[%d] stderr.read[%s]" % (i, err))
192             except:
193                 pass
194
195             if vm_read_stdout(vm[i], i):
196                 scr.move(i + 1, 10)
197                 scr.clrtoeol()
198                 updated = True
199                 if not tests:
200                     vm[i]['first_run_done'] = True
201                     scr.addstr("completed first round")
202                     logger.info("VM[%d] completed first round" % i)
203                     continue
204                 else:
205                     name = tests.pop(0)
206                     vm[i]['proc'].stdin.write(name + '\n')
207                     scr.addstr(name)
208                     logger.debug("VM[%d] start test %s" % (i, name))
209
210         if not first_running and not completed_first_pass:
211             logger.info("First round of testing completed")
212             if tests:
213                 logger.info("Unexpected test cases remaining from first round: " + str(tests))
214                 raise Exception("Unexpected test cases remaining from first round")
215             completed_first_pass = True
216             for name in get_failed(vm):
217                 if rerun_failures:
218                     rerun_tests.append(name)
219                 first_run_failures.append(name)
220
221         for i in range(num_servers):
222             if not completed_first_pass:
223                 continue
224             if not vm[i]['proc']:
225                 continue
226             if vm[i]['proc'].poll() is not None:
227                 vm[i]['proc'] = None
228                 scr.move(i + 1, 10)
229                 scr.clrtoeol()
230                 log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
231                 with open(log, 'r') as f:
232                     if "Kernel panic" in f.read():
233                         scr.addstr("kernel panic")
234                         logger.info("VM[%d] kernel panic" % i)
235                     else:
236                         scr.addstr("completed run")
237                         logger.info("VM[%d] completed run" % i)
238                 updated = True
239                 continue
240
241             running = True
242             try:
243                 err = vm[i]['proc'].stderr.read()
244                 vm[i]['err'] += err
245                 logger.debug("VM[%d] stderr.read[%s]" % (i, err))
246             except:
247                 pass
248
249             ready = False
250             if vm[i]['first_run_done']:
251                 vm[i]['first_run_done'] = False
252                 ready = True
253             else:
254                 ready = vm_read_stdout(vm[i], i)
255             if ready:
256                 scr.move(i + 1, 10)
257                 scr.clrtoeol()
258                 updated = True
259                 if not rerun_tests:
260                     vm[i]['proc'].stdin.write('\n')
261                     scr.addstr("shutting down")
262                     logger.info("VM[%d] shutting down" % i)
263                 else:
264                     name = rerun_tests.pop(0)
265                     vm[i]['proc'].stdin.write(name + '\n')
266                     scr.addstr(name + "(*)")
267                     logger.debug("VM[%d] start test %s (*)" % (i, name))
268
269         if not running:
270             break
271
272         if updated:
273             scr.move(num_servers + 1, 10)
274             scr.clrtoeol()
275             scr.addstr("{} %".format(int(100.0 * (total_passed + total_failed + total_skipped) / total_tests)))
276             scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, total_started, total_passed, total_failed, total_skipped))
277             failed = get_failed(vm)
278             if len(failed) > 0:
279                 scr.move(num_servers + 2, 0)
280                 scr.clrtoeol()
281                 scr.addstr("Failed test cases: ")
282                 count = 0
283                 for f in failed:
284                     count += 1
285                     if count > 30:
286                         scr.addstr('...')
287                         scr.clrtoeol()
288                         break
289                     scr.addstr(f)
290                     scr.addstr(' ')
291
292             scr.move(0, 35)
293             scr.clrtoeol()
294             if rerun_tests:
295                 scr.addstr("(RETRY FAILED %d)" % len(rerun_tests))
296             elif rerun_failures:
297                 pass
298             elif first_run_failures:
299                 scr.addstr("(RETRY FAILED)")
300
301             scr.refresh()
302
303         time.sleep(0.25)
304
305     scr.refresh()
306     time.sleep(0.3)
307
308 def main():
309     import argparse
310     import os
311     global num_servers
312     global vm
313     global dir
314     global timestamp
315     global tests
316     global first_run_failures
317     global total_started, total_passed, total_failed, total_skipped
318     global rerun_failures
319
320     total_started = 0
321     total_passed = 0
322     total_failed = 0
323     total_skipped = 0
324
325     debug_level = logging.INFO
326     rerun_failures = True
327     timestamp = int(time.time())
328
329     scriptsdir = os.path.dirname(os.path.realpath(sys.argv[0]))
330
331     p = argparse.ArgumentParser(description='run multiple testing VMs in parallel')
332     p.add_argument('num_servers', metavar='number of VMs', type=int, choices=range(1, 100),
333                    help="number of VMs to start")
334     p.add_argument('-f', dest='testmodules', metavar='<test module>',
335                    help='execute only tests from these test modules',
336                    type=str, nargs='+')
337     p.add_argument('-1', dest='no_retry', action='store_const', const=True, default=False,
338                    help="don't retry failed tests automatically")
339     p.add_argument('--debug', dest='debug', action='store_const', const=True, default=False,
340                    help="enable debug logging")
341     p.add_argument('--codecov', dest='codecov', action='store_const', const=True, default=False,
342                    help="enable code coverage collection")
343     p.add_argument('--shuffle-tests', dest='shuffle', action='store_const', const=True, default=False,
344                    help="shuffle test cases to randomize order")
345     p.add_argument('--short', dest='short', action='store_const', const=True,
346                    default=False,
347                    help="only run short-duration test cases")
348     p.add_argument('--long', dest='long', action='store_const', const=True,
349                    default=False,
350                    help="include long-duration test cases")
351     p.add_argument('--valgrind', dest='valgrind', action='store_const',
352                    const=True, default=False,
353                    help="run tests under valgrind")
354     p.add_argument('params', nargs='*')
355     args = p.parse_args()
356
357     dir = os.environ.get('HWSIM_TEST_LOG_DIR', '/tmp/hwsim-test-logs')
358     try:
359         os.makedirs(dir)
360     except:
361         pass
362
363     num_servers = args.num_servers
364     rerun_failures = not args.no_retry
365     if args.debug:
366         debug_level = logging.DEBUG
367     extra_args = []
368     if args.valgrind:
369         extra_args += [ '--valgrind' ]
370     if args.long:
371         extra_args += [ '--long' ]
372     if args.codecov:
373         print "Code coverage - build separate binaries"
374         logdir = os.path.join(dir, str(timestamp))
375         os.makedirs(logdir)
376         subprocess.check_call([os.path.join(scriptsdir, 'build-codecov.sh'),
377                                logdir])
378         codecov_args = ['--codecov_dir', logdir]
379         codecov = True
380     else:
381         codecov_args = []
382         codecov = False
383
384     first_run_failures = []
385     if args.params:
386         tests = args.params
387     else:
388         tests = []
389         cmd = [ os.path.join(os.path.dirname(scriptsdir), 'run-tests.py'),
390                 '-L' ]
391         if args.testmodules:
392             cmd += [ "-f" ]
393             cmd += args.testmodules
394         lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
395         for l in lst.stdout.readlines():
396             name = l.split(' ')[0]
397             tests.append(name)
398     if len(tests) == 0:
399         sys.exit("No test cases selected")
400
401     if args.shuffle:
402         from random import shuffle
403         shuffle(tests)
404     elif num_servers > 2 and len(tests) > 100:
405         # Move test cases with long duration to the beginning as an
406         # optimization to avoid last part of the test execution running a long
407         # duration test case on a single VM while all other VMs have already
408         # completed their work.
409         for l in long_tests:
410             if l in tests:
411                 tests.remove(l)
412                 tests.insert(0, l)
413     if args.short:
414         tests = [t for t in tests if t not in long_tests]
415
416     logger.setLevel(debug_level)
417     log_handler = logging.FileHandler('parallel-vm.log')
418     log_handler.setLevel(debug_level)
419     fmt = "%(asctime)s %(levelname)s %(message)s"
420     log_formatter = logging.Formatter(fmt)
421     log_handler.setFormatter(log_formatter)
422     logger.addHandler(log_handler)
423
424     vm = {}
425     for i in range(0, num_servers):
426         print("\rStarting virtual machine {}/{}".format(i + 1, num_servers)),
427         logger.info("Starting virtual machine {}/{}".format(i + 1, num_servers))
428         cmd = [os.path.join(scriptsdir, 'vm-run.sh'), '--delay', str(i),
429                '--timestamp', str(timestamp),
430                '--ext', 'srv.%d' % (i + 1),
431                '-i'] + codecov_args + extra_args
432         vm[i] = {}
433         vm[i]['first_run_done'] = False
434         vm[i]['proc'] = subprocess.Popen(cmd,
435                                          stdin=subprocess.PIPE,
436                                          stdout=subprocess.PIPE,
437                                          stderr=subprocess.PIPE)
438         vm[i]['out'] = ""
439         vm[i]['pending'] = ""
440         vm[i]['err'] = ""
441         vm[i]['failed'] = []
442         vm[i]['fail_seq'] = []
443         for stream in [ vm[i]['proc'].stdout, vm[i]['proc'].stderr ]:
444             fd = stream.fileno()
445             fl = fcntl.fcntl(fd, fcntl.F_GETFL)
446             fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
447     print
448
449     curses.wrapper(show_progress)
450
451     with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
452         for i in range(0, num_servers):
453             f.write('VM {}\n{}\n{}\n'.format(i, vm[i]['out'], vm[i]['err']))
454
455     failed = get_failed(vm)
456
457     if first_run_failures:
458         print "To re-run same failure sequence(s):"
459         for i in range(0, num_servers):
460             if len(vm[i]['failed']) == 0:
461                 continue
462             print "./vm-run.sh",
463             if args.long:
464                 print "--long",
465             skip = len(vm[i]['fail_seq'])
466             skip -= min(skip, 30)
467             for t in vm[i]['fail_seq']:
468                 if skip > 0:
469                     skip -= 1
470                     continue
471                 print t,
472             print
473         print "Failed test cases:"
474         for f in first_run_failures:
475             print f,
476             logger.info("Failed: " + f)
477         print
478     double_failed = []
479     for name in failed:
480         double_failed.append(name)
481     for test in first_run_failures:
482         double_failed.remove(test)
483     if not rerun_failures:
484         pass
485     elif failed and not double_failed:
486         print "All failed cases passed on retry"
487         logger.info("All failed cases passed on retry")
488     elif double_failed:
489         print "Failed even on retry:"
490         for f in double_failed:
491             print f,
492             logger.info("Failed on retry: " + f)
493         print
494     res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(total_started,
495                                                     total_passed,
496                                                     total_failed,
497                                                     total_skipped)
498     print(res)
499     logger.info(res)
500     print "Logs: " + dir + '/' + str(timestamp)
501     logger.info("Logs: " + dir + '/' + str(timestamp))
502
503     for i in range(0, num_servers):
504         if len(vm[i]['pending']) > 0:
505             logger.info("Unprocessed stdout from VM[%d]: '%s'" %
506                         (i, vm[i]['pending']))
507         log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
508         with open(log, 'r') as f:
509             if "Kernel panic" in f.read():
510                 print "Kernel panic in " + log
511                 logger.info("Kernel panic in " + log)
512
513     if codecov:
514         print "Code coverage - preparing report"
515         for i in range(num_servers):
516             subprocess.check_call([os.path.join(scriptsdir,
517                                                 'process-codecov.sh'),
518                                    logdir + ".srv.%d" % (i + 1),
519                                    str(i)])
520         subprocess.check_call([os.path.join(scriptsdir, 'combine-codecov.sh'),
521                                logdir])
522         print "file://%s/index.html" % logdir
523         logger.info("Code coverage report: file://%s/index.html" % logdir)
524
525     if double_failed or (failed and not rerun_failures):
526         logger.info("Test run complete - failures found")
527         sys.exit(2)
528     if failed:
529         logger.info("Test run complete - failures found on first run; passed on retry")
530         sys.exit(1)
531     logger.info("Test run complete - no failures")
532     sys.exit(0)
533
534 if __name__ == "__main__":
535     main()