Updated through tag hostap_2_5 from git://w1.fi/hostap.git
[mech_eap.git] / libeap / tests / hwsim / vm / parallel-vm.py
1 #!/usr/bin/env python2
2 #
3 # Parallel VM test case executor
4 # Copyright (c) 2014-2015, Jouni Malinen <j@w1.fi>
5 #
6 # This software may be distributed under the terms of the BSD license.
7 # See README for more details.
8
9 import curses
10 import fcntl
11 import logging
12 import os
13 import subprocess
14 import sys
15 import time
16
17 logger = logging.getLogger()
18
19 # Test cases that take significantly longer time to execute than average.
20 long_tests = [ "ap_roam_open",
21                "wpas_mesh_password_mismatch_retry",
22                "wpas_mesh_password_mismatch",
23                "hostapd_oom_wpa2_psk_connect",
24                "ap_hs20_fetch_osu_stop",
25                "ap_roam_wpa2_psk",
26                "ibss_wpa_none_ccmp",
27                "nfc_wps_er_handover_pk_hash_mismatch_sta",
28                "go_neg_peers_force_diff_freq",
29                "p2p_cli_invite",
30                "sta_ap_scan_2b",
31                "ap_pmf_sta_unprot_deauth_burst",
32                "ap_bss_add_remove_during_ht_scan",
33                "wext_scan_hidden",
34                "autoscan_exponential",
35                "nfc_p2p_client",
36                "wnm_bss_keep_alive",
37                "ap_inactivity_disconnect",
38                "scan_bss_expiration_age",
39                "autoscan_periodic",
40                "discovery_group_client",
41                "concurrent_p2pcli",
42                "ap_bss_add_remove",
43                "wpas_ap_wps",
44                "wext_pmksa_cache",
45                "ibss_wpa_none",
46                "ap_ht_40mhz_intolerant_ap",
47                "ibss_rsn",
48                "discovery_pd_retries",
49                "ap_wps_setup_locked_timeout",
50                "ap_vht160",
51                "dfs_radar",
52                "dfs",
53                "grpform_cred_ready_timeout",
54                "hostapd_oom_wpa2_eap_connect",
55                "wpas_ap_dfs",
56                "autogo_many",
57                "hostapd_oom_wpa2_eap",
58                "ibss_open",
59                "proxyarp_open_ebtables",
60                "radius_failover",
61                "obss_scan_40_intolerant",
62                "dbus_connect_oom",
63                "proxyarp_open",
64                "ap_wps_iteration",
65                "ap_wps_iteration_error",
66                "ap_wps_pbc_timeout",
67                "ap_wps_http_timeout",
68                "p2p_go_move_reg_change",
69                "p2p_go_move_active",
70                "p2p_go_move_scm",
71                "p2p_go_move_scm_peer_supports",
72                "p2p_go_move_scm_peer_does_not_support",
73                "p2p_go_move_scm_multi" ]
74
75 def get_failed(vm):
76     failed = []
77     for i in range(num_servers):
78         failed += vm[i]['failed']
79     return failed
80
81 def vm_read_stdout(vm, i):
82     global total_started, total_passed, total_failed, total_skipped
83     global rerun_failures
84
85     ready = False
86     try:
87         out = vm['proc'].stdout.read()
88     except:
89         return False
90     logger.debug("VM[%d] stdout.read[%s]" % (i, out))
91     pending = vm['pending'] + out
92     lines = []
93     while True:
94         pos = pending.find('\n')
95         if pos < 0:
96             break
97         line = pending[0:pos].rstrip()
98         pending = pending[(pos + 1):]
99         logger.debug("VM[%d] stdout full line[%s]" % (i, line))
100         if line.startswith("READY"):
101             ready = True
102         elif line.startswith("PASS"):
103             ready = True
104             total_passed += 1
105         elif line.startswith("FAIL"):
106             ready = True
107             total_failed += 1
108             vals = line.split(' ')
109             if len(vals) < 2:
110                 logger.info("VM[%d] incomplete FAIL line: %s" % (i, line))
111                 name = line
112             else:
113                 name = vals[1]
114             logger.debug("VM[%d] test case failed: %s" % (i, name))
115             vm['failed'].append(name)
116         elif line.startswith("NOT-FOUND"):
117             ready = True
118             total_failed += 1
119             logger.info("VM[%d] test case not found" % i)
120         elif line.startswith("SKIP"):
121             ready = True
122             total_skipped += 1
123         elif line.startswith("START"):
124             total_started += 1
125             if len(vm['failed']) == 0:
126                 vals = line.split(' ')
127                 if len(vals) >= 2:
128                     vm['fail_seq'].append(vals[1])
129         vm['out'] += line + '\n'
130         lines.append(line)
131     vm['pending'] = pending
132     return ready
133
134 def show_progress(scr):
135     global num_servers
136     global vm
137     global dir
138     global timestamp
139     global tests
140     global first_run_failures
141     global total_started, total_passed, total_failed, total_skipped
142
143     total_tests = len(tests)
144     logger.info("Total tests: %d" % total_tests)
145
146     scr.leaveok(1)
147     scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
148     for i in range(0, num_servers):
149         scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
150         scr.addstr(i + 1, 10, "starting VM")
151     scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
152     scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
153     scr.refresh()
154
155     completed_first_pass = False
156     rerun_tests = []
157
158     while True:
159         running = False
160         first_running = False
161         updated = False
162
163         for i in range(0, num_servers):
164             if completed_first_pass:
165                 continue
166             if vm[i]['first_run_done']:
167                 continue
168             if not vm[i]['proc']:
169                 continue
170             if vm[i]['proc'].poll() is not None:
171                 vm[i]['proc'] = None
172                 scr.move(i + 1, 10)
173                 scr.clrtoeol()
174                 log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
175                 with open(log, 'r') as f:
176                     if "Kernel panic" in f.read():
177                         scr.addstr("kernel panic")
178                         logger.info("VM[%d] kernel panic" % i)
179                     else:
180                         scr.addstr("unexpected exit")
181                         logger.info("VM[%d] unexpected exit" % i)
182                 updated = True
183                 continue
184
185             running = True
186             first_running = True
187             try:
188                 err = vm[i]['proc'].stderr.read()
189                 vm[i]['err'] += err
190                 logger.debug("VM[%d] stderr.read[%s]" % (i, err))
191             except:
192                 pass
193
194             if vm_read_stdout(vm[i], i):
195                 scr.move(i + 1, 10)
196                 scr.clrtoeol()
197                 updated = True
198                 if not tests:
199                     vm[i]['first_run_done'] = True
200                     scr.addstr("completed first round")
201                     logger.info("VM[%d] completed first round" % i)
202                     continue
203                 else:
204                     name = tests.pop(0)
205                     vm[i]['proc'].stdin.write(name + '\n')
206                     scr.addstr(name)
207                     logger.debug("VM[%d] start test %s" % (i, name))
208
209         if not first_running and not completed_first_pass:
210             logger.info("First round of testing completed")
211             if tests:
212                 logger.info("Unexpected test cases remaining from first round: " + str(tests))
213                 raise Exception("Unexpected test cases remaining from first round")
214             completed_first_pass = True
215             for name in get_failed(vm):
216                 if rerun_failures:
217                     rerun_tests.append(name)
218                 first_run_failures.append(name)
219
220         for i in range(num_servers):
221             if not completed_first_pass:
222                 continue
223             if not vm[i]['proc']:
224                 continue
225             if vm[i]['proc'].poll() is not None:
226                 vm[i]['proc'] = None
227                 scr.move(i + 1, 10)
228                 scr.clrtoeol()
229                 log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
230                 with open(log, 'r') as f:
231                     if "Kernel panic" in f.read():
232                         scr.addstr("kernel panic")
233                         logger.info("VM[%d] kernel panic" % i)
234                     else:
235                         scr.addstr("completed run")
236                         logger.info("VM[%d] completed run" % i)
237                 updated = True
238                 continue
239
240             running = True
241             try:
242                 err = vm[i]['proc'].stderr.read()
243                 vm[i]['err'] += err
244                 logger.debug("VM[%d] stderr.read[%s]" % (i, err))
245             except:
246                 pass
247
248             ready = False
249             if vm[i]['first_run_done']:
250                 vm[i]['first_run_done'] = False
251                 ready = True
252             else:
253                 ready = vm_read_stdout(vm[i], i)
254             if ready:
255                 scr.move(i + 1, 10)
256                 scr.clrtoeol()
257                 updated = True
258                 if not rerun_tests:
259                     vm[i]['proc'].stdin.write('\n')
260                     scr.addstr("shutting down")
261                     logger.info("VM[%d] shutting down" % i)
262                 else:
263                     name = rerun_tests.pop(0)
264                     vm[i]['proc'].stdin.write(name + '\n')
265                     scr.addstr(name + "(*)")
266                     logger.debug("VM[%d] start test %s (*)" % (i, name))
267
268         if not running:
269             break
270
271         if updated:
272             scr.move(num_servers + 1, 10)
273             scr.clrtoeol()
274             scr.addstr("{} %".format(int(100.0 * (total_passed + total_failed + total_skipped) / total_tests)))
275             scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, total_started, total_passed, total_failed, total_skipped))
276             failed = get_failed(vm)
277             if len(failed) > 0:
278                 scr.move(num_servers + 2, 0)
279                 scr.clrtoeol()
280                 scr.addstr("Failed test cases: ")
281                 count = 0
282                 for f in failed:
283                     count += 1
284                     if count > 30:
285                         scr.addstr('...')
286                         scr.clrtoeol()
287                         break
288                     scr.addstr(f)
289                     scr.addstr(' ')
290
291             scr.move(0, 35)
292             scr.clrtoeol()
293             if rerun_tests:
294                 scr.addstr("(RETRY FAILED %d)" % len(rerun_tests))
295             elif rerun_failures:
296                 pass
297             elif first_run_failures:
298                 scr.addstr("(RETRY FAILED)")
299
300             scr.refresh()
301
302         time.sleep(0.25)
303
304     scr.refresh()
305     time.sleep(0.3)
306
307 def main():
308     import argparse
309     import os
310     global num_servers
311     global vm
312     global dir
313     global timestamp
314     global tests
315     global first_run_failures
316     global total_started, total_passed, total_failed, total_skipped
317     global rerun_failures
318
319     total_started = 0
320     total_passed = 0
321     total_failed = 0
322     total_skipped = 0
323
324     debug_level = logging.INFO
325     rerun_failures = True
326     timestamp = int(time.time())
327
328     scriptsdir = os.path.dirname(os.path.realpath(sys.argv[0]))
329
330     p = argparse.ArgumentParser(description='run multiple testing VMs in parallel')
331     p.add_argument('num_servers', metavar='number of VMs', type=int, choices=range(1, 100),
332                    help="number of VMs to start")
333     p.add_argument('-f', dest='testmodules', metavar='<test module>',
334                    help='execute only tests from these test modules',
335                    type=str, nargs='+')
336     p.add_argument('-1', dest='no_retry', action='store_const', const=True, default=False,
337                    help="don't retry failed tests automatically")
338     p.add_argument('--debug', dest='debug', action='store_const', const=True, default=False,
339                    help="enable debug logging")
340     p.add_argument('--codecov', dest='codecov', action='store_const', const=True, default=False,
341                    help="enable code coverage collection")
342     p.add_argument('--shuffle-tests', dest='shuffle', action='store_const', const=True, default=False,
343                    help="shuffle test cases to randomize order")
344     p.add_argument('--short', dest='short', action='store_const', const=True,
345                    default=False,
346                    help="only run short-duration test cases")
347     p.add_argument('--long', dest='long', action='store_const', const=True,
348                    default=False,
349                    help="include long-duration test cases")
350     p.add_argument('--valgrind', dest='valgrind', action='store_const',
351                    const=True, default=False,
352                    help="run tests under valgrind")
353     p.add_argument('params', nargs='*')
354     args = p.parse_args()
355     num_servers = args.num_servers
356     rerun_failures = not args.no_retry
357     if args.debug:
358         debug_level = logging.DEBUG
359     extra_args = []
360     if args.valgrind:
361         extra_args += [ '--valgrind' ]
362     if args.long:
363         extra_args += [ '--long' ]
364     if args.codecov:
365         print "Code coverage - build separate binaries"
366         logdir = "/tmp/hwsim-test-logs/" + str(timestamp)
367         os.makedirs(logdir)
368         subprocess.check_call([os.path.join(scriptsdir, 'build-codecov.sh'),
369                                logdir])
370         codecov_args = ['--codecov_dir', logdir]
371         codecov = True
372     else:
373         codecov_args = []
374         codecov = False
375
376     first_run_failures = []
377     if args.params:
378         tests = args.params
379     else:
380         tests = []
381         cmd = [ os.path.join(os.path.dirname(scriptsdir), 'run-tests.py'),
382                 '-L' ]
383         if args.testmodules:
384             cmd += [ "-f" ]
385             cmd += args.testmodules
386         lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
387         for l in lst.stdout.readlines():
388             name = l.split(' ')[0]
389             tests.append(name)
390     if len(tests) == 0:
391         sys.exit("No test cases selected")
392
393     dir = '/tmp/hwsim-test-logs'
394     try:
395         os.mkdir(dir)
396     except:
397         pass
398
399     if args.shuffle:
400         from random import shuffle
401         shuffle(tests)
402     elif num_servers > 2 and len(tests) > 100:
403         # Move test cases with long duration to the beginning as an
404         # optimization to avoid last part of the test execution running a long
405         # duration test case on a single VM while all other VMs have already
406         # completed their work.
407         for l in long_tests:
408             if l in tests:
409                 tests.remove(l)
410                 tests.insert(0, l)
411     if args.short:
412         tests = [t for t in tests if t not in long_tests]
413
414     logger.setLevel(debug_level)
415     log_handler = logging.FileHandler('parallel-vm.log')
416     log_handler.setLevel(debug_level)
417     fmt = "%(asctime)s %(levelname)s %(message)s"
418     log_formatter = logging.Formatter(fmt)
419     log_handler.setFormatter(log_formatter)
420     logger.addHandler(log_handler)
421
422     vm = {}
423     for i in range(0, num_servers):
424         print("\rStarting virtual machine {}/{}".format(i + 1, num_servers)),
425         logger.info("Starting virtual machine {}/{}".format(i + 1, num_servers))
426         cmd = [os.path.join(scriptsdir, 'vm-run.sh'), '--delay', str(i),
427                '--timestamp', str(timestamp),
428                '--ext', 'srv.%d' % (i + 1),
429                '-i'] + codecov_args + extra_args
430         vm[i] = {}
431         vm[i]['first_run_done'] = False
432         vm[i]['proc'] = subprocess.Popen(cmd,
433                                          stdin=subprocess.PIPE,
434                                          stdout=subprocess.PIPE,
435                                          stderr=subprocess.PIPE)
436         vm[i]['out'] = ""
437         vm[i]['pending'] = ""
438         vm[i]['err'] = ""
439         vm[i]['failed'] = []
440         vm[i]['fail_seq'] = []
441         for stream in [ vm[i]['proc'].stdout, vm[i]['proc'].stderr ]:
442             fd = stream.fileno()
443             fl = fcntl.fcntl(fd, fcntl.F_GETFL)
444             fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
445     print
446
447     curses.wrapper(show_progress)
448
449     with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
450         for i in range(0, num_servers):
451             f.write('VM {}\n{}\n{}\n'.format(i, vm[i]['out'], vm[i]['err']))
452
453     failed = get_failed(vm)
454
455     if first_run_failures:
456         print "To re-run same failure sequence(s):"
457         for i in range(0, num_servers):
458             if len(vm[i]['failed']) == 0:
459                 continue
460             print "./parallel-vm.py -1 1",
461             skip = len(vm[i]['fail_seq'])
462             skip -= min(skip, 30)
463             for t in vm[i]['fail_seq']:
464                 if skip > 0:
465                     skip -= 1
466                     continue
467                 print t,
468             print
469         print "Failed test cases:"
470         for f in first_run_failures:
471             print f,
472             logger.info("Failed: " + f)
473         print
474     double_failed = []
475     for name in failed:
476         double_failed.append(name)
477     for test in first_run_failures:
478         double_failed.remove(test)
479     if not rerun_failures:
480         pass
481     elif failed and not double_failed:
482         print "All failed cases passed on retry"
483         logger.info("All failed cases passed on retry")
484     elif double_failed:
485         print "Failed even on retry:"
486         for f in double_failed:
487             print f,
488             logger.info("Failed on retry: " + f)
489         print
490     res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(total_started,
491                                                     total_passed,
492                                                     total_failed,
493                                                     total_skipped)
494     print(res)
495     logger.info(res)
496     print "Logs: " + dir + '/' + str(timestamp)
497     logger.info("Logs: " + dir + '/' + str(timestamp))
498
499     for i in range(0, num_servers):
500         if len(vm[i]['pending']) > 0:
501             logger.info("Unprocessed stdout from VM[%d]: '%s'" %
502                         (i, vm[i]['pending']))
503         log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
504         with open(log, 'r') as f:
505             if "Kernel panic" in f.read():
506                 print "Kernel panic in " + log
507                 logger.info("Kernel panic in " + log)
508
509     if codecov:
510         print "Code coverage - preparing report"
511         for i in range(num_servers):
512             subprocess.check_call([os.path.join(scriptsdir,
513                                                 'process-codecov.sh'),
514                                    logdir + ".srv.%d" % (i + 1),
515                                    str(i)])
516         subprocess.check_call([os.path.join(scriptsdir, 'combine-codecov.sh'),
517                                logdir])
518         print "file://%s/index.html" % logdir
519         logger.info("Code coverage report: file://%s/index.html" % logdir)
520
521     if double_failed or (failed and not rerun_failures):
522         logger.info("Test run complete - failures found")
523         sys.exit(2)
524     if failed:
525         logger.info("Test run complete - failures found on first run; passed on retry")
526         sys.exit(1)
527     logger.info("Test run complete - no failures")
528     sys.exit(0)
529
530 if __name__ == "__main__":
531     main()