#!/usr/bin/env python # user will need dev (3/15/2018) version of mri_surfcluster and mri_volcluster import os import sys import shlex import argparse from glob import glob from math import log10 from subprocess import call def errExit(message, ret=1): print('error: ' + message) sys.exit(ret) # matlab can't handle .nii.gz - must convert to .nii def convertNifty(src_vol, trg_vol): print('"nii.gz" format is unsupported - converting %s to %s' % (src_vol, trg_vol)) ret = call('mri_convert %s %s' % (src_vol, trg_vol), shell=True) if ret != 0: sys.exit(ret) return trg_vol # if a user does this: --pargs "-flag", then argparse will fail because it interprets "-flag" # as a seperate argument. This can be fixed by using --pargs="-flag", so here we override # the ArgumentParser error function to notify users if this issue occurs class ArgumentParser(argparse.ArgumentParser): def error(self, message): exc = sys.exc_info()[1] if exc and (exc.argument_name == '--pargs'): message = 'additional arguments should be passed to the palm function with this syntax: --pargs="-flag1 -flag2"' self.print_usage(sys.stderr) args = {'prog': self.prog, 'message': message} self.exit(2, ('%(prog)s: error: %(message)s\n') % args) #-------- main ------------------------- version = '$Id'; # ---- parse args ---- description = """\ Prepares and analyzes the output of mri_glmfit for Permutation Analysis of Linear Models (PALM) to correct for multiple comparisons. See https://surfer.nmr.mgh.harvard.edu/fswiki/FsPalm for more info. PALM was written by Dr. Anderson Winkler, see fsl.fmrib.ox.ac.uk/fsl/fslwiki/PALM""" parser = ArgumentParser(description=description, formatter_class=argparse.RawTextHelpFormatter) parser._action_groups.pop() # list required arguments before optional # required args required = parser.add_argument_group('required arguments') required.add_argument('--glmdir', metavar='PATH', type=str, help='the mri_glmfit directory to prepare', required=True) required.add_argument('--cft', metavar='FLOAT', type=str, help='voxel-wise cluster forming threshold (CFT), -log10(p)', required=True) required.add_argument('--cwp', metavar='FLOAT', type=float, help='clusterwise p-value threshold', required=True) # tail - mutually exclusive flags mutually_exclusive = parser.add_argument_group('required flags (use one of the following)') tails = mutually_exclusive.add_mutually_exclusive_group(required=True) tails.add_argument('--onetail', action='store_true', help='perform a one-tailed test') tails.add_argument('--twotail', action='store_true', help='perform a two-tailed test. NOTE: changes CFT') # optional args optional = parser.add_argument_group('optional arguments') optional.add_argument('--name', metavar='DIRNAME', type=str, help='name of palm subdirectory (default="palm")', default='palm') optional.add_argument('--iters', metavar='INT', help='number of iterations', default=10) optional.add_argument('--monly', action='store_true', help='only create matlab file, do not run') optional.add_argument('--pponly', action='store_true', help='Only perform post-processing') optional.add_argument('--octave', action='store_true', help='run with octave, not matlab') optional.add_argument('--centroid', action='store_true', help='add --centroid flag to mri_surfcluster post-processing') optional.add_argument('--2spaces', action='store_true', help='Bonferroni-correct for 2 spaces') optional.add_argument('--3spaces', action='store_true', help='Bonferroni-correct for 3 spaces') optional.add_argument('--pargs', metavar='SUBARGS', type=str, help='supply additional args to be passed to the palm function') # check for no args before parsing if len(sys.argv) == 1: parser.print_help() sys.exit(1) argv0 = sys.argv; args = parser.parse_args() # retrieve any additional palm arguments extra_opts = [] if args.pargs: extra_opts += [ "'%s'" % x for x in shlex.split(args.pargs)] # two-tailed test if args.twotail: extra_opts.append("'-twotail'") # check glmdir glmdir = os.path.abspath(args.glmdir) if not os.path.isdir(glmdir): errExit('"%s" is not a valid directory' % glmdir) # make palm subdir palmdir = os.path.join(glmdir, args.name) print('preparing a palm subdirectory in %s' % palmdir) # os.makedirs(palmdir, exist_ok=True) # python3 if not os.path.exists(palmdir): os.makedirs(palmdir) # settle for python2 for now freesurferhome = os.environ.get('FREESURFER_HOME', '') LFfile = os.path.join(palmdir, 'fspalm.log') LF = open(LFfile,'w'); LF.write("fspalm log file %s\n" % (version)); LF.write("FREESURFER_HOME %s\n" % (freesurferhome)); LF.write("setenv SUBJECTS_DIR %s\n" % (os.environ['SUBJECTS_DIR'])); LF.write("cd %s\n" % os.getcwd()); for arg in argv0: LF.write(arg); LF.write(" "); LF.write("\n\n"); LF.write('palm subdirectory %s' % palmdir) # check whether this is a surface-based analysis surface_analysis = False if os.path.isfile(os.path.join(glmdir, 'surface')): surface_analysis = True print('found %s/surface file - this is a surface-based analysis' % os.path.basename(glmdir)) else: print('this is a volume-based analysis') # ---- glm info ---- glm_log = os.path.join(glmdir, 'mri_glmfit.log') if not os.path.isfile(glm_log): errExit('cannot find mri_glmfit.log in the glm directory') with open(glm_log, 'r') as f: loglines = [line for line in f.readlines() if line.strip()] # extract input filename input_file = None for line in loglines: if line.split()[0] == 'y': input_file = line.split()[1] break if not input_file: errExit("could not extract input filename from mri_glmfit.log") if not os.path.isfile(input_file): errExit('mri_glmfit input file "{0}" is no longer available at this location'.format(input_file)) if input_file.endswith('.nii.gz'): # nii.gz input can't be used - must convert to nii input_file = convertNifty(input_file, os.path.join(palmdir, os.path.basename(input_file).split('.')[0] + '.nii')) print('using the glmfit input file %s' % input_file) # get output file format if input_file.endswith('.nii'): oformat = '.nii' if input_file.endswith('.mgh'): oformat = '.mgz' if input_file.endswith('.mgz'): oformat = '.mgz' # extract hemi (and subject info for surface-based analysis) hemi = None if surface_analysis: # use the glmdir/surface file with open(os.path.join(glmdir, 'surface'), 'r') as f: targ_subject, hemi = f.readline().strip().split() subjects_dir = os.environ.get('SUBJECTS_DIR', '') if not subjects_dir: errExit('you must set the SUBJECTS_DIR envionment variable to do surface-based analysis') # get the surface file path surface_file = os.path.join(subjects_dir, targ_subject, 'surf', hemi + '.white') extra_opts += ["'-s'", "surffile"] # run a couple more sanity checks if not os.path.isdir(os.path.join(subjects_dir, targ_subject)): errExit('subject %s does not exist in current SUBJECTS_DIR (%s)' % (targ_subject, subjects_dir)) if not os.path.isfile(surface_file): errExit('subject file %s does not exist' % surface_file) # see whether there is an average vertex area file area_file = os.path.join(subjects_dir, targ_subject, 'surf', hemi + '.white' + '.avg.area.mgh') if os.path.isfile(area_file): print('using area file %s' % area_file); extra_opts += [" areafile"]; else: area_file = ""; else: # could be a cvs subject, need a way to figure this out # mri_glmfit-sim does not figure it out either, user spec on cmd line vol_targ_subject = 'fsaverage' # ---- design matrix ---- # read the Xg.dat xg_path = os.path.join(glmdir, 'Xg.dat') if not os.path.isfile(xg_path): errExit('cannot find Xg.dat in the glm directory') with open(xg_path, 'r') as f: xg_content = f.readlines() xg_content = [x.strip() for x in xg_content] num_points = len(xg_content) num_waves = len(xg_content[0].split()) # write the palm design.mat f = open(os.path.join(palmdir, 'design.mat'), "w") f.write('/NumWaves {0}\n'.format(num_waves)) f.write('/NumPoints {0}\n'.format(num_points)) f.write('/Matrix\n') for thing in xg_content: if len(thing.split()) != num_waves: errExit('number of waves (cols) in Xg.dat are not consistent') f.write(thing + '\n') f.close() # ---- contrasts ---- # read in the C.dat files contrast_paths = glob('{0}/*/C.dat'.format(glmdir)) if len(contrast_paths) == 0: errExit('cannot find any contrast subdirs (with C.dat) in the glm directory') contrasts = [] contrast_names = [] for constrast_file in contrast_paths: with open(constrast_file) as f: lines = f.read().splitlines() if len(lines) > 1: print('warning: excluding contrast file %s since it has multiple rows' % constrast_file) else: contrasts.append(lines[0]) contrast_names.append(os.path.basename(os.path.dirname(constrast_file))) # write the palm design.con with open(os.path.join(palmdir, 'design.con'), "w") as f: f.write('/NumWaves {0}\n'.format(num_waves)) f.write('/NumPoints {0}\n'.format(len(contrasts))) f.write('/Matrix\n') for contrast in contrasts: if len(contrast.split()) != num_waves: errExit('number of waves (cols) are not consistent') f.write(contrast + '\n') # save a contrast-name lookup file with open(os.path.join(palmdir, 'contrast_names.txt'), "w") as f: for i, con_name in enumerate(contrast_names): f.write('%s %s\n' % (i+1, con_name)) # ---- mask ---- maskfile = '' for fileformat in ['.mgz', '.mgh', '.nii', '.nii.gz']: if os.path.isfile(os.path.join(glmdir, 'mask' + fileformat)): maskfile = os.path.join(glmdir, 'mask' + fileformat) break if not maskfile: errExit('cannot find any mask volume in the glm directory') if maskfile.endswith('.nii.gz'): # nii.gz mask can't be used - must convert to nii maskfile = convertNifty(maskfile, os.path.join(palmdir, 'mask.nii')) if input_file.endswith('.nii') and surface_analysis: print('ERROR: for surface based analysis, input cannot be nii or nii.gz'); sys.exit(1); if maskfile.endswith('.nii') and surface_analysis: print('ERROR: for surface based analysis, mask cannot be nii or nii.gz'); sys.exit(1); # cd to output dir os.chdir(palmdir) # create list of palm output files (should use same file format as inputs) if len(contrast_names) == 1: fwep_file_list = ['fsp_clustere_tstat_fwep' + oformat] if surface_analysis: dpv_file_list = ['fsp_dpv_tstat' + oformat] else: dpv_file_list = ['fsp_vox_tstat' + oformat] else: tmpstr = 'fsp_clustere_tstat_fwep_c%d' + oformat fwep_file_list = [tmpstr % (i+1) for i in range(len(contrast_names))] if surface_analysis: tmpstr = 'fsp_dpv_tstat_c%d' + oformat else: tmpstr = 'fsp_vox_tstat_c%d' + oformat dpv_file_list = [tmpstr % (i+1) for i in range(len(contrast_names))] if args.pponly == False: # to test with post-processing only # delete any previous palm outfiles for fwep_file in fwep_file_list: if os.path.isfile(fwep_file): os.remove(fwep_file) # ---- matlab script ---- # write the run_palm.m script script_path = os.path.join(palmdir, 'run_palm.m') with open(script_path, "w") as f: f.write("hemi = '%s';\n" % hemi) f.write("input = '%s';\n" % input_file) f.write("maskfile = '%s';\n" % maskfile) if surface_analysis: f.write("surffile = '%s';\n" % surface_file) if(len(area_file)>0): f.write("areafile = '%s';\n" % area_file) f.write("\n") # Increase the CFT for two-tailed. PALM generates one-tailed # p-values. Adding 0.301 to the sig threshold effectively halves # the p-value threshold Eg, sig=2 means p=.01, sig=2.301 means # p=.005. AW argues that this is added complication is no really # needed given that the CFT is arbitrary. I decided to do it anyway # because this is what mri_glmfit-sim does. if args.twotail: f.write("%% Changing threshold for two-tailed test\n"); f.write("cft = %s + 0.301;\n" % args.cft) else: f.write("cft = %s;\n" % args.cft) f.write("pthresh = 10^-cft;\n"); f.write("zthresh = fast_p2z(pthresh);\n") f.write("iters = %s;\n" % args.iters) f.write("\n") f.write("zthreshstr = sprintf('%f',zthresh);\n") f.write("itersstr = sprintf('%d',iters);\n") f.write("\n") f.write("palm('-i',input,'-m',maskfile,'-d','design.mat','-t','design.con','-logp',...\n") f.write(" '-n',itersstr,'-C',zthreshstr,'-o','fsp'") f.write("%s);\n" % ''.join(",%s" % x for x in extra_opts)) f.write("return;\n") if args.monly: print("'--monly' requested, so exiting without running...") print("to initiate palm processing, cd into %s and run 'run_palm.m' from matlab" % palmdir) sys.exit(0) # run the run_palm.m script print('palm prep complete - running the generated matlab script...') try: if args.octave: ret = call('octave --no-gui < run_palm.m', shell=True) else: ret = call('matlab -display iconic < run_palm.m', shell=True) except KeyboardInterrupt: errExit('run_palm.m interrupted') # check for successful palm output since we can't get a return code from matlab for fwep_file in fwep_file_list: if not os.path.isfile(fwep_file): errExit('cannot find expected palm output "%s" - check for run_palm.m failures' % fwep_file) # ---- post-processing ---- cwpsig = -log10(args.cwp) if surface_analysis: # ============================================= print('palm processing complete - running the surface cluster analysis...') for i, con_name in enumerate(contrast_names): fwep_file = fwep_file_list[i] # mri_surfcluster command cmd = 'mri_surfcluster --in %s --thmin %f' % (fwep_file, cwpsig) cmd += ' --subject %s --hemi %s' % (targ_subject, hemi) cmd += ' --annot aparc --sum %s.clustertable.summary --no-fixmni --surf white' % con_name cmd += ' --ocn %s.ocn%s --oannot ./%s.ocn.annot --sig2p-max' % (con_name, oformat, con_name) # additional options if args.centroid: cmd += ' --centroid' if vars(args)['2spaces']: cmd += ' --bonferroni-max 2' if vars(args)['3spaces']: cmd += ' --bonferroni-max 3' print(os.getcwd()) print(cmd) LF.write(os.getcwd()); LF.write(cmd); LF.write("\n\n"); ret = call(cmd, shell=True) if ret != 0: errExit('mri_surfcluster failed', ret) # Compute mean of input over clusters cmd = 'mri_segstats --seg %s.ocn%s --exclude 0' % (con_name,oformat) cmd += ' --i %s --avgwf %s.y.ocn.dat' % (input_file, con_name) print(os.getcwd()) print(cmd) LF.write(os.getcwd()); LF.write(cmd); LF.write("\n\n"); ret = call(cmd, shell=True) if ret != 0: errExit('mri_segstats failed', ret) # This is a bit of a hack to get the vertex of the max vox. The mri_surfcluster above # will get the max vox from the cwp, which is the same at each vertex. The dpv is # the uncorrected voxel-wise p-value (which is what is used in mri_glmfit-sim) # I don't think that the order of the clusters is guaranteed to be the same. # Need a better fix for this dpv_file = dpv_file_list[i] cmd = 'mri_surfcluster --in %s --thmin 0.5' % (dpv_file) cmd += ' --subject %s --hemi %s' % (targ_subject, hemi) cmd += ' --annot aparc --sum %s.dpv.clustertable.summary --no-fixmni --surf white' % con_name cmd += ' --mask %s.ocn%s ' % (con_name, oformat) print(os.getcwd()) print(cmd) LF.write(os.getcwd()); LF.write(cmd); LF.write("\n\n"); ret = call(cmd, shell=True) if ret != 0: errExit('mri_surfcluster dpv failed, maybe 0 vertices found', ret) else: # volume analysis =========================================== print('palm processing complete - running the volume cluster analysis...') for i, con_name in enumerate(contrast_names): fwep_file = fwep_file_list[i] # mri_volcluster command cmd = 'mri_volcluster --in %s --thmin %f' % (fwep_file, cwpsig) cmd += ' --seg %s aparc+aseg.mgz' % (vol_targ_subject) cmd += ' --sum %s.clustertable.summary --no-fixmni ' % (con_name) cmd += ' --ocn %s.ocn%s --allowdiag --sig2p-max' % (con_name,oformat) cmd += ' --reg %s/subjects/%s/mri.2mm/reg.2mm.dat' % (freesurferhome, vol_targ_subject) # additional options # no centoid option in volume if vars(args)['2spaces']: cmd += ' --bonferroni-max 2' if vars(args)['3spaces']: cmd += ' --bonferroni-max 3' print(cmd) LF.write(cmd); LF.write("\n\n"); ret = call(cmd, shell=True) if ret != 0: errExit('mri_volcluster failed', ret) ret = call(cmd, shell=True) if ret != 0: errExit('mri_volcluster failed', ret) # Compute mean of input over clusters cmd = 'mri_segstats --seg %s.ocn%s --exclude 0' % (con_name,oformat) cmd += ' --i %s --avgwf %s.y.ocn.dat' % (input_file, con_name) print(cmd) LF.write(cmd); LF.write("\n\n"); ret = call(cmd, shell=True) if ret != 0: errExit('mri_segstats failed', ret) # See hack notes above for dpv dpv_file = dpv_file_list[i] cmd = 'mri_volcluster --in %s --thmin 0.5' % (dpv_file); cmd += ' --seg %s aparc+aseg.mgz' % (vol_targ_subject) cmd += ' --sum %s.dpv.clustertable.summary --no-fixmni ' % (con_name) cmd += ' --mask %s.ocn%s --allowdiag ' % (con_name,oformat) cmd += ' --reg %s/subjects/%s/mri.2mm/reg.2mm.dat' % (freesurferhome, vol_targ_subject) print(cmd) LF.write(cmd); LF.write("\n\n"); ret = call(cmd, shell=True) if ret != 0: errExit('mri_volcluster dpv failed', ret) LF.close(); print('fspalm complete')