/*  xfibres_gpu.cu

    Tim Behrens, Saad Jbabdi, Stam Sotiropoulos, Moises Hernandez - FMRIB Image Analysis Group

    Copyright (C) 2005 University of Oxford  */

/*  Part of FSL - FMRIB's Software Library
    http://www.fmrib.ox.ac.uk/fsl
    fsl@fmrib.ox.ac.uk

    Developed at FMRIB (Oxford Centre for Functional Magnetic Resonance
    Imaging of the Brain), Department of Clinical Neurology, Oxford
    University, Oxford, UK

    LICENCE

    FMRIB Software Library, Release 5.0 (c) 2012, The University of
    Oxford (the "Software")

    The Software remains the property of the University of Oxford ("the
    University").

    The Software is distributed "AS IS" under this Licence solely for
    non-commercial use in the hope that it will be useful, but in order
    that the University as a charitable foundation protects its assets for
    the benefit of its educational and research purposes, the University
    makes clear that no condition is made or to be implied, nor is any
    warranty given or to be implied, as to the accuracy of the Software,
    or that it will be suitable for any particular purpose or for use
    under any specific conditions. Furthermore, the University disclaims
    all responsibility for the use which is made of the Software. It
    further disclaims any liability for the outcomes arising from using
    the Software.

    The Licensee agrees to indemnify the University and hold the
    University harmless from and against any and all claims, damages and
    liabilities asserted by third parties (including claims for
    negligence) which arise directly or indirectly from the use of the
    Software or the sale of any products based on the Software.

    No part of the Software may be reproduced, modified, transmitted or
    transferred in any form or by any means, electronic or mechanical,
    without the express permission of the University. The permission of
    the University is not required if the said reproduction, modification,
    transmission or transference is done without financial return, the
    conditions of this Licence are imposed upon the receiver of the
    product, and all original and amended source code is included in any
    transmitted product. You may be held legally responsible for any
    copyright infringement that is caused or encouraged by your failure to
    abide by these terms and conditions.

    You are not permitted under this Licence to use this Software
    commercially. Use for which any financial return is received shall be
    defined as commercial use, and includes (1) integration of all or part
    of the source code or the Software into a product for sale or license
    by or on behalf of Licensee to third parties or (2) use of the
    Software or any derivative of it for research with the final aim of
    developing software products for sale or license to a third party or
    (3) use of the Software or any derivative of it for research with the
    final aim of developing non-software products for sale or license to a
    third party, or (4) use of the Software to provide any service to an
    external organisation for which payment is received. If you are
    interested in using the Software commercially, please contact Isis
    Innovation Limited ("Isis"), the technology transfer company of the
    University, to negotiate a licence. Contact details are:
    innovation@isis.ox.ac.uk quoting reference DE/9564. */
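/*  Host-side driver for the GPU (CUDA/Thrust) implementation of xfibres.
    For one part/subpart of the dataset it flattens the diffusion data into
    host buffers, copies them to the device, runs the deterministic fits
    (fit_PVM_single / fit_PVM_single_c / fit_PVM_multi), then runs the MCMC
    burn-in and recording stages and writes the recorded samples through the
    Samples class.  */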
*/ #include "newmat.h" #include "newimage/newimageall.h" #include "xfibresoptions.h" #include "xfibres_gpu.cuh" #include "diffmodels.cuh" #include "runmcmc.h" #include "samples.h" #include "options.h" #include #include #include #include #include "init_gpu.h" #include using namespace Xfibres; void xfibres_gpu( //INPUT const Matrix datam, const Matrix bvecs, const Matrix bvals, const Matrix gradm, int idpart, int idSubpart, string subjdir) { //write num of part in a string for log file char part_str[8]; char subpart_str[8]; char aux[8]; sprintf(part_str,"%d",idpart); while(strlen(part_str)<4){ strcpy(aux,"0"); strcat(aux,part_str); strcpy(part_str,aux); } sprintf(subpart_str,"%d",idSubpart); while(strlen(subpart_str)<4){ strcpy(aux,"0"); strcat(aux,subpart_str); strcpy(subpart_str,aux); } string gpu_log(subjdir); //logfile gpu_log += ".bedpostX/logs/logs_gpu/part_"; gpu_log += part_str; gpu_log += "-subpart_"; gpu_log += subpart_str; std::ofstream myfile; myfile.open (gpu_log.data(), ios::out | ios::app ); myfile << "----------------------------------------------------- " << "\n"; myfile << "---------------- PART " << idpart << " SUBPART "<< idSubpart << " ------------------- " << "\n"; myfile << "----------------------------------------------------- " << "\n"; myfile.close(); xfibresOptions& opts = xfibresOptions::getInstance(); int nvox = datam.Ncols(); int ndirections = datam.Nrows(); int nfib= opts.nfibres.value(); bool gradnonlin=opts.grad_file.set(); if(nvox>0){ thrust::host_vector datam_host, bvecs_host, bvals_host, params_host; thrust::host_vector alpha_host, beta_host; thrust::host_vector tau_host; vector datam_vec; vector bvecs_vec, bvals_vec; ///// FIT ///// prepare_data_gpu_FIT(datam,bvecs,bvals,gradm,datam_vec, bvecs_vec, bvals_vec, datam_host, bvecs_host, bvals_host, alpha_host, beta_host, params_host, tau_host); thrust::device_vector datam_gpu=datam_host; thrust::device_vector bvecs_gpu=bvecs_host; thrust::device_vector bvals_gpu=bvals_host; thrust::device_vector params_gpu=params_host; thrust::host_vector vox_repeat; //contains the id's of voxels repeated vox_repeat.resize(nvox); int nrepeat=0; fit(datam_vec,bvecs_vec,bvals_vec,datam_host,bvecs_host,bvals_host,datam_gpu,bvecs_gpu,bvals_gpu,ndirections,gpu_log,params_gpu,vox_repeat,nrepeat); if(opts.rician.value()){ calculate_tau(datam_gpu,params_gpu,bvecs_gpu,bvals_gpu,vox_repeat,nrepeat,ndirections,nfib,opts.modelnum.value(),opts.f0.value(),opts.nonlin.value(),gradnonlin,gpu_log,tau_host); } bvecs_gpu.clear(); //free bvecs_gpu bvecs_gpu.shrink_to_fit(); ////// RUN MCMC ////// thrust::host_vector signals_host,isosignals_host; thrust::host_vector fibres_host; thrust::host_vector multifibres_host; prepare_data_gpu_MCMC(nvox, ndirections, nfib, signals_host, isosignals_host, fibres_host, multifibres_host); thrust::device_vector signals_gpu=signals_host; thrust::device_vector isosignals_gpu=isosignals_host; thrust::device_vector fibres_gpu=fibres_host; thrust::device_vector multifibres_gpu=multifibres_host; thrust::device_vector tau_gpu = tau_host; thrust::device_vector alpha_gpu=alpha_host; thrust::device_vector beta_gpu=beta_host; init_Fibres_Multifibres(datam_gpu, params_gpu, tau_gpu, bvals_gpu, alpha_gpu, beta_gpu, ndirections, gpu_log, fibres_gpu, multifibres_gpu, signals_gpu, isosignals_gpu); srand(opts.seed.value()); //randoms seed runmcmc_burnin(datam_gpu, bvals_gpu, alpha_gpu, beta_gpu, ndirections, rand(), gpu_log, fibres_gpu,multifibres_gpu, signals_gpu, isosignals_gpu); thrust::device_vector 
        thrust::device_vector<float> rf0_gpu,rtau_gpu,rs0_gpu,rd_gpu,rdstd_gpu,rth_gpu,rph_gpu,rf_gpu;
        prepare_data_gpu_MCMC_record(nvox,rf0_gpu,rtau_gpu,rs0_gpu,rd_gpu,rdstd_gpu,rth_gpu,rph_gpu,rf_gpu);

        runmcmc_record(datam_gpu, bvals_gpu, alpha_gpu, beta_gpu, fibres_gpu, multifibres_gpu, signals_gpu, isosignals_gpu, ndirections, rand(), gpu_log, rf0_gpu, rtau_gpu, rs0_gpu, rd_gpu, rdstd_gpu, rth_gpu, rph_gpu, rf_gpu);

        /////// FINISH ALL VOXELS ///////
        record_finish_voxels(rf0_gpu,rtau_gpu,rs0_gpu,rd_gpu,rdstd_gpu,rth_gpu,rph_gpu,rf_gpu,nvox,idSubpart);
    }else{
        /////// FINISH EMPTY SLICE ///////
        Samples samples(nvox,ndirections);
        samples.save(idSubpart);
    }
}

// Correct bvals/bvecs to account for gradient nonlinearities.
// ColumnVector grad_nonlin has 9 entries, corresponding to the 3 components
// of each of the x, y and z gradient deviations.
void correct_bvals_bvecs(const Matrix& bvals, const Matrix& bvecs, const ColumnVector& grad_nonlin, Matrix& bvals_c, Matrix& bvecs_c){
    bvals_c = bvals;
    bvecs_c = bvecs;
    Matrix L(3,3);      // gradient coil tensor
    float mag;
    L(1,1)=grad_nonlin(1); L(1,2)=grad_nonlin(4); L(1,3)=grad_nonlin(7);
    L(2,1)=grad_nonlin(2); L(2,2)=grad_nonlin(5); L(2,3)=grad_nonlin(8);
    L(3,1)=grad_nonlin(3); L(3,2)=grad_nonlin(6); L(3,3)=grad_nonlin(9);
    IdentityMatrix Id(3);

    for (int l=1; l<=bvals.Ncols(); l++){
        if (bvals(1,l)>0){      // do not correct b0s
            bvecs_c.Column(l) = (Id+L)*bvecs.Column(l);
            mag = sqrt(bvecs_c(1,l)*bvecs_c(1,l)+bvecs_c(2,l)*bvecs_c(2,l)+bvecs_c(3,l)*bvecs_c(3,l));
            if (mag!=0) bvecs_c.Column(l) = bvecs_c.Column(l)/mag;
            bvals_c(1,l) = mag*mag*bvals(1,l);   // mag^2 as b is proportional to |G|^2
        }
    }
}
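// A minimal sanity-check sketch of correct_bvals_bvecs (the helper below is
// hypothetical and not part of the original xfibres code): with a gradient
// deviation of +1% of Gx along x, a b-vector along x keeps its direction
// while the b-value is scaled by 1.01^2 (b is proportional to |G|^2).
static void check_bvals_bvecs_correction(){
    Matrix bvals(1,1), bvecs(3,1), bvals_c, bvecs_c;
    bvals(1,1)=1000;
    bvecs(1,1)=1; bvecs(2,1)=0; bvecs(3,1)=0;
    ColumnVector gnl(9);
    gnl = 0.0;
    gnl(1) = 0.01;                                  // dGx/dx deviation only
    correct_bvals_bvecs(bvals,bvecs,gnl,bvals_c,bvecs_c);
    // expected: bvecs_c column 1 is still (1,0,0) after renormalisation and
    // bvals_c(1,1) is approximately 1000*1.01*1.01 = 1020.1
}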
////// FIT //////
void fit(   //INPUT
    const vector<ColumnVector> datam_vec,
    const vector<Matrix> bvecs_vec,
    const vector<Matrix> bvals_vec,
    thrust::host_vector<float> datam_host,
    thrust::host_vector<float> bvecs_host,
    thrust::host_vector<float> bvals_host,
    thrust::device_vector<float> datam_gpu,
    thrust::device_vector<float> bvecs_gpu,
    thrust::device_vector<float> bvals_gpu,
    int ndirections,
    string output_file,
    //OUTPUT
    thrust::device_vector<float>& params_gpu,
    thrust::host_vector<int>& vox_repeat,   // to get residuals with or without f0
    int& nrepeat)
{
    std::ofstream myfile;
    myfile.open(output_file.data(), ios::out | ios::app);
    myfile << "------------------- FIT IN GPU ---------------------- " << "\n";

    struct timeval t1,t2;
    double time;
    gettimeofday(&t1,NULL);

    xfibresOptions& opts = xfibresOptions::getInstance();
    int nvox = datam_vec.size();
    int nfib = opts.nfibres.value();
    int nparams_fit = 2+3*opts.nfibres.value();
    if(opts.modelnum.value()==2) nparams_fit++;
    if(opts.f0.value()) nparams_fit++;
    bool gradnonlin = opts.grad_file.set();

    if(opts.modelnum.value()==1){
        if(opts.nonlin.value()){
            fit_PVM_single(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,ndirections,nfib,opts.f0.value(),gradnonlin,output_file,params_gpu);

            if (opts.f0.value()){
                float md,mf,f0;
                thrust::host_vector<float> params_host;
                params_host.resize(nvox*nparams_fit);
                thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());
                for(int vox=0;vox<nvox;vox++){
                    // assumed: the md/mf/f0 indexing and the f0>0 clause of the test
                    // below are missing from this copy and have been reconstructed
                    md = params_host[vox*nparams_fit+1];
                    mf = params_host[vox*nparams_fit+2];
                    f0 = params_host[vox*nparams_fit+nparams_fit-1];
                    if ((f0>0 && mf<0.05) || md>0.007 || f0>0.4){
                        // if true we need to repeat this voxel
                        vox_repeat[nrepeat]=vox;
                        nrepeat++;
                    }
                }
                if(nrepeat>0){
                    // prepare structures for the voxels that need to be reprocessed
                    vector<ColumnVector> datam_repeat_vec;
                    vector<Matrix> bvecs_repeat_vec;
                    vector<Matrix> bvals_repeat_vec;
                    thrust::host_vector<float> datam_repeat_host;
                    thrust::host_vector<float> bvecs_repeat_host;
                    thrust::host_vector<float> bvals_repeat_host;
                    thrust::host_vector<float> params_repeat_host;
                    prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host);

                    thrust::device_vector<float> datam_repeat_gpu = datam_repeat_host;
                    thrust::device_vector<float> bvecs_repeat_gpu = bvecs_repeat_host;
                    thrust::device_vector<float> bvals_repeat_gpu = bvals_repeat_host;
                    thrust::device_vector<float> params_repeat_gpu = params_repeat_host;

                    fit_PVM_single(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
                    thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());

                    // mix all the parameters: repeated and not repeated
                    mix_params(params_repeat_host, vox_repeat, nrepeat, nvox, params_gpu);
                }
            }
        }else{
            fit_PVM_single_c(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,ndirections,nfib,opts.f0.value(),gradnonlin,output_file,params_gpu);

            if (opts.f0.value()){
                float md,mf,f0;
                thrust::host_vector<float> params_host;
                params_host.resize(nvox*nparams_fit);
                thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());
                for(int vox=0;vox<nvox;vox++){
                    // same assumed md/mf/f0 indexing and test as above
                    md = params_host[vox*nparams_fit+1];
                    mf = params_host[vox*nparams_fit+2];
                    f0 = params_host[vox*nparams_fit+nparams_fit-1];
                    if ((f0>0 && mf<0.05) || md>0.007 || f0>0.4){
                        // if true we need to repeat this voxel
                        vox_repeat[nrepeat]=vox;
                        nrepeat++;
                    }
                }
                if(nrepeat>0){
                    // prepare structures for the voxels that need to be reprocessed
                    vector<ColumnVector> datam_repeat_vec;
                    vector<Matrix> bvecs_repeat_vec;
                    vector<Matrix> bvals_repeat_vec;
                    thrust::host_vector<float> datam_repeat_host;
                    thrust::host_vector<float> bvecs_repeat_host;
                    thrust::host_vector<float> bvals_repeat_host;
                    thrust::host_vector<float> params_repeat_host;
                    prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host);

                    thrust::device_vector<float> datam_repeat_gpu = datam_repeat_host;
                    thrust::device_vector<float> bvecs_repeat_gpu = bvecs_repeat_host;
                    thrust::device_vector<float> bvals_repeat_gpu = bvals_repeat_host;
                    thrust::device_vector<float> params_repeat_gpu = params_repeat_host;

                    fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
                    thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());

                    // mix all the parameters: repeated and not repeated
                    mix_params(params_repeat_host, vox_repeat, nrepeat, nvox, params_gpu);
                }
            }
        }
    }else{
        // model 2 : non-mono-exponential
        fit_PVM_single_c(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,ndirections,nfib,opts.f0.value(),gradnonlin,output_file,params_gpu);
        fit_PVM_multi(datam_gpu,bvecs_gpu,bvals_gpu,nvox,ndirections,nfib,opts.f0.value(),gradnonlin,output_file,params_gpu);

        if (opts.f0.value()){
            float md,mf,f0;
            thrust::host_vector<float> params_host;
            params_host.resize(nvox*nparams_fit);
            thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());
            for(int vox=0;vox<nvox;vox++){
                // same assumed md/mf/f0 indexing and test as above
                md = params_host[vox*nparams_fit+1];
                mf = params_host[vox*nparams_fit+2];
                f0 = params_host[vox*nparams_fit+nparams_fit-1];
                if ((f0>0 && mf<0.05) || md>0.007 || f0>0.4){
                    // if true we need to repeat this voxel
                    vox_repeat[nrepeat]=vox;
                    nrepeat++;
                }
            }
            if(nrepeat>0){
                // prepare structures for the voxels that need to be reprocessed
                vector<ColumnVector> datam_repeat_vec;
                vector<Matrix> bvecs_repeat_vec;
                vector<Matrix> bvals_repeat_vec;
                thrust::host_vector<float> datam_repeat_host;
                thrust::host_vector<float> bvecs_repeat_host;
                thrust::host_vector<float> bvals_repeat_host;
                thrust::host_vector<float> params_repeat_host;
                prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host);

                thrust::device_vector<float> datam_repeat_gpu = datam_repeat_host;
                thrust::device_vector<float> bvecs_repeat_gpu = bvecs_repeat_host;
                thrust::device_vector<float> bvals_repeat_gpu = bvals_repeat_host;
                thrust::device_vector<float> params_repeat_gpu = params_repeat_host;

                fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
                fit_PVM_multi(datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,nrepeat,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
                thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());

                // mix all the parameters: repeated and not repeated
                mix_params(params_repeat_host, vox_repeat, nrepeat, nvox, params_gpu);
            }
        }
    }

    gettimeofday(&t2,NULL);
    time = timeval_diff(&t2,&t1);
    myfile << "TIME TOTAL: " << time << " seconds\n";
    myfile << "-----------------------------------------------------" << "\n\n";
    myfile.close();
}
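// A minimal sketch of the voxel-major flattening assumed for the host/device
// data buffers (the helper below is hypothetical and not part of the original
// code): measurement j of voxel v is stored at index v*ndirections + j, so a
// whole voxel is contiguous in memory.
static void flatten_voxel_major(const Matrix& datam, thrust::host_vector<float>& datam_host){
    int nvox = datam.Ncols();
    int ndirections = datam.Nrows();
    datam_host.resize(nvox*ndirections);
    for(int vox=0; vox<nvox; vox++){
        for(int j=0; j<ndirections; j++){
            datam_host[vox*ndirections+j] = datam(j+1,vox+1);   // NEWMAT is 1-indexed
        }
    }
}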
// prepare the structures to copy all the necessary data for the FIT step to the GPU
void prepare_data_gpu_FIT(  //INPUT
    const Matrix datam,
    const Matrix bvecs,
    const Matrix bvals,
    const Matrix gradm,
    //OUTPUT
    vector<ColumnVector>& datam_vec,
    vector<Matrix>& bvecs_vec,
    vector<Matrix>& bvals_vec,
    thrust::host_vector<float>& datam_host,     // data prepared for copy to the GPU
    thrust::host_vector<float>& bvecs_host,
    thrust::host_vector<float>& bvals_host,
    thrust::host_vector<float>& alpha_host,
    thrust::host_vector<float>& beta_host,
    thrust::host_vector<float>& params_host,
    thrust::host_vector<float>& tau_host)
{
    xfibresOptions& opts = xfibresOptions::getInstance();
    int nvox = datam.Ncols();
    int ndirections = datam.Nrows();

    datam_vec.resize(nvox);
    datam_host.resize(nvox*ndirections);
    for(int vox=0;vox<nvox;vox++){
        // ... (the rest of this function -- filling datam_vec/datam_host, preparing
        // bvecs/bvals with the optional gradient-nonlinearity correction, and sizing
        // the alpha/beta, params and tau buffers -- is missing from this copy)
    }
}

// prepare the structures for the voxels that need to be refitted (repeated)
void prepare_data_gpu_FIT_repeat(   //INPUT
    thrust::host_vector<float> datam_host,
    thrust::host_vector<float> bvecs_host,
    thrust::host_vector<float> bvals_host,
    thrust::host_vector<int> vox_repeat,
    int nrepeat,
    int ndirections,
    //OUTPUT
    vector<ColumnVector>& datam_repeat_vec,
    vector<Matrix>& bvecs_repeat_vec,
    vector<Matrix>& bvals_repeat_vec,
    thrust::host_vector<float>& datam_repeat_host,  // data prepared for copy to the GPU
    thrust::host_vector<float>& bvecs_repeat_host,
    thrust::host_vector<float>& bvals_repeat_host,
    thrust::host_vector<float>& params_repeat_host)
{
    xfibresOptions& opts = xfibresOptions::getInstance();

    ColumnVector datam(ndirections);
    Matrix bvecs(3,ndirections);
    Matrix bvals(1,ndirections);

    datam_repeat_vec.resize(nrepeat);
    datam_repeat_host.resize(nrepeat*ndirections);
    if (opts.grad_file.set()){
        bvecs_repeat_vec.resize(nrepeat);
        bvals_repeat_vec.resize(nrepeat);
        bvecs_repeat_host.resize(nrepeat*3*ndirections);
        bvals_repeat_host.resize(nrepeat*ndirections);
    }else{
        bvecs_repeat_vec.resize(1);
        bvals_repeat_vec.resize(1);
        bvecs_repeat_host.resize(1*3*ndirections);
        bvals_repeat_host.resize(1*ndirections);
    }
    for(int vox=0;vox<nrepeat;vox++){
        // ... (the body of this loop -- extracting the data/bvecs/bvals of each
        // repeated voxel from the flat host buffers -- is missing from this copy)
    }
}

// mix all the parameters: repeated and not repeated voxels
void mix_params(    //INPUT
    thrust::host_vector<float> params_repeat_host,
    thrust::host_vector<int> vox_repeat,
    int nrepeat,
    int nvox,
    //INPUT-OUTPUT
    thrust::device_vector<float>& params_gpu)
{
    xfibresOptions& opts = xfibresOptions::getInstance();
    int nfib = opts.nfibres.value();
    int nparams = 2+3*opts.nfibres.value();
    if(opts.modelnum.value()==2) nparams++;

    thrust::host_vector<float> params_host;
    params_host.resize(nvox*(nparams+1));
    thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());
    for(int vox=0;vox<nrepeat;vox++){
        // ... (missing from this copy: the refitted parameters of repeated voxel
        // vox_repeat[vox] are written back into params_host here)
    }
}

// prepare (size) the structures needed by the MCMC stage
void prepare_data_gpu_MCMC( //INPUT
    int nvox,
    int ndirections,
    int nfib,
    //OUTPUT
    thrust::host_vector<float>& signals_host,
    thrust::host_vector<float>& isosignals_host,
    thrust::host_vector<FibreGPU>& fibres_host,
    thrust::host_vector<MultifibreGPU>& multifibres_host)
{
    signals_host.resize(nvox*nfib*ndirections);
    isosignals_host.resize(nvox*ndirections);
    fibres_host.resize(nvox*nfib);
    multifibres_host.resize(nvox);
}
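// The recording buffers sized below hold nsamples = njumps/sampleevery samples
// per voxel (integer division); s0, d and the per-fibre th/ph/f are always
// recorded, while f0, tau and d_std are only recorded when the f0, rician or
// model-2 options are selected. For example, 10000 voxels, njumps=1250,
// sampleevery=25 and nfib=3 give 50 samples per voxel and
// 10000*50*(2+3*3) = 5.5 million floats for the always-recorded parameters.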
void prepare_data_gpu_MCMC_record(  //INPUT
    int nvox,
    //OUTPUT
    thrust::device_vector<float>& rf0_gpu,
    thrust::device_vector<float>& rtau_gpu,
    thrust::device_vector<float>& rs0_gpu,
    thrust::device_vector<float>& rd_gpu,
    thrust::device_vector<float>& rdstd_gpu,
    thrust::device_vector<float>& rth_gpu,
    thrust::device_vector<float>& rph_gpu,
    thrust::device_vector<float>& rf_gpu)
{
    xfibresOptions& opts = xfibresOptions::getInstance();
    int nfib = opts.nfibres.value();
    int nsamples = (opts.njumps.value()/opts.sampleevery.value());

    if(opts.f0.value()) rf0_gpu.resize(nvox*nsamples);
    if(opts.rician.value()) rtau_gpu.resize(nvox*nsamples);
    rs0_gpu.resize(nvox*nsamples);
    rd_gpu.resize(nvox*nsamples);
    if(opts.modelnum.value()==2) rdstd_gpu.resize(nvox*nsamples);
    rth_gpu.resize(nvox*nsamples*nfib);
    rph_gpu.resize(nvox*nsamples*nfib);
    rf_gpu.resize(nvox*nsamples*nfib);
}

void record_finish_voxels(  //INPUT
    thrust::device_vector<float>& rf0_gpu,
    thrust::device_vector<float>& rtau_gpu,
    thrust::device_vector<float>& rs0_gpu,
    thrust::device_vector<float>& rd_gpu,
    thrust::device_vector<float>& rdstd_gpu,
    thrust::device_vector<float>& rth_gpu,
    thrust::device_vector<float>& rph_gpu,
    thrust::device_vector<float>& rf_gpu,
    int nvox,
    int idpart)
{
    xfibresOptions& opts = xfibresOptions::getInstance();
    int nfib = opts.nfibres.value();
    int nsamples = (opts.njumps.value()/opts.sampleevery.value());

    thrust::host_vector<float> rf0_host,rtau_host,rs0_host,rd_host,rdstd_host,rth_host,rph_host,rf_host;
    rf0_host.resize(nvox*nsamples);
    rtau_host.resize(nvox*nsamples);
    rs0_host.resize(nvox*nsamples);
    rd_host.resize(nvox*nsamples);
    rdstd_host.resize(nvox*nsamples);
    rth_host.resize(nvox*nfib*nsamples);
    rph_host.resize(nvox*nfib*nsamples);
    rf_host.resize(nvox*nfib*nsamples);

    if(opts.f0.value()) thrust::copy(rf0_gpu.begin(), rf0_gpu.end(), rf0_host.begin());
    if(opts.rician.value()) thrust::copy(rtau_gpu.begin(), rtau_gpu.end(), rtau_host.begin());
    thrust::copy(rs0_gpu.begin(), rs0_gpu.end(), rs0_host.begin());
    thrust::copy(rd_gpu.begin(), rd_gpu.end(), rd_host.begin());
    if(opts.modelnum.value()==2) thrust::copy(rdstd_gpu.begin(), rdstd_gpu.end(), rdstd_host.begin());
    thrust::copy(rth_gpu.begin(), rth_gpu.end(), rth_host.begin());
    thrust::copy(rph_gpu.begin(), rph_gpu.end(), rph_host.begin());
    thrust::copy(rf_gpu.begin(), rf_gpu.end(), rf_host.begin());

    Samples samples(nvox,nsamples);

    float ard,arf0,artau,ardstd,ars0;
    float *arth = new float[nfib];
    float *arph = new float[nfib];
    float *arf = new float[nfib];

    for(int vox=0;vox<nvox;vox++){
        // ... (the remainder of this function is missing: the file is truncated
        // here in this copy)