OPALX (Object Oriented Parallel Accelerator Library for Exascal) master (dc2a29eed580)
OPALX
Loading...
Searching...
No Matches
MultiVariateGaussian.cpp
Go to the documentation of this file.
2#include <mpi.h>
3#include <Kokkos_Core.hpp>
4#include <algorithm>
6
14 std::shared_ptr<ParticleContainer_t> pc, std::shared_ptr<FieldContainer_t> fc,
15 Distribution_t* opalDist)
16 : SamplingBase(pc, fc, opalDist) {
17 // Initialize covariance matrix from the distribution.
18 for (unsigned int i = 0; i < 6; i++) {
19 for (unsigned int j = 0; j < 6; j++) {
21 }
22 }
23
28
29 meanR_m = 0.0;
30 meanP_m = 0.0;
32
33 samplerTimer_m = IpplTimings::getTimer("Sampling");
35}
36
38 std::shared_ptr<ParticleContainer_t> pc, const Vector_t<double, 3>& meanR,
39 const Vector_t<double, 3>& meanP, const Vector_t<double, 3>& sigmaR,
40 const Vector_t<double, 3>& sigmaP, const Vector_t<double, 3>& cutoffR,
41 const Vector_t<double, 3>& cutoffP, bool fixMeanR, bool fixMeanP)
42 : SamplingBase(pc) {
43 // Initialize covariance matrix from the distribution.
44 for (unsigned int i = 0; i < 6; i++) {
45 for (unsigned int j = 0; j < 6; j++) {
46 cov_m[i][j] = 0.0;
47 if (i == j && i % 2 == 0) {
48 cov_m[i][j] = sigmaR[i / 2] * sigmaR[i / 2];
49 }
50 if (i == j && i % 2 == 1) {
51 cov_m[i][j] = sigmaP[i / 2] * sigmaP[i / 2];
52 }
53 }
54 }
55 setMeanR(meanR);
56 setMeanP(meanP);
57 setSigmaR(sigmaR);
58 setSigmaP(sigmaP);
59 setCutoffR(cutoffR);
60 setCutoffP(cutoffP);
61 setFixMeanR(fixMeanR);
62 setFixMeanP(fixMeanP);
63
64 samplerTimer_m = IpplTimings::getTimer("Sampling");
66}
67
69 std::shared_ptr<ParticleContainer_t> pc, const Vector_t<double, 3>& meanR,
70 const Vector_t<double, 3>& meanP, const Matrix_t& cov, const Vector_t<double, 3>& cutoffR,
71 const Vector_t<double, 3>& cutoffP, bool fixMeanR, bool fixMeanP)
72 : SamplingBase(pc) {
73 cov_m = cov;
74
75 setMeanR(meanR);
76 setMeanP(meanP);
78 ippl::Vector<double, 3>(
79 Kokkos::sqrt(cov_m[0][0]), Kokkos::sqrt(cov_m[2][2]),
80 Kokkos::sqrt(cov_m[4][4])));
81
83 ippl::Vector<double, 3>(
84 Kokkos::sqrt(cov_m[1][1]), Kokkos::sqrt(cov_m[3][3]),
85 Kokkos::sqrt(cov_m[5][5])));
86 setCutoffR(cutoffR);
87 setCutoffP(cutoffP);
88 setFixMeanR(fixMeanR);
89 setFixMeanP(fixMeanP);
90
91 samplerTimer_m = IpplTimings::getTimer("Sampling");
93}
94
99 extern Inform* gmsg;
100 size_t randInit;
101
102 if (Options::seed == -1) {
103 randInit = 1234567;
104 *gmsg << "* Seed = " << randInit << " on all ranks" << endl;
105 } else {
106 randInit = static_cast<size_t>(Options::seed + 100 * ippl::Comm->rank());
107 }
108
109 GeneratorPool rand_pool64(randInit);
110 randPool_m = rand_pool64;
111 return;
112}
113
118 for (unsigned int i = 0; i < 6; i++) {
119 for (unsigned int j = 0; j < 6; j++) {
120 L_m[i][j] = 0.0;
121 }
122 }
123 double sum = 0.0;
124 for (unsigned int i = 0; i < 6; i++) {
125 for (unsigned int j = 0; j <= i; j++) {
126 sum = 0.0;
127 for (unsigned int k = 0; k < j; k++) {
128 sum += L_m[i][k] * L_m[j][k];
129 }
130 if (j == i) {
131 L_m[j][j] = Kokkos::sqrt(cov_m[j][j] - sum);
132 } else {
133 L_m[i][j] = (cov_m[i][j] - sum) / L_m[j][j];
134 }
135 }
136 }
137}
138
143 rmin_m = -cutoffR_m;
145 pmin_m = -cutoffP_m;
147
148 for (int i = 0; i < 3; i++) {
149 rmin_m(i) *= sigmaR_m(i);
150 rmax_m(i) *= sigmaR_m(i);
151 pmin_m(i) *= sigmaP_m(i);
152 pmax_m(i) *= sigmaP_m(i);
153
154 min_m(i * 2) = rmin_m(i);
155 max_m(i * 2) = rmax_m(i);
156 min_m(i * 2 + 1) = pmin_m(i);
157 max_m(i * 2 + 1) = pmax_m(i);
158 }
159
160 normMin_m = 0.0;
161 normMax_m = 0.0;
162 double sumMin, sumMax;
163 for (int i = 0; i < 6; i++) {
164 sumMin = 0.0;
165 sumMax = 0.0;
166 for (int j = 0; j < i; j++) {
167 sumMin += -L_m[i][j] * normMin_m(j);
168 sumMax += -L_m[i][j] * normMax_m(j);
169 }
170 normMin_m(i) = (min_m(i) - sumMin) / L_m[i][i];
171 normMax_m(i) = (max_m(i) - sumMax) / L_m[i][i];
172 }
173
174 for (int i = 0; i < 3; i++) {
175 normRmin_m(i) = min_m(2 * i) / sigmaR_m(i);
176 normRmax_m(i) = max_m(2 * i) / sigmaR_m(i);
177 normPmin_m(i) = min_m(2 * i + 1) / sigmaP_m(i);
178 normPmax_m(i) = max_m(2 * i + 1) / sigmaP_m(i);
179
180 rmin_m(i) /= sigmaR_m(i);
181 rmax_m(i) /= sigmaR_m(i);
182 pmin_m(i) /= sigmaP_m(i);
183 pmax_m(i) /= sigmaP_m(i);
184 }
185}
186
191 size_t& numberOfParticles, Vector_t<double, 3> /*nr*/) {
192 if (emissionModel_m != "NONE")
193 throw OpalException(
194 "MultiVariateGaussian::generateParticles",
195 "EMISSIONMODEL '" + emissionModel_m
196 + "' is not supported for MULTIVARIATEGAUSS distributions");
197
198 // Only generate during initial sampling (t0 <= 0). For t0 > 0, this
199 // distribution is time-independent and should not contribute here unless
200 // explicitly triggered via emitParticles (which sets hasEmittedOnce_m).
201 if (t0_m > 0.0 && !hasEmittedOnce_m) {
202 return;
203 }
204 IpplTimings::startTimer(samplerTimer_m);
205
206 auto rand_pool64 = randPool_m;
207 // compute L using Cholesky factorization cov=L*LT
209
210 // compute boundaries of normal random numbers
212
213 const double par[6] = {0.0, 1.0, 0.0, 1.0, 0.0, 1.0};
214 using Dist_t = ippl::random::NormalDistribution<double, 3>;
215 using sampling_t = ippl::random::InverseTransformSampling<
216 double, 3, Kokkos::DefaultExecutionSpace, Dist_t>;
217 Dist_t dist(par);
218
219 const int nranks = std::max(1, ippl::Comm->size());
220 // Use computeLocalEmitCount to distribute particles across ranks or uniform fallback.
221 size_t nlocal = pc_m ? computeLocalEmitCount(static_cast<size_t>(numberOfParticles))
222 : static_cast<size_t>(
223 floor(numberOfParticles / nranks)
224 + (ippl::Comm->rank() < static_cast<int>(
225 numberOfParticles % static_cast<size_t>(nranks))
226 ? 1
227 : 0));
228
229 sampling_t sampling(dist, normRmax_m, normRmin_m, normRmax_m, normRmin_m, nlocal);
230
231 const size_t nlocalCurrent = pc_m->getLocalNum();
232 pc_m->createParticles(nlocal);
233
234 view_type RviewFull = pc_m->R.getView();
235 view_type PviewFull = pc_m->P.getView();
236
237 auto Rview = Kokkos::subview(RviewFull, std::make_pair(nlocalCurrent, nlocalCurrent + nlocal));
238 auto Pview = Kokkos::subview(PviewFull, std::make_pair(nlocalCurrent, nlocalCurrent + nlocal));
239
240 sampling.generate(Rview, rand_pool64);
241
242 sampling.updateBounds(normPmax_m, normPmin_m, normPmax_m, normPmin_m);
243 sampling.generate(Pview, rand_pool64);
244
245 Matrix_t L;
246 for (unsigned int i = 0; i < 6; i++) {
247 for (unsigned int j = 0; j < 6; j++) {
248 L[i][j] = L_m[i][j];
249 }
250 }
251
252 // Apply Cholesky transformation
253 Kokkos::parallel_for(
254 nlocal, KOKKOS_LAMBDA(const size_t k) {
255 double vec_old[6], vec[6] = {0.0};
256 for (unsigned i = 0; i < 3; ++i) {
257 vec_old[2 * i] = Rview(k)[i];
258 vec_old[2 * i + 1] = Pview(k)[i];
259 }
260 for (unsigned i = 0; i < 6; ++i) {
261 for (unsigned j = 0; j < i + 1; ++j) {
262 vec[i] += L[i][j] * vec_old[j];
263 }
264 }
265 for (unsigned i = 0; i < 3; ++i) {
266 Rview(k)[i] = vec[2 * i];
267 Pview(k)[i] = vec[2 * i + 1];
268 }
269 });
270
271 Kokkos::fence();
272
273 // zero mean of R
274 double meanR[3], loc_meanR[3];
275
276 if (fixMeanR_m) {
277 for (size_t i = 0; i < 3; i++) {
278 meanR[i] = 0.0;
279 loc_meanR[i] = 0.0;
280 }
281
282 Kokkos::parallel_reduce(
283 "calc moments of particle distr.", nlocal,
284 KOKKOS_LAMBDA(const size_t k, double& cent0, double& cent1, double& cent2) {
285 cent0 += Rview(k)[0];
286 cent1 += Rview(k)[1];
287 cent2 += Rview(k)[2];
288 },
289 Kokkos::Sum<double>(loc_meanR[0]), Kokkos::Sum<double>(loc_meanR[1]),
290 Kokkos::Sum<double>(loc_meanR[2]));
291 Kokkos::fence();
292
293 MPI_Allreduce(loc_meanR, meanR, 3, MPI_DOUBLE, MPI_SUM, ippl::Comm->getCommunicator());
294 ippl::Comm->barrier();
295
296 for (size_t i = 0; i < 3; i++) {
297 meanR[i] = meanR[i] / (1. * numberOfParticles);
298 }
299
300 Kokkos::parallel_for(
301 nlocal, KOKKOS_LAMBDA(const size_t k) {
302 Rview(k)[0] -= meanR[0];
303 Rview(k)[1] -= meanR[1];
304 Rview(k)[2] -= meanR[2];
305 });
306 Kokkos::fence();
307 }
308
309 // zero mean of P
310 double meanP[3], loc_meanP[3];
311 if (fixMeanP_m) {
312 for (size_t i = 0; i < 3; i++) {
313 meanP[i] = 0.0;
314 loc_meanP[i] = 0.0;
315 }
316 Kokkos::parallel_reduce(
317 "calc moments of particle distr.", nlocal,
318 KOKKOS_LAMBDA(const size_t k, double& cent0, double& cent1, double& cent2) {
319 cent0 += Pview(k)[0];
320 cent1 += Pview(k)[1];
321 cent2 += Pview(k)[2];
322 },
323 Kokkos::Sum<double>(loc_meanP[0]), Kokkos::Sum<double>(loc_meanP[1]),
324 Kokkos::Sum<double>(loc_meanP[2]));
325 Kokkos::fence();
326
327 MPI_Allreduce(loc_meanP, meanP, 3, MPI_DOUBLE, MPI_SUM, ippl::Comm->getCommunicator());
328 ippl::Comm->barrier();
329
330 for (size_t i = 0; i < 3; i++) {
331 meanP[i] = meanP[i] / (1. * numberOfParticles);
332 }
333
334 Kokkos::parallel_for(
335 nlocal, KOKKOS_LAMBDA(const size_t k) {
336 Pview(k)[0] -= meanP[0];
337 Pview(k)[1] -= meanP[1];
338 Pview(k)[2] -= meanP[2];
339 });
340 Kokkos::fence();
341 }
342
343 // correct the means of R and P from input
344 for (size_t i = 0; i < 3; i++) {
345 meanR[i] = meanR_m[i];
346 meanP[i] = meanP_m[i];
347 }
348
349 Kokkos::parallel_for(
350 nlocal, KOKKOS_LAMBDA(const size_t k) {
351 for (size_t i = 0; i < 3; i++) {
352 Rview(k)[i] += meanR[i];
353 Pview(k)[i] += meanP[i];
354 }
355 });
356 Kokkos::fence();
357
358 // Apply per-emission-source offsets after all mean-fixing/corrections.
359 // EMISSIONSOURCE offsets are expected to translate the generated bunch
360 // without being affected by the internal "fix mean" logic.
361 const Vector_t<double, 3> R0 = R0_m;
362 const Vector_t<double, 3> P0 = P0_m;
363 Kokkos::parallel_for(
364 nlocal, KOKKOS_LAMBDA(const size_t k) {
365 Rview(k) += R0;
366 Pview(k) += P0;
367 });
368
369 fillPolarization(nlocalCurrent, nlocal);
370
371 pc_m->markMomentsDirty();
372
373 IpplTimings::stopTimer(samplerTimer_m);
374}
375
376void MultiVariateGaussian::emitParticles(double t, double dt) {
377 // One-shot delayed emission for multivariate Gaussian sources.
378 const double tStart = t;
379 const double tEnd = t + dt;
380
381 if (hasEmittedOnce_m) {
382 return;
383 }
384
385 if (t0_m <= 0.0) {
386 return;
387 }
388
389 if (!(tStart <= t0_m && t0_m < tEnd)) {
390 return;
391 }
392
393 if (!opalDist_m) {
394 return;
395 }
396 size_t Ndist = opalDist_m->getNumParticles();
397 if (Ndist == 0) {
398 return;
399 }
400
401 const size_t nlocalBefore = pc_m->getLocalNum();
402
403 hasEmittedOnce_m = true;
404 Vector_t<double, 3> dummyNr(0.0);
405 generateParticles(Ndist, dummyNr);
406
407 const size_t nlocalAfter = pc_m->getLocalNum();
408 const size_t nNew = nlocalAfter - nlocalBefore;
409 if (nNew > 0) {
410 const double fracDt = tEnd - t0_m;
411 auto dtview = pc_m->dt.getView();
412 const size_t offset = nlocalBefore;
413 Kokkos::parallel_for(
414 "MultiVariateGaussian_setDt", nNew,
415 KOKKOS_LAMBDA(const size_t j) { dtview(offset + j) = fracDt; });
416 Kokkos::fence();
417 }
418}
Inform * gmsg
Definition changes.cpp:7
ippl::Vector< T, Dim > Vector_t
typename ippl::detail::ViewType< ippl::Vector< double, Dim >, 1 >::view_type view_type
ippl::Vector< ippl::Vector< double, 6 >, 6 > Matrix_t
typename Kokkos::Random_XorShift64_Pool<> GeneratorPool
ippl::random::NormalDistribution< double, 3 > Dist_t
ippl::Vector< double, 3 > getCutoffP() const
size_t getNumParticles() const
ippl::Vector< double, 3 > getSigmaP() const
ippl::Vector< double, 3 > getSigmaR() const
double getAvrgpz() const
Matrix_t correlationMatrix_m
ippl::Vector< double, 3 > getCutoffR() const
void emitParticles(double t, double dt) override
Time-stepped emission hook for one-shot delayed injection.
Vector_t< double, 6 > normMax_m
Vector_t< double, 3 > sigmaP_m
IpplTimings::TimerRef samplerTimer_m
Timer for performance profiling.
bool fixMeanR_m
Flag to exactly fix the mean R and P of particles after sampling.
Vector_t< double, 3 > meanP_m
void setCutoffR(const Vector_t< double, 3 > &cutoffR)
void setMeanP(const Vector_t< double, 3 > &meanP)
Vector_t< double, 6 > max_m
Vector_t< double, 3 > normPmin_m
void setCutoffP(const Vector_t< double, 3 > &cutoffP)
Vector_t< double, 3 > pmax_m
void ComputeCholeskyFactorization()
Computes the Cholesky factorization of the covariance matrix.
Vector_t< double, 3 > rmin_m
Vector_t< double, 3 > rmax_m
Vector_t< double, 3 > pmin_m
Vector_t< double, 3 > cutoffR_m
Cutoff multipliers for position and momentum distributions.
Vector_t< double, 6 > min_m
Min and Max bounds for all 6 dimensions (R0,P0,R1,P1,R2,P2).
Vector_t< double, 3 > normPmax_m
MultiVariateGaussian(std::shared_ptr< ParticleContainer_t > pc, std::shared_ptr< FieldContainer_t > fc, Distribution_t *opalDist)
Constructor for MultiVariateGaussian.
void setFixMeanR(bool fixMeanR)
void setMeanR(const Vector_t< double, 3 > &meanR)
Vector_t< double, 6 > normMin_m
void initRandomPool()
Initializes the random number generator pool.
Vector_t< double, 3 > normRmax_m
void generateParticles(size_t &numberOfParticles, Vector_t< double, 3 > nr) override
Generates particles based on the defined Gaussian distribution.
Vector_t< double, 3 > meanR_m
Vector_t< double, 3 > sigmaR_m
Standard deviations for position and momentum distributions.
void setSigmaP(const Vector_t< double, 3 > &sigmaP)
GeneratorPool randPool_m
Pool of random number generators for parallel sampling.
void setSigmaR(const Vector_t< double, 3 > &sigmaR)
void ComputeCenteredBounds()
Computes centered bounds for the particle distribution.
Vector_t< double, 3 > cutoffP_m
Vector_t< double, 3 > normRmin_m
void setFixMeanP(bool fixMeanP)
Vector_t< double, 3 > P0_m
Vector_t< double, 3 > R0_m
Emission source offset: position R0, momentum P0, start time t0 (applied in sample step).
bool hasEmittedOnce_m
For one-shot emitters (e.g. Gaussian at delayed t0): guard to avoid double sampling.
size_t computeLocalEmitCount(size_t totalToSample) const
Computes the number of particles this rank should emit so that the global total equals totalToSample ...
std::string emissionModel_m
Distribution_t * opalDist_m
void fillPolarization(size_t startIdx, size_t count)
std::shared_ptr< ParticleContainer_t > pc_m
int seed
The current random seed.
Definition Options.cpp:37