| 1 | /* This file is part of the Linux Trace Toolkit viewer |
| 2 | * Copyright (C) 2009, 2010 Benjamin Poirier <benjamin.poirier@polymtl.ca> |
| 3 | * |
| 4 | * This program is free software: you can redistribute it and/or modify it |
| 5 | * under the terms of the GNU Lesser General Public License as published by |
| 6 | * the Free Software Foundation, either version 2.1 of the License, or (at |
| 7 | * your option) any later version. |
| 8 | * |
| 9 | * This program is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
| 12 | * License for more details. |
| 13 | * |
| 14 | * You should have received a copy of the GNU Lesser General Public License |
| 15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 16 | */ |
| 17 | #define _ISOC99_SOURCE |
| 18 | |
| 19 | #ifdef HAVE_CONFIG_H |
| 20 | #include <config.h> |
| 21 | #endif |
| 22 | |
| 23 | #include <math.h> |
| 24 | #include <stdlib.h> |
| 25 | |
| 26 | #include "sync_chain.h" |
| 27 | |
| 28 | #include "factor_reduction_accuracy.h" |
| 29 | |
| 30 | |
| 31 | // Functions common to all reduction modules |
| 32 | static void initReductionAccuracy(SyncState* const syncState); |
| 33 | static void destroyReductionAccuracy(SyncState* const syncState); |
| 34 | |
| 35 | static GArray* finalizeReductionAccuracy(SyncState* const syncState, |
| 36 | AllFactors* allFactors); |
| 37 | static void printReductionStatsAccuracy(SyncState* const syncState); |
| 38 | |
| 39 | // Functions specific to this module |
| 40 | static void floydWarshall(AllFactors* const allFactors, const unsigned int |
| 41 | traceNb, double*** const distances, unsigned int*** const predecessors); |
| 42 | static void getFactors(AllFactors* const allFactors, unsigned int** const |
| 43 | predecessors, unsigned int* const references, const unsigned int traceNum, |
| 44 | Factors* const factors); |
| 45 | |
| 46 | |
| 47 | static ReductionModule reductionModuleAccuracy= { |
| 48 | .name= "accuracy", |
| 49 | .initReduction= &initReductionAccuracy, |
| 50 | .destroyReduction= &destroyReductionAccuracy, |
| 51 | .finalizeReduction= &finalizeReductionAccuracy, |
| 52 | .printReductionStats= &printReductionStatsAccuracy, |
| 53 | .graphFunctions= {}, |
| 54 | }; |
| 55 | |
| 56 | |
| 57 | /* |
| 58 | * Reduction module registering function |
| 59 | */ |
| 60 | void registerReductionAccuracy() |
| 61 | { |
| 62 | g_queue_push_tail(&reductionModules, &reductionModuleAccuracy); |
| 63 | } |
| 64 | |
| 65 | |
| 66 | /* |
| 67 | * Reduction init function |
| 68 | * |
| 69 | * This function is called at the beginning of a synchronization run for a set |
| 70 | * of traces. |
| 71 | * |
| 72 | * Allocate some reduction specific data structures |
| 73 | * |
| 74 | * Args: |
| 75 | * syncState container for synchronization data. |
| 76 | */ |
| 77 | static void initReductionAccuracy(SyncState* const syncState) |
| 78 | { |
| 79 | if (syncState->stats) |
| 80 | { |
| 81 | syncState->reductionData= calloc(1, sizeof(ReductionStatsAccuracy)); |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | |
| 86 | /* |
| 87 | * Reduction destroy function |
| 88 | * |
| 89 | * Free the reduction specific data structures |
| 90 | * |
| 91 | * Args: |
| 92 | * syncState container for synchronization data. |
| 93 | */ |
| 94 | static void destroyReductionAccuracy(SyncState* const syncState) |
| 95 | { |
| 96 | unsigned int i; |
| 97 | |
| 98 | if (syncState->stats) |
| 99 | { |
| 100 | ReductionStatsAccuracy* stats= syncState->reductionData; |
| 101 | |
| 102 | if (stats->predecessors) |
| 103 | { |
| 104 | for (i= 0; i < syncState->traceNb; i++) |
| 105 | { |
| 106 | free(stats->predecessors[i]); |
| 107 | } |
| 108 | free(stats->predecessors); |
| 109 | free(stats->references); |
| 110 | } |
| 111 | free(stats); |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | |
| 116 | /* |
| 117 | * Finalize the factor reduction |
| 118 | * |
| 119 | * Calculate a resulting offset and drift for each trace. |
| 120 | * |
| 121 | * Traces are assembled in groups. A group is an "island" of nodes/traces that |
| 122 | * exchanged messages. A reference is determined for each group by using a |
| 123 | * shortest path search based on the accuracy of the approximation. This also |
| 124 | * forms a tree of the best way to relate each node's clock to the reference's |
| 125 | * based on the accuracy. Sometimes it may be necessary or advantageous to |
| 126 | * propagate the factors through intermediary clocks. Resulting factors for |
| 127 | * each trace are determined based on this tree. |
| 128 | * |
| 129 | * This part was not the focus of my research. The algorithm used here is |
| 130 | * inexact in some ways: |
| 131 | * 1) The reference used may not actually be the best one to use. This is |
| 132 | * because the accuracy is not corrected based on the drift during the |
| 133 | * shortest path search. |
| 134 | * 2) The min and max factors are not propagated and are no longer valid. |
| 135 | * 3) Approximations of different types (ACCURATE and APPROXIMATE) are compared |
| 136 | * together. The "accuracy" parameters of these have different meanings and |
| 137 | * are not readily comparable. |
| 138 | * |
| 139 | * Nevertheless, the result is satisfactory. You just can't tell "how much" it |
| 140 | * is. |
| 141 | * |
| 142 | * Two alternative (and subtly different) ways of propagating factors to |
| 143 | * preserve min and max boundaries have been proposed, see: |
| 144 | * [Duda, A., Harrus, G., Haddad, Y., and Bernard, G.: Estimating global time |
| 145 | * in distributed systems, Proc. 7th Int. Conf. on Distributed Computing |
| 146 | * Systems, Berlin, volume 18, 1987] p.304 |
| 147 | * |
| 148 | * [Jezequel, J.M., and Jard, C.: Building a global clock for observing |
| 149 | * computations in distributed memory parallel computers, Concurrency: |
| 150 | * Practice and Experience 8(1), volume 8, John Wiley & Sons, Ltd Chichester, |
| 151 | * 1996, 32] Section 5; which is mostly the same as |
| 152 | * [Jezequel, J.M.: Building a global time on parallel machines, Proceedings |
| 153 | * of the 3rd International Workshop on Distributed Algorithms, LNCS, volume |
| 154 | * 392, 136–147, 1989] Section 5 |
| 155 | * |
| 156 | * Args: |
| 157 | * syncState container for synchronization data. |
| 158 | * allFactors offset and drift between each pair of traces |
| 159 | * |
| 160 | * Returns: |
| 161 | * Factors[traceNb] synchronization factors for each trace |
| 162 | |
| 163 | */ |
| 164 | static GArray* finalizeReductionAccuracy(SyncState* const syncState, |
| 165 | AllFactors* allFactors) |
| 166 | { |
| 167 | GArray* factors; |
| 168 | double** distances; |
| 169 | unsigned int** predecessors; |
| 170 | double* distanceSums; |
| 171 | unsigned int* references; |
| 172 | unsigned int i, j; |
| 173 | |
| 174 | // Solve the all-pairs shortest path problem using the Floyd-Warshall |
| 175 | // algorithm |
| 176 | floydWarshall(allFactors, syncState->traceNb, &distances, &predecessors); |
| 177 | |
| 178 | /* Find the reference for each node |
| 179 | * |
| 180 | * First calculate, for each node, the sum of the distances to each other |
| 181 | * node it can reach. |
| 182 | * |
| 183 | * Then, go through each "island" of traces to find the trace that has the |
| 184 | * lowest distance sum. Assign this trace as the reference to each trace |
| 185 | * of the island. |
| 186 | */ |
| 187 | distanceSums= malloc(syncState->traceNb * sizeof(double)); |
| 188 | for (i= 0; i < syncState->traceNb; i++) |
| 189 | { |
| 190 | distanceSums[i]= 0.; |
| 191 | for (j= 0; j < syncState->traceNb; j++) |
| 192 | { |
| 193 | distanceSums[i]+= distances[i][j]; |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | references= malloc(syncState->traceNb * sizeof(unsigned int)); |
| 198 | for (i= 0; i < syncState->traceNb; i++) |
| 199 | { |
| 200 | references[i]= UINT_MAX; |
| 201 | } |
| 202 | for (i= 0; i < syncState->traceNb; i++) |
| 203 | { |
| 204 | if (references[i] == UINT_MAX) |
| 205 | { |
| 206 | unsigned int reference; |
| 207 | double distanceSumMin; |
| 208 | |
| 209 | // A node is its own reference by default |
| 210 | reference= i; |
| 211 | distanceSumMin= INFINITY; |
| 212 | for (j= 0; j < syncState->traceNb; j++) |
| 213 | { |
| 214 | if (distances[i][j] != INFINITY && distanceSums[j] < |
| 215 | distanceSumMin) |
| 216 | { |
| 217 | reference= j; |
| 218 | distanceSumMin= distanceSums[j]; |
| 219 | } |
| 220 | } |
| 221 | for (j= 0; j < syncState->traceNb; j++) |
| 222 | { |
| 223 | if (distances[i][j] != INFINITY) |
| 224 | { |
| 225 | references[j]= reference; |
| 226 | } |
| 227 | } |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | for (i= 0; i < syncState->traceNb; i++) |
| 232 | { |
| 233 | free(distances[i]); |
| 234 | } |
| 235 | free(distances); |
| 236 | free(distanceSums); |
| 237 | |
| 238 | /* For each trace, calculate the factors based on their corresponding |
| 239 | * tree. The tree is rooted at the reference and the shortest path to each |
| 240 | * other nodes are the branches. |
| 241 | */ |
| 242 | factors= g_array_sized_new(FALSE, FALSE, sizeof(Factors), |
| 243 | syncState->traceNb); |
| 244 | g_array_set_size(factors, syncState->traceNb); |
| 245 | for (i= 0; i < syncState->traceNb; i++) |
| 246 | { |
| 247 | getFactors(allFactors, predecessors, references, i, &g_array_index(factors, |
| 248 | Factors, i)); |
| 249 | } |
| 250 | |
| 251 | if (syncState->stats) |
| 252 | { |
| 253 | ReductionStatsAccuracy* stats= syncState->reductionData; |
| 254 | |
| 255 | stats->predecessors= predecessors; |
| 256 | stats->references= references; |
| 257 | } |
| 258 | else |
| 259 | { |
| 260 | for (i= 0; i < syncState->traceNb; i++) |
| 261 | { |
| 262 | free(predecessors[i]); |
| 263 | } |
| 264 | free(predecessors); |
| 265 | free(references); |
| 266 | } |
| 267 | |
| 268 | return factors; |
| 269 | } |
| 270 | |
| 271 | |
| 272 | /* |
| 273 | * Print statistics related to reduction. Must be called after |
| 274 | * finalizeReduction. |
| 275 | * |
| 276 | * Args: |
| 277 | * syncState container for synchronization data. |
| 278 | */ |
| 279 | static void printReductionStatsAccuracy(SyncState* const syncState) |
| 280 | { |
| 281 | unsigned int i; |
| 282 | ReductionStatsAccuracy* stats= syncState->reductionData; |
| 283 | |
| 284 | printf("Accuracy-based factor reduction stats:\n"); |
| 285 | for (i= 0; i < syncState->traceNb; i++) |
| 286 | { |
| 287 | unsigned int reference= stats->references[i]; |
| 288 | |
| 289 | if (i == reference) |
| 290 | { |
| 291 | printf("\ttrace %u is a reference\n", i); |
| 292 | } |
| 293 | else |
| 294 | { |
| 295 | printf("\ttrace %u: reference %u, predecessor %u\n", i, |
| 296 | reference, |
| 297 | stats->predecessors[reference][i]); |
| 298 | } |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | |
| 303 | /* |
| 304 | * Perform an all-source shortest path search using the Floyd-Warshall |
| 305 | * algorithm. |
| 306 | * |
| 307 | * The algorithm is implemented accoding to the description here: |
| 308 | * http://web.mit.edu/urban_or_book/www/book/chapter6/6.2.2.html |
| 309 | * |
| 310 | * Args: |
| 311 | * allFactors: offset and drift between each pair of traces |
| 312 | * traceNb: number of traces |
| 313 | * distances: resulting matrix of the length of the shortest path between |
| 314 | * two nodes. If there is no path between two nodes, the |
| 315 | * length is INFINITY. distances[i][j] is the length of the |
| 316 | * path from i to j. |
| 317 | * predecessors: resulting matrix of each node's predecessor on the shortest |
| 318 | * path between two nodes. predecessors[i][j] is the |
| 319 | * predecessor to j on the path from i to j. |
| 320 | */ |
| 321 | static void floydWarshall(AllFactors* const allFactors, const unsigned int |
| 322 | traceNb, double*** const distances, unsigned int*** const predecessors) |
| 323 | { |
| 324 | unsigned int i, j, k; |
| 325 | PairFactors** const pairFactors= allFactors->pairFactors; |
| 326 | |
| 327 | // Setup initial conditions |
| 328 | *distances= malloc(traceNb * sizeof(double*)); |
| 329 | *predecessors= malloc(traceNb * sizeof(unsigned int*)); |
| 330 | for (i= 0; i < traceNb; i++) |
| 331 | { |
| 332 | (*distances)[i]= malloc(traceNb * sizeof(double)); |
| 333 | for (j= 0; j < traceNb; j++) |
| 334 | { |
| 335 | if (i == j) |
| 336 | { |
| 337 | g_assert(pairFactors[i][j].type == EXACT); |
| 338 | |
| 339 | (*distances)[i][j]= 0.; |
| 340 | } |
| 341 | else |
| 342 | { |
| 343 | if (pairFactors[i][j].type == ACCURATE || |
| 344 | pairFactors[i][j].type == APPROXIMATE) |
| 345 | { |
| 346 | (*distances)[i][j]= pairFactors[i][j].accuracy; |
| 347 | } |
| 348 | else if (pairFactors[j][i].type == ACCURATE || |
| 349 | pairFactors[j][i].type == APPROXIMATE) |
| 350 | { |
| 351 | (*distances)[i][j]= pairFactors[j][i].accuracy; |
| 352 | } |
| 353 | else |
| 354 | { |
| 355 | (*distances)[i][j]= INFINITY; |
| 356 | } |
| 357 | } |
| 358 | } |
| 359 | |
| 360 | (*predecessors)[i]= malloc(traceNb * sizeof(unsigned int)); |
| 361 | for (j= 0; j < traceNb; j++) |
| 362 | { |
| 363 | if (i != j) |
| 364 | { |
| 365 | (*predecessors)[i][j]= i; |
| 366 | } |
| 367 | else |
| 368 | { |
| 369 | (*predecessors)[i][j]= UINT_MAX; |
| 370 | } |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | // Run the iterations |
| 375 | for (k= 0; k < traceNb; k++) |
| 376 | { |
| 377 | for (i= 0; i < traceNb; i++) |
| 378 | { |
| 379 | for (j= 0; j < traceNb; j++) |
| 380 | { |
| 381 | double distanceMin; |
| 382 | |
| 383 | distanceMin= MIN((*distances)[i][j], (*distances)[i][k] + |
| 384 | (*distances)[k][j]); |
| 385 | |
| 386 | if (distanceMin != (*distances)[i][j]) |
| 387 | { |
| 388 | (*predecessors)[i][j]= (*predecessors)[k][j]; |
| 389 | } |
| 390 | |
| 391 | (*distances)[i][j]= distanceMin; |
| 392 | } |
| 393 | } |
| 394 | } |
| 395 | } |
| 396 | |
| 397 | |
| 398 | /* |
| 399 | * Cummulate the time correction factors to convert a node's time to its |
| 400 | * reference's time. |
| 401 | * This function recursively calls itself until it reaches the reference node. |
| 402 | * |
| 403 | * Args: |
| 404 | * allFactors: offset and drift between each pair of traces |
| 405 | * predecessors: matrix of each node's predecessor on the shortest |
| 406 | * path between two nodes |
| 407 | * references: reference node for each node |
| 408 | * traceNum: node for which to find the factors |
| 409 | * factors: resulting factors |
| 410 | */ |
| 411 | static void getFactors(AllFactors* const allFactors, unsigned int** const |
| 412 | predecessors, unsigned int* const references, const unsigned int traceNum, |
| 413 | Factors* const factors) |
| 414 | { |
| 415 | unsigned int reference; |
| 416 | PairFactors** const pairFactors= allFactors->pairFactors; |
| 417 | |
| 418 | reference= references[traceNum]; |
| 419 | |
| 420 | if (reference == traceNum) |
| 421 | { |
| 422 | factors->offset= 0.; |
| 423 | factors->drift= 1.; |
| 424 | } |
| 425 | else |
| 426 | { |
| 427 | Factors previousVertexFactors; |
| 428 | |
| 429 | getFactors(allFactors, predecessors, references, |
| 430 | predecessors[reference][traceNum], &previousVertexFactors); |
| 431 | |
| 432 | /* Convert the time from traceNum to reference; |
| 433 | * pairFactors[row][col] converts the time from col to row, invert the |
| 434 | * factors as necessary */ |
| 435 | |
| 436 | if (pairFactors[reference][traceNum].approx != NULL) |
| 437 | { |
| 438 | factors->offset= previousVertexFactors.drift * |
| 439 | pairFactors[reference][traceNum].approx->offset + |
| 440 | previousVertexFactors.offset; |
| 441 | factors->drift= previousVertexFactors.drift * |
| 442 | pairFactors[reference][traceNum].approx->drift; |
| 443 | } |
| 444 | else if (pairFactors[traceNum][reference].approx != NULL) |
| 445 | { |
| 446 | factors->offset= previousVertexFactors.drift * (-1. * |
| 447 | pairFactors[traceNum][reference].approx->offset / |
| 448 | pairFactors[traceNum][reference].approx->drift) + |
| 449 | previousVertexFactors.offset; |
| 450 | factors->drift= previousVertexFactors.drift * (1. / |
| 451 | pairFactors[traceNum][reference].approx->drift); |
| 452 | } |
| 453 | else |
| 454 | { |
| 455 | g_assert_not_reached(); |
| 456 | } |
| 457 | } |
| 458 | } |