%PDF-1.4 % 1 0 obj << /S /GoTo /D (chapter*.1) >> endobj 4 0 obj (Abstract) endobj 5 0 obj << /S /GoTo /D (chapter*.3) >> endobj 8 0 obj (List of Tables) endobj 9 0 obj << /S /GoTo /D (chapter*.4) >> endobj 12 0 obj (List of Figures) endobj 13 0 obj << /S /GoTo /D (chapter*.5) >> endobj 16 0 obj (Acknowledgements) endobj 17 0 obj << /S /GoTo /D (chapter.1) >> endobj 20 0 obj (1 Analytical Processing in the Big Data Era) endobj 21 0 obj << /S /GoTo /D (section.1.1) >> endobj 24 0 obj (1.1 MADDER Principles in Big Data Analytics) endobj 25 0 obj << /S /GoTo /D (section.1.2) >> endobj 28 0 obj (1.2 Two Approaches to Big Data Analytics) endobj 29 0 obj << /S /GoTo /D (section.1.3) >> endobj 32 0 obj (1.3 Big Data Analytics Systems are Becoming MADDER) endobj 33 0 obj << /S /GoTo /D (section.1.4) >> endobj 36 0 obj (1.4 Challenges in Tuning MADDER Systems) endobj 37 0 obj << /S /GoTo /D (section.1.5) >> endobj 40 0 obj (1.5 Contributions) endobj 41 0 obj << /S /GoTo /D (chapter.2) >> endobj 44 0 obj (2 A Tuning Approach for MADDER Systems) endobj 45 0 obj << /S /GoTo /D (section.2.1) >> endobj 48 0 obj (2.1 Current Approaches to Optimization and Tuning) endobj 49 0 obj << /S /GoTo /D (subsection.2.1.1) >> endobj 52 0 obj (2.1.1 Self-tuning Database Systems) endobj 53 0 obj << /S /GoTo /D (subsection.2.1.2) >> endobj 56 0 obj (2.1.2 Optimizing Dataflow Systems) endobj 57 0 obj << /S /GoTo /D (section.2.2) >> endobj 60 0 obj (2.2 Overview of a MADDER Tuning Approach) endobj 61 0 obj << /S /GoTo /D (subsection.2.2.1) >> endobj 64 0 obj (2.2.1 Tuning MapReduce Workloads with Starfish) endobj 65 0 obj << /S /GoTo /D (subsection.2.2.2) >> endobj 68 0 obj (2.2.2 Tuning SQL Queries with Xplus) endobj 69 0 obj << /S /GoTo /D (chapter.3) >> endobj 72 0 obj (3 Primer on Tuning MapReduce Workloads) endobj 73 0 obj << /S /GoTo /D (section.3.1) >> endobj 76 0 obj (3.1 MapReduce Job Execution) endobj 77 0 obj << /S /GoTo /D (section.3.2) >> endobj 80 0 obj (3.2 Impact of Configuration Parameter Settings) endobj 81 0 obj << /S /GoTo /D (section.3.3) >> endobj 84 0 obj (3.3 MapReduce on the Cloud) endobj 85 0 obj << /S /GoTo /D (section.3.4) >> endobj 88 0 obj (3.4 Use Cases for Tuning MapReduce Workloads) endobj 89 0 obj << /S /GoTo /D (chapter.4) >> endobj 92 0 obj (4 Dynamic Profiling of MapReduce Workloads) endobj 93 0 obj << /S /GoTo /D (section.4.1) >> endobj 96 0 obj (4.1 Job and Workflow Profiles) endobj 97 0 obj << /S /GoTo /D (section.4.2) >> endobj 100 0 obj (4.2 Using Profiles to Analyze Execution Behavior) endobj 101 0 obj << /S /GoTo /D (section.4.3) >> endobj 104 0 obj (4.3 Generating Profiles via Measurement) endobj 105 0 obj << /S /GoTo /D (section.4.4) >> endobj 108 0 obj (4.4 Task-level Sampling to Generate Approximate Profiles) endobj 109 0 obj << /S /GoTo /D (chapter.5) >> endobj 112 0 obj (5 A Declarative Query Interface to Access Performance Predictors and Optimizers) endobj 113 0 obj << /S /GoTo /D (section.5.1) >> endobj 116 0 obj (5.1 Declarative Interface to Express Workload Tuning Queries) endobj 117 0 obj << /S /GoTo /D (section.5.2) >> endobj 120 0 obj (5.2 Overview of How Starfish Answers a Workload Tuning Query) endobj 121 0 obj << /S /GoTo /D (section.5.3) >> endobj 124 0 obj (5.3 Starfish Visualizer) endobj 125 0 obj << /S /GoTo /D (chapter.6) >> endobj 128 0 obj (6 Predicting MapReduce Workload Performance) endobj 129 0 obj << /S /GoTo /D (section.6.1) >> endobj 132 0 obj (6.1 Overview for Predicting MapReduce Workload Performance) endobj 133 0 obj << /S /GoTo /D (section.6.2) >> endobj 136 0 obj (6.2 Cardinality Models to Estimate Dataflow Statistics Fields) endobj 137 0 obj << /S /GoTo /D (section.6.3) >> endobj 140 0 obj (6.3 Relative Black-box Models to Estimate Cost Statistics Fields) endobj 141 0 obj << /S /GoTo /D (section.6.4) >> endobj 144 0 obj (6.4 Analytical Models to Estimate Dataflow and Cost Fields) endobj 145 0 obj << /S /GoTo /D (subsection.6.4.1) >> endobj 148 0 obj (6.4.1 Modeling the Read and Map Phases in the Map Task) endobj 149 0 obj << /S /GoTo /D (subsection.6.4.2) >> endobj 152 0 obj (6.4.2 Modeling the Collect and Spill Phases in the Map Task) endobj 153 0 obj << /S /GoTo /D (subsection.6.4.3) >> endobj 156 0 obj (6.4.3 Modeling the Merge Phase in the Map Task) endobj 157 0 obj << /S /GoTo /D (subsection.6.4.4) >> endobj 160 0 obj (6.4.4 Modeling the Shuffle Phase in the Reduce Task) endobj 161 0 obj << /S /GoTo /D (subsection.6.4.5) >> endobj 164 0 obj (6.4.5 Modeling the Merge Phase in the Reduce Task) endobj 165 0 obj << /S /GoTo /D (subsection.6.4.6) >> endobj 168 0 obj (6.4.6 Modeling the Reduce and Write Phases in the Reduce Task) endobj 169 0 obj << /S /GoTo /D (section.6.5) >> endobj 172 0 obj (6.5 Simulating the Execution of a MapReduce Workload) endobj 173 0 obj << /S /GoTo /D (section.6.6) >> endobj 176 0 obj (6.6 Estimating Derived Data Properties and Workflow Performance) endobj 177 0 obj << /S /GoTo /D (section.6.7) >> endobj 180 0 obj (6.7 Evaluating the Predictive Power of the What-if Engine) endobj 181 0 obj << /S /GoTo /D (subsection.6.7.1) >> endobj 184 0 obj (6.7.1 Accuracy of What-if Analysis) endobj 185 0 obj << /S /GoTo /D (subsection.6.7.2) >> endobj 188 0 obj (6.7.2 Tuning the Cluster Size) endobj 189 0 obj << /S /GoTo /D (subsection.6.7.3) >> endobj 192 0 obj (6.7.3 Transitioning from Development to Production) endobj 193 0 obj << /S /GoTo /D (subsection.6.7.4) >> endobj 196 0 obj (6.7.4 Evaluating the Training Benchmarks) endobj 197 0 obj << /S /GoTo /D (chapter.7) >> endobj 200 0 obj (7 Cost-based Optimization for MapReduce Workloads) endobj 201 0 obj << /S /GoTo /D (section.7.1) >> endobj 204 0 obj (7.1 Current Approaches to MapReduce Optimization) endobj 205 0 obj << /S /GoTo /D (section.7.2) >> endobj 208 0 obj (7.2 Cost-based Optimization of MapReduce Jobs) endobj 209 0 obj << /S /GoTo /D (subsection.7.2.1) >> endobj 212 0 obj (7.2.1 Subspace Enumeration) endobj 213 0 obj << /S /GoTo /D (subsection.7.2.2) >> endobj 216 0 obj (7.2.2 Search Strategy within a Subspace) endobj 217 0 obj << /S /GoTo /D (subsection.7.2.3) >> endobj 220 0 obj (7.2.3 Evaluating Cost-based Job Optimization) endobj 221 0 obj << /S /GoTo /D (section.7.3) >> endobj 224 0 obj (7.3 Cost-based Optimization of MapReduce Workflows) endobj 225 0 obj << /S /GoTo /D (subsection.7.3.1) >> endobj 228 0 obj (7.3.1 Dataflow and Resource Dependencies in Workflows) endobj 229 0 obj << /S /GoTo /D (subsection.7.3.2) >> endobj 232 0 obj (7.3.2 MapReduce Workflow Optimizers) endobj 233 0 obj << /S /GoTo /D (subsection.7.3.3) >> endobj 236 0 obj (7.3.3 Evaluating Cost-based Workflow Optimization) endobj 237 0 obj << /S /GoTo /D (section.7.4) >> endobj 240 0 obj (7.4 Cost-based Optimization of Cluster Resources) endobj 241 0 obj << /S /GoTo /D (subsection.7.4.1) >> endobj 244 0 obj (7.4.1 Cluster Resource Optimizer) endobj 245 0 obj << /S /GoTo /D (subsection.7.4.2) >> endobj 248 0 obj (7.4.2 Evaluating Cost-based Cluster Provisioning) endobj 249 0 obj << /S /GoTo /D (chapter.8) >> endobj 252 0 obj (8 An Experiment-driven Approach to Tuning Analytical Queries) endobj 253 0 obj << /S /GoTo /D (section.8.1) >> endobj 256 0 obj (8.1 New Representation of the Physical Plan Space) endobj 257 0 obj << /S /GoTo /D (section.8.2) >> endobj 260 0 obj (8.2 New Search Strategy over the Physical Plan Space) endobj 261 0 obj << /S /GoTo /D (subsection.8.2.1) >> endobj 264 0 obj (8.2.1 Enumerating Neighborhoods and Plans) endobj 265 0 obj << /S /GoTo /D (subsection.8.2.2) >> endobj 268 0 obj (8.2.2 Picking the Neighborhoods to Cover) endobj 269 0 obj << /S /GoTo /D (subsection.8.2.3) >> endobj 272 0 obj (8.2.3 Picking the Plan to Run in a Neighborhood) endobj 273 0 obj << /S /GoTo /D (section.8.3) >> endobj 276 0 obj (8.3 Implementation of Xplus) endobj 277 0 obj << /S /GoTo /D (subsection.8.3.1) >> endobj 280 0 obj (8.3.1 Architecture) endobj 281 0 obj << /S /GoTo /D (subsection.8.3.2) >> endobj 284 0 obj (8.3.2 Extensibility Features) endobj 285 0 obj << /S /GoTo /D (subsection.8.3.3) >> endobj 288 0 obj (8.3.3 Efficiency Features) endobj 289 0 obj << /S /GoTo /D (section.8.4) >> endobj 292 0 obj (8.4 Comparing Xplus to Other SQL-tuning Approaches) endobj 293 0 obj << /S /GoTo /D (section.8.5) >> endobj 296 0 obj (8.5 Experimental Evaluation) endobj 297 0 obj << /S /GoTo /D (subsection.8.5.1) >> endobj 300 0 obj (8.5.1 Overall Performance of Xplus) endobj 301 0 obj << /S /GoTo /D (subsection.8.5.2) >> endobj 304 0 obj (8.5.2 Comparison with Other SQL-tuning Approaches) endobj 305 0 obj << /S /GoTo /D (subsection.8.5.3) >> endobj 308 0 obj (8.5.3 Internal Comparisons for Xplus) endobj 309 0 obj << /S /GoTo /D (chapter.9) >> endobj 312 0 obj (9 Increasing Partition-awareness in Cost-based Query Optimization) endobj 313 0 obj << /S /GoTo /D (section.9.1) >> endobj 316 0 obj (9.1 Optimization Opportunities for Partitioned Tables) endobj 317 0 obj << /S /GoTo /D (section.9.2) >> endobj 320 0 obj (9.2 Related Work on Table Partitioning) endobj 321 0 obj << /S /GoTo /D (section.9.3) >> endobj 324 0 obj (9.3 Query Optimization Techniques for Partitioned Tables) endobj 325 0 obj << /S /GoTo /D (subsection.9.3.1) >> endobj 328 0 obj (9.3.1 Matching Phase) endobj 329 0 obj << /S /GoTo /D (subsection.9.3.2) >> endobj 332 0 obj (9.3.2 Clustering Phase) endobj 333 0 obj << /S /GoTo /D (subsection.9.3.3) >> endobj 336 0 obj (9.3.3 Path Creation and Selection) endobj 337 0 obj << /S /GoTo /D (subsection.9.3.4) >> endobj 340 0 obj (9.3.4 Extending our Techniques to Parallel Database Systems) endobj 341 0 obj << /S /GoTo /D (section.9.4) >> endobj 344 0 obj (9.4 Experimental Evaluation) endobj 345 0 obj << /S /GoTo /D (subsection.9.4.1) >> endobj 348 0 obj (9.4.1 Results for Different Partitioning Schemes) endobj 349 0 obj << /S /GoTo /D (subsection.9.4.2) >> endobj 352 0 obj (9.4.2 Studying Optimization Factors on Table Partitioning) endobj 353 0 obj << /S /GoTo /D (subsection.9.4.3) >> endobj 356 0 obj (9.4.3 Impact on Cardinality Estimation) endobj 357 0 obj << /S /GoTo /D (chapter.10) >> endobj 360 0 obj (10 The Future of Big Data Analytics) endobj 361 0 obj << /S /GoTo /D (section.10.1) >> endobj 364 0 obj (10.1 Starfish: Present and Future) endobj 365 0 obj << /S /GoTo /D (section.10.2) >> endobj 368 0 obj (10.2 Xplus: Present and Future) endobj 369 0 obj << /S /GoTo /D (section.10.2) >> endobj 371 0 obj (Bibliography) endobj 372 0 obj << /S /GoTo /D (appendix*.11) >> endobj 375 0 obj (Biography) endobj 376 0 obj << /S /GoTo /D [377 0 R /Fit] >> endobj 379 0 obj << /Length 730 /Filter /FlateDecode >> stream xڥUMs0W3E>{Gԗcw:4bdƞ\`WovmA73rf\1d-2FRp9C%붅Wi,SMiRMf,:ׅ+mdl;{vbMz#*)rnR Jy9 ,CTrq 58,somߥ(; j,T3d_˞Q01= ֶ'>`궻dve'8$s.cGP"1rN~5DKXj;''0\AٷQ 1Ό%Q@ pÖ!`(ҝ /0B
> endobj
380 0 obj <<
/D [377 0 R /XYZ 107 744.91 null]
>> endobj
381 0 obj <<
/D [377 0 R /XYZ 108 719.004 null]
>> endobj
378 0 obj <<
/Font << /F16 382 0 R /F15 383 0 R /F18 384 0 R >>
/ProcSet [ /PDF /Text ]
>> endobj
388 0 obj <<
/Length 799
/Filter /FlateDecode
>>
stream
xڥUn0+xboyI}i(4Y-!J}FK.pvLt3#|/_DTcF@5b)14T,]t]ŌEB
s2X+6`oݢa#kSD)&)LR"k8EX4F~ ;:|3%_٦~dEՏP+M+*P"8fZnaXH