digraph G {
0 [labelType="html" label="<b>Execute InsertIntoHadoopFsRelationCommand</b><br><br>number of written files: 1<br>written output: 28.0 B<br>number of output rows: 1<br>number of dynamic part: 0"];
1 [labelType="html" label="<br><b>Coalesce</b><br><br>"];
subgraph cluster2 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: 29 ms";
3 [labelType="html" label="<b>Sort</b><br><br>sort time: 0 ms<br>peak memory: 64.1 MiB<br>spill size: 0.0 B"];
}
4 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 1<br>shuffle write time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 7.0: task 467))<br>records read: 1<br>local bytes read: 60.0 B<br>fetch wait time: 0 ms<br>local blocks read: 1<br>data size total (min, med, max (stageId: taskId))<br>24.0 B (0.0 B, 0.0 B, 24.0 B (stage 7.0: task 467))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>60.0 B (0.0 B, 0.0 B, 60.0 B (stage 7.0: task 467))"];
subgraph cluster5 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n520 ms (0 ms, 1 ms, 29 ms (stage 5.0: task 267))";
6 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>33 ms (0 ms, 0 ms, 26 ms (stage 5.0: task 267))<br>peak memory total (min, med, max (stageId: taskId))<br>228.0 MiB (256.0 KiB, 256.0 KiB, 64.3 MiB (stage 5.0: task 267))<br>number of output rows: 2<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 5.0: task 267))"];
}
7 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 22<br>shuffle write time total (min, med, max (stageId: taskId))<br>73 ms (2 ms, 3 ms, 4 ms (stage 4.0: task 252))<br>records read: 44<br>local bytes read total (min, med, max (stageId: taskId))<br>1440.0 B (0.0 B, 0.0 B, 720.0 B (stage 5.0: task 267))<br>fetch wait time total (min, med, max (stageId: taskId))<br>15 ms (0 ms, 0 ms, 14 ms (stage 5.0: task 267))<br>remote bytes read total (min, med, max (stageId: taskId))<br>1200.0 B (0.0 B, 0.0 B, 600.0 B (stage 5.0: task 267))<br>local blocks read: 24<br>remote blocks read: 20<br>data size total (min, med, max (stageId: taskId))<br>528.0 B (24.0 B, 24.0 B, 24.0 B (stage 4.0: task 246))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>1320.0 B (60.0 B, 60.0 B, 60.0 B (stage 4.0: task 246))"];
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n37.6 s (520 ms, 1.8 s, 2.2 s (stage 4.0: task 265))";
9 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>37.5 s (516 ms, 1.8 s, 2.2 s (stage 4.0: task 265))<br>peak memory total (min, med, max (stageId: taskId))<br>1413.5 MiB (64.3 MiB, 64.3 MiB, 64.3 MiB (stage 4.0: task 246))<br>number of output rows: 22<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 4.0: task 246))"];
10 [labelType="html" label="<br><b>Project</b><br><br>"];
}
11 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 2.7 GiB<br>number of output rows: 6,905,288"];
1->0;
3->1;
4->3;
6->4;
7->6;
9->7;
10->9;
11->10;
}
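The digraph above is the Graphviz DOT source behind the Spark UI SQL tab's DAG view: each numbered node carries its runtime SQL metrics (rows, bytes, durations, peak memory) in an HTML label, and each WholeStageCodegen cluster groups the operators fused into a single generated function. Below is a minimal sketch for rendering it offline, assuming the optional graphviz Python package plus the Graphviz system binaries are installed; plan.dot is a hypothetical file holding the digraph text verbatim.

# Render the DAG dump to SVG. Assumes: `pip install graphviz` plus the
# Graphviz binaries on PATH; plan.dot is a hypothetical copy of the text above.
from graphviz import Source

with open("plan.dot") as f:
    dot_text = f.read()

# Source wraps raw DOT text; render() shells out to the `dot` binary and
# writes plan.svg alongside the source file.
Source(dot_text).render("plan", format="svg", cleanup=True)

One caveat: labelType and the quoted HTML labels are conventions of the dagre-d3 renderer embedded in the Spark UI, so plain Graphviz will draw the graph structure but show the <b>/<br> markup as literal text. The listing that follows is the full explain output for the same query: the parsed, analyzed, and optimized logical plans, then the physical plan that the DAG visualizes.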
== Parsed Logical Plan ==
InsertIntoHadoopFsRelationCommand s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv, false, CSV, Map(header -> true, path -> s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv), Overwrite, [Start Hour, count]
+- Repartition 1, false
+- Sort [Start Hour#62 ASC NULLS FIRST], true
+- Aggregate [Start Hour#62], [Start Hour#62, count(1) AS count#112L]
+- Project [Trip ID#16, Taxi ID#17, Trip Start Timestamp#18, Trip End Timestamp#19, Trip Seconds#20, Trip Miles#21, Pickup Census Tract#22L, Dropoff Census Tract#23L, Pickup Community Area#24, Dropoff Community Area#25, Fare#26, Tips#27, Tolls#28, Extras#29, Trip Total#30, Payment Type#31, Company#32, Pickup Centroid Latitude#33, Pickup Centroid Longitude#34, Pickup Centroid Location#35, Dropoff Centroid Latitude#36, Dropoff Centroid Longitude#37, Dropoff Centroid Location#38, cast(date_format(cast(Trip Start Timestamp#18 as timestamp), HH, Some(GMT)) as int) AS Start Hour#62]
+- Relation[Trip ID#16,Taxi ID#17,Trip Start Timestamp#18,Trip End Timestamp#19,Trip Seconds#20,Trip Miles#21,Pickup Census Tract#22L,Dropoff Census Tract#23L,Pickup Community Area#24,Dropoff Community Area#25,Fare#26,Tips#27,Tolls#28,Extras#29,Trip Total#30,Payment Type#31,Company#32,Pickup Centroid Latitude#33,Pickup Centroid Longitude#34,Pickup Centroid Location#35,Dropoff Centroid Latitude#36,Dropoff Centroid Longitude#37,Dropoff Centroid Location#38] csv
== Analyzed Logical Plan ==
InsertIntoHadoopFsRelationCommand s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv, false, CSV, Map(header -> true, path -> s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv), Overwrite, [Start Hour, count]
+- Repartition 1, false
+- Sort [Start Hour#62 ASC NULLS FIRST], true
+- Aggregate [Start Hour#62], [Start Hour#62, count(1) AS count#112L]
+- Project [Trip ID#16, Taxi ID#17, Trip Start Timestamp#18, Trip End Timestamp#19, Trip Seconds#20, Trip Miles#21, Pickup Census Tract#22L, Dropoff Census Tract#23L, Pickup Community Area#24, Dropoff Community Area#25, Fare#26, Tips#27, Tolls#28, Extras#29, Trip Total#30, Payment Type#31, Company#32, Pickup Centroid Latitude#33, Pickup Centroid Longitude#34, Pickup Centroid Location#35, Dropoff Centroid Latitude#36, Dropoff Centroid Longitude#37, Dropoff Centroid Location#38, cast(date_format(cast(Trip Start Timestamp#18 as timestamp), HH, Some(GMT)) as int) AS Start Hour#62]
+- Relation[Trip ID#16,Taxi ID#17,Trip Start Timestamp#18,Trip End Timestamp#19,Trip Seconds#20,Trip Miles#21,Pickup Census Tract#22L,Dropoff Census Tract#23L,Pickup Community Area#24,Dropoff Community Area#25,Fare#26,Tips#27,Tolls#28,Extras#29,Trip Total#30,Payment Type#31,Company#32,Pickup Centroid Latitude#33,Pickup Centroid Longitude#34,Pickup Centroid Location#35,Dropoff Centroid Latitude#36,Dropoff Centroid Longitude#37,Dropoff Centroid Location#38] csv
== Optimized Logical Plan ==
InsertIntoHadoopFsRelationCommand s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv, false, CSV, Map(header -> true, path -> s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv), Overwrite, [Start Hour, count]
+- Repartition 1, false
+- Sort [Start Hour#62 ASC NULLS FIRST], true
+- Aggregate [Start Hour#62], [Start Hour#62, count(1) AS count#112L]
+- Project [cast(date_format(cast(Trip Start Timestamp#18 as timestamp), HH, Some(GMT)) as int) AS Start Hour#62]
+- Relation[Trip ID#16,Taxi ID#17,Trip Start Timestamp#18,Trip End Timestamp#19,Trip Seconds#20,Trip Miles#21,Pickup Census Tract#22L,Dropoff Census Tract#23L,Pickup Community Area#24,Dropoff Community Area#25,Fare#26,Tips#27,Tolls#28,Extras#29,Trip Total#30,Payment Type#31,Company#32,Pickup Centroid Latitude#33,Pickup Centroid Longitude#34,Pickup Centroid Location#35,Dropoff Centroid Latitude#36,Dropoff Centroid Longitude#37,Dropoff Centroid Location#38] csv
== Physical Plan ==
Execute InsertIntoHadoopFsRelationCommand s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv, false, CSV, Map(header -> true, path -> s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv), Overwrite, [Start Hour, count]
+- Coalesce 1
+- *(3) Sort [Start Hour#62 ASC NULLS FIRST], true, 0
+- Exchange rangepartitioning(Start Hour#62 ASC NULLS FIRST, 200), true, [id=#85]
+- *(2) HashAggregate(keys=[Start Hour#62], functions=[count(1)], output=[Start Hour#62, count#112L])
+- Exchange hashpartitioning(Start Hour#62, 200), true, [id=#81]
+- *(1) HashAggregate(keys=[Start Hour#62], functions=[partial_count(1)], output=[Start Hour#62, count#122L])
+- *(1) Project [cast(date_format(cast(Trip Start Timestamp#18 as timestamp), HH, Some(GMT)) as int) AS Start Hour#62]
+- FileScan csv [Trip Start Timestamp#18] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/Chicago_Taxitrips/chicago_taxi_trips.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<Trip Start Timestamp:string>
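Read back from the four plans, the job is: scan the 2.7 GiB taxi-trips CSV (only Trip Start Timestamp survives column pruning, per ReadSchema), derive an integer Start Hour, count trips per hour via the partial/final HashAggregate pair split by the hash-partitioned Exchange, range-partition and sort by hour, coalesce to a single partition, and overwrite a header-bearing CSV in the object bucket. A plausible PySpark reconstruction follows; it is a sketch inferred from the plan text rather than the original source, and the spark/trips/hourly names are assumptions.

# Hypothetical reconstruction of the query behind the plans above,
# inferred from the explain output rather than taken from the original job.
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, date_format

spark = SparkSession.builder.appName("trip_counts_by_hour").getOrCreate()

# FileScan csv [Trip Start Timestamp#18]: header-aware CSV read; the
# optimizer prunes the other 23 columns because only this one is used.
trips = spark.read.csv(
    "s3a://data-repository-bkt/ECS765/Chicago_Taxitrips/chicago_taxi_trips.csv",
    header=True,
)

hourly = (
    trips
    # Project: cast(date_format(cast(col as timestamp), 'HH') as int)
    .withColumn(
        "Start Hour",
        date_format(col("Trip Start Timestamp").cast("timestamp"), "HH").cast("int"),
    )
    # Aggregate [Start Hour], count(1): compiles to the partial/final
    # HashAggregate pair around Exchange hashpartitioning(Start Hour, 200).
    .groupBy("Start Hour")
    .count()
    # Sort [Start Hour ASC NULLS FIRST]: Exchange rangepartitioning + Sort.
    .orderBy("Start Hour")
    # Repartition 1, false -> Coalesce 1: a single output file.
    .coalesce(1)
)

# Execute InsertIntoHadoopFsRelationCommand ..., Overwrite, header -> true.
hourly.write.csv(
    "s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv",
    header=True,
    mode="overwrite",
)

The date_format(..., HH, Some(GMT)) in the plan indicates the session timezone resolved to GMT, so hours are extracted in GMT; setting spark.sql.session.timeZone would change that. A similar four-section listing can be printed from code with hourly.explain(True), minus the InsertIntoHadoopFsRelationCommand node, which appears only for the executed write.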