digraph G {
0 [labelType="html" label="<br><b>TakeOrderedAndProject</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 10 ms";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
3 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 265"];
9 [labelType="html" label="<br><b>Project</b><br><br>"];
10 [labelType="html" label="<br><b>SerializeFromObject</b><br><br>"];
}
4 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1026.1 KiB<br>time to collect: 81 ms<br>time to build: 5 ms<br>time to broadcast: 2 ms"];
subgraph cluster5 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 32 ms";
6 [labelType="html" label="<br><b>Project</b><br><br>"];
7 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 265"];
}
8 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 10.5 KiB<br>number of output rows: 265"];
11 [labelType="html" label="<b>Scan</b><br><br>number of output rows: 265"];
2->0;
3->2;
4->3;
6->4;
7->6;
8->7;
9->3;
10->9;
11->10;
}
12
TakeOrderedAndProject(limit=6, orderBy=[pagerank#425 DESC NULLS LAST], output=[id#477,pagerank#478])
Project [attr#270.id AS id#421, graphx_attr#388.pagerank AS pagerank#425]
BroadcastHashJoin [new_id#272L], [new_id#383L], Inner, BuildLeft
Project [struct(pagerank, _2#380._1) AS graphx_attr#388, _1#379L AS new_id#383L]
SerializeFromObject [knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1 AS _1#379L, if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)) null else named_struct(_1, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)._1) AS _2#380]
WholeStageCodegen (2)
BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])), [id=#285]
Project [cast(LocationID#72 as bigint) AS new_id#272L, struct(id, LocationID#72, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS attr#270]
Filter isnotnull(cast(LocationID#72 as bigint))
WholeStageCodegen (1)
FileScan csv [LocationID#72,Borough#73,Zone#74,service_zone#75] Batched: false, DataFilters: [isnotnull(cast(LocationID#72 as bigint))], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<LocationID:int,Borough:string,Zone:string,service_zone:string>
Scan[obj#378]
== Parsed Logical Plan ==
GlobalLimit 6
+- LocalLimit 6
+- Project [cast(id#421 as string) AS id#477, cast(pagerank#425 as string) AS pagerank#478]
+- Sort [pagerank#425 DESC NULLS LAST], true
+- Project [id#421, pagerank#425]
+- Project [attr#270.id AS id#421, attr#270.Borough AS Borough#422, attr#270.Zone AS Zone#423, attr#270.service_zone AS service_zone#424, graphx_attr#388.pagerank AS pagerank#425]
+- Project [attr#270, graphx_attr#388]
+- Project [new_id#272L, attr#270, graphx_attr#388]
+- Join Inner, (new_id#272L = new_id#383L)
:- Project [new_id#272L, attr#270]
: +- Project [cast(attr#270.id as bigint) AS new_id#272L, attr#270.id AS id#273, attr#270]
: +- Project [struct(id, id#120, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS attr#270]
: +- Project [LocationID#72 AS id#120, Borough#73, Zone#74, service_zone#75]
: +- Relation[LocationID#72,Borough#73,Zone#74,service_zone#75] csv
+- Project [struct(pagerank, graphx_attr#384._1) AS graphx_attr#388, new_id#383L]
+- Project [_1#379L AS new_id#383L, _2#380 AS graphx_attr#384]
+- SerializeFromObject [knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1 AS _1#379L, if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)) null else named_struct(_1, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)._1) AS _2#380]
+- ExternalRDD [obj#378]
== Analyzed Logical Plan ==
id: string, pagerank: string
GlobalLimit 6
+- LocalLimit 6
+- Project [cast(id#421 as string) AS id#477, cast(pagerank#425 as string) AS pagerank#478]
+- Sort [pagerank#425 DESC NULLS LAST], true
+- Project [id#421, pagerank#425]
+- Project [attr#270.id AS id#421, attr#270.Borough AS Borough#422, attr#270.Zone AS Zone#423, attr#270.service_zone AS service_zone#424, graphx_attr#388.pagerank AS pagerank#425]
+- Project [attr#270, graphx_attr#388]
+- Project [new_id#272L, attr#270, graphx_attr#388]
+- Join Inner, (new_id#272L = new_id#383L)
:- Project [new_id#272L, attr#270]
: +- Project [cast(attr#270.id as bigint) AS new_id#272L, attr#270.id AS id#273, attr#270]
: +- Project [struct(id, id#120, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS attr#270]
: +- Project [LocationID#72 AS id#120, Borough#73, Zone#74, service_zone#75]
: +- Relation[LocationID#72,Borough#73,Zone#74,service_zone#75] csv
+- Project [struct(pagerank, graphx_attr#384._1) AS graphx_attr#388, new_id#383L]
+- Project [_1#379L AS new_id#383L, _2#380 AS graphx_attr#384]
+- SerializeFromObject [knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1 AS _1#379L, if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)) null else named_struct(_1, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)._1) AS _2#380]
+- ExternalRDD [obj#378]
== Optimized Logical Plan ==
GlobalLimit 6
+- LocalLimit 6
+- Project [cast(id#421 as string) AS id#477, cast(pagerank#425 as string) AS pagerank#478]
+- Sort [pagerank#425 DESC NULLS LAST], true
+- Project [attr#270.id AS id#421, graphx_attr#388.pagerank AS pagerank#425]
+- Join Inner, (new_id#272L = new_id#383L)
:- Project [cast(LocationID#72 as bigint) AS new_id#272L, struct(id, LocationID#72, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS attr#270]
: +- Filter isnotnull(cast(LocationID#72 as bigint))
: +- Relation[LocationID#72,Borough#73,Zone#74,service_zone#75] csv
+- Project [struct(pagerank, _2#380._1) AS graphx_attr#388, _1#379L AS new_id#383L]
+- SerializeFromObject [knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1 AS _1#379L, if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)) null else named_struct(_1, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)._1) AS _2#380]
+- ExternalRDD [obj#378]
== Physical Plan ==
TakeOrderedAndProject(limit=6, orderBy=[pagerank#425 DESC NULLS LAST], output=[id#477,pagerank#478])
+- *(2) Project [attr#270.id AS id#421, graphx_attr#388.pagerank AS pagerank#425]
+- *(2) BroadcastHashJoin [new_id#272L], [new_id#383L], Inner, BuildLeft
:- BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, true])), [id=#285]
: +- *(1) Project [cast(LocationID#72 as bigint) AS new_id#272L, struct(id, LocationID#72, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS attr#270]
: +- *(1) Filter isnotnull(cast(LocationID#72 as bigint))
: +- FileScan csv [LocationID#72,Borough#73,Zone#74,service_zone#75] Batched: false, DataFilters: [isnotnull(cast(LocationID#72 as bigint))], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<LocationID:int,Borough:string,Zone:string,service_zone:string>
+- *(2) Project [struct(pagerank, _2#380._1) AS graphx_attr#388, _1#379L AS new_id#383L]
+- *(2) SerializeFromObject [knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1 AS _1#379L, if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)) null else named_struct(_1, knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)._1) AS _2#380]
+- Scan[obj#378]