digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (5)\n \nduration: total (min, med, max (stageId: taskId))\n47 ms (0 ms, 0 ms, 47 ms (stage 85.0: task 865))";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
3 [labelType="html" label="<b>SortMergeJoin</b><br><br>number of output rows: 12"];
}
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n138 ms (1 ms, 1 ms, 85 ms (stage 85.0: task 865))";
5 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 85.0: task 865))<br>peak memory total (min, med, max (stageId: taskId))<br>384.5 MiB (64.0 KiB, 64.1 MiB, 64.1 MiB (stage 85.0: task 865))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 85.0: task 865))"];
}
6 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 265<br>shuffle write time: 10 ms<br>records read: 12<br>local bytes read total (min, med, max (stageId: taskId))<br>1150.0 B (0.0 B, 173.0 B, 326.0 B (stage 113.0: task 870))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 85.0: task 865))<br>local blocks read: 6<br>data size: 28.4 KiB<br>shuffle bytes written: 27.3 KiB"];
subgraph cluster7 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 49 ms";
8 [labelType="html" label="<br><b>Project</b><br><br>"];
9 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 265"];
10 [labelType="html" label="<b>Scan ExistingRDD</b><br><br>number of output rows: 265"];
}
subgraph cluster11 {
isCluster="true";
label="WholeStageCodegen (4)\n \nduration: total (min, med, max (stageId: taskId))\n71 ms (0 ms, 0 ms, 71 ms (stage 85.0: task 865))";
12 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 85.0: task 865))<br>peak memory total (min, med, max (stageId: taskId))<br>384.4 MiB (0.0 B, 64.1 MiB, 64.1 MiB (stage 85.0: task 865))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 85.0: task 865))"];
}
13 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 265<br>shuffle write time: 8 ms<br>records read: 12<br>local bytes read total (min, med, max (stageId: taskId))<br>704.0 B (0.0 B, 117.0 B, 145.0 B (stage 113.0: task 870))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 85.0: task 865))<br>local blocks read: 6<br>data size: 22.1 KiB<br>shuffle bytes written: 16.2 KiB"];
subgraph cluster14 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: 25 ms";
15 [labelType="html" label="<br><b>Project</b><br><br>"];
16 [labelType="html" label="<br><b>SerializeFromObject</b><br><br>"];
}
17 [labelType="html" label="<b>Scan</b><br><br>number of output rows: 265"];
2->0;
3->2;
5->3;
6->5;
8->6;
9->8;
10->9;
12->3;
13->12;
15->13;
16->15;
17->16;
}
18
CollectLimit 11
Project [concat_ws(->, cast(attr#306.id as string), 1) AS id_to_1#443, cast(UDF(graphx_attr#342.distances)[1] as string) AS shortest_distance#444]
SortMergeJoin [new_id#308L], [new_id#337L], Inner
WholeStageCodegen (5)
Sort [new_id#308L ASC NULLS FIRST], false, 0
WholeStageCodegen (2)
Exchange hashpartitioning(new_id#308L, 200), true, [id=#467]
Project [cast(id#105 as bigint) AS new_id#308L, struct(id, id#105, Borough, Borough#106, Zone, Zone#107, service_zone, service_zone#108) AS attr#306]
Filter isnotnull(cast(id#105 as bigint))
Scan ExistingRDD[id#105,Borough#106,Zone#107,service_zone#108]
WholeStageCodegen (1)
Sort [new_id#337L ASC NULLS FIRST], false, 0
WholeStageCodegen (4)
Exchange hashpartitioning(new_id#337L, 200), true, [id=#476]
Project [struct(distances, _2#334._1) AS graphx_attr#342, _1#333L AS new_id#337L]
SerializeFromObject [knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1 AS _1#333L, if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)) null else named_struct(_1, mapobjects(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1), if (isnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1))) null else named_struct(_1, knownnotnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1))._1, _2, knownnotnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1))._2), knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)._1, None)) AS _2#334]
WholeStageCodegen (3)
Scan[obj#332]
== Parsed Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(id_to_1#431 as string) AS id_to_1#443, cast(shortest_distance#424 as string) AS shortest_distance#444]
+- Project [id_to_1#431, shortest_distance#424]
+- Project [id#364, Borough#365, Zone#366, service_zone#367, distances#418, shortest_distance#424, concat_ws(->, cast(id#364 as string), 1) AS id_to_1#431]
+- Project [id#364, Borough#365, Zone#366, service_zone#367, distances#418, distances#418[cast(1 as bigint)] AS shortest_distance#424]
+- Project [id#364, Borough#365, Zone#366, service_zone#367, UDF(distances#368) AS distances#418]
+- Project [attr#306.id AS id#364, attr#306.Borough AS Borough#365, attr#306.Zone AS Zone#366, attr#306.service_zone AS service_zone#367, graphx_attr#342.distances AS distances#368]
+- Project [attr#306, graphx_attr#342]
+- Project [new_id#308L, attr#306, graphx_attr#342]
+- Join Inner, (new_id#308L = new_id#337L)
:- Project [new_id#308L, attr#306]
: +- Project [cast(attr#306.id as bigint) AS new_id#308L, attr#306.id AS id#309, attr#306]
: +- Project [struct(id, id#105, Borough, Borough#106, Zone, Zone#107, service_zone, service_zone#108) AS attr#306]
: +- LogicalRDD [id#105, Borough#106, Zone#107, service_zone#108], false
+- Project [struct(distances, graphx_attr#338._1) AS graphx_attr#342, new_id#337L]
+- Project [_1#333L AS new_id#337L, _2#334 AS graphx_attr#338]
+- SerializeFromObject [knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1 AS _1#333L, if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)) null else named_struct(_1, mapobjects(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, 1), if (isnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, 1))) null else named_struct(_1, knownnotnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, 1))._1, _2, knownnotnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, 1))._2), knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)._1, None)) AS _2#334]
+- ExternalRDD [obj#332]
== Analyzed Logical Plan ==
id_to_1: string, shortest_distance: string
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(id_to_1#431 as string) AS id_to_1#443, cast(shortest_distance#424 as string) AS shortest_distance#444]
+- Project [id_to_1#431, shortest_distance#424]
+- Project [id#364, Borough#365, Zone#366, service_zone#367, distances#418, shortest_distance#424, concat_ws(->, cast(id#364 as string), 1) AS id_to_1#431]
+- Project [id#364, Borough#365, Zone#366, service_zone#367, distances#418, distances#418[cast(1 as bigint)] AS shortest_distance#424]
+- Project [id#364, Borough#365, Zone#366, service_zone#367, UDF(distances#368) AS distances#418]
+- Project [attr#306.id AS id#364, attr#306.Borough AS Borough#365, attr#306.Zone AS Zone#366, attr#306.service_zone AS service_zone#367, graphx_attr#342.distances AS distances#368]
+- Project [attr#306, graphx_attr#342]
+- Project [new_id#308L, attr#306, graphx_attr#342]
+- Join Inner, (new_id#308L = new_id#337L)
:- Project [new_id#308L, attr#306]
: +- Project [cast(attr#306.id as bigint) AS new_id#308L, attr#306.id AS id#309, attr#306]
: +- Project [struct(id, id#105, Borough, Borough#106, Zone, Zone#107, service_zone, service_zone#108) AS attr#306]
: +- LogicalRDD [id#105, Borough#106, Zone#107, service_zone#108], false
+- Project [struct(distances, graphx_attr#338._1) AS graphx_attr#342, new_id#337L]
+- Project [_1#333L AS new_id#337L, _2#334 AS graphx_attr#338]
+- SerializeFromObject [knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1 AS _1#333L, if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)) null else named_struct(_1, mapobjects(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, 1), if (isnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, 1))) null else named_struct(_1, knownnotnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, 1))._1, _2, knownnotnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, 1))._2), knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)._1, None)) AS _2#334]
+- ExternalRDD [obj#332]
== Optimized Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
+- Project [concat_ws(->, cast(attr#306.id as string), 1) AS id_to_1#443, cast(UDF(graphx_attr#342.distances)[1] as string) AS shortest_distance#444]
+- Join Inner, (new_id#308L = new_id#337L)
:- Project [cast(id#105 as bigint) AS new_id#308L, struct(id, id#105, Borough, Borough#106, Zone, Zone#107, service_zone, service_zone#108) AS attr#306]
: +- Filter isnotnull(cast(id#105 as bigint))
: +- LogicalRDD [id#105, Borough#106, Zone#107, service_zone#108], false
+- Project [struct(distances, _2#334._1) AS graphx_attr#342, _1#333L AS new_id#337L]
+- SerializeFromObject [knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1 AS _1#333L, if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)) null else named_struct(_1, mapobjects(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1), if (isnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1))) null else named_struct(_1, knownnotnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1))._1, _2, knownnotnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1))._2), knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)._1, None)) AS _2#334]
+- ExternalRDD [obj#332]
== Physical Plan ==
CollectLimit 11
+- *(5) Project [concat_ws(->, cast(attr#306.id as string), 1) AS id_to_1#443, cast(UDF(graphx_attr#342.distances)[1] as string) AS shortest_distance#444]
+- *(5) SortMergeJoin [new_id#308L], [new_id#337L], Inner
:- *(2) Sort [new_id#308L ASC NULLS FIRST], false, 0
: +- Exchange hashpartitioning(new_id#308L, 200), true, [id=#467]
: +- *(1) Project [cast(id#105 as bigint) AS new_id#308L, struct(id, id#105, Borough, Borough#106, Zone, Zone#107, service_zone, service_zone#108) AS attr#306]
: +- *(1) Filter isnotnull(cast(id#105 as bigint))
: +- *(1) Scan ExistingRDD[id#105,Borough#106,Zone#107,service_zone#108]
+- *(4) Sort [new_id#337L ASC NULLS FIRST], false, 0
+- Exchange hashpartitioning(new_id#337L, 200), true, [id=#476]
+- *(3) Project [struct(distances, _2#334._1) AS graphx_attr#342, _1#333L AS new_id#337L]
+- *(3) SerializeFromObject [knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._1 AS _1#333L, if (isnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)) null else named_struct(_1, mapobjects(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1), if (isnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1))) null else named_struct(_1, knownnotnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1))._1, _2, knownnotnull(lambdavariable(MapObject, ObjectType(class scala.Tuple2), true, -1))._2), knownnotnull(knownnotnull(assertnotnull(input[0, scala.Tuple2, true]))._2)._1, None)) AS _2#334]
+- Scan[obj#332]