digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (15)\n \nduration: total (min, med, max (stageId: taskId))\n4 ms (1 ms, 1 ms, 2 ms (stage 21.0: task 220))";
2 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 25.0: task 221))<br>peak memory total (min, med, max (stageId: taskId))<br>192.8 MiB (64.3 MiB, 64.3 MiB, 64.3 MiB (stage 25.0: task 221))<br>number of output rows: 15<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 25.0: task 221))"];
}
3 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 1,285<br>shuffle write time total (min, med, max (stageId: taskId))<br>581 ms (2 ms, 2 ms, 8 ms (stage 20.0: task 147))<br>records read: 15<br>local bytes read total (min, med, max (stageId: taskId))<br>1883.0 B (264.0 B, 628.0 B, 991.0 B (stage 25.0: task 222))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 25.0: task 221))<br>local blocks read: 15<br>data size total (min, med, max (stageId: taskId))<br>251.0 KiB (200.0 B, 1200.0 B, 2.9 KiB (stage 20.0: task 164))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>155.8 KiB (120.0 B, 769.0 B, 1837.0 B (stage 20.0: task 164))"];
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (14)\n \nduration: total (min, med, max (stageId: taskId))\n1.5 s (4 ms, 5 ms, 139 ms (stage 20.0: task 20))";
5 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>474 ms (0 ms, 1 ms, 97 ms (stage 20.0: task 20))<br>peak memory total (min, med, max (stageId: taskId))<br>12.5 GiB (64.3 MiB, 64.3 MiB, 64.3 MiB (stage 20.0: task 20))<br>number of output rows: 1,285<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 20.0: task 20))"];
}
6 [labelType="html" label="<b>SortMergeJoin</b><br><br>number of output rows: 1,285"];
subgraph cluster7 {
isCluster="true";
label="WholeStageCodegen (6)\n \nduration: total (min, med, max (stageId: taskId))\n4.8 s (14 ms, 16 ms, 442 ms (stage 20.0: task 20))";
8 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 20.0: task 20))<br>peak memory total (min, med, max (stageId: taskId))<br>12.5 GiB (64.1 MiB, 64.1 MiB, 64.1 MiB (stage 20.0: task 20))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 20.0: task 20))"];
}
9 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 2,760<br>shuffle write time: 23 ms<br>records read: 2,760<br>local bytes read total (min, med, max (stageId: taskId))<br>96.2 KiB (260.0 B, 497.0 B, 737.0 B (stage 20.0: task 201))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 20.0: task 20))<br>local blocks read: 200<br>data size: 539.1 KiB<br>shuffle bytes written: 96.2 KiB"];
subgraph cluster10 {
isCluster="true";
label="WholeStageCodegen (5)\n \nduration: 653 ms";
11 [labelType="html" label="<br><b>Project</b><br><br>"];
12 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 2,760"];
13 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 2,690,019"];
14 [labelType="html" label="<br><b>Project</b><br><br>"];
15 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 88,234"];
16 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 88,234"];
17 [labelType="html" label="<br><b>Project</b><br><br>"];
}
18 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 920.5 KiB<br>number of output rows: 88,234"];
19 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1055.6 KiB<br>time to collect: 255 ms<br>time to build: 13 ms<br>time to broadcast: 4 ms"];
subgraph cluster20 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 124 ms";
21 [labelType="html" label="<br><b>Project</b><br><br>"];
}
22 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 119.6 KiB<br>number of output rows: 4,039"];
23 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1055.6 KiB<br>time to collect: 243 ms<br>time to build: 15 ms<br>time to broadcast: 4 ms"];
subgraph cluster24 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 108 ms";
25 [labelType="html" label="<br><b>Project</b><br><br>"];
}
26 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 119.6 KiB<br>number of output rows: 4,039"];
27 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 8.0 MiB<br>time to collect: 599 ms<br>time to build: 65 ms<br>time to broadcast: 9 ms"];
subgraph cluster28 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: 174 ms";
29 [labelType="html" label="<br><b>Project</b><br><br>"];
}
30 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 920.5 KiB<br>number of output rows: 88,234"];
31 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 2.1 MiB<br>time to collect: 385 ms<br>time to build: 35 ms<br>time to broadcast: 6 ms"];
subgraph cluster32 {
isCluster="true";
label="WholeStageCodegen (4)\n \nduration: 108 ms";
33 [labelType="html" label="<br><b>Project</b><br><br>"];
}
34 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 119.6 KiB<br>number of output rows: 4,039"];
subgraph cluster35 {
isCluster="true";
label="WholeStageCodegen (13)";
36 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>389 ms (1 ms, 1 ms, 100 ms (stage 20.0: task 88))<br>peak memory total (min, med, max (stageId: taskId))<br>12.5 GiB (64.3 MiB, 64.3 MiB, 64.3 MiB (stage 20.0: task 20))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 20.0: task 20))"];
}
37 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 1,612,010<br>shuffle write time: 65 ms<br>records read: 1,612,010<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 20.0: task 20))<br>remote bytes read total (min, med, max (stageId: taskId))<br>26.8 MiB (133.3 KiB, 137.4 KiB, 141.0 KiB (stage 20.0: task 54))<br>remote blocks read: 200<br>data size: 307.5 MiB<br>shuffle bytes written: 26.8 MiB"];
subgraph cluster38 {
isCluster="true";
label="WholeStageCodegen (12)\n \nduration: 1.7 s";
39 [labelType="html" label="<br><b>Project</b><br><br>"];
40 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 1,612,010"];
41 [labelType="html" label="<br><b>Project</b><br><br>"];
42 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 2,690,019"];
43 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 2,690,019"];
44 [labelType="html" label="<br><b>Project</b><br><br>"];
45 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 88,234"];
46 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 88,234"];
47 [labelType="html" label="<br><b>Project</b><br><br>"];
}
48 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 920.5 KiB<br>number of output rows: 88,234"];
49 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1055.6 KiB<br>time to collect: 607 ms<br>time to build: 18 ms<br>time to broadcast: 3 ms"];
subgraph cluster50 {
isCluster="true";
label="WholeStageCodegen (10)\n \nduration: 119 ms";
51 [labelType="html" label="<br><b>Project</b><br><br>"];
}
52 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 119.6 KiB<br>number of output rows: 4,039"];
53 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 12.0 MiB<br>time to collect: 450 ms<br>time to build: 75 ms<br>time to broadcast: 26 ms"];
subgraph cluster54 {
isCluster="true";
label="WholeStageCodegen (11)\n \nduration: 162 ms";
55 [labelType="html" label="<br><b>Project</b><br><br>"];
}
56 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 920.5 KiB<br>number of output rows: 88,234"];
2->0;
3->2;
5->3;
6->5;
8->6;
9->8;
11->9;
12->11;
13->12;
14->13;
15->14;
16->15;
17->16;
18->17;
19->16;
21->19;
22->21;
23->15;
25->23;
26->25;
27->13;
29->27;
30->29;
31->12;
33->31;
34->33;
36->6;
37->36;
39->37;
40->39;
41->40;
42->41;
43->42;
44->43;
45->44;
46->45;
47->46;
48->47;
19->46;
23->45;
27->43;
49->42;
51->49;
52->51;
53->40;
55->53;
56->55;
}
57
CollectLimit 11
HashAggregate(keys=[a#200, b#202, c#227], functions=[])
WholeStageCodegen (15)
Exchange hashpartitioning(a#200, b#202, c#227, 200), true, [id=#590]
HashAggregate(keys=[a#200, b#202, c#227], functions=[])
WholeStageCodegen (14)
SortMergeJoin [coalesce(a#200, [0,0,0,0,0,0]), isnull(a#200), coalesce(b#202, [0,0,0,0,0,0]), isnull(b#202), coalesce(c#227, [0,0,0,0,0,0]), isnull(c#227)], [coalesce(a#278, [0,0,0,0,0,0]), isnull(a#278), coalesce(b#279, [0,0,0,0,0,0]), isnull(b#279), coalesce(c#280, [0,0,0,0,0,0]), isnull(c#280)], LeftAnti
Sort [coalesce(a#200, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(a#200) ASC NULLS FIRST, coalesce(b#202, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(b#202) ASC NULLS FIRST, coalesce(c#227, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(c#227) ASC NULLS FIRST], false, 0
WholeStageCodegen (6)
Exchange hashpartitioning(coalesce(a#200, [0,0,0,0,0,0]), isnull(a#200), coalesce(b#202, [0,0,0,0,0,0]), isnull(b#202), coalesce(c#227, [0,0,0,0,0,0]), isnull(c#227), 200), true, [id=#409]
Project [a#200, b#202, c#227]
BroadcastHashJoin [a#200.education_year_id, a#200.education_school_id, __tmp-1043886091038848698#225.dst], [c#227.education_year_id, c#227.education_school_id, c#227.id], Inner, BuildRight, NOT (a#200.id = c#227.id)
BroadcastHashJoin [b#202.id], [__tmp-1043886091038848698#225.src], Inner, BuildRight
Project [a#200, b#202]
BroadcastHashJoin [__tmp3640351034883199571#198.dst], [b#202.id], Inner, BuildRight
BroadcastHashJoin [__tmp3640351034883199571#198.src], [a#200.id], Inner, BuildRight
Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
WholeStageCodegen (5)
FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#385]
Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#200]
WholeStageCodegen (1)
FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#391]
Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#202]
WholeStageCodegen (2)
FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<src:int,dst:int>, false].src as bigint))), [id=#398]
Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
WholeStageCodegen (3)
FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
BroadcastExchange HashedRelationBroadcastMode(List(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].education_year_id, input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].education_school_id, input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id)), [id=#404]
Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#227]
WholeStageCodegen (4)
FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
Sort [coalesce(a#278, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(a#278) ASC NULLS FIRST, coalesce(b#279, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(b#279) ASC NULLS FIRST, coalesce(c#280, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(c#280) ASC NULLS FIRST], false, 0
WholeStageCodegen (13)
Exchange hashpartitioning(coalesce(a#278, [0,0,0,0,0,0]), isnull(a#278), coalesce(b#279, [0,0,0,0,0,0]), isnull(b#279), coalesce(c#280, [0,0,0,0,0,0]), isnull(c#280), 200), true, [id=#525]
Project [a#278, b#279, c#280]
BroadcastHashJoin [a#278.id, c#280.id], [__tmp-430217833014886237#257.src, __tmp-430217833014886237#257.dst], Inner, BuildRight
Project [a#278, b#279, c#280]
BroadcastHashJoin [__tmp-1043886091038848698#225.dst], [c#280.id], Inner, BuildRight
BroadcastHashJoin [b#279.id], [__tmp-1043886091038848698#225.src], Inner, BuildRight
Project [a#278, b#279]
BroadcastHashJoin [__tmp3640351034883199571#198.dst], [b#279.id], Inner, BuildRight
BroadcastHashJoin [__tmp3640351034883199571#198.src], [a#278.id], Inner, BuildRight
Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
WholeStageCodegen (12)
FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#437]
Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#280]
WholeStageCodegen (10)
FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, struct<src:int,dst:int>, false].src as bigint), 32) | (cast(input[0, struct<src:int,dst:int>, false].dst as bigint) & 4294967295)))), [id=#444]
Project [struct(src, src#0, dst, dst#1) AS __tmp-430217833014886237#257]
WholeStageCodegen (11)
FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
== Parsed Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(source person#293 as string) AS source person#311, cast(intermediate friend#294 as string) AS intermediate friend#312, cast(target person#295 as string) AS target person#313, cast(education_school_id#299 as string) AS education_school_id#314, cast(education_year_id#300 as string) AS education_year_id#315]
+- Project [a#200.id AS source person#293, b#202.id AS intermediate friend#294, c#227.id AS target person#295, a#200.education_school_id AS education_school_id#299, a#200.education_year_id AS education_year_id#300]
+- Filter NOT (a#200.id = c#227.id)
+- Filter (a#200.education_year_id = c#227.education_year_id)
+- Filter (a#200.education_school_id = c#227.education_school_id)
+- Project [a#200, b#202, c#227]
+- Except false
:- Project [a#200, b#202, c#227]
: +- Join Inner, (__tmp-1043886091038848698#225.dst = c#227.id)
: :- Join Inner, (__tmp-1043886091038848698#225.src = b#202.id)
: : :- Project [a#200, b#202]
: : : +- Join Inner, (__tmp3640351034883199571#198.dst = b#202.id)
: : : :- Join Inner, (__tmp3640351034883199571#198.src = a#200.id)
: : : : :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
: : : : : +- Relation[src#0,dst#1] csv
: : : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#200]
: : : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#202]
: : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#227]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [a#278, b#279, c#280]
+- Join Inner, ((__tmp-430217833014886237#257.src = a#278.id) AND (__tmp-430217833014886237#257.dst = c#280.id))
:- Project [a#278, b#279, c#280]
: +- Join Inner, (__tmp-1043886091038848698#225.dst = c#280.id)
: :- Join Inner, (__tmp-1043886091038848698#225.src = b#279.id)
: : :- Project [a#278, b#279]
: : : +- Join Inner, (__tmp3640351034883199571#198.dst = b#279.id)
: : : :- Join Inner, (__tmp3640351034883199571#198.src = a#278.id)
: : : : :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
: : : : : +- Relation[src#0,dst#1] csv
: : : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#278]
: : : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#279]
: : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#280]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(src, src#0, dst, dst#1) AS __tmp-430217833014886237#257]
+- Relation[src#0,dst#1] csv
== Analyzed Logical Plan ==
source person: string, intermediate friend: string, target person: string, education_school_id: string, education_year_id: string
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(source person#293 as string) AS source person#311, cast(intermediate friend#294 as string) AS intermediate friend#312, cast(target person#295 as string) AS target person#313, cast(education_school_id#299 as string) AS education_school_id#314, cast(education_year_id#300 as string) AS education_year_id#315]
+- Project [a#200.id AS source person#293, b#202.id AS intermediate friend#294, c#227.id AS target person#295, a#200.education_school_id AS education_school_id#299, a#200.education_year_id AS education_year_id#300]
+- Filter NOT (a#200.id = c#227.id)
+- Filter (a#200.education_year_id = c#227.education_year_id)
+- Filter (a#200.education_school_id = c#227.education_school_id)
+- Project [a#200, b#202, c#227]
+- Except false
:- Project [a#200, b#202, c#227]
: +- Join Inner, (__tmp-1043886091038848698#225.dst = c#227.id)
: :- Join Inner, (__tmp-1043886091038848698#225.src = b#202.id)
: : :- Project [a#200, b#202]
: : : +- Join Inner, (__tmp3640351034883199571#198.dst = b#202.id)
: : : :- Join Inner, (__tmp3640351034883199571#198.src = a#200.id)
: : : : :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
: : : : : +- Relation[src#0,dst#1] csv
: : : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#200]
: : : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#202]
: : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#227]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [a#278, b#279, c#280]
+- Join Inner, ((__tmp-430217833014886237#257.src = a#278.id) AND (__tmp-430217833014886237#257.dst = c#280.id))
:- Project [a#278, b#279, c#280]
: +- Join Inner, (__tmp-1043886091038848698#225.dst = c#280.id)
: :- Join Inner, (__tmp-1043886091038848698#225.src = b#279.id)
: : :- Project [a#278, b#279]
: : : +- Join Inner, (__tmp3640351034883199571#198.dst = b#279.id)
: : : :- Join Inner, (__tmp3640351034883199571#198.src = a#278.id)
: : : : :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
: : : : : +- Relation[src#0,dst#1] csv
: : : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#278]
: : : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#279]
: : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#280]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(src, src#0, dst, dst#1) AS __tmp-430217833014886237#257]
+- Relation[src#0,dst#1] csv
== Optimized Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
+- Aggregate [a#200, b#202, c#227], [cast(a#200.id as string) AS source person#311, cast(b#202.id as string) AS intermediate friend#312, cast(c#227.id as string) AS target person#313, cast(a#200.education_school_id as string) AS education_school_id#314, cast(a#200.education_year_id as string) AS education_year_id#315]
+- Join LeftAnti, (((a#200 <=> a#278) AND (b#202 <=> b#279)) AND (c#227 <=> c#280))
:- Project [a#200, b#202, c#227]
: +- Join Inner, ((((a#200.education_year_id = c#227.education_year_id) AND NOT (a#200.id = c#227.id)) AND (a#200.education_school_id = c#227.education_school_id)) AND (__tmp-1043886091038848698#225.dst = c#227.id))
: :- Join Inner, (__tmp-1043886091038848698#225.src = b#202.id)
: : :- Project [a#200, b#202]
: : : +- Join Inner, (__tmp3640351034883199571#198.dst = b#202.id)
: : : :- Join Inner, (__tmp3640351034883199571#198.src = a#200.id)
: : : : :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
: : : : : +- Relation[src#0,dst#1] csv
: : : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#200]
: : : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#202]
: : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#227]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [a#278, b#279, c#280]
+- Join Inner, ((__tmp-430217833014886237#257.src = a#278.id) AND (__tmp-430217833014886237#257.dst = c#280.id))
:- Project [a#278, b#279, c#280]
: +- Join Inner, (__tmp-1043886091038848698#225.dst = c#280.id)
: :- Join Inner, (__tmp-1043886091038848698#225.src = b#279.id)
: : :- Project [a#278, b#279]
: : : +- Join Inner, (__tmp3640351034883199571#198.dst = b#279.id)
: : : :- Join Inner, (__tmp3640351034883199571#198.src = a#278.id)
: : : : :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
: : : : : +- Relation[src#0,dst#1] csv
: : : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#278]
: : : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : : +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#279]
: : : +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
: : +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#280]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(src, src#0, dst, dst#1) AS __tmp-430217833014886237#257]
+- Relation[src#0,dst#1] csv
== Physical Plan ==
CollectLimit 11
+- *(15) HashAggregate(keys=[a#200, b#202, c#227], functions=[], output=[source person#311, intermediate friend#312, target person#313, education_school_id#314, education_year_id#315])
+- Exchange hashpartitioning(a#200, b#202, c#227, 200), true, [id=#590]
+- *(14) HashAggregate(keys=[a#200, b#202, c#227], functions=[], output=[a#200, b#202, c#227])
+- SortMergeJoin [coalesce(a#200, [0,0,0,0,0,0]), isnull(a#200), coalesce(b#202, [0,0,0,0,0,0]), isnull(b#202), coalesce(c#227, [0,0,0,0,0,0]), isnull(c#227)], [coalesce(a#278, [0,0,0,0,0,0]), isnull(a#278), coalesce(b#279, [0,0,0,0,0,0]), isnull(b#279), coalesce(c#280, [0,0,0,0,0,0]), isnull(c#280)], LeftAnti
:- *(6) Sort [coalesce(a#200, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(a#200) ASC NULLS FIRST, coalesce(b#202, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(b#202) ASC NULLS FIRST, coalesce(c#227, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(c#227) ASC NULLS FIRST], false, 0
: +- Exchange hashpartitioning(coalesce(a#200, [0,0,0,0,0,0]), isnull(a#200), coalesce(b#202, [0,0,0,0,0,0]), isnull(b#202), coalesce(c#227, [0,0,0,0,0,0]), isnull(c#227), 200), true, [id=#409]
: +- *(5) Project [a#200, b#202, c#227]
: +- *(5) BroadcastHashJoin [a#200.education_year_id, a#200.education_school_id, __tmp-1043886091038848698#225.dst], [c#227.education_year_id, c#227.education_school_id, c#227.id], Inner, BuildRight, NOT (a#200.id = c#227.id)
: :- *(5) BroadcastHashJoin [b#202.id], [__tmp-1043886091038848698#225.src], Inner, BuildRight
: : :- *(5) Project [a#200, b#202]
: : : +- *(5) BroadcastHashJoin [__tmp3640351034883199571#198.dst], [b#202.id], Inner, BuildRight
: : : :- *(5) BroadcastHashJoin [__tmp3640351034883199571#198.src], [a#200.id], Inner, BuildRight
: : : : :- *(5) Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
: : : : : +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
: : : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#385]
: : : : +- *(1) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#200]
: : : : +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
: : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#391]
: : : +- *(2) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#202]
: : : +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
: : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<src:int,dst:int>, false].src as bigint))), [id=#398]
: : +- *(3) Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
: : +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
: +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].education_year_id, input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].education_school_id, input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id)), [id=#404]
: +- *(4) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#227]
: +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
+- *(13) Sort [coalesce(a#278, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(a#278) ASC NULLS FIRST, coalesce(b#279, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(b#279) ASC NULLS FIRST, coalesce(c#280, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(c#280) ASC NULLS FIRST], false, 0
+- Exchange hashpartitioning(coalesce(a#278, [0,0,0,0,0,0]), isnull(a#278), coalesce(b#279, [0,0,0,0,0,0]), isnull(b#279), coalesce(c#280, [0,0,0,0,0,0]), isnull(c#280), 200), true, [id=#525]
+- *(12) Project [a#278, b#279, c#280]
+- *(12) BroadcastHashJoin [a#278.id, c#280.id], [__tmp-430217833014886237#257.src, __tmp-430217833014886237#257.dst], Inner, BuildRight
:- *(12) Project [a#278, b#279, c#280]
: +- *(12) BroadcastHashJoin [__tmp-1043886091038848698#225.dst], [c#280.id], Inner, BuildRight
: :- *(12) BroadcastHashJoin [b#279.id], [__tmp-1043886091038848698#225.src], Inner, BuildRight
: : :- *(12) Project [a#278, b#279]
: : : +- *(12) BroadcastHashJoin [__tmp3640351034883199571#198.dst], [b#279.id], Inner, BuildRight
: : : :- *(12) BroadcastHashJoin [__tmp3640351034883199571#198.src], [a#278.id], Inner, BuildRight
: : : : :- *(12) Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
: : : : : +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
: : : : +- ReusedExchange [a#278], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#385]
: : : +- ReusedExchange [b#279], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#391]
: : +- ReusedExchange [__tmp-1043886091038848698#225], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<src:int,dst:int>, false].src as bigint))), [id=#398]
: +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#437]
: +- *(10) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#280]
: +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
+- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, struct<src:int,dst:int>, false].src as bigint), 32) | (cast(input[0, struct<src:int,dst:int>, false].dst as bigint) & 4294967295)))), [id=#444]
+- *(11) Project [struct(src, src#0, dst, dst#1) AS __tmp-430217833014886237#257]
+- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>