digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (3)";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
3 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 11"];
4 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 522"];
5 [labelType="html" label="<br><b>Project</b><br><br>"];
}
6 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 920.5 KiB<br>number of output rows: 522"];
7 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1055.6 KiB<br>time to collect: 236 ms<br>time to build: 16 ms<br>time to broadcast: 5 ms"];
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 111 ms";
9 [labelType="html" label="<br><b>Project</b><br><br>"];
}
10 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 119.6 KiB<br>number of output rows: 4,039"];
11 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1088.0 KiB<br>time to collect: 231 ms<br>time to build: 18 ms<br>time to broadcast: 4 ms"];
subgraph cluster12 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 121 ms";
13 [labelType="html" label="<br><b>Project</b><br><br>"];
}
14 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 119.6 KiB<br>number of output rows: 4,039"];
2->0;
3->2;
4->3;
5->4;
6->5;
7->4;
9->7;
10->9;
11->3;
13->11;
14->13;
}
15
CollectLimit 11
Project [cast(a#115.id as string) AS id#174, cast(b#117.id as string) AS id#175, cast(b#117.birthday as string) AS birthday#176]
BroadcastHashJoin [a#115.birthday, __tmp-4363599943432734077#113.dst], [b#117.birthday, b#117.id], Inner, BuildRight
BroadcastHashJoin [__tmp-4363599943432734077#113.src], [a#115.id], Inner, BuildRight
Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
WholeStageCodegen (3)
FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#191]
Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
WholeStageCodegen (1)
FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].birthday as bigint), 32) | (cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint) & 4294967295)))), [id=#197]
Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
WholeStageCodegen (2)
FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
== Parsed Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(id#165 as string) AS id#174, cast(id#166 as string) AS id#175, cast(birthday#167 as string) AS birthday#176]
+- Project [a#115.id AS id#165, b#117.id AS id#166, b#117.birthday AS birthday#167]
+- Filter (a#115.birthday = b#117.birthday)
+- Project [a#115, b#117]
+- Project [a#115, b#117]
+- Join Inner, (__tmp-4363599943432734077#113.dst = b#117.id)
:- Join Inner, (__tmp-4363599943432734077#113.src = a#115.id)
: :- Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
+- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
== Analyzed Logical Plan ==
id: string, id: string, birthday: string
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(id#165 as string) AS id#174, cast(id#166 as string) AS id#175, cast(birthday#167 as string) AS birthday#176]
+- Project [a#115.id AS id#165, b#117.id AS id#166, b#117.birthday AS birthday#167]
+- Filter (a#115.birthday = b#117.birthday)
+- Project [a#115, b#117]
+- Project [a#115, b#117]
+- Join Inner, (__tmp-4363599943432734077#113.dst = b#117.id)
:- Join Inner, (__tmp-4363599943432734077#113.src = a#115.id)
: :- Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
+- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
== Optimized Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(a#115.id as string) AS id#174, cast(b#117.id as string) AS id#175, cast(b#117.birthday as string) AS birthday#176]
+- Join Inner, ((a#115.birthday = b#117.birthday) AND (__tmp-4363599943432734077#113.dst = b#117.id))
:- Join Inner, (__tmp-4363599943432734077#113.src = a#115.id)
: :- Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
+- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
== Physical Plan ==
CollectLimit 11
+- *(3) Project [cast(a#115.id as string) AS id#174, cast(b#117.id as string) AS id#175, cast(b#117.birthday as string) AS birthday#176]
+- *(3) BroadcastHashJoin [a#115.birthday, __tmp-4363599943432734077#113.dst], [b#117.birthday, b#117.id], Inner, BuildRight
:- *(3) BroadcastHashJoin [__tmp-4363599943432734077#113.src], [a#115.id], Inner, BuildRight
: :- *(3) Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
: : +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
: +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#191]
: +- *(1) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
: +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
+- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].birthday as bigint), 32) | (cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint) & 4294967295)))), [id=#197]
+- *(2) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
+- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...