digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (3)";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
3 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 11"];
4 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 11"];
5 [labelType="html" label="<br><b>Project</b><br><br>"];
}
6 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 920.5 KiB<br>number of output rows: 11"];
7 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1055.6 KiB<br>time to collect: 332 ms<br>time to build: 50 ms<br>time to broadcast: 7 ms"];
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 163 ms";
9 [labelType="html" label="<br><b>Project</b><br><br>"];
}
10 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 119.6 KiB<br>number of output rows: 4,039"];
11 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1055.6 KiB<br>time to collect: 2.6 s<br>time to build: 19 ms<br>time to broadcast: 12 ms"];
subgraph cluster12 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 1.5 s";
13 [labelType="html" label="<br><b>Project</b><br><br>"];
}
14 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 1 ms<br>size of files read: 119.6 KiB<br>number of output rows: 4,039"];
2->0;
3->2;
4->3;
5->4;
6->5;
7->4;
9->7;
10->9;
11->3;
13->11;
14->13;
}
15
CollectLimit 11
Project [cast(src#60 as string) AS src#89, cast(edge#58 as string) AS edge#90, cast(dst#62 as string) AS dst#91]
BroadcastHashJoin [edge#58.dst], [dst#62.id], Inner, BuildRight
BroadcastHashJoin [edge#58.src], [src#60.id], Inner, BuildRight
Project [struct(src, src#0, dst, dst#1) AS edge#58]
WholeStageCodegen (3)
FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#68]
Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS src#60]
WholeStageCodegen (1)
FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#74]
Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS dst#62]
WholeStageCodegen (2)
FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
== Parsed Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(src#60 as string) AS src#89, cast(edge#58 as string) AS edge#90, cast(dst#62 as string) AS dst#91]
+- Project [src#60, edge#58, dst#62]
+- Join Inner, (edge#58.dst = dst#62.id)
:- Join Inner, (edge#58.src = src#60.id)
: :- Project [struct(src, src#0, dst, dst#1) AS edge#58]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS src#60]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS dst#62]
+- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
== Analyzed Logical Plan ==
src: string, edge: string, dst: string
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(src#60 as string) AS src#89, cast(edge#58 as string) AS edge#90, cast(dst#62 as string) AS dst#91]
+- Project [src#60, edge#58, dst#62]
+- Join Inner, (edge#58.dst = dst#62.id)
:- Join Inner, (edge#58.src = src#60.id)
: :- Project [struct(src, src#0, dst, dst#1) AS edge#58]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS src#60]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS dst#62]
+- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
== Optimized Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(src#60 as string) AS src#89, cast(edge#58 as string) AS edge#90, cast(dst#62 as string) AS dst#91]
+- Join Inner, (edge#58.dst = dst#62.id)
:- Join Inner, (edge#58.src = src#60.id)
: :- Project [struct(src, src#0, dst, dst#1) AS edge#58]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS src#60]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS dst#62]
+- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
== Physical Plan ==
CollectLimit 11
+- *(3) Project [cast(src#60 as string) AS src#89, cast(edge#58 as string) AS edge#90, cast(dst#62 as string) AS dst#91]
+- *(3) BroadcastHashJoin [edge#58.dst], [dst#62.id], Inner, BuildRight
:- *(3) BroadcastHashJoin [edge#58.src], [src#60.id], Inner, BuildRight
: :- *(3) Project [struct(src, src#0, dst, dst#1) AS edge#58]
: : +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
: +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#68]
: +- *(1) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS src#60]
: +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
+- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#74]
+- *(2) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS dst#62]
+- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...