diff --git a/Cargo.toml b/Cargo.toml index 7a350716..bef9ffa2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ rocksdb = "0.12.2" lru = "0.1.15" scoped_threadpool = "0.1.9" walkdir = "2.0.0" +time = "0.1" [dependencies.hdfs] git="https://github.com/UNSW-database/hdfs-rs.git" diff --git a/src/generic/graph.rs b/src/generic/graph.rs index 9ed35be2..03ee0252 100644 --- a/src/generic/graph.rs +++ b/src/generic/graph.rs @@ -240,6 +240,14 @@ pub trait GraphLabelTrait: .filter_map(|(s, d)| self.get_edge_label(s, d)) .collect() } + + /// Trait for labelled graphs. + fn neighbors_of_node_iter(&self, id: Id, label: Option) -> Iter; + fn neighbors_of_edge_iter(&self, id: Id, label: Option) -> Iter; + fn neighbors_of_node(&self, id: Id, label: Option) -> Cow<[Id]>; + fn neighbors_of_edge(&self, id: Id, label: Option) -> Cow<[Id]>; + fn nodes_with_label(&self, label: Option) -> Iter; + fn edges_with_label(&self, label: Option) -> Iter<(Id, Id)>; } pub trait MutGraphLabelTrait: diff --git a/src/generic/node.rs b/src/generic/node.rs index 460c4936..da45cf21 100644 --- a/src/generic/node.rs +++ b/src/generic/node.rs @@ -20,6 +20,7 @@ */ use generic::IdType; pub use graph_impl::graph_map::NodeMap; +use graph_impl::multi_graph::node::MultiNode; pub use graph_impl::static_graph::StaticNode; pub trait NodeTrait { @@ -55,6 +56,7 @@ pub enum OwnedNodeType { pub enum NodeType<'a, Id: 'a + IdType, L: 'a + IdType = Id> { NodeMap(&'a NodeMap), StaticNode(StaticNode), + MultiNode(MultiNode), None, } @@ -133,6 +135,7 @@ impl<'a, Id: IdType, L: IdType> NodeType<'a, Id, L> { match self { NodeType::NodeMap(node) => node, NodeType::StaticNode(_) => panic!("`unwrap_nodemap()` on `StaticNode`"), + NodeType::MultiNode(_) => panic!("`unwrap_nodemap()` on `MultiNode`"), NodeType::None => panic!("`unwrap_nodemap()` on `None`"), } } @@ -142,6 +145,7 @@ impl<'a, Id: IdType, L: IdType> NodeType<'a, Id, L> { match self { NodeType::NodeMap(_) => panic!("`unwrap_staticnode()` on `NodeMap`"), 
NodeType::StaticNode(node) => node, + NodeType::MultiNode(_) => panic!("`unwrap_staticnode()` on `MultiNode`"), NodeType::None => panic!("`unwrap_staticnode()` on `None`"), } } @@ -161,6 +165,7 @@ impl<'a, Id: IdType, L: IdType> NodeTrait for NodeType<'a, Id, L> { match self { NodeType::NodeMap(node) => node.get_id(), NodeType::StaticNode(ref node) => node.get_id(), + NodeType::MultiNode(ref node) => node.get_id(), NodeType::None => panic!("`get_id()` on `None`"), } } @@ -170,6 +175,7 @@ impl<'a, Id: IdType, L: IdType> NodeTrait for NodeType<'a, Id, L> { match self { NodeType::NodeMap(node) => node.get_label_id(), NodeType::StaticNode(ref node) => node.get_label_id(), + NodeType::MultiNode(ref node) => node.get_label_id(), NodeType::None => None, } } diff --git a/src/graph_impl/graph_map/graph.rs b/src/graph_impl/graph_map/graph.rs index 3b714eef..baae10f2 100644 --- a/src/graph_impl/graph_map/graph.rs +++ b/src/graph_impl/graph_map/graph.rs @@ -563,6 +563,30 @@ impl fn get_edge_label_map(&self) -> &SetMap { &self.edge_label_map } + + fn neighbors_of_node_iter(&self, id: Id, label: Option) -> Iter { + unimplemented!() + } + + fn neighbors_of_edge_iter(&self, id: Id, label: Option) -> Iter { + unimplemented!() + } + + fn neighbors_of_node(&self, id: Id, label: Option) -> Cow<[Id]> { + unimplemented!() + } + + fn neighbors_of_edge(&self, id: Id, label: Option) -> Cow<[Id]> { + unimplemented!() + } + + fn nodes_with_label(&self, label: Option) -> Iter { + unimplemented!() + } + + fn edges_with_label(&self, label: Option) -> Iter<(Id, Id)> { + unimplemented!() + } } impl diff --git a/src/graph_impl/graph_vec/mod.rs b/src/graph_impl/graph_vec/mod.rs index 397a42b9..413f3d8d 100644 --- a/src/graph_impl/graph_vec/mod.rs +++ b/src/graph_impl/graph_vec/mod.rs @@ -157,7 +157,6 @@ impl TypedGraphVec::new(); g.add_node(0, Some("node0")); g.add_node(2, Some("node2")); - g.add_node(2, Some("node2")); g.add_edge(0, 1, Some("(0,1)")); g.add_edge(1, 0, Some("(0,1)"));
g.add_edge(0, 3, Some("(0,3)")); + g.add_edge(3, 0, Some("(0,3)")); let un_graph = g.clone().into_static::(); let un_graph_true = UnStaticGraph::<&str, &str, u16>::from_raw( 4, - 1, - EdgeVec::with_labels(vec![0, 2, 3, 3, 3], vec![1, 3, 0], vec![0, 1, 0]), + 2, + EdgeVec::with_labels(vec![0, 2, 3, 3, 4], vec![1, 3, 0, 0], vec![0, 1, 0, 1]), None, Some(vec![0, u16::max_value(), 1, u16::max_value()]), vec!["node0", "node2"].into(), @@ -366,17 +365,19 @@ mod tests { g.add_edge(0, 1, Some("(0,1)")); g.add_in_edge(1, 0); g.add_edge(0, 3, Some("(0,3)")); + g.add_in_edge(3, 0); assert_eq!(g.node_count(), 2); assert_eq!(g.edge_count(), 2); let di_graph = g.clone().into_static::(); + println!("My turn..."); let di_graph_true = DiStaticGraph::<&str>::from_raw( 4, 2, EdgeVec::with_labels(vec![0, 2, 2, 2, 2], vec![1, 3], vec![0, 1]), - Some(EdgeVec::new(vec![0, 0, 1, 1, 1], vec![0])), + Some(EdgeVec::new(vec![0, 0, 1, 1, 2], vec![0, 0])), Some(vec![0, u32::max_value(), 1, u32::max_value()]), vec!["node0", "node2"].into(), vec!["(0,1)", "(0,3)"].into(), diff --git a/src/graph_impl/mod.rs b/src/graph_impl/mod.rs index 653feb10..de0661bb 100644 --- a/src/graph_impl/mod.rs +++ b/src/graph_impl/mod.rs @@ -20,6 +20,7 @@ */ pub mod graph_map; pub mod graph_vec; +pub mod multi_graph; pub mod static_graph; pub use graph_impl::graph_map::{ @@ -36,6 +37,7 @@ pub use graph_impl::static_graph::{ pub enum GraphImpl { GraphMap, StaticGraph, + MultiGraph, } impl ::std::str::FromStr for GraphImpl { @@ -45,6 +47,7 @@ impl ::std::str::FromStr for GraphImpl { match s.as_ref() { "graphmap" => Ok(GraphImpl::GraphMap), "staticgraph" => Ok(GraphImpl::StaticGraph), + "multigraph" => Ok(GraphImpl::MultiGraph), _other => Err(format!("Unsupported implementation {:?}", _other)), } } diff --git a/src/graph_impl/multi_graph/mod.rs b/src/graph_impl/multi_graph/mod.rs new file mode 100644 index 00000000..5fbc9599 --- /dev/null +++ b/src/graph_impl/multi_graph/mod.rs @@ -0,0 +1,7 @@ +#[macro_use] +pub mod 
plan; +pub mod node; +pub mod planner; +pub mod query; +pub mod runner; +pub mod utils; diff --git a/src/graph_impl/multi_graph/node.rs b/src/graph_impl/multi_graph/node.rs new file mode 100644 index 00000000..710e3c8d --- /dev/null +++ b/src/graph_impl/multi_graph/node.rs @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 UNSW Sydney, Data and Knowledge Group. + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +use generic::{IdType, NodeTrait}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct MultiNode { + id: Id, + label: Option, +} + +impl MultiNode { + #[inline(always)] + pub fn new(id: Id, label: Option) -> Self { + MultiNode { id, label } + } + + #[inline(always)] + pub fn new_static(id: Id, label: L) -> Self { + MultiNode { + id, + label: if label == L::max_value() { + None + } else { + Some(label) + }, + } + } +} + +impl NodeTrait for MultiNode { + #[inline(always)] + fn get_id(&self) -> Id { + self.id + } + + #[inline(always)] + fn get_label_id(&self) -> Option { + self.label + } +} diff --git a/src/graph_impl/multi_graph/plan/mod.rs b/src/graph_impl/multi_graph/plan/mod.rs new file mode 100644 index 00000000..49620f2c --- /dev/null +++ b/src/graph_impl/multi_graph/plan/mod.rs @@ -0,0 +1,4 @@ +#[macro_use] +pub mod operator; +pub mod query_plan; +pub mod query_plan_worker; diff --git a/src/graph_impl/multi_graph/plan/operator/extend/EI.rs b/src/graph_impl/multi_graph/plan/operator/extend/EI.rs new file mode 100644 index 00000000..5f4b8d31 --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/extend/EI.rs @@ -0,0 +1,483 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::extend::Extend; +use graph_impl::multi_graph::plan::operator::extend::intersect::{ + BaseIntersect, Intersect, IntersectType, +}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::multi_graph::plan::operator::operator::{ + BaseOperator, CommonOperatorTrait, Operator, +}; +use graph_impl::multi_graph::plan::operator::scan::scan::Scan; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::planner::catalog::adj_list_descriptor::{ + AdjListDescriptor, Direction, +}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use 
graph_impl::static_graph::sorted_adj_vec::SortedAdjVec; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use itertools::Itertools; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::{Deref, DerefMut}; +use std::rc::Rc; + +pub static DIFFERENTIATE_FWD_BWD_SINGLE_ALD: bool = false; + +#[derive(Clone)] +pub enum CachingType { + None, + FullCaching, + PartialCaching, +} + +impl PartialEq for CachingType { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (CachingType::None, CachingType::None) + | (CachingType::FullCaching, CachingType::FullCaching) + | (CachingType::PartialCaching, CachingType::PartialCaching) => true, + _ => false, + } + } +} + +#[derive(Clone)] +pub enum EI { + Base(BaseEI), + Extend(Extend), + Intersect(Intersect), +} + +#[derive(Clone)] +pub struct BaseEI { + pub base_op: BaseOperator, + pub vertex_types: Vec, + pub to_type: i32, + pub to_query_vertex: String, + pub alds: Vec, + pub out_idx: usize, + pub vertex_idx: Vec, + pub vertex_idx_to_cache: Vec, + pub labels_or_to_types: Vec, + pub labels_or_to_types_to_cache: Vec, + pub adj_lists: Vec>>>, + pub adj_lists_to_cache: Vec>>>, + pub caching_type: CachingType, + is_intersection_cached: bool, + last_vertex_ids_intersected: Option>, + pub out_neighbours: Neighbours, + pub init_neighbours: Neighbours, + pub temp_neighbours: Neighbours, + pub cached_neighbours: Neighbours, +} + +impl BaseEI { + pub fn new( + to_query_vertex: String, + to_type: i32, + alds: Vec, + out_subgraph: QueryGraph, + in_subgraph: Option, + ) -> BaseEI { + let mut ei = BaseEI { + base_op: BaseOperator::new(out_subgraph, in_subgraph), + vertex_types: vec![], + to_type, + to_query_vertex, + alds, + out_idx: 0, + vertex_idx: vec![], + vertex_idx_to_cache: vec![], + labels_or_to_types: vec![], + labels_or_to_types_to_cache: vec![], + adj_lists: vec![], + adj_lists_to_cache: vec![], + caching_type: CachingType::None, + is_intersection_cached: false, + last_vertex_ids_intersected: None, + 
out_neighbours: Neighbours::new(), + init_neighbours: Neighbours::new(), + temp_neighbours: Neighbours::new(), + cached_neighbours: Neighbours::new(), + }; + ei.set_operator_name(); + ei + } + + fn set_operator_name(&mut self) { + let variables = self + .alds + .iter() + .map(|ald| { + ald.from_query_vertex.clone() + + "[" + + if let Direction::Fwd = ald.direction { + "Fwd" + } else { + "Bwd" + } + + "]" + }) + .sorted() + .join("-"); + if 1 == self.alds.len() { + self.base_op.name = "Single-Edge-extend".to_string(); + } else { + self.base_op.name = "Multi-Edge-extend".to_string(); + } + self.base_op.name += + &(" TO (".to_owned() + &self.to_query_vertex + ") From (" + &variables + ")"); + } + + pub fn is_intersection_cached(&mut self) -> bool { + self.is_intersection_cached = true; + for i in 0..self.last_vertex_ids_intersected.as_ref().unwrap().len() { + if self.last_vertex_ids_intersected.as_ref().unwrap()[i] + != self.base_op.probe_tuple.borrow()[self.vertex_idx_to_cache[i] as usize] + { + self.is_intersection_cached = false; + self.last_vertex_ids_intersected.as_mut().unwrap()[i] = + self.base_op.probe_tuple.borrow()[self.vertex_idx_to_cache[i] as usize]; + } + } + self.is_intersection_cached + } + pub fn init_caching(&mut self, last_repeated_vertex_idx: usize) { + if self.alds.len() == 1 { + return; + } + let num_cached_alds = self + .alds + .iter() + .filter(|&ald| ald.vertex_idx <= last_repeated_vertex_idx) + .count(); + if num_cached_alds <= 1 { + return; + } + if num_cached_alds == self.alds.len() { + self.caching_type = CachingType::FullCaching; + } else { + self.caching_type = CachingType::PartialCaching; + } + self.last_vertex_ids_intersected = Some(vec![Id::new(0); num_cached_alds]); + } + + fn init_extensions( + &mut self, + graph: &TypedStaticGraph, + ) { + match self.caching_type { + CachingType::None | CachingType::FullCaching => { + *&mut self.out_neighbours = Neighbours::new(); + if 1 == self.alds.len() { + return; + } + } + _ => {} + } + let 
mut largest_adj_list_size = 0; + for ald in &self.alds { + let label = if graph.is_sorted_by_node() { + self.to_type + } else { + ald.label + }; + let adj_list_size = graph.get_largest_adj_list_size(label, ald.direction.clone()); + if adj_list_size > largest_adj_list_size { + largest_adj_list_size = adj_list_size; + } + } + if let CachingType::PartialCaching = self.caching_type { + self.out_neighbours = Neighbours::with_capacity(largest_adj_list_size); + } + self.init_neighbours = Neighbours::new(); + self.cached_neighbours = Neighbours::with_capacity(largest_adj_list_size); + if self.alds.len() > 2 { + self.temp_neighbours = Neighbours::with_capacity(largest_adj_list_size); + } + } + + fn set_alds_and_adj_lists( + &mut self, + graph: &TypedStaticGraph, + last_repeated_vertex_idx: usize, + ) { + for ald in &self.alds { + if let CachingType::PartialCaching = self.caching_type { + if ald.vertex_idx > last_repeated_vertex_idx { + self.vertex_idx.push(ald.vertex_idx); + self.labels_or_to_types.push(if graph.is_sorted_by_node() { + self.to_type + } else { + ald.label + }); + self.adj_lists.push(if let Direction::Fwd = ald.direction { + graph.get_fwd_adj_list().clone() + } else { + graph.get_bwd_adj_list().clone() + }); + continue; + } + } + self.vertex_idx_to_cache.push(ald.vertex_idx); + self.labels_or_to_types_to_cache + .push(if graph.is_sorted_by_node() { + self.to_type + } else { + ald.label + }); + self.adj_lists_to_cache + .push(if let Direction::Fwd = ald.direction { + graph.get_fwd_adj_list().clone() + } else { + graph.get_bwd_adj_list().clone() + }); + } + } + + pub fn execute_intersect(&mut self, idx: usize, intersect_type: IntersectType) -> usize { /* intersects the adjacency list selected by `idx` into the buffer pair named by `intersect_type`; returns 0 when the vertex has no adjacency list */ + let (adj_vec, label_or_type) = match intersect_type { + IntersectType::CachedOut | IntersectType::TempOut => ( + self.adj_lists[idx] /* non-cached list: pushed in step with vertex_idx / labels_or_to_types in set_alds_and_adj_lists */ +
[self.base_op.probe_tuple.borrow()[self.vertex_idx[idx]].id()] + .as_ref(), + self.labels_or_to_types[idx], + ), + _ => ( + self.adj_lists_to_cache[idx] + [self.base_op.probe_tuple.borrow()[self.vertex_idx_to_cache[idx]].id()] + .as_ref(), + self.labels_or_to_types_to_cache[idx], + ), + }; + let init = &mut self.init_neighbours; + let cached = &mut self.cached_neighbours; + let temp = &mut self.temp_neighbours; + let out = &mut self.out_neighbours; + adj_vec.map_or(0, |adj| match intersect_type { + IntersectType::InitCached => adj.intersect(label_or_type, init, cached), + IntersectType::TempCached => adj.intersect(label_or_type, temp, cached), + IntersectType::CachedOut => adj.intersect(label_or_type, cached, out), + IntersectType::TempOut => adj.intersect(label_or_type, temp, out), + }) + } +} + +impl CommonOperatorTrait for BaseEI { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + self.base_op.probe_tuple = probe_tuple.clone(); + self.caching_type = CachingType::None; + self.vertex_types = graph.get_node_types().clone(); + let last_repeated_vertex_idx = unsafe { /* NOTE(review): reads `prev` through the RefCell's raw pointer, skipping borrow checks - presumably because `prev` is still mutably borrowed while it drives this init; confirm nothing writes the field concurrently */ + let prev = self.base_op.prev.as_ref().unwrap().as_ptr(); + get_op_attr!(&*prev, last_repeated_vertex_idx) + }; + self.init_caching(last_repeated_vertex_idx); + self.init_extensions(graph); + self.set_alds_and_adj_lists(graph, last_repeated_vertex_idx); + self.base_op.next.iter_mut().for_each(|next_op| { + next_op.borrow_mut().init(probe_tuple.clone(), graph); + }); + } + + fn process_new_tuple(&mut self) { + panic!("unsupported operation exception") + } + + fn execute(&mut self) { + self.base_op.execute() + } + + fn get_alds_as_string(&self) -> String { /* "E<label>" for a single ALD (when direction is not differentiated), otherwise the sorted "F<label>"/"B<label>" entries joined by '-' */ + if !DIFFERENTIATE_FWD_BWD_SINGLE_ALD && 1 == self.alds.len() { + return "E".to_owned() + &self.alds[0].label.to_string(); + } + let mut directions = Vec::with_capacity(self.alds.len()); /* start empty: entries are pushed below, so pre-filling would leave blank strings (and leading dashes) in the joined result */ + for ald in &self.alds { + let dir = if let Direction::Fwd = ald.direction { + "F".to_owned() + } else { + "B".to_owned() + }; + directions.push(dir + &ald.label.to_string()); + } + directions.sort(); + directions.join("-") + } + + fn update_operator_name(&mut self, mut
query_vertex_to_index_map: HashMap) { + let mut prev_to_query_vertices = vec!["".to_owned(); query_vertex_to_index_map.len()]; + for (query_vertex, &index) in &query_vertex_to_index_map { + prev_to_query_vertices[index] = query_vertex.clone(); + } + self.base_op.name = + serde_json::to_string(&prev_to_query_vertices).unwrap() + " - " + &self.base_op.name; + query_vertex_to_index_map.insert( + self.to_query_vertex.clone(), + query_vertex_to_index_map.len(), + ); + self.base_op.next.iter_mut().foreach(|op| { + op.borrow_mut() + .update_operator_name(query_vertex_to_index_map.clone()) + }) + } + + fn copy(&self, _is_thread_safe: bool) -> Operator { + panic!("unsupported operation exception") + } + + fn is_same_as(&mut self, _op: &mut Rc>>) -> bool { + panic!("unsupported operation exception") + } + + fn get_num_out_tuples(&self) -> usize { + self.base_op.num_out_tuples + } +} + +impl EI { + pub fn has_multi_edge_extends(&self) -> bool { + let base_ei = get_ei_as_ref!(self); + if base_ei.alds.len() > 1 { + return true; + } + if let Some(prev) = &base_ei.base_op.prev { + return prev.borrow().has_multi_edge_extends(); + } + false + } + + pub fn make( + to_qvertex: String, + to_type: i32, + alds: Vec, + out_subgraph: QueryGraph, + in_subgraph: QueryGraph, + out_qvertex_to_idx_map: HashMap, + ) -> EI { + if 1 == alds.len() { + return EI::Extend(Extend::new( + to_qvertex, + to_type, + alds, + out_subgraph, + Some(in_subgraph), + out_qvertex_to_idx_map, + )); + } + EI::Intersect(Intersect::BaseIntersect(BaseIntersect::new( + to_qvertex, + to_type, + alds, + out_subgraph, + Some(in_subgraph), + out_qvertex_to_idx_map, + ))) + } +} + +impl CommonOperatorTrait for EI { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + match self { + EI::Base(base) => base.init(probe_tuple, graph), + EI::Intersect(intersect) => intersect.init(probe_tuple, graph), + EI::Extend(extend) => extend.init(probe_tuple, graph), + } + } + + fn process_new_tuple(&mut self) { 
+ match self { + EI::Base(base) => base.process_new_tuple(), + EI::Intersect(intersect) => intersect.process_new_tuple(), + EI::Extend(extend) => extend.process_new_tuple(), + } + } + + fn execute(&mut self) { + match self { + EI::Base(base) => base.execute(), + EI::Intersect(intersect) => intersect.execute(), + EI::Extend(extend) => extend.execute(), + } + } + + fn get_alds_as_string(&self) -> String { + match self { + EI::Base(base) => base.get_alds_as_string(), + EI::Intersect(intersect) => intersect.get_alds_as_string(), + EI::Extend(extend) => extend.get_alds_as_string(), + } + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + match self { + EI::Base(base) => base.update_operator_name(query_vertex_to_index_map), + EI::Intersect(intersect) => intersect.update_operator_name(query_vertex_to_index_map), + EI::Extend(extend) => extend.update_operator_name(query_vertex_to_index_map), + } + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + match self { + EI::Base(base) => base.copy(is_thread_safe), + EI::Intersect(intersect) => intersect.copy(is_thread_safe), + EI::Extend(extend) => extend.copy(is_thread_safe), + } + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + match self { + EI::Base(base) => base.is_same_as(op), + EI::Intersect(intersect) => intersect.is_same_as(op), + EI::Extend(extend) => extend.is_same_as(op), + } + } + + fn get_num_out_tuples(&self) -> usize { + match self { + EI::Base(base) => base.get_num_out_tuples(), + EI::Intersect(intersect) => intersect.get_num_out_tuples(), + EI::Extend(extend) => extend.get_num_out_tuples(), + } + } +} + +#[derive(Clone)] +pub struct Neighbours { + pub ids: Vec, + pub start_idx: usize, + pub end_idx: usize, +} + +impl Neighbours { + pub fn new() -> Neighbours { + Neighbours { + ids: vec![], + start_idx: 0, + end_idx: 0, + } + } + + pub fn with_capacity(capacity: usize) -> Neighbours { + Neighbours { + ids: vec![Id::new(0); capacity], + start_idx: 0, + end_idx: 0, + } 
+ } + + pub fn reset(&mut self) { + self.start_idx = 0; + self.end_idx = 0; + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/extend/Extend.rs b/src/graph_impl/multi_graph/plan/operator/extend/Extend.rs new file mode 100644 index 00000000..1e9dd88e --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/extend/Extend.rs @@ -0,0 +1,168 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::intersect::BaseIntersect; +use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect; +use graph_impl::multi_graph::plan::operator::extend::EI::{ + BaseEI, Neighbours, DIFFERENTIATE_FWD_BWD_SINGLE_ALD, EI, +}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::plan::operator::scan::scan::Scan; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::planner::catalog::adj_list_descriptor::{ + AdjListDescriptor, Direction, +}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::static_graph::graph::KEY_ANY; +use graph_impl::static_graph::sorted_adj_vec::SortedAdjVec; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::DerefMut; +use std::rc::Rc; + +#[derive(Clone)] +pub struct Extend { + pub base_ei: BaseEI, + vertex_index: usize, + label_or_to_type: i32, + pub dir: Direction, + adj_list: Vec>>, +} + +impl Extend { + pub fn new( + to_qvertex: String, + to_type: i32, + alds: Vec, + out_subgraph: QueryGraph, + in_subgraph: Option, + out_qvertex_to_idx_map: HashMap, + ) -> Extend { + let ald = alds[0].clone(); + let mut extend = Extend { + base_ei: BaseEI::new(to_qvertex.clone(), to_type, alds, out_subgraph, in_subgraph), + vertex_index: ald.vertex_idx, + 
label_or_to_type: ald.label, + dir: ald.direction.clone(), + adj_list: vec![], + }; + extend.base_ei.base_op.last_repeated_vertex_idx = extend.base_ei.base_op.out_tuple_len - 2; + extend.base_ei.out_idx = out_qvertex_to_idx_map[&to_qvertex].clone(); + extend.base_ei.base_op.out_qvertex_to_idx_map = out_qvertex_to_idx_map; + extend + } +} + +impl CommonOperatorTrait for Extend { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + self.base_ei.out_neighbours = Neighbours::new(); + self.base_ei.base_op.probe_tuple = probe_tuple.clone(); + self.base_ei.vertex_types = graph.get_node_types().clone(); + self.adj_list = if let Direction::Fwd = self.dir { + graph.get_fwd_adj_list() + } else { + graph.get_bwd_adj_list() + } + .clone(); + if graph.is_sorted_by_node() { + self.label_or_to_type = self.base_ei.to_type; + self.base_ei.to_type = KEY_ANY; + } + for next_operator in &mut self.base_ei.base_op.next { + next_operator.borrow_mut().init(probe_tuple.clone(), graph); + } + } + + fn process_new_tuple(&mut self) { + let adj_vec = self.adj_list[self.base_ei.base_op.probe_tuple.borrow()[self.vertex_index].id()] + .as_mut() + .unwrap(); + let out_neighbour = &mut self.base_ei.out_neighbours; + adj_vec.set_neighbor_ids(self.label_or_to_type, out_neighbour); + self.base_ei.base_op.icost += out_neighbour.end_idx - out_neighbour.start_idx; + for idx in out_neighbour.start_idx..out_neighbour.end_idx { + if self.base_ei.to_type == KEY_ANY + || self.base_ei.to_type == self.base_ei.vertex_types[out_neighbour.ids[idx].id()] + { + self.base_ei.base_op.num_out_tuples += 1; + self.base_ei.base_op.probe_tuple.borrow_mut()[self.base_ei.out_idx] = out_neighbour.ids[idx]; + self.base_ei.base_op.next[0] + .borrow_mut() + .process_new_tuple(); + } + } + } + + fn execute(&mut self) { + self.base_ei.execute() + } + + fn get_alds_as_string(&self) -> String { + self.base_ei.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: 
HashMap) { + self.base_ei.update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + let base_ei = &self.base_ei; + let base_op = &base_ei.base_op; + let mut extend = Extend::new( + base_ei.to_query_vertex.clone(), + base_ei.to_type, + base_ei.alds.clone(), + base_op.out_subgraph.clone(), + base_op.in_subgraph.clone(), + base_op.out_qvertex_to_idx_map.clone(), + ); + let extend_copy = extend.clone(); + extend.base_ei.base_op.prev = Some(Rc::new(RefCell::new( + base_op.prev.as_ref().unwrap().borrow().copy(is_thread_safe), + ))); + + let last_repeated_vertex_idx = { + let mut prev = extend.base_ei.base_op.prev.as_mut().unwrap().borrow_mut(); + *get_op_attr_as_mut!(prev.deref_mut(), next) = + vec![Rc::new(RefCell::new(Operator::EI(EI::Extend(extend_copy))))]; + get_op_attr!(prev.deref_mut(), last_repeated_vertex_idx) + }; + extend.base_ei.init_caching(last_repeated_vertex_idx); + Operator::EI(EI::Extend(extend)) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + if let Operator::EI(EI::Extend(extend)) = op.borrow_mut().deref_mut() { + let base_self = &mut self.base_ei.base_op; + let other_op = &mut extend.base_ei.base_op; + return (!DIFFERENTIATE_FWD_BWD_SINGLE_ALD || self.dir == extend.dir) + && self.label_or_to_type == extend.label_or_to_type + && self.base_ei.to_type == extend.base_ei.to_type + && base_self + .in_subgraph + .as_mut() + .unwrap() + .is_isomorphic_to(other_op.in_subgraph.as_mut().unwrap()) + && base_self + .out_subgraph + .is_isomorphic_to(&mut other_op.out_subgraph) + && base_self + .prev + .as_mut() + .unwrap() + .borrow_mut() + .is_same_as(other_op.prev.as_mut().unwrap()); + } + false + } + + fn get_num_out_tuples(&self) -> usize { + self.base_ei.get_num_out_tuples() + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/extend/Intersect.rs b/src/graph_impl/multi_graph/plan/operator/extend/Intersect.rs new file mode 100644 index 00000000..03cc6160 --- /dev/null +++ 
b/src/graph_impl/multi_graph/plan/operator/extend/Intersect.rs @@ -0,0 +1,267 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::EI::EI::Base; +use graph_impl::multi_graph::plan::operator::extend::EI::{BaseEI, CachingType, Neighbours, EI}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::plan::operator::scan::scan::Scan; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::planner::catalog::adj_list_descriptor::AdjListDescriptor; +use graph_impl::multi_graph::planner::catalog::operator::intersect_catalog::IntersectCatalog; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::static_graph::graph::KEY_ANY; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::DerefMut; +use std::rc::Rc; + +pub enum IntersectType { + InitCached, + TempCached, + CachedOut, + TempOut, +} + +#[derive(Clone)] +pub enum Intersect { + BaseIntersect(BaseIntersect), + IntersectCatalog(IntersectCatalog), +} + +#[derive(Clone)] +pub struct BaseIntersect { + pub base_ei: BaseEI, +} + +impl BaseIntersect { + pub fn new( + to_qvertex: String, + to_type: i32, + alds: Vec, + out_subgraph: QueryGraph, + in_subgraph: Option, + out_qvertex_to_idx_map: HashMap, + ) -> BaseIntersect { + let mut intersect = BaseIntersect { + base_ei: BaseEI::new(to_qvertex.clone(), to_type, alds, out_subgraph, in_subgraph), + }; + let base_op = &mut intersect.base_ei.base_op; + base_op.last_repeated_vertex_idx = base_op.out_tuple_len - 2; + base_op.out_qvertex_to_idx_map = out_qvertex_to_idx_map; + intersect.base_ei.out_idx = base_op.out_qvertex_to_idx_map[&to_qvertex].clone(); + intersect + } +} + +impl 
CommonOperatorTrait for BaseIntersect { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + self.base_ei.init(probe_tuple, graph) + } + + fn process_new_tuple(&mut self) { + let mut temp = Neighbours::new(); + if CachingType::None == self.base_ei.caching_type || !self.base_ei.is_intersection_cached() + { + let base_ei = &mut self.base_ei; + let cache_id = base_ei.vertex_idx_to_cache[0]; + let to_id = base_ei.base_op.probe_tuple.borrow()[cache_id].id(); + let adj_vec = base_ei.adj_lists_to_cache[0][to_id].as_ref(); + let cache_id = base_ei.labels_or_to_types_to_cache[0]; + let neighbours = &mut base_ei.init_neighbours; + adj_vec.map(|adj| adj.set_neighbor_ids(cache_id, neighbours)); + base_ei.base_op.icost += + base_ei.init_neighbours.end_idx - base_ei.init_neighbours.start_idx; + base_ei.base_op.icost += base_ei.execute_intersect(1, IntersectType::InitCached); + + if base_ei.to_type != KEY_ANY { + let mut curr_end_idx = 0; + let cached_neighbours = &mut base_ei.cached_neighbours; + for i in cached_neighbours.start_idx..cached_neighbours.end_idx { + if base_ei.vertex_types[cached_neighbours.ids[i].id()] == base_ei.to_type { + cached_neighbours.ids[curr_end_idx] = cached_neighbours.ids[i]; + curr_end_idx += 1; + } + } + cached_neighbours.end_idx = curr_end_idx; + } + for i in 2..base_ei.adj_lists_to_cache.len() { + temp = base_ei.cached_neighbours.clone(); + base_ei.cached_neighbours = base_ei.temp_neighbours.clone(); + base_ei.temp_neighbours = temp.clone(); + base_ei.base_op.icost += base_ei.execute_intersect(i, IntersectType::TempCached); + } + } + + let base_ei = &mut self.base_ei; + match base_ei.caching_type { + CachingType::None | CachingType::FullCaching => { + base_ei.out_neighbours = base_ei.cached_neighbours.clone() + } + CachingType::PartialCaching => { + let cost = base_ei.execute_intersect(0, IntersectType::CachedOut); + base_ei.base_op.icost += cost; + for i in 1..base_ei.adj_lists.len() { + temp = 
base_ei.out_neighbours.clone(); + base_ei.out_neighbours = base_ei.temp_neighbours.clone(); + base_ei.temp_neighbours = temp.clone(); + base_ei.base_op.icost += base_ei.execute_intersect(i, IntersectType::TempOut); + } + } + } + + let base_op = &mut base_ei.base_op; + // setAdjListSortOrder the initNeighbours ids in the output tuple. + let out_neighbours = &mut base_ei.out_neighbours; + base_op.num_out_tuples += out_neighbours.end_idx - out_neighbours.start_idx; + for idx in out_neighbours.start_idx..out_neighbours.end_idx { + base_op.probe_tuple.borrow_mut()[base_ei.out_idx] = out_neighbours.ids[idx]; + base_op.next[0].borrow_mut().process_new_tuple(); + } + } + + fn execute(&mut self) { + self.base_ei.execute() + } + + fn get_alds_as_string(&self) -> String { + self.base_ei.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_ei.update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + let base_ei = &self.base_ei; + let base_op = &base_ei.base_op; + let mut intersect = BaseIntersect::new( + base_ei.to_query_vertex.clone(), + base_ei.to_type, + base_ei.alds.clone(), + base_op.out_subgraph.clone(), + base_op.in_subgraph.clone(), + base_op.out_qvertex_to_idx_map.clone(), + ); + let intersect_copy = intersect.clone(); + intersect.base_ei.base_op.prev = Some(Rc::new(RefCell::new( + base_op.prev.as_ref().unwrap().borrow().copy(is_thread_safe), + ))); + let last_repeated_vertex_idx = { + let mut prev = intersect + .base_ei + .base_op + .prev + .as_mut() + .unwrap() + .borrow_mut(); + *get_op_attr_as_mut!(prev.deref_mut(), next) = vec![Rc::new(RefCell::new( + Operator::EI(EI::Intersect(Intersect::BaseIntersect(intersect_copy))), + ))]; + get_op_attr!(prev.deref_mut(), last_repeated_vertex_idx) + }; + intersect.base_ei.init_caching(last_repeated_vertex_idx); + Operator::EI(EI::Intersect(Intersect::BaseIntersect(intersect))) + } + + fn is_same_as(&mut self, op: 
&mut Rc>>) -> bool { + if let Operator::EI(EI::Intersect(Intersect::BaseIntersect(intersect))) = + op.borrow_mut().deref_mut() + { + return self.base_ei.caching_type == intersect.base_ei.caching_type + && self.get_alds_as_string() == intersect.base_ei.base_op.get_alds_as_string() + && self + .base_ei + .base_op + .in_subgraph + .as_mut() + .unwrap() + .is_isomorphic_to(intersect.base_ei.base_op.in_subgraph.as_mut().unwrap()) + && self + .base_ei + .base_op + .out_subgraph + .is_isomorphic_to(&mut intersect.base_ei.base_op.out_subgraph) + && self + .base_ei + .base_op + .prev + .as_mut() + .unwrap() + .borrow_mut() + .is_same_as(intersect.base_ei.base_op.prev.as_mut().unwrap()); + } + false + } + + fn get_num_out_tuples(&self) -> usize { + self.base_ei.get_num_out_tuples() + } +} + +impl CommonOperatorTrait for Intersect { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + match self { + Intersect::BaseIntersect(base) => base.init(probe_tuple, graph), + Intersect::IntersectCatalog(ic) => ic.init(probe_tuple, graph), + } + } + + fn process_new_tuple(&mut self) { + match self { + Intersect::BaseIntersect(base) => base.process_new_tuple(), + Intersect::IntersectCatalog(ic) => ic.process_new_tuple(), + } + } + + fn execute(&mut self) { + match self { + Intersect::BaseIntersect(base) => base.execute(), + Intersect::IntersectCatalog(ic) => ic.execute(), + } + } + + fn get_alds_as_string(&self) -> String { + match self { + Intersect::BaseIntersect(base) => base.get_alds_as_string(), + Intersect::IntersectCatalog(ic) => ic.get_alds_as_string(), + } + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + match self { + Intersect::BaseIntersect(base) => base.update_operator_name(query_vertex_to_index_map), + Intersect::IntersectCatalog(ic) => ic.update_operator_name(query_vertex_to_index_map), + } + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + match self { + Intersect::BaseIntersect(base) => 
base.copy(is_thread_safe), + Intersect::IntersectCatalog(ic) => ic.copy(is_thread_safe), + } + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + match self { + Intersect::BaseIntersect(base) => base.is_same_as(op), + Intersect::IntersectCatalog(ic) => ic.is_same_as(op), + } + } + + fn get_num_out_tuples(&self) -> usize { + match self { + Intersect::BaseIntersect(base) => base.get_num_out_tuples(), + Intersect::IntersectCatalog(ic) => ic.get_num_out_tuples(), + } + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/extend/extend.rs b/src/graph_impl/multi_graph/plan/operator/extend/extend.rs new file mode 100644 index 00000000..fb53f71b --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/extend/extend.rs @@ -0,0 +1,170 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::intersect::BaseIntersect; +use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect; +use graph_impl::multi_graph::plan::operator::extend::EI::{ + BaseEI, Neighbours, DIFFERENTIATE_FWD_BWD_SINGLE_ALD, EI, +}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::plan::operator::scan::scan::Scan; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::planner::catalog::adj_list_descriptor::{ + AdjListDescriptor, Direction, +}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::static_graph::graph::KEY_ANY; +use graph_impl::static_graph::sorted_adj_vec::SortedAdjVec; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::DerefMut; +use std::rc::Rc; + +#[derive(Clone)] +pub struct Extend { + pub base_ei: BaseEI, + vertex_index: usize, + label_or_to_type: i32, + pub 
dir: Direction, + adj_list: Vec>>, +} + +impl Extend { + pub fn new( + to_qvertex: String, + to_type: i32, + alds: Vec, + out_subgraph: QueryGraph, + in_subgraph: Option, + out_qvertex_to_idx_map: HashMap, + ) -> Extend { + let ald = alds[0].clone(); + let mut extend = Extend { + base_ei: BaseEI::new(to_qvertex.clone(), to_type, alds, out_subgraph, in_subgraph), + vertex_index: ald.vertex_idx, + label_or_to_type: ald.label, + dir: ald.direction.clone(), + adj_list: vec![], + }; + extend.base_ei.base_op.last_repeated_vertex_idx = extend.base_ei.base_op.out_tuple_len - 2; + extend.base_ei.out_idx = out_qvertex_to_idx_map[&to_qvertex].clone(); + extend.base_ei.base_op.out_qvertex_to_idx_map = out_qvertex_to_idx_map; + extend + } +} + +impl CommonOperatorTrait for Extend { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + self.base_ei.out_neighbours = Neighbours::new(); + self.base_ei.base_op.probe_tuple = probe_tuple.clone(); + self.base_ei.vertex_types = graph.get_node_types().clone(); + self.adj_list = if let Direction::Fwd = self.dir { + graph.get_fwd_adj_list() + } else { + graph.get_bwd_adj_list() + } + .clone(); + if graph.is_sorted_by_node() { + self.label_or_to_type = self.base_ei.to_type; + self.base_ei.to_type = KEY_ANY; + } + for next_operator in &mut self.base_ei.base_op.next { + next_operator.borrow_mut().init(probe_tuple.clone(), graph); + } + } + + fn process_new_tuple(&mut self) { + let adj_vec = self.adj_list + [self.base_ei.base_op.probe_tuple.borrow()[self.vertex_index].id()] + .as_mut() + .unwrap(); + let out_neighbour = &mut self.base_ei.out_neighbours; + adj_vec.set_neighbor_ids(self.label_or_to_type, out_neighbour); + self.base_ei.base_op.icost += out_neighbour.end_idx - out_neighbour.start_idx; + for idx in out_neighbour.start_idx..out_neighbour.end_idx { + if self.base_ei.to_type == KEY_ANY + || self.base_ei.to_type == self.base_ei.vertex_types[out_neighbour.ids[idx].id()] + { + 
self.base_ei.base_op.num_out_tuples += 1; + self.base_ei.base_op.probe_tuple.borrow_mut()[self.base_ei.out_idx] = + out_neighbour.ids[idx]; + self.base_ei.base_op.next[0] + .borrow_mut() + .process_new_tuple(); + } + } + } + + fn execute(&mut self) { + self.base_ei.execute() + } + + fn get_alds_as_string(&self) -> String { + self.base_ei.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_ei.update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + let base_ei = &self.base_ei; + let base_op = &base_ei.base_op; + let mut extend = Extend::new( + base_ei.to_query_vertex.clone(), + base_ei.to_type, + base_ei.alds.clone(), + base_op.out_subgraph.clone(), + base_op.in_subgraph.clone(), + base_op.out_qvertex_to_idx_map.clone(), + ); + let extend_copy = extend.clone(); + extend.base_ei.base_op.prev = Some(Rc::new(RefCell::new( + base_op.prev.as_ref().unwrap().borrow().copy(is_thread_safe), + ))); + + let last_repeated_vertex_idx = { + let mut prev = extend.base_ei.base_op.prev.as_mut().unwrap().borrow_mut(); + *get_op_attr_as_mut!(prev.deref_mut(), next) = + vec![Rc::new(RefCell::new(Operator::EI(EI::Extend(extend_copy))))]; + get_op_attr!(prev.deref_mut(), last_repeated_vertex_idx) + }; + extend.base_ei.init_caching(last_repeated_vertex_idx); + Operator::EI(EI::Extend(extend)) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + if let Operator::EI(EI::Extend(extend)) = op.borrow_mut().deref_mut() { + let base_self = &mut self.base_ei.base_op; + let other_op = &mut extend.base_ei.base_op; + return (!DIFFERENTIATE_FWD_BWD_SINGLE_ALD || self.dir == extend.dir) + && self.label_or_to_type == extend.label_or_to_type + && self.base_ei.to_type == extend.base_ei.to_type + && base_self + .in_subgraph + .as_mut() + .unwrap() + .is_isomorphic_to(other_op.in_subgraph.as_mut().unwrap()) + && base_self + .out_subgraph + .is_isomorphic_to(&mut other_op.out_subgraph) + 
&& base_self + .prev + .as_mut() + .unwrap() + .borrow_mut() + .is_same_as(other_op.prev.as_mut().unwrap()); + } + false + } + + fn get_num_out_tuples(&self) -> usize { + self.base_ei.get_num_out_tuples() + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/extend/intersect.rs b/src/graph_impl/multi_graph/plan/operator/extend/intersect.rs new file mode 100644 index 00000000..03cc6160 --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/extend/intersect.rs @@ -0,0 +1,267 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::EI::EI::Base; +use graph_impl::multi_graph::plan::operator::extend::EI::{BaseEI, CachingType, Neighbours, EI}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::plan::operator::scan::scan::Scan; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::planner::catalog::adj_list_descriptor::AdjListDescriptor; +use graph_impl::multi_graph::planner::catalog::operator::intersect_catalog::IntersectCatalog; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::static_graph::graph::KEY_ANY; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::DerefMut; +use std::rc::Rc; + +pub enum IntersectType { + InitCached, + TempCached, + CachedOut, + TempOut, +} + +#[derive(Clone)] +pub enum Intersect { + BaseIntersect(BaseIntersect), + IntersectCatalog(IntersectCatalog), +} + +#[derive(Clone)] +pub struct BaseIntersect { + pub base_ei: BaseEI, +} + +impl BaseIntersect { + pub fn new( + to_qvertex: String, + to_type: i32, + alds: Vec, + out_subgraph: QueryGraph, + in_subgraph: Option, + out_qvertex_to_idx_map: HashMap, + ) -> BaseIntersect { + let mut 
intersect = BaseIntersect { + base_ei: BaseEI::new(to_qvertex.clone(), to_type, alds, out_subgraph, in_subgraph), + }; + let base_op = &mut intersect.base_ei.base_op; + base_op.last_repeated_vertex_idx = base_op.out_tuple_len - 2; + base_op.out_qvertex_to_idx_map = out_qvertex_to_idx_map; + intersect.base_ei.out_idx = base_op.out_qvertex_to_idx_map[&to_qvertex].clone(); + intersect + } +} + +impl CommonOperatorTrait for BaseIntersect { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + self.base_ei.init(probe_tuple, graph) + } + + fn process_new_tuple(&mut self) { + let mut temp = Neighbours::new(); + if CachingType::None == self.base_ei.caching_type || !self.base_ei.is_intersection_cached() + { + let base_ei = &mut self.base_ei; + let cache_id = base_ei.vertex_idx_to_cache[0]; + let to_id = base_ei.base_op.probe_tuple.borrow()[cache_id].id(); + let adj_vec = base_ei.adj_lists_to_cache[0][to_id].as_ref(); + let cache_id = base_ei.labels_or_to_types_to_cache[0]; + let neighbours = &mut base_ei.init_neighbours; + adj_vec.map(|adj| adj.set_neighbor_ids(cache_id, neighbours)); + base_ei.base_op.icost += + base_ei.init_neighbours.end_idx - base_ei.init_neighbours.start_idx; + base_ei.base_op.icost += base_ei.execute_intersect(1, IntersectType::InitCached); + + if base_ei.to_type != KEY_ANY { + let mut curr_end_idx = 0; + let cached_neighbours = &mut base_ei.cached_neighbours; + for i in cached_neighbours.start_idx..cached_neighbours.end_idx { + if base_ei.vertex_types[cached_neighbours.ids[i].id()] == base_ei.to_type { + cached_neighbours.ids[curr_end_idx] = cached_neighbours.ids[i]; + curr_end_idx += 1; + } + } + cached_neighbours.end_idx = curr_end_idx; + } + for i in 2..base_ei.adj_lists_to_cache.len() { + temp = base_ei.cached_neighbours.clone(); + base_ei.cached_neighbours = base_ei.temp_neighbours.clone(); + base_ei.temp_neighbours = temp.clone(); + base_ei.base_op.icost += base_ei.execute_intersect(i, IntersectType::TempCached); + } 
+ } + + let base_ei = &mut self.base_ei; + match base_ei.caching_type { + CachingType::None | CachingType::FullCaching => { + base_ei.out_neighbours = base_ei.cached_neighbours.clone() + } + CachingType::PartialCaching => { + let cost = base_ei.execute_intersect(0, IntersectType::CachedOut); + base_ei.base_op.icost += cost; + for i in 1..base_ei.adj_lists.len() { + temp = base_ei.out_neighbours.clone(); + base_ei.out_neighbours = base_ei.temp_neighbours.clone(); + base_ei.temp_neighbours = temp.clone(); + base_ei.base_op.icost += base_ei.execute_intersect(i, IntersectType::TempOut); + } + } + } + + let base_op = &mut base_ei.base_op; + // setAdjListSortOrder the initNeighbours ids in the output tuple. + let out_neighbours = &mut base_ei.out_neighbours; + base_op.num_out_tuples += out_neighbours.end_idx - out_neighbours.start_idx; + for idx in out_neighbours.start_idx..out_neighbours.end_idx { + base_op.probe_tuple.borrow_mut()[base_ei.out_idx] = out_neighbours.ids[idx]; + base_op.next[0].borrow_mut().process_new_tuple(); + } + } + + fn execute(&mut self) { + self.base_ei.execute() + } + + fn get_alds_as_string(&self) -> String { + self.base_ei.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_ei.update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + let base_ei = &self.base_ei; + let base_op = &base_ei.base_op; + let mut intersect = BaseIntersect::new( + base_ei.to_query_vertex.clone(), + base_ei.to_type, + base_ei.alds.clone(), + base_op.out_subgraph.clone(), + base_op.in_subgraph.clone(), + base_op.out_qvertex_to_idx_map.clone(), + ); + let intersect_copy = intersect.clone(); + intersect.base_ei.base_op.prev = Some(Rc::new(RefCell::new( + base_op.prev.as_ref().unwrap().borrow().copy(is_thread_safe), + ))); + let last_repeated_vertex_idx = { + let mut prev = intersect + .base_ei + .base_op + .prev + .as_mut() + .unwrap() + .borrow_mut(); + 
*get_op_attr_as_mut!(prev.deref_mut(), next) = vec![Rc::new(RefCell::new( + Operator::EI(EI::Intersect(Intersect::BaseIntersect(intersect_copy))), + ))]; + get_op_attr!(prev.deref_mut(), last_repeated_vertex_idx) + }; + intersect.base_ei.init_caching(last_repeated_vertex_idx); + Operator::EI(EI::Intersect(Intersect::BaseIntersect(intersect))) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + if let Operator::EI(EI::Intersect(Intersect::BaseIntersect(intersect))) = + op.borrow_mut().deref_mut() + { + return self.base_ei.caching_type == intersect.base_ei.caching_type + && self.get_alds_as_string() == intersect.base_ei.base_op.get_alds_as_string() + && self + .base_ei + .base_op + .in_subgraph + .as_mut() + .unwrap() + .is_isomorphic_to(intersect.base_ei.base_op.in_subgraph.as_mut().unwrap()) + && self + .base_ei + .base_op + .out_subgraph + .is_isomorphic_to(&mut intersect.base_ei.base_op.out_subgraph) + && self + .base_ei + .base_op + .prev + .as_mut() + .unwrap() + .borrow_mut() + .is_same_as(intersect.base_ei.base_op.prev.as_mut().unwrap()); + } + false + } + + fn get_num_out_tuples(&self) -> usize { + self.base_ei.get_num_out_tuples() + } +} + +impl CommonOperatorTrait for Intersect { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + match self { + Intersect::BaseIntersect(base) => base.init(probe_tuple, graph), + Intersect::IntersectCatalog(ic) => ic.init(probe_tuple, graph), + } + } + + fn process_new_tuple(&mut self) { + match self { + Intersect::BaseIntersect(base) => base.process_new_tuple(), + Intersect::IntersectCatalog(ic) => ic.process_new_tuple(), + } + } + + fn execute(&mut self) { + match self { + Intersect::BaseIntersect(base) => base.execute(), + Intersect::IntersectCatalog(ic) => ic.execute(), + } + } + + fn get_alds_as_string(&self) -> String { + match self { + Intersect::BaseIntersect(base) => base.get_alds_as_string(), + Intersect::IntersectCatalog(ic) => ic.get_alds_as_string(), + } + } + + fn 
update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + match self { + Intersect::BaseIntersect(base) => base.update_operator_name(query_vertex_to_index_map), + Intersect::IntersectCatalog(ic) => ic.update_operator_name(query_vertex_to_index_map), + } + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + match self { + Intersect::BaseIntersect(base) => base.copy(is_thread_safe), + Intersect::IntersectCatalog(ic) => ic.copy(is_thread_safe), + } + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + match self { + Intersect::BaseIntersect(base) => base.is_same_as(op), + Intersect::IntersectCatalog(ic) => ic.is_same_as(op), + } + } + + fn get_num_out_tuples(&self) -> usize { + match self { + Intersect::BaseIntersect(base) => base.get_num_out_tuples(), + Intersect::IntersectCatalog(ic) => ic.get_num_out_tuples(), + } + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/extend/mod.rs b/src/graph_impl/multi_graph/plan/operator/extend/mod.rs new file mode 100644 index 00000000..f1c11a29 --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/extend/mod.rs @@ -0,0 +1,3 @@ +pub mod EI; +pub mod extend; +pub mod intersect; diff --git a/src/graph_impl/multi_graph/plan/operator/hashjoin/build.rs b/src/graph_impl/multi_graph/plan/operator/hashjoin/build.rs new file mode 100644 index 00000000..60ffb028 --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/hashjoin/build.rs @@ -0,0 +1,111 @@ +use generic::{GraphTrait, GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::hashjoin::hash_table::HashTable; +use graph_impl::multi_graph::plan::operator::operator::{ + BaseOperator, CommonOperatorTrait, Operator, +}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::{DerefMut, Deref}; +use std::rc::Rc; + +#[derive(Clone)] +pub struct Build { + pub base_op: BaseOperator, + pub 
hash_table: Option>>>, + pub probing_subgraph: Option, + query_vertex_to_hash: String, + pub build_hash_idx: usize, + pub hashed_tuple_len: usize, +} + +impl Build { + pub fn new( + in_subgraph: QueryGraph, + query_vertex_to_hash: String, + build_hash_idx: usize, + ) -> Build { + let mut build = Build { + base_op: BaseOperator::empty(), + hash_table: None, + probing_subgraph: None, + query_vertex_to_hash: query_vertex_to_hash.clone(), + build_hash_idx, + hashed_tuple_len: 0, + }; + build.hashed_tuple_len = in_subgraph.get_num_qvertices() - 1; + build.base_op.out_tuple_len = in_subgraph.get_num_qvertices(); + build.base_op.in_subgraph = Some(in_subgraph); + build.base_op.name = "HASH ON (".to_owned() + &query_vertex_to_hash + ")"; + build + } +} + +impl CommonOperatorTrait for Build { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + if self.base_op.probe_tuple.borrow().len() == 0 { + self.base_op.probe_tuple = probe_tuple; + self.hash_table.as_mut().map(|table| { + table + .borrow_mut() + .allocate_initial_memory(graph.node_count() + 1); + }); + } + } + + fn process_new_tuple(&mut self) { + let probe_tuple = self.base_op.probe_tuple.clone(); + self.hash_table.as_mut().map(|table| { + table.borrow_mut().insert_tuple(probe_tuple); + }); + } + + fn execute(&mut self) { + self.base_op.execute() + } + + fn get_alds_as_string(&self) -> String { + self.base_op.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_op.update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + let mut build = Build::new( + self.base_op.in_subgraph.as_ref().unwrap().clone(), + self.query_vertex_to_hash.clone(), + self.build_hash_idx, + ); + build.base_op.prev = self.base_op.prev.as_ref().map(|prev| prev.clone()); + build.base_op.next = vec![Rc::new(RefCell::new(Operator::Build(build.clone())))]; + build.probing_subgraph = self.probing_subgraph.clone(); + 
Operator::Build(build) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + if let Operator::Build(build) = op.borrow_mut().deref_mut() { + let base_self = &mut self.base_op; + let base_op = &mut build.base_op; + let in_subgraph = base_self.in_subgraph.as_mut().map_or(false, |in_subgraph| { + in_subgraph.is_isomorphic_to(base_op.in_subgraph.as_mut().unwrap()) + }); + let prev = base_self.prev.as_ref().map_or(false, |prev| { + prev.borrow_mut().is_same_as(base_op.prev.as_mut().unwrap()) + }); + return in_subgraph && prev; + } + false + } + + fn get_num_out_tuples(&self) -> usize { + self.base_op.get_num_out_tuples() + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/hashjoin/hash_join.rs b/src/graph_impl/multi_graph/plan/operator/hashjoin/hash_join.rs new file mode 100644 index 00000000..c32de07e --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/hashjoin/hash_join.rs @@ -0,0 +1,202 @@ +use generic::IdType; +use graph_impl::multi_graph::plan::operator::extend::intersect::BaseIntersect; +use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect; +use graph_impl::multi_graph::plan::operator::extend::EI::EI; +use graph_impl::multi_graph::plan::operator::hashjoin::build::Build; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::{BaseProbe, Probe}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_cartesian::ProbeCartesian; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::{ + ProbeMultiVertices, PMV, +}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices_cartesian::ProbeMultiVerticesCartesian; +use graph_impl::multi_graph::plan::operator::operator::Operator; +use graph_impl::multi_graph::plan::operator::scan::scan::Scan; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::plan::query_plan::QueryPlan; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use hashbrown::HashMap; +use 
std::cell::RefCell;
+use std::ops::{Deref, DerefMut};
+use std::rc::Rc;
+
+/// Namespace for the functions that splice a `Build` operator and a probe
+/// operator onto two existing sub-plans.
+pub struct HashJoin {}
+
+impl HashJoin {
+    /// Joins `build_plan` and `probe_plan` into a plan producing `out_subgraph`.
+    ///
+    /// Both input plans are shallow-copied before their sub-plan lists are
+    /// combined; the result is wrapped with `QueryPlan::new_from_subplans`.
+    pub fn make<Id: IdType>(
+        out_subgraph: QueryGraph,
+        build_plan: QueryPlan<Id>,
+        probe_plan: QueryPlan<Id>,
+    ) -> QueryPlan<Id> {
+        let build_subplans = build_plan.shallow_copy().subplans;
+        let probe_subplans = probe_plan.shallow_copy().subplans;
+        QueryPlan::new_from_subplans(HashJoin::inner_make(
+            out_subgraph,
+            build_subplans,
+            probe_subplans,
+        ))
+    }
+
+    /// Appends a `Build` operator after the last build sub-plan, creates the
+    /// matching probe operator and returns the combined list of sub-plans.
+    ///
+    /// The join vertices are the query vertices output by both last operators.
+    /// The first one is the hashed vertex; the indices of the remaining ones
+    /// are handed to the probe as `probe_indices` / `build_indices`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if either sub-plan list is empty or if the two last operators
+    /// have no output query vertex in common.
+    fn inner_make<Id: IdType>(
+        out_subgraph: QueryGraph,
+        mut build_subplans: Vec<Rc<RefCell<Operator<Id>>>>,
+        mut probe_subplans: Vec<Rc<RefCell<Operator<Id>>>>,
+    ) -> Vec<Rc<RefCell<Operator<Id>>>> {
+        let pre_build = build_subplans
+            .pop()
+            .expect("build plan has no sub-plan");
+        let pre_probe = probe_subplans
+            .pop()
+            .expect("probe plan has no sub-plan");
+        let join_qvertices: Vec<String> = pre_build
+            .borrow()
+            .get_out_query_vertices()
+            .intersection(&pre_probe.borrow().get_out_query_vertices())
+            .cloned()
+            .collect();
+
+        // ---- build side: hash on the first join vertex -------------------
+        let build_qvertex_to_idx_map =
+            get_op_attr_as_ref!(pre_build.borrow().deref(), out_qvertex_to_idx_map).clone();
+        let query_vertex_to_hash = &join_qvertices[0];
+        let build_hash_idx = build_qvertex_to_idx_map[query_vertex_to_hash];
+        let mut build = Build::new(
+            get_op_attr_as_ref!(pre_build.borrow().deref(), out_subgraph).clone(),
+            query_vertex_to_hash.clone(),
+            build_hash_idx,
+        );
+        build.base_op.prev.replace(pre_build.clone());
+        let build = Rc::new(RefCell::new(Operator::Build(build)));
+        *get_op_attr_as_mut!(pre_build.borrow_mut().deref_mut(), next) = vec![build.clone()];
+        build_subplans.push(build.clone());
+
+        // When the two inputs are isomorphic the probe side is derived from
+        // the build side (cartesian probe variants) instead of `pre_probe`.
+        let mapping = {
+            let mut pre_probe = pre_probe.borrow().clone();
+            let mut pre_build = pre_build.borrow().clone();
+            let out_subgraph_probe = get_op_attr_as_mut!(&mut pre_probe, out_subgraph);
+            let out_subgraph_build = get_op_attr_as_mut!(&mut pre_build, out_subgraph);
+            out_subgraph_build.get_isomorphic_mapping_if_any(out_subgraph_probe)
+        };
+        let is_cartesian = mapping.is_some();
+
+        let probe_qvertex_to_idx_map = match &mapping {
+            Some(map) => {
+                let last_idx = build_qvertex_to_idx_map.len() - 1;
+                let mut remapped = HashMap::new();
+                for (query_vertex, &idx) in &build_qvertex_to_idx_map {
+                    // The hashed vertex moves to the last slot; computing
+                    // `idx - 1` for it would underflow when it sits at index 0.
+                    let probe_idx = if idx < build_hash_idx {
+                        idx
+                    } else if idx == build_hash_idx {
+                        last_idx
+                    } else {
+                        idx - 1
+                    };
+                    remapped.insert(map[query_vertex].clone(), probe_idx);
+                }
+                remapped.insert(map[query_vertex_to_hash].clone(), last_idx);
+                remapped
+            }
+            None => {
+                get_op_attr_as_ref!(pre_probe.borrow().deref(), out_qvertex_to_idx_map).clone()
+            }
+        };
+        let probe_hash_idx = probe_qvertex_to_idx_map[query_vertex_to_hash];
+        let out_qvertex_to_idx_map = HashJoin::compute_out_qvertex_to_idx_map(
+            &join_qvertices,
+            &build_qvertex_to_idx_map,
+            &probe_qvertex_to_idx_map,
+        );
+        let hashed_tuple_len = build_qvertex_to_idx_map.len() - 1;
+
+        // Indices of the join vertices other than the hashed one. On the
+        // build side the hashed column is dropped, so later columns shift
+        // down by one.
+        let mut probe_indices = Vec::with_capacity(join_qvertices.len() - 1);
+        let mut build_indices = Vec::with_capacity(join_qvertices.len() - 1);
+        for join_qvertex in join_qvertices.iter().skip(1) {
+            probe_indices.push(probe_qvertex_to_idx_map[join_qvertex]);
+            let build_idx = build_qvertex_to_idx_map[join_qvertex];
+            build_indices.push(if build_hash_idx < build_idx {
+                build_idx - 1
+            } else {
+                build_idx
+            });
+        }
+
+        // ---- probe side --------------------------------------------------
+        let in_subgraph = get_op_attr_as_ref!(pre_probe.borrow().deref(), out_subgraph).clone();
+        let out_tuple_len = get_op_attr!(pre_probe.borrow().deref(), out_tuple_len);
+        let probe_op = match (is_cartesian, probe_indices.is_empty()) {
+            (true, true) => Probe::PC(ProbeCartesian::new(
+                out_subgraph,
+                in_subgraph,
+                join_qvertices,
+                probe_hash_idx,
+                hashed_tuple_len,
+                out_tuple_len,
+                out_qvertex_to_idx_map,
+            )),
+            (true, false) => Probe::PMV(PMV::PMVC(ProbeMultiVerticesCartesian::new(
+                out_subgraph,
+                in_subgraph,
+                join_qvertices,
+                probe_hash_idx,
+                probe_indices,
+                build_indices,
+                hashed_tuple_len,
+                out_tuple_len,
+                out_qvertex_to_idx_map,
+            ))),
+            (false, true) => Probe::BaseProbe(BaseProbe::new(
+                out_subgraph,
+                in_subgraph,
+                join_qvertices,
+                probe_hash_idx,
+                hashed_tuple_len,
+                out_tuple_len,
+                out_qvertex_to_idx_map,
+            )),
+            (false, false) => Probe::PMV(PMV::BasePMV(ProbeMultiVertices::new(
+                out_subgraph,
+                in_subgraph,
+                join_qvertices,
+                probe_hash_idx,
+                probe_indices,
+                build_indices,
+                hashed_tuple_len,
+                out_tuple_len,
+                out_qvertex_to_idx_map,
+            ))),
+        };
+        let probe = Rc::new(RefCell::new(Operator::Probe(probe_op)));
+        if !is_cartesian {
+            get_op_attr_as_mut!(probe.borrow_mut().deref_mut(), prev).replace(pre_probe.clone());
+            *get_op_attr_as_mut!(pre_probe.borrow_mut().deref_mut(), next) = vec![probe.clone()];
+            // `pre_probe` was popped above, so the probe takes its place by
+            // being pushed back (mirrors the build side). Indexing
+            // `len() - 1` here would underflow for a single-element plan and
+            // overwrite an unrelated sub-plan otherwise.
+            probe_subplans.push(probe.clone());
+        }
+
+        if let Operator::Build(build) = build.borrow_mut().deref_mut() {
+            build.probing_subgraph =
+                get_op_attr_as_ref!(probe.borrow().deref(), in_subgraph).clone();
+        }
+
+        let mut subplans = build_subplans;
+        if is_cartesian {
+            subplans.push(probe);
+        } else {
+            subplans.append(&mut probe_subplans);
+        }
+        subplans
+    }
+
+    /// Computes the output column of every query vertex after the join.
+    ///
+    /// Starts from the probe-side map and appends each build-side vertex that
+    /// is not a join vertex, in build-index order, at the next free index.
+    ///
+    /// # Panics
+    ///
+    /// Panics if an index in `build_qvertex_to_idx_map` is not smaller than
+    /// the number of entries in that map.
+    pub fn compute_out_qvertex_to_idx_map(
+        join_qvertices: &Vec<String>,
+        build_qvertex_to_idx_map: &HashMap<String, usize>,
+        probe_qvertex_to_idx_map: &HashMap<String, usize>,
+    ) -> HashMap<String, usize> {
+        // Invert the build map so its vertices can be visited by index.
+        let mut build_qvertices = vec![String::new(); build_qvertex_to_idx_map.len()];
+        for (build_qvertex, &idx) in build_qvertex_to_idx_map {
+            build_qvertices[idx] = build_qvertex.clone();
+        }
+
+        let mut out_qvertices_to_idx_map = probe_qvertex_to_idx_map.clone();
+        for build_qvertex in build_qvertices {
+            if join_qvertices.contains(&build_qvertex) {
+                continue;
+            }
+            let next_idx = out_qvertices_to_idx_map.len();
+            out_qvertices_to_idx_map.insert(build_qvertex, next_idx);
+        }
+        out_qvertices_to_idx_map
+    }
+}
diff --git a/src/graph_impl/multi_graph/plan/operator/hashjoin/hash_table.rs b/src/graph_impl/multi_graph/plan/operator/hashjoin/hash_table.rs
new file mode 100644
index 00000000..51c3f9f1
--- /dev/null
+++ b/src/graph_impl/multi_graph/plan/operator/hashjoin/hash_table.rs
@@ -0,0 +1,148 @@
+use generic::IdType;
+use itertools::Itertools;
+use 
std::cell::RefCell;
+use std::rc::Rc;
+
+/// A copy of one storage block together with the offsets delimiting the
+/// chunk of interest inside it.
+#[derive(Clone)]
+pub struct BlockInfo<Id: IdType> {
+    pub block: Vec<Id>,
+    pub start_offset: usize,
+    pub end_offset: usize,
+}
+
+impl<Id: IdType> BlockInfo<Id> {
+    /// Returns a `BlockInfo` with no data and both offsets at zero.
+    pub fn empty() -> BlockInfo<Id> {
+        BlockInfo {
+            block: Vec::new(),
+            start_offset: 0,
+            end_offset: 0,
+        }
+    }
+}
+
+/// Hash-join table that stores build tuples in fixed-size chunks, grouped by
+/// the id of the hashed vertex.
+///
+/// Storage is a list of equally sized blocks, each divided into chunks of
+/// `chunk_sz` slots. For every hash vertex, `block_ids_and_chunk_offsets`
+/// holds one `(block id, start offset, end offset)` triple per chunk owned by
+/// that vertex, and `num_chunks` holds how many triples are in use.
+#[derive(Clone)]
+pub struct HashTable<Id: IdType> {
+    // Blocks allocated up front; block ids `0..initial_num_blocks`.
+    blocks: Vec<Vec<Id>>,
+    // Blocks added on demand; block id `initial_num_blocks + i` is entry `i`.
+    extra_blocks: Vec<Vec<Id>>,
+    block_ids_and_chunk_offsets: Vec<Vec<usize>>,
+    pub num_chunks: Vec<usize>,
+    initial_num_blocks: usize,
+    num_tuples_per_chunk: usize,
+    num_chunks_per_block: usize,
+    initial_num_chunks_per_vertex: usize,
+    block_sz: usize,
+    chunk_sz: usize,
+    // Cursor to the next unassigned chunk region.
+    next_global_block_id: usize,
+    next_global_chunk_offset: usize,
+    build_hash_idx: usize,
+    build_tuple_len: usize,
+    hashed_tuple_len: usize,
+}
+
+impl<Id: IdType> HashTable<Id> {
+    /// Creates an empty table for build tuples of `hashed_tuple_len + 1`
+    /// columns whose column `build_hash_idx` is the hash key.
+    ///
+    /// No storage is allocated until [`HashTable::allocate_initial_memory`]
+    /// is called.
+    pub fn new(build_hash_idx: usize, hashed_tuple_len: usize) -> HashTable<Id> {
+        let num_tuples_per_chunk = 64;
+        let num_chunks_per_block = 8000;
+        let chunk_sz = num_tuples_per_chunk * hashed_tuple_len;
+        HashTable {
+            blocks: Vec::new(),
+            extra_blocks: Vec::new(),
+            block_ids_and_chunk_offsets: Vec::new(),
+            num_chunks: Vec::new(),
+            initial_num_blocks: 1000,
+            num_tuples_per_chunk,
+            num_chunks_per_block,
+            initial_num_chunks_per_vertex: 6,
+            block_sz: chunk_sz * num_chunks_per_block,
+            chunk_sz,
+            next_global_block_id: 0,
+            next_global_chunk_offset: 0,
+            build_hash_idx,
+            build_tuple_len: hashed_tuple_len + 1,
+            hashed_tuple_len,
+        }
+    }
+
+    /// Allocates the initial blocks and the per-vertex bookkeeping for vertex
+    /// ids `0..=highest_vertex_id`.
+    pub fn allocate_initial_memory(&mut self, highest_vertex_id: usize) {
+        self.blocks = vec![vec![Id::new(0); self.block_sz]; self.initial_num_blocks];
+        // Must start empty: overflow blocks are pushed and then addressed as
+        // `extra_blocks[block_id - initial_num_blocks]`, i.e. from index 0.
+        // Pre-filling it with empty vectors made the first spill index an
+        // empty block and panic.
+        self.extra_blocks = Vec::with_capacity(self.initial_num_blocks);
+        self.block_ids_and_chunk_offsets =
+            vec![vec![0; self.initial_num_chunks_per_vertex * 3]; highest_vertex_id + 1];
+        self.num_chunks = vec![0; highest_vertex_id + 1];
+    }
+
+    /// Stores the non-key columns of `build_tuple` in the newest chunk of the
+    /// tuple's hash vertex, opening a further chunk once that one cannot take
+    /// another tuple.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `build_tuple` has fewer than `hashed_tuple_len + 1` entries
+    /// or if the hash vertex id is outside the range given to
+    /// [`HashTable::allocate_initial_memory`].
+    pub fn insert_tuple(&mut self, build_tuple: Rc<RefCell<Vec<Id>>>) {
+        let tuple = build_tuple.borrow();
+        let hash_vertex = tuple[self.build_hash_idx].id();
+        if self.num_chunks[hash_vertex] == 0 {
+            // First tuple for this vertex: give it its first chunk.
+            self.num_chunks[hash_vertex] += 1;
+            self.update_block_ids_and_global_and_chunk_offset(hash_vertex);
+        }
+
+        let triple = 3 * (self.num_chunks[hash_vertex] - 1);
+        let block_id = self.block_ids_and_chunk_offsets[hash_vertex][triple];
+        let start_offset = self.block_ids_and_chunk_offsets[hash_vertex][triple + 1];
+        let mut end_offset = self.block_ids_and_chunk_offsets[hash_vertex][triple + 2];
+        let block = if block_id < self.initial_num_blocks {
+            &mut self.blocks[block_id]
+        } else {
+            &mut self.extra_blocks[block_id - self.initial_num_blocks]
+        };
+        for (i, &value) in tuple[..self.build_tuple_len].iter().enumerate() {
+            if i != self.build_hash_idx {
+                block[end_offset] = value;
+                end_offset += 1;
+            }
+        }
+        self.block_ids_and_chunk_offsets[hash_vertex][triple + 2] = end_offset;
+
+        // Not enough room left for one more tuple: start a new chunk now.
+        if self.chunk_sz <= end_offset - start_offset + self.hashed_tuple_len {
+            self.num_chunks[hash_vertex] += 1;
+            self.resize_block_ids_and_global_and_chunk_offset(hash_vertex);
+            self.update_block_ids_and_global_and_chunk_offset(hash_vertex);
+        }
+    }
+
+    /// Fills `block_info` with a copy of the block holding chunk `chunk_idx`
+    /// of `hash_vertex` and with that chunk's start and end offsets.
+    pub fn get_block_and_offsets(
+        &self,
+        hash_vertex: usize,
+        chunk_idx: usize,
+        block_info: &mut BlockInfo<Id>,
+    ) {
+        let triple = &self.block_ids_and_chunk_offsets[hash_vertex][chunk_idx * 3..chunk_idx * 3 + 3];
+        let block_id = triple[0];
+        block_info.start_offset = triple[1];
+        block_info.end_offset = triple[2];
+        let source = if block_id < self.initial_num_blocks {
+            &self.blocks[block_id]
+        } else {
+            &self.extra_blocks[block_id - self.initial_num_blocks]
+        };
+        // `clone_from` reuses the allocation already held by `block_info`.
+        block_info.block.clone_from(source);
+    }
+
+    /// Grows the triple array of `hash_vertex` so that it can describe
+    /// `num_chunks[hash_vertex] + 2` chunks, keeping the existing entries.
+    pub fn resize_block_ids_and_global_and_chunk_offset(&mut self, hash_vertex: usize) {
+        let needed_chunks = self.num_chunks[hash_vertex] + 1;
+        let triples = &mut self.block_ids_and_chunk_offsets[hash_vertex];
+        if needed_chunks > triples.len() / 3 {
+            triples.resize((needed_chunks + 1) * 3, 0);
+        }
+    }
+
+    /// Points the newest chunk of `hash_vertex` at the next unassigned chunk
+    /// region and advances the global cursor, adding an overflow block when
+    /// the initial blocks are used up.
+    pub fn update_block_ids_and_global_and_chunk_offset(&mut self, hash_vertex: usize) {
+        let triple = (self.num_chunks[hash_vertex] - 1) * 3;
+        let entry = &mut self.block_ids_and_chunk_offsets[hash_vertex];
+        entry[triple] = self.next_global_block_id;
+        entry[triple + 1] = self.next_global_chunk_offset;
+        entry[triple + 2] = self.next_global_chunk_offset;
+
+        self.next_global_chunk_offset += self.chunk_sz;
+        if self.next_global_chunk_offset == self.block_sz {
+            self.next_global_block_id += 1;
+            if self.next_global_block_id >= self.initial_num_blocks {
+                self.extra_blocks.push(vec![Id::new(0); self.block_sz]);
+            }
+            self.next_global_chunk_offset = 0;
+        }
+    }
+}
diff --git a/src/graph_impl/multi_graph/plan/operator/hashjoin/mod.rs b/src/graph_impl/multi_graph/plan/operator/hashjoin/mod.rs
new file mode 100644
index 00000000..20754c7a
--- /dev/null
+++ b/src/graph_impl/multi_graph/plan/operator/hashjoin/mod.rs
@@ -0,0 +1,7 @@
+pub mod build;
+pub mod hash_join;
+pub mod hash_table;
+pub mod probe;
+pub mod probe_cartesian;
+pub mod probe_multi_vertices;
+pub mod probe_multi_vertices_cartesian;
diff --git a/src/graph_impl/multi_graph/plan/operator/hashjoin/probe.rs b/src/graph_impl/multi_graph/plan/operator/hashjoin/probe.rs
new file mode 100644
index 00000000..3347dda0
--- /dev/null
+++ b/src/graph_impl/multi_graph/plan/operator/hashjoin/probe.rs
@@ -0,0 +1,245 @@
+use generic::{GraphType, IdType};
+use graph_impl::multi_graph::plan::operator::hashjoin::hash_table::{BlockInfo, HashTable};
+use graph_impl::multi_graph::plan::operator::operator::{
+    BaseOperator, CommonOperatorTrait, Operator,
+};
+use 
graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; + +use graph_impl::multi_graph::plan::operator::hashjoin::probe_cartesian::ProbeCartesian; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::{Deref, DerefMut}; +use std::rc::Rc; +use itertools::Itertools; + +#[derive(Clone)] +pub enum Probe { + BaseProbe(BaseProbe), + PC(ProbeCartesian), + PMV(PMV), +} + +#[derive(Clone)] +pub struct BaseProbe { + pub base_op: BaseOperator, + pub hash_tables: Vec>>>, + pub join_qvertices: Vec, + pub probe_hash_idx: usize, + pub hashed_tuple_len: usize, + pub probe_tuple_len: usize, + pub block_info: BlockInfo, +} + +impl BaseProbe { + pub fn new( + out_subgraph: QueryGraph, + in_subgraph: QueryGraph, + join_qvertices: Vec, + probe_hash_idx: usize, + hashed_tuple_len: usize, + probe_tuple_len: usize, + out_qvertex_to_idx_map: HashMap, + ) -> BaseProbe { + let mut probe = BaseProbe { + base_op: BaseOperator::new(out_subgraph, Some(in_subgraph)), + hash_tables: vec![], + join_qvertices, + probe_hash_idx, + hashed_tuple_len, + probe_tuple_len, + block_info: BlockInfo::empty(), + }; + probe.base_op.out_tuple_len = out_qvertex_to_idx_map.len(); + probe.base_op.out_qvertex_to_idx_map = out_qvertex_to_idx_map; + probe.base_op.name = "PROBE ON (".to_owned() + &probe.join_qvertices[0] + ")"; + probe + } +} + +impl CommonOperatorTrait for BaseProbe { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + if self.base_op.probe_tuple.borrow().len() == 0 { + self.base_op.probe_tuple = probe_tuple.clone(); + self.block_info = BlockInfo::empty(); + self.base_op + .next + .iter_mut() + .map(|next_op| next_op.borrow_mut()) + .for_each(|mut next_op| next_op.deref_mut().init(probe_tuple.clone(), graph)); + } + } + + fn process_new_tuple(&mut self) { + let hash_vertex = 
self.base_op.probe_tuple.borrow()[self.probe_hash_idx].id(); + for hash_table in &mut self.hash_tables { + let last_chunk_idx = hash_table.borrow().num_chunks[hash_vertex]; + let mut prev_first_item = -1i32; + for chunk_idx in 0..last_chunk_idx { + hash_table.borrow().get_block_and_offsets( + hash_vertex, + chunk_idx, + &mut self.block_info, + ); + let mut offset = self.block_info.start_offset; + while offset < self.block_info.end_offset { + self.base_op.num_out_tuples += 1; + if self.hashed_tuple_len == 2 { + let first_item = self.block_info.block[offset]; + offset += 1; + if prev_first_item != first_item.id() as i32 { + self.base_op.probe_tuple.borrow_mut()[self.probe_tuple_len] = + first_item; + prev_first_item = first_item.id() as i32; + } + self.base_op.probe_tuple.borrow_mut()[self.probe_tuple_len + 1] = + self.block_info.block[offset]; + offset += 1; + } else { + for k in 0..self.hashed_tuple_len { + self.base_op.probe_tuple.borrow_mut()[self.probe_tuple_len + k] = + self.block_info.block[offset]; + offset += 1; + } + } + self.base_op.next[0] + .borrow_mut() + .deref_mut() + .process_new_tuple(); + } + } + } + } + + fn execute(&mut self) { + self.base_op.execute() + } + + fn get_alds_as_string(&self) -> String { + self.base_op.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_op.update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + let op = &self.base_op; + let mut probe = BaseProbe::new( + op.out_subgraph.clone(), + op.in_subgraph.as_ref().unwrap().clone(), + self.join_qvertices.clone(), + self.probe_hash_idx, + self.hashed_tuple_len, + self.probe_tuple_len, + op.out_qvertex_to_idx_map.clone(), + ); + probe.base_op.prev = op + .prev + .as_ref() + .map(|prev| Rc::new(RefCell::new(prev.borrow().deref().copy(is_thread_safe)))); + probe.base_op.next = vec![Rc::new(RefCell::new(Operator::Probe(Probe::BaseProbe( + probe.clone(), + ))))]; + 
Operator::Probe(Probe::BaseProbe(probe)) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + if let Operator::Probe(Probe::BaseProbe(probe)) = op.borrow_mut().deref_mut() { + let self_op = &mut self.base_op; + let other_op = &mut probe.base_op; + let in_subgraph = self_op.in_subgraph.as_mut().map_or(false, |in_subgraph| { + in_subgraph.is_isomorphic_to(other_op.in_subgraph.as_mut().unwrap()) + }); + let out_subgraph = self_op + .out_subgraph + .is_isomorphic_to(&mut other_op.out_subgraph); + let prev = self_op.prev.as_mut().map_or(false, |prev| { + prev.borrow_mut() + .is_same_as(other_op.prev.as_mut().unwrap()) + }); + return in_subgraph && out_subgraph && prev; + } + false + } + + fn get_num_out_tuples(&self) -> usize { + self.base_op.get_num_out_tuples() + } +} + +impl CommonOperatorTrait for Probe { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + match self { + Probe::BaseProbe(base) => base.init(probe_tuple, graph), + Probe::PC(pc) => pc.init(probe_tuple, graph), + Probe::PMV(pmv) => pmv.init(probe_tuple, graph), + } + } + + fn process_new_tuple(&mut self) { + match self { + Probe::BaseProbe(base) => base.process_new_tuple(), + Probe::PC(pc) => pc.process_new_tuple(), + Probe::PMV(pmv) => pmv.process_new_tuple(), + } + } + + fn execute(&mut self) { + match self { + Probe::BaseProbe(base) => base.execute(), + Probe::PC(pc) => pc.execute(), + Probe::PMV(pmv) => pmv.execute(), + } + } + + fn get_alds_as_string(&self) -> String { + match self { + Probe::BaseProbe(base) => base.get_alds_as_string(), + Probe::PC(pc) => pc.get_alds_as_string(), + Probe::PMV(pmv) => pmv.get_alds_as_string(), + } + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + match self { + Probe::BaseProbe(base) => base.update_operator_name(query_vertex_to_index_map), + Probe::PC(pc) => pc.update_operator_name(query_vertex_to_index_map), + Probe::PMV(pmv) => pmv.update_operator_name(query_vertex_to_index_map), + } + } + + fn 
copy(&self, is_thread_safe: bool) -> Operator { + match self { + Probe::BaseProbe(base) => base.copy(is_thread_safe), + Probe::PC(pc) => pc.copy(is_thread_safe), + Probe::PMV(pmv) => pmv.copy(is_thread_safe), + } + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + match self { + Probe::BaseProbe(base) => base.is_same_as(op), + Probe::PC(pc) => pc.is_same_as(op), + Probe::PMV(pmv) => pmv.is_same_as(op), + } + } + + fn get_num_out_tuples(&self) -> usize { + match self { + Probe::BaseProbe(base) => base.get_num_out_tuples(), + Probe::PC(pc) => pc.get_num_out_tuples(), + Probe::PMV(pmv) => pmv.get_num_out_tuples(), + } + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/hashjoin/probe_cartesian.rs b/src/graph_impl/multi_graph/plan/operator/hashjoin/probe_cartesian.rs new file mode 100644 index 00000000..9913f0ee --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/hashjoin/probe_cartesian.rs @@ -0,0 +1,145 @@ +use generic::{GraphTrait, GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::hashjoin::hash_table::BlockInfo; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::{BaseProbe, Probe}; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::DerefMut; +use std::rc::Rc; + +#[derive(Clone)] +pub struct ProbeCartesian { + pub base_probe: BaseProbe, + other_block_info: BlockInfo, + highest_vertex_id: usize, +} + +impl ProbeCartesian { + pub fn new( + out_subgraph: QueryGraph, + in_subgraph: QueryGraph, + join_qvertices: Vec, + probe_hash_idx: usize, + hashed_tuple_len: usize, + probe_tuple_len: usize, + out_qvertex_to_idx_map: HashMap, + ) -> ProbeCartesian { + let mut pc = ProbeCartesian { + base_probe: BaseProbe::new( + out_subgraph, + in_subgraph, + join_qvertices, + probe_hash_idx, + 
hashed_tuple_len, + probe_tuple_len, + out_qvertex_to_idx_map, + ), + other_block_info: BlockInfo::empty(), + highest_vertex_id: 0, + }; + pc.base_probe.base_op.name = "CARTESIAN ".to_owned() + &pc.base_probe.base_op.name; + pc + } +} + +impl CommonOperatorTrait for ProbeCartesian { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + if self.base_probe.base_op.probe_tuple.borrow().len() == 0 { + self.highest_vertex_id = graph.node_count(); + self.other_block_info = BlockInfo::empty(); + } + self.base_probe.init(probe_tuple, graph); + } + + fn process_new_tuple(&mut self) { + self.base_probe.process_new_tuple(); + } + + fn execute(&mut self) { + for a_hash_vertex in 0..self.highest_vertex_id { + let base_probe = &mut self.base_probe; + base_probe.base_op.probe_tuple.borrow_mut()[base_probe.hashed_tuple_len] = + Id::new(a_hash_vertex); + for hash_table in base_probe.hash_tables.clone() { + let a_last_chunk_idx = hash_table.borrow().num_chunks[a_hash_vertex]; + let mut a_prev_first_vertex = -1i32; + for a_chunk_idx in 0..a_last_chunk_idx { + hash_table.borrow().get_block_and_offsets( + a_hash_vertex, + a_chunk_idx, + &mut self.other_block_info, + ); + let mut an_offset = self.other_block_info.start_offset; + while an_offset < self.other_block_info.end_offset { + if base_probe.hashed_tuple_len == 2 { + let first_vertex = self.other_block_info.block[an_offset]; + an_offset += 1; + if a_prev_first_vertex != first_vertex.id() as i32 { + base_probe.base_op.probe_tuple.borrow_mut()[0] = first_vertex; + a_prev_first_vertex = first_vertex.id() as i32; + } + base_probe.base_op.probe_tuple.borrow_mut()[1] = + self.other_block_info.block[an_offset]; + an_offset += 1; + } else { + for k in 0..base_probe.hashed_tuple_len { + base_probe.base_op.probe_tuple.borrow_mut()[k] = + self.other_block_info.block[an_offset]; + an_offset += 1; + } + } + base_probe.process_new_tuple(); + } + } + } + } + } + + fn get_alds_as_string(&self) -> String { + 
self.base_probe.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_probe + .update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + let op = &self.base_probe.base_op; + Operator::Probe(Probe::PC(ProbeCartesian::new( + op.out_subgraph.clone(), + op.in_subgraph.as_ref().unwrap().clone(), + self.base_probe.join_qvertices.clone(), + self.base_probe.probe_hash_idx, + self.base_probe.hashed_tuple_len, + self.base_probe.probe_tuple_len, + op.out_qvertex_to_idx_map.clone(), + ))) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + if let Operator::Probe(Probe::PC(pc)) = op.borrow_mut().deref_mut() { + let self_op = &mut self.base_probe.base_op; + let other_op = &mut pc.base_probe.base_op; + let in_subgraph = self_op.in_subgraph.as_mut().map_or(false, |in_subgraph| { + in_subgraph.is_isomorphic_to(other_op.in_subgraph.as_mut().unwrap()) + }); + let out_subgraph = self_op + .out_subgraph + .is_isomorphic_to(&mut other_op.out_subgraph); + return in_subgraph && out_subgraph; + } + false + } + + fn get_num_out_tuples(&self) -> usize { + self.base_probe.get_num_out_tuples() + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/hashjoin/probe_multi_vertices.rs b/src/graph_impl/multi_graph/plan/operator/hashjoin/probe_multi_vertices.rs new file mode 100644 index 00000000..cdfabf8e --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/hashjoin/probe_multi_vertices.rs @@ -0,0 +1,257 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::{BaseProbe, Probe}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices_cartesian::ProbeMultiVerticesCartesian; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use 
itertools::Itertools; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::{Deref, DerefMut}; +use std::rc::Rc; + +#[derive(Clone)] +pub enum PMV { + BasePMV(ProbeMultiVertices), + PMVC(ProbeMultiVerticesCartesian), +} + +#[derive(Clone)] +pub struct ProbeMultiVertices { + pub base_probe: BaseProbe, + pub probe_indices: Vec, + pub build_indices: Vec, +} + +impl ProbeMultiVertices { + pub fn new( + out_subgraph: QueryGraph, + in_subgraph: QueryGraph, + join_qvertices: Vec, + probe_hash_idx: usize, + probe_indices: Vec, + build_indices: Vec, + hashed_tuple_len: usize, + probe_tuple_len: usize, + out_qvertex_to_idx_map: HashMap, + ) -> ProbeMultiVertices { + let mut name = "PROBE ON ".to_owned(); + if 1 == join_qvertices.len() { + name = name + "(" + &join_qvertices[0] + ")"; + } else { + for i in 0..join_qvertices.len() { + if i > 0 && i < join_qvertices.len() - 1 { + name += ", "; + } + if i == join_qvertices.len() - 1 { + name += " & " + } + name = name + "(" + &join_qvertices[i] + ")"; + } + } + let mut pmv = ProbeMultiVertices { + base_probe: BaseProbe::new( + out_subgraph, + in_subgraph, + join_qvertices, + probe_hash_idx, + hashed_tuple_len, + probe_tuple_len, + out_qvertex_to_idx_map, + ), + probe_indices, + build_indices, + }; + pmv.base_probe.base_op.name = name; + pmv + } +} + +impl CommonOperatorTrait for ProbeMultiVertices { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + self.base_probe.init(probe_tuple, graph); + } + + fn process_new_tuple(&mut self) { + let hash_vertex = + self.base_probe.base_op.probe_tuple.borrow()[self.base_probe.probe_hash_idx].id(); + for hash_table in self.base_probe.hash_tables.clone() { + let last_chunk_idx = hash_table.borrow().num_chunks[hash_vertex]; + for chunk_idx in 0..last_chunk_idx { + hash_table.borrow().get_block_and_offsets( + hash_vertex, + chunk_idx, + &mut self.base_probe.block_info, + ); + let offset = self.base_probe.block_info.start_offset; + 
(self.base_probe.block_info.start_offset..self.base_probe.block_info.end_offset) + .step(self.base_probe.hashed_tuple_len) + .for_each(|offset| { + let mut flag = true; + for i in 0..self.probe_indices.len() { + if self.base_probe.base_op.probe_tuple.borrow()[self.probe_indices[i]] + != self.base_probe.block_info.block[offset + self.build_indices[i]] + { + flag = false; + break; + } + } + if flag { + self.base_probe.base_op.num_out_tuples += 1; + let mut out = 0; + for k in 0..self.base_probe.hashed_tuple_len { + let mut copy = true; + for build_idx in &self.build_indices { + if k == build_idx.clone() { + copy = false; + break; + } + } + if copy { + self.base_probe.base_op.probe_tuple.borrow_mut() + [self.base_probe.probe_tuple_len + out] = + self.base_probe.block_info.block[offset + k]; + out += 1; + } + } + self.base_probe.base_op.next[0] + .borrow_mut() + .process_new_tuple(); + } + }); + } + } + } + + fn execute(&mut self) { + self.base_probe.execute(); + } + + fn get_alds_as_string(&self) -> String { + self.base_probe.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_probe + .update_operator_name(query_vertex_to_index_map); + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + let mut probe = ProbeMultiVertices::new( + self.base_probe.base_op.out_subgraph.clone(), + self.base_probe + .base_op + .in_subgraph + .as_ref() + .unwrap() + .clone(), + self.base_probe.join_qvertices.clone(), + self.base_probe.probe_hash_idx, + self.probe_indices.clone(), + self.build_indices.clone(), + self.base_probe.hashed_tuple_len, + self.base_probe.probe_tuple_len, + self.base_probe.base_op.out_qvertex_to_idx_map.clone(), + ); + probe.base_probe.base_op.prev = self + .base_probe + .base_op + .prev + .as_ref() + .map(|prev| Rc::new(RefCell::new(prev.borrow().deref().copy(is_thread_safe)))); + probe.base_probe.base_op.next = vec![Rc::new(RefCell::new(Operator::Probe(Probe::PMV( + PMV::BasePMV(probe.clone()), + 
))))]; + Operator::Probe(Probe::PMV(PMV::BasePMV(probe))) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + if let Operator::Probe(Probe::PMV(PMV::BasePMV(probe))) = op.borrow_mut().deref_mut() { + let self_op = &mut self.base_probe.base_op; + let other_op = &mut probe.base_probe.base_op; + let in_subgraph = self_op.in_subgraph.as_mut().map_or(false, |in_subgraph| { + in_subgraph.is_isomorphic_to(other_op.in_subgraph.as_mut().unwrap()) + }); + let out_subgraph = self_op + .out_subgraph + .is_isomorphic_to(&mut other_op.out_subgraph); + let prev = self_op.prev.as_mut().map_or(false, |prev| { + prev.borrow_mut() + .is_same_as(other_op.prev.as_mut().unwrap()) + }); + return in_subgraph && out_subgraph && prev; + } + false + } + + fn get_num_out_tuples(&self) -> usize { + self.base_probe.get_num_out_tuples() + } +} + +impl CommonOperatorTrait for PMV { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + match self { + PMV::BasePMV(base) => base.init(probe_tuple, graph), + PMV::PMVC(pmvc) => pmvc.init(probe_tuple, graph), + } + } + + fn process_new_tuple(&mut self) { + match self { + PMV::BasePMV(base) => base.process_new_tuple(), + PMV::PMVC(pmvc) => pmvc.process_new_tuple(), + } + } + + fn execute(&mut self) { + match self { + PMV::BasePMV(base) => base.execute(), + PMV::PMVC(pmvc) => pmvc.execute(), + } + } + + fn get_alds_as_string(&self) -> String { + match self { + PMV::BasePMV(base) => base.get_alds_as_string(), + PMV::PMVC(pmvc) => pmvc.get_alds_as_string(), + } + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + match self { + PMV::BasePMV(base) => base.update_operator_name(query_vertex_to_index_map), + PMV::PMVC(pmvc) => pmvc.update_operator_name(query_vertex_to_index_map), + } + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + match self { + PMV::BasePMV(base) => base.copy(is_thread_safe), + PMV::PMVC(pmvc) => pmvc.copy(is_thread_safe), + } + } + + fn is_same_as(&mut self, op: 
&mut Rc>>) -> bool { + match self { + PMV::BasePMV(base) => base.is_same_as(op), + PMV::PMVC(pmvc) => pmvc.is_same_as(op), + } + } + + fn get_num_out_tuples(&self) -> usize { + match self { + PMV::BasePMV(base) => base.get_num_out_tuples(), + PMV::PMVC(pmvc) => pmvc.get_num_out_tuples(), + } + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/hashjoin/probe_multi_vertices_cartesian.rs b/src/graph_impl/multi_graph/plan/operator/hashjoin/probe_multi_vertices_cartesian.rs new file mode 100644 index 00000000..52b66b8a --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/hashjoin/probe_multi_vertices_cartesian.rs @@ -0,0 +1,155 @@ +use generic::{GraphTrait, GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::hashjoin::hash_table::BlockInfo; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::{ + ProbeMultiVertices, PMV, +}; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::DerefMut; +use std::rc::Rc; + +#[derive(Clone)] +pub struct ProbeMultiVerticesCartesian { + pub base_pmv: ProbeMultiVertices, + other_block_info: BlockInfo, + highest_vertex_id: usize, +} + +impl ProbeMultiVerticesCartesian { + pub fn new( + out_subgraph: QueryGraph, + in_subgraph: QueryGraph, + join_qvertices: Vec, + probe_hash_idx: usize, + probe_indices: Vec, + build_indices: Vec, + hashed_tuple_len: usize, + probe_tuple_len: usize, + out_qvertex_to_idx_map: HashMap, + ) -> ProbeMultiVerticesCartesian { + let mut pmvc = ProbeMultiVerticesCartesian { + base_pmv: ProbeMultiVertices::new( + out_subgraph, + in_subgraph, + join_qvertices, + probe_hash_idx, + probe_indices, + build_indices, + hashed_tuple_len, + probe_tuple_len, + 
out_qvertex_to_idx_map, + ), + other_block_info: BlockInfo::empty(), + highest_vertex_id: 0, + }; + pmvc.base_pmv.base_probe.base_op.name = + "CARTESIAN ".to_owned() + &pmvc.base_pmv.base_probe.base_op.name; + pmvc + } +} + +impl CommonOperatorTrait for ProbeMultiVerticesCartesian { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + if self.base_pmv.base_probe.base_op.probe_tuple.borrow().len() == 0 { + self.highest_vertex_id = graph.node_count(); + self.other_block_info = BlockInfo::empty(); + } + self.base_pmv.init(probe_tuple, graph); + } + + fn process_new_tuple(&mut self) { + for a_hash_vertex in 0..self.highest_vertex_id { + self.base_pmv.base_probe.base_op.probe_tuple.borrow_mut() + [self.base_pmv.base_probe.hashed_tuple_len] = Id::new(a_hash_vertex); + for hash_table in self.base_pmv.base_probe.hash_tables.clone() { + let a_last_chunk_idx = hash_table.borrow().num_chunks[a_hash_vertex]; + let mut a_prev_first_vertex = -1; + for a_chunk_idx in 0..a_last_chunk_idx { + hash_table.borrow().get_block_and_offsets( + a_hash_vertex, + a_chunk_idx, + &mut self.other_block_info, + ); + let mut an_offset = self.other_block_info.start_offset; + while an_offset < self.other_block_info.end_offset { + if self.base_pmv.base_probe.hashed_tuple_len == 2 { + let first_vertex = self.other_block_info.block[an_offset]; + an_offset += 1; + if a_prev_first_vertex != first_vertex.id() as i32 { + self.base_pmv.base_probe.base_op.probe_tuple.borrow_mut()[0] = + first_vertex; + a_prev_first_vertex = first_vertex.id() as i32; + } + self.base_pmv.base_probe.base_op.probe_tuple.borrow_mut()[1] = + self.other_block_info.block[an_offset]; + an_offset += 1; + } else { + for k in 0..self.base_pmv.base_probe.hashed_tuple_len { + self.base_pmv.base_probe.base_op.probe_tuple.borrow_mut()[k] = + self.other_block_info.block[an_offset]; + an_offset += 1; + } + } + self.base_pmv.process_new_tuple(); + } + } + } + } + } + + fn execute(&mut self) { + 
self.base_pmv.execute() + } + + fn get_alds_as_string(&self) -> String { + self.base_pmv.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_pmv + .update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + let op = &self.base_pmv.base_probe.base_op; + Operator::Probe(Probe::PMV(PMV::PMVC(ProbeMultiVerticesCartesian::new( + op.out_subgraph.clone(), + op.in_subgraph.as_ref().unwrap().clone(), + self.base_pmv.base_probe.join_qvertices.clone(), + self.base_pmv.base_probe.probe_hash_idx, + self.base_pmv.probe_indices.clone(), + self.base_pmv.build_indices.clone(), + self.base_pmv.base_probe.hashed_tuple_len, + self.base_pmv.base_probe.probe_tuple_len, + op.out_qvertex_to_idx_map.clone(), + )))) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + if let Operator::Probe(Probe::PMV(PMV::PMVC(pc))) = op.borrow_mut().deref_mut() { + let self_op = &mut self.base_pmv.base_probe.base_op; + let other_op = &mut pc.base_pmv.base_probe.base_op; + let in_subgraph = self_op.in_subgraph.as_mut().map_or(false, |in_subgraph| { + in_subgraph.is_isomorphic_to(other_op.in_subgraph.as_mut().unwrap()) + }); + let out_subgraph = self_op + .out_subgraph + .is_isomorphic_to(&mut other_op.out_subgraph); + return in_subgraph && out_subgraph; + } + false + } + + fn get_num_out_tuples(&self) -> usize { + self.base_pmv.get_num_out_tuples() + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/mod.rs b/src/graph_impl/multi_graph/plan/operator/mod.rs new file mode 100644 index 00000000..0ffde2a7 --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/mod.rs @@ -0,0 +1,273 @@ +#[macro_export] +macro_rules! 
get_sink_as_mut {
+    // Resolves any `Sink` variant to a mutable borrow of its base sink: the `BaseSink`
+    // payload is handed back as-is, the wrapping variants expose their `base_sink` field.
+    // The variant paths are unqualified, so `Sink` must be in scope at the call site.
+    // NOTE(review): the arms only agree on one type when `$item` is matched through a
+    // `&mut` reference - confirm against call sites.
+    ($item:expr) => {
+        match $item {
+            Sink::BaseSink(sink) => sink,
+            Sink::SinkCopy(copy) => &mut copy.base_sink,
+            Sink::SinkPrint(print) => &mut print.base_sink,
+            Sink::SinkLimit(limit) => &mut limit.base_sink,
+        }
+    };
+}
+
+/// Shared-borrow counterpart of `get_sink_as_mut!`: yields the `BaseSink` payload itself or
+/// a `&` borrow of the `base_sink` field of the wrapping variants.
+#[macro_export]
+macro_rules! get_sink_as_ref {
+    ($item:expr) => {
+        match $item {
+            Sink::BaseSink(sink) => sink,
+            Sink::SinkCopy(copy) => &copy.base_sink,
+            Sink::SinkPrint(print) => &print.base_sink,
+            Sink::SinkLimit(limit) => &limit.base_sink,
+        }
+    };
+}
+
+/// Resolves any `EI` variant (including both `Intersect` flavours) to a mutable borrow of
+/// its base extend/intersect operator: the `EI::Base` payload itself, otherwise the
+/// `base_ei` field reached through the wrapper.
+#[macro_export]
+macro_rules! get_ei_as_mut {
+    ($item:expr) => {
+        match $item {
+            EI::Base(ei) => ei,
+            EI::Extend(extend) => &mut extend.base_ei,
+            EI::Intersect(Intersect::BaseIntersect(intersect)) => &mut intersect.base_ei,
+            EI::Intersect(Intersect::IntersectCatalog(catalog)) => {
+                &mut catalog.base_intersect.base_ei
+            }
+        }
+    };
+}
+
+/// Shared-borrow counterpart of `get_ei_as_mut!`.
+#[macro_export]
+macro_rules! get_ei_as_ref {
+    ($item:expr) => {
+        match $item {
+            EI::Base(ei) => ei,
+            EI::Extend(extend) => &extend.base_ei,
+            EI::Intersect(Intersect::BaseIntersect(intersect)) => &intersect.base_ei,
+            EI::Intersect(Intersect::IntersectCatalog(catalog)) => {
+                &catalog.base_intersect.base_ei
+            }
+        }
+    };
+}
+
+/// Resolves any `Scan` variant to a mutable borrow of its base scan: the `Scan::Base`
+/// payload itself, otherwise the `base_scan` field of the wrapping variant.
+#[macro_export]
+macro_rules! get_scan_as_mut {
+    ($item:expr) => {
+        match $item {
+            Scan::Base(scan) => scan,
+            Scan::ScanSampling(sampling) => &mut sampling.base_scan,
+            Scan::ScanBlocking(blocking) => &mut blocking.base_scan,
+        }
+    };
+}
+
+/// Resolves any `Probe` variant (including both `PMV` flavours) to a mutable borrow of its
+/// base probe: the `Probe::BaseProbe` payload itself, otherwise the `base_probe` field
+/// reached through the wrapper(s).
+#[macro_export]
+macro_rules! get_probe_as_mut {
+    ($item:expr) => {
+        match $item {
+            Probe::BaseProbe(probe) => probe,
+            Probe::PC(cartesian) => &mut cartesian.base_probe,
+            Probe::PMV(PMV::BasePMV(multi)) => &mut multi.base_probe,
+            Probe::PMV(PMV::PMVC(multi_cartesian)) => &mut multi_cartesian.base_pmv.base_probe,
+        }
+    };
+}
+
+/// Shared-borrow counterpart of `get_scan_as_mut!`.
+#[macro_export]
+macro_rules! get_scan_as_ref {
+    ($item:expr) => {
+        match $item {
+            Scan::Base(scan) => scan,
+            Scan::ScanSampling(sampling) => &sampling.base_scan,
+            Scan::ScanBlocking(blocking) => &blocking.base_scan,
+        }
+    };
+}
+
+#[macro_export]
+macro_rules!
get_base_op_as_mut {
+    // Maps every `Operator` variant - down to the innermost sink / scan / extend / probe
+    // flavour - to a mutable borrow of the `base_op` it embeds; `Operator::Base` hands
+    // back its payload directly. All variant paths are unqualified, so the enums named
+    // below must be in scope wherever the macro is invoked.
+    ($item:expr) => {
+        match $item {
+            Operator::Base(op) => op,
+            Operator::Sink(Sink::BaseSink(sink)) => &mut sink.base_op,
+            Operator::Sink(Sink::SinkCopy(sink)) => &mut sink.base_sink.base_op,
+            Operator::Sink(Sink::SinkPrint(sink)) => &mut sink.base_sink.base_op,
+            Operator::Sink(Sink::SinkLimit(sink)) => &mut sink.base_sink.base_op,
+            Operator::Scan(Scan::Base(scan)) => &mut scan.base_op,
+            Operator::Scan(Scan::ScanSampling(scan)) => &mut scan.base_scan.base_op,
+            Operator::Scan(Scan::ScanBlocking(scan)) => &mut scan.base_scan.base_op,
+            Operator::EI(EI::Base(ei)) => &mut ei.base_op,
+            Operator::EI(EI::Extend(extend)) => &mut extend.base_ei.base_op,
+            Operator::EI(EI::Intersect(Intersect::BaseIntersect(intersect))) => {
+                &mut intersect.base_ei.base_op
+            }
+            Operator::EI(EI::Intersect(Intersect::IntersectCatalog(catalog))) => {
+                &mut catalog.base_intersect.base_ei.base_op
+            }
+            Operator::Build(build) => &mut build.base_op,
+            Operator::Probe(Probe::BaseProbe(probe)) => &mut probe.base_op,
+            Operator::Probe(Probe::PC(cartesian)) => &mut cartesian.base_probe.base_op,
+            Operator::Probe(Probe::PMV(PMV::BasePMV(multi))) => &mut multi.base_probe.base_op,
+            Operator::Probe(Probe::PMV(PMV::PMVC(multi_cartesian))) => {
+                &mut multi_cartesian.base_pmv.base_probe.base_op
+            }
+            Operator::Noop(noop) => &mut noop.base_op,
+        }
+    };
+}
+
+#[macro_export]
+macro_rules!
get_base_op_as_ref {
+    // Maps every `Operator` variant (and nested sub-variant) to a shared borrow of the
+    // `base_op` it embeds. All variant paths are unqualified, so the enums named below
+    // must be in scope wherever the macro is invoked.
+    ($item:expr) => {
+        match $item {
+            // The payload binding is returned directly, mirroring `get_base_op_as_mut!`.
+            // When `$item` is matched through a reference the binding is already a
+            // reference, so an extra `&` here would produce a reference-to-reference
+            // while every other arm produces a plain reference to the base operator.
+            Operator::Base(base) => base,
+            Operator::Sink(sink) => match sink {
+                Sink::BaseSink(base) => &base.base_op,
+                Sink::SinkCopy(sc) => &sc.base_sink.base_op,
+                Sink::SinkPrint(sp) => &sp.base_sink.base_op,
+                Sink::SinkLimit(sl) => &sl.base_sink.base_op,
+            },
+            Operator::Scan(scan) => match scan {
+                Scan::Base(base) => &base.base_op,
+                Scan::ScanSampling(ss) => &ss.base_scan.base_op,
+                Scan::ScanBlocking(sb) => &sb.base_scan.base_op,
+            },
+            Operator::EI(ei) => match ei {
+                EI::Base(base) => &base.base_op,
+                EI::Extend(base) => &base.base_ei.base_op,
+                EI::Intersect(base) => match base {
+                    Intersect::BaseIntersect(intersect) => &intersect.base_ei.base_op,
+                    Intersect::IntersectCatalog(ic) => &ic.base_intersect.base_ei.base_op,
+                },
+            },
+            Operator::Build(build) => &build.base_op,
+            Operator::Probe(probe) => match probe {
+                Probe::BaseProbe(base) => &base.base_op,
+                Probe::PC(pc) => &pc.base_probe.base_op,
+                Probe::PMV(PMV::BasePMV(base)) => &base.base_probe.base_op,
+                Probe::PMV(PMV::PMVC(pmvc)) => &pmvc.base_pmv.base_probe.base_op,
+            },
+            Operator::Noop(noop) => &noop.base_op,
+        }
+    };
+}
+
+/// Get common attributes(Origin) from Operator
+#[macro_export]
+macro_rules!
get_op_attr {
+    // Evaluates to the field `$name` of the `base_op` embedded in whichever `Operator`
+    // variant `$item` is - the field value itself, not a borrow of it.
+    // NOTE(review): when `$item` is a reference this reads the field out of borrowed
+    // data, so it presumably only works for `Copy` fields - confirm against call sites.
+    ($item:expr,$name:ident) => {
+        match $item {
+            Operator::Base(op) => op.$name,
+            Operator::Sink(Sink::BaseSink(sink)) => sink.base_op.$name,
+            Operator::Sink(Sink::SinkCopy(sink)) => sink.base_sink.base_op.$name,
+            Operator::Sink(Sink::SinkPrint(sink)) => sink.base_sink.base_op.$name,
+            Operator::Sink(Sink::SinkLimit(sink)) => sink.base_sink.base_op.$name,
+            Operator::Scan(Scan::Base(scan)) => scan.base_op.$name,
+            Operator::Scan(Scan::ScanSampling(scan)) => scan.base_scan.base_op.$name,
+            Operator::Scan(Scan::ScanBlocking(scan)) => scan.base_scan.base_op.$name,
+            Operator::EI(EI::Base(ei)) => ei.base_op.$name,
+            Operator::EI(EI::Extend(extend)) => extend.base_ei.base_op.$name,
+            Operator::EI(EI::Intersect(Intersect::BaseIntersect(intersect))) => {
+                intersect.base_ei.base_op.$name
+            }
+            Operator::EI(EI::Intersect(Intersect::IntersectCatalog(catalog))) => {
+                catalog.base_intersect.base_ei.base_op.$name
+            }
+            Operator::Build(build) => build.base_op.$name,
+            Operator::Probe(Probe::BaseProbe(probe)) => probe.base_op.$name,
+            Operator::Probe(Probe::PC(cartesian)) => cartesian.base_probe.base_op.$name,
+            Operator::Probe(Probe::PMV(PMV::BasePMV(multi))) => multi.base_probe.base_op.$name,
+            Operator::Probe(Probe::PMV(PMV::PMVC(multi_cartesian))) => {
+                multi_cartesian.base_pmv.base_probe.base_op.$name
+            }
+            Operator::Noop(noop) => noop.base_op.$name,
+        }
+    };
+}
+
+/// Get common attributes(reference) from Operator
+#[macro_export]
+macro_rules!
get_op_attr_as_ref {
+    // Evaluates to a shared borrow of the field `$name` of the `base_op` embedded in
+    // whichever `Operator` variant `$item` is. All variant paths are unqualified, so the
+    // enums named below must be in scope wherever the macro is invoked.
+    ($item:expr,$name:ident) => {
+        match $item {
+            Operator::Base(op) => &op.$name,
+            Operator::Sink(Sink::BaseSink(sink)) => &sink.base_op.$name,
+            Operator::Sink(Sink::SinkCopy(sink)) => &sink.base_sink.base_op.$name,
+            Operator::Sink(Sink::SinkPrint(sink)) => &sink.base_sink.base_op.$name,
+            Operator::Sink(Sink::SinkLimit(sink)) => &sink.base_sink.base_op.$name,
+            Operator::Scan(Scan::Base(scan)) => &scan.base_op.$name,
+            Operator::Scan(Scan::ScanSampling(scan)) => &scan.base_scan.base_op.$name,
+            Operator::Scan(Scan::ScanBlocking(scan)) => &scan.base_scan.base_op.$name,
+            Operator::EI(EI::Base(ei)) => &ei.base_op.$name,
+            Operator::EI(EI::Extend(extend)) => &extend.base_ei.base_op.$name,
+            Operator::EI(EI::Intersect(Intersect::BaseIntersect(intersect))) => {
+                &intersect.base_ei.base_op.$name
+            }
+            Operator::EI(EI::Intersect(Intersect::IntersectCatalog(catalog))) => {
+                &catalog.base_intersect.base_ei.base_op.$name
+            }
+            Operator::Build(build) => &build.base_op.$name,
+            Operator::Probe(Probe::BaseProbe(probe)) => &probe.base_op.$name,
+            Operator::Probe(Probe::PC(cartesian)) => &cartesian.base_probe.base_op.$name,
+            Operator::Probe(Probe::PMV(PMV::BasePMV(multi))) => &multi.base_probe.base_op.$name,
+            Operator::Probe(Probe::PMV(PMV::PMVC(multi_cartesian))) => {
+                &multi_cartesian.base_pmv.base_probe.base_op.$name
+            }
+            Operator::Noop(noop) => &noop.base_op.$name,
+        }
+    };
+}
+
+/// Get common attributes(mutable) from Operator
+#[macro_export]
+macro_rules!
get_op_attr_as_mut {
+    // Evaluates to a mutable borrow of the field `$name` of the `base_op` embedded in
+    // whichever `Operator` variant `$item` is. `$name` is pasted as a field identifier;
+    // all variant paths are unqualified, so the enums named below must be in scope
+    // wherever the macro is invoked.
+    ($item:expr,$name:ident) => {
+        match $item {
+            // `Operator::Base` carries the base operator itself, so the field is read
+            // straight from the payload.
+            Operator::Base(base) => &mut base.$name,
+            Operator::Sink(sink) => match sink {
+                Sink::BaseSink(base) => &mut base.base_op.$name,
+                Sink::SinkCopy(sc) => &mut sc.base_sink.base_op.$name,
+                Sink::SinkPrint(sp) => &mut sp.base_sink.base_op.$name,
+                Sink::SinkLimit(sl) => &mut sl.base_sink.base_op.$name,
+            },
+            Operator::Scan(scan) => match scan {
+                Scan::Base(base) => &mut base.base_op.$name,
+                Scan::ScanSampling(ss) => &mut ss.base_scan.base_op.$name,
+                Scan::ScanBlocking(sb) => &mut sb.base_scan.base_op.$name,
+            },
+            Operator::EI(ei) => match ei {
+                EI::Base(base) => &mut base.base_op.$name,
+                EI::Extend(base) => &mut base.base_ei.base_op.$name,
+                // Intersect variants nest one level deeper than the other EI flavours.
+                EI::Intersect(base) => match base {
+                    Intersect::BaseIntersect(intersect) => &mut intersect.base_ei.base_op.$name,
+                    Intersect::IntersectCatalog(ic) => &mut ic.base_intersect.base_ei.base_op.$name,
+                },
+            },
+            Operator::Build(build) => &mut build.base_op.$name,
+            Operator::Probe(probe) => match probe {
+                Probe::BaseProbe(base) => &mut base.base_op.$name,
+                Probe::PC(pc) => &mut pc.base_probe.base_op.$name,
+                Probe::PMV(PMV::BasePMV(base)) => &mut base.base_probe.base_op.$name,
+                Probe::PMV(PMV::PMVC(pmvc)) => &mut pmvc.base_pmv.base_probe.base_op.$name,
+            },
+            Operator::Noop(noop) => &mut noop.base_op.$name,
+        }
+    };
+}
+pub mod extend;
+pub mod hashjoin;
+pub mod operator;
+pub mod scan;
+pub mod sink;
diff --git a/src/graph_impl/multi_graph/plan/operator/operator.rs b/src/graph_impl/multi_graph/plan/operator/operator.rs
new file mode 100644
index 00000000..2839b982
--- /dev/null
+++ b/src/graph_impl/multi_graph/plan/operator/operator.rs
@@ -0,0 +1,297 @@
+use generic::{GraphType, IdType};
+use graph_impl::multi_graph::plan::operator::extend::intersect::BaseIntersect;
+use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect;
+use graph_impl::multi_graph::plan::operator::extend::EI::EI;
+use
graph_impl::multi_graph::plan::operator::hashjoin::build::Build; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::multi_graph::plan::operator::scan::scan::{BaseScan, Scan}; +use graph_impl::multi_graph::plan::operator::scan::scan_sampling::ScanSampling; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::planner::catalog::operator::noop::Noop; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::{HashMap, HashSet}; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::Deref; +use std::rc::Rc; + +/// Operator types +#[derive(Clone)] +pub enum Operator { + Base(BaseOperator), + Sink(Sink), + Scan(Scan), + EI(EI), + Build(Build), + Probe(Probe), + Noop(Noop), +} + +/// Basic operator +#[derive(Clone)] +pub struct BaseOperator { + pub name: String, + pub next: Vec>>>, + pub prev: Option>>>, + pub probe_tuple: Rc>>, + pub out_tuple_len: usize, + pub in_subgraph: Option, + pub out_subgraph: QueryGraph, + pub out_qvertex_to_idx_map: HashMap, + pub last_repeated_vertex_idx: usize, + pub num_out_tuples: usize, + pub icost: usize, +} + +impl BaseOperator { + pub fn new(out_subgraph: QueryGraph, in_subgraph: Option) -> BaseOperator { + BaseOperator { + name: "".to_string(), + next: vec![], + prev: None, + probe_tuple: Rc::new(RefCell::new(vec![])), + out_tuple_len: out_subgraph.get_num_qvertices(), + in_subgraph, + out_subgraph, + out_qvertex_to_idx_map: HashMap::new(), + last_repeated_vertex_idx: 0, + num_out_tuples: 0, + icost: 0, + } + } + + pub fn empty() -> BaseOperator { + BaseOperator { + name: "".to_string(), + next: vec![], + prev: None, + probe_tuple: Rc::new(RefCell::new(vec![])), + out_tuple_len: 0, + in_subgraph: None, + out_subgraph: QueryGraph::empty(), + out_qvertex_to_idx_map: HashMap::new(), + last_repeated_vertex_idx: 
0, + num_out_tuples: 0, + icost: 0, + } + } +} + +/// Common operations for every kind of operator +pub trait CommonOperatorTrait { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ); + fn process_new_tuple(&mut self); + fn execute(&mut self); + fn get_alds_as_string(&self) -> String; + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap); + fn copy(&self, is_thread_safe: bool) -> Operator; + fn is_same_as(&mut self, op: &mut Rc>>) -> bool; + fn get_num_out_tuples(&self) -> usize; +} + +impl Operator { + pub fn get_last_operators(&self) -> Vec>>> { + let next = get_op_attr_as_ref!(self, next); + if next.is_empty() { + return vec![]; + } + let mut last_operators = vec![]; + for op in next { + let next = op.borrow(); + if get_op_attr_as_ref!(next.deref(), next).is_empty() { + last_operators.push(op.clone()); + continue; + } + op.borrow() + .get_last_operators() + .into_iter() + .for_each(|op| last_operators.push(op)); + } + last_operators + } + + pub fn get_operator_metrics_next_operators( + &self, + operator_metrics: &mut Vec<(String, usize, usize)>, + ) { + let name: &String = get_op_attr_as_ref!(self, name); + let icost = get_op_attr!(self, icost); + let num_out_tuples = get_op_attr!(self, num_out_tuples); + operator_metrics.push((name.clone(), icost, num_out_tuples)); + get_op_attr_as_ref!(self, next) + .iter() + .map(|op| op.borrow()) + .for_each(|op| match op.deref() { + Operator::Sink(_) => {} + _ => op.get_operator_metrics_next_operators(operator_metrics), + }); + } + + pub fn has_multi_edge_extends(&self) -> bool { + match self { + Operator::EI(ei) => ei.has_multi_edge_extends(), + _ => { + if let Some(prev) = get_op_attr_as_ref!(self, prev) { + return prev.borrow().deref().has_multi_edge_extends(); + } + false + } + } + } + + pub fn get_out_query_vertices(&self) -> HashSet { + let idx_map = get_op_attr_as_ref!(self, out_qvertex_to_idx_map); + idx_map.iter().map(|(key, _val)| key.clone()).collect() + } +} + +impl 
CommonOperatorTrait for BaseOperator { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + panic!("unsupported operation exception") + } + + fn process_new_tuple(&mut self) { + panic!("unsupported operation exception") + } + + fn execute(&mut self) { + if let Some(prev) = self.prev.as_mut() { + let mut op = prev.as_ptr(); + unsafe { + (&mut *op).execute(); + } + } + } + + fn get_alds_as_string(&self) -> String { + String::from("") + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + panic!("`update_operator_name()` on neither `EI` or `Scan`") + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + panic!("unsupported operation exception") + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + panic!("unsupported operation exception") + } + + fn get_num_out_tuples(&self) -> usize { + self.num_out_tuples + } +} + +/// Abstract methods +impl CommonOperatorTrait for Operator { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + match self { + Operator::Base(base) => base.init(probe_tuple, graph), + Operator::Sink(sink) => sink.init(probe_tuple, graph), + Operator::Scan(scan) => scan.init(probe_tuple, graph), + Operator::EI(ei) => ei.init(probe_tuple, graph), + Operator::Build(build) => build.init(probe_tuple, graph), + Operator::Probe(probe) => probe.init(probe_tuple, graph), + Operator::Noop(noop) => noop.init(probe_tuple, graph), + } + } + + fn process_new_tuple(&mut self) { + match self { + Operator::Base(base) => base.process_new_tuple(), + Operator::Sink(sink) => sink.process_new_tuple(), + Operator::Scan(scan) => scan.process_new_tuple(), + Operator::EI(ei) => ei.process_new_tuple(), + Operator::Build(build) => build.process_new_tuple(), + Operator::Probe(probe) => probe.process_new_tuple(), + Operator::Noop(noop) => noop.process_new_tuple(), + } + } + + fn execute(&mut self) { + match self { + Operator::Base(base) => base.execute(), + Operator::Sink(sink) => 
sink.execute(), + Operator::Scan(scan) => scan.execute(), + Operator::EI(ei) => ei.execute(), + Operator::Build(build) => build.execute(), + Operator::Probe(probe) => probe.execute(), + Operator::Noop(noop) => noop.execute(), + } + } + + fn get_alds_as_string(&self) -> String { + match self { + Operator::Base(base) => base.get_alds_as_string(), + Operator::Sink(sink) => sink.get_alds_as_string(), + Operator::Scan(scan) => scan.get_alds_as_string(), + Operator::EI(ei) => ei.get_alds_as_string(), + Operator::Build(build) => build.get_alds_as_string(), + Operator::Probe(probe) => probe.get_alds_as_string(), + Operator::Noop(noop) => noop.get_alds_as_string(), + } + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + match self { + Operator::Base(base) => base.update_operator_name(query_vertex_to_index_map), + Operator::Sink(sink) => sink.update_operator_name(query_vertex_to_index_map), + Operator::Scan(scan) => scan.update_operator_name(query_vertex_to_index_map), + Operator::EI(ei) => ei.update_operator_name(query_vertex_to_index_map), + Operator::Build(build) => build.update_operator_name(query_vertex_to_index_map), + Operator::Probe(probe) => probe.update_operator_name(query_vertex_to_index_map), + Operator::Noop(noop) => noop.update_operator_name(query_vertex_to_index_map), + } + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + match self { + Operator::Base(base) => base.copy(is_thread_safe), + Operator::Sink(sink) => sink.copy(is_thread_safe), + Operator::Scan(scan) => scan.copy(is_thread_safe), + Operator::EI(ei) => ei.copy(is_thread_safe), + Operator::Build(build) => build.copy(is_thread_safe), + Operator::Probe(probe) => probe.copy(is_thread_safe), + Operator::Noop(noop) => noop.copy(is_thread_safe), + } + } + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + match self { + Operator::Base(base) => base.is_same_as(op), + Operator::Sink(sink) => sink.is_same_as(op), + Operator::Scan(scan) => scan.is_same_as(op), + 
Operator::EI(ei) => ei.is_same_as(op), + Operator::Build(build) => build.is_same_as(op), + Operator::Probe(probe) => probe.is_same_as(op), + Operator::Noop(noop) => noop.is_same_as(op), + } + } + + fn get_num_out_tuples(&self) -> usize { + match self { + Operator::Base(base) => base.get_num_out_tuples(), + Operator::Sink(sink) => sink.get_num_out_tuples(), + Operator::Scan(scan) => scan.get_num_out_tuples(), + Operator::EI(ei) => ei.get_num_out_tuples(), + Operator::Build(build) => build.get_num_out_tuples(), + Operator::Probe(probe) => probe.get_num_out_tuples(), + Operator::Noop(noop) => noop.get_num_out_tuples(), + } + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/scan/mod.rs b/src/graph_impl/multi_graph/plan/operator/scan/mod.rs new file mode 100644 index 00000000..6cc8b375 --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/scan/mod.rs @@ -0,0 +1,3 @@ +pub mod scan; +pub mod scan_blocking; +pub mod scan_sampling; diff --git a/src/graph_impl/multi_graph/plan/operator/scan/scan.rs b/src/graph_impl/multi_graph/plan/operator/scan/scan.rs new file mode 100644 index 00000000..840ecca2 --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/scan/scan.rs @@ -0,0 +1,236 @@ +use generic::{GraphTrait, GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::operator::{ + BaseOperator, CommonOperatorTrait, Operator, +}; +use graph_impl::multi_graph::plan::operator::scan::scan_blocking::ScanBlocking; +use graph_impl::multi_graph::plan::operator::scan::scan_sampling::ScanSampling; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::static_graph::graph::KEY_ANY; +use graph_impl::static_graph::sorted_adj_vec::SortedAdjVec; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::{DerefMut, Deref}; +use std::rc::Rc; + +#[derive(Clone)] +pub enum Scan { + Base(BaseScan), + 
ScanSampling(ScanSampling), + ScanBlocking(ScanBlocking), +} + +#[derive(Clone)] +pub struct BaseScan { + pub base_op: BaseOperator, + pub from_query_vertex: String, + pub to_query_vertex: String, + pub from_type: i32, + pub to_type: i32, + pub label_or_to_type: i32, + pub fwd_adj_list: Vec>>, + pub vertex_ids: Vec, + pub vertex_types: Vec, + from_vertex_start_idx: usize, + from_vertex_end_idx: usize, +} + +impl BaseScan { + pub fn new(out_subgraph: QueryGraph) -> BaseScan { + let mut scan = BaseScan { + base_op: BaseOperator::new(out_subgraph, None), + from_query_vertex: "".to_string(), + to_query_vertex: "".to_string(), + from_type: 0, + to_type: 0, + label_or_to_type: 0, + fwd_adj_list: vec![], + vertex_ids: vec![], + vertex_types: vec![], + from_vertex_start_idx: 0, + from_vertex_end_idx: 0, + }; + let out_subgraph = &scan.base_op.out_subgraph; + if out_subgraph.q_edges.len() > 1 { + panic!("IllegalArgumentException"); + } + let query_edge = &out_subgraph.q_edges[0]; + scan.from_type = query_edge.from_type; + scan.to_type = query_edge.to_type; + scan.label_or_to_type = query_edge.label; + scan.base_op.last_repeated_vertex_idx = 0; + scan.from_query_vertex = query_edge.from_query_vertex.clone(); + scan.to_query_vertex = query_edge.to_query_vertex.clone(); + scan.base_op + .out_qvertex_to_idx_map + .insert(scan.from_query_vertex.clone(), 0); + scan.base_op + .out_qvertex_to_idx_map + .insert(scan.to_query_vertex.clone(), 1); + scan.base_op.name = + "SCAN (".to_owned() + &scan.from_query_vertex + ")->(" + &scan.to_query_vertex + ")"; + scan + } +} + +impl CommonOperatorTrait for BaseScan { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + self.base_op.probe_tuple = probe_tuple.clone(); + self.vertex_ids = graph.get_node_ids().clone(); + self.vertex_types = graph.get_node_types().clone(); + if KEY_ANY != self.from_type { + self.from_vertex_start_idx = graph.get_node_type_offsets()[self.from_type as usize]; + self.from_vertex_end_idx = 
graph.get_node_type_offsets()[(self.from_type + 1) as usize]; + } else { + self.from_vertex_start_idx = 0; + self.from_vertex_end_idx = graph.node_count(); + } + self.fwd_adj_list = graph.get_fwd_adj_list().clone(); + if graph.is_sorted_by_node() { + self.label_or_to_type = self.to_type; + self.to_type = KEY_ANY; + } + for next_op in &self.base_op.next { + next_op.borrow_mut().init(probe_tuple.clone(), graph); + } + } + + fn process_new_tuple(&mut self) { + panic!("Operator `scan` does not support execute().") + } + + fn execute(&mut self) { + for from_idx in self.from_vertex_start_idx..self.from_vertex_end_idx { + let from_vertex = self.vertex_ids[from_idx]; + self.base_op.probe_tuple.borrow_mut()[0] = from_vertex.clone(); + let to_vertex_start_idx = self.fwd_adj_list[from_vertex.id()].as_ref().unwrap().get_offsets() + [self.label_or_to_type as usize]; + let to_vertex_end_idx = self.fwd_adj_list[from_vertex.id()].as_ref().unwrap().get_offsets() + [(self.label_or_to_type + 1) as usize]; + for to_idx in to_vertex_start_idx..to_vertex_end_idx { + self.base_op.probe_tuple.borrow_mut()[1] = self.fwd_adj_list[from_vertex.id()] + .as_ref() + .unwrap() + .get_neighbor_id(Id::new(to_idx)); + if self.to_type == KEY_ANY + || self.vertex_types[self.base_op.probe_tuple.borrow()[1].id()] == self.to_type + { + self.base_op.num_out_tuples += 1; + self.base_op.next[0].borrow_mut().process_new_tuple(); + } + } + } + } + + fn get_alds_as_string(&self) -> String { + self.base_op.get_alds_as_string() + } + + fn update_operator_name(&mut self, mut query_vertex_to_index_map: HashMap) { + query_vertex_to_index_map = HashMap::new(); + query_vertex_to_index_map.insert(self.from_query_vertex.clone(), 0); + query_vertex_to_index_map.insert(self.to_query_vertex.clone(), 1); + self.base_op.next.iter_mut().for_each(|op| { + op.borrow_mut() + .update_operator_name(query_vertex_to_index_map.clone()) + }); + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + if is_thread_safe { + return 
Operator::Scan(Scan::ScanBlocking(ScanBlocking::new( + self.base_op.out_subgraph.clone(), + ))); + } + Operator::Scan(Scan::Base(BaseScan::new(self.base_op.out_subgraph.clone()))) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + if let Operator::Scan(scan) = op.borrow_mut().deref_mut() { + return self.from_type == get_scan_as_ref!(scan).from_type + && self.to_type == get_scan_as_ref!(scan).to_type + && self.label_or_to_type == get_scan_as_ref!(scan).label_or_to_type; + } + false + } + + fn get_num_out_tuples(&self) -> usize { + self.base_op.get_num_out_tuples() + } +} + +impl CommonOperatorTrait for Scan { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + match self { + Scan::Base(base) => base.init(probe_tuple, graph), + Scan::ScanSampling(ss) => ss.init(probe_tuple, graph), + Scan::ScanBlocking(sb) => sb.init(probe_tuple, graph), + } + } + + fn process_new_tuple(&mut self) { + match self { + Scan::Base(base) => base.process_new_tuple(), + Scan::ScanSampling(ss) => ss.process_new_tuple(), + Scan::ScanBlocking(sb) => sb.process_new_tuple(), + } + } + + fn execute(&mut self) { + match self { + Scan::Base(base) => base.execute(), + Scan::ScanSampling(ss) => ss.execute(), + Scan::ScanBlocking(sb) => sb.execute(), + } + } + + fn get_alds_as_string(&self) -> String { + match self { + Scan::Base(base) => base.get_alds_as_string(), + Scan::ScanSampling(ss) => ss.get_alds_as_string(), + Scan::ScanBlocking(sb) => sb.get_alds_as_string(), + } + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + match self { + Scan::Base(base) => base.update_operator_name(query_vertex_to_index_map), + Scan::ScanSampling(ss) => ss.update_operator_name(query_vertex_to_index_map), + Scan::ScanBlocking(sb) => sb.update_operator_name(query_vertex_to_index_map), + } + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + match self { + Scan::Base(base) => base.copy(is_thread_safe), + Scan::ScanSampling(ss) => 
ss.copy(is_thread_safe), + Scan::ScanBlocking(sb) => sb.copy(is_thread_safe), + } + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + match self { + Scan::Base(base) => base.is_same_as(op), + Scan::ScanSampling(ss) => ss.is_same_as(op), + Scan::ScanBlocking(sb) => sb.is_same_as(op), + } + } + + fn get_num_out_tuples(&self) -> usize { + match self { + Scan::Base(base) => base.get_num_out_tuples(), + Scan::ScanSampling(ss) => ss.get_num_out_tuples(), + Scan::ScanBlocking(sb) => sb.get_num_out_tuples(), + } + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/scan/scan_blocking.rs b/src/graph_impl/multi_graph/plan/operator/scan/scan_blocking.rs new file mode 100644 index 00000000..a05b0e08 --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/scan/scan_blocking.rs @@ -0,0 +1,216 @@ +use generic::{GraphTrait, GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::plan::operator::scan::scan::BaseScan; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::static_graph::graph::KEY_ANY; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::rc::Rc; + +static PARTITION_SIZE: usize = 100; + +#[derive(Clone)] +pub struct VertexIdxLimits { + pub from_variable_index_limit: usize, + pub to_variable_index_limit: usize, +} + +///TODO:ReentrantLock +#[derive(Clone)] +pub struct ScanBlocking { + pub base_scan: BaseScan, + curr_from_idx: usize, + curr_to_idx: usize, + from_idx_limit: usize, + to_idx_limit: usize, + highest_from_idx: usize, + highest_to_idx: usize, + pub global_vertices_idx_limits: VertexIdxLimits, +} + +impl ScanBlocking { + pub fn new(out_subgraph: QueryGraph) -> ScanBlocking { + ScanBlocking { + base_scan: BaseScan::new(out_subgraph), + curr_from_idx: 0, + curr_to_idx: 0, + from_idx_limit: 0, + to_idx_limit: 0, + highest_from_idx: 0, + highest_to_idx: 0, + 
global_vertices_idx_limits: VertexIdxLimits { + from_variable_index_limit: 0, + to_variable_index_limit: 0, + }, + } + } + + fn update_indices_limits(&mut self) { + //ReentrantLock lock here. + self.curr_from_idx = self.global_vertices_idx_limits.from_variable_index_limit; + self.curr_to_idx = self.global_vertices_idx_limits.to_variable_index_limit; + self.from_idx_limit = self.curr_from_idx; + self.to_idx_limit = self.curr_to_idx; + let mut num_edges_left = PARTITION_SIZE; + while num_edges_left > 0 { + let flag = self.from_idx_limit == self.highest_from_idx - 1 + && self.to_idx_limit < self.highest_to_idx - 1 + || self.from_idx_limit < self.highest_from_idx - 1; + if !flag { + break; + } + let mut label = self.base_scan.label_or_to_type; + let to_limit = self.base_scan.fwd_adj_list[self.from_idx_limit] + .as_mut() + .map_or(0, |adj| adj.get_offsets()[(label + 1) as usize]); + if self.to_idx_limit + num_edges_left <= to_limit - 1 { + self.to_idx_limit += num_edges_left - 1; + num_edges_left = 0; + } else { + num_edges_left -= to_limit - 1 - self.to_idx_limit; + self.to_idx_limit = to_limit; + if self.from_idx_limit == self.highest_from_idx - 1 { + break; + } + self.from_idx_limit += 1; + label = self.base_scan.label_or_to_type; + self.to_idx_limit = self.base_scan.fwd_adj_list[self.from_idx_limit] + .as_mut() + .map_or(0, |adj| adj.get_offsets()[label as usize]); + } + } + self.global_vertices_idx_limits.from_variable_index_limit = self.from_idx_limit; + self.global_vertices_idx_limits.to_variable_index_limit = self.to_idx_limit; + } + + fn produce_new_edges(&mut self, from_idx: usize, start_to_idx: usize, end_to_idx: usize) { + let base_op = &mut self.base_scan.base_op; + for to_idx in start_to_idx..end_to_idx { + base_op.probe_tuple.borrow_mut()[0] = self.base_scan.vertex_ids[from_idx]; + base_op.probe_tuple.borrow_mut()[1] = self.base_scan.fwd_adj_list[from_idx] + .as_mut() + .unwrap() + .get_neighbor_id(Id::new(to_idx)); + base_op.num_out_tuples += 1; + 
base_op.next[0].borrow_mut().process_new_tuple(); + } + } + + fn produce_new_edges_default(&mut self) { + for from_idx in self.curr_from_idx + 1..self.from_idx_limit { + let label = self.base_scan.label_or_to_type; + self.base_scan.base_op.probe_tuple.borrow_mut()[0] = + self.base_scan.vertex_ids[from_idx]; + let to_vertex_idx_start = self.base_scan.fwd_adj_list[from_idx] + .as_mut() + .map_or(0, |adj| adj.get_offsets()[label as usize]); + let to_vertex_idx_limit = self.base_scan.fwd_adj_list[from_idx] + .as_mut() + .map_or(0, |adj| adj.get_offsets()[(label + 1) as usize]); + for to_idx in to_vertex_idx_start..to_vertex_idx_limit { + self.base_scan.base_op.probe_tuple.borrow_mut()[1] = self.base_scan.fwd_adj_list + [from_idx] + .as_mut() + .unwrap() + .get_neighbor_id(Id::new(to_idx)); + if self.base_scan.to_type == KEY_ANY + || self.base_scan.vertex_types + [self.base_scan.base_op.probe_tuple.borrow()[1].id()] + == self.base_scan.to_type + { + self.base_scan.base_op.num_out_tuples += 1; + self.base_scan.base_op.next[0] + .borrow_mut() + .process_new_tuple(); + } + } + } + } +} + +impl CommonOperatorTrait for ScanBlocking { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + self.base_scan.init(probe_tuple.clone(), graph); + if self.base_scan.from_type != KEY_ANY { + self.curr_from_idx = graph.get_node_type_offsets()[self.base_scan.from_type as usize]; + self.highest_from_idx = + graph.get_node_type_offsets()[(self.base_scan.from_type + 1) as usize]; + } else { + self.curr_from_idx = 0; + self.highest_from_idx = graph.node_count() + 1; + } + let label = self.base_scan.label_or_to_type; + self.curr_to_idx = self.base_scan.fwd_adj_list + [self.base_scan.vertex_ids[self.curr_from_idx].id()] + .as_mut() + .map_or(0, |adj| adj.get_offsets()[label as usize]); + self.highest_to_idx = self.base_scan.fwd_adj_list + [self.base_scan.vertex_ids[self.highest_from_idx - 1].id()] + .as_mut() + .map_or(0, |adj| adj.get_offsets()[(label + 1) as usize]); 
+ self.from_idx_limit = self.curr_from_idx; + self.to_idx_limit = self.curr_to_idx; + self.base_scan + .base_op + .next + .iter() + .for_each(|next_op| next_op.borrow_mut().init(probe_tuple.clone(), graph)); + } + + fn process_new_tuple(&mut self) { + self.base_scan.process_new_tuple() + } + + fn execute(&mut self) { + self.update_indices_limits(); + while self.curr_from_idx == self.highest_from_idx - 1 + && self.curr_to_idx < self.highest_to_idx - 1 + || self.curr_from_idx < self.highest_from_idx - 1 + { + if self.curr_from_idx == self.from_idx_limit { + self.produce_new_edges(self.curr_from_idx, self.curr_to_idx, self.to_idx_limit); + } else if self.curr_from_idx < self.from_idx_limit { + let label = self.base_scan.label_or_to_type; + let to_vertex_idx_limit = self.base_scan.fwd_adj_list + [self.base_scan.vertex_ids[self.curr_from_idx].id()] + .as_mut() + .map_or(0, |adj| adj.get_offsets()[(label + 1) as usize]); + self.produce_new_edges(self.curr_from_idx, self.curr_to_idx, to_vertex_idx_limit); + self.produce_new_edges_default(/* startFromIdx: currFromIdx + 1, endFromIdx: fromIdxLimit */); + let start_idx = self.base_scan.fwd_adj_list + [self.base_scan.vertex_ids[self.from_idx_limit].id()] + .as_mut() + .map_or(0, |adj| adj.get_offsets()[label as usize]); + self.produce_new_edges(self.from_idx_limit, start_idx, self.to_idx_limit); + } + self.update_indices_limits(); + } + } + + fn get_alds_as_string(&self) -> String { + self.base_scan.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_scan + .update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + self.base_scan.copy(is_thread_safe) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + self.base_scan.is_same_as(op) + } + + fn get_num_out_tuples(&self) -> usize { + self.base_scan.get_num_out_tuples() + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/scan/scan_sampling.rs 
b/src/graph_impl/multi_graph/plan/operator/scan/scan_sampling.rs
new file mode 100644
index 00000000..bdd47810
--- /dev/null
+++ b/src/graph_impl/multi_graph/plan/operator/scan/scan_sampling.rs
@@ -0,0 +1,147 @@
+use generic::{GraphType, IdType};
+use graph_impl::multi_graph::plan::operator::operator::{
+    BaseOperator, CommonOperatorTrait, Operator,
+};
+use graph_impl::multi_graph::plan::operator::scan::scan::{BaseScan, Scan};
+use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph;
+use graph_impl::TypedStaticGraph;
+use hashbrown::HashMap;
+use itertools::Itertools;
+use rand::{thread_rng, Rng};
+use std::cell::RefCell;
+use std::hash::Hash;
+use std::rc::Rc;
+
+/// Scan operator that emits a pre-drawn random sample of edges instead of walking the graph.
+#[derive(Clone)]
+pub struct ScanSampling {
+    pub base_scan: BaseScan,
+    /// Sampled edges still to be emitted; each entry holds the two endpoints of one edge.
+    pub edges_queue: Vec>,
+}
+
+static mut FLAG: bool = false;
+
+impl ScanSampling {
+    /// Creates a sampling scan over `out_subgraph` with an empty sample queue.
+    pub fn new(out_subgraph: QueryGraph) -> ScanSampling {
+        Self {
+            base_scan: BaseScan::new(out_subgraph),
+            edges_queue: vec![],
+        }
+    }
+    /// Tops the queue up to `num_edges_to_sample` entries, each drawn uniformly (with
+    /// replacement) from `edges`, a flat list of endpoint pairs `[from0, to0, from1, to1, ...]`.
+    ///
+    /// # Panics
+    /// Panics if more entries are needed and `edges` does not contain a complete pair.
+    pub fn set_edge_indices_to_sample(&mut self, edges: Vec, num_edges_to_sample: usize) {
+        let mut rng = thread_rng();
+        let num_edges = edges.len() / 2;
+        while self.edges_queue.len() < num_edges_to_sample {
+            let edge_idx = rng.gen_range(0, num_edges);
+            // Edge `i` occupies slots `2 * i` (source) and `2 * i + 1` (destination).
+            self.edges_queue
+                .push(vec![edges[2 * edge_idx], edges[2 * edge_idx + 1]]);
+        }
+    }
+
+    /// Tops the queue up to `num_edges_to_sample` entries, each drawn uniformly (with
+    /// replacement) from `edges`, whose items are already per-edge endpoint lists.
+    ///
+    /// # Panics
+    /// Panics if more entries are needed and `edges` is empty.
+    pub fn set_edge_indices_to_sample_list(
+        &mut self,
+        edges: Vec>,
+        num_edges_to_sample: usize,
+    ) {
+        let mut rng = thread_rng();
+        while self.edges_queue.len() < num_edges_to_sample {
+            let edge_idx = rng.gen_range(0, edges.len());
+            self.edges_queue.push(edges[edge_idx].clone());
+        }
+    }
+
+    /// Replaces the queue with `num_edges_to_sample` entries, each drawn uniformly (with
+    /// replacement) from `edges`.
+    ///
+    /// # Panics
+    /// Panics if `num_edges_to_sample` is non-zero and `edges` is empty.
+    pub fn set_edge_indices_to_sample_by_edges(
+        &mut self,
+        edges: Vec>,
+        num_edges_to_sample: usize,
+    ) {
+        let mut rng = thread_rng();
+        // Start from an empty queue: pre-filling it with placeholders would skip the loop below.
+        self.edges_queue = Vec::with_capacity(num_edges_to_sample);
+        while self.edges_queue.len() < num_edges_to_sample {
+            let edge_idx = rng.gen_range(0, edges.len());
+            self.edges_queue.push(edges[edge_idx].clone());
+        }
+    }
+
+    /// Returns a new `ScanSampling` operator over the same subgraph with a copy of the queue.
+    pub fn copy_default(&self) -> Operator {
+        let mut scan_sampling = ScanSampling::new(self.base_scan.base_op.out_subgraph.clone());
+        scan_sampling.edges_queue = self.edges_queue.clone();
+        Operator::Scan(Scan::ScanSampling(scan_sampling))
+    }
+}
+
+impl CommonOperatorTrait for ScanSampling {
+    /// Adopts `probe_tuple` and initialises the next operators, but only if this operator's
+    /// current probe tuple is still empty.
+    fn init(
+        &mut self,
+        probe_tuple: Rc>>,
+        graph: &TypedStaticGraph,
+    ) {
+        if self.base_scan.base_op.probe_tuple.borrow().is_empty() {
+            self.base_scan.base_op.probe_tuple = probe_tuple.clone();
+            self.base_scan.base_op.next.iter().for_each(|next_op| {
+                next_op.borrow_mut().init(probe_tuple.clone(), graph);
+            });
+        }
+    }
+
+    fn process_new_tuple(&mut self) {
+        self.base_scan.process_new_tuple()
+    }
+
+    /// Drains the queue from the back, writing each edge into probe-tuple slots 0 and 1 and
+    /// notifying every next operator.
+    fn execute(&mut self) {
+        while let Some(edge) = self.edges_queue.pop() {
+            self.base_scan.base_op.probe_tuple.borrow_mut()[0] = edge[0];
+            self.base_scan.base_op.probe_tuple.borrow_mut()[1] = edge[1];
+            self.base_scan.base_op.num_out_tuples += 1;
+            for next_op in &mut self.base_scan.base_op.next {
+                next_op.borrow_mut().process_new_tuple();
+            }
+        }
+    }
+
+    fn get_alds_as_string(&self) -> String {
+        self.base_scan.get_alds_as_string()
+    }
+
+    fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) {
+        self.base_scan
+            .update_operator_name(query_vertex_to_index_map)
+    }
+
+    /// Same as `copy_default`; `is_thread_safe` is not consulted for this operator.
+    fn copy(&self, is_thread_safe: bool) -> Operator {
+        self.copy_default()
+    }
+
+    fn is_same_as(&mut self, op: &mut Rc>>) -> bool {
+        self.base_scan.is_same_as(op)
+    }
+
+    fn get_num_out_tuples(&self) -> usize {
+        self.base_scan.get_num_out_tuples()
+    }
+}
diff --git a/src/graph_impl/multi_graph/plan/operator/sink/mod.rs b/src/graph_impl/multi_graph/plan/operator/sink/mod.rs
new file mode 100644
index 00000000..a0c364bf
--- /dev/null
+++ 
b/src/graph_impl/multi_graph/plan/operator/sink/mod.rs
@@ -0,0 +1,4 @@
+pub mod sink;
+pub mod sink_copy;
+pub mod sink_limit;
+pub mod sink_print;
diff --git a/src/graph_impl/multi_graph/plan/operator/sink/sink.rs b/src/graph_impl/multi_graph/plan/operator/sink/sink.rs
new file mode 100644
index 00000000..ae5736af
--- /dev/null
+++ b/src/graph_impl/multi_graph/plan/operator/sink/sink.rs
@@ -0,0 +1,228 @@
+use generic::{GraphType, IdType};
+use graph_impl::multi_graph::plan::operator::extend::intersect::BaseIntersect;
+use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect;
+use graph_impl::multi_graph::plan::operator::extend::EI::EI;
+use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe;
+use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV;
+use graph_impl::multi_graph::plan::operator::operator::{
+    BaseOperator, CommonOperatorTrait, Operator,
+};
+use graph_impl::multi_graph::plan::operator::scan::scan::Scan;
+use graph_impl::multi_graph::plan::operator::sink::sink_copy::SinkCopy;
+use graph_impl::multi_graph::plan::operator::sink::sink_limit::SinkLimit;
+use graph_impl::multi_graph::plan::operator::sink::sink_print::SinkPrint;
+use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph;
+use graph_impl::TypedStaticGraph;
+use hashbrown::HashMap;
+use std::cell::RefCell;
+use std::hash::Hash;
+use std::ops::{Deref, DerefMut};
+use std::rc::Rc;
+
+#[derive(Clone)]
+pub enum SinkType {
+    Copy,
+    Print,
+    Limit,
+    Counter,
+}
+
+#[derive(Clone)]
+pub enum Sink {
+    BaseSink(BaseSink),
+    SinkCopy(SinkCopy),
+    SinkPrint(SinkPrint),
+    SinkLimit(SinkLimit),
+}
+
+/// State shared by the sink operators; `SinkCopy` and `SinkLimit` embed it as `base_sink`.
+#[derive(Clone)]
+pub struct BaseSink {
+    pub base_op: BaseOperator,
+    /// Upstream operators; when non-empty, `execute` runs the first one and
+    /// `get_num_out_tuples` sums over all of them.
+    pub previous: Vec>>>,
+}
+
+impl BaseSink {
+    /// Creates a sink whose input and output subgraphs are both `query_graph`.
+    pub fn new(query_graph: QueryGraph) -> Self {
+        Self {
+            base_op: BaseOperator::new(query_graph.clone(), Some(query_graph)),
+            previous: vec![],
+        }
+    }
+}
+
+impl CommonOperatorTrait for BaseSink {
+    /// Stores the shared probe tuple; `graph` is not used by the base sink.
+    fn init(
+        &mut self,
+        probe_tuple: Rc>>,
+        graph: &TypedStaticGraph,
+    ) {
+        self.base_op.probe_tuple = probe_tuple;
+    }
+
+    /// No-op for the base sink.
+    fn process_new_tuple(&mut self) {}
+
+    /// Runs the upstream side of the plan: the first entry of `previous` when there is one,
+    /// otherwise the `prev` operator.
+    ///
+    /// # Panics
+    /// Panics if `previous` is empty and no `prev` operator is set.
+    fn execute(&mut self) {
+        if !self.previous.is_empty() {
+            let mut prev = self.previous[0].as_ptr();
+            // NOTE(review): this bypasses the `RefCell` borrow check; it presumably relies on no
+            // conflicting borrow of `previous[0]` being live during the call -- confirm.
+            unsafe {
+                (&mut *prev).execute();
+            }
+        } else {
+            self.base_op.prev.as_mut().unwrap().borrow_mut().execute();
+        }
+    }
+
+    fn get_alds_as_string(&self) -> String {
+        self.base_op.get_alds_as_string()
+    }
+
+    fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) {
+        self.base_op.update_operator_name(query_vertex_to_index_map)
+    }
+
+    /// Builds a new `BaseSink` over the same output subgraph, copying the `prev` operator (if
+    /// any) with the same `is_thread_safe` flag; `previous` is left empty.
+    fn copy(&self, is_thread_safe: bool) -> Operator {
+        let mut sink = BaseSink::new(self.base_op.out_subgraph.clone());
+        if let Some(prev) = &self.base_op.prev {
+            sink.base_op.prev = Some(Rc::new(RefCell::new(
+                prev.borrow().deref().copy(is_thread_safe),
+            )));
+        } else {
+            sink.base_op.prev = None;
+        }
+        Operator::Sink(Sink::BaseSink(sink))
+    }
+
+    /// Returns `true` when `op` is a sink whose upstream operator matches this sink's upstream
+    /// operator; returns `false` if `op` is not a sink or either side has no `prev`.
+    fn is_same_as(&mut self, op: &mut Rc>>) -> bool {
+        // End the shared borrow before taking `borrow_mut()` below: a `Ref` kept alive across
+        // the mutable borrow makes the `RefCell` panic with `BorrowMutError`.
+        let is_sink = match op.borrow().deref() {
+            Operator::Sink(_) => true,
+            _ => false,
+        };
+        if !is_sink {
+            return false;
+        }
+        if let Some(prev) = &mut self.base_op.prev {
+            let mut op = op.borrow_mut();
+            if let Some(op_prev) = get_op_attr_as_mut!(op.deref_mut(), prev).as_mut() {
+                return prev.borrow_mut().is_same_as(op_prev);
+            }
+        }
+        false
+    }
+
+    /// Sums `num_out_tuples` over `previous` when it is non-empty, otherwise reports the value
+    /// of the single `prev` operator.
+    ///
+    /// # Panics
+    /// Panics if `previous` is empty and no `prev` operator is set.
+    fn get_num_out_tuples(&self) -> usize {
+        if !self.previous.is_empty() {
+            self.previous
+                .iter()
+                .map(|op| op.borrow())
+                .map(|op| get_op_attr!(op.deref(), num_out_tuples))
+                .sum()
+        } else {
+            get_op_attr!(
+                self.base_op.prev.as_ref().unwrap().borrow().deref(),
+                num_out_tuples
+            )
+        }
+    }
+}
+
+impl CommonOperatorTrait for Sink {
+    fn init(
+        &mut self,
+        probe_tuple: Rc>>,
+        graph: &TypedStaticGraph,
+    ) {
+        match self {
+            Sink::BaseSink(base) => base.init(probe_tuple, graph),
+            Sink::SinkCopy(sc) => sc.init(probe_tuple, graph),
+            Sink::SinkPrint(sp) => sp.init(probe_tuple, graph),
+            Sink::SinkLimit(sl) => sl.init(probe_tuple, graph),
+        }
+ } + + fn process_new_tuple(&mut self) { + match self { + Sink::BaseSink(base) => base.process_new_tuple(), + Sink::SinkCopy(sc) => sc.process_new_tuple(), + Sink::SinkPrint(sp) => sp.process_new_tuple(), + Sink::SinkLimit(sl) => sl.process_new_tuple(), + } + } + + fn execute(&mut self) { + match self { + Sink::BaseSink(base) => base.execute(), + Sink::SinkCopy(sc) => sc.execute(), + Sink::SinkPrint(sp) => sp.execute(), + Sink::SinkLimit(sl) => sl.execute(), + } + } + + fn get_alds_as_string(&self) -> String { + match self { + Sink::BaseSink(base) => base.get_alds_as_string(), + Sink::SinkCopy(sc) => sc.get_alds_as_string(), + Sink::SinkPrint(sp) => sp.get_alds_as_string(), + Sink::SinkLimit(sl) => sl.get_alds_as_string(), + } + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + match self { + Sink::BaseSink(base) => base.update_operator_name(query_vertex_to_index_map), + Sink::SinkCopy(sc) => sc.update_operator_name(query_vertex_to_index_map), + Sink::SinkPrint(sp) => sp.update_operator_name(query_vertex_to_index_map), + Sink::SinkLimit(sl) => sl.update_operator_name(query_vertex_to_index_map), + } + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + match self { + Sink::BaseSink(base) => base.copy(is_thread_safe), + Sink::SinkCopy(sc) => sc.copy(is_thread_safe), + Sink::SinkPrint(sp) => sp.copy(is_thread_safe), + Sink::SinkLimit(sl) => sl.copy(is_thread_safe), + } + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + match self { + Sink::BaseSink(base) => base.is_same_as(op), + Sink::SinkCopy(sc) => sc.is_same_as(op), + Sink::SinkPrint(sp) => sp.is_same_as(op), + Sink::SinkLimit(sl) => sl.is_same_as(op), + } + } + + fn get_num_out_tuples(&self) -> usize { + match self { + Sink::BaseSink(base) => base.get_num_out_tuples(), + Sink::SinkCopy(sc) => sc.get_num_out_tuples(), + Sink::SinkPrint(sp) => sp.get_num_out_tuples(), + Sink::SinkLimit(sl) => sl.get_num_out_tuples(), + } + } +} diff --git 
a/src/graph_impl/multi_graph/plan/operator/sink/sink_copy.rs b/src/graph_impl/multi_graph/plan/operator/sink/sink_copy.rs new file mode 100644 index 00000000..9dd63534 --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/sink/sink_copy.rs @@ -0,0 +1,75 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::plan::operator::sink::sink::{BaseSink, Sink}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::Deref; +use std::rc::Rc; + +#[derive(Clone)] +pub struct SinkCopy { + pub base_sink: BaseSink, + output_tuple: Vec, +} + +impl SinkCopy { + pub fn new(query_graph: QueryGraph, out_tuple_length: usize) -> SinkCopy { + let mut sink_copy = SinkCopy { + base_sink: BaseSink::new(query_graph), + output_tuple: vec![], + }; + sink_copy.base_sink.base_op.out_tuple_len = out_tuple_length; + sink_copy.output_tuple = vec![Id::new(0); out_tuple_length]; + sink_copy + } +} + +impl CommonOperatorTrait for SinkCopy { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + self.base_sink.init(probe_tuple, graph); + } + + fn process_new_tuple(&mut self) { + let len = self.output_tuple.len(); + self.output_tuple + .clone_from_slice(&self.base_sink.base_op.probe_tuple.borrow()[0..len]); + } + + fn execute(&mut self) { + self.base_sink.execute(); + } + + fn get_alds_as_string(&self) -> String { + self.base_sink.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_sink + .update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + let base_op = &self.base_sink.base_op; + let mut sink = SinkCopy::new(base_op.out_subgraph.clone(), base_op.out_tuple_len); + let origin_prev = base_op.prev.as_ref().unwrap(); + 
sink.base_sink.base_op.prev = Some(Rc::new(RefCell::new( + origin_prev.borrow().deref().copy(is_thread_safe), + ))); + Operator::Sink(Sink::SinkCopy(sink)) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + self.base_sink.is_same_as(op) + } + + fn get_num_out_tuples(&self) -> usize { + self.base_sink.get_num_out_tuples() + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/sink/sink_limit.rs b/src/graph_impl/multi_graph/plan/operator/sink/sink_limit.rs new file mode 100644 index 00000000..d650b95e --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/sink/sink_limit.rs @@ -0,0 +1,81 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::intersect::BaseIntersect; +use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect; +use graph_impl::multi_graph::plan::operator::extend::EI::EI; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::plan::operator::scan::scan::Scan; +use graph_impl::multi_graph::plan::operator::sink::sink::{BaseSink, Sink}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::Deref; +use std::rc::Rc; +use std::time::SystemTime; + +#[derive(Clone)] +pub struct SinkLimit { + pub base_sink: BaseSink, + pub start_time: SystemTime, + pub elapsed_time: u128, + out_tuples_limit: usize, +} + +impl SinkLimit { + pub fn new(query_graph: QueryGraph, out_tuple_limit: usize) -> SinkLimit { + SinkLimit { + base_sink: BaseSink::new(query_graph), + start_time: SystemTime::now(), + elapsed_time: 0, + out_tuples_limit: out_tuple_limit, + } + } +} + +impl CommonOperatorTrait for SinkLimit { + fn init( + &mut self, + probe_tuple: Rc>>, + 
graph: &TypedStaticGraph, + ) { + self.base_sink.init(probe_tuple, graph) + } + + fn process_new_tuple(&mut self) { + let prev = self.base_sink.base_op.prev.as_ref().unwrap().borrow(); + if get_op_attr!(prev.deref(), num_out_tuples) >= self.out_tuples_limit { + self.elapsed_time = SystemTime::now() + .duration_since(self.start_time.clone()) + .unwrap() + .as_millis(); + } + } + + fn execute(&mut self) { + self.base_sink.execute(); + } + + fn get_alds_as_string(&self) -> String { + self.base_sink.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_sink + .update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + self.base_sink.copy(is_thread_safe) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + self.base_sink.is_same_as(op) + } + + fn get_num_out_tuples(&self) -> usize { + self.base_sink.get_num_out_tuples() + } +} diff --git a/src/graph_impl/multi_graph/plan/operator/sink/sink_print.rs b/src/graph_impl/multi_graph/plan/operator/sink/sink_print.rs new file mode 100644 index 00000000..e6bdbd66 --- /dev/null +++ b/src/graph_impl/multi_graph/plan/operator/sink/sink_print.rs @@ -0,0 +1,68 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::plan::operator::sink::sink::{BaseSink, Sink}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::Deref; +use std::rc::Rc; + +#[derive(Clone)] +pub struct SinkPrint { + pub base_sink: BaseSink, +} + +impl SinkPrint { + pub fn new(query_graph: QueryGraph) -> SinkPrint { + SinkPrint { + base_sink: BaseSink::new(query_graph), + } + } +} + +impl CommonOperatorTrait for SinkPrint { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + 
self.base_sink.init(probe_tuple, graph) + } + + fn process_new_tuple(&mut self) { + println!("{:?}", self.base_sink.base_op.probe_tuple); + } + + fn execute(&mut self) { + self.base_sink.execute() + } + + fn get_alds_as_string(&self) -> String { + self.base_sink.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + self.base_sink + .update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + let base_op = &self.base_sink.base_op; + let mut sink = SinkPrint::new(base_op.out_subgraph.clone()); + let origin_prev = base_op.prev.as_ref().unwrap(); + sink.base_sink.base_op.prev = Some(Rc::new(RefCell::new( + origin_prev.borrow().deref().copy(is_thread_safe), + ))); + Operator::Sink(Sink::SinkPrint(sink)) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + self.base_sink.is_same_as(op) + } + + fn get_num_out_tuples(&self) -> usize { + self.base_sink.get_num_out_tuples() + } +} diff --git a/src/graph_impl/multi_graph/plan/query_plan.rs b/src/graph_impl/multi_graph/plan/query_plan.rs new file mode 100644 index 00000000..9092f992 --- /dev/null +++ b/src/graph_impl/multi_graph/plan/query_plan.rs @@ -0,0 +1,398 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::intersect::BaseIntersect; +use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect; +use graph_impl::multi_graph::plan::operator::extend::EI::EI; +use graph_impl::multi_graph::plan::operator::hashjoin::hash_table::HashTable; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::multi_graph::plan::operator::operator::{ + BaseOperator, CommonOperatorTrait, Operator, +}; +use graph_impl::multi_graph::plan::operator::scan::scan::{BaseScan, Scan}; +use graph_impl::multi_graph::plan::operator::scan::scan_sampling::ScanSampling; +use 
graph_impl::multi_graph::plan::operator::sink::sink::{BaseSink, Sink, SinkType}; +use graph_impl::multi_graph::plan::operator::sink::sink_copy::SinkCopy; +use graph_impl::multi_graph::plan::operator::sink::sink_limit::SinkLimit; +use graph_impl::multi_graph::plan::operator::sink::sink_print::SinkPrint; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::{Deref, DerefMut}; +use std::rc::Rc; +use std::time::SystemTime; + +#[derive(Clone)] +pub struct QueryPlan { + pub sink: Option>>>, + pub sink_type: SinkType, + pub scan_sampling: Option>>>, + pub last_operator: Option>>>, + pub out_tuples_limit: usize, + pub elapsed_time: u128, + pub icost: usize, + pub num_intermediate_tuples: usize, + pub num_out_tuples: usize, + pub operator_metrics: Vec<(String, usize, usize)>, + executed: bool, + adaptive_enabled: bool, + pub subplans: Vec>>>, + pub estimated_icost: f64, + pub estimated_num_out_tuples: f64, + pub q_vertex_to_num_out_tuples: HashMap, +} + +impl QueryPlan { + pub fn new(scan_sampling: Rc>>) -> Self { + let mut last_operators = { + let op_ref = scan_sampling.borrow(); + op_ref.deref().get_last_operators() + }; + + let out_subgraph = { + let op = last_operators[0].borrow(); + get_op_attr_as_ref!(op.deref(), out_subgraph).clone() + }; + let mut sink = Rc::new(RefCell::new(Operator::Sink(Sink::BaseSink(BaseSink::new( + out_subgraph, + ))))); + for op in last_operators.iter_mut() { + *get_op_attr_as_mut!(op.borrow_mut().deref_mut(), next) = vec![sink.clone()]; + } + if let Operator::Sink(Sink::BaseSink(sink)) = sink.borrow_mut().deref_mut() { + sink.previous = last_operators.clone(); + } + Self { + sink: Some(sink), + sink_type: SinkType::Counter, + scan_sampling: Some(scan_sampling), + last_operator: None, + out_tuples_limit: 0, + elapsed_time: 0, + icost: 0, + num_intermediate_tuples: 0, + num_out_tuples: 0, + 
+            operator_metrics: vec![],
+            executed: false,
+            adaptive_enabled: false,
+            subplans: vec![],
+            estimated_icost: 0.0,
+            estimated_num_out_tuples: 0.0,
+            q_vertex_to_num_out_tuples: HashMap::new(),
+        }
+    }
+    /// Creates a plan whose only sub-plan, and last operator, is `last_operator`.
+    pub fn new_from_operator(last_operator: Rc>>) -> Self {
+        Self {
+            sink: None,
+            sink_type: SinkType::Counter,
+            scan_sampling: None,
+            last_operator: Some(last_operator.clone()),
+            out_tuples_limit: 0,
+            elapsed_time: 0,
+            icost: 0,
+            num_intermediate_tuples: 0,
+            num_out_tuples: 0,
+            operator_metrics: vec![],
+            executed: false,
+            adaptive_enabled: false,
+            subplans: vec![last_operator],
+            estimated_icost: 0.0,
+            estimated_num_out_tuples: 0.0,
+            q_vertex_to_num_out_tuples: HashMap::new(),
+        }
+    }
+    /// Creates a plan from `subplans`; the last entry becomes `last_operator`.
+    ///
+    /// An empty `subplans` yields a plan with `last_operator == None` instead of
+    /// underflowing on `len() - 1`.
+    pub fn new_from_subplans(subplans: Vec>>>) -> Self {
+        Self {
+            sink: None,
+            sink_type: SinkType::Counter,
+            scan_sampling: None,
+            // `last()` is `None` for an empty vector, so no index arithmetic.
+            last_operator: subplans.last().cloned(),
+            out_tuples_limit: 0,
+            elapsed_time: 0,
+            icost: 0,
+            num_intermediate_tuples: 0,
+            num_out_tuples: 0,
+            operator_metrics: vec![],
+            executed: false,
+            adaptive_enabled: false,
+            subplans,
+            estimated_icost: 0.0,
+            estimated_num_out_tuples: 0.0,
+            q_vertex_to_num_out_tuples: HashMap::new(),
+        }
+    }
+    /// Creates a single-scan plan and records `estimated_num_out_tuples` for both
+    /// the scan's `from_query_vertex` and its `to_query_vertex`.
+    pub fn new_from_last_op(last_operator: Scan, estimated_num_out_tuples: f64) -> Self {
+        let mut map = HashMap::new();
+        let op = get_scan_as_ref!(&last_operator);
+        map.insert(op.from_query_vertex.clone(), estimated_num_out_tuples);
+        map.insert(op.to_query_vertex.clone(), estimated_num_out_tuples);
+        let mut plan =
+            QueryPlan::new_from_operator(Rc::new(RefCell::new(Operator::Scan(last_operator))));
+        plan.estimated_num_out_tuples = estimated_num_out_tuples;
+        map.into_iter().for_each(|(k, v)| {
+            plan.q_vertex_to_num_out_tuples.insert(k, v);
+        });
+        plan
+    }
+
+    /// Creates a new plan over clones of this plan's `subplans` handles; every
+    /// other field starts from the `new_from_subplans` defaults.
+    pub fn shallow_copy(&self) -> QueryPlan {
+        QueryPlan::new_from_subplans(self.subplans.clone())
+    }
+
+    pub fn append(&mut self, mut new_operator: Rc>>) {
+        {
+            let mut last_operator =
self.last_operator.as_mut().unwrap().borrow_mut();
+            *get_op_attr_as_mut!(last_operator.deref_mut(), next) = vec![new_operator.clone()];
+        }
+        *get_op_attr_as_mut!(new_operator.borrow_mut().deref_mut(), prev) =
+            self.last_operator.as_ref().map(|op| op.clone());
+        self.subplans.pop();
+        self.subplans.push(new_operator.clone());
+        self.last_operator = Some(new_operator);
+    }
+
+    /// Refreshes the statistics via `set_stats` and renders them as one
+    /// comma-separated line. The plan-level totals and the per-operator numbers
+    /// are included only once the plan has been executed.
+    pub fn get_output_log(&mut self) -> String {
+        self.set_stats();
+        let mut str_joiner = vec![];
+        if self.executed {
+            str_joiner.push(format!("{}", self.elapsed_time));
+            str_joiner.push(format!("{}", self.num_out_tuples));
+            str_joiner.push(format!("{}", self.num_intermediate_tuples));
+            str_joiner.push(format!("{}", self.icost));
+        }
+        for operator_metric in &self.operator_metrics {
+            str_joiner.push(format!("{}", operator_metric.0)); /* operator name */
+            if self.executed {
+                if !operator_metric.0.contains("PROBE")
+                    && !operator_metric.0.contains("HASH")
+                    && !operator_metric.0.contains("SCAN")
+                {
+                    str_joiner.push(format!("{}", operator_metric.1)); /* i-cost */
+                }
+                if !operator_metric.0.contains("HASH") {
+                    str_joiner.push(format!("{}", operator_metric.2)); /* num out tuples */
+                }
+            }
+        }
+        str_joiner.join(",")
+    }
+
+    /// Collects per-operator metrics from every sub-plan and folds them into
+    /// `icost` and `num_intermediate_tuples`.
+    ///
+    /// Each sub-plan is walked back through `prev` to its first operator, which
+    /// then appends the metrics of itself and its successors to
+    /// `operator_metrics`. Every metric adds to `icost`; every metric except the
+    /// last one adds its tuple count to `num_intermediate_tuples`.
+    ///
+    /// Nothing is accumulated when no metrics were collected (for example a plan
+    /// without sub-plans); the previous `len() - 1` arithmetic underflowed there.
+    ///
+    /// NOTE(review): metrics are appended and totals added on every call, so a
+    /// second call counts everything twice -- confirm whether callers rely on
+    /// calling this only once.
+    pub fn set_stats(&mut self) {
+        for subplan in &self.subplans {
+            let mut first_op = subplan.clone();
+            loop {
+                let prev = {
+                    let first_op_ref = first_op.borrow();
+                    get_op_attr_as_ref!(first_op_ref.deref(), prev)
+                        .as_ref()
+                        .map(|op| op.clone())
+                };
+                if prev.is_none() {
+                    break;
+                }
+                first_op = prev.as_ref().unwrap().clone();
+            }
+            first_op
+                .borrow()
+                .get_operator_metrics_next_operators(&mut self.operator_metrics);
+        }
+        // `split_last` is `None` for an empty list, so there is no index math.
+        if let Some((last, intermediate)) = self.operator_metrics.split_last() {
+            for metric in intermediate {
+                self.icost += metric.1;
+                self.num_intermediate_tuples += metric.2;
+            }
+            // The last operator's tuples are output, not intermediate, tuples.
+            self.icost += last.1;
+        }
+    }
+
+    pub fn copy(&self, is_thread_safe: bool) -> QueryPlan {
+        let mut subplans = vec![];
+        for subplan in
&self.subplans { + subplans.push(Rc::new(RefCell::new( + subplan.borrow().deref().copy(is_thread_safe), + ))); + } + QueryPlan::new_from_subplans(subplans) + } + + pub fn init( + &mut self, + graph: &TypedStaticGraph, + ) { + let plan_size = self.subplans.len(); + let last_operator = self.subplans[plan_size - 1].clone(); + let query_graph = { + let last_operator = last_operator.borrow(); + get_op_attr_as_ref!(last_operator.deref(), out_subgraph).clone() + }; + let mut sink = Rc::new(RefCell::new(Operator::Sink(match self.sink_type { + SinkType::Copy => Sink::SinkCopy(SinkCopy::new( + query_graph.clone(), + get_op_attr!(last_operator.borrow().deref(), out_tuple_len), + )), + SinkType::Print => Sink::SinkPrint(SinkPrint::new(query_graph.clone())), + SinkType::Limit => { + Sink::SinkLimit(SinkLimit::new(query_graph.clone(), self.out_tuples_limit)) + } + SinkType::Counter => Sink::BaseSink(BaseSink::new(query_graph.clone())), + }))); + get_op_attr_as_mut!(sink.borrow_mut().deref_mut(), prev).replace(last_operator.clone()); + *get_op_attr_as_mut!(last_operator.borrow_mut().deref_mut(), next) = vec![sink.clone()]; + self.sink = Some(sink); + let mut probes = vec![]; + for i in 1..self.subplans.len() { + let mut operator = self.subplans[i].clone(); + loop { + { + let mut op_ref = operator.borrow(); + if let Operator::Probe(pb) = op_ref.deref() { + probes.push(operator.clone()); + } + } + let prev = { + let mut op_ref = operator.borrow(); + get_op_attr_as_ref!(op_ref.deref(), prev) + .as_ref() + .map(|op| op.clone()) + }; + if prev.is_none() { + break; + } + operator = prev.unwrap(); + } + } + for i in 0..self.subplans.len() - 1 { + let subplan = self.subplans.get_mut(i).unwrap(); + if let Operator::Build(build) = subplan.borrow_mut().deref_mut() { + let hash_table = Rc::new(RefCell::new(HashTable::new( + build.build_hash_idx, + build.hashed_tuple_len, + ))); + build.hash_table = Some(hash_table.clone()); + for probe in &probes { + let mut probe_mut = probe.borrow_mut(); + 
if get_op_attr_as_ref!(probe_mut.deref(), in_subgraph) + .as_ref() + .unwrap() + == build.probing_subgraph.as_ref().unwrap() + { + if let Operator::Probe(pb) = probe_mut.deref_mut() { + let mut base_probe = get_probe_as_mut!(pb); + base_probe.hash_tables = vec![hash_table.clone()]; + break; + } + } + } + } + } + + for subplan in &mut self.subplans { + let probe_tuple = Rc::new(RefCell::new(vec![ + Id::new(0); + get_op_attr!( + subplan.borrow().deref(), + out_tuple_len + ) + ])); + let mut first_op = subplan.clone(); + loop { + let prev = get_op_attr_as_ref!(first_op.borrow().deref(), prev) + .as_ref() + .map(|op| op.clone()); + if prev.is_none() { + break; + } + first_op = prev.as_ref().unwrap().clone(); + } + first_op.borrow_mut().init(probe_tuple, graph); + } + } + + fn init_hashtable( + &mut self, + build_insubgrpah: &QueryGraph, + hash_table: Rc>>, + ) { + for operator in &mut self.subplans { + let mut op_mut = operator.borrow_mut(); + if let Operator::Probe(_p) = op_mut.deref_mut() { + if Self::check_and_init(build_insubgrpah, op_mut.deref_mut(), hash_table.clone()) { + break; + } + } + let mut op = operator.clone(); + loop { + let prev = { + let op_ref = op.borrow(); + get_op_attr_as_ref!(op_ref.deref(), prev) + .as_ref() + .map(|op| op.clone()) + }; + if prev.is_none() { + break; + } + op = prev.as_ref().unwrap().clone(); + let mut op_mut = op.borrow_mut(); + if let Operator::Probe(p) = op_mut.deref_mut() { + if Self::check_and_init( + build_insubgrpah, + op_mut.deref_mut(), + hash_table.clone(), + ) { + return; + } + } + } + } + } + + fn check_and_init( + build_insubgrpah: &QueryGraph, + probe: &mut Operator, + hash_table: Rc>>, + ) -> bool { + let prob_insubgraph = get_op_attr_as_ref!(probe, in_subgraph).as_ref().unwrap(); + if prob_insubgraph == build_insubgrpah { + if let Operator::Probe(probe_op) = probe { + let mut base_probe = get_probe_as_mut!(probe_op); + base_probe.hash_tables = vec![hash_table.clone()]; + return true; + } + } + false + } + + pub 
fn execute(&mut self) { + if let SinkType::Limit = self.sink_type { + if let Operator::Sink(Sink::SinkLimit(sink)) = + self.sink.as_mut().unwrap().borrow_mut().deref_mut() + { + sink.start_time = SystemTime::now(); + self.subplans + .iter_mut() + .map(|plan| plan.borrow_mut()) + .for_each(|mut plan| plan.execute()); + self.elapsed_time = sink.elapsed_time; + } + } else { + let start_time = SystemTime::now(); + self.subplans.iter_mut().for_each(|mut plan| { + let mut op = plan.as_ptr(); + unsafe { + (&mut *op).execute(); + } + }); + self.elapsed_time = SystemTime::now() + .duration_since(start_time) + .unwrap() + .as_millis(); + } + self.executed = true; + self.num_out_tuples = self + .sink + .as_ref() + .unwrap() + .borrow() + .deref() + .get_num_out_tuples(); + } +} diff --git a/src/graph_impl/multi_graph/plan/query_plan_worker.rs b/src/graph_impl/multi_graph/plan/query_plan_worker.rs new file mode 100644 index 00000000..c5d130cc --- /dev/null +++ b/src/graph_impl/multi_graph/plan/query_plan_worker.rs @@ -0,0 +1,155 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::intersect::BaseIntersect; +use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect; +use graph_impl::multi_graph::plan::operator::extend::EI::EI; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::multi_graph::plan::operator::operator::Operator; +use graph_impl::multi_graph::plan::operator::scan::scan::Scan; +use graph_impl::multi_graph::plan::operator::scan::scan_blocking::VertexIdxLimits; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::plan::query_plan::QueryPlan; +use graph_impl::TypedStaticGraph; +use std::hash::Hash; +use std::ops::{Deref, DerefMut}; +use std::time::SystemTime; + +pub struct QPWorkers { + query_plans: Vec>, + elapsed_time: u128, + intersection_cost: usize, + 
num_intermediate_tuples: usize, + num_out_tuples: usize, + operator_metrics: Vec<(String, usize, usize)>, +} + +impl QPWorkers { + pub fn new(query_plan: QueryPlan, num_threads: usize) -> QPWorkers { + let mut worker = QPWorkers { + query_plans: vec![], + elapsed_time: 0, + intersection_cost: 0, + num_intermediate_tuples: 0, + num_out_tuples: 0, + operator_metrics: vec![], + }; + if num_threads == 1 { + worker.query_plans.push(query_plan); + } else { + // num_threads > 1 + for i in 0..num_threads { + worker.query_plans.push(query_plan.copy(true)); + } + let global_vertex_idx_limits = VertexIdxLimits { + from_variable_index_limit: 0, + to_variable_index_limit: 0, + }; + for query_plan in &mut worker.query_plans { + for last_op in &mut query_plan.subplans { + let mut op = last_op.clone(); + loop { + let prev = { + let op_ref = op.borrow(); + get_op_attr_as_ref!(op_ref.deref(), prev) + .as_ref() + .map(|op| op.clone()) + }; + if prev.is_none() { + break; + } + op = prev.as_ref().unwrap().clone(); + } + let mut op_mut = op.borrow_mut(); + if let Operator::Scan(Scan::ScanBlocking(sb)) = op_mut.deref_mut() { + //TODO:Lock need to be fixed + sb.global_vertices_idx_limits = global_vertex_idx_limits.clone(); + } + } + } + } + worker + } + + pub fn init( + &mut self, + graph: &TypedStaticGraph, + ) { + self.query_plans + .iter_mut() + .for_each(|plan| plan.init(graph)); + } + + pub fn execute(&mut self) { + if self.query_plans.len() == 1 { + self.query_plans[0].execute(); + self.elapsed_time = self.query_plans[0].elapsed_time; + } else { + let begin_time = SystemTime::now(); + // let mut workers = vec![]; + for plan in &self.query_plans { + let mut plan = plan.clone(); + // workers.push(thread::spawn(move || { + // plan.execute(); + // })); + plan.execute(); + } + // for worker in workers { + // worker.join(); + // } + self.elapsed_time = SystemTime::now() + .duration_since(begin_time) + .unwrap() + .as_millis(); + } + } + + pub fn get_output_log(&mut self) -> String { + if 
self.query_plans.len() == 1 {
+            return self.query_plans[0].get_output_log();
+        }
+        if self.operator_metrics.is_empty() {
+            self.query_plans
+                .iter_mut()
+                .for_each(|plan| plan.set_stats());
+            self.aggregate_output();
+        }
+        let mut str_joiner = vec![];
+        str_joiner.push(format!("{}", self.elapsed_time));
+        str_joiner.push(format!("{}", self.num_out_tuples));
+        str_joiner.push(format!("{}", self.num_intermediate_tuples));
+        str_joiner.push(format!("{}", self.intersection_cost));
+        for operator_metric in &self.operator_metrics {
+            str_joiner.push(format!("{}", operator_metric.0)); /* operator name */
+            if !operator_metric.0.contains("PROBE")
+                && !operator_metric.0.contains("HASH")
+                && !operator_metric.0.contains("SCAN")
+            {
+                str_joiner.push(format!("{}", operator_metric.1)); /* i-cost */
+            }
+            if !operator_metric.0.contains("HASH") {
+                str_joiner.push(format!("{}", operator_metric.2)); /* output tuples size */
+            }
+        }
+        str_joiner.join(",")
+    }
+
+    /// Folds the statistics of all worker plans into this `QPWorkers`.
+    ///
+    /// The plan-level totals (`intersection_cost`, `num_intermediate_tuples`,
+    /// `num_out_tuples`) grow by the value of every plan. The per-operator
+    /// metrics start as a copy of the first plan's metrics, and the remaining
+    /// plans are then added position by position, so each plan counts once.
+    fn aggregate_output(&mut self) {
+        for plan in &self.query_plans {
+            self.intersection_cost += plan.icost;
+            self.num_intermediate_tuples += plan.num_intermediate_tuples;
+            self.num_out_tuples += plan.num_out_tuples;
+        }
+        // Seed with the first plan; an empty plan list leaves the metrics empty.
+        self.operator_metrics = match self.query_plans.first() {
+            Some(first_plan) => first_plan.operator_metrics.clone(),
+            None => vec![],
+        };
+        // Skip the first plan here: it is already present through the seed, and
+        // adding it again would double-count it.
+        for plan in self.query_plans.iter().skip(1) {
+            for (total, metric) in self
+                .operator_metrics
+                .iter_mut()
+                .zip(plan.operator_metrics.iter())
+            {
+                total.1 += metric.1;
+                total.2 += metric.2;
+            }
+        }
+    }
+}
diff --git a/src/graph_impl/multi_graph/planner/catalog/adj_list_descriptor.rs b/src/graph_impl/multi_graph/planner/catalog/adj_list_descriptor.rs
new file mode 100644
index 00000000..c2acd482
--- /dev/null
+++ b/src/graph_impl/multi_graph/planner/catalog/adj_list_descriptor.rs
@@ -0,0 +1,51 @@
+use serde::export::fmt::Error;
+use serde::export::Formatter; +use std::fmt::Display; + +#[derive(Clone)] +pub enum Direction { + Fwd, + Bwd, +} + +#[derive(Clone)] +pub struct AdjListDescriptor { + pub from_query_vertex: String, + pub vertex_idx: usize, + pub direction: Direction, + pub label: i32, +} + +impl AdjListDescriptor { + pub fn new( + from_query_vertex: String, + vertex_idx: usize, + direction: Direction, + label: i32, + ) -> Self { + Self { + from_query_vertex, + vertex_idx, + direction, + label, + } + } +} + +impl Display for Direction { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + match self { + Direction::Fwd => write!(f, "Fwd"), + Direction::Bwd => write!(f, "Bwd"), + } + } +} + +impl PartialEq for Direction { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Direction::Fwd, Direction::Fwd) | (Direction::Bwd, Direction::Bwd) => true, + _ => false, + } + } +} diff --git a/src/graph_impl/multi_graph/planner/catalog/catalog.rs b/src/graph_impl/multi_graph/planner/catalog/catalog.rs new file mode 100644 index 00000000..5d42d0b1 --- /dev/null +++ b/src/graph_impl/multi_graph/planner/catalog/catalog.rs @@ -0,0 +1,701 @@ +use generic::{GraphTrait, GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect; +use graph_impl::multi_graph::plan::operator::extend::EI::EI; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::multi_graph::plan::operator::operator::{ + BaseOperator, CommonOperatorTrait, Operator, +}; +use graph_impl::multi_graph::plan::operator::scan::scan::Scan; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::plan::query_plan::QueryPlan; +use graph_impl::multi_graph::planner::catalog::adj_list_descriptor::{ + AdjListDescriptor, Direction, +}; +use graph_impl::multi_graph::planner::catalog::catalog_plans::{ + CatalogPlans, 
DEF_MAX_INPUT_NUM_VERTICES, DEF_NUM_EDGES_TO_SAMPLE, +}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::{HashMap, HashSet}; +use itertools::Itertools; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::{Deref, DerefMut}; +use std::rc::Rc; +use std::time::SystemTime; + +pub static SINGLE_VERTEX_WEIGHT_PROBE_COEF: f64 = 3.0; +pub static SINGLE_VERTEX_WEIGHT_BUILD_COEF: f64 = 12.0; +pub static MULTI_VERTEX_WEIGHT_PROBE_COEF: f64 = 12.0; +pub static MULTI_VERTEX_WEIGHT_BUILD_COEF: f64 = 720.0; + +pub static mut LOGGER_FLAG: bool = false; + +pub struct Catalog { + pub in_subgraphs: Vec, + pub sampled_icost: HashMap>, + pub sampled_selectivity: HashMap>, + pub is_sorted_by_node: bool, + pub num_sampled_edge: usize, + pub max_input_num_vertices: usize, + pub elapsed_time: u128, +} + +impl Catalog { + pub fn new(num_sampled_edge: usize, max_input_num_vertices: usize) -> Self { + Self { + in_subgraphs: vec![], + sampled_icost: HashMap::new(), + sampled_selectivity: HashMap::new(), + is_sorted_by_node: false, + num_sampled_edge, + max_input_num_vertices, + elapsed_time: 0, + } + } + + // Icost is the intersection costs sampled. + // Cardinality is the sampledSelectivity sampled. + // InSubgraphs are the set of input subgraphs sampled. + pub fn with_cost( + i_cost: HashMap>, + cardinality: HashMap>, + in_subgraphs: Vec, + ) -> Self { + Self { + in_subgraphs, + sampled_icost: i_cost, + sampled_selectivity: cardinality, + is_sorted_by_node: false, + num_sampled_edge: 0, + max_input_num_vertices: 0, + elapsed_time: 0, + } + } + + /// Returns the i-cost of a particular extension from an input. 
+    ///
+    /// For each ALD the sampled input subgraphs are tried by vertex count, from
+    /// the largest down to 2. Within one vertex count, every mapping of the
+    /// sampled subgraph into `query_graph` that covers the ALD's source vertex is
+    /// looked up, and the smallest sampled value is kept. When the catalog is
+    /// sorted by node the value is read from `sampled_selectivity` under a key
+    /// suffixed with `~<to_type>`; otherwise it is read from `sampled_icost`. The
+    /// search stops at the first vertex count that produced a value. The
+    /// per-ALD minima are summed; an ALD without any match contributes the
+    /// value `min_icost` holds at that point (normally `f64::MAX`).
+    ///
+    /// NOTE(review): the upper bound uses `DEF_NUM_EDGES_TO_SAMPLE`, while
+    /// `get_selectivity` uses `DEF_MAX_INPUT_NUM_VERTICES` for the same kind of
+    /// loop -- confirm which constant is intended.
+    pub fn get_icost(
+        &self,
+        query_graph: &mut QueryGraph,
+        alds: Vec<&AdjListDescriptor>,
+        to_type: i32,
+    ) -> f64 {
+        let mut approx_icost = 0.0;
+        let mut min_icost = std::f64::MAX;
+        for ald in alds.iter() {
+            for num_vertices in (2..=(DEF_NUM_EDGES_TO_SAMPLE - 1)).rev() {
+                min_icost = std::f64::MAX;
+                let mut num_edges_matched = 0;
+                for (i, sub_graph) in self.in_subgraphs.iter().enumerate() {
+                    if sub_graph.get_num_qvertices() != num_vertices {
+                        continue;
+                    }
+                    let new_num_edges_matched = query_graph.q_edges.len();
+                    // The iterator is requested before the edge-count check, as
+                    // the call takes `query_graph` mutably and may update it.
+                    let it = query_graph.get_subgraph_mapping_iterator(sub_graph);
+                    if new_num_edges_matched < num_edges_matched {
+                        continue;
+                    }
+                    while it.has_next() {
+                        let new_vertex_mapping = it.next().unwrap();
+                        // Mappings that do not cover the ALD's source vertex
+                        // cannot be looked up in the catalog.
+                        if let Some(mapped_vertex) =
+                            new_vertex_mapping.get(&ald.from_query_vertex)
+                        {
+                            // Catalog key: "(<vertex>) <direction>[<label>]".
+                            let mut ald_key = String::from("(");
+                            ald_key.push_str(mapped_vertex);
+                            ald_key.push_str(") ");
+                            ald_key.push_str(&ald.direction.to_string());
+                            ald_key.push_str("[");
+                            ald_key.push_str(&ald.label.to_string());
+                            ald_key.push_str("]");
+                            let sampled_icost = if self.is_sorted_by_node {
+                                ald_key.push_str("~");
+                                ald_key.push_str(&to_type.to_string());
+                                self.sampled_selectivity[&i][&ald_key]
+                            } else {
+                                self.sampled_icost[&i][&ald_key]
+                            };
+                            let matches_more_edges = new_num_edges_matched > num_edges_matched;
+                            if matches_more_edges || min_icost > sampled_icost {
+                                min_icost = sampled_icost;
+                                num_edges_matched = new_num_edges_matched;
+                            }
+                        }
+                    }
+                }
+                if min_icost < std::f64::MAX {
+                    break;
+                }
+            }
+            approx_icost += min_icost;
+        }
+        approx_icost
+    }
+
+    /// Returns the sampledSelectivity of a particular extension from an input.
+ pub fn get_selectivity( + &self, + in_subgraph: &mut QueryGraph, + alds: &Vec, + to_type: i32, + ) -> f64 { + let mut approx_selectivity = std::f64::MAX; + let mut num_vertices = DEF_MAX_INPUT_NUM_VERTICES - 1; + while num_vertices >= 2 { + let mut num_alds_matched = 0; + for (i, sub_graph) in self.in_subgraphs.iter().enumerate() { + if sub_graph.get_num_qvertices() != num_vertices { + continue; + } + let it = in_subgraph.get_subgraph_mapping_iterator(sub_graph); + while it.has_next() { + let vertex_mapping = it.next().unwrap(); + let new_num_alds_matched = self.get_num_alds_matched(&alds, &vertex_mapping); + if new_num_alds_matched == 0 || new_num_alds_matched < num_alds_matched { + continue; + } + let sampled_selectivity = self.sampled_selectivity[&i] + [&self.get_alds_as_str(&alds, Some(&vertex_mapping), Some(to_type))] + .clone(); + if new_num_alds_matched > num_alds_matched + || sampled_selectivity < approx_selectivity + { + num_alds_matched = new_num_alds_matched; + approx_selectivity = sampled_selectivity; + } + } + } + num_vertices -= 1; + } + approx_selectivity + } + + fn get_alds_as_str( + &self, + alds: &Vec, + vertex_mapping: Option<&HashMap>, + to_type: Option, + ) -> String { + let mut from_qvertices_and_dirs = alds + .iter() + .filter(|ald| { + vertex_mapping.is_none() + || vertex_mapping + .unwrap() + .get(&ald.from_query_vertex) + .is_some() + }) + .map(|ald| { + "(".to_owned() + + if vertex_mapping.is_none() { + &ald.from_query_vertex + } else { + let vertex_mapping = vertex_mapping.unwrap(); + vertex_mapping.get(&ald.from_query_vertex).unwrap() + } + + ") " + + &ald.direction.to_string() + + "[" + + &ald.label.to_string() + + "]" + }) + .sorted() + .join(", "); + if to_type.is_some() { + from_qvertices_and_dirs += &("~".to_owned() + &to_type.unwrap().to_string()); + } + from_qvertices_and_dirs + } + + fn get_num_alds_matched( + &self, + alds: &Vec, + vertex_mapping: &HashMap, + ) -> usize { + let mut from_vertices_in_alds = HashSet::new(); + 
for ald in alds { + from_vertices_in_alds.insert(ald.from_query_vertex.clone()); + } + let num_alds_matched = 0; + vertex_mapping + .keys() + .filter(|&vertex| { + from_vertices_in_alds.contains(vertex) && vertex_mapping[vertex] != "" + }) + .count() + } + + ///TODO: Multi thread catalog building + pub fn populate( + &mut self, + graph: &TypedStaticGraph, + num_threads: usize, + ) { + let start_time = SystemTime::now(); + self.is_sorted_by_node = graph.is_sorted_by_node(); + self.sampled_icost = HashMap::new(); + self.sampled_selectivity = HashMap::new(); + let mut plans = CatalogPlans::new( + &graph, + num_threads, + self.num_sampled_edge, + self.max_input_num_vertices, + ); + self.set_input_subgraphs(plans.query_graphs_to_extend.get_query_graph_set()); + self.add_zero_selectivities(&graph, &mut plans); + + for query_plan_arr in &mut plans.query_plans_arrs { + self.init(&graph, query_plan_arr); + self.execute(query_plan_arr); + self.log_output(&graph, query_plan_arr); + query_plan_arr.clear(); + } + self.elapsed_time = SystemTime::now() + .duration_since(start_time) + .unwrap() + .as_millis(); + } + + fn init( + &self, + graph: &TypedStaticGraph, + query_plan_arr: &mut Vec>, + ) { + for query_plan in query_plan_arr { + let probe_tuple = Rc::new(RefCell::new(vec![ + Id::new(0); + self.max_input_num_vertices + 1 + ])); + if let Some(scan) = &mut query_plan.scan_sampling { + scan.borrow_mut().init(probe_tuple, graph); + } + } + } + + fn execute(&self, query_plan_arr: &mut Vec>) { + if query_plan_arr.len() > 1 { + // let mut handlers = vec![]; + for i in 0..query_plan_arr.len() { + let mut sink = query_plan_arr[i].sink.as_mut().unwrap().borrow_mut(); + sink.execute(); + // handlers.push(thread::spawn(move || { + // sink.execute(); + // })); + } + // for handler in handlers { + // handler.join(); + // } + } else { + let mut sink = query_plan_arr[0].sink.as_mut().unwrap().as_ptr(); + unsafe { + (&mut *sink).execute(); + } + } + } + + fn retrieve_op(op: &Rc>>) { + unsafe 
{ + print!("{:?}->", op.as_ptr()); + } + let op_ref = op.borrow(); + let base = get_base_op_as_ref!(op_ref.deref()); + if let Some(op) = &base.prev { + Self::retrieve_op(op); + } + } + + fn log_output( + &mut self, + graph: &TypedStaticGraph, + query_plan_arr: &mut Vec>, + ) { + let mut other: Vec>>> = query_plan_arr + .iter_mut() + .map(|plan| plan.sink.as_ref().unwrap().borrow()) + .map(|query_plan| { + if let Operator::Sink(sink) = query_plan.deref() { + let base_sink = get_sink_as_ref!(sink); + let mut op = base_sink.previous[0].clone(); + loop { + if let Operator::Scan(Scan::ScanSampling(sp)) = op.borrow().deref() { + break; + } + op = { + let op_ref = op.borrow(); + get_op_attr_as_ref!(op_ref.deref(), prev) + .as_ref() + .unwrap() + .clone() + }; + } + let op_ref = op.borrow(); + get_op_attr_as_ref!(op_ref.deref(), next)[0].clone() + } else { + panic!("Sink has not been set.") + } + }) + .collect(); + + let op = other.remove(0); + if self.is_sorted_by_node { + self.add_icost_and_selectivity_sorted_by_node(op, other, !graph.is_directed()); + } else { + self.add_icost_and_selectivity(op, other, !graph.is_directed()); + } + } + + fn add_icost_and_selectivity_sorted_by_node( + &mut self, + operator: Rc>>, + other: Vec>>>, + is_undirected: bool, + ) { + if let Operator::Sink(sink) = get_op_attr_as_ref!(operator.borrow().deref(), next)[0] + .borrow() + .deref() + { + return; + } + let mut num_input_tuples = get_op_attr!(operator.borrow().deref(), num_out_tuples); + + for other_op in &other { + num_input_tuples += get_op_attr!(other_op.borrow().deref(), num_out_tuples); + } + let mut in_subgraph = { + let op_ref = operator.borrow(); + get_op_attr_as_ref!(op_ref.deref(), out_subgraph).clone() + }; + let subgraph_idx = self.get_subgraph_idx(&mut in_subgraph); + let next = { + let op_ref = operator.borrow(); + get_op_attr_as_ref!(op_ref.deref(), next).clone() + }; + + for i in 0..next.len() { + let next_i = next[i].borrow(); + if let 
Operator::EI(EI::Intersect(Intersect::IntersectCatalog(intersect))) = + next_i.deref() + { + let to_type = intersect.base_intersect.base_ei.to_type; + let mut alds_as_str_list = vec![]; + let alds_str = self.get_alds_as_str( + &intersect.base_intersect.base_ei.alds, + None, + Some(to_type), + ); + if is_undirected { + let splits: Vec<&str> = alds_str.split(", ").collect(); + let direction_patterns = CatalogPlans::::generate_direction_patterns( + splits.len(), + is_undirected, + ); + for pattern in direction_patterns { + let mut alds_str_with_pattern = "".to_owned(); + for j in 0..pattern.len() { + let ok: Vec<&str> = splits[j].split("Bwd").collect(); + alds_str_with_pattern = + alds_str_with_pattern + ok[0] + &pattern[j].to_string() + ok[1]; + if j != pattern.len() - 1 { + alds_str_with_pattern += ", "; + } + } + alds_as_str_list.push(alds_str_with_pattern); + } + } else { + alds_as_str_list.push(alds_str); + } + let mut selectivity = intersect.base_intersect.base_ei.base_op.num_out_tuples; + for other_op in &other { + let next = { + let other_op_ref = other_op.borrow(); + get_op_attr_as_ref!(other_op_ref.deref(), next)[i].clone() + }; + let next_ref = next.borrow(); + selectivity += get_op_attr!(next_ref.deref(), num_out_tuples); + } + self.sampled_selectivity + .entry(subgraph_idx) + .or_insert(HashMap::new()); + for alds_as_str in alds_as_str_list { + self.sampled_selectivity + .get_mut(&subgraph_idx) + .unwrap() + .insert( + alds_as_str, + if num_input_tuples > 0 { + (selectivity as f64) / (num_input_tuples as f64) + } else { + 0.0 + }, + ); + } + let noop = { + let next_ref = next[i].borrow(); + get_op_attr_as_ref!(next_ref.deref(), next)[0].clone() + }; + let mut other_noops = vec![]; + /* Sorted-by-node plans attach exactly one Noop to every IntersectCatalog, so the noop of each other plan lives at index 0 (same as the primary plan above); indexing by the plan's position would go out of bounds from the second other plan on. */ + for other_op in &other { + other_noops.push({ + let other_ref = other_op.borrow(); + let next_i = get_op_attr_as_ref!(other_ref.deref(), next)[i].clone(); + let next_ref = next_i.borrow(); + get_op_attr_as_ref!(next_ref.deref(), next)[0].clone() + }); + } +
self.add_icost_and_selectivity(noop, other_noops, is_undirected); + } + } + } + + fn add_icost_and_selectivity( + &mut self, + operator: Rc>>, + other: Vec>>>, + is_undirected: bool, + ) { + if let Operator::Sink(sink) = get_op_attr_as_ref!(operator.borrow().deref(), next)[0] + .borrow() + .deref() + { + return; + } + let mut num_input_tuples = get_op_attr!(operator.borrow().deref(), num_out_tuples); + for other_op in &other { + num_input_tuples += get_op_attr!(other_op.borrow().deref(), num_out_tuples); + } + let mut in_subgraph = { + let op_ref = operator.borrow(); + get_op_attr_as_ref!(op_ref.deref(), out_subgraph).clone() + }; + let subgraph_idx = self.get_subgraph_idx(&mut in_subgraph); + let next_vec = { + let op_ref = operator.borrow(); + get_op_attr_as_ref!(op_ref.deref(), next).clone() + }; + for (i, next) in next_vec.iter().enumerate() { + let next_ref = next.borrow(); + if let Operator::EI(EI::Intersect(Intersect::IntersectCatalog(intersect))) = + next_ref.deref() + { + let alds = &intersect.base_intersect.base_ei.alds; + let mut alds_as_str_list = vec![]; + let alds_str = + self.get_alds_as_str(&intersect.base_intersect.base_ei.alds, None, None); + if is_undirected { + let splits: Vec<&str> = alds_str.split(", ").collect(); + let direction_patterns = CatalogPlans::::generate_direction_patterns( + splits.len(), + is_undirected, + ); + for pattern in direction_patterns { + let mut alds_str_with_pattern = "".to_owned(); + for j in 0..pattern.len() { + let ok: Vec<&str> = splits[j].split("Bwd").collect(); + alds_str_with_pattern = + alds_str_with_pattern + ok[0] + &pattern[j].to_string() + ok[1]; + if j != pattern.len() - 1 { + alds_str_with_pattern += ", "; + } + } + alds_as_str_list.push(alds_str_with_pattern); + } + } else { + alds_as_str_list.push(alds_str); + } + if 1 == alds.len() { + let mut icost = get_op_attr!(next.borrow().deref(), icost); + for other_op in &other { + let next = { + let other_ref = other_op.borrow(); + 
get_op_attr_as_ref!(other_ref.deref(), next)[i].clone() + }; + icost += get_op_attr!(next.borrow().deref(), icost); + } + self.sampled_icost + .entry(subgraph_idx) + .or_insert(HashMap::new()); + for alds_as_str in &alds_as_str_list { + if num_input_tuples > 0 { + self.sampled_icost + .get_mut(&subgraph_idx) + .unwrap() + .entry(alds_as_str.clone()) + .or_insert((icost as f64) / (num_input_tuples as f64)); + } else { + self.sampled_icost + .get_mut(&subgraph_idx) + .unwrap() + .entry(alds_as_str.clone()) + .or_insert(0.0); + } + } + } + let noops = { + let next_ref = next.borrow(); + get_op_attr_as_ref!(next_ref.deref(), next).clone() + }; + for to_type in 0..noops.len() { + let noop = noops[to_type].clone(); + let mut selectivity = { + let noop_ref = noop.borrow(); + get_op_attr!(noop_ref.deref(), num_out_tuples) + }; + for other_op in &other { + let next = { + let other_op_ref = other_op.borrow(); + get_op_attr_as_ref!(other_op_ref.deref(), next)[i].clone() + }; + let o_next = { + let next_ref = next.borrow(); + get_op_attr_as_ref!(next_ref.deref(), next)[to_type].clone() + }; + /* Accumulate the output count of the other plan's noop for this to_type (o_next), mirroring `noop` of the primary plan; borrowing `next` here would add the intersect operator's total output instead. */ + selectivity += { + let o_next_ref = o_next.borrow(); + get_op_attr!(o_next_ref.deref(), num_out_tuples) + }; + } + self.sampled_selectivity + .entry(subgraph_idx) + .or_insert(HashMap::new()); + for alds_as_str in &alds_as_str_list { + self.sampled_selectivity + .get_mut(&subgraph_idx) + .unwrap() + .insert( + alds_as_str.to_owned() + "~" + &to_type.to_string(), + if num_input_tuples > 0 { + (selectivity as f64) / (num_input_tuples as f64) + } else { + 0.0 + }, + ); + } + let mut other_noops = vec![]; + for other_op in &other { + let next = { + let other_op_ref = other_op.borrow(); + get_op_attr_as_ref!(other_op_ref.deref(), next)[i].clone() + }; + let next_op = { + let next_ref = next.borrow(); + get_op_attr_as_ref!(next_ref.deref(), next)[to_type].clone() + }; + other_noops.push(next_op); + } + self.add_icost_and_selectivity(noop, other_noops, is_undirected); + } + } + } + } + + fn
set_input_subgraphs(&mut self, in_subgraphs: Vec) { + self.in_subgraphs = vec![]; + for mut in_subgraph in in_subgraphs { + let mut is_unique = true; + for subgraph in self.in_subgraphs.iter_mut() { + if subgraph.is_isomorphic_to(&mut in_subgraph) { + is_unique = false; + break; + } + } + if is_unique { + self.in_subgraphs.push(in_subgraph); + } + } + } + + fn get_subgraph_idx(&mut self, in_subgraph: &mut QueryGraph) -> usize { + for (idx, sub_graph) in self.in_subgraphs.iter_mut().enumerate() { + if in_subgraph.is_isomorphic_to(sub_graph) { + return idx; + } + } + panic!("Illegal argument exception.") + } + + fn generate_direction_patterns(&self, size: usize, is_directed: bool) -> Vec> { + let mut direction_patterns = vec![]; + let mut directions = vec![Direction::Bwd; size]; + self.sub_generate_direction_patterns( + &mut directions, + size, + &mut direction_patterns, + is_directed, + ); + direction_patterns + } + + fn sub_generate_direction_patterns( + &self, + directions: &mut Vec, + size: usize, + direction_pattern: &mut Vec>, + is_directed: bool, + ) { + if size <= 0 { + direction_pattern.push(directions.to_vec()); + } else { + directions[size - 1] = Direction::Bwd; + self.sub_generate_direction_patterns( + directions, + size - 1, + direction_pattern, + is_directed, + ); + if is_directed { + directions[size - 1] = Direction::Fwd; + self.sub_generate_direction_patterns( + directions, + size - 1, + direction_pattern, + is_directed, + ); + } + } + } + + fn add_zero_selectivities< + Id: IdType, + NL: Hash + Eq, + EL: Hash + Eq, + Ty: GraphType, + L: IdType, + >( + &mut self, + graph: &TypedStaticGraph, + plans: &mut CatalogPlans, + ) { + let selectivity_zero = &mut plans.selectivity_zero; + for (q_graph, alds, to_type) in selectivity_zero { + let subgraph_idx = self.get_subgraph_idx(q_graph); + if self.sampled_selectivity.get(&subgraph_idx).is_none() { + self.sampled_selectivity + .insert(subgraph_idx, HashMap::new()); + } + let mut alds_as_str_list = vec![]; + 
let alds_str = self.get_alds_as_str(alds, None, None); + if !graph.is_directed() { + let splits: Vec<&str> = alds_str.split(", ").collect(); + let direction_patterns = + self.generate_direction_patterns(splits.len(), !graph.is_directed()); + for pattern in direction_patterns { + let mut alds_str_with_pattern = "".to_string(); + for i in 0..pattern.len() { + let ok: Vec<&str> = splits[i].split("Bwd").collect(); + alds_str_with_pattern = + alds_str_with_pattern + ok[0] + &pattern[i].to_string() + &ok[1]; + if i != pattern.len() - 1 { + alds_str_with_pattern.push_str(", "); + } + } + alds_as_str_list.push(alds_str_with_pattern); + } + } else { + alds_as_str_list.push(alds_str); + } + for alds_as_str in alds_as_str_list { + let selectivity = self.sampled_selectivity.get_mut(&subgraph_idx).unwrap(); + selectivity.insert(alds_as_str + "~" + &to_type.to_string(), 0.00); + } + } + } +} diff --git a/src/graph_impl/multi_graph/planner/catalog/catalog_plans.rs b/src/graph_impl/multi_graph/planner/catalog/catalog_plans.rs new file mode 100644 index 00000000..860ca15c --- /dev/null +++ b/src/graph_impl/multi_graph/planner/catalog/catalog_plans.rs @@ -0,0 +1,521 @@ +use generic::{GraphLabelTrait, GraphTrait, GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect; +use graph_impl::multi_graph::plan::operator::extend::EI::EI; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::Probe; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::PMV; +use graph_impl::multi_graph::plan::operator::operator::Operator; +use graph_impl::multi_graph::plan::operator::scan::scan::{BaseScan, Scan}; +use graph_impl::multi_graph::plan::operator::scan::scan_sampling::ScanSampling; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::plan::query_plan::QueryPlan; +use graph_impl::multi_graph::planner::catalog::adj_list_descriptor::{ + AdjListDescriptor, Direction, +}; +use 
graph_impl::multi_graph::planner::catalog::operator::intersect_catalog::IntersectCatalog; +use graph_impl::multi_graph::planner::catalog::operator::noop::Noop; +use graph_impl::multi_graph::planner::catalog::query_edge::QueryEdge; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::multi_graph::query::query_graph_set::QueryGraphSet; +use graph_impl::multi_graph::utils::set_utils; +use graph_impl::static_graph::graph::KEY_ANY; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use itertools::Itertools; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::{Deref, DerefMut}; +use std::rc::Rc; + +pub static DEF_NUM_EDGES_TO_SAMPLE: usize = 1000; +pub static DEF_MAX_INPUT_NUM_VERTICES: usize = 3; +static QUERY_VERTICES: [&str; 7] = ["a", "b", "c", "d", "e", "f", "g"]; + +pub struct CatalogPlans { + num_sampled_edges: usize, + max_input_num_vertices: usize, + num_node_labels: usize, + num_edge_labels: usize, + sorted_by_node: bool, + pub query_graphs_to_extend: QueryGraphSet, + pub query_plans_arrs: Vec>>, + is_directed: bool, + pub selectivity_zero: Vec<(QueryGraph, Vec, i32)>, + query_vertex_to_idx_map: HashMap, +} + +impl CatalogPlans { + pub fn new( + graph: &TypedStaticGraph, + num_thread: usize, + num_sampled_edges: usize, + max_input_num_vertices: usize, + ) -> Self { + let mut plans = CatalogPlans { + num_sampled_edges, + max_input_num_vertices, + num_node_labels: graph.num_of_node_labels(), + num_edge_labels: graph.num_of_edge_labels(), + sorted_by_node: graph.is_sorted_by_node(), + query_graphs_to_extend: QueryGraphSet::new(), + query_plans_arrs: vec![], + is_directed: graph.is_directed(), + selectivity_zero: vec![], + query_vertex_to_idx_map: HashMap::new(), + }; + for i in 0..QUERY_VERTICES.len() { + plans + .query_vertex_to_idx_map + .insert(QUERY_VERTICES[i].to_owned(), i); + } + let scans = if graph.edge_count() > 1073741823 { + plans.generate_all_scans_for_large_graph(graph) + } else { + 
plans.generate_all_scans(graph) + }; + for scan in scans { + let mut noop = Noop::new(scan.base_scan.base_op.out_subgraph.clone()); + let scan_pointer = Rc::new(RefCell::new(Operator::Scan(Scan::ScanSampling(scan)))); + noop.base_op.prev = Some(scan_pointer.clone()); + noop.base_op.out_qvertex_to_idx_map = + get_op_attr_as_ref!(scan_pointer.borrow().deref(), out_qvertex_to_idx_map).clone(); + let mut noop_pointer = Rc::new(RefCell::new(Operator::Noop(noop))); + *get_op_attr_as_mut!(scan_pointer.borrow_mut().deref_mut(), next) = + vec![noop_pointer.clone()]; + plans.set_next_operators(graph, noop_pointer, false); + let mut query_plans_arr = vec![QueryPlan::new(scan_pointer.clone())]; + for i in 1..num_thread { + let mut scan_ref = scan_pointer.borrow(); + let scan_copy = if let Operator::Scan(Scan::ScanSampling(scan)) = scan_ref.deref() { + scan.copy_default() + } else { + panic!("Scan initial failed!"); + }; + let mut another_noop = + Noop::new(get_op_attr_as_ref!(&scan_copy, out_subgraph).clone()); + another_noop.base_op.out_qvertex_to_idx_map = + get_op_attr_as_ref!(&scan_copy, out_qvertex_to_idx_map).clone(); + let mut scan_copy = Rc::new(RefCell::new(scan_copy)); + another_noop.base_op.prev = Some(scan_copy.clone()); + let mut another_noop_op = Rc::new(RefCell::new(Operator::Noop(another_noop))); + *get_op_attr_as_mut!(scan_copy.borrow_mut().deref_mut(), next) = + vec![another_noop_op.clone()]; + plans.set_next_operators(graph, another_noop_op, true); + let scan_copy_ref = scan_copy.borrow(); + if let Operator::Scan(Scan::ScanSampling(sc)) = scan_copy_ref.deref() { + query_plans_arr.push(QueryPlan::new(scan_copy.clone())); + } + } + plans.query_plans_arrs.push(query_plans_arr); + } + plans + } + + pub fn set_next_operators( + &mut self, + graph: &TypedStaticGraph, + operator: Rc>>, + is_none: bool, + ) { + let mut in_subgraph = { + let op_ref = operator.borrow(); + get_op_attr_as_ref!(op_ref.deref(), out_subgraph).clone() + }; + if !is_none && 
!self.query_graphs_to_extend.contains(&mut in_subgraph) { + self.query_graphs_to_extend.add(in_subgraph.clone()); + } else if !is_none { + return; + } + + let query_vertices = in_subgraph.get_query_vertices().clone(); + let mut descriptors = vec![]; + for query_vertex_to_extend in set_utils::get_power_set_excluding_empty_set(query_vertices) { + for alds in self.generate_alds(&query_vertex_to_extend, self.is_directed) { + descriptors.push(Descriptor { + out_subgraph: self.get_out_subgraph(in_subgraph.copy(), alds.clone()), + alds, + }); + } + } + let to_qvertex = QUERY_VERTICES[in_subgraph.get_num_qvertices()]; + let mut next = vec![]; + let last_repeated_vertex_idx = + get_op_attr_as_ref!(operator.borrow().deref(), last_repeated_vertex_idx).clone(); + + if self.sorted_by_node { + for mut descriptor in descriptors { + let mut types = vec![]; + let node_label_cnt = std::cmp::max(self.num_node_labels, 1); + for to_type in 0..node_label_cnt { + let mut produces_output = true; + for ald in &descriptor.alds { + let from_type = in_subgraph.get_query_vertex_type(&ald.from_query_vertex); + if (ald.direction == Direction::Fwd + && 0 == graph.get_num_edges(from_type, to_type as i32, ald.label)) + || (ald.direction == Direction::Bwd + && 0 == graph.get_num_edges(to_type as i32, from_type, ald.label)) + { + produces_output = false; + break; + } + } + if produces_output { + types.push(to_type as i32); + } else { + self.selectivity_zero.push(( + in_subgraph.clone(), + descriptor.alds.clone(), + to_type as i32, + )); + } + } + let mut out_qvertex_to_idx_map = + get_op_attr_as_ref!(operator.borrow().deref(), out_qvertex_to_idx_map).clone(); + out_qvertex_to_idx_map.insert(to_qvertex.to_owned(), out_qvertex_to_idx_map.len()); + for to_type in types { + descriptor + .out_subgraph + .set_query_vertex_type(to_qvertex.to_owned(), to_type); + let mut intersect = IntersectCatalog::new( + to_qvertex.to_owned(), + to_type, + descriptor.alds.clone(), + descriptor.out_subgraph.clone(), + 
in_subgraph.clone(), + out_qvertex_to_idx_map.clone(), + self.sorted_by_node, + ); + intersect + .base_intersect + .base_ei + .init_caching(last_repeated_vertex_idx); + next.push(Rc::new(RefCell::new(Operator::EI(EI::Intersect( + Intersect::IntersectCatalog(intersect), + ))))); + } + } + } else { + for i in 0..descriptors.len() { + let descriptor = &descriptors[i]; + let mut out_qvertex_to_idx_map = { + let op_ref = operator.borrow(); + let prev = get_op_attr_as_ref!(op_ref.deref(), prev) + .as_ref() + .unwrap() + .clone(); + let prev_ref = prev.borrow(); + get_op_attr_as_ref!(prev_ref.deref(), out_qvertex_to_idx_map).clone() + }; + + out_qvertex_to_idx_map.insert(to_qvertex.to_owned(), out_qvertex_to_idx_map.len()); + let mut ic = IntersectCatalog::new( + to_qvertex.to_owned(), + KEY_ANY, + descriptor.alds.clone(), + descriptor.out_subgraph.clone(), + in_subgraph.clone(), + out_qvertex_to_idx_map, + self.sorted_by_node, + ); + ic.base_intersect + .base_ei + .init_caching(last_repeated_vertex_idx); + next.push(Rc::new(RefCell::new(Operator::EI(EI::Intersect( + Intersect::IntersectCatalog(ic), + ))))); + } + } + Self::set_next_pointer(operator.clone(), next.clone()); + for next_op in next { + let mut next_noops = if self.sorted_by_node { + vec![Noop::new(QueryGraph::empty()); 1] + } else { + vec![Noop::new(QueryGraph::empty()); self.num_node_labels + 1] + }; + self.set_noops( + get_op_attr_as_ref!(next_op.borrow().deref(), out_subgraph), + to_qvertex.to_owned(), + &mut next_noops, + get_op_attr_as_ref!(next_op.borrow().deref(), out_qvertex_to_idx_map), + ); + let next_noops: Vec>>> = next_noops + .into_iter() + .map(|noop| Rc::new(RefCell::new(Operator::Noop(noop)))) + .collect(); + Self::set_next_pointer(next_op.clone(), next_noops.clone()); + if get_op_attr_as_ref!(next_op.borrow().deref(), out_subgraph).get_num_qvertices() + <= self.max_input_num_vertices + { + println!( + "next_noops_cnt={},sort_by_node={},max_input_num_vertices={}", + next_noops.len(), + 
self.sorted_by_node, + self.max_input_num_vertices + ); + for next_noop in next_noops { + *get_op_attr_as_mut!( + next_noop.borrow_mut().deref_mut(), + last_repeated_vertex_idx + ) = last_repeated_vertex_idx; + self.set_next_operators(graph, next_noop, is_none) + } + } + } + } + + fn set_next_pointer(operator: Rc>>, next: Vec>>>) { + *get_op_attr_as_mut!(operator.borrow_mut().deref_mut(), next) = next.clone(); + for next_op in next { + *get_op_attr_as_mut!(next_op.borrow_mut().deref_mut(), prev) = Some(operator.clone()); + } + } + + fn set_noops( + &self, + query_graph: &QueryGraph, + to_qvertex: String, + noops: &mut Vec>, + out_qvertex_to_idx_map: &HashMap, + ) { + if self.sorted_by_node { + noops[0] = Noop::new(query_graph.clone()); + noops[0].base_op.out_qvertex_to_idx_map = out_qvertex_to_idx_map.clone(); + } else { + let node_label_cnt = std::cmp::max(self.num_node_labels, 1); + for to_type in 0..node_label_cnt { + let mut query_graph_copy = query_graph.copy(); + query_graph_copy.set_query_vertex_type(to_qvertex.clone(), to_type as i32); + noops[to_type] = Noop::new(query_graph_copy); + } + } + } + + fn generate_alds( + &self, + qvertices: &Vec, + is_direccted: bool, + ) -> Vec> { + let direction_patterns = Self::generate_direction_patterns(qvertices.len(), is_direccted); + let label_patterns = self.generate_labels_patterns(qvertices.len()); + let mut alds_list = vec![]; + for directions in direction_patterns { + for labels in &label_patterns { + let mut alds = vec![]; + for i in 0..directions.len() { + let vertex_idx = self.query_vertex_to_idx_map[&qvertices[i]]; + let to_qvertex = QUERY_VERTICES[vertex_idx]; + alds.push(AdjListDescriptor::new( + to_qvertex.to_owned(), + vertex_idx, + directions[i].clone(), + labels[i].clone(), + )); + } + alds_list.push(alds); + } + } + alds_list + } + + fn generate_labels_patterns(&self, size: usize) -> Vec> { + let mut labels = vec![]; + let edge_label_cnt = std::cmp::max(self.num_edge_labels, 1); + for label in 
0..edge_label_cnt { + labels.push(label as i32); + } + set_utils::generate_permutations(labels, size) + } + + pub fn generate_direction_patterns(size: usize, is_directed: bool) -> Vec> { + let mut direction_patterns = vec![]; + Self::generate_direction_patterns_inner( + &mut vec![Direction::Fwd; size], + size, + &mut direction_patterns, + is_directed, + ); + direction_patterns + } + + fn generate_direction_patterns_inner( + direction_arr: &mut Vec, + size: usize, + direction_patterns: &mut Vec>, + is_directed: bool, + ) { + if size <= 0 { + direction_patterns.push(direction_arr.clone()); + } else { + direction_arr[size - 1] = Direction::Bwd; + Self::generate_direction_patterns_inner( + direction_arr, + size - 1, + direction_patterns, + is_directed, + ); + if is_directed { + direction_arr[size - 1] = Direction::Fwd; + Self::generate_direction_patterns_inner( + direction_arr, + size - 1, + direction_patterns, + is_directed, + ); + } + } + } + + fn get_out_subgraph( + &self, + mut query_graph: QueryGraph, + alds: Vec, + ) -> QueryGraph { + let num_qvertices = query_graph.get_num_qvertices(); + for ald in alds { + let mut query_edge = if let Direction::Fwd = ald.direction { + let mut query_edge = QueryEdge::default( + ald.from_query_vertex.clone(), + QUERY_VERTICES[num_qvertices].to_owned(), + ); + query_edge.from_type = query_graph.get_query_vertex_type(&ald.from_query_vertex); + query_edge + } else { + let mut query_edge = QueryEdge::default( + QUERY_VERTICES[num_qvertices].to_owned(), + ald.from_query_vertex.clone(), + ); + query_edge.to_type = query_graph.get_query_vertex_type(&ald.from_query_vertex); + query_edge + }; + query_edge.label = ald.label; + query_graph.add_qedge(query_edge); + } + query_graph + } + + pub fn generate_all_scans_for_large_graph< + NL: Hash + Eq, + EL: Hash + Eq, + Ty: GraphType, + L: IdType, + >( + &mut self, + graph: &TypedStaticGraph, + ) -> Vec> { + let fwd_adj_lists = graph.get_fwd_adj_list(); + let num_vertices = graph.node_count(); + 
let mut edges = vec![]; + for from_vertex in 0..num_vertices { + for to_vertex in fwd_adj_lists[from_vertex] + .as_ref() + .unwrap() + .get_neighbor_ids() + { + edges.push(vec![Id::new(from_vertex), to_vertex.clone()]); + } + } + let mut out_subgraph = QueryGraph::empty(); + out_subgraph.add_qedge(QueryEdge::new("a".to_owned(), "b".to_owned(), 0, 0, 0)); + let mut scan = ScanSampling::new(out_subgraph); + scan.set_edge_indices_to_sample_list(edges, self.num_sampled_edges); + vec![scan] + } + + pub fn generate_all_scans( + &mut self, + graph: &TypedStaticGraph, + ) -> Vec> { + let fwd_adj_lists = graph.get_fwd_adj_list(); + let vertex_types = graph.get_node_types(); + let num_vertices = graph.node_count(); + let mut key_to_edges_map = HashMap::new(); + let mut key_to_curr_idx = HashMap::new(); + let node_label_cnt = std::cmp::max(self.num_node_labels, 1); + let edge_label_cnt = std::cmp::max(self.num_edge_labels, 1); + for from_type in 0..node_label_cnt { + for label in 0..edge_label_cnt { + for to_type in 0..node_label_cnt { + let edge_key = TypedStaticGraph::::get_edge_key( + from_type, to_type, label, + ); + let num_edges = + graph.get_num_edges(from_type as i32, to_type as i32, label as i32); + key_to_edges_map.insert(edge_key, vec![0; num_edges * 2]); + key_to_curr_idx.insert(edge_key, 0); + } + } + } + for from_vertex in 0..num_vertices { + let from_type = vertex_types[from_vertex]; + let offsets = fwd_adj_lists[from_vertex].as_ref().unwrap().get_offsets(); + let neighbours = fwd_adj_lists[from_vertex] + .as_ref() + .unwrap() + .get_neighbor_ids(); + for label_type in 0..offsets.len() - 1 { + for to_idx in offsets[label_type]..offsets[label_type + 1] { + let (to_type, label) = if self.sorted_by_node { + (label_type, 0) + } else { + (vertex_types[neighbours[to_idx].id()] as usize, label_type) + }; + let edge_key = TypedStaticGraph::::get_edge_key( + from_type as usize, + to_type, + label, + ); + let curr_idx = key_to_curr_idx[&edge_key]; + 
key_to_edges_map.get_mut(&edge_key).unwrap()[curr_idx] = from_vertex; + key_to_edges_map.get_mut(&edge_key).unwrap()[curr_idx + 1] = + neighbours[to_idx].id(); + key_to_curr_idx.insert(edge_key, curr_idx + 2); + } + } + } + let mut scans = vec![]; + for from_type in 0..node_label_cnt { + for edge_label in 0..edge_label_cnt { + for to_type in 0..node_label_cnt { + let mut out_subgraph = QueryGraph::empty(); + out_subgraph.add_qedge(QueryEdge::new( + "a".to_owned(), + "b".to_owned(), + from_type as i32, + to_type as i32, + edge_label as i32, + )); + let edge_key = TypedStaticGraph::::get_edge_key( + from_type, to_type, edge_label, + ); + let actual_num_edges = + graph.get_num_edges(from_type as i32, to_type as i32, edge_label as i32); + if actual_num_edges <= 0 { + continue; + } + let mut num_edges_to_sample = (self.num_sampled_edges as f64 + * (graph.get_num_edges(from_type as i32, to_type as i32, edge_label as i32) + as f64 + / graph.edge_count() as f64)) + as usize; + let mut scan = ScanSampling::new(out_subgraph); + if self.sorted_by_node && num_edges_to_sample < 1000 { + num_edges_to_sample = actual_num_edges; + } + scan.set_edge_indices_to_sample( + key_to_edges_map[&edge_key] + .iter() + .map(|edge| Id::new(edge.clone())) + .collect(), + num_edges_to_sample, + ); + scans.push(scan); + } + } + } + scans + } +} + +pub struct Descriptor { + out_subgraph: QueryGraph, + alds: Vec, +} diff --git a/src/graph_impl/multi_graph/planner/catalog/mod.rs b/src/graph_impl/multi_graph/planner/catalog/mod.rs new file mode 100644 index 00000000..6b989ce7 --- /dev/null +++ b/src/graph_impl/multi_graph/planner/catalog/mod.rs @@ -0,0 +1,8 @@ +// Code in this module have not been finished. 
+pub mod adj_list_descriptor; +pub mod catalog; +pub mod catalog_plans; +pub mod operator; +pub mod query_edge; +pub mod query_graph; +pub mod subgraph_mapping_iterator; diff --git a/src/graph_impl/multi_graph/planner/catalog/operator/intersect_catalog.rs b/src/graph_impl/multi_graph/planner/catalog/operator/intersect_catalog.rs new file mode 100644 index 00000000..64abd453 --- /dev/null +++ b/src/graph_impl/multi_graph/planner/catalog/operator/intersect_catalog.rs @@ -0,0 +1,203 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::intersect::BaseIntersect; +use graph_impl::multi_graph::plan::operator::extend::EI::CachingType; +use graph_impl::multi_graph::plan::operator::operator::{CommonOperatorTrait, Operator}; +use graph_impl::multi_graph::planner::catalog::adj_list_descriptor::AdjListDescriptor; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::static_graph::graph::KEY_ANY; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use itertools::Itertools; +use std::cell::RefCell; +use std::hash::Hash; +use std::rc::Rc; + +#[derive(Clone)] +pub struct IntersectCatalog { + pub base_intersect: BaseIntersect, + is_adj_list_sorted_by_node: bool, + last_icost: usize, + caching_enable: bool, +} + +impl IntersectCatalog { + pub fn new( + to_qvertex: String, + to_type: i32, + alds: Vec, + out_subgraph: QueryGraph, + in_subgraph: QueryGraph, + out_qvertex_to_idx_map: HashMap, + is_adj_list_sorted_by_node: bool, + ) -> IntersectCatalog { + IntersectCatalog { + base_intersect: BaseIntersect::new( + to_qvertex, + to_type, + alds, + out_subgraph, + Some(in_subgraph), + out_qvertex_to_idx_map, + ), + is_adj_list_sorted_by_node, + last_icost: 0, + caching_enable: true, + } + } +} + +impl CommonOperatorTrait for IntersectCatalog { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + self.base_intersect.init(probe_tuple, graph) + } + + fn process_new_tuple(&mut self) { 
+ let base_ei = &mut self.base_intersect.base_ei; + if 1 == base_ei.alds.len() { + // intersect the adjacency lists and setAdjListSortOrder the output vertex values. + let adj = base_ei.adj_lists_to_cache[0] + [base_ei.base_op.probe_tuple.borrow()[base_ei.vertex_idx_to_cache[0]].id()] + .as_ref() + .unwrap(); + adj.set_neighbor_ids( + base_ei.labels_or_to_types_to_cache[0], + &mut base_ei.out_neighbours, + ); + base_ei.base_op.icost += + base_ei.out_neighbours.end_idx - base_ei.out_neighbours.start_idx; + } else { + // intersect the adjacency lists and setAdjListSortOrder the output vertex values. + let mut temp; + if base_ei.caching_type == CachingType::None || !base_ei.is_intersection_cached() { + let adj = base_ei.adj_lists_to_cache[0] + [base_ei.base_op.probe_tuple.borrow()[base_ei.vertex_idx_to_cache[0]].id()] + .as_ref() + .unwrap(); + adj.set_neighbor_ids( + base_ei.labels_or_to_types_to_cache[0], + &mut base_ei.init_neighbours, + ); + self.last_icost = + base_ei.init_neighbours.end_idx - base_ei.init_neighbours.start_idx; + let adj = base_ei.adj_lists_to_cache[1] + [base_ei.base_op.probe_tuple.borrow()[base_ei.vertex_idx_to_cache[1]].id()] + .as_ref() + .unwrap(); + self.last_icost += adj.intersect( + base_ei.labels_or_to_types_to_cache[1], + &mut base_ei.init_neighbours, + &mut base_ei.cached_neighbours, + ); + + if base_ei.to_type != KEY_ANY { + let mut curr_end_idx = 0; + for i in base_ei.cached_neighbours.start_idx..base_ei.cached_neighbours.end_idx + { + if base_ei.vertex_types[base_ei.cached_neighbours.ids[i].id()] + == base_ei.to_type + { + base_ei.cached_neighbours.ids[curr_end_idx] = + base_ei.cached_neighbours.ids[i]; + curr_end_idx += 1; + } + } + base_ei.cached_neighbours.end_idx = curr_end_idx; + } + for i in 2..base_ei.adj_lists_to_cache.len() { + temp = base_ei.cached_neighbours.clone(); + base_ei.cached_neighbours = base_ei.temp_neighbours.clone(); + base_ei.temp_neighbours = temp; + let adj = base_ei.adj_lists_to_cache[i] + 
[base_ei.base_op.probe_tuple.borrow()[base_ei.vertex_idx_to_cache[i]].id()] + .as_ref() + .unwrap(); + self.last_icost += adj.intersect( + base_ei.labels_or_to_types_to_cache[i], + &mut base_ei.temp_neighbours, + &mut base_ei.cached_neighbours, + ); + } + } + + match base_ei.caching_type { + CachingType::None | CachingType::FullCaching => { + base_ei.base_op.icost += self.last_icost; + base_ei.out_neighbours = base_ei.cached_neighbours.clone(); + } + CachingType::PartialCaching => { + let adj = base_ei.adj_lists[0] + [base_ei.base_op.probe_tuple.borrow()[base_ei.vertex_idx[0]].id()] + .as_ref() + .unwrap(); + base_ei.base_op.icost += adj.intersect( + base_ei.labels_or_to_types[0], + &mut base_ei.cached_neighbours, + &mut base_ei.out_neighbours, + ); + for i in 1..base_ei.adj_lists.len() { + temp = base_ei.out_neighbours.clone(); + base_ei.out_neighbours = base_ei.temp_neighbours.clone(); + base_ei.temp_neighbours = temp; + let adj = base_ei.adj_lists[i] + [base_ei.base_op.probe_tuple.borrow()[base_ei.vertex_idx[i]].id()] + .as_ref() + .unwrap(); + base_ei.base_op.icost += adj.intersect( + base_ei.labels_or_to_types[i], + &mut base_ei.temp_neighbours, + &mut base_ei.out_neighbours, + ); + } + } + } + } + + for idx in base_ei.out_neighbours.start_idx..base_ei.out_neighbours.end_idx { + base_ei.base_op.probe_tuple.borrow_mut()[base_ei.out_idx] = + base_ei.out_neighbours.ids[idx]; + base_ei.base_op.num_out_tuples += 1; + if self.is_adj_list_sorted_by_node { + base_ei.base_op.next[0].borrow_mut().process_new_tuple(); + } else { + base_ei + .base_op + .next + .get( + base_ei.vertex_types + [base_ei.base_op.probe_tuple.borrow()[base_ei.out_idx].id()] + as usize, + ) + .map(|next_op| next_op.borrow_mut().process_new_tuple()); + } + } + } + + fn execute(&mut self) { + self.base_intersect.execute() + } + + fn get_alds_as_string(&self) -> String { + self.base_intersect.get_alds_as_string() + } + + fn update_operator_name(&mut self, query_vertex_to_index_map: HashMap) { + 
self.base_intersect + .update_operator_name(query_vertex_to_index_map) + } + + fn copy(&self, is_thread_safe: bool) -> Operator { + self.base_intersect.copy(is_thread_safe) + } + + fn is_same_as(&mut self, op: &mut Rc>>) -> bool { + self.base_intersect.is_same_as(op) + } + + fn get_num_out_tuples(&self) -> usize { + self.base_intersect.get_num_out_tuples() + } +} diff --git a/src/graph_impl/multi_graph/planner/catalog/operator/mod.rs b/src/graph_impl/multi_graph/planner/catalog/operator/mod.rs new file mode 100644 index 00000000..f11094bf --- /dev/null +++ b/src/graph_impl/multi_graph/planner/catalog/operator/mod.rs @@ -0,0 +1,2 @@ +pub mod intersect_catalog; +pub mod noop; diff --git a/src/graph_impl/multi_graph/planner/catalog/operator/noop.rs b/src/graph_impl/multi_graph/planner/catalog/operator/noop.rs new file mode 100644 index 00000000..692da1d1 --- /dev/null +++ b/src/graph_impl/multi_graph/planner/catalog/operator/noop.rs @@ -0,0 +1,67 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::operator::{ + BaseOperator, CommonOperatorTrait, Operator, +}; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::HashMap; +use std::cell::RefCell; +use std::hash::Hash; +use std::rc::Rc; + +#[derive(Clone)] +pub struct Noop { + pub base_op: BaseOperator, +} + +impl Noop { + pub fn new(query_graph: QueryGraph) -> Noop { + Noop { + base_op: BaseOperator::new(query_graph.clone(), Some(query_graph)), + } + } +} + +impl CommonOperatorTrait for Noop { + fn init( + &mut self, + probe_tuple: Rc>>, + graph: &TypedStaticGraph, + ) { + self.base_op.probe_tuple = probe_tuple.clone(); + for next_op in &self.base_op.next { + next_op.borrow_mut().init(probe_tuple.clone(), graph); + } + } + + fn process_new_tuple(&mut self) { + self.base_op.num_out_tuples += 1; + self.base_op.next.iter().for_each(|next_op| { + next_op.borrow_mut().process_new_tuple(); + }); + } + + fn execute(&mut self) 
/// A directed edge of a query graph, identified by the names of the two query
/// vertices it connects.
#[derive(Clone, Debug, PartialEq)]
pub struct QueryEdge {
    /// Name of the query vertex the edge leaves.
    pub from_query_vertex: String,
    /// Name of the query vertex the edge enters.
    pub to_query_vertex: String,
    /// Type id of the source vertex.
    pub from_type: i32,
    /// Type id of the destination vertex.
    pub to_type: i32,
    /// Label id of the edge itself.
    pub label: i32,
}

impl QueryEdge {
    /// Builds an edge from `from_qvertex` to `to_qvertex` with explicit vertex
    /// types and edge label.
    pub fn new(
        from_qvertex: String,
        to_qvertex: String,
        from_type: i32,
        to_type: i32,
        label: i32,
    ) -> Self {
        QueryEdge {
            from_query_vertex: from_qvertex,
            to_query_vertex: to_qvertex,
            from_type,
            to_type,
            label,
        }
    }

    /// Builds an edge whose vertex types and label are all `0`.
    ///
    /// NOTE(review): `0` is presumably the "any type / any label" wildcard used
    /// elsewhere in the planner; confirm against the `KEY_ANY` constant.
    pub fn default(from_qvertex: String, to_qvertex: String) -> Self {
        Self::new(from_qvertex, to_qvertex, 0, 0, 0)
    }
}
graph_impl::static_graph::graph::KEY_ANY; +use hashbrown::{HashMap, HashSet}; +use itertools::Itertools; +use std::iter::FromIterator; + +#[derive(Clone)] +pub struct QueryGraph { + pub qvertex_to_qedges_map: HashMap>>, + pub qvertex_to_type_map: HashMap, + pub qvertex_to_deg_map: HashMap>, + pub q_edges: Vec, + pub it: Option>, + pub encoding: Option, + pub limit: usize, +} + +impl QueryGraph { + pub fn empty() -> QueryGraph { + QueryGraph { + qvertex_to_qedges_map: HashMap::new(), + qvertex_to_type_map: HashMap::new(), + qvertex_to_deg_map: HashMap::new(), + q_edges: vec![], + it: None, + encoding: None, + limit: 0, + } + } + + pub fn get_num_qvertices(&self) -> usize { + self.qvertex_to_qedges_map.len() + } + + pub fn get_subgraph_mapping_iterator( + &mut self, + query_graph: &QueryGraph, + ) -> &mut Box { + let mut it = self + .it + .take() + .unwrap_or(Box::new(SubgraphMappingIterator::new( + self.qvertex_to_qedges_map + .keys() + .map(|x| x.clone()) + .sorted(), + ))); + it.init(&self, query_graph); + self.it.replace(it); + self.it.as_mut().unwrap() + } + + pub fn get_query_vertices(&self) -> Vec { + self.qvertex_to_qedges_map + .keys() + .map(|x| x.clone()) + .sorted() + } + + pub fn get_query_vertices_as_set(&self) -> HashSet { + let mut set = HashSet::new(); + self.qvertex_to_qedges_map.keys().for_each(|key| { + set.insert(key.clone()); + }); + set + } + + pub fn get_query_vertex_type(&self, query_vertex: &str) -> i32 { + if let Some(vertex_type) = self.qvertex_to_type_map.get(query_vertex) { + return vertex_type.clone(); + } + 0 + } + + pub fn set_query_vertex_type(&mut self, query_vertex: String, to_type: i32) { + self.qvertex_to_type_map + .insert(query_vertex.clone(), to_type); + for edge in self.q_edges.iter_mut() { + if edge.from_query_vertex == query_vertex { + edge.from_type = to_type; + } else if edge.to_query_vertex == query_vertex { + edge.to_type = to_type; + } + } + } + + pub fn contains_query_edge(&self, v1: &String, v2: &String) -> bool { + 
if let Some(map) = self.qvertex_to_qedges_map.get(v1) { + return map.contains_key(v2); + } + false + } + + pub fn get_qedges(&self, variable: &String, neighbor_variable: &String) -> Vec { + if !self.qvertex_to_qedges_map.contains_key(variable) { + panic!("The variable '{}' is not present.", variable); + } + let contains_in_qedges = self + .qvertex_to_qedges_map + .get(variable) + .map_or(false, |map| map.contains_key(neighbor_variable)); + if !contains_in_qedges { + return vec![]; + } + self.qvertex_to_qedges_map[variable][neighbor_variable].clone() + } + + pub fn get_encoding(&mut self) -> String { + if self.encoding.is_some() { + return self.encoding.as_ref().unwrap().clone(); + } + let mut query_vertices_encoded = vec![String::from(""); self.qvertex_to_qedges_map.len()]; + let mut vertex_idx = 0; + for from_vertex in self.qvertex_to_qedges_map.keys() { + let from_vertex = from_vertex.clone(); + let mut encoding_str = "".to_string(); + if let Some(edge_map) = self.qvertex_to_qedges_map.get(&from_vertex) { + edge_map.keys().for_each(|to_vertex| { + if let Some(query_edges) = edge_map.get(to_vertex) { + for query_edge in query_edges { + if from_vertex == query_edge.from_query_vertex { + encoding_str += "F"; + } else { + encoding_str += "B"; + } + } + } + }); + } + let encoding_to_sort = String::from_iter(encoding_str.chars().into_iter().sorted()); + query_vertices_encoded[vertex_idx] = encoding_to_sort; + vertex_idx += 1; + } + query_vertices_encoded.sort(); + self.encoding = Some(query_vertices_encoded.join(".")); + self.encoding.as_ref().unwrap().clone() + } + + fn get_subgraph_mapping_if_any( + &mut self, + other_query_graph: &QueryGraph, + ) -> Option> { + let it = self.get_subgraph_mapping_iterator(other_query_graph); + if !it.has_next() { + return None; + } + it.next() + } + + pub fn is_isomorphic_to(&mut self, other_query_graph: &mut QueryGraph) -> bool { + other_query_graph.get_encoding() == self.get_encoding() + && ((self.q_edges.len() == 0 && 
other_query_graph.q_edges.len() == 0) + || self + .get_subgraph_mapping_if_any(other_query_graph) + .is_some()) + } + + pub fn get_isomorphic_mapping_if_any( + &mut self, + other_query_graph: &mut QueryGraph, + ) -> Option> { + if self.is_isomorphic_to(other_query_graph) { + return self.get_subgraph_mapping_if_any(other_query_graph); + } + None + } + + pub fn add_qedges(&mut self, query_edges: &Vec) { + query_edges + .iter() + .for_each(|edge| self.add_qedge(edge.clone())); + } + + pub fn add_qedge(&mut self, query_edge: QueryEdge) { + // Get the vertex IDs. + let from_qvertex = query_edge.from_query_vertex.clone(); + let to_qvertex = query_edge.to_query_vertex.clone(); + let from_type = query_edge.from_type; + let to_type = query_edge.to_type; + self.qvertex_to_type_map + .entry(from_qvertex.clone()) + .or_insert(KEY_ANY); + self.qvertex_to_type_map + .entry(to_qvertex.clone()) + .or_insert(KEY_ANY); + if KEY_ANY != from_type { + self.qvertex_to_type_map + .insert(from_qvertex.clone(), from_type); + } + if KEY_ANY != to_type { + self.qvertex_to_type_map.insert(to_qvertex.clone(), to_type); + } + // Set the in and out degrees for each variable. + if !self.qvertex_to_deg_map.contains_key(&from_qvertex) { + self.qvertex_to_deg_map + .insert(from_qvertex.clone(), vec![0; 2]); + } + self.qvertex_to_deg_map.get_mut(&from_qvertex).unwrap()[0] += 1; + if !self.qvertex_to_deg_map.contains_key(&to_qvertex) { + self.qvertex_to_deg_map + .insert(to_qvertex.clone(), vec![0; 2]); + } + self.qvertex_to_deg_map.get_mut(&to_qvertex).unwrap()[1] += 1; + // Add fwd edge from_qvertex -> to_qvertex to the qVertexToQEdgesMap. + self.add_qedge_to_qgraph(&from_qvertex, &to_qvertex, &query_edge); + // Add bwd edge to_qvertex <- from_qvertex to the qVertexToQEdgesMap. 
+ self.add_qedge_to_qgraph(&to_qvertex, &from_qvertex, &query_edge); + + self.q_edges.push(query_edge); + } + + fn add_qedge_to_qgraph( + &mut self, + from_qvertex: &String, + to_qvertex: &String, + q_edge: &QueryEdge, + ) { + self.qvertex_to_qedges_map + .entry(from_qvertex.clone()) + .or_insert(HashMap::new()); + self.qvertex_to_qedges_map + .get_mut(from_qvertex) + .unwrap() + .entry(to_qvertex.clone()) + .or_insert(vec![]); + self.qvertex_to_qedges_map + .get_mut(from_qvertex) + .unwrap() + .get_mut(to_qvertex) + .unwrap() + .push(q_edge.clone()); + } + + pub fn get_neighbors(&self, from_var: Vec) -> HashSet { + let mut to_variables = HashSet::new(); + from_var.iter().for_each(|from| { + if !self.qvertex_to_qedges_map.contains_key(from) { + panic!("The variable '{}' is not present.", from); + } + self.get_neighbours_of_node(from).into_iter().for_each(|n| { + to_variables.insert(n); + }); + }); + from_var.iter().for_each(|from| { + to_variables.remove(from); + }); + to_variables + } + + pub fn get_neighbours_of_node(&self, from: &String) -> Vec { + if !self.qvertex_to_qedges_map.contains_key(from) { + panic!("The variable '{}' is not present.", from); + } + self.qvertex_to_qedges_map[from] + .keys() + .map(|key| key.clone()) + .collect() + } + + pub fn copy(&self) -> QueryGraph { + let mut q = QueryGraph::empty(); + q.add_qedges(&self.q_edges); + q + } +} + +impl PartialEq for QueryGraph { + fn eq(&self, other: &Self) -> bool { + self.qvertex_to_qedges_map == other.qvertex_to_qedges_map + && self.qvertex_to_type_map == other.qvertex_to_type_map + && self.qvertex_to_deg_map == other.qvertex_to_deg_map + && self.q_edges.eq(&other.q_edges) + && self.encoding.as_ref().unwrap() == other.encoding.as_ref().unwrap() + && self.limit == other.limit + } +} diff --git a/src/graph_impl/multi_graph/planner/catalog/subgraph_mapping_iterator.rs b/src/graph_impl/multi_graph/planner/catalog/subgraph_mapping_iterator.rs new file mode 100644 index 00000000..25e2acea --- /dev/null 
+++ b/src/graph_impl/multi_graph/planner/catalog/subgraph_mapping_iterator.rs @@ -0,0 +1,197 @@ +use graph_impl::multi_graph::planner::catalog::catalog::LOGGER_FLAG; +use graph_impl::multi_graph::planner::catalog::query_edge::QueryEdge; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use hashbrown::HashMap; + +// An iterator over a set of possible mappings between two query graphs. +#[derive(Clone)] +pub struct SubgraphMappingIterator { + pub query_vertices: Vec, + pub o_qvertices: Vec, + pub o_qgraph: QueryGraph, + pub next: HashMap, + pub is_next_computed: bool, + pub curr_mapping: Vec, + pub current_idx: usize, + pub vertex_indices: Vec, + pub vertices_for_idx: Vec>, + + pub qvertex_to_qedges_map: HashMap>>, + pub qvertex_to_type_map: HashMap, + pub qvertex_to_deg_map: HashMap>, +} + +impl SubgraphMappingIterator { + pub fn new(query_vertices: Vec) -> Self { + let mut next = HashMap::new(); + query_vertices.iter().for_each(|v| { + next.insert(v.clone(), String::from("")); + }); + SubgraphMappingIterator { + query_vertices, + o_qvertices: vec![], + o_qgraph: QueryGraph::empty(), + next, + is_next_computed: false, + curr_mapping: vec![], + current_idx: 0, + vertex_indices: vec![], + vertices_for_idx: vec![], + qvertex_to_qedges_map: HashMap::new(), + qvertex_to_type_map: HashMap::new(), + qvertex_to_deg_map: HashMap::new(), + } + } + + pub fn init(&mut self, query_graph: &QueryGraph, o_query_graph: &QueryGraph) { + self.o_qvertices = o_query_graph.get_query_vertices(); + self.o_qgraph = o_query_graph.clone(); + self.current_idx = 0; + self.vertex_indices = vec![0; self.o_qvertices.len()]; + self.curr_mapping.clear(); + self.qvertex_to_qedges_map = query_graph.qvertex_to_qedges_map.clone(); + self.qvertex_to_deg_map = query_graph.qvertex_to_deg_map.clone(); + self.qvertex_to_type_map = query_graph.qvertex_to_type_map.clone(); + for i in 0..self.o_qvertices.len() { + if self.vertices_for_idx.len() <= i { + 
self.vertices_for_idx.push(vec![]); + } else { + self.vertices_for_idx[i].clear(); + } + let o_qvertex = &self.o_qvertices[i]; + let o_qvertex_deg = &o_query_graph.qvertex_to_deg_map[o_qvertex]; + let o_qvertex_type = o_query_graph.qvertex_to_type_map[o_qvertex]; + + for j in 0..self.query_vertices.len() { + let q_vertex = &self.query_vertices[j]; + let vertex_type = self.qvertex_to_type_map[q_vertex]; + let q_vertex_deg = &self.qvertex_to_deg_map[q_vertex]; + if o_qvertex_type == vertex_type + && (o_qvertex_deg.eq(q_vertex_deg) + || (self.o_qvertices.len() < self.query_vertices.len() + && q_vertex_deg[0] >= o_qvertex_deg[0] + && q_vertex_deg[1] >= o_qvertex_deg[1])) + { + self.vertices_for_idx[i].push(q_vertex.clone()); + } + } + if 0 == self.vertices_for_idx[i].len() { + self.is_next_computed = true; + return; + } + } + self.is_next_computed = false; + self.has_next(); + } + + pub fn has_next(&mut self) -> bool { + if !self.is_next_computed { + if self.curr_mapping.len() == self.o_qvertices.len() { + self.curr_mapping.pop(); + } + loop { + let next_idx = self.curr_mapping.len(); + if next_idx == 0 && self.vertex_indices[0] < self.vertices_for_idx[0].len() { + self.curr_mapping + .push(self.vertices_for_idx[0][self.vertex_indices[0]].clone()); + self.vertex_indices[0] += 1; + } else if self.vertex_indices[next_idx] < self.vertices_for_idx[next_idx].len() { + let new_var = &self.vertices_for_idx[next_idx][self.vertex_indices[next_idx]]; + self.vertex_indices[next_idx] += 1; + let other_for_new = &self.o_qvertices[next_idx]; + let mut outer_flag = false; + for i in 0..self.curr_mapping.len() { + let prev_var = &self.curr_mapping[i]; + if prev_var == new_var { + outer_flag = true; + break; + } + let other_for_prev = &self.o_qvertices[i]; + let q_edges = self.qvertex_to_qedges_map[new_var].get(prev_var); + let o_qedges = + self.o_qgraph.qvertex_to_qedges_map[other_for_new].get(other_for_prev); + if q_edges.is_none() && o_qedges.is_none() { + continue; + } + if 
q_edges.is_none() + || o_qedges.is_none() + || q_edges.unwrap().len() != o_qedges.unwrap().len() + { + outer_flag = true; + break; + } + if q_edges.unwrap().len() == 0 { + continue; + } + let q_edge = &q_edges.unwrap()[0]; + let o_qedge = &o_qedges.unwrap()[0]; + if q_edge.label != o_qedge.label { + continue; + } + if !((&q_edge.from_query_vertex == prev_var + && &o_qedge.from_query_vertex == other_for_prev) + || (&q_edge.from_query_vertex == new_var + && &o_qedge.from_query_vertex == other_for_new)) + { + outer_flag = true; + break; + } + } + if outer_flag { + continue; + } + self.curr_mapping.push(new_var.clone()); + } else if self.vertex_indices[next_idx] >= self.vertices_for_idx[next_idx].len() { + self.curr_mapping.pop(); + self.vertex_indices[next_idx] = 0; + } + if self.curr_mapping.len() == self.o_qvertices.len() + || (self.vertex_indices[0] >= self.vertices_for_idx[0].len() + && self.curr_mapping.is_empty()) + { + break; + } + } + self.is_next_computed = true; + } + if !self.curr_mapping.is_empty() { + for i in 0..self.curr_mapping.len() { + for j in (i + 1)..self.curr_mapping.len() { + let q_vertex = &self.curr_mapping[i]; + let o_qvertex = &self.curr_mapping[j]; + if !self.contains_query_edge(q_vertex, o_qvertex) { + continue; + } + let q_edge = &self.qvertex_to_qedges_map[q_vertex][o_qvertex][0]; + let o_qedge = &self.o_qgraph.qvertex_to_qedges_map[&self.o_qvertices[i]] + [&self.o_qvertices[j]][0]; + if q_edge.label == o_qedge.label { + continue; + } + self.is_next_computed = false; + return self.has_next(); + } + } + } + + !self.curr_mapping.is_empty() + } + + pub fn next(&mut self) -> Option> { + if !self.has_next() { + return None; + } + self.is_next_computed = false; + self.next.clear(); + for i in 0..self.o_qvertices.len() { + self.next + .insert(self.curr_mapping[i].clone(), self.o_qvertices[i].clone()); + } + return Some(self.next.clone()); + } + + pub fn contains_query_edge(&self, v1: &String, v2: &String) -> bool { + 
self.qvertex_to_qedges_map.contains_key(v1) + && self.qvertex_to_qedges_map[v1].contains_key(v2) + } +} diff --git a/src/graph_impl/multi_graph/planner/mod.rs b/src/graph_impl/multi_graph/planner/mod.rs new file mode 100644 index 00000000..ede2a1d7 --- /dev/null +++ b/src/graph_impl/multi_graph/planner/mod.rs @@ -0,0 +1,3 @@ +pub mod catalog; +pub mod query_planner; +pub mod query_planner_big; diff --git a/src/graph_impl/multi_graph/planner/query_planner.rs b/src/graph_impl/multi_graph/planner/query_planner.rs new file mode 100644 index 00000000..9092c4d1 --- /dev/null +++ b/src/graph_impl/multi_graph/planner/query_planner.rs @@ -0,0 +1,555 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::intersect::BaseIntersect; +use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect; +use graph_impl::multi_graph::plan::operator::extend::EI::{BaseEI, CachingType, EI}; +use graph_impl::multi_graph::plan::operator::hashjoin::hash_join::HashJoin; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::{BaseProbe, Probe}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::{ + ProbeMultiVertices, PMV, +}; +use graph_impl::multi_graph::plan::operator::operator::Operator; +use graph_impl::multi_graph::plan::operator::scan::scan::{BaseScan, Scan}; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::plan::operator::sink::sink::SinkType; +use graph_impl::multi_graph::plan::query_plan::QueryPlan; +use graph_impl::multi_graph::planner::catalog::adj_list_descriptor::{ + AdjListDescriptor, Direction, +}; +use graph_impl::multi_graph::planner::catalog::catalog::{ + Catalog, LOGGER_FLAG, MULTI_VERTEX_WEIGHT_BUILD_COEF, MULTI_VERTEX_WEIGHT_PROBE_COEF, + SINGLE_VERTEX_WEIGHT_BUILD_COEF, SINGLE_VERTEX_WEIGHT_PROBE_COEF, +}; +use graph_impl::multi_graph::planner::catalog::query_edge::QueryEdge; +use 
graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::TypedStaticGraph; +use hashbrown::{HashMap, HashSet}; +use itertools::Itertools; +use std::cell::RefCell; +use std::hash::Hash; +use std::ops::{Deref, DerefMut}; +use std::rc::Rc; + +pub struct QueryPlanner { + subgraph_plans: HashMap>>>, + pub query_graph: QueryGraph, + pub num_qvertices: usize, + pub next_num_qvertices: usize, + graph: TypedStaticGraph, + catalog: Catalog, + has_limit: bool, + computed_selectivities: HashMap>, +} + +impl + QueryPlanner +{ + pub fn new( + query_graph: QueryGraph, + catalog: Catalog, + graph: TypedStaticGraph, + ) -> Self { + QueryPlanner { + subgraph_plans: HashMap::new(), + has_limit: query_graph.limit > 0, + num_qvertices: query_graph.get_num_qvertices(), + query_graph, + next_num_qvertices: 0, + graph, + catalog, + computed_selectivities: HashMap::new(), + } + } + + pub fn plan(&mut self) -> QueryPlan { + if self.num_qvertices == 2 { + return QueryPlan::new_from_operator(Rc::new(RefCell::new(Operator::Scan( + Scan::Base(BaseScan::new(self.query_graph.clone())), + )))); + } + self.consider_all_scan_operators(); + while self.next_num_qvertices <= self.num_qvertices { + self.consider_all_next_query_extensions(); + self.next_num_qvertices += 1; + } + let key = self.subgraph_plans[&self.num_qvertices] + .keys() + .next() + .unwrap(); + let mut best_plan = self.get_best_plan(self.num_qvertices, key); + // each operator added only sets its prev pointer (to reuse operator objects). + // the picked plan needs to set the next pointer for each operator in the linear subplans. 
+ self.set_next_pointers(&mut best_plan); + if self.has_limit { + best_plan.sink_type = SinkType::Limit; + best_plan.out_tuples_limit = self.query_graph.limit; + } + best_plan + } + + pub fn set_next_pointers(&self, best_plan: &mut QueryPlan) { + best_plan.subplans.iter_mut().for_each(|last_op| { + let mut cur_op = last_op.clone(); + loop { + let prev_op = { + let cur_op_ref = cur_op.borrow(); + get_op_attr_as_ref!(cur_op_ref.deref(), prev) + .as_ref() + .map(|op| op.clone()) + }; + if prev_op.is_none() { + break; + } + let mut prev = prev_op.unwrap(); + *get_op_attr_as_mut!(prev.borrow_mut().deref_mut(), next) = vec![cur_op.clone()]; + cur_op = prev.clone() + } + }); + } + + fn consider_all_scan_operators(&mut self) { + self.next_num_qvertices = 2; + self.subgraph_plans + .entry(self.next_num_qvertices) + .or_insert(HashMap::new()); + for query_edge in &self.query_graph.q_edges { + let mut out_subgraph = QueryGraph::empty(); + out_subgraph.add_qedge(query_edge.clone()); + let scan = Scan::Base(BaseScan::new(out_subgraph)); + let num_edges = self.get_num_edges(&query_edge); + let query_plan = QueryPlan::new_from_last_op(scan, num_edges as f64); + let mut query_plans = vec![]; + query_plans.push(query_plan); + let key = QueryPlanner::::get_key(vec![ + query_edge.from_query_vertex.clone(), + query_edge.to_query_vertex.clone(), + ]); + let plan = self + .subgraph_plans + .get_mut(&self.next_num_qvertices) + .unwrap(); + plan.insert(key, query_plans); + } + self.next_num_qvertices = 3; + } + + fn consider_all_next_query_extensions(&mut self) { + self.subgraph_plans + .entry(self.next_num_qvertices) + .or_insert(HashMap::new()); + let keys: Vec = (&self.subgraph_plans[&(self.next_num_qvertices - 1)]) + .keys() + .map(|v| v.clone()) + .collect(); + for key in keys { + self.consider_all_next_extend_operators(&key); + } + if !self.has_limit && self.next_num_qvertices >= 4 { + let plan_map_keys: Vec = self.subgraph_plans[&self.next_num_qvertices] + .keys() + .map(|v| 
v.clone()) + .collect(); + for plan_map_key in plan_map_keys { + self.consider_all_next_hash_join_operators(&plan_map_key); + } + } + } + + fn consider_all_next_extend_operators(&mut self, key: &String) { + let prev_query_plans = &self.subgraph_plans[&(self.next_num_qvertices - 1)][key]; + let prev_qvertices = { + let op = prev_query_plans[0].last_operator.as_ref().unwrap(); + get_op_attr_as_ref!(op.borrow().deref(), out_subgraph).get_query_vertices() + }; + let to_qvertices = self.query_graph.get_neighbors(prev_qvertices); + let prev_query_plans_len = prev_query_plans.len(); + for to_qvertex in to_qvertices { + for i in 0..prev_query_plans_len { + let prev_query_plan = + self.subgraph_plans[&(self.next_num_qvertices - 1)][key][i].clone(); + let (key, plan) = self.get_plan_with_next_extend(prev_query_plan, &to_qvertex); + let plan_map = self + .subgraph_plans + .get_mut(&self.next_num_qvertices) + .unwrap(); + plan_map.entry(key.clone()).or_insert(vec![]); + plan_map.get_mut(&key).unwrap().push(plan.clone()); + } + } + } + + pub fn get_plan_with_next_extend( + &mut self, + prev_query_plan: QueryPlan, + to_qvertex: &String, + ) -> (String, QueryPlan) { + let last_operator = prev_query_plan.last_operator.as_ref().unwrap(); + let (mut in_subgraph, last_previous_repeated_index, last_prev) = { + let last_op_ref = last_operator.borrow(); + let base_op = get_base_op_as_ref!(last_op_ref.deref()); + ( + base_op.out_subgraph.clone(), + base_op.last_repeated_vertex_idx, + base_op.prev.as_ref().map(|op| op.clone()), + ) + }; + let mut alds = vec![]; + let mut next_extend = self.get_next_ei(&in_subgraph, to_qvertex, &mut alds, last_operator); + let base_next_extend = get_ei_as_mut!(&mut next_extend); + base_next_extend.init_caching(last_previous_repeated_index); + let prev_estimated_num_out_tuples = prev_query_plan.estimated_num_out_tuples; + let to_type = base_next_extend + .base_op + .out_subgraph + .get_query_vertex_type(to_qvertex); + let estimated_selectivity = 
self.get_selectivity( + &mut in_subgraph, + &mut base_next_extend.base_op.out_subgraph, + &alds, + to_type, + ); + let icost; + if let CachingType::None = base_next_extend.caching_type { + icost = prev_estimated_num_out_tuples + * self.catalog.get_icost( + &mut in_subgraph, + alds.iter().collect(), + base_next_extend.to_type, + ); + } else { + let mut out_tuples_to_process = prev_estimated_num_out_tuples; + if last_prev.is_some() { + let index = 0; + let mut last_estimated_num_out_tuples_for_extension_qvertex = -1.0; + for ald in alds.iter().filter(|ald| ald.vertex_idx > index) { + last_estimated_num_out_tuples_for_extension_qvertex = + prev_query_plan.q_vertex_to_num_out_tuples[&ald.from_query_vertex].clone(); + } + out_tuples_to_process /= last_estimated_num_out_tuples_for_extension_qvertex; + } + if let CachingType::FullCaching = base_next_extend.caching_type { + icost = out_tuples_to_process * self.catalog.get_icost(&mut in_subgraph, alds.iter().collect(), to_type) + + // added to make caching effect on cost more robust. + (prev_estimated_num_out_tuples - out_tuples_to_process) * estimated_selectivity; + } else { + // cachingType == CachingType.PARTIAL_CACHING + let alds_to_cache = alds + .iter() + .filter(|ald| ald.vertex_idx <= last_previous_repeated_index) + .collect(); + let alds_to_always_intersect = alds + .iter() + .filter(|ald| ald.vertex_idx > last_previous_repeated_index) + .collect(); + let always_intersect_icost = prev_estimated_num_out_tuples + * self + .catalog + .get_icost(&mut in_subgraph, alds_to_always_intersect, to_type); + let cached_intersect_icost = out_tuples_to_process + * self + .catalog + .get_icost(&mut in_subgraph, alds_to_cache, to_type); + icost = prev_estimated_num_out_tuples * always_intersect_icost + + out_tuples_to_process * cached_intersect_icost + + // added to make caching effect on cost more robust. 
+ (prev_estimated_num_out_tuples - out_tuples_to_process) * estimated_selectivity; + } + } + + let estimated_icost = prev_query_plan.estimated_icost + icost; + let estimated_num_out_tuples = prev_estimated_num_out_tuples * estimated_selectivity; + + let mut q_vertex_to_num_out_tuples = HashMap::new(); + prev_query_plan + .q_vertex_to_num_out_tuples + .iter() + .for_each(|(k, v)| { + q_vertex_to_num_out_tuples.insert(k.clone(), v.clone()); + }); + q_vertex_to_num_out_tuples.insert( + base_next_extend.to_query_vertex.clone(), + estimated_num_out_tuples, + ); + + let mut new_query_plan = prev_query_plan.shallow_copy(); + new_query_plan.estimated_icost = estimated_icost; + new_query_plan.estimated_num_out_tuples = estimated_num_out_tuples; + let query_vertices = base_next_extend + .base_op + .out_qvertex_to_idx_map + .keys() + .map(|k| k.clone()) + .collect(); + new_query_plan.append(Rc::new(RefCell::new(Operator::EI(next_extend)))); + new_query_plan.q_vertex_to_num_out_tuples = q_vertex_to_num_out_tuples; + ( + QueryPlanner::::get_key(query_vertices), + new_query_plan, + ) + } + + fn get_next_ei( + &self, + in_subgraph: &QueryGraph, + to_qvertex: &String, + alds: &mut Vec, + last_operator: &Rc>>, + ) -> EI { + let mut out_subgraph = in_subgraph.copy(); + in_subgraph + .get_query_vertices() + .iter() + .for_each(|from_qvertex| { + if self + .query_graph + .contains_query_edge(from_qvertex, to_qvertex) + { + // simple query graph so there is only 1 query_edge, so get query_edge at index '0'. 
+ let query_edge = + self.query_graph.get_qedges(from_qvertex, to_qvertex)[0].clone(); + let index = + get_op_attr_as_ref!(last_operator.borrow().deref(), out_qvertex_to_idx_map) + [from_qvertex] + .clone(); + let direction = if from_qvertex == &query_edge.from_query_vertex { + Direction::Fwd + } else { + Direction::Bwd + }; + let label = query_edge.label; + alds.push(AdjListDescriptor::new( + from_qvertex.clone(), + index, + direction, + label, + )); + out_subgraph.add_qedge(query_edge); + } + }); + let mut output_variable_idx_map = HashMap::new(); + get_op_attr_as_ref!(last_operator.borrow().deref(), out_qvertex_to_idx_map) + .iter() + .for_each(|(k, v)| { + output_variable_idx_map.insert(k.clone(), v.clone()); + }); + output_variable_idx_map.insert(to_qvertex.clone(), output_variable_idx_map.len()); + EI::make( + to_qvertex.clone(), + self.query_graph.get_query_vertex_type(to_qvertex), + alds.clone(), + out_subgraph, + in_subgraph.clone(), + output_variable_idx_map, + ) + } + + fn get_selectivity( + &mut self, + in_subgraph: &mut QueryGraph, + out_subgraph: &mut QueryGraph, + alds: &Vec, + to_type: i32, + ) -> f64 { + let selectivity; + let computed_selectivity_op = self + .computed_selectivities + .get_mut(&out_subgraph.get_encoding()); + if computed_selectivity_op.is_some() { + for (graph, selectivity) in computed_selectivity_op.unwrap() { + if graph.is_isomorphic_to(out_subgraph) { + return selectivity.clone(); + } + } + } else { + self.computed_selectivities + .insert(out_subgraph.get_encoding(), vec![]); + } + selectivity = self.catalog.get_selectivity(in_subgraph, alds, to_type); + self.computed_selectivities + .get_mut(&out_subgraph.get_encoding()) + .unwrap() + .push((out_subgraph.clone(), selectivity)); + selectivity + } + + fn consider_all_next_hash_join_operators(&mut self, map_key: &String) { + let plan_map = &self.subgraph_plans[&self.next_num_qvertices]; + let plans = &plan_map[map_key]; + let op = plans[0].last_operator.as_ref().unwrap(); + let 
out_subgraph = get_op_attr_as_ref!(op.borrow().deref(), out_subgraph).clone(); + + let query_vertices = out_subgraph.get_query_vertices(); + let min_size = 3; + let mut max_size = out_subgraph.get_query_vertices().len() - min_size; + if max_size < min_size { + max_size = min_size; + } + for set_size in min_size..=max_size { + let plans = self.subgraph_plans[&set_size].clone(); + for key in plans.keys() { + let prev_query_plan = self.get_best_plan(set_size, key); + let last_op = prev_query_plan.last_operator.as_ref().unwrap(); + let last_op_ref = last_op.borrow(); + let base_last_op = get_base_op_as_ref!(last_op_ref.deref()); + let prev_qvertices = base_last_op.out_subgraph.get_query_vertices_as_set(); + let is_subset = prev_qvertices + .iter() + .map(|v| query_vertices.contains(v)) + .filter(|&x| !x) + .count() + == 0; + if !is_subset { + continue; + } + let mut other_set: Vec = query_vertices + .iter() + .filter(|&x| !prev_qvertices.contains(x)) + .map(|x| x.clone()) + .collect(); + if other_set.len() == 1 { + continue; + } + let join_qvertices = + Self::get_join_qvertices(&out_subgraph, &prev_qvertices, &other_set); + if join_qvertices.len() < 1 + || join_qvertices.len() > 2 + || other_set.len() + join_qvertices.len() > self.next_num_qvertices - 1 + { + continue; + } + join_qvertices.iter().for_each(|v| { + other_set.push(v.clone()); + }); + + let rest_size = other_set.len(); + let rest_key = QueryPlanner::::get_key(other_set); + if !self.subgraph_plans[&rest_size].contains_key(&rest_key) { + continue; + } + let other_prev_operator = self.get_best_plan(rest_size, &rest_key); + self.consider_hash_join_operator( + &out_subgraph, + query_vertices.clone(), + &prev_query_plan, + &other_prev_operator, + join_qvertices.len(), + ); + } + } + } + + fn get_join_qvertices( + query_graph: &QueryGraph, + vertices: &HashSet, + other_vertices: &Vec, + ) -> Vec { + let mut join_qvertices = HashSet::new(); + vertices.iter().for_each(|cur| { + other_vertices + .iter() + 
.filter(|&other| query_graph.contains_query_edge(cur, other)) + .for_each(|_other| { + join_qvertices.insert(cur.clone()); + }) + }); + join_qvertices.into_iter().collect() + } + + fn consider_hash_join_operator( + &mut self, + out_subgraph: &QueryGraph, + query_vertices: Vec, + subplan: &QueryPlan, + other_subplan: &QueryPlan, + num_join_qvertices: usize, + ) { + let is_plan_build_subplan = + subplan.estimated_num_out_tuples < other_subplan.estimated_num_out_tuples; + let (build_subplan, probe_subplan) = if is_plan_build_subplan { + (subplan, other_subplan) + } else { + (other_subplan, subplan) + }; + let (build_coef, probe_coef) = if num_join_qvertices == 1 { + ( + SINGLE_VERTEX_WEIGHT_BUILD_COEF, + SINGLE_VERTEX_WEIGHT_PROBE_COEF, + ) + } else { + ( + MULTI_VERTEX_WEIGHT_BUILD_COEF, + MULTI_VERTEX_WEIGHT_PROBE_COEF, + ) + }; + let icost = build_subplan.estimated_icost + + probe_subplan.estimated_icost + + build_coef * build_subplan.estimated_num_out_tuples + + probe_coef * probe_subplan.estimated_num_out_tuples; + + let key = QueryPlanner::::get_key(query_vertices.clone()); + let curr_best_query_plan = self.get_best_plan(query_vertices.len(), &key); + if curr_best_query_plan.estimated_icost > icost { + let mut query_plan = HashJoin::make( + out_subgraph.clone(), + build_subplan.clone(), + probe_subplan.clone(), + ); + query_plan.estimated_icost = icost; + query_plan.estimated_num_out_tuples = curr_best_query_plan.estimated_num_out_tuples; + + let mut q_vertex_to_num_out_tuples = HashMap::new(); + probe_subplan + .q_vertex_to_num_out_tuples + .iter() + .for_each(|(k, v)| { + q_vertex_to_num_out_tuples.insert(k.clone(), v.clone()); + }); + + let last_op = build_subplan.last_operator.as_ref().unwrap(); + let last_op_ref = last_op.borrow(); + let out_subgraph = get_op_attr_as_ref!(last_op_ref.deref(), out_subgraph); + out_subgraph.get_query_vertices().iter().for_each(|v| { + q_vertex_to_num_out_tuples + .entry(v.clone()) + 
.or_insert(curr_best_query_plan.estimated_num_out_tuples); + }); + + query_plan.q_vertex_to_num_out_tuples = q_vertex_to_num_out_tuples; + + let query_plans = self + .subgraph_plans + .get_mut(&query_vertices.len()) + .unwrap() + .get_mut(&key) + .unwrap(); + query_plans.clear(); + query_plans.push(query_plan); + } + } + + fn get_best_plan(&self, num_qvertices: usize, key: &String) -> QueryPlan { + let possible_query_plans = &self.subgraph_plans[&num_qvertices][key]; + let mut best_plan = &possible_query_plans[0]; + possible_query_plans.iter().for_each(|possible_query_plan| { + if possible_query_plan.estimated_icost < best_plan.estimated_icost { + best_plan = possible_query_plan; + } + }); + best_plan.clone() + } + + pub fn get_num_edges(&self, query_edge: &QueryEdge) -> usize { + let from_type = self + .query_graph + .get_query_vertex_type(&query_edge.from_query_vertex); + let to_type = self + .query_graph + .get_query_vertex_type(&query_edge.to_query_vertex); + let label = query_edge.label; + self.graph.get_num_edges(from_type, to_type, label) + } + + fn get_key(mut query_vertices: Vec) -> String { + query_vertices.sort(); + serde_json::to_string(&query_vertices).unwrap() + } +} diff --git a/src/graph_impl/multi_graph/planner/query_planner_big.rs b/src/graph_impl/multi_graph/planner/query_planner_big.rs new file mode 100644 index 00000000..61307467 --- /dev/null +++ b/src/graph_impl/multi_graph/planner/query_planner_big.rs @@ -0,0 +1,180 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::operator::extend::intersect::BaseIntersect; +use graph_impl::multi_graph::plan::operator::extend::intersect::Intersect; +use graph_impl::multi_graph::plan::operator::extend::EI::{BaseEI, CachingType, EI}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe::{BaseProbe, Probe}; +use graph_impl::multi_graph::plan::operator::hashjoin::probe_multi_vertices::{ + ProbeMultiVertices, PMV, +}; +use 
graph_impl::multi_graph::plan::operator::operator::Operator; +use graph_impl::multi_graph::plan::operator::scan::scan::{BaseScan, Scan}; +use graph_impl::multi_graph::plan::operator::sink::sink::Sink; +use graph_impl::multi_graph::plan::operator::sink::sink::SinkType; +use graph_impl::multi_graph::plan::query_plan::QueryPlan; +use graph_impl::multi_graph::planner::catalog::catalog::Catalog; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::multi_graph::planner::query_planner::QueryPlanner; +use graph_impl::TypedStaticGraph; +use hashbrown::{HashMap, HashSet}; +use std::cmp::max; +use std::hash::Hash; +use std::ops::Deref; + +pub struct QueryPlannerBig { + base_planner: QueryPlanner, + subgraph_plans: HashMap>>, + num_top_plans_kept: usize, +} + +impl + QueryPlannerBig +{ + pub fn new( + query_graph: QueryGraph, + catalog: Catalog, + graph: TypedStaticGraph, + ) -> Self { + let mut planner = QueryPlannerBig { + base_planner: QueryPlanner::new(query_graph, catalog, graph), + subgraph_plans: HashMap::new(), + num_top_plans_kept: 5, + }; + let num_vertices = planner.base_planner.num_qvertices; + if num_vertices >= 15 { + planner.num_top_plans_kept = 3; + } else if num_vertices >= 20 && num_vertices <= 25 { + planner.num_top_plans_kept = 5; + } else if num_vertices > 25 { + planner.num_top_plans_kept = 1; + } + planner + } + + pub fn plan(&mut self) -> QueryPlan { + self.consider_least_selective_scans(); + while self.base_planner.next_num_qvertices <= self.base_planner.num_qvertices { + self.consider_next_query_extensions(); + self.base_planner.next_num_qvertices += 1; + } + let plans = &self.subgraph_plans[&self.base_planner.num_qvertices]; + let mut best_plan = &plans[0]; + for plan in plans { + if best_plan.estimated_icost > plan.estimated_icost { + best_plan = plan; + } + } + let mut best_plan = best_plan.clone(); + // each operator added only sets its prev pointer (to reuse operator objects). 
+ // the picked plan needs to set the next pointer for each operator in the linear subplans. + self.base_planner.set_next_pointers(&mut best_plan); + if self.base_planner.query_graph.limit > 0 { + best_plan.sink_type = SinkType::Limit; + best_plan.out_tuples_limit = self.base_planner.query_graph.limit; + } + best_plan + } + + fn consider_least_selective_scans(&mut self) { + self.base_planner.next_num_qvertices = 2; /* level = 2 for edge scan */ + self.subgraph_plans.entry(2).or_insert(vec![]); + let mut edges_to_scan = vec![]; + let mut num_edges_to_scan = vec![]; + let q_edges = &self.base_planner.query_graph.q_edges; + for i in 0..self.num_top_plans_kept { + let edge = q_edges[i].clone(); + num_edges_to_scan.push(self.base_planner.get_num_edges(&edge)); + edges_to_scan.push(edge); + } + + for i in self.num_top_plans_kept..q_edges.len() { + let num_edges = self.base_planner.get_num_edges(&q_edges[i]); + for j in 0..self.num_top_plans_kept { + if num_edges < num_edges_to_scan[j] { + edges_to_scan[j] = q_edges[i].clone(); + num_edges_to_scan[j] = num_edges; + break; + } + } + } + for i in 0..self.num_top_plans_kept { + let mut output_subgraph = QueryGraph::empty(); + output_subgraph.add_qedge(edges_to_scan[i].clone()); + let scan = Scan::Base(BaseScan::new(output_subgraph)); + let query_plan = QueryPlan::new_from_last_op(scan, num_edges_to_scan[i] as f64); + self.subgraph_plans + .get_mut(&self.base_planner.next_num_qvertices) + .unwrap() + .push(query_plan); + } + self.base_planner.next_num_qvertices = 3; + } + + fn consider_next_query_extensions(&mut self) { + let mut new_query_plans = vec![]; + let plans = self + .subgraph_plans + .get_mut(&(self.base_planner.next_num_qvertices - 1)) + .unwrap(); + for prev_query_plan in plans { + let (prev_qvertices, in_subgraph) = { + let last_base_op = prev_query_plan.last_operator.as_ref().unwrap(); + let last_base_op_ref = last_base_op.borrow(); + let last_op = get_base_op_as_ref!(last_base_op_ref.deref()); + ( + 
last_op.out_subgraph.get_query_vertices(), + last_op.out_subgraph.clone(), + ) + }; + let to_qvertices = self.base_planner.query_graph.get_neighbors(prev_qvertices); + let next_to_qvertices = Self::filter_to_qvertices_by_max_num_alds( + &self.base_planner.query_graph, + to_qvertices, + &in_subgraph, + ); + for to_qvertex in next_to_qvertices { + let (_key, plan) = self + .base_planner + .get_plan_with_next_extend(prev_query_plan.clone(), &to_qvertex); + let icost = plan.estimated_icost; + if new_query_plans.len() < self.num_top_plans_kept { + new_query_plans.push(plan); + } else { + for i in 0..self.num_top_plans_kept { + if new_query_plans[i].estimated_icost > icost { + new_query_plans.insert(i, plan.clone()); + } + } + } + } + } + self.subgraph_plans + .insert(self.base_planner.next_num_qvertices, new_query_plans); + } + fn filter_to_qvertices_by_max_num_alds( + query_graph: &QueryGraph, + to_qvertices: HashSet, + in_subgraph: &QueryGraph, + ) -> Vec { + let mut max_num_alds = 0; + let mut to_qvertex_to_num_alds_map = HashMap::new(); + to_qvertices.iter().for_each(|to_qvertex| { + let num_alds = in_subgraph + .get_query_vertices() + .iter() + .filter(|&from_qvertex| query_graph.contains_query_edge(from_qvertex, to_qvertex)) + .count(); + max_num_alds = max(max_num_alds, num_alds); + to_qvertex_to_num_alds_map.insert(to_qvertex, num_alds); + }); + let final_max_num_alds = max_num_alds; + to_qvertices + .iter() + .filter(|&to_qvertex| { + to_qvertex_to_num_alds_map + .get(to_qvertex) + .map_or(false, |&to| to == final_max_num_alds) + }) + .map(|x| x.clone()) + .collect() + } +} diff --git a/src/graph_impl/multi_graph/query/mod.rs b/src/graph_impl/multi_graph/query/mod.rs new file mode 100644 index 00000000..28b54a80 --- /dev/null +++ b/src/graph_impl/multi_graph/query/mod.rs @@ -0,0 +1 @@ +pub mod query_graph_set; diff --git a/src/graph_impl/multi_graph/query/query_graph_set.rs b/src/graph_impl/multi_graph/query/query_graph_set.rs new file mode 100644 index 
00000000..6ac6769a --- /dev/null +++ b/src/graph_impl/multi_graph/query/query_graph_set.rs @@ -0,0 +1,42 @@ +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use hashbrown::HashMap; + +pub struct QueryGraphSet { + query_graphs: HashMap>, +} + +impl QueryGraphSet { + pub fn new() -> Self { + Self { + query_graphs: HashMap::new(), + } + } + + pub fn add(&mut self, mut query_graph: QueryGraph) { + let encoding = query_graph.get_encoding(); + self.query_graphs.entry(encoding.clone()).or_insert(vec![]); + self.query_graphs + .get_mut(&encoding) + .unwrap() + .push(query_graph); + } + + pub fn contains(&mut self, query_graph: &mut QueryGraph) -> bool { + if let Some(query_graphs) = self.query_graphs.get_mut(&query_graph.get_encoding()) { + for other_query_graph in query_graphs { + if query_graph.is_isomorphic_to(other_query_graph) { + return true; + } + } + } + false + } + + pub fn get_query_graph_set(&self) -> Vec { + self.query_graphs + .values() + .map(|g| g.clone()) + .flatten() + .collect() + } +} diff --git a/src/graph_impl/multi_graph/runner/catalog_generator.rs b/src/graph_impl/multi_graph/runner/catalog_generator.rs new file mode 100644 index 00000000..4e99ed29 --- /dev/null +++ b/src/graph_impl/multi_graph/runner/catalog_generator.rs @@ -0,0 +1,26 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::planner::catalog::catalog::Catalog; +use graph_impl::multi_graph::planner::catalog::catalog_plans::{ + DEF_MAX_INPUT_NUM_VERTICES, DEF_NUM_EDGES_TO_SAMPLE, +}; +use graph_impl::TypedStaticGraph; +use std::hash::Hash; + +pub fn default( + graph: &TypedStaticGraph, +) -> Catalog { + let mut max_input_num_vertex = DEF_MAX_INPUT_NUM_VERTICES; + if graph.is_sorted_by_node() { + max_input_num_vertex = 2; + } + let mut catalog = Catalog::new(DEF_NUM_EDGES_TO_SAMPLE, max_input_num_vertex); + catalog.populate(graph, 1); + catalog.in_subgraphs.iter_mut().for_each(|graph| { + graph.it = None; + }); + println!( + "Catalog generation 
finished in {} (ms)", + catalog.elapsed_time + ); + catalog +} diff --git a/src/graph_impl/multi_graph/runner/mod.rs b/src/graph_impl/multi_graph/runner/mod.rs new file mode 100644 index 00000000..bee911f0 --- /dev/null +++ b/src/graph_impl/multi_graph/runner/mod.rs @@ -0,0 +1,2 @@ +pub mod catalog_generator; +pub mod optimizer_executor; diff --git a/src/graph_impl/multi_graph/runner/optimizer_executor.rs b/src/graph_impl/multi_graph/runner/optimizer_executor.rs new file mode 100644 index 00000000..1ddaa27f --- /dev/null +++ b/src/graph_impl/multi_graph/runner/optimizer_executor.rs @@ -0,0 +1,37 @@ +use generic::{GraphType, IdType}; +use graph_impl::multi_graph::plan::query_plan::QueryPlan; +use graph_impl::multi_graph::planner::catalog::catalog::Catalog; +use graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use graph_impl::multi_graph::planner::query_planner::QueryPlanner; +use graph_impl::multi_graph::planner::query_planner_big::QueryPlannerBig; +use graph_impl::TypedStaticGraph; +use std::hash::Hash; +use std::time::SystemTime; + +pub fn generate_plan( + query_graph: QueryGraph, + catalog: Catalog, + g: TypedStaticGraph, +) -> QueryPlan { + let start_time = SystemTime::now(); + let elapsed_time; + let query_plan = if query_graph.get_num_qvertices() <= 8 { + let mut planner = QueryPlanner::new(query_graph, catalog, g); + let query_plan = planner.plan(); + elapsed_time = SystemTime::now() + .duration_since(start_time) + .unwrap() + .as_millis(); + query_plan + } else { + let mut planner = QueryPlannerBig::new(query_graph, catalog, g); + let query_plan = planner.plan(); + elapsed_time = SystemTime::now() + .duration_since(start_time) + .unwrap() + .as_millis(); + query_plan + }; + println!("Optimizer runtime: {} (ms)", elapsed_time); + query_plan +} diff --git a/src/graph_impl/multi_graph/utils/mod.rs b/src/graph_impl/multi_graph/utils/mod.rs new file mode 100644 index 00000000..1ec8e1c3 --- /dev/null +++ 
b/src/graph_impl/multi_graph/utils/mod.rs @@ -0,0 +1 @@ +pub mod set_utils; diff --git a/src/graph_impl/multi_graph/utils/set_utils.rs b/src/graph_impl/multi_graph/utils/set_utils.rs new file mode 100644 index 00000000..1c33a810 --- /dev/null +++ b/src/graph_impl/multi_graph/utils/set_utils.rs @@ -0,0 +1,56 @@ +pub fn get_power_set_excluding_empty_set(set: Vec) -> Vec> { + let mut res = vec![]; + let len = set.len(); + for sub in generate_power_set(set) { + if sub.len() >= 1 && sub.len() <= len { + res.push(sub); + } + } + res +} + +pub fn generate_permutations(mut set: Vec, len: usize) -> Vec> { + let mut permutations = vec![]; + get_permutations_given_len(&mut set, len, 0, &mut vec![], &mut permutations); + permutations +} + +fn get_permutations_given_len( + set: &mut Vec, + len: usize, + pos: usize, + temp: &mut Vec, + permutation: &mut Vec>, +) { + if len == 0 { + permutation.push(temp.clone()); + return; + } + for i in 0..set.len() { + if temp.len() < pos + 1 { + temp.push(set[i].clone()); + } else { + temp.insert(pos, set[i].clone()); + } + get_permutations_given_len(set, len - 1, pos + 1, temp, permutation); + } +} + +fn generate_power_set(original_set: Vec) -> Vec> { + let mut sets = vec![]; + if original_set.is_empty() { + sets.push(vec![]); + return sets; + } + let list = original_set.clone(); + let head = list.get(0).unwrap(); + let rest = list[1..list.len()].iter().map(|x| x.clone()).collect(); + for set in generate_power_set(rest) { + let mut new_set = vec![]; + new_set.push(head.clone()); + set.iter().for_each(|it| new_set.push(it.clone())); + sets.push(new_set); + sets.push(set); + } + sets +} diff --git a/src/graph_impl/static_graph/edge_vec.rs b/src/graph_impl/static_graph/edge_vec.rs index b635fd15..42d7ff0d 100644 --- a/src/graph_impl/static_graph/edge_vec.rs +++ b/src/graph_impl/static_graph/edge_vec.rs @@ -238,6 +238,7 @@ pub trait EdgeVecTrait { } impl EdgeVec { + // no label pub fn new(offsets: Vec, edges: Vec) -> Self { EdgeVec { offsets: 
offsets.into(), @@ -246,6 +247,7 @@ impl EdgeVec { } } + // with label pub fn with_labels(offsets: Vec, edges: Vec, labels: Vec) -> Self { if edges.len() != labels.len() { panic!( diff --git a/src/graph_impl/static_graph/graph.rs b/src/graph_impl/static_graph/graph.rs index 394a369e..bcd3d370 100644 --- a/src/graph_impl/static_graph/graph.rs +++ b/src/graph_impl/static_graph/graph.rs @@ -22,20 +22,24 @@ use std::borrow::Cow; use std::hash::{Hash, Hasher}; use std::marker::PhantomData; -use itertools::Itertools; -use serde; - use generic::{ - DefaultId, DefaultTy, DiGraphTrait, Directed, EdgeType, GeneralGraph, GraphLabelTrait, - GraphTrait, GraphType, IdType, Iter, MapTrait, MutMapTrait, NodeType, UnGraphTrait, Undirected, + DefaultId, DefaultTy, DiGraphTrait, Directed, EdgeTrait, EdgeType, GeneralGraph, + GraphLabelTrait, GraphTrait, GraphType, IdType, Iter, MapTrait, MutMapTrait, NodeTrait, + NodeType, UnGraphTrait, Undirected, }; +use graph_impl::multi_graph::planner::catalog::adj_list_descriptor::Direction; use graph_impl::static_graph::node::StaticNode; +use graph_impl::static_graph::sorted_adj_vec::SortedAdjVec; use graph_impl::static_graph::static_edge_iter::StaticEdgeIndexIter; use graph_impl::static_graph::{EdgeVec, EdgeVecTrait}; use graph_impl::{Edge, GraphImpl}; +use hashbrown::HashMap; use io::serde::{Deserialize, Serialize}; +use itertools::Itertools; use map::SetMap; +use serde; use std::ops::Add; +use std::{cmp, iter}; pub type TypedUnStaticGraph = TypedStaticGraph; pub type TypedDiStaticGraph = TypedStaticGraph; @@ -46,6 +50,8 @@ pub type DiStaticGraph = StaticGraph { @@ -61,6 +67,22 @@ pub struct TypedStaticGraph, // A map of edge labels. edge_label_map: SetMap, + + sort_by_node: bool, + // node Ids indexed by type and random access to node types. + node_ids: Vec, + // node_types[node_id] = node_label_id + // `node_label_id` has been shifted right and id 0 is prepared for no label item. 
+ node_types: Vec, + node_type_offsets: Vec, + pub fwd_adj_lists: Vec>>, + bwd_adj_lists: Vec>>, + label_to_num_edges: Vec, + label_to_largest_fwd_adj_list_size: Vec, + label_to_largest_bwd_adj_list_size: Vec, + edge_key_to_num_edges_map: HashMap, + to_label_to_percentage_map: HashMap, + from_label_to_percentage_map: HashMap, } impl PartialEq @@ -145,6 +167,7 @@ impl Self::new(EdgeVec::default(), None, None, None) } + //without node label and edge label pub fn new( edges: EdgeVec, in_edges: Option>, @@ -187,16 +210,30 @@ impl edges.num_edges() >> 1 }; - TypedStaticGraph { + let mut g = TypedStaticGraph { num_nodes, num_edges, + sort_by_node: false, + node_ids: vec![], + node_types: vec![], + node_type_offsets: vec![], + fwd_adj_lists: vec![], + bwd_adj_lists: vec![], + label_to_num_edges: vec![], + label_to_largest_fwd_adj_list_size: vec![], + label_to_largest_bwd_adj_list_size: vec![], + edge_key_to_num_edges_map: HashMap::new(), + to_label_to_percentage_map: HashMap::new(), edge_vec: edges, in_edge_vec: in_edges, labels: None, node_label_map: SetMap::::new(), edge_label_map: SetMap::::new(), graph_type: PhantomData, - } + from_label_to_percentage_map: HashMap::new(), + }; + g.init_graphflow(); + g } pub fn with_labels( @@ -248,16 +285,32 @@ impl debug!("{} nodes, but {} labels", num_nodes, labels.len()); } - TypedStaticGraph { + if edge_label_map.len() != 0 {} + + let mut g = TypedStaticGraph { num_nodes, num_edges, + sort_by_node: false, + node_ids: vec![], + node_types: vec![], + node_type_offsets: vec![], + fwd_adj_lists: vec![], + bwd_adj_lists: vec![], + label_to_num_edges: vec![], + label_to_largest_fwd_adj_list_size: vec![], + label_to_largest_bwd_adj_list_size: vec![], + edge_key_to_num_edges_map: HashMap::new(), + to_label_to_percentage_map: HashMap::new(), edge_vec: edges, in_edge_vec: in_edges, labels: Some(labels), node_label_map, edge_label_map, graph_type: PhantomData, - } + from_label_to_percentage_map: HashMap::new(), + }; + g.init_graphflow(); + 
g } pub fn from_raw( @@ -307,7 +360,6 @@ impl edge_vec.num_edges() ); } - if labels.is_some() { let num_of_labels = labels.as_ref().unwrap().len(); if num_nodes != num_of_labels { @@ -318,16 +370,130 @@ impl } } - TypedStaticGraph { + let mut g = TypedStaticGraph { num_nodes, num_edges, + sort_by_node: false, + node_ids: vec![], + node_types: vec![], + node_type_offsets: vec![], + fwd_adj_lists: vec![], + bwd_adj_lists: vec![], + label_to_num_edges: vec![], + label_to_largest_fwd_adj_list_size: vec![], + label_to_largest_bwd_adj_list_size: vec![], + edge_key_to_num_edges_map: HashMap::new(), + to_label_to_percentage_map: HashMap::new(), edge_vec, in_edge_vec, labels, node_label_map, edge_label_map, graph_type: PhantomData, + from_label_to_percentage_map: HashMap::new(), + }; + g.init_graphflow(); + g + } + + pub fn init_graphflow(&mut self) { + self.partition_nodes(); + self.partition_edges(); + let mut label_cnt = if self.sort_by_node { + self.num_of_node_labels() + } else { + self.num_of_edge_labels() + }; + if label_cnt == 0 { + label_cnt = 1; + } + self.label_to_num_edges = vec![0; label_cnt]; + self.label_to_largest_fwd_adj_list_size = vec![0; label_cnt]; + self.label_to_largest_bwd_adj_list_size = vec![0; label_cnt]; + for vertex_id in 0..self.num_nodes { + for label in 0..label_cnt { + let fwd_adj_size = self.fwd_adj_lists[vertex_id] + .as_ref() + .unwrap() + .sub_len(label); + let bwd_adj_size = self.bwd_adj_lists[vertex_id] + .as_ref() + .unwrap() + .sub_len(label); + self.label_to_num_edges[label] += fwd_adj_size; + if fwd_adj_size > self.label_to_largest_fwd_adj_list_size[label] { + self.label_to_largest_fwd_adj_list_size[label] = fwd_adj_size; + } + if bwd_adj_size > self.label_to_largest_bwd_adj_list_size[label] { + self.label_to_largest_bwd_adj_list_size[label] = bwd_adj_size; + } + } } + + // init count + let node_labels_cnt = std::cmp::max(self.num_of_node_labels(), 1); + let edge_labels_cnt = std::cmp::max(self.num_of_edge_labels(), 1); + for from 
in 0..node_labels_cnt { + for to in 0..node_labels_cnt { + for label in 0..edge_labels_cnt { + let edge = Self::get_edge_key(from, to, label); + self.edge_key_to_num_edges_map.entry(edge).or_insert(0); + let to_label = Self::get_edge_key_by_label(label, to); + self.to_label_to_percentage_map.entry(to_label).or_insert(0); + let from_label = Self::get_edge_key_by_label(from, label); + self.from_label_to_percentage_map + .entry(from_label) + .or_insert(0); + } + } + } + + for from in 0..self.num_nodes { + let from_type = self.node_types[from]; + let offsets = self.fwd_adj_lists[from] + .as_ref() + .unwrap() + .get_offsets() + .clone(); + if self.sort_by_node { + let label = 0; + for to_type in 0..(offsets.len() - 1) { + let num_edges = offsets[to_type + 1] - offsets[to_type]; + self.add_edge_count(from_type as usize, to_type, label, num_edges); + } + } else { + let neighbours = self.fwd_adj_lists[from] + .as_ref() + .unwrap() + .get_neighbor_ids() + .clone(); + for label in 0..(offsets.len() - 1) { + for to_idx in offsets[label]..offsets[label + 1] { + let to_type = self.node_types[neighbours[to_idx].id()]; + self.add_edge_count(from_type as usize, to_type as usize, label, 1); + } + } + } + } + } + + fn add_edge_count(&mut self, from_type: usize, to_type: usize, label: usize, num_edges: usize) { + let edge = Self::get_edge_key(from_type, to_type, label); + let num_edges_origin = self.edge_key_to_num_edges_map.get(&edge).unwrap(); + self.edge_key_to_num_edges_map + .insert(edge, num_edges_origin + num_edges); + let to_label = Self::get_edge_key_by_label(label, to_type); + let to_percentage = self.to_label_to_percentage_map.get(&to_label).unwrap(); + self.to_label_to_percentage_map + .insert(to_label, to_percentage + num_edges); + let from_label = Self::get_edge_key_by_label(from_type, label); + let from_percentage = self.from_label_to_percentage_map.get(&from_label).unwrap(); + self.from_label_to_percentage_map + .insert(from_label, from_percentage + num_edges); + } + 
+ pub fn is_sorted_by_node(&self) -> bool { + self.sort_by_node } #[inline] @@ -415,6 +581,274 @@ impl pub fn find_edge_index(&self, start: Id, target: Id) -> Option { self.edge_vec.find_edge_index(start, target) } + + pub fn get_num_edges(&self, from_type: i32, to_type: i32, label: i32) -> usize { + if from_type == KEY_ANY && to_type == KEY_ANY { + return self.label_to_num_edges[label as usize]; + } else if from_type != KEY_ANY && to_type != KEY_ANY { + return self.edge_key_to_num_edges_map + [&Self::get_edge_key(from_type as usize, to_type as usize, label as usize)]; + } else if from_type != KEY_ANY { + return self.from_label_to_percentage_map + [&Self::get_edge_key_by_label(from_type as usize, label as usize)]; + } + self.to_label_to_percentage_map + [&Self::get_edge_key_by_label(label as usize, to_type as usize)] + } + + pub fn get_edge_key(from_type: usize, to_type: usize, label: usize) -> u64 { + (((from_type & 0xFFFF) << 48) as u64) + | (((label & 0x0000FFFF) << 16) as u64) + | ((to_type & 0xFFFF) as u64) + } + + fn get_edge_key_by_label(from_label: usize, to_label: usize) -> u32 { + (((from_label & 0x0000FFFF) << 16) as u32) | ((to_label & 0xFFFF) as u32) + } + + // Partition nodes by type and generating node_ids && offsets for retrieving. 
+ fn partition_nodes(&mut self) { + if 0 == self.num_of_node_labels() { + let mut node_ids = vec![Id::new(0); self.num_nodes]; + for i in 0..self.num_nodes { + node_ids[i] = Id::new(i); + } + self.node_ids = node_ids; + self.node_types = vec![0; self.num_nodes]; + self.node_type_offsets = vec![0, self.num_nodes + 1]; + return; + } + let offsets = self.get_node_offsets(); + let num_nodes = offsets[offsets.len() - 1]; + + let mut node_ids = vec![Id::new(0); num_nodes]; + let mut node_types = vec![0; num_nodes]; + let mut curr_idx_by_type = vec![0; offsets.len()]; + self.node_indices().for_each(|id| { + let node_id = id.id(); + let node_label_id = self + .get_node(id) + .get_label_id() + .map(|op| op.id()) + .unwrap_or(0) as i32; + node_ids[offsets[node_label_id as usize] + curr_idx_by_type[node_label_id as usize]] = + id; + curr_idx_by_type[node_label_id as usize] += 1; + node_types[node_id] = node_label_id; + }); + self.node_ids = node_ids; + self.node_types = node_types; + self.node_type_offsets = offsets; + } + + // Partition edges by edge label or node label(if there did not exist edge labels in graph) + fn partition_edges(&mut self) { + self.sort_by_node = self.num_of_edge_labels() == 1 && self.num_of_node_labels() > 1; + let (fwd_adj_meta_data, bwd_adj_meta_data) = self.get_adj_meta_data(); + let num_vertices = self.num_nodes; + let mut fwd_adj_lists: Vec>> = vec![Option::None; num_vertices]; + let mut bwd_adj_lists: Vec>> = vec![Option::None; num_vertices]; + let mut fwd_adj_list_curr_idx = HashMap::new(); + let mut bwd_adj_list_curr_idx = HashMap::new(); + let offset_size = { + if self.sort_by_node { + self.num_of_node_labels() + } else { + self.num_of_edge_labels() + } + }; + for node_id in 0..num_vertices { + fwd_adj_lists[node_id] = Some(SortedAdjVec::new( + fwd_adj_meta_data.get(&node_id).unwrap().to_owned(), + )); + fwd_adj_list_curr_idx.insert(node_id, vec![0; offset_size + 1]); + bwd_adj_lists[node_id] = Some(SortedAdjVec::new( + 
bwd_adj_meta_data.get(&node_id).unwrap().to_owned(), + )); + bwd_adj_list_curr_idx.insert(node_id, vec![0; offset_size + 1]); + } + self.edge_indices() + .flat_map(|(from, to)| { + if !Ty::is_directed() { + return vec![(from, to), (to, from)]; + } + vec![(from, to)] + }) + .for_each(|(from, to)| { + let edge_label_id = self + .get_edge(from, to) + .get_label_id() + .map(|op| op.id()) + .unwrap_or(0); + let (from_label_id, to_label_id) = if self.sort_by_node { + (self.node_types[from.id()], self.node_types[to.id()]) + } else { + (edge_label_id as i32, edge_label_id as i32) + }; + let mut idx = fwd_adj_list_curr_idx[&from.id()][to_label_id as usize]; + let mut offset = fwd_adj_meta_data[&from.id()][to_label_id as usize]; + fwd_adj_list_curr_idx.get_mut(&from.id()).unwrap()[to_label_id as usize] += 1; + fwd_adj_lists[from.id()] + .as_mut() + .unwrap() + .set_neighbor_id(to, offset + idx); + idx = bwd_adj_list_curr_idx[&to.id()][from_label_id as usize]; + offset = bwd_adj_meta_data[&to.id()][from_label_id as usize]; + bwd_adj_list_curr_idx.get_mut(&to.id()).unwrap()[from_label_id as usize] += 1; + bwd_adj_lists[to.id()] + .as_mut() + .unwrap() + .set_neighbor_id(from, offset + idx); + }); + + for node_id in 0..num_vertices { + fwd_adj_lists[node_id].as_mut().unwrap().sort(); + bwd_adj_lists[node_id].as_mut().unwrap().sort(); + } + + self.fwd_adj_lists = fwd_adj_lists; + self.bwd_adj_lists = bwd_adj_lists; + } + + fn get_node_offsets(&mut self) -> Vec { + let mut type_to_count_map: HashMap = HashMap::new(); + self.node_indices().for_each(|x| { + let label_id = self + .get_node(x) + .get_label_id() + .map(|op| op.id()) + .unwrap_or(0); + let default_v = 0; + let v = type_to_count_map.get(&label_id).unwrap_or(&default_v); + type_to_count_map.insert(label_id, v + 1); + }); + + let mut next_node_label_key = self.num_of_node_labels(); + if next_node_label_key == 0 { + next_node_label_key = 1; + } + let mut offsets = vec![0; next_node_label_key + 1]; + type_to_count_map + 
.into_iter() + .for_each(|(label_id, label_cnt)| { + if label_id < next_node_label_key - 1 { + offsets[label_id + 1] = label_cnt; + } + offsets[next_node_label_key] += label_cnt; + }); + for i in 1..offsets.len() - 1 { + offsets[i] += offsets[i - 1]; + } + offsets + } + + fn get_adj_meta_data(&self) -> (HashMap>, HashMap>) { + let mut fwd_adj_list_metadata = HashMap::new(); + let mut bwd_adj_list_metadata = HashMap::new(); + let mut next_node_or_edge = if self.sort_by_node { + self.num_of_node_labels() + } else { + self.num_of_edge_labels() + }; + if next_node_or_edge == 0 { + next_node_or_edge = 1; + } + for i in 0..self.num_nodes { + fwd_adj_list_metadata.insert(i, vec![0; next_node_or_edge + 1]); + bwd_adj_list_metadata.insert(i, vec![0; next_node_or_edge + 1]); + } + self.edge_indices() + .flat_map(|(from, to)| { + if Ty::is_directed() { + return vec![(from, to)]; + } + return vec![(from, to), (to, from)]; + }) + .for_each(|(from, to)| { + if self.sort_by_node { + let from_type = self.node_types[from.id()]; + let to_type = self.node_types[to.id()]; + fwd_adj_list_metadata.get_mut(&from.id()).unwrap()[(to_type + 1) as usize] += 1; + bwd_adj_list_metadata.get_mut(&to.id()).unwrap()[(from_type + 1) as usize] += 1; + } else { + let label_id = self + .get_edge(from, to) + .get_label_id() + .map(|op| op.id()) + .unwrap_or(0); + fwd_adj_list_metadata.get_mut(&from.id()).unwrap()[label_id + 1] += 1; + bwd_adj_list_metadata.get_mut(&to.id()).unwrap()[label_id + 1] += 1; + } + }); + fwd_adj_list_metadata.iter_mut().for_each(|(_id, offsets)| { + for i in 1..offsets.len() - 1 { + offsets[next_node_or_edge] += offsets[i]; + offsets[i] += offsets[i - 1]; + } + }); + bwd_adj_list_metadata.iter_mut().for_each(|(_id, offsets)| { + for i in 1..offsets.len() - 1 { + offsets[next_node_or_edge] += offsets[i]; + offsets[i] += offsets[i - 1]; + } + }); + + (fwd_adj_list_metadata, bwd_adj_list_metadata) + } + + fn get_neighbors_slice_by_node(&self, id: Id, label: Option) -> &[Id] { + 
if let Some(label) = label { + if let Some(fwd_list) = &self.fwd_adj_lists[id.id()] { + let offset = fwd_list.get_offsets(); + let label_id = self.node_label_map.find_index(&label).map_or(0, |id| id); + + return &fwd_list.get_neighbor_ids()[offset[label_id]..offset[label_id + 1]]; + } + } + self.edge_vec.neighbors(id) + } + + fn get_neighbors_slice_by_edge(&self, id: Id, label: Option) -> &[Id] { + if let Some(label) = label { + if let Some(fwd_list) = &self.fwd_adj_lists[id.id()] { + let offset = fwd_list.get_offsets(); + let label_id = self.edge_label_map.find_index(&label).map_or(0, |id| id); + return &fwd_list.get_neighbor_ids()[offset[label_id]..offset[label_id + 1]]; + } + } + self.edge_vec.neighbors(id) + } + + pub fn get_node_ids(&self) -> &Vec { + &self.node_ids + } + + pub fn get_node_types(&self) -> &Vec { + self.node_types.as_ref() + } + + pub fn get_node_type_offsets(&self) -> &Vec { + self.node_type_offsets.as_ref() + } + + pub fn get_fwd_adj_list(&self) -> &Vec>> { + self.fwd_adj_lists.as_ref() + } + + pub fn get_bwd_adj_list(&self) -> &Vec>> { + self.bwd_adj_lists.as_ref() + } + + pub fn get_largest_adj_list_size( + &self, + node_or_edge_label: i32, + direction: Direction, + ) -> usize { + if let Direction::Fwd = direction { + return self.label_to_largest_fwd_adj_list_size[node_or_edge_label as usize]; + } + self.label_to_largest_bwd_adj_list_size[node_or_edge_label as usize] + } } impl GraphTrait @@ -569,6 +1003,91 @@ impl fn get_edge_label_map(&self) -> &SetMap { &self.edge_label_map } + + fn neighbors_of_node_iter(&self, id: Id, label: Option) -> Iter { + if !self.is_sorted_by_node() { + panic!("Call `neighbors_of_node` on a graph partition by edge"); + } + Iter::new(Box::new( + self.get_neighbors_slice_by_node(id, label) + .iter() + .map(|x| *x), + )) + } + + fn neighbors_of_edge_iter(&self, id: Id, label: Option) -> Iter { + if self.is_sorted_by_node() { + panic!("Call `neighbors_of_edge` on a graph partition by node"); + } + 
Iter::new(Box::new( + self.get_neighbors_slice_by_edge(id, label) + .iter() + .map(|x| *x), + )) + } + + fn neighbors_of_node(&self, id: Id, label: Option) -> Cow<[Id]> { + if !self.is_sorted_by_node() { + panic!("Call `neighbors_of_node` on a graph partition by edge"); + } + self.get_neighbors_slice_by_node(id, label).into() + } + + fn neighbors_of_edge(&self, id: Id, label: Option) -> Cow<[Id]> { + if self.is_sorted_by_node() { + panic!("Call `neighbors_of_edge` on a graph partition by node"); + } + self.get_neighbors_slice_by_edge(id, label).into() + } + + fn nodes_with_label(&self, label: Option) -> Iter { + if let Some(label) = label { + let label_id = self.node_label_map.find_index(&label); + if label_id.is_none() { + return Iter::new(Box::new(iter::empty::())); + } + let label_id = label_id.unwrap(); + return Iter::new(Box::new( + self.fwd_adj_lists + .iter() + .skip_while(|&x| x.is_none()) + .enumerate() + .flat_map(move |(_sid, list_op)| { + let list = list_op.as_ref().unwrap(); + let offset = list.get_offsets(); + let label = label_id.clone(); + let neighbors = &list.get_neighbor_ids()[offset[label]..offset[label + 1]]; + neighbors.iter().map(move |id| *id) + }) + .unique(), + )); + } + self.node_indices() + } + + fn edges_with_label(&self, label: Option) -> Iter<(Id, Id)> { + if let Some(label) = label { + let label_id = self.edge_label_map.find_index(&label); + if label_id.is_none() { + return Iter::new(Box::new(iter::empty::<(Id, Id)>())); + } + let label_id = label_id.unwrap(); + return Iter::new(Box::new( + self.fwd_adj_lists + .iter() + .skip_while(|&x| x.is_none()) + .enumerate() + .flat_map(move |(sid, list_op)| { + let list = list_op.as_ref().unwrap(); + let offset = list.get_offsets(); + let label = label_id.clone(); + let neighbors = &list.get_neighbor_ids()[offset[label]..offset[label + 1]]; + neighbors.iter().map(move |id| (Id::new(sid), *id)) + }), + )); + } + self.edge_indices() + } } impl UnGraphTrait @@ -681,6 +1200,17 @@ impl None, @@ 
-691,6 +1221,7 @@ impl { + label_offset: Vec, + neighbour_ids: Vec, +} + +impl SortedAdjVec { + pub fn new(offset: Vec) -> Self { + let len = offset[offset.len() - 1]; + Self { + label_offset: offset, + neighbour_ids: vec![IdType::new(0); len], + } + } + + pub fn get_neighbor_id(&self, idx: Id) -> Id { + self.neighbour_ids[idx.id()] + } + + pub fn set_neighbor_id(&mut self, neighbor_id: Id, idx: usize) { + self.neighbour_ids[idx] = neighbor_id + } + + pub fn set_neighbor_ids(&self, label_or_type: i32, neighbours: &mut Neighbours) { + neighbours.ids = self.neighbour_ids.clone(); + neighbours.start_idx = self.label_offset[label_or_type as usize]; + neighbours.end_idx = self.label_offset[(label_or_type + 1) as usize]; + } + + pub fn get_offsets(&self) -> &Vec { + self.label_offset.as_ref() + } + + pub fn get_neighbor_ids(&self) -> &Vec { + self.neighbour_ids.as_ref() + } + + pub fn sort(&mut self) { + for i in 0..self.label_offset.len() - 1 { + let block = self.neighbour_ids[self.label_offset[i]..self.label_offset[i + 1]].as_mut(); + block.sort(); + } + } + + pub fn intersect( + &self, + label_or_type: i32, + some_neighbours: &mut Neighbours, + neighbours: &mut Neighbours, + ) -> usize { + self.inner_intersect( + some_neighbours, + neighbours, + &self.neighbour_ids, + self.label_offset[label_or_type as usize], + self.label_offset[(label_or_type + 1) as usize], + ); + self.label_offset[(label_or_type + 1) as usize] - self.label_offset[label_or_type as usize] + } + + fn inner_intersect( + &self, + some_neighbours: &mut Neighbours, + neighbours: &mut Neighbours, + neighbour_ids: &Vec, + mut this_idx: usize, + this_idx_end: usize, + ) { + neighbours.reset(); + let some_neighbour_ids = &some_neighbours.ids; + let mut some_idx = some_neighbours.start_idx; + let some_idx_end = some_neighbours.end_idx; + while this_idx < this_idx_end && some_idx < some_idx_end { + if neighbour_ids[this_idx] < some_neighbour_ids[some_idx] { + this_idx += 1; + while this_idx < this_idx_end + && 
neighbour_ids[this_idx] < some_neighbour_ids[some_idx] + { + this_idx += 1; + } + } else if neighbour_ids[this_idx] > some_neighbour_ids[some_idx] { + some_idx += 1; + while some_idx < some_idx_end + && neighbour_ids[this_idx] > some_neighbour_ids[some_idx] + { + some_idx += 1; + } + } else { + neighbours.ids[neighbours.end_idx] = neighbour_ids[this_idx]; + neighbours.end_idx += 1; + this_idx += 1; + some_idx += 1; + } + } + } + pub fn len(&self) -> usize { + self.neighbour_ids.len() + } + + pub fn sub_len(&self, label: usize) -> usize { + self.label_offset[label + 1] + self.label_offset[label] + } +} diff --git a/src/lib.rs b/src/lib.rs index 13186999..574a2378 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,7 @@ * specific language governing permissions and limitations * under the License. */ +#![feature(test)] extern crate bincode; extern crate counter; extern crate csv; @@ -37,8 +38,10 @@ extern crate log; #[macro_use] extern crate serde_derive; +extern crate core; #[cfg(feature = "hdfs")] extern crate hdfs; +extern crate test; pub mod algorithm; pub mod generic; diff --git a/tests/io.rs b/tests/io.rs index 443ccea8..0366e337 100644 --- a/tests/io.rs +++ b/tests/io.rs @@ -75,8 +75,8 @@ fn test_cvs_labeled() { let tmp_dir = TempDir::new().unwrap(); let tmp_dir_path = tmp_dir.path(); - let nodes = 10; - let edges = 20; + let nodes = 10000; + let edges = 200000; let node_labels = &vec!["a".to_owned(), "b".to_owned()]; let edge_labels = &vec![1, 2, 3]; diff --git a/tests/static_graph.rs b/tests/static_graph.rs index d092d2e9..7486d5ae 100644 --- a/tests/static_graph.rs +++ b/tests/static_graph.rs @@ -20,15 +20,26 @@ */ #[macro_use] extern crate rust_graph; +extern crate hashbrown; +extern crate itertools; extern crate tempfile; +use hashbrown::HashMap; +use itertools::Itertools; use rust_graph::generic::DefaultId; +use rust_graph::graph_impl::multi_graph::plan::query_plan_worker::QPWorkers; +use 
rust_graph::graph_impl::multi_graph::planner::catalog::catalog::LOGGER_FLAG; +use rust_graph::graph_impl::multi_graph::planner::catalog::query_edge::QueryEdge; +use rust_graph::graph_impl::multi_graph::planner::catalog::query_graph::QueryGraph; +use rust_graph::graph_impl::multi_graph::runner::{catalog_generator, optimizer_executor}; use rust_graph::graph_impl::static_graph::StaticNode; -use rust_graph::graph_impl::Edge; use rust_graph::graph_impl::EdgeVec; +use rust_graph::graph_impl::{Edge, TypedDiGraphMap, TypedGraphMap}; +use rust_graph::io::read_from_csv; use rust_graph::map::SetMap; use rust_graph::prelude::*; use rust_graph::{DiStaticGraph, UnStaticGraph}; +use std::path::Path; #[test] fn test_directed() { @@ -36,6 +47,30 @@ fn test_directed() { let in_edge_vec = EdgeVec::new(vec![0, 2, 3, 4], vec![1, 2, 0, 0]); let g = DiStaticGraph::::new(edge_vec, Some(in_edge_vec), None, None); + assert_eq!(g.get_node_ids(), &vec![0, 1, 2]); + assert_eq!(g.get_node_types(), &vec![0, 0, 0]); + assert_eq!(g.get_node_type_offsets(), &vec![0, 4]); + + let fwd_adj_list = g.get_fwd_adj_list()[0].as_ref().unwrap(); + assert_eq!(fwd_adj_list.get_offsets(), &vec![0, 2]); + assert_eq!(fwd_adj_list.get_neighbor_ids(), &vec![1, 2]); + let fwd_adj_list = g.get_fwd_adj_list()[1].as_ref().unwrap(); + assert_eq!(fwd_adj_list.get_offsets(), &vec![0, 1]); + assert_eq!(fwd_adj_list.get_neighbor_ids(), &vec![0]); + let fwd_adj_list = g.get_fwd_adj_list()[2].as_ref().unwrap(); + assert_eq!(fwd_adj_list.get_offsets(), &vec![0, 1]); + assert_eq!(fwd_adj_list.get_neighbor_ids(), &vec![0]); + + let bwd_adj_list = g.get_bwd_adj_list()[0].as_ref().unwrap(); + assert_eq!(bwd_adj_list.get_offsets(), &vec![0, 2]); + assert_eq!(bwd_adj_list.get_neighbor_ids(), &vec![1, 2]); + let bwd_adj_list = g.get_bwd_adj_list()[1].as_ref().unwrap(); + assert_eq!(bwd_adj_list.get_offsets(), &vec![0, 1]); + assert_eq!(bwd_adj_list.get_neighbor_ids(), &vec![0]); + let bwd_adj_list = 
g.get_bwd_adj_list()[2].as_ref().unwrap(); + assert_eq!(bwd_adj_list.get_offsets(), &vec![0, 1]); + assert_eq!(bwd_adj_list.get_neighbor_ids(), &vec![0]); + assert_eq!(g.node_count(), 3); assert_eq!(g.edge_count(), 4); @@ -95,7 +130,32 @@ fn test_undirected() { let edge_vec = EdgeVec::new(vec![0, 2, 4, 6], vec![1, 2, 0, 2, 0, 1]); let g = UnStaticGraph::::new(edge_vec, None, None, None); let edges: Vec<_> = g.edge_indices().collect(); - assert_eq!(edges, vec![(0, 1), (0, 2), (1, 2)]) + assert_eq!(edges, vec![(0, 1), (0, 2), (1, 2)]); + + assert_eq!(g.get_node_ids(), &vec![0, 1, 2]); + // Without node labels + assert_eq!(g.get_node_types(), &vec![0, 0, 0]); + assert_eq!(g.get_node_type_offsets(), &vec![0, 4]); + + let fwd_adj_list = g.get_fwd_adj_list()[0].as_ref().unwrap(); + assert_eq!(fwd_adj_list.get_offsets(), &vec![0, 2]); + assert_eq!(fwd_adj_list.get_neighbor_ids(), &vec![1, 2]); + let fwd_adj_list = g.get_fwd_adj_list()[1].as_ref().unwrap(); + assert_eq!(fwd_adj_list.get_offsets(), &vec![0, 2]); + assert_eq!(fwd_adj_list.get_neighbor_ids(), &vec![0, 2]); + let fwd_adj_list = g.get_fwd_adj_list()[2].as_ref().unwrap(); + assert_eq!(fwd_adj_list.get_offsets(), &vec![0, 2]); + assert_eq!(fwd_adj_list.get_neighbor_ids(), &vec![0, 1]); + + let bwd_adj_list = g.get_bwd_adj_list()[0].as_ref().unwrap(); + assert_eq!(bwd_adj_list.get_offsets(), &vec![0, 2]); + assert_eq!(bwd_adj_list.get_neighbor_ids(), &vec![1, 2]); + let bwd_adj_list = g.get_bwd_adj_list()[1].as_ref().unwrap(); + assert_eq!(bwd_adj_list.get_offsets(), &vec![0, 2]); + assert_eq!(bwd_adj_list.get_neighbor_ids(), &vec![0, 2]); + let bwd_adj_list = g.get_bwd_adj_list()[2].as_ref().unwrap(); + assert_eq!(bwd_adj_list.get_offsets(), &vec![0, 2]); + assert_eq!(bwd_adj_list.get_neighbor_ids(), &vec![0, 1]); } #[test] @@ -156,6 +216,100 @@ fn test_labeled() { assert!(edges.contains(&g.get_edge(0, 2))); assert!(edges.contains(&g.get_edge(1, 0))); assert!(edges.contains(&g.get_edge(2, 0))); + + 
assert_eq!(g.get_node_ids(), &vec![1, 0, 2]); + assert_eq!(g.get_node_types(), &vec![1, 0, 1]); + assert_eq!(g.get_node_type_offsets(), &vec![0, 1, 3]); + + let fwd_adj_list = g.get_fwd_adj_list()[0].as_ref().unwrap(); + assert_eq!(fwd_adj_list.get_offsets(), &vec![0, 1, 2]); + assert_eq!(fwd_adj_list.get_neighbor_ids(), &vec![1, 2]); + let fwd_adj_list = g.get_fwd_adj_list()[1].as_ref().unwrap(); + assert_eq!(fwd_adj_list.get_offsets(), &vec![0, 1, 1]); + assert_eq!(fwd_adj_list.get_neighbor_ids(), &vec![0]); + let fwd_adj_list = g.get_fwd_adj_list()[2].as_ref().unwrap(); + assert_eq!(fwd_adj_list.get_offsets(), &vec![0, 0, 1]); + assert_eq!(fwd_adj_list.get_neighbor_ids(), &vec![0]); + + let bwd_adj_list = g.get_bwd_adj_list()[0].as_ref().unwrap(); + assert_eq!(bwd_adj_list.get_offsets(), &vec![0, 1, 2]); + assert_eq!(bwd_adj_list.get_neighbor_ids(), &vec![1, 2]); + let bwd_adj_list = g.get_bwd_adj_list()[1].as_ref().unwrap(); + assert_eq!(bwd_adj_list.get_offsets(), &vec![0, 1, 1]); + assert_eq!(bwd_adj_list.get_neighbor_ids(), &vec![0]); + let bwd_adj_list = g.get_bwd_adj_list()[2].as_ref().unwrap(); + assert_eq!(bwd_adj_list.get_offsets(), &vec![0, 0, 1]); + assert_eq!(bwd_adj_list.get_neighbor_ids(), &vec![0]); + + let neighbour_edge_no: Vec = g.neighbors_of_edge_iter(0, None).collect(); + let neighbour_edge_0_a: Vec = g.neighbors_of_edge_iter(0, Some("a")).collect(); + let neighbour_edge_0_b: Vec = g.neighbors_of_edge_iter(0, Some("b")).collect(); + let neighbour_edge_1_a: Vec = g.neighbors_of_edge_iter(1, Some("a")).collect(); + let neighbour_edge_2_b: Vec = g.neighbors_of_edge_iter(2, Some("b")).collect(); + assert_eq!(&neighbour_edge_no, &vec![1, 2]); + assert_eq!(&neighbour_edge_0_a, &vec![1]); + assert_eq!(&neighbour_edge_0_b, &vec![2]); + assert_eq!(&neighbour_edge_1_a, &vec![0]); + assert_eq!(&neighbour_edge_2_b, &vec![0]); + + let nodes_a = g.nodes_with_label(Some("a")); + let nodes_b = g.nodes_with_label(Some("b")); + 
assert_eq!(nodes_a.collect_vec(), vec![1, 0]); + assert_eq!(nodes_b.collect_vec(), vec![2, 0]); + + let edges_a = g.edges_with_label(Some("a")); + let edges_b = g.edges_with_label(Some("b")); + assert_eq!(edges_a.collect_vec(), vec![(0, 1), (1, 0)]); + assert_eq!(edges_b.collect_vec(), vec![(0, 2), (2, 0)]); +} + +#[test] +fn test_get_neighbours_by_label() { + let edge_vec = EdgeVec::new(vec![0, 2, 3, 4], vec![1, 2, 0, 0]); + let in_edge_vec = EdgeVec::new(vec![0, 2, 3, 4], vec![1, 2, 0, 0]); + let labels = vec![1, 0, 1]; + let g = DiStaticGraph::<&str>::with_labels( + edge_vec, + Some(in_edge_vec), + labels, + setmap!["a", "b"], + setmap![], + None, + None, + ); + let neighbour_edge_no_iter: Vec = g.neighbors_of_node_iter(0, None).collect(); + let neighbour_edge_0_a_iter: Vec = g.neighbors_of_node_iter(0, Some("a")).collect(); + let neighbour_edge_0_b_iter: Vec = g.neighbors_of_node_iter(0, Some("b")).collect(); + let neighbour_edge_1_a_iter: Vec = g.neighbors_of_node_iter(1, Some("a")).collect(); + let neighbour_edge_2_b_iter: Vec = g.neighbors_of_node_iter(2, Some("b")).collect(); + assert_eq!(&neighbour_edge_no_iter, &vec![1, 2]); + assert_eq!(&neighbour_edge_0_a_iter, &vec![1]); + assert_eq!(&neighbour_edge_0_b_iter, &vec![2]); + assert_eq!(&neighbour_edge_1_a_iter, &(Vec::::new())); + assert_eq!(&neighbour_edge_2_b_iter, &vec![0]); + let neighbour_edge_no = g.neighbors_of_node(0, None); + let neighbour_edge_0_a = g.neighbors_of_node(0, Some("a")); + let neighbour_edge_0_b = g.neighbors_of_node(0, Some("b")); + let neighbour_edge_1_a = g.neighbors_of_node(1, Some("a")); + let neighbour_edge_2_b = g.neighbors_of_node(2, Some("b")); + assert_eq!(&neighbour_edge_no.iter().collect_vec(), &vec![&1, &2]); + assert_eq!(&neighbour_edge_0_a.iter().collect_vec(), &vec![&1]); + assert_eq!(&neighbour_edge_0_b.iter().collect_vec(), &vec![&2]); + assert_eq!( + &neighbour_edge_1_a.iter().collect_vec(), + &(Vec::<&u32>::new()) + ); + 
assert_eq!(&neighbour_edge_2_b.iter().collect_vec(), &vec![&0]); + + let nodes_a = g.nodes_with_label(Some("a")); + let nodes_b = g.nodes_with_label(Some("b")); + assert_eq!(nodes_a.collect_vec(), vec![1]); + assert_eq!(nodes_b.collect_vec(), vec![2, 0]); + + let edges_a = g.edges_with_label(Some("a")); + let edges_b = g.edges_with_label(Some("b")); + assert_eq!(edges_a.collect_vec(), Vec::<(u32, u32)>::new()); + assert_eq!(edges_b.collect_vec(), Vec::<(u32, u32)>::new()); } #[test] @@ -165,3 +319,130 @@ fn test_clone() { let g = DiStaticGraph::::new(edge_vec, Some(in_edge_vec), None, None); assert_eq!(g, g.clone()); } + +#[test] +fn test_graphflow_planner() { + let mut g_: TypedGraphMap = TypedDiGraphMap::new(); + let path_to_nodes = Path::new("C:\\Users\\76155\\Desktop\\rust_graphflow\\human-vertices.csv"); + // Path::new("C:\\Users\\cheny\\OneDrive\\桌面\\rust_graphflow\\human-vertices.csv"); + let path_to_edges = Path::new("C:\\Users\\76155\\Desktop\\rust_graphflow\\human-edges.csv"); + // Path::new("C:\\Users\\cheny\\OneDrive\\桌面\\rust_graphflow\\human-edges.csv"); + read_from_csv( + &mut g_, + vec![path_to_nodes], + vec![path_to_edges], + None, + false, + false, + ); + let g = g_.into_static(); + println!("node_count={}", g.node_count()); + println!("edge_count={}", g.edge_count()); + println!("num_of_node_labels={}", g.num_of_node_labels()); + println!("num_of_edge_labels={}", g.num_of_edge_labels()); + println!("load finished."); + let mut qvertex_to_qedges_map = HashMap::new(); + let mut qvertex_to_type_map = HashMap::new(); + let mut qvertex_to_deg_map = HashMap::new(); + + let q_edges = vec![ + QueryEdge::default("a".to_owned(), "b".to_owned()), + QueryEdge::default("a".to_owned(), "c".to_owned()), + QueryEdge::default("b".to_owned(), "c".to_owned()), + QueryEdge::default("c".to_owned(), "e".to_owned()), + QueryEdge::default("c".to_owned(), "f".to_owned()), + QueryEdge::default("e".to_owned(), "f".to_owned()), + ]; + let mut qedges_map = HashMap::new(); + 
qedges_map.insert( + "b".to_owned(), + vec![QueryEdge::default("a".to_owned(), "b".to_owned())], + ); + qedges_map.insert( + "c".to_owned(), + vec![QueryEdge::default("a".to_owned(), "c".to_owned())], + ); + qvertex_to_qedges_map.insert("a".to_owned(), qedges_map); + qedges_map = HashMap::new(); + qedges_map.insert( + "a".to_owned(), + vec![QueryEdge::default("a".to_owned(), "b".to_owned())], + ); + qedges_map.insert( + "c".to_owned(), + vec![QueryEdge::default("b".to_owned(), "c".to_owned())], + ); + qvertex_to_qedges_map.insert("b".to_owned(), qedges_map); + qedges_map = HashMap::new(); + qedges_map.insert( + "a".to_owned(), + vec![QueryEdge::default("a".to_owned(), "c".to_owned())], + ); + qedges_map.insert( + "b".to_owned(), + vec![QueryEdge::default("b".to_owned(), "c".to_owned())], + ); + qedges_map.insert( + "e".to_owned(), + vec![QueryEdge::default("c".to_owned(), "e".to_owned())], + ); + qedges_map.insert( + "f".to_owned(), + vec![QueryEdge::default("c".to_owned(), "f".to_owned())], + ); + qvertex_to_qedges_map.insert("c".to_owned(), qedges_map); + qedges_map = HashMap::new(); + qedges_map.insert( + "c".to_owned(), + vec![QueryEdge::default("c".to_owned(), "e".to_owned())], + ); + qedges_map.insert( + "f".to_owned(), + vec![QueryEdge::default("e".to_owned(), "f".to_owned())], + ); + qvertex_to_qedges_map.insert("e".to_owned(), qedges_map); + qedges_map = HashMap::new(); + qedges_map.insert( + "c".to_owned(), + vec![QueryEdge::default("c".to_owned(), "f".to_owned())], + ); + qedges_map.insert( + "e".to_owned(), + vec![QueryEdge::default("e".to_owned(), "f".to_owned())], + ); + qvertex_to_qedges_map.insert("f".to_owned(), qedges_map); + + qvertex_to_type_map.insert("a".to_owned(), 0); + qvertex_to_type_map.insert("b".to_owned(), 0); + qvertex_to_type_map.insert("c".to_owned(), 0); + qvertex_to_type_map.insert("e".to_owned(), 0); + qvertex_to_type_map.insert("f".to_owned(), 0); + + qvertex_to_deg_map.insert("a".to_owned(), vec![2, 0]); + 
qvertex_to_deg_map.insert("b".to_owned(), vec![1, 1]); + qvertex_to_deg_map.insert("c".to_owned(), vec![2, 2]); + qvertex_to_deg_map.insert("e".to_owned(), vec![1, 1]); + qvertex_to_deg_map.insert("f".to_owned(), vec![0, 2]); + + let query_graph = QueryGraph { + qvertex_to_qedges_map, + qvertex_to_type_map, + qvertex_to_deg_map, + q_edges, + it: None, + encoding: None, + limit: 0, + }; + let catalog = catalog_generator::default(&g); + let mut query_plan = optimizer_executor::generate_plan(query_graph, catalog, g.clone()); + // println!("QueryPlan:{}", query_plan.get_output_log()); + // println!( + // "num_out_tuple={},icost={}", + // query_plan.estimated_num_out_tuples, query_plan.estimated_icost + // ); + let mut workers = QPWorkers::new(query_plan, 1); + workers.init(&g); + workers.execute(); + println!("Query result:{}", workers.get_output_log()); + assert!(false); +}