Added option to output directed graphs of the group hierarchy before and after compression (#43)
It outputs node and edge information before and after the compressor has run - these can be visualised in a tool like Gephi. A good way to visualise what the compressor is actually doing!
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -2,4 +2,5 @@
|
|||||||
**/*.rs.bk
|
**/*.rs.bk
|
||||||
*.data
|
*.data
|
||||||
*.old
|
*.old
|
||||||
out.sql
|
out.sql
|
||||||
|
*.csv
|
||||||
71
src/graphing.rs
Normal file
71
src/graphing.rs
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::{fs::File, io::Write};
|
||||||
|
|
||||||
|
use super::StateGroupEntry;
|
||||||
|
|
||||||
|
type Graph = BTreeMap<i64, StateGroupEntry>;
|
||||||
|
|
||||||
|
/// Outputs information from a state group graph into an edges file and a node file
|
||||||
|
///
|
||||||
|
/// These can be loaded into something like Gephi to visualise the graphs
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `groups` - A map from state group ids to StateGroupEntries
|
||||||
|
/// * `edges_output` - The file to output the predecessor link information to
|
||||||
|
/// * `nodes_output` - The file to output the state group information to
|
||||||
|
fn output_csv(groups: &Graph, edges_output: &mut File, nodes_output: &mut File) {
|
||||||
|
// The line A;B in the edges file means:
|
||||||
|
// That state group A has predecessor B
|
||||||
|
writeln!(edges_output, "Source;Target",).unwrap();
|
||||||
|
|
||||||
|
// The line A;B;C;"B" in the nodes file means:
|
||||||
|
// The state group id is A
|
||||||
|
// This state group has B rows in the state_groups_state table
|
||||||
|
// If C is true then A has no predecessor
|
||||||
|
writeln!(nodes_output, "Id;Rows;Root;Label",).unwrap();
|
||||||
|
|
||||||
|
for (source, entry) in groups {
|
||||||
|
// If the group has a predecessor then write an edge in the edges file
|
||||||
|
if let Some(target) = entry.prev_state_group {
|
||||||
|
writeln!(edges_output, "{};{}", source, target,).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the state group's information to the nodes file
|
||||||
|
writeln!(
|
||||||
|
nodes_output,
|
||||||
|
"{};{};{};\"{}\"",
|
||||||
|
source,
|
||||||
|
entry.state_map.len(),
|
||||||
|
entry.prev_state_group.is_none(),
|
||||||
|
entry.state_map.len(),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Outputs information from two state group graph into files
|
||||||
|
///
|
||||||
|
/// These can be loaded into something like Gephi to visualise the graphs
|
||||||
|
/// before and after the compressor is run
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `before` - A map from state group ids to StateGroupEntries
|
||||||
|
/// the information from this map goes into before_edges.csv
|
||||||
|
/// and before_nodes.csv
|
||||||
|
/// * `after` - A map from state group ids to StateGroupEntries
|
||||||
|
/// the information from this map goes into after_edges.csv
|
||||||
|
/// and after_nodes.csv
|
||||||
|
pub fn make_graphs(before: Graph, after: Graph) {
|
||||||
|
// Open all the files to output to
|
||||||
|
let mut before_edges_file = File::create("before_edges.csv").unwrap();
|
||||||
|
let mut before_nodes_file = File::create("before_nodes.csv").unwrap();
|
||||||
|
let mut after_edges_file = File::create("after_edges.csv").unwrap();
|
||||||
|
let mut after_nodes_file = File::create("after_nodes.csv").unwrap();
|
||||||
|
|
||||||
|
// Write before's information to before_edges and before_nodes
|
||||||
|
output_csv(&before, &mut before_edges_file, &mut before_nodes_file);
|
||||||
|
// Write afters's information to after_edges and after_nodes
|
||||||
|
output_csv(&after, &mut after_edges_file, &mut after_nodes_file);
|
||||||
|
}
|
||||||
26
src/lib.rs
26
src/lib.rs
@@ -16,6 +16,10 @@
|
|||||||
//! Synapse instance's database. Specifically, it aims to reduce the number of
|
//! Synapse instance's database. Specifically, it aims to reduce the number of
|
||||||
//! rows that a given room takes up in the `state_groups_state` table.
|
//! rows that a given room takes up in the `state_groups_state` table.
|
||||||
|
|
||||||
|
// This file contains configuring config options, which necessarily means lots
|
||||||
|
// of arguments - this hopefully doesn't make the code unclear
|
||||||
|
// #[allow(clippy::too_many_arguments)] is therefore used around some functions
|
||||||
|
|
||||||
use pyo3::{exceptions, prelude::*};
|
use pyo3::{exceptions, prelude::*};
|
||||||
|
|
||||||
#[cfg(feature = "jemalloc")]
|
#[cfg(feature = "jemalloc")]
|
||||||
@@ -31,6 +35,7 @@ use string_cache::DefaultAtom as Atom;
|
|||||||
|
|
||||||
mod compressor;
|
mod compressor;
|
||||||
mod database;
|
mod database;
|
||||||
|
mod graphing;
|
||||||
|
|
||||||
use compressor::Compressor;
|
use compressor::Compressor;
|
||||||
use database::PGEscape;
|
use database::PGEscape;
|
||||||
@@ -72,6 +77,7 @@ pub struct Config {
|
|||||||
min_saved_rows: Option<i32>,
|
min_saved_rows: Option<i32>,
|
||||||
transactions: bool,
|
transactions: bool,
|
||||||
level_sizes: LevelSizes,
|
level_sizes: LevelSizes,
|
||||||
|
graphs: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Config {
|
impl Config {
|
||||||
@@ -137,6 +143,10 @@ impl Config {
|
|||||||
))
|
))
|
||||||
.default_value("100,50,25")
|
.default_value("100,50,25")
|
||||||
.takes_value(true),
|
.takes_value(true),
|
||||||
|
).arg(
|
||||||
|
Arg::with_name("graphs")
|
||||||
|
.short("g")
|
||||||
|
.help("Whether to produce graphs of state groups before and after compression instead of SQL")
|
||||||
).get_matches();
|
).get_matches();
|
||||||
|
|
||||||
let db_url = matches
|
let db_url = matches
|
||||||
@@ -164,6 +174,8 @@ impl Config {
|
|||||||
let level_sizes = value_t!(matches, "level_sizes", LevelSizes)
|
let level_sizes = value_t!(matches, "level_sizes", LevelSizes)
|
||||||
.unwrap_or_else(|e| panic!("Unable to parse level_sizes: {}", e));
|
.unwrap_or_else(|e| panic!("Unable to parse level_sizes: {}", e));
|
||||||
|
|
||||||
|
let graphs = matches.is_present("graphs");
|
||||||
|
|
||||||
Config {
|
Config {
|
||||||
db_url: String::from(db_url),
|
db_url: String::from(db_url),
|
||||||
output_file,
|
output_file,
|
||||||
@@ -172,6 +184,7 @@ impl Config {
|
|||||||
min_saved_rows,
|
min_saved_rows,
|
||||||
transactions,
|
transactions,
|
||||||
level_sizes,
|
level_sizes,
|
||||||
|
graphs,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -260,6 +273,10 @@ pub fn run(mut config: Config) {
|
|||||||
// transaction.
|
// transaction.
|
||||||
|
|
||||||
output_sql(&mut config, &state_group_map, &new_state_group_map);
|
output_sql(&mut config, &state_group_map, &new_state_group_map);
|
||||||
|
|
||||||
|
if config.graphs {
|
||||||
|
graphing::make_graphs(state_group_map, new_state_group_map);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Produces SQL code to carry out changes and saves it to file
|
/// Produces SQL code to carry out changes and saves it to file
|
||||||
@@ -440,6 +457,7 @@ impl Config {
|
|||||||
/// Converts string and bool arguments into a Config struct
|
/// Converts string and bool arguments into a Config struct
|
||||||
///
|
///
|
||||||
/// This function panics if db_url or room_id are empty strings!
|
/// This function panics if db_url or room_id are empty strings!
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn new(
|
pub fn new(
|
||||||
db_url: String,
|
db_url: String,
|
||||||
room_id: String,
|
room_id: String,
|
||||||
@@ -448,6 +466,7 @@ impl Config {
|
|||||||
min_saved_rows: Option<i32>,
|
min_saved_rows: Option<i32>,
|
||||||
transactions: bool,
|
transactions: bool,
|
||||||
level_sizes: String,
|
level_sizes: String,
|
||||||
|
graphs: bool,
|
||||||
) -> Result<Config, String> {
|
) -> Result<Config, String> {
|
||||||
let mut output: Option<File> = None;
|
let mut output: Option<File> = None;
|
||||||
if let Some(file) = output_file {
|
if let Some(file) = output_file {
|
||||||
@@ -471,6 +490,7 @@ impl Config {
|
|||||||
min_saved_rows,
|
min_saved_rows,
|
||||||
transactions,
|
transactions,
|
||||||
level_sizes,
|
level_sizes,
|
||||||
|
graphs,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -480,6 +500,7 @@ impl Config {
|
|||||||
/// Default arguments are equivalent to using the command line tool
|
/// Default arguments are equivalent to using the command line tool
|
||||||
/// No defaults are provided for db_url or room_id since these arguments
|
/// No defaults are provided for db_url or room_id since these arguments
|
||||||
/// are compulsory (so that new() acts like parse_arguments())
|
/// are compulsory (so that new() acts like parse_arguments())
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
#[pyfunction(
|
#[pyfunction(
|
||||||
// db_url has no default
|
// db_url has no default
|
||||||
// room_id has no default
|
// room_id has no default
|
||||||
@@ -487,7 +508,8 @@ impl Config {
|
|||||||
max_state_group = "None",
|
max_state_group = "None",
|
||||||
min_saved_rows = "None",
|
min_saved_rows = "None",
|
||||||
transactions = false,
|
transactions = false,
|
||||||
level_sizes = "String::from(\"100,50,25\")"
|
level_sizes = "String::from(\"100,50,25\")",
|
||||||
|
graphs = false
|
||||||
)]
|
)]
|
||||||
fn run_compression(
|
fn run_compression(
|
||||||
db_url: String,
|
db_url: String,
|
||||||
@@ -497,6 +519,7 @@ fn run_compression(
|
|||||||
min_saved_rows: Option<i32>,
|
min_saved_rows: Option<i32>,
|
||||||
transactions: bool,
|
transactions: bool,
|
||||||
level_sizes: String,
|
level_sizes: String,
|
||||||
|
graphs: bool,
|
||||||
) -> PyResult<()> {
|
) -> PyResult<()> {
|
||||||
let config = Config::new(
|
let config = Config::new(
|
||||||
db_url,
|
db_url,
|
||||||
@@ -506,6 +529,7 @@ fn run_compression(
|
|||||||
min_saved_rows,
|
min_saved_rows,
|
||||||
transactions,
|
transactions,
|
||||||
level_sizes,
|
level_sizes,
|
||||||
|
graphs,
|
||||||
);
|
);
|
||||||
match config {
|
match config {
|
||||||
Err(e) => Err(PyErr::new::<exceptions::PyException, _>(e)),
|
Err(e) => Err(PyErr::new::<exceptions::PyException, _>(e)),
|
||||||
|
|||||||
Reference in New Issue
Block a user