Added option to output directed graphs of the group hierarchy before and after compression (#43)
It outputs nodes and edges information before and after the compressor has run - these can be visualised in a tool like Gephi. A good way to visualise what the compressor is actually doing!
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -3,3 +3,4 @@
|
||||
*.data
|
||||
*.old
|
||||
out.sql
|
||||
*.csv
|
||||
71
src/graphing.rs
Normal file
71
src/graphing.rs
Normal file
@@ -0,0 +1,71 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::{fs::File, io::Write};
|
||||
|
||||
use super::StateGroupEntry;
|
||||
|
||||
type Graph = BTreeMap<i64, StateGroupEntry>;
|
||||
|
||||
/// Outputs information from a state group graph into an edges file and a node file
|
||||
///
|
||||
/// These can be loaded into something like Gephi to visualise the graphs
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `groups` - A map from state group ids to StateGroupEntries
|
||||
/// * `edges_output` - The file to output the predecessor link information to
|
||||
/// * `nodes_output` - The file to output the state group information to
|
||||
fn output_csv(groups: &Graph, edges_output: &mut File, nodes_output: &mut File) {
|
||||
// The line A;B in the edges file means:
|
||||
// That state group A has predecessor B
|
||||
writeln!(edges_output, "Source;Target",).unwrap();
|
||||
|
||||
// The line A;B;C;"B" in the nodes file means:
|
||||
// The state group id is A
|
||||
// This state group has B rows in the state_groups_state table
|
||||
// If C is true then A has no predecessor
|
||||
writeln!(nodes_output, "Id;Rows;Root;Label",).unwrap();
|
||||
|
||||
for (source, entry) in groups {
|
||||
// If the group has a predecessor then write an edge in the edges file
|
||||
if let Some(target) = entry.prev_state_group {
|
||||
writeln!(edges_output, "{};{}", source, target,).unwrap();
|
||||
}
|
||||
|
||||
// Write the state group's information to the nodes file
|
||||
writeln!(
|
||||
nodes_output,
|
||||
"{};{};{};\"{}\"",
|
||||
source,
|
||||
entry.state_map.len(),
|
||||
entry.prev_state_group.is_none(),
|
||||
entry.state_map.len(),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Outputs information from two state group graph into files
|
||||
///
|
||||
/// These can be loaded into something like Gephi to visualise the graphs
|
||||
/// before and after the compressor is run
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `before` - A map from state group ids to StateGroupEntries
|
||||
/// the information from this map goes into before_edges.csv
|
||||
/// and before_nodes.csv
|
||||
/// * `after` - A map from state group ids to StateGroupEntries
|
||||
/// the information from this map goes into after_edges.csv
|
||||
/// and after_nodes.csv
|
||||
pub fn make_graphs(before: Graph, after: Graph) {
|
||||
// Open all the files to output to
|
||||
let mut before_edges_file = File::create("before_edges.csv").unwrap();
|
||||
let mut before_nodes_file = File::create("before_nodes.csv").unwrap();
|
||||
let mut after_edges_file = File::create("after_edges.csv").unwrap();
|
||||
let mut after_nodes_file = File::create("after_nodes.csv").unwrap();
|
||||
|
||||
// Write before's information to before_edges and before_nodes
|
||||
output_csv(&before, &mut before_edges_file, &mut before_nodes_file);
|
||||
// Write afters's information to after_edges and after_nodes
|
||||
output_csv(&after, &mut after_edges_file, &mut after_nodes_file);
|
||||
}
|
||||
26
src/lib.rs
26
src/lib.rs
@@ -16,6 +16,10 @@
|
||||
//! Synapse instance's database. Specifically, it aims to reduce the number of
|
||||
//! rows that a given room takes up in the `state_groups_state` table.
|
||||
|
||||
// This file contains configuring config options, which necessarily means lots
|
||||
// of arguments - this hopefully doesn't make the code unclear
|
||||
// #[allow(clippy::too_many_arguments)] is therefore used around some functions
|
||||
|
||||
use pyo3::{exceptions, prelude::*};
|
||||
|
||||
#[cfg(feature = "jemalloc")]
|
||||
@@ -31,6 +35,7 @@ use string_cache::DefaultAtom as Atom;
|
||||
|
||||
mod compressor;
|
||||
mod database;
|
||||
mod graphing;
|
||||
|
||||
use compressor::Compressor;
|
||||
use database::PGEscape;
|
||||
@@ -72,6 +77,7 @@ pub struct Config {
|
||||
min_saved_rows: Option<i32>,
|
||||
transactions: bool,
|
||||
level_sizes: LevelSizes,
|
||||
graphs: bool,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
@@ -137,6 +143,10 @@ impl Config {
|
||||
))
|
||||
.default_value("100,50,25")
|
||||
.takes_value(true),
|
||||
).arg(
|
||||
Arg::with_name("graphs")
|
||||
.short("g")
|
||||
.help("Whether to produce graphs of state groups before and after compression instead of SQL")
|
||||
).get_matches();
|
||||
|
||||
let db_url = matches
|
||||
@@ -164,6 +174,8 @@ impl Config {
|
||||
let level_sizes = value_t!(matches, "level_sizes", LevelSizes)
|
||||
.unwrap_or_else(|e| panic!("Unable to parse level_sizes: {}", e));
|
||||
|
||||
let graphs = matches.is_present("graphs");
|
||||
|
||||
Config {
|
||||
db_url: String::from(db_url),
|
||||
output_file,
|
||||
@@ -172,6 +184,7 @@ impl Config {
|
||||
min_saved_rows,
|
||||
transactions,
|
||||
level_sizes,
|
||||
graphs,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -260,6 +273,10 @@ pub fn run(mut config: Config) {
|
||||
// transaction.
|
||||
|
||||
output_sql(&mut config, &state_group_map, &new_state_group_map);
|
||||
|
||||
if config.graphs {
|
||||
graphing::make_graphs(state_group_map, new_state_group_map);
|
||||
}
|
||||
}
|
||||
|
||||
/// Produces SQL code to carry out changes and saves it to file
|
||||
@@ -440,6 +457,7 @@ impl Config {
|
||||
/// Converts string and bool arguments into a Config struct
|
||||
///
|
||||
/// This function panics if db_url or room_id are empty strings!
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
db_url: String,
|
||||
room_id: String,
|
||||
@@ -448,6 +466,7 @@ impl Config {
|
||||
min_saved_rows: Option<i32>,
|
||||
transactions: bool,
|
||||
level_sizes: String,
|
||||
graphs: bool,
|
||||
) -> Result<Config, String> {
|
||||
let mut output: Option<File> = None;
|
||||
if let Some(file) = output_file {
|
||||
@@ -471,6 +490,7 @@ impl Config {
|
||||
min_saved_rows,
|
||||
transactions,
|
||||
level_sizes,
|
||||
graphs,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -480,6 +500,7 @@ impl Config {
|
||||
/// Default arguments are equivalent to using the command line tool
|
||||
/// No defaults are provided for db_url or room_id since these arguments
|
||||
/// are compulsory (so that new() acts like parse_arguments())
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[pyfunction(
|
||||
// db_url has no default
|
||||
// room_id has no default
|
||||
@@ -487,7 +508,8 @@ impl Config {
|
||||
max_state_group = "None",
|
||||
min_saved_rows = "None",
|
||||
transactions = false,
|
||||
level_sizes = "String::from(\"100,50,25\")"
|
||||
level_sizes = "String::from(\"100,50,25\")",
|
||||
graphs = false
|
||||
)]
|
||||
fn run_compression(
|
||||
db_url: String,
|
||||
@@ -497,6 +519,7 @@ fn run_compression(
|
||||
min_saved_rows: Option<i32>,
|
||||
transactions: bool,
|
||||
level_sizes: String,
|
||||
graphs: bool,
|
||||
) -> PyResult<()> {
|
||||
let config = Config::new(
|
||||
db_url,
|
||||
@@ -506,6 +529,7 @@ fn run_compression(
|
||||
min_saved_rows,
|
||||
transactions,
|
||||
level_sizes,
|
||||
graphs,
|
||||
);
|
||||
match config {
|
||||
Err(e) => Err(PyErr::new::<exceptions::PyException, _>(e)),
|
||||
|
||||
Reference in New Issue
Block a user