diff --git a/.gitignore b/.gitignore
index cc7f9bb..7be2025 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
 **/*.rs.bk
 *.data
 *.old
-out.sql
\ No newline at end of file
+out.sql
+*.csv
\ No newline at end of file
diff --git a/src/graphing.rs b/src/graphing.rs
new file mode 100644
index 0000000..0d1a30d
--- /dev/null
+++ b/src/graphing.rs
@@ -0,0 +1,71 @@
+use std::collections::BTreeMap;
+use std::{fs::File, io::Write};
+
+use super::StateGroupEntry;
+
+type Graph = BTreeMap<i64, StateGroupEntry>;
+
+/// Outputs information from a state group graph into an edges file and a node file
+///
+/// These can be loaded into something like Gephi to visualise the graphs
+///
+/// # Arguments
+///
+/// * `groups` - A map from state group ids to StateGroupEntries
+/// * `edges_output` - The file to output the predecessor link information to
+/// * `nodes_output` - The file to output the state group information to
+fn output_csv(groups: &Graph, edges_output: &mut File, nodes_output: &mut File) {
+    // The line A;B in the edges file means:
+    // That state group A has predecessor B
+    writeln!(edges_output, "Source;Target",).unwrap();
+
+    // The line A;B;C;"B" in the nodes file means:
+    // The state group id is A
+    // This state group has B rows in the state_groups_state table
+    // If C is true then A has no predecessor
+    writeln!(nodes_output, "Id;Rows;Root;Label",).unwrap();
+
+    for (source, entry) in groups {
+        // If the group has a predecessor then write an edge in the edges file
+        if let Some(target) = entry.prev_state_group {
+            writeln!(edges_output, "{};{}", source, target,).unwrap();
+        }
+
+        // Write the state group's information to the nodes file
+        writeln!(
+            nodes_output,
+            "{};{};{};\"{}\"",
+            source,
+            entry.state_map.len(),
+            entry.prev_state_group.is_none(),
+            entry.state_map.len(),
+        )
+        .unwrap();
+    }
+}
+
+/// Outputs information from two state group graphs into files
+///
+/// These can be loaded into something like Gephi to visualise the graphs
+/// before and after the compressor is run
+///
+/// # Arguments
+///
+/// * `before` - A map from state group ids to StateGroupEntries
+///   the information from this map goes into before_edges.csv
+///   and before_nodes.csv
+/// * `after` - A map from state group ids to StateGroupEntries
+///   the information from this map goes into after_edges.csv
+///   and after_nodes.csv
+pub fn make_graphs(before: Graph, after: Graph) {
+    // Open all the files to output to
+    let mut before_edges_file = File::create("before_edges.csv").unwrap();
+    let mut before_nodes_file = File::create("before_nodes.csv").unwrap();
+    let mut after_edges_file = File::create("after_edges.csv").unwrap();
+    let mut after_nodes_file = File::create("after_nodes.csv").unwrap();
+
+    // Write before's information to before_edges and before_nodes
+    output_csv(&before, &mut before_edges_file, &mut before_nodes_file);
+    // Write after's information to after_edges and after_nodes
+    output_csv(&after, &mut after_edges_file, &mut after_nodes_file);
+}
diff --git a/src/lib.rs b/src/lib.rs
index c380f15..c1d80b5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -16,6 +16,10 @@
 //! Synapse instance's database. Specifically, it aims to reduce the number of
 //! rows that a given room takes up in the `state_groups_state` table.
 
+// This file contains configuring config options, which necessarily means lots
+// of arguments - this hopefully doesn't make the code unclear
+// #[allow(clippy::too_many_arguments)] is therefore used around some functions
+
 use pyo3::{exceptions, prelude::*};
 
 #[cfg(feature = "jemalloc")]
@@ -31,6 +35,7 @@ use string_cache::DefaultAtom as Atom;
 
 mod compressor;
 mod database;
+mod graphing;
 
 use compressor::Compressor;
 use database::PGEscape;
@@ -72,6 +77,7 @@ pub struct Config {
     min_saved_rows: Option<i32>,
     transactions: bool,
     level_sizes: LevelSizes,
+    graphs: bool,
 }
 
 impl Config {
@@ -137,6 +143,10 @@ impl Config {
                 ))
                 .default_value("100,50,25")
                 .takes_value(true),
+        ).arg(
+            Arg::with_name("graphs")
+                .short("g")
+                .help("Whether to produce graphs of state groups before and after compression instead of SQL")
         ).get_matches();
 
         let db_url = matches
@@ -164,6 +174,8 @@ impl Config {
         let level_sizes = value_t!(matches, "level_sizes", LevelSizes)
             .unwrap_or_else(|e| panic!("Unable to parse level_sizes: {}", e));
 
+        let graphs = matches.is_present("graphs");
+
         Config {
             db_url: String::from(db_url),
             output_file,
@@ -172,6 +184,7 @@ impl Config {
             min_saved_rows,
             transactions,
             level_sizes,
+            graphs,
         }
     }
 }
@@ -260,6 +273,10 @@ pub fn run(mut config: Config) {
     // transaction.
     output_sql(&mut config, &state_group_map, &new_state_group_map);
+
+    if config.graphs {
+        graphing::make_graphs(state_group_map, new_state_group_map);
+    }
 }
 
 /// Produces SQL code to carry out changes and saves it to file
@@ -440,6 +457,7 @@ impl Config {
     /// Converts string and bool arguments into a Config struct
     ///
     /// This function panics if db_url or room_id are empty strings!
+    #[allow(clippy::too_many_arguments)]
     pub fn new(
         db_url: String,
         room_id: String,
@@ -448,6 +466,7 @@ impl Config {
         min_saved_rows: Option<i32>,
         transactions: bool,
         level_sizes: String,
+        graphs: bool,
     ) -> Result<Config, String> {
         let mut output: Option<File> = None;
         if let Some(file) = output_file {
@@ -471,6 +490,7 @@ impl Config {
             min_saved_rows,
             transactions,
             level_sizes,
+            graphs,
         })
     }
 }
@@ -480,6 +500,7 @@ impl Config {
 /// Default arguments are equivalent to using the command line tool
 /// No default's are provided for db_url or room_id since these arguments
 /// are compulsory (so that new() act's like parse_arguments())
+#[allow(clippy::too_many_arguments)]
 #[pyfunction(
     // db_url has no default
     // room_id has no default
@@ -487,7 +508,8 @@ impl Config {
     max_state_group = "None",
     min_saved_rows = "None",
     transactions = false,
-    level_sizes = "String::from(\"100,50,25\")"
+    level_sizes = "String::from(\"100,50,25\")",
+    graphs = false
 )]
 fn run_compression(
     db_url: String,
@@ -497,6 +519,7 @@ fn run_compression(
     min_saved_rows: Option<i32>,
     transactions: bool,
     level_sizes: String,
+    graphs: bool,
 ) -> PyResult<()> {
     let config = Config::new(
         db_url,
@@ -506,6 +529,7 @@ fn run_compression(
         min_saved_rows,
         transactions,
         level_sizes,
+        graphs,
     );
     match config {
         Err(e) => Err(PyErr::new::<exceptions::Exception, _>(e)),