Merge pull request #15 from heftig/modernize

Modernize the code
This commit is contained in:
Erik Johnston
2020-02-20 13:02:49 +00:00
committed by GitHub
5 changed files with 700 additions and 644 deletions

1184
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -3,16 +3,16 @@ authors = ["Erik Johnston"]
description = "A tool to compress some state in a Synapse instance's database" description = "A tool to compress some state in a Synapse instance's database"
name = "synapse-compress-state" name = "synapse-compress-state"
version = "0.1.0" version = "0.1.0"
edition = "2018"
[dependencies] [dependencies]
clap = "2.32.0" clap = "2.33.0"
fallible-iterator = "0.1.5" indicatif = "0.14.0"
indicatif = "0.11.0" jemallocator = "0.3.2"
jemallocator = "0.3.0" postgres = "0.17.0"
postgres = "0.15.2" rand = "0.7.2"
rand = "0.7.0" rayon = "1.3.0"
rayon = "1.0.2" string_cache = "0.8.0"
string_cache = "0.7.3"
[dependencies.state-map] [dependencies.state-map]
git = "https://github.com/matrix-org/rust-matrix-state-map" git = "https://github.com/matrix-org/rust-matrix-state-map"

View File

@@ -30,11 +30,10 @@
use indicatif::{ProgressBar, ProgressStyle}; use indicatif::{ProgressBar, ProgressStyle};
use state_map::StateMap; use state_map::StateMap;
use std::collections::BTreeMap;
use string_cache::DefaultAtom as Atom; use string_cache::DefaultAtom as Atom;
use std::collections::BTreeMap; use super::{collapse_state_maps, StateGroupEntry};
use {collapse_state_maps, StateGroupEntry};
/// Holds information about a particular level. /// Holds information about a particular level.
struct Level { struct Level {

View File

@@ -12,17 +12,12 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use fallible_iterator::FallibleIterator;
use indicatif::{ProgressBar, ProgressStyle}; use indicatif::{ProgressBar, ProgressStyle};
use postgres::{Connection, TlsMode}; use postgres::{fallible_iterator::FallibleIterator, Client};
use rand::distributions::Alphanumeric; use rand::{distributions::Alphanumeric, thread_rng, Rng};
use rand::{thread_rng, Rng}; use std::{borrow::Cow, collections::BTreeMap, fmt, iter};
use std::borrow::Cow; use super::StateGroupEntry;
use std::collections::BTreeMap;
use std::fmt;
use StateGroupEntry;
/// Fetch the entries in state_groups_state (and their prev groups) for the /// Fetch the entries in state_groups_state (and their prev groups) for the
/// given `room_id` by connecting to the postgres database at `db_url`. /// given `room_id` by connecting to the postgres database at `db_url`.
@@ -31,9 +26,9 @@ pub fn get_data_from_db(
room_id: &str, room_id: &str,
max_state_group: Option<i64>, max_state_group: Option<i64>,
) -> BTreeMap<i64, StateGroupEntry> { ) -> BTreeMap<i64, StateGroupEntry> {
let conn = Connection::connect(db_url, TlsMode::None).unwrap(); let mut client = Client::connect(db_url, postgres::NoTls).unwrap();
let mut state_group_map = get_initial_data_from_db(&conn, room_id, max_state_group); let mut state_group_map = get_initial_data_from_db(&mut client, room_id, max_state_group);
println!("Got initial state from database. Checking for any missing state groups..."); println!("Got initial state from database. Checking for any missing state groups...");
@@ -68,7 +63,7 @@ pub fn get_data_from_db(
println!("Missing {} state groups", missing_sgs.len()); println!("Missing {} state groups", missing_sgs.len());
let map = get_missing_from_db(&conn, &missing_sgs); let map = get_missing_from_db(&mut client, &missing_sgs);
state_group_map.extend(map.into_iter()); state_group_map.extend(map.into_iter());
} }
@@ -78,33 +73,25 @@ pub fn get_data_from_db(
/// Fetch the entries in state_groups_state (and their prev groups) for the /// Fetch the entries in state_groups_state (and their prev groups) for the
/// given `room_id` by fetching all state with the given `room_id`. /// given `room_id` by fetching all state with the given `room_id`.
fn get_initial_data_from_db( fn get_initial_data_from_db(
conn: &Connection, client: &mut Client,
room_id: &str, room_id: &str,
max_state_group: Option<i64>, max_state_group: Option<i64>,
) -> BTreeMap<i64, StateGroupEntry> { ) -> BTreeMap<i64, StateGroupEntry> {
let sql = format!( let sql = r#"
r#"
SELECT m.id, prev_state_group, type, state_key, s.event_id SELECT m.id, prev_state_group, type, state_key, s.event_id
FROM state_groups AS m FROM state_groups AS m
LEFT JOIN state_groups_state AS s ON (m.id = s.state_group) LEFT JOIN state_groups_state AS s ON (m.id = s.state_group)
LEFT JOIN state_group_edges AS e ON (m.id = e.state_group) LEFT JOIN state_group_edges AS e ON (m.id = e.state_group)
WHERE m.room_id = $1 {} WHERE m.room_id = $1
"#, "#;
if max_state_group.is_some() {
"AND m.id <= $2"
} else {
""
}
);
let stmt = conn.prepare(&sql).unwrap();
let trans = conn.transaction().unwrap();
let mut rows = if let Some(s) = max_state_group { let mut rows = if let Some(s) = max_state_group {
stmt.lazy_query(&trans, &[&room_id, &s], 1000) client.query_raw(
format!(r"{} AND m.id <= $2", sql).as_str(),
vec![&room_id as _, &s as _],
)
} else { } else {
stmt.lazy_query(&trans, &[&room_id], 1000) client.query_raw(sql, iter::once(&room_id as _))
} }
.unwrap(); .unwrap();
@@ -116,16 +103,12 @@ fn get_initial_data_from_db(
); );
pb.enable_steady_tick(100); pb.enable_steady_tick(100);
let mut num_rows = 0;
while let Some(row) = rows.next().unwrap() { while let Some(row) = rows.next().unwrap() {
let state_group = row.get(0); let entry = state_group_map.entry(row.get(0)).or_default();
let entry = state_group_map.entry(state_group).or_default();
entry.prev_state_group = row.get(1); entry.prev_state_group = row.get(1);
let etype: Option<String> = row.get(2);
if let Some(etype) = etype { if let Some(etype) = row.get::<_, Option<String>>(2) {
entry.state_map.insert( entry.state_map.insert(
&etype, &etype,
&row.get::<_, String>(3), &row.get::<_, String>(3),
@@ -134,34 +117,31 @@ fn get_initial_data_from_db(
} }
pb.inc(1); pb.inc(1);
num_rows += 1;
} }
pb.set_length(num_rows); pb.set_length(pb.position());
pb.finish(); pb.finish();
state_group_map state_group_map
} }
/// Get any missing state groups from the database /// Get any missing state groups from the database
fn get_missing_from_db(conn: &Connection, missing_sgs: &[i64]) -> BTreeMap<i64, StateGroupEntry> { fn get_missing_from_db(client: &mut Client, missing_sgs: &[i64]) -> BTreeMap<i64, StateGroupEntry> {
let stmt = conn let mut rows = client
.prepare( .query_raw(
r#" r#"
SELECT state_group, prev_state_group SELECT state_group, prev_state_group
FROM state_group_edges FROM state_group_edges
WHERE state_group = ANY($1) WHERE state_group = ANY($1)
"#, "#,
iter::once(&missing_sgs as _),
) )
.unwrap(); .unwrap();
let trans = conn.transaction().unwrap();
let mut rows = stmt.lazy_query(&trans, &[&missing_sgs], 100).unwrap();
// initialise the map with empty entries (the missing group may not // initialise the map with empty entries (the missing group may not
// have a prev_state_group either) // have a prev_state_group either)
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = let mut state_group_map: BTreeMap<i64, StateGroupEntry> = missing_sgs
missing_sgs.iter() .iter()
.map(|sg| (*sg, StateGroupEntry::default())) .map(|sg| (*sg, StateGroupEntry::default()))
.collect(); .collect();
@@ -175,10 +155,10 @@ fn get_missing_from_db(conn: &Connection, missing_sgs: &[i64]) -> BTreeMap<i64,
} }
/// Helper function that escapes the wrapped text when writing SQL /// Helper function that escapes the wrapped text when writing SQL
pub struct PGEscapse<'a>(pub &'a str); pub struct PGEscape<'a>(pub &'a str);
impl<'a> fmt::Display for PGEscapse<'a> { impl<'a> fmt::Display for PGEscape<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut delim = Cow::from("$$"); let mut delim = Cow::from("$$");
while self.0.contains(&delim as &str) { while self.0.contains(&delim as &str) {
let s: String = thread_rng().sample_iter(&Alphanumeric).take(10).collect(); let s: String = thread_rng().sample_iter(&Alphanumeric).take(10).collect();
@@ -192,12 +172,12 @@ impl<'a> fmt::Display for PGEscapse<'a> {
#[test] #[test]
fn test_pg_escape() { fn test_pg_escape() {
let s = format!("{}", PGEscapse("test")); let s = format!("{}", PGEscape("test"));
assert_eq!(s, "$$test$$"); assert_eq!(s, "$$test$$");
let dodgy_string = "test$$ing"; let dodgy_string = "test$$ing";
let s = format!("{}", PGEscapse(dodgy_string)); let s = format!("{}", PGEscape(dodgy_string));
// prefix and suffixes should match // prefix and suffixes should match
let start_pos = s.find(dodgy_string).expect("expected to find dodgy string"); let start_pos = s.find(dodgy_string).expect("expected to find dodgy string");

View File

@@ -16,17 +16,6 @@
//! Synapse instance's database. Specifically, it aims to reduce the number of //! Synapse instance's database. Specifically, it aims to reduce the number of
//! rows that a given room takes up in the `state_groups_state` table. //! rows that a given room takes up in the `state_groups_state` table.
#[macro_use]
extern crate clap;
extern crate fallible_iterator;
extern crate indicatif;
extern crate jemallocator;
extern crate postgres;
extern crate rand;
extern crate rayon;
extern crate state_map;
extern crate string_cache;
mod compressor; mod compressor;
mod database; mod database;
@@ -34,19 +23,17 @@ mod database;
static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc; static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc;
use compressor::Compressor; use compressor::Compressor;
use database::PGEscapse; use database::PGEscape;
use clap::{App, Arg}; use clap::{
crate_authors, crate_description, crate_name, crate_version, value_t_or_exit, App, Arg,
};
use indicatif::{ProgressBar, ProgressStyle}; use indicatif::{ProgressBar, ProgressStyle};
use rayon::prelude::*; use rayon::prelude::*;
use state_map::StateMap; use state_map::StateMap;
use std::{collections::BTreeMap, fs::File, io::Write, str::FromStr};
use string_cache::DefaultAtom as Atom; use string_cache::DefaultAtom as Atom;
use std::collections::BTreeMap;
use std::fs::File;
use std::io::Write;
use std::str::FromStr;
/// An entry for a state group. Consists of an (optional) previous group and the /// An entry for a state group. Consists of an (optional) previous group and the
/// delta from that previous group (or the full state if no previous group) /// delta from that previous group (or the full state if no previous group)
#[derive(Default, Debug, Clone, PartialEq, Eq)] #[derive(Default, Debug, Clone, PartialEq, Eq)]
@@ -94,7 +81,7 @@ impl FromStr for LevelSizes {
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut sizes = Vec::new(); let mut sizes = Vec::new();
for size_str in s.split(",") { for size_str in s.split(',') {
let size: usize = size_str let size: usize = size_str
.parse() .parse()
.map_err(|_| "Not a comma separated list of numbers")?; .map_err(|_| "Not a comma separated list of numbers")?;
@@ -106,6 +93,7 @@ impl FromStr for LevelSizes {
} }
fn main() { fn main() {
#[allow(deprecated)]
let matches = App::new(crate_name!()) let matches = App::new(crate_name!())
.version(crate_version!()) .version(crate_version!())
.author(crate_authors!("\n")) .author(crate_authors!("\n"))
@@ -127,9 +115,17 @@ fn main() {
).arg( ).arg(
Arg::with_name("max_state_group") Arg::with_name("max_state_group")
.short("s") .short("s")
.value_name("MAX_STATE_GROUP")
.help("The maximum state group to process up to") .help("The maximum state group to process up to")
.takes_value(true) .takes_value(true)
.required(false), .required(false),
).arg(
Arg::with_name("min_saved_rows")
.short("m")
.value_name("COUNT")
.help("Suppress output if fewer than COUNT rows would be saved")
.takes_value(true)
.required(false),
).arg( ).arg(
Arg::with_name("output_file") Arg::with_name("output_file")
.short("o") .short("o")
@@ -176,6 +172,10 @@ fn main() {
.value_of("max_state_group") .value_of("max_state_group")
.map(|s| s.parse().expect("max_state_group must be an integer")); .map(|s| s.parse().expect("max_state_group must be an integer"));
let min_saved_rows = matches
.value_of("min_saved_rows")
.map(|v| v.parse().expect("COUNT must be an integer"));
let transactions = matches.is_present("transactions"); let transactions = matches.is_present("transactions");
let level_sizes = value_t_or_exit!(matches, "level_sizes", LevelSizes); let level_sizes = value_t_or_exit!(matches, "level_sizes", LevelSizes);
@@ -228,6 +228,17 @@ fn main() {
compressor.stats.state_groups_changed compressor.stats.state_groups_changed
); );
if let Some(min) = min_saved_rows {
let saving = (original_summed_size - compressed_summed_size) as i32;
if saving < min {
println!(
"Only {} rows would be saved by this compression. Skipping output.",
saving
);
return;
}
}
// If we are given an output file, we output the changes as SQL. If the // If we are given an output file, we output the changes as SQL. If the
// `transactions` argument is set we wrap each change to a state group in a // `transactions` argument is set we wrap each change to a state group in a
// transaction. // transaction.
@@ -267,7 +278,7 @@ fn main() {
sg sg
) )
.unwrap(); .unwrap();
if new_entry.state_map.len() > 0 { if !new_entry.state_map.is_empty() {
writeln!(output, "INSERT INTO state_groups_state (state_group, room_id, type, state_key, event_id) VALUES").unwrap(); writeln!(output, "INSERT INTO state_groups_state (state_group, room_id, type, state_key, event_id) VALUES").unwrap();
let mut first = true; let mut first = true;
for ((t, s), e) in new_entry.state_map.iter() { for ((t, s), e) in new_entry.state_map.iter() {
@@ -281,10 +292,10 @@ fn main() {
output, output,
"({}, {}, {}, {}, {})", "({}, {}, {}, {}, {})",
sg, sg,
PGEscapse(room_id), PGEscape(room_id),
PGEscapse(t), PGEscape(t),
PGEscapse(s), PGEscape(s),
PGEscapse(e) PGEscape(e)
) )
.unwrap(); .unwrap();
} }