Improve algorithm and documentation

This commit is contained in:
Erik Johnston
2018-09-18 15:40:58 +01:00
parent 3ba940c743
commit 096467853d
6 changed files with 821 additions and 290 deletions

278
Cargo.lock generated
View File

@@ -1,3 +1,11 @@
[[package]]
name = "aho-corasick"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ansi_term"
version = "0.11.0"
@@ -135,6 +143,41 @@ dependencies = [
"vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "clicolors-control"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "cloudabi"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "console"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"clicolors-control 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"termios 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "constant_time_eq"
version = "0.1.3"
@@ -259,6 +302,17 @@ dependencies = [
"digest 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "indicatif"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"console 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "iovec"
version = "0.1.2"
@@ -273,6 +327,20 @@ name = "itoa"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "kernel32-sys"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lazy_static"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "lazy_static"
version = "1.1.0"
@@ -295,6 +363,15 @@ dependencies = [
"pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lock_api"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"owning_ref 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "log"
version = "0.4.5"
@@ -321,6 +398,14 @@ dependencies = [
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "memchr"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "memoffset"
version = "0.2.1"
@@ -352,6 +437,35 @@ dependencies = [
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "owning_ref"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "parking_lot"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lock_api 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"parking_lot_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "parking_lot_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf"
version = "0.7.23"
@@ -451,6 +565,23 @@ dependencies = [
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rayon"
version = "1.0.2"
@@ -485,6 +616,46 @@ dependencies = [
"redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rust-matrix-lib"
version = "0.1.0"
@@ -502,22 +673,19 @@ dependencies = [
"sodiumoxide 0.0.16 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rust-synapse-compress-state"
version = "0.1.0"
dependencies = [
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
"fallible-iterator 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"postgres 0.15.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rust-matrix-lib 0.1.0 (git+https://github.com/erikjohnston/rust-matrix-lib)",
]
[[package]]
name = "rustc-demangle"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rustc_version"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ryu"
version = "0.2.6"
@@ -533,6 +701,19 @@ name = "scopeguard"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "semver"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "serde"
version = "1.0.78"
@@ -608,6 +789,11 @@ dependencies = [
"serde 1.0.78 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "stable_deref_trait"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "stringprep"
version = "0.1.2"
@@ -642,6 +828,18 @@ dependencies = [
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "synapse-compress-state"
version = "0.1.0"
dependencies = [
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
"fallible-iterator 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"indicatif 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"postgres 0.15.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rust-matrix-lib 0.1.0 (git+https://github.com/erikjohnston/rust-matrix-lib)",
]
[[package]]
name = "synstructure"
version = "0.9.0"
@@ -663,6 +861,14 @@ dependencies = [
"redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "termios"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "textwrap"
version = "0.10.0"
@@ -671,6 +877,14 @@ dependencies = [
"unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thread_local"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "time"
version = "0.1.40"
@@ -686,6 +900,11 @@ name = "typenum"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ucd-util"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unicode-bidi"
version = "0.3.4"
@@ -717,6 +936,11 @@ dependencies = [
"void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "utf8-ranges"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "vec_map"
version = "0.8.1"
@@ -746,6 +970,11 @@ dependencies = [
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi-build"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
@@ -757,6 +986,7 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "68f56c7353e5a9547cbd76ed90f7bb5ffc3ba09d4ea9bd1d8c06c8b1142eeb5a"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee"
"checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef"
@@ -774,6 +1004,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0c4e7bb64a8ebb0d856483e1e682ea3422f883c5f5615a90d51a2c82fe87fdd3"
"checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878"
"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e"
"checksum clicolors-control 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1f84dec9bc083ce2503908cd305af98bd363da6f54bf8d4bf0ac14ee749ad5d1"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum console 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ecd48adf136733979b49e15bc3b4c43cc0d3c85ece7bd08e6daa414c6fcb13e6"
"checksum constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8ff012e225ce166d4422e0e78419d901719760f62ae2b7969ca6b564d1b54a9e"
"checksum crossbeam-deque 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f739f8c5363aca78cfb059edf753d8f0d36908c348f3d8d1503f03d8b75d9cf3"
"checksum crossbeam-epoch 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "927121f5407de9956180ff5e936fe3cf4324279280001cd56b669d28ee7e9150"
@@ -790,20 +1023,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum generic-array 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ef25c5683767570c2bbd7deba372926a55eaae9982d7726ee2a1050239d45b9d"
"checksum hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d6a22814455d41612f41161581c2883c0c6a1c41852729b17d5ed88f01e153aa"
"checksum hmac 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44f3bdb08579d99d7dc761c0e266f13b5f2ab8c8c703b9fc9ef333cd8f48f55e"
"checksum indicatif 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a29b2fa6f00010c268bface64c18bb0310aaa70d46a195d5382d288c477fb016"
"checksum iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dbe6e417e7d0975db6512b90796e8ce223145ac4e33c377e4a42882a0e88bb08"
"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
"checksum lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73"
"checksum lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca488b89a5657b0a2ecd45b95609b3e848cf1755da332a0da46e2b2b1cb371a7"
"checksum libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)" = "76e3a3ef172f1a0b9a9ff0dd1491ae5e6c948b94479a3021819ba7d860c8645d"
"checksum libsodium-sys 0.0.16 (registry+https://github.com/rust-lang/crates.io-index)" = "fcbd1beeed8d44caa8a669ebaa697c313976e242c03cc9fb23d88bf1656f5542"
"checksum lock_api 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "949826a5ccf18c1b3a7c3d57692778d21768b79e46eb9dd07bfc4c2160036c54"
"checksum log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d4fcce5fa49cc693c312001daf1d13411c4a5283796bac1084299ea3e567113f"
"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
"checksum md5 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "79c56d6a0b07f9e19282511c83fc5b086364cbae4ba8c7d5f190c3d9b0425a48"
"checksum memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a"
"checksum memchr 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a3b4142ab8738a78c51896f704f83c11df047ff1bda9a92a661aa6361552d93d"
"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3"
"checksum nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "9a2228dca57108069a5262f2ed8bd2e82496d2e074a06d1ccc7ce1687b6ae0a2"
"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea"
"checksum num-traits 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "630de1ef5cc79d0cdd78b7e33b81f083cbfe90de0f4b2b2f07f905867c70e9fe"
"checksum num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c51a3322e4bca9d212ad9a158a02abc6934d005490c054a2778df73a70aa0a30"
"checksum owning_ref 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "cdf84f41639e037b484f93433aa3897863b561ed65c6e59c7073d7c561710f37"
"checksum parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0802bff09003b291ba756dc7e79313e51cc31667e94afbe847def490424cde5"
"checksum parking_lot_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad7f7e6ebdc79edff6fdcb87a55b620174f7a989e3eb31b65231f4af57f00b8c"
"checksum phf 0.7.23 (registry+https://github.com/rust-lang/crates.io-index)" = "cec29da322b242f4c3098852c77a0ca261c9c01b806cae85a5572a1eb94db9a6"
"checksum phf_shared 0.7.23 (registry+https://github.com/rust-lang/crates.io-index)" = "b539898d22d4273ded07f64a05737649dc69095d92cb87c7097ec68e3f150b93"
"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c"
@@ -814,15 +1055,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "dd636425967c33af890042c483632d33fa7a18f19ad1d7ea72e8998c6ef8dea5"
"checksum rand 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)" = "15a732abf9d20f0ad8eeb6f909bf6868722d9a06e1e50802b6a70351f40b4eb1"
"checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd"
"checksum rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e464cd887e869cddcae8792a4ee31d23c7edd516700695608f5b98c67ee0131c"
"checksum rand_core 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "edecf0f94da5551fc9b492093e30b041a891657db7940ee221f9d2f66e82eef2"
"checksum rayon 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "df7a791f788cb4c516f0e091301a29c2b71ef680db5e644a7d68835c8ae6dbfa"
"checksum rayon-core 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b055d1e92aba6877574d8fe604a63c8b5df60f60e5982bf7ccbb1338ea527356"
"checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1"
"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
"checksum regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384"
"checksum regex 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "2069749032ea3ec200ca51e4a31df41759190a88edca0d2d86ee8bedf7073341"
"checksum regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7"
"checksum regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "747ba3b235651f6e2f67dfa8bcdcd073ddb7c243cb21c442fc12395dfcac212d"
"checksum rust-matrix-lib 0.1.0 (git+https://github.com/erikjohnston/rust-matrix-lib)" = "<none>"
"checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum ryu 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7153dd96dade874ab973e098cb62fcdbb89a03682e46b144fd09550998d4a4a7"
"checksum safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f"
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
"checksum serde 1.0.78 (registry+https://github.com/rust-lang/crates.io-index)" = "92ec94e2754699adddbbc4f555791bd3acc2a2f5574cba16c93a4a9cf4a04415"
"checksum serde_derive 1.0.78 (registry+https://github.com/rust-lang/crates.io-index)" = "0fb622d85245add5327d4f08b2d24fd51fa5d35fe1bba19ee79a1f211e9ac0ff"
"checksum serde_json 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)" = "59790990c5115d16027f00913e2e66de23a51f70422e549d2ad68c8c5f268f1c"
@@ -832,24 +1082,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum smallvec 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "153ffa32fd170e9944f7e0838edf824a754ec4c1fc64746fcc9fe1f8fa602e5d"
"checksum socket2 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c4d11a52082057d87cb5caa31ad812f4504b97ab44732cd8359df2e9ff9f48e7"
"checksum sodiumoxide 0.0.16 (registry+https://github.com/rust-lang/crates.io-index)" = "eb5cb2f14f9a51352ad65e59257a0a9459d5a36a3615f3d53a974c82fdaaa00a"
"checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8"
"checksum stringprep 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8ee348cb74b87454fff4b551cbf727025810a004f88aeacae7f85b87f4e9a1c1"
"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"
"checksum syn 0.14.9 (registry+https://github.com/rust-lang/crates.io-index)" = "261ae9ecaa397c42b960649561949d69311f08eeaea86a65696e6e46517cf741"
"checksum syn 0.15.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e5c1514eb7bb4216fc722b3cd08783d326d7de0d62f6d5e48a774f610bc97cb6"
"checksum synstructure 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "85bb9b7550d063ea184027c9b8c20ac167cd36d3e06b3a40bceb9d746dc1a7b7"
"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096"
"checksum termios 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "70226acdf12d182df757d9fb07c0257a1558ec48c8059f607d6b38145ce4e2fa"
"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b"
"checksum typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169"
"checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d"
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
"checksum unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25"
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
"checksum utf8-ranges 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd70f467df6810094968e2fce0ee1bd0e87157aceb026a8c083bcf5e25b9efe4"
"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
"checksum version_check 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7716c242968ee87e5542f8021178248f267f295a5c4803beae8b8b7fd9bc6051"
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
"checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd"
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@@ -1,11 +1,13 @@
[package]
authors = ["Erik Johnston"]
name = "rust-synapse-compress-state"
name = "synapse-compress-state"
version = "0.1.0"
description = "A tool to compress some state in a Synapse instance's database"
[dependencies]
clap = "2.32.0"
fallible-iterator = "0.1.5"
indicatif = "0.9.0"
postgres = "0.15.2"
rayon = "1.0.2"

View File

@@ -2,22 +2,43 @@
An experimental tool that reads in the rows from `state_groups_state` and
`state_group_edges` tables for a particular room and calculates the changes that
could be made that (hopefully) will signifcantly reduce the number of rows.
could be made that (hopefully) will significantly reduce the number of rows.
This tool currently *does not* write to the database in any way, so should be
safe to run.
safe to run. If the `-o` option is specified then SQL will be written to the
given file that would change the tables to match the calculated state. (Note
that if `-t` is given then each change to a particular state group is wrapped
in a transaction)
## Algorithm
The algorithm works by attempting to create a tree of deltas, produced by
appending state groups to different "levels". Each level has a maximum size, where
each state group is appended to the lowest level that is not full.
This produces a graph that looks approximately like the following, in the case
of having two levels with the bottom level (L1) having a maximum size of 3:
```
L2 <-------------------- L2 <---------- ...
^--- L1 <--- L1 <--- L1 ^--- L1 <--- L1 <--- L1
```
The sizes and number of levels used can be controlled via `-l`.
## Example
```
$ cargo run --release -- -p "postgresql://localhost/synapse" -r '!some_room:example.com'
Compiling rust-synapse-compress-state v0.1.0 (file:///home/erikj/git/rust-synapse-compress-state)
Finished release [optimized] target(s) in 2.39s
Running `target/release/rust-synapse-compress-state -p 'postgresql://localhost/synapse' -r '!some_room:example.com'`
Missing 11 state groups
Number of entries: 25694
Number of rows: 356650
Number of rows compressed: 41068
$ synapse-compress-state -p "postgresql://localhost/synapse" -r '!some_room:example.com'
Fetching state from DB for room '!some_room:example.com'...
Got initial state from database. Checking for any missing state groups...
Number of state groups: 73904
Number of rows in current table: 2240043
Number of rows after compression: 165754 (7.40%)
Compression Statistics:
Number of forced resets due to lacking prev: 34
Number of compressed rows caused by the above: 17092
Number of state groups changed: 2748
New state map matches old one
```

255
src/compressor.rs Normal file
View File

@@ -0,0 +1,255 @@
//! This is the actual compression algorithm.
//!
//! The algorithm attempts to make a tree of deltas for the state group maps.
//! This is done by having multiple "levels", where each level has a maximum
//! size. The state groups are iterated over, with deltas being calculated
//! against the smallest level that isn't yet full. When a state group is
//! inserted into a level, or lower levels are reset to have their current
//! "head" at the new state group.
//!
//! This produces graphs that look roughly like, for two levels:
//!
//! ```
//! L2 <-------------------- L2 <---------- ...
//! ^--- L1 <--- L1 <--- L1 ^--- L1 <--- L1 <--- L1
//! ```
use rust_matrix_lib::state_map::StateMap;
use std::collections::BTreeMap;
use {collapse_state_maps, StateGroupEntry};
/// Holds information about a particular level.
struct Level {
/// The maximum size this level is allowed to be
max_length: usize,
/// The (approximate) current chain length of this level. This is equivalent
/// to recursively following `current`
current_chain_length: usize,
/// The head of this level
current: Option<i64>,
}
impl Level {
/// Creates a new Level with the given maximum length
pub fn new(max_length: usize) -> Level {
Level {
max_length,
current_chain_length: 0,
current: None,
}
}
/// Update the current head of this level. If delta is true then it means
/// that given state group will (probably) reference the previous head.
///
/// Panics if `delta` is true and the level is already full.
pub fn update(&mut self, current: i64, delta: bool) {
self.current = Some(current);
if delta {
// If we're referencing the previous head then increment our chain
// length estimate
if !self.has_space() {
panic!("Tried to add to a already full level");
}
self.current_chain_length += 1;
} else {
// Otherwise, we've started a new chain with a single entry.
self.current_chain_length = 1;
}
}
/// Get the current head of the level
pub fn get_current(&self) -> Option<i64> {
self.current
}
/// Whether there is space in the current chain at this level. If not then a
/// new chain should be started.
pub fn has_space(&self) -> bool {
self.current_chain_length < self.max_length
}
}
/// Keeps track of some statistics of a compression run.
#[derive(Default)]
pub struct Stats {
/// How many state groups we couldn't find a delta for, despite trying.
pub resets_no_suitable_prev: usize,
/// The sum of the rows of the state groups counted by
/// `resets_no_suitable_prev`.
pub resets_no_suitable_prev_size: usize,
/// How many state groups we have changed.
pub state_groups_changed: usize,
}
/// Attempts to compress a set of state deltas using the given level sizes.
pub struct Compressor<'a> {
original_state_map: &'a BTreeMap<i64, StateGroupEntry>,
pub new_state_group_map: BTreeMap<i64, StateGroupEntry>,
levels: Vec<Level>,
pub stats: Stats,
}
impl<'a> Compressor<'a> {
/// Creates a compressor and runs the compression algorithm.
pub fn compress(
original_state_map: &'a BTreeMap<i64, StateGroupEntry>,
level_sizes: &[usize],
) -> Compressor<'a> {
let mut compressor = Compressor {
original_state_map,
new_state_group_map: BTreeMap::new(),
levels: level_sizes.iter().map(|size| Level::new(*size)).collect(),
stats: Stats::default(),
};
compressor.create_new_tree();
compressor
}
/// Actually runs the compression algorithm
fn create_new_tree(&mut self) {
if !self.new_state_group_map.is_empty() {
panic!("Can only call `create_new_tree` once");
}
for (&state_group, entry) in self.original_state_map {
let mut prev_state_group = None;
for level in &mut self.levels {
if level.has_space() {
prev_state_group = level.get_current();
level.update(state_group, true);
break;
} else {
level.update(state_group, false);
}
}
let (delta, prev_state_group) = if entry.prev_state_group == prev_state_group {
(entry.state_map.clone(), prev_state_group)
} else {
self.stats.state_groups_changed += 1;
self.get_delta(prev_state_group, state_group)
};
self.new_state_group_map.insert(
state_group,
StateGroupEntry {
prev_state_group,
state_map: delta,
},
);
}
}
/// Attempts to calculate the delta between two state groups.
///
/// This is not always possible if the given candidate previous state group
/// have state keys that are not in the new state group. In this case the
/// function will try and iterate back up the current tree to find a state
/// group that can be used as a base for a delta.
///
/// Returns the state map and the actual base state group (if any) used.
fn get_delta(&mut self, prev_sg: Option<i64>, sg: i64) -> (StateMap<String>, Option<i64>) {
let state_map = collapse_state_maps(&self.original_state_map, sg);
let mut prev_sg = if let Some(prev_sg) = prev_sg {
prev_sg
} else {
return (state_map, None);
};
// This is a loop to go through to find the first prev_sg which can be
// a valid base for the state group.
let mut prev_state_map;
'outer: loop {
prev_state_map = collapse_state_maps(&self.original_state_map, prev_sg);
for (t, s) in prev_state_map.keys() {
if !state_map.contains_key(t, s) {
// This is not a valid base as it contains key the new state
// group doesn't have. Attempt to walk up the tree to find a
// better base.
if let Some(psg) = self.new_state_group_map[&prev_sg].prev_state_group {
prev_sg = psg;
continue 'outer;
}
// Couldn't find a new base, so we give up and just persist
// a full state group here.
self.stats.resets_no_suitable_prev += 1;
self.stats.resets_no_suitable_prev_size += state_map.len();
return (state_map, None);
}
}
break;
}
// We've found a valid base, now we just need to calculate the delta.
let mut delta_map = StateMap::new();
for ((t, s), e) in state_map.iter() {
if prev_state_map.get(t, s) != Some(e) {
delta_map.insert(t, s, e.clone());
}
}
(delta_map, Some(prev_sg))
}
}
#[test]
fn test_new_map() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
for i in 0i64..=13i64 {
initial.insert(
i,
StateGroupEntry {
prev_state_group: prev,
state_map: StateMap::new(),
},
);
prev = Some(i)
}
let compressor = Compressor::compress(&initial, &[3, 3]);
let new_state = compressor.new_state_group_map;
let expected_edges: BTreeMap<i64, i64> = vec![
(1, 0),
(2, 1),
(4, 3),
(5, 4),
(6, 3),
(7, 6),
(8, 7),
(9, 6),
(10, 9),
(11, 10),
(13, 12),
].into_iter()
.collect();
for sg in 0i64..=13i64 {
assert_eq!(
expected_edges.get(&sg).cloned(),
new_state[&sg].prev_state_group,
"state group {} did not match expected",
sg,
);
}
}

125
src/database.rs Normal file
View File

@@ -0,0 +1,125 @@
use fallible_iterator::FallibleIterator;
use indicatif::{ProgressBar, ProgressStyle};
use postgres::{Connection, TlsMode};
use std::collections::BTreeMap;
use StateGroupEntry;
/// Fetch the entries in state_groups_state (and their prev groups) for the
/// given `room_id` by connecting to the postgres database at `db_url`.
pub fn get_data_from_db(db_url: &str, room_id: &str) -> BTreeMap<i64, StateGroupEntry> {
let conn = Connection::connect(db_url, TlsMode::None).unwrap();
let mut state_group_map = get_initial_data_from_db(&conn, room_id);
println!("Got initial state from database. Checking for any missing state groups...");
// Due to reasons some of the state groups appear in the edges table, but
// not in the state_groups_state table. This means they don't get included
// in our DB queries, so we have to fetch any missing groups explicitly.
// Since the returned groups may themselves reference groups we don't have,
// we need to do this recursively until we don't find any more missing.
loop {
let missing_sgs: Vec<_> = state_group_map
.iter()
.filter_map(|(_sg, entry)| {
if let Some(prev_sg) = entry.prev_state_group {
if state_group_map.contains_key(&prev_sg) {
None
} else {
Some(prev_sg)
}
} else {
None
}
}).collect();
if missing_sgs.is_empty() {
break;
}
println!("Missing {} state groups", missing_sgs.len());
let map = get_missing_from_db(&conn, &missing_sgs);
state_group_map.extend(map.into_iter());
}
state_group_map
}
/// Fetch the entries in state_groups_state (and their prev groups) for the
/// given `room_id` by fetching all state with the given `room_id`.
fn get_initial_data_from_db(conn: &Connection, room_id: &str) -> BTreeMap<i64, StateGroupEntry> {
let stmt = conn
.prepare(
r#"
SELECT m.id, prev_state_group, type, state_key, s.event_id
FROM state_groups AS m
LEFT JOIN state_groups_state AS s ON (m.id = s.state_group)
LEFT JOIN state_group_edges AS e ON (m.id = e.state_group)
WHERE m.room_id = $1
"#,
).unwrap();
let trans = conn.transaction().unwrap();
let mut rows = stmt.lazy_query(&trans, &[&room_id], 1000).unwrap();
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let pb = ProgressBar::new_spinner();
pb.set_style(ProgressStyle::default_spinner().template("{spinner} [{elapsed}] {pos}"));
pb.enable_steady_tick(100);
let mut num_rows = 0;
while let Some(row) = rows.next().unwrap() {
let state_group = row.get(0);
let entry = state_group_map.entry(state_group).or_default();
entry.prev_state_group = row.get(1);
let etype: Option<String> = row.get(2);
if let Some(etype) = etype {
entry
.state_map
.insert(&etype, &row.get::<_, String>(3), row.get(4));
}
pb.inc(1);
num_rows += 1;
}
pb.set_length(num_rows);
pb.finish();
state_group_map
}
/// Get any missing state groups from the database
fn get_missing_from_db(conn: &Connection, missing_sgs: &[i64]) -> BTreeMap<i64, StateGroupEntry> {
let stmt = conn
.prepare(
r#"
SELECT state_group, prev_state_group
FROM state_group_edges
WHERE state_group = ANY($1)
"#,
).unwrap();
let trans = conn.transaction().unwrap();
let mut rows = stmt.lazy_query(&trans, &[&missing_sgs], 100).unwrap();
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
while let Some(row) = rows.next().unwrap() {
let state_group = row.get(0);
let entry = state_group_map.entry(state_group).or_default();
entry.prev_state_group = row.get(1);
}
state_group_map
}

View File

@@ -1,149 +1,42 @@
//! This is a tool that attempts to further compress state maps within a
//! Synapse instance's database. Specifically, it aims to reduce the number of
//! rows that a given room takes up in the `state_groups_state` table.
#[macro_use]
extern crate clap;
extern crate fallible_iterator;
extern crate indicatif;
extern crate postgres;
extern crate rayon;
extern crate rust_matrix_lib;
use clap::{App, Arg, ArgGroup};
use fallible_iterator::FallibleIterator;
use postgres::{Connection, TlsMode};
mod compressor;
mod database;
use compressor::Compressor;
use clap::{App, Arg};
use rayon::prelude::*;
use rust_matrix_lib::state_map::StateMap;
use std::collections::BTreeMap;
use std::fs::File;
use std::io::{BufRead, BufReader, Write};
use std::io::Write;
use std::str::FromStr;
/// An entry for a state group. Consists of an (optional) previous group and the
/// delta from that previous group (or the full state if no previous group)
#[derive(Default, Debug, Clone)]
struct StateGroupEntry {
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct StateGroupEntry {
prev_state_group: Option<i64>,
state_map: StateMap<String>,
}
/// Fetch the entries in state_groups_state (and their prev groups) for the
/// given `room_id` by connecting to the postgres database at `db_url`.
fn get_data_from_db(db_url: &str, room_id: &str) -> BTreeMap<i64, StateGroupEntry> {
let conn = Connection::connect(db_url, TlsMode::None).unwrap();
let stmt = conn
.prepare(
r#"
SELECT state_group, prev_state_group, type, state_key, event_id
FROM state_groups_state
LEFT JOIN state_group_edges USING (state_group)
WHERE room_id = $1
"#,
).unwrap();
let trans = conn.transaction().unwrap();
let mut rows = stmt.lazy_query(&trans, &[&room_id], 100).unwrap();
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut started = false;
while let Some(row) = rows.next().unwrap() {
if !started {
started = true;
println!("Started streaming from DB!");
}
let state_group = row.get(0);
let entry = state_group_map.entry(state_group).or_default();
entry.prev_state_group = row.get(1);
entry.state_map.insert(
&row.get::<_, String>(2),
&row.get::<_, String>(3),
row.get(4),
);
}
state_group_map
}
/// Get any missing state groups from the database
fn get_missing_from_db(db_url: &str, missing_sgs: &[i64]) -> BTreeMap<i64, StateGroupEntry> {
let conn = Connection::connect(db_url, TlsMode::None).unwrap();
let stmt = conn
.prepare(
r#"
SELECT state_group, prev_state_group
FROM state_group_edges
WHERE state_group = ANY($1)
"#,
).unwrap();
let trans = conn.transaction().unwrap();
let mut rows = stmt.lazy_query(&trans, &[&missing_sgs], 100).unwrap();
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
while let Some(row) = rows.next().unwrap() {
let state_group = row.get(0);
let entry = state_group_map.entry(state_group).or_default();
entry.prev_state_group = row.get(1);
}
state_group_map
}
/// Get state group entries from the file at `path`.
///
/// This should be formatted as `|` separated values, with the empty string
/// representing null. (Yes, this is a bit dodgy by means its trivial to get
/// from postgres. We should use a better format).
///
/// The following invocation produces the correct output:
///
/// ```bash
/// psql -At synapse > test.data <<EOF
/// SELECT state_group, prev_state_group, type, state_key, event_id
/// FROM state_groups_state
/// LEFT JOIN state_group_edges USING (state_group)
/// WHERE room_id = '!some_room:example.com';
/// EOF
/// ```
fn get_data_from_file(path: &str) -> BTreeMap<i64, StateGroupEntry> {
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let f = File::open(path).unwrap();
let f = BufReader::new(f);
for line in f.lines() {
let line = line.unwrap();
let mut iter = line.split('|');
let state_group = iter.next().unwrap().parse().unwrap();
let entry = state_group_map.entry(state_group).or_default();
let prev_state_group_str = iter.next().unwrap();
entry.prev_state_group = if prev_state_group_str.is_empty() {
None
} else {
Some(prev_state_group_str.parse().unwrap())
};
entry.state_map.insert(
iter.next().unwrap(),
iter.next().unwrap(),
iter.next().unwrap().to_string(),
);
}
state_group_map
}
/// Gets the full state for a given group from the map (of deltas)
fn collapse_state_maps(map: &BTreeMap<i64, StateGroupEntry>, state_group: i64) -> StateMap<String> {
pub fn collapse_state_maps(
map: &BTreeMap<i64, StateGroupEntry>,
state_group: i64,
) -> StateMap<String> {
let mut entry = &map[&state_group];
let mut state_map = StateMap::new();
@@ -169,6 +62,26 @@ fn collapse_state_maps(map: &BTreeMap<i64, StateGroupEntry>, state_group: i64) -
state_map
}
/// Helper struct for parsing the `level_sizes` argument.
struct LevelSizes(Vec<usize>);
impl FromStr for LevelSizes {
type Err = &'static str;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut sizes = Vec::new();
for size_str in s.split(",") {
let size: usize = size_str
.parse()
.map_err(|_| "Not a comma separated list of numbers")?;
sizes.push(size);
}
Ok(LevelSizes(sizes))
}
}
fn main() {
let matches = App::new(crate_name!())
.version(crate_version!())
@@ -186,23 +99,26 @@ fn main() {
.short("r")
.value_name("ROOM_ID")
.help("The room to process")
.takes_value(true),
.takes_value(true)
.required(true),
).arg(
Arg::with_name("input")
.short("f")
.value_name("FILE")
.help("File containing dumped state groups")
.takes_value(true),
).arg(
Arg::with_name("output_diff")
Arg::with_name("output_file")
.short("o")
.value_name("FILE")
.help("File to output the changes to")
.help("File to output the changes to in SQL")
.takes_value(true),
).arg(
Arg::with_name("individual_transactions")
.short("t")
.help("Whether to wrap each state group change in a transaction, when writing to file")
.requires("output_file"),
).arg(
Arg::with_name("level_sizes")
.short("l")
.value_name("LEVELS")
.help("Sizes of each new level in the compression algorithm, as a comma separate list")
.default_value("100,50,25")
.takes_value(true),
).group(
ArgGroup::with_name("target")
.args(&["input", "room_id"])
.required(true),
).get_matches();
let db_url = matches
@@ -210,156 +126,112 @@ fn main() {
.expect("db url should be required");
let mut output_file = matches
.value_of("output_diff")
.value_of("output_file")
.map(|path| File::create(path).unwrap());
// First we need to get the current state groups
let mut state_group_map = if let Some(path) = matches.value_of("input") {
get_data_from_file(path)
} else {
let room_id = matches
.value_of("room_id")
.expect("room_id should be required since no file");
get_data_from_db(db_url, room_id)
};
// For reasons that escape me some of the state groups appear in the edges
// table, but not in the state_groups_state table. This means they don't
// get included in our DB queries, so we have to fetch any missing groups
// explicitly. Since the returned groups may themselves reference groups
// we don't have we need to do this recursively until we don't find any
// more
loop {
let missing_sgs: Vec<_> = state_group_map
.iter()
.filter_map(|(_sg, entry)| {
if let Some(prev_sg) = entry.prev_state_group {
if state_group_map.contains_key(&prev_sg) {
None
} else {
Some(prev_sg)
}
} else {
None
}
}).collect();
let individual_transactions = matches.is_present("individual_transactions");
if missing_sgs.is_empty() {
break;
}
let level_sizes = value_t_or_exit!(matches, "level_sizes", LevelSizes);
println!("Missing {} state groups", missing_sgs.len());
// First we need to get the current state groups
println!("Fetching state from DB for room '{}'...", room_id);
let state_group_map = database::get_data_from_db(db_url, room_id);
let map = get_missing_from_db(db_url, &missing_sgs);
state_group_map.extend(map.into_iter());
}
println!("Number of state groups: {}", state_group_map.len());
println!("Number of entries: {}", state_group_map.len());
let summed_size = state_group_map
let original_summed_size = state_group_map
.iter()
.fold(0, |acc, (_, v)| acc + v.state_map.len());
println!("Number of rows: {}", summed_size);
println!("Number of rows in current table: {}", original_summed_size);
let mut new_state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
// Now we actually call the compression algorithm.
// Now we loop through and create our new state maps from the existing
// ones.
// The existing table is made up of chains of groups at most 100 nodes
// long. At the start of each chain there is a copy of the full state at
// that point. This algorithm adds edges between such "checkpoint" nodes,
// so that there are chains between them. We cap such checkpoint chains to
// a length of 50.
//
// The idea here is that between checkpoint nodes only small subsets of
// state will have actually changed.
//
// (This approach can be generalised by adding more and more layers)
let compressor = Compressor::compress(&state_group_map, &level_sizes.0);
let mut last_checkpoint_opt = None;
let mut checkpoint_length = 0;
let new_state_group_map = compressor.new_state_group_map;
for (state_group, entry) in &state_group_map {
if entry.prev_state_group.is_none() {
// We're at a checkpoint node. If this is our first checkpoint
// node then there isn't much to do other than mark it.
let mut added_to_chain = false;
if let Some(ref last_checkpoint) = last_checkpoint_opt {
let checkpoint_entry = &state_group_map[last_checkpoint];
// Done! Now to print a bunch of stats.
// We need to ensure that that aren't any entries in the
// previous checkpoint node that aren't in the state at this
// point, since the table schema doesn't support the idea of
// "deleting" state in the deltas.
//
// Note: The entry.state_map will be the full state here, rather
// than just the delta since prev_state_group is None.
if checkpoint_entry
.state_map
.keys()
.all(|(t, s)| entry.state_map.contains_key(t, s))
{
// We create the new map by filtering out entries that match
// those in the previous checkpoint state.
let new_map: StateMap<String> = entry
.state_map
let compressed_summed_size = new_state_group_map
.iter()
.filter(|((t, s), e)| checkpoint_entry.state_map.get(t, s) != Some(e))
.map(|((t, s), e)| ((t, s), e.clone()))
.collect();
.fold(0, |acc, (_, v)| acc + v.state_map.len());
// If we have an output file write the changes we've made
if let Some(ref mut fs) = output_file {
writeln!(fs, "edge_addition {} {}", state_group, *last_checkpoint).unwrap();
for ((t, s), e) in new_map.iter() {
writeln!(fs, "state_replace {} {} {} {}", state_group, t, s, e)
.unwrap();
}
}
let ratio = (compressed_summed_size as f64) / (original_summed_size as f64);
new_state_group_map.insert(
*state_group,
StateGroupEntry {
prev_state_group: Some(*last_checkpoint),
state_map: new_map,
},
println!(
"Number of rows after compression: {} ({:.2}%)",
compressed_summed_size,
ratio * 100.
);
added_to_chain = true;
println!("Compression Statistics:");
println!(
" Number of forced resets due to lacking prev: {}",
compressor.stats.resets_no_suitable_prev
);
println!(
" Number of compressed rows caused by the above: {}",
compressor.stats.resets_no_suitable_prev_size
);
println!(
" Number of state groups changed: {}",
compressor.stats.state_groups_changed
);
// If we are given an output file, we output the changes as SQL. If the
// `individual_transactions` argument is set we wrap each change to a state
// group in a transaction.
if let Some(output) = &mut output_file {
for (sg, old_entry) in &state_group_map {
let new_entry = &new_state_group_map[sg];
if old_entry != new_entry {
if individual_transactions {
writeln!(output, "BEGIN;");
}
writeln!(
output,
"DELETE FROM state_group_edges WHERE state_group = {};",
sg
);
if let Some(prev_sg) = new_entry.prev_state_group {
writeln!(output, "INSERT INTO state_group_edges (state_group, prev_state_group) VALUES ({}, {});", sg, prev_sg);
}
writeln!(
output,
"DELETE FROM state_groups_state WHERE state_group = {};",
sg
);
if new_entry.state_map.len() > 0 {
writeln!(output, "INSERT INTO state_groups_state (state_group, room_id, type, state_key, event_id) VALUES");
let mut first = true;
for ((t, s), e) in new_entry.state_map.iter() {
if first {
write!(output, " ");
first = false;
} else {
new_state_group_map.insert(*state_group, entry.clone());
write!(output, " ,");
}
} else {
new_state_group_map.insert(*state_group, entry.clone());
writeln!(output, "({}, '{}', '{}', '{}', '{}')", sg, room_id, t, s, e);
}
writeln!(output, ";");
}
last_checkpoint_opt = Some(*state_group);
// If we've added to the checkpoint chain we increment the length,
// otherwise it gets reset to zero.
if added_to_chain {
checkpoint_length += 1;
} else {
checkpoint_length = 0;
if individual_transactions {
writeln!(output, "COMMIT;");
}
writeln!(output);
}
// If the chain is longer than 50 then lets reset to create a new
// chain.
if checkpoint_length >= 50 {
checkpoint_length = 0;
last_checkpoint_opt = None;
}
} else {
new_state_group_map.insert(*state_group, entry.clone());
}
}
let summed_size = new_state_group_map
.iter()
.fold(0, |acc, (_, v)| acc + v.state_map.len());
println!("Number of rows compressed: {}", summed_size);
// Now let's iterate through and assert that the state for each group
// matches between the two versions.