Add documentation for autocompressor (#70)

2 Cargo.lock generated

@@ -1143,10 +1143,12 @@ dependencies = [
 "indicatif",
 "jemallocator",
 "log",
 "log-panics",
 "openssl",
 "postgres",
 "postgres-openssl",
 "pyo3",
 "pyo3-log",
 "rand",
 "rayon",
 "state-map",

@@ -20,6 +20,8 @@ rayon = "1.3.0"
string_cache = "0.8.0"
env_logger = "0.9.0"
log = "0.4.14"
pyo3-log = "0.4.0"
log-panics = "2.0.0"

[dependencies.state-map]
git = "https://github.com/matrix-org/rust-matrix-state-map"

293 README.md

@@ -1,47 +1,111 @@

# Compress Synapse State Tables

This workspace contains experimental tools that attempt to reduce the number of
rows in the `state_groups_state` table inside of a Synapse Postgresql database.

# Automated tool: auto_compressor

## Introduction

This tool is significantly simpler to use than the manual tool (described below).
It scans through all of the rows in the `state_groups` database table from the start. When
it finds a group that hasn't been compressed, it runs the compressor for a while on that
group's room, saving where it got up to. After compressing a number of these chunks it stops,
saving where it got up to for the next run of the `auto_compressor`.

It creates three extra tables in the database: `state_compressor_state`, which stores the
information needed to stop and start the compressor for each room; `state_compressor_progress`,
which stores the most recently compressed state group for each room; and
`state_compressor_total_progress`, which stores how far through the `state_groups` table the
compressor has scanned.
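
To check how far the automatic tool has got, you can inspect these tables directly.
A minimal sketch using `psql` (the exact columns are whatever the tool's schema defines,
so `SELECT *` is used here):

```
$ psql synapse -c 'SELECT * FROM state_compressor_total_progress;'
```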

The tool can be run manually when you are running out of space, or be scheduled to run
periodically.

The output from the auto_compressor will be sent to `auto_compressor.log` (in the directory
that the compressor is run from).

## Building

This tool requires `cargo` to be installed. See https://www.rust-lang.org/tools/install
for instructions on how to do this.

To build `auto_compressor`, clone this repository and navigate to the `auto_compressor/`
subdirectory. Then execute `cargo build`.

This will create an executable and store it in `auto_compressor/target/debug/auto_compressor`.
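
For example, a sketch of the full build (the clone URL below is an assumption; substitute
wherever you obtained this repository from):

```
$ git clone https://github.com/matrix-org/rust-synapse-compress-state.git
$ cd rust-synapse-compress-state/auto_compressor
$ cargo build
```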

## Example usage

```
$ auto_compressor -p postgresql://user:pass@localhost/synapse -c 500 -n 100
```

## Running Options

- -p [POSTGRES_LOCATION] **Required**
The configuration for connecting to the Postgres database. This should be of the form
`"postgresql://username:password@mydomain.com/database"` or a key-value pair
string: `"user=username password=password dbname=database host=mydomain.com"`.
See https://docs.rs/tokio-postgres/0.7.2/tokio_postgres/config/struct.Config.html
for the full details.

- -c [CHUNK_SIZE] **Required**
The number of state groups to work on at once. All of the entries from `state_groups_state` are
requested from the database for state groups that are worked on. Therefore small chunk
sizes may be needed on machines with low memory. Note: if the compressor fails to find
space savings on the chunk as a whole (which may well happen in rooms with lots of backfill)
then the entire chunk is skipped.

- -n [CHUNKS_TO_COMPRESS] **Required**
*CHUNKS_TO_COMPRESS* chunks of size *CHUNK_SIZE* will be compressed. The higher this
number is set to, the longer the compressor will run for.

- -d [LEVELS]
Sizes of each new level in the compression algorithm, as a comma-separated list.
The first entry in the list is for the lowest, most granular level, with each
subsequent entry being for the next highest level. The number of entries in the
list determines the number of levels that will be used. The sum of the sizes of
the levels affects the performance of fetching the state from the database, as the
sum of the sizes is the upper bound on the number of iterations needed to fetch a
given set of state. [defaults to "100,50,25"] An example invocation is shown after
this list.
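
For instance, the following sketch runs the automatic tool with the level sizes spelled
out explicitly (the values shown are just the defaults):

```
$ auto_compressor -p postgresql://user:pass@localhost/synapse -c 500 -n 100 -d "100,50,25"
```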

## Scheduling the compressor

The automatic tool may put some strain on the database, so it might be best to schedule
it to run at a quiet time for the server. This could be done by creating an executable
script and scheduling it with something like
[cron](https://www.man7.org/linux/man-pages/man1/crontab.1.html), as sketched below.
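
As a minimal sketch, a crontab entry along the following lines would compress 100 chunks
at 3am every night (the binary path and connection string are placeholders for your own
setup):

```
0 3 * * * /home/synapse/rust-synapse-compress-state/auto_compressor/target/debug/auto_compressor -p postgresql://user:pass@localhost/synapse -c 500 -n 100
```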

# Manual tool: synapse_compress_state

## Introduction

A manual tool that reads in the rows from `state_groups_state` and `state_group_edges`
tables for a specified room and calculates the changes that could be made that
(hopefully) will significantly reduce the number of rows.

This tool currently *does not* write to the database by default, so should be
safe to run. If the `-o` option is specified then SQL will be written to the
given file that would change the tables to match the calculated state. (Note
that if `-t` is given then each change to a particular state group is wrapped
in a transaction). If you do wish to send the changes to the database automatically
then the `-c` flag can be set.
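
For example, a sketch of the review-then-apply workflow (the connection string and room id
are placeholders):

```
$ synapse_compress_state -p postgresql://user:pass@localhost/synapse -r '!wOlkWNmgkAZFxbTaqj:matrix.org' -o out.sql -t
$ psql synapse < out.sql
```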

The SQL generated is safe to apply against the database with Synapse running.
This is because the `state_groups` and `state_groups_state` tables are append-only:
once written to the database, they are never modified. There is therefore no danger
of a modification racing against a running Synapse. Further, this script makes its
changes within atomic transactions, and each transaction should not affect the results
from any of the queries that Synapse performs.

The tool will also ensure that the generated state deltas do give the same state
as the existing state deltas before generating any SQL.

## Building

This tool requires `cargo` to be installed. See https://www.rust-lang.org/tools/install
for instructions on how to do this.

To build `synapse_compress_state`, clone this repository and then execute `cargo build`.

This will create an executable and store it in `target/debug/synapse_compress_state`.

## Example usage

@@ -64,92 +128,65 @@ $ psql synapse < out.data

## Running Options

- -p [POSTGRES_LOCATION] **Required**
The configuration for connecting to the Postgres database. This should be of the form
`"postgresql://username:password@mydomain.com/database"` or a key-value pair
string: `"user=username password=password dbname=database host=mydomain.com"`.
See https://docs.rs/tokio-postgres/0.7.2/tokio_postgres/config/struct.Config.html
for the full details.

- -r [ROOM_ID] **Required**
The room to process (this is the value found in the `rooms` table of the database,
not the common name for the room - it should look like: "!wOlkWNmgkAZFxbTaqj:matrix.org").

- -b [MIN_STATE_GROUP]
The state group to start processing from (non-inclusive).

- -n [GROUPS_TO_COMPRESS]
How many groups to load into memory to compress (starting
from the 1st group in the room or the group specified by -b).

- -l [LEVELS]
Sizes of each new level in the compression algorithm, as a comma-separated list.
The first entry in the list is for the lowest, most granular level, with each
subsequent entry being for the next highest level. The number of entries in the
list determines the number of levels that will be used. The sum of the sizes of
the levels affects the performance of fetching the state from the database, as the
sum of the sizes is the upper bound on the number of iterations needed to fetch a
given set of state. [defaults to "100,50,25"]

- -m [COUNT]
If the compressor cannot save this many rows from the database then it will stop early.

- -s [MAX_STATE_GROUP]
If a max_state_group is specified then only state groups with ids lower than this
number can be compressed.

- -o [FILE]
File to output the SQL transactions to (for later running on the database).

- -t
If this flag is set then each change to a particular state group is wrapped in a
transaction. This should be done if you wish to apply the changes while Synapse is
still running.

- -c
If this flag is set then the changes the compressor makes will be committed to the
database. This should be safe to use while Synapse is running as it wraps the changes
to every state group in its own transaction (as if the transaction flag was set).

- -g
If this flag is set then output the node and edge information for the state_group
directed graph built up from the predecessor state_group links. These can be looked
at in something like Gephi (https://gephi.org).

# Using as python library

The compressor can also be built into a python library as it uses PyO3. It can be
built and installed into the current virtual environment by running `maturin develop`.

All the same running options are available, see the comments in the Config struct
in lib.rs for the names of each argument. All arguments other than `db_url` and `room_id`
are optional.

The following code does exactly the same as the command-line example from above:

```
import synapse_compress_state as comp

comp.run_compression(
  db_url="postgresql://localhost/synapse",
  room_id="!some_room:example.com",
  output_file="out.sql",
  transactions=True
)
```

Note: since this library uses Jemalloc, you might get an error of the form:

```
ImportError: /[LONG_PATH]/synapse_compress_state.abi3.so: cannot allocate memory in static TLS block
```

If this happens then try running the following:

```
LD_PATH=/[LONG_PATH]/synapse_compress_state.abi3.so ./my_python_script
```

Or just try disabling jemalloc:

```
$ maturin develop --cargo-extra-args="--no-default-features"
```

# Running tests

There are integration tests for these tools stored in `compressor_integration_tests/`.

To run the integration tests, you first need to start up a Postgres database
for the library to talk to. There is a docker-compose file that sets one up
with all of the correct tables. The tests can therefore be run as follows:

```
@@ -159,5 +196,81 @@ $ cargo test --workspace
$ docker-compose down
```

Note: any output from these tests goes into `compressor_integration_tests/tmp/`, so if this
directory doesn't already exist then you will need to create it.

# Using the synapse_compress_state library

If you want to use the compressor in another project, it is recommended that you
use jemalloc (https://github.com/gnzlbg/jemallocator).

To prevent the progress bars from being shown, use the `no-progress-bars` feature.
(See `auto_compressor/Cargo.toml` for an example.)

# Troubleshooting

## Connecting to database

### From local machine

If you set up Synapse using the instructions on https://matrix-org.github.io/synapse/latest/postgres.html
you should have a username and password to use to log in to the postgres database. To run the compressor
from the machine where Postgres is running, the url will be the following:

`postgresql://synapse_user:synapse_password@localhost/synapse`

### From remote machine

If you wish to connect from a different machine, you'll need to edit your Postgres settings to allow
remote connections. This requires updating the
[`pg_hba.conf`](https://www.postgresql.org/docs/current/auth-pg-hba-conf.html) and the `listen_addresses`
setting in [`postgresql.conf`](https://www.postgresql.org/docs/current/runtime-config-connection.html).
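
As a sketch, the relevant settings might look like the following (the subnet, user and
auth method are illustrative; adjust them for your deployment):

```
# postgresql.conf: listen on more than just localhost
listen_addresses = '*'

# pg_hba.conf: let synapse_user reach the synapse database from the local network
host    synapse    synapse_user    192.168.1.0/24    md5
```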

## Printing debugging logs

The amount of output the tools produce can be altered by setting the `RUST_LOG`
environment variable.

To get more logs when running the auto_compressor tool try the following:

```
$ RUST_LOG=debug auto_compressor -p postgresql://user:pass@localhost/synapse -c 50 -n 100
```

If you want to suppress all the debugging info you are getting from the
Postgres client then try:

```
$ RUST_LOG=auto_compressor=debug,synapse_compress_state=debug auto_compressor [etc.]
```

This will only print the debugging information from those two packages. For more info see
https://docs.rs/env_logger/0.9.0/env_logger/.

## Building difficulties

Building the `openssl-sys` dependency crate requires OpenSSL development tools to be installed,
and building on Linux will also require `pkg-config`.

This can be done on Ubuntu with: `$ apt-get install libssl-dev pkg-config`

Note that building requires quite a lot of memory and out-of-memory errors might not be
obvious. It's recommended that you only build these tools on machines with at least 2GB of RAM.

## Auto Compressor skips chunks when running on an already compressed room

If you have used the compressor before, with certain config options, the automatic tool will
produce lots of warnings of the form: `The compressor tried to increase the number of rows in ...`

To fix this, ensure that the chunk_size is set to at least the L1 level size (so if the level
sizes are "100,50,25" then the chunk_size should be at least 100).

Note: if the level sizes being used when rerunning are different from those used previously,
this might lead to less efficient compression and thus chunks being skipped, but this shouldn't
be a large problem.

## Compressor is trying to increase the number of rows

Backfilling can lead to issues with compression. The auto_compressor will
skip chunks whose size it can't reduce, so this should help it jump over the backfilled
state_groups. Lots of state resolution might also impact the ability to use the compressor.

To examine the state_group hierarchy, run the manual tool on a room with the `-g` option
and look at the graphs.

@@ -65,11 +65,15 @@ fn main() {
        .arg(
            Arg::with_name("postgres-url")
                .short("p")
                .value_name("URL")
                .help("The url for connecting to the postgres database.")
                .value_name("POSTGRES_LOCATION")
                .help("The configuration for connecting to the postgres database.")
                .long_help(concat!(
                    "The url for connecting to the postgres database.This should be of",
                    " the form \"postgresql://username:password@mydomain.com/database\""))
                    "The configuration for connecting to the Postgres database. This should be of the form ",
                    r#""postgresql://username:password@mydomain.com/database" or a key-value pair "#,
                    r#"string: "user=username password=password dbname=database host=mydomain.com" "#,
                    "See https://docs.rs/tokio-postgres/0.7.2/tokio_postgres/config/struct.Config.html ",
                    "for the full details."
                ))
                .takes_value(true)
                .required(true),
        ).arg(

@@ -111,7 +115,10 @@ fn main() {
            .short("n")
            .value_name("CHUNKS_TO_COMPRESS")
            .help("The number of chunks to compress")
            .long_help("This many chunks of the database will be compressed ")
            .long_help(concat!(
                "This many chunks of the database will be compressed. The higher this number is set to, ",
                "the longer the compressor will run for."
            ))
            .takes_value(true)
            .required(true),
    ).get_matches();

@@ -358,7 +358,7 @@ fn functions_are_self_consistent() {
pub fn setup_logger() {
    // setup the logger for the auto_compressor
    // The default can be overwritten with RUST_LOG
    // see the README for more information <--- TODO
    // see the README for more information
    if env::var("RUST_LOG").is_err() {
        let mut log_builder = env_logger::builder();
        // set is_test(true) so that the output is hidden by cargo test (unless the test fails)

107 docs/algorithm.md Normal file

@@ -0,0 +1,107 @@

# Compression algorithm

## What is state?

State is things like who is in a room, what the room topic/name is, who has
what privilege levels, etc. Synapse keeps track of it for various reasons such as
spotting invalid events (e.g. ones sent by banned users) and providing room membership
information to clients.

## What is a state group?

Synapse needs to keep track of the state at the moment of each event. A state group
corresponds to a unique state. The database table `event_to_state_groups` keeps track
of the mapping from event ids to state group ids.

Consider the following simplified example:
```
State group id | State
_____________________________________________
1              | Alice in room
2              | Alice in room, Bob in room
3              | Bob in room


Event id | What the event was
______________________________________
1        | Alice sends a message
2        | Alice sends a message
3        | Bob joins the room
4        | Bob sends a message
5        | Alice leaves the room
6        | Bob sends a message


Event id | State group id
_________________________
1        | 1
2        | 1
3        | 2
4        | 2
5        | 3
6        | 3
```

## What are deltas and predecessors?

When a new state event happens (e.g. Bob joins the room) a new state group is created.
BUT instead of copying all of the state from the previous state group, we just store
the change from the previous group (saving on lots of storage space!). The difference
from the previous state group is called the "delta".

So for the previous example, we would have the following (note that only rows 1 and 2 will
make sense at this point):

```
State group id | Previous state group id | Delta
____________________________________________________________
1              | NONE                    | Alice in room
2              | 1                       | Bob in room
3              | NONE                    | Bob in room
```

So why is state group 3's previous state group NONE and not 2? Well, the way that deltas
work in Synapse is that they can only add in new state or overwrite old state, but they
cannot remove it. (So if the room topic is changed then that is just overwriting state,
but removing Alice from the room is neither an addition nor an overwrite.) If it is
impossible to find a delta, then you just start from scratch again with a "snapshot" of
the entire state.

(Note: this is not documentation on how Synapse handles leaving rooms; it is purely for
illustrative purposes.)

The state of a state group is worked out by following the chain of previous state groups
and adding together all of the deltas (with the most recent taking precedence).

The mapping from state group to previous state group takes place in `state_group_edges`
and the deltas are stored in `state_groups_state`.
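
As a rough illustration, here is a small Python sketch of that lookup. The two dicts stand
in for the `state_group_edges` and `state_groups_state` tables, and all names and values
are invented for this example:

```python
# state_group -> previous state group (stand-in for state_group_edges)
edges = {2: 1}

# state_group -> delta, keyed by (event_type, state_key) (stand-in for state_groups_state)
deltas = {
    1: {("m.room.member", "@alice:example.com"): "join-alice"},
    2: {("m.room.member", "@bob:example.com"): "join-bob"},
    3: {("m.room.member", "@bob:example.com"): "join-bob"},
}

def resolve_state(group):
    """Resolve the full state of `group` by walking the predecessor chain."""
    chain = []
    while group is not None:
        chain.append(group)
        group = edges.get(group)  # None once we reach a snapshot
    state = {}
    for g in reversed(chain):  # oldest first, so newer deltas take precedence
        state.update(deltas[g])
    return state

print(resolve_state(2))  # contains both Alice's and Bob's membership
```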

## What are we compressing then?

In order to speed up the conversion from state group id to state, there is a limit of 100
hops set by Synapse (that is: we will only ever have to look up the deltas for a maximum of
100 state groups). It does this by taking another "snapshot" every 100 state groups.

However, it is these snapshots that take up the bulk of the storage in a Synapse database,
so we want to find a way to reduce the number of them without dramatically increasing the
maximum number of hops needed to do lookups.


## Compression Algorithm

The algorithm works by attempting to create a *tree* of deltas, produced by
appending state groups to different "levels". Each level has a maximum size, where
each state group is appended to the lowest level that is not full. This tool calls a
state group "compressed" once it has been added to one of these levels.

This produces a graph that looks approximately like the following, in the case
of having two levels with the bottom level (L1) having a maximum size of 3:

```
L2 <-------------------- L2 <---------- ...
^--- L1 <--- L1 <--- L1 ^--- L1 <--- L1 <--- L1

NOTE: A <--- B means that state group B's predecessor is A
```

The structure that Synapse creates by default would be equivalent to having one level with
a maximum length of 100.

**Note**: Increasing the sum of the sizes of levels will increase the time it
takes to query the full state of a given state group.
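
The following toy Python sketch shows how appending each state group to the lowest
non-full level produces the predecessor links drawn above. It is not the real
implementation: the level sizes are shrunk to (3, 2) to match the picture, and the
behaviour when every level fills up is a guess (it simply starts a fresh snapshot):

```python
def build_tree(state_groups, level_sizes=(3, 2)):
    """Return {state_group: predecessor} links forming the delta tree."""
    predecessors = {}
    heads = [None] * len(level_sizes)    # current head of each level
    lengths = [0] * len(level_sizes)     # how full each level currently is
    for sg in state_groups:
        # find the lowest level that is not full
        i = 0
        while i < len(level_sizes) and lengths[i] >= level_sizes[i]:
            i += 1
        if i == len(level_sizes):
            # every level is full: assume we start again with a snapshot
            heads = [None] * len(level_sizes)
            lengths = [0] * len(level_sizes)
            i = 0
        # the predecessor is the nearest existing head at this level or above
        predecessors[sg] = next((h for h in heads[i:] if h is not None), None)
        heads[i] = sg
        lengths[i] += 1
        # appending at level i resets every level below it
        for j in range(i):
            heads[j] = None
            lengths[j] = 0
    return predecessors

print(build_tree(range(1, 9)))
# {1: None, 2: 1, 3: 2, 4: None, 5: 4, 6: 5, 7: 6, 8: 4}
```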

54 docs/python.md Normal file

@@ -0,0 +1,54 @@

# Running the compressor tools from python

Both the automatic and manual tools use PyO3 to allow the compressor
to be run from Python.

To see any output from the tools, logging must be set up in Python before
the compressor is run.
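
For example, a minimal setup (this assumes the tools forward their Rust log records to
Python's standard `logging` module, as the pyo3-log dependency suggests):

```python
import logging

# Configure a handler before running the compressor, otherwise its output is dropped.
logging.basicConfig(level=logging.DEBUG)

# ...then import and run one of the tools as shown in the examples below.
```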

## Setting things up

1. Create a virtual environment in the place you want to use the compressor from
(if it doesn't already exist)
`$ virtualenv -p python3 venv`

2. Activate the virtual environment and install `maturin` (if you haven't already)
`$ source venv/bin/activate`
`$ pip install maturin`

3. Navigate to the correct location
For the automatic tool:
`$ cd /home/synapse/rust-synapse-compress-state/auto_compressor`
For the manual tool:
`$ cd /home/synapse/rust-synapse-compress-state`

4. Build and install the library
`$ maturin develop`

This will install the relevant compressor tool into the activated virtual environment.

## Automatic tool example:

```python
import auto_compressor

auto_compressor.compress_state_events_table(
  db_url="postgresql://localhost/synapse",
  chunk_size=500,
  default_levels="100,50,25",
  number_of_chunks=100
)
```

## Manual tool example:

```python
import synapse_compress_state

synapse_compress_state.run_compression(
  db_url="postgresql://localhost/synapse",
  room_id="!some_room:example.com",
  output_file="out.sql",
  transactions=True
)
```

24 src/lib.rs

@@ -20,7 +20,7 @@
// of arguments - this hopefully doesn't make the code unclear
// #[allow(clippy::too_many_arguments)] is therefore used around some functions

use log::{info, warn};
use log::{info, warn, LevelFilter};
use pyo3::{exceptions, prelude::*};

use clap::{crate_authors, crate_description, crate_name, crate_version, value_t, App, Arg};

@@ -121,11 +121,15 @@ impl Config {
        .arg(
            Arg::with_name("postgres-url")
                .short("p")
                .value_name("URL")
                .help("The url for connecting to the postgres database.")
                .value_name("POSTGRES_LOCATION")
                .help("The configuration for connecting to the postgres database.")
                .long_help(concat!(
                    "The url for connecting to the postgres database.This should be of",
                    " the form \"postgresql://username:password@mydomain.com/database\""))
                    "The configuration for connecting to the Postgres database. This should be of the form ",
                    r#""postgresql://username:password@mydomain.com/database" or a key-value pair "#,
                    r#"string: "user=username password=password dbname=database host=mydomain.com" "#,
                    "See https://docs.rs/tokio-postgres/0.7.2/tokio_postgres/config/struct.Config.html ",
                    "for the full details."
                ))
                .takes_value(true)
                .required(true),
        ).arg(

@@ -781,6 +785,16 @@ fn run_compression(
/// Python module - "import synapse_compress_state" to use
#[pymodule]
fn synapse_compress_state(_py: Python, m: &PyModule) -> PyResult<()> {
    let _ = pyo3_log::Logger::default()
        // don't send out anything lower than a warning from other crates
        .filter(LevelFilter::Warn)
        // don't log warnings from synapse_compress_state, the auto_compressor handles these
        // situations and provides better log messages
        .filter_target("synapse_compress_state".to_owned(), LevelFilter::Debug)
        .install();
    // ensure any panics produce error messages in the log
    log_panics::init();

    m.add_function(wrap_pyfunction!(run_compression, m)?)?;
    Ok(())
}

@@ -29,7 +29,7 @@ use synapse_compress_state as comp_state;
fn main() {
    // setup the logger
    // The default can be overwritten with RUST_LOG
    // see the README for more information <---- TODO
    // see the README for more information
    if env::var("RUST_LOG").is_err() {
        let mut log_builder = env_logger::builder();
        // Only output the log message (and not the prefixed timestamp etc.)