Compare commits
83 Commits
| Author | SHA1 | Date |
|---|---|---|
|  | 01bce55ade |  |
|  | bf92c82b7f |  |
|  | 4b9f2e2d64 |  |
|  | 71f24cf2b9 |  |
|  | 575d0fd878 |  |
|  | 2697e261da |  |
|  | d3aad1a23f |  |
|  | 0890891bb0 |  |
|  | 8dc70fec8d |  |
|  | 982ee5ead8 |  |
|  | 8fca8adb04 |  |
|  | 9ee99cd547 |  |
|  | 6a065de6fc |  |
|  | f4d96c73a8 |  |
|  | 923ca65f67 |  |
|  | 13882d7654 |  |
|  | c0dac572c1 |  |
|  | 856b799c53 |  |
|  | aab4d37123 |  |
|  | fce2a7eee8 |  |
|  | 74bd719262 |  |
|  | e3075d1451 |  |
|  | d22acc6906 |  |
|  | 88d97ea413 |  |
|  | 152808baca |  |
|  | 2596f25eea |  |
|  | 4d3049d3ed |  |
|  | 9ff021f32e |  |
|  | 019b100521 |  |
|  | da6271a331 |  |
|  | dd62afb3d5 |  |
|  | 65ffce2362 |  |
|  | b4f3d8adbd |  |
|  | 04ac0529e1 |  |
|  | d6df1ac5a4 |  |
|  | 32e43da3e8 |  |
|  | 4c7316311b |  |
|  | 83e8dedfa9 |  |
|  | 38d800a775 |  |
|  | 5272acedd2 |  |
|  | 9d642cfb67 |  |
|  | 0111079153 |  |
|  | bf57e81f54 |  |
|  | dcfa67ea98 |  |
|  | 9bfd786f3b |  |
|  | f822da9625 |  |
|  | b8e323c321 |  |
|  | bc30942e2d |  |
|  | a069d8765a |  |
|  | 3271221311 |  |
|  | a9bc800b87 |  |
|  | 80795aa813 |  |
|  | 55ee83ce13 |  |
|  | a409cdbd8e |  |
|  | 8c72a0de52 |  |
|  | a951ba4dae |  |
|  | 4c3d6bd346 |  |
|  | d32f49303b |  |
|  | d908d13f8f |  |
|  | 65861de06e |  |
|  | 3b5d7fd176 |  |
|  | 0f7f2c2660 |  |
|  | 011f9f8da5 |  |
|  | 9a59b1121c |  |
|  | aa6137ce52 |  |
|  | e5174f4181 |  |
|  | 5c248b5362 |  |
|  | 63e1d6e3c9 |  |
|  | 3290726990 |  |
|  | becb2937aa |  |
|  | a887107607 |  |
|  | 013080f712 |  |
|  | 31e37996ea |  |
|  | f3b1dd0b8a |  |
|  | 2fc2db2848 |  |
|  | 4f823fba78 |  |
|  | b33c498e99 |  |
|  | d2ab661b1d |  |
|  | b07397b5e3 |  |
|  | 4a56406fb3 |  |
|  | dc635bdd0e |  |
|  | 9720b19332 |  |
|  | 8e691aec1f |  |
3  .dockerignore  Normal file
@@ -0,0 +1,3 @@
.git
.github
/target
46  .gitea/workflows/docker.yaml  Normal file
@@ -0,0 +1,46 @@
name: Build and push docker images

on:
  push:
    tags: ["v*"]
    branches: [ main ]
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Log in to Gitea Container Registry
        uses: docker/login-action@v2
        with:
          registry: git.yongyuancv.cn
          username: ${{ gitea.repository_owner }}
          password: ${{ secrets.GITEA_TOKEN }}

      - name: Calculate docker image tag
        id: set-tag
        uses: docker/metadata-action@master
        with:
          images: |
            git.yongyuancv.cn/${{ gitea.repository }}
            git.yongyuancv.cn/heimoshuiyu/${{ gitea.event.repository.name }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=latest,enable=${{ gitea.ref == 'refs/heads/main' }}
            type=sha,prefix=,format=long
            type=semver,pattern=v{{version}}
            type=semver,pattern=v{{major}}.{{minor}}

      - name: Build and push all platforms
        uses: docker/build-push-action@v4
        with:
          push: true
          labels: "gitsha1=${{ gitea.sha }}"
          tags: "${{ steps.set-tag.outputs.tags }}"
          platforms: linux/amd64,linux/arm64
          cache-from: type=registry,ref=git.yongyuancv.cn/${{ gitea.repository }}:buildcache
          cache-to: type=registry,ref=git.yongyuancv.cn/${{ gitea.repository }}:buildcache,mode=max
2  .github/CODEOWNERS  vendored  Normal file
@@ -0,0 +1,2 @@
# Automatically request reviews from the synapse-core team when a pull request comes in.
* @matrix-org/synapse-core
58  .github/workflows/docker.yaml  vendored  Normal file
@@ -0,0 +1,58 @@
# GitHub actions workflow which builds and publishes the docker images.

name: Build and push docker images

on:
  push:
    tags: ["v*"]
    branches: [ main ]
  workflow_dispatch:

permissions:
  contents: read
  packages: write

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Log in to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_HUB_USERNAME }}
          password: ${{ secrets.DOCKER_HUB_TOKEN }}

      - name: Log in to GHCR
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Calculate docker image tag
        id: set-tag
        uses: docker/metadata-action@master
        with:
          images: |
            ghcr.io/${{ github.repository }}
            docker.io/${{ secrets.DOCKER_HUB_USERNAME }}/${{ github.event.repository.name }}
          flavor: |
            latest=false
          tags: |
            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
            type=sha,prefix=,format=long
            type=semver,pattern=v{{version}}
            type=semver,pattern=v{{major}}.{{minor}}

      - name: Build and push all platforms
        uses: docker/build-push-action@v4
        with:
          push: true
          labels: "gitsha1=${{ github.sha }}"
          tags: "${{ steps.set-tag.outputs.tags }}"
          platforms: linux/amd64,linux/arm64
          cache-from: type=registry,ref=ghcr.io/${{ github.repository }}:buildcache
          cache-to: type=registry,ref=ghcr.io/${{ github.repository }}:buildcache,mode=max
74  .github/workflows/tests.yaml  vendored  Normal file
@@ -0,0 +1,74 @@
on:
  pull_request:
  push:
    branches:
      - main

name: Continuous integration

jobs:
  check:
    name: Cargo Check
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - uses: Swatinem/rust-cache@v1
      - uses: actions-rs/cargo@v1
        with:
          command: check

  test:
    name: Test Suite
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
      - uses: Swatinem/rust-cache@v1
      - run: cd compressor_integration_tests && docker-compose up -d
      - uses: actions-rs/cargo@v1
        with:
          command: test
          args: --workspace

  fmt:
    name: Rustfmt
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
          components: rustfmt
      - uses: Swatinem/rust-cache@v1
      - uses: actions-rs/cargo@v1
        with:
          command: fmt
          args: --all -- --check

  clippy:
    name: Clippy
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true
          components: clippy
      - uses: Swatinem/rust-cache@v1
      - uses: actions-rs/cargo@v1
        with:
          command: clippy
          args: -- -D warnings
28  .github/workflows/triage_incoming.yml  vendored  Normal file
@@ -0,0 +1,28 @@
name: Move new issues into the issue triage board

on:
  issues:
    types: [ opened ]

jobs:
  add_new_issues:
    name: Add new issues to the triage board
    runs-on: ubuntu-latest
    steps:
      - uses: octokit/graphql-action@v2.x
        id: add_to_project
        with:
          headers: '{"GraphQL-Features": "projects_next_graphql"}'
          query: |
            mutation add_to_project($projectid:ID!,$contentid:ID!) {
              addProjectV2ItemById(input: {projectId: $projectid contentId: $contentid}) {
                item {
                  id
                }
              }
            }
          projectid: ${{ env.PROJECT_ID }}
          contentid: ${{ github.event.issue.node_id }}
        env:
          PROJECT_ID: "PVT_kwDOAIB0Bs4AFDdZ"
          GITHUB_TOKEN: ${{ secrets.ELEMENT_BOT_TOKEN }}
44  .github/workflows/triage_labelled.yml  vendored  Normal file
@@ -0,0 +1,44 @@
name: Move labelled issues to correct projects

on:
  issues:
    types: [ labeled ]

jobs:
  move_needs_info:
    name: Move X-Needs-Info on the triage board
    runs-on: ubuntu-latest
    if: >
      contains(github.event.issue.labels.*.name, 'X-Needs-Info')
    steps:
      - uses: actions/add-to-project@main
        id: add_project
        with:
          project-url: "https://github.com/orgs/matrix-org/projects/67"
          github-token: ${{ secrets.ELEMENT_BOT_TOKEN }}
      - name: Set status
        env:
          GITHUB_TOKEN: ${{ secrets.ELEMENT_BOT_TOKEN }}
        run: |
          gh api graphql -f query='
            mutation(
              $project: ID!
              $item: ID!
              $fieldid: ID!
              $columnid: String!
            ) {
              updateProjectV2ItemFieldValue(
                input: {
                  projectId: $project
                  itemId: $item
                  fieldId: $fieldid
                  value: {
                    singleSelectOptionId: $columnid
                  }
                }
              ) {
                projectV2Item {
                  id
                }
              }
            }' -f project="PVT_kwDOAIB0Bs4AFDdZ" -f item=${{ steps.add_project.outputs.itemId }} -f fieldid="PVTSSF_lADOAIB0Bs4AFDdZzgC6ZA4" -f columnid=ba22e43c --silent
3  .gitignore  vendored
@@ -2,3 +2,6 @@
**/*.rs.bk
*.data
*.old
**.sql
*.csv
**.log
80  CONTRIBUTING.md  Normal file
@@ -0,0 +1,80 @@
# Contributing

## Sign off

In order to have a concrete record that your contribution is intentional
and you agree to license it under the same terms as the project's license, we've adopted the
same lightweight approach that the Linux Kernel
[submitting patches process](
https://www.kernel.org/doc/html/latest/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin),
[Docker](https://github.com/docker/docker/blob/master/CONTRIBUTING.md), and many other
projects use: the DCO ([Developer Certificate of Origin](http://developercertificate.org/)).
This is a simple declaration that you wrote
the contribution or otherwise have the right to contribute it to Matrix:

```
Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA

Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.

Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I
    have the right to submit it under the open source license
    indicated in the file; or

(b) The contribution is based upon previous work that, to the best
    of my knowledge, is covered under an appropriate open source
    license and I have the right under that license to submit that
    work with modifications, whether created in whole or in part
    by me, under the same open source license (unless I am
    permitted to submit under a different license), as indicated
    in the file; or

(c) The contribution was provided directly to me by some other
    person who certified (a), (b) or (c) and I have not modified
    it.

(d) I understand and agree that this project and the contribution
    are public and that a record of the contribution (including all
    personal information I submit with it, including my sign-off) is
    maintained indefinitely and may be redistributed consistent with
    this project or the open source license(s) involved.
```

If you agree to this for your contribution, then all that's needed is to
include the line in your commit or pull request comment:

```
Signed-off-by: Your Name <your@email.example.org>
```

We accept contributions under a legally identifiable name, such as
your name on government documentation or common-law names (names
claimed by legitimate usage or repute). Unfortunately, we cannot
accept anonymous contributions at this time.

Git allows you to add this signoff automatically when using the `-s`
flag to `git commit`, which uses the name and email set in your
`user.name` and `user.email` git configs.

### Private Sign off

If you would like to provide your legal name privately to the Matrix.org
Foundation (instead of in a public commit or comment), you can do so
by emailing your legal name and a link to the pull request to
[dco@matrix.org](mailto:dco@matrix.org?subject=Private%20sign%20off).
It helps to include "sign off" or similar in the subject line. You will then
be instructed further.

Once private sign off is complete, doing so for future contributions will not
be required.
1529  Cargo.lock  generated
File diff suppressed because it is too large
54  Cargo.toml
@@ -1,18 +1,56 @@
[workspace]
members = ["synapse_auto_compressor", "compressor_integration_tests"]

[package]
authors = ["Erik Johnston"]
description = "A tool to compress some state in a Synapse instance's database"
name = "synapse-compress-state"
name = "synapse_compress_state"
version = "0.1.0"
edition = "2018"

[[bin]]
name = "synapse_compress_state"
required-features = ["clap"]

[dependencies]
clap = "2.33.0"
indicatif = "0.14.0"
jemallocator = "0.3.2"
postgres = "0.17.0"
rand = "0.7.2"
rayon = "1.3.0"
string_cache = "0.8.0"
indicatif = "0.17.6"
openssl = "0.10.60"
postgres = "0.19.7"
postgres-openssl = "0.5.0"
rand = "0.8.5"
rayon = "1.7.0"
string_cache = "0.8.7"
env_logger = "0.10.0"
log = "0.4.20"
log-panics = "2.1.0"

[dependencies.state-map]
git = "https://github.com/matrix-org/rust-matrix-state-map"

# Needed for pyo3 support
[lib]
crate-type = ["cdylib", "rlib"]

[dependencies.clap]
version = "4.4.2"
features = ["cargo"]
optional = true

[dependencies.pyo3]
version = "0.19.2"
features = ["extension-module"]
optional = true

[dependencies.pyo3-log]
version = "0.8.3"
optional = true

[dependencies.tikv-jemallocator]
version = "0.5.4"
optional = true

[features]
default = ["clap", "jemalloc"]
jemalloc = ["tikv-jemallocator"]
no-progress-bars = []
pyo3 = ["dep:pyo3", "dep:pyo3-log"]
54  Dockerfile  Normal file
@@ -0,0 +1,54 @@
# This uses the multi-stage build feature of Docker to build the binaries for multiple architectures without QEMU.
# The first stage is responsible for building binaries for all the supported architectures (amd64 and arm64), and the
# second stage only copies the binaries for the target architecture.
# We leverage Zig and cargo-zigbuild for providing a cross-compilation-capable C compiler and linker.

ARG RUSTC_VERSION=1.72.0
ARG ZIG_VERSION=0.11.0
ARG CARGO_ZIGBUILD_VERSION=0.17.1

FROM --platform=${BUILDPLATFORM} docker.io/rust:${RUSTC_VERSION} AS builder

# Install cargo-zigbuild for cross-compilation
ARG CARGO_ZIGBUILD_VERSION
RUN cargo install --locked cargo-zigbuild@=${CARGO_ZIGBUILD_VERSION}

# Download zig compiler for cross-compilation
ARG ZIG_VERSION
RUN curl -L "https://ziglang.org/download/${ZIG_VERSION}/zig-linux-$(uname -m)-${ZIG_VERSION}.tar.xz" | tar -J -x -C /usr/local && \
    ln -s "/usr/local/zig-linux-$(uname -m)-${ZIG_VERSION}/zig" /usr/local/bin/zig

# Install all cross-compilation targets
ARG RUSTC_VERSION
RUN rustup target add \
    --toolchain "${RUSTC_VERSION}" \
    x86_64-unknown-linux-musl \
    aarch64-unknown-linux-musl

WORKDIR /opt/synapse-compressor/
COPY . .

# Build for all targets
RUN cargo zigbuild \
    --release \
    --workspace \
    --bins \
    --features "openssl/vendored" \
    --target aarch64-unknown-linux-musl \
    --target x86_64-unknown-linux-musl

# Move the binaries in a separate folder per architecture, so we can copy them using the TARGETARCH build arg
RUN mkdir -p /opt/binaries/amd64 /opt/binaries/arm64
RUN mv target/x86_64-unknown-linux-musl/release/synapse_compress_state \
    target/x86_64-unknown-linux-musl/release/synapse_auto_compressor \
    /opt/binaries/amd64
RUN mv target/aarch64-unknown-linux-musl/release/synapse_compress_state \
    target/aarch64-unknown-linux-musl/release/synapse_auto_compressor \
    /opt/binaries/arm64

FROM --platform=${TARGETPLATFORM} docker.io/alpine

ARG TARGETARCH

COPY --from=builder /opt/binaries/${TARGETARCH}/synapse_compress_state /usr/local/bin/synapse_compress_state
COPY --from=builder /opt/binaries/${TARGETARCH}/synapse_auto_compressor /usr/local/bin/synapse_auto_compressor
285  README.md
@@ -1,51 +1,125 @@
# Compress Synapse State Tables

An experimental tool that reads in the rows from `state_groups_state` and
`state_group_edges` tables for a particular room and calculates the changes that
could be made that (hopefully) will significantly reduce the number of rows.
This workspace contains experimental tools that attempt to reduce the number of
rows in the `state_groups_state` table inside of a Synapse Postgresql database.

This tool currently *does not* write to the database in any way, so should be
# Automated tool: synapse_auto_compressor

## Introduction:

This tool is significantly simpler to use than the manual tool (described below).
It scans through all of the rows in the `state_groups` database table from the start. When
it finds a group that hasn't been compressed, it runs the compressor for a while on that
group's room, saving where it got up to. After compressing a number of these chunks it stops,
saving where it got up to for the next run of the `synapse_auto_compressor`.

It creates three extra tables in the database: `state_compressor_state` which stores the
information needed to stop and start the compressor for each room, `state_compressor_progress`
which stores the most recently compressed state group for each room and `state_compressor_total_progress`
which stores how far through the `state_groups` table the compressor has scanned.

The tool can be run manually when you are running out of space, or be scheduled to run
periodically.

## Building

This tool requires `cargo` to be installed. See https://www.rust-lang.org/tools/install
for instructions on how to do this.

This project follows the deprecation policy of [Synapse](https://matrix-org.github.io/synapse/latest/deprecation_policy.html)
on Rust and will assume a recent stable version of Rust and the ability to fetch a more recent one if necessary.

To build `synapse_auto_compressor`, clone this repository and navigate to the
`synapse_auto_compressor/` subdirectory. Then execute `cargo build`.

This will create an executable and store it in
`synapse_auto_compressor/target/debug/synapse_auto_compressor`.

## Example usage

Compress 100 chunks of size 500 in a remote PostgreSQL database:
```
$ synapse_auto_compressor -p postgresql://user:pass@localhost/synapse -c 500 -n 100
```

Compress 100 chunks of size 500 using local PostgreSQL socket:
```
$ sudo -u postgres synapse_auto_compressor -p "user=postgres dbname=matrix-synapse host=/var/run/postgresql" -c 500 -n 100
```

## Running Options

- -p [POSTGRES_LOCATION] **Required**
  The configuration for connecting to the Postgres database. This should be of the form
  `"postgresql://username:password@mydomain.com/database"` or a key-value pair
  string: `"user=username password=password dbname=database host=mydomain.com"`
  See https://docs.rs/tokio-postgres/0.7.2/tokio_postgres/config/struct.Config.html
  for the full details.

- -c [CHUNK_SIZE] **Required**
  The number of state groups to work on at once. All of the entries from state_groups_state are
  requested from the database for state groups that are worked on. Therefore small chunk
  sizes may be needed on machines with low memory. Note: if the compressor fails to find
  space savings on the chunk as a whole (which may well happen in rooms with lots of backfill
  in) then the entire chunk is skipped.

- -n [CHUNKS_TO_COMPRESS] **Required**
  *CHUNKS_TO_COMPRESS* chunks of size *CHUNK_SIZE* will be compressed. The higher this
  number is set to, the longer the compressor will run for.

- -l [LEVELS]
  Sizes of each new level in the compression algorithm, as a comma-separated list.
  The first entry in the list is for the lowest, most granular level, with each
  subsequent entry being for the next highest level. The number of entries in the
  list determines the number of levels that will be used. The sum of the sizes of
  the levels affects the performance of fetching the state from the database, as the
  sum of the sizes is the upper bound on the number of iterations needed to fetch a
  given set of state. [defaults to "100,50,25"]

## Scheduling the compressor
The automatic tool may put some strain on the database, so it might be best to schedule
it to run at a quiet time for the server. This could be done by creating an executable
script and scheduling it with something like
[cron](https://www.man7.org/linux/man-pages/man1/crontab.1.html).

# Manual tool: synapse_compress_state

## Introduction

A manual tool that reads in the rows from `state_groups_state` and `state_group_edges`
tables for a specified room and calculates the changes that could be made that
(hopefully) will significantly reduce the number of rows.

This tool currently *does not* write to the database by default, so should be
safe to run. If the `-o` option is specified then SQL will be written to the
given file that would change the tables to match the calculated state. (Note
that if `-t` is given then each change to a particular state group is wrapped
in a transaction).
in a transaction). If you do wish to send the changes to the database automatically
then the `-c` flag can be set.

The SQL generated by the `-o` option is safe to apply against the database with
Synapse running. This is because the `state_groups` and `state_groups_state`
tables are append-only: once written to the database, they are never modified.
There is therefore no danger of a modification racing against a running synapse.
Further, this script makes its changes within atomic transactions, and each
transaction should not affect the results from any of the queries that synapse
performs.
The SQL generated is safe to apply against the database with Synapse running.
This is because the `state_groups` and `state_groups_state` tables are append-only:
once written to the database, they are never modified. There is therefore no danger
of a modification racing against a running Synapse. Further, this script makes its
changes within atomic transactions, and each transaction should not affect the results
from any of the queries that Synapse performs.

The tool will also ensure that the generated state deltas do give the same state
as the existing state deltas.
as the existing state deltas before generating any SQL.

## Algorithm
## Building

The algorithm works by attempting to create a tree of deltas, produced by
appending state groups to different "levels". Each level has a maximum size, where
each state group is appended to the lowest level that is not full.
This tool requires `cargo` to be installed. See https://www.rust-lang.org/tools/install
for instructions on how to do this.

This produces a graph that looks approximately like the following, in the case
of having two levels with the bottom level (L1) having a maximum size of 3:

```
L2 <-------------------- L2 <---------- ...
  ^--- L1 <--- L1 <--- L1  ^--- L1 <--- L1 <--- L1
```

The sizes and number of levels used can be controlled via `-l`.

**Note**: Increasing the sum of the sizes of levels will increase the time it
takes to query the full state of a given state group. By default Synapse
attempts to keep this below 100.
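To make the level-filling rule concrete, here is a small illustrative Rust sketch. This is an editorial addition, not code from this repository: `ToyLevel` and `assign_parents` are hypothetical names, and the sketch only mirrors the behaviour described above. For two levels of size 3 it reproduces the delta tree drawn in the diagram and expected by the integration tests later in this diff.

```rust
/// Toy model of a compression level: at most `max_size` deltas chained
/// onto the group currently at its head.
struct ToyLevel {
    max_size: usize,
    current_len: usize,
    head: Option<i64>,
}

/// For each state group, return the predecessor it would be appended to
/// (None means the group becomes a snapshot with no predecessor).
fn assign_parents(groups: &[i64], level_sizes: &[usize]) -> Vec<(i64, Option<i64>)> {
    let mut levels: Vec<ToyLevel> = level_sizes
        .iter()
        .map(|&max_size| ToyLevel { max_size, current_len: 0, head: None })
        .collect();

    let mut out = Vec::new();
    for &sg in groups {
        // Index of the lowest level that still has room for another delta.
        let slot = levels.iter().position(|l| l.current_len < l.max_size);
        let parent = match slot {
            Some(i) => {
                let parent = levels[i].head;
                levels[i].current_len += 1;
                levels[i].head = Some(sg);
                // Every level below the chosen one restarts its chain at this group.
                for lower in &mut levels[..i] {
                    lower.current_len = 1;
                    lower.head = Some(sg);
                }
                parent
            }
            // All levels full: start a fresh snapshot that heads every level.
            None => {
                for level in &mut levels {
                    level.current_len = 1;
                    level.head = Some(sg);
                }
                None
            }
        };
        out.push((sg, parent));
    }
    out
}

fn main() {
    // Two levels of maximum size 3, as in the diagram above.
    for (sg, parent) in assign_parents(&(0..=13).collect::<Vec<_>>(), &[3, 3]) {
        println!("{} -> {:?}", sg, parent);
    }
}
```

Running the sketch should print `3 -> None`, `4 -> Some(3)`, `6 -> Some(3)`, `12 -> None` and so on, matching the edges built by `compressed_3_3_from_0_to_13_with_state()` in the test helpers further down in this diff.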
To build `synapse_compress_state`, clone this repository and then execute `cargo build`.

This will create an executable and store it in `target/debug/synapse_compress_state`.

## Example usage

```
$ synapse-compress-state -p "postgresql://localhost/synapse" -r '!some_room:example.com' -o out.sql -t
$ synapse_compress_state -p "postgresql://localhost/synapse" -r '!some_room:example.com' -o out.sql -t
Fetching state from DB for room '!some_room:example.com'...
Got initial state from database. Checking for any missing state groups...
Number of state groups: 73904
@@ -60,3 +134,152 @@ New state map matches old one

# It's finished, so we can now go and rewrite the DB
$ psql synapse < out.data
```

## Running Options

- -p [POSTGRES_LOCATION] **Required**
  The configuration for connecting to the Postgres database. This should be of the form
  `"postgresql://username:password@mydomain.com/database"` or a key-value pair
  string: `"user=username password=password dbname=database host=mydomain.com"`
  See https://docs.rs/tokio-postgres/0.7.2/tokio_postgres/config/struct.Config.html
  for the full details.

- -r [ROOM_ID] **Required**
  The room to process (this is the value found in the `rooms` table of the database,
  not the common name for the room) - it should look like: "!wOlkWNmgkAZFxbTaqj:matrix.org".

- -b [MIN_STATE_GROUP]
  The state group to start processing from (non-inclusive).

- -n [GROUPS_TO_COMPRESS]
  How many groups to load into memory to compress (starting
  from the 1st group in the room or the group specified by -b).

- -l [LEVELS]
  Sizes of each new level in the compression algorithm, as a comma-separated list.
  The first entry in the list is for the lowest, most granular level, with each
  subsequent entry being for the next highest level. The number of entries in the
  list determines the number of levels that will be used. The sum of the sizes of
  the levels affects the performance of fetching the state from the database, as the
  sum of the sizes is the upper bound on the number of iterations needed to fetch a
  given set of state. [defaults to "100,50,25"]

- -m [COUNT]
  If the compressor cannot save this many rows from the database then it will stop early.

- -s [MAX_STATE_GROUP]
  If a max_state_group is specified then only state groups with ids lower than this
  number can be compressed.

- -o [FILE]
  File to output the SQL transactions to (for later running on the database).

- -t
  If this flag is set then each change to a particular state group is wrapped in a
  transaction. This should be done if you wish to apply the changes while synapse is
  still running.

- -c
  If this flag is set then the changes the compressor makes will be committed to the
  database. This should be safe to use while synapse is running as it wraps the changes
  to every state group in its own transaction (as if the transaction flag was set).

- -g
  If this flag is set then output the node and edge information for the state_group
  directed graph built up from the predecessor state_group links. These can be looked
  at in something like Gephi (https://gephi.org).


# Running tests

There are integration tests for these tools stored in `compressor_integration_tests/`.

To run the integration tests, you first need to start up a Postgres database
for the library to talk to. There is a docker-compose file that sets one up
with all of the correct tables. The tests can therefore be run as follows:

```
$ cd compressor_integration_tests/
$ docker-compose up -d
$ cargo test --workspace
$ docker-compose down
```
# Using the synapse_compress_state library

If you want to use the compressor in another project, it is recommended that you
use jemalloc `https://github.com/tikv/jemallocator`.

To prevent the progress bars from being shown, use the `no-progress-bars` feature.
(See `synapse_auto_compressor/Cargo.toml` for an example)
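For orientation, the sketch below shows roughly what driving the compressor from Rust code looks like. It is an assumption-laden example rather than documented public API: it reuses the `synapse_auto_compressor::manager` and `state_saving` functions and `synapse_compress_state::Level` exactly as the integration tests later in this diff call them, and the connection string and room ID are placeholders.

```rust
use synapse_auto_compressor::{
    manager::run_compressor_on_room_chunk,
    state_saving::{connect_to_database, create_tables_if_needed},
};
use synapse_compress_state::Level;

fn main() {
    // Placeholder connection string and room ID; substitute your own.
    let db_url = "postgresql://synapse_user:synapse_pass@localhost/synapse";
    let room_id = "!some_room:example.com";

    // Make sure the state_compressor_* bookkeeping tables exist.
    let mut client = connect_to_database(db_url).unwrap();
    create_tables_if_needed(&mut client).unwrap();

    // Two levels of maximum size 3, as used by the integration tests
    // (the CLI default is "100,50,25").
    let levels = vec![Level::new(3), Level::new(3)];

    // Compress one chunk of up to 7 state groups from this room,
    // picking up from wherever a previous run left off.
    run_compressor_on_room_chunk(db_url, room_id, 7, &levels).unwrap();
}
```

A downstream `Cargo.toml` would pull the crates in by path or git, enabling the `no-progress-bars` feature in the same way as `compressor_integration_tests/Cargo.toml` in this diff.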
# Troubleshooting

## Connecting to database

### From local machine

If you set up Synapse using the instructions on https://matrix-org.github.io/synapse/latest/postgres.html
you should have a username and password to use to log in to the postgres database. To run the compressor
from the machine where Postgres is running, the url will be the following:

`postgresql://synapse_user:synapse_password@localhost/synapse`

### From remote machine

If you wish to connect from a different machine, you'll need to edit your Postgres settings to allow
remote connections. This requires updating the
[`pg_hba.conf`](https://www.postgresql.org/docs/current/auth-pg-hba-conf.html) and the `listen_addresses`
setting in [`postgresql.conf`](https://www.postgresql.org/docs/current/runtime-config-connection.html).

## Printing debugging logs

The amount of output the tools produce can be altered by setting the RUST_LOG
environment variable.

To get more logs when running the synapse_auto_compressor tool, try the following:

```
$ RUST_LOG=debug synapse_auto_compressor -p postgresql://user:pass@localhost/synapse -c 50 -n 100
```

If you want to suppress all the debugging info you are getting from the
Postgres client then try:

```
RUST_LOG=synapse_auto_compressor=debug,synapse_compress_state=debug synapse_auto_compressor [etc.]
```

This will only print the debugging information from those two packages. For more info see
https://docs.rs/env_logger/0.9.0/env_logger/.

## Building difficulties

Building the `openssl-sys` dependency crate requires OpenSSL development tools to be installed,
and building on Linux will also require `pkg-config`.

This can be done on Ubuntu with: `$ apt-get install libssl-dev pkg-config`

Note that building requires quite a lot of memory and out-of-memory errors might not be
obvious. It's recommended you only build these tools on machines with at least 2GB of RAM.

## Auto Compressor skips chunks when running on already compressed room

If you have used the compressor before, with certain config options, the automatic tool will
produce lots of warnings of the form: `The compressor tried to increase the number of rows in ...`

To fix this, ensure that the chunk_size is set to at least the L1 level size (so if the level
sizes are "100,50,25" then the chunk_size should be at least 100).

Note: if the level sizes being used when rerunning are different to when run previously
this might lead to less efficient compression and thus chunks being skipped, but this shouldn't
be a large problem.

## Compressor is trying to increase the number of rows

Backfilling can lead to issues with compression. The synapse_auto_compressor will
skip chunks it can't reduce the size of and so this should help jump over the backfilled
state_groups. Lots of state resolution might also impact the ability to use the compressor.

To examine the state_group hierarchy, run the manual tool on a room with the `-g` option
and look at the graphs.
21  compressor_integration_tests/Cargo.toml  Normal file
@@ -0,0 +1,21 @@
[package]
name = "compressor_integration_tests"
version = "0.1.0"
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
string_cache = "0.8.7"
serial_test = "2.0.0"
openssl = "0.10.60"
postgres = "0.19.7"
postgres-openssl = "0.5.0"
rand = "0.8.5"
synapse_compress_state = { path = "../", features = ["no-progress-bars"] }
synapse_auto_compressor = { path = "../synapse_auto_compressor/" }
env_logger = "0.10.0"
log = "0.4.20"

[dependencies.state-map]
git = "https://github.com/matrix-org/rust-matrix-state-map"
28  compressor_integration_tests/database_setup.sh  Executable file
@@ -0,0 +1,28 @@
#!/bin/sh

#N.B. the database setup comes from:
#https://github.com/matrix-org/synapse/blob/develop/synapse/storage/schema/state/full_schemas/54/full.sql

# Setup the required tables for testing
psql --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<SQLCODE

CREATE TABLE state_groups (
    id BIGINT PRIMARY KEY,
    room_id TEXT NOT NULL,
    event_id TEXT NOT NULL
);

CREATE TABLE state_groups_state (
    state_group BIGINT NOT NULL,
    room_id TEXT NOT NULL,
    type TEXT NOT NULL,
    state_key TEXT NOT NULL,
    event_id TEXT NOT NULL
);

CREATE TABLE state_group_edges (
    state_group BIGINT NOT NULL,
    prev_state_group BIGINT NOT NULL
);

SQLCODE
20  compressor_integration_tests/docker-compose.yaml  Normal file
@@ -0,0 +1,20 @@
version: '3'
services:
  postgres:
    image: "postgres:latest"

    ports:
      # N.B. format is [port on machine]:[port to expose from container]
      - 5432:5432

    environment:
      POSTGRES_USER: synapse_user
      POSTGRES_PASSWORD: synapse_pass
      POSTGRES_DB: synapse
      PGDATA: /tmp/data

    volumes:
      - ./database_setup.sh:/docker-entrypoint-initdb.d/1_database_setup.sh

    tmpfs:
      /tmp/data
386  compressor_integration_tests/src/lib.rs  Normal file
@@ -0,0 +1,386 @@
use log::LevelFilter;
use openssl::ssl::{SslConnector, SslMethod, SslVerifyMode};
use postgres::{fallible_iterator::FallibleIterator, Client};
use postgres_openssl::MakeTlsConnector;
use rand::{distributions::Alphanumeric, thread_rng, Rng};
use state_map::StateMap;
use std::{
    borrow::Cow,
    collections::BTreeMap,
    env,
    fmt::{self, Write as _},
};
use string_cache::DefaultAtom as Atom;

use synapse_compress_state::StateGroupEntry;

pub mod map_builder;

pub static DB_URL: &str = "postgresql://synapse_user:synapse_pass@localhost/synapse";

/// Adds the contents of a state group map to the testing database
pub fn add_contents_to_database(room_id: &str, state_group_map: &BTreeMap<i64, StateGroupEntry>) {
    // connect to the database
    let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
    builder.set_verify(SslVerifyMode::NONE);
    let connector = MakeTlsConnector::new(builder.build());

    let mut client = Client::connect(DB_URL, connector).unwrap();

    // build up the query
    let mut sql = String::new();

    let room_id = PGEscape(room_id);
    let event_id = PGEscape("left_blank");

    for (sg, entry) in state_group_map {
        // create the entry for state_groups
        writeln!(
            sql,
            "INSERT INTO state_groups (id, room_id, event_id) \
             VALUES ({sg}, {room_id}, {event_id});",
        )
        .expect("Writing to a String cannot fail");

        // create the entry in state_group_edges IF exists
        if let Some(prev_sg) = entry.prev_state_group {
            writeln!(
                sql,
                "INSERT INTO state_group_edges (state_group, prev_state_group) \
                 VALUES ({sg}, {prev_sg});",
            )
            .unwrap();
        }

        // write entry for each row in delta
        if !entry.state_map.is_empty() {
            sql.push_str(
                "INSERT INTO state_groups_state \
                 (state_group, room_id, type, state_key, event_id) \
                 VALUES\n",
            );

            for ((t, s), e) in entry.state_map.iter() {
                let t = PGEscape(t);
                let s = PGEscape(s);
                let e = PGEscape(e);

                writeln!(sql, " ({sg}, {room_id}, {t}, {s}, {e}),").unwrap();
            }

            // Replace the last comma with a semicolon
            sql.replace_range((sql.len() - 2).., ";\n");
        }
    }

    client.batch_execute(&sql).unwrap();
}

/// Clears the contents of the testing database
pub fn empty_database() {
    // connect to the database
    let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
    builder.set_verify(SslVerifyMode::NONE);
    let connector = MakeTlsConnector::new(builder.build());

    let mut client = Client::connect(DB_URL, connector).unwrap();

    // delete all the contents from all three tables
    let sql = r"
        TRUNCATE state_groups;
        TRUNCATE state_group_edges;
        TRUNCATE state_groups_state;
    ";

    client.batch_execute(sql).unwrap();
}

/// Safely escape the strings in sql queries
struct PGEscape<'a>(pub &'a str);

impl<'a> fmt::Display for PGEscape<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut delim = Cow::from("$$");
        while self.0.contains(&delim as &str) {
            let s: String = thread_rng()
                .sample_iter(&Alphanumeric)
                .take(10)
                .map(char::from)
                .collect();

            delim = format!("${}$", s).into();
        }

        write!(f, "{}{}{}", delim, self.0, delim)
    }
}

/// Checks whether the state at each state group is the same as what the map thinks it should be
///
/// i.e. when synapse tries to work out the state for a given state group by looking at
/// the database. Will the state it gets be the same as what the map thinks it should be
pub fn database_collapsed_states_match_map(
    state_group_map: &BTreeMap<i64, StateGroupEntry>,
) -> bool {
    for sg in state_group_map.keys() {
        let map_state = collapse_state_with_map(state_group_map, *sg);
        let database_state = collapse_state_with_database(*sg);
        if map_state != database_state {
            println!("database state {} doesn't match", sg);
            println!("expected {:?}", map_state);
            println!("but found {:?}", database_state);
            return false;
        }
    }
    true
}

/// Gets the full state for a given group from the map (of deltas)
fn collapse_state_with_map(
    map: &BTreeMap<i64, StateGroupEntry>,
    state_group: i64,
) -> StateMap<Atom> {
    let mut entry = &map[&state_group];
    let mut state_map = StateMap::new();

    let mut stack = vec![state_group];

    while let Some(prev_state_group) = entry.prev_state_group {
        stack.push(prev_state_group);
        if !map.contains_key(&prev_state_group) {
            panic!("Missing {}", prev_state_group);
        }
        entry = &map[&prev_state_group];
    }

    for sg in stack.iter().rev() {
        state_map.extend(
            map[sg]
                .state_map
                .iter()
                .map(|((t, s), e)| ((t, s), e.clone())),
        );
    }

    state_map
}

fn collapse_state_with_database(state_group: i64) -> StateMap<Atom> {
    // connect to the database
    let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
    builder.set_verify(SslVerifyMode::NONE);
    let connector = MakeTlsConnector::new(builder.build());

    let mut client = Client::connect(DB_URL, connector).unwrap();

    // Gets the delta for a specific state group
    let query_deltas = r#"
        SELECT m.id, type, state_key, s.event_id
        FROM state_groups AS m
        LEFT JOIN state_groups_state AS s ON (m.id = s.state_group)
        WHERE m.id = $1
    "#;

    // If there is no delta for that specific state group, then we still want to find
    // the predecessor (so have split this into a different query)
    let query_pred = r#"
        SELECT prev_state_group
        FROM state_group_edges
        WHERE state_group = $1
    "#;

    let mut state_map: StateMap<Atom> = StateMap::new();

    let mut next_group = Some(state_group);

    while let Some(sg) = next_group {
        // get predecessor from state_group_edges
        let mut pred = client.query_raw(query_pred, [sg]).unwrap();

        // set next_group to predecessor
        next_group = match pred.next().unwrap() {
            Some(p) => p.get(0),
            None => None,
        };

        // if there was a predecessor then assert that it is unique
        if next_group.is_some() {
            assert!(pred.next().unwrap().is_none());
        }
        drop(pred);

        let mut rows = client.query_raw(query_deltas, [sg]).unwrap();

        while let Some(row) = rows.next().unwrap() {
            // Copy the single delta from the predecessor stored in this row
            if let Some(etype) = row.get::<_, Option<String>>(1) {
                let key = &row.get::<_, String>(2);

                // only insert if not overriding existing entry
                // this is because the newer delta is found FIRST
                if state_map.get(&etype, key).is_none() {
                    state_map.insert(&etype, key, row.get::<_, String>(3).into());
                }
            }
        }
    }

    state_map
}

/// Check whether predecessors and deltas stored in the database are the same as in the map
pub fn database_structure_matches_map(state_group_map: &BTreeMap<i64, StateGroupEntry>) -> bool {
    // connect to the database
    let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
    builder.set_verify(SslVerifyMode::NONE);
    let connector = MakeTlsConnector::new(builder.build());

    let mut client = Client::connect(DB_URL, connector).unwrap();

    // Gets the delta for a specific state group
    let query_deltas = r#"
        SELECT m.id, type, state_key, s.event_id
        FROM state_groups AS m
        LEFT JOIN state_groups_state AS s ON (m.id = s.state_group)
        WHERE m.id = $1
    "#;

    // If there is no delta for that specific state group, then we still want to find
    // the predecessor (so have split this into a different query)
    let query_pred = r#"
        SELECT prev_state_group
        FROM state_group_edges
        WHERE state_group = $1
    "#;

    for (sg, entry) in state_group_map {
        // get predecessor from state_group_edges
        let mut pred_iter = client.query_raw(query_pred, &[sg]).unwrap();

        // read out the predecessor value from the database
        let database_pred = match pred_iter.next().unwrap() {
            Some(p) => p.get(0),
            None => None,
        };

        // if there was a predecessor then assert that it is unique
        if database_pred.is_some() {
            assert!(pred_iter.next().unwrap().is_none());
        }

        // check if it matches map
        if database_pred != entry.prev_state_group {
            println!(
                "ERROR: predecessor for {} was {:?} (expected {:?})",
                sg, database_pred, entry.prev_state_group
            );
            return false;
        }
        // needed so that can create another query
        drop(pred_iter);

        // Now check that deltas are the same
        let mut state_map: StateMap<Atom> = StateMap::new();

        // Get delta from state_groups_state
        let mut rows = client.query_raw(query_deltas, &[sg]).unwrap();

        while let Some(row) = rows.next().unwrap() {
            // Copy the single delta from the predecessor stored in this row
            if let Some(etype) = row.get::<_, Option<String>>(1) {
                state_map.insert(
                    &etype,
                    &row.get::<_, String>(2),
                    row.get::<_, String>(3).into(),
                );
            }
        }

        // Check that the delta matches the map
        if state_map != entry.state_map {
            println!("ERROR: delta for {} didn't match", sg);
            println!("Expected: {:?}", entry.state_map);
            println!("Actual: {:?}", state_map);
            return false;
        }
    }
    true
}

/// Clears the compressor state from the database
pub fn clear_compressor_state() {
    // connect to the database
    let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
    builder.set_verify(SslVerifyMode::NONE);
    let connector = MakeTlsConnector::new(builder.build());

    let mut client = Client::connect(DB_URL, connector).unwrap();

    // delete all the contents from the state compressor tables
    let sql = r"
        TRUNCATE state_compressor_state;
        TRUNCATE state_compressor_progress;
        UPDATE state_compressor_total_progress SET lowest_uncompressed_group = 0;
    ";

    client.batch_execute(sql).unwrap();
}

#[test]
fn functions_are_self_consistent() {
    let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
    let mut prev = None;

    // This starts with the following structure
    //
    // 0-1-2-3-4-5-6-7-8-9-10-11-12-13
    //
    // Each group i has state:
    //     ('node','is', i)
    //     ('group', j, 'seen') - for all j less than i
    for i in 0i64..=13i64 {
        let mut entry = StateGroupEntry {
            in_range: true,
            prev_state_group: prev,
            state_map: StateMap::new(),
        };
        entry
            .state_map
            .insert("group", &i.to_string(), "seen".into());
        entry.state_map.insert("node", "is", i.to_string().into());

        initial.insert(i, entry);

        prev = Some(i)
    }

    empty_database();
    add_contents_to_database("room1", &initial);

    assert!(database_collapsed_states_match_map(&initial));
    assert!(database_structure_matches_map(&initial));
}

pub fn setup_logger() {
    // setup the logger for the synapse_auto_compressor
    // The default can be overwritten with RUST_LOG
    // see the README for more information
    if env::var("RUST_LOG").is_err() {
        let mut log_builder = env_logger::builder();
        // set is_test(true) so that the output is hidden by cargo test (unless the test fails)
        log_builder.is_test(true);
        // default to printing the debug information for both packages being tested
        // (Note that just setting the global level to debug will log every sql transaction)
        log_builder.filter_module("synapse_compress_state", LevelFilter::Debug);
        log_builder.filter_module("synapse_auto_compressor", LevelFilter::Debug);
        // use try_init() in case the logger has been set up by some previous test
        let _ = log_builder.try_init();
    } else {
        // If RUST_LOG was set then use that
        let mut log_builder = env_logger::Builder::from_env("RUST_LOG");
        // set is_test(true) so that the output is hidden by cargo test (unless the test fails)
        log_builder.is_test(true);
        // use try_init() in case the logger has been set up by some previous test
        let _ = log_builder.try_init();
    }
}
217  compressor_integration_tests/src/map_builder.rs  Normal file
@@ -0,0 +1,217 @@
use std::collections::BTreeMap;

use state_map::StateMap;
use synapse_compress_state::StateGroupEntry;

/// Generates long chain of state groups each with state deltas
///
/// If called with start=0, end=13 this would build the following:
///
/// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
///
/// Where each group i has state:
///     ('node','is', i)
///     ('group', j, 'seen') - for all j less than i
pub fn line_with_state(start: i64, end: i64) -> BTreeMap<i64, StateGroupEntry> {
    let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
    let mut prev = None;

    for i in start..=end {
        let mut entry = StateGroupEntry {
            in_range: true,
            prev_state_group: prev,
            state_map: StateMap::new(),
        };
        entry
            .state_map
            .insert("group", &i.to_string(), "seen".into());
        entry.state_map.insert("node", "is", i.to_string().into());

        initial.insert(i, entry);

        prev = Some(i)
    }

    initial
}

/// Generates line segments in a chain of state groups each with state deltas
///
/// If called with start=0, end=13 this would build the following:
///
/// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
///
/// Where each group i has state:
///     ('node','is', i)
///     ('group', j, 'seen') - for all j less than i
pub fn line_segments_with_state(start: i64, end: i64) -> BTreeMap<i64, StateGroupEntry> {
    let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
    let mut prev = None;

    for i in start..=end {
        // if the state is a snapshot then set its predecessor to NONE
        if (i - start) % 3 == 0 {
            prev = None;
        }

        // create a blank entry for it
        let mut entry = StateGroupEntry {
            in_range: true,
            prev_state_group: prev,
            state_map: StateMap::new(),
        };

        // if it's a snapshot then add in all previous state
        if prev.is_none() {
            for j in start..i {
                entry
                    .state_map
                    .insert("group", &j.to_string(), "seen".into());
            }
        }

        // add in the new state for this state group
        entry
            .state_map
            .insert("group", &i.to_string(), "seen".into());
        entry.state_map.insert("node", "is", i.to_string().into());

        // put it into the initial map
        initial.insert(i, entry);

        // set this group as the predecessor for the next
        prev = Some(i)
    }
    initial
}

/// This generates the correct compressed structure with 3,3 levels
///
/// Note: only correct structure when no impossible predecessors
///
/// Structure generated:
///
/// 0  3\       12
/// 1  4 6\     13
/// 2  5 7 9
///      8 10
///        11
/// Where each group i has state:
///     ('node','is', i)
///     ('group', j, 'seen') - for all j less than i
pub fn compressed_3_3_from_0_to_13_with_state() -> BTreeMap<i64, StateGroupEntry> {
    let expected_edges: BTreeMap<i64, i64> = vec![
        (1, 0),
        (2, 1),
        (4, 3),
        (5, 4),
        (6, 3),
        (7, 6),
        (8, 7),
        (9, 6),
        (10, 9),
        (11, 10),
        (13, 12),
    ]
    .into_iter()
    .collect();

    let mut expected: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();

    // Each group i has state:
    //     ('node','is', i)
    //     ('group', j, 'seen') - for all j less than i
    for i in 0i64..=13i64 {
        let prev = expected_edges.get(&i);

        //change from Option<&i64> to Option<i64>
        let prev = prev.copied();

        // create a blank entry for it
        let mut entry = StateGroupEntry {
            in_range: true,
            prev_state_group: prev,
            state_map: StateMap::new(),
        };

        // Add in all state between predecessor and now (non inclusive)
        if let Some(p) = prev {
            for j in (p + 1)..i {
                entry
                    .state_map
                    .insert("group", &j.to_string(), "seen".into());
            }
        } else {
            for j in 0i64..i {
                entry
                    .state_map
                    .insert("group", &j.to_string(), "seen".into());
            }
        }

        // add in the new state for this state group
        entry
            .state_map
            .insert("group", &i.to_string(), "seen".into());
        entry.state_map.insert("node", "is", i.to_string().into());

        // put it into the expected map
        expected.insert(i, entry);
    }
    expected
}

/// Generates state map structure that corresponds to edges (with deltas)
///
/// Each group i has state:
//     ('node','is', i)
//     ('group', j, 'seen') - for all j less than i
pub fn structure_from_edges_with_state(
    edges: BTreeMap<i64, i64>,
    start: i64,
    end: i64,
) -> BTreeMap<i64, StateGroupEntry> {
    let mut expected: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();

    // Each group i has state:
    //     ('node','is', i)
    //     ('group', j, 'seen') - for all j less than i
    for i in start..=end {
        let prev = edges.get(&i);

        //change from Option<&i64> to Option<i64>
        let prev = prev.copied();

        // create a blank entry for it
        let mut entry = StateGroupEntry {
            in_range: true,
            prev_state_group: prev,
            state_map: StateMap::new(),
        };

        // Add in all state between predecessor and now (non inclusive)
        if let Some(p) = prev {
            for j in (p + 1)..i {
                entry
                    .state_map
                    .insert("group", &j.to_string(), "seen".into());
            }
        } else {
            for j in start..i {
                entry
                    .state_map
                    .insert("group", &j.to_string(), "seen".into());
            }
        }

        // add in the new state for this state group
        entry
            .state_map
            .insert("group", &i.to_string(), "seen".into());
        entry.state_map.insert("node", "is", i.to_string().into());

        // put it into the expected map
        expected.insert(i, entry);
    }
    expected
}
@@ -0,0 +1,230 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use compressor_integration_tests::{
|
||||
add_contents_to_database, clear_compressor_state, database_collapsed_states_match_map,
|
||||
database_structure_matches_map, empty_database,
|
||||
map_builder::{
|
||||
compressed_3_3_from_0_to_13_with_state, line_segments_with_state,
|
||||
structure_from_edges_with_state,
|
||||
},
|
||||
setup_logger, DB_URL,
|
||||
};
|
||||
use serial_test::serial;
|
||||
use synapse_auto_compressor::{
|
||||
manager::{compress_chunks_of_database, run_compressor_on_room_chunk},
|
||||
state_saving::{connect_to_database, create_tables_if_needed},
|
||||
};
|
||||
use synapse_compress_state::Level;
|
||||
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn run_compressor_on_room_chunk_works() {
|
||||
setup_logger();
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let initial = line_segments_with_state(0, 13);
|
||||
empty_database();
|
||||
add_contents_to_database("room1", &initial);
|
||||
|
||||
let mut client = connect_to_database(DB_URL).unwrap();
|
||||
create_tables_if_needed(&mut client).unwrap();
|
||||
clear_compressor_state();
|
||||
|
||||
// compress in 3,3 level sizes by default
|
||||
let default_levels = vec![Level::new(3), Level::new(3)];
|
||||
|
||||
// compress the first 7 groups in the room
|
||||
// structure should be the following afterwards
|
||||
// (NOTE: only including compressed groups)
|
||||
//
|
||||
// 0 3\
|
||||
// 1 4 6
|
||||
// 2 5
|
||||
run_compressor_on_room_chunk(DB_URL, "room1", 7, &default_levels).unwrap();
|
||||
|
||||
// compress the next 7 groups
|
||||
|
||||
run_compressor_on_room_chunk(DB_URL, "room1", 7, &default_levels).unwrap();
|
||||
|
||||
// This should have created the following structure in the database
|
||||
// i.e. groups 6 and 9 should have changed from before
|
||||
// N.B. this saves 11 rows
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
let expected = compressed_3_3_from_0_to_13_with_state();
|
||||
|
||||
// Check that the database still gives correct states for each group!
|
||||
assert!(database_collapsed_states_match_map(&initial));
|
||||
|
||||
// Check that the structure of the database matches the expected structure
|
||||
assert!(database_structure_matches_map(&expected));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn compress_chunks_of_database_compresses_multiple_rooms() {
|
||||
setup_logger();
|
||||
// This creates 2 rooms with the following structure
|
||||
//
|
||||
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
|
||||
// (with room2's numbers shifted up 14)
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i in that room
|
||||
let initial1 = line_segments_with_state(0, 13);
|
||||
let initial2 = line_segments_with_state(14, 27);
|
||||
|
||||
empty_database();
|
||||
add_contents_to_database("room1", &initial1);
|
||||
add_contents_to_database("room2", &initial2);
|
||||
|
||||
let mut client = connect_to_database(DB_URL).unwrap();
|
||||
create_tables_if_needed(&mut client).unwrap();
|
||||
clear_compressor_state();
|
||||
|
||||
// compress in 3,3 level sizes by default
|
||||
let default_levels = vec![Level::new(3), Level::new(3)];
|
||||
|
||||
// Compress 4 chunks of size 8.
|
||||
// The first two should compress room1 and the second two should compress room2
|
||||
compress_chunks_of_database(DB_URL, 8, &default_levels, 4).unwrap();
|
||||
|
||||
// We are aiming for the following structure in the database for room1
|
||||
// i.e. groups 6 and 9 should have changed from initial map
|
||||
// N.B. this saves 11 rows
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
//
|
||||
// Where each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let expected1 = compressed_3_3_from_0_to_13_with_state();
|
||||
|
||||
// Check that the database still gives correct states for each group in room1
|
||||
assert!(database_collapsed_states_match_map(&initial1));
|
||||
|
||||
// Check that the structure of the database matches the expected structure for room1
|
||||
assert!(database_structure_matches_map(&expected1));
|
||||
|
||||
// room 2 should have the same structure but with all numbers shifted up by 14
|
||||
let expected_edges: BTreeMap<i64, i64> = vec![
|
||||
(15, 14),
|
||||
(16, 15),
|
||||
(18, 17),
|
||||
(19, 18),
|
||||
(20, 17),
|
||||
(21, 20),
|
||||
(22, 21),
|
||||
(23, 20),
|
||||
(24, 23),
|
||||
(25, 24),
|
||||
(27, 26),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let expected2 = structure_from_edges_with_state(expected_edges, 14, 27);
|
||||
|
||||
// Check that the database still gives correct states for each group in room2
|
||||
assert!(database_collapsed_states_match_map(&initial2));
|
||||
|
||||
// Check that the structure of the database matches the expected structure for room2
|
||||
assert!(database_structure_matches_map(&expected2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn compress_chunks_of_database_continues_where_it_left_off() {
|
||||
setup_logger();
|
||||
// This creates 2 rooms with the following structure
|
||||
//
|
||||
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
|
||||
// (with room2's numbers shifted up 14)
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i in that room
|
||||
let initial1 = line_segments_with_state(0, 13);
|
||||
let initial2 = line_segments_with_state(14, 27);
|
||||
|
||||
empty_database();
|
||||
add_contents_to_database("room1", &initial1);
|
||||
add_contents_to_database("room2", &initial2);
|
||||
|
||||
let mut client = connect_to_database(DB_URL).unwrap();
|
||||
create_tables_if_needed(&mut client).unwrap();
|
||||
clear_compressor_state();
|
||||
|
||||
// compress in 3,3 level sizes by default
|
||||
let default_levels = vec![Level::new(3), Level::new(3)];
|
||||
|
||||
// Compress chunks of various sizes:
|
||||
//
|
||||
// These two should compress room1
|
||||
compress_chunks_of_database(DB_URL, 8, &default_levels, 1).unwrap();
|
||||
compress_chunks_of_database(DB_URL, 100, &default_levels, 1).unwrap();
|
||||
// These three should compress room2
|
||||
compress_chunks_of_database(DB_URL, 1, &default_levels, 2).unwrap();
|
||||
compress_chunks_of_database(DB_URL, 5, &default_levels, 1).unwrap();
|
||||
compress_chunks_of_database(DB_URL, 5, &default_levels, 1).unwrap();
|
||||
|
||||
// We are aiming for the following structure in the database for room1
|
||||
// i.e. groups 6 and 9 should have changed from initial map
|
||||
// N.B. this saves 11 rows
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
//
|
||||
// Where each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let expected1 = compressed_3_3_from_0_to_13_with_state();
|
||||
|
||||
// Check that the database still gives correct states for each group in room1
|
||||
assert!(database_collapsed_states_match_map(&initial1));
|
||||
|
||||
// Check that the structure of the database matches the expected structure for room1
|
||||
assert!(database_structure_matches_map(&expected1));
|
||||
|
||||
// room 2 should have the same structure but with all numbers shifted up by 14
|
||||
let expected_edges: BTreeMap<i64, i64> = vec![
|
||||
(15, 14),
|
||||
(16, 15),
|
||||
(18, 17),
|
||||
(19, 18),
|
||||
(20, 17),
|
||||
(21, 20),
|
||||
(22, 21),
|
||||
(23, 20),
|
||||
(24, 23),
|
||||
(25, 24),
|
||||
(27, 26),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let expected2 = structure_from_edges_with_state(expected_edges, 14, 27);
|
||||
|
||||
// Check that the database still gives correct states for each group in room2
|
||||
assert!(database_collapsed_states_match_map(&initial2));
|
||||
|
||||
// Check that the structure of the database matches the expected structure for room2
|
||||
assert!(database_structure_matches_map(&expected2));
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
use compressor_integration_tests::{clear_compressor_state, setup_logger, DB_URL};
|
||||
use serial_test::serial;
|
||||
use synapse_auto_compressor::state_saving::{
|
||||
connect_to_database, create_tables_if_needed, read_room_compressor_state,
|
||||
write_room_compressor_state,
|
||||
};
|
||||
use synapse_compress_state::Level;
|
||||
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn write_then_read_state_gives_correct_results() {
|
||||
setup_logger();
|
||||
let mut client = connect_to_database(DB_URL).unwrap();
|
||||
create_tables_if_needed(&mut client).unwrap();
|
||||
clear_compressor_state();
|
||||
|
||||
let room_id = "room1";
|
||||
let written_info: Vec<Level> =
|
||||
vec![Level::restore(3, 1, Some(6)), Level::restore(3, 2, Some(6))];
|
||||
let written_num = 53;
|
||||
write_room_compressor_state(&mut client, room_id, &written_info, written_num).unwrap();
|
||||
|
||||
let (read_num, read_info) = read_room_compressor_state(&mut client, room_id)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(written_info, read_info);
|
||||
assert_eq!(written_num, read_num);
|
||||
}
|
||||
575
compressor_integration_tests/tests/compressor_config_tests.rs
Normal file
@@ -0,0 +1,575 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use compressor_integration_tests::{
|
||||
add_contents_to_database, database_collapsed_states_match_map, database_structure_matches_map,
|
||||
empty_database,
|
||||
map_builder::{
|
||||
compressed_3_3_from_0_to_13_with_state, line_segments_with_state, line_with_state,
|
||||
structure_from_edges_with_state,
|
||||
},
|
||||
setup_logger, DB_URL,
|
||||
};
|
||||
use serial_test::serial;
|
||||
use synapse_compress_state::{run, Config};
|
||||
|
||||
// Remember to add #[serial(db)] before any test that accesses the database.
|
||||
// Only one test with this annotation can run at once - preventing
|
||||
// concurrency bugs.
|
||||
//
|
||||
// You will probably also want to use common::empty_database() at the start
|
||||
// of each test as well (since their order of execution is not guaranteed)
|
||||
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn run_succeeds_without_crashing() {
|
||||
setup_logger();
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let initial = line_with_state(0, 13);
|
||||
|
||||
empty_database();
|
||||
add_contents_to_database("room1", &initial);
|
||||
|
||||
let db_url = DB_URL.to_string();
|
||||
let room_id = "room1".to_string();
|
||||
let output_file = Some("./tests/tmp/run_succeeds_without_crashing.sql".to_string());
|
||||
let min_state_group = None;
|
||||
let groups_to_compress = None;
|
||||
let min_saved_rows = None;
|
||||
let max_state_group = None;
|
||||
let level_sizes = "3,3".to_string();
|
||||
let transactions = true;
|
||||
let graphs = false;
|
||||
let commit_changes = false;
|
||||
let verify = true;
|
||||
|
||||
let config = Config::new(
|
||||
db_url,
|
||||
room_id,
|
||||
output_file,
|
||||
min_state_group,
|
||||
groups_to_compress,
|
||||
min_saved_rows,
|
||||
max_state_group,
|
||||
level_sizes,
|
||||
transactions,
|
||||
graphs,
|
||||
commit_changes,
|
||||
verify,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
run(config);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn changes_commited_if_no_min_saved_rows() {
|
||||
setup_logger();
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let initial = line_segments_with_state(0, 13);
|
||||
|
||||
// Place this initial state into an empty database
|
||||
empty_database();
|
||||
add_contents_to_database("room1", &initial);
|
||||
|
||||
// set up the config options
|
||||
let db_url = DB_URL.to_string();
|
||||
let room_id = "room1".to_string();
|
||||
let output_file = Some("./tests/tmp/changes_commited_if_no_min_saved_rows.sql".to_string());
|
||||
let min_state_group = None;
|
||||
let min_saved_rows = None;
|
||||
let groups_to_compress = None;
|
||||
let max_state_group = None;
|
||||
let level_sizes = "3,3".to_string();
|
||||
let transactions = true;
|
||||
let graphs = false;
|
||||
let commit_changes = true;
|
||||
let verify = true;
|
||||
|
||||
let config = Config::new(
|
||||
db_url,
|
||||
room_id,
|
||||
output_file,
|
||||
min_state_group,
|
||||
groups_to_compress,
|
||||
min_saved_rows,
|
||||
max_state_group,
|
||||
level_sizes,
|
||||
transactions,
|
||||
graphs,
|
||||
commit_changes,
|
||||
verify,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Run the compressor with those settings
|
||||
run(config);
|
||||
|
||||
// This should have created the following structure in the database
|
||||
// i.e. groups 6 and 9 should have changed from before
|
||||
// N.B. this saves 11 rows
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
let expected = compressed_3_3_from_0_to_13_with_state();
|
||||
|
||||
// Check that the database still gives correct states for each group!
|
||||
assert!(database_collapsed_states_match_map(&initial));
|
||||
|
||||
// Check that the structure of the database matches the expected structure
|
||||
assert!(database_structure_matches_map(&expected))
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn changes_commited_if_min_saved_rows_exceeded() {
|
||||
setup_logger();
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let initial = line_segments_with_state(0, 13);
|
||||
|
||||
// Place this initial state into an empty database
|
||||
empty_database();
|
||||
add_contents_to_database("room1", &initial);
|
||||
|
||||
// set up the config options
|
||||
let db_url = DB_URL.to_string();
|
||||
let room_id = "room1".to_string();
|
||||
let output_file = Some("./tests/tmp/changes_commited_if_no_min_saved_rows.sql".to_string());
|
||||
let min_state_group = None;
|
||||
let min_saved_rows = Some(10);
|
||||
let groups_to_compress = None;
|
||||
let max_state_group = None;
|
||||
let level_sizes = "3,3".to_string();
|
||||
let transactions = true;
|
||||
let graphs = false;
|
||||
let commit_changes = true;
|
||||
let verify = true;
|
||||
|
||||
let config = Config::new(
|
||||
db_url,
|
||||
room_id,
|
||||
output_file,
|
||||
min_state_group,
|
||||
groups_to_compress,
|
||||
min_saved_rows,
|
||||
max_state_group,
|
||||
level_sizes,
|
||||
transactions,
|
||||
graphs,
|
||||
commit_changes,
|
||||
verify,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Run the compressor with those settings
|
||||
run(config);
|
||||
|
||||
// This should have created the following structure in the database
|
||||
// i.e. groups 6 and 9 should have changed from before
|
||||
// N.B. this saves 11 rows
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
let expected = compressed_3_3_from_0_to_13_with_state();
|
||||
|
||||
// Check that the database still gives correct states for each group!
|
||||
assert!(database_collapsed_states_match_map(&initial));
|
||||
|
||||
// Check that the structure of the database matches the expected structure
|
||||
assert!(database_structure_matches_map(&expected));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn changes_not_commited_if_fewer_than_min_saved_rows() {
|
||||
setup_logger();
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let initial = line_segments_with_state(0, 13);
|
||||
|
||||
// Place this initial state into an empty database
|
||||
empty_database();
|
||||
add_contents_to_database("room1", &initial);
|
||||
|
||||
// set up the config options
|
||||
let db_url = DB_URL.to_string();
|
||||
let room_id = "room1".to_string();
|
||||
let output_file =
|
||||
Some("./tests/tmp/changes_not_commited_if_fewer_than_min_saved_rows.sql".to_string());
|
||||
let min_state_group = None;
|
||||
let min_saved_rows = Some(12);
|
||||
let groups_to_compress = None;
|
||||
let max_state_group = None;
|
||||
let level_sizes = "3,3".to_string();
|
||||
let transactions = true;
|
||||
let graphs = false;
|
||||
let commit_changes = true;
|
||||
let verify = true;
|
||||
|
||||
let config = Config::new(
|
||||
db_url,
|
||||
room_id,
|
||||
output_file,
|
||||
min_state_group,
|
||||
groups_to_compress,
|
||||
min_saved_rows,
|
||||
max_state_group,
|
||||
level_sizes,
|
||||
transactions,
|
||||
graphs,
|
||||
commit_changes,
|
||||
verify,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Run the compressor with those settings
|
||||
run(config);
|
||||
|
||||
// This should have created the following structure when running
|
||||
// (i.e. try and change groups 6 and 9 only)
|
||||
// BUT: This saves 11 rows which is fewer than min_saved_rows
|
||||
// therefore there should be no changes committed!
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
|
||||
// Check that the database still gives correct states for each group!
|
||||
assert!(database_collapsed_states_match_map(&initial));
|
||||
|
||||
// Check that the structure of the database matches the expected structure
|
||||
assert!(database_structure_matches_map(&initial));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "Error connecting to the database:")]
|
||||
fn run_panics_if_invalid_db_url() {
|
||||
setup_logger();
|
||||
// set up the config options
|
||||
let db_url = "thisIsAnInvalidURL".to_string();
|
||||
let room_id = "room1".to_string();
|
||||
let output_file = Some("./tests/tmp/run_panics_if_invalid_db_url.sql".to_string());
|
||||
let min_state_group = None;
|
||||
let min_saved_rows = None;
|
||||
let groups_to_compress = None;
|
||||
let max_state_group = None;
|
||||
let level_sizes = "3,3".to_string();
|
||||
let transactions = true;
|
||||
let graphs = false;
|
||||
let commit_changes = true;
|
||||
let verify = true;
|
||||
|
||||
let config = Config::new(
|
||||
db_url,
|
||||
room_id,
|
||||
output_file,
|
||||
min_state_group,
|
||||
groups_to_compress,
|
||||
min_saved_rows,
|
||||
max_state_group,
|
||||
level_sizes,
|
||||
transactions,
|
||||
graphs,
|
||||
commit_changes,
|
||||
verify,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Run the compressor with those settings
|
||||
run(config);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn run_only_affects_given_room_id() {
|
||||
setup_logger();
|
||||
// build room1 stuff up
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let initial_room_1 = line_segments_with_state(0, 13);
|
||||
|
||||
// build room2 stuff up
|
||||
// This starts with the same structure as room 1 but just all group ids
|
||||
// 14 higher
|
||||
let initial_room_2 = line_segments_with_state(14, 28);
|
||||
|
||||
// Place this initial state into an empty database
|
||||
empty_database();
|
||||
add_contents_to_database("room1", &initial_room_1);
|
||||
add_contents_to_database("room2", &initial_room_2);
|
||||
|
||||
// set up the config options
|
||||
let db_url = DB_URL.to_string();
|
||||
let room_id = "room1".to_string();
|
||||
let output_file = Some("./tests/tmp/run_only_affects_given_room_id.sql".to_string());
|
||||
let min_state_group = None;
|
||||
let min_saved_rows = None;
|
||||
let groups_to_compress = None;
|
||||
let max_state_group = None;
|
||||
let level_sizes = "3,3".to_string();
|
||||
let transactions = true;
|
||||
let graphs = false;
|
||||
let commit_changes = true;
|
||||
let verify = true;
|
||||
|
||||
let config = Config::new(
|
||||
db_url,
|
||||
room_id,
|
||||
output_file,
|
||||
min_state_group,
|
||||
groups_to_compress,
|
||||
min_saved_rows,
|
||||
max_state_group,
|
||||
level_sizes,
|
||||
transactions,
|
||||
graphs,
|
||||
commit_changes,
|
||||
verify,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Run the compressor with those settings
|
||||
run(config);
|
||||
|
||||
// This should have created the following structure in the database
|
||||
// i.e. groups 6 and 9 should have changed from before
|
||||
// N.B. this saves 11 rows
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
let expected = compressed_3_3_from_0_to_13_with_state();
|
||||
|
||||
// Check that the database still gives correct states for each group
|
||||
// in both room1 and room2
|
||||
assert!(database_collapsed_states_match_map(&initial_room_1));
|
||||
assert!(database_collapsed_states_match_map(&initial_room_2));
|
||||
|
||||
// Check that the structure of the database matches the expected structure
|
||||
// in both room1 and room2
|
||||
assert!(database_structure_matches_map(&expected));
|
||||
assert!(database_structure_matches_map(&initial_room_2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn run_respects_groups_to_compress() {
|
||||
setup_logger();
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let initial = line_segments_with_state(0, 13);
|
||||
|
||||
// Place this initial state into an empty database
|
||||
empty_database();
|
||||
add_contents_to_database("room1", &initial);
|
||||
|
||||
// set up the config options
|
||||
let db_url = DB_URL.to_string();
|
||||
let room_id = "room1".to_string();
|
||||
let output_file = Some("./tests/tmp/run_respects_groups_to_compress.sql".to_string());
|
||||
let min_state_group = Some(2);
|
||||
let min_saved_rows = None;
|
||||
let groups_to_compress = Some(9);
|
||||
let max_state_group = None;
|
||||
let level_sizes = "3,3".to_string();
|
||||
let transactions = true;
|
||||
let graphs = false;
|
||||
let commit_changes = true;
|
||||
let verify = true;
|
||||
|
||||
let config = Config::new(
|
||||
db_url,
|
||||
room_id,
|
||||
output_file,
|
||||
min_state_group,
|
||||
groups_to_compress,
|
||||
min_saved_rows,
|
||||
max_state_group,
|
||||
level_sizes,
|
||||
transactions,
|
||||
graphs,
|
||||
commit_changes,
|
||||
verify,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Run the compressor with those settings
|
||||
run(config);
|
||||
|
||||
// This should have created the following structure in the database
|
||||
// as it should only compress from groups higher than 2 (non inclusive)
|
||||
// and should only compress a total of 9 groups
|
||||
// i.e. so only group 9 should have changed from before
|
||||
// N.B. this saves 7 rows
|
||||
//
|
||||
// 0 3 6\ 12
|
||||
// 1 4 7 9 13
|
||||
// 2 5 8 10
|
||||
// 11
|
||||
//
|
||||
let expected_edges: BTreeMap<i64, i64> = vec![
|
||||
(1, 0),
|
||||
(2, 1),
|
||||
(4, 3),
|
||||
(5, 4),
|
||||
(7, 6),
|
||||
(8, 7),
|
||||
(9, 6),
|
||||
(10, 9),
|
||||
(11, 10),
|
||||
(13, 12),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let expected = structure_from_edges_with_state(expected_edges, 0, 13);
|
||||
|
||||
// Check that the database still gives correct states for each group!
|
||||
assert!(database_collapsed_states_match_map(&initial));
|
||||
|
||||
// Check that the structure of the database matches the expected structure
|
||||
assert!(database_structure_matches_map(&expected))
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn run_is_idempotent_when_run_on_whole_room() {
|
||||
setup_logger();
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let initial = line_segments_with_state(0, 13);
|
||||
|
||||
// Place this initial state into an empty database
|
||||
empty_database();
|
||||
add_contents_to_database("room1", &initial);
|
||||
|
||||
// set up the config options
|
||||
let db_url = DB_URL.to_string();
|
||||
let room_id = "room1".to_string();
|
||||
let output_file1 =
|
||||
Some("./tests/tmp/run_is_idempotent_when_run_on_whole_room_1.sql".to_string());
|
||||
let output_file2 =
|
||||
Some("./tests/tmp/run_is_idempotent_when_run_on_whole_room_2.sql".to_string());
|
||||
let min_state_group = None;
|
||||
let min_saved_rows = None;
|
||||
let groups_to_compress = None;
|
||||
let max_state_group = None;
|
||||
let level_sizes = "3,3".to_string();
|
||||
let transactions = true;
|
||||
let graphs = false;
|
||||
let commit_changes = true;
|
||||
let verify = true;
|
||||
|
||||
let config1 = Config::new(
|
||||
db_url.clone(),
|
||||
room_id.clone(),
|
||||
output_file1,
|
||||
min_state_group,
|
||||
groups_to_compress,
|
||||
min_saved_rows,
|
||||
max_state_group,
|
||||
level_sizes.clone(),
|
||||
transactions,
|
||||
graphs,
|
||||
commit_changes,
|
||||
verify,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let config2 = Config::new(
|
||||
db_url,
|
||||
room_id,
|
||||
output_file2,
|
||||
min_state_group,
|
||||
groups_to_compress,
|
||||
min_saved_rows,
|
||||
max_state_group,
|
||||
level_sizes,
|
||||
transactions,
|
||||
graphs,
|
||||
commit_changes,
|
||||
verify,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// We are aiming for the following structure in the database
|
||||
// i.e. groups 6 and 9 should have changed from initial map
|
||||
// N.B. this saves 11 rows
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
//
|
||||
// Where each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let expected = compressed_3_3_from_0_to_13_with_state();
|
||||
|
||||
// Run the compressor with those settings for the first time
|
||||
run(config1);
|
||||
|
||||
// Check that the database still gives correct states for each group!
|
||||
assert!(database_collapsed_states_match_map(&initial));
|
||||
|
||||
// Check that the structure of the database matches the expected structure
|
||||
assert!(database_structure_matches_map(&expected));
|
||||
|
||||
// Run the compressor with those settings for the second time
|
||||
run(config2);
|
||||
|
||||
// Check that the database still gives correct states for each group!
|
||||
assert!(database_collapsed_states_match_map(&initial));
|
||||
|
||||
// Check that the structure of the database still matches the expected structure
|
||||
assert!(database_structure_matches_map(&expected));
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
use compressor_integration_tests::{
|
||||
add_contents_to_database, database_collapsed_states_match_map, database_structure_matches_map,
|
||||
empty_database,
|
||||
map_builder::{compressed_3_3_from_0_to_13_with_state, line_segments_with_state},
|
||||
setup_logger, DB_URL,
|
||||
};
|
||||
use serial_test::serial;
|
||||
use synapse_compress_state::{continue_run, Level};
|
||||
|
||||
// Tests the saving and continuing functionality
|
||||
// The compressor should produce the same results when run in one go
|
||||
// as when run in multiple stages
|
||||
#[test]
|
||||
#[serial(db)]
|
||||
fn continue_run_called_twice_same_as_run() {
|
||||
setup_logger();
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
let initial = line_segments_with_state(0, 13);
|
||||
|
||||
// Place this initial state into an empty database
|
||||
empty_database();
|
||||
add_contents_to_database("room1", &initial);
|
||||
|
||||
let db_url = DB_URL.to_string();
|
||||
let room_id = "room1".to_string();
|
||||
|
||||
// will run the compression in two batches
|
||||
let start = None;
|
||||
let chunk_size = 7;
|
||||
|
||||
// compress in 3,3 level sizes
|
||||
// since the compressor hasn't been run before, the levels start empty
|
||||
let level_info = vec![Level::new(3), Level::new(3)];
|
||||
|
||||
// Run the compressor with those settings
|
||||
let chunk_stats_1 = continue_run(start, chunk_size, &db_url, &room_id, &level_info).unwrap();
|
||||
|
||||
// Assert that it stopped at 6 (i.e. after the 7 groups 0...6)
|
||||
assert_eq!(chunk_stats_1.last_compressed_group, 6);
|
||||
// structure should be the following at this point
|
||||
// (NOTE: only including compressed groups)
|
||||
//
|
||||
// 0 3\
|
||||
// 1 4 6
|
||||
// 2 5
|
||||
assert_eq!(
|
||||
chunk_stats_1.new_level_info,
|
||||
vec![Level::restore(3, 1, Some(6)), Level::restore(3, 2, Some(6))]
|
||||
);
|
||||
|
||||
let start = Some(6);
|
||||
let chunk_size = 7;
|
||||
let level_info = chunk_stats_1.new_level_info;
|
||||
|
||||
// Run the compressor with those settings
|
||||
let chunk_stats_2 = continue_run(start, chunk_size, &db_url, &room_id, &level_info).unwrap();
|
||||
|
||||
// Assert that it stopped at 13 (i.e. after the remaining groups 7...13)
|
||||
assert_eq!(chunk_stats_2.last_compressed_group, 13);
|
||||
|
||||
// This should have created the following structure in the database
|
||||
// i.e. groups 6 and 9 should have changed from before
|
||||
// N.B. this saves 11 rows
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
let expected = compressed_3_3_from_0_to_13_with_state();
|
||||
|
||||
// Check that the database still gives correct states for each group!
|
||||
assert!(database_collapsed_states_match_map(&initial));
|
||||
|
||||
// Check that the structure of the database matches the expected structure
|
||||
assert!(database_structure_matches_map(&expected))
|
||||
}
|
||||
1
compressor_integration_tests/tests/tmp/README.md
Normal file
@@ -0,0 +1 @@
|
||||
This folder is where sql files generated by the integration tests are saved
|
||||
107
docs/algorithm.md
Normal file
@@ -0,0 +1,107 @@
|
||||
# Compression algorithm
|
||||
|
||||
## What is state?
|
||||
State is things like who is in a room, what the room topic/name is, who has
|
||||
what privilege levels etc. Synapse keeps track of it for various reasons such as
|
||||
spotting invalid events (e.g. ones sent by banned users) and providing room membership
|
||||
information to clients.
|
||||
|
||||
## What is a state group?
|
||||
|
||||
Synapse needs to keep track of the state at the moment of each event. A state group
|
||||
corresponds to a unique state. The database table `event_to_state_groups` keeps track
|
||||
of the mapping from event ids to state group ids.
|
||||
|
||||
Consider the following simplified example:
|
||||
```
State group id | State
_____________________________________________
1              | Alice in room
2              | Alice in room, Bob in room
3              | Bob in room


Event id | What the event was
______________________________________
1        | Alice sends a message
2        | Alice sends another message
3        | Bob joins the room
4        | Bob sends a message
5        | Alice leaves the room
6        | Bob sends a message


Event id | State group id
_________________________
1        | 1
2        | 1
3        | 2
4        | 2
5        | 3
6        | 3
```
|
||||
|
||||
## What are deltas and predecessors?
|
||||
When a new state event happens (e.g. Bob joins the room) a new state group is created.
|
||||
BUT instead of copying all of the state from the previous state group, we just store
|
||||
the change from the previous group (saving on lots of storage space!). The difference
|
||||
from the previous state group is called the "delta".
|
||||
|
||||
So for the previous example, we would have the following (Note only rows 1 and 2 will
|
||||
make sense at this point):
|
||||
|
||||
```
|
||||
State group id | Previous state group id | Delta
|
||||
____________________________________________________________
|
||||
1 | NONE | Alice in room
|
||||
2 | 1 | Bob in room
|
||||
3 | NONE | Bob in room
|
||||
```
|
||||
|
||||
So why is state group 3's previous state group NONE and not 2? Well, the way that deltas
|
||||
work in Synapse is that they can only add in new state or overwrite old state, but they
|
||||
cannot remove it. (So if the room topic is changed then that is just overwriting state,
|
||||
but removing Alice from the room is neither an addition nor an overwriting). If it is
|
||||
impossible to find a delta, then you just start from scratch again with a "snapshot" of
|
||||
the entire state.
|
||||
|
||||
(NOTE this is not documentation on how synapse handles leaving rooms but is purely for illustrative
|
||||
purposes)
|
||||
|
||||
The state of a state group is worked out by following the chain of previous state groups and
adding together all of their deltas (with the most recent taking precedence).
|
||||
|
||||
The mapping from state group to previous state group takes place in `state_group_edges`
|
||||
and the deltas are stored in `state_groups_state`.
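
To make that lookup concrete, here is a minimal sketch of resolving the full state of a group
by walking its predecessors and applying the deltas. This is not Synapse's or this tool's actual
code; the types and names are invented purely for illustration.

```rust
use std::collections::{BTreeMap, HashMap};

// Invented toy types for illustration only.
struct Entry {
    prev: Option<i64>,
    // Delta held for this group, keyed by (event type, state key).
    delta: HashMap<(String, String), String>,
}

// Resolve the full state of `group` by walking back to its snapshot and then
// applying the deltas oldest-first, so newer values take precedence.
fn collapse(groups: &BTreeMap<i64, Entry>, mut group: i64) -> HashMap<(String, String), String> {
    let mut chain = vec![group];
    while let Some(prev) = groups[&group].prev {
        chain.push(prev);
        group = prev;
    }

    let mut state = HashMap::new();
    for g in chain.into_iter().rev() {
        for (key, value) in &groups[&g].delta {
            state.insert(key.clone(), value.clone());
        }
    }
    state
}

fn main() {
    let member = |user: &str| ("m.room.member".to_string(), user.to_string());

    let mut groups = BTreeMap::new();
    groups.insert(1, Entry {
        prev: None,
        delta: HashMap::from([(member("@alice:example.com"), "join".to_string())]),
    });
    groups.insert(2, Entry {
        prev: Some(1),
        delta: HashMap::from([(member("@bob:example.com"), "join".to_string())]),
    });

    // State group 2 resolves to both Alice and Bob being in the room.
    assert_eq!(collapse(&groups, 2).len(), 2);
}
```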
|
||||
|
||||
## What are we compressing then?
|
||||
In order to speed up the conversion from state group id to state, there is a limit of 100
|
||||
hops set by synapse (that is: we will only ever have to look up the deltas for a maximum of
|
||||
100 state groups). It does this by taking another "snapshot" every 100 state groups.
|
||||
|
||||
However, it is these snapshots that take up the bulk of the storage in a synapse database,
|
||||
so we want to find a way to reduce the number of them without dramatically increasing the
|
||||
maximum number of hops needed to do lookups.
|
||||
|
||||
|
||||
## Compression Algorithm
|
||||
|
||||
The algorithm works by attempting to create a *tree* of deltas, produced by
|
||||
appending state groups to different "levels". Each level has a maximum size, where
|
||||
each state group is appended to the lowest level that is not full. This tool calls a
|
||||
state group "compressed" once it has been added to
|
||||
one of these levels.
|
||||
|
||||
This produces a graph that looks approximately like the following, in the case
|
||||
of having two levels with the bottom level (L1) having a maximum size of 3:
|
||||
|
||||
```
|
||||
L2 <-------------------- L2 <---------- ...
|
||||
^--- L1 <--- L1 <--- L1 ^--- L1 <--- L1 <--- L1
|
||||
|
||||
NOTE: A <--- B means that state group B's predecessor is A
|
||||
```
|
||||
The structure that synapse creates by default would be equivalent to having one level with
|
||||
a maximum length of 100.
|
||||
|
||||
**Note**: Increasing the sum of the sizes of levels will increase the time it
|
||||
takes to query the full state of a given state group.
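
As a concrete illustration of the "append to the lowest level that is not full" rule, here is a
small standalone sketch. It is not the tool's real implementation (which also builds the deltas
and copes with impossible predecessors); the names are invented for illustration.

```rust
// Invented names; a toy model of one "level" in the tree described above.
struct Level {
    max_len: usize,
    len: usize,
    head: Option<i64>,
}

// For each state group (in ascending order) return the predecessor it would be
// given: the head of the lowest level that still has space. Full levels are
// restarted with the new group and the search moves one level up.
fn build_tree(groups: &[i64], level_sizes: &[usize]) -> Vec<(i64, Option<i64>)> {
    let mut levels: Vec<Level> = level_sizes
        .iter()
        .map(|&max_len| Level { max_len, len: 0, head: None })
        .collect();

    let mut edges = Vec::new();
    for &group in groups {
        let mut prev = None;
        for level in levels.iter_mut() {
            if level.len < level.max_len {
                // Room at this level: hang the group off the current head.
                prev = level.head;
                level.head = Some(group);
                level.len += 1;
                break;
            }
            // Level is full: this group starts a new chain here and we look one
            // level up for a predecessor. If every level is full the predecessor
            // stays None, i.e. the group becomes a fresh snapshot.
            level.head = Some(group);
            level.len = 1;
        }
        edges.push((group, prev));
    }
    edges
}

fn main() {
    // Two levels of size 3 reproduce the shape in the diagram above:
    // groups 0, 3 and 12 end up with no predecessor.
    let groups: Vec<i64> = (0..=13).collect();
    for (group, prev) in build_tree(&groups, &[3, 3]) {
        println!("{group} -> {prev:?}");
    }
}
```

The same rule applies with more levels; for example the auto compressor's default level sizes of
100, 50 and 25 simply make the chains longer before a new snapshot is taken.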
|
||||
54
docs/python.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# Running the compressor tools from python
|
||||
|
||||
Both the automatic and manual tools use PyO3 to allow the compressor
|
||||
to be run from Python.
|
||||
|
||||
To see any output from the tools, logging must be set up in Python before
|
||||
the compressor is run.
|
||||
|
||||
## Setting things up
|
||||
|
||||
1. Create a virtual environment in the place you want to use the compressor from
|
||||
(if it doesn't already exist)
|
||||
`$ virtualenv -p python3 venv`
|
||||
|
||||
2. Activate the virtual environment and install `maturin` (if you haven't already)
|
||||
`$ source venv/bin/activate`
|
||||
`$ pip install maturin`
|
||||
|
||||
3. Navigate to the correct location
|
||||
For the automatic tool:
|
||||
`$ cd /home/synapse/rust-synapse-compress-state/synapse_auto_compressor`
|
||||
For the manual tool:
|
||||
`$ cd /home/synapse/rust-synapse-compress-state`
|
||||
|
||||
4. Build and install the library
|
||||
`$ maturin develop`
|
||||
|
||||
This will install the relevant compressor tool into the activated virtual environment.
|
||||
|
||||
## Automatic tool example:
|
||||
|
||||
```python
|
||||
import synapse_auto_compressor
|
||||
|
||||
synapse_auto_compressor.compress_state_events_table(
|
||||
db_url="postgresql://localhost/synapse",
|
||||
chunk_size=500,
|
||||
default_levels="100,50,25",
|
||||
number_of_chunks=100
|
||||
)
|
||||
```
|
||||
|
||||
## Manual tool example:
|
||||
|
||||
```python
|
||||
import synapse_compress_state
|
||||
|
||||
synapse_compress_state.run_compression(
|
||||
db_url="postgresql://localhost/synapse",
|
||||
room_id="!some_room:example.com",
|
||||
output_file="out.sql",
|
||||
transactions=True
|
||||
)
|
||||
```
|
||||
8
pyproject.toml
Normal file
@@ -0,0 +1,8 @@
|
||||
[build-system]
|
||||
requires = ["maturin>=1.0,<2.0"]
|
||||
build-backend = "maturin"
|
||||
|
||||
[tool.maturin]
|
||||
profile = "release"
|
||||
features = ["pyo3"]
|
||||
no-default-features = true
|
||||
@@ -23,27 +23,28 @@
|
||||
//!
|
||||
//! This produces graphs that look roughly like, for two levels:
|
||||
//!
|
||||
//! ```
|
||||
//! ```ignore
|
||||
//! L2 <-------------------- L2 <---------- ...
|
||||
//! ^--- L1 <--- L1 <--- L1 ^--- L1 <--- L1 <--- L1
|
||||
//! ```
|
||||
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use state_map::StateMap;
|
||||
use std::collections::BTreeMap;
|
||||
use std::{collections::BTreeMap, time::Duration};
|
||||
use string_cache::DefaultAtom as Atom;
|
||||
|
||||
use super::{collapse_state_maps, StateGroupEntry};
|
||||
|
||||
/// Holds information about a particular level.
|
||||
struct Level {
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Level {
|
||||
/// The maximum size this level is allowed to be
|
||||
max_length: usize,
|
||||
/// The (approximate) current chain length of this level. This is equivalent
|
||||
/// to recursively following `current`
|
||||
current_chain_length: usize,
|
||||
/// The head of this level
|
||||
current: Option<i64>,
|
||||
head: Option<i64>,
|
||||
}
|
||||
|
||||
impl Level {
|
||||
@@ -52,7 +53,16 @@ impl Level {
|
||||
Level {
|
||||
max_length,
|
||||
current_chain_length: 0,
|
||||
current: None,
|
||||
head: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new level from stored state
|
||||
pub fn restore(max_length: usize, current_chain_length: usize, head: Option<i64>) -> Level {
|
||||
Level {
|
||||
max_length,
|
||||
current_chain_length,
|
||||
head,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -60,14 +70,14 @@ impl Level {
|
||||
/// that given state group will (probably) reference the previous head.
|
||||
///
|
||||
/// Panics if `delta` is true and the level is already full.
|
||||
pub fn update(&mut self, current: i64, delta: bool) {
|
||||
self.current = Some(current);
|
||||
fn update(&mut self, new_head: i64, delta: bool) {
|
||||
self.head = Some(new_head);
|
||||
|
||||
if delta {
|
||||
// If we're referencing the previous head then increment our chain
|
||||
// length estimate
|
||||
if !self.has_space() {
|
||||
panic!("Tried to add to a already full level");
|
||||
panic!("Tried to add to an already full level");
|
||||
}
|
||||
|
||||
self.current_chain_length += 1;
|
||||
@@ -77,9 +87,19 @@ impl Level {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the max length of the level
|
||||
pub fn get_max_length(&self) -> usize {
|
||||
self.max_length
|
||||
}
|
||||
|
||||
/// Get the current length of the level
|
||||
pub fn get_current_length(&self) -> usize {
|
||||
self.current_chain_length
|
||||
}
|
||||
|
||||
/// Get the current head of the level
|
||||
pub fn get_current(&self) -> Option<i64> {
|
||||
self.current
|
||||
pub fn get_head(&self) -> Option<i64> {
|
||||
self.head
|
||||
}
|
||||
|
||||
/// Whether there is space in the current chain at this level. If not then a
|
||||
@@ -127,24 +147,75 @@ impl<'a> Compressor<'a> {
|
||||
compressor
|
||||
}
|
||||
|
||||
/// Creates a compressor and runs the compression algorithm.
|
||||
/// Used when restoring compressor state from a previous run,
|
||||
/// in which case the levels' heads are also known.
|
||||
pub fn compress_from_save(
|
||||
original_state_map: &'a BTreeMap<i64, StateGroupEntry>,
|
||||
level_info: &[Level],
|
||||
) -> Compressor<'a> {
|
||||
let levels = level_info
|
||||
.iter()
|
||||
.map(|l| Level::restore(l.max_length, l.current_chain_length, l.head))
|
||||
.collect();
|
||||
|
||||
let mut compressor = Compressor {
|
||||
original_state_map,
|
||||
new_state_group_map: BTreeMap::new(),
|
||||
levels,
|
||||
stats: Stats::default(),
|
||||
};
|
||||
|
||||
compressor.create_new_tree();
|
||||
compressor
|
||||
}
|
||||
|
||||
/// Returns all the state required to save the compressor so it can be continued later
|
||||
pub fn get_level_info(&self) -> Vec<Level> {
|
||||
self.levels.clone()
|
||||
}
|
||||
|
||||
/// Actually runs the compression algorithm
|
||||
fn create_new_tree(&mut self) {
|
||||
if !self.new_state_group_map.is_empty() {
|
||||
panic!("Can only call `create_new_tree` once");
|
||||
}
|
||||
|
||||
let pb = ProgressBar::new(self.original_state_map.len() as u64);
|
||||
let pb = if cfg!(feature = "no-progress-bars") {
|
||||
ProgressBar::hidden()
|
||||
} else {
|
||||
ProgressBar::new(self.original_state_map.len() as u64)
|
||||
};
|
||||
pb.set_style(
|
||||
ProgressStyle::default_bar().template("[{elapsed_precise}] {bar} {pos}/{len} {msg}"),
|
||||
ProgressStyle::default_bar()
|
||||
.template("[{elapsed_precise}] {bar} {pos}/{len} {msg}")
|
||||
.unwrap(),
|
||||
);
|
||||
pb.set_message("state groups");
|
||||
pb.enable_steady_tick(100);
|
||||
pb.enable_steady_tick(Duration::from_millis(100));
|
||||
|
||||
for (&state_group, entry) in self.original_state_map {
|
||||
// Check whether this entry is in_range or is just present in the map due to being
|
||||
// a predecessor of a group that IS in_range for compression
|
||||
if !entry.in_range {
|
||||
let new_entry = StateGroupEntry {
|
||||
// in_range is kept the same so that the new entry is equal to the old entry
|
||||
// otherwise it might trigger a useless database transaction
|
||||
in_range: entry.in_range,
|
||||
prev_state_group: entry.prev_state_group,
|
||||
state_map: entry.state_map.clone(),
|
||||
};
|
||||
// Paranoidly assert that we are not making changes to this entry;
// this check could probably be removed...
|
||||
assert!(new_entry == *entry);
|
||||
self.new_state_group_map.insert(state_group, new_entry);
|
||||
|
||||
continue;
|
||||
}
|
||||
let mut prev_state_group = None;
|
||||
for level in &mut self.levels {
|
||||
if level.has_space() {
|
||||
prev_state_group = level.get_current();
|
||||
prev_state_group = level.get_head();
|
||||
level.update(state_group, true);
|
||||
break;
|
||||
} else {
|
||||
@@ -162,6 +233,7 @@ impl<'a> Compressor<'a> {
|
||||
self.new_state_group_map.insert(
|
||||
state_group,
|
||||
StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group,
|
||||
state_map: delta,
|
||||
},
|
||||
@@ -182,7 +254,7 @@ impl<'a> Compressor<'a> {
|
||||
///
|
||||
/// Returns the state map and the actual base state group (if any) used.
|
||||
fn get_delta(&mut self, prev_sg: Option<i64>, sg: i64) -> (StateMap<Atom>, Option<i64>) {
|
||||
let state_map = collapse_state_maps(&self.original_state_map, sg);
|
||||
let state_map = collapse_state_maps(self.original_state_map, sg);
|
||||
|
||||
let mut prev_sg = if let Some(prev_sg) = prev_sg {
|
||||
prev_sg
|
||||
@@ -194,7 +266,7 @@ impl<'a> Compressor<'a> {
|
||||
// a valid base for the state group.
|
||||
let mut prev_state_map;
|
||||
'outer: loop {
|
||||
prev_state_map = collapse_state_maps(&self.original_state_map, prev_sg);
|
||||
prev_state_map = collapse_state_maps(self.original_state_map, prev_sg);
|
||||
for (t, s) in prev_state_map.keys() {
|
||||
if !state_map.contains_key(t, s) {
|
||||
// This is not a valid base as it contains key the new state
|
||||
@@ -230,49 +302,11 @@ impl<'a> Compressor<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_new_map() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
#[cfg(test)]
|
||||
mod level_tests;
|
||||
|
||||
let mut prev = None;
|
||||
for i in 0i64..=13i64 {
|
||||
initial.insert(
|
||||
i,
|
||||
StateGroupEntry {
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
},
|
||||
);
|
||||
#[cfg(test)]
|
||||
mod compressor_tests;
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
let compressor = Compressor::compress(&initial, &[3, 3]);
|
||||
|
||||
let new_state = compressor.new_state_group_map;
|
||||
|
||||
let expected_edges: BTreeMap<i64, i64> = vec![
|
||||
(1, 0),
|
||||
(2, 1),
|
||||
(4, 3),
|
||||
(5, 4),
|
||||
(6, 3),
|
||||
(7, 6),
|
||||
(8, 7),
|
||||
(9, 6),
|
||||
(10, 9),
|
||||
(11, 10),
|
||||
(13, 12),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
for sg in 0i64..=13i64 {
|
||||
assert_eq!(
|
||||
expected_edges.get(&sg).cloned(),
|
||||
new_state[&sg].prev_state_group,
|
||||
"state group {} did not match expected",
|
||||
sg,
|
||||
);
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod stats_tests;
|
||||
|
||||
692
src/compressor/compressor_tests.rs
Normal file
@@ -0,0 +1,692 @@
|
||||
use crate::{
|
||||
compressor::{Compressor, Level, Stats},
|
||||
StateGroupEntry,
|
||||
};
|
||||
use state_map::StateMap;
|
||||
use std::collections::BTreeMap;
|
||||
use string_cache::DefaultAtom as Atom;
|
||||
|
||||
#[test]
|
||||
fn compress_creates_correct_compressor() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
|
||||
for i in 0i64..=13i64 {
|
||||
initial.insert(
|
||||
i,
|
||||
StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
},
|
||||
);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
let compressor = Compressor::compress(&initial, &[3, 3]);
|
||||
|
||||
let new_state = &compressor.new_state_group_map;
|
||||
|
||||
// This should create the following structure
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
let expected_edges: BTreeMap<i64, i64> = vec![
|
||||
(1, 0),
|
||||
(2, 1),
|
||||
(4, 3),
|
||||
(5, 4),
|
||||
(6, 3),
|
||||
(7, 6),
|
||||
(8, 7),
|
||||
(9, 6),
|
||||
(10, 9),
|
||||
(11, 10),
|
||||
(13, 12),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
for sg in 0i64..=13i64 {
|
||||
assert_eq!(
|
||||
expected_edges.get(&sg).cloned(),
|
||||
new_state[&sg].prev_state_group,
|
||||
"state group {} did not match expected",
|
||||
sg,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn create_new_tree_does_nothing_if_already_compressed() {
|
||||
// This should create the following structure
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
let initial_edges: BTreeMap<i64, i64> = vec![
|
||||
(1, 0),
|
||||
(2, 1),
|
||||
(4, 3),
|
||||
(5, 4),
|
||||
(6, 3),
|
||||
(7, 6),
|
||||
(8, 7),
|
||||
(9, 6),
|
||||
(10, 9),
|
||||
(11, 10),
|
||||
(13, 12),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
|
||||
for i in 0i64..=13i64 {
|
||||
// edge from map
|
||||
let pred_group = initial_edges.get(&i);
|
||||
|
||||
// Need Option<i64> not Option<&i64>
|
||||
let prev = pred_group.copied();
|
||||
|
||||
// insert that edge into the initial map
|
||||
initial.insert(
|
||||
i,
|
||||
StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let mut compressor = Compressor {
|
||||
original_state_map: &initial,
|
||||
new_state_group_map: BTreeMap::new(),
|
||||
levels: vec![Level::new(3), Level::new(3)],
|
||||
stats: Stats::default(),
|
||||
};
|
||||
|
||||
compressor.create_new_tree();
|
||||
|
||||
let new_state = &compressor.new_state_group_map;
|
||||
|
||||
assert_eq!(initial, *new_state);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn create_new_tree_respects_levels() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
|
||||
for i in 0i64..=13i64 {
|
||||
initial.insert(
|
||||
i,
|
||||
StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
},
|
||||
);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
let mut compressor = Compressor {
|
||||
original_state_map: &initial,
|
||||
new_state_group_map: BTreeMap::new(),
|
||||
levels: vec![Level::new(3), Level::new(3)],
|
||||
stats: Stats::default(),
|
||||
};
|
||||
compressor.create_new_tree();
|
||||
|
||||
let new_state = &compressor.new_state_group_map;
|
||||
|
||||
// This should create the following structure
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
let expected_edges: BTreeMap<i64, i64> = vec![
|
||||
(1, 0),
|
||||
(2, 1),
|
||||
(4, 3),
|
||||
(5, 4),
|
||||
(6, 3),
|
||||
(7, 6),
|
||||
(8, 7),
|
||||
(9, 6),
|
||||
(10, 9),
|
||||
(11, 10),
|
||||
(13, 12),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
for sg in 0i64..=13i64 {
|
||||
assert_eq!(
|
||||
expected_edges.get(&sg).cloned(),
|
||||
new_state[&sg].prev_state_group,
|
||||
"state group {} did not match expected",
|
||||
sg,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "Can only call `create_new_tree` once")]
|
||||
fn create_new_tree_panics_if_run_twice() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
for i in 0i64..=13i64 {
|
||||
initial.insert(
|
||||
i,
|
||||
StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
},
|
||||
);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
let mut compressor = Compressor {
|
||||
original_state_map: &initial,
|
||||
new_state_group_map: BTreeMap::new(),
|
||||
levels: vec![Level::new(3), Level::new(3)],
|
||||
stats: Stats::default(),
|
||||
};
|
||||
compressor.create_new_tree();
|
||||
compressor.create_new_tree();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn create_new_tree_respects_all_not_in_range() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
|
||||
for i in 0i64..=13i64 {
|
||||
initial.insert(
|
||||
i,
|
||||
StateGroupEntry {
|
||||
in_range: false,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
},
|
||||
);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
let mut compressor = Compressor {
|
||||
original_state_map: &initial,
|
||||
new_state_group_map: BTreeMap::new(),
|
||||
levels: vec![Level::new(3), Level::new(3)],
|
||||
stats: Stats::default(),
|
||||
};
|
||||
compressor.create_new_tree();
|
||||
|
||||
let new_state = &compressor.new_state_group_map;
|
||||
|
||||
// This should create the following structure
|
||||
//
|
||||
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13 (i.e. no change!)
|
||||
let expected_edges: BTreeMap<i64, i64> = vec![
|
||||
(1, 0),
|
||||
(2, 1),
|
||||
(3, 2),
|
||||
(4, 3),
|
||||
(5, 4),
|
||||
(6, 5),
|
||||
(7, 6),
|
||||
(8, 7),
|
||||
(9, 8),
|
||||
(10, 9),
|
||||
(11, 10),
|
||||
(12, 11),
|
||||
(13, 12),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
for sg in 0i64..=13i64 {
|
||||
assert_eq!(
|
||||
expected_edges.get(&sg).cloned(),
|
||||
new_state[&sg].prev_state_group,
|
||||
"state group {} did not match expected",
|
||||
sg,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn create_new_tree_respects_some_not_in_range() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18
|
||||
for i in 0i64..=18i64 {
|
||||
initial.insert(
|
||||
i,
|
||||
StateGroupEntry {
|
||||
in_range: i > 4,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
},
|
||||
);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
let mut compressor = Compressor {
|
||||
original_state_map: &initial,
|
||||
new_state_group_map: BTreeMap::new(),
|
||||
levels: vec![Level::new(3), Level::new(3)],
|
||||
stats: Stats::default(),
|
||||
};
|
||||
compressor.create_new_tree();
|
||||
|
||||
let new_state = &compressor.new_state_group_map;
|
||||
|
||||
// This should create the following structure
|
||||
//
|
||||
// 0 5 8\ 17
|
||||
// 1 6 9 11\ 18
|
||||
// 2 7 10 12 14
|
||||
// 3 13 15
|
||||
// 4 16
|
||||
let expected_edges: BTreeMap<i64, i64> = vec![
|
||||
(1, 0),
|
||||
(2, 1),
|
||||
(3, 2),
|
||||
(4, 3), // No compression of nodes 0,1,2,3,4
|
||||
(6, 5), // Compresses in 3,3 leveling starting at 5
|
||||
(7, 6),
|
||||
(9, 8),
|
||||
(10, 9),
|
||||
(11, 8),
|
||||
(12, 11),
|
||||
(13, 12),
|
||||
(14, 11),
|
||||
(15, 14),
|
||||
(16, 15),
|
||||
(18, 17),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
for n in new_state {
|
||||
println!("{:?}", n);
|
||||
}
|
||||
|
||||
for sg in 0i64..=18i64 {
|
||||
assert_eq!(
|
||||
expected_edges.get(&sg).cloned(),
|
||||
new_state[&sg].prev_state_group,
|
||||
"state group {} did not match expected",
|
||||
sg,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn create_new_tree_deals_with_impossible_preds() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
// This starts with the following structure
|
||||
//
|
||||
// (note missing 3-4 link)
|
||||
// 0-1-2-3
|
||||
// 4-5-6-7-8-9-10-11-12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') where j is an ancestor of i
|
||||
for i in 0i64..=13i64 {
|
||||
if i == 4 {
|
||||
prev = None
|
||||
}
|
||||
let mut entry = StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
};
|
||||
entry
|
||||
.state_map
|
||||
.insert("group", &i.to_string(), "seen".into());
|
||||
entry.state_map.insert("node", "is", i.to_string().into());
|
||||
|
||||
initial.insert(i, entry);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
let mut compressor = Compressor {
|
||||
original_state_map: &initial,
|
||||
new_state_group_map: BTreeMap::new(),
|
||||
levels: vec![Level::new(3), Level::new(3)],
|
||||
stats: Stats::default(),
|
||||
};
|
||||
compressor.create_new_tree();
|
||||
|
||||
let new_state = &compressor.new_state_group_map;
|
||||
|
||||
for n in new_state {
|
||||
println!("{:?}", n);
|
||||
}
|
||||
|
||||
// This should create the following structure
|
||||
//
|
||||
// Brackets mean that the group has NO predecessor but is in that position in the
|
||||
// levels tree
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 (4)(6)\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
let expected_edges: BTreeMap<i64, i64> = vec![
|
||||
(1, 0),
|
||||
(2, 1),
|
||||
(5, 4),
|
||||
(7, 6),
|
||||
(8, 7),
|
||||
(9, 6),
|
||||
(10, 9),
|
||||
(11, 10),
|
||||
(13, 12),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
for sg in 0i64..=13i64 {
|
||||
assert_eq!(
|
||||
expected_edges.get(&sg).cloned(),
|
||||
new_state[&sg].prev_state_group,
|
||||
"state group {} did not match expected",
|
||||
sg,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_delta_returns_snapshot_if_no_prev_given() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
for i in 0i64..=13i64 {
|
||||
let mut entry = StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
};
|
||||
entry
|
||||
.state_map
|
||||
.insert("group", &i.to_string(), "seen".into());
|
||||
entry.state_map.insert("node", "is", i.to_string().into());
|
||||
|
||||
initial.insert(i, entry);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
// This should produce the following structure (tested above)
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
//
|
||||
// State contents should be the same as before
|
||||
let mut compressor = Compressor::compress(&initial, &[3, 3]);
|
||||
|
||||
let (found_delta, found_pred) = compressor.get_delta(None, 6);
|
||||
|
||||
let mut expected_delta: StateMap<Atom> = StateMap::new();
|
||||
expected_delta.insert("node", "is", "6".into());
|
||||
expected_delta.insert("group", "0", "seen".into());
|
||||
expected_delta.insert("group", "1", "seen".into());
|
||||
expected_delta.insert("group", "2", "seen".into());
|
||||
expected_delta.insert("group", "3", "seen".into());
|
||||
expected_delta.insert("group", "4", "seen".into());
|
||||
expected_delta.insert("group", "5", "seen".into());
|
||||
expected_delta.insert("group", "6", "seen".into());
|
||||
|
||||
assert_eq!(found_delta, expected_delta);
|
||||
assert_eq!(found_pred, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_delta_returns_delta_if_original_predecessor() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
for i in 0i64..=13i64 {
|
||||
let mut entry = StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
};
|
||||
entry
|
||||
.state_map
|
||||
.insert("group", &i.to_string(), "seen".into());
|
||||
entry.state_map.insert("node", "is", i.to_string().into());
|
||||
|
||||
initial.insert(i, entry);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
// This should produce the following structure (tested above)
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
//
|
||||
// State contents should be the same as before
|
||||
let mut compressor = Compressor::compress(&initial, &[3, 3]);
|
||||
|
||||
let (found_delta, found_pred) = compressor.get_delta(Some(5), 6);
|
||||
|
||||
let mut expected_delta: StateMap<Atom> = StateMap::new();
|
||||
expected_delta.insert("node", "is", "6".into());
|
||||
expected_delta.insert("group", "6", "seen".into());
|
||||
|
||||
assert_eq!(found_delta, expected_delta);
|
||||
assert_eq!(found_pred, Some(5));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_delta_returns_delta_if_original_multi_hop_predecessor() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') - for all j less than i
|
||||
for i in 0i64..=13i64 {
|
||||
let mut entry = StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
};
|
||||
entry
|
||||
.state_map
|
||||
.insert("group", &i.to_string(), "seen".into());
|
||||
entry.state_map.insert("node", "is", i.to_string().into());
|
||||
|
||||
initial.insert(i, entry);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
// This should produce the following structure (tested above)
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
//
|
||||
// State contents should be the same as before
|
||||
let mut compressor = Compressor::compress(&initial, &[3, 3]);
|
||||
|
||||
let (found_delta, found_pred) = compressor.get_delta(Some(3), 6);
|
||||
|
||||
let mut expected_delta: StateMap<Atom> = StateMap::new();
|
||||
expected_delta.insert("node", "is", "6".into());
|
||||
expected_delta.insert("group", "4", "seen".into());
|
||||
expected_delta.insert("group", "5", "seen".into());
|
||||
expected_delta.insert("group", "6", "seen".into());
|
||||
|
||||
assert_eq!(found_delta, expected_delta);
|
||||
assert_eq!(found_pred, Some(3));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_delta_returns_snapshot_if_no_prev_possible() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
// This starts with the following structure
|
||||
//
|
||||
// (note missing 3-4 link)
|
||||
// 0-1-2-3
|
||||
// 4-5-6-7-8-9-10-11-12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') where j is ancestor of i
|
||||
for i in 0i64..=13i64 {
|
||||
// don't add 3-4 link
|
||||
if i == 4 {
|
||||
prev = None
|
||||
}
|
||||
|
||||
// populate the delta for this state
|
||||
let mut entry = StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
};
|
||||
entry
|
||||
.state_map
|
||||
.insert("group", &i.to_string(), "seen".into());
|
||||
entry.state_map.insert("node", "is", i.to_string().into());
|
||||
|
||||
// put the entry into the initial map
|
||||
initial.insert(i, entry);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
// This should create the following structure if create_new_tree() was run
|
||||
// (tested in create_new_tree_deals_with_impossible_preds())
|
||||
//
|
||||
        // Brackets mean that the group has NO predecessor but is in that position in the
|
||||
// levels tree
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 (4)(6)\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
//
|
||||
// State contents should be the same as before
|
||||
|
||||
// build up new_tree after 0,1,2,3 added
|
||||
let mut new_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
|
||||
// 0-1-2 is left the same
|
||||
new_map.insert(0, initial.get(&0).unwrap().clone());
|
||||
new_map.insert(1, initial.get(&1).unwrap().clone());
|
||||
new_map.insert(2, initial.get(&2).unwrap().clone());
|
||||
|
||||
// 3 is now a snapshot
|
||||
let mut entry_3: StateMap<Atom> = StateMap::new();
|
||||
entry_3.insert("node", "is", "3".into());
|
||||
entry_3.insert("group", "0", "seen".into());
|
||||
entry_3.insert("group", "1", "seen".into());
|
||||
entry_3.insert("group", "2", "seen".into());
|
||||
entry_3.insert("group", "3", "seen".into());
|
||||
new_map.insert(
|
||||
3,
|
||||
StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: None,
|
||||
state_map: entry_3,
|
||||
},
|
||||
);
|
||||
|
||||
        // build the compressor with this partially built new map
|
||||
let mut compressor = Compressor {
|
||||
original_state_map: &initial,
|
||||
new_state_group_map: new_map,
|
||||
levels: vec![Level::new(3), Level::new(3)],
|
||||
stats: Stats::default(),
|
||||
};
|
||||
|
||||
// make the levels how they would be after 0,1,2,3 added
|
||||
// they should both be of length 1 and have 3 as the current head
|
||||
let mut levels_iter = compressor.levels.iter_mut();
|
||||
|
||||
let l1 = levels_iter.next().unwrap();
|
||||
l1.head = Some(3);
|
||||
l1.current_chain_length = 1;
|
||||
|
||||
let l2 = levels_iter.next().unwrap();
|
||||
l2.head = Some(3);
|
||||
l2.current_chain_length = 1;
|
||||
|
||||
// Now try and find delta for 4 with 3 as pred
|
||||
let (found_delta, found_pred) = compressor.get_delta(Some(3), 4);
|
||||
|
||||
let mut expected_delta: StateMap<Atom> = StateMap::new();
|
||||
expected_delta.insert("node", "is", "4".into());
|
||||
expected_delta.insert("group", "4", "seen".into());
|
||||
|
||||
assert_eq!(found_delta, expected_delta);
|
||||
assert_eq!(found_pred, None);
|
||||
}
|
||||
src/compressor/level_tests.rs (new file, 80 lines)
@@ -0,0 +1,80 @@
|
||||
use crate::compressor::Level;
|
||||
|
||||
#[test]
|
||||
fn new_produces_empty_level() {
|
||||
let l = Level::new(15);
|
||||
assert_eq!(l.max_length, 15);
|
||||
assert_eq!(l.current_chain_length, 0);
|
||||
assert_eq!(l.head, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn update_adds_to_non_full_level() {
|
||||
let mut l = Level::new(10);
|
||||
l.update(7, true);
|
||||
assert_eq!(l.max_length, 10);
|
||||
assert_eq!(l.current_chain_length, 1);
|
||||
assert_eq!(l.head, Some(7));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic(expected = "Tried to add to an already full level")]
|
||||
fn update_panics_if_adding_and_too_full() {
|
||||
let mut l = Level::new(5);
|
||||
l.update(1, true);
|
||||
l.update(2, true);
|
||||
l.update(3, true);
|
||||
l.update(4, true);
|
||||
l.update(5, true);
|
||||
l.update(6, true);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn update_resets_level_correctly() {
|
||||
let mut l = Level::new(5);
|
||||
l.update(1, true);
|
||||
l.update(2, true);
|
||||
l.update(3, true);
|
||||
l.update(4, true);
|
||||
l.update(5, true);
|
||||
l.update(6, false);
|
||||
assert_eq!(l.max_length, 5);
|
||||
assert_eq!(l.current_chain_length, 1);
|
||||
assert_eq!(l.head, Some(6));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn get_head_returns_head() {
|
||||
let mut l = Level::new(5);
|
||||
assert_eq!(l.get_head(), None);
|
||||
l.update(23, true);
|
||||
assert_eq!(l.get_head(), Some(23));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn has_space_returns_true_if_empty() {
|
||||
let l = Level::new(15);
|
||||
assert!(l.has_space());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn has_space_returns_true_if_part_full() {
|
||||
let mut l = Level::new(15);
|
||||
l.update(12, true);
|
||||
l.update(234, true);
|
||||
l.update(1, true);
|
||||
l.update(143, true);
|
||||
l.update(15, true);
|
||||
assert!(l.has_space());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn has_space_returns_false_if_full() {
|
||||
let mut l = Level::new(5);
|
||||
l.update(1, true);
|
||||
l.update(2, true);
|
||||
l.update(3, true);
|
||||
l.update(4, true);
|
||||
l.update(5, true);
|
||||
assert!(!l.has_space());
|
||||
}
|
||||
src/compressor/stats_tests.rs (new file, 181 lines)
@@ -0,0 +1,181 @@
|
||||
use crate::{
|
||||
compressor::{Compressor, Level, Stats},
|
||||
StateGroupEntry,
|
||||
};
|
||||
use state_map::StateMap;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
#[test]
|
||||
fn stats_correct_when_no_resets() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
// This starts with the following structure
|
||||
//
|
||||
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
|
||||
for i in 0i64..=13i64 {
|
||||
initial.insert(
|
||||
i,
|
||||
StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
},
|
||||
);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
let mut compressor = Compressor {
|
||||
original_state_map: &initial,
|
||||
new_state_group_map: BTreeMap::new(),
|
||||
levels: vec![Level::new(3), Level::new(3)],
|
||||
stats: Stats::default(),
|
||||
};
|
||||
|
||||
// This should create the following structure
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
compressor.create_new_tree();
|
||||
|
||||
// No resets should have taken place
|
||||
assert_eq!(compressor.stats.resets_no_suitable_prev, 0);
|
||||
assert_eq!(compressor.stats.resets_no_suitable_prev_size, 0);
|
||||
|
||||
// Groups 3,6,9,12 should be the only ones changed
|
||||
assert_eq!(compressor.stats.state_groups_changed, 4);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stats_correct_when_some_resets() {
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
let mut prev = None;
|
||||
|
||||
// This starts with the following structure
|
||||
//
|
||||
// (note missing 3-4 link)
|
||||
// 0-1-2-3
|
||||
// 4-5-6-7-8-9-10-11-12-13
|
||||
//
|
||||
// Each group i has state:
|
||||
// ('node','is', i)
|
||||
// ('group', j, 'seen') where j is ancestor of i
|
||||
for i in 0i64..=13i64 {
|
||||
if i == 4 {
|
||||
prev = None
|
||||
}
|
||||
let mut entry = StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
};
|
||||
entry
|
||||
.state_map
|
||||
.insert("group", &i.to_string(), "seen".into());
|
||||
entry.state_map.insert("node", "is", i.to_string().into());
|
||||
|
||||
initial.insert(i, entry);
|
||||
|
||||
prev = Some(i)
|
||||
}
|
||||
|
||||
let mut compressor = Compressor {
|
||||
original_state_map: &initial,
|
||||
new_state_group_map: BTreeMap::new(),
|
||||
levels: vec![Level::new(3), Level::new(3)],
|
||||
stats: Stats::default(),
|
||||
};
|
||||
|
||||
// This should create the following structure
|
||||
//
|
||||
        // Brackets mean that the group has NO predecessor but is in that position in the
|
||||
// levels tree
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 (4)(6)\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
compressor.create_new_tree();
|
||||
|
||||
// the reset required for 4 contributes 2 to the size stat
|
||||
// - (1 'node' and 1 'group') entry
|
||||
// the reset required for 6 contributes 4 to the size stat
|
||||
// - (1 'node' and 3 'group') entry
|
||||
assert_eq!(compressor.stats.resets_no_suitable_prev, 2);
|
||||
assert_eq!(compressor.stats.resets_no_suitable_prev_size, 6);
|
||||
|
||||
// groups 3,4,6,9,12 are the only ones changed
|
||||
assert_eq!(compressor.stats.state_groups_changed, 5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stats_correct_if_no_changes() {
|
||||
// This should create the following structure
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
let initial_edges: BTreeMap<i64, i64> = vec![
|
||||
(1, 0),
|
||||
(2, 1),
|
||||
(4, 3),
|
||||
(5, 4),
|
||||
(6, 3),
|
||||
(7, 6),
|
||||
(8, 7),
|
||||
(9, 6),
|
||||
(10, 9),
|
||||
(11, 10),
|
||||
(13, 12),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
|
||||
for i in 0i64..=13i64 {
|
||||
// edge from map
|
||||
let pred_group = initial_edges.get(&i);
|
||||
|
||||
// Need Option<i64> not Option<&i64>
|
||||
let prev = pred_group.copied();
|
||||
|
||||
// insert that edge into the initial map
|
||||
initial.insert(
|
||||
i,
|
||||
StateGroupEntry {
|
||||
in_range: true,
|
||||
prev_state_group: prev,
|
||||
state_map: StateMap::new(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let mut compressor = Compressor {
|
||||
original_state_map: &initial,
|
||||
new_state_group_map: BTreeMap::new(),
|
||||
levels: vec![Level::new(3), Level::new(3)],
|
||||
stats: Stats::default(),
|
||||
};
|
||||
|
||||
// This should create the following structure (i.e. no change)
|
||||
//
|
||||
// 0 3\ 12
|
||||
// 1 4 6\ 13
|
||||
// 2 5 7 9
|
||||
// 8 10
|
||||
// 11
|
||||
compressor.create_new_tree();
|
||||
|
||||
// No changes should have been made (the old tree should be the same)
|
||||
assert_eq!(compressor.stats.resets_no_suitable_prev, 0);
|
||||
assert_eq!(compressor.stats.resets_no_suitable_prev_size, 0);
|
||||
assert_eq!(compressor.stats.state_groups_changed, 0);
|
||||
}
|
||||
src/database.rs (495 lines changed)
@@ -13,101 +13,381 @@
|
||||
// limitations under the License.
|
||||
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use postgres::{fallible_iterator::FallibleIterator, Client};
|
||||
use log::{debug, trace};
|
||||
use openssl::ssl::{SslConnector, SslMethod, SslVerifyMode};
|
||||
use postgres::{fallible_iterator::FallibleIterator, types::ToSql, Client};
|
||||
use postgres_openssl::MakeTlsConnector;
|
||||
use rand::{distributions::Alphanumeric, thread_rng, Rng};
|
||||
use std::{borrow::Cow, collections::BTreeMap, fmt, iter};
|
||||
use std::{borrow::Cow, collections::BTreeMap, fmt, time::Duration};
|
||||
|
||||
use crate::{compressor::Level, generate_sql};
|
||||
|
||||
use super::StateGroupEntry;
|
||||
|
||||
/// Fetch the entries in state_groups_state (and their prev groups) for the
|
||||
/// given `room_id` by connecting to the postgres database at `db_url`.
|
||||
/// Fetch the entries in state_groups_state (and their prev groups) for a
|
||||
/// specific room.
|
||||
///
|
||||
/// Returns with the state_group map and the id of the last group that was used
|
||||
/// Or None if there are no state groups within the range given
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `room_id` - The ID of the room in the database
|
||||
/// * `db_url` - The URL of a Postgres database. This should be of the
|
||||
/// form: "postgresql://user:pass@domain:port/database"
|
||||
/// * `min_state_group` - If specified, then only fetch the entries for state
|
||||
/// groups greater than (but not equal) to this number. It
|
||||
/// also requires groups_to_compress to be specified
|
||||
/// * `max_state_group` - If specified, then only fetch the entries for state
|
||||
/// groups lower than or equal to this number.
|
||||
/// * 'groups_to_compress' - The number of groups to get from the database before stopping
|
||||
pub fn get_data_from_db(
|
||||
db_url: &str,
|
||||
room_id: &str,
|
||||
min_state_group: Option<i64>,
|
||||
groups_to_compress: Option<i64>,
|
||||
max_state_group: Option<i64>,
|
||||
) -> BTreeMap<i64, StateGroupEntry> {
|
||||
let mut client = Client::connect(db_url, postgres::NoTls).unwrap();
|
||||
) -> Option<(BTreeMap<i64, StateGroupEntry>, i64)> {
|
||||
// connect to the database
|
||||
let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
|
||||
builder.set_verify(SslVerifyMode::NONE);
|
||||
let connector = MakeTlsConnector::new(builder.build());
|
||||
|
||||
let mut state_group_map = get_initial_data_from_db(&mut client, room_id, max_state_group);
|
||||
let mut client = Client::connect(db_url, connector)
|
||||
.unwrap_or_else(|e| panic!("Error connecting to the database: {}", e));
|
||||
|
||||
println!("Got initial state from database. Checking for any missing state groups...");
|
||||
// Search for the group id of the groups_to_compress'th group after min_state_group
|
||||
// If this is saved, then the compressor can continue by having min_state_group being
|
||||
// set to this maximum. If no such group can be found then return None.
|
||||
let max_group_found = find_max_group(
|
||||
&mut client,
|
||||
room_id,
|
||||
min_state_group,
|
||||
groups_to_compress,
|
||||
max_state_group,
|
||||
)?;
|
||||
|
||||
let state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
|
||||
Some(load_map_from_db(
|
||||
&mut client,
|
||||
room_id,
|
||||
min_state_group,
|
||||
max_group_found,
|
||||
state_group_map,
|
||||
))
|
||||
}
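
A minimal usage sketch for the function above (not part of the diff; the connection string and room id are placeholders, and it assumes the caller can reach this function from its crate):

```rust
// Sketch: fetch up to 500 state groups for one room, starting after group 1000.
fn fetch_example() {
    let db_url = "postgresql://user:pass@localhost:5432/synapse"; // placeholder
    let room_id = "!someroom:example.com"; // placeholder

    match get_data_from_db(db_url, room_id, Some(1000), Some(500), None) {
        Some((state_group_map, last_group)) => {
            println!("loaded {} groups, last id {}", state_group_map.len(), last_group);
        }
        None => println!("no state groups found in the requested range"),
    }
}
```
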
|
||||
|
||||
/// Fetch the entries in state_groups_state (and their prev groups) for a
|
||||
/// specific room. This method should only be called if resuming the compressor from
|
||||
/// where it last finished - and as such also loads in the state groups from the heads
|
||||
/// of each of the levels (as they were at the end of the last run of the compressor)
|
||||
///
|
||||
/// Returns with the state_group map and the id of the last group that was used
|
||||
/// Or None if there are no state groups within the range given
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `room_id` - The ID of the room in the database
|
||||
/// * `db_url` - The URL of a Postgres database. This should be of the
|
||||
/// form: "postgresql://user:pass@domain:port/database"
|
||||
/// * `min_state_group` - If specified, then only fetch the entries for state
|
||||
/// groups greater than (but not equal) to this number. It
|
||||
/// also requires groups_to_compress to be specified
|
||||
/// * 'groups_to_compress' - The number of groups to get from the database before stopping
|
||||
/// * 'level_info' - The maximum size, current length and current head for each
|
||||
/// level (as it was when the compressor last finished for this
|
||||
/// room)
|
||||
pub fn reload_data_from_db(
|
||||
db_url: &str,
|
||||
room_id: &str,
|
||||
min_state_group: Option<i64>,
|
||||
groups_to_compress: Option<i64>,
|
||||
level_info: &[Level],
|
||||
) -> Option<(BTreeMap<i64, StateGroupEntry>, i64)> {
|
||||
// connect to the database
|
||||
let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
|
||||
builder.set_verify(SslVerifyMode::NONE);
|
||||
let connector = MakeTlsConnector::new(builder.build());
|
||||
|
||||
let mut client = Client::connect(db_url, connector)
|
||||
.unwrap_or_else(|e| panic!("Error connecting to the database: {}", e));
|
||||
|
||||
// Search for the group id of the groups_to_compress'th group after min_state_group
|
||||
// If this is saved, then the compressor can continue by having min_state_group being
|
||||
    // set to this maximum. If no such group can be found then return None.
|
||||
let max_group_found = find_max_group(
|
||||
&mut client,
|
||||
room_id,
|
||||
min_state_group,
|
||||
groups_to_compress,
|
||||
// max state group not used when saving and loading
|
||||
None,
|
||||
)?;
|
||||
|
||||
// load just the state_groups at the head of each level
|
||||
// this doesn't load their predecessors as that will be done at the end of
|
||||
// load_map_from_db()
|
||||
let state_group_map: BTreeMap<i64, StateGroupEntry> = load_level_heads(&mut client, level_info);
|
||||
|
||||
Some(load_map_from_db(
|
||||
&mut client,
|
||||
room_id,
|
||||
min_state_group,
|
||||
max_group_found,
|
||||
state_group_map,
|
||||
))
|
||||
}
|
||||
|
||||
/// Finds the state_groups that are at the head of each compressor level
|
||||
/// NOTE this does not also retrieve their predecessors
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `client` - A Postgres client to make requests with
|
||||
/// * `levels` - The levels whose heads are being requested
|
||||
fn load_level_heads(client: &mut Client, level_info: &[Level]) -> BTreeMap<i64, StateGroupEntry> {
|
||||
// obtain all of the heads that aren't None from level_info
|
||||
let level_heads: Vec<i64> = level_info.iter().filter_map(|l| (*l).get_head()).collect();
|
||||
|
||||
// Query to get id, predecessor and deltas for each state group
|
||||
let sql = r#"
|
||||
SELECT m.id, prev_state_group, type, state_key, s.event_id
|
||||
FROM state_groups AS m
|
||||
LEFT JOIN state_groups_state AS s ON (m.id = s.state_group)
|
||||
LEFT JOIN state_group_edges AS e ON (m.id = e.state_group)
|
||||
WHERE m.id = ANY($1)
|
||||
ORDER BY m.id
|
||||
"#;
|
||||
|
||||
// Actually do the query
|
||||
let mut rows = client.query_raw(sql, &[&level_heads]).unwrap();
|
||||
|
||||
// Copy the data from the database into a map
|
||||
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
|
||||
while let Some(row) = rows.next().unwrap() {
|
||||
// The row in the map to copy the data to
|
||||
// NOTE: default StateGroupEntry has in_range as false
|
||||
// This is what we want since as a level head, it has already been compressed by the
|
||||
// previous run!
|
||||
let entry = state_group_map.entry(row.get(0)).or_default();
|
||||
|
||||
// Save the predecessor (this may already be there)
|
||||
entry.prev_state_group = row.get(1);
|
||||
|
||||
// Copy the single delta from the predecessor stored in this row
|
||||
if let Some(etype) = row.get::<_, Option<String>>(2) {
|
||||
entry.state_map.insert(
|
||||
&etype,
|
||||
&row.get::<_, String>(3),
|
||||
row.get::<_, String>(4).into(),
|
||||
);
|
||||
}
|
||||
}
|
||||
state_group_map
|
||||
}
|
||||
|
||||
/// Fetch the entries in state_groups_state (and their prev groups) for a
|
||||
/// specific room within a certain range. These are appended onto the provided
|
||||
/// map.
|
||||
///
|
||||
/// - Fetches the first [group] rows with group id after [min]
|
||||
/// - Recursively searches for missing predecessors and adds those
|
||||
///
|
||||
/// Returns with the state_group map and the id of the last group that was used
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `client` - A Postgres client to make requests with
|
||||
/// * `room_id` - The ID of the room in the database
|
||||
/// * `min_state_group` - If specified, then only fetch the entries for state
|
||||
/// groups greater than (but not equal) to this number. It
|
||||
/// also requires groups_to_compress to be specified
|
||||
/// * 'max_group_found' - The last group to get from the database before stopping
|
||||
/// * 'state_group_map' - The map to populate with the entries from the database
|
||||
|
||||
fn load_map_from_db(
|
||||
client: &mut Client,
|
||||
room_id: &str,
|
||||
min_state_group: Option<i64>,
|
||||
max_group_found: i64,
|
||||
mut state_group_map: BTreeMap<i64, StateGroupEntry>,
|
||||
) -> (BTreeMap<i64, StateGroupEntry>, i64) {
|
||||
state_group_map.append(&mut get_initial_data_from_db(
|
||||
client,
|
||||
room_id,
|
||||
min_state_group,
|
||||
max_group_found,
|
||||
));
|
||||
|
||||
debug!("Got initial state from database. Checking for any missing state groups...");
|
||||
|
||||
// Due to reasons some of the state groups appear in the edges table, but
|
||||
// not in the state_groups_state table. This means they don't get included
|
||||
// in our DB queries, so we have to fetch any missing groups explicitly.
|
||||
// not in the state_groups_state table.
|
||||
//
|
||||
// Also it is likely that the predecessor of a node will not be within the
|
||||
// chunk that was specified by min_state_group and groups_to_compress.
|
||||
// This means they don't get included in our DB queries, so we have to fetch
|
||||
// any missing groups explicitly.
|
||||
//
|
||||
// Since the returned groups may themselves reference groups we don't have,
|
||||
// we need to do this recursively until we don't find any more missing.
|
||||
loop {
|
||||
let mut missing_sgs: Vec<_> = state_group_map
|
||||
.iter()
|
||||
.filter_map(|(_sg, entry)| {
|
||||
if let Some(prev_sg) = entry.prev_state_group {
|
||||
if state_group_map.contains_key(&prev_sg) {
|
||||
None
|
||||
} else {
|
||||
Some(prev_sg)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
entry
|
||||
.prev_state_group
|
||||
.filter(|&prev_sg| !state_group_map.contains_key(&prev_sg))
|
||||
})
|
||||
.collect();
|
||||
|
||||
if missing_sgs.is_empty() {
|
||||
println!("No missing state groups");
|
||||
trace!("No missing state groups");
|
||||
break;
|
||||
}
|
||||
|
||||
missing_sgs.sort_unstable();
|
||||
missing_sgs.dedup();
|
||||
|
||||
println!("Missing {} state groups", missing_sgs.len());
|
||||
trace!("Missing {} state groups", missing_sgs.len());
|
||||
|
||||
let map = get_missing_from_db(&mut client, &missing_sgs);
|
||||
state_group_map.extend(map.into_iter());
|
||||
// find state groups not picked up already and add them to the map
|
||||
let map = get_missing_from_db(client, &missing_sgs, min_state_group, max_group_found);
|
||||
for (k, v) in map {
|
||||
state_group_map.entry(k).or_insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
state_group_map
|
||||
(state_group_map, max_group_found)
|
||||
}
|
||||
|
||||
/// Fetch the entries in state_groups_state (and their prev groups) for the
|
||||
/// given `room_id` by fetching all state with the given `room_id`.
|
||||
/// Returns the group ID of the last group to be compressed
|
||||
///
|
||||
/// This can be saved so that future runs of the compressor only
|
||||
/// continue from after this point. If no groups can be found in
|
||||
/// the range specified it returns None.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `client` - A Postgres client to make requests with
|
||||
/// * `room_id` - The ID of the room in the database
|
||||
/// * `min_state_group` - The lower limit (non inclusive) of group id's to compress
|
||||
/// * 'groups_to_compress' - How many groups to compress
|
||||
/// * `max_state_group` - The upper bound on what this method can return
|
||||
fn find_max_group(
|
||||
client: &mut Client,
|
||||
room_id: &str,
|
||||
min_state_group: Option<i64>,
|
||||
groups_to_compress: Option<i64>,
|
||||
max_state_group: Option<i64>,
|
||||
) -> Option<i64> {
|
||||
// Get list of state_id's in a certain room
|
||||
let mut query_chunk_of_ids = "SELECT id FROM state_groups WHERE room_id = $1".to_string();
|
||||
let params: Vec<&(dyn ToSql + Sync)>;
|
||||
|
||||
if let Some(max) = max_state_group {
|
||||
query_chunk_of_ids = format!("{} AND id <= {}", query_chunk_of_ids, max)
|
||||
}
|
||||
|
||||
// Adds additional constraint if a groups_to_compress or min_state_group have been specified
|
||||
// Note a min state group is only used if groups_to_compress also is
|
||||
if min_state_group.is_some() && groups_to_compress.is_some() {
|
||||
params = vec![&room_id, &min_state_group, &groups_to_compress];
|
||||
query_chunk_of_ids = format!(
|
||||
r"{} AND id > $2 ORDER BY id ASC LIMIT $3",
|
||||
query_chunk_of_ids
|
||||
);
|
||||
} else if groups_to_compress.is_some() {
|
||||
params = vec![&room_id, &groups_to_compress];
|
||||
query_chunk_of_ids = format!(r"{} ORDER BY id ASC LIMIT $2", query_chunk_of_ids);
|
||||
} else {
|
||||
params = vec![&room_id];
|
||||
}
|
||||
|
||||
let sql_query = format!(
|
||||
"SELECT id FROM ({}) AS ids ORDER BY ids.id DESC LIMIT 1",
|
||||
query_chunk_of_ids
|
||||
);
|
||||
|
||||
// This vector should have length 0 or 1
|
||||
let rows = client
|
||||
.query(sql_query.as_str(), ¶ms)
|
||||
.expect("Something went wrong while querying the database");
|
||||
|
||||
// If no row can be found then return None
|
||||
let final_row = rows.last()?;
|
||||
|
||||
// Else return the id of the group found
|
||||
Some(final_row.get::<_, i64>(0))
|
||||
}
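
For illustration (an inferred example, not text from the diff), the query assembled when both a minimum group and a chunk size are supplied looks like this:

```rust
// Inferred illustration of the SQL that find_max_group assembles when both
// min_state_group and groups_to_compress are given (max_state_group = None).
const EXAMPLE_FIND_MAX_GROUP_SQL: &str = "
    SELECT id FROM (
        SELECT id FROM state_groups
        WHERE room_id = $1 AND id > $2
        ORDER BY id ASC LIMIT $3
    ) AS ids
    ORDER BY ids.id DESC LIMIT 1
";
// With $2 = 1000 and $3 = 500 this yields the highest id among the next 500
// groups after 1000, or no rows (and therefore None) if that range is empty.
```
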
|
||||
|
||||
/// Fetch the entries in state_groups_state and immediate predecessors for
|
||||
/// a specific room.
|
||||
///
|
||||
/// - Fetches first [groups_to_compress] rows with group id higher than min
|
||||
/// - Stores the group id, predecessor id and deltas into a map
|
||||
/// - returns map and maximum row that was considered
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `client` - A Postgres client to make requests with
|
||||
/// * `room_id` - The ID of the room in the database
|
||||
/// * `min_state_group` - If specified, then only fetch the entries for state
|
||||
/// groups greater than (but not equal) to this number. It
|
||||
/// also requires groups_to_compress to be specified
|
||||
/// * 'max_group_found' - The upper limit on state_groups ids to get from the database
|
||||
fn get_initial_data_from_db(
|
||||
client: &mut Client,
|
||||
room_id: &str,
|
||||
max_state_group: Option<i64>,
|
||||
min_state_group: Option<i64>,
|
||||
max_group_found: i64,
|
||||
) -> BTreeMap<i64, StateGroupEntry> {
|
||||
// Query to get id, predecessor and deltas for each state group
|
||||
let sql = r#"
|
||||
SELECT m.id, prev_state_group, type, state_key, s.event_id
|
||||
FROM state_groups AS m
|
||||
LEFT JOIN state_groups_state AS s ON (m.id = s.state_group)
|
||||
LEFT JOIN state_group_edges AS e ON (m.id = e.state_group)
|
||||
WHERE m.room_id = $1
|
||||
WHERE m.room_id = $1 AND m.id <= $2
|
||||
"#;
|
||||
|
||||
let mut rows = if let Some(s) = max_state_group {
|
||||
client.query_raw(
|
||||
format!(r"{} AND m.id <= $2", sql).as_str(),
|
||||
vec![&room_id as _, &s as _],
|
||||
)
|
||||
// Adds additional constraint if minimum state_group has been specified.
|
||||
let mut rows = if let Some(min) = min_state_group {
|
||||
let params: Vec<&dyn ToSql> = vec![&room_id, &max_group_found, &min];
|
||||
client.query_raw(format!(r"{} AND m.id > $3", sql).as_str(), params)
|
||||
} else {
|
||||
client.query_raw(sql, iter::once(&room_id as _))
|
||||
let params: Vec<&dyn ToSql> = vec![&room_id, &max_group_found];
|
||||
client.query_raw(sql, params)
|
||||
}
|
||||
.unwrap();
|
||||
.expect("Something went wrong while querying the database");
|
||||
|
||||
// Copy the data from the database into a map
|
||||
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
|
||||
let pb = ProgressBar::new_spinner();
|
||||
let pb = if cfg!(feature = "no-progress-bars") {
|
||||
ProgressBar::hidden()
|
||||
} else {
|
||||
ProgressBar::new_spinner()
|
||||
};
|
||||
pb.set_style(
|
||||
ProgressStyle::default_spinner().template("{spinner} [{elapsed}] {pos} rows retrieved"),
|
||||
ProgressStyle::default_spinner()
|
||||
.template("{spinner} [{elapsed}] {pos} rows retrieved")
|
||||
.unwrap(),
|
||||
);
|
||||
pb.enable_steady_tick(100);
|
||||
pb.enable_steady_tick(Duration::from_millis(100));
|
||||
|
||||
while let Some(row) = rows.next().unwrap() {
|
||||
// The row in the map to copy the data to
|
||||
let entry = state_group_map.entry(row.get(0)).or_default();
|
||||
|
||||
// Save the predecessor and mark for compression (this may already be there)
|
||||
// TODO: slightly fewer redundant rewrites
|
||||
entry.prev_state_group = row.get(1);
|
||||
entry.in_range = true;
|
||||
|
||||
// Copy the single delta from the predecessor stored in this row
|
||||
if let Some(etype) = row.get::<_, Option<String>>(2) {
|
||||
entry.state_map.insert(
|
||||
&etype,
|
||||
@@ -125,35 +405,65 @@ fn get_initial_data_from_db(
|
||||
state_group_map
|
||||
}
|
||||
|
||||
/// Get any missing state groups from the database
|
||||
fn get_missing_from_db(client: &mut Client, missing_sgs: &[i64]) -> BTreeMap<i64, StateGroupEntry> {
|
||||
let mut rows = client
|
||||
.query_raw(
|
||||
r#"
|
||||
SELECT state_group, prev_state_group
|
||||
FROM state_group_edges
|
||||
WHERE state_group = ANY($1)
|
||||
"#,
|
||||
iter::once(&missing_sgs as _),
|
||||
)
|
||||
.unwrap();
|
||||
/// Finds the predecessors of missing state groups
|
||||
///
|
||||
/// N.B. this does NOT find their deltas
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `client` - A Postgres client to make requests with
|
||||
/// * `missing_sgs` - An array of missing state_group ids
|
||||
/// * 'min_state_group' - Minimum state_group id to mark as in range
|
||||
/// * 'max_group_found' - Maximum state_group id to mark as in range
|
||||
fn get_missing_from_db(
|
||||
client: &mut Client,
|
||||
missing_sgs: &[i64],
|
||||
min_state_group: Option<i64>,
|
||||
max_group_found: i64,
|
||||
) -> BTreeMap<i64, StateGroupEntry> {
|
||||
// "Due to reasons" it is possible that some states only appear in edges table and not in state_groups table
|
||||
// so since we know the IDs we're looking for as they are the missing predecessors, we can find them by
|
||||
// left joining onto the edges table (instead of the state_group table!)
|
||||
let sql = r#"
|
||||
SELECT target.prev_state_group, source.prev_state_group, state.type, state.state_key, state.event_id
|
||||
FROM state_group_edges AS target
|
||||
LEFT JOIN state_group_edges AS source ON (target.prev_state_group = source.state_group)
|
||||
LEFT JOIN state_groups_state AS state ON (target.prev_state_group = state.state_group)
|
||||
WHERE target.prev_state_group = ANY($1)
|
||||
"#;
|
||||
|
||||
// initialise the map with empty entries (the missing group may not
|
||||
// have a prev_state_group either)
|
||||
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = missing_sgs
|
||||
.iter()
|
||||
.map(|sg| (*sg, StateGroupEntry::default()))
|
||||
.collect();
|
||||
let mut rows = client.query_raw(sql, &[missing_sgs]).unwrap();
|
||||
|
||||
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
|
||||
|
||||
while let Some(row) = rows.next().unwrap() {
|
||||
let state_group = row.get(0);
|
||||
let entry = state_group_map.get_mut(&state_group).unwrap();
|
||||
let id = row.get(0);
|
||||
// The row in the map to copy the data to
|
||||
let entry = state_group_map.entry(id).or_default();
|
||||
|
||||
// Save the predecessor and mark for compression (this may already be there)
|
||||
// Also may well not exist!
|
||||
entry.prev_state_group = row.get(1);
|
||||
if let Some(min) = min_state_group {
|
||||
if min < id && id <= max_group_found {
|
||||
entry.in_range = true
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the single delta from the predecessor stored in this row
|
||||
if let Some(etype) = row.get::<_, Option<String>>(2) {
|
||||
entry.state_map.insert(
|
||||
&etype,
|
||||
&row.get::<_, String>(3),
|
||||
row.get::<_, String>(4).into(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
state_group_map
|
||||
}
|
||||
|
||||
// TODO: find a library that has an existing safe postgres escape function
|
||||
/// Helper function that escapes the wrapped text when writing SQL
|
||||
pub struct PGEscape<'a>(pub &'a str);
|
||||
|
||||
@@ -161,7 +471,11 @@ impl<'a> fmt::Display for PGEscape<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let mut delim = Cow::from("$$");
|
||||
while self.0.contains(&delim as &str) {
|
||||
let s: String = thread_rng().sample_iter(&Alphanumeric).take(10).collect();
|
||||
let s: String = thread_rng()
|
||||
.sample_iter(&Alphanumeric)
|
||||
.take(10)
|
||||
.map(char::from)
|
||||
.collect();
|
||||
|
||||
delim = format!("${}$", s).into();
|
||||
}
|
||||
@@ -188,3 +502,66 @@ fn test_pg_escape() {
|
||||
assert_eq!(&s[0..1], "$");
|
||||
assert_eq!(&s[start_pos - 1..start_pos], "$");
|
||||
}
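
A small illustration of the dollar-quoting behaviour, assuming PGEscape is in scope; the exact random delimiter will vary from run to run:

```rust
// Sketch: how PGEscape renders text into SQL string literals.
fn pg_escape_example() {
    // Ordinary text is wrapped in the default $$ ... $$ delimiters.
    println!("{}", PGEscape("hello world")); // prints: $$hello world$$

    // Text that already contains "$$" is wrapped in a random $<alnum>$
    // delimiter instead, so the payload can never close the literal early.
    println!("{}", PGEscape("weird $$ text")); // e.g. $aB3xY9kQw2$weird $$ text$aB3xY9kQw2$
}
```
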
|
||||
|
||||
/// Send changes to the database
|
||||
///
|
||||
/// Note that this currently ignores config.transactions and wraps every state
|
||||
/// group in its own transaction (i.e. as if config.transactions was true)
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `db_url` - The URL of a Postgres database. This should be of the
|
||||
/// form: "postgresql://user:pass@domain:port/database"
|
||||
/// * `room_id` - The ID of the room in the database
|
||||
/// * `old_map` - The state group data originally in the database
|
||||
/// * `new_map` - The state group data generated by the compressor to
|
||||
///                 replace the old contents
|
||||
pub fn send_changes_to_db(
|
||||
db_url: &str,
|
||||
room_id: &str,
|
||||
old_map: &BTreeMap<i64, StateGroupEntry>,
|
||||
new_map: &BTreeMap<i64, StateGroupEntry>,
|
||||
) {
|
||||
// connect to the database
|
||||
let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
|
||||
builder.set_verify(SslVerifyMode::NONE);
|
||||
let connector = MakeTlsConnector::new(builder.build());
|
||||
|
||||
let mut client = Client::connect(db_url, connector).unwrap();
|
||||
|
||||
debug!("Writing changes...");
|
||||
|
||||
// setup the progress bar
|
||||
let pb = if cfg!(feature = "no-progress-bars") {
|
||||
ProgressBar::hidden()
|
||||
} else {
|
||||
ProgressBar::new(old_map.len() as u64)
|
||||
};
|
||||
pb.set_style(
|
||||
ProgressStyle::default_bar()
|
||||
.template("[{elapsed_precise}] {bar} {pos}/{len} {msg}")
|
||||
.unwrap(),
|
||||
);
|
||||
pb.set_message("state groups");
|
||||
pb.enable_steady_tick(Duration::from_millis(100));
|
||||
|
||||
for sql_transaction in generate_sql(old_map, new_map, room_id) {
|
||||
if sql_transaction.is_empty() {
|
||||
pb.inc(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
// commit this change to the database
|
||||
        // N.B. this is a synchronous library so it will wait until finished before continuing...
|
||||
// if want to speed up compressor then this might be a good place to start!
|
||||
let mut single_group_transaction = client.transaction().unwrap();
|
||||
single_group_transaction
|
||||
.batch_execute(&sql_transaction)
|
||||
.unwrap();
|
||||
single_group_transaction.commit().unwrap();
|
||||
|
||||
pb.inc(1);
|
||||
}
|
||||
|
||||
pb.finish();
|
||||
}
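
A hedged usage sketch of the write-back path (placeholders for the connection string and room id; assumes the maps and types above are in scope):

```rust
// Sketch: push the compressor's output back to the database, one state group
// change per transaction, using the function above.
fn apply_changes_example(
    old_map: &BTreeMap<i64, StateGroupEntry>,
    new_map: &BTreeMap<i64, StateGroupEntry>,
) {
    let db_url = "postgresql://user:pass@localhost:5432/synapse"; // placeholder
    let room_id = "!someroom:example.com"; // placeholder

    send_changes_to_db(db_url, room_id, old_map, new_map);
}
```
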
|
||||
|
||||
src/graphing.rs (new file, 71 lines)
@@ -0,0 +1,71 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::{fs::File, io::Write};
|
||||
|
||||
use super::StateGroupEntry;
|
||||
|
||||
type Graph = BTreeMap<i64, StateGroupEntry>;
|
||||
|
||||
/// Outputs information from a state group graph into an edges file and a node file
|
||||
///
|
||||
/// These can be loaded into something like Gephi to visualise the graphs
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `groups` - A map from state group ids to StateGroupEntries
|
||||
/// * `edges_output` - The file to output the predecessor link information to
|
||||
/// * `nodes_output` - The file to output the state group information to
|
||||
fn output_csv(groups: &Graph, edges_output: &mut File, nodes_output: &mut File) {
|
||||
// The line A;B in the edges file means:
|
||||
// That state group A has predecessor B
|
||||
writeln!(edges_output, "Source;Target",).unwrap();
|
||||
|
||||
// The line A;B;C;"B" in the nodes file means:
|
||||
// The state group id is A
|
||||
// This state group has B rows in the state_groups_state table
|
||||
// If C is true then A has no predecessor
|
||||
writeln!(nodes_output, "Id;Rows;Root;Label",).unwrap();
|
||||
|
||||
for (source, entry) in groups {
|
||||
// If the group has a predecessor then write an edge in the edges file
|
||||
if let Some(target) = entry.prev_state_group {
|
||||
writeln!(edges_output, "{};{}", source, target,).unwrap();
|
||||
}
|
||||
|
||||
// Write the state group's information to the nodes file
|
||||
writeln!(
|
||||
nodes_output,
|
||||
"{};{};{};\"{}\"",
|
||||
source,
|
||||
entry.state_map.len(),
|
||||
entry.prev_state_group.is_none(),
|
||||
entry.state_map.len(),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Outputs information from two state group graph into files
|
||||
///
|
||||
/// These can be loaded into something like Gephi to visualise the graphs
|
||||
/// before and after the compressor is run
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `before` - A map from state group ids to StateGroupEntries
|
||||
/// the information from this map goes into before_edges.csv
|
||||
/// and before_nodes.csv
|
||||
/// * `after` - A map from state group ids to StateGroupEntries
|
||||
/// the information from this map goes into after_edges.csv
|
||||
/// and after_nodes.csv
|
||||
pub fn make_graphs(before: &Graph, after: &Graph) {
|
||||
// Open all the files to output to
|
||||
let mut before_edges_file = File::create("before_edges.csv").unwrap();
|
||||
let mut before_nodes_file = File::create("before_nodes.csv").unwrap();
|
||||
let mut after_edges_file = File::create("after_edges.csv").unwrap();
|
||||
let mut after_nodes_file = File::create("after_nodes.csv").unwrap();
|
||||
|
||||
// Write before's information to before_edges and before_nodes
|
||||
output_csv(before, &mut before_edges_file, &mut before_nodes_file);
|
||||
    // Write after's information to after_edges and after_nodes
|
||||
output_csv(after, &mut after_edges_file, &mut after_nodes_file);
|
||||
}
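
A short sketch of how the two CSV dumps are produced, plus what typical lines look like (illustrative values only):

```rust
// Sketch: dump both graphs so they can be opened in Gephi.
// before_edges.csv might contain lines such as "6;3" (group 6 points at 3),
// and before_nodes.csv lines such as "6;2;false;\"2\"" (id, rows, root, label).
fn graph_example(before: &BTreeMap<i64, StateGroupEntry>, after: &BTreeMap<i64, StateGroupEntry>) {
    // Writes before_edges.csv / before_nodes.csv and after_edges.csv /
    // after_nodes.csv into the current working directory.
    make_graphs(before, after);
}
```
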
|
||||
src/lib.rs (new file, 1333 lines): file diff suppressed because it is too large
src/main.rs (344 lines changed)
@@ -16,335 +16,31 @@
|
||||
//! Synapse instance's database. Specifically, it aims to reduce the number of
|
||||
//! rows that a given room takes up in the `state_groups_state` table.
|
||||
|
||||
mod compressor;
|
||||
mod database;
|
||||
|
||||
#[cfg(feature = "jemalloc")]
|
||||
#[global_allocator]
|
||||
static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
use compressor::Compressor;
|
||||
use database::PGEscape;
|
||||
use log::LevelFilter;
|
||||
use std::env;
|
||||
use std::io::Write;
|
||||
|
||||
use clap::{
|
||||
crate_authors, crate_description, crate_name, crate_version, value_t_or_exit, App, Arg,
|
||||
};
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use rayon::prelude::*;
|
||||
use state_map::StateMap;
|
||||
use std::{collections::BTreeMap, fs::File, io::Write, str::FromStr};
|
||||
use string_cache::DefaultAtom as Atom;
|
||||
|
||||
/// An entry for a state group. Consists of an (optional) previous group and the
|
||||
/// delta from that previous group (or the full state if no previous group)
|
||||
#[derive(Default, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct StateGroupEntry {
|
||||
prev_state_group: Option<i64>,
|
||||
state_map: StateMap<Atom>,
|
||||
}
|
||||
|
||||
/// Gets the full state for a given group from the map (of deltas)
|
||||
pub fn collapse_state_maps(
|
||||
map: &BTreeMap<i64, StateGroupEntry>,
|
||||
state_group: i64,
|
||||
) -> StateMap<Atom> {
|
||||
let mut entry = &map[&state_group];
|
||||
let mut state_map = StateMap::new();
|
||||
|
||||
let mut stack = vec![state_group];
|
||||
|
||||
while let Some(prev_state_group) = entry.prev_state_group {
|
||||
stack.push(prev_state_group);
|
||||
if !map.contains_key(&prev_state_group) {
|
||||
panic!("Missing {}", prev_state_group);
|
||||
}
|
||||
entry = &map[&prev_state_group];
|
||||
}
|
||||
|
||||
for sg in stack.iter().rev() {
|
||||
state_map.extend(
|
||||
map[&sg]
|
||||
.state_map
|
||||
.iter()
|
||||
.map(|((t, s), e)| ((t, s), e.clone())),
|
||||
);
|
||||
}
|
||||
|
||||
state_map
|
||||
}
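
A minimal sketch of how the helper above is used; it assumes the map already contains every group on the predecessor chain:

```rust
// Sketch: resolve the full state for group 2 by walking its predecessor chain.
// Later deltas override earlier ones for the same (type, state_key) pair.
fn collapse_example(map: &BTreeMap<i64, StateGroupEntry>) {
    let full_state = collapse_state_maps(map, 2);
    println!("group 2 resolves to {} state rows", full_state.len());
}
```
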
|
||||
|
||||
/// Helper struct for parsing the `level_sizes` argument.
|
||||
struct LevelSizes(Vec<usize>);
|
||||
|
||||
impl FromStr for LevelSizes {
|
||||
type Err = &'static str;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let mut sizes = Vec::new();
|
||||
|
||||
for size_str in s.split(',') {
|
||||
let size: usize = size_str
|
||||
.parse()
|
||||
.map_err(|_| "Not a comma separated list of numbers")?;
|
||||
sizes.push(size);
|
||||
}
|
||||
|
||||
Ok(LevelSizes(sizes))
|
||||
}
|
||||
}
|
||||
use synapse_compress_state as comp_state;
|
||||
|
||||
fn main() {
|
||||
#[allow(deprecated)]
|
||||
let matches = App::new(crate_name!())
|
||||
.version(crate_version!())
|
||||
.author(crate_authors!("\n"))
|
||||
.about(crate_description!())
|
||||
.arg(
|
||||
Arg::with_name("postgres-url")
|
||||
.short("p")
|
||||
.value_name("URL")
|
||||
.help("The url for connecting to the postgres database")
|
||||
.takes_value(true)
|
||||
.required(true),
|
||||
).arg(
|
||||
Arg::with_name("room_id")
|
||||
.short("r")
|
||||
.value_name("ROOM_ID")
|
||||
.help("The room to process")
|
||||
.takes_value(true)
|
||||
.required(true),
|
||||
).arg(
|
||||
Arg::with_name("max_state_group")
|
||||
.short("s")
|
||||
.value_name("MAX_STATE_GROUP")
|
||||
.help("The maximum state group to process up to")
|
||||
.takes_value(true)
|
||||
.required(false),
|
||||
).arg(
|
||||
Arg::with_name("min_saved_rows")
|
||||
.short("m")
|
||||
.value_name("COUNT")
|
||||
.help("Suppress output if fewer than COUNT rows would be saved")
|
||||
.takes_value(true)
|
||||
.required(false),
|
||||
).arg(
|
||||
Arg::with_name("output_file")
|
||||
.short("o")
|
||||
.value_name("FILE")
|
||||
.help("File to output the changes to in SQL")
|
||||
.takes_value(true),
|
||||
).arg(
|
||||
Arg::with_name("transactions")
|
||||
.short("t")
|
||||
.help("Whether to wrap each state group change in a transaction")
|
||||
.requires("output_file"),
|
||||
).arg(
|
||||
Arg::with_name("level_sizes")
|
||||
.short("l")
|
||||
.value_name("LEVELS")
|
||||
.help("Sizes of each new level in the compression algorithm, as a comma separated list.")
|
||||
.long_help(concat!(
|
||||
"Sizes of each new level in the compression algorithm, as a comma separated list.",
|
||||
" The first entry in the list is for the lowest, most granular level,",
|
||||
" with each subsequent entry being for the next highest level.",
|
||||
" The number of entries in the list determines the number of levels",
|
||||
" that will be used.",
|
||||
"\nThe sum of the sizes of the levels effect the performance of fetching the state",
|
||||
" from the database, as the sum of the sizes is the upper bound on number of",
|
||||
" iterations needed to fetch a given set of state.",
|
||||
))
|
||||
.default_value("100,50,25")
|
||||
.takes_value(true),
|
||||
).get_matches();
|
||||
|
||||
let db_url = matches
|
||||
.value_of("postgres-url")
|
||||
.expect("db url should be required");
|
||||
|
||||
let mut output_file = matches
|
||||
.value_of("output_file")
|
||||
.map(|path| File::create(path).unwrap());
|
||||
|
||||
let room_id = matches
|
||||
.value_of("room_id")
|
||||
.expect("room_id should be required since no file");
|
||||
|
||||
let max_state_group = matches
|
||||
.value_of("max_state_group")
|
||||
.map(|s| s.parse().expect("max_state_group must be an integer"));
|
||||
|
||||
let min_saved_rows = matches
|
||||
.value_of("min_saved_rows")
|
||||
.map(|v| v.parse().expect("COUNT must be an integer"));
|
||||
|
||||
let transactions = matches.is_present("transactions");
|
||||
|
||||
let level_sizes = value_t_or_exit!(matches, "level_sizes", LevelSizes);
|
||||
|
||||
// First we need to get the current state groups
|
||||
println!("Fetching state from DB for room '{}'...", room_id);
|
||||
let state_group_map = database::get_data_from_db(db_url, room_id, max_state_group);
|
||||
|
||||
println!("Number of state groups: {}", state_group_map.len());
|
||||
|
||||
let original_summed_size = state_group_map
|
||||
.iter()
|
||||
.fold(0, |acc, (_, v)| acc + v.state_map.len());
|
||||
|
||||
println!("Number of rows in current table: {}", original_summed_size);
|
||||
|
||||
// Now we actually call the compression algorithm.
|
||||
|
||||
println!("Compressing state...");
|
||||
|
||||
let compressor = Compressor::compress(&state_group_map, &level_sizes.0);
|
||||
|
||||
let new_state_group_map = compressor.new_state_group_map;
|
||||
|
||||
// Done! Now to print a bunch of stats.
|
||||
|
||||
let compressed_summed_size = new_state_group_map
|
||||
.iter()
|
||||
.fold(0, |acc, (_, v)| acc + v.state_map.len());
|
||||
|
||||
let ratio = (compressed_summed_size as f64) / (original_summed_size as f64);
|
||||
|
||||
println!(
|
||||
"Number of rows after compression: {} ({:.2}%)",
|
||||
compressed_summed_size,
|
||||
ratio * 100.
|
||||
);
|
||||
|
||||
println!("Compression Statistics:");
|
||||
println!(
|
||||
" Number of forced resets due to lacking prev: {}",
|
||||
compressor.stats.resets_no_suitable_prev
|
||||
);
|
||||
println!(
|
||||
" Number of compressed rows caused by the above: {}",
|
||||
compressor.stats.resets_no_suitable_prev_size
|
||||
);
|
||||
println!(
|
||||
" Number of state groups changed: {}",
|
||||
compressor.stats.state_groups_changed
|
||||
);
|
||||
|
||||
if let Some(min) = min_saved_rows {
|
||||
let saving = (original_summed_size - compressed_summed_size) as i32;
|
||||
if saving < min {
|
||||
println!(
|
||||
"Only {} rows would be saved by this compression. Skipping output.",
|
||||
saving
|
||||
);
|
||||
return;
|
||||
}
|
||||
// setup the logger
|
||||
// The default can be overwritten with RUST_LOG
|
||||
// see the README for more information
|
||||
if env::var("RUST_LOG").is_err() {
|
||||
let mut log_builder = env_logger::builder();
|
||||
// Only output the log message (and not the prefixed timestamp etc.)
|
||||
log_builder.format(|buf, record| writeln!(buf, "{}", record.args()));
|
||||
// By default print all of the debugging messages from this library
|
||||
log_builder.filter_module("synapse_compress_state", LevelFilter::Debug);
|
||||
log_builder.init();
|
||||
} else {
|
||||
// If RUST_LOG was set then use that
|
||||
env_logger::Builder::from_env("RUST_LOG").init();
|
||||
}
|
||||
|
||||
// If we are given an output file, we output the changes as SQL. If the
|
||||
// `transactions` argument is set we wrap each change to a state group in a
|
||||
// transaction.
|
||||
|
||||
if let Some(output) = &mut output_file {
|
||||
println!("Writing changes...");
|
||||
|
||||
let pb = ProgressBar::new(state_group_map.len() as u64);
|
||||
pb.set_style(
|
||||
ProgressStyle::default_bar().template("[{elapsed_precise}] {bar} {pos}/{len} {msg}"),
|
||||
);
|
||||
pb.set_message("state groups");
|
||||
pb.enable_steady_tick(100);
|
||||
|
||||
for (sg, old_entry) in &state_group_map {
|
||||
let new_entry = &new_state_group_map[sg];
|
||||
|
||||
if old_entry != new_entry {
|
||||
if transactions {
|
||||
writeln!(output, "BEGIN;").unwrap();
|
||||
}
|
||||
|
||||
writeln!(
|
||||
output,
|
||||
"DELETE FROM state_group_edges WHERE state_group = {};",
|
||||
sg
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
if let Some(prev_sg) = new_entry.prev_state_group {
|
||||
writeln!(output, "INSERT INTO state_group_edges (state_group, prev_state_group) VALUES ({}, {});", sg, prev_sg).unwrap();
|
||||
}
|
||||
|
||||
writeln!(
|
||||
output,
|
||||
"DELETE FROM state_groups_state WHERE state_group = {};",
|
||||
sg
|
||||
)
|
||||
.unwrap();
|
||||
if !new_entry.state_map.is_empty() {
|
||||
writeln!(output, "INSERT INTO state_groups_state (state_group, room_id, type, state_key, event_id) VALUES").unwrap();
|
||||
let mut first = true;
|
||||
for ((t, s), e) in new_entry.state_map.iter() {
|
||||
if first {
|
||||
write!(output, " ").unwrap();
|
||||
first = false;
|
||||
} else {
|
||||
write!(output, " ,").unwrap();
|
||||
}
|
||||
writeln!(
|
||||
output,
|
||||
"({}, {}, {}, {}, {})",
|
||||
sg,
|
||||
PGEscape(room_id),
|
||||
PGEscape(t),
|
||||
PGEscape(s),
|
||||
PGEscape(e)
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
writeln!(output, ";").unwrap();
|
||||
}
|
||||
|
||||
if transactions {
|
||||
writeln!(output, "COMMIT;").unwrap();
|
||||
}
|
||||
writeln!(output).unwrap();
|
||||
}
|
||||
|
||||
pb.inc(1);
|
||||
}
|
||||
|
||||
pb.finish();
|
||||
}
|
||||
|
||||
println!("Checking that state maps match...");
|
||||
|
||||
let pb = ProgressBar::new(state_group_map.len() as u64);
|
||||
pb.set_style(
|
||||
ProgressStyle::default_bar().template("[{elapsed_precise}] {bar} {pos}/{len} {msg}"),
|
||||
);
|
||||
pb.set_message("state groups");
|
||||
pb.enable_steady_tick(100);
|
||||
|
||||
// Now let's iterate through and assert that the state for each group
|
||||
// matches between the two versions.
|
||||
state_group_map
|
||||
.par_iter() // This uses rayon to run the checks in parallel
|
||||
.try_for_each(|(sg, _)| {
|
||||
let expected = collapse_state_maps(&state_group_map, *sg);
|
||||
let actual = collapse_state_maps(&new_state_group_map, *sg);
|
||||
|
||||
pb.inc(1);
|
||||
|
||||
if expected != actual {
|
||||
println!("State Group: {}", sg);
|
||||
println!("Expected: {:#?}", expected);
|
||||
println!("actual: {:#?}", actual);
|
||||
Err(format!("State for group {} do not match", sg))
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
})
|
||||
.expect("expected state to match");
|
||||
|
||||
pb.finish();
|
||||
|
||||
println!("New state map matches old one");
|
||||
comp_state::run(comp_state::Config::parse_arguments());
|
||||
}
|
||||
|
||||
synapse_auto_compressor/Cargo.toml (new file, 56 lines)
@@ -0,0 +1,56 @@
|
||||
[package]
|
||||
name = "synapse_auto_compressor"
|
||||
authors = ["William Ashton"]
|
||||
version = "0.1.3"
|
||||
edition = "2018"
|
||||
|
||||
[[bin]]
|
||||
name = "synapse_auto_compressor"
|
||||
required-features = ["clap"]
|
||||
|
||||
[package.metadata.maturin]
|
||||
requires-python = ">=3.7"
|
||||
project-url = {Source = "https://github.com/matrix-org/rust-synapse-compress-state"}
|
||||
classifier = [
|
||||
"Development Status :: 4 - Beta",
|
||||
"Programming Language :: Rust",
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
openssl = { version = "0.10.60", features = ["vendored"] }
|
||||
postgres = "0.19.7"
|
||||
postgres-openssl = "0.5.0"
|
||||
rand = "0.8.5"
|
||||
serial_test = "2.0.0"
|
||||
synapse_compress_state = { path = "../", features = ["no-progress-bars"], default-features = false }
|
||||
env_logger = "0.10.0"
|
||||
log = "0.4.20"
|
||||
log-panics = "2.1.0"
|
||||
anyhow = "1.0.75"
|
||||
|
||||
# Needed for pyo3 support
|
||||
[lib]
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
|
||||
[dependencies.clap]
|
||||
version = "4.4.2"
|
||||
features = ["cargo"]
|
||||
optional = true
|
||||
|
||||
[dependencies.pyo3]
|
||||
version = "0.19.2"
|
||||
features = ["extension-module"]
|
||||
optional = true
|
||||
|
||||
[dependencies.pyo3-log]
|
||||
version = "0.8.3"
|
||||
optional = true
|
||||
|
||||
[dependencies.tikv-jemallocator]
|
||||
version = "0.5.4"
|
||||
optional = true
|
||||
|
||||
[features]
|
||||
default = ["clap", "jemalloc"]
|
||||
jemalloc = ["tikv-jemallocator", "synapse_compress_state/jemalloc"]
|
||||
pyo3 = ["dep:pyo3", "dep:pyo3-log", "synapse_compress_state/pyo3"]
|
||||
synapse_auto_compressor/README.md (new file, 12 lines)
@@ -0,0 +1,12 @@
|
||||
# Auto Compressor
|
||||
|
||||
See the top level readme for information.
|
||||
|
||||
|
||||
## Publishing to PyPI
|
||||
|
||||
Bump the version number and run from the root directory of the repo:
|
||||
|
||||
```
|
||||
docker run -it --rm -v $(pwd):/io -e OPENSSL_STATIC=1 konstin2/maturin publish -m synapse_auto_compressor/Cargo.toml --cargo-extra-args "\--features='openssl/vendored'"
|
||||
```
|
||||
8
synapse_auto_compressor/pyproject.toml
Normal file
@@ -0,0 +1,8 @@
[build-system]
requires = ["maturin>=1.0,<2.0"]
build-backend = "maturin"

[tool.maturin]
profile = "release"
features = ["pyo3"]
no-default-features = true
132
synapse_auto_compressor/src/lib.rs
Normal file
@@ -0,0 +1,132 @@
//! This is a tool that uses the synapse_compress_state library to
//! reduce the size of the synapse state_groups_state table in a postgres
//! database.
//!
//! It adds the tables state_compressor_state and state_compressor_progress
//! to the database and uses these to enable it to incrementally work
//! on space reductions.

use anyhow::Result;
#[cfg(feature = "pyo3")]
use log::{error, LevelFilter};
#[cfg(feature = "pyo3")]
use pyo3::{
    exceptions::PyRuntimeError, prelude::pymodule, types::PyModule, PyErr, PyResult, Python,
};
use std::str::FromStr;

use synapse_compress_state::Level;

pub mod manager;
pub mod state_saving;

/// Helper struct for parsing the `default_levels` argument.
///
/// The compressor keeps track of a number of Levels, each of which has a maximum length,
/// current length, and an optional current head (None if level is empty, Some if a head
/// exists).
///
/// This is needed since FromStr cannot be implemented for structs
/// that aren't defined in this scope
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct LevelInfo(pub Vec<Level>);

// Implement FromStr so that an argument of the form "100,50,25"
// can be used to create a vector of levels with max sizes 100, 50 and 25
// For more info see the LevelState documentation in lib.rs
impl FromStr for LevelInfo {
    type Err = &'static str;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Stores the max sizes of each level
        let mut level_info: Vec<Level> = Vec::new();

        // Split the string up at each comma
        for size_str in s.split(',') {
            // try and convert each section into a number
            // panic if that fails
            let size: usize = size_str
                .parse()
                .map_err(|_| "Not a comma separated list of numbers")?;
            // add this parsed number to the sizes struct
            level_info.push(Level::new(size));
        }

        // Return the built up vector inside a LevelInfo struct
        Ok(LevelInfo(level_info))
    }
}

// PyO3 INTERFACE STARTS HERE
#[cfg(feature = "pyo3")]
#[pymodule]
fn synapse_auto_compressor(_py: Python, m: &PyModule) -> PyResult<()> {
    let _ = pyo3_log::Logger::default()
        // don't send out anything lower than a warning from other crates
        .filter(LevelFilter::Warn)
        // don't log warnings from synapse_compress_state, the
        // synapse_auto_compressor handles these situations and provides better
        // log messages
        .filter_target("synapse_compress_state".to_owned(), LevelFilter::Error)
        // log info and above for the synapse_auto_compressor
        .filter_target("synapse_auto_compressor".to_owned(), LevelFilter::Debug)
        .install();
    // ensure any panics produce error messages in the log
    log_panics::init();

    #[pyfn(m)]
    #[pyo3(name = "compress_largest_rooms")]
    fn compress_state_events_table(
        py: Python,
        db_url: String,
        chunk_size: i64,
        default_levels: String,
        number_of_chunks: i64,
    ) -> PyResult<()> {
        // Stops the compressor from holding the GIL while running
        py.allow_threads(|| {
            _compress_state_events_table_body(db_url, chunk_size, default_levels, number_of_chunks)
        })
    }

    // Not accessible through PyO3. It is a "private" function.
    fn _compress_state_events_table_body(
        db_url: String,
        chunk_size: i64,
        default_levels: String,
        number_of_chunks: i64,
    ) -> PyResult<()> {
        // Announce the start of the program to the logs
        log::info!("synapse_auto_compressor started");

        // Parse the default_levels string into a LevelInfo struct
        let default_levels: LevelInfo = match default_levels.parse() {
            Ok(l_sizes) => l_sizes,
            Err(e) => {
                return Err(PyErr::new::<PyRuntimeError, _>(format!(
                    "Unable to parse level_sizes: {}",
                    e
                )))
            }
        };

        // call compress_chunks_of_database with the arguments supplied
        let run_result = manager::compress_chunks_of_database(
            &db_url,
            chunk_size,
            &default_levels.0,
            number_of_chunks,
        );

        // (Note, need to do `{:?}` formatting to show error context)
        // Don't log the context of errors but do use it in the PyRuntimeError
        if let Err(e) = run_result {
            error!("{}", e);
            return Err(PyErr::new::<PyRuntimeError, _>(format!("{:?}", e)));
        }

        log::info!("synapse_auto_compressor finished");
        Ok(())
    }
    Ok(())
}
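For context, the `FromStr` implementation above is what lets both the CLI and the pyo3 entry point accept level sizes as a plain string. A small, hypothetical consumer of the library (not part of this change) could exercise it like so:

```
use synapse_auto_compressor::LevelInfo;

fn main() {
    // "100,50,25" becomes three Levels with max sizes 100, 50 and 25
    let levels: LevelInfo = "100,50,25".parse().expect("valid level list");
    assert_eq!(levels.0.len(), 3);

    // Anything that is not a comma separated list of numbers is rejected
    assert!("100,fifty,25".parse::<LevelInfo>().is_err());
}
```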
158
synapse_auto_compressor/src/main.rs
Normal file
@@ -0,0 +1,158 @@
//! This is a tool that uses the synapse_compress_state library to
//! reduce the size of the synapse state_groups_state table in a postgres
//! database.
//!
//! It adds the tables state_compressor_state and state_compressor_progress
//! to the database and uses these to enable it to incrementally work
//! on space reductions.
//!
//! This binary calls manager::compress_chunks_of_database() with the arguments
//! provided. That is, it compresses (in chunks) the rooms with the lowest
//! uncompressed state group ids, where "uncompressed" is measured against the
//! state_compressor_progress table.
//!
//! After each chunk, the rows processed are marked as "compressed" (using
//! the state_compressor_progress table), and the program state is saved into
//! the state_compressor_state table so that the compressor can seamlessly
//! continue from where it left off.

#[cfg(feature = "jemalloc")]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

use clap::{crate_authors, crate_description, crate_name, crate_version, Arg, Command};
use log::LevelFilter;
use std::env;
use synapse_auto_compressor::{manager, state_saving, LevelInfo};

/// Execution starts here
fn main() {
    // setup the logger for the synapse_auto_compressor
    // The default can be overwritten with RUST_LOG
    // see the README for more information
    if env::var("RUST_LOG").is_err() {
        let mut log_builder = env_logger::builder();
        // Ensure panics still come through
        log_builder.filter_module("panic", LevelFilter::Error);
        // Only output errors from the synapse_compress_state library
        log_builder.filter_module("synapse_compress_state", LevelFilter::Error);
        // Output log levels info and above from synapse_auto_compressor
        log_builder.filter_module("synapse_auto_compressor", LevelFilter::Info);
        log_builder.init();
    } else {
        // If RUST_LOG was set then use that
        let mut log_builder = env_logger::Builder::from_env("RUST_LOG");
        // Ensure panics still come through
        log_builder.filter_module("panic", LevelFilter::Error);
        log_builder.init();
    }
    log_panics::init();
    // Announce the start of the program to the logs
    log::info!("synapse_auto_compressor started");

    // parse the command line arguments using the clap crate
    let arguments = Command::new(crate_name!())
        .version(crate_version!())
        .author(crate_authors!("\n"))
        .about(crate_description!())
        .arg(
            Arg::new("postgres-url")
                .short('p')
                .value_name("POSTGRES_LOCATION")
                .help("The configuration for connecting to the postgres database.")
                .long_help(concat!(
                    "The configuration for connecting to the Postgres database. This should be of the form ",
                    r#""postgresql://username:password@mydomain.com/database" or a key-value pair "#,
                    r#"string: "user=username password=password dbname=database host=mydomain.com" "#,
                    "See https://docs.rs/tokio-postgres/0.7.2/tokio_postgres/config/struct.Config.html ",
                    "for the full details."
                ))
                .num_args(1)
                .required(true),
        ).arg(
            Arg::new("chunk_size")
                .short('c')
                .value_name("COUNT")
                .value_parser(clap::value_parser!(i64))
                .help("The maximum number of state groups to load into memory at once")
                .long_help(concat!(
                    "The number of state_groups to work on at once. All of the entries",
                    " from state_groups_state are requested from the database",
                    " for state groups that are worked on. Therefore small",
                    " chunk sizes may be needed on machines with low memory.",
                    " (Note: if the compressor fails to find space savings on the",
                    " chunk as a whole (which may well happen in rooms with lots",
                    " of backfill in) then the entire chunk is skipped.)",
                ))
                .num_args(1)
                .required(true),
        ).arg(
            Arg::new("default_levels")
                .short('l')
                .value_name("LEVELS")
                .value_parser(clap::value_parser!(LevelInfo))
                .help("Sizes of each new level in the compression algorithm, as a comma separated list.")
                .long_help(concat!(
                    "Sizes of each new level in the compression algorithm, as a comma separated list.",
                    " The first entry in the list is for the lowest, most granular level,",
                    " with each subsequent entry being for the next highest level.",
                    " The number of entries in the list determines the number of levels",
                    " that will be used.",
                    "\nThe sum of the sizes of the levels affects the performance of fetching the state",
                    " from the database, as the sum of the sizes is the upper bound on number of",
                    " iterations needed to fetch a given set of state.",
                ))
                .default_value("100,50,25")
                .num_args(1)
                .required(false),
        ).arg(
            Arg::new("number_of_chunks")
                .short('n')
                .value_name("CHUNKS_TO_COMPRESS")
                .value_parser(clap::value_parser!(i64))
                .help("The number of chunks to compress")
                .long_help(concat!(
                    "This many chunks of the database will be compressed. The higher this number is set to, ",
                    "the longer the compressor will run for."
                ))
                .num_args(1)
                .required(true),
        ).get_matches();

    // The URL of the database
    let db_url = arguments
        .get_one::<String>("postgres-url")
        .expect("A database url is required");

    // The number of state groups to work on at once
    let chunk_size = arguments
        .get_one("chunk_size")
        .copied()
        .expect("A chunk size is required");

    // The default structure to use when compressing
    let default_levels = arguments
        .get_one::<LevelInfo>("default_levels")
        .cloned()
        .unwrap();

    // The number of chunks to compress with this tool
    let number_of_chunks = arguments
        .get_one("number_of_chunks")
        .copied()
        .expect("number_of_chunks is required");

    // Connect to the database and create the tables this tool needs
    // (Note: if they already exist then this does nothing)
    let mut client = state_saving::connect_to_database(db_url)
        .unwrap_or_else(|e| panic!("Error occurred while connecting to {}: {}", db_url, e));
    state_saving::create_tables_if_needed(&mut client)
        .unwrap_or_else(|e| panic!("Error occurred while creating tables in database: {}", e));

    // call compress_chunks_of_database with the arguments supplied
    // panic if an error is produced
    manager::compress_chunks_of_database(db_url, chunk_size, &default_levels.0, number_of_chunks)
        .unwrap();

    log::info!("synapse_auto_compressor finished");
}
194
synapse_auto_compressor/src/manager.rs
Normal file
@@ -0,0 +1,194 @@
// This module contains functions that carry out different types
// of compression on the database.

use crate::state_saving::{
    connect_to_database, create_tables_if_needed, get_next_room_to_compress,
    read_room_compressor_state, write_room_compressor_state,
};
use anyhow::{bail, Context, Result};
use log::{debug, info, warn};
use synapse_compress_state::{continue_run, ChunkStats, Level};

/// Runs the compressor on a chunk of the room
///
/// Returns `Some(chunk_stats)` if the compressor has progressed
/// and `None` if it had already got to the end of the room
///
/// # Arguments
///
/// * `db_url`         - The URL of the postgres database that synapse is using.
///                      e.g. "postgresql://user:password@domain.com/synapse"
///
/// * `room_id`        - The id of the room to run the compressor on. Note this
///                      is the id as stored in the database and will look like
///                      "!aasdfasdfafdsdsa:matrix.org" instead of the common
///                      name
///
/// * `chunk_size`     - The number of state_groups to work on. All of the entries
///                      from state_groups_state are requested from the database
///                      for state groups that are worked on. Therefore small
///                      chunk sizes may be needed on machines with low memory.
///                      (Note: if the compressor fails to find space savings on the
///                      chunk as a whole (which may well happen in rooms with lots
///                      of backfill in) then the entire chunk is skipped.)
///
/// * `default_levels` - If the compressor has never been run on this room before
///                      then we need to provide the compressor with some information
///                      on what sort of compression structure we want. The default that
///                      the library suggests is `vec![Level::new(100), Level::new(50), Level::new(25)]`
pub fn run_compressor_on_room_chunk(
    db_url: &str,
    room_id: &str,
    chunk_size: i64,
    default_levels: &[Level],
) -> Result<Option<ChunkStats>> {
    // connect to the database
    let mut client =
        connect_to_database(db_url).with_context(|| format!("Failed to connect to {}", db_url))?;

    // Access the database to find out where the compressor last got up to
    let retrieved_state = read_room_compressor_state(&mut client, room_id)
        .with_context(|| format!("Failed to read compressor state for room {}", room_id,))?;

    // If the database didn't contain any information, then use the default state
    let (start, level_info) = match retrieved_state {
        Some((s, l)) => (Some(s), l),
        None => (None, default_levels.to_vec()),
    };

    // run the compressor on this chunk
    let option_chunk_stats = continue_run(start, chunk_size, db_url, room_id, &level_info);

    if option_chunk_stats.is_none() {
        debug!("No work to do on this room...");
        return Ok(None);
    }

    // Ok to unwrap because have checked that it's not None
    let chunk_stats = option_chunk_stats.unwrap();

    debug!("{:?}", chunk_stats);

    // Check to see whether the compressor sent its changes to the database
    if !chunk_stats.commited {
        if chunk_stats.new_num_rows - chunk_stats.original_num_rows != 0 {
            warn!(
                "The compressor tried to increase the number of rows in {} between {:?} and {}. Skipping...",
                room_id, start, chunk_stats.last_compressed_group,
            );
        }

        // Skip over the failed chunk and set the level info to the default (empty) state
        write_room_compressor_state(
            &mut client,
            room_id,
            default_levels,
            chunk_stats.last_compressed_group,
        )
        .with_context(|| {
            format!(
                "Failed to skip chunk in room {} between {:?} and {}",
                room_id, start, chunk_stats.last_compressed_group
            )
        })?;

        return Ok(Some(chunk_stats));
    }

    // Save where we got up to after this successful commit
    write_room_compressor_state(
        &mut client,
        room_id,
        &chunk_stats.new_level_info,
        chunk_stats.last_compressed_group,
    )
    .with_context(|| {
        format!(
            "Failed to save state after compressing chunk in room {} between {:?} and {}",
            room_id, start, chunk_stats.last_compressed_group
        )
    })?;

    Ok(Some(chunk_stats))
}

/// Runs the compressor in chunks on rooms with the lowest uncompressed state group ids
///
/// # Arguments
///
/// * `db_url`           - The URL of the postgres database that synapse is using.
///                        e.g. "postgresql://user:password@domain.com/synapse"
///
/// * `chunk_size`       - The number of state_groups to work on. All of the entries
///                        from state_groups_state are requested from the database
///                        for state groups that are worked on. Therefore small
///                        chunk sizes may be needed on machines with low memory.
///                        (Note: if the compressor fails to find space savings on the
///                        chunk as a whole (which may well happen in rooms with lots
///                        of backfill in) then the entire chunk is skipped.)
///
/// * `default_levels`   - If the compressor has never been run on this room before
///                        then we need to provide the compressor with some information
///                        on what sort of compression structure we want. The default that
///                        the library suggests is empty levels with max sizes of 100, 50 and 25
///
/// * `number_of_chunks` - The number of chunks to compress. The larger this number is, the longer
///                        the compressor will run for.
pub fn compress_chunks_of_database(
    db_url: &str,
    chunk_size: i64,
    default_levels: &[Level],
    number_of_chunks: i64,
) -> Result<()> {
    // connect to the database
    let mut client = connect_to_database(db_url)
        .with_context(|| format!("Failed to connect to database at {}", db_url))?;

    create_tables_if_needed(&mut client).context("Failed to create state compressor tables")?;

    let mut skipped_chunks = 0;
    let mut rows_saved = 0;
    let mut chunks_processed = 0;

    while chunks_processed < number_of_chunks {
        let room_to_compress = get_next_room_to_compress(&mut client)
            .context("Failed to work out what room to compress next")?;

        if room_to_compress.is_none() {
            break;
        }

        let room_to_compress =
            room_to_compress.expect("Have checked that rooms_to_compress is not None");

        info!(
            "Running compressor on room {} with chunk size {}",
            room_to_compress, chunk_size
        );

        let work_done =
            run_compressor_on_room_chunk(db_url, &room_to_compress, chunk_size, default_levels)?;

        if let Some(ref chunk_stats) = work_done {
            if chunk_stats.commited {
                let savings = chunk_stats.original_num_rows - chunk_stats.new_num_rows;
                rows_saved += chunk_stats.original_num_rows - chunk_stats.new_num_rows;
                debug!("Saved {} rows for room {}", savings, room_to_compress);
            } else {
                skipped_chunks += 1;
                debug!(
                    "Unable to make savings for room {}, skipping chunk",
                    room_to_compress
                );
            }
            chunks_processed += 1;
        } else {
            bail!("Ran the compressor on a room that had no more work to do!")
        }
    }
    info!(
        "Finished running compressor. Saved {} rows. Skipped {}/{} chunks",
        rows_saved, skipped_chunks, chunks_processed
    );
    Ok(())
}
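Since `compress_chunks_of_database` is public, another Rust program could drive the same incremental compression loop directly. A rough sketch follows; the connection string and the chunk/level sizes are placeholders, not values taken from this change.

```
use synapse_auto_compressor::manager;
use synapse_compress_state::Level;

fn main() {
    // Placeholder connection string; point this at the synapse database
    let db_url = "postgresql://user:password@localhost/synapse";

    // Default level structure suggested by the library: 100, 50, 25
    let default_levels = vec![Level::new(100), Level::new(50), Level::new(25)];

    // Work through 10 chunks of at most 500 state groups each
    manager::compress_chunks_of_database(db_url, 500, &default_levels, 10)
        .expect("compression run failed");
}
```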
321
synapse_auto_compressor/src/state_saving.rs
Normal file
@@ -0,0 +1,321 @@
// This module contains functions to communicate with the database

use anyhow::{bail, Result};
use log::trace;
use synapse_compress_state::Level;

use openssl::ssl::{SslConnector, SslMethod, SslVerifyMode};
use postgres::{fallible_iterator::FallibleIterator, types::ToSql, Client};
use postgres_openssl::MakeTlsConnector;

/// Connects to the database and returns a postgres client
///
/// # Arguments
///
/// * `db_url` - The URL of the postgres database that synapse is using.
///              e.g. "postgresql://user:password@domain.com/synapse"
pub fn connect_to_database(db_url: &str) -> Result<Client> {
    let mut builder = SslConnector::builder(SslMethod::tls())?;
    builder.set_verify(SslVerifyMode::NONE);
    let connector = MakeTlsConnector::new(builder.build());

    let client = Client::connect(db_url, connector)?;
    Ok(client)
}

/// Creates the state_compressor_state and state_compressor_progress tables
///
/// If these tables already exist then this function does nothing
///
/// # Arguments
///
/// * `client` - A postgres client used to send the requests to the database
pub fn create_tables_if_needed(client: &mut Client) -> Result<()> {
    let create_state_table = r#"
        CREATE TABLE IF NOT EXISTS state_compressor_state (
            room_id TEXT NOT NULL,
            level_num INT NOT NULL,
            max_size INT NOT NULL,
            current_length INT NOT NULL,
            current_head BIGINT,
            UNIQUE (room_id, level_num)
        )"#;

    client.execute(create_state_table, &[])?;

    let create_state_table_indexes = r#"
        CREATE INDEX IF NOT EXISTS state_compressor_state_index ON state_compressor_state (room_id)"#;

    client.execute(create_state_table_indexes, &[])?;

    let create_progress_table = r#"
        CREATE TABLE IF NOT EXISTS state_compressor_progress (
            room_id TEXT PRIMARY KEY,
            last_compressed BIGINT NOT NULL
        )"#;

    client.execute(create_progress_table, &[])?;

    let create_compressor_global_progress_table = r#"
        CREATE TABLE IF NOT EXISTS state_compressor_total_progress(
            lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE,
            lowest_uncompressed_group BIGINT NOT NULL,
            CHECK (Lock='X')
        );
        INSERT INTO state_compressor_total_progress
            (lowest_uncompressed_group)
            VALUES (0)
        ON CONFLICT (lock) DO NOTHING;
    "#;

    client.batch_execute(create_compressor_global_progress_table)?;

    Ok(())
}

/// Retrieve the level info so we can restart the compressor
///
/// # Arguments
///
/// * `client`  - A postgres client used to send the requests to the database
/// * `room_id` - The room whose saved compressor state we want to load
pub fn read_room_compressor_state(
    client: &mut Client,
    room_id: &str,
) -> Result<Option<(i64, Vec<Level>)>> {
    // Query to retrieve all levels from state_compressor_state
    // Ordered by ascending level_number
    let sql = r#"
        SELECT level_num, max_size, current_length, current_head, last_compressed
        FROM state_compressor_state
        LEFT JOIN state_compressor_progress USING (room_id)
        WHERE room_id = $1
        ORDER BY level_num ASC
    "#;

    // send the query to the database
    let mut levels = client.query_raw(sql, &[room_id])?;

    // Needed to ensure that the rows are for unique consecutive levels
    // starting from 1 (i.e of form [1,2,3] not [0,1,2] or [1,1,2,2,3])
    let mut prev_seen = 0;

    // The vector to store the level info from the database in
    let mut level_info: Vec<Level> = Vec::new();

    // Where the last compressor run stopped
    let mut last_compressed = None;
    // Used to only read last_compressed value once
    let mut first_row = true;

    // Loop through all the rows retrieved by that query
    while let Some(l) = levels.next()? {
        // Read out the fields into variables
        //
        // Some of these are `usize` as they may be used to index vectors, but stored as Postgres
        // type `INT` which is the same as `i32`.
        //
        // Since usize is unlikely to be less than 32 bits wide, this conversion should be safe
        let level_num: usize = l.get::<_, i32>("level_num") as usize;
        let max_size: usize = l.get::<_, i32>("max_size") as usize;
        let current_length: usize = l.get::<_, i32>("current_length") as usize;
        let current_head: Option<i64> = l.get("current_head");

        // Only read the last_compressed column once since it is the same for each row
        if first_row {
            last_compressed = l.get("last_compressed"); // might be NULL if corrupted
            if last_compressed.is_none() {
                bail!(
                    "No entry in state_compressor_progress for room {} but entries in state_compressor_state were found",
                    room_id
                )
            }
            first_row = false;
        }

        // Check that there aren't multiple entries for the same level number
        // in the database. (Should be impossible due to unique key constraint)
        if prev_seen == level_num {
            bail!(
                "The level {} occurs twice in state_compressor_state for room {}",
                level_num,
                room_id,
            );
        }

        // Check that there is no missing level in the database
        // e.g. if the previous row retrieved was for level 1 and this
        // row is for level 3 then since the SQL query orders the results
        // in ascending level numbers, there was no level 2 found!
        if prev_seen != level_num - 1 {
            bail!("Levels between {} and {} are missing", prev_seen, level_num,);
        }

        // if the level is not empty, then it must have a head!
        if current_head.is_none() && current_length != 0 {
            bail!(
                "Level {} has no head but current length is {} in room {}",
                level_num,
                current_length,
                room_id,
            );
        }

        // If the level has more groups in than the maximum then something is wrong!
        if current_length > max_size {
            bail!(
                "Level {} has length {} but max size {} in room {}",
                level_num,
                current_length,
                max_size,
                room_id,
            );
        }

        // Add this level to the level_info vector
        level_info.push(Level::restore(max_size, current_length, current_head));
        // Mark the previous level_number seen as the current one
        prev_seen = level_num;
    }

    // If we didn't retrieve anything from the database then there is no saved state
    // in the database!
    if level_info.is_empty() {
        return Ok(None);
    }

    // Return the compressor state we retrieved
    // last_compressed cannot be None at this point, so safe to unwrap
    Ok(Some((last_compressed.unwrap(), level_info)))
}

/// Save the level info so it can be loaded by the next run of the compressor
///
/// # Arguments
///
/// * `client`          - A postgres client used to send the requests to the database
/// * `room_id`         - The room whose saved compressor state we want to save
/// * `level_info`      - The state that can be used to restore the compressor later
/// * `last_compressed` - The last state_group that was compressed. This is needed
///                       so that the compressor knows where to start from next
pub fn write_room_compressor_state(
    client: &mut Client,
    room_id: &str,
    level_info: &[Level],
    last_compressed: i64,
) -> Result<()> {
    // Wrap all the changes to the state for this room in a transaction
    // This prevents accidentally having malformed compressor start info
    let mut write_transaction = client.transaction()?;

    // Go through every level that the compressor is using
    for (level_num, level) in level_info.iter().enumerate() {
        // the 1st level is level 1 not level 0, but enumerate starts at 0
        // so need to add 1 to get correct number
        let level_num = level_num + 1;

        // bring the level info out of the Level struct
        let (max_size, current_len, current_head) = (
            level.get_max_length(),
            level.get_current_length(),
            level.get_head(),
        );

        // Update the database with this compressor state information
        //
        // Some of these are `usize` as they may be used to index vectors, but stored as Postgres
        // type `INT` which is the same as `i32`.
        //
        // Since these values should always be small, this conversion should be safe.
        let (level_num, max_size, current_len) =
            (level_num as i32, max_size as i32, current_len as i32);
        let params: Vec<&(dyn ToSql + Sync)> =
            vec![&room_id, &level_num, &max_size, &current_len, &current_head];

        write_transaction.execute(
            r#"
                INSERT INTO state_compressor_state
                    (room_id, level_num, max_size, current_length, current_head)
                    VALUES ($1, $2, $3, $4, $5)
                ON CONFLICT (room_id, level_num)
                    DO UPDATE SET
                        max_size = excluded.max_size,
                        current_length = excluded.current_length,
                        current_head = excluded.current_head;
            "#,
            &params,
        )?;
    }

    // Update the database with this progress information
    let params: Vec<&(dyn ToSql + Sync)> = vec![&room_id, &last_compressed];
    write_transaction.execute(
        r#"
            INSERT INTO state_compressor_progress (room_id, last_compressed)
                VALUES ($1, $2)
            ON CONFLICT (room_id)
                DO UPDATE SET last_compressed = excluded.last_compressed;
        "#,
        &params,
    )?;

    // Commit the transaction (otherwise changes never happen)
    write_transaction.commit()?;

    Ok(())
}

/// Returns the room with the lowest uncompressed state group id
///
/// A group is detected as uncompressed if it is greater than the `last_compressed`
/// entry in `state_compressor_progress` for that room.
///
/// The `lowest_uncompressed_group` value stored in `state_compressor_total_progress`
/// stores where this method last finished, to prevent repeating work
///
/// # Arguments
///
/// * `client` - A postgres client used to send the requests to the database
pub fn get_next_room_to_compress(client: &mut Client) -> Result<Option<String>> {
    // Walk the state_groups table until we find the next uncompressed group
    let get_next_room = r#"
        SELECT room_id, id
        FROM state_groups
        LEFT JOIN state_compressor_progress USING (room_id)
        WHERE
            id >= (SELECT lowest_uncompressed_group FROM state_compressor_total_progress)
            AND (
                id > last_compressed
                OR last_compressed IS NULL
            )
        ORDER BY id ASC
        LIMIT 1
    "#;

    let row_opt = client.query_opt(get_next_room, &[])?;

    let next_room_row = if let Some(row) = row_opt {
        row
    } else {
        return Ok(None);
    };

    let next_room: String = next_room_row.get("room_id");
    let lowest_uncompressed_group: i64 = next_room_row.get("id");

    // This method has determined where the lowest uncompressed group is, save that
    // information so we don't have to redo this work in the future.
    let update_total_progress = r#"
        UPDATE state_compressor_total_progress SET lowest_uncompressed_group = $1;
    "#;

    client.execute(update_total_progress, &[&lowest_uncompressed_group])?;

    trace!(
        "next_room: {}, lowest_uncompressed: {}",
        next_room,
        lowest_uncompressed_group
    );

    Ok(Some(next_room))
}
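To illustrate how these helpers fit together, here is a minimal sketch (not part of the diff; the connection string is a placeholder) that connects, ensures the bookkeeping tables exist, and asks which room would be compressed next:

```
use synapse_auto_compressor::state_saving;

fn main() {
    // Placeholder connection string
    let db_url = "postgresql://user:password@localhost/synapse";

    let mut client = state_saving::connect_to_database(db_url)
        .expect("failed to connect to the database");
    state_saving::create_tables_if_needed(&mut client)
        .expect("failed to create the compressor's bookkeeping tables");

    // None means every room is already fully compressed
    match state_saving::get_next_room_to_compress(&mut client).expect("query failed") {
        Some(room_id) => println!("next room to compress: {}", room_id),
        None => println!("nothing left to compress"),
    }
}
```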