83 Commits
v0.1.0 ... main

Author SHA1 Message Date
01bce55ade add gitea action
2024-03-13 15:04:27 +08:00
dependabot[bot]
bf92c82b7f Bump openssl from 0.10.57 to 0.10.60 (#131)
Bumps [openssl](https://github.com/sfackler/rust-openssl) from 0.10.57 to 0.10.60.
- [Release notes](https://github.com/sfackler/rust-openssl/releases)
- [Commits](https://github.com/sfackler/rust-openssl/compare/openssl-v0.10.57...openssl-v0.10.60)

---
updated-dependencies:
- dependency-name: openssl
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-11-29 10:23:23 +00:00
Quentin Gliech
4b9f2e2d64 Make the Docker image cross-compile without QEMU emulation (#129)
* Bump all dependencies & disable the pyo3 feature by default.
* Make the Docker image cross-compile without QEMU emulation
* Add a build cache on the Docker image
2023-09-28 14:42:26 -04:00
David Robertson
71f24cf2b9 Tag container images under docker.io 2023-09-19 18:03:06 +01:00
David Robertson
575d0fd878 Re-enable pushing to dockerhub (#130) 2023-09-19 13:51:39 +01:00
Brad Murray
2697e261da Push git sha tags for docker containers (#128) 2023-09-11 12:19:46 +01:00
David Robertson
d3aad1a23f Don't try to login to dockerhub (#127) 2023-09-08 13:42:34 +01:00
erdnaxe
0890891bb0 Add local PostgreSQL socket example to README (#126) 2023-08-22 09:52:40 -04:00
Aquatica
8dc70fec8d Fix README.md stating the wrong flag for levels (#122) 2023-07-10 18:42:38 +01:00
dependabot[bot]
982ee5ead8 Bump openssl from 0.10.54 to 0.10.55 (#120)
Bumps [openssl](https://github.com/sfackler/rust-openssl) from 0.10.54 to 0.10.55.
- [Release notes](https://github.com/sfackler/rust-openssl/releases)
- [Commits](https://github.com/sfackler/rust-openssl/compare/openssl-v0.10.54...openssl-v0.10.55)

---
updated-dependencies:
- dependency-name: openssl
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-06-22 11:26:31 +01:00
Daniel Seymour
8fca8adb04 Add CI to publish official container (#117)
Adds a Github Actions workflow to publish containers on merges to main
and when new tags are pushed.

As part of the change, cargo will now use the release when building the
published containers, but default to the dev profile when building the
container locally.

Signed-off-by: Danny Seymour danny@seymour.family
2023-06-07 15:38:09 +01:00
Jan Alexander Steffens
9ee99cd547 Fix clippy warnings, update dependencies (4) (#118)
Signed-off-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
2023-06-06 16:45:49 +01:00
msrd0
6a065de6fc Make jemalloc, clap and pyo3 optional dependencies (#116) 2023-04-21 13:17:35 +01:00
Erik Johnston
f4d96c73a8 Fix clippy linting (#115)
* Fix clippy

(This was fixed via `cargo clippy --fix`)

* Fmt
2023-03-27 12:00:32 +01:00
dependabot[bot]
923ca65f67 Bump openssl from 0.10.42 to 0.10.48 (#114)
Bumps [openssl](https://github.com/sfackler/rust-openssl) from 0.10.42 to 0.10.48.
- [Release notes](https://github.com/sfackler/rust-openssl/releases)
- [Commits](https://github.com/sfackler/rust-openssl/compare/openssl-v0.10.42...openssl-v0.10.48)

---
updated-dependencies:
- dependency-name: openssl
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-03-27 11:17:13 +01:00
David Robertson
13882d7654 Merge pull request #113 from matrix-org/dependabot/cargo/tokio-1.25.0
Bump tokio from 1.24.1 to 1.25.0
2023-02-05 00:22:20 +00:00
dependabot[bot]
c0dac572c1 Bump tokio from 1.24.1 to 1.25.0
Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.24.1 to 1.25.0.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.24.1...tokio-1.25.0)

---
updated-dependencies:
- dependency-name: tokio
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-02-04 00:58:15 +00:00
David Robertson
856b799c53 Merge pull request #112 from matrix-org/dependabot/cargo/tokio-1.24.1
Bump tokio from 1.21.2 to 1.24.1
2023-01-09 14:24:56 +00:00
dependabot[bot]
aab4d37123 Bump tokio from 1.21.2 to 1.24.1
Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.21.2 to 1.24.1.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.21.2...tokio-1.24.1)

---
updated-dependencies:
- dependency-name: tokio
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-01-06 21:49:13 +00:00
Shay
fce2a7eee8 Merge pull request #111 from matrix-org/shay/rust_min_version
Update READ.me with information about Rust minimum version
2022-12-13 11:03:56 -08:00
Shay
74bd719262 Update README.md
Co-authored-by: David Robertson <davidr@element.io>
2022-12-02 10:28:16 -08:00
Shay
e3075d1451 Update READ.me with information about Rust minimum version 2022-12-02 10:19:49 -08:00
David Robertson
d22acc6906 Merge pull request #109 from kittykat/patch-2 2022-11-02 15:19:09 +00:00
Kat Gerasimova
88d97ea413 Add automation to move X-Needs-Info issues
When an issue is labelled with X-Needs-Info, it should move to the correct column on the issue triage board.
2022-11-02 15:05:04 +00:00
Jan Alexander Steffens
152808baca Fix clippy warnings, update dependencies (3) (#106) 2022-10-17 13:43:39 +01:00
Jelmer Vernooij
2596f25eea Qualify docker image name. (#104) 2022-10-05 10:45:08 +01:00
Kat Gerasimova
4d3049d3ed Add issue automation for triage (#103)
Move new issues to https://github.com/orgs/matrix-org/projects/67 for triage
2022-09-02 16:52:36 +01:00
Erik Johnston
9ff021f32e Add contributing guide (#102) 2022-08-03 15:18:54 +01:00
Landry Breuil
019b100521 make jemalloc dependency really optional (#101)
Signed-off-by: Sebastien Marie <semarie@online.fr>
2022-08-03 10:57:00 +01:00
Jan Alexander Steffens
da6271a331 Fix clippy warnings, update dependencies (again) (#100) 2022-08-03 10:52:47 +01:00
David Robertson
dd62afb3d5 Update lockfile; drop Python 3.6 support
Python 3.6 EOLed at the end of 2021, see https://endoflife.date/python.
(pyO3 was refusing to build against 3.6).
2022-07-07 19:23:33 +01:00
David Robertson
65ffce2362 Tag v0.1.3 2022-07-07 19:13:47 +01:00
Jan Alexander Steffens
b4f3d8adbd Fix clippy warnings, update dependencies (#91) 2022-06-06 10:34:07 +01:00
reivilibre
04ac0529e1 Merge pull request #86 from saces/saces/fixlogfile 2022-03-16 13:58:37 +00:00
saces
d6df1ac5a4 remove log_file leftovers
PR #74 (Log to stderr not to a file) did not remove all references to
log_file, the file was still created, but remained empty.

The synapse_auto_compressor failed if run in a read only environment.

Signed-off-by: saces <saces@c-base.org>
2022-03-13 00:13:33 +01:00
saces
32e43da3e8 Add Docker files (#83)
Signed-off-by: saces <saces@c-base.org>
2022-02-17 10:28:43 +00:00
Jan Alexander Steffens
4c7316311b Update dependencies, use tikv-jemallocator (#73)
Signed-off-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
2021-10-26 08:54:56 +01:00
Jan Alexander Steffens
83e8dedfa9 lib: New argument -N to suppress verification (#26)
Signed-off-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
2021-10-26 08:50:34 +01:00
Sean Quah
38d800a775 Configure @matrix-org/synapse-core to be the code owner for the repo (#76)
Signed-off-by: Sean Quah <seanq@element.io>
2021-10-25 14:32:07 +01:00
Erik Johnston
5272acedd2 Merge pull request #32 2021-10-13 10:19:25 +01:00
Erik Johnston
9d642cfb67 Release v0.1.2 of auto compressor 2021-10-06 10:55:47 +01:00
Erik Johnston
0111079153 Make the auto compressor uploadable to pypi (#75) 2021-09-28 16:57:13 +01:00
Azrenbeth
bf57e81f54 Log to stderr not to a file (#74) 2021-09-28 14:39:18 +01:00
Azrenbeth
dcfa67ea98 Add documentation for autocompressor (#70) 2021-09-28 13:38:01 +01:00
Azrenbeth
9bfd786f3b Add pyo3 bindings to autocompressor so can be used from python (#69) 2021-09-28 09:32:50 +01:00
Azrenbeth
f822da9625 Add a binary crate to the autocompressor (#68) 2021-09-27 14:22:26 +01:00
Azrenbeth
b8e323c321 All printing replaced by logging (#67) 2021-09-27 12:20:12 +01:00
Azrenbeth
bc30942e2d Add no-progress-bars feature so other packages can hide them (#66) 2021-09-27 11:35:40 +01:00
Azrenbeth
a069d8765a Add method that compresses the chunks with lowest uncompressed state_group ids (#72) 2021-09-27 09:49:34 +01:00
Azrenbeth
3271221311 Add method that compresses next chunk of room (#64) 2021-09-20 09:21:59 +01:00
Azrenbeth
a9bc800b87 Add new package with methods to save and load compressor state (#63) 2021-09-16 09:55:14 +01:00
Azrenbeth
80795aa813 Dont send no-changes transaction to database (#71) 2021-09-15 10:09:43 +01:00
Azrenbeth
55ee83ce13 Don't panic in continue_run if no groups found within range (#62) 2021-09-14 17:29:48 +01:00
Azrenbeth
a409cdbd8e Rename level current to head (#61) 2021-09-14 12:28:35 +01:00
Azrenbeth
8c72a0de52 Move setting up jemalloc out of library crate (#60) 2021-09-14 11:26:21 +01:00
Azrenbeth
a951ba4dae Move the graph generation to before abort (#56) 2021-09-13 13:23:48 +01:00
Azrenbeth
4c3d6bd346 Add method to run the compressor starting from a particular compressor-state (#55) 2021-09-13 10:25:52 +01:00
Azrenbeth
d32f49303b Add integration tests that check various config options (#54) 2021-09-09 14:26:14 +01:00
Azrenbeth
d908d13f8f Add option to commit changes to the database automatically (#53) 2021-09-08 11:39:57 +01:00
Azrenbeth
65861de06e Stops the compressor if it will lead to more rows in the database (#52) 2021-09-06 10:06:47 +01:00
Azrenbeth
3b5d7fd176 Add information on running options to README and --help output (#51) 2021-09-06 09:59:04 +01:00
Azrenbeth
0f7f2c2660 Setup framework to do integration testing (#50) 2021-09-06 09:52:13 +01:00
Azrenbeth
011f9f8da5 Wrote tests for Stats (#49)
wrote tests for Stats struct and whether the compressor accurately populates it
2021-08-16 15:17:38 +01:00
Azrenbeth
9a59b1121c Wrote tests for compressor functions (#48)
* Wrote tests for create_new_tree

* wrote tests for get_delta
2021-08-16 15:10:36 +01:00
Azrenbeth
aa6137ce52 Wrote tests for Level struct functions (#47) 2021-08-16 15:04:06 +01:00
Azrenbeth
e5174f4181 Wrote tests for config::new() which is used by pyo3 code (#46) 2021-08-16 14:44:56 +01:00
Azrenbeth
5c248b5362 Wrote unit tests for functionality within lib.rs (#45) 2021-08-16 14:39:27 +01:00
Azrenbeth
63e1d6e3c9 Added option to only run the compressor on a range of state groups in a room (#44) 2021-08-12 16:01:12 +01:00
Azrenbeth
3290726990 Added option to output directed graphs of the group heirarchy before and after compression (#43)
It outputs nodes and edges information before and after the
compressor has run - these can be visualised in a tool like Gephi

A good way to visualise what the compressor is actually doing!
2021-08-04 11:56:42 +01:00
Azrenbeth
becb2937aa Wrapped code with pyo3 so that the libary can be used from python (#42) 2021-08-03 09:44:51 +01:00
Azrenbeth
a887107607 Add documentation (#41)
Added documentation to lib.rs and database.rs files
2021-07-30 10:06:39 +01:00
Azrenbeth
013080f712 Azren/moved to lib (#40) 2021-07-28 13:25:44 +01:00
Azrenbeth
31e37996ea Split up main (#39)
Moved code from inside the main function into function calls
2021-07-27 15:31:48 +01:00
Erik Johnston
f3b1dd0b8a Add github actions (#33) 2021-05-20 12:13:45 +01:00
Jörg Sommer
2fc2db2848 Update dependencies 2021-05-13 17:25:48 +02:00
Jörg Sommer
4f823fba78 Use saturating arithmetic on calculating row saving
The saturating arithmetic doesn't overflow, but stays at the maximum or
minimum. In this case it doesn't become negative, but zero, which
satisfies the condition below.

Closes #31
2021-05-13 17:25:37 +02:00
Erik Johnston
b33c498e99 Update dependencies 2021-03-19 18:42:48 +00:00
Erik Johnston
d2ab661b1d Merge pull request #25 from heftig/update-deps
Cargo.toml: Update dependencies
2021-02-23 10:55:11 +00:00
Jan Alexander Steffens (heftig)
b07397b5e3 Cargo.toml: Update dependencies 2021-02-23 00:56:22 +01:00
Erik Johnston
4a56406fb3 Merge pull request #23 from ananace/postgres-ssl
Support postgres with SSL requirements
2021-02-22 14:05:16 +00:00
Alexander Olofsson
dc635bdd0e Clean up postgres client creation slightly 2021-02-02 19:29:34 +01:00
Alexander Olofsson
9720b19332 Always use tls connector for postgres 2021-02-02 18:30:27 +01:00
Alexander Olofsson
8e691aec1f Support postgres with SSL requirements
This stupidly just strips any ?sslmode= arguments from the postgres URI
and enables SSL without verification regardless of which mode the user
specifies.
2021-01-30 20:29:58 +01:00
41 changed files with 7310 additions and 940 deletions

3
.dockerignore Normal file

@@ -0,0 +1,3 @@
.git
.github
/target


@@ -0,0 +1,46 @@
name: Build and push docker images
on:
push:
tags: ["v*"]
branches: [ main ]
workflow_dispatch:
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Log in to Gitea Container Registry
uses: docker/login-action@v2
with:
registry: git.yongyuancv.cn
username: ${{ gitea.repository_owner }}
password: ${{ secrets.GITEA_TOKEN }}
- name: Calculate docker image tag
id: set-tag
uses: docker/metadata-action@master
with:
images: |
git.yongyuancv.cn/${{ gitea.repository }}
git.yongyuancv.cn/heimoshuiyu/${{ gitea.event.repository.name }}
flavor: |
latest=false
tags: |
type=raw,value=latest,enable=${{ gitea.ref == 'refs/heads/main' }}
type=sha,prefix=,format=long
type=semver,pattern=v{{version}}
type=semver,pattern=v{{major}}.{{minor}}
- name: Build and push all platforms
uses: docker/build-push-action@v4
with:
push: true
labels: "gitsha1=${{ gitea.sha }}"
tags: "${{ steps.set-tag.outputs.tags }}"
platforms: linux/amd64,linux/arm64
cache-from: type=registry,ref=git.yongyuancv.cn/${{ gitea.repository }}:buildcache
cache-to: type=registry,ref=git.yongyuancv.cn/${{ gitea.repository }}:buildcache,mode=max

2
.github/CODEOWNERS vendored Normal file

@@ -0,0 +1,2 @@
# Automatically request reviews from the synapse-core team when a pull request comes in.
* @matrix-org/synapse-core

58
.github/workflows/docker.yaml vendored Normal file

@@ -0,0 +1,58 @@
# GitHub actions workflow which builds and publishes the docker images.
name: Build and push docker images
on:
push:
tags: ["v*"]
branches: [ main ]
workflow_dispatch:
permissions:
contents: read
packages: write
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Log in to DockerHub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_TOKEN }}
- name: Log in to GHCR
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Calculate docker image tag
id: set-tag
uses: docker/metadata-action@master
with:
images: |
ghcr.io/${{ github.repository }}
docker.io/${{ secrets.DOCKER_HUB_USERNAME }}/${{ github.event.repository.name }}
flavor: |
latest=false
tags: |
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
type=sha,prefix=,format=long
type=semver,pattern=v{{version}}
type=semver,pattern=v{{major}}.{{minor}}
- name: Build and push all platforms
uses: docker/build-push-action@v4
with:
push: true
labels: "gitsha1=${{ github.sha }}"
tags: "${{ steps.set-tag.outputs.tags }}"
platforms: linux/amd64,linux/arm64
cache-from: type=registry,ref=ghcr.io/${{ github.repository }}:buildcache
cache-to: type=registry,ref=ghcr.io/${{ github.repository }}:buildcache,mode=max

74
.github/workflows/tests.yaml vendored Normal file

@@ -0,0 +1,74 @@
on:
pull_request:
push:
branches:
- main
name: Continuous integration
jobs:
check:
name: Cargo Check
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- uses: Swatinem/rust-cache@v1
- uses: actions-rs/cargo@v1
with:
command: check
test:
name: Test Suite
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- uses: Swatinem/rust-cache@v1
- run: cd compressor_integration_tests && docker-compose up -d
- uses: actions-rs/cargo@v1
with:
command: test
args: --workspace
fmt:
name: Rustfmt
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
components: rustfmt
- uses: Swatinem/rust-cache@v1
- uses: actions-rs/cargo@v1
with:
command: fmt
args: --all -- --check
clippy:
name: Clippy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
components: clippy
- uses: Swatinem/rust-cache@v1
- uses: actions-rs/cargo@v1
with:
command: clippy
args: -- -D warnings

28
.github/workflows/triage_incoming.yml vendored Normal file

@@ -0,0 +1,28 @@
name: Move new issues into the issue triage board
on:
issues:
types: [ opened ]
jobs:
add_new_issues:
name: Add new issues to the triage board
runs-on: ubuntu-latest
steps:
- uses: octokit/graphql-action@v2.x
id: add_to_project
with:
headers: '{"GraphQL-Features": "projects_next_graphql"}'
query: |
mutation add_to_project($projectid:ID!,$contentid:ID!) {
addProjectV2ItemById(input: {projectId: $projectid contentId: $contentid}) {
item {
id
}
}
}
projectid: ${{ env.PROJECT_ID }}
contentid: ${{ github.event.issue.node_id }}
env:
PROJECT_ID: "PVT_kwDOAIB0Bs4AFDdZ"
GITHUB_TOKEN: ${{ secrets.ELEMENT_BOT_TOKEN }}

44
.github/workflows/triage_labelled.yml vendored Normal file

@@ -0,0 +1,44 @@
name: Move labelled issues to correct projects
on:
issues:
types: [ labeled ]
jobs:
move_needs_info:
name: Move X-Needs-Info on the triage board
runs-on: ubuntu-latest
if: >
contains(github.event.issue.labels.*.name, 'X-Needs-Info')
steps:
- uses: actions/add-to-project@main
id: add_project
with:
project-url: "https://github.com/orgs/matrix-org/projects/67"
github-token: ${{ secrets.ELEMENT_BOT_TOKEN }}
- name: Set status
env:
GITHUB_TOKEN: ${{ secrets.ELEMENT_BOT_TOKEN }}
run: |
gh api graphql -f query='
mutation(
$project: ID!
$item: ID!
$fieldid: ID!
$columnid: String!
) {
updateProjectV2ItemFieldValue(
input: {
projectId: $project
itemId: $item
fieldId: $fieldid
value: {
singleSelectOptionId: $columnid
}
}
) {
projectV2Item {
id
}
}
}' -f project="PVT_kwDOAIB0Bs4AFDdZ" -f item=${{ steps.add_project.outputs.itemId }} -f fieldid="PVTSSF_lADOAIB0Bs4AFDdZzgC6ZA4" -f columnid=ba22e43c --silent

3
.gitignore vendored

@@ -2,3 +2,6 @@
**/*.rs.bk
*.data
*.old
**.sql
*.csv
**.log

80
CONTRIBUTING.md Normal file

@@ -0,0 +1,80 @@
# Contributing
## Sign off
In order to have a concrete record that your contribution is intentional
and you agree to license it under the same terms as the project's license, we've adopted the
same lightweight approach that the Linux Kernel
[submitting patches process](
https://www.kernel.org/doc/html/latest/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin),
[Docker](https://github.com/docker/docker/blob/master/CONTRIBUTING.md), and many other
projects use: the DCO ([Developer Certificate of Origin](http://developercertificate.org/)).
This is a simple declaration that you wrote
the contribution or otherwise have the right to contribute it to Matrix:
```
Developer Certificate of Origin
Version 1.1
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
```
If you agree to this for your contribution, then all that's needed is to
include the line in your commit or pull request comment:
```
Signed-off-by: Your Name <your@email.example.org>
```
We accept contributions under a legally identifiable name, such as
your name on government documentation or common-law names (names
claimed by legitimate usage or repute). Unfortunately, we cannot
accept anonymous contributions at this time.
Git allows you to add this signoff automatically when using the `-s`
flag to `git commit`, which uses the name and email set in your
`user.name` and `user.email` git configs.
### Private Sign off
If you would like to provide your legal name privately to the Matrix.org
Foundation (instead of in a public commit or comment), you can do so
by emailing your legal name and a link to the pull request to
[dco@matrix.org](mailto:dco@matrix.org?subject=Private%20sign%20off).
It helps to include "sign off" or similar in the subject line. You will then
be instructed further.
Once private sign off is complete, doing so for future contributions will not
be required.

1529
Cargo.lock generated

File diff suppressed because it is too large


@@ -1,18 +1,56 @@
[workspace]
members = ["synapse_auto_compressor", "compressor_integration_tests"]
[package]
authors = ["Erik Johnston"]
description = "A tool to compress some state in a Synapse instance's database"
name = "synapse-compress-state"
name = "synapse_compress_state"
version = "0.1.0"
edition = "2018"
[[bin]]
name = "synapse_compress_state"
required-features = ["clap"]
[dependencies]
clap = "2.33.0"
indicatif = "0.14.0"
jemallocator = "0.3.2"
postgres = "0.17.0"
rand = "0.7.2"
rayon = "1.3.0"
string_cache = "0.8.0"
indicatif = "0.17.6"
openssl = "0.10.60"
postgres = "0.19.7"
postgres-openssl = "0.5.0"
rand = "0.8.5"
rayon = "1.7.0"
string_cache = "0.8.7"
env_logger = "0.10.0"
log = "0.4.20"
log-panics = "2.1.0"
[dependencies.state-map]
git = "https://github.com/matrix-org/rust-matrix-state-map"
# Needed for pyo3 support
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies.clap]
version = "4.4.2"
features = ["cargo"]
optional = true
[dependencies.pyo3]
version = "0.19.2"
features = ["extension-module"]
optional = true
[dependencies.pyo3-log]
version = "0.8.3"
optional = true
[dependencies.tikv-jemallocator]
version = "0.5.4"
optional = true
[features]
default = ["clap", "jemalloc"]
jemalloc = ["tikv-jemallocator"]
no-progress-bars = []
pyo3 = ["dep:pyo3", "dep:pyo3-log"]

54
Dockerfile Normal file

@@ -0,0 +1,54 @@
# This uses the multi-stage build feature of Docker to build the binaries for multiple architectures without QEMU.
# The first stage is responsible for building binaries for all the supported architectures (amd64 and arm64), and the
# second stage only copies the binaries for the target architecture.
# We leverage Zig and cargo-zigbuild for providing a cross-compilation-capable C compiler and linker.
ARG RUSTC_VERSION=1.72.0
ARG ZIG_VERSION=0.11.0
ARG CARGO_ZIGBUILD_VERSION=0.17.1
FROM --platform=${BUILDPLATFORM} docker.io/rust:${RUSTC_VERSION} AS builder
# Install cargo-zigbuild for cross-compilation
ARG CARGO_ZIGBUILD_VERSION
RUN cargo install --locked cargo-zigbuild@=${CARGO_ZIGBUILD_VERSION}
# Download zig compiler for cross-compilation
ARG ZIG_VERSION
RUN curl -L "https://ziglang.org/download/${ZIG_VERSION}/zig-linux-$(uname -m)-${ZIG_VERSION}.tar.xz" | tar -J -x -C /usr/local && \
ln -s "/usr/local/zig-linux-$(uname -m)-${ZIG_VERSION}/zig" /usr/local/bin/zig
# Install all cross-compilation targets
ARG RUSTC_VERSION
RUN rustup target add \
--toolchain "${RUSTC_VERSION}" \
x86_64-unknown-linux-musl \
aarch64-unknown-linux-musl
WORKDIR /opt/synapse-compressor/
COPY . .
# Build for all targets
RUN cargo zigbuild \
--release \
--workspace \
--bins \
--features "openssl/vendored" \
--target aarch64-unknown-linux-musl \
--target x86_64-unknown-linux-musl
# Move the binaries in a separate folder per architecture, so we can copy them using the TARGETARCH build arg
RUN mkdir -p /opt/binaries/amd64 /opt/binaries/arm64
RUN mv target/x86_64-unknown-linux-musl/release/synapse_compress_state \
target/x86_64-unknown-linux-musl/release/synapse_auto_compressor \
/opt/binaries/amd64
RUN mv target/aarch64-unknown-linux-musl/release/synapse_compress_state \
target/aarch64-unknown-linux-musl/release/synapse_auto_compressor \
/opt/binaries/arm64
FROM --platform=${TARGETPLATFORM} docker.io/alpine
ARG TARGETARCH
COPY --from=builder /opt/binaries/${TARGETARCH}/synapse_compress_state /usr/local/bin/synapse_compress_state
COPY --from=builder /opt/binaries/${TARGETARCH}/synapse_auto_compressor /usr/local/bin/synapse_auto_compressor

285
README.md

@@ -1,51 +1,125 @@
# Compress Synapse State Tables
An experimental tool that reads in the rows from `state_groups_state` and
`state_group_edges` tables for a particular room and calculates the changes that
could be made that (hopefully) will significantly reduce the number of rows.
This workspace contains experimental tools that attempt to reduce the number of
rows in the `state_groups_state` table inside of a Synapse Postgresql database.
This tool currently *does not* write to the database in any way, so should be
# Automated tool: synapse_auto_compressor
## Introduction:
This tool is significantly simpler to use than the manual tool (described below).
It scans through all of the rows in the `state_groups` database table from the start. When
it finds a group that hasn't been compressed, it runs the compressor on a chunk of that
group's room, saving how far it got. After compressing a number of these chunks it stops,
recording its position ready for the next run of the `synapse_auto_compressor`.
It creates three extra tables in the database: `state_compressor_state` which stores the
information needed to stop and start the compressor for each room, `state_compressor_progress`
which stores the most recently compressed state group for each room and `state_compressor_total_progress`
which stores how far through the `state_groups` table the compressor has scanned.
The tool can be run manually when you are running out of space, or be scheduled to run
periodically.
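The progress tables can also be inspected directly between runs. Below is a minimal sketch (not part of the tool itself) that reads the `state_compressor_total_progress` table; the table and column names are taken from the integration tests in this repository, the connection string is a placeholder, and it assumes the table holds a single `BIGINT` row.

```
// Sketch: read how far the auto compressor has scanned so far.
// Table/column names come from the integration tests in this repository;
// the connection string is a placeholder.
use postgres::{Client, NoTls};

fn main() {
    let mut client = Client::connect(
        "postgresql://synapse_user:synapse_password@localhost/synapse",
        NoTls,
    )
    .unwrap();

    // Assumes a single row holding the lowest state group not yet compressed.
    let row = client
        .query_one(
            "SELECT lowest_uncompressed_group FROM state_compressor_total_progress",
            &[],
        )
        .unwrap();
    let progress: i64 = row.get(0);
    println!("compressor has scanned up to state group {progress}");
}
```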
## Building
This tool requires `cargo` to be installed. See https://www.rust-lang.org/tools/install
for instructions on how to do this.
This project follows the deprecation policy of [Synapse](https://matrix-org.github.io/synapse/latest/deprecation_policy.html)
on Rust and will assume a recent stable version of Rust and the ability to fetch a more recent one if necessary.
To build `synapse_auto_compressor`, clone this repository and navigate to the
`synapse_auto_compressor/` subdirectory. Then execute `cargo build`.
This will create an executable and store it in
`synapse_auto_compressor/target/debug/synapse_auto_compressor`.
## Example usage
Compress 100 chunks of size 500 in a remote PostgreSQL database:
```
$ synapse_auto_compressor -p postgresql://user:pass@localhost/synapse -c 500 -n 100
```
Compress 100 chunks of size 500 using local PostgreSQL socket:
```
$ sudo -u postgres synapse_auto_compressor -p "user=postgres dbname=matrix-synapse host=/var/run/postgresql" -c 500 -n 100
```
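The same compression can also be driven from Rust rather than the command line. The following is a minimal sketch based on the functions exercised by the integration tests in this repository (`connect_to_database`, `create_tables_if_needed`, `compress_chunks_of_database` and `Level`); the connection string is a placeholder, and mapping `Level::new` sizes onto the `-l` defaults is an assumption.

```
// Sketch: run the auto compressor as a library, roughly mirroring
// `synapse_auto_compressor -p <url> -c 500 -n 100`.
use synapse_auto_compressor::{
    manager::compress_chunks_of_database,
    state_saving::{connect_to_database, create_tables_if_needed},
};
use synapse_compress_state::Level;

fn main() {
    // Placeholder connection string.
    let db_url = "postgresql://user:pass@localhost/synapse";

    // Ensure the state_compressor_* bookkeeping tables exist.
    let mut client = connect_to_database(db_url).unwrap();
    create_tables_if_needed(&mut client).unwrap();

    // Assumed to correspond to the default "-l 100,50,25" level sizes.
    let levels = vec![Level::new(100), Level::new(50), Level::new(25)];

    // Compress 100 chunks of 500 state groups each (-c 500 -n 100).
    compress_chunks_of_database(db_url, 500, &levels, 100).unwrap();
}
```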
## Running Options
- -p [POSTGRES_LOCATION] **Required**
The configuration for connecting to the Postgres database. This should be of the form
`"postgresql://username:password@mydomain.com/database"` or a key-value pair
string: `"user=username password=password dbname=database host=mydomain.com"`
See https://docs.rs/tokio-postgres/0.7.2/tokio_postgres/config/struct.Config.html
for the full details.
- -c [CHUNK_SIZE] **Required**
The number of state groups to work on at once. All of the entries from `state_groups_state` are
requested from the database for the state groups being worked on, so small chunk
sizes may be needed on machines with low memory. Note: if the compressor fails to find
space savings on the chunk as a whole (which may well happen in rooms with lots of
backfilled state) then the entire chunk is skipped.
- -n [CHUNKS_TO_COMPRESS] **Required**
*CHUNKS_TO_COMPRESS* chunks of size *CHUNK_SIZE* will be compressed. The higher this
number is set to, the longer the compressor will run for.
- -l [LEVELS]
Sizes of each new level in the compression algorithm, as a comma-separated list.
The first entry in the list is for the lowest, most granular level, with each
subsequent entry being for the next highest level. The number of entries in the
list determines the number of levels that will be used. The sum of the sizes of
the levels affects the performance of fetching the state from the database, as the
sum of the sizes is the upper bound on the number of iterations needed to fetch a
given set of state. [defaults to "100,50,25"]
## Scheduling the compressor
The automatic tool may put some strain on the database, so it might be best to schedule
it to run at a quiet time for the server. This could be done by creating an executable
script and scheduling it with something like
[cron](https://www.man7.org/linux/man-pages/man1/crontab.1.html).
# Manual tool: synapse_compress_state
## Introduction
A manual tool that reads in the rows from `state_groups_state` and `state_group_edges`
tables for a specified room and calculates the changes that could be made that
(hopefully) will significantly reduce the number of rows.
This tool currently *does not* write to the database by default, so should be
safe to run. If the `-o` option is specified then SQL will be written to the
given file that would change the tables to match the calculated state. (Note
that if `-t` is given then each change to a particular state group is wrapped
in a transaction).
in a transaction). If you do wish to send the changes to the database automatically
then the `-c` flag can be set.
The SQL generated by the `-o` option is safe to apply against the database with
Synapse running. This is because the `state_groups` and `state_groups_state`
tables are append-only: once written to the database, they are never modified.
There is therefore no danger of a modification racing against a running synapse.
Further, this script makes its changes within atomic transactions, and each
transaction should not affect the results from any of the queries that synapse
performs.
The SQL generated is safe to apply against the database with Synapse running.
This is because the `state_groups` and `state_groups_state` tables are append-only:
once written to the database, they are never modified. There is therefore no danger
of a modification racing against a running Synapse. Further, this script makes its
changes within atomic transactions, and each transaction should not affect the results
from any of the queries that Synapse performs.
The tool will also ensure that the generated state deltas do give the same state
as the existing state deltas.
as the existing state deltas before generating any SQL.
## Algorithm
## Building
The algorithm works by attempting to create a tree of deltas, produced by
appending state groups to different "levels". Each level has a maximum size, where
each state group is appended to the lowest level that is not full.
This tool requires `cargo` to be installed. See https://www.rust-lang.org/tools/install
for instructions on how to do this.
This produces a graph that looks approximately like the following, in the case
of having two levels with the bottom level (L1) having a maximum size of 3:
```
L2 <-------------------- L2 <---------- ...
^--- L1 <--- L1 <--- L1 ^--- L1 <--- L1 <--- L1
```
The sizes and number of levels used can be controlled via `-l`.
**Note**: Increasing the sum of the sizes of levels will increase the time it
takes to query the full state of a given state group. By default Synapse
attempts to keep this below 100.
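To make the level rule concrete, here is a small illustrative sketch (not the library's actual implementation) of the greedy assignment described above: each group goes to the lowest level that still has room, becomes the new head of every level below it, and when every level is full a fresh snapshot is started.

```
// Illustrative only: assign a run of state groups to levels greedily.
// Returns Some(level) for a delta appended at that level, or None for a
// full snapshot (all levels were full).
fn assign_levels(num_groups: usize, level_sizes: &[usize]) -> Vec<Option<usize>> {
    // counts[i] = number of groups in the current run of level i
    let mut counts = vec![0usize; level_sizes.len()];
    let mut assigned = Vec::with_capacity(num_groups);

    for _ in 0..num_groups {
        match counts.iter().zip(level_sizes).position(|(c, max)| c < max) {
            Some(lvl) => {
                counts[lvl] += 1;
                // This group becomes the new head of every level below `lvl`.
                for c in counts.iter_mut().take(lvl) {
                    *c = 1;
                }
                assigned.push(Some(lvl));
            }
            None => {
                // Every level is full: store the full state as a snapshot and
                // restart all runs from this group.
                for c in counts.iter_mut() {
                    *c = 1;
                }
                assigned.push(None);
            }
        }
    }
    assigned
}
```

With `level_sizes = [3, 3]` and groups 0 to 13 this reproduces the shape of the two-level diagram above, with a fresh snapshot at group 12 once both levels are full, matching the 3,3 structure used in the integration tests.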
To build `synapse_compress_state`, clone this repository and then execute `cargo build`.
This will create an executable and store it in `target/debug/synapse_compress_state`.
## Example usage
```
$ synapse-compress-state -p "postgresql://localhost/synapse" -r '!some_room:example.com' -o out.sql -t
$ synapse_compress_state -p "postgresql://localhost/synapse" -r '!some_room:example.com' -o out.sql -t
Fetching state from DB for room '!some_room:example.com'...
Got initial state from database. Checking for any missing state groups...
Number of state groups: 73904
@@ -60,3 +134,152 @@ New state map matches old one
# It's finished, so we can now go and rewrite the DB
$ psql synapse < out.sql
```
## Running Options
- -p [POSTGRES_LOCATION] **Required**
The configuration for connecting to the Postgres database. This should be of the form
`"postgresql://username:password@mydomain.com/database"` or a key-value pair
string: `"user=username password=password dbname=database host=mydomain.com"`
See https://docs.rs/tokio-postgres/0.7.2/tokio_postgres/config/struct.Config.html
for the full details.
- -r [ROOM_ID] **Required**
The room to process (this is the value found in the `rooms` table of the database,
not the common name for the room - it should look like: "!wOlkWNmgkAZFxbTaqj:matrix.org").
- -b [MIN_STATE_GROUP]
The state group to start processing from (non-inclusive).
- -n [GROUPS_TO_COMPRESS]
How many groups to load into memory to compress (starting
from the 1st group in the room or the group specified by -b).
- -l [LEVELS]
Sizes of each new level in the compression algorithm, as a comma-separated list.
The first entry in the list is for the lowest, most granular level, with each
subsequent entry being for the next highest level. The number of entries in the
list determines the number of levels that will be used. The sum of the sizes of
the levels affects the performance of fetching the state from the database, as the
sum of the sizes is the upper bound on the number of iterations needed to fetch a
given set of state. [defaults to "100,50,25"]
- -m [COUNT]
If the compressor cannot save this many rows from the database then it will stop early.
- -s [MAX_STATE_GROUP]
If a max_state_group is specified then only state groups with id's lower than this
number can be compressed.
- -o [FILE]
File to output the SQL transactions to (for later running on the database).
- -t
If this flag is set then each change to a particular state group is wrapped in a
transaction. This should be done if you wish to apply the changes while Synapse is
still running.
- -c
If this flag is set then the changes the compressor makes will be committed to the
database. This should be safe to use while Synapse is running as it wraps the changes
to every state group in its own transaction (as if the transaction flag was set).
- -g
If this flag is set then the node and edge information for the state_group
directed graph built up from the predecessor state_group links is output. These can be
looked at in something like Gephi (https://gephi.org).
# Running tests
There are integration tests for these tools stored in `compressor_integration_tests/`.
To run the integration tests, you first need to start up a Postgres database
for the library to talk to. There is a docker-compose file that sets one up
with all of the correct tables. The tests can therefore be run as follows:
```
$ cd compressor_integration_tests/
$ docker-compose up -d
$ cargo test --workspace
$ docker-compose down
```
# Using the synapse_compress_state library
If you want to use the compressor in another project, it is recommended that you
use jemalloc (https://github.com/tikv/jemallocator).
To prevent the progress bars from being shown, use the `no-progress-bars` feature.
(See `synapse_auto_compressor/Cargo.toml` for an example)
# Troubleshooting
## Connecting to database
### From local machine
If you set up Synapse using the instructions on https://matrix-org.github.io/synapse/latest/postgres.html
you should have a username and password to use to log in to the Postgres database. To run the compressor
from the machine where Postgres is running, the URL will be the following:
`postgresql://synapse_user:synapse_password@localhost/synapse`
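As a quick sanity check of the connection string, the same helper that the auto compressor uses can be called from a small Rust program. This is only a sketch; the credentials are placeholders and the error is printed with `Debug` formatting since its exact type isn't documented here.

```
// Sketch: verify the connection string with the helper from
// synapse_auto_compressor::state_saving. Credentials are placeholders.
use synapse_auto_compressor::state_saving::connect_to_database;

fn main() {
    let url = "postgresql://synapse_user:synapse_password@localhost/synapse";
    match connect_to_database(url) {
        Ok(_client) => println!("connected OK"),
        Err(e) => eprintln!("connection failed: {:?}", e),
    }
}
```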
### From remote machine
If you wish to connect from a different machine, you'll need to edit your Postgres settings to allow
remote connections. This requires updating the
[`pg_hba.conf`](https://www.postgresql.org/docs/current/auth-pg-hba-conf.html) and the `listen_addresses`
setting in [`postgresql.conf`](https://www.postgresql.org/docs/current/runtime-config-connection.html).
## Printing debugging logs
The amount of output the tools produce can be altered by setting the `RUST_LOG`
environment variable to the desired log level or module filter.
To get more logs when running the synapse_auto_compressor tool try the following:
```
$ RUST_LOG=debug synapse_auto_compressor -p postgresql://user:pass@localhost/synapse -c 50 -n 100
```
If you want to suppress all the debugging info you are getting from the
Postgres client then try:
```
RUST_LOG=synapse_auto_compressor=debug,synapse_compress_state=debug synapse_auto_compressor [etc.]
```
This will only print the debugging information from those two packages. For more info see
https://docs.rs/env_logger/0.9.0/env_logger/.
## Building difficulties
Building the `openssl-sys` dependency crate requires OpenSSL development tools to be installed,
and building on Linux will also require `pkg-config`.
This can be done on Ubuntu with: `$ apt-get install libssl-dev pkg-config`
Note that building requires quite a lot of memory and out-of-memory errors might not be
obvious. It's recommended you only build these tools on machines with at least 2GB of RAM.
## Auto Compressor skips chunks when running on already compressed room
If you have used the compressor before, with certain config options, the automatic tool will
produce lots of warnings of the form: `The compressor tried to increase the number of rows in ...`
To fix this, ensure that the chunk_size is set to at least the L1 level size (so if the level
sizes are "100,50,25" then the chunk_size should be at least 100).
Note: if the level sizes used when rerunning are different from those used previously,
this might lead to less efficient compression and thus chunks being skipped, but this shouldn't
be a large problem.
## Compressor is trying to increase the number of rows
Backfilling can lead to issues with compression. The synapse_auto_compressor will
skip chunks it can't reduce the size of and so this should help jump over the backfilled
state_groups. Lots of state resolution might also impact the ability to use the compressor.
To examine the state_group hierarchy run the manual tool on a room with the `-g` option
and look at the graphs.


@@ -0,0 +1,21 @@
[package]
name = "compressor_integration_tests"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
string_cache = "0.8.7"
serial_test = "2.0.0"
openssl = "0.10.60"
postgres = "0.19.7"
postgres-openssl = "0.5.0"
rand = "0.8.5"
synapse_compress_state = { path = "../", features = ["no-progress-bars"] }
synapse_auto_compressor = { path = "../synapse_auto_compressor/" }
env_logger = "0.10.0"
log = "0.4.20"
[dependencies.state-map]
git = "https://github.com/matrix-org/rust-matrix-state-map"


@@ -0,0 +1,28 @@
#!/bin/sh
#N.B. the database setup comes from:
#https://github.com/matrix-org/synapse/blob/develop/synapse/storage/schema/state/full_schemas/54/full.sql
# Setup the required tables for testing
psql --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<SQLCODE
CREATE TABLE state_groups (
id BIGINT PRIMARY KEY,
room_id TEXT NOT NULL,
event_id TEXT NOT NULL
);
CREATE TABLE state_groups_state (
state_group BIGINT NOT NULL,
room_id TEXT NOT NULL,
type TEXT NOT NULL,
state_key TEXT NOT NULL,
event_id TEXT NOT NULL
);
CREATE TABLE state_group_edges (
state_group BIGINT NOT NULL,
prev_state_group BIGINT NOT NULL
);
SQLCODE


@@ -0,0 +1,20 @@
version: '3'
services:
postgres:
image: "postgres:latest"
ports:
# N.B. format is [port on machine]:[port to expose from container]
- 5432:5432
environment:
POSTGRES_USER: synapse_user
POSTGRES_PASSWORD: synapse_pass
POSTGRES_DB: synapse
PGDATA: /tmp/data
volumes:
- ./database_setup.sh:/docker-entrypoint-initdb.d/1_database_setup.sh
tmpfs:
/tmp/data


@@ -0,0 +1,386 @@
use log::LevelFilter;
use openssl::ssl::{SslConnector, SslMethod, SslVerifyMode};
use postgres::{fallible_iterator::FallibleIterator, Client};
use postgres_openssl::MakeTlsConnector;
use rand::{distributions::Alphanumeric, thread_rng, Rng};
use state_map::StateMap;
use std::{
borrow::Cow,
collections::BTreeMap,
env,
fmt::{self, Write as _},
};
use string_cache::DefaultAtom as Atom;
use synapse_compress_state::StateGroupEntry;
pub mod map_builder;
pub static DB_URL: &str = "postgresql://synapse_user:synapse_pass@localhost/synapse";
/// Adds the contents of a state group map to the testing database
pub fn add_contents_to_database(room_id: &str, state_group_map: &BTreeMap<i64, StateGroupEntry>) {
// connect to the database
let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
builder.set_verify(SslVerifyMode::NONE);
let connector = MakeTlsConnector::new(builder.build());
let mut client = Client::connect(DB_URL, connector).unwrap();
// build up the query
let mut sql = String::new();
let room_id = PGEscape(room_id);
let event_id = PGEscape("left_blank");
for (sg, entry) in state_group_map {
// create the entry for state_groups
writeln!(
sql,
"INSERT INTO state_groups (id, room_id, event_id) \
VALUES ({sg}, {room_id}, {event_id});",
)
.expect("Writing to a String cannot fail");
// create the entry in state_group_edges IF exists
if let Some(prev_sg) = entry.prev_state_group {
writeln!(
sql,
"INSERT INTO state_group_edges (state_group, prev_state_group) \
VALUES ({sg}, {prev_sg});",
)
.unwrap();
}
// write entry for each row in delta
if !entry.state_map.is_empty() {
sql.push_str(
"INSERT INTO state_groups_state \
(state_group, room_id, type, state_key, event_id) \
VALUES\n",
);
for ((t, s), e) in entry.state_map.iter() {
let t = PGEscape(t);
let s = PGEscape(s);
let e = PGEscape(e);
writeln!(sql, " ({sg}, {room_id}, {t}, {s}, {e}),").unwrap();
}
// Replace the last comma with a semicolon
sql.replace_range((sql.len() - 2).., ";\n");
}
}
client.batch_execute(&sql).unwrap();
}
/// Clears the contents of the testing database
pub fn empty_database() {
// connect to the database
let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
builder.set_verify(SslVerifyMode::NONE);
let connector = MakeTlsConnector::new(builder.build());
let mut client = Client::connect(DB_URL, connector).unwrap();
// delete all the contents from all three tables
let sql = r"
TRUNCATE state_groups;
TRUNCATE state_group_edges;
TRUNCATE state_groups_state;
";
client.batch_execute(sql).unwrap();
}
/// Safely escape strings for use in SQL queries by wrapping them in Postgres
/// dollar-quoting ($tag$ ... $tag$), choosing a random tag that does not occur
/// in the string itself
struct PGEscape<'a>(pub &'a str);
impl<'a> fmt::Display for PGEscape<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut delim = Cow::from("$$");
while self.0.contains(&delim as &str) {
let s: String = thread_rng()
.sample_iter(&Alphanumeric)
.take(10)
.map(char::from)
.collect();
delim = format!("${}$", s).into();
}
write!(f, "{}{}{}", delim, self.0, delim)
}
}
/// Checks whether the state at each state group is the same as what the map thinks it should be
///
/// i.e. when synapse tries to work out the state for a given state group by looking at
/// the database, will the state it gets be the same as what the map thinks it should be?
pub fn database_collapsed_states_match_map(
state_group_map: &BTreeMap<i64, StateGroupEntry>,
) -> bool {
for sg in state_group_map.keys() {
let map_state = collapse_state_with_map(state_group_map, *sg);
let database_state = collapse_state_with_database(*sg);
if map_state != database_state {
println!("database state {} doesn't match", sg);
println!("expected {:?}", map_state);
println!("but found {:?}", database_state);
return false;
}
}
true
}
/// Gets the full state for a given group from the map (of deltas)
fn collapse_state_with_map(
map: &BTreeMap<i64, StateGroupEntry>,
state_group: i64,
) -> StateMap<Atom> {
let mut entry = &map[&state_group];
let mut state_map = StateMap::new();
let mut stack = vec![state_group];
while let Some(prev_state_group) = entry.prev_state_group {
stack.push(prev_state_group);
if !map.contains_key(&prev_state_group) {
panic!("Missing {}", prev_state_group);
}
entry = &map[&prev_state_group];
}
for sg in stack.iter().rev() {
state_map.extend(
map[sg]
.state_map
.iter()
.map(|((t, s), e)| ((t, s), e.clone())),
);
}
state_map
}
fn collapse_state_with_database(state_group: i64) -> StateMap<Atom> {
// connect to the database
let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
builder.set_verify(SslVerifyMode::NONE);
let connector = MakeTlsConnector::new(builder.build());
let mut client = Client::connect(DB_URL, connector).unwrap();
// Gets the delta for a specific state group
let query_deltas = r#"
SELECT m.id, type, state_key, s.event_id
FROM state_groups AS m
LEFT JOIN state_groups_state AS s ON (m.id = s.state_group)
WHERE m.id = $1
"#;
// If there is no delta for that specific state group, then we still want to find
// the predecessor (so have split this into a different query)
let query_pred = r#"
SELECT prev_state_group
FROM state_group_edges
WHERE state_group = $1
"#;
let mut state_map: StateMap<Atom> = StateMap::new();
let mut next_group = Some(state_group);
while let Some(sg) = next_group {
// get predecessor from state_group_edges
let mut pred = client.query_raw(query_pred, [sg]).unwrap();
// set next_group to predecessor
next_group = match pred.next().unwrap() {
Some(p) => p.get(0),
None => None,
};
// if there was a predecessor then assert that it is unique
if next_group.is_some() {
assert!(pred.next().unwrap().is_none());
}
drop(pred);
let mut rows = client.query_raw(query_deltas, [sg]).unwrap();
while let Some(row) = rows.next().unwrap() {
// Copy the single delta from the predecessor stored in this row
if let Some(etype) = row.get::<_, Option<String>>(1) {
let key = &row.get::<_, String>(2);
// only insert if not overriding existing entry
// this is because the newer delta is found FIRST
if state_map.get(&etype, key).is_none() {
state_map.insert(&etype, key, row.get::<_, String>(3).into());
}
}
}
}
state_map
}
/// Check whether predecessors and deltas stored in the database are the same as in the map
pub fn database_structure_matches_map(state_group_map: &BTreeMap<i64, StateGroupEntry>) -> bool {
// connect to the database
let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
builder.set_verify(SslVerifyMode::NONE);
let connector = MakeTlsConnector::new(builder.build());
let mut client = Client::connect(DB_URL, connector).unwrap();
// Gets the delta for a specific state group
let query_deltas = r#"
SELECT m.id, type, state_key, s.event_id
FROM state_groups AS m
LEFT JOIN state_groups_state AS s ON (m.id = s.state_group)
WHERE m.id = $1
"#;
// If there is no delta for that specific state group, then we still want to find
// the predecessor (so have split this into a different query)
let query_pred = r#"
SELECT prev_state_group
FROM state_group_edges
WHERE state_group = $1
"#;
for (sg, entry) in state_group_map {
// get predecessor from state_group_edges
let mut pred_iter = client.query_raw(query_pred, &[sg]).unwrap();
// read out the predecessor value from the database
let database_pred = match pred_iter.next().unwrap() {
Some(p) => p.get(0),
None => None,
};
// if there was a predecessor then assert that it is unique
if database_pred.is_some() {
assert!(pred_iter.next().unwrap().is_none());
}
// check if it matches map
if database_pred != entry.prev_state_group {
println!(
"ERROR: predecessor for {} was {:?} (expected {:?})",
sg, database_pred, entry.prev_state_group
);
return false;
}
// needed so that can create another query
drop(pred_iter);
// Now check that deltas are the same
let mut state_map: StateMap<Atom> = StateMap::new();
// Get delta from state_groups_state
let mut rows = client.query_raw(query_deltas, &[sg]).unwrap();
while let Some(row) = rows.next().unwrap() {
// Copy the single delta from the predecessor stored in this row
if let Some(etype) = row.get::<_, Option<String>>(1) {
state_map.insert(
&etype,
&row.get::<_, String>(2),
row.get::<_, String>(3).into(),
);
}
}
// Check that the delta matches the map
if state_map != entry.state_map {
println!("ERROR: delta for {} didn't match", sg);
println!("Expected: {:?}", entry.state_map);
println!("Actual: {:?}", state_map);
return false;
}
}
true
}
/// Clears the compressor state from the database
pub fn clear_compressor_state() {
// connect to the database
let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
builder.set_verify(SslVerifyMode::NONE);
let connector = MakeTlsConnector::new(builder.build());
let mut client = Client::connect(DB_URL, connector).unwrap();
// delete all the contents from the state compressor tables
let sql = r"
TRUNCATE state_compressor_state;
TRUNCATE state_compressor_progress;
UPDATE state_compressor_total_progress SET lowest_uncompressed_group = 0;
";
client.batch_execute(sql).unwrap();
}
#[test]
fn functions_are_self_consistent() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
for i in 0i64..=13i64 {
let mut entry = StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
};
entry
.state_map
.insert("group", &i.to_string(), "seen".into());
entry.state_map.insert("node", "is", i.to_string().into());
initial.insert(i, entry);
prev = Some(i)
}
empty_database();
add_contents_to_database("room1", &initial);
assert!(database_collapsed_states_match_map(&initial));
assert!(database_structure_matches_map(&initial));
}
pub fn setup_logger() {
// setup the logger for the synapse_auto_compressor
// The default can be overwritten with RUST_LOG
// see the README for more information
if env::var("RUST_LOG").is_err() {
let mut log_builder = env_logger::builder();
// set is_test(true) so that the output is hidden by cargo test (unless the test fails)
log_builder.is_test(true);
// default to printing the debug information for both packages being tested
// (Note that just setting the global level to debug will log every sql transaction)
log_builder.filter_module("synapse_compress_state", LevelFilter::Debug);
log_builder.filter_module("synapse_auto_compressor", LevelFilter::Debug);
// use try_init() in case the logger has been set up by some previous test
let _ = log_builder.try_init();
} else {
// If RUST_LOG was set then use that
let mut log_builder = env_logger::Builder::from_env("RUST_LOG");
// set is_test(true) so that the output is hidden by cargo test (unless the test fails)
log_builder.is_test(true);
// use try_init() in case the logger has been setup by some previous test
let _ = log_builder.try_init();
}
}


@@ -0,0 +1,217 @@
use std::collections::BTreeMap;
use state_map::StateMap;
use synapse_compress_state::StateGroupEntry;
/// Generates long chain of state groups each with state deltas
///
/// If called with start=0, end=13 this would build the following:
///
/// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
///
/// Where each group i has state:
/// ('node','is', i)
/// ('group', j, 'seen') - for all j less than i
pub fn line_with_state(start: i64, end: i64) -> BTreeMap<i64, StateGroupEntry> {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
for i in start..=end {
let mut entry = StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
};
entry
.state_map
.insert("group", &i.to_string(), "seen".into());
entry.state_map.insert("node", "is", i.to_string().into());
initial.insert(i, entry);
prev = Some(i)
}
initial
}
/// Generates line segments in a chain of state groups each with state deltas
///
/// If called with start=0, end=13 this would build the following:
///
/// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
///
/// Where each group i has state:
/// ('node','is', i)
/// ('group', j, 'seen') - for all j less than i
pub fn line_segments_with_state(start: i64, end: i64) -> BTreeMap<i64, StateGroupEntry> {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
for i in start..=end {
// if the state is a snapshot then set its predecessor to NONE
if (i - start) % 3 == 0 {
prev = None;
}
// create a blank entry for it
let mut entry = StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
};
// if it's a snapshot then add in all previous state
if prev.is_none() {
for j in start..i {
entry
.state_map
.insert("group", &j.to_string(), "seen".into());
}
}
// add in the new state for this state group
entry
.state_map
.insert("group", &i.to_string(), "seen".into());
entry.state_map.insert("node", "is", i.to_string().into());
// put it into the initial map
initial.insert(i, entry);
// set this group as the predecessor for the next
prev = Some(i)
}
initial
}
/// This generates the correct compressed structure with 3,3 levels
///
/// Note: only correct structure when no impossible predecessors
///
/// Structure generated:
///
/// 0 3\ 12
/// 1 4 6\ 13
/// 2 5 7 9
/// 8 10
/// 11
/// Where each group i has state:
/// ('node','is', i)
/// ('group', j, 'seen') - for all j less than i
pub fn compressed_3_3_from_0_to_13_with_state() -> BTreeMap<i64, StateGroupEntry> {
let expected_edges: BTreeMap<i64, i64> = vec![
(1, 0),
(2, 1),
(4, 3),
(5, 4),
(6, 3),
(7, 6),
(8, 7),
(9, 6),
(10, 9),
(11, 10),
(13, 12),
]
.into_iter()
.collect();
let mut expected: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
for i in 0i64..=13i64 {
let prev = expected_edges.get(&i);
//change from Option<&i64> to Option<i64>
let prev = prev.copied();
// create a blank entry for it
let mut entry = StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
};
// Add in all state between predecessor and now (non inclusive)
if let Some(p) = prev {
for j in (p + 1)..i {
entry
.state_map
.insert("group", &j.to_string(), "seen".into());
}
} else {
for j in 0i64..i {
entry
.state_map
.insert("group", &j.to_string(), "seen".into());
}
}
// add in the new state for this state group
entry
.state_map
.insert("group", &i.to_string(), "seen".into());
entry.state_map.insert("node", "is", i.to_string().into());
// put it into the expected map
expected.insert(i, entry);
}
expected
}
/// Generates state map structure that corresponds to edges (with deltas)
///
/// Each group i has state:
/// ('node','is', i)
/// ('group', j, 'seen') - for all j less than i
pub fn structure_from_edges_with_state(
edges: BTreeMap<i64, i64>,
start: i64,
end: i64,
) -> BTreeMap<i64, StateGroupEntry> {
let mut expected: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
for i in start..=end {
let prev = edges.get(&i);
//change from Option<&i64> to Option<i64>
let prev = prev.copied();
// create a blank entry for it
let mut entry = StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
};
// Add in all state between predecessor and now (non inclusive)
if let Some(p) = prev {
for j in (p + 1)..i {
entry
.state_map
.insert("group", &j.to_string(), "seen".into());
}
} else {
for j in start..i {
entry
.state_map
.insert("group", &j.to_string(), "seen".into());
}
}
// add in the new state for this state group
entry
.state_map
.insert("group", &i.to_string(), "seen".into());
entry.state_map.insert("node", "is", i.to_string().into());
// put it into the expected map
expected.insert(i, entry);
}
expected
}


@@ -0,0 +1,230 @@
use std::collections::BTreeMap;
use compressor_integration_tests::{
add_contents_to_database, clear_compressor_state, database_collapsed_states_match_map,
database_structure_matches_map, empty_database,
map_builder::{
compressed_3_3_from_0_to_13_with_state, line_segments_with_state,
structure_from_edges_with_state,
},
setup_logger, DB_URL,
};
use serial_test::serial;
use synapse_auto_compressor::{
manager::{compress_chunks_of_database, run_compressor_on_room_chunk},
state_saving::{connect_to_database, create_tables_if_needed},
};
use synapse_compress_state::Level;
#[test]
#[serial(db)]
fn run_compressor_on_room_chunk_works() {
setup_logger();
// This starts with the following structure
//
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let initial = line_segments_with_state(0, 13);
empty_database();
add_contents_to_database("room1", &initial);
let mut client = connect_to_database(DB_URL).unwrap();
create_tables_if_needed(&mut client).unwrap();
clear_compressor_state();
// compress in 3,3 level sizes by default
let default_levels = vec![Level::new(3), Level::new(3)];
// compress the first 7 groups in the room
// structure should be the following afterwards
// (NOTE: only including compressed groups)
//
// 0 3\
// 1 4 6
// 2 5
run_compressor_on_room_chunk(DB_URL, "room1", 7, &default_levels).unwrap();
// compress the next 7 groups
run_compressor_on_room_chunk(DB_URL, "room1", 7, &default_levels).unwrap();
// This should have created the following structure in the database
// i.e. groups 6 and 9 should have changed from before
// N.B. this saves 11 rows
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
let expected = compressed_3_3_from_0_to_13_with_state();
// Check that the database still gives correct states for each group!
assert!(database_collapsed_states_match_map(&initial));
// Check that the structure of the database matches the expected structure
assert!(database_structure_matches_map(&expected));
}
#[test]
#[serial(db)]
fn compress_chunks_of_database_compresses_multiple_rooms() {
setup_logger();
// This creates 2 rooms with the following structure
//
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
// (with room2's numbers shifted up 14)
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i in that room
let initial1 = line_segments_with_state(0, 13);
let initial2 = line_segments_with_state(14, 27);
empty_database();
add_contents_to_database("room1", &initial1);
add_contents_to_database("room2", &initial2);
let mut client = connect_to_database(DB_URL).unwrap();
create_tables_if_needed(&mut client).unwrap();
clear_compressor_state();
// compress in 3,3 level sizes by default
let default_levels = vec![Level::new(3), Level::new(3)];
// Compress 4 chunks of size 8.
// The first two should compress room1 and the second two should compress room2
compress_chunks_of_database(DB_URL, 8, &default_levels, 4).unwrap();
// We are aiming for the following structure in the database for room1
// i.e. groups 6 and 9 should have changed from initial map
// N.B. this saves 11 rows
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
//
// Where each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let expected1 = compressed_3_3_from_0_to_13_with_state();
// Check that the database still gives correct states for each group in room1
assert!(database_collapsed_states_match_map(&initial1));
// Check that the structure of the database matches the expected structure for room1
assert!(database_structure_matches_map(&expected1));
// room 2 should have the same structure but with all numbers shifted up by 14
let expected_edges: BTreeMap<i64, i64> = vec![
(15, 14),
(16, 15),
(18, 17),
(19, 18),
(20, 17),
(21, 20),
(22, 21),
(23, 20),
(24, 23),
(25, 24),
(27, 26),
]
.into_iter()
.collect();
let expected2 = structure_from_edges_with_state(expected_edges, 14, 27);
// Check that the database still gives correct states for each group in room2
assert!(database_collapsed_states_match_map(&initial2));
// Check that the structure of the database matches the expected structure for room2
assert!(database_structure_matches_map(&expected2));
}
#[test]
#[serial(db)]
fn compress_chunks_of_database_continues_where_it_left_off() {
setup_logger();
// This creates 2 rooms with the following structure
//
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
// (with room2's numbers shifted up 14)
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i in that room
let initial1 = line_segments_with_state(0, 13);
let initial2 = line_segments_with_state(14, 27);
empty_database();
add_contents_to_database("room1", &initial1);
add_contents_to_database("room2", &initial2);
let mut client = connect_to_database(DB_URL).unwrap();
create_tables_if_needed(&mut client).unwrap();
clear_compressor_state();
// compress in 3,3 level sizes by default
let default_levels = vec![Level::new(3), Level::new(3)];
// Compress chunks of various sizes:
//
// These two should compress room1
compress_chunks_of_database(DB_URL, 8, &default_levels, 1).unwrap();
compress_chunks_of_database(DB_URL, 100, &default_levels, 1).unwrap();
// These three should compress room2
compress_chunks_of_database(DB_URL, 1, &default_levels, 2).unwrap();
compress_chunks_of_database(DB_URL, 5, &default_levels, 1).unwrap();
compress_chunks_of_database(DB_URL, 5, &default_levels, 1).unwrap();
// We are aiming for the following structure in the database for room1
// i.e. groups 6 and 9 should have changed from initial map
// N.B. this saves 11 rows
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
//
// Where each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let expected1 = compressed_3_3_from_0_to_13_with_state();
// Check that the database still gives correct states for each group in room1
assert!(database_collapsed_states_match_map(&initial1));
// Check that the structure of the database matches the expected structure for room1
assert!(database_structure_matches_map(&expected1));
// room 2 should have the same structure but with all numbers shifted up by 14
let expected_edges: BTreeMap<i64, i64> = vec![
(15, 14),
(16, 15),
(18, 17),
(19, 18),
(20, 17),
(21, 20),
(22, 21),
(23, 20),
(24, 23),
(25, 24),
(27, 26),
]
.into_iter()
.collect();
let expected2 = structure_from_edges_with_state(expected_edges, 14, 27);
// Check that the database still gives correct states for each group in room2
assert!(database_collapsed_states_match_map(&initial2));
// Check that the structure of the database matches the expected structure for room2
assert!(database_structure_matches_map(&expected2));
}


@@ -0,0 +1,29 @@
use compressor_integration_tests::{clear_compressor_state, setup_logger, DB_URL};
use serial_test::serial;
use synapse_auto_compressor::state_saving::{
connect_to_database, create_tables_if_needed, read_room_compressor_state,
write_room_compressor_state,
};
use synapse_compress_state::Level;
#[test]
#[serial(db)]
fn write_then_read_state_gives_correct_results() {
setup_logger();
let mut client = connect_to_database(DB_URL).unwrap();
create_tables_if_needed(&mut client).unwrap();
clear_compressor_state();
let room_id = "room1";
let written_info: Vec<Level> =
vec![Level::restore(3, 1, Some(6)), Level::restore(3, 2, Some(6))];
let written_num = 53;
write_room_compressor_state(&mut client, room_id, &written_info, written_num).unwrap();
let (read_num, read_info) = read_room_compressor_state(&mut client, room_id)
.unwrap()
.unwrap();
assert_eq!(written_info, read_info);
assert_eq!(written_num, read_num);
}


@@ -0,0 +1,575 @@
use std::collections::BTreeMap;
use compressor_integration_tests::{
add_contents_to_database, database_collapsed_states_match_map, database_structure_matches_map,
empty_database,
map_builder::{
compressed_3_3_from_0_to_13_with_state, line_segments_with_state, line_with_state,
structure_from_edges_with_state,
},
setup_logger, DB_URL,
};
use serial_test::serial;
use synapse_compress_state::{run, Config};
// Remember to add #[serial(db)] before any test that accesses the database.
// Only one test with this annotation can run at once, preventing
// concurrency bugs.
//
// You will probably also want to call empty_database() at the start
// of each test as well (since the order in which tests run is not guaranteed)
#[test]
#[serial(db)]
fn run_succeeds_without_crashing() {
setup_logger();
// This starts with the following structure
//
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let initial = line_with_state(0, 13);
empty_database();
add_contents_to_database("room1", &initial);
let db_url = DB_URL.to_string();
let room_id = "room1".to_string();
let output_file = Some("./tests/tmp/run_succeeds_without_crashing.sql".to_string());
let min_state_group = None;
let groups_to_compress = None;
let min_saved_rows = None;
let max_state_group = None;
let level_sizes = "3,3".to_string();
let transactions = true;
let graphs = false;
let commit_changes = false;
let verify = true;
let config = Config::new(
db_url,
room_id,
output_file,
min_state_group,
groups_to_compress,
min_saved_rows,
max_state_group,
level_sizes,
transactions,
graphs,
commit_changes,
verify,
)
.unwrap();
run(config);
}
#[test]
#[serial(db)]
fn changes_commited_if_no_min_saved_rows() {
setup_logger();
// This starts with the following structure
//
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let initial = line_segments_with_state(0, 13);
// Place this initial state into an empty database
empty_database();
add_contents_to_database("room1", &initial);
// set up the config options
let db_url = DB_URL.to_string();
let room_id = "room1".to_string();
let output_file = Some("./tests/tmp/changes_commited_if_no_min_saved_rows.sql".to_string());
let min_state_group = None;
let min_saved_rows = None;
let groups_to_compress = None;
let max_state_group = None;
let level_sizes = "3,3".to_string();
let transactions = true;
let graphs = false;
let commit_changes = true;
let verify = true;
let config = Config::new(
db_url,
room_id,
output_file,
min_state_group,
groups_to_compress,
min_saved_rows,
max_state_group,
level_sizes,
transactions,
graphs,
commit_changes,
verify,
)
.unwrap();
// Run the compressor with those settings
run(config);
// This should have created the following structure in the database
// i.e. groups 6 and 9 should have changed from before
// N.B. this saves 11 rows
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
let expected = compressed_3_3_from_0_to_13_with_state();
// Check that the database still gives correct states for each group!
assert!(database_collapsed_states_match_map(&initial));
// Check that the structure of the database matches the expected structure
assert!(database_structure_matches_map(&expected))
}
#[test]
#[serial(db)]
fn changes_commited_if_min_saved_rows_exceeded() {
setup_logger();
// This starts with the following structure
//
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let initial = line_segments_with_state(0, 13);
// Place this initial state into an empty database
empty_database();
add_contents_to_database("room1", &initial);
// set up the config options
let db_url = DB_URL.to_string();
let room_id = "room1".to_string();
let output_file = Some("./tests/tmp/changes_commited_if_no_min_saved_rows.sql".to_string());
let min_state_group = None;
let min_saved_rows = Some(10);
let groups_to_compress = None;
let max_state_group = None;
let level_sizes = "3,3".to_string();
let transactions = true;
let graphs = false;
let commit_changes = true;
let verify = true;
let config = Config::new(
db_url,
room_id,
output_file,
min_state_group,
groups_to_compress,
min_saved_rows,
max_state_group,
level_sizes,
transactions,
graphs,
commit_changes,
verify,
)
.unwrap();
// Run the compressor with those settings
run(config);
// This should have created the following structure in the database
// i.e. groups 6 and 9 should have changed from before
// N.B. this saves 11 rows
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
let expected = compressed_3_3_from_0_to_13_with_state();
// Check that the database still gives correct states for each group!
assert!(database_collapsed_states_match_map(&initial));
// Check that the structure of the database matches the expected structure
assert!(database_structure_matches_map(&expected));
}
#[test]
#[serial(db)]
fn changes_not_commited_if_fewer_than_min_saved_rows() {
setup_logger();
// This starts with the following structure
//
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let initial = line_segments_with_state(0, 13);
// Place this initial state into an empty database
empty_database();
add_contents_to_database("room1", &initial);
// set up the config options
let db_url = DB_URL.to_string();
let room_id = "room1".to_string();
let output_file =
Some("./tests/tmp/changes_not_commited_if_fewer_than_min_saved_rows.sql".to_string());
let min_state_group = None;
let min_saved_rows = Some(12);
let groups_to_compress = None;
let max_state_group = None;
let level_sizes = "3,3".to_string();
let transactions = true;
let graphs = false;
let commit_changes = true;
let verify = true;
let config = Config::new(
db_url,
room_id,
output_file,
min_state_group,
groups_to_compress,
min_saved_rows,
max_state_group,
level_sizes,
transactions,
graphs,
commit_changes,
verify,
)
.unwrap();
// Run the compressor with those settings
run(config);
// This should have created the following structure when running
// (i.e. try and change groups 6 and 9 only)
// BUT: This saves 11 rows which is fewer than min_saved_rows
// therefore there should be no changes committed!
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
// Check that the database still gives correct states for each group!
assert!(database_collapsed_states_match_map(&initial));
// Check that the structure of the database matches the expected structure
assert!(database_structure_matches_map(&initial));
}
#[test]
#[should_panic(expected = "Error connecting to the database:")]
fn run_panics_if_invalid_db_url() {
setup_logger();
// set up the config options
let db_url = "thisIsAnInvalidURL".to_string();
let room_id = "room1".to_string();
let output_file = Some("./tests/tmp/run_panics_if_invalid_db_url.sql".to_string());
let min_state_group = None;
let min_saved_rows = None;
let groups_to_compress = None;
let max_state_group = None;
let level_sizes = "3,3".to_string();
let transactions = true;
let graphs = false;
let commit_changes = true;
let verify = true;
let config = Config::new(
db_url,
room_id,
output_file,
min_state_group,
groups_to_compress,
min_saved_rows,
max_state_group,
level_sizes,
transactions,
graphs,
commit_changes,
verify,
)
.unwrap();
// Run the compressor with those settings
run(config);
}
#[test]
#[serial(db)]
fn run_only_affects_given_room_id() {
setup_logger();
// build room1 stuff up
// This starts with the following structure
//
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let initial_room_1 = line_segments_with_state(0, 13);
// build room2 stuff up
// This starts with the same structure as room 1 but with all group ids
// 14 higher
let initial_room_2 = line_segments_with_state(14, 28);
// Place this initial state into an empty database
empty_database();
add_contents_to_database("room1", &initial_room_1);
add_contents_to_database("room2", &initial_room_2);
// set up the config options
let db_url = DB_URL.to_string();
let room_id = "room1".to_string();
let output_file = Some("./tests/tmp/run_only_affects_given_room_id.sql".to_string());
let min_state_group = None;
let min_saved_rows = None;
let groups_to_compress = None;
let max_state_group = None;
let level_sizes = "3,3".to_string();
let transactions = true;
let graphs = false;
let commit_changes = true;
let verify = true;
let config = Config::new(
db_url,
room_id,
output_file,
min_state_group,
groups_to_compress,
min_saved_rows,
max_state_group,
level_sizes,
transactions,
graphs,
commit_changes,
verify,
)
.unwrap();
// Run the compressor with those settings
run(config);
// This should have created the following structure in the database
// i.e. groups 6 and 9 should have changed from before
// N.B. this saves 11 rows
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
let expected = compressed_3_3_from_0_to_13_with_state();
// Check that the database still gives correct states for each group
// in both room1 and room2
assert!(database_collapsed_states_match_map(&initial_room_1));
assert!(database_collapsed_states_match_map(&initial_room_2));
// Check that the structure of the database matches the expected structure
// in both room1 and room2
assert!(database_structure_matches_map(&expected));
assert!(database_structure_matches_map(&initial_room_2));
}
#[test]
#[serial(db)]
fn run_respects_groups_to_compress() {
setup_logger();
// This starts with the following structure
//
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let initial = line_segments_with_state(0, 13);
// Place this initial state into an empty database
empty_database();
add_contents_to_database("room1", &initial);
// set up the config options
let db_url = DB_URL.to_string();
let room_id = "room1".to_string();
let output_file = Some("./tests/tmp/run_respects_groups_to_compress.sql".to_string());
let min_state_group = Some(2);
let min_saved_rows = None;
let groups_to_compress = Some(9);
let max_state_group = None;
let level_sizes = "3,3".to_string();
let transactions = true;
let graphs = false;
let commit_changes = true;
let verify = true;
let config = Config::new(
db_url,
room_id,
output_file,
min_state_group,
groups_to_compress,
min_saved_rows,
max_state_group,
level_sizes,
transactions,
graphs,
commit_changes,
verify,
)
.unwrap();
// Run the compressor with those settings
run(config);
// This should have created the following structure in the database
// as it should only compress from groups higher than 2 (non inclusive)
// and should only compress a total of 9 groups
// i.e. so only group 9 should have changed from before
// N.B. this saves 7 rows
//
// 0 3 6\ 12
// 1 4 7 9 13
// 2 5 8 10
// 11
//
let expected_edges: BTreeMap<i64, i64> = vec![
(1, 0),
(2, 1),
(4, 3),
(5, 4),
(7, 6),
(8, 7),
(9, 6),
(10, 9),
(11, 10),
(13, 12),
]
.into_iter()
.collect();
let expected = structure_from_edges_with_state(expected_edges, 0, 13);
// Check that the database still gives correct states for each group!
assert!(database_collapsed_states_match_map(&initial));
// Check that the structure of the database matches the expected structure
assert!(database_structure_matches_map(&expected))
}
#[test]
#[serial(db)]
fn run_is_idempotent_when_run_on_whole_room() {
setup_logger();
// This starts with the following structure
//
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let initial = line_segments_with_state(0, 13);
// Place this initial state into an empty database
empty_database();
add_contents_to_database("room1", &initial);
// set up the config options
let db_url = DB_URL.to_string();
let room_id = "room1".to_string();
let output_file1 =
Some("./tests/tmp/run_is_idempotent_when_run_on_whole_room_1.sql".to_string());
let output_file2 =
Some("./tests/tmp/run_is_idempotent_when_run_on_whole_room_2.sql".to_string());
let min_state_group = None;
let min_saved_rows = None;
let groups_to_compress = None;
let max_state_group = None;
let level_sizes = "3,3".to_string();
let transactions = true;
let graphs = false;
let commit_changes = true;
let verify = true;
let config1 = Config::new(
db_url.clone(),
room_id.clone(),
output_file1,
min_state_group,
groups_to_compress,
min_saved_rows,
max_state_group,
level_sizes.clone(),
transactions,
graphs,
commit_changes,
verify,
)
.unwrap();
let config2 = Config::new(
db_url,
room_id,
output_file2,
min_state_group,
groups_to_compress,
min_saved_rows,
max_state_group,
level_sizes,
transactions,
graphs,
commit_changes,
verify,
)
.unwrap();
// We are aiming for the following structure in the database
// i.e. groups 6 and 9 should have changed from initial map
// N.B. this saves 11 rows
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
//
// Where each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let expected = compressed_3_3_from_0_to_13_with_state();
// Run the compressor with those settings for the first time
run(config1);
// Check that the database still gives correct states for each group!
assert!(database_collapsed_states_match_map(&initial));
// Check that the structure of the database matches the expected structure
assert!(database_structure_matches_map(&expected));
// Run the compressor with those settings for the second time
run(config2);
// Check that the database still gives correct states for each group!
assert!(database_collapsed_states_match_map(&initial));
// Check that the structure of the database still matches the expected structure
assert!(database_structure_matches_map(&expected));
}


@@ -0,0 +1,83 @@
use compressor_integration_tests::{
add_contents_to_database, database_collapsed_states_match_map, database_structure_matches_map,
empty_database,
map_builder::{compressed_3_3_from_0_to_13_with_state, line_segments_with_state},
setup_logger, DB_URL,
};
use serial_test::serial;
use synapse_compress_state::{continue_run, Level};
// Tests the saving and continuing functionality
// The compressor should produce the same results when run in one go
// as when run in multiple stages
#[test]
#[serial(db)]
fn continue_run_called_twice_same_as_run() {
setup_logger();
// This starts with the following structure
//
// 0-1-2 3-4-5 6-7-8 9-10-11 12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
let initial = line_segments_with_state(0, 13);
// Place this initial state into an empty database
empty_database();
add_contents_to_database("room1", &initial);
let db_url = DB_URL.to_string();
let room_id = "room1".to_string();
// will run the compression in two batches
let start = None;
let chunk_size = 7;
// compress in 3,3 level sizes
// since the compressor hasn't been run before, the levels start empty
let level_info = vec![Level::new(3), Level::new(3)];
// Run the compressor with those settings
let chunk_stats_1 = continue_run(start, chunk_size, &db_url, &room_id, &level_info).unwrap();
// Assert that it stopped at 6 (i.e. after the 7 groups 0...6)
assert_eq!(chunk_stats_1.last_compressed_group, 6);
// structure should be the following at this point
// (NOTE: only including compressed groups)
//
// 0 3\
// 1 4 6
// 2 5
assert_eq!(
chunk_stats_1.new_level_info,
vec![Level::restore(3, 1, Some(6)), Level::restore(3, 2, Some(6))]
);
let start = Some(6);
let chunk_size = 7;
let level_info = chunk_stats_1.new_level_info;
// Run the compressor with those settings
let chunk_stats_2 = continue_run(start, chunk_size, &db_url, &room_id, &level_info).unwrap();
// Assert that it stopped at 13 (i.e. after compressing the remaining groups 7...13)
assert_eq!(chunk_stats_2.last_compressed_group, 13);
// This should have created the following structure in the database
// i.e. groups 6 and 9 should have changed from before
// N.B. this saves 11 rows
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
let expected = compressed_3_3_from_0_to_13_with_state();
// Check that the database still gives correct states for each group!
assert!(database_collapsed_states_match_map(&initial));
// Check that the structure of the database matches the expected structure
assert!(database_structure_matches_map(&expected))
}


@@ -0,0 +1 @@
This folder is where SQL files generated by the integration tests are saved.

docs/algorithm.md Normal file

@@ -0,0 +1,107 @@
# Compression algorithm
## What is state?
State is things like who is in a room, what the room topic/name is, who has
what privilege levels etc. Synapse keeps track of it for various reasons such as
spotting invalid events (e.g. ones sent by banned users) and providing room membership
information to clients.
## What is a state group?
Synapse needs to keep track of the state at the moment of each event. A state group
corresponds to a unique state. The database table `event_to_state_groups` keeps track
of the mapping from event ids to state group ids.
Consider the following simplified example:
```
State group id | State
_____________________________________________
1 | Alice in room
2 | Alice in room, Bob in room
3 | Bob in room
Event id | What the event was
______________________________________
1 | Alice sends a message
2 | Alice sends another message
3 | Bob joins the room
4 | Bob sends a message
5 | Alice leaves the room
6 | Bob sends a message
Event id | State group id
_________________________
1 | 1
2 | 1
3 | 2
4 | 2
5 | 3
6 | 3
```
## What are deltas and predecessors?
When a new state event happens (e.g. Bob joins the room) a new state group is created.
BUT instead of copying all of the state from the previous state group, we just store
the change from the previous group (saving on lots of storage space!). The difference
from the previous state group is called the "delta".
So for the previous example, we would have the following (Note only rows 1 and 2 will
make sense at this point):
```
State group id | Previous state group id | Delta
____________________________________________________________
1 | NONE | Alice in room
2 | 1 | Bob in room
3 | NONE | Bob in room
```
So why is state group 3's previous state group NONE and not 2? Well, the way that deltas
work in Synapse is that they can only add in new state or overwrite old state, but they
cannot remove it. (So if the room topic is changed then that is just overwriting state,
but removing Alice from the room is neither an addition nor an overwrite.) If it is
impossible to find a delta, then you just start again from scratch with a "snapshot" of
the entire state.
(NOTE: this is not documentation of how Synapse handles leaving rooms; it is purely for
illustrative purposes.)
The state of a state group is worked out by following the chain of previous state groups and
adding together all of their deltas (with the most recent taking precedence).
The mapping from state group to previous state group takes place in `state_group_edges`
and the deltas are stored in `state_groups_state`.
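To make that lookup concrete, here is a minimal sketch of collapsing a state group's deltas into its full state. This is illustrative only, not the actual Synapse or compressor code: the `Entry` type and `collapse` function are hypothetical stand-ins for the data held in `state_group_edges` and `state_groups_state`.
```rust
use std::collections::{BTreeMap, HashMap};

/// Hypothetical in-memory stand-in for one row of `state_group_edges` plus the
/// group's rows in `state_groups_state`: an optional predecessor and a delta of
/// (event type, state key) -> value entries.
struct Entry {
    prev: Option<i64>,
    delta: HashMap<(String, String), String>,
}

/// Resolve the full state of `group` by walking back to the snapshot that starts
/// its chain, then applying the deltas oldest-first so that more recent groups
/// take precedence.
fn collapse(groups: &BTreeMap<i64, Entry>, group: i64) -> HashMap<(String, String), String> {
    // Collect the chain of groups from `group` back to its snapshot.
    let mut chain = vec![group];
    let mut current = group;
    while let Some(prev) = groups[&current].prev {
        chain.push(prev);
        current = prev;
    }

    // Apply the deltas from the snapshot forwards.
    let mut state = HashMap::new();
    for g in chain.into_iter().rev() {
        for (key, value) in &groups[&g].delta {
            state.insert(key.clone(), value.clone());
        }
    }
    state
}
```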
## What are we compressing then?
In order to speed up the conversion from state group id to state, there is a limit of 100
hops set by synapse (that is: we will only ever have to look up the deltas for a maximum of
100 state groups). It does this by taking another "snapshot" every 100 state groups.
However, it is these snapshots that take up the bulk of the storage in a synapse database,
so we want to find a way to reduce the number of them without dramatically increasing the
maximum number of hops needed to do lookups.
## Compression Algorithm
The algorithm works by attempting to create a *tree* of deltas, produced by
appending state groups to different "levels". Each level has a maximum size, where
each state group is appended to the lowest level that is not full. This tool calls a
state group "compressed" once it has been added to
one of these levels.
This produces a graph that looks approximately like the following, in the case
of having two levels with the bottom level (L1) having a maximum size of 3:
```
L2 <-------------------- L2 <---------- ...
^--- L1 <--- L1 <--- L1 ^--- L1 <--- L1 <--- L1
NOTE: A <--- B means that state group B's predecessor is A
```
The structure that synapse creates by default would be equivalent to having one level with
a maximum length of 100.
**Note**: Increasing the sum of the sizes of the levels will increase the time it
takes to query the full state of a given state group, since that sum is roughly the
maximum number of hops that may be needed (e.g. levels of sizes 100, 50 and 25 allow
up to 100 + 50 + 25 = 175 hops).
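As a rough illustration of the appending rule described above, here is a simplified sketch (not the compressor's actual implementation; the `Level` fields and `assign_predecessors` function are illustrative) that assigns each state group a predecessor according to per-level maximum sizes:
```rust
/// One level of the tree: its maximum chain length, the current chain length,
/// and the state group currently at its head.
struct Level {
    max_length: usize,
    length: usize,
    head: Option<i64>,
}

/// Append each state group to the lowest level that still has space and record
/// the predecessor it ends up chained onto. A full level is restarted with the
/// group as its new head, and the search for a predecessor moves one level up.
fn assign_predecessors(groups: &[i64], level_sizes: &[usize]) -> Vec<(i64, Option<i64>)> {
    let mut levels: Vec<Level> = level_sizes
        .iter()
        .map(|&max_length| Level { max_length, length: 0, head: None })
        .collect();

    let mut edges = Vec::new();
    for &group in groups {
        let mut predecessor = None;
        for level in levels.iter_mut() {
            if level.length < level.max_length {
                // There is space: chain this group onto the level's current head.
                predecessor = level.head;
                level.head = Some(group);
                level.length += 1;
                break;
            } else {
                // Level is full: restart its chain with this group and keep
                // looking for a predecessor one level further up.
                level.head = Some(group);
                level.length = 1;
            }
        }
        // If every level was full, `predecessor` stays None: a fresh snapshot.
        edges.push((group, predecessor));
    }
    edges
}
```
With `level_sizes = &[3, 3]` and groups `0..=13` this reproduces the 3,3 tree used throughout the tests in this repository: for example, group 6 ends up hanging off group 3, and group 12 starts a fresh snapshot.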

docs/python.md Normal file

@@ -0,0 +1,54 @@
# Running the compressor tools from python
Both the automatic and manual tools use PyO3 to allow the compressor
to be run from Python.
To see any output from the tools, logging must be set up in Python (for example
with `logging.basicConfig(level=logging.INFO)`) before the compressor is run.
## Setting things up
1. Create a virtual environment in the place you want to use the compressor from
(if it doesn't already exist)
`$ virtualenv -p python3 venv`
2. Activate the virtual environment and install `maturin` (if you haven't already)
`$ source venv/bin/activate`
`$ pip install maturin`
3. Navigate to the correct location
For the automatic tool:
`$ cd /home/synapse/rust-synapse-compress-state/synapse_auto_compressor`
For the manual tool:
`$ cd /home/synapse/rust-synapse-compress-state`
4. Build and install the library
`$ maturin develop`
This will install the relevant compressor tool into the activated virtual environment.
## Automatic tool example:
```python
import synapse_auto_compressor
synapse_auto_compressor.compress_state_events_table(
db_url="postgresql://localhost/synapse",
chunk_size=500,
default_levels="100,50,25",
number_of_chunks=100
)
```
## Manual tool example:
```python
import synapse_compress_state
synapse_compress_state.run_compression(
db_url="postgresql://localhost/synapse",
room_id="!some_room:example.com",
output_file="out.sql",
transactions=True
)
```

pyproject.toml Normal file

@@ -0,0 +1,8 @@
[build-system]
requires = ["maturin>=1.0,<2.0"]
build-backend = "maturin"
[tool.maturin]
profile = "release"
features = ["pyo3"]
no-default-features = true


@@ -23,27 +23,28 @@
//!
//! This produces graphs that look roughly like, for two levels:
//!
//! ```
//! ```ignore
//! L2 <-------------------- L2 <---------- ...
//! ^--- L1 <--- L1 <--- L1 ^--- L1 <--- L1 <--- L1
//! ```
use indicatif::{ProgressBar, ProgressStyle};
use state_map::StateMap;
use std::collections::BTreeMap;
use std::{collections::BTreeMap, time::Duration};
use string_cache::DefaultAtom as Atom;
use super::{collapse_state_maps, StateGroupEntry};
/// Holds information about a particular level.
struct Level {
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Level {
/// The maximum size this level is allowed to be
max_length: usize,
/// The (approximate) current chain length of this level. This is equivalent
/// to recursively following `current`
current_chain_length: usize,
/// The head of this level
current: Option<i64>,
head: Option<i64>,
}
impl Level {
@@ -52,7 +53,16 @@ impl Level {
Level {
max_length,
current_chain_length: 0,
current: None,
head: None,
}
}
/// Creates a new level from stored state
pub fn restore(max_length: usize, current_chain_length: usize, head: Option<i64>) -> Level {
Level {
max_length,
current_chain_length,
head,
}
}
@@ -60,14 +70,14 @@ impl Level {
/// that given state group will (probably) reference the previous head.
///
/// Panics if `delta` is true and the level is already full.
pub fn update(&mut self, current: i64, delta: bool) {
self.current = Some(current);
fn update(&mut self, new_head: i64, delta: bool) {
self.head = Some(new_head);
if delta {
// If we're referencing the previous head then increment our chain
// length estimate
if !self.has_space() {
panic!("Tried to add to a already full level");
panic!("Tried to add to an already full level");
}
self.current_chain_length += 1;
@@ -77,9 +87,19 @@ impl Level {
}
}
/// Get the max length of the level
pub fn get_max_length(&self) -> usize {
self.max_length
}
/// Get the current length of the level
pub fn get_current_length(&self) -> usize {
self.current_chain_length
}
/// Get the current head of the level
pub fn get_current(&self) -> Option<i64> {
self.current
pub fn get_head(&self) -> Option<i64> {
self.head
}
/// Whether there is space in the current chain at this level. If not then a
@@ -127,24 +147,75 @@ impl<'a> Compressor<'a> {
compressor
}
/// Creates a compressor and runs the compression algorithm.
/// Used when restoring compressor state from a previous run,
/// in which case the levels' heads are also known.
pub fn compress_from_save(
original_state_map: &'a BTreeMap<i64, StateGroupEntry>,
level_info: &[Level],
) -> Compressor<'a> {
let levels = level_info
.iter()
.map(|l| Level::restore(l.max_length, l.current_chain_length, l.head))
.collect();
let mut compressor = Compressor {
original_state_map,
new_state_group_map: BTreeMap::new(),
levels,
stats: Stats::default(),
};
compressor.create_new_tree();
compressor
}
/// Returns all the state required to save the compressor so it can be continued later
pub fn get_level_info(&self) -> Vec<Level> {
self.levels.clone()
}
/// Actually runs the compression algorithm
fn create_new_tree(&mut self) {
if !self.new_state_group_map.is_empty() {
panic!("Can only call `create_new_tree` once");
}
let pb = ProgressBar::new(self.original_state_map.len() as u64);
let pb = if cfg!(feature = "no-progress-bars") {
ProgressBar::hidden()
} else {
ProgressBar::new(self.original_state_map.len() as u64)
};
pb.set_style(
ProgressStyle::default_bar().template("[{elapsed_precise}] {bar} {pos}/{len} {msg}"),
ProgressStyle::default_bar()
.template("[{elapsed_precise}] {bar} {pos}/{len} {msg}")
.unwrap(),
);
pb.set_message("state groups");
pb.enable_steady_tick(100);
pb.enable_steady_tick(Duration::from_millis(100));
for (&state_group, entry) in self.original_state_map {
// Check whether this entry is in_range or is just present in the map due to being
// a predecessor of a group that IS in_range for compression
if !entry.in_range {
let new_entry = StateGroupEntry {
// in_range is kept the same so that the new entry is equal to the old entry
// otherwise it might trigger a useless database transaction
in_range: entry.in_range,
prev_state_group: entry.prev_state_group,
state_map: entry.state_map.clone(),
};
// Paranoid assertion that we are not making changes to this entry;
// could probably be removed...
assert!(new_entry == *entry);
self.new_state_group_map.insert(state_group, new_entry);
continue;
}
let mut prev_state_group = None;
for level in &mut self.levels {
if level.has_space() {
prev_state_group = level.get_current();
prev_state_group = level.get_head();
level.update(state_group, true);
break;
} else {
@@ -162,6 +233,7 @@ impl<'a> Compressor<'a> {
self.new_state_group_map.insert(
state_group,
StateGroupEntry {
in_range: true,
prev_state_group,
state_map: delta,
},
@@ -182,7 +254,7 @@ impl<'a> Compressor<'a> {
///
/// Returns the state map and the actual base state group (if any) used.
fn get_delta(&mut self, prev_sg: Option<i64>, sg: i64) -> (StateMap<Atom>, Option<i64>) {
let state_map = collapse_state_maps(&self.original_state_map, sg);
let state_map = collapse_state_maps(self.original_state_map, sg);
let mut prev_sg = if let Some(prev_sg) = prev_sg {
prev_sg
@@ -194,7 +266,7 @@ impl<'a> Compressor<'a> {
// a valid base for the state group.
let mut prev_state_map;
'outer: loop {
prev_state_map = collapse_state_maps(&self.original_state_map, prev_sg);
prev_state_map = collapse_state_maps(self.original_state_map, prev_sg);
for (t, s) in prev_state_map.keys() {
if !state_map.contains_key(t, s) {
// This is not a valid base as it contains key the new state
@@ -230,49 +302,11 @@ impl<'a> Compressor<'a> {
}
}
#[test]
fn test_new_map() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
#[cfg(test)]
mod level_tests;
let mut prev = None;
for i in 0i64..=13i64 {
initial.insert(
i,
StateGroupEntry {
prev_state_group: prev,
state_map: StateMap::new(),
},
);
#[cfg(test)]
mod compressor_tests;
prev = Some(i)
}
let compressor = Compressor::compress(&initial, &[3, 3]);
let new_state = compressor.new_state_group_map;
let expected_edges: BTreeMap<i64, i64> = vec![
(1, 0),
(2, 1),
(4, 3),
(5, 4),
(6, 3),
(7, 6),
(8, 7),
(9, 6),
(10, 9),
(11, 10),
(13, 12),
]
.into_iter()
.collect();
for sg in 0i64..=13i64 {
assert_eq!(
expected_edges.get(&sg).cloned(),
new_state[&sg].prev_state_group,
"state group {} did not match expected",
sg,
);
}
}
#[cfg(test)]
mod stats_tests;


@@ -0,0 +1,692 @@
use crate::{
compressor::{Compressor, Level, Stats},
StateGroupEntry,
};
use state_map::StateMap;
use std::collections::BTreeMap;
use string_cache::DefaultAtom as Atom;
#[test]
fn compress_creates_correct_compressor() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
for i in 0i64..=13i64 {
initial.insert(
i,
StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
},
);
prev = Some(i)
}
let compressor = Compressor::compress(&initial, &[3, 3]);
let new_state = &compressor.new_state_group_map;
// This should create the following structure
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
let expected_edges: BTreeMap<i64, i64> = vec![
(1, 0),
(2, 1),
(4, 3),
(5, 4),
(6, 3),
(7, 6),
(8, 7),
(9, 6),
(10, 9),
(11, 10),
(13, 12),
]
.into_iter()
.collect();
for sg in 0i64..=13i64 {
assert_eq!(
expected_edges.get(&sg).cloned(),
new_state[&sg].prev_state_group,
"state group {} did not match expected",
sg,
);
}
}
#[test]
fn create_new_tree_does_nothing_if_already_compressed() {
// This should create the following structure
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
let initial_edges: BTreeMap<i64, i64> = vec![
(1, 0),
(2, 1),
(4, 3),
(5, 4),
(6, 3),
(7, 6),
(8, 7),
(9, 6),
(10, 9),
(11, 10),
(13, 12),
]
.into_iter()
.collect();
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
for i in 0i64..=13i64 {
// edge from map
let pred_group = initial_edges.get(&i);
// Need Option<i64> not Option<&i64>
let prev = pred_group.copied();
// insert that edge into the initial map
initial.insert(
i,
StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
},
);
}
let mut compressor = Compressor {
original_state_map: &initial,
new_state_group_map: BTreeMap::new(),
levels: vec![Level::new(3), Level::new(3)],
stats: Stats::default(),
};
compressor.create_new_tree();
let new_state = &compressor.new_state_group_map;
assert_eq!(initial, *new_state);
}
#[test]
fn create_new_tree_respects_levels() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
for i in 0i64..=13i64 {
initial.insert(
i,
StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
},
);
prev = Some(i)
}
let mut compressor = Compressor {
original_state_map: &initial,
new_state_group_map: BTreeMap::new(),
levels: vec![Level::new(3), Level::new(3)],
stats: Stats::default(),
};
compressor.create_new_tree();
let new_state = &compressor.new_state_group_map;
// This should create the following structure
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
let expected_edges: BTreeMap<i64, i64> = vec![
(1, 0),
(2, 1),
(4, 3),
(5, 4),
(6, 3),
(7, 6),
(8, 7),
(9, 6),
(10, 9),
(11, 10),
(13, 12),
]
.into_iter()
.collect();
for sg in 0i64..=13i64 {
assert_eq!(
expected_edges.get(&sg).cloned(),
new_state[&sg].prev_state_group,
"state group {} did not match expected",
sg,
);
}
}
#[test]
#[should_panic(expected = "Can only call `create_new_tree` once")]
fn create_new_tree_panics_if_run_twice() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
for i in 0i64..=13i64 {
initial.insert(
i,
StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
},
);
prev = Some(i)
}
let mut compressor = Compressor {
original_state_map: &initial,
new_state_group_map: BTreeMap::new(),
levels: vec![Level::new(3), Level::new(3)],
stats: Stats::default(),
};
compressor.create_new_tree();
compressor.create_new_tree();
}
#[test]
fn create_new_tree_respects_all_not_in_range() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
for i in 0i64..=13i64 {
initial.insert(
i,
StateGroupEntry {
in_range: false,
prev_state_group: prev,
state_map: StateMap::new(),
},
);
prev = Some(i)
}
let mut compressor = Compressor {
original_state_map: &initial,
new_state_group_map: BTreeMap::new(),
levels: vec![Level::new(3), Level::new(3)],
stats: Stats::default(),
};
compressor.create_new_tree();
let new_state = &compressor.new_state_group_map;
// This should create the following structure
//
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13 (i.e. no change!)
let expected_edges: BTreeMap<i64, i64> = vec![
(1, 0),
(2, 1),
(3, 2),
(4, 3),
(5, 4),
(6, 5),
(7, 6),
(8, 7),
(9, 8),
(10, 9),
(11, 10),
(12, 11),
(13, 12),
]
.into_iter()
.collect();
for sg in 0i64..=13i64 {
assert_eq!(
expected_edges.get(&sg).cloned(),
new_state[&sg].prev_state_group,
"state group {} did not match expected",
sg,
);
}
}
#[test]
fn create_new_tree_respects_some_not_in_range() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18
for i in 0i64..=18i64 {
initial.insert(
i,
StateGroupEntry {
in_range: i > 4,
prev_state_group: prev,
state_map: StateMap::new(),
},
);
prev = Some(i)
}
let mut compressor = Compressor {
original_state_map: &initial,
new_state_group_map: BTreeMap::new(),
levels: vec![Level::new(3), Level::new(3)],
stats: Stats::default(),
};
compressor.create_new_tree();
let new_state = &compressor.new_state_group_map;
// This should create the following structure
//
// 0 5 8\ 17
// 1 6 9 11\ 18
// 2 7 10 12 14
// 3 13 15
// 4 16
let expected_edges: BTreeMap<i64, i64> = vec![
(1, 0),
(2, 1),
(3, 2),
(4, 3), // No compression of nodes 0,1,2,3,4
(6, 5), // Compresses in 3,3 leveling starting at 5
(7, 6),
(9, 8),
(10, 9),
(11, 8),
(12, 11),
(13, 12),
(14, 11),
(15, 14),
(16, 15),
(18, 17),
]
.into_iter()
.collect();
for n in new_state {
println!("{:?}", n);
}
for sg in 0i64..=13i64 {
assert_eq!(
expected_edges.get(&sg).cloned(),
new_state[&sg].prev_state_group,
"state group {} did not match expected",
sg,
);
}
}
#[test]
fn create_new_tree_deals_with_impossible_preds() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// (note missing 3-4 link)
// 0-1-2-3
// 4-5-6-7-8-9-10-11-12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') where j is ancestor of i
for i in 0i64..=13i64 {
if i == 4 {
prev = None
}
let mut entry = StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
};
entry
.state_map
.insert("group", &i.to_string(), "seen".into());
entry.state_map.insert("node", "is", i.to_string().into());
initial.insert(i, entry);
prev = Some(i)
}
let mut compressor = Compressor {
original_state_map: &initial,
new_state_group_map: BTreeMap::new(),
levels: vec![Level::new(3), Level::new(3)],
stats: Stats::default(),
};
compressor.create_new_tree();
let new_state = &compressor.new_state_group_map;
for n in new_state {
println!("{:?}", n);
}
// This should create the following structure
//
// Brackets mean that the group has NO predecessor but is in that position in the
// levels tree
//
// 0 3\ 12
// 1 (4)(6)\ 13
// 2 5 7 9
// 8 10
// 11
let expected_edges: BTreeMap<i64, i64> = vec![
(1, 0),
(2, 1),
(5, 4),
(7, 6),
(8, 7),
(9, 6),
(10, 9),
(11, 10),
(13, 12),
]
.into_iter()
.collect();
for sg in 0i64..=13i64 {
assert_eq!(
expected_edges.get(&sg).cloned(),
new_state[&sg].prev_state_group,
"state group {} did not match expected",
sg,
);
}
}
#[test]
fn get_delta_returns_snapshot_if_no_prev_given() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
for i in 0i64..=13i64 {
let mut entry = StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
};
entry
.state_map
.insert("group", &i.to_string(), "seen".into());
entry.state_map.insert("node", "is", i.to_string().into());
initial.insert(i, entry);
prev = Some(i)
}
// This should produce the following structure (tested above)
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
//
// State contents should be the same as before
let mut compressor = Compressor::compress(&initial, &[3, 3]);
let (found_delta, found_pred) = compressor.get_delta(None, 6);
let mut expected_delta: StateMap<Atom> = StateMap::new();
expected_delta.insert("node", "is", "6".into());
expected_delta.insert("group", "0", "seen".into());
expected_delta.insert("group", "1", "seen".into());
expected_delta.insert("group", "2", "seen".into());
expected_delta.insert("group", "3", "seen".into());
expected_delta.insert("group", "4", "seen".into());
expected_delta.insert("group", "5", "seen".into());
expected_delta.insert("group", "6", "seen".into());
assert_eq!(found_delta, expected_delta);
assert_eq!(found_pred, None);
}
#[test]
fn get_delta_returns_delta_if_original_predecessor() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
for i in 0i64..=13i64 {
let mut entry = StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
};
entry
.state_map
.insert("group", &i.to_string(), "seen".into());
entry.state_map.insert("node", "is", i.to_string().into());
initial.insert(i, entry);
prev = Some(i)
}
// This should produce the following structure (tested above)
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
//
// State contents should be the same as before
let mut compressor = Compressor::compress(&initial, &[3, 3]);
let (found_delta, found_pred) = compressor.get_delta(Some(5), 6);
let mut expected_delta: StateMap<Atom> = StateMap::new();
expected_delta.insert("node", "is", "6".into());
expected_delta.insert("group", "6", "seen".into());
assert_eq!(found_delta, expected_delta);
assert_eq!(found_pred, Some(5));
}
#[test]
fn get_delta_returns_delta_if_original_multi_hop_predecessor() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') - for all j less than i
for i in 0i64..=13i64 {
let mut entry = StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
};
entry
.state_map
.insert("group", &i.to_string(), "seen".into());
entry.state_map.insert("node", "is", i.to_string().into());
initial.insert(i, entry);
prev = Some(i)
}
// This should produce the following structure (tested above)
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
//
// State contents should be the same as before
let mut compressor = Compressor::compress(&initial, &[3, 3]);
let (found_delta, found_pred) = compressor.get_delta(Some(3), 6);
let mut expected_delta: StateMap<Atom> = StateMap::new();
expected_delta.insert("node", "is", "6".into());
expected_delta.insert("group", "4", "seen".into());
expected_delta.insert("group", "5", "seen".into());
expected_delta.insert("group", "6", "seen".into());
assert_eq!(found_delta, expected_delta);
assert_eq!(found_pred, Some(3));
}
#[test]
fn get_delta_returns_snapshot_if_no_prev_possible() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// (note missing 3-4 link)
// 0-1-2-3
// 4-5-6-7-8-9-10-11-12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') where j is ancestor of i
for i in 0i64..=13i64 {
// don't add 3-4 link
if i == 4 {
prev = None
}
// populate the delta for this state
let mut entry = StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
};
entry
.state_map
.insert("group", &i.to_string(), "seen".into());
entry.state_map.insert("node", "is", i.to_string().into());
// put the entry into the initial map
initial.insert(i, entry);
prev = Some(i)
}
// This should create the following structure if create_new_tree() was run
// (tested in create_new_tree_deals_with_impossible_preds())
//
// Brackets mean that the group has NO predecessor but is in that position in the
// levels tree
//
// 0 3\ 12
// 1 (4)(6)\ 13
// 2 5 7 9
// 8 10
// 11
//
// State contents should be the same as before
// build up new_tree after 0,1,2,3 added
let mut new_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
// 0-1-2 is left the same
new_map.insert(0, initial.get(&0).unwrap().clone());
new_map.insert(1, initial.get(&1).unwrap().clone());
new_map.insert(2, initial.get(&2).unwrap().clone());
// 3 is now a snapshot
let mut entry_3: StateMap<Atom> = StateMap::new();
entry_3.insert("node", "is", "3".into());
entry_3.insert("group", "0", "seen".into());
entry_3.insert("group", "1", "seen".into());
entry_3.insert("group", "2", "seen".into());
entry_3.insert("group", "3", "seen".into());
new_map.insert(
3,
StateGroupEntry {
in_range: true,
prev_state_group: None,
state_map: entry_3,
},
);
// build the compressor with this partially built new map
let mut compressor = Compressor {
original_state_map: &initial,
new_state_group_map: new_map,
levels: vec![Level::new(3), Level::new(3)],
stats: Stats::default(),
};
// make the levels how they would be after 0,1,2,3 added
// they should both be of length 1 and have 3 as the current head
let mut levels_iter = compressor.levels.iter_mut();
let l1 = levels_iter.next().unwrap();
l1.head = Some(3);
l1.current_chain_length = 1;
let l2 = levels_iter.next().unwrap();
l2.head = Some(3);
l2.current_chain_length = 1;
// Now try and find delta for 4 with 3 as pred
let (found_delta, found_pred) = compressor.get_delta(Some(3), 4);
let mut expected_delta: StateMap<Atom> = StateMap::new();
expected_delta.insert("node", "is", "4".into());
expected_delta.insert("group", "4", "seen".into());
assert_eq!(found_delta, expected_delta);
assert_eq!(found_pred, None);
}


@@ -0,0 +1,80 @@
use crate::compressor::Level;
#[test]
fn new_produces_empty_level() {
let l = Level::new(15);
assert_eq!(l.max_length, 15);
assert_eq!(l.current_chain_length, 0);
assert_eq!(l.head, None);
}
#[test]
fn update_adds_to_non_full_level() {
let mut l = Level::new(10);
l.update(7, true);
assert_eq!(l.max_length, 10);
assert_eq!(l.current_chain_length, 1);
assert_eq!(l.head, Some(7));
}
#[test]
#[should_panic(expected = "Tried to add to an already full level")]
fn update_panics_if_adding_and_too_full() {
let mut l = Level::new(5);
l.update(1, true);
l.update(2, true);
l.update(3, true);
l.update(4, true);
l.update(5, true);
l.update(6, true);
}
#[test]
fn update_resets_level_correctly() {
let mut l = Level::new(5);
l.update(1, true);
l.update(2, true);
l.update(3, true);
l.update(4, true);
l.update(5, true);
l.update(6, false);
assert_eq!(l.max_length, 5);
assert_eq!(l.current_chain_length, 1);
assert_eq!(l.head, Some(6));
}
#[test]
fn get_head_returns_head() {
let mut l = Level::new(5);
assert_eq!(l.get_head(), None);
l.update(23, true);
assert_eq!(l.get_head(), Some(23));
}
#[test]
fn has_space_returns_true_if_empty() {
let l = Level::new(15);
assert!(l.has_space());
}
#[test]
fn has_space_returns_true_if_part_full() {
let mut l = Level::new(15);
l.update(12, true);
l.update(234, true);
l.update(1, true);
l.update(143, true);
l.update(15, true);
assert!(l.has_space());
}
#[test]
fn has_space_returns_false_if_full() {
let mut l = Level::new(5);
l.update(1, true);
l.update(2, true);
l.update(3, true);
l.update(4, true);
l.update(5, true);
assert!(!l.has_space());
}


@@ -0,0 +1,181 @@
use crate::{
compressor::{Compressor, Level, Stats},
StateGroupEntry,
};
use state_map::StateMap;
use std::collections::BTreeMap;
#[test]
fn stats_correct_when_no_resets() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// 0-1-2-3-4-5-6-7-8-9-10-11-12-13
for i in 0i64..=13i64 {
initial.insert(
i,
StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
},
);
prev = Some(i)
}
let mut compressor = Compressor {
original_state_map: &initial,
new_state_group_map: BTreeMap::new(),
levels: vec![Level::new(3), Level::new(3)],
stats: Stats::default(),
};
// This should create the following structure
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
compressor.create_new_tree();
// No resets should have taken place
assert_eq!(compressor.stats.resets_no_suitable_prev, 0);
assert_eq!(compressor.stats.resets_no_suitable_prev_size, 0);
// Groups 3,6,9,12 should be the only ones changed
assert_eq!(compressor.stats.state_groups_changed, 4);
}
#[test]
fn stats_correct_when_some_resets() {
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let mut prev = None;
// This starts with the following structure
//
// (note missing 3-4 link)
// 0-1-2-3
// 4-5-6-7-8-9-10-11-12-13
//
// Each group i has state:
// ('node','is', i)
// ('group', j, 'seen') where j is ancestor of i
for i in 0i64..=13i64 {
if i == 4 {
prev = None
}
let mut entry = StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
};
entry
.state_map
.insert("group", &i.to_string(), "seen".into());
entry.state_map.insert("node", "is", i.to_string().into());
initial.insert(i, entry);
prev = Some(i)
}
let mut compressor = Compressor {
original_state_map: &initial,
new_state_group_map: BTreeMap::new(),
levels: vec![Level::new(3), Level::new(3)],
stats: Stats::default(),
};
// This should create the following structure
//
// Brackets mean that the group has NO predecessor but is in that position in the
// levels tree
//
// 0 3\ 12
// 1 (4)(6)\ 13
// 2 5 7 9
// 8 10
// 11
compressor.create_new_tree();
// the reset required for 4 contributes 2 to the size stat
// - (1 'node' and 1 'group') entry
// the reset required for 6 contributes 4 to the size stat
// - (1 'node' and 3 'group') entry
assert_eq!(compressor.stats.resets_no_suitable_prev, 2);
assert_eq!(compressor.stats.resets_no_suitable_prev_size, 6);
// groups 3,4,6,9,12 are the only ones changed
assert_eq!(compressor.stats.state_groups_changed, 5);
}
#[test]
fn stats_correct_if_no_changes() {
// This should create the following structure
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
let initial_edges: BTreeMap<i64, i64> = vec![
(1, 0),
(2, 1),
(4, 3),
(5, 4),
(6, 3),
(7, 6),
(8, 7),
(9, 6),
(10, 9),
(11, 10),
(13, 12),
]
.into_iter()
.collect();
let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
for i in 0i64..=13i64 {
// edge from map
let pred_group = initial_edges.get(&i);
// Need Option<i64> not Option<&i64>
let prev = pred_group.copied();
// insert that edge into the initial map
initial.insert(
i,
StateGroupEntry {
in_range: true,
prev_state_group: prev,
state_map: StateMap::new(),
},
);
}
let mut compressor = Compressor {
original_state_map: &initial,
new_state_group_map: BTreeMap::new(),
levels: vec![Level::new(3), Level::new(3)],
stats: Stats::default(),
};
// This should create the following structure (i.e. no change)
//
// 0 3\ 12
// 1 4 6\ 13
// 2 5 7 9
// 8 10
// 11
compressor.create_new_tree();
// No changes should have been made (the old tree should be the same)
assert_eq!(compressor.stats.resets_no_suitable_prev, 0);
assert_eq!(compressor.stats.resets_no_suitable_prev_size, 0);
assert_eq!(compressor.stats.state_groups_changed, 0);
}


@@ -13,101 +13,381 @@
// limitations under the License.
use indicatif::{ProgressBar, ProgressStyle};
use postgres::{fallible_iterator::FallibleIterator, Client};
use log::{debug, trace};
use openssl::ssl::{SslConnector, SslMethod, SslVerifyMode};
use postgres::{fallible_iterator::FallibleIterator, types::ToSql, Client};
use postgres_openssl::MakeTlsConnector;
use rand::{distributions::Alphanumeric, thread_rng, Rng};
use std::{borrow::Cow, collections::BTreeMap, fmt, iter};
use std::{borrow::Cow, collections::BTreeMap, fmt, time::Duration};
use crate::{compressor::Level, generate_sql};
use super::StateGroupEntry;
/// Fetch the entries in state_groups_state (and their prev groups) for the
/// given `room_id` by connecting to the postgres database at `db_url`.
/// Fetch the entries in state_groups_state (and their prev groups) for a
/// specific room.
///
/// Returns with the state_group map and the id of the last group that was used
/// Or None if there are no state groups within the range given
///
/// # Arguments
///
/// * `room_id` - The ID of the room in the database
/// * `db_url` - The URL of a Postgres database. This should be of the
/// form: "postgresql://user:pass@domain:port/database"
/// * `min_state_group` - If specified, then only fetch the entries for state
/// groups greater than (but not equal) to this number. It
/// also requires groups_to_compress to be specified
/// * `max_state_group` - If specified, then only fetch the entries for state
/// groups lower than or equal to this number.
/// * 'groups_to_compress' - The number of groups to get from the database before stopping
pub fn get_data_from_db(
db_url: &str,
room_id: &str,
min_state_group: Option<i64>,
groups_to_compress: Option<i64>,
max_state_group: Option<i64>,
) -> BTreeMap<i64, StateGroupEntry> {
let mut client = Client::connect(db_url, postgres::NoTls).unwrap();
) -> Option<(BTreeMap<i64, StateGroupEntry>, i64)> {
// connect to the database
let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
builder.set_verify(SslVerifyMode::NONE);
let connector = MakeTlsConnector::new(builder.build());
let mut state_group_map = get_initial_data_from_db(&mut client, room_id, max_state_group);
let mut client = Client::connect(db_url, connector)
.unwrap_or_else(|e| panic!("Error connecting to the database: {}", e));
println!("Got initial state from database. Checking for any missing state groups...");
// Search for the group id of the groups_to_compress'th group after min_state_group
// If this is saved, then the compressor can continue by setting min_state_group to
// this maximum. If no such group can be found then return None.
let max_group_found = find_max_group(
&mut client,
room_id,
min_state_group,
groups_to_compress,
max_state_group,
)?;
let state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
Some(load_map_from_db(
&mut client,
room_id,
min_state_group,
max_group_found,
state_group_map,
))
}
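// Example (added for illustration, not part of the original source): fetching
// the first 500 state groups of a room from the start of the room's history.
// The database URL and room id below are hypothetical values.
#[allow(dead_code)]
fn example_get_data_from_db() {
    if let Some((state_group_map, last_group)) = get_data_from_db(
        "postgresql://user:pass@localhost/synapse",
        "!hypothetical_room:example.com",
        None,      // no lower bound on state group ids
        Some(500), // stop after 500 groups
        None,      // no upper bound on state group ids
    ) {
        debug!(
            "Loaded {} state groups, the last group considered was {}",
            state_group_map.len(),
            last_group
        );
    }
}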
/// Fetch the entries in state_groups_state (and their prev groups) for a
/// specific room. This method should only be called if resuming the compressor from
/// where it last finished - and as such also loads in the state groups from the heads
/// of each of the levels (as they were at the end of the last run of the compressor)
///
/// Returns with the state_group map and the id of the last group that was used
/// Or None if there are no state groups within the range given
///
/// # Arguments
///
/// * `room_id` - The ID of the room in the database
/// * `db_url` - The URL of a Postgres database. This should be of the
/// form: "postgresql://user:pass@domain:port/database"
/// * `min_state_group` - If specified, then only fetch the entries for state
/// groups greater than (but not equal) to this number. It
/// also requires groups_to_compress to be specified
/// * 'groups_to_compress' - The number of groups to get from the database before stopping
/// * 'level_info' - The maximum size, current length and current head for each
/// level (as it was when the compressor last finished for this
/// room)
pub fn reload_data_from_db(
db_url: &str,
room_id: &str,
min_state_group: Option<i64>,
groups_to_compress: Option<i64>,
level_info: &[Level],
) -> Option<(BTreeMap<i64, StateGroupEntry>, i64)> {
// connect to the database
let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
builder.set_verify(SslVerifyMode::NONE);
let connector = MakeTlsConnector::new(builder.build());
let mut client = Client::connect(db_url, connector)
.unwrap_or_else(|e| panic!("Error connecting to the database: {}", e));
// Search for the group id of the groups_to_compress'th group after min_state_group
// If this is saved, then the compressor can continue by setting min_state_group to
// this maximum. If no such group can be found then return None.
let max_group_found = find_max_group(
&mut client,
room_id,
min_state_group,
groups_to_compress,
// max state group not used when saving and loading
None,
)?;
// load just the state_groups at the head of each level
// this doesn't load their predecessors as that will be done at the end of
// load_map_from_db()
let state_group_map: BTreeMap<i64, StateGroupEntry> = load_level_heads(&mut client, level_info);
Some(load_map_from_db(
&mut client,
room_id,
min_state_group,
max_group_found,
state_group_map,
))
}
/// Finds the state_groups that are at the head of each compressor level
/// NOTE this does not also retrieve their predecessors
///
/// # Arguments
///
/// * `client' - A Postgres client to make requests with
/// * `levels' - The levels whose heads are being requested
fn load_level_heads(client: &mut Client, level_info: &[Level]) -> BTreeMap<i64, StateGroupEntry> {
// obtain all of the heads that aren't None from level_info
let level_heads: Vec<i64> = level_info.iter().filter_map(|l| (*l).get_head()).collect();
// Query to get id, predecessor and deltas for each state group
let sql = r#"
SELECT m.id, prev_state_group, type, state_key, s.event_id
FROM state_groups AS m
LEFT JOIN state_groups_state AS s ON (m.id = s.state_group)
LEFT JOIN state_group_edges AS e ON (m.id = e.state_group)
WHERE m.id = ANY($1)
ORDER BY m.id
"#;
// Actually do the query
let mut rows = client.query_raw(sql, &[&level_heads]).unwrap();
// Copy the data from the database into a map
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
while let Some(row) = rows.next().unwrap() {
// The row in the map to copy the data to
// NOTE: default StateGroupEntry has in_range as false
// This is what we want since as a level head, it has already been compressed by the
// previous run!
let entry = state_group_map.entry(row.get(0)).or_default();
// Save the predecessor (this may already be there)
entry.prev_state_group = row.get(1);
// Copy the single delta from the predecessor stored in this row
if let Some(etype) = row.get::<_, Option<String>>(2) {
entry.state_map.insert(
&etype,
&row.get::<_, String>(3),
row.get::<_, String>(4).into(),
);
}
}
state_group_map
}
/// Fetch the entries in state_groups_state (and their prev groups) for a
/// specific room within a certain range. These are appended onto the provided
/// map.
///
/// - Fetches the first [group] rows with group id after [min]
/// - Recursively searches for missing predecessors and adds those
///
/// Returns with the state_group map and the id of the last group that was used
///
/// # Arguments
///
/// * `client` - A Postgres client to make requests with
/// * `room_id` - The ID of the room in the database
/// * `min_state_group` - If specified, then only fetch the entries for state
/// groups greater than (but not equal) to this number. It
/// also requires groups_to_compress to be specified
/// * 'max_group_found' - The last group to get from the database before stopping
/// * 'state_group_map' - The map to populate with the entries from the database
fn load_map_from_db(
client: &mut Client,
room_id: &str,
min_state_group: Option<i64>,
max_group_found: i64,
mut state_group_map: BTreeMap<i64, StateGroupEntry>,
) -> (BTreeMap<i64, StateGroupEntry>, i64) {
state_group_map.append(&mut get_initial_data_from_db(
client,
room_id,
min_state_group,
max_group_found,
));
debug!("Got initial state from database. Checking for any missing state groups...");
// Due to reasons some of the state groups appear in the edges table, but
// not in the state_groups_state table. This means they don't get included
// in our DB queries, so we have to fetch any missing groups explicitly.
// not in the state_groups_state table.
//
// Also it is likely that the predecessor of a node will not be within the
// chunk that was specified by min_state_group and groups_to_compress.
// This means they don't get included in our DB queries, so we have to fetch
// any missing groups explicitly.
//
// Since the returned groups may themselves reference groups we don't have,
// we need to do this recursively until we don't find any more missing.
loop {
let mut missing_sgs: Vec<_> = state_group_map
.iter()
.filter_map(|(_sg, entry)| {
if let Some(prev_sg) = entry.prev_state_group {
if state_group_map.contains_key(&prev_sg) {
None
} else {
Some(prev_sg)
}
} else {
None
}
entry
.prev_state_group
.filter(|&prev_sg| !state_group_map.contains_key(&prev_sg))
})
.collect();
if missing_sgs.is_empty() {
println!("No missing state groups");
trace!("No missing state groups");
break;
}
missing_sgs.sort_unstable();
missing_sgs.dedup();
println!("Missing {} state groups", missing_sgs.len());
trace!("Missing {} state groups", missing_sgs.len());
let map = get_missing_from_db(&mut client, &missing_sgs);
state_group_map.extend(map.into_iter());
// find state groups not picked up already and add them to the map
let map = get_missing_from_db(client, &missing_sgs, min_state_group, max_group_found);
for (k, v) in map {
state_group_map.entry(k).or_insert(v);
}
}
state_group_map
(state_group_map, max_group_found)
}
/// Fetch the entries in state_groups_state (and their prev groups) for the
/// given `room_id` by fetching all state with the given `room_id`.
/// Returns the group ID of the last group to be compressed
///
/// This can be saved so that future runs of the compressor only
/// continue from after this point. If no groups can be found in
/// the range specified it returns None.
///
/// # Arguments
///
/// * `client` - A Postgres client to make requests with
/// * `room_id` - The ID of the room in the database
/// * `min_state_group` - The lower limit (non inclusive) of group id's to compress
/// * 'groups_to_compress' - How many groups to compress
/// * `max_state_group` - The upper bound on what this method can return
fn find_max_group(
client: &mut Client,
room_id: &str,
min_state_group: Option<i64>,
groups_to_compress: Option<i64>,
max_state_group: Option<i64>,
) -> Option<i64> {
// Get list of state_id's in a certain room
let mut query_chunk_of_ids = "SELECT id FROM state_groups WHERE room_id = $1".to_string();
let params: Vec<&(dyn ToSql + Sync)>;
if let Some(max) = max_state_group {
query_chunk_of_ids = format!("{} AND id <= {}", query_chunk_of_ids, max)
}
// Adds additional constraint if a groups_to_compress or min_state_group have been specified
// Note a min state group is only used if groups_to_compress also is
if min_state_group.is_some() && groups_to_compress.is_some() {
params = vec![&room_id, &min_state_group, &groups_to_compress];
query_chunk_of_ids = format!(
r"{} AND id > $2 ORDER BY id ASC LIMIT $3",
query_chunk_of_ids
);
} else if groups_to_compress.is_some() {
params = vec![&room_id, &groups_to_compress];
query_chunk_of_ids = format!(r"{} ORDER BY id ASC LIMIT $2", query_chunk_of_ids);
} else {
params = vec![&room_id];
}
let sql_query = format!(
"SELECT id FROM ({}) AS ids ORDER BY ids.id DESC LIMIT 1",
query_chunk_of_ids
);
// This vector should have length 0 or 1
let rows = client
.query(sql_query.as_str(), &params)
.expect("Something went wrong while querying the database");
// If no row can be found then return None
let final_row = rows.last()?;
// Else return the id of the group found
Some(final_row.get::<_, i64>(0))
}
/// Fetch the entries in state_groups_state and immediate predecessors for
/// a specific room.
///
/// - Fetches first [groups_to_compress] rows with group id higher than min
/// - Stores the group id, predecessor id and deltas into a map
/// - returns map and maximum row that was considered
///
/// # Arguments
///
/// * `client` - A Postgres client to make requests with
/// * `room_id` - The ID of the room in the database
/// * `min_state_group` - If specified, then only fetch the entries for state
/// groups greater than (but not equal) to this number. It
/// also requires groups_to_compress to be specified
/// * 'max_group_found' - The upper limit on state_groups ids to get from the database
fn get_initial_data_from_db(
client: &mut Client,
room_id: &str,
max_state_group: Option<i64>,
min_state_group: Option<i64>,
max_group_found: i64,
) -> BTreeMap<i64, StateGroupEntry> {
// Query to get id, predecessor and deltas for each state group
let sql = r#"
SELECT m.id, prev_state_group, type, state_key, s.event_id
FROM state_groups AS m
LEFT JOIN state_groups_state AS s ON (m.id = s.state_group)
LEFT JOIN state_group_edges AS e ON (m.id = e.state_group)
WHERE m.room_id = $1
WHERE m.room_id = $1 AND m.id <= $2
"#;
let mut rows = if let Some(s) = max_state_group {
client.query_raw(
format!(r"{} AND m.id <= $2", sql).as_str(),
vec![&room_id as _, &s as _],
)
// Adds additional constraint if minimum state_group has been specified.
let mut rows = if let Some(min) = min_state_group {
let params: Vec<&dyn ToSql> = vec![&room_id, &max_group_found, &min];
client.query_raw(format!(r"{} AND m.id > $3", sql).as_str(), params)
} else {
client.query_raw(sql, iter::once(&room_id as _))
let params: Vec<&dyn ToSql> = vec![&room_id, &max_group_found];
client.query_raw(sql, params)
}
.unwrap();
.expect("Something went wrong while querying the database");
// Copy the data from the database into a map
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
let pb = ProgressBar::new_spinner();
let pb = if cfg!(feature = "no-progress-bars") {
ProgressBar::hidden()
} else {
ProgressBar::new_spinner()
};
pb.set_style(
ProgressStyle::default_spinner().template("{spinner} [{elapsed}] {pos} rows retrieved"),
ProgressStyle::default_spinner()
.template("{spinner} [{elapsed}] {pos} rows retrieved")
.unwrap(),
);
pb.enable_steady_tick(100);
pb.enable_steady_tick(Duration::from_millis(100));
while let Some(row) = rows.next().unwrap() {
// The row in the map to copy the data to
let entry = state_group_map.entry(row.get(0)).or_default();
// Save the predecessor and mark for compression (this may already be there)
// TODO: slightly fewer redundant rewrites
entry.prev_state_group = row.get(1);
entry.in_range = true;
// Copy the single delta from the predecessor stored in this row
if let Some(etype) = row.get::<_, Option<String>>(2) {
entry.state_map.insert(
&etype,
@@ -125,35 +405,65 @@ fn get_initial_data_from_db(
state_group_map
}
/// Get any missing state groups from the database
fn get_missing_from_db(client: &mut Client, missing_sgs: &[i64]) -> BTreeMap<i64, StateGroupEntry> {
let mut rows = client
.query_raw(
r#"
SELECT state_group, prev_state_group
FROM state_group_edges
WHERE state_group = ANY($1)
"#,
iter::once(&missing_sgs as _),
)
.unwrap();
/// Finds the predecessors of missing state groups
///
/// N.B. this does NOT find their deltas
///
/// # Arguments
///
/// * `client` - A Postgres client to make requests with
/// * `missing_sgs` - An array of missing state_group ids
/// * 'min_state_group' - Minimum state_group id to mark as in range
/// * 'max_group_found' - Maximum state_group id to mark as in range
fn get_missing_from_db(
client: &mut Client,
missing_sgs: &[i64],
min_state_group: Option<i64>,
max_group_found: i64,
) -> BTreeMap<i64, StateGroupEntry> {
// "Due to reasons" it is possible that some states only appear in edges table and not in state_groups table
// so since we know the IDs we're looking for as they are the missing predecessors, we can find them by
// left joining onto the edges table (instead of the state_group table!)
let sql = r#"
SELECT target.prev_state_group, source.prev_state_group, state.type, state.state_key, state.event_id
FROM state_group_edges AS target
LEFT JOIN state_group_edges AS source ON (target.prev_state_group = source.state_group)
LEFT JOIN state_groups_state AS state ON (target.prev_state_group = state.state_group)
WHERE target.prev_state_group = ANY($1)
"#;
// initialise the map with empty entries (the missing group may not
// have a prev_state_group either)
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = missing_sgs
.iter()
.map(|sg| (*sg, StateGroupEntry::default()))
.collect();
let mut rows = client.query_raw(sql, &[missing_sgs]).unwrap();
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
while let Some(row) = rows.next().unwrap() {
let state_group = row.get(0);
let entry = state_group_map.get_mut(&state_group).unwrap();
let id = row.get(0);
// The row in the map to copy the data to
let entry = state_group_map.entry(id).or_default();
// Save the predecessor and mark for compression (this may already be there)
// Also may well not exist!
entry.prev_state_group = row.get(1);
if let Some(min) = min_state_group {
if min < id && id <= max_group_found {
entry.in_range = true
}
}
// Copy the single delta from the predecessor stored in this row
if let Some(etype) = row.get::<_, Option<String>>(2) {
entry.state_map.insert(
&etype,
&row.get::<_, String>(3),
row.get::<_, String>(4).into(),
);
}
}
state_group_map
}
// TODO: find a library that has an existing safe postgres escape function
/// Helper function that escapes the wrapped text when writing SQL
pub struct PGEscape<'a>(pub &'a str);
@@ -161,7 +471,11 @@ impl<'a> fmt::Display for PGEscape<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut delim = Cow::from("$$");
while self.0.contains(&delim as &str) {
let s: String = thread_rng().sample_iter(&Alphanumeric).take(10).collect();
let s: String = thread_rng()
.sample_iter(&Alphanumeric)
.take(10)
.map(char::from)
.collect();
delim = format!("${}$", s).into();
}
@@ -188,3 +502,66 @@ fn test_pg_escape() {
assert_eq!(&s[0..1], "$");
assert_eq!(&s[start_pos - 1..start_pos], "$");
}
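// Illustrative example (not part of the original source): PGEscape is meant to be
// used when writing SQL by hand, wrapping a value in a dollar-quoted string so it
// can be interpolated safely. The event id below is a hypothetical value.
#[test]
fn example_pg_escape_usage() {
    let escaped = format!("{}", PGEscape("$hypothetical_event:example.com"));
    // The value is wrapped in a dollar-quoted string (e.g. $$...$$ or $xxxx$...$xxxx$)
    assert!(escaped.contains("$hypothetical_event:example.com"));
    assert!(escaped.starts_with('$') && escaped.ends_with('$'));
}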
/// Send changes to the database
///
/// Note that currently ignores config.transactions and wraps every state
/// group in its own transaction (i.e. as if config.transactions was true)
///
/// # Arguments
///
/// * `db_url` - The URL of a Postgres database. This should be of the
/// form: "postgresql://user:pass@domain:port/database"
/// * `room_id` - The ID of the room in the database
/// * `old_map` - The state group data originally in the database
/// * `new_map` - The state group data generated by the compressor to
/// replace the old contents
pub fn send_changes_to_db(
db_url: &str,
room_id: &str,
old_map: &BTreeMap<i64, StateGroupEntry>,
new_map: &BTreeMap<i64, StateGroupEntry>,
) {
// connect to the database
let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
builder.set_verify(SslVerifyMode::NONE);
let connector = MakeTlsConnector::new(builder.build());
let mut client = Client::connect(db_url, connector).unwrap();
debug!("Writing changes...");
// setup the progress bar
let pb = if cfg!(feature = "no-progress-bars") {
ProgressBar::hidden()
} else {
ProgressBar::new(old_map.len() as u64)
};
pb.set_style(
ProgressStyle::default_bar()
.template("[{elapsed_precise}] {bar} {pos}/{len} {msg}")
.unwrap(),
);
pb.set_message("state groups");
pb.enable_steady_tick(Duration::from_millis(100));
for sql_transaction in generate_sql(old_map, new_map, room_id) {
if sql_transaction.is_empty() {
pb.inc(1);
continue;
}
// commit this change to the database
// N.B. this is a synchronous library so it will wait until finished before continuing...
// if you want to speed up the compressor then this might be a good place to start!
let mut single_group_transaction = client.transaction().unwrap();
single_group_transaction
.batch_execute(&sql_transaction)
.unwrap();
single_group_transaction.commit().unwrap();
pb.inc(1);
}
pb.finish();
}

71
src/graphing.rs Normal file
View File

@@ -0,0 +1,71 @@
use std::collections::BTreeMap;
use std::{fs::File, io::Write};
use super::StateGroupEntry;
type Graph = BTreeMap<i64, StateGroupEntry>;
/// Outputs information from a state group graph into an edges file and a node file
///
/// These can be loaded into something like Gephi to visualise the graphs
///
/// # Arguments
///
/// * `groups` - A map from state group ids to StateGroupEntries
/// * `edges_output` - The file to output the predecessor link information to
/// * `nodes_output` - The file to output the state group information to
fn output_csv(groups: &Graph, edges_output: &mut File, nodes_output: &mut File) {
// The line A;B in the edges file means:
// That state group A has predecessor B
writeln!(edges_output, "Source;Target",).unwrap();
// The line A;B;C;"B" in the nodes file means:
// The state group id is A
// This state group has B rows in the state_groups_state table
// If C is true then A has no predecessor
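// The quoted "B" at the end is the node's label (the row count is reused as the label)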
writeln!(nodes_output, "Id;Rows;Root;Label",).unwrap();
for (source, entry) in groups {
// If the group has a predecessor then write an edge in the edges file
if let Some(target) = entry.prev_state_group {
writeln!(edges_output, "{};{}", source, target,).unwrap();
}
// Write the state group's information to the nodes file
writeln!(
nodes_output,
"{};{};{};\"{}\"",
source,
entry.state_map.len(),
entry.prev_state_group.is_none(),
entry.state_map.len(),
)
.unwrap();
}
}
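// Illustrative sketch (not part of the original source): for a state group 2
// whose predecessor is 1 and whose delta holds 3 rows, output_csv writes the
// edge line `2;1` and the node line `2;3;false;"3"`.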
/// Outputs information from two state group graphs into files
///
/// These can be loaded into something like Gephi to visualise the graphs
/// before and after the compressor is run
///
/// # Arguments
///
/// * `before` - A map from state group ids to StateGroupEntries
/// the information from this map goes into before_edges.csv
/// and before_nodes.csv
/// * `after` - A map from state group ids to StateGroupEntries
/// the information from this map goes into after_edges.csv
/// and after_nodes.csv
pub fn make_graphs(before: &Graph, after: &Graph) {
// Open all the files to output to
let mut before_edges_file = File::create("before_edges.csv").unwrap();
let mut before_nodes_file = File::create("before_nodes.csv").unwrap();
let mut after_edges_file = File::create("after_edges.csv").unwrap();
let mut after_nodes_file = File::create("after_nodes.csv").unwrap();
// Write before's information to before_edges and before_nodes
output_csv(before, &mut before_edges_file, &mut before_nodes_file);
// Write after's information to after_edges and after_nodes
output_csv(after, &mut after_edges_file, &mut after_nodes_file);
}

1333
src/lib.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -16,335 +16,31 @@
//! Synapse instance's database. Specifically, it aims to reduce the number of
//! rows that a given room takes up in the `state_groups_state` table.
mod compressor;
mod database;
#[cfg(feature = "jemalloc")]
#[global_allocator]
static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc;
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
use compressor::Compressor;
use database::PGEscape;
use log::LevelFilter;
use std::env;
use std::io::Write;
use clap::{
crate_authors, crate_description, crate_name, crate_version, value_t_or_exit, App, Arg,
};
use indicatif::{ProgressBar, ProgressStyle};
use rayon::prelude::*;
use state_map::StateMap;
use std::{collections::BTreeMap, fs::File, io::Write, str::FromStr};
use string_cache::DefaultAtom as Atom;
/// An entry for a state group. Consists of an (optional) previous group and the
/// delta from that previous group (or the full state if no previous group)
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct StateGroupEntry {
prev_state_group: Option<i64>,
state_map: StateMap<Atom>,
}
/// Gets the full state for a given group from the map (of deltas)
pub fn collapse_state_maps(
map: &BTreeMap<i64, StateGroupEntry>,
state_group: i64,
) -> StateMap<Atom> {
let mut entry = &map[&state_group];
let mut state_map = StateMap::new();
let mut stack = vec![state_group];
while let Some(prev_state_group) = entry.prev_state_group {
stack.push(prev_state_group);
if !map.contains_key(&prev_state_group) {
panic!("Missing {}", prev_state_group);
}
entry = &map[&prev_state_group];
}
for sg in stack.iter().rev() {
state_map.extend(
map[&sg]
.state_map
.iter()
.map(|((t, s), e)| ((t, s), e.clone())),
);
}
state_map
}
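// Example (added for illustration, not part of the original source): collapsing
// the state of a two-group chain where group 1 only stores its delta on top of
// group 0. The event ids used are hypothetical.
#[test]
fn example_collapse_state_maps() {
    let mut map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();

    let mut group0 = StateGroupEntry::default();
    group0
        .state_map
        .insert("m.room.create", "", "$create_event".into());

    let mut group1 = StateGroupEntry::default();
    group1.prev_state_group = Some(0);
    group1
        .state_map
        .insert("m.room.name", "", "$name_event".into());

    map.insert(0, group0);
    map.insert(1, group1);

    // Group 1's full state is its own delta applied on top of group 0's state
    let full_state = collapse_state_maps(&map, 1);
    assert_eq!(full_state.len(), 2);
}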
/// Helper struct for parsing the `level_sizes` argument.
struct LevelSizes(Vec<usize>);
impl FromStr for LevelSizes {
type Err = &'static str;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut sizes = Vec::new();
for size_str in s.split(',') {
let size: usize = size_str
.parse()
.map_err(|_| "Not a comma separated list of numbers")?;
sizes.push(size);
}
Ok(LevelSizes(sizes))
}
}
use synapse_compress_state as comp_state;
fn main() {
#[allow(deprecated)]
let matches = App::new(crate_name!())
.version(crate_version!())
.author(crate_authors!("\n"))
.about(crate_description!())
.arg(
Arg::with_name("postgres-url")
.short("p")
.value_name("URL")
.help("The url for connecting to the postgres database")
.takes_value(true)
.required(true),
).arg(
Arg::with_name("room_id")
.short("r")
.value_name("ROOM_ID")
.help("The room to process")
.takes_value(true)
.required(true),
).arg(
Arg::with_name("max_state_group")
.short("s")
.value_name("MAX_STATE_GROUP")
.help("The maximum state group to process up to")
.takes_value(true)
.required(false),
).arg(
Arg::with_name("min_saved_rows")
.short("m")
.value_name("COUNT")
.help("Suppress output if fewer than COUNT rows would be saved")
.takes_value(true)
.required(false),
).arg(
Arg::with_name("output_file")
.short("o")
.value_name("FILE")
.help("File to output the changes to in SQL")
.takes_value(true),
).arg(
Arg::with_name("transactions")
.short("t")
.help("Whether to wrap each state group change in a transaction")
.requires("output_file"),
).arg(
Arg::with_name("level_sizes")
.short("l")
.value_name("LEVELS")
.help("Sizes of each new level in the compression algorithm, as a comma separated list.")
.long_help(concat!(
"Sizes of each new level in the compression algorithm, as a comma separated list.",
" The first entry in the list is for the lowest, most granular level,",
" with each subsequent entry being for the next highest level.",
" The number of entries in the list determines the number of levels",
" that will be used.",
"\nThe sum of the sizes of the levels effect the performance of fetching the state",
" from the database, as the sum of the sizes is the upper bound on number of",
" iterations needed to fetch a given set of state.",
))
.default_value("100,50,25")
.takes_value(true),
).get_matches();
let db_url = matches
.value_of("postgres-url")
.expect("db url should be required");
let mut output_file = matches
.value_of("output_file")
.map(|path| File::create(path).unwrap());
let room_id = matches
.value_of("room_id")
.expect("room_id should be required since no file");
let max_state_group = matches
.value_of("max_state_group")
.map(|s| s.parse().expect("max_state_group must be an integer"));
let min_saved_rows = matches
.value_of("min_saved_rows")
.map(|v| v.parse().expect("COUNT must be an integer"));
let transactions = matches.is_present("transactions");
let level_sizes = value_t_or_exit!(matches, "level_sizes", LevelSizes);
// First we need to get the current state groups
println!("Fetching state from DB for room '{}'...", room_id);
let state_group_map = database::get_data_from_db(db_url, room_id, max_state_group);
println!("Number of state groups: {}", state_group_map.len());
let original_summed_size = state_group_map
.iter()
.fold(0, |acc, (_, v)| acc + v.state_map.len());
println!("Number of rows in current table: {}", original_summed_size);
// Now we actually call the compression algorithm.
println!("Compressing state...");
let compressor = Compressor::compress(&state_group_map, &level_sizes.0);
let new_state_group_map = compressor.new_state_group_map;
// Done! Now to print a bunch of stats.
let compressed_summed_size = new_state_group_map
.iter()
.fold(0, |acc, (_, v)| acc + v.state_map.len());
let ratio = (compressed_summed_size as f64) / (original_summed_size as f64);
println!(
"Number of rows after compression: {} ({:.2}%)",
compressed_summed_size,
ratio * 100.
);
println!("Compression Statistics:");
println!(
" Number of forced resets due to lacking prev: {}",
compressor.stats.resets_no_suitable_prev
);
println!(
" Number of compressed rows caused by the above: {}",
compressor.stats.resets_no_suitable_prev_size
);
println!(
" Number of state groups changed: {}",
compressor.stats.state_groups_changed
);
if let Some(min) = min_saved_rows {
let saving = (original_summed_size - compressed_summed_size) as i32;
if saving < min {
println!(
"Only {} rows would be saved by this compression. Skipping output.",
saving
);
return;
}
// setup the logger
// The default can be overwritten with RUST_LOG
// see the README for more information
if env::var("RUST_LOG").is_err() {
let mut log_builder = env_logger::builder();
// Only output the log message (and not the prefixed timestamp etc.)
log_builder.format(|buf, record| writeln!(buf, "{}", record.args()));
// By default print all of the debugging messages from this library
log_builder.filter_module("synapse_compress_state", LevelFilter::Debug);
log_builder.init();
} else {
// If RUST_LOG was set then use that
env_logger::Builder::from_env("RUST_LOG").init();
}
// If we are given an output file, we output the changes as SQL. If the
// `transactions` argument is set we wrap each change to a state group in a
// transaction.
if let Some(output) = &mut output_file {
println!("Writing changes...");
let pb = ProgressBar::new(state_group_map.len() as u64);
pb.set_style(
ProgressStyle::default_bar().template("[{elapsed_precise}] {bar} {pos}/{len} {msg}"),
);
pb.set_message("state groups");
pb.enable_steady_tick(100);
for (sg, old_entry) in &state_group_map {
let new_entry = &new_state_group_map[sg];
if old_entry != new_entry {
if transactions {
writeln!(output, "BEGIN;").unwrap();
}
writeln!(
output,
"DELETE FROM state_group_edges WHERE state_group = {};",
sg
)
.unwrap();
if let Some(prev_sg) = new_entry.prev_state_group {
writeln!(output, "INSERT INTO state_group_edges (state_group, prev_state_group) VALUES ({}, {});", sg, prev_sg).unwrap();
}
writeln!(
output,
"DELETE FROM state_groups_state WHERE state_group = {};",
sg
)
.unwrap();
if !new_entry.state_map.is_empty() {
writeln!(output, "INSERT INTO state_groups_state (state_group, room_id, type, state_key, event_id) VALUES").unwrap();
let mut first = true;
for ((t, s), e) in new_entry.state_map.iter() {
if first {
write!(output, " ").unwrap();
first = false;
} else {
write!(output, " ,").unwrap();
}
writeln!(
output,
"({}, {}, {}, {}, {})",
sg,
PGEscape(room_id),
PGEscape(t),
PGEscape(s),
PGEscape(e)
)
.unwrap();
}
writeln!(output, ";").unwrap();
}
if transactions {
writeln!(output, "COMMIT;").unwrap();
}
writeln!(output).unwrap();
}
pb.inc(1);
}
pb.finish();
}
println!("Checking that state maps match...");
let pb = ProgressBar::new(state_group_map.len() as u64);
pb.set_style(
ProgressStyle::default_bar().template("[{elapsed_precise}] {bar} {pos}/{len} {msg}"),
);
pb.set_message("state groups");
pb.enable_steady_tick(100);
// Now let's iterate through and assert that the state for each group
// matches between the two versions.
state_group_map
.par_iter() // This uses rayon to run the checks in parallel
.try_for_each(|(sg, _)| {
let expected = collapse_state_maps(&state_group_map, *sg);
let actual = collapse_state_maps(&new_state_group_map, *sg);
pb.inc(1);
if expected != actual {
println!("State Group: {}", sg);
println!("Expected: {:#?}", expected);
println!("actual: {:#?}", actual);
Err(format!("State for group {} do not match", sg))
} else {
Ok(())
}
})
.expect("expected state to match");
pb.finish();
println!("New state map matches old one");
comp_state::run(comp_state::Config::parse_arguments());
}

View File

@@ -0,0 +1,56 @@
[package]
name = "synapse_auto_compressor"
authors = ["William Ashton"]
version = "0.1.3"
edition = "2018"
[[bin]]
name = "synapse_auto_compressor"
required-features = ["clap"]
[package.metadata.maturin]
requires-python = ">=3.7"
project-url = {Source = "https://github.com/matrix-org/rust-synapse-compress-state"}
classifier = [
"Development Status :: 4 - Beta",
"Programming Language :: Rust",
]
[dependencies]
openssl = { version = "0.10.60", features = ["vendored"] }
postgres = "0.19.7"
postgres-openssl = "0.5.0"
rand = "0.8.5"
serial_test = "2.0.0"
synapse_compress_state = { path = "../", features = ["no-progress-bars"], default-features = false }
env_logger = "0.10.0"
log = "0.4.20"
log-panics = "2.1.0"
anyhow = "1.0.75"
# Needed for pyo3 support
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies.clap]
version = "4.4.2"
features = ["cargo"]
optional = true
[dependencies.pyo3]
version = "0.19.2"
features = ["extension-module"]
optional = true
[dependencies.pyo3-log]
version = "0.8.3"
optional = true
[dependencies.tikv-jemallocator]
version = "0.5.4"
optional = true
[features]
default = ["clap", "jemalloc"]
jemalloc = ["tikv-jemallocator", "synapse_compress_state/jemalloc"]
pyo3 = ["dep:pyo3", "dep:pyo3-log", "synapse_compress_state/pyo3"]

View File

@@ -0,0 +1,12 @@
# Auto Compressor
See the top level readme for information.
## Publishing to PyPI
Bump the version number and run from the root directory of the repo:
```
docker run -it --rm -v $(pwd):/io -e OPENSSL_STATIC=1 konstin2/maturin publish -m synapse_auto_compressor/Cargo.toml --cargo-extra-args "\--features='openssl/vendored'"
```

View File

@@ -0,0 +1,8 @@
[build-system]
requires = ["maturin>=1.0,<2.0"]
build-backend = "maturin"
[tool.maturin]
profile = "release"
features = ["pyo3"]
no-default-features = true

View File

@@ -0,0 +1,132 @@
//! This is a tool that uses the synapse_compress_state library to
//! reduce the size of the synapse state_groups_state table in a postgres
//! database.
//!
//! It adds the tables state_compressor_state and state_compressor_progress
//! to the database and uses these to enable it to incrementally work
//! on space reductions
use anyhow::Result;
#[cfg(feature = "pyo3")]
use log::{error, LevelFilter};
#[cfg(feature = "pyo3")]
use pyo3::{
exceptions::PyRuntimeError, prelude::pymodule, types::PyModule, PyErr, PyResult, Python,
};
use std::str::FromStr;
use synapse_compress_state::Level;
pub mod manager;
pub mod state_saving;
/// Helper struct for parsing the `default_levels` argument.
///
/// The compressor keeps track of a number of Levels, each of which have a maximum length,
/// current length, and an optional current head (None if level is empty, Some if a head
/// exists).
///
/// This is needed since FromStr cannot be implemented for structs
/// that aren't defined in this scope
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct LevelInfo(pub Vec<Level>);
// Implement FromStr so that an argument of the form "100,50,25"
// can be used to create a vector of levels with max sizes 100, 50 and 25
// For more info see the LevelState documentation in lib.rs
impl FromStr for LevelInfo {
type Err = &'static str;
fn from_str(s: &str) -> Result<Self, Self::Err> {
// Stores the max sizes of each level
let mut level_info: Vec<Level> = Vec::new();
// Split the string up at each comma
for size_str in s.split(',') {
// try and convert each section into a number
// return an error if that fails
let size: usize = size_str
.parse()
.map_err(|_| "Not a comma separated list of numbers")?;
// add this parsed number to the sizes struct
level_info.push(Level::new(size));
}
// Return the built up vector inside a LevelInfo struct
Ok(LevelInfo(level_info))
}
}
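// Example (added for illustration, not part of the original source): parsing
// the `default_levels` argument into three empty levels with the suggested
// maximum sizes.
#[test]
fn example_parse_level_info() {
    let levels: LevelInfo = "100,50,25".parse().expect("valid comma separated list");
    assert_eq!(levels.0.len(), 3);
    assert_eq!(levels.0[0].get_max_length(), 100);
    assert_eq!(levels.0[0].get_head(), None);
}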
// PyO3 INTERFACE STARTS HERE
#[cfg(feature = "pyo3")]
#[pymodule]
fn synapse_auto_compressor(_py: Python, m: &PyModule) -> PyResult<()> {
let _ = pyo3_log::Logger::default()
// don't send out anything lower than a warning from other crates
.filter(LevelFilter::Warn)
// don't log warnings from synapse_compress_state, the
// synapse_auto_compressor handles these situations and provides better
// log messages
.filter_target("synapse_compress_state".to_owned(), LevelFilter::Error)
// log info and above for the synapse_auto_compressor
.filter_target("synapse_auto_compressor".to_owned(), LevelFilter::Debug)
.install();
// ensure any panics produce error messages in the log
log_panics::init();
#[pyfn(m)]
#[pyo3(name = "compress_largest_rooms")]
fn compress_state_events_table(
py: Python,
db_url: String,
chunk_size: i64,
default_levels: String,
number_of_chunks: i64,
) -> PyResult<()> {
// Stops the compressor from holding the GIL while running
py.allow_threads(|| {
_compress_state_events_table_body(db_url, chunk_size, default_levels, number_of_chunks)
})
}
// Not accessible through PyO3. It is a "private" function.
fn _compress_state_events_table_body(
db_url: String,
chunk_size: i64,
default_levels: String,
number_of_chunks: i64,
) -> PyResult<()> {
// Announce the start of the program to the logs
log::info!("synapse_auto_compressor started");
// Parse the default_level string into a LevelInfo struct
let default_levels: LevelInfo = match default_levels.parse() {
Ok(l_sizes) => l_sizes,
Err(e) => {
return Err(PyErr::new::<PyRuntimeError, _>(format!(
"Unable to parse level_sizes: {}",
e
)))
}
};
// call compress_chunks_of_database with the arguments supplied
let run_result = manager::compress_chunks_of_database(
&db_url,
chunk_size,
&default_levels.0,
number_of_chunks,
);
// (Note, need to do `{:?}` formatting to show error context)
// Don't log the context of errors but do use it in the PyRuntimeError
if let Err(e) = run_result {
error!("{}", e);
return Err(PyErr::new::<PyRuntimeError, _>(format!("{:?}", e)));
}
log::info!("synapse_auto_compressor finished");
Ok(())
}
Ok(())
}

View File

@@ -0,0 +1,158 @@
//! This is a tool that uses the synapse_compress_state library to
//! reduce the size of the synapse state_groups_state table in a postgres
//! database.
//!
//! It adds the tables state_compressor_state and state_compressor_progress
//! to the database and uses these to enable it to incrementally work
//! on space reductions.
//!
//! This binary calls manager::compress_chunks_of_database() with the arguments
//! provided. That is, it works through the database in chunks of state groups,
//! each time picking the room with the lowest uncompressed state group id and
//! compressing the next chunk of that room.
//!
//! After each batch, the rows processed are marked as "compressed" (using
//! the state_compressor_progress table), and the program state is saved into
//! the state_compressor_state table so that the compressor can seamlessly
//! continue from where it left off.
#[cfg(feature = "jemalloc")]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
use clap::{crate_authors, crate_description, crate_name, crate_version, Arg, Command};
use log::LevelFilter;
use std::env;
use synapse_auto_compressor::{manager, state_saving, LevelInfo};
/// Execution starts here
fn main() {
// setup the logger for the synapse_auto_compressor
// The default can be overwritten with RUST_LOG
// see the README for more information
if env::var("RUST_LOG").is_err() {
let mut log_builder = env_logger::builder();
// Ensure panics still come through
log_builder.filter_module("panic", LevelFilter::Error);
// Only output errors from the synapse_compress state library
log_builder.filter_module("synapse_compress_state", LevelFilter::Error);
// Output log levels info and above from synapse_auto_compressor
log_builder.filter_module("synapse_auto_compressor", LevelFilter::Info);
log_builder.init();
} else {
// If RUST_LOG was set then use that
let mut log_builder = env_logger::Builder::from_env("RUST_LOG");
// Ensure panics still come through
log_builder.filter_module("panic", LevelFilter::Error);
log_builder.init();
}
log_panics::init();
// Announce the start of the program to the logs
log::info!("synapse_auto_compressor started");
// parse the command line arguments using the clap crate
let arguments = Command::new(crate_name!())
.version(crate_version!())
.author(crate_authors!("\n"))
.about(crate_description!())
.arg(
Arg::new("postgres-url")
.short('p')
.value_name("POSTGRES_LOCATION")
.help("The configruation for connecting to the postgres database.")
.long_help(concat!(
"The configuration for connecting to the Postgres database. This should be of the form ",
r#""postgresql://username:password@mydomain.com/database" or a key-value pair "#,
r#"string: "user=username password=password dbname=database host=mydomain.com" "#,
"See https://docs.rs/tokio-postgres/0.7.2/tokio_postgres/config/struct.Config.html ",
"for the full details."
))
.num_args(1)
.required(true),
).arg(
Arg::new("chunk_size")
.short('c')
.value_name("COUNT")
.value_parser(clap::value_parser!(i64))
.help("The maximum number of state groups to load into memroy at once")
.long_help(concat!(
"The number of state_groups to work on at once. All of the entries",
" from state_groups_state are requested from the database",
" for state groups that are worked on. Therefore small",
" chunk sizes may be needed on machines with low memory.",
" (Note: if the compressor fails to find space savings on the",
" chunk as a whole (which may well happen in rooms with lots",
" of backfill in) then the entire chunk is skipped.)",
))
.num_args(1)
.required(true),
).arg(
Arg::new("default_levels")
.short('l')
.value_name("LEVELS")
.value_parser(clap::value_parser!(LevelInfo))
.help("Sizes of each new level in the compression algorithm, as a comma separated list.")
.long_help(concat!(
"Sizes of each new level in the compression algorithm, as a comma separated list.",
" The first entry in the list is for the lowest, most granular level,",
" with each subsequent entry being for the next highest level.",
" The number of entries in the list determines the number of levels",
" that will be used.",
"\nThe sum of the sizes of the levels effect the performance of fetching the state",
" from the database, as the sum of the sizes is the upper bound on number of",
" iterations needed to fetch a given set of state.",
))
.default_value("100,50,25")
.num_args(1)
.required(false),
).arg(
Arg::new("number_of_chunks")
.short('n')
.value_name("CHUNKS_TO_COMPRESS")
.value_parser(clap::value_parser!(i64))
.help("The number of chunks to compress")
.long_help(concat!(
"This many chunks of the database will be compressed. The higher this number is set to, ",
"the longer the compressor will run for."
))
.num_args(1)
.required(true),
).get_matches();
// The URL of the database
let db_url = arguments
.get_one::<String>("postgres-url")
.expect("A database url is required");
// The number of state groups to work on at once
let chunk_size = arguments
.get_one("chunk_size")
.copied()
.expect("A chunk size is required");
// The default structure to use when compressing
let default_levels = arguments
.get_one::<LevelInfo>("default_levels")
.cloned()
.unwrap();
// The number of rooms to compress with this tool
let number_of_chunks = arguments
.get_one("number_of_chunks")
.copied()
.expect("number_of_chunks is required");
// Connect to the database and create the 2 tables this tool needs
// (Note: if they already exist then this does nothing)
let mut client = state_saving::connect_to_database(db_url)
.unwrap_or_else(|e| panic!("Error occured while connecting to {}: {}", db_url, e));
state_saving::create_tables_if_needed(&mut client)
.unwrap_or_else(|e| panic!("Error occured while creating tables in database: {}", e));
// call compress_chunks_of_database with the arguments supplied
// panic if an error is produced
manager::compress_chunks_of_database(db_url, chunk_size, &default_levels.0, number_of_chunks)
.unwrap();
log::info!("synapse_auto_compressor finished");
}
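// Example invocation (illustrative, not part of the original source), assuming a
// hypothetical database URL:
//
//   synapse_auto_compressor -p "postgresql://user:pass@localhost/synapse" -c 500 -n 100
//
// This compresses 100 chunks of at most 500 state groups each, using the default
// level sizes of 100,50,25.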

View File

@@ -0,0 +1,194 @@
// This module contains functions that carry out different types
// of compression on the database.
use crate::state_saving::{
connect_to_database, create_tables_if_needed, get_next_room_to_compress,
read_room_compressor_state, write_room_compressor_state,
};
use anyhow::{bail, Context, Result};
use log::{debug, info, warn};
use synapse_compress_state::{continue_run, ChunkStats, Level};
/// Runs the compressor on a chunk of the room
///
/// Returns `Some(chunk_stats)` if the compressor has progressed
/// and `None` if it had already got to the end of the room
///
/// # Arguments
///
/// * `db_url` - The URL of the postgres database that synapse is using.
/// e.g. "postgresql://user:password@domain.com/synapse"
///
/// * `room_id` - The id of the room to run the compressor on. Note this
/// is the id as stored in the database and will look like
/// "!aasdfasdfafdsdsa:matrix.org" instead of the common
/// name
///
/// * `chunk_size` - The number of state_groups to work on. All of the entries
/// from state_groups_state are requested from the database
/// for state groups that are worked on. Therefore small
/// chunk sizes may be needed on machines with low memory.
/// (Note: if the compressor fails to find space savings on the
/// chunk as a whole (which may well happen in rooms with lots
/// of backfill in) then the entire chunk is skipped.)
///
/// * `default_levels` - If the compressor has never been run on this room before
/// then we need to provide the compressor with some information
/// on what sort of compression structure we want. The default that
/// the library suggests is `vec![Level::new(100), Level::new(50), Level::new(25)]`
pub fn run_compressor_on_room_chunk(
db_url: &str,
room_id: &str,
chunk_size: i64,
default_levels: &[Level],
) -> Result<Option<ChunkStats>> {
// connect to the database
let mut client =
connect_to_database(db_url).with_context(|| format!("Failed to connect to {}", db_url))?;
// Access the database to find out where the compressor last got up to
let retrieved_state = read_room_compressor_state(&mut client, room_id)
.with_context(|| format!("Failed to read compressor state for room {}", room_id,))?;
// If the database didn't contain any information, then use the default state
let (start, level_info) = match retrieved_state {
Some((s, l)) => (Some(s), l),
None => (None, default_levels.to_vec()),
};
// run the compressor on this chunk
let option_chunk_stats = continue_run(start, chunk_size, db_url, room_id, &level_info);
if option_chunk_stats.is_none() {
debug!("No work to do on this room...");
return Ok(None);
}
// Ok to unwrap because have checked that it's not None
let chunk_stats = option_chunk_stats.unwrap();
debug!("{:?}", chunk_stats);
// Check to see whether the compressor sent its changes to the database
if !chunk_stats.commited {
if chunk_stats.new_num_rows - chunk_stats.original_num_rows != 0 {
warn!(
"The compressor tried to increase the number of rows in {} between {:?} and {}. Skipping...",
room_id, start, chunk_stats.last_compressed_group,
);
}
// Skip over the failed chunk and set the level info to the default (empty) state
write_room_compressor_state(
&mut client,
room_id,
default_levels,
chunk_stats.last_compressed_group,
)
.with_context(|| {
format!(
"Failed to skip chunk in room {} between {:?} and {}",
room_id, start, chunk_stats.last_compressed_group
)
})?;
return Ok(Some(chunk_stats));
}
// Save where we got up to after this successful commit
write_room_compressor_state(
&mut client,
room_id,
&chunk_stats.new_level_info,
chunk_stats.last_compressed_group,
)
.with_context(|| {
format!(
"Failed to save state after compressing chunk in room {} between {:?} and {}",
room_id, start, chunk_stats.last_compressed_group
)
})?;
Ok(Some(chunk_stats))
}
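// Example (added for illustration, not part of the original source): running a
// single chunk of compression on one room with the library's suggested default
// levels. The URL and room id are hypothetical values.
#[allow(dead_code)]
fn example_run_one_chunk() -> Result<()> {
    let default_levels = vec![Level::new(100), Level::new(50), Level::new(25)];
    let chunk_stats = run_compressor_on_room_chunk(
        "postgresql://user:pass@localhost/synapse",
        "!hypothetical_room:example.com",
        500,
        &default_levels,
    )?;
    if let Some(stats) = chunk_stats {
        info!("Compressed up to state group {}", stats.last_compressed_group);
    }
    Ok(())
}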
/// Runs the compressor in chunks on rooms with the lowest uncompressed state group ids
///
/// # Arguments
///
/// * `db_url` - The URL of the postgres database that synapse is using.
/// e.g. "postgresql://user:password@domain.com/synapse"
///
/// * `chunk_size` - The number of state_groups to work on. All of the entries
/// from state_groups_state are requested from the database
/// for state groups that are worked on. Therefore small
/// chunk sizes may be needed on machines with low memory.
/// (Note: if the compressor fails to find space savings on the
/// chunk as a whole (which may well happen in rooms with lots
/// of backfill in) then the entire chunk is skipped.)
///
/// * `default_levels` - If the compressor has never been run on this room before
/// then we need to provide the compressor with some information
/// on what sort of compression structure we want. The default that
/// the library suggests is empty levels with max sizes of 100, 50 and 25
///
/// * `number_of_chunks`- The number of chunks to compress. The larger this number is, the longer
/// the compressor will run for.
pub fn compress_chunks_of_database(
db_url: &str,
chunk_size: i64,
default_levels: &[Level],
number_of_chunks: i64,
) -> Result<()> {
// connect to the database
let mut client = connect_to_database(db_url)
.with_context(|| format!("Failed to connect to database at {}", db_url))?;
create_tables_if_needed(&mut client).context("Failed to create state compressor tables")?;
let mut skipped_chunks = 0;
let mut rows_saved = 0;
let mut chunks_processed = 0;
while chunks_processed < number_of_chunks {
let room_to_compress = get_next_room_to_compress(&mut client)
.context("Failed to work out what room to compress next")?;
if room_to_compress.is_none() {
break;
}
let room_to_compress =
room_to_compress.expect("Have checked that rooms_to_compress is not None");
info!(
"Running compressor on room {} with chunk size {}",
room_to_compress, chunk_size
);
let work_done =
run_compressor_on_room_chunk(db_url, &room_to_compress, chunk_size, default_levels)?;
if let Some(ref chunk_stats) = work_done {
if chunk_stats.commited {
let savings = chunk_stats.original_num_rows - chunk_stats.new_num_rows;
rows_saved += savings;
debug!("Saved {} rows for room {}", savings, room_to_compress);
} else {
skipped_chunks += 1;
debug!(
"Unable to make savings for room {}, skipping chunk",
room_to_compress
);
}
chunks_processed += 1;
} else {
bail!("Ran the compressor on a room that had no more work to do!")
}
}
info!(
"Finished running compressor. Saved {} rows. Skipped {}/{} chunks",
rows_saved, skipped_chunks, chunks_processed
);
Ok(())
}
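// Example (added for illustration, not part of the original source): the same
// entry point the binary uses, compressing 100 chunks of at most 500 state
// groups each. The database URL is a hypothetical value.
#[allow(dead_code)]
fn example_compress_database() -> Result<()> {
    let default_levels = vec![Level::new(100), Level::new(50), Level::new(25)];
    compress_chunks_of_database(
        "postgresql://user:pass@localhost/synapse",
        500,
        &default_levels,
        100,
    )
}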

View File

@@ -0,0 +1,321 @@
// This module contains functions to communicate with the database
use anyhow::{bail, Result};
use log::trace;
use synapse_compress_state::Level;
use openssl::ssl::{SslConnector, SslMethod, SslVerifyMode};
use postgres::{fallible_iterator::FallibleIterator, types::ToSql, Client};
use postgres_openssl::MakeTlsConnector;
/// Connects to the database and returns a postgres client
///
/// # Arguments
///
/// * `db_url` - The URL of the postgres database that synapse is using.
/// e.g. "postgresql://user:password@domain.com/synapse"
pub fn connect_to_database(db_url: &str) -> Result<Client> {
let mut builder = SslConnector::builder(SslMethod::tls())?;
builder.set_verify(SslVerifyMode::NONE);
let connector = MakeTlsConnector::new(builder.build());
let client = Client::connect(db_url, connector)?;
Ok(client)
}
/// Creates the state_compressor_state and state_compressor_progress tables
///
/// If these tables already exist then this function does nothing
///
/// # Arguments
///
/// * `client` - A postgres client used to send the requests to the database
pub fn create_tables_if_needed(client: &mut Client) -> Result<()> {
let create_state_table = r#"
CREATE TABLE IF NOT EXISTS state_compressor_state (
room_id TEXT NOT NULL,
level_num INT NOT NULL,
max_size INT NOT NULL,
current_length INT NOT NULL,
current_head BIGINT,
UNIQUE (room_id, level_num)
)"#;
client.execute(create_state_table, &[])?;
let create_state_table_indexes = r#"
CREATE INDEX IF NOT EXISTS state_compressor_state_index ON state_compressor_state (room_id)"#;
client.execute(create_state_table_indexes, &[])?;
let create_progress_table = r#"
CREATE TABLE IF NOT EXISTS state_compressor_progress (
room_id TEXT PRIMARY KEY,
last_compressed BIGINT NOT NULL
)"#;
client.execute(create_progress_table, &[])?;
let create_compressor_global_progress_table = r#"
CREATE TABLE IF NOT EXISTS state_compressor_total_progress(
lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE,
lowest_uncompressed_group BIGINT NOT NULL,
CHECK (Lock='X')
);
INSERT INTO state_compressor_total_progress
(lowest_uncompressed_group)
VALUES (0)
ON CONFLICT (lock) DO NOTHING;
"#;
client.batch_execute(create_compressor_global_progress_table)?;
Ok(())
}
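// Example (added for illustration, not part of the original source): connect
// and make sure the compressor's bookkeeping tables exist before doing any
// other work. The database URL is a hypothetical value.
#[allow(dead_code)]
fn example_setup() -> Result<()> {
    let mut client = connect_to_database("postgresql://user:pass@localhost/synapse")?;
    create_tables_if_needed(&mut client)?;
    Ok(())
}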
/// Retrieve the level info so we can restart the compressor
///
/// # Arguments
///
/// * `client` - A postgres client used to send the requests to the database
/// * `room_id` - The room whose saved compressor state we want to load
pub fn read_room_compressor_state(
client: &mut Client,
room_id: &str,
) -> Result<Option<(i64, Vec<Level>)>> {
// Query to retrieve all levels from state_compressor_state
// Ordered by ascending level_number
let sql = r#"
SELECT level_num, max_size, current_length, current_head, last_compressed
FROM state_compressor_state
LEFT JOIN state_compressor_progress USING (room_id)
WHERE room_id = $1
ORDER BY level_num ASC
"#;
// send the query to the database
let mut levels = client.query_raw(sql, &[room_id])?;
// Needed to ensure that the rows are for unique consecutive levels
// starting from 1 (i.e. of the form [1,2,3], not [0,1,2] or [1,1,2,2,3])
let mut prev_seen = 0;
// The vector to store the level info from the database in
let mut level_info: Vec<Level> = Vec::new();
// Where the last compressor run stopped
let mut last_compressed = None;
// Used to only read last_compressed value once
let mut first_row = true;
// Loop through all the rows retrieved by that query
while let Some(l) = levels.next()? {
// Read out the fields into variables
//
// Some of these are `usize` as they may be used to index vectors, but stored as Postgres
// type `INT` which is the same as `i32`.
//
// Since usize is unlikely to be less than 32 bits wide, this conversion should be safe
let level_num: usize = l.get::<_, i32>("level_num") as usize;
let max_size: usize = l.get::<_, i32>("max_size") as usize;
let current_length: usize = l.get::<_, i32>("current_length") as usize;
let current_head: Option<i64> = l.get("current_head");
// Only read the last compressed column once since it is the same for each row
if first_row {
last_compressed = l.get("last_compressed"); // might be NULL if corrupted
if last_compressed.is_none() {
bail!(
"No entry in state_compressor_progress for room {} but entries in state_compressor_state were found",
room_id
)
}
first_row = false;
}
// Check that there aren't multiple entries for the same level number
// in the database. (Should be impossible due to unique key constraint)
if prev_seen == level_num {
bail!(
"The level {} occurs twice in state_compressor_state for room {}",
level_num,
room_id,
);
}
// Check that there is no missing level in the database
// e.g. if the previous row retrieved was for level 1 and this
// row is for level 3 then since the SQL query orders the results
// in ascending level numbers, there was no level 2 found!
if prev_seen != level_num - 1 {
bail!("Levels between {} and {} are missing", prev_seen, level_num,);
}
// if the level is not empty, then it must have a head!
if current_head.is_none() && current_length != 0 {
bail!(
"Level {} has no head but current length is {} in room {}",
level_num,
current_length,
room_id,
);
}
// If the level has more groups in than the maximum then something is wrong!
if current_length > max_size {
bail!(
"Level {} has length {} but max size {} in room {}",
level_num,
current_length,
max_size,
room_id,
);
}
// Add this level to the level_info vector
level_info.push(Level::restore(max_size, current_length, current_head));
// Mark the previous level_number seen as the current one
prev_seen = level_num;
}
// If we didn't retrieve anything from the database then there is no saved state
// in the database!
if level_info.is_empty() {
return Ok(None);
}
// Return the compressor state we retrieved
// last_compressed cannot be None at this point, so safe to unwrap
Ok(Some((last_compressed.unwrap(), level_info)))
}
/// Save the level info so it can be loaded by the next run of the compressor
///
/// # Arguments
///
/// * `client` - A postgres client used to send the requests to the database
/// * `room_id` - The room whose saved compressor state we want to save
/// * `level_info` - The state that can be used to restore the compressor later
/// * `last_compressed` - The last state_group that was compressed. This is needed
/// so that the compressor knows where to start from next
pub fn write_room_compressor_state(
client: &mut Client,
room_id: &str,
level_info: &[Level],
last_compressed: i64,
) -> Result<()> {
// Wrap all the changes to the state for this room in a transaction
// This prevents accidentally having malformed compressor start info
let mut write_transaction = client.transaction()?;
// Go through every level that the compressor is using
for (level_num, level) in level_info.iter().enumerate() {
// the 1st level is level 1 not level 0, but enumerate starts at 0
// so need to add 1 to get correct number
let level_num = level_num + 1;
// bring the level info out of the Level struct
let (max_size, current_len, current_head) = (
level.get_max_length(),
level.get_current_length(),
level.get_head(),
);
// Update the database with this compressor state information
//
// Some of these are `usize` as they may be used to index vectors, but stored as Postgres
// type `INT` which is the same as `i32`.
//
// Since these values should always be small, this conversion should be safe.
let (level_num, max_size, current_len) =
(level_num as i32, max_size as i32, current_len as i32);
let params: Vec<&(dyn ToSql + Sync)> =
vec![&room_id, &level_num, &max_size, &current_len, &current_head];
write_transaction.execute(
r#"
INSERT INTO state_compressor_state
(room_id, level_num, max_size, current_length, current_head)
VALUES ($1, $2, $3, $4, $5)
ON CONFLICT (room_id, level_num)
DO UPDATE SET
max_size = excluded.max_size,
current_length = excluded.current_length,
current_head= excluded.current_head;
"#,
&params,
)?;
}
// Update the database with this progress information
let params: Vec<&(dyn ToSql + Sync)> = vec![&room_id, &last_compressed];
write_transaction.execute(
r#"
INSERT INTO state_compressor_progress (room_id, last_compressed)
VALUES ($1, $2)
ON CONFLICT (room_id)
DO UPDATE SET last_compressed = excluded.last_compressed;
"#,
&params,
)?;
// Commit the transaction (otherwise changes never happen)
write_transaction.commit()?;
Ok(())
}
/// Returns the room with the lowest uncompressed state group id
///
/// A group is detected as uncompressed if it is greater than the `last_compressed`
/// entry in `state_compressor_progress` for that room.
///
/// The `lowest_uncompressed_group` value stored in `state_compressor_total_progress`
/// stores where this method last finished, to prevent repeating work
///
/// # Arguments
///
/// * `client` - A postgres client used to send the requests to the database
pub fn get_next_room_to_compress(client: &mut Client) -> Result<Option<String>> {
// Walk the state_groups table until find next uncompressed group
let get_next_room = r#"
SELECT room_id, id
FROM state_groups
LEFT JOIN state_compressor_progress USING (room_id)
WHERE
id >= (SELECT lowest_uncompressed_group FROM state_compressor_total_progress)
AND (
id > last_compressed
OR last_compressed IS NULL
)
ORDER BY id ASC
LIMIT 1
"#;
let row_opt = client.query_opt(get_next_room, &[])?;
let next_room_row = if let Some(row) = row_opt {
row
} else {
return Ok(None);
};
let next_room: String = next_room_row.get("room_id");
let lowest_uncompressed_group: i64 = next_room_row.get("id");
// This method has determined where the lowest uncompressed group is, save that
// information so we don't have to redo this work in the future.
let update_total_progress = r#"
UPDATE state_compressor_total_progress SET lowest_uncompressed_group = $1;
"#;
client.execute(update_total_progress, &[&lowest_uncompressed_group])?;
trace!(
"next_room: {}, lowest_uncompressed: {}",
next_room,
lowest_uncompressed_group
);
Ok(Some(next_room))
}