//! This is the actual compression algorithm.
//!
//! The algorithm attempts to make a tree of deltas for the state group maps.
//! This is done by having multiple "levels", where each level has a maximum
//! size. The state groups are iterated over, with deltas being calculated
//! against the smallest level that isn't yet full. When a state group is
//! inserted into a level, all lower levels are reset to have their current
//! "head" at the new state group.
//!
//! This produces graphs that look roughly like, for two levels:
//!
//! ```text
//! L2 <-------------------- L2 <---------- ...
//! ^--- L1 <--- L1 <--- L1  ^--- L1 <--- L1 <--- L1
//! ```
|
|
|
|
|
|
use rust_matrix_lib::state_map::StateMap;
|
|
use string_cache::DefaultAtom as Atom;
|
|
|
|
use std::collections::BTreeMap;
|
|
|
|
use {collapse_state_maps, StateGroupEntry};
|
|
|
|
|
|
/// Holds information about a particular level.
#[derive(Debug)]
struct Level {
    /// The maximum chain length this level is allowed to reach.
    max_length: usize,
    /// The (approximate) current chain length of this level. This is roughly
    /// the number of hops needed when recursively following `current`.
    current_chain_length: usize,
    /// The head of this level, or `None` if nothing has been added yet.
    current: Option<i64>,
}

impl Level {
    /// Creates a new, empty `Level` with the given maximum length.
    pub fn new(max_length: usize) -> Level {
        Level {
            max_length,
            current_chain_length: 0,
            current: None,
        }
    }

    /// Updates the current head of this level to `current`.
    ///
    /// If `delta` is true the given state group will (probably) reference
    /// the previous head, so the chain length estimate grows by one.
    /// Otherwise a new chain containing only `current` is started.
    ///
    /// # Panics
    ///
    /// Panics if `delta` is true and the level is already full. The level is
    /// left untouched in that case.
    pub fn update(&mut self, current: i64, delta: bool) {
        if delta {
            // Validate before mutating anything so that a failed update does
            // not leave the head pointing at a state group that was never
            // accounted for in the chain length.
            assert!(self.has_space(), "Tried to add to a already full level");
            self.current_chain_length += 1;
        } else {
            // We've started a new chain with a single entry.
            self.current_chain_length = 1;
        }

        self.current = Some(current);
    }

    /// Gets the current head of the level, if any.
    pub fn get_current(&self) -> Option<i64> {
        self.current
    }

    /// Whether there is space in the current chain at this level. If not then
    /// a new chain should be started.
    pub fn has_space(&self) -> bool {
        self.current_chain_length < self.max_length
    }
}
|
|
|
|
|
|
/// Keeps track of some statistics of a compression run.
///
/// All counters start at zero (see the derived `Default`).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Stats {
    /// How many state groups we couldn't find a delta for, despite trying.
    pub resets_no_suitable_prev: usize,
    /// The sum of the rows of the state groups counted by
    /// `resets_no_suitable_prev`.
    pub resets_no_suitable_prev_size: usize,
    /// How many state groups we have changed.
    pub state_groups_changed: usize,
}
|
|
|
|
|
|
/// Attempts to compress a set of state deltas using the given level sizes.
|
|
pub struct Compressor<'a> {
|
|
original_state_map: &'a BTreeMap<i64, StateGroupEntry>,
|
|
pub new_state_group_map: BTreeMap<i64, StateGroupEntry>,
|
|
levels: Vec<Level>,
|
|
pub stats: Stats,
|
|
}
|
|
|
|
impl<'a> Compressor<'a> {
|
|
/// Creates a compressor and runs the compression algorithm.
|
|
pub fn compress(
|
|
original_state_map: &'a BTreeMap<i64, StateGroupEntry>,
|
|
level_sizes: &[usize],
|
|
) -> Compressor<'a> {
|
|
let mut compressor = Compressor {
|
|
original_state_map,
|
|
new_state_group_map: BTreeMap::new(),
|
|
levels: level_sizes.iter().map(|size| Level::new(*size)).collect(),
|
|
stats: Stats::default(),
|
|
};
|
|
|
|
compressor.create_new_tree();
|
|
|
|
compressor
|
|
}
|
|
|
|
/// Actually runs the compression algorithm
|
|
fn create_new_tree(&mut self) {
|
|
if !self.new_state_group_map.is_empty() {
|
|
panic!("Can only call `create_new_tree` once");
|
|
}
|
|
|
|
for (&state_group, entry) in self.original_state_map {
|
|
let mut prev_state_group = None;
|
|
for level in &mut self.levels {
|
|
if level.has_space() {
|
|
prev_state_group = level.get_current();
|
|
level.update(state_group, true);
|
|
break;
|
|
} else {
|
|
level.update(state_group, false);
|
|
}
|
|
}
|
|
|
|
let (delta, prev_state_group) = if entry.prev_state_group == prev_state_group {
|
|
(entry.state_map.clone(), prev_state_group)
|
|
} else {
|
|
self.stats.state_groups_changed += 1;
|
|
self.get_delta(prev_state_group, state_group)
|
|
};
|
|
|
|
self.new_state_group_map.insert(
|
|
state_group,
|
|
StateGroupEntry {
|
|
prev_state_group,
|
|
state_map: delta,
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
/// Attempts to calculate the delta between two state groups.
|
|
///
|
|
/// This is not always possible if the given candidate previous state group
|
|
/// have state keys that are not in the new state group. In this case the
|
|
/// function will try and iterate back up the current tree to find a state
|
|
/// group that can be used as a base for a delta.
|
|
///
|
|
/// Returns the state map and the actual base state group (if any) used.
|
|
fn get_delta(&mut self, prev_sg: Option<i64>, sg: i64) -> (StateMap<Atom>, Option<i64>) {
|
|
let state_map = collapse_state_maps(&self.original_state_map, sg);
|
|
|
|
let mut prev_sg = if let Some(prev_sg) = prev_sg {
|
|
prev_sg
|
|
} else {
|
|
return (state_map, None);
|
|
};
|
|
|
|
// This is a loop to go through to find the first prev_sg which can be
|
|
// a valid base for the state group.
|
|
let mut prev_state_map;
|
|
'outer: loop {
|
|
prev_state_map = collapse_state_maps(&self.original_state_map, prev_sg);
|
|
for (t, s) in prev_state_map.keys() {
|
|
if !state_map.contains_key(t, s) {
|
|
// This is not a valid base as it contains key the new state
|
|
// group doesn't have. Attempt to walk up the tree to find a
|
|
// better base.
|
|
if let Some(psg) = self.new_state_group_map[&prev_sg].prev_state_group {
|
|
prev_sg = psg;
|
|
continue 'outer;
|
|
}
|
|
|
|
// Couldn't find a new base, so we give up and just persist
|
|
// a full state group here.
|
|
self.stats.resets_no_suitable_prev += 1;
|
|
self.stats.resets_no_suitable_prev_size += state_map.len();
|
|
|
|
return (state_map, None);
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
// We've found a valid base, now we just need to calculate the delta.
|
|
let mut delta_map = StateMap::new();
|
|
|
|
for ((t, s), e) in state_map.iter() {
|
|
if prev_state_map.get(t, s) != Some(e) {
|
|
delta_map.insert(t, s, e.clone());
|
|
}
|
|
}
|
|
|
|
(delta_map, Some(prev_sg))
|
|
}
|
|
}
|
|
|
|
/// Compresses a linear chain of 14 empty state groups with two levels of
/// size 3 and checks the resulting `prev_state_group` edges.
#[test]
fn test_new_map() {
    let mut initial: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();

    // Build the chain 0 <- 1 <- ... <- 13, each with an empty state map.
    let mut prev = None;
    for i in 0i64..=13i64 {
        initial.insert(
            i,
            StateGroupEntry {
                prev_state_group: prev,
                state_map: StateMap::new(),
            },
        );

        prev = Some(i);
    }

    let compressor = Compressor::compress(&initial, &[3, 3]);

    let new_state = compressor.new_state_group_map;

    // (state group, expected previous state group). State groups that are
    // absent here (0, 3 and 12) are expected to have no previous state group.
    let expected_edges: BTreeMap<i64, i64> = vec![
        (1, 0),
        (2, 1),
        (4, 3),
        (5, 4),
        (6, 3),
        (7, 6),
        (8, 7),
        (9, 6),
        (10, 9),
        (11, 10),
        (13, 12),
    ]
    .into_iter()
    .collect();

    for sg in 0i64..=13i64 {
        assert_eq!(
            expected_edges.get(&sg).cloned(),
            new_state[&sg].prev_state_group,
            "state group {} did not match expected",
            sg,
        );
    }
}
|