diff options
author | Iru Cai <mytbk920423@gmail.com> | 2018-08-25 00:16:14 +0800 |
---|---|---|
committer | Iru Cai <mytbk920423@gmail.com> | 2018-08-25 00:17:27 +0800 |
commit | d85513c6f24968abd35181c902819dc8e489f7e7 (patch) | |
tree | 72d3561c42af8dde758788da35ef77c98c201684 /mxclean | |
parent | e5fab7b6ee72d8c7b9701bc1e007a875a37c2f2c (diff) | |
download | matrix-synapse-scripts-master.tar.xz |
Diffstat (limited to 'mxclean')
-rw-r--r-- | mxclean/clean_files.cpp | 43 | ||||
-rw-r--r-- | mxclean/clean_local_store.sh | 31 |
2 files changed, 74 insertions, 0 deletions
diff --git a/mxclean/clean_files.cpp b/mxclean/clean_files.cpp
new file mode 100644
--- /dev/null
+++ b/mxclean/clean_files.cpp
@@ -0,0 +1,76 @@
+#include <cctype>
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <string>
+
+/// Derive the media ID from a path inside the media store.
+///
+/// Only the alphabetic characters of the path are kept, so directory
+/// separators and the leading "./" printed by find are dropped, e.g.
+/// "./ab/cd/efgh" becomes "abcdefgh".
+///
+/// @param s path of one stored file
+/// @return the alphabetic characters of @p s, in their original order
+std::string path_to_id(const std::string &s)
+{
+    std::string id;
+    id.reserve(s.size());
+    for (const char c : s) {
+        // The <cctype> classifiers are undefined for negative arguments,
+        // which a plain char may hold, so convert to unsigned char first.
+        if (std::isalpha(static_cast<unsigned char>(c)))
+            id += c;
+    }
+    return id;
+}
+
+/// Split the stored files into those to keep and those to remove.
+///
+/// Reads whitespace-separated paths from /tmp/mxclean/filelist and IDs from
+/// /tmp/mxclean/files_to_keep.  Every path whose ID is not in the keep list
+/// gets an "rm" line in /tmp/mxclean/remove.sh; both counts are printed to
+/// standard output.
+///
+/// @return 0 on success, 1 if a file cannot be opened or remove.sh cannot be
+///         written
+int main()
+{
+    std::ifstream flist("/tmp/mxclean/filelist");
+    std::ifstream keeplist("/tmp/mxclean/files_to_keep");
+    std::ofstream rm("/tmp/mxclean/remove.sh");
+
+    // A keep list that failed to open would read as empty and mark every
+    // file for removal, so refuse to continue unless all files opened.
+    if (!flist || !keeplist || !rm) {
+        std::cerr << "cannot open the work files in /tmp/mxclean" << std::endl;
+        return 1;
+    }
+
+    std::set<std::string> keep_set;
+    std::string s;
+    while (keeplist >> s)
+        keep_set.insert(s);
+
+    int keep = 0;
+    int removed = 0;
+    while (flist >> s) {
+        if (keep_set.count(path_to_id(s)) != 0) {
+            keep++;
+        } else {
+            removed++;
+            rm << "rm " << s << '\n';
+        }
+    }
+
+    // Flush explicitly so that a failed write is reported instead of
+    // leaving a truncated script behind unnoticed.
+    rm.flush();
+    if (!rm) {
+        std::cerr << "failed to write /tmp/mxclean/remove.sh" << std::endl;
+        return 1;
+    }
+
+    std::cout << "keep " << keep << " files, remove " << removed << " files." << std::endl;
+    return 0;
+}
diff --git a/mxclean/clean_local_store.sh b/mxclean/clean_local_store.sh
new file mode 100644
--- /dev/null
+++ b/mxclean/clean_local_store.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+
+set -e
+
+purge_ts=$(($(date +%s)-$((3600*24*30))))000
+server=matrixim.cc
+content_dir=/var/lib/matrix-synapse/media_store/local_content
+tmp=/tmp/mxclean
+CLEANER="$(pwd)/clean_files"
+
+# Run one SQL statement against the synapse database and print the result
+# as bare values, one row per line.  psql is run as a simple command with
+# ON_ERROR_STOP so that a failed query aborts the script through "set -e"
+# instead of silently producing a shorter list of files to keep.
+query() {
+    psql -d synapse -Aqt -v ON_ERROR_STOP=1 -c "$1"
+}
+
+make "${CLEANER}"
+cd "${content_dir}"
+mkdir -p "${tmp}"
+# POSIX find needs an explicit starting directory.
+find . -type f > "${tmp}/filelist"
+
+# for avatars in state events
+# E2EE messages don't have the URI in the events, so do not rely on it for all events
+# The match is done by the database; ${server} must not contain the LIKE
+# wildcards % and _ (or a backslash).
+query "select content from events where cast(content as text) like '%mxc://${server}/%'" > "${tmp}/file_in_db"
+# "// empty" skips events without the key; deleting every line that contains
+# "null" would also drop media IDs that merely contain that word.
+jq -r '.avatar_url // empty, .url // empty' "${tmp}/file_in_db" > "${tmp}/dburls"
+awk -F '/' '{print $NF}' "${tmp}/dburls" > "${tmp}/dbfiles"
+
+# profile avatars (rows with an empty avatar_url are skipped)
+query 'select avatar_url from profiles' > "${tmp}/avatar_urls"
+awk -F '/' 'NF {print $NF}' "${tmp}/avatar_urls" > "${tmp}/avatars"
+
+# media created after ${purge_ts} (30 days ago, expressed in milliseconds) is always kept
+query "select media_id from local_media_repository where created_ts > ${purge_ts}" > "${tmp}/recent_media"
+
+sort -u "${tmp}/dbfiles" "${tmp}/avatars" "${tmp}/recent_media" > "${tmp}/files_to_keep"
+"${CLEANER}"
+
+echo "Please run 'cd ${content_dir} && sh ${tmp}/remove.sh' to clean the local content."