From d85513c6f24968abd35181c902819dc8e489f7e7 Mon Sep 17 00:00:00 2001
From: Iru Cai
Date: Sat, 25 Aug 2018 00:16:14 +0800
Subject: local store cleaner

---
 mxclean/clean_files.cpp      | 56 ++++++++++++++++++++++++++++++++++++++++++++
 mxclean/clean_local_store.sh | 31 ++++++++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 mxclean/clean_files.cpp
 create mode 100644 mxclean/clean_local_store.sh

diff --git a/mxclean/clean_files.cpp b/mxclean/clean_files.cpp
new file mode 100644
index 0000000..8c3b0ad
--- /dev/null
+++ b/mxclean/clean_files.cpp
@@ -0,0 +1,56 @@
+#include <cctype>
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <string>
+
+using namespace std;
+
+// Reduce a path from the file list to an ID by keeping only its alphabetic
+// characters, so that it can be looked up among the entries of files_to_keep.
+string path_to_id(const string &s)
+{
+    string t;
+    for (unsigned char c : s) {
+        // unsigned char: isalpha() is undefined for negative char values
+        if (isalpha(c))
+            t += static_cast<char>(c);
+    }
+    return t;
+}
+
+// Read the file list and the keep list from /tmp/mxclean and write remove.sh
+// with one "rm" line for every listed file whose ID is not in the keep list.
+int main()
+{
+    ifstream flist("/tmp/mxclean/filelist");
+    ifstream keeplist("/tmp/mxclean/files_to_keep");
+    ofstream rm("/tmp/mxclean/remove.sh");
+
+    // If the keep list could not be read, keep_set would stay empty and every
+    // file would be scheduled for removal, so stop unless all files opened.
+    if (!flist || !keeplist || !rm) {
+        cerr << "cannot open the work files in /tmp/mxclean" << endl;
+        return 1;
+    }
+
+    set<string> keep_set;
+    string s;
+    int keep = 0, remove = 0;
+
+    while (keeplist >> s) {
+        keep_set.insert(s);
+    }
+
+    while (flist >> s) {
+        if (keep_set.find(path_to_id(s)) != keep_set.end()) {
+            keep++;
+        } else {
+            remove++;
+            rm << "rm " << s << '\n';
+        }
+    }
+
+    cout << "keep " << keep << " files, remove " << remove << " files." << endl;
+    return 0;
+}
diff --git a/mxclean/clean_local_store.sh b/mxclean/clean_local_store.sh
new file mode 100644
index 0000000..12402e7
--- /dev/null
+++ b/mxclean/clean_local_store.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+set -e
+
+purge_ts=$(($(date +%s)-$((3600*24*30))))000
+server=matrixim.cc
+content_dir=/var/lib/matrix-synapse/media_store/local_content
+tmp=/tmp/mxclean
+CLEANER="$(pwd)/clean_files"
+
+make "${CLEANER}"
+cd "${content_dir}"
+mkdir -p "${tmp}"
+find . -type f > "${tmp}/filelist"
+
+# for avatars in state events
+# E2EE messages do not have the URI in their events, so do not rely on it for all events
+echo 'select content from events' | psql -d synapse -Aqt | grep -F "mxc://${server}/" > "${tmp}/file_in_db"
+jq -r .avatar_url "${tmp}/file_in_db" | awk -F '/' '{print $NF}' | sed '/^null$/d' > "${tmp}/dbfiles"
+jq -r .url "${tmp}/file_in_db" | awk -F '/' '{print $NF}' | sed '/^null$/d' >> "${tmp}/dbfiles"
+
+# profile avatars
+echo 'select avatar_url from profiles' | psql -d synapse -Aqt | sed '/^$/d' | awk -F '/' '{print $NF}' > "${tmp}/avatars"
+
+echo "select media_id from local_media_repository where created_ts > ${purge_ts}" \
+    | psql -d synapse -Aqt > "${tmp}/recent_media"
+
+cat "${tmp}/dbfiles" "${tmp}/avatars" "${tmp}/recent_media" | sort -u > "${tmp}/files_to_keep"
+"${CLEANER}"
+
+echo "Please run 'cd ${content_dir} && sh ${tmp}/remove.sh' to clean the local content."
-- 
cgit v1.2.3