summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Iru Cai <mytbk920423@gmail.com> 2018-08-25 00:16:14 +0800
committer: Iru Cai <mytbk920423@gmail.com> 2018-08-25 00:17:27 +0800
commit: d85513c6f24968abd35181c902819dc8e489f7e7 (patch)
tree: 72d3561c42af8dde758788da35ef77c98c201684
parent: e5fab7b6ee72d8c7b9701bc1e007a875a37c2f2c (diff)
download: matrix-synapse-scripts-master.tar.xz
local store cleaner (HEAD, master)
-rw-r--r-- mxclean/clean_files.cpp 43
-rw-r--r-- mxclean/clean_local_store.sh 31
2 files changed, 74 insertions, 0 deletions
diff --git a/mxclean/clean_files.cpp b/mxclean/clean_files.cpp
new file mode 100644
index 0000000..8c3b0ad
--- /dev/null
+++ b/mxclean/clean_files.cpp
@@ -0,0 +1,43 @@
+#include <cctype>
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <string>
+
+using namespace std;
+
+// Reduce a path to an ID by keeping only its alphabetic characters, in
+// order; everything else (dots, slashes, digits, ...) is dropped.
+// Example: "./ab/cd/efgh" -> "abcdefgh".
+//
+// NOTE(review): this presumes media IDs consist solely of letters -- confirm
+// against the media store naming scheme before relying on it.
+//
+// s: the path to convert; it is not modified.
+// Returns the concatenation of the alphabetic characters of s.
+string path_to_id(const string &s)
+{
+    string t;
+    t.reserve(s.size());
+    // Iterate as unsigned char: passing a negative char to isalpha() is
+    // undefined behaviour, which bytes >= 0x80 would otherwise trigger.
+    for (unsigned char c : s) {
+        if (isalpha(c))
+            t += static_cast<char>(c);
+    }
+    return t;
+}
+
+// Wrap s in single quotes so the shell treats it as exactly one word.
+// An embedded single quote is written as '\'' (close the quote, emit an
+// escaped quote, reopen the quote).
+static string shell_quote(const string &s)
+{
+    string quoted = "'";
+    for (char c : s) {
+        if (c == '\'')
+            quoted += "'\\''";
+        else
+            quoted += c;
+    }
+    quoted += '\'';
+    return quoted;
+}
+
+// Reads /tmp/mxclean/filelist (one path per line) and
+// /tmp/mxclean/files_to_keep (whitespace-separated IDs), then writes
+// /tmp/mxclean/remove.sh containing one "rm" command for every listed path
+// whose ID (see path_to_id) is not in the keep list. No file is deleted
+// here; the generated script has to be run separately.
+//
+// Returns 0 on success, 1 if a file cannot be opened or the script cannot
+// be written.
+int main()
+{
+    ifstream flist("/tmp/mxclean/filelist");
+    ifstream keeplist("/tmp/mxclean/files_to_keep");
+    ofstream rm("/tmp/mxclean/remove.sh");
+
+    // A missing keep list would otherwise look like "keep nothing" and mark
+    // every file for removal, so refuse to continue.
+    if (!flist || !keeplist || !rm) {
+        cerr << "cannot open filelist, files_to_keep or remove.sh in /tmp/mxclean" << endl;
+        return 1;
+    }
+
+    set<string> keep_set;
+    string s;
+    int keep = 0, remove = 0;
+
+    while (keeplist >> s) {
+        keep_set.insert(s);
+    }
+
+    // Read whole lines so that a path containing blanks stays one path.
+    while (getline(flist, s)) {
+        if (s.empty())
+            continue;
+        if (keep_set.count(path_to_id(s)) != 0) {
+            keep++;
+        } else {
+            remove++;
+            // Quote the path and end option parsing with "--" so unusual
+            // file names cannot be misread by the shell or by rm.
+            rm << "rm -- " << shell_quote(s) << '\n';
+        }
+    }
+
+    rm.close();
+    if (rm.fail()) {
+        cerr << "failed to write /tmp/mxclean/remove.sh" << endl;
+        return 1;
+    }
+
+    cout << "keep " << keep << " files, remove " << remove << " files." << endl;
+    return 0;
+}
diff --git a/mxclean/clean_local_store.sh b/mxclean/clean_local_store.sh
new file mode 100644
index 0000000..12402e7
--- /dev/null
+++ b/mxclean/clean_local_store.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+#
+# Work out which files under ${content_dir} are not referenced any more and
+# generate ${tmp}/remove.sh to delete them. This script deletes nothing
+# itself; it prints the command to run at the end.
+#
+# Must be started from the directory that contains clean_files.cpp.
+
+set -e
+
+# Cut-off 30 days ago. date prints seconds; the appended "000" turns that
+# into milliseconds (created_ts is presumably in ms -- confirm in the schema).
+purge_ts="$(( $(date +%s) - 3600 * 24 * 30 ))000"
+server=matrixim.cc
+content_dir=/var/lib/matrix-synapse/media_store/local_content
+# Keep in sync with the /tmp/mxclean paths hard-coded in clean_files.cpp.
+tmp=/tmp/mxclean
+CLEANER="$(pwd)/clean_files"
+
+make "${CLEANER}"
+cd "${content_dir}"
+mkdir -p "${tmp}"
+# Give find an explicit start point; omitting it is a GNU extension.
+find . -type f > "${tmp}/filelist"
+
+# for avatars in state events
+# E2EE messages don't have the URI in the events, so don't rely on the
+# events alone to find every referenced file.
+# grep exits 1 when nothing matches; tolerate that so "set -e" does not
+# abort the script, but still fail on a real grep error (exit status > 1).
+echo 'select content from events' | psql -d synapse -Aqt \
+    | { grep -F "mxc://${server}/" || [ "$?" -eq 1 ]; } > "${tmp}/file_in_db"
+# "// empty" makes jq print nothing for a missing key. Deleting lines that
+# contain "null" afterwards would also drop real IDs containing "null".
+jq -r '.avatar_url // empty' "${tmp}/file_in_db" | awk -F '/' '{print $NF}' > "${tmp}/dbfiles"
+jq -r '.url // empty' "${tmp}/file_in_db" | awk -F '/' '{print $NF}' >> "${tmp}/dbfiles"
+
+# profile avatars
+echo 'select avatar_url from profiles' | psql -d synapse -Aqt | sed '/^$/d' | awk -F '/' '{print $NF}' > "${tmp}/avatars"
+
+# media created after the cut-off is always kept
+echo "select media_id from local_media_repository where created_ts > ${purge_ts}" \
+    | psql -d synapse -Aqt > "${tmp}/recent_media"
+
+cat "${tmp}/dbfiles" "${tmp}/avatars" "${tmp}/recent_media" | sort -u > "${tmp}/files_to_keep"
+"${CLEANER}"
+
+echo "Please run 'cd ${content_dir} && sh ${tmp}/remove.sh' to clean the local content."