summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIru Cai <mytbk920423@gmail.com>2018-10-30 01:02:23 +0800
committerIru Cai <mytbk920423@gmail.com>2018-10-30 01:02:23 +0800
commit2a7af550d5b1f3b656173545ce339ae14463eed3 (patch)
treea24b9e836f0b7c6c25bed87393aaca2403c3c7c5
downloadpkulib-dl-2a7af550d5b1f3b656173545ce339ae14463eed3.tar.xz
initial commitHEADmaster
-rw-r--r--COPYING25
-rw-r--r--infopage.jpgbin0 -> 35267 bytes
-rwxr-xr-xpkulib-dl.sh29
-rw-r--r--readme.rst10
4 files changed, 64 insertions, 0 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..c0c6851
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,25 @@
+Copyright (c) 2018 Iru Cai
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/infopage.jpg b/infopage.jpg
new file mode 100644
index 0000000..54f4773
--- /dev/null
+++ b/infopage.jpg
Binary files differ
diff --git a/pkulib-dl.sh b/pkulib-dl.sh
new file mode 100755
index 0000000..8190f28
--- /dev/null
+++ b/pkulib-dl.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (C) 2016-2018 Iru Cai <mytbk920423@gmail.com>
+# SPDX-License-Identifier: MIT
+#
+# Download the page images of a thesis from the library viewer at
+# 162.105.134.188 and combine them into one PDF.
+#
+# Usage:    pkulib-dl.sh <url>
+# Requires: curl, img2pdf
+# Output:   thesis.pdf inside a fresh /tmp/pkulib.XXXXXX directory; the path
+#           is printed on success. The header dumps and the viewer page
+#           fetched along the way are kept in the same directory.
+
+set -u
+
+readonly BASE_URL='http://162.105.134.188'
+
+# Print a message on stderr and abort.
+die() {
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+# Print the second space-separated field of every "<name>:" line in a curl
+# header dump, with CR/LF removed.
+#   $1 - header name; matched case-insensitively and anchored at line start,
+#        so "Content-Location:" is not mistaken for "Location:"
+#   $2 - file written by `curl -D`
+header_field() {
+  grep -i "^$1:" "$2" | cut -d ' ' -f2 | tr -d '\n\r'
+}
+
+# Run the value="..." extraction over every line of file $2 that contains $1.
+form_value() {
+  grep -- "$1" "$2" | sed 's/.*value=\"\([^"]*\)\".*/\1/'
+}
+
+[[ $# -ge 1 && -n "$1" ]] || die "Usage: ${0##*/} <url>"
+for tool in curl img2pdf; do
+  command -v "$tool" >/dev/null || die "missing dependency: $tool"
+done
+
+WEBURL="$1"
+
+# Create the private work directory first so that no intermediate file has
+# to live under a predictable name directly in /tmp.
+WORKDIR=$(mktemp -d '/tmp/pkulib.XXXXXX') || die "cannot create work directory"
+cd "$WORKDIR" || die "cannot enter $WORKDIR"
+
+# Request 1: only the Location header of the reply is used.
+curl -sS -o /dev/null -D head1 "$WEBURL" || die "request failed: $WEBURL"
+URL1=$(header_field Location head1)
+[[ -n "$URL1" ]] || die "no Location header in the reply to $WEBURL"
+
+# Request 2: the reply supplies the cookie and the path of the viewer page.
+curl -sS -o /dev/null -D head2 "$URL1" || die "request failed: $URL1"
+LOC=$(header_field Location head2)
+COOKIE=$(header_field Set-Cookie head2)
+[[ -n "$LOC" ]] || die "no Location header in the reply to $URL1"
+
+# Request 3: the viewer page carries the image directory, fid and page count.
+curl -sS -b "$COOKIE" "$BASE_URL/$LOC" --output article.htm || die "cannot fetch the viewer page"
+IMGDIR=$(form_value sm_img_url article.htm)
+FID=$(form_value fid article.htm)
+PAGE=$(grep 'var endpage' article.htm | grep -o '[0-9]*')
+[[ -n "$IMGDIR" && -n "$FID" ]] || die "cannot find sm_img_url/fid in the viewer page"
+# Refuse anything that is not exactly one number instead of guessing.
+[[ "$PAGE" =~ ^[0-9]+$ ]] || die "cannot determine the page count"
+PAGE=$((10#$PAGE)) # force base 10 so a leading zero is not read as octal
+((PAGE >= 1)) || die "the viewer page reports no pages"
+
+IMGS=()
+for ((pg = 1; pg <= PAGE; pg++)); do
+  # jumpServlet is called before each image download; its reply body is unused.
+  curl -sS -o /dev/null -b "$COOKIE" "$BASE_URL/jumpServlet?page=${pg}&fid=${FID}" || die "jumpServlet request failed for page $pg"
+  # Build the URL with a constant format so a '%' in IMGDIR cannot corrupt it.
+  printf -v img_url '%s/store/%s/P01_%05d.jpg' "$BASE_URL" "$IMGDIR" "$pg"
+  # --fail keeps an HTTP error page from being saved as a .jpg.
+  curl --fail -b "$COOKIE" "$img_url" --output "$pg.jpg" || die "cannot download page $pg"
+  IMGS+=("$pg.jpg")
+done
+
+img2pdf "${IMGS[@]}" --output thesis.pdf || die "img2pdf failed"
+echo "Saved to $WORKDIR/thesis.pdf"
diff --git a/readme.rst b/readme.rst
new file mode 100644
index 0000000..f52ba48
--- /dev/null
+++ b/readme.rst
@@ -0,0 +1,10 @@
+北大论文下载脚本
+=================
+
+鉴于破解小麦阅读器 DRM 的办法还没找到,我先写个通过 HTTP 抓取论文图片的脚本。
+
+依赖:curl, img2pdf
+
+使用方法:如图所示,右键点击“查看全文”按钮并复制链接 URL,然后执行 ``./pkulib-dl.sh <url>``。
+
+.. image:: infopage.jpg