diff options
author | Iru Cai <mytbk920423@gmail.com> | 2018-10-30 01:02:23 +0800 |
---|---|---|
committer | Iru Cai <mytbk920423@gmail.com> | 2018-10-30 01:02:23 +0800 |
commit | 2a7af550d5b1f3b656173545ce339ae14463eed3 (patch) | |
tree | a24b9e836f0b7c6c25bed87393aaca2403c3c7c5 | |
download | pkulib-dl-2a7af550d5b1f3b656173545ce339ae14463eed3.tar.xz |
-rw-r--r-- | COPYING | 25 | ||||
-rw-r--r-- | infopage.jpg | bin | 0 -> 35267 bytes | |||
-rwxr-xr-x | pkulib-dl.sh | 29 | ||||
-rw-r--r-- | readme.rst | 10 |
4 files changed, 64 insertions, 0 deletions
@@ -0,0 +1,25 @@ +Copyright (c) 2018 Iru Cai + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
#!/bin/bash
# Copyright (C) 2016-2018 Iru Cai <mytbk920423@gmail.com>
# SPDX-License-Identifier: MIT
#
# pkulib-dl.sh - PKU thesis download script.
#
# Provenance: this script was added as a new file (mode 100755) by the commit
# shown above, together with infopage.jpg (new binary file, index
# 0000000..54f4773) and readme.rst (new file, index 0000000..f52ba48).
#
# From readme.rst (translated):
#   Since no way to break the DRM of the Xiaomai reader has been found yet,
#   this script simply fetches the thesis page images over HTTP.
#
#   Dependencies: curl, img2pdf
#
#   Usage: right-click the "view full text" button as shown in infopage.jpg,
#   copy the link URL, then run:
#       ./pkulib-dl.sh <url>
#
# Output: the page images and thesis.pdf are written to a fresh directory
# created with mktemp (/tmp/pkulib.XXXXXX); its path is printed on success.
#
# Exit status: 0 on success, 2 on usage error, 1 on any other failure.

# No `set -e`: every step that matters is checked explicitly, and the script's
# exit status is simply the return value of main (the last command).
set -u
set -o pipefail

# Host that serves the article page, the jumpServlet endpoint and the images.
readonly BASE_URL='http://162.105.134.188'

# Print a diagnostic, prefixed with the script name, on stderr.
err() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

# Print the usage line on stderr.
usage() {
  printf 'Usage: %s <url>\n' "${0##*/}" >&2
}

#######################################
# Print the first whitespace-delimited token of every header called NAME
# found in FILE, one per line.
# The header name is matched case-insensitively and only at the start of a
# line, so "location:" is found and "Content-Location:" is not; a trailing
# carriage return is stripped.
# Arguments: $1 - header name without the colon, $2 - header dump file
# Outputs:   matching values on stdout (nothing when there is no match)
#######################################
header_values() {
  local name=$1 file=$2
  awk -v name="$name" '
    BEGIN { prefix = tolower(name) ":" }
    {
      sub(/\r$/, "")
      if (tolower(substr($0, 1, length(prefix))) != prefix) next
      value = substr($0, length(prefix) + 1)
      sub(/^[ \t]+/, "", value)   # drop the blank(s) after the colon
      sub(/[ \t].*$/, "", value)  # keep only the first token
      if (value != "") print value
    }
  ' "$file"
}

#######################################
# Print the value="..." attribute from the first line of FILE that contains
# KEY and carries such an attribute.
# Arguments: $1 - literal text identifying the line, $2 - HTML file
# Outputs:   the attribute value on stdout (nothing when not found)
#######################################
form_value() {
  local key=$1 file=$2
  grep -F -- "$key" "$file" \
    | sed -n 's/.*value="\([^"]*\)".*/\1/p' \
    | head -n 1
}

#######################################
# Print the first run of digits on the "var endpage" line of FILE.
# Arguments: $1 - HTML file
# Outputs:   the digits on stdout (nothing when not found)
#######################################
end_page() {
  local file=$1
  grep -F -- 'var endpage' "$file" \
    | grep -o '[0-9][0-9]*' \
    | head -n 1
}

#######################################
# Follow the two redirects behind the "view full text" link, read the image
# directory, file id and page count from the article page, download every
# page image and combine the images into thesis.pdf.
# Arguments: $1 - URL copied from the "view full text" button
# Outputs:   progress and diagnostics on stderr, "Saved to ..." on stdout
# Returns:   0 on success, 2 on usage error, 1 on any other failure
#######################################
main() {
  if (( $# < 1 )); then
    usage
    return 2
  fi
  local web_url=$1

  local tool
  for tool in curl img2pdf; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      err "required tool not found: $tool"
      return 1
    fi
  done

  # All intermediate files live in the private work directory instead of
  # fixed names under /tmp, so concurrent runs cannot clobber each other.
  local workdir
  if ! workdir=$(mktemp -d '/tmp/pkulib.XXXXXX'); then
    err 'cannot create working directory'
    return 1
  fi
  local headers="$workdir/headers.txt"
  local article="$workdir/article.htm"

  # First hop: only the redirect target is needed, so the body is discarded
  # and the headers are parsed from their own file.
  if ! curl -sS -D "$headers" -o /dev/null "$web_url"; then
    err "request failed: $web_url"
    return 1
  fi
  local first_hop
  first_hop=$(header_values Location "$headers" | head -n 1)
  if [[ -z "$first_hop" ]]; then
    err "no Location header returned for $web_url"
    return 1
  fi

  # Second hop: yields the article path and the session cookie(s).
  if ! curl -sS -D "$headers" -o /dev/null "$first_hop"; then
    err "request failed: $first_hop"
    return 1
  fi
  local article_path cookie
  article_path=$(header_values Location "$headers" | head -n 1)
  if [[ -z "$article_path" ]]; then
    err "no Location header returned for $first_hop"
    return 1
  fi
  # Join all name=value tokens, making sure each one ends in ';'.
  cookie=$(header_values Set-Cookie "$headers" | sed 's/;*$/;/' | tr -d '\n')

  local -a session=(-sS)
  if [[ -n "$cookie" ]]; then
    session+=(-b "$cookie")
  fi

  if ! curl "${session[@]}" --fail -o "$article" \
    "${BASE_URL}/${article_path#/}"; then
    err 'cannot fetch the article page'
    return 1
  fi

  local img_dir fid last_page
  img_dir=$(form_value sm_img_url "$article")
  fid=$(form_value fid "$article")
  last_page=$(end_page "$article")
  if [[ -z "$img_dir" || -z "$fid" ]]; then
    err 'cannot find sm_img_url/fid in the article page'
    return 1
  fi
  if [[ ! "$last_page" =~ ^[0-9]+$ ]]; then
    err 'cannot find the page count (var endpage) in the article page'
    return 1
  fi
  last_page=$((10#$last_page))  # force base 10 so "010" is not read as octal
  if (( last_page < 1 )); then
    err 'the article page reports no pages'
    return 1
  fi

  local -a images=()
  local page image_url image_file
  for (( page = 1; page <= last_page; page++ )); do
    printf 'Fetching page %d/%d\n' "$page" "$last_page" >&2
    # Kept from the original flow: jumpServlet is requested before each image.
    # NOTE(review): presumably the server needs this call before it serves
    # the page image - confirm. A failure here is reported but not fatal.
    curl "${session[@]}" -o /dev/null \
      "${BASE_URL}/jumpServlet?page=${page}&fid=${fid}" \
      || err "warning: jumpServlet request failed for page $page"

    # img_dir is passed as data, never as part of the printf format string.
    printf -v image_url '%s/store/%s/P01_%05d.jpg' \
      "$BASE_URL" "$img_dir" "$page"
    image_file="$workdir/$page.jpg"
    if ! curl "${session[@]}" --fail -o "$image_file" "$image_url"; then
      err "cannot download page $page: $image_url"
      return 1
    fi
    images+=("$image_file")
  done

  if ! img2pdf "${images[@]}" --output "$workdir/thesis.pdf"; then
    err 'img2pdf failed'
    return 1
  fi
  printf '%s\n' "Saved to $workdir/thesis.pdf"
}

main "$@"