From 2a7af550d5b1f3b656173545ce339ae14463eed3 Mon Sep 17 00:00:00 2001
From: Iru Cai
Date: Tue, 30 Oct 2018 01:02:23 +0800
Subject: initial commit

---
Review note: pkulib-dl.sh in this patch was hardened after the original
commit, so its "index" blob id and the commit id above predate that change.

 COPYING      |  25 ++++++++++++++++++
 infopage.jpg | Bin 0 -> 35267 bytes
 pkulib-dl.sh |  82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 readme.rst   |  10 ++++++++
 4 files changed, 117 insertions(+)
 create mode 100644 COPYING
 create mode 100644 infopage.jpg
 create mode 100755 pkulib-dl.sh
 create mode 100644 readme.rst

diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..c0c6851
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,25 @@
+Copyright (c) 2018 Iru Cai
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/infopage.jpg b/infopage.jpg
new file mode 100644
index 0000000..54f4773
Binary files /dev/null and b/infopage.jpg differ
diff --git a/pkulib-dl.sh b/pkulib-dl.sh
new file mode 100755
index 0000000..8190f28
--- /dev/null
+++ b/pkulib-dl.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+# Copyright (C) 2016-2018 Iru Cai
+# SPDX-License-Identifier: MIT
+#
+# Download a thesis from the PKU library page-image viewer and merge the
+# pages into one PDF.
+#
+# Usage:    ./pkulib-dl.sh URL
+#           URL is the link copied from the "view full text" button.
+# Requires: curl, img2pdf
+# Output:   thesis.pdf inside a fresh /tmp/pkulib.XXXXXX directory; the
+#           path is printed on success. Exits non-zero on any failure.
+
+set -u
+
+readonly BASE_URL='http://162.105.134.188'
+
+# Print a message on stderr and abort.
+die() {
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+# header_value NAME FILE: print the second space-separated field of each
+# "NAME:" header line in FILE (name matched case-insensitively), with CR/LF
+# removed, so multiple matches are concatenated.
+header_value() {
+  grep -i "^$1:" "$2" | cut -d ' ' -f2 | tr -d '\n\r'
+}
+
+# form_value KEY FILE: print the value="..." attribute from the line(s) of
+# FILE that mention KEY.
+form_value() {
+  grep -- "$1" "$2" | sed 's/.*value=\"\([^"]*\)\".*/\1/'
+}
+
+[[ $# -eq 1 && -n ${1-} ]] || die "Usage: ${0##*/} URL"
+for tool in curl img2pdf; do
+  command -v "$tool" >/dev/null || die "missing dependency: $tool"
+done
+WEBURL=$1
+
+# All scratch files live in a private directory instead of fixed,
+# predictable names directly under /tmp.
+WORKDIR=$(mktemp -d '/tmp/pkulib.XXXXXX') || die 'mktemp failed'
+cd "$WORKDIR" || die "cannot enter $WORKDIR"
+
+# The copied link answers with a redirect; only its headers are needed.
+curl -D head1 --output /dev/null "$WEBURL" || die "request failed: $WEBURL"
+URL1=$(header_value Location head1)
+[[ -n $URL1 ]] || die 'no Location header in the first response'
+
+# The redirect target supplies the cookie and the viewer path.
+curl -D head2 "$URL1" || die "request failed: $URL1"
+LOC=$(header_value Location head2)
+COOKIE=$(header_value Set-Cookie head2)
+[[ -n $LOC ]] || die 'no Location header in the second response'
+
+curl -b "$COOKIE" "$BASE_URL/$LOC" --output article.htm \
+  || die 'cannot fetch the viewer page'
+IMGDIR=$(form_value sm_img_url article.htm)
+FID=$(form_value fid article.htm)
+PAGE=$(grep 'var endpage' article.htm | grep -o '[0-9][0-9]*')
+[[ -n $IMGDIR && -n $FID ]] || die 'cannot find sm_img_url/fid in the page'
+[[ $PAGE =~ ^[0-9]+$ ]] || die 'cannot determine the page count'
+PAGE=$((10#$PAGE)) # force base 10: a leading zero would mean octal
+((PAGE >= 1)) || die 'the viewer reports zero pages'
+
+IMGS=()
+for ((pg = 1; pg <= PAGE; pg++)); do
+  # Request the page through jumpServlet before fetching its image
+  # (presumably the server requires this -- TODO confirm).
+  curl -b "$COOKIE" "$BASE_URL/jumpServlet?page=${pg}&fid=${FID}"
+  # IMGDIR goes in as an argument, never as part of the printf format.
+  printf -v img_url '%s/store/%s/P01_%05d.jpg' "$BASE_URL" "$IMGDIR" "$pg"
+  curl -b "$COOKIE" "$img_url" --output "$pg.jpg" \
+    || die "cannot download page $pg"
+  IMGS+=("$pg.jpg")
+done
+
+img2pdf "${IMGS[@]}" --output thesis.pdf || die 'img2pdf failed'
+echo "Saved to $WORKDIR/thesis.pdf"
diff --git a/readme.rst b/readme.rst
new file mode 100644
index 0000000..f52ba48
--- /dev/null
+++ b/readme.rst
@@ -0,0 +1,10 @@
+北大论文下载脚本
+=================
+
+鉴于破解小麦阅读器 DRM 的办法还没找到,我先写个通过 HTTP 抓取论文图片的脚本。
+
+依赖:curl, img2pdf
+
+使用方法:如图右键“查看全文”按钮复制链接 URL,然后执行 ./pkulib-dl.sh
+
+.. image:: infopage.jpg
-- 
cgit v1.2.3