summaryrefslogtreecommitdiff
path: root/pkulib-dl.sh
blob: 8190f2830ca69ede1e77bcefd7f35fe061a77f46 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/bin/bash
# Copyright (C)  2016-2018 Iru Cai <mytbk920423@gmail.com>
# SPDX-License-Identifier: MIT

# Usage: pkulib-dl.sh WEBURL
#
# Requests WEBURL and the URL it redirects to in order to obtain a cookie and
# the location of the viewer page, reads the image directory, the file id and
# the last page number from that page, downloads every page image into a
# fresh temporary directory and merges the images into thesis.pdf with
# img2pdf.
# NOTE(review): judging by the script name this targets the PKU library
# thesis viewer - confirm before documenting it as such elsewhere.

HOST='http://162.105.134.188'

# die MESSAGE...
# Print MESSAGE on stderr and abort the script with status 1.
die() {
	printf '%s\n' "$*" >&2
	exit 1
}

# header_field NAME FILE
# Print the second space-separated field of every header line in FILE whose
# name is NAME, with CR/LF removed (several matching lines are concatenated).
# The match is anchored and case-insensitive: HTTP header names are not case
# sensitive, and anchoring keeps "Content-Location:" from matching "Location".
header_field() {
	grep -i "^$1:" "$2" | cut -d ' ' -f2 | tr -d '\n\r'
}

# form_value PATTERN FILE
# Print the contents of the value="..." attribute found on the line(s) of
# FILE that match PATTERN.
form_value() {
	grep -- "$1" "$2" | sed 's/.*value=\"\([^"]*\)\".*/\1/'
}

if [[ $# -lt 1 || -z "$1" ]]; then
	printf 'Usage: %s WEBURL\n' "${0##*/}" >&2
	exit 2
fi
WEBURL="$1"

# Fail before downloading anything if a required tool is missing.
for _tool in curl img2pdf
do
	command -v "$_tool" >/dev/null || die "required tool not found: $_tool"
done

# Scratch files live in a private directory instead of fixed names under
# /tmp, so concurrent runs do not clobber each other and nobody can pre-plant
# a symlink at a predictable path. The directory is removed on exit.
SCRATCH=$(mktemp -d '/tmp/pkulib-tmp.XXXXXX') || die "cannot create scratch directory"
trap 'rm -rf -- "$SCRATCH"' EXIT
HD1="$SCRATCH/head1"
HD2="$SCRATCH/head2"
ARTICLE="$SCRATCH/article.htm"

# First hop: only the redirect target is needed, so the body is discarded
# (this also keeps body text from being mistaken for a header line).
curl -D "$HD1" -o /dev/null "$WEBURL" || die "cannot fetch $WEBURL"
URL1=$(header_field Location "$HD1")
[[ -n "$URL1" ]] || die "no Location header in the response for $WEBURL"

# Second hop: yields the viewer page location and the cookie(s).
curl -D "$HD2" -o /dev/null "$URL1" || die "cannot fetch $URL1"
LOC=$(header_field Location "$HD2")
COOKIE=$(header_field Set-Cookie "$HD2")
[[ -n "$LOC" ]] || die "no Location header in the response for $URL1"
# An empty cookie is only reported, not fatal, to stay compatible with the
# previous behaviour of passing whatever was found to curl.
[[ -n "$COOKIE" ]] || printf 'warning: no Set-Cookie header received\n' >&2

curl -b "$COOKIE" "$HOST/$LOC" --output "$ARTICLE" || die "cannot fetch $HOST/$LOC"
IMGDIR=$(form_value sm_img_url "$ARTICLE")
FID=$(form_value fid "$ARTICLE")
PAGE=$(grep 'var endpage' "$ARTICLE" | grep -o '[0-9]*')

[[ -n "$IMGDIR" ]] || die "sm_img_url not found in the viewer page"
[[ -n "$FID" ]] || die "fid not found in the viewer page"
[[ "$PAGE" =~ ^[0-9]+$ ]] || die "cannot determine the number of pages (got '$PAGE')"
# Force base 10 so that a zero-padded value is not read as octal below.
PAGE=$((10#$PAGE))
(( PAGE >= 1 )) || die "the viewer page reports no pages"

WORKDIR=$(mktemp -d '/tmp/pkulib.XXXXXX') || die "cannot create work directory"
cd "$WORKDIR" || die "cannot enter $WORKDIR"

IMGS=()
for ((_pg = 1; _pg <= PAGE; _pg++))
do
	# Request the page through jumpServlet before fetching its image, as the
	# script has always done; the response body is not used, so discard it.
	# NOTE(review): presumably the server requires this request before it
	# serves the image - confirm. A failure here stays non-fatal as before.
	curl -b "$COOKIE" -o /dev/null "$HOST/jumpServlet?page=${_pg}&fid=${FID}"

	# IMGDIR is passed as an argument, never as part of the format string,
	# so a '%' in the scraped value cannot corrupt the URL.
	printf -v _img_url '%s/store/%s/P01_%05d.jpg' "$HOST" "$IMGDIR" "$_pg"

	# --fail keeps an HTTP error page from being saved as a .jpg, which
	# would only surface later as an obscure img2pdf error.
	curl --fail -b "$COOKIE" "$_img_url" --output "$_pg.jpg" \
		|| die "failed to download page $_pg (partial download kept in $WORKDIR)"
	IMGS+=("$_pg.jpg")
done

img2pdf "${IMGS[@]}" --output thesis.pdf \
	|| die "img2pdf failed (page images kept in $WORKDIR)"
echo "Saved to $WORKDIR/thesis.pdf"