#!/bin/bash
# Copyright (C) 2016-2018 Iru Cai
# SPDX-License-Identifier: MIT
#
# Fetch every page image of a document served by the viewer at
# 162.105.134.188 and merge the images into one PDF with img2pdf.
#
# Usage: <script> WEBURL
#   WEBURL  entry URL. Its response must carry a Location header; the
#           response behind that Location must carry a second Location
#           header (path of the viewer page) and a Set-Cookie header
#           (sent along with all later requests).
#
# Output: a fresh directory /tmp/pkulib.XXXXXX holding the page images
#         (1.jpg, 2.jpg, ...) and thesis.pdf. Its path is printed at the end.
# Exit status: 0 on success, 2 on usage error, 1 on any other failure.

set -u

readonly HOST='http://162.105.134.188'

# Private scratch directory for response headers and the viewer page;
# removed by the EXIT trap so nothing predictable is left in /tmp.
SCRATCH_DIR=''

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the scratch directory, if one was created.
cleanup() {
  if [[ -n "$SCRATCH_DIR" ]]; then
    rm -rf -- "$SCRATCH_DIR"
  fi
}

# Print the second space-separated field of each "<name>:" header line.
# Arguments: $1 - header name (matched case-insensitively at line start)
#            $2 - file holding headers as dumped by `curl -D`
# CR/LF are stripped, so values from several matching lines are joined.
header_field() {
  grep -i -- "^$1:" "$2" | cut -d ' ' -f2 | tr -d '\n\r'
}

# Print the value="..." attribute found on each line that contains $1.
# Arguments: $1 - text identifying the line, $2 - HTML file to search
input_value() {
  grep -- "$1" "$2" | sed 's/.*value="\([^"]*\)".*/\1/'
}

main() {
  if (( $# < 1 )); then
    printf 'Usage: %s WEBURL\n' "${0##*/}" >&2
    exit 2
  fi
  local weburl=$1

  local tool
  for tool in curl img2pdf; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
  done

  SCRATCH_DIR=$(mktemp -d) || die 'cannot create scratch directory'
  trap cleanup EXIT
  local head1="$SCRATCH_DIR/head1"
  local head2="$SCRATCH_DIR/head2"
  local article="$SCRATCH_DIR/article.htm"

  # First hop: only the Location header matters, so the body is discarded
  # instead of being mixed into the text that is searched for headers.
  curl -o /dev/null -D "$head1" "$weburl" || die "request failed: $weburl"
  local url1
  url1=$(header_field Location "$head1")
  [[ -n "$url1" ]] || die "no Location header in response from $weburl"

  # Second hop: yields the viewer page path and the cookie.
  curl -o /dev/null -D "$head2" "$url1" || die "request failed: $url1"
  local loc cookie
  loc=$(header_field Location "$head2")
  cookie=$(header_field Set-Cookie "$head2")
  [[ -n "$loc" ]] || die "no Location header in response from $url1"
  if [[ -z "$cookie" ]]; then
    printf 'warning: no Set-Cookie header in response from %s\n' "$url1" >&2
  fi

  # -f: treat an HTTP error as a failure instead of parsing an error page.
  curl -f -b "$cookie" -o "$article" "$HOST/$loc" \
    || die "cannot fetch viewer page $HOST/$loc"

  local imgdir fid pages
  imgdir=$(input_value sm_img_url "$article")
  fid=$(input_value fid "$article")
  pages=$(grep 'var endpage' "$article" | grep -oE '[0-9]+')
  [[ -n "$imgdir" ]] || die 'sm_img_url not found in viewer page'
  [[ -n "$fid" ]] || die 'fid not found in viewer page'
  # Must be exactly one run of digits; anything else (nothing found, or
  # several numbers on the line) would silently produce a wrong page range.
  [[ "$pages" =~ ^[0-9]+$ ]] || die 'cannot determine page count (var endpage)'
  pages=$((10#$pages)) # force base 10 so a leading zero is not read as octal
  (( pages >= 1 )) || die 'viewer page reports zero pages'

  local workdir
  workdir=$(mktemp -d '/tmp/pkulib.XXXXXX') || die 'cannot create work directory'
  cd "$workdir" || die "cannot cd to $workdir"

  local -a imgs=()
  local pg img_url
  for (( pg = 1; pg <= pages; pg++ )); do
    # Request issued before each image download; its response body is not
    # used. NOTE(review): presumably the server requires this before it
    # serves the page image - confirm.
    curl -b "$cookie" -o /dev/null "$HOST/jumpServlet?page=${pg}&fid=${fid}"

    # Data goes in printf arguments, never in the format string.
    printf -v img_url '%s/store/%s/P01_%05d.jpg' "$HOST" "$imgdir" "$pg"
    curl -f -b "$cookie" -o "${pg}.jpg" "$img_url" \
      || die "failed to download page $pg ($img_url)"
    imgs+=("${pg}.jpg")
  done

  img2pdf "${imgs[@]}" --output thesis.pdf || die 'img2pdf failed'
  echo "Saved to $workdir/thesis.pdf"
}

main "$@"