1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
|
# coding=UTF-8
"""
download the directory
13/09/04 kingson
* add this changelog
Notes:
1. make sure the target path does not exist yet; otherwise mkdir raises an error and the script stops
2. the delay passed to sleep() (wait_time) depends on the site and may need tuning
3. tested on FireBird BBS only
Bugs:
1.
TODO:
1. add some GUI for control, e.g. stop, pause, resume, inside script
2. make it robust
"""
import fqterm
import sys,os,string,time,re
# Pointer to the fqtermWindow object. It is read from sys.argv[0] and handed
# back as the first argument of every fqterm.* call in this script.
# NOTE(review): presumably FQTerm places the pointer in argv[0] when it
# launches the script - confirm against the host application.
lp=long(sys.argv[0])
# Seconds to sleep after each key stroke sent to the BBS, so the screen can
# refresh before it is read again (see note 2 in the module docstring).
wait_time = 2.0
def txt2html(txt):
    """Convert plain article text into an HTML fragment.

    The markup characters ``&``, ``<`` and ``>`` are replaced by entities so
    that article content is shown literally instead of being parsed as HTML.
    Every newline is followed by a ``<br>`` tag and every space becomes
    ``&nbsp;`` so that the column layout of the text survives in a browser.

    txt -- the article text as a string.
    Returns the converted string.
    """
    # Escape "&" first; otherwise the "&" of the entities inserted afterwards
    # would be escaped a second time.
    txt = txt.replace("&", "&amp;")
    txt = txt.replace("<", "&lt;")
    txt = txt.replace(">", "&gt;")
    # Insert the line breaks only after escaping so "<br>" stays a real tag.
    txt = txt.replace("\n", "\n<br>")
    return txt.replace(" ", "&nbsp;")
def upper_dir(path):
    """Return the parent directory of *path*, keeping the trailing separator.

    The last character of *path* is ignored while searching, so a path that
    ends with a separator yields its parent.  The separator is '\\' on win32
    and '/' everywhere else.  When no separator is found the result is the
    empty string.
    """
    separator = "\\" if sys.platform == "win32" else "/"
    # rfind() returns -1 when nothing is found, which makes the slice empty.
    cut = path.rfind(separator, 0, -1)
    return path[:cut + 1]
def lower_dir(path, subdir):
    """Return the path of *subdir* inside *path*, with a trailing separator.

    *path* is expected to already end with a separator; the result is plain
    string concatenation.  The separator appended is '\\' on win32 and '/'
    everywhere else.
    """
    separator = "\\" if sys.platform == "win32" else "/"
    return path + subdir + separator
def write_html_header(hfile, num):
    """Write the top part of an article page to *hfile*.

    The output consists of the HTML head, the opening <body> tag, the page
    heading and a navigation bar that links to the previous article
    (num-1), the folder index and the next article (num+1).

    hfile -- writable file-like object.
    num   -- number of the article this page belongs to (int).
    """
    hfile.write("""<html><head>
<meta http-equiv="Content-Language" content="zh-cn">
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<title>FQTerm Article Downloader</title>
</head>""")
    hfile.write("<body>")
    hfile.write("<p><b><h1>FQTerm Article Downloader</h1></b></p>")
    # End tags must not carry attributes, and one paragraph is enough for the
    # navigation bar.
    navigation = """<p align=center><a href="%d.html">Previous</a>
<a href="index.html">Index</a>
<a href="%d.html">Next</a></p>""" % (num - 1, num + 1)
    hfile.write(navigation)
    hfile.write("<hr><p></p>\n")
def write_html_ender(hfile,num):
    """Write the bottom part of an article page to *hfile*.

    The output consists of a navigation bar that links to the previous
    article (num-1), the folder index and the next article (num+1), the
    FQTerm credit lines and the closing </body> and </html> tags.

    hfile -- writable file-like object.
    num   -- number of the article this page belongs to (int).
    """
    hfile.write("<hr><p></p>")
    # End tags must not carry attributes, and one paragraph is enough for the
    # navigation bar.
    navigation = """<p align=center><a href="%d.html">Previous</a>
<a href="index.html">Index</a>
<a href="%d.html">Next</a></p>""" % (num - 1, num + 1)
    hfile.write(navigation)
    # Each paragraph is closed with </p> instead of opening another <p>.
    hfile.write("<p><b>FQTerm --- BBS client based on Qt library</b></p>")
    hfile.write("""<p><a href="http://code.google.com/p/fqterm">
http://code.google.com/p/fqterm</a></p>""")
    hfile.write("</body>")
    hfile.write("</html>")
def write_index_header(hfile):
    """Write the top part of a folder index page to *hfile*.

    The output consists of the HTML head, the opening <body> tag, the page
    heading and an "Up" link that points to the index page of the parent
    folder.

    hfile -- writable file-like object.
    """
    hfile.write("""<html><head>
<meta http-equiv="Content-Language" content="zh-cn">
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<title>FQTerm Article Downloader</title>
</head>""")
    # write_index_ender() emits </body>, so the element has to be opened here.
    hfile.write("<body>")
    hfile.write("<p><b><h1>FQTerm Article Downloader</h1></b></p>")
    hfile.write("""<p align=center>
<a href="../index.html">Up</a>
</p>""")
    hfile.write("<hr><p></p>\n")
def write_index_ender(hfile):
    """Write the bottom part of a folder index page to *hfile*.

    The output consists of an "Up" link that points to the index page of the
    parent folder, the FQTerm credit lines and the closing </body> and
    </html> tags.

    hfile -- writable file-like object.
    """
    hfile.write("<hr><p></p>")
    hfile.write("""<p align=center>
<a href="../index.html">Up</a>
</p>""")
    # Each paragraph is closed with </p> instead of opening another <p>.
    hfile.write("<p><b>FQTerm --- BBS client based on Qt library</b></p>")
    hfile.write("""<p><a href="http://code.google.com/p/fqterm">
http://code.google.com/p/fqterm</a></p>""")
    hfile.write("</body>")
    hfile.write("</html>")
def get_list_num(str_line):
    """Return the first run of decimal digits in *str_line* as a string.

    Returns None when the line contains no digit at all, which the caller
    treats as a wrongly formatted list line.
    """
    found = re.search("[0-9]+", str_line)
    if found is not None:
        return found.group(0)
    # No number on this line.
    return None
def get_list_categary(str_line):
    """Return the bracketed category tag found in *str_line*.

    The tag is the first '[' followed by exactly six non-digit characters and
    a ']'; the brackets are part of the returned string.  Returns None when
    the line holds no such tag.
    """
    found = re.search(r"\[[^0-9]{6}\]", str_line)
    if found is not None:
        return found.group(0)
    # No category tag on this line.
    return None
def get_list_title(str_line):
    """Return the entry title that follows the category tag in *str_line*.

    The title starts one character after the closing ']' of the tag (a '['
    followed by exactly six non-digit characters and a ']') and runs to the
    end of the line.  Returns None when the line holds no such tag.
    """
    found = re.search(r"\[[^0-9]{6}\]", str_line)
    if found is None:
        # No category tag, hence no title.
        return None
    # Skip the single character that separates the tag from the title.
    title_start = found.end() + 1
    return str_line[title_start:]
def _leave_folder():
    """Finish the current folder's index.html and go back to the parent.

    Nothing is done for the root folder (path == path_dir): the code that
    started the download writes the root index footer and sends the final
    'q' itself.  Skipping the root here keeps `path` from climbing above
    `path_dir` and keeps the footer from being written twice.
    """
    global path
    if path == path_dir:
        return
    with open(path + "index.html", "a+") as index_file:
        write_index_ender(index_file)
    print("leave %s" % path)
    # 'q' leaves the current list on the BBS.
    fqterm.sendString(lp, 'q')
    path = upper_dir(path)
    time.sleep(wait_time)


def _save_article(article_num, article_title):
    """Link the article in index.html and save it as <article_num>.html."""
    with open(path + "index.html", "a+") as index_file:
        index_file.write("<p><a href=" + article_num + ".html>")
        index_file.write("[文件] " + article_title + "</a></p>\n")
    # 'r' opens the entry under the cursor; wait for the screen to update.
    fqterm.sendString(lp, 'r')
    time.sleep(wait_time)
    with open(path + article_num + ".html", "w") as article_file:
        write_html_header(article_file, int(article_num))
        # NOTE(review): the meaning of the second getArticle() argument (100)
        # is not visible here - presumably a timeout; confirm in the FQTerm
        # script API.
        article_file.write(txt2html(fqterm.getArticle(lp, 100)[0]))
        article_file.write("\n")
        write_html_ender(article_file, int(article_num))
    time.sleep(wait_time)
    # Back to the list.
    fqterm.sendString(lp, "q")
    time.sleep(wait_time)


def _enter_folder(article_num, article_title):
    """Link the sub-folder in index.html, create it locally and enter it."""
    global path
    with open(path + "index.html", "a+") as index_file:
        index_file.write("<p><a href=" + article_num + "/index.html>")
        index_file.write("[目录] " + article_title + "</a></p>\n")
    path = lower_dir(path, article_num)
    # os.mkdir() raises when the directory already exists, so the download
    # target has to be fresh.
    os.mkdir(path)
    print("enter %s" % path)
    with open(path + "index.html", "w") as index_file:
        write_index_header(index_file)
    # 'r' opens the entry under the cursor, here the sub-folder.
    fqterm.sendString(lp, 'r')
    time.sleep(wait_time)


def down_folder():
    """Download the BBS folder shown on screen, recursing into sub-folders.

    Starting at the line under the cursor, each list entry is read from the
    screen.  Entries tagged '[文件]' are saved as <number>.html, entries
    tagged '[目录]' are created as sub-directories and downloaded by a
    recursive call.  After each entry the cursor is moved one line down.

    The folder is considered finished when a line has no number, when the
    category tag is not recognized, or when the number of the next entry is
    not greater than the current one.  The last rule can misfire when the
    server is very slow.  On finish the folder's index.html is completed and
    the list is left; the root folder is left to the caller.

    Uses the module globals `lp`, `wait_time`, `path` and `path_dir`.
    """
    while True:
        line = fqterm.caretY(lp)
        str_line = fqterm.getText(lp, line)
        article_num = get_list_num(str_line)
        if article_num is None:
            print("Wrong format list")
            _leave_folder()
            return

        article_categary = get_list_categary(str_line)
        article_title = get_list_title(str_line)
        if article_categary == '[文件]':
            _save_article(article_num, article_title)
        elif article_categary == '[目录]':
            _enter_folder(article_num, article_title)
            # The recursive call leaves the sub-folder again before returning.
            down_folder()
        else:
            print("Unrecognized Categary")
            _leave_folder()
            return

        # Move the cursor down and read the number of the next entry.
        fqterm.sendString(lp, 'j')
        time.sleep(wait_time)
        str_next = fqterm.getText(lp, fqterm.caretY(lp))
        article_num_next = get_list_num(str_next)
        if article_num_next is None:
            print("Wrong format list")
            _leave_folder()
            return

        # A number that did not grow means the cursor could not advance (or
        # wrapped around): this folder is done.
        if int(article_num_next) <= int(article_num):
            _leave_folder()
            return
# NOTE: make sure path ends with '/' (*nix) or '\\' (Windows); file names are
# built from it by plain string concatenation.
#path_dir=path="e:\\temp\\test\\"
path_dir=path="/home/dp2/temp/test/"
# Alternative: save everything below the home directory.
# path_dir=path=os.environ['HOME']+"/.fqterm/downloads/"+time.ctime()+"/"
# os.makedirs() raises when the directory already exists.
os.makedirs(path)
# Enter the folder listing on the BBS.
fqterm.sendString(lp,'x')
time.sleep(wait_time)
# Create index.html and write the header; "with" closes the file even when
# writing fails.
with open(path+"index.html","w") as f:
    write_index_header(f)
down_folder()
# Finish the index.html of the root folder.
with open(path+"index.html","a+") as f:
    write_index_ender(f)
# Leave the folder listing.
fqterm.sendString(lp,'q')
|