从5a5x采集易语言源代码

2015/05/22 45370
# encoding: ASCII-8BIT
require 'open-uri'

# Entry point of the scraper: makes sure the output folders exist under
# "#{$run_file}/downfile" and then hands every category to caiji.
def master
	# Folder name used locally for each category => path segment passed to caiji.
	# Hash order is insertion order, so categories are processed top to bottom.
	categories = {
		"系统工具" => "etools",
		"图形图像" => "eimage",
		"多媒体类" => "emedia",
		"游戏娱乐" => "egame",
		"数据库类" => "edata",
		"模块控件" => "ecom",
		"行业软件" => "etrade",
		"网络相关" => "enetwork"
	}

	root = "#{$run_file}/downfile"
	Dir.mkdir(root) unless File.directory?(root)

	categories.each do |name, slug|
		folder = "#{root}/#{name}"
		# The category folder must exist before caiji starts writing into it.
		Dir.mkdir(folder) unless File.directory?(folder)
		caiji(name, slug)
	end
end

# Downloads every archive listed under one category of www.5a5x.com into
# "#{$run_file}/downfile/#{title}/", printing progress as it goes.
#
# title - folder name of the category below "downfile"; the folder must
#         already exist.
# path  - path segment of the category below /wode_source/ on the site.
def caiji(title, path)
	url = "http://www.5a5x.com/wode_source/#{path}/"
	# The value from getCount is divided by 10, rounding up, to get the number
	# of listing pages.
	# NOTE(review): the count is read from page 10 of the listing - presumably
	# every listing page shows the same total; confirm against the site.
	page_count = (getCount("#{url}10.html") + 9) / 10

	(1..page_count).each do |i|
		puts "正在处理#{title}第#{i}页,共#{page_count}页"
		# URI.open (open-uri) is required here: Kernel#open stopped accepting
		# URLs in Ruby 3.0. The block form closes the stream right after reading.
		html = URI.open("#{url}#{i}.html") { |io| io.read }

		caiji_entries(html).each do |download_page_url, file_title|
			download_url = getDownloadUrl(download_page_url)
			puts "正在下载:#{file_title}.zip"
			# The title comes from the remote page; a "/" in it would point the
			# write at a different (possibly missing) directory.
			safe_title = file_title.tr("/", "_")
			# Fetch first, write second: a failed download then leaves neither an
			# empty file nor an unclosed handle behind.
			data = URI.open("http://www.5a5x.com/#{download_url}") { |io| io.read }
			File.open("#{$run_file}/downfile/#{title}/#{safe_title}.zip", "wb") { |f| f.write(data) }
		end
	end
end

# Helper for caiji: returns [detail_page_url, file_title] pairs for every
# entry of the form <dt><a href="URL" ...>TITLE< found in +html+, in page
# order. Parsing stops quietly at the first truncated entry.
def caiji_entries(html)
	prefix = '<dt><a href="'
	entries = []
	pos = 0
	while (pos = html.index(prefix, pos))
		url_start = pos + prefix.length
		url_end = html.index('"', url_start)
		break unless url_end
		# The title starts right after the end of the opening <a ...> tag.
		tag_end = html.index('>', url_end)
		break unless tag_end
		title_start = tag_end + 1
		title_end = html.index('<', title_start)
		break unless title_end
		entries << [html[url_start, url_end - url_start], html[title_start, title_end - title_start]]
		pos = title_start
	end
	entries
end

# Follows the two intermediate pages of a download and returns the final
# relative link.
#
# download_page_url - site-relative URL of an entry's detail page.
#
# Returns the href (relative to http://www.5a5x.com/) of the first
# "download.php?a_k=" link on the page reached through the detail page's
# first "down.php?a_k=" link.
# Raises RuntimeError when either link cannot be found.
def getDownloadUrl(download_page_url)
	# URI.open (open-uri) is required: Kernel#open stopped accepting URLs in
	# Ruby 3.0. The block form closes the stream right after reading.
	html = URI.open("http://www.5a5x.com/#{download_page_url}") { |io| io.read }
	url = extract_link_target(html, "<a href='", "down.php?a_k=", "'")

	html = URI.open("http://www.5a5x.com/#{url}") { |io| io.read }
	extract_link_target(html, '<a href="', "download.php?a_k=", '"')
end

# Helper for getDownloadUrl: finds the first occurrence of
# tag_open + target_prefix in +html+ and returns the attribute value that
# starts with target_prefix, up to (not including) the closing +quote+.
def extract_link_target(html, tag_open, target_prefix, quote)
	start = html.index(tag_open + target_prefix)
	raise "no link to #{target_prefix} found" unless start
	# Skip the tag opening so the result begins at the href value itself.
	start += tag_open.length
	finish = html.index(quote, start)
	raise "unterminated link to #{target_prefix}" unless finish
	html[start, finish - start]
end

# Returns the directory part of this script's path (__FILE__), without a
# trailing slash. When the script is started without any directory component
# (e.g. `ruby script.rb`), "." is returned, so the result can always be used
# as a path prefix.
def getRunFile
	File.dirname(__FILE__)
end

# Fetches +url+ and returns, as an Integer, the number that follows the
# "总数：<b>" marker on that page.
#
# Raises RuntimeError when the marker or the "<" that ends the number is
# missing.
def getCount(url)
	# URI.open (open-uri) is required: Kernel#open stopped accepting URLs in
	# Ruby 3.0. The block form closes the stream right after reading.
	html = URI.open(url) { |io| io.read }

	marker = "总数：<b>"
	start = html.index(marker)
	raise "count marker not found in #{url}" unless start
	# Derive the offset from the marker itself: its length depends on the
	# encoding this file is read in (9 bytes as GBK/binary, 6 characters as
	# UTF-8), so a fixed number skips too much or too little.
	# NOTE(review): index needs the page and this literal to be in compatible
	# encodings - confirm which charset the site serves.
	start += marker.length
	finish = html.index("<", start)
	raise "count value not terminated in #{url}" unless finish
	html[start, finish - start].to_i
end
# Resolve the script directory first: master and caiji build every output
# path from $run_file.
$run_file = getRunFile
master
代码片段