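// NOTE: stray promotional banner picked up when this script was copied from a web page; it is not part of the script: 清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>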
// ---- Setup -----------------------------------------------------------------
// Where downloaded images are written, and the site root that relative
// pagination links are resolved against.
String storePath = "/tmp/mm/"
String urlPrefix = "http://me2-sex.lofter.com"

// XmlSlurper is backed by the TagSoup SAX parser (fetched via Grape) for reading the site's HTML pages.
@Grab(group='org.ccil.cowan.tagsoup', module='tagsoup', version='1.2' )
def tagsoupParser = new org.ccil.cowan.tagsoup.Parser()
def slurper = new XmlSlurper(tagsoupParser)

// Wall-clock start, used for the elapsed-time figure in the final summary.
long start = System.currentTimeMillis()

// Seed page: the tag listing whose pagination links drive the crawl.
def htmlParser = slurper.parse(urlPrefix + "/tag/%E7%BE%8E%E5%A5%B3%E6%91%84%E5%BD%B1")
println "开始抓取高清图片,存储路径为:${storePath}"

// Accumulators: listing-page hrefs, then the post-page links found on those pages.
List pagesLink = new ArrayList()
List mmLink = new ArrayList()
// Collect the pagination addresses: the href of every element on the seed page whose class is 'num'.
pagesLink.addAll(
    htmlParser.'**'
        .findAll { node -> node.@class == 'num' }
        .collect { node -> node.'@href'.text() }
)

// Visit each listing page and collect the href of every element whose class is 'img'.
for (pageHref in pagesLink) {
    htmlParser = slurper.parse(urlPrefix + pageHref)
    def imageAnchors = htmlParser.'**'.findAll { node -> node.@class == 'img' }
    imageAnchors.each { node -> mmLink << node.'@href'.text() }
}
// ---- Download phase ----------------------------------------------------------
// Visit every collected post page, save each full-size image under storePath,
// then print a summary (image count, elapsed minutes, total megabytes).
def count = 0       // number of images saved so far
long fileSize = 0   // running total of downloaded bytes

// Create the target directory up front: opening a file for writing does not
// create missing parent directories and would otherwise fail on a fresh machine.
new File(storePath).mkdirs()

mmLink.each { pageUrl ->
    slurper.parse(pageUrl).'**'.findAll { it.@class == 'img imgclasstag' }.each { node ->
        def address = node.'@bigimgsrc'.text()
        // Skip elements that carry no full-size image URL; an empty address would
        // otherwise blow up when deriving the file name or constructing the URL.
        if (!address) {
            return  // acts as "continue" inside an each-closure
        }
        // File name = last path segment of the image URL.
        def file = new File(storePath + address.tokenize("/")[-1])
        // withInputStream / withOutputStream close both streams even if the copy
        // throws, so neither the HTTP connection nor the file handle is leaked.
        new URL(address).withInputStream { input ->
            file.withOutputStream { output ->
                output << input
            }
        }
        fileSize += file.length()
        // Pre-increment so the progress message is 1-based (the first image is #1, not #0).
        println "下载第" + (++count) + "图片完成" + "大小 " + file.length()/1024 + "kb 地址:" + address
    }
}
def end = System.currentTimeMillis()
println "mm数量${mmLink.size()},共抓取到${count}张图片,"+"用时:"+(end - start)/1000/60 + "分钟,下载总大小为:${fileSize/1024/1024}M"