清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
// Image crawler script.
//
// Flow, as implemented below:
//   1. Parse the tag listing page with TagSoup (tolerant HTML parser) + XmlSlurper.
//   2. Collect the href of every element whose class is 'num' (pagination links).
//   3. For each of those pages, collect the href of every element whose class is 'img'.
//   4. For each collected link, find elements with class 'img imgclasstag' and
//      download the URL held in their 'bigimgsrc' attribute into storePath.
//   5. Print a summary (link count, image count, elapsed minutes, total size in MB).
//
// NOTE(review): only the pages reachable through the 'num' links are crawled; the
// listing page loaded in step 1 is used for its pagination links only. Confirm
// whether its own images should be harvested as well.
@Grab(group='org.ccil.cowan.tagsoup', module='tagsoup', version='1.2')
def tagsoupParser = new org.ccil.cowan.tagsoup.Parser()
def slurper = new XmlSlurper(tagsoupParser)

def start = System.currentTimeMillis()
def storePath = "/tmp/mm/"
def urlPrefix = "http://me2-sex.lofter.com"

// The output stream does not create missing parent directories, so make sure
// the target directory exists before the first download.
def storeDir = new File(storePath)
if (!storeDir.isDirectory() && !storeDir.mkdirs()) {
    throw new IOException("无法创建存储目录:${storePath}")
}

def htmlParser = slurper.parse("http://me2-sex.lofter.com/tag/%E7%BE%8E%E5%A5%B3%E6%91%84%E5%BD%B1")
println "开始抓取高清图片,存储路径为:${storePath}"

def pagesLink = []
def mmLink = []

// Collect the pagination addresses from the listing page.
htmlParser.'**'.findAll { it.@class == 'num' }.each {
    pagesLink.add(it.'@href'.text())
}

// Collect the image-page links found on each paginated page.
// Pagination hrefs are joined onto urlPrefix, i.e. they are treated as site-relative.
pagesLink.each { pageHref ->
    def page = slurper.parse(urlPrefix + pageHref)
    page.'**'.findAll { it.@class == 'img' }.each {
        mmLink.add(it.'@href'.text())
    }
}

def count = 0
long fileSize = 0

// Fetch the image files referenced by each image page and store them locally.
mmLink.each { postUrl ->
    slurper.parse(postUrl).'**'.findAll { it.@class == 'img imgclasstag' }.each {
        def address = it.'@bigimgsrc'.text()
        if (!address) {
            // No bigimgsrc attribute: nothing to download, and tokenize(...)[-1]
            // would throw on the resulting empty list.
            return
        }

        // The local file name is the last path segment of the image URL.
        def file = new File(storeDir, address.tokenize("/")[-1])
        try {
            // with* methods close both streams even when the copy fails part-way.
            file.withOutputStream { out ->
                new URL(address).withInputStream { input -> out << input }
            }
        } catch (IOException e) {
            // Best effort: drop the partial file and carry on with the next image
            // instead of aborting the whole crawl.
            file.delete()
            println "下载失败,已跳过:${address} (${e.message})"
            return
        }

        fileSize += file.length()
        // Pre-increment so the first downloaded image is reported as number 1.
        println "下载第" + (++count) + "图片完成" + "大小 " + file.length()/1024 + "kb 地址:" + address
    }
}

def end = System.currentTimeMillis()
println "mm数量${mmLink.size()},共抓取到${count}张图片,"+"用时:"+(end - start)/1000/60 + "分钟,下载总大小为:${fileSize/1024/1024}M"