主页 > 创业  > 

爬虫进阶-反爬破解6(Nodejs+Puppeteer实现登陆官网+实现滑动验证码全自动识别)

爬虫进阶-反爬破解6(Nodejs+Puppeteer实现登陆官网+实现滑动验证码全自动识别)
一、Nodejs+Puppeteer实现登陆官网

1.环境说明

Nodejs——直接从官网下载最新版本,并安装

使用npm安装puppeteer:npm install puppeteer

下载较慢时,可以指定国内镜像源:npm install xxx --registry=https://registry.npm.taobao.org

Chromium会自动下载,前提是网络通畅

2.实践操作:Nodejs+Puppeteer介绍

Puppeteer登录官网首页:

1.打开浏览器并访问首页

2.键入数据并访问登录后的页面

3.截图保留记录,存储成本地图片

// Logs in to the demo site with Puppeteer and stores a screenshot of the
// page shown after login as a local PNG file.
const puppeteer = require("puppeteer");

// NOTE(review): the host in the published article was garbled
// ("http://shanzhi.spbeen /index"); ".com" is assumed here - confirm.
const INDEX_URL = "http://shanzhi.spbeen.com/index";

/**
 * Resolves after `time` milliseconds.
 * @param {number} time - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (time) =>
  new Promise((resolve) => {
    setTimeout(resolve, time);
  });

/**
 * Returns the first element matching `selector`, failing with a readable
 * message instead of a TypeError on `null` when nothing matches.
 * @param {object} page - Puppeteer page to search.
 * @param {string} selector - CSS selector.
 * @returns {Promise<object>} The matching element handle.
 * @throws {Error} When no element matches.
 */
const findRequired = async (page, selector) => {
  const handle = await page.$(selector);
  if (handle === null) {
    throw new Error(`Element not found: ${selector}`);
  }
  return handle;
};

(async () => {
  const browser = await puppeteer.launch({
    headless: false, // headless hides the browser window; false shows it
    slowMo: 100, // delay between browser operations, in milliseconds
    // Puppeteer's default viewport is 800x600; use a desktop-sized one.
    defaultViewport: { width: 1366, height: 768 },
  });

  try {
    // 1. Open the home page.
    const page = await browser.newPage();
    await page.goto(INDEX_URL);
    await sleep(3000);

    // 2. Follow the login link, then fill in and submit the form.
    const loginLinkButton = await findRequired(page, "a.btn.btn-primary");
    await loginLinkButton.click();
    await sleep(2000);

    const usernameInput = await findRequired(page, "input#username");
    await usernameInput.type("demo1234");
    await sleep(2000);

    const passwordInput = await findRequired(page, "input#MemberPassword");
    await passwordInput.type("demo1234");
    await sleep(2000);

    const submitButton = await findRequired(page, "button.btn.btn-primary");
    await submitButton.click();
    await sleep(2000);

    // 3. Keep a record of the logged-in page as a local image.
    await page.screenshot({ path: "shanzhi_login_index.png" });
    await sleep(2000);
  } finally {
    // Close the browser even when a step above fails.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

3.总结:

Puppeteer有更多且更全的接口,可以快捷的操作网页

Puppeteer可以实现多种数据的存储,例如截图、pdf等

浏览器的标签页不要太多,容易卡电脑

二、nodejs+puppeteer实现滑动验证码全自动识别

1.滑动验证码破解方法

数据来源:滑动验证码的图片偏移

破解方法一:分析请求,用数据做正确的请求操作

破解方法二:浏览器实现滑动验证码

2.滑动验证码偏移计算方法

数据来源:滑动验证码的图片

方法一:相似度对比

方法二:像素的RGB值对比

方法三:调用经过数据训练过的机器学习模型

3.图片的预处理

数据来源:滑动验证码的图片

阶段一:缩放图片[将图片尺寸进行压缩]

阶段二:简化色彩[灰度处理]

阶段三:计算平均值或灰度平均值

4.实践操作:图形以及效果展示

// Signs in to Zhihu with Puppeteer and repeatedly tries to solve the Yidun
// slider captcha: the jigsaw piece is located in the background image by
// pixel comparison, SSIM and Rembrandt, and the slider is then dragged by
// the SSIM offset.

// const puppeteer = require("puppeteer"); // Chromium started by plain puppeteer is detected by Zhihu
const puppeteer = require('puppeteer-extra'); // removes automation fingerprints; npm install puppeteer-extra
const StealthPlugin = require('puppeteer-extra-plugin-stealth'); // npm install puppeteer-extra-plugin-stealth
puppeteer.use(StealthPlugin());
const Rembrandt = require('rembrandt'); // image comparison library; npm install rembrandt
const fs = require('fs'); // Node.js built-in file API (no install needed)
const ssim = require('ssim'); // SSIM comparison library; npm install ssim

// NOTE(review): the URL in the published article was garbled
// ("http:// .zhihu /signin"); the usual sign-in address is assumed - confirm.
const SIGNIN_URL = 'https://www.zhihu.com/signin';
const USERNAME = '18296198879';
const PASSWORD = 'demodemodemo';
const SMALL_IMG_PATH = './assets/smallimg.jpg';
const PART_BIG_DIR = './assets/part_big/';
const JPEG_DATA_URL_PREFIX = 'data:image/jpeg;base64,';
// Accepts both the ASCII and the full-width comma in the retry message.
const TOO_MANY_FAILURES = /失败过多[,,]点此重试/;

/**
 * Resolves after `time` milliseconds.
 * @param {number} time - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (time) =>
  new Promise((resolve) => {
    setTimeout(resolve, time);
  });

/**
 * Returns the first element matching `selector`, failing with a readable
 * message instead of a TypeError on `null` when nothing matches.
 * @param {object} page - Puppeteer page to search.
 * @param {string} selector - CSS selector.
 * @returns {Promise<object>} The matching element handle.
 * @throws {Error} When no element matches.
 */
const findRequired = async (page, selector) => {
  const handle = await page.$(selector);
  if (handle === null) {
    throw new Error(`Element not found: ${selector}`);
  }
  return handle;
};

/**
 * Yields the x offsets of a uniformly accelerated move (s = 1/2 * a * t^2)
 * so the slider is dragged gradually instead of jumping to the target.
 * The last yielded value may exceed `dis`; the caller moves to the exact
 * target afterwards.
 * @param {number} dis - Target distance in pixels.
 * @yields {number} The next offset from the starting point.
 */
function* moveTrace(dis) {
  const t0 = 0.2;
  const a = 0.8;
  let curr = 0;
  let step = 0;
  while (curr < dis) {
    step += 1;
    const t = t0 * step;
    curr = parseFloat((0.5 * a * t * t).toFixed(2));
    yield curr;
  }
}

/**
 * Runs inside the page: finds the jigsaw piece in the captcha background by
 * comparing grayscale pixel values at every candidate x offset.
 * All `console.log` output of the evaluated function appears in the page's
 * DevTools console, not in the Node.js terminal.
 * @param {object} page - Puppeteer page currently showing the captcha.
 * @returns {Promise<Array>} `[offset, smallImageDataUrl, partImagesByOffset]`:
 *   the best pixel-comparison offset (the last one on ties), the cropped
 *   jigsaw piece as a JPEG data URL, and an object mapping each candidate
 *   x offset to the same-sized background crop as a JPEG data URL.
 */
const calculateDistance = async (page) => {
  const distance = await page.evaluate(() => {
    // Converts RGBA pixel data to grayscale in place (or to black/white when
    // `binary` is set, thresholded at `value` or else the average gray).
    // Returns the '0'/'1' string when `sn` is set, otherwise the pixel array.
    const toGrayBinary = (pixels, binary, value, sn) => {
      const len = pixels.length;
      let avg = 0;
      for (let i = 0; i < len; i += 4) {
        avg += 0.299 * pixels[i] + 0.587 * pixels[i + 1] + 0.114 * pixels[i + 2];
      }
      avg /= len / 4;
      const threshold = value || avg;
      let bits = '';
      for (let i = 0; i < len; i += 4) {
        const luma = 0.299 * pixels[i] + 0.587 * pixels[i + 1] + 0.114 * pixels[i + 2];
        let gray = luma;
        if (binary) {
          const isLight = luma >= threshold;
          gray = isLight ? 255 : 0;
          if (sn) {
            bits += isLight ? '1' : '0';
          }
        }
        pixels[i] = gray;
        pixels[i + 1] = gray;
        pixels[i + 2] = gray;
      }
      return sn ? bits : pixels;
    };

    // Renders an ImageData object to a JPEG data URL so it can be returned
    // to Node.js and stored locally.
    const imgCanvasToBase64 = (img, width, height) => {
      const outCanvas = document.createElement('canvas');
      outCanvas.width = width;
      outCanvas.height = height;
      outCanvas.getContext('2d').putImageData(img, 0, 0, 0, 0, width, height);
      return outCanvas.toDataURL('image/jpeg');
    };

    const smallbgimg = document.getElementsByClassName('yidun_jigsaw')[0];
    const bgImg = document.getElementsByClassName('yidun_bg-img')[0];
    if (!smallbgimg || !bgImg) {
      throw new Error('Captcha images (.yidun_jigsaw / .yidun_bg-img) not found');
    }

    // Draw the jigsaw piece on a canvas sized to the image; a canvas left at
    // its default size (300x150) would clip larger images.
    const smallW = smallbgimg.naturalWidth;
    const smallH = smallbgimg.naturalHeight;
    const smallcanvas = document.createElement('canvas');
    smallcanvas.width = smallW;
    smallcanvas.height = smallH;
    const smallcontext = smallcanvas.getContext('2d');
    console.log('smallbgimg:', smallbgimg, smallW, smallH);
    smallcontext.drawImage(smallbgimg, 0, 0, smallW, smallH);

    // Weaken the colours: lower R, G and B of every pixel; alpha (every
    // fourth value) is left untouched.
    const pixels = smallcontext.getImageData(0, 0, smallW, smallH);
    for (let i = 0, len = pixels.data.length; i < len; i += 4) {
      pixels.data[i] -= 95; // R
      pixels.data[i + 1] -= 55; // G
      pixels.data[i + 2] -= 45; // B
    }
    smallcontext.putImageData(pixels, 0, 0);

    // Bounding box of the pixels that are still non-black in all channels.
    // The canvas is read back once instead of once per pixel.
    const darkened = smallcontext.getImageData(0, 0, smallW, smallH).data;
    let minwidth = smallW;
    let maxwidth = 0;
    let minheight = smallH;
    let maxheight = 0;
    for (let x = 1; x < smallW; x++) {
      for (let y = 1; y < smallH; y++) {
        const o = (y * smallW + x) * 4;
        if (darkened[o] > 0 && darkened[o + 1] > 0 && darkened[o + 2] > 0) {
          minwidth = Math.min(minwidth, x);
          maxwidth = Math.max(maxwidth, x);
          minheight = Math.min(minheight, y);
          maxheight = Math.max(maxheight, y);
        }
      }
    }
    console.log("图片最大和最小宽高", minwidth, maxwidth, minheight, maxheight);
    const height = maxheight - minheight;
    const width = maxwidth - minwidth;
    if (width <= 0 || height <= 0) {
      throw new Error('No visible pixels found in the jigsaw image');
    }

    // Crop the piece and convert it to grayscale; the same (now gray)
    // ImageData is exported as the reference image.
    const smallimg = smallcontext.getImageData(minwidth, minheight, width, height);
    const smallimggb = toGrayBinary(smallimg.data);
    const small_img_canvas = imgCanvasToBase64(smallimg, width, height);
    console.log('smallimg canvas', small_img_canvas);

    // Draw the background image on its own canvas.
    const bgW = bgImg.naturalWidth;
    const bgH = bgImg.naturalHeight;
    console.log('bgimg', bgW, bgH);
    const canvas = document.createElement('canvas');
    canvas.width = bgW;
    canvas.height = bgH;
    const context = canvas.getContext('2d');
    context.drawImage(bgImg, 0, 0, bgW, bgH);
    const contextBigimg = context.getImageData(0, 0, bgW, bgH);
    console.log("bigimg canvas:", imgCanvasToBase64(contextBigimg, bgW, bgH));

    let xAxis = [];
    let tmpmax = 0.0;
    const part_bigimg = {};
    const pixelCount = width * height;
    // NOTE(review): the "+ 2" offsets are kept from the original article;
    // confirm they match the captcha's actual layout.
    const j = minheight + 2;
    // Slide a window of the piece's size across the background and keep the
    // offset(s) with the highest share of identical gray values.
    for (let i = minwidth + width + 2; i < bgW - width; i++) {
      const bigimg = context.getImageData(i, j, width, height);
      const imggb = toGrayBinary(bigimg.data);
      let matches = 0;
      // One gray value per pixel: after grayscale conversion R, G and B are
      // equal, so only every fourth entry (the R channel) is compared.
      for (let n = 0; n < pixelCount; n++) {
        if (smallimggb[n * 4] === imggb[n * 4]) {
          matches++;
        }
      }
      const similar = (matches / pixelCount) * 100;
      const bigimg_part = imgCanvasToBase64(bigimg, width, height);
      part_bigimg[i] = bigimg_part; // returned so Node.js can store every crop
      if (similar > tmpmax) {
        tmpmax = similar;
        console.log('yes:', i, j, width, height, similar, bigimg_part);
        xAxis = [i];
      } else if (similar === tmpmax) {
        console.log('yes:', i, j, width, height, similar, bigimg_part);
        xAxis.push(i);
      } else {
        console.log('error---', i, j, width, height, similar, bigimg_part);
      }
    }
    // Several values are returned together as one array.
    return [xAxis[xAxis.length - 1], small_img_canvas, part_bigimg];
  });
  return distance;
};

// Main program.
(async () => {
  const browser = await puppeteer.launch({
    headless: false, // headless hides the browser window; false shows it
    // Puppeteer's default viewport is 800x600; use a desktop-sized one.
    defaultViewport: { width: 1366, height: 768 },
    // NOTE(review): presumably needed so that cross-origin captcha images can
    // be read back from a canvas - confirm.
    args: [
      '--disable-web-security',
      '--disable-features=IsolateOrigins,site-per-process',
    ],
  });

  try {
    const page = await browser.newPage();
    await page.goto(SIGNIN_URL);

    // Switch from SMS login to account/password login.
    const mima_button = await findRequired(
      page,
      'form.SignFlow.Login-content > div:nth-child(1) div:nth-child(2)',
    );
    await mima_button.click();
    const username = await findRequired(page, 'div.SignFlow-account input.Input');
    await username.type(USERNAME);
    const password = await findRequired(page, 'div.SignFlow-password input.Input');
    await password.type(PASSWORD);

    // Clicking the login button brings up the slider captcha.
    const login_button = await findRequired(
      page,
      'button.Button.SignFlow-submitButton.Button--primary.Button--blue',
    );
    await sleep(1000);
    await login_button.click();
    await sleep(1000);

    // The image files are read back by path below, so the folders must exist.
    fs.mkdirSync(PART_BIG_DIR, { recursive: true });

    // Keep trying to solve the slider captcha.
    for (let num = 1; num < 1000; num++) {
      await sleep(2000);
      console.log('----------------\n当前循环次数:', num);

      // After several failures the widget shows a retry message that has to
      // be clicked before the slider accepts input again.
      const tipHandle = await page.$("span.yidun_tips_text.yidun-fallback_tip");
      const yidun_msg = tipHandle === null
        ? ''
        : await page.evaluate((el) => el.innerHTML, tipHandle);
      if (TOO_MANY_FAILURES.test(yidun_msg)) {
        const yidun_tips = await page.$(".yidun_tips");
        if (yidun_tips !== null) {
          await yidun_tips.click();
        }
      }

      // The captcha popup is hidden once verification succeeds; stop then.
      const popupDisplay = await page.evaluate(() => {
        const popup = document.querySelector('div.yidun_popup--light.yidun_popup');
        return popup ? window.getComputedStyle(popup).display : null;
      });
      console.log('yidun_popup style.display:', popupDisplay);
      if (popupDisplay === 'none') {
        console.log("验证成功,正常退出");
        break;
      }
      if (popupDisplay === null) {
        continue; // popup not in the DOM yet; try again on the next round
      }

      const [distance, smallimg, part_bigimg] = await calculateDistance(page);
      console.log('像素RGB值对比算法结果值:', distance);

      // Store the jigsaw piece locally. The write is synchronous because
      // Rembrandt reads the file by path right afterwards.
      const smallimg_buffer = Buffer.from(
        smallimg.replace(JPEG_DATA_URL_PREFIX, ""),
        'base64',
      );
      fs.writeFileSync(SMALL_IMG_PATH, smallimg_buffer);

      let maxdiff = 0.0;
      let offset_size = 0;
      let ssim_maxdiff = 0.0;
      let ssim_offset_size = 0;
      for (const [partb, dataUrl] of Object.entries(part_bigimg)) {
        const offset = Number.parseInt(partb, 10);

        // Store every background crop locally, named by its x offset.
        const part_big_img_path = PART_BIG_DIR + offset + '.jpg';
        const partbimg_buffer = Buffer.from(
          dataUrl.replace(JPEG_DATA_URL_PREFIX, ""),
          'base64',
        );
        fs.writeFileSync(part_big_img_path, partbimg_buffer);

        // SSIM similarity of the two images, passed as JPEG byte buffers.
        // NOTE(review): confirm that the installed `ssim` package accepts
        // encoded image buffers and returns a number.
        const ssim_result = ssim(smallimg_buffer, partbimg_buffer);
        if (ssim_result > ssim_maxdiff) {
          ssim_maxdiff = ssim_result;
          ssim_offset_size = offset;
        }

        // Rembrandt comparison; its inputs are the image file paths.
        const rembrandt = new Rembrandt({
          imageA: SMALL_IMG_PATH,
          imageB: part_big_img_path,
          thresholdType: Rembrandt.THRESHOLD_PIXELS,
          // thresholdType: Rembrandt.THRESHOLD_PERCENT
        });
        const result = await rembrandt.compare();
        const difference = result.percentageDifference * 100;
        // NOTE(review): this keeps the offset with the LARGEST difference, as
        // in the original; the best match would normally be the smallest.
        // The value is only logged, never used for the drag.
        if (difference > maxdiff) {
          maxdiff = difference;
          offset_size = offset;
        }
      }
      console.log("{*}SSIM算法计算偏移结果值:", ssim_offset_size);
      console.log("rembrandt算法计算偏移结果值:", offset_size);
      await sleep(1000);

      // Drag the slider by the SSIM offset.
      const yidun_slider = await findRequired(page, ".yidun_slider");
      const bounding_box = await yidun_slider.boundingBox();
      if (bounding_box === null) {
        throw new Error('Slider .yidun_slider is not visible');
      }
      // Press the mouse button at the centre of the slider handle.
      await page.mouse.move(
        bounding_box.x + bounding_box.width / 2,
        bounding_box.y + bounding_box.height / 2,
      );
      await page.mouse.down();
      // Move a little at a time, with a small vertical offset as well.
      for (const ret of moveTrace(ssim_offset_size)) {
        await page.mouse.move(bounding_box.x + ret, bounding_box.y + 6);
      }
      // Finish exactly on the target position, pause, then release.
      await page.mouse.move(bounding_box.x + ssim_offset_size, bounding_box.y + 6);
      await sleep(100);
      await page.mouse.up();
    }
    await sleep(2000);
  } finally {
    // Close the browser even when a step above fails.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

总结:

使用canvas进行图片的灰度处理、剪辑处理。

图片的对比,要查看算法函数的调用方法,注意数据类型要对上。

修改代码前,要详细阅读每一行代码,修改起来会更顺畅

三、滑动验证码之像素RGB对比算法实现

算法介绍:1. 拿到小图,减弱图片颜色;2. 根据小图的宽高,截取同等宽高的对比图;3. 循环取出图片的所有RGB值并比较;4. 取出相似度最大的一个偏移值,做滑动操作。

对比步骤:1. 图片预处理;2. 逐个像素对比;3. 将相似度最大的偏移量进行滑动操作。

图片预处理:1. 颜色减弱;2. 宽高设定。

标签:

爬虫进阶-反爬破解6(Nodejs+Puppeteer实现登陆官网+实现滑动验证码全自动识别)由讯客互联创业栏目发布,感谢您对讯客互联的认可,以及对我们原创作品以及文章的青睐,非常欢迎各位朋友分享到个人网站或者朋友圈,但转载请说明文章出处“爬虫进阶-反爬破解6(Nodejs+Puppeteer实现登陆官网+实现滑动验证码全自动识别)”。