要爬取一個網站時遇到了極驗的驗證碼,這周都在想著怎么破解這個。在網上搜了好多,在知乎上看到有人問了這個問題:https://www.zhihu.com/question/28833985 ,我按照這個思路去大概實現了一下。
	
	  1.使用htmlunit(這種方式我沒成功,模擬鼠標拖拽后軌跡沒生成,可以跳過)
	  我用的是java,首先想到的是直接用htmlunit,先做了點初始化
	/**
	 * Lazily creates and configures the shared {@code webClient}.
	 * Returns immediately without touching anything when the client already exists.
	 *
	 * <p>The client emulates Firefox 24, sends its traffic through the proxy at
	 * 127.0.0.1:8888, runs JavaScript and CSS, and is seeded with the cookies
	 * currently held by {@code client}.
	 */
	private void initWebClient() {
		if (webClient == null) {
			webClient = new WebClient(BrowserVersion.FIREFOX_24);

			// Network set-up: local proxy, insecure SSL (original note: certificate configuration).
			webClient.getOptions().setProxyConfig(new ProxyConfig("127.0.0.1", 8888));
			webClient.getOptions().setUseInsecureSSL(true);
			webClient.getOptions().setActiveXNative(true);

			// Page processing: scripts and styles are both evaluated; CSS problems are silenced.
			webClient.getOptions().setJavaScriptEnabled(true);
			webClient.getOptions().setCssEnabled(true);
			webClient.setCssErrorHandler(new SilentCssErrorHandler());

			// Script errors and failing HTTP status codes must not raise exceptions.
			webClient.getOptions().setThrowExceptionOnScriptError(false);
			webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

			// Copy the cookies obtained through the other HTTP client into a fresh cookie manager.
			List<org.apache.http.cookie.Cookie> sourceCookies = client.getCookies();
			CookieManager cookieJar = new CookieManager();
			for (org.apache.http.cookie.Cookie sourceCookie : sourceCookies) {
				cookieJar.addCookie(new com.gargoylesoftware.htmlunit.util.Cookie(sourceCookie));
			}
			webClient.setCookieManager(cookieJar);
		}
	}
	  初始化代理、cookie……然后就能正常調用了
	  // Load the login page and hand it to the captcha-handling routine.
	  HtmlPage page = webClient.getPage("http://www.qixin.com/login");// Qixinbao (qixin.com)
	  gePageInfor(page);
	  下面是我獲取圖片、還原圖片并且模擬拖拽的代碼(這里我覺得是有些問題的,可能是我模擬的拖拽不對,導致觸發的js并沒有生成正確的軌跡,還請大家幫忙看看哪里錯了)
	/**
	 * Attempts to solve the slider captcha contained in {@code page}.
	 *
	 * <p>Steps, in order:
	 * <ol>
	 *   <li>download the two sliced images referenced by the element with id {@code captcha};</li>
	 *   <li>rebuild both images with {@code ImgTest.getGeetestImg} and compute the horizontal
	 *       offset with {@code ImgTest.deCAPTCHA};</li>
	 *   <li>simulate mouse-over, mouse-down, mouse-move and mouse-up on the slider knob.</li>
	 * </ol>
	 * Diagnostic output is written to standard output.
	 *
	 * <p>Known limitation (author's note): the simulated drag does not make the page script
	 * record a movement track, so the captcha is not actually passed yet.
	 *
	 * @param page page expected to contain an element with id {@code captcha}; when that element
	 *             is absent the method returns without doing anything
	 */
	private void gePageInfor(HtmlPage page) {
		String[] img_slice = {"div", "class", "gt_cut_fullbg_slice"};
		String[] img_bg_slice = {"div", "class", "gt_cut_bg_slice"};

		HtmlDivision div = (HtmlDivision) page.getElementById("captcha");
		if (div == null) {
			// Without the captcha container there is nothing to download or drag.
			System.out.println("captcha element not found, nothing to solve");
			return;
		}

		int deCAPTCHA = 0;
		try {
			// Raw bytes of the two sliced images (full background first, so the slice
			// positions get checked before the second image is rebuilt).
			byte[] img_slice_binary = client.get(getImgUrl(img_slice, div, true)).getBinary();
			byte[] img_bg_slice_binary = client.get(getImgUrl(img_bg_slice, div, false)).getBinary();
			// Reassemble the original pictures from the shuffled slices.
			BufferedImage geetestImg = ImgTest.getGeetestImg(img_slice_binary, ImgTest.imgArray);
			BufferedImage geetestImg2 = ImgTest.getGeetestImg(img_bg_slice_binary, ImgTest.imgArray);
			// Horizontal distance to drag (author's note: still unreliable, a third-party
			// image recognition library should be used instead).
			deCAPTCHA = ImgTest.deCAPTCHA(geetestImg, geetestImg2);
			System.out.println(deCAPTCHA);
		} catch (IOException | FetchException e) {
			// Best effort: carry on with offset 0; mouseMoveX substitutes its own default then.
			e.printStackTrace();
		}

		// Knob that has to be dragged.
		HtmlDivision div_slider_knob = get_div_slider_knob(page, "gt_slider_knob gt_show");
		div_slider_knob.mouseOver();
		HtmlPage mouseDownPage = (HtmlPage) div_slider_knob.mouseDown();
		// After mouse-down the knob is looked up again under its "moving" class.
		div_slider_knob = get_div_slider_knob(mouseDownPage, "gt_slider_knob gt_show moving");
		mouseMoveX(deCAPTCHA, div_slider_knob, mouseDownPage);

		HtmlPage newPage = (HtmlPage) div_slider_knob.mouseOver();
		System.out.println(newPage.asXml());

		// Diagnostics only: show the moving slice if it exists. A missing element must not
		// abort the sequence before the mouse button is released.
		HtmlDivision captchaAfterMove = (HtmlDivision) newPage.getElementById("captcha");
		if (captchaAfterMove != null) {
			List<HtmlElement> movingSlices =
					captchaAfterMove.getElementsByAttribute("div", "class", "gt_slice gt_show moving");
			if (!movingSlices.isEmpty()) {
				System.out.println(movingSlices.get(0));
			}
		}

		// Releasing the button triggers the page script (author's note: no track is generated).
		newPage = (HtmlPage) div_slider_knob.mouseUp();
		System.out.println("---------------");
		System.out.println(newPage.asXml());

		// TODO: when newPage still contains the "captcha" element the attempt failed; a retry via
		// gePageInfor(newPage) is deliberately left disabled until the drag simulation works,
		// because it would recurse without a bound.
	}
	private void mouseMoveX(int deCAPTCHA, HtmlDivision div_slider_knob, HtmlPage mouseDown) {
	MouseEvent mouseEvent = new MouseEvent(div_slider_knob, MouseEvent.TYPE_MOUSE_MOVE, false, false, false, MouseEvent.BUTTON_LEFT);
	mouseEvent.setClientX( mouseEvent.getClientX()+((deCAPTCHA!=0)?deCAPTCHA:99));    //移動x坐標
	ScriptResult scriptResult = mouseDown.getDocumentElement().fireEvent(mouseEvent);
	}
	/**
	 * Returns the first {@code div} inside the element with id {@code captcha} whose
	 * {@code class} attribute equals {@code classString}.
	 *
	 * @param page        page containing the captcha element
	 * @param classString exact value of the {@code class} attribute to look for
	 * @return the first matching division
	 */
	private HtmlDivision get_div_slider_knob(HtmlPage page,String classString) {
		HtmlDivision captchaBox = (HtmlDivision) page.getElementById("captcha");
		return (HtmlDivision) captchaBox.getElementsByAttribute("div", "class", classString).get(0);
	}
	private String getImgUrl(String[] img_slice, HtmlDivision div, boolean isNeedCheckPostion) {
	String url ="";
	int[] postion = new int[2];
	boolean empty = div.getElementsByAttribute(img_slice[0],img_slice[1],img_slice[2]).isEmpty();
	if (div.hasChildNodes() && !empty) {
	List<HtmlElement> elementsByAttribute = div.getElementsByAttribute(img_slice[0],img_slice[1],img_slice[2]);
	for(int i = 0;i<elementsByAttribute.size();i++){
	HtmlDivision div_img = (HtmlDivision)elementsByAttribute.get(i);
	String style = div_img.getAttribute("style");
	String[] imge_url_position = style.split(";");
	if(StringUtils.isBlank(url)){//確認url
	url = StringUtils.replacePattern(imge_url_position[0], ".*\(", "").replace(")", "");
	}
	if (isNeedCheckPostion) {//確認圖片切割postion,兩張圖切割方式一樣  background-position: -157px -58px
	//                    String[] positionS = StringUtils.split(StringUtils.remove(imge_url_position[1], "px").replace("-", "").replaceAll(".*:", ""), null);
	String[] positionS = StringUtils.split(StringUtils.removePattern(imge_url_position[1], "[^\d+ \s]"),null);
	postion[0] = Integer.parseInt(positionS[0]);
	postion[1] = Integer.parseInt(positionS[1]);
	int[] is = ImgTest.imgArray[i];
	if (is[0]!=postion[0]||is[1]!=postion[1]) {
	logger.debug("更新分割postion");
	ImgTest.imgArray[i] = postion;
	}
	System.out.println(ImgTest.imgArray);
	isNeedCheckPostion= false;
	}
	}
	}
	return url;
	}