使用webBrowser采集网页数据_第1页
使用webBrowser采集网页数据_第2页
使用webBrowser采集网页数据_第3页
使用webBrowser采集网页数据_第4页
使用webBrowser采集网页数据_第5页
已阅读5页,还剩9页未读 继续免费阅读

下载本文档

版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领

文档简介

1、using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; using System.Drawing; using System.Linq; using System.Text; using System.Windows.Forms; using System.Text.RegularExpressions; namespace ProWebBrowserCaiJi { public partial class Form1 : Form { private static int PageI

2、ndex = 0; private static int HasFetechPageIndex = 0; private static int type = 0; /* 采集类型 0:水果1:蔬菜2:大田 */ private static int FetechCurCount = 0; /* 当前页采集数量 */ private static int FetechCount = 0; /* 采集数量 */ public Form1() { InitializeComponent(); } private void Form1_Load(object sender, EventArgs e) { groupBox3.Focus(); txt

3、EndDate.Text = DateTime.Now.ToString("yyyy-MM-dd"); } private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e) { string html; if (PageIndex == 0) return; /* 取得页面html源码 */ html = webBrowser1.Document.Body.InnerHtml; /if (!CompleteFlag) / / lblMsg.Text = 正在采集:第 + PageIndex

4、 + 页; / switch (type) case 0:/水果 GetPriceAgrosg(html); break; case 1:/蔬菜 GetPriceVegnet(html); break; case 2:/大田 if (HasFetechPageIndex = PageIndex) return; HtmlElementCollection elems = webBrowser1.Document.GetElementsByTagName(div); bool flag = false; int i = 0; FetechCurCount = 0; foreach (HtmlEl

5、ement elem in elems) if (elem.GetAttribute(className).Equals(price) i+; if (!flag) HasFetechPageIndex = PageIndex; flag = true; html = elem.InnerHtml; if (i = 1) continue; if (GetPriceDaTian(html) = -100) lblMsg.Text = 大田作物价格数据采集完成,共采集 + FetechCount + 条; PageIndex = 0; timDaTian.Stop(); return; if (

6、flag) /已采集页 lblMsg.Text = 大田采集 完成:第 + PageIndex + 页 共 + FetechCurCount + 个; if (FetechCurCount 25) lblErr.Text += PageIndex + ,; System.Threading.Thread.Sleep(1000); break; System.Threading.Thread.Sleep(1000); / / 价格行情 - 中国水果网 / private void GetPriceAgrosg(string html) html = html.ToLower(); NGJ.BLL

7、.sys_Price bll = new NGJ.BLL.sys_Price(); NGJ.Model.sys_Price model = new NGJ.Model.sys_Price(); int startIndex = 0; int endIndex = 0; startIndex = html.IndexOf(div id=price_btbj class=pri_list); if (startIndex 0) return; endIndex = html.IndexOf(, startIndex + 10); html = html.Substring(startIndex,

8、endIndex + 6 - startIndex); if (HasFetechPageIndex = PageIndex) return; /已采集页 HasFetechPageIndex = PageIndex; startIndex = 0; endIndex = 0; string tmp = ; int index = 0; int i = 0; FetechCurCount = 0; while (true) startIndex = html.IndexOf(li, endIndex); if (startIndex 0) break; endIndex = html.Inde

9、xOf(, startIndex); if (endIndex 0) continue; tmp = html.Substring(startIndex, endIndex + 5 - startIndex); tmp = striphtml(tmp); tmp = tmp.Trim(); index+; if (index = 7) continue; if (tmp = ) continue; i = index % 7; switch (i) case 1: model = new NGJ.Model.sys_Price(); model.pagIndex = PageIndex; mo

10、del.PriceDate = DateTime.Parse(tmp); model.type = 0; if (model.PriceDate DateTime.Parse(txtEndDate.Text) lblMsg.Text = 水果作物价格数据采集完成,共采集 + FetechCount + 条; PageIndex = 0; timShuiGuo.Stop(); return; break; case 2: model.varieties = tmp; break; case 3: model.market = tmp; break; case 4: model.lowPrice

11、= tmp; break; case 5: model.highPrice = tmp; break; case 6: model.avgPrice = tmp; break; case 0: model.unit = tmp; bll.Add(model); FetechCurCount+; FetechCount+; break; lblMsg.Text = 水果采集 完成:第 + PageIndex + 页 共 + FetechCurCount + 个; if (FetechCurCount 25) lblErr.Text += PageIndex + ,; System.Threadi

12、ng.Thread.Sleep(1000); / / 价格行情 - 中国蔬菜网 / / / / private void GetPriceVegnet(string html) html = html.ToLower(); NGJ.BLL.sys_Price bll = new NGJ.BLL.sys_Price(); NGJ.Model.sys_Price model = new NGJ.Model.sys_Price(); int startIndex = 0; int endIndex = 0; startIndex = html.IndexOf(); if (startIndex 0)

13、 return; ; endIndex = html.IndexOf(, startIndex + 10); html = html.Substring(startIndex, endIndex + 6 - startIndex); if (HasFetechPageIndex = PageIndex) return; /已采集页 HasFetechPageIndex = PageIndex; startIndex = 0; endIndex = 0; string tmp = ; int index = 0; int i = 0; FetechCurCount = 0; while (tru

14、e) startIndex = html.IndexOf(span, endIndex); if (startIndex 0) break; endIndex = html.IndexOf(, startIndex); if (endIndex 0) continue; tmp = html.Substring(startIndex, endIndex + 7 - startIndex); tmp = striphtml(tmp); tmp = tmp.Trim(); index+; i = index % 8; switch (i) case 1: model = new NGJ.Model

15、.sys_Price(); model.pagIndex = PageIndex; model.type = 1; model.PriceDate = DateTime.Parse(tmp.Replace(, ).Replace(, ); if (model.PriceDate DateTime.Parse(txtEndDate.Text) lblMsg.Text = 蔬菜作物价格数据采集完成,共采集 + FetechCount + 条; PageIndex = 0; timer1.Stop(); return; break; case 2: model.varieties = tmp; br

16、eak; case 3: model.market = tmp; break; case 4: model.lowPrice = tmp; break; case 5: model.highPrice = tmp; break; case 6: model.avgPrice = tmp; break; case 7: model.unit = tmp; bll.Add(model); FetechCurCount+; FetechCount+; break; lblMsg.Text = 蔬菜采集 完成:第 + PageIndex + 页 共 + FetechCurCount + 个; if (

17、FetechCurCount 25) lblErr.Text += PageIndex + ,; System.Threading.Thread.Sleep(1000); / / 价格行情 - 中国大田网 / / / / private int GetPriceDaTian(string html) html = html.ToLower(); NGJ.BLL.sys_Price bll = new NGJ.BLL.sys_Price(); NGJ.Model.sys_Price model = new NGJ.Model.sys_Price(); int startIndex = 0; in

18、t endIndex = 0; string tmp = ; int index = 0; int i = 0; while (true) startIndex = html.IndexOf(div, endIndex); if (startIndex 0) break; endIndex = html.IndexOf(, startIndex); if (endIndex 0) continue; tmp = html.Substring(startIndex, endIndex + 6 - startIndex); tmp = striphtml(tmp); tmp = tmp.Trim(

19、); index+; i = index % 5; switch (i) case 1: model = new NGJ.Model.sys_Price(); model.pagIndex = PageIndex; model.lowPrice = ¥0.00; model.highPrice = ¥0.00; model.type = 2; model.PriceDate = DateTime.Parse(tmp); if (model.PriceDate DateTime.Parse(txtEndDate.Text) return -100; break; case 2: model.va

20、rieties = tmp; break; case 3: model.market = tmp; break; case 4: model.avgPrice = tmp; break; case 0: model.unit = tmp; bll.Add(model); FetechCount++; FetechCurCount++; break; } } return 1; } /* 前往 */ private void btnGo_Click(object sender, EventArgs e) { if (txtUrl.Text.ToString().Trim() == "") { MessageBox.S

21、how("请输入网址!"); return; } lblMsg.Text = "加载中."; webBrowser1.Navigate(txtUrl.Text.ToString().Trim()); if (!webBrowser1.IsBusy) lblMsg.Text = "加载完成"; } /* 蔬菜采集开始 */ private void btnStart_Click(object sender, EventArgs e) { type = 1; /* 采集类型 0:水果1:蔬菜2:大田 */ PageIndex = 0; lblMsg.Text = "蔬菜 开始采集."; lblErr.Text = ""; Fetech

22、Count = 0; /* 采集数量 */ HasFetechPageIndex = 0; /* 开始采集,初始化已采集页 */ DeleteHasExist(); /* 删除已有,重新采集 */ timer1.Start(); } /* 大田采集开始 */ private void btnDaTian_Click(object sender, EventArgs e) { type = 2; PageIndex = 0; lblMsg.Text = "大田 开始采集."; lblErr.Text = ""; FetechCount = 0; /* 采集数量 */ HasFetechPageIndex = 0; /* 开始采集,初始化已采集页 */ Delet

23、eHasExist();/删除已有,重新采集 timDaTian.Start(); / / 蔬菜 / / / private void timer1_Tick(object sender, EventArgs e) string strUrl = /浏览器加载完成 if (!webBrowser1.IsBusy) PageIndex+; strUrl = + PageIndex + .html; /浏览器控件跳转到下一页投诉信息列表 webBrowser1.Navigate(strUrl); System.Threading.Thread.Sleep(3000); / / 大田 / / / p

24、rivate void timDaTian_Tick(object sender, EventArgs e) string strUrl = /浏览器加载完成 if (!webBrowser1.IsBusy) PageIndex+; strUrl = + PageIndex; /浏览器控件跳转到下一页投诉信息列表 webBrowser1.Navigate(strUrl); System.Threading.Thread.Sleep(3000); / / 水果 / / / private void timShuiGuo_Tick(object sender, EventArgs e) strin

25、g strUrl = + DateTime.Now.Year + &beginMonth= + DateTime.Now.Month + &beginDay= + DateTime.Now.Day + &endYear= + DateTime.Now.Year + &endMonth= + DateTime.Now.Month + &endDay= + DateTime.Now.Day + &cateName=&MID=0; /浏览器加载完成 if (!webBrowser1.IsBusy) PageIndex+; strUrl = + PageIndex; /浏览器控件跳转到下一页投诉信息列

26、表 webBrowser1.Navigate(strUrl); System.Threading.Thread.Sleep(3000); / / 蔬菜 / / / private void lkShuCai_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e) txtUrl.Text = webBrowser1.Navigate(txtUrl.Text.ToString().Trim(); / / 大田 / / / private void lkDaTian_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e) txtUrl.Text = webBrowser1.Navigate(txtUrl.Text.ToString().Trim(); / / 水果网 / / / private void lkShuiGuo_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e) txtUrl.Text = webBrowser1.Navigate(txtUrl.Text.ToString().Trim(); / / 水果 / / /

温馨提示

  • 1. 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等.压缩文件请下载最新的WinRAR软件解压。
  • 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
  • 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
  • 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
  • 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
  • 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
  • 7. 本站不保证下载资源的准确性、安全性和完整性, 同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。

评论

0/150

提交评论