HOW TO:利用Excel的QueryTable下载网上数据 - 中国WEB开发者网络 (http://www.webasp.net) -- 技术教程 (http://www.webasp.net/article/) --- HOW TO:利用Excel的QueryTable下载网上数据 (http://www.webasp.net/article/29/28069.htm) |
| -- 作者:未知 -- 发布日期: 2006-12-29 |
Author:水如烟
总目录:行政区划数据方案设计
这里所说的网上数据,是基于: 一、有固定网址发布最新数据的链接; 二、数据格式固定。 在去年的10月,曾写了个《全国县及县以上行政区划代码信息类 》 见:http://www.cnblogs.com/LzmTW/archive/2005/10/22/260066.html 现在仍以行政区划代码数据为例。 行政区划代码数据由国家统计局发布,网址为 http://www.stats.gov.cn/tjbz/xzqhdm/index.htm 数据格式是固定的: 如最新的为2005年12月31日 http://www.stats.gov.cn/tjbz/xzqhdm/t20041022_402301029.htm 最旧的为2001年10月的, http://www.stats.gov.cn/tjbz/xzqhdm/t20021125_46781.htm 但是有例外,这在代码中说。 方案组织: 效果: 以下为代码: NetConst.vb Namespace NET Public Class NetConst Private Sub New() End Sub Public Const GOV_DEFAULT As String = "www.stats.gov.cn" Public Const GOV_ADDRESS As String = "http://www.stats.gov.cn/tjbz/xzqhdm/" Public Const WEBTABLE_INDEX As String = "9" End Class End Namespace NetInformation.vb
Imports System.Net Imports System.IO Imports System.Text.RegularExpressions Namespace NET Public Class NetInformation Private gNetUpdateInformations(-1) As NetUpdateInformationItem Public ReadOnly Property UpdateInformationsTable() As DataTable Get Return GetUpdateInformationsTable() End Get End Property Private Function GetUpdateInformationsTable() As DataTable Dim mDataTable As New DataTable("UpdateInformations") With mDataTable .Columns.Add("Address") .Columns.Add("LastDate") For Each item As NetUpdateInformationItem In gNetUpdateInformations .Rows.Add(New String() {item.Address, item.LastDate}) Next .AcceptChanges() End With Return mDataTable End Function Public Sub DownloadInformationsFromNet() Dim mRegex As New Regex("(?<date>2.*日)") Dim mNetUpdateItems As NetUpdateItem() = GetNetUpdateItems() Dim mNetUpdateInformationItem As NetUpdateInformationItem Dim tmp As NetUpdateItem '由于后两个不合规则,舍去不用。最后一个没有日期,倒数第二个提供的是附件数据。 For i As Integer = 0 To mNetUpdateItems.Length - 1 - 2 tmp = mNetUpdateItems(i) mNetUpdateInformationItem = New NetUpdateInformationItem With mNetUpdateInformationItem .Address = tmp.Address .LastDate = CType(mRegex.Match(tmp.Content).Value, Date).ToString("yyyyMMdd") End With AppendItem(Of NetUpdateInformationItem)(mNetUpdateInformationItem, gNetUpdateInformations) Next End Sub Private Function GetNetUpdateItems() As NetUpdateItem() Dim mResult(-1) As NetUpdateItem Dim mRegex As New Regex("<a href='(?<href>.*)' target='_blank' >(?<content>.*行政区划代码.*)</a>") Dim mCollection As MatchCollection Dim mClient As New WebClient() Dim mStream As Stream = mClient.OpenRead(NetConst.GOV_ADDRESS) Dim mReader As New StreamReader(mStream, System.Text.Encoding.Default) Dim mText As String = mReader.ReadToEnd mReader.Close() mStream.Close() mClient.Dispose() mCollection = mRegex.Matches(mText) Dim tmpItem As NetUpdateItem For Each m As Match In mCollection tmpItem = New NetUpdateItem With tmpItem .Address = NetConst.GOV_ADDRESS & m.Groups(1).Value .Content = m.Groups(2).Value End With AppendItem(Of NetUpdateItem)(tmpItem, mResult) Next Return mResult End Function Private Structure NetUpdateItem Public Address As String Public Content As String End Structure Private Structure NetUpdateInformationItem Public Address As String Public LastDate As String End Structure Private Sub AppendItem(Of T)(ByVal value As T, ByRef array As T()) ReDim Preserve array(array.Length) array(array.Length - 1) = value End Sub End Class End Namespace ExcelQueryTable.vb Option Strict Off Namespace NET Public Class ExcelQueryTable Private gExcelApplication As Object Private gWorkbook As Object Private gWorksheet As Object Private gQueryTable As Object Sub New() Initialize() End Sub Private Sub Initialize() gExcelApplication = CreateObject("Excel.Application") gExcelApplication.DisplayAlerts = False '使退出时不询问是否存盘 gWorkbook = gExcelApplication.Workbooks.Add gWorksheet = gWorkbook.Worksheets.Add End Sub '这里只作简单处理,详细处理在我的BLOG上有相关“文章”作过介绍 Public Sub Close() gWorkbook.Close() gWorksheet = Nothing gWorkbook = Nothing gExcelApplication.DisplayAlerts = True gExcelApplication.Quit() gExcelApplication = Nothing End Sub Public Function Query(ByVal address As String) As DataTable Dim mDataTable As DataTable = GetDataTable() gWorksheet.Cells.Clear() gQueryTable = gWorksheet.QueryTables.Add( _ Connection:=String.Format("URL;{0}", address), _ Destination:=gWorksheet.Range("A1")) With gQueryTable .WebTables = NetConst.WEBTABLE_INDEX '这是固定的 .Refresh(BackgroundQuery:=False) End With Dim mCell As Object Dim mMaxRowIndex As Integer Dim line As Object mMaxRowIndex = gWorksheet.Cells.SpecialCells(11).Row 'Excel.XlCellType.xlCellTypeLastCell=11 mCell = gWorksheet.Range("A1") For i As Integer = 0 To mMaxRowIndex line = mCell.Offset(i, 0).Value If line IsNot Nothing Then AddRow(mDataTable, line.ToString) End If Next gQueryTable.Delete() gQueryTable = Nothing Return mDataTable End Function Private Sub AddRow(ByVal table As DataTable, ByVal line As String) line = line.Trim If line.Length < 7 Then Exit Sub Dim tmpCode As String Dim tmpName As String tmpCode = line.Substring(0, 6) tmpName = line.Substring(6).Trim If Not IsNumeric(tmpCode) Then Exit Sub '前六位需是数字 table.Rows.Add(New String() {tmpCode, tmpName}) End Sub Private Function GetDataTable() As DataTable '表的列名意义为:代码、名称 Dim mDataTable As New DataTable("RegionalCode") With mDataTable.Columns .Add("Code") .Add("Name") End With Return mDataTable End Function End Class End Namespace 测试代码: MainForm.vb(界面部分省,在最后有整个方案供下载) Public Class MainForm Private gNetInformation As New RegionalCodeLibrary.NET.NetInformation Private gQueryTable As RegionalCodeLibrary.NET.ExcelQueryTable Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click If Not CheckNetworkIsAvailable() Then Exit Sub ShowMessage("正在下载数据信息 ")gNetInformation.DownloadInformationsFromNet() With Me.ComboBox1 .DataSource = gNetInformation.UpdateInformationsTable .DisplayMember = "LastDate" End With ShowMessage("") End Sub Private Sub Button2_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button2.Click If String.IsNullOrEmpty(Me.ComboBox1.Text) Then Exit Sub If Not CheckNetworkIsAvailable() Then Exit Sub If gQueryTable Is Nothing Then ShowMessage("正在启动Excel ")gQueryTable = New RegionalCodeLibrary.NET.ExcelQueryTable End If Dim mAddress As String = CType(Me.ComboBox1.SelectedItem, DataRowView).Row.Item("Address").ToString ShowMessage(String.Format("正在下载{0}数据 ", Me.ComboBox1.Text))Me.DataGridView1.DataSource = gQueryTable.Query(mAddress) ShowMessage(String.Format("{0}共有数据{1}项", Me.ComboBox1.Text, Me.DataGridView1.RowCount)) End Sub Private Sub Button3_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button3.Click ClearEnvironment() End Sub Private Function CheckNetworkIsAvailable() As Boolean Dim mResult As Boolean = False mResult = My.Computer.Network.IsAvailable If Not mResult Then ShowMessage("本地连接无效") Else Try mResult = My.Computer.Network.Ping(RegionalCodeLibrary.NET.NetConst.GOV_DEFAULT) Catch ex As Exception mResult = False End Try If Not mResult Then ShowMessage(String.Format("本机没有连接Internet或发布网址{0}无效", RegionalCodeLibrary.NET.NetConst.GOV_ADDRESS)) End If End If Return mResult End Function Private Sub ShowMessage(ByVal msg As String) If msg = "" Then msg = "待命" Me.Label1.Text = String.Format("消息:{0}", msg) Me.Label1.Refresh() End Sub Private Sub MainForm_FormClosing(ByVal sender As Object, ByVal e As System.Windows.Forms.FormClosingEventArgs) Handles Me.FormClosing ClearEnvironment() End Sub Private Sub ClearEnvironment() If gQueryTable Is Nothing Then Exit Sub gQueryTable.Close() gQueryTable = Nothing End Sub End Class |
| webasp.net |