.Net/C#: 利用反射编写通用的 rss 2.0 的 reader - 中国WEB开发者网络 (http://www.webasp.net) -- 技术教程 (http://www.webasp.net/article/) --- .Net/C#: 利用反射编写通用的 rss 2.0 的 reader (http://www.webasp.net/article/18/17236.htm) |
| -- 作者:未知 -- 发布日期: 2005-03-29 |
/*
.Net/C#: using reflection to write a general-purpose RSS 2.0 reader

I have recently been writing a Simple RSS Reader.
The ready-made code found online falls into two kinds:
  1. Simple code that is not general enough
     (e.g. some of the special-purpose RSS readers on this site).
  2. Complex code that I do not have enough time to digest (e.g. RssBandit).
So I wrote my own.

Every feed carries the basic RSS elements, but some special, non-universal
ones such as
    slash:comments
    wfw:comment
    wfw:commentRss
    trackbackping
are not guaranteed to exist. How should they be handled?
I thought of Reflection, and propose the following solution:

1. Class RssHeader represents the RSS header (channel) information.
   You can add new properties to it. The rule is: the member field is named
   after the corresponding element in the RSS XML source, prefixed with an
   underscore; any ":" in the XML name is stripped. For example
       <dc:language>zh-CHS</dc:language>
   is mapped to:
       private string _dclanguage;
       public string DcLanguage { get { return this._dclanguage; } }

2. Class RssItem represents one RSS item.
   The rule for adding new properties is the same as for RssHeader.

3. After the RSS XML source is loaded, the channel nodes are walked
   (class SimpleRssReader) and, based on the elements that actually exist,
   the RssHeader and RssItem instances are populated through reflection.
   Please read the Travel method of class SimpleRssReader carefully.

4. Database (this article uses the Microsoft Data Access Application Block 3.1)
   Tables:
       Channels        (master table)
       ChannelsDetails (detail table)
   Column names and data types strictly follow the element names of the RSS
   XML source; any ":" in the XML name is stripped.
   Stored procedures:
       SP_AddChannel
       SP_AddChannelsDetails
   Parameter names and data types strictly follow the element names of the
   RSS XML source; any ":" in the XML name is stripped.

Command-line build:
csc SimpleRsReader.cs /r:C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.Data.OracleClient.dll

The complete code, SimpleRssReader.cs, can be downloaded from
http://www.cnblogs.com/Files/Microshaoft/SimpleRssReader.rar
*/
namespace Microshaoft
{
    using System;
    using System.Xml;
    using System.Text;
    using System.Reflection;
    using System.Collections;
    using System.Text.RegularExpressions;

    /// <summary>
    /// Channel-level ("header") information of an RSS 2.0 feed.
    /// Apart from the feed URL, every value is stored in a private string field
    /// that <see cref="SimpleRssReader"/> fills in through reflection.
    /// </summary>
    public class RssHeader
    {
        // The private fields below are assigned by class SimpleRssReader through
        // reflection. Each field is named "_" + XML element name with ":" removed.
        private string _dclanguage; // dc:language
        private string _URL;
        private string _managingEditor;
        private string _lastBuildDate;
        private string _title;
        private string _description;
        private string _link;
        private string _language;
        private string _generator;
        private string _ttl;
        private string _copyright;
        private string _pubDate;
        private string _category;

        /// <summary>Creates a header for the feed located at <paramref name="URL"/>.</summary>
        /// <param name="URL">The feed URL.</param>
        public RssHeader(string URL)
        {
            this._URL = URL;
        }

        public string Title
        {
            get { return this._title; }
        }

        public string Description
        {
            get { return this._description; }
        }

        public string Link
        {
            get { return this._link; }
        }

        public string Language
        {
            get { return this._language; }
        }

        public string Generator
        {
            get { return this._generator; }
        }

        public string Ttl
        {
            get { return this._ttl; }
        }

        public string Copyright
        {
            get { return this._copyright; }
        }

        /// <summary>
        /// The channel's pubDate parsed with <see cref="Util.ParseDateTime"/>;
        /// evaluates to the current time when the value is missing or unparsable.
        /// </summary>
        public DateTime PubDate
        {
            get { return Util.ParseDateTime(this._pubDate); }
        }

        public string Category
        {
            get { return this._category; }
        }

        /// <summary>
        /// The channel's lastBuildDate parsed with <see cref="Util.ParseDateTime"/>;
        /// evaluates to the current time when the value is missing or unparsable.
        /// </summary>
        public DateTime LastBuildDate
        {
            get { return Util.ParseDateTime(this._lastBuildDate); }
        }

        public string ManagingEditor
        {
            get { return this._managingEditor; }
        }

        /// <summary>The feed URL passed to the constructor.</summary>
        public string URL
        {
            get { return this._URL; }
        }

        public string DcLanguage
        {
            get { return this._dclanguage; }
        }
    }

    /// <summary>
    /// One item of an RSS 2.0 feed. All values are stored in private fields that
    /// <see cref="SimpleRssReader"/> fills in through reflection.
    /// </summary>
    public class RssItem
    {
        // Assigned by SimpleRssReader through reflection (field name "_Header").
        private RssHeader _Header;

        // The private fields below are assigned by class SimpleRssReader through
        // reflection. Each field is named "_" + XML element name with ":" removed.
        private string _title;
        private string _link;
        private string _description;
        private string _category;
        private string _author;
        private string _pubDate;
        private string _comments;
        private string _guid;
        private string _slashcomments;
        private string _wfwcomment;
        private string _wfwcommentRss;
        private string _trackbackping;

        /// <summary>The header of the channel this item was read from.</summary>
        public RssHeader Header
        {
            get { return this._Header; }
        }

        public string TrackbackPing
        {
            get { return this._trackbackping; }
        }

        public string WfwCommentRss
        {
            get { return this._wfwcommentRss; }
        }

        public string WfwComment
        {
            get { return this._wfwcomment; }
        }

        public string SlashComments
        {
            get { return this._slashcomments; }
        }

        public string Title
        {
            get { return this._title; }
        }

        public string Link
        {
            get { return this._link; }
        }

        public string Description
        {
            get { return this._description; }
        }

        public string Category
        {
            get { return this._category; }
        }

        public string Author
        {
            get { return this._author; }
        }

        /// <summary>
        /// The item's pubDate parsed with <see cref="Util.ParseDateTime"/>;
        /// evaluates to the current time when the value is missing or unparsable.
        /// </summary>
        public DateTime PubDate
        {
            get { return Util.ParseDateTime(this._pubDate); }
        }

        public string Comments
        {
            get { return this._comments; }
        }

        public string Guid
        {
            get { return this._guid; }
        }
    }

    /// <summary>
    /// Loads an RSS 2.0 document and maps its channel and item elements onto
    /// <see cref="RssHeader"/> and <see cref="RssItem"/> instances via reflection.
    /// </summary>
    public class SimpleRssReader
    {
        /// <summary>Raised once per channel after its header has been populated.</summary>
        public delegate void RssHeaderReceiveEventHandler(SimpleRssReader Sender, RssHeader Header);
        public event RssHeaderReceiveEventHandler RssHeaderReceive;

        /// <summary>Raised after each item has been populated.</summary>
        public delegate void RssItemReceiveEventHandler(SimpleRssReader Sender, RssItem Item);
        public event RssItemReceiveEventHandler RssItemReceive;

        // Lookup flags shared by every reflective field access in this class.
        private const BindingFlags FieldFlags =
            BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public;

        private Type _TRS; // typeof(RssHeader)
        private Type _tri; // typeof(RssItem)
        private ArrayList _RssItemsAL;
        private RssHeader _rs;

        // Holds every RssItem read by the last call to Rss().
        private RssItem[] _RssItems;

        /// <summary>The header read by the last call to <see cref="Rss"/>.</summary>
        public RssHeader RssHeader
        {
            get { return this._rs; }
        }

        /// <summary>
        /// The items read by the last call to <see cref="Rss"/>; an empty array
        /// when the feed contained no items.
        /// </summary>
        public RssItem[] RssItems
        {
            get { return this._RssItems; }
        }

        /// <summary>
        /// Loads the RSS document at <paramref name="URL"/>, populates
        /// <see cref="RssHeader"/> and <see cref="RssItems"/>, and raises
        /// <see cref="RssHeaderReceive"/> / <see cref="RssItemReceive"/> along the way.
        /// </summary>
        /// <param name="URL">Location of the RSS document, as accepted by XmlDocument.Load.</param>
        public void Rss(string URL)
        {
            XmlDocument xd = new XmlDocument();
            // If this turns out to be too slow, WebRequest could be used instead.
            xd.Load(URL);
            XmlNodeList xnl = xd.SelectNodes("/rss/channel");

            this._rs = new RssHeader(URL);
            this._TRS = typeof(RssHeader);
            this._tri = typeof(RssItem);
            this._RssItemsAL = new ArrayList();

            foreach (XmlNode xn in xnl)
            {
                this.Travel(xn);
            }

            // Always publish a fresh array so callers can enumerate RssItems
            // without a null check and never see items from a previous feed.
            this._RssItems = (RssItem[]) this._RssItemsAL.ToArray(typeof(RssItem));
        }

        /// <summary>
        /// Processes one channel node: fills the header from the channel's
        /// non-item children, then builds one RssItem per item child.
        /// </summary>
        /// <param name="xn">The channel node.</param>
        private void Travel(XmlNode xn)
        {
            // Header: populated even when the channel has no items at all.
            foreach (XmlNode n in xn.ChildNodes)
            {
                if (n.Name != "item")
                {
                    AssignField(this._TRS, this._rs, n);
                }
            }

            if (this.RssHeaderReceive != null)
            {
                this.RssHeaderReceive(this, this._rs);
            }

            // Items: every <item>, including the first one, becomes an RssItem.
            foreach (XmlNode x in xn.ChildNodes)
            {
                if (x.Name != "item")
                {
                    continue;
                }

                RssItem ri = new RssItem();
                foreach (XmlNode n in x.ChildNodes)
                {
                    AssignField(this._tri, ri, n);
                }

                // Link the item back to the header of its channel.
                FieldInfo fi = this._tri.GetField("_Header", FieldFlags);
                if (fi != null)
                {
                    fi.SetValue(ri, this._rs);
                }

                if (this.RssItemReceive != null)
                {
                    this.RssItemReceive(this, ri);
                }

                this._RssItemsAL.Add(ri);
            }
        }

        /// <summary>
        /// Copies the inner text of <paramref name="n"/> into the string field of
        /// <paramref name="target"/> named "_" + node name with ":" removed,
        /// when such a field exists.
        /// </summary>
        private static void AssignField(Type t, object target, XmlNode n)
        {
            FieldInfo fi = t.GetField("_" + n.Name.Replace(":", ""), FieldFlags);

            // Only string fields are assignable from InnerText; this keeps an XML
            // element that happens to be called "Header" from hitting the
            // RssHeader-typed _Header field and throwing.
            if (fi != null && fi.FieldType == typeof(string))
            {
                fi.SetValue(target, n.InnerText);
            }
        }
    }

    /// <summary>Small parsing and text helpers used by the reader and the test program.</summary>
    public class Util
    {
        // Patterns applied in order by StripHTML; Replaces[i] is the substitute for Regexs[i].
        private static readonly string[] Regexs =
        {
            @"<script[^>]*?>.*?</script>",
            @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
            @"([\r\n])[\s]+",
            @"&(quot|#34);",
            @"&(amp|#38);",
            @"&(lt|#60);",
            @"&(gt|#62);",
            @"&(nbsp|#160);",
            @"&(iexcl|#161);",
            @"&(cent|#162);",
            @"&(pound|#163);",
            @"&(copy|#169);",
            @"&#(\d+);",
            @"-->",
            @"<!--.*\n"
        };

        private static readonly string[] Replaces =
        {
            "",
            "",
            "",
            "\"",
            "&",
            "<",
            ">",
            " ",
            "\xa1", // chr(161)
            "\xa2", // chr(162)
            "\xa3", // chr(163)
            "\xa9", // chr(169)
            "",
            "\r\n",
            ""
        };

        /// <summary>
        /// Parses <paramref name="s"/> with DateTime.Parse.
        /// </summary>
        /// <param name="s">The date text; may be null or empty.</param>
        /// <returns>
        /// The parsed value, or DateTime.Now when <paramref name="s"/> is null,
        /// empty, or not in a format DateTime.Parse recognizes.
        /// </returns>
        public static DateTime ParseDateTime(string s)
        {
            if (s == null || s.Length == 0)
            {
                return DateTime.Now;
            }

            try
            {
                return DateTime.Parse(s);
            }
            catch (FormatException)
            {
                // Best effort: an unrecognized date falls back to "now".
                return DateTime.Now;
            }
        }

        /// <summary>
        /// Removes HTML tags, decodes a handful of common entities, and finally
        /// drops any remaining "&lt;", "&gt;" and CR/LF pairs.
        /// </summary>
        /// <param name="HTML">The source text; may be null or empty.</param>
        /// <returns>The stripped text; null or empty input is returned unchanged.</returns>
        public static string StripHTML(string HTML) // found by googling "StripHTML"
        {
            if (HTML == null || HTML.Length == 0)
            {
                return HTML;
            }

            string s = HTML;
            for (int i = 0; i < Regexs.Length; i++)
            {
                s = Regex.Replace(s, Regexs[i], Replaces[i], RegexOptions.Multiline | RegexOptions.IgnoreCase);
            }

            // String.Replace returns a new string, so the result must be kept.
            s = s.Replace("<", "");
            s = s.Replace(">", "");
            s = s.Replace("\r\n", "");
            return s;
        }
    }
}

// Test program
namespace Test
{
    using System;
    using System.Data;
    using System.Reflection;
    using System.Data.SqlClient;
    using Microshaoft;
    using Microshaoft.Data;

    /// <summary>
    /// Console test: reads a feed, prints it, and stores header and items through
    /// the SP_AddChannel / SP_AddChannelsDetails stored procedures.
    /// </summary>
    class ConsoleApplication
    {
        private SqlConnection _Connection;
        public string _Channel;

        public SqlConnection Connection
        {
            set { this._Connection = value; }
            get { return this._Connection; }
        }

        static void Main()
        {
            // Other sample feeds that were tried:
            //   http://www.ccw.com.cn/rss/news2/1.xml
            //   http://dzh.mop.com/topic/rss.jsp?type=28
            //   http://www.ccw.com.cn/rss/news2/15.xml
            //   http://www.cnblogs.com/rss.aspx?id=-1
            //   http://weblog.siliconvalley.com/column/dangillmor/index.xml
            //   http://www.skyone.com.cn/sub/rss/list_jjsc.xml
            string s = "http://localhost/rss.xml";

            ConsoleApplication a = new ConsoleApplication();
            // NOTE(review): hard-coded 'sa' login with a blank password; acceptable
            // only for a local test database - move to configuration for real use.
            a.Connection = new SqlConnection("server=SERVER\\PSQLKE;user id=sa;password=;database=rss");
            a.Connection.Open();
            try
            {
                SimpleRssReader srr = new SimpleRssReader();

                srr.RssHeaderReceive += new Microshaoft.SimpleRssReader.RssHeaderReceiveEventHandler(a.srr_RssHeaderReceive);
                srr.RssItemReceive += new Microshaoft.SimpleRssReader.RssItemReceiveEventHandler(a.srr_RssItemReceive);

                System.Console.WriteLine("waiting ....");
                srr.Rss(s); // to be made multi-threaded or asynchronous later

                System.Console.WriteLine("print all rss Header and items ....");
                System.Console.ReadLine();
                System.Console.WriteLine("Header: " + srr.RssHeader.Title);

                foreach (RssItem ri in srr.RssItems)
                {
                    System.Console.WriteLine("item: " + ri.Title);
                }

                System.Console.ReadLine();
            }
            finally
            {
                // Release the connection even when loading or saving fails.
                a.Connection.Close();
            }
        }

        // Prints the header and stores it through SP_AddChannel.
        private void srr_RssHeaderReceive(SimpleRssReader Sender, RssHeader Header)
        {
            System.Console.WriteLine("Header:" + Header.Link);
            System.Console.WriteLine("Header:" + Header.Title);
            this.SaveToDataBase("SP_AddChannel", typeof(RssHeader), Header);
        }

        // Prints the item and stores it through SP_AddChannelsDetails.
        private void srr_RssItemReceive(SimpleRssReader Sender, RssItem Item)
        {
            System.Console.WriteLine("Item: " + Item.Title);
            System.Console.WriteLine("Item: " + Item.Link);
            System.Console.WriteLine("Item: " + Util.StripHTML(Item.Description));
            this.SaveToDataBase("SP_AddChannelsDetails", typeof(RssItem), Item);
        }

        /// <summary>
        /// Calls stored procedure <paramref name="sp"/>, filling each input
        /// parameter from the public property of <paramref name="instance"/> whose
        /// name matches the parameter name (case-insensitively, "@" removed).
        /// String values are passed through Util.StripHTML first.
        /// </summary>
        /// <param name="sp">Stored procedure name.</param>
        /// <param name="t">Type whose public properties are matched against the parameters.</param>
        /// <param name="instance">Object the property values are read from.</param>
        private void SaveToDataBase(string sp, Type t, object instance)
        {
            // Fetch all parameters of the stored procedure.
            SqlParameter[] spa = SqlHelperParameterCache.GetSpParameterSet(this.Connection, sp);
            System.Collections.Hashtable ht = new System.Collections.Hashtable();

            for (int i = 0; i < spa.Length; i++)
            {
                // Remember which position each parameter name lives at.
                ht.Add(spa[i].ParameterName.ToLower().Replace("@", ""), i);
                // Give every parameter an initial value.
                spa[i].Value = null;
            }

            // Walk all public properties and copy the ones that have a matching parameter.
            foreach (PropertyInfo x in t.GetProperties())
            {
                object position = ht[x.Name.ToLower()];
                if (position == null)
                {
                    continue;
                }

                int i = (int) position;
                ParameterDirection direction = spa[i].Direction;
                if (direction != ParameterDirection.Input && direction != ParameterDirection.InputOutput)
                {
                    continue;
                }

                object o = x.GetValue(instance, null);
                if (o != null && x.PropertyType == typeof(string))
                {
                    o = Util.StripHTML((string) o);
                }

                spa[i].Value = o;
            }

            if (t == typeof(RssItem))
            {
                // RssItem has no URL property of its own; the owning channel is
                // identified by the feed URL. Both procedures declare @URL first,
                // so index 0 is the fallback if the name lookup finds nothing.
                object urlPosition = ht["url"];
                int urlIndex = (urlPosition == null) ? 0 : (int) urlPosition;
                spa[urlIndex].Value = ((RssItem) instance).Header.URL;
            }

            SqlHelper.ExecuteNonQuery(this.Connection, CommandType.StoredProcedure, sp, spa);

            // The last parameter is the procedure's output value (the new row ID).
            object id = spa[spa.Length - 1].Value;
            if (id != System.DBNull.Value)
            {
                System.Console.WriteLine("Save to ID: {0} successful!", id);
            }
            else
            {
                System.Console.WriteLine("save failed! may be duplicate!");
            }
        }
    }
}

//==========================================================================================================
/*
--sql Script
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannel]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannel]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannelsDetails]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannelsDetails]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Channels]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[Channels]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[ChannelsDetails]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[ChannelsDetails]
GO

CREATE TABLE [dbo].[Channels] (
    [ID] [int] IDENTITY (1, 1) NOT NULL ,
    [URL] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
    [Channel] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
    [Title] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
    [Description] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
    [link] [varchar] (500) COLLATE Chinese_PRC_CI_AS NULL ,
    [language] [varchar] (10) COLLATE Chinese_PRC_CI_AS NULL ,
    [generator] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
    [ttl] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
    [copyright] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
    [pubDate] [datetime] NULL ,
    [category] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
    [dclanguage] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

CREATE TABLE [dbo].[ChannelsDetails] (
    [ID] [int] IDENTITY (1, 1) NOT NULL ,
    [ChannelID] [int] NULL ,
    [title] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
    [link] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
    [description] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
    [category] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
    [author] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
    [pubDate] [datetime] NULL ,
    [comments] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
    [guid] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
    [trackbackping] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO

CREATE proc SP_AddChannel
 @URL varchar(8000)
,@link varchar(8000)
,@Channel varchar(8000)
,@Title varchar(8000)
,@Image varchar(8000)
,@Description varchar(7999)
,@language varchar(8000)
,@generator varchar(8000)
,@ttl varchar(8000)
,@copyright varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Docs varchar(8000)
,@ManagingEditor varchar(8000)
,@dclanguage varchar(8000)
,@ int out
as
set @ = 0
insert into Channels ([URL],[Channel],[Title],[Description],[link],[language],[generator],[ttl],[copyright],[pubDate],[category],[dclanguage])
select @URL,@Channel,@Title,@Description,@link,@language,@generator,@ttl,@copyright,@pubDate,@category,@dclanguage
where not exists(select 1 from Channels where [URL] = @URL)
select @ = SCOPE_IDENTITY()
GO

SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO

CREATE proc SP_AddChannelsDetails
 @URL varchar(8000)
,@Title varchar(8000)
,@Description varchar(7000)
,@link varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Comments varchar(8000)
,@Guid varchar(8000)
,@trackbackping varchar(8000)
,@ int out
as
set @ = 0
insert into ChannelsDetails ([ChannelID],[Title],[Description],[link],[pubDate],[category],[comments],[guid],[trackbackping])
select id,@Title,@Description,@link,@pubDate,@category,@comments,isnull(@guid,@link),@trackbackping
from Channels
where not exists (select 1 from ChannelsDetails where guid = isnull(@guid,@link)) and URL = @URL
select @ = SCOPE_IDENTITY()
GO

SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
*/
| webasp.net |