.Net/C#: 利用反射编写通用的 rss 2.0 的 reader

80酷酷网    80kuku.com

  rss/*
.Net/C#: 利用反射编写通用的 rss 2.0 的 reader

最近在写一个 Simple Rss Reader
网上找到现成代码两种:
1.代码简单的,但不够通用 (如: 本站的一些专用 rss reader)
2.代码复杂的,但没有足够时间去消化 (如: rssbandit)

遂自己动手:
由于 rss 的基本属性大家都有!
但一些特殊不通用属性,如:
slash:comments
wfw:comment
wfw:commentRss
trackbackping
不一定存在! 如何处理???
我想到了 Reflection,就此提出以下解决方案:
1. Class RssHeader 用于表示 Rss 的头信息
你可以为其添加新属性,原则是:
成员变量 (Field) 的名称为 rss 的 XML 源对应的属性名称前加下划线,XML 属性名称中若含有 ":" 则将其滤掉!
如: <dc:language>zh-CHS</dc:language>
将其映射为:
private string _dclanguage
public string DcLanguage
{
get
{
return this._dclanguage;
}
}

2. Class RssItem 用于表示 Rss 的 Item
添加新属性的原则同 RssHeader!

3. 获取 rss 的 XML 源后通过递归遍历节点 (class SimpleRssReader)
根据实际存在的 rss 属性,通过反射,"构造实例化" RssHeader 和 RssItem!
请仔细参阅 class SimpleRssReader 的 Travel 方法!

4. 数据库 (本文使用了 Microsoft Data Access Application Block 3.1)
表:
Channels (主表)
ChannelsDetails (细表)
字段名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!
存储过程:
SP_AddChannel
SP_AddChannelsDetails
参数名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!


命令行编译:
csc SimpleRssReader.cs /r:C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.Data.OracleClient.dll


全部代码 SimpleRssReader.cs 在此下载
http://www.cnblogs.com/Files/Microshaoft/SimpleRssReader.rar


*/
namespace Microshaoft
{
using System;
using System.Xml;
using System.Text;
using System.Reflection;
using System.Collections;
using System.Text.RegularExpressions;





/// <summary>
/// Channel-level ("header") information of an RSS 2.0 feed.
/// </summary>
/// <remarks>
/// Only the feed URL is assigned by this class itself (in the constructor).
/// Every other backing field is filled in by SimpleRssReader through
/// reflection: it looks up a field named "_" + XML element name with any ":"
/// removed (for example dc:language maps to _dclanguage). The private field
/// names below must therefore not be renamed.
/// </remarks>
public class RssHeader
{
    // Backing fields; apart from _URL they are written only via reflection.
    private string _URL;
    private string _title;
    private string _description;
    private string _link;
    private string _language;
    private string _generator;
    private string _ttl;
    private string _copyright;
    private string _pubDate;
    private string _category;
    private string _lastBuildDate;
    private string _managingEditor;
    private string _dclanguage; // dc:language

    /// <summary>Creates a header for the feed located at <paramref name="URL"/>.</summary>
    /// <param name="URL">Address of the feed.</param>
    public RssHeader(string URL)
    {
        _URL = URL;
    }

    /// <summary>Value of the channel's title element.</summary>
    public string Title
    {
        get { return _title; }
    }

    /// <summary>Value of the channel's description element.</summary>
    public string Description
    {
        get { return _description; }
    }

    /// <summary>Value of the channel's link element.</summary>
    public string Link
    {
        get { return _link; }
    }

    /// <summary>Value of the channel's language element.</summary>
    public string Language
    {
        get { return _language; }
    }

    /// <summary>Value of the channel's generator element.</summary>
    public string Generator
    {
        get { return _generator; }
    }

    /// <summary>Value of the channel's ttl element, kept as text.</summary>
    public string Ttl
    {
        get { return _ttl; }
    }

    /// <summary>Value of the channel's copyright element.</summary>
    public string Copyright
    {
        get { return _copyright; }
    }

    /// <summary>The pubDate text converted by Util.ParseDateTime.</summary>
    public DateTime PubDate
    {
        get { return Util.ParseDateTime(_pubDate); }
    }

    /// <summary>Value of the channel's category element.</summary>
    public string Category
    {
        get { return _category; }
    }

    /// <summary>The lastBuildDate text converted by Util.ParseDateTime.</summary>
    public DateTime LastBuildDate
    {
        get { return Util.ParseDateTime(_lastBuildDate); }
    }

    /// <summary>Value of the channel's managingEditor element.</summary>
    public string ManagingEditor
    {
        get { return _managingEditor; }
    }

    /// <summary>The feed address passed to the constructor.</summary>
    public string URL
    {
        get { return _URL; }
    }

    /// <summary>Value of the channel's dc:language element.</summary>
    public string DcLanguage
    {
        get { return _dclanguage; }
    }
}
/// <summary>
/// A single item of an RSS 2.0 feed.
/// </summary>
/// <remarks>
/// Nothing in this class assigns the backing fields. SimpleRssReader sets
/// them through reflection, using "_" + XML element name with any ":" removed
/// (for example wfw:commentRss maps to _wfwcommentRss), and sets _Header to
/// the owning channel header. The private field names must not be renamed.
/// </remarks>
public class RssItem
{
    // Owning channel header; assigned via reflection by SimpleRssReader.
    private RssHeader _Header;

    // Element values; assigned via reflection by SimpleRssReader.
    private string _title;
    private string _link;
    private string _description;
    private string _category;
    private string _author;
    private string _pubDate;
    private string _comments;
    private string _guid;
    private string _slashcomments;  // slash:comments
    private string _wfwcomment;     // wfw:comment
    private string _wfwcommentRss;  // wfw:commentRss
    private string _trackbackping;  // trackback:ping

    /// <summary>Header of the channel this item was read from.</summary>
    public RssHeader Header
    {
        get { return _Header; }
    }

    /// <summary>Value of the item's trackback:ping element.</summary>
    public string TrackbackPing
    {
        get { return _trackbackping; }
    }

    /// <summary>Value of the item's wfw:commentRss element.</summary>
    public string WfwCommentRss
    {
        get { return _wfwcommentRss; }
    }

    /// <summary>Value of the item's wfw:comment element.</summary>
    public string WfwComment
    {
        get { return _wfwcomment; }
    }

    /// <summary>Value of the item's slash:comments element.</summary>
    public string SlashComments
    {
        get { return _slashcomments; }
    }

    /// <summary>Value of the item's title element.</summary>
    public string Title
    {
        get { return _title; }
    }

    /// <summary>Value of the item's link element.</summary>
    public string Link
    {
        get { return _link; }
    }

    /// <summary>Value of the item's description element.</summary>
    public string Description
    {
        get { return _description; }
    }

    /// <summary>Value of the item's category element.</summary>
    public string Category
    {
        get { return _category; }
    }

    /// <summary>Value of the item's author element.</summary>
    public string Author
    {
        get { return _author; }
    }

    /// <summary>The pubDate text converted by Util.ParseDateTime.</summary>
    public DateTime PubDate
    {
        get { return Util.ParseDateTime(_pubDate); }
    }

    /// <summary>Value of the item's comments element.</summary>
    public string Comments
    {
        get { return _comments; }
    }

    /// <summary>Value of the item's guid element.</summary>
    public string Guid
    {
        get { return _guid; }
    }
}
/// <summary>
/// Minimal RSS 2.0 reader. It loads a feed, copies the child elements of each
/// channel and of each item into the matching private fields of
/// <see cref="RssHeader"/> and <see cref="RssItem"/> via reflection, and
/// reports progress through events.
/// </summary>
/// <remarks>
/// An element is mapped to the field named "_" + element name with every ":"
/// removed (for example dc:language maps to _dclanguage). Elements without a
/// matching string field are ignored, so feeds may omit or add elements freely.
/// </remarks>
public class SimpleRssReader
{
    /// <summary>Handler signature for <see cref="RssHeaderReceive"/>.</summary>
    public delegate void RssHeaderReceiveEventHandler(SimpleRssReader Sender, RssHeader Header);

    /// <summary>Raised once per channel, after its header fields have been filled in.</summary>
    public event RssHeaderReceiveEventHandler RssHeaderReceive;

    /// <summary>Handler signature for <see cref="RssItemReceive"/>.</summary>
    public delegate void RssItemReceiveEventHandler(SimpleRssReader Sender, RssItem Item);

    /// <summary>Raised once per item, after its fields have been filled in.</summary>
    public event RssItemReceiveEventHandler RssItemReceive;

    // Binding flags used for every reflective field lookup in this class.
    private const BindingFlags FieldLookup =
        BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public;

    private Type _TRS; // typeof(RssHeader)
    private Type _tri; // typeof(RssItem)

    // Items collected while a feed is being read.
    private ArrayList _RssItemsAL;

    private RssHeader _rs;

    // Snapshot of all items read by the last call to Rss().
    private RssItem[] _RssItems;

    /// <summary>Header of the feed read by the last call to <see cref="Rss"/>.</summary>
    public RssHeader RssHeader
    {
        get
        {
            return this._rs;
        }
    }

    /// <summary>
    /// Items read by the last call to <see cref="Rss"/>; an empty array when
    /// the feed contained no items.
    /// </summary>
    public RssItem[] RssItems
    {
        get
        {
            return this._RssItems;
        }
    }

    /// <summary>
    /// Loads the feed at <paramref name="URL"/> and parses every /rss/channel
    /// node, raising <see cref="RssHeaderReceive"/> and
    /// <see cref="RssItemReceive"/> along the way.
    /// </summary>
    /// <param name="URL">Address (or file path) of the RSS document.</param>
    public void Rss(string URL)
    {
        XmlDocument xd = new XmlDocument();
        // If this turns out to be too slow, a WebRequest could be used instead.
        xd.Load(URL);
        XmlNodeList xnl = xd.SelectNodes("/rss/channel");

        this._rs = new RssHeader(URL);

        this._TRS = typeof(RssHeader);
        this._tri = typeof(RssItem);

        this._RssItemsAL = new ArrayList();

        foreach (XmlNode xn in xnl)
        {
            this.Travel(xn);
        }

        // Always publish a fresh array so that results of an earlier call are
        // never left behind and callers can enumerate without a null check.
        this._RssItems = (RssItem[]) this._RssItemsAL.ToArray(typeof(RssItem));
    }

    /// <summary>
    /// Processes one channel node: fills the header, raises the header event,
    /// then builds and reports one <see cref="RssItem"/> per item child.
    /// </summary>
    /// <param name="xn">The channel node.</param>
    private void Travel(XmlNode xn)
    {
        if (!xn.HasChildNodes)
        {
            return;
        }

        // The header is handled before any item so that subscribers (for
        // example code that stores the channel first) see it first, and so
        // that it is reported even when the channel has no items.
        FillFields(this._TRS, this._rs, xn);
        if (this.RssHeaderReceive != null)
        {
            this.RssHeaderReceive(this, this._rs);
        }

        FieldInfo headerField = this._tri.GetField("_Header", FieldLookup);

        foreach (XmlNode x in xn.ChildNodes)
        {
            if (x.Name != "item")
            {
                continue;
            }

            // Every item, including the first one, becomes an RssItem.
            RssItem ri = new RssItem();
            FillFields(this._tri, ri, x);

            // Link the item to the header of its channel.
            if (headerField != null)
            {
                headerField.SetValue(ri, this._rs);
            }

            if (this.RssItemReceive != null)
            {
                this.RssItemReceive(this, ri);
            }
            this._RssItemsAL.Add(ri);
        }
    }

    /// <summary>
    /// Copies the inner text of each child element of <paramref name="parent"/>
    /// into the string field of <paramref name="target"/> whose name is
    /// "_" + element name with ":" removed. Children named "item" and elements
    /// without a matching string field are skipped.
    /// </summary>
    /// <param name="type">Type on which the fields are looked up.</param>
    /// <param name="target">Instance whose fields are assigned.</param>
    /// <param name="parent">Node whose children supply the values.</param>
    private static void FillFields(Type type, object target, XmlNode parent)
    {
        foreach (XmlNode n in parent.ChildNodes)
        {
            if (n.NodeType != XmlNodeType.Element || n.Name == "item")
            {
                continue;
            }

            FieldInfo fi = type.GetField("_" + n.Name.Replace(":", ""), FieldLookup);

            // Only string fields are data slots; this keeps an element such as
            // <Header> from being written into the RssHeader-typed _Header field.
            if (fi != null && fi.FieldType == typeof(string))
            {
                fi.SetValue(target, n.InnerText);
            }
        }
    }
}

/// <summary>
/// Small helpers for date parsing and HTML stripping.
/// </summary>
public class Util
{
    // Replacement text for each pattern built in CreateStrippers(), same order.
    private static readonly string[] _replacements =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\u00A1", // chr(161)
        "\u00A2", // chr(162)
        "\u00A3", // chr(163)
        "\u00A9", // chr(169)
        "",
        "\r\n",
        ""
    };

    // Built once; the patterns never change between calls.
    private static readonly Regex[] _strippers = CreateStrippers();

    /// <summary>Builds the regular expressions applied, in order, by StripHTML.</summary>
    private static Regex[] CreateStrippers()
    {
        // Verbatim strings: the backslashes belong to the regex syntax and
        // "" stands for a single double-quote character.
        string[] patterns =
        {
            @"<script[^>]*?>.*?</script>",
            @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
            @"([\r\n])[\s]+",
            @"&(quot|#34);",
            @"&(amp|#38);",
            @"&(lt|#60);",
            @"&(gt|#62);",
            @"&(nbsp|#160);",
            @"&(iexcl|#161);",
            @"&(cent|#162);",
            @"&(pound|#163);",
            @"&(copy|#169);",
            @"&#(\d+);",
            @"-->",
            @"<!--.*\n"
        };

        Regex[] result = new Regex[patterns.Length];
        for (int i = 0; i < patterns.Length; i++)
        {
            result[i] = new Regex(patterns[i], RegexOptions.Multiline | RegexOptions.IgnoreCase);
        }
        return result;
    }

    /// <summary>
    /// Parses a date/time string using the invariant culture.
    /// </summary>
    /// <param name="s">Text to parse; may be null or empty.</param>
    /// <returns>
    /// The parsed value, or the current local time when <paramref name="s"/>
    /// is null, empty or cannot be parsed.
    /// </returns>
    public static DateTime ParseDateTime(string s)
    {
        if (s == null || s.Length == 0)
        {
            return DateTime.Now;
        }

        try
        {
            // Feed dates are machine-written, so parsing must not depend on
            // the culture of the machine running the reader.
            return DateTime.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
        }
        catch (FormatException)
        {
            // Not a recognisable date: fall through to the default below.
        }
        catch (ArgumentException)
        {
            // Out-of-range components: fall through to the default below.
        }
        return DateTime.Now;
    }

    /// <summary>
    /// Removes HTML tags from a string and decodes a handful of common
    /// character entities.
    /// </summary>
    /// <param name="HTML">Source text; may be null or empty.</param>
    /// <returns>
    /// The text without markup, angle brackets or CR/LF pairs; the input
    /// itself when it is null or empty.
    /// </returns>
    public static string StripHTML(string HTML)
    {
        if (HTML == null || HTML.Length == 0)
        {
            return HTML;
        }

        string s = HTML;
        for (int i = 0; i < _strippers.Length; i++)
        {
            s = _strippers[i].Replace(s, _replacements[i]);
        }

        // string.Replace returns a new string; the results have to be
        // assigned back or these three calls have no effect.
        s = s.Replace("<", "");
        s = s.Replace(">", "");
        s = s.Replace("\r\n", "");
        return s;
    }
}
}

//测试程序
namespace Test
{
using System;
using System.Data;
using System.Reflection;
using System.Data.SqlClient;

using Microshaoft;
using Microshaoft.Data;

/// <summary>
/// Console test program: reads one feed with SimpleRssReader, prints what it
/// receives and stores headers and items through the stored procedures
/// SP_AddChannel and SP_AddChannelsDetails.
/// </summary>
class ConsoleApplication
{
    private SqlConnection _Connection;
    public string _Channel;

    /// <summary>Connection used by SaveToDataBase.</summary>
    public SqlConnection Connection
    {
        set
        {
            this._Connection = value;
        }
        get
        {
            return this._Connection;
        }
    }

    /// <summary>Entry point: reads the feed, stores it and prints the titles.</summary>
    static void Main()
    {
        // Other feeds that were tried during development:
        //   http://www.ccw.com.cn/rss/news2/1.xml
        //   http://dzh.mop.com/topic/rss.jsp?type=28
        //   http://www.ccw.com.cn/rss/news2/15.xml
        //   http://www.cnblogs.com/rss.aspx?id=-1
        //   http://weblog.siliconvalley.com/column/dangillmor/index.xml
        //   http://www.skyone.com.cn/sub/rss/list_jjsc.xml
        string s = "http://localhost/rss.xml";

        ConsoleApplication a = new ConsoleApplication();

        // NOTE(review): hard-coded "sa" login with an empty password; move the
        // connection string to configuration before using this anywhere real.
        a.Connection = new SqlConnection("server=SERVER\\PSQLKE;user id=sa;password=;database=rss");
        try
        {
            a.Connection.Open();

            SimpleRssReader srr = new SimpleRssReader();

            srr.RssHeaderReceive += new Microshaoft.SimpleRssReader.RssHeaderReceiveEventHandler(a.srr_RssHeaderReceive);
            srr.RssItemReceive += new Microshaoft.SimpleRssReader.RssItemReceiveEventHandler(a.srr_RssItemReceive);

            System.Console.WriteLine("waiting ....");
            srr.Rss(s); // TODO: make this multi-threaded or asynchronous

            System.Console.WriteLine("print all rss Header and items ....");
            System.Console.ReadLine();
            System.Console.WriteLine("Header: " + srr.RssHeader.Title);

            // RssItems may be null when the feed had no items.
            RssItem[] items = srr.RssItems;
            if (items != null)
            {
                foreach (RssItem ri in items)
                {
                    System.Console.WriteLine("item: " + ri.Title);
                }
            }
            System.Console.ReadLine();
        }
        finally
        {
            // Release the connection even when reading or saving fails.
            a.Connection.Close();
        }
    }

    /// <summary>Prints the header and stores it with SP_AddChannel.</summary>
    private void srr_RssHeaderReceive(SimpleRssReader Sender, RssHeader Header)
    {
        System.Console.WriteLine("Header:" + Header.Link);
        System.Console.WriteLine("Header:" + Header.Title);

        this.SaveToDataBase("SP_AddChannel", typeof(RssHeader), Header);
    }

    /// <summary>Prints the item and stores it with SP_AddChannelsDetails.</summary>
    private void srr_RssItemReceive(SimpleRssReader Sender, RssItem Item)
    {
        System.Console.WriteLine("Item: " + Item.Title);
        System.Console.WriteLine("Item: " + Item.Link);

        // An item without a description leaves the property null.
        string description = Item.Description;
        System.Console.WriteLine("Item: " + (description == null ? "" : Util.StripHTML(description)));

        this.SaveToDataBase("SP_AddChannelsDetails", typeof(RssItem), Item);
    }

    /// <summary>
    /// Executes stored procedure <paramref name="sp"/>, taking each input
    /// parameter's value from the public property of
    /// <paramref name="instance"/> with the same name (case-insensitive,
    /// ignoring the leading "@"). String values are passed through
    /// Util.StripHTML first. The last parameter is read back as the result.
    /// </summary>
    /// <param name="sp">Name of the stored procedure.</param>
    /// <param name="t">Type whose public properties are read.</param>
    /// <param name="instance">Object supplying the property values.</param>
    private void SaveToDataBase(string sp, Type t, object instance)
    {
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

        // Fetch every parameter of the stored procedure.
        SqlParameter[] spa = SqlHelperParameterCache.GetSpParameterSet(this.Connection, sp);
        System.Collections.Hashtable ht = new System.Collections.Hashtable();

        for (int i = 0; i < spa.Length; i++)
        {
            // Remember the position of each parameter under its bare,
            // lower-cased name ("@Title" -> "title").
            ht.Add(spa[i].ParameterName.Replace("@", "").ToLower(invariant), i);

            // Default every parameter to SQL NULL. A CLR null would make
            // ADO.NET omit the parameter from the call altogether.
            spa[i].Value = System.DBNull.Value;
        }

        foreach (PropertyInfo x in t.GetProperties())
        {
            // Look up the parameter position by property name.
            object slot = ht[x.Name.ToLower(invariant)];
            if (slot == null)
            {
                continue;
            }

            SqlParameter parameter = spa[(int) slot];
            if (parameter.Direction != System.Data.ParameterDirection.Input
                && parameter.Direction != System.Data.ParameterDirection.InputOutput)
            {
                continue;
            }

            object o = x.GetValue(instance, null);
            if (o != null && x.PropertyType == typeof(string))
            {
                o = Util.StripHTML((string) o);
            }

            parameter.Value = (o == null) ? (object) System.DBNull.Value : o;
        }

        if (t == typeof(RssItem))
        {
            // The first parameter of the item procedure is the feed URL,
            // which lives on the item's header rather than on the item.
            spa[0].Value = ((RssItem) instance).Header.URL;
        }

        SqlHelper.ExecuteNonQuery(this.Connection, CommandType.StoredProcedure, sp, spa);

        object result = spa[spa.Length - 1].Value;
        if (result != null && result != System.DBNull.Value)
        {
            System.Console.WriteLine("Save to ID: {0} successful!", result);
        }
        else
        {
            System.Console.WriteLine("save failed! may be duplicate!");
        }
    }
}
}

//==========================================================================================================
/*
--sql Script
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannel]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannel]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannelsDetails]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannelsDetails]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Channels]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[Channels]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[ChannelsDetails]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[ChannelsDetails]
GO

CREATE TABLE [dbo].[Channels] (
[ID] [int] IDENTITY (1, 1) NOT NULL ,
[URL] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
[Channel] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[Title] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[Description] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
[link] [varchar] (500) COLLATE Chinese_PRC_CI_AS NULL ,
[language] [varchar] (10) COLLATE Chinese_PRC_CI_AS NULL ,
[generator] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[ttl] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[copyright] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[pubDate] [datetime] NULL ,
[category] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
[dclanguage] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

CREATE TABLE [dbo].[ChannelsDetails] (
[ID] [int] IDENTITY (1, 1) NOT NULL ,
[ChannelID] [int] NULL ,
[title] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[link] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[description] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[category] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[author] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[pubDate] [datetime] NULL ,
[comments] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[guid] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
[trackbackping] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO


CREATE proc SP_AddChannel
@URL varchar(8000)
,@link varchar(8000)
,@Channel varchar(8000)
,@Title varchar(8000)
,@Image varchar(8000)
,@Description varchar(7999)
,@language varchar(8000)
,@generator varchar(8000)
,@ttl varchar(8000)
,@copyright varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Docs varchar(8000)
,@ManagingEditor varchar(8000)
,@dclanguage varchar(8000)
,@ID int out
as
set @ID = 0
insert into Channels ([URL],[Channel],[Title],[Description],[link],[language],[generator],[ttl],[copyright],[pubDate],[category],[dclanguage])
select @URL,@Channel,@Title,@Description,@link,@language,@generator,@ttl,@copyright,@pubDate,@category,@dclanguage
where not exists(select 1 from Channels where [URL] = @URL)
select @ID = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO



CREATE proc SP_AddChannelsDetails
@URL varchar(8000)
,@Title varchar(8000)
,@Description varchar(7000)
,@link varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Comments varchar(8000)
,@Guid varchar(8000)
,@trackbackping varchar(8000)
,@ID int out
as
set @ID = 0
insert into ChannelsDetails ([ChannelID],[Title],[Description],[link],[pubDate],[category],[comments],[guid],[trackbackping])
select id,@Title,@Description,@link,@pubDate,@category,@Comments,isnull(@Guid,@link),@trackbackping
from Channels
where not exists (select 1 from ChannelsDetails where guid = isnull(@Guid,@link)) and URL = @URL
select @ID = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
*/


分享到
  • 微信分享
  • 新浪微博
  • QQ好友
  • QQ空间
点击: