C#XPath的概念
- 游戏开发
- 2025-08-20 23:39:01

一 XPath的概念 1 XPath是对XML进行查询的表达式
① Axes(路径) / 及 //; ② 第几个子节点[1] 等; ③ 属性@ ④ 条件 [] ⑤ 例如 /books/book/@title //price para[@type=“warning”][5]
2 使用XPath① XmlDocument doc=new XmlDocument(); ② doc.LoadXml(strXml); ③ XmlElement root=doc.DocumentElement; ④ XmlNodeList nodes= root.SelectNodes(strXPath); XmlNode node=root.SelectSingleNode(strXPath); node的.NodeType .InnerXml及.Value;
3 使用Xlst进行转换 XMLDocument doc=new XMLDocument(); doc.Load(@".\BookList.xml"); XPathNavigator nav=doc.Createnavigator(); nav.MoveToRoot(); XslTransform xt=new XslTransform(); xt.Load(@".\BookList.xslt"); XmlTextWriter writer=new XmlTextWriter(Console.Out); xt.Transform(nav,null,writer); using System; using System.Collections; using System.Collections.Generic; using System.IO; using System.Linq; using System.Net; using System.Text; using System.Text.RegularExpressions; using System.Threading; using System.Threading.Tasks; namespace 网络爬虫 { public class Crawler { private WebClient webClient = new WebClient(); private Hashtable urls = new Hashtable(); private int count = 0; static void Main(string[] args) { Crawler myCrawler = new Crawler(); string startUrl = "http:// blogs /dstang2000"; if (args.Length >= 1) startUrl = args[0]; myCrawler.urls.Add(startUrl, false);//加入初始页面 new Thread(new ThreadStart(myCrawler.Crawl)).Start();//开始爬行 Console.ReadKey(); } private void Crawl() { Console.WriteLine("开始爬行了....."); while(true) { string current = null; foreach(string url in urls.Keys)//找到一个还没有下载过的链接 { if ((bool)urls[url]) continue;//已经下载过的,不再下载 current = url; } if (current == null || count > 10) break; Console.WriteLine("爬行" + current + "页面!"); string html = DownLoad(current);//下载 urls[current] = true; count++; Parse(html);//解析,并加入新的链接 } Console.WriteLine("爬行结束"); } public string DownLoad(string url) { try { HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url); req.Timeout = 30000; HttpWebResponse response = (HttpWebResponse)req.GetResponse(); byte[] buffer = ReadInstreamIntoMemory(response.GetResponseStream()); string fileName = count.ToString(); FileStream fs = new FileStream(fileName, FileMode.OpenOrCreate); fs.Write(buffer, 0, buffer.Length); fs.Close(); string html = Encoding.UTF8.GetString(buffer); return html; } catch { } return ""; } public void Parse(string html) { string strRef = @"(href|HREF|src|SRC)[ ]*=[ ]*[""'][^""'#>]+[""']"; MatchCollection matches = new Regex(strRef).Matches(html); foreach(Match match in matches) { strRef = match.Value.Substring(match.Value.IndexOf('=') + 1).Trim('"', '\'', '#', ' ', '>'); if (strRef.Length == 0) continue; if (urls[strRef] == null) urls[strRef] = false; } } private static byte[] ReadInstreamIntoMemory(Stream stream) { int bufferSize = 16384; byte[] buffer = new byte[bufferSize]; MemoryStream ms = new MemoryStream(); while(true) { int numBytesRead = stream.Read(buffer, 0, bufferSize); if (numBytesRead <= 0) break; ms.Write(buffer, 0, numBytesRead); } return ms.ToArray(); } } }C#XPath的概念由讯客互联游戏开发栏目发布,感谢您对讯客互联的认可,以及对我们原创作品以及文章的青睐,非常欢迎各位朋友分享到个人网站或者朋友圈,但转载请说明文章出处“C#XPath的概念”