using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Linq;
using System.Net;
using System.Web;
using HtmlAgilityPack;

/// <summary>
/// Downloads an editor help page from the documentation server configured under the
/// "web.doc.teamlab" app setting and rewrites its image and link URLs so the fragment
/// can be embedded into the page that is currently being requested.
/// </summary>
public static class DocHelpParcing
{
    /// <summary>
    /// Title and body markup extracted from a help page.
    /// </summary>
    public class DocHelpText
    {
        public string Title { get; set; }
        public string Content { get; set; }
    }

    // URL fragments that identify a help category on this site. Each entry maps, by
    // position, to the application folder name in DocsCategories.
    private static readonly string[] SiteCategories =
    {
        "description-document-editor",
        "description-presentation-editor",
        "description-spreadsheet-editor"
    };

    private static readonly string[] DocsCategories =
    {
        "documenteditor",
        "presentationeditor",
        "spreadsheeteditor"
    };

    private const string PageExtension = ".aspx";
    private const string ImagesMarker = "/images/";
    private const string MainPartXPath = "//div[@class='mainpart']";

    /// <summary>
    /// Title of the help page for the current request.
    /// Every read calls <see cref="GetContent"/>, i.e. performs a fresh download.
    /// </summary>
    public static string Title
    {
        get { return GetContent().Title; }
    }

    /// <summary>
    /// Body markup of the help page for the current request.
    /// Every read calls <see cref="GetContent"/>, i.e. performs a fresh download.
    /// </summary>
    public static string Content
    {
        get { return GetContent().Content; }
    }

    /// <summary>
    /// Fetches the help page that corresponds to the current request URL, first in the
    /// language returned by <see cref="GetLanguage"/> and then in "en" as a fallback.
    /// </summary>
    /// <returns>
    /// The page title (inner HTML of its h1) and the inner HTML of its "mainpart" div
    /// with the h1 removed. Both values are empty strings when the base domain is not
    /// configured or no page containing a "mainpart" div could be retrieved.
    /// </returns>
    public static DocHelpText GetContent()
    {
        var result = new DocHelpText { Title = "", Content = "" };

        string curUrl = HttpContext.Current.Request.Url.ToString().ToLowerInvariant();
        string category = "";
        string filePath = "";

        for (int i = 0; i < SiteCategories.Length; i++)
        {
            string marker = SiteCategories[i];
            int markerIndex = curUrl.IndexOf(marker, StringComparison.Ordinal);
            if (markerIndex < 0)
            {
                continue;
            }

            category = DocsCategories[i];

            // Drop the page extension and whatever follows it (e.g. a query string).
            // A URL without the extension is used as-is instead of throwing.
            int pathStart = markerIndex + marker.Length;
            int extensionIndex = curUrl.IndexOf(PageExtension, pathStart, StringComparison.Ordinal);
            if (extensionIndex >= 0)
            {
                curUrl = curUrl.Remove(extensionIndex);
            }

            filePath = curUrl.Substring(pathStart);

            // curUrl has been truncated, so matching further categories against it
            // would work on inconsistent data.
            break;
        }

        var baseDomain = ConfigurationManager.AppSettings["web.doc.teamlab"];
        if (baseDomain == null)
        {
            // Without a base domain no absolute URL can be built, so every request
            // would fail anyway; return the empty result directly.
            return result;
        }

        string mainPath = BuildHelpRoot(baseDomain, category, GetLanguage());
        HtmlNode content = LoadMainPart(mainPath + filePath + ".htm");

        if (content == null)
        {
            // Fall back to the English version of the same page.
            mainPath = BuildHelpRoot(baseDomain, category, "en");
            content = LoadMainPart(mainPath + filePath + ".htm");
        }

        if (content == null)
        {
            return result;
        }

        RewriteImageSources(content, mainPath);
        RewriteLinks(content, curUrl, filePath);

        HtmlNode title = content.SelectSingleNode("//h1");
        if (title != null)
        {
            result.Title = title.InnerHtml;

            // The title is returned separately, so take it out of the body markup.
            title.Remove();
        }

        result.Content = content.InnerHtml;
        return result;
    }

    // Builds the root URL of the help resources for one editor and one language.
    private static string BuildHelpRoot(string baseDomain, string category, string language)
    {
        return baseDomain + "/OfficeWeb/apps/" + category + "/main/resources/help/" + language;
    }

    // Downloads the page at url and returns its "mainpart" div, or null when the
    // download failed or the page has no such div.
    private static HtmlNode LoadMainPart(string url)
    {
        var doc = new HtmlDocument();
        doc.LoadHtml(SendRequestToDocs(url));
        return doc.DocumentNode.SelectSingleNode(MainPartXPath);
    }

    // Points every image whose src contains "/images/" at the help root given by mainPath.
    private static void RewriteImageSources(HtmlNode content, string mainPath)
    {
        var images = content.SelectNodes("//img");
        if (images == null)
        {
            return;
        }

        foreach (var img in images)
        {
            var src = img.Attributes["src"];
            if (src == null || src.Value == null)
            {
                continue;
            }

            // Sources without the images folder cannot be mapped; leave them untouched.
            int imagesIndex = src.Value.IndexOf(ImagesMarker, StringComparison.Ordinal);
            if (imagesIndex >= 0)
            {
                src.Value = mainPath + src.Value.Substring(imagesIndex);
            }
        }
    }

    // Converts the relative ".htm" links of the help page into ".aspx" links based on curUrl.
    private static void RewriteLinks(HtmlNode content, string curUrl, string filePath)
    {
        var links = content.SelectNodes("//a[@href]");
        if (links == null)
        {
            return;
        }

        // Base for links that contain a folder: curUrl without the trailing file path.
        // When no category matched, filePath is empty and the base stays empty.
        string sectionRoot = filePath.Length == 0
            ? ""
            : curUrl.Substring(0, curUrl.Length - filePath.Length);

        // Base for plain file links: curUrl up to and including its last '/'.
        string pageFolder = curUrl.Substring(0, curUrl.LastIndexOf('/') + 1);

        foreach (var link in links)
        {
            var href = link.Attributes["href"];
            if (href == null || String.IsNullOrEmpty(href.Value))
            {
                continue;
            }

            // Links containing "http" and in-page anchors are left as they are.
            if (href.Value.Contains("http") || href.Value[0] == '#')
            {
                continue;
            }

            string target = href.Value.Replace(".htm", ".aspx");
            int slashIndex = target.IndexOf("/", StringComparison.Ordinal);

            href.Value = slashIndex >= 0
                ? sectionRoot + target.Substring(slashIndex)
                : pageFolder + target;
        }
    }

    /// <summary>
    /// Issues a GET request (redirects are not followed) and returns the response body.
    /// </summary>
    /// <param name="url">Absolute URL of the document to download.</param>
    /// <returns>The response body, or an empty string when anything goes wrong.</returns>
    private static String SendRequestToDocs(string url)
    {
        try
        {
            var httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
            httpWebRequest.AllowAutoRedirect = false;
            httpWebRequest.Method = "GET";

            using (var httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
            using (var stream = httpWebResponse.GetResponseStream())
            using (var reader = new StreamReader(stream))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Best effort: the caller treats an empty document as "page not available"
            // and uses it to trigger the language fallback.
            return String.Empty;
        }
    }

    /// <summary>
    /// Language folder tried first when looking up a help page.
    /// </summary>
    private static String GetLanguage()
    {
        return "ru";
    }
}