VectSharp  2.2.1
A light library for C# vector graphics
HtmlTag.cs
1 /*
2  VectSharp - A light library for C# vector graphics.
3  Copyright (C) 2020-2022 Giorgio Bianchini
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU Lesser General Public License as published by
7  the Free Software Foundation, version 3.
8 
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU Lesser General Public License for more details.
13 
14  You should have received a copy of the GNU Lesser General Public License
15  along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17 
18 using System;
19 using System.Collections.Generic;
20 using System.IO;
21 using System.Net;
22 using System.Text;
23 using System.Threading.Tasks;
24 
26 {
/// <summary>
/// Represents a single HTML tag (its name and its attributes) read from a fragment of HTML text.
/// This is a lenient, best-effort tokenizer rather than a conforming HTML parser: text between tags is discarded.
/// </summary>
internal class HtmlTag
{
    /// <summary>
    /// The attributes of the tag. Attributes that have no value (e.g. <c>disabled</c>) are mapped to <see langword="null"/>.
    /// </summary>
    public Dictionary<string, string> Attributes { get; }

    /// <summary>
    /// The name of the tag as written in the source, i.e. everything between the opening angle bracket and the first
    /// whitespace character or closing angle bracket. Closing tags keep their leading slash (e.g. <c>/p</c>).
    /// </summary>
    public string Tag { get; }

    private HtmlTag(string tag, Dictionary<string, string> attributes)
    {
        this.Tag = tag;
        this.Attributes = attributes;
    }

    /// <summary>
    /// Determines whether the tag is an opening paragraph tag (compared ignoring case).
    /// </summary>
    private static bool IsParagraph(HtmlTag tag)
    {
        return tag.Tag.Equals("p", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Advances <paramref name="character"/> past any whitespace characters.
    /// </summary>
    private static void SkipWhiteSpace(StringReader reader, ref int character)
    {
        while (character >= 0 && char.IsWhiteSpace((char)character))
        {
            character = reader.Read();
        }
    }

    /// <summary>
    /// Reads the next tag from the reader, discarding any text that precedes it.
    /// </summary>
    /// <param name="reader">The reader from which the tag is read. It is left positioned just after the closing angle bracket of the tag (or at the end of the input).</param>
    /// <returns>The tag that has been read. If the input ends before a tag is found, the tag name is an empty string.</returns>
    private static HtmlTag ParseTag(StringReader reader)
    {
        int character = reader.Read();

        // Discard everything up to (and including) the opening angle bracket.
        while (character >= 0 && character != '<')
        {
            character = reader.Read();
        }

        if (character == '<')
        {
            character = reader.Read();
        }

        SkipWhiteSpace(reader, ref character);

        StringBuilder tagBuilder = new StringBuilder();

        while (character >= 0 && !char.IsWhiteSpace((char)character) && character != '>')
        {
            tagBuilder.Append((char)character);
            character = reader.Read();
        }

        Dictionary<string, string> attributes = new Dictionary<string, string>();

        // ReadAttribute returns null both at the closing angle bracket and at the end of the input, so an
        // attribute that was read in full is always stored, even if the tag itself is never closed.
        (string, string)? attribute = ReadAttribute(reader, ref character);

        while (attribute != null)
        {
            attributes[attribute.Value.Item1] = attribute.Value.Item2;
            attribute = ReadAttribute(reader, ref character);
        }

        return new HtmlTag(tagBuilder.ToString(), attributes);
    }

    /// <summary>
    /// Reads the tags nested within a paragraph, copying to each of them the attributes of the paragraph
    /// that they do not define themselves. Neither the paragraph tag nor its closing tag are returned.
    /// </summary>
    /// <param name="reader">The reader, positioned just after the opening paragraph tag.</param>
    /// <param name="paragraph">The opening paragraph tag whose attributes are inherited by the nested tags.</param>
    private static IEnumerable<HtmlTag> ExpandParagraph(StringReader reader, HtmlTag paragraph)
    {
        foreach (HtmlTag nestedTag in ParseTagsUntil(reader, "/p"))
        {
            // The closing tag is compared ignoring case, consistently with the opening tag.
            if (nestedTag.Tag.Equals("/p", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            foreach (KeyValuePair<string, string> kvp in paragraph.Attributes)
            {
                if (!nestedTag.Attributes.ContainsKey(kvp.Key))
                {
                    nestedTag.Attributes[kvp.Key] = kvp.Value;
                }
            }

            yield return nestedTag;
        }
    }

    /// <summary>
    /// Reads tags from the reader until a tag with the specified name is found or the input ends.
    /// Paragraph tags are replaced by the tags nested within them (which inherit the attributes of the paragraph).
    /// </summary>
    /// <param name="reader">The reader from which the tags are read.</param>
    /// <param name="targetTag">The name of the tag at which parsing should stop (compared ignoring case).</param>
    /// <returns>The tags that have been read. The last element is the tag at which parsing stopped, i.e. either the
    /// target tag or the last tag that could be read before the end of the input.</returns>
    public static IEnumerable<HtmlTag> ParseTagsUntil(StringReader reader, string targetTag)
    {
        HtmlTag tag = ParseTag(reader);

        while (!tag.Tag.Equals(targetTag, StringComparison.OrdinalIgnoreCase) && reader.Peek() >= 0)
        {
            if (IsParagraph(tag))
            {
                foreach (HtmlTag nestedTag in ExpandParagraph(reader, tag))
                {
                    yield return nestedTag;
                }
            }
            else
            {
                yield return tag;
            }

            tag = ParseTag(reader);
        }

        yield return tag;
    }

    /// <summary>
    /// Reads all the tags contained in a fragment of HTML text.
    /// Paragraph tags are replaced by the tags nested within them (which inherit the attributes of the paragraph).
    /// </summary>
    /// <param name="html">The HTML text to parse.</param>
    /// <returns>The tags contained in the text, in document order. If the text does not end with a tag, the trailing
    /// text produces a final tag whose name is an empty string.</returns>
    public static IEnumerable<HtmlTag> Parse(string html)
    {
        using (StringReader reader = new StringReader(html))
        {
            while (reader.Peek() >= 0)
            {
                HtmlTag tag = ParseTag(reader);

                if (IsParagraph(tag))
                {
                    foreach (HtmlTag nestedTag in ExpandParagraph(reader, tag))
                    {
                        yield return nestedTag;
                    }
                }
                else
                {
                    yield return tag;
                }
            }
        }
    }

    /// <summary>
    /// Reads the next attribute of the tag that is currently being parsed.
    /// </summary>
    /// <param name="reader">The reader from which the attribute is read.</param>
    /// <param name="character">The last character that was read from the reader and has not been processed yet (negative at the end of the input). On return, it contains the first character following the attribute.</param>
    /// <returns>The name and value of the attribute (the value is <see langword="null"/> if the attribute has none),
    /// or <see langword="null"/> if the end of the tag or the end of the input has been reached.</returns>
    private static (string, string)? ReadAttribute(StringReader reader, ref int character)
    {
        SkipWhiteSpace(reader, ref character);

        if (character < 0 || character == '>')
        {
            return null;
        }

        StringBuilder attributeNameBuilder = new StringBuilder();

        while (character >= 0 && !char.IsWhiteSpace((char)character) && character != '>' && character != '=')
        {
            attributeNameBuilder.Append((char)character);
            character = reader.Read();
        }

        string attributeName = attributeNameBuilder.ToString();

        SkipWhiteSpace(reader, ref character);

        if (character != '=')
        {
            // Attribute without a value.
            return (attributeName, null);
        }

        character = reader.Read();

        SkipWhiteSpace(reader, ref character);

        if (character < 0 || character == '>')
        {
            // The tag (or the input) ended right after the equals sign.
            return (attributeName, null);
        }

        StringBuilder attributeValueBuilder = new StringBuilder();

        if (character == '"' || character == '\'')
        {
            int quoteChar = character;

            character = reader.Read();

            // True when the character currently held in "character" is preceded by a backslash that is not
            // itself escaped. An escaped quote does not terminate the value. Backslashes are kept in the value.
            bool isEscaped = false;

            while (character >= 0 && (character != quoteChar || isEscaped))
            {
                attributeValueBuilder.Append((char)character);
                isEscaped = character == '\\' && !isEscaped;
                character = reader.Read();
            }

            // Consume the closing quote; otherwise it would be parsed as the name of the next attribute.
            if (character == quoteChar)
            {
                character = reader.Read();
            }
        }
        else
        {
            while (character >= 0 && !char.IsWhiteSpace((char)character) && character != '>' && character != '=')
            {
                attributeValueBuilder.Append((char)character);
                character = reader.Read();
            }
        }

        return (attributeName, attributeValueBuilder.ToString());
    }
}
240 
/// <summary>
/// Contains utilities to resolve absolute and relative URIs.
/// </summary>
public static class HTTPUtils
{
    /// <summary>
    /// Determines whether every file that is downloaded should be logged to the standard error stream.
    /// </summary>
    public static bool LogDownloads { get; set; } = true;

    /// <summary>
    /// Resolves an image Uri, by downloading the image file if necessary. It also takes care of ensuring that the file extension matches the format of the file.
    /// </summary>
    /// <param name="uri">The address of the image.</param>
    /// <param name="baseUriString">The base uri to use for relative uris.</param>
    /// <returns>A tuple containing the local path of the image file (either the original image, or a local copy of a remote file) and a boolean value indicating whether the image was fetched from a remote location and should be deleted after the program is done with it. If the image cannot be resolved, the path is <see langword="null"/>.</returns>
    public static (string path, bool wasDownloaded) ResolveImageURI(string uri, string baseUriString)
    {
        if (uri == null)
        {
            // Nothing to resolve: report a failure in the same way as an invalid URI.
            return (null, false);
        }

        // Path.Combine does not accept null; a missing base is equivalent to an empty one.
        string basePath = baseUriString ?? string.Empty;

        // Ordinal comparison: the URI scheme is not linguistic text.
        if (uri.StartsWith("data:", StringComparison.Ordinal))
        {
            string dataDirectory = CreateTemporaryDirectory();
            string svgPath = Path.Combine(dataDirectory, "temp.svg");

            try
            {
                VectSharp.Page pag = VectSharp.SVG.Parser.ParseImageURI(uri, true);
                VectSharp.SVG.SVGContextInterpreter.SaveAsSVG(pag, svgPath);
            }
            catch
            {
                // Do not leave the temporary directory behind if the data URI cannot be converted.
                TryDeleteDirectory(dataDirectory);
                throw;
            }

            return (svgPath, true);
        }

        string combinedPath = Path.Combine(basePath, uri);

        if (File.Exists(combinedPath))
        {
            return (combinedPath, false);
        }

        if (File.Exists(uri))
        {
            return (uri, false);
        }

        Uri absoluteUri;
        bool validUri;

        if (Uri.TryCreate(baseUriString, UriKind.Absolute, out Uri baseUri))
        {
            validUri = Uri.TryCreate(baseUri, uri, out absoluteUri);
        }
        else
        {
            validUri = Uri.TryCreate(uri, UriKind.Absolute, out absoluteUri);
        }

        if (!validUri)
        {
            return (null, false);
        }

        string downloadDirectory = CreateTemporaryDirectory();

        try
        {
            string remoteFileName = Path.GetFileName(absoluteUri.LocalPath);

            if (string.IsNullOrEmpty(remoteFileName))
            {
                // The path of the URI ends with a slash; any name will do, as the extension is fixed below.
                remoteFileName = "image";
            }

            string fileDest = Path.Combine(downloadDirectory, remoteFileName);

            if (LogDownloads)
            {
                Console.Error.WriteLine();
                Console.Error.Write("Downloading {0}...", absoluteUri);
            }

            using (WebClient client = new WebClient())
            {
                client.DownloadFile(absoluteUri, fileDest);
            }

            if (LogDownloads)
            {
                Console.Error.WriteLine(" Done.");
            }

            string newName = FixFileExtensionBasedOnContent(fileDest);

            if (!string.Equals(newName, fileDest, StringComparison.Ordinal))
            {
                File.Move(fileDest, newName);
            }

            return (newName, true);
        }
        catch (Exception ex)
        {
            if (LogDownloads)
            {
                Console.Error.WriteLine(" Failed!");
                Console.Error.WriteLine(ex.Message);
            }

            TryDeleteDirectory(downloadDirectory);
            return (null, false);
        }
    }

    /// <summary>
    /// Creates a new, empty directory with a unique name in the temporary folder of the current user.
    /// </summary>
    /// <returns>The full path of the directory.</returns>
    private static string CreateTemporaryDirectory()
    {
        // GetTempFileName reserves a unique name by creating an empty file; the file is replaced by a directory.
        string tempPath = Path.GetTempFileName();

        if (File.Exists(tempPath))
        {
            File.Delete(tempPath);
        }

        Directory.CreateDirectory(tempPath);

        return tempPath;
    }

    /// <summary>
    /// Deletes a directory and its contents, ignoring failures: this is only used for cleaning up after an
    /// error that is already being reported, which should not be masked by a problem during the cleanup.
    /// </summary>
    private static void TryDeleteDirectory(string directory)
    {
        try
        {
            if (Directory.Exists(directory))
            {
                Directory.Delete(directory, true);
            }
        }
        catch (IOException)
        {
            // Best-effort cleanup: the directory is in the temporary folder.
        }
        catch (UnauthorizedAccessException)
        {
            // Best-effort cleanup: the directory is in the temporary folder.
        }
    }

    /// <summary>
    /// Determines whether the stream contains an XML document whose root element is called <c>svg</c>.
    /// </summary>
    private static bool IsSvg(Stream stream)
    {
        // By default, XmlReader refuses documents that contain a DOCTYPE declaration, which is common in
        // SVG files. Here the DTD is skipped, and nothing is resolved from external locations.
        System.Xml.XmlReaderSettings settings = new System.Xml.XmlReaderSettings
        {
            DtdProcessing = System.Xml.DtdProcessing.Ignore,
            XmlResolver = null
        };

        try
        {
            // With the default CloseInput = false, disposing the reader leaves the stream open.
            using (System.Xml.XmlReader xmlReader = System.Xml.XmlReader.Create(stream, settings))
            {
                return xmlReader.MoveToContent() == System.Xml.XmlNodeType.Element && "svg".Equals(xmlReader.Name, StringComparison.OrdinalIgnoreCase);
            }
        }
        catch (Exception)
        {
            // Anything that cannot be read as XML (e.g. a raster image) is simply not an SVG file.
            return false;
        }
    }

    /// <summary>
    /// Determines whether the first <paramref name="headerLength"/> bytes of <paramref name="header"/> start with the specified signature.
    /// </summary>
    private static bool HasSignature(byte[] header, int headerLength, params byte[] signature)
    {
        if (headerLength < signature.Length)
        {
            return false;
        }

        for (int i = 0; i < signature.Length; i++)
        {
            if (header[i] != signature[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Determines the format of a file from its contents and returns a file name with the corresponding extension appended.
    /// The file itself is not renamed.
    /// </summary>
    /// <param name="fileName">The path of the file to inspect.</param>
    /// <returns>The file name followed by <c>.svg</c>, <c>.bmp</c>, <c>.gif</c>, <c>.jpg</c>, <c>.pdf</c>, <c>.png</c> or <c>.tif</c>, or the unchanged file name if the format is not recognised.</returns>
    private static string FixFileExtensionBasedOnContent(string fileName)
    {
        using (FileStream fileStream = File.OpenRead(fileName))
        {
            if (IsSvg(fileStream))
            {
                return fileName + ".svg";
            }

            fileStream.Seek(0, SeekOrigin.Begin);

            byte[] header = new byte[8];
            int headerLength = 0;

            // The file may be shorter than the header; only the bytes that were actually read are compared.
            while (headerLength < header.Length)
            {
                int bytesRead = fileStream.Read(header, headerLength, header.Length - headerLength);

                if (bytesRead <= 0)
                {
                    break;
                }

                headerLength += bytesRead;
            }

            if (HasSignature(header, headerLength, 0x42, 0x4D))
            {
                return fileName + ".bmp";
            }
            else if (HasSignature(header, headerLength, 0x47, 0x49, 0x46, 0x38))
            {
                return fileName + ".gif";
            }
            else if (HasSignature(header, headerLength, 0xFF, 0xD8, 0xFF))
            {
                // Start-of-image marker followed by the first byte of the next marker, whichever that is.
                return fileName + ".jpg";
            }
            else if (HasSignature(header, headerLength, 0x25, 0x50, 0x44, 0x46, 0x2D))
            {
                return fileName + ".pdf";
            }
            else if (HasSignature(header, headerLength, 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A))
            {
                return fileName + ".png";
            }
            else if (HasSignature(header, headerLength, 0x49, 0x49, 0x2A, 0x00) || HasSignature(header, headerLength, 0x4D, 0x4D, 0x00, 0x2A))
            {
                return fileName + ".tif";
            }
            else
            {
                return fileName;
            }
        }
    }
}
412 }
VectSharp.SVG.Parser
Contains methods to read an SVG image file.
Definition: SVGParser.cs:34
VectSharp.SVG
Definition: SVGContext.cs:28
VectSharp
Definition: Brush.cs:26
VectSharp.Page
Represents a Graphics object with a width and height.
Definition: Document.cs:48
VectSharp.Markdown.HTTPUtils.path
static string path
Resolves an image Uri, by downloading the image file if necessary. It also takes care of ensuring that the file extension matches the format of the file.
Definition: HtmlTag.cs:257
VectSharp.Markdown.HTTPUtils.LogDownloads
static bool LogDownloads
Determines whether every file that is downloaded should be logged to the standard error stream.
Definition: HtmlTag.cs:249
VectSharp.SVG.SVGContextInterpreter
Contains methods to render a Page as an SVG file.
Definition: SVGContext.cs:1609
VectSharp.Markdown.HTTPUtils
Contains utilities to resolve absolute and relative URIs.
Definition: HtmlTag.cs:245
VectSharp.Markdown
Definition: HtmlTag.cs:26
VectSharp.SVG.Parser.ParseImageURI
static Func< string, bool, Page > ParseImageURI
A function that takes as input an image URI and a boolean value indicating whether the image should b...
Definition: SVGParser.cs:46
VectSharp.SVG.SVGContextInterpreter.SaveAsSVG
static void SaveAsSVG(this Page page, string fileName, TextOptions textOption=TextOptions.SubsetFonts, Dictionary< string, string > linkDestinations=null, FilterOption filterOption=default)
Render the page to an SVG file.
Definition: SVGContext.cs:1619