LibOFX
ofx_preproc.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  ofx_preproc.cpp
3  -------------------
4  copyright : (C) 2002 by Benoit Grégoire
5  email : benoitg@coeus.ca
6 ***************************************************************************/
12 /***************************************************************************
13  * *
14  * This program is free software; you can redistribute it and/or modify *
15  * it under the terms of the GNU General Public License as published by *
16  * the Free Software Foundation; either version 2 of the License, or *
17  * (at your option) any later version. *
18  * *
19  ***************************************************************************/
#include "../config.h"
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stdio.h>
#include <string>
#include <vector>
#ifdef OS_WIN32
# include <io.h> // for close()
#else
# include <unistd.h> // for close()
#endif
#include "ParserEventGeneratorKit.h"
#include "libofx.h"
#include "messages.hh"
#include "ofx_sgml.hh"
#include "ofc_sgml.hh"
#include "ofx_preproc.hh"
#include "ofx_utilities.hh"
#ifdef HAVE_ICONV
#include <iconv.h>
#endif
36 
37 #ifdef OS_WIN32
38 # define DIRSEP "\\"
39 #else
40 # define DIRSEP "/"
41 #endif
42 
43 #ifdef OS_WIN32
44 # include "win32.hh"
45 # include <windows.h> // for GetModuleFileName()
46 # undef ERROR
47 # undef DELETE
48 #endif
49 
50 #define LIBOFX_DEFAULT_INPUT_ENCODING "CP1252"
51 #define LIBOFX_DEFAULT_OUTPUT_ENCODING "UTF-8"
52 
53 using namespace std;
/** The number of different paths to search for DTDs. */
#ifdef MAKEFILE_DTD_PATH
const int DTD_SEARCH_PATH_NUM = 4;
#else
const int DTD_SEARCH_PATH_NUM = 3;
#endif

/** The list of paths to search for the DTDs.
 *
 * The optional MAKEFILE_DTD_PATH entry (when defined at build time) comes
 * first. The number of initializers must match DTD_SEARCH_PATH_NUM.
 *
 * NOTE(review): the "~" entry is used verbatim as a path prefix by
 * find_dtd(); nothing in this file expands it to the user's home directory.
 */
const char *DTD_SEARCH_PATH[DTD_SEARCH_PATH_NUM] =
{
#ifdef MAKEFILE_DTD_PATH
  MAKEFILE_DTD_PATH ,
#endif
  "/usr/local/share/libofx/dtd",
  "/usr/share/libofx/dtd",
  "~"
};

/** Size, in bytes, of the chunk buffer used when reading the input file. */
const unsigned int READ_BUFFER_SIZE = 1024;
76 
81 int ofx_proc_file(LibofxContextPtr ctx, const char * p_filename)
82 {
83  LibofxContext *libofx_context;
84  bool ofx_start = false;
85  bool ofx_end = false;
86  bool file_is_xml = false;
87 
88  ifstream input_file;
89  ofstream tmp_file;
90  char buffer[READ_BUFFER_SIZE];
91  char *iconv_buffer;
92  string s_buffer;
93  char *filenames[3];
94  char tmp_filename[256];
95  int tmp_file_fd;
96 #ifdef HAVE_ICONV
97  iconv_t conversion_descriptor;
98 #endif
99  libofx_context = (LibofxContext*)ctx;
100 
101  if (p_filename != NULL && strcmp(p_filename, "") != 0)
102  {
103  message_out(DEBUG, string("ofx_proc_file():Opening file: ") + p_filename);
104 
105  input_file.open(p_filename);
106  if (!input_file)
107  {
108  message_out(ERROR, "ofx_proc_file():Unable to open the input file " + string(p_filename));
109  }
110 
111  mkTempFileName("libofxtmpXXXXXX", tmp_filename, sizeof(tmp_filename));
112 
113  message_out(DEBUG, "ofx_proc_file(): Creating temp file: " + string(tmp_filename));
114  tmp_file_fd = mkstemp(tmp_filename);
115  if (tmp_file_fd)
116  {
117  tmp_file.open(tmp_filename);
118  if (!tmp_file)
119  {
120  message_out(ERROR, "ofx_proc_file():Unable to open the created temp file " + string(tmp_filename));
121  return -1;
122  }
123  }
124  else
125  {
126  message_out(ERROR, "ofx_proc_file():Unable to create a temp file at " + string(tmp_filename));
127  return -1;
128  }
129 
130  if (input_file && tmp_file)
131  {
132  int header_separator_idx;
133  string header_name;
134  string header_value;
135  string ofx_encoding;
136  string ofx_charset;
137  do
138  {
139  s_buffer.clear();
140  bool end_of_line = false;
141  do
142  {
143  input_file.get(buffer, sizeof(buffer), '\n');
144  //cout<< "got: \"" << buffer<<"\"\n";
145  s_buffer.append(buffer);
146 
147  // Watch out: If input_file is in eof(), any subsequent read or
148  // peek() will fail and we must exit this loop.
149  if (input_file.eof())
150  break;
151 
152  //cout<<"input_file.gcount(): "<<input_file.gcount()<< " s_buffer.size=" << s_buffer.size()<<" sizeof(buffer): "<<sizeof(buffer) << " peek=\"" << int(input_file.peek()) << "\"" <<endl;
153  if (input_file.fail()) // If no characters were extracted above, the failbit is set.
154  {
155  // No characters extracted means that we've reached the newline
156  // delimiter (because we already checked for EOF). We will check
157  // for and remove that newline in the next if-clause, but must
158  // remove the failbit so that peek() will work again.
159  input_file.clear();
160  }
161 
162  // Is the next character really the newline?
163  if (input_file.peek() == '\n')
164  {
165  // Yes. Then discard that newline character from the stream and
166  // append it manually to the output string.
167  input_file.get();
168  s_buffer.append("\n");
169  end_of_line = true; // We found the end-of-line.
170  }
171  }
172  // Continue reading as long as we're not at EOF *and* we've not yet
173  // reached an end-of-line.
174  while (!input_file.eof() && !end_of_line);
175 
176  if (ofx_start == false && (s_buffer.find("<?xml") != string::npos))
177  {
178  message_out(DEBUG, "ofx_proc_file(): File is an actual XML file, iconv conversion will be skipped.");
179  file_is_xml = true;
180  }
181 
182  int ofx_start_idx;
183  if (ofx_start == false &&
184  (
185  (libofx_context->currentFileType() == OFX &&
186  ((ofx_start_idx = s_buffer.find("<OFX>")) !=
187  string::npos || (ofx_start_idx = s_buffer.find("<ofx>")) != string::npos))
188  || (libofx_context->currentFileType() == OFC &&
189  ((ofx_start_idx = s_buffer.find("<OFC>")) != string::npos ||
190  (ofx_start_idx = s_buffer.find("<ofc>")) != string::npos))
191  )
192  )
193  {
194  ofx_start = true;
195  if (file_is_xml == false)
196  {
197  s_buffer.erase(0, ofx_start_idx); //Fix for really broken files that don't have a newline after the header.
198  }
199  message_out(DEBUG, "ofx_proc_file():<OFX> or <OFC> has been found");
200 
201  if (file_is_xml == true)
202  {
203  static char sp_charset_fixed[] = "SP_CHARSET_FIXED=1";
204  if (putenv(sp_charset_fixed) != 0)
205  {
206  message_out(ERROR, "ofx_proc_file(): putenv failed");
207  }
208  /* Normally the following would be "xml".
209  * Unfortunately, opensp's generic api will garble UTF-8 if this is
210  * set to xml. So we set any single byte encoding to avoid messing
211  * up UTF-8. Unfortunately this means that non-UTF-8 files will not
212  * get properly translated. We'd need to manually detect the
213  * encoding in the XML header and convert the xml with iconv like we
214  * do for SGML to work around the problem. Most unfortunate. */
215  static char sp_encoding[] = "SP_ENCODING=ms-dos";
216  if (putenv(sp_encoding) != 0)
217  {
218  message_out(ERROR, "ofx_proc_file(): putenv failed");
219  }
220  }
221  else
222  {
223  static char sp_charset_fixed[] = "SP_CHARSET_FIXED=1";
224  if (putenv(sp_charset_fixed) != 0)
225  {
226  message_out(ERROR, "ofx_proc_file(): putenv failed");
227  }
228  static char sp_encoding[] = "SP_ENCODING=ms-dos"; //Any single byte encoding will do, we don't want opensp messing up UTF-8;
229  if (putenv(sp_encoding) != 0)
230  {
231  message_out(ERROR, "ofx_proc_file(): putenv failed");
232  }
233 #ifdef HAVE_ICONV
234  string fromcode;
235  string tocode;
236  if (ofx_encoding.compare("USASCII") == 0)
237  {
238  if (ofx_charset.compare("ISO-8859-1") == 0 || ofx_charset.compare("8859-1") == 0)
239  {
240  //Only "ISO-8859-1" is actually a legal value, but since the banks follows the spec SO well...
241  fromcode = "ISO-8859-1";
242  }
243  else if (ofx_charset.compare("1252") == 0 || ofx_charset.compare("CP1252") == 0)
244  {
245  //Only "1252" is actually a legal value, but since the banks follows the spec SO well...
246  fromcode = "CP1252";
247  }
248  else if (ofx_charset.compare("NONE") == 0)
249  {
250  fromcode = LIBOFX_DEFAULT_INPUT_ENCODING;
251  }
252  else
253  {
254  fromcode = LIBOFX_DEFAULT_INPUT_ENCODING;
255  }
256  }
257  else if (ofx_encoding.compare("UTF-8") == 0 || ofx_encoding.compare("UNICODE") == 0)
258  {
259  //While "UNICODE" isn't a legal value, some cyrilic files do specify it as such...
260  fromcode = "UTF-8";
261  }
262  else
263  {
264  fromcode = LIBOFX_DEFAULT_INPUT_ENCODING;
265  }
266  tocode = LIBOFX_DEFAULT_OUTPUT_ENCODING;
267  message_out(DEBUG, "ofx_proc_file(): Setting up iconv for fromcode: " + fromcode + ", tocode: " + tocode);
268  conversion_descriptor = iconv_open (tocode.c_str(), fromcode.c_str());
269 #endif
270  }
271  }
272  else
273  {
274  //We are still in the headers
275  if ((header_separator_idx = s_buffer.find(':')) != string::npos)
276  {
277  //Header processing
278  header_name.assign(s_buffer.substr(0, header_separator_idx));
279  header_value.assign(s_buffer.substr(header_separator_idx + 1));
280  while ( header_value[header_value.length() -1 ] == '\n' ||
281  header_value[header_value.length() -1 ] == '\r' )
282  header_value.erase(header_value.length() - 1);
283  message_out(DEBUG, "ofx_proc_file():Header: " + header_name + " with value: " + header_value + " has been found");
284  if (header_name.compare("ENCODING") == 0)
285  {
286  ofx_encoding.assign(header_value);
287  }
288  if (header_name.compare("CHARSET") == 0)
289  {
290  ofx_charset.assign(header_value);
291  }
292  }
293  }
294 
295  if (file_is_xml == true || (ofx_start == true && ofx_end == false))
296  {
297  if (ofx_start == true)
298  {
299  /* The above test won't help us if the <OFX> tag is on the same line
300  * as the xml header, but as opensp can't be used to parse it anyway
301  * this isn't a great loss for now.
302  */
303  s_buffer = sanitize_proprietary_tags(s_buffer);
304  }
305  //cout<< s_buffer<<"\n";
306  if (file_is_xml == false)
307  {
308 #ifdef HAVE_ICONV
309  size_t inbytesleft = strlen(s_buffer.c_str());
310  size_t outbytesleft = inbytesleft * 2 - 1;
311  iconv_buffer = (char*) malloc (inbytesleft * 2);
312  memset(iconv_buffer, 0, inbytesleft * 2);
313 #if defined(OS_WIN32) || defined(__sun) || defined(__NetBSD__)
314  const char * inchar = (const char *)s_buffer.c_str();
315 #else
316  char * inchar = (char *)s_buffer.c_str();
317 #endif
318  char * outchar = iconv_buffer;
319  int iconv_retval = iconv (conversion_descriptor,
320  &inchar, &inbytesleft,
321  &outchar, &outbytesleft);
322  if (iconv_retval == -1)
323  {
324  message_out(ERROR, "ofx_proc_file(): Conversion error");
325  }
326  s_buffer = iconv_buffer;
327  free (iconv_buffer);
328 #endif
329  }
330  //cout << s_buffer << "\n";
331  tmp_file.write(s_buffer.c_str(), s_buffer.length());
332  }
333 
334  if (ofx_start == true &&
335  (
336  (libofx_context->currentFileType() == OFX &&
337  ((ofx_start_idx = s_buffer.find("</OFX>")) != string::npos ||
338  (ofx_start_idx = s_buffer.find("</ofx>")) != string::npos))
339  || (libofx_context->currentFileType() == OFC &&
340  ((ofx_start_idx = s_buffer.find("</OFC>")) != string::npos ||
341  (ofx_start_idx = s_buffer.find("</ofc>")) != string::npos))
342  )
343  )
344  {
345  ofx_end = true;
346  message_out(DEBUG, "ofx_proc_file():</OFX> or </OFC> has been found");
347  }
348 
349  }
350  while (!input_file.eof() && !input_file.bad());
351  }
352  input_file.close();
353  tmp_file.close();
354 #ifdef HAVE_ICONV
355  if (file_is_xml == false)
356  {
357  iconv_close(conversion_descriptor);
358  }
359 #endif
360  char filename_openspdtd[255];
361  char filename_dtd[255];
362  char filename_ofx[255];
363  strncpy(filename_openspdtd, find_dtd(ctx, OPENSPDCL_FILENAME).c_str(), 255); //The opensp sgml dtd file
364  if (libofx_context->currentFileType() == OFX)
365  {
366  strncpy(filename_dtd, find_dtd(ctx, OFX160DTD_FILENAME).c_str(), 255); //The ofx dtd file
367  }
368  else if (libofx_context->currentFileType() == OFC)
369  {
370  strncpy(filename_dtd, find_dtd(ctx, OFCDTD_FILENAME).c_str(), 255); //The ofc dtd file
371  }
372  else
373  {
374  message_out(ERROR, string("ofx_proc_file(): Error unknown file format for the OFX parser"));
375  }
376 
377  if ((string)filename_dtd != "" && (string)filename_openspdtd != "")
378  {
379  strncpy(filename_ofx, tmp_filename, 255); //The processed ofx file
380  filenames[0] = filename_openspdtd;
381  filenames[1] = filename_dtd;
382  filenames[2] = filename_ofx;
383  if (libofx_context->currentFileType() == OFX)
384  {
385  ofx_proc_sgml(libofx_context, 3, filenames);
386  }
387  else if (libofx_context->currentFileType() == OFC)
388  {
389  ofc_proc_sgml(libofx_context, 3, filenames);
390  }
391  else
392  {
393  message_out(ERROR, string("ofx_proc_file(): Error unknown file format for the OFX parser"));
394  }
395  if (remove(tmp_filename) != 0)
396  {
397  message_out(ERROR, "ofx_proc_file(): Error deleting temporary file " + string(tmp_filename));
398  }
399  }
400  else
401  {
402  message_out(ERROR, "ofx_proc_file(): FATAL: Missing DTD, aborting");
403  }
404  }
405  else
406  {
407  message_out(ERROR, "ofx_proc_file():No input file specified");
408  }
409  return 0;
410 }
411 
412 
417 string sanitize_proprietary_tags(string input_string)
418 {
419  unsigned int i;
420  bool strip = false;
421  bool tag_open = false;
422  int tag_open_idx = 0; //Are we within < > ?
423  bool closing_tag_open = false; //Are we within </ > ?
424  int orig_tag_open_idx = 0;
425  bool proprietary_tag = false; //Are we within a proprietary element?
426  bool proprietary_closing_tag = false;
427  int crop_end_idx = 0;
428  char buffer[READ_BUFFER_SIZE] = "";
429  char tagname[READ_BUFFER_SIZE] = "";
430  int tagname_idx = 0;
431  char close_tagname[READ_BUFFER_SIZE] = "";
432 
433  for (i = 0; i < READ_BUFFER_SIZE; i++)
434  {
435  buffer[i] = 0;
436  tagname[i] = 0;
437  close_tagname[i] = 0;
438  }
439 
440  size_t input_string_size = input_string.size();
441 
442  // Minimum workaround to prevent buffer overflow: Stop iterating
443  // once the (fixed!) size of the output buffers is reached. In
444  // response to
445  // https://www.talosintelligence.com/vulnerability_reports/TALOS-2017-0317
446  //
447  // However, this code is a huge mess anyway and is in no way
448  // anything like up-to-date C++ code. Please, anyone, replace it
449  // with something more modern. Thanks. - cstim, 2017-09-17.
450  for (i = 0; i < std::min(input_string_size, size_t(READ_BUFFER_SIZE)); i++)
451  {
452  if (input_string.c_str()[i] == '<')
453  {
454  tag_open = true;
455  tag_open_idx = i;
456  if (proprietary_tag == true && input_string.c_str()[i+1] == '/')
457  {
458  //We are now in a closing tag
459  closing_tag_open = true;
460  //cout<<"Comparaison: "<<tagname<<"|"<<&(input_string.c_str()[i+2])<<"|"<<strlen(tagname)<<endl;
461  if (strncmp(tagname, &(input_string.c_str()[i+2]), strlen(tagname)) != 0)
462  {
463  //If it is the begining of an other tag
464  //cout<<"DIFFERENT!"<<endl;
465  crop_end_idx = i - 1;
466  strip = true;
467  }
468  else
469  {
470  //Otherwise, it is the start of the closing tag of the proprietary tag
471  proprietary_closing_tag = true;
472  }
473  }
474  else if (proprietary_tag == true)
475  {
476  //It is the start of a new tag, following a proprietary tag
477  crop_end_idx = i - 1;
478  strip = true;
479  }
480  }
481  else if (input_string.c_str()[i] == '>')
482  {
483  tag_open = false;
484  closing_tag_open = false;
485  tagname[tagname_idx] = 0;
486  tagname_idx = 0;
487  if (proprietary_closing_tag == true)
488  {
489  crop_end_idx = i;
490  strip = true;
491  }
492  }
493  else if (tag_open == true && closing_tag_open == false)
494  {
495  if (input_string.c_str()[i] == '.')
496  {
497  if (proprietary_tag != true)
498  {
499  orig_tag_open_idx = tag_open_idx;
500  proprietary_tag = true;
501  }
502  }
503  tagname[tagname_idx] = input_string.c_str()[i];
504  tagname_idx++;
505  }
506  //cerr <<i<<endl;
507  if (strip == true && orig_tag_open_idx < input_string.size())
508  {
509  input_string.copy(buffer, (crop_end_idx - orig_tag_open_idx) + 1, orig_tag_open_idx);
510  message_out(INFO, "sanitize_proprietary_tags() (end tag or new tag) removed: " + string(buffer));
511  input_string.erase(orig_tag_open_idx, (crop_end_idx - orig_tag_open_idx) + 1);
512  i = orig_tag_open_idx - 1;
513  proprietary_tag = false;
514  proprietary_closing_tag = false;
515  closing_tag_open = false;
516  tag_open = false;
517  strip = false;
518 
519  input_string_size = input_string.size();
520  }
521 
522  }//end for
523  if (proprietary_tag == true && orig_tag_open_idx < input_string.size())
524  {
525  if (crop_end_idx == 0) //no closing tag
526  {
527  crop_end_idx = input_string.size() - 1;
528  }
529  input_string.copy(buffer, (crop_end_idx - orig_tag_open_idx) + 1, orig_tag_open_idx);
530  message_out(INFO, "sanitize_proprietary_tags() (end of line) removed: " + string(buffer));
531  input_string.erase(orig_tag_open_idx, (crop_end_idx - orig_tag_open_idx) + 1);
532  input_string_size = input_string.size();
533  }
534  return input_string;
535 }
536 
537 
538 #ifdef OS_WIN32
539 static std::string get_dtd_installation_directory()
540 {
541  // Partial implementation of
542  // http://developer.gnome.org/doc/API/2.0/glib/glib-Windows-Compatibility-Functions.html#g-win32-get-package-installation-directory
543  char ch_fn[MAX_PATH], *p;
544  std::string str_fn;
545 
546  if (!GetModuleFileName(NULL, ch_fn, MAX_PATH)) return "";
547 
548  if ((p = strrchr(ch_fn, '\\')) != NULL)
549  * p = '\0';
550 
551  p = strrchr(ch_fn, '\\');
552  if (p && (_stricmp(p + 1, "bin") == 0 ||
553  _stricmp(p + 1, "lib") == 0))
554  *p = '\0';
555 
556  str_fn = ch_fn;
557  str_fn += "\\share\\libofx\\dtd";
558 
559  return str_fn;
560 }
561 #endif
562 
563 
576 std::string find_dtd(LibofxContextPtr ctx, const std::string& dtd_filename)
577 {
578  string dtd_path_filename;
579  char *env_dtd_path;
580 
581  dtd_path_filename = reinterpret_cast<const LibofxContext*>(ctx)->dtdDir();
582  if (!dtd_path_filename.empty())
583  {
584  dtd_path_filename.append(dtd_filename);
585  ifstream dtd_file(dtd_path_filename.c_str());
586  if (dtd_file)
587  {
588  message_out(STATUS, "find_dtd():DTD found: " + dtd_path_filename);
589  return dtd_path_filename;
590  }
591  }
592 
593 #ifdef OS_WIN32
594  dtd_path_filename = get_dtd_installation_directory();
595  if (!dtd_path_filename.empty())
596  {
597  dtd_path_filename.append(DIRSEP);
598  dtd_path_filename.append(dtd_filename);
599  ifstream dtd_file(dtd_path_filename.c_str());
600  if (dtd_file)
601  {
602  message_out(STATUS, "find_dtd():DTD found: " + dtd_path_filename);
603  return dtd_path_filename;
604  }
605  }
606 #endif
607  /* Search in environement variable OFX_DTD_PATH */
608  env_dtd_path = getenv("OFX_DTD_PATH");
609  if (env_dtd_path)
610  {
611  dtd_path_filename.append(env_dtd_path);
612  dtd_path_filename.append(DIRSEP);
613  dtd_path_filename.append(dtd_filename);
614  ifstream dtd_file(dtd_path_filename.c_str());
615  if (!dtd_file)
616  {
617  message_out(STATUS, "find_dtd():OFX_DTD_PATH env variable was was present, but unable to open the file " + dtd_path_filename);
618  }
619  else
620  {
621  message_out(STATUS, "find_dtd():DTD found: " + dtd_path_filename);
622  return dtd_path_filename;
623  }
624  }
625 
626  for (int i = 0; i < DTD_SEARCH_PATH_NUM; i++)
627  {
628  dtd_path_filename = DTD_SEARCH_PATH[i];
629  dtd_path_filename.append(DIRSEP);
630  dtd_path_filename.append(dtd_filename);
631  ifstream dtd_file(dtd_path_filename.c_str());
632  if (!dtd_file)
633  {
634  message_out(DEBUG, "find_dtd():Unable to open the file " + dtd_path_filename);
635  }
636  else
637  {
638  message_out(STATUS, "find_dtd():DTD found: " + dtd_path_filename);
639  return dtd_path_filename;
640  }
641  }
642 
643  /* Last resort, look in source tree relative path (useful for development) */
644  dtd_path_filename = "";
645  dtd_path_filename.append("..");
646  dtd_path_filename.append(DIRSEP);
647  dtd_path_filename.append("dtd");
648  dtd_path_filename.append(DIRSEP);
649  dtd_path_filename.append(dtd_filename);
650  ifstream dtd_file(dtd_path_filename.c_str());
651  if (!dtd_file)
652  {
653  message_out(DEBUG, "find_dtd(): Unable to open the file " + dtd_path_filename + ", most likely we are not in the source tree.");
654  }
655  else
656  {
657  message_out(STATUS, "find_dtd():DTD found: " + dtd_path_filename);
658  return dtd_path_filename;
659  }
660 
661 
662  message_out(ERROR, "find_dtd():Unable to find the DTD named " + dtd_filename);
663  return "";
664 }
665 
666 
Definition: messages.hh:32
int ofx_proc_file(LibofxContextPtr ctx, const char *p_filename)
File pre-processing of OFX AND for OFC files.
Definition: ofx_preproc.cpp:81
const int DTD_SEARCH_PATH_NUM
The number of different paths to search for DTDs.
Definition: ofx_preproc.cpp:60
int message_out(OfxMsgType error_type, const string message)
Message output function.
Definition: messages.cpp:60
OFX/SGML parsing functionality.
const char * DTD_SEARCH_PATH[DTD_SEARCH_PATH_NUM]
The list of paths to search for the DTDs.
Definition: ofx_preproc.cpp:66
int ofc_proc_sgml(LibofxContext *libofx_context, int argc, char *const *argv)
Parses a DTD and OFX file(s)
Definition: ofc_sgml.cpp:353
Various simple functions for type conversion & al.
int ofx_proc_sgml(LibofxContext *libofx_context, int argc, char *const *argv)
Parses a DTD and OFX file(s)
Definition: ofx_sgml.cpp:372
string sanitize_proprietary_tags(string input_string)
Removes proprietary tags and comments.
OFX/SGML parsing functionality.
Message IO functionality.
Preprocessing of the OFX files before parsing.
std::string find_dtd(LibofxContextPtr ctx, const std::string &dtd_filename)
Find the appropriate DTD for the file version.