bes  Updated for version 3.20.6
SaxParserWrapper.cc
1 // This file is part of the "NcML Module" project, a BES module designed
// to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
// Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
#include "SaxParserWrapper.h"

#include <cstdarg> // for va_list, va_start, va_end
#include <cstdio> // for vsnprintf
#include <exception>
#include <iostream>
#include <string>

#include <libxml/parser.h>
#include <libxml/xmlstring.h>

#include "BESDebug.h"
#include "BESError.h"
#include "BESInternalError.h"
#include "BESInternalFatalError.h"
#include "BESSyntaxUserError.h"
#include "BESForbiddenError.h"
#include "BESNotFoundError.h"
#include "NCMLDebug.h"
#include "SaxParser.h"
#include "XMLHelpers.h"
49 
50 // Toggle to tell the parser to use the Sax2 start/end element
51 // calls with namespace information.
52 // [ TODO We probably want to remove the non-namespace pathways at some point,
53 // but I will leave them here for now in case there's issues ]
54 #define NCML_PARSER_USE_SAX2_NAMESPACES 1
55 
56 using namespace std;
57 using namespace ncml_module;
58 
60 // Helpers
61 
62 #if NCML_PARSER_USE_SAX2_NAMESPACES
63 static const int SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE = 5;
64 static int toXMLAttributeMapWithNamespaces(XMLAttributeMap& attrMap, const xmlChar** attributes, int num_attributes)
65 {
66  attrMap.clear();
67  for (int i = 0; i < num_attributes; ++i) {
68  XMLAttribute attr;
69  attr.fromSAX2NamespaceAttributes(attributes);
70  attributes += SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE; // jump to start of next record
71  attrMap.addAttribute(attr);
72  }
73  return num_attributes;
74 }
75 #else
76 // Assumes the non-namespace calls, so attrs is stride 2 {name,value}
77 static int toXMLAttributeMapNoNamespaces(XMLAttributeMap& attrMap, const xmlChar** attrs)
78 {
79  attrMap.clear();
80  int count=0;
81  while (attrs && *attrs != NULL)
82  {
83  XMLAttribute attr;
84  attr.localname = XMLUtil::xmlCharToString(*attrs);
85  attr.value = XMLUtil::xmlCharToString(*(attrs+1));
86  attrMap.addAttribute(attr);
87  attrs += 2;
88  count++;
89  }
90  return count;
91 }
92 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
93 
95 // Callback we will register that just pass on to our C++ engine
96 //
// NOTE WELL: New C handlers must follow the pattern of the
// existing handlers below in order to avoid memory leaks
// in libxml during an exception!
100 
101 // To avoid cut & paste below, we use this macro to cast the void* into the wrapper and
102 // set up a proper error handling structure around the main call.
103 // The macro internally defines the symbol "parser" to the SaxParser contained in the wrapper.
// So for example, a safe handler call to SaxParser would look like:
// static void ncmlStartDocument(void* userData)
// {
//   BEGIN_SAFE_PARSER_BLOCK(userData) // pass in the void*, which is a SaxParserWrapper*
//   parser.onStartDocument(); // call the dispatch on the wrapped parser using the autodefined name parser
//   END_SAFE_PARSER_BLOCK // end the error handling wrapper
// }
111 
// Opening half of the safe-handler wrapper used inside a void-returning SAX callback.
//  - argName is the callback's void* user data; it is static_cast to SaxParserWrapper*.
//  - If the wrapper is already in the exception state the callback returns immediately.
//  - Otherwise a try block is opened, the local reference `parser` is bound to the
//    wrapper's SaxParser, and the parser is given the wrapper's current parse line.
// The macro leaves three scopes open (outer block, else block, try block); they are
// closed by END_SAFE_PARSER_BLOCK, which must follow the handler body.
#define BEGIN_SAFE_PARSER_BLOCK(argName) { \
 SaxParserWrapper* _spw_ = static_cast<SaxParserWrapper*>(argName); \
 if (_spw_->isExceptionState()) \
 { \
 return; \
 } \
 else \
 { \
 try \
 { \
 SaxParser& parser = _spw_->getParser(); \
 parser.setParseLineNumber(_spw_->getCurrentParseLine());
124 
// Closing half of the safe-handler wrapper; required after the actual calls to the parser.
// It closes the try block opened by BEGIN_SAFE_PARSER_BLOCK and hands every exception
// to the wrapper through deferException() instead of letting it escape the callback:
//  - BESError        -> deferred as is.
//  - std::exception  -> wrapped in a BESInternalError carrying what(), then deferred.
//  - anything else   -> a default BESInternalError is deferred.
// Finally it closes the else block and the outer block opened by the BEGIN macro.
#define END_SAFE_PARSER_BLOCK } \
 catch (BESError& theErr) \
 { \
 BESDEBUG("ncml", "Caught BESError&, deferring..." << endl); \
 _spw_->deferException(theErr); \
 } \
 catch (std::exception& ex) \
 { \
 BESDEBUG("ncml", "Caught std::exception&, wrapping and deferring..." << endl); \
 BESInternalError _badness_("Wrapped std::exception.what()=" + string(ex.what()), __FILE__, __LINE__);\
 _spw_->deferException(_badness_); \
 } \
 catch (...) \
 { \
 BESDEBUG("ncml", "Caught unknown (...) exception: deferring default error." << endl); \
 BESInternalError _badness_("SaxParserWrapper:: Unknown Exception Type: ", __FILE__, __LINE__); \
 _spw_->deferException(_badness_); \
 } \
 } \
}
146 
148 // Our C SAX callbacks, wrapped carefully.
149 
150 static void ncmlStartDocument(void* userData)
151 {
152  BEGIN_SAFE_PARSER_BLOCK(userData)
153 
154  parser.onStartDocument();
155 
156  END_SAFE_PARSER_BLOCK
157 }
158 
159 static void ncmlEndDocument(void* userData)
160 {
161  BEGIN_SAFE_PARSER_BLOCK(userData)
162 
163  parser.onEndDocument();
164 
165  END_SAFE_PARSER_BLOCK
166 }
167 
168 #if !NCML_PARSER_USE_SAX2_NAMESPACES
169 
170 static void ncmlStartElement(void * userData,
171  const xmlChar * name,
172  const xmlChar ** attrs)
173 {
174  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
175  BEGIN_SAFE_PARSER_BLOCK(1)
176 
177  string nameS = XMLUtil::xmlCharToString(name);
178  XMLAttributeMap map;
179  toXMLAttributeMapNoNamespaces(map, attrs);
180 
181  // These args will be valid for the scope of the call.
182  parser.onStartElement(nameS, map);
183 
184  END_SAFE_PARSER_BLOCK
185 }
186 
187 static void ncmlEndElement(void * userData,
188  const xmlChar * name)
189 {
190  BEGIN_SAFE_PARSER_BLOCK(1)
191 
192  string nameS = XMLUtil::xmlCharToString(name);
193  parser.onEndElement(nameS);
194 
195  END_SAFE_PARSER_BLOCK
196 }
197 #endif // !NCML_PARSER_USE_SAX2_NAMESPACES
198 
199 #if NCML_PARSER_USE_SAX2_NAMESPACES
200 static
201 void ncmlSax2StartElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
202  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /* nb_defaulted */,
203  const xmlChar **attributes)
204 {
205  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
206  BEGIN_SAFE_PARSER_BLOCK(userData)
207 
208  BESDEBUG("ncml", "SaxParserWrapper::ncmlSax2StartElementNs() - localname:" << localname << endl);
209 
210  XMLAttributeMap attrMap;
211  toXMLAttributeMapWithNamespaces(attrMap, attributes, nb_attributes);
212 
213  XMLNamespaceMap nsMap;
214  nsMap.fromSAX2Namespaces(namespaces, nb_namespaces);
215 
216  // These args will be valid for the scope of the call.
217  string localnameString = XMLUtil::xmlCharToString(localname);
218  string prefixString = XMLUtil::xmlCharToString(prefix);
219  string uriString = XMLUtil::xmlCharToString(URI);
220 
221  parser.onStartElementWithNamespace(
222  localnameString,
223  prefixString,
224  uriString,
225  attrMap,
226  nsMap);
227 
228  END_SAFE_PARSER_BLOCK
229 }
230 
231 static
232 void ncmlSax2EndElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)
233 {
234  BEGIN_SAFE_PARSER_BLOCK(userData)
235 
236  string localnameString = XMLUtil::xmlCharToString(localname);
237  string prefixString = XMLUtil::xmlCharToString(prefix);
238  string uriString = XMLUtil::xmlCharToString(URI);
239  parser.onEndElementWithNamespace(localnameString, prefixString, uriString);
240 
241  END_SAFE_PARSER_BLOCK
242 }
243 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
244 
245 static void ncmlCharacters(void* userData, const xmlChar* content, int len)
246 {
247  BEGIN_SAFE_PARSER_BLOCK(userData)
248 
249  // len is since the content string might not be null terminated,
250  // so we have to build out own and pass it up special....
251  // TODO consider just using these xmlChar's upstairs to avoid copies, or make an adapter or something.
252  string characters("");
253  characters.reserve(len);
254  const xmlChar* contentEnd = content+len;
255  while(content != contentEnd)
256  {
257  characters += (const char)(*content++);
258  }
259 
260  parser.onCharacters(characters);
261 
262  END_SAFE_PARSER_BLOCK
263 }
264 
265 static void ncmlWarning(void* userData, const char* msg, ...)
266 {
267  BEGIN_SAFE_PARSER_BLOCK(userData)
268 
269  BESDEBUG("ncml", "SaxParserWrapper::ncmlWarning() - msg:" << msg << endl);
270 
271  char buffer[1024];
272  va_list(args);
273  va_start(args, msg);
274  unsigned int len = sizeof(buffer);
275  vsnprintf(buffer, len, msg, args);
276  va_end(args);
277  parser.onParseWarning(string(buffer));
278 
279  END_SAFE_PARSER_BLOCK
280 }
281 
282 static void ncmlFatalError(void* userData, const char* msg, ...)
283 {
284  BEGIN_SAFE_PARSER_BLOCK(userData)
285 
286  BESDEBUG("ncml", "SaxParserWrapper::ncmlFatalError() - msg:" << msg << endl);
287 
288  char buffer[1024];
289  va_list(args);
290  va_start(args, msg);
291  unsigned int len = sizeof(buffer);
292  vsnprintf(buffer, len, msg, args);
293  va_end(args);
294  parser.onParseError(string(buffer));
295 
296  END_SAFE_PARSER_BLOCK
297 }
298 
300 // class SaxParserWrapper impl
301 
302 SaxParserWrapper::SaxParserWrapper(SaxParser& parser) :
303  _parser(parser), _handler(), _state(NOT_PARSING), _errorMsg(""), _errorType(0), _errorFile(""), _errorLine(-1)
304 {
305 }
306 
307 SaxParserWrapper::~SaxParserWrapper()
308 {
309  // Really not much to do... everything cleans itself up.
310  _state = NOT_PARSING;
311 
312  // Leak fix. jhrg 6/21/19
313  cleanupParser();
314 }
315 
316 bool SaxParserWrapper::parse(const string& ncmlFilename)
317 {
318  // It's illegal to call this until it's done.
319  if (_state == PARSING) {
320  throw BESInternalError("Parse called again while already in parse.", __FILE__, __LINE__);
321  }
322 
323  // OK, now we're parsing
324  _state = PARSING;
325 
326  setupParser();
327 
328  bool success = xmlSAXUserParseFile(&_handler, this, ncmlFilename.c_str());
329 
330  // If we deferred an exception during the libxml parse call, now's the time to rethrow it.
331  if (isExceptionState()) {
333  }
334 
335  // Otherwise, we're also done parsing.
336  _state = NOT_PARSING;
337  return success;
338 }
339 
341 {
342  _state = EXCEPTION;
343  _errorType = theErr.get_bes_error_type();
344  _errorMsg = theErr.get_message();
345  _errorLine = theErr.get_line();
346  _errorFile = theErr.get_file();
347 }
348 
349 // HACK admittedly a little gross, but it's weird to have to copy an exception
350 // and this seemed the safest way rather than making dynamic storage, etc.
352 {
353  // Clear our state out so we can parse again though.
354  _state = NOT_PARSING;
355 
356  switch (_errorType) {
357  case BES_INTERNAL_ERROR:
358  throw BESInternalError(_errorMsg, _errorFile, _errorLine);
359 
360  case BES_INTERNAL_FATAL_ERROR:
361  throw BESInternalFatalError(_errorMsg, _errorFile, _errorLine);
362 
363  case BES_SYNTAX_USER_ERROR:
364  throw BESSyntaxUserError(_errorMsg, _errorFile, _errorLine);
365 
366  case BES_FORBIDDEN_ERROR:
367  throw BESForbiddenError(_errorMsg, _errorFile, _errorLine);
368 
369  case BES_NOT_FOUND_ERROR:
370  throw BESNotFoundError(_errorMsg, _errorFile, _errorLine);
371 
372  default:
373  throw BESInternalError("Unknown exception type.", __FILE__, __LINE__);
374  }
375 }
376 
378 {
379 #if 0
380  if (_context) {
381  return xmlSAX2GetLineNumber(_context);
382  }
383  else {
384  return -1;
385  }
386 #endif
387  return -1; //FIXME part of leak fix. jhrg 6.21.19
388 }
389 
390 static void setAllHandlerCBToNulls(xmlSAXHandler& h)
391 {
392  h.internalSubset = 0;
393  h.isStandalone = 0;
394  h.hasInternalSubset = 0;
395  h.hasExternalSubset = 0;
396  h.resolveEntity = 0;
397  h.getEntity = 0;
398  h.entityDecl = 0;
399  h.notationDecl = 0;
400  h.attributeDecl = 0;
401  h.elementDecl = 0;
402  h.unparsedEntityDecl = 0;
403  h.setDocumentLocator = 0;
404  h.startDocument = 0;
405  h.endDocument = 0;
406  h.startElement = 0;
407  h.endElement = 0;
408  h.reference = 0;
409  h.characters = 0;
410  h.ignorableWhitespace = 0;
411  h.processingInstruction = 0;
412  h.comment = 0;
413  h.warning = 0;
414  h.error = 0;
415  h.fatalError = 0;
416  h.getParameterEntity = 0;
417  h.cdataBlock = 0;
418  h.externalSubset = 0;
419 
420  // unsigned int initialized; magic number the init should fill in
421  /* The following fields are extensions available only on version 2 */
422  // void *_private; //i'd assume i don't set this either...
423  h.startElementNs = 0;
424  h.endElementNs = 0;
425  h.serror = 0;
426 }
427 
428 void SaxParserWrapper::setupParser()
429 {
430  // setup the handler for version 2,
431  // which sets an internal version magic number
432  // into _handler.initialized
433  // but which doesn't clear the handlers to 0.
434  xmlSAXVersion(&_handler, 2);
435 
436  // Initialize all handlers to 0 by hand to start
437  // so we don't blow those internal magic numbers.
438  setAllHandlerCBToNulls(_handler);
439 
440  // Put our static functions into the handler
441  _handler.startDocument = ncmlStartDocument;
442  _handler.endDocument = ncmlEndDocument;
443  _handler.warning = ncmlWarning;
444  _handler.error = ncmlFatalError;
445  _handler.fatalError = ncmlFatalError;
446  _handler.characters = ncmlCharacters;
447 
448  // We'll use one or the other until we're sure it works.
449 #if NCML_PARSER_USE_SAX2_NAMESPACES
450  _handler.startElement = 0;
451  _handler.endElement = 0;
452  _handler.startElementNs = ncmlSax2StartElementNs;
453  _handler.endElementNs = ncmlSax2EndElementNs;
454 #else
455  _handler.startElement = ncmlStartElement;
456  _handler.endElement = ncmlEndElement;
457  _handler.startElementNs = 0;
458  _handler.endElementNs = 0;
459 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
460 }
461 
462 // Leak fix. jhrg 6/21/19
463 void SaxParserWrapper::cleanupParser() throw ()
464 {
465 }
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual int get_bes_error_type()
Return the return code for this error class.
Definition: BESError.h:143
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:115
virtual std::string get_file()
get the file name where the exception was thrown
Definition: BESError.h:107
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
exception thrown if an internal error is found and is fatal to the BES
error thrown if the resource requested cannot be found
error thrown if there is a user syntax error in the request or any other user error
bool parse(const std::string &ncmlFilename)
Do a SAX parse of the ncmlFilename and pass the calls to wrapper parser.
void deferException(BESError &theErr)
The remaining calls are for the internals of the parser, but need to be public.
Interface class for the wrapper between libxml C SAX parser and our NCMLParser.
Definition: SaxParser.h:48
void addAttribute(const XMLAttribute &attribute)
Definition: XMLHelpers.cc:167
void fromSAX2Namespaces(const xmlChar **pNamespaces, int numNamespaces)
Definition: XMLHelpers.cc:320
NcML Parser for adding/modifying/removing metadata (attributes) to existing local datasets using NcML...
void fromSAX2NamespaceAttributes(const xmlChar **chunkOfFivePointers)
Definition: XMLHelpers.cc:94