Medical Imaging Interaction Toolkit  2018.4.99-389bf124
Medical Imaging Interaction Toolkit
Modules/CppMicroServices/doc/CMakeDoxygenFilter.cpp
Go to the documentation of this file.
1 /*============================================================================
2 
3  Library: CppMicroServices
4 
5  Copyright (c) German Cancer Research Center (DKFZ)
6  All rights reserved.
7 
8  Licensed under the Apache License, Version 2.0 (the "License");
9  you may not use this file except in compliance with the License.
10  You may obtain a copy of the License at
11 
12  http://www.apache.org/licenses/LICENSE-2.0
13 
14  Unless required by applicable law or agreed to in writing, software
15  distributed under the License is distributed on an "AS IS" BASIS,
16  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  See the License for the specific language governing permissions and
18  limitations under the License.
19 
20 ============================================================================*/
21 
22 #include <cstdlib>
23 #include <string>
24 #include <fstream>
25 #include <iostream>
26 
27 #include <assert.h>
28 
29 //--------------------------------------
30 // Utility classes and functions
31 //--------------------------------------
32 
/**
 * Character traits for case-insensitive strings.
 *
 * Everything that is not overridden here is inherited unchanged from
 * std::char_traits<char>. All comparisons are performed on the upper-cased
 * characters, so "SET", "Set" and "set" compare equal.
 */
struct ci_char_traits : public std::char_traits<char>
{
  /** True if both characters are equal ignoring case. */
  static bool eq(char c1, char c2)
  { return upper(c1) == upper(c2); }

  /** True if the characters differ ignoring case. */
  static bool ne(char c1, char c2)
  { return upper(c1) != upper(c2); }

  /** True if c1 sorts before c2 ignoring case. */
  static bool lt(char c1, char c2)
  { return upper(c1) < upper(c2); }

  /** True if c1 sorts after c2 ignoring case. */
  static bool gt(char c1, char c2)
  { return upper(c1) > upper(c2); }

  /**
   * Compares the first n characters of s1 and s2 ignoring case.
   *
   * @return -1, 0 or 1 if s1 is less than, equal to or greater than s2.
   */
  static int compare(const char* s1, const char* s2, std::size_t n)
  {
    for (; n > 0; --n, ++s1, ++s2)
    {
      if (lt(*s1, *s2)) return -1;
      if (gt(*s1, *s2)) return 1;
    }
    return 0;
  }

  /**
   * Searches the first n characters of s for a, ignoring case.
   *
   * @return Pointer to the first match, or a null pointer if there is none.
   *         (basic_string::find relies on the null pointer to report npos;
   *         returning the past-the-end position would look like a match.)
   */
  static const char* find(const char* s, std::size_t n, char a)
  {
    for (; n > 0; --n, ++s)
    {
      if (eq(*s, a)) return s;
    }
    return NULL;
  }

private:

  // toupper() is only defined for values representable as unsigned char
  // (or EOF), so plain char must be converted before the call.
  static int upper(char c)
  { return toupper(static_cast<unsigned char>(c)); }
};

/** String type whose comparisons and searches ignore character case. */
typedef std::basic_string<char, ci_char_traits> ci_string;
71 
72 //--------------------------------------
73 // Lexer
74 //--------------------------------------
75 
76 class CMakeLexer
77 {
78 public:
79 
80  enum Token {
81  TOK_EOF = -1,
82  TOK_EOL = -2,
83 
84  // commands
85  TOK_MACRO = -3, TOK_ENDMACRO = -4,
86  TOK_FUNCTION = -5, TOK_ENDFUNCTION = -6,
87  TOK_DOXYGEN_COMMENT = -7,
88  TOK_SET = -8,
89  TOK_STRING_LITERAL = -100,
90  TOK_NUMBER_LITERAL = -102,
91 
92  // primary
93  TOK_IDENTIFIER = -200
94  };
95 
96  CMakeLexer(std::istream& is)
97  : _lastChar(' '), _is(is), _line(1), _col(1)
98  {}
99 
100  int getToken()
101  {
102  // skip whitespace
103  while (isspace(_lastChar) && _lastChar != '\r' && _lastChar != '\n')
104  {
105  _lastChar = getChar();
106  }
107 
108  if (isalpha(_lastChar) || _lastChar == '_')
109  {
110  _identifier = _lastChar;
111  while (isalnum(_lastChar = getChar()) || _lastChar == '-' || _lastChar == '_')
112  {
113  _identifier += _lastChar;
114  }
115 
116  if (_identifier == "set")
117  return TOK_SET;
118  if (_identifier == "function")
119  return TOK_FUNCTION;
120  if (_identifier == "macro")
121  return TOK_MACRO;
122  if (_identifier == "endfunction")
123  return TOK_ENDFUNCTION;
124  if (_identifier == "endmacro")
125  return TOK_ENDMACRO;
126  return TOK_IDENTIFIER;
127  }
128 
129  if (isdigit(_lastChar))
130  {
131  // very lax!! number detection
132  _identifier = _lastChar;
133  while (isalnum(_lastChar = getChar()) || _lastChar == '.' || _lastChar == ',')
134  {
135  _identifier += _lastChar;
136  }
137  return TOK_NUMBER_LITERAL;
138  }
139 
140  if (_lastChar == '#')
141  {
142  _lastChar = getChar();
143  if (_lastChar == '!')
144  {
145  // found a doxygen comment marker
146  _identifier.clear();
147 
148  _lastChar = getChar();
149  while (_lastChar != EOF && _lastChar != '\n' && _lastChar != '\r')
150  {
151  _identifier += _lastChar;
152  _lastChar = getChar();
153  }
154  return TOK_DOXYGEN_COMMENT;
155  }
156 
157  // skip the comment
158  while (_lastChar != EOF && _lastChar != '\n' && _lastChar != '\r')
159  {
160  _lastChar = getChar();
161  }
162  }
163 
164  if (_lastChar == '"')
165  {
166  _lastChar = getChar();
167  _identifier.clear();
168  while (_lastChar != EOF && _lastChar != '"')
169  {
170  _identifier += _lastChar;
171  _lastChar = getChar();
172  }
173 
174  // eat the closing "
175  _lastChar = getChar();
176  return TOK_STRING_LITERAL;
177  }
178 
179  // don't eat the EOF
180  if (_lastChar == EOF) return TOK_EOF;
181 
182  // don't eat the EOL
183  if (_lastChar == '\r' || _lastChar == '\n')
184  {
185  if (_lastChar == '\r') _lastChar = getChar();
186  if (_lastChar == '\n') _lastChar = getChar();
187  return TOK_EOL;
188  }
189 
190  // return the character as its ascii value
191  int thisChar = _lastChar;
192  _lastChar = getChar();
193  return thisChar;
194  }
195 
196  std::string getIdentifier() const
197  {
198  return std::string(_identifier.c_str());
199  }
200 
201  int curLine() const
202  { return _line; }
203 
204  int curCol() const
205  { return _col; }
206 
207  int getChar()
208  {
209  int c = _is.get();
210  updateLoc(c);
211  return c;
212  }
213 
214 private:
215 
216  void updateLoc(int c)
217  {
218  if (c == '\n' || c == '\r')
219  {
220  ++_line;
221  _col = 1;
222  }
223  else
224  {
225  ++_col;
226  }
227  }
228 
229  ci_string _identifier;
230  int _lastChar;
231  std::istream& _is;
232 
233  int _line;
234  int _col;
235 };
236 
237 //--------------------------------------
238 // Parser
239 //--------------------------------------
240 
241 class CMakeParser
242 {
243 
244 public:
245 
246  CMakeParser(std::istream& is, std::ostream& os)
247  : _os(os), _lexer(is), _curToken(CMakeLexer::TOK_EOF), _lastToken(CMakeLexer::TOK_EOF)
248  { }
249 
250  int curToken()
251  {
252  return _curToken;
253  }
254 
255  int nextToken()
256  {
257  _lastToken = _curToken;
258  _curToken = _lexer.getToken();
259  while (_curToken == CMakeLexer::TOK_EOL)
260  {
261  // Try to preserve lines in output to allow correct line number referencing by doxygen.
262  _os << std::endl;
263  _curToken = _lexer.getToken();
264  }
265  return _curToken;
266  }
267 
268  void handleMacro()
269  {
270  if(!parseMacro())
271  {
272  // skip token for error recovery
273  nextToken();
274  }
275  }
276 
277  void handleFunction()
278  {
279  if(!parseFunction())
280  {
281  // skip token for error recovery
282  nextToken();
283  }
284  }
285 
286  void handleSet()
287  {
288  // SET(var ...) following a documentation block is assumed to be a variable declaration.
289  if (_lastToken != CMakeLexer::TOK_DOXYGEN_COMMENT)
290  {
291  // No comment block before
292  nextToken();
293  } else if(!parseSet())
294  {
295  // skip token for error recovery
296  nextToken();
297  }
298  }
299 
300  void handleDoxygenComment()
301  {
302  _os << "///" << _lexer.getIdentifier();
303  nextToken();
304  }
305 
306  void handleTopLevelExpression()
307  {
308  // skip token
309  nextToken();
310  }
311 
312 private:
313 
314  void printError(const char* str)
315  {
316  std::cerr << "Error: " << str << " (at line " << _lexer.curLine() << ", col " << _lexer.curCol() << ")";
317  }
318 
319  bool parseMacro()
320  {
321  if (nextToken() != '(')
322  {
323  printError("Expected '(' after MACRO");
324  return false;
325  }
326 
327  nextToken();
328  std::string macroName = _lexer.getIdentifier();
329  if (curToken() != CMakeLexer::TOK_IDENTIFIER || macroName.empty())
330  {
331  printError("Expected macro name");
332  return false;
333  }
334 
335  _os << macroName << '(';
336  if (nextToken() == CMakeLexer::TOK_IDENTIFIER)
337  {
338  _os << _lexer.getIdentifier();
339  while (nextToken() == CMakeLexer::TOK_IDENTIFIER)
340  {
341  _os << ", " << _lexer.getIdentifier();
342  }
343  }
344 
345  if (curToken() != ')')
346  {
347  printError("Missing expected ')'");
348  }
349  else
350  {
351  _os << ");";
352  }
353 
354  // eat the ')'
355  nextToken();
356  return true;
357  }
358 
359  bool parseSet()
360  {
361  if (nextToken() != '(')
362  {
363  printError("Expected '(' after SET");
364  return false;
365  }
366 
367  nextToken();
368  std::string variableName = _lexer.getIdentifier();
369  if (curToken() != CMakeLexer::TOK_IDENTIFIER || variableName.empty())
370  {
371  printError("Expected variable name");
372  return false;
373  }
374 
375  _os << "CMAKE_VARIABLE " << variableName;
376 
377  nextToken();
378  while ((curToken() == CMakeLexer::TOK_IDENTIFIER)
379  || (curToken() == CMakeLexer::TOK_STRING_LITERAL)
380  || (curToken() == CMakeLexer::TOK_NUMBER_LITERAL))
381  {
382  nextToken();
383  }
384 
385  if (curToken() != ')')
386  {
387  printError("Missing expected ')'");
388  }
389  else
390  {
391  _os << ";";
392  }
393 
394  // eat the ')'
395  nextToken();
396  return true;
397  }
398 
399  bool parseFunction()
400  {
401  if (nextToken() != '(')
402  {
403  printError("Expected '(' after FUNCTION");
404  return false;
405  }
406 
407  nextToken();
408  std::string funcName = _lexer.getIdentifier();
409  if (curToken() != CMakeLexer::TOK_IDENTIFIER || funcName.empty())
410  {
411  printError("Expected function name");
412  return false;
413  }
414 
415  _os << funcName << '(';
416  if (nextToken() == CMakeLexer::TOK_IDENTIFIER)
417  {
418  _os << _lexer.getIdentifier();
419  while (nextToken() == CMakeLexer::TOK_IDENTIFIER)
420  {
421  _os << ", " << _lexer.getIdentifier();
422  }
423  }
424 
425  if (curToken() != ')')
426  {
427  printError("Missing expected ')'");
428  }
429  else
430  {
431  _os << ");";
432  }
433 
434  // eat the ')'
435  nextToken();
436 
437  return true;
438  }
439 
440  std::ostream& _os;
441  CMakeLexer _lexer;
442  int _curToken;
443  int _lastToken;
444 };
445 
446 
447 #define STRINGIFY(a) #a
448 #define DOUBLESTRINGIFY(a) STRINGIFY(a)
449 
450 int main(int argc, char** argv)
451 {
452  assert(argc > 1);
453 
454  for (int i = 1; i < argc; ++i)
455  {
456  std::ifstream ifs(argv[i]);
457  std::ostream& os = std::cout;
458 
459  #ifdef USE_NAMESPACE
460  os << "namespace " << DOUBLESTRINGIFY(USE_NAMESPACE) << " {\n";
461  #endif
462 
463  CMakeParser parser(ifs, os);
464  parser.nextToken();
465  while (ifs.good())
466  {
467  switch (parser.curToken())
468  {
469  case CMakeLexer::TOK_EOF:
470  return ifs.get(); // eat EOF
471  case CMakeLexer::TOK_MACRO:
472  parser.handleMacro();
473  break;
474  case CMakeLexer::TOK_FUNCTION:
475  parser.handleFunction();
476  break;
477  case CMakeLexer::TOK_SET:
478  parser.handleSet();
479  break;
480  case CMakeLexer::TOK_DOXYGEN_COMMENT:
481  parser.handleDoxygenComment();
482  break;
483  default:
484  parser.handleTopLevelExpression();
485  break;
486  }
487  }
488 
489  #ifdef USE_NAMESPACE
490  os << "}\n";
491  #endif
492  }
493 
494  return EXIT_SUCCESS;
495 }
std::basic_string< char, ci_char_traits > ci_string
int main(int argc, char **argv)
bool compare(std::pair< double, int > i, std::pair< double, int > j)