Medical Imaging Interaction Toolkit  2016.11.0
Medical Imaging Interaction Toolkit
Modules/CppMicroServices/doc/CMakeDoxygenFilter.cpp
Go to the documentation of this file.
1 /*=============================================================================
2 
3  Copyright (c) German Cancer Research Center,
4  Division of Medical and Biological Informatics
5 
6  Licensed under the Apache License, Version 2.0 (the "License");
7  you may not use this file except in compliance with the License.
8  You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12  Unless required by applicable law or agreed to in writing, software
13  distributed under the License is distributed on an "AS IS" BASIS,
14  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  See the License for the specific language governing permissions and
16  limitations under the License.
17 
18 =============================================================================*/
19 
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>

#include <assert.h>
26 
27 //--------------------------------------
28 // Utility classes and functions
29 //--------------------------------------
30 
/// Character traits that treat upper- and lower-case letters as equal.
///
/// Every member that is not overridden here is inherited unchanged from
/// std::char_traits<char>.
struct ci_char_traits : public std::char_traits<char>
{
  /// Returns true if \a c1 and \a c2 are equal ignoring case.
  static bool eq(char c1, char c2)
  { return fold(c1) == fold(c2); }

  /// Returns true if \a c1 and \a c2 differ ignoring case.
  static bool ne(char c1, char c2)
  { return fold(c1) != fold(c2); }

  /// Returns true if \a c1 orders before \a c2 ignoring case.
  static bool lt(char c1, char c2)
  { return fold(c1) < fold(c2); }

  /// Returns true if \a c1 orders after \a c2 ignoring case.
  static bool gt(char c1, char c2)
  { return fold(c1) > fold(c2); }

  /// Compares the first \a n characters of \a s1 and \a s2 ignoring case.
  /// \return a negative value, zero, or a positive value if \a s1 orders
  ///         before, equal to, or after \a s2.
  static int compare(const char* s1, const char* s2, std::size_t n)
  {
    for (; n > 0; --n, ++s1, ++s2)
    {
      if (lt(*s1, *s2)) return -1;
      if (gt(*s1, *s2)) return 1;
    }
    return 0;
  }

  /// Searches the first \a n characters of \a s for \a a ignoring case.
  /// \return a pointer to the first match, or a null pointer if there is
  ///         none. Returning null is what std::basic_string relies on to
  ///         report "not found".
  static const char* find(const char* s, std::size_t n, char a)
  {
    for (; n > 0; --n, ++s)
    {
      if (eq(*s, a)) return s;
    }
    return NULL;
  }

private:

  /// Upper-cases \a c. The value is converted to unsigned char first because
  /// passing a negative value (other than EOF) to toupper() is undefined.
  static int fold(char c)
  { return std::toupper(static_cast<unsigned char>(c)); }
};

/// String type whose comparisons and searches ignore case.
typedef std::basic_string<char, ci_char_traits> ci_string;
69 
70 //--------------------------------------
71 // Lexer
72 //--------------------------------------
73 
74 class CMakeLexer
75 {
76 public:
77 
78  enum Token {
79  TOK_EOF = -1,
80  TOK_EOL = -2,
81 
82  // commands
83  TOK_MACRO = -3, TOK_ENDMACRO = -4,
84  TOK_FUNCTION = -5, TOK_ENDFUNCTION = -6,
85  TOK_DOXYGEN_COMMENT = -7,
86  TOK_SET = -8,
87  TOK_STRING_LITERAL = -100,
88  TOK_NUMBER_LITERAL = -102,
89 
90  // primary
91  TOK_IDENTIFIER = -200
92  };
93 
94  CMakeLexer(std::istream& is)
95  : _lastChar(' '), _is(is), _line(1), _col(1)
96  {}
97 
98  int getToken()
99  {
100  // skip whitespace
101  while (isspace(_lastChar) && _lastChar != '\r' && _lastChar != '\n')
102  {
103  _lastChar = getChar();
104  }
105 
106  if (isalpha(_lastChar) || _lastChar == '_')
107  {
108  _identifier = _lastChar;
109  while (isalnum(_lastChar = getChar()) || _lastChar == '-' || _lastChar == '_')
110  {
111  _identifier += _lastChar;
112  }
113 
114  if (_identifier == "set")
115  return TOK_SET;
116  if (_identifier == "function")
117  return TOK_FUNCTION;
118  if (_identifier == "macro")
119  return TOK_MACRO;
120  if (_identifier == "endfunction")
121  return TOK_ENDFUNCTION;
122  if (_identifier == "endmacro")
123  return TOK_ENDMACRO;
124  return TOK_IDENTIFIER;
125  }
126 
127  if (isdigit(_lastChar))
128  {
129  // very lax!! number detection
130  _identifier = _lastChar;
131  while (isalnum(_lastChar = getChar()) || _lastChar == '.' || _lastChar == ',')
132  {
133  _identifier += _lastChar;
134  }
135  return TOK_NUMBER_LITERAL;
136  }
137 
138  if (_lastChar == '#')
139  {
140  _lastChar = getChar();
141  if (_lastChar == '!')
142  {
143  // found a doxygen comment marker
144  _identifier.clear();
145 
146  _lastChar = getChar();
147  while (_lastChar != EOF && _lastChar != '\n' && _lastChar != '\r')
148  {
149  _identifier += _lastChar;
150  _lastChar = getChar();
151  }
152  return TOK_DOXYGEN_COMMENT;
153  }
154 
155  // skip the comment
156  while (_lastChar != EOF && _lastChar != '\n' && _lastChar != '\r')
157  {
158  _lastChar = getChar();
159  }
160  }
161 
162  if (_lastChar == '"')
163  {
164  _lastChar = getChar();
165  _identifier.clear();
166  while (_lastChar != EOF && _lastChar != '"')
167  {
168  _identifier += _lastChar;
169  _lastChar = getChar();
170  }
171 
172  // eat the closing "
173  _lastChar = getChar();
174  return TOK_STRING_LITERAL;
175  }
176 
177  // don't eat the EOF
178  if (_lastChar == EOF) return TOK_EOF;
179 
180  // don't eat the EOL
181  if (_lastChar == '\r' || _lastChar == '\n')
182  {
183  if (_lastChar == '\r') _lastChar = getChar();
184  if (_lastChar == '\n') _lastChar = getChar();
185  return TOK_EOL;
186  }
187 
188  // return the character as its ascii value
189  int thisChar = _lastChar;
190  _lastChar = getChar();
191  return thisChar;
192  }
193 
194  std::string getIdentifier() const
195  {
196  return std::string(_identifier.c_str());
197  }
198 
199  int curLine() const
200  { return _line; }
201 
202  int curCol() const
203  { return _col; }
204 
205  int getChar()
206  {
207  int c = _is.get();
208  updateLoc(c);
209  return c;
210  }
211 
212 private:
213 
214  void updateLoc(int c)
215  {
216  if (c == '\n' || c == '\r')
217  {
218  ++_line;
219  _col = 1;
220  }
221  else
222  {
223  ++_col;
224  }
225  }
226 
227  ci_string _identifier;
228  int _lastChar;
229  std::istream& _is;
230 
231  int _line;
232  int _col;
233 };
234 
235 //--------------------------------------
236 // Parser
237 //--------------------------------------
238 
239 class CMakeParser
240 {
241 
242 public:
243 
244  CMakeParser(std::istream& is, std::ostream& os)
245  : _os(os), _lexer(is), _curToken(CMakeLexer::TOK_EOF), _lastToken(CMakeLexer::TOK_EOF)
246  { }
247 
248  int curToken()
249  {
250  return _curToken;
251  }
252 
253  int nextToken()
254  {
255  _lastToken = _curToken;
256  _curToken = _lexer.getToken();
257  while (_curToken == CMakeLexer::TOK_EOL)
258  {
259  // Try to preserve lines in output to allow correct line number referencing by doxygen.
260  _os << std::endl;
261  _curToken = _lexer.getToken();
262  }
263  return _curToken;
264  }
265 
266  void handleMacro()
267  {
268  if(!parseMacro())
269  {
270  // skip token for error recovery
271  nextToken();
272  }
273  }
274 
275  void handleFunction()
276  {
277  if(!parseFunction())
278  {
279  // skip token for error recovery
280  nextToken();
281  }
282  }
283 
284  void handleSet()
285  {
286  // SET(var ...) following a documentation block is assumed to be a variable declaration.
287  if (_lastToken != CMakeLexer::TOK_DOXYGEN_COMMENT)
288  {
289  // No comment block before
290  nextToken();
291  } else if(!parseSet())
292  {
293  // skip token for error recovery
294  nextToken();
295  }
296  }
297 
298  void handleDoxygenComment()
299  {
300  _os << "///" << _lexer.getIdentifier();
301  nextToken();
302  }
303 
304  void handleTopLevelExpression()
305  {
306  // skip token
307  nextToken();
308  }
309 
310 private:
311 
312  void printError(const char* str)
313  {
314  std::cerr << "Error: " << str << " (at line " << _lexer.curLine() << ", col " << _lexer.curCol() << ")";
315  }
316 
317  bool parseMacro()
318  {
319  if (nextToken() != '(')
320  {
321  printError("Expected '(' after MACRO");
322  return false;
323  }
324 
325  nextToken();
326  std::string macroName = _lexer.getIdentifier();
327  if (curToken() != CMakeLexer::TOK_IDENTIFIER || macroName.empty())
328  {
329  printError("Expected macro name");
330  return false;
331  }
332 
333  _os << macroName << '(';
334  if (nextToken() == CMakeLexer::TOK_IDENTIFIER)
335  {
336  _os << _lexer.getIdentifier();
337  while (nextToken() == CMakeLexer::TOK_IDENTIFIER)
338  {
339  _os << ", " << _lexer.getIdentifier();
340  }
341  }
342 
343  if (curToken() != ')')
344  {
345  printError("Missing expected ')'");
346  }
347  else
348  {
349  _os << ");";
350  }
351 
352  // eat the ')'
353  nextToken();
354  return true;
355  }
356 
357  bool parseSet()
358  {
359  if (nextToken() != '(')
360  {
361  printError("Expected '(' after SET");
362  return false;
363  }
364 
365  nextToken();
366  std::string variableName = _lexer.getIdentifier();
367  if (curToken() != CMakeLexer::TOK_IDENTIFIER || variableName.empty())
368  {
369  printError("Expected variable name");
370  return false;
371  }
372 
373  _os << "CMAKE_VARIABLE " << variableName;
374 
375  nextToken();
376  while ((curToken() == CMakeLexer::TOK_IDENTIFIER)
377  || (curToken() == CMakeLexer::TOK_STRING_LITERAL)
378  || (curToken() == CMakeLexer::TOK_NUMBER_LITERAL))
379  {
380  nextToken();
381  }
382 
383  if (curToken() != ')')
384  {
385  printError("Missing expected ')'");
386  }
387  else
388  {
389  _os << ";";
390  }
391 
392  // eat the ')'
393  nextToken();
394  return true;
395  }
396 
397  bool parseFunction()
398  {
399  if (nextToken() != '(')
400  {
401  printError("Expected '(' after FUNCTION");
402  return false;
403  }
404 
405  nextToken();
406  std::string funcName = _lexer.getIdentifier();
407  if (curToken() != CMakeLexer::TOK_IDENTIFIER || funcName.empty())
408  {
409  printError("Expected function name");
410  return false;
411  }
412 
413  _os << funcName << '(';
414  if (nextToken() == CMakeLexer::TOK_IDENTIFIER)
415  {
416  _os << _lexer.getIdentifier();
417  while (nextToken() == CMakeLexer::TOK_IDENTIFIER)
418  {
419  _os << ", " << _lexer.getIdentifier();
420  }
421  }
422 
423  if (curToken() != ')')
424  {
425  printError("Missing expected ')'");
426  }
427  else
428  {
429  _os << ");";
430  }
431 
432  // eat the ')'
433  nextToken();
434 
435  return true;
436  }
437 
438  std::ostream& _os;
439  CMakeLexer _lexer;
440  int _curToken;
441  int _lastToken;
442 };
443 
444 
445 #define STRINGIFY(a) #a
446 #define DOUBLESTRINGIFY(a) STRINGIFY(a)
447 
448 int main(int argc, char** argv)
449 {
450  assert(argc > 1);
451 
452  for (int i = 1; i < argc; ++i)
453  {
454  std::ifstream ifs(argv[i]);
455  std::ostream& os = std::cout;
456 
457  #ifdef USE_NAMESPACE
458  os << "namespace " << DOUBLESTRINGIFY(USE_NAMESPACE) << " {\n";
459  #endif
460 
461  CMakeParser parser(ifs, os);
462  parser.nextToken();
463  while (ifs.good())
464  {
465  switch (parser.curToken())
466  {
467  case CMakeLexer::TOK_EOF:
468  return ifs.get(); // eat EOF
469  case CMakeLexer::TOK_MACRO:
470  parser.handleMacro();
471  break;
472  case CMakeLexer::TOK_FUNCTION:
473  parser.handleFunction();
474  break;
475  case CMakeLexer::TOK_SET:
476  parser.handleSet();
477  break;
478  case CMakeLexer::TOK_DOXYGEN_COMMENT:
479  parser.handleDoxygenComment();
480  break;
481  default:
482  parser.handleTopLevelExpression();
483  break;
484  }
485  }
486 
487  #ifdef USE_NAMESPACE
488  os << "}\n";
489  #endif
490  }
491 
492  return EXIT_SUCCESS;
493 }
std::basic_string< char, ci_char_traits > ci_string
int main(int argc, char **argv)
bool compare(std::pair< double, int > i, std::pair< double, int > j)