Medical Imaging Interaction Toolkit  2016.11.0
Medical Imaging Interaction Toolkit
Documentation/CMakeDoxygenFilter.cpp
Go to the documentation of this file.
1 /*=============================================================================
2 
3  Copyright (c) German Cancer Research Center,
4  Division of Medical and Biological Informatics
5 
6  Licensed under the Apache License, Version 2.0 (the "License");
7  you may not use this file except in compliance with the License.
8  You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12  Unless required by applicable law or agreed to in writing, software
13  distributed under the License is distributed on an "AS IS" BASIS,
14  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  See the License for the specific language governing permissions and
16  limitations under the License.
17 
18 =============================================================================*/
19 
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>

#include <assert.h>
26 
27 //--------------------------------------
28 // Utility classes and functions
29 //--------------------------------------
30 
/// Character traits that make std::basic_string compare case-insensitively.
///
/// Only the comparison and search primitives are overridden; every other
/// member (assign, copy, move, length, ...) is inherited unchanged from
/// std::char_traits<char>.
struct ci_char_traits : public std::char_traits<char>
{
  static bool eq(char c1, char c2) { return fold(c1) == fold(c2); }
  static bool ne(char c1, char c2) { return fold(c1) != fold(c2); }
  static bool lt(char c1, char c2) { return fold(c1) < fold(c2); }
  static bool gt(char c1, char c2) { return fold(c1) > fold(c2); }

  /// Compares the first @p n characters of @p s1 and @p s2 ignoring case.
  /// @return a negative value, zero or a positive value if @p s1 orders
  ///         before, equal to or after @p s2.
  static int compare(const char *s1, const char *s2, std::size_t n)
  {
    for (; n > 0; --n, ++s1, ++s2)
    {
      if (lt(*s1, *s2))
        return -1;
      if (gt(*s1, *s2))
        return 1;
    }
    return 0;
  }

  /// Searches the first @p n characters of @p s for @p a, ignoring case.
  /// @return a pointer to the first match, or a null pointer if there is
  ///         none. The null result is what std::basic_string relies on to
  ///         report npos; returning a pointer past the range would make
  ///         failed searches look like hits.
  static const char *find(const char *s, std::size_t n, char a)
  {
    const int wanted = fold(a);
    for (; n > 0; --n, ++s)
    {
      if (fold(*s) == wanted)
        return s;
    }
    return 0;
  }

private:
  /// Upper-cases @p c. The detour through unsigned char is required because
  /// passing a negative char value to toupper() is undefined behaviour.
  static int fold(char c) { return std::toupper(static_cast<unsigned char>(c)); }
};

/// std::string look-alike whose comparisons and searches ignore case.
typedef std::basic_string<char, ci_char_traits> ci_string;
64 
65 //--------------------------------------
66 // Lexer
67 //--------------------------------------
68 
69 class CMakeLexer
70 {
71 public:
72  enum Token
73  {
74  TOK_EOF = -1,
75  TOK_EOL = -2,
76 
77  // commands
78  TOK_MACRO = -3,
79  TOK_ENDMACRO = -4,
80  TOK_FUNCTION = -5,
81  TOK_ENDFUNCTION = -6,
82  TOK_DOXYGEN_COMMENT = -7,
83  TOK_SET = -8,
84  TOK_STRING_LITERAL = -100,
85  TOK_NUMBER_LITERAL = -102,
86 
87  // primary
88  TOK_IDENTIFIER = -200
89  };
90 
91  CMakeLexer(std::istream &is) : _lastChar(' '), _is(is), _line(1), _col(1) {}
92  int getToken()
93  {
94  // skip whitespace
95  while (isspace(_lastChar) && _lastChar != '\r' && _lastChar != '\n')
96  {
97  _lastChar = getChar();
98  }
99 
100  if (isalpha(_lastChar) || _lastChar == '_')
101  {
102  _identifier = _lastChar;
103  while (isalnum(_lastChar = getChar()) || _lastChar == '-' || _lastChar == '_')
104  {
105  _identifier += _lastChar;
106  }
107 
108  if (_identifier == "set")
109  return TOK_SET;
110  if (_identifier == "function")
111  return TOK_FUNCTION;
112  if (_identifier == "macro")
113  return TOK_MACRO;
114  if (_identifier == "endfunction")
115  return TOK_ENDFUNCTION;
116  if (_identifier == "endmacro")
117  return TOK_ENDMACRO;
118  return TOK_IDENTIFIER;
119  }
120 
121  if (isdigit(_lastChar))
122  {
123  // very lax!! number detection
124  _identifier = _lastChar;
125  while (isalnum(_lastChar = getChar()) || _lastChar == '.' || _lastChar == ',')
126  {
127  _identifier += _lastChar;
128  }
129  return TOK_NUMBER_LITERAL;
130  }
131 
132  if (_lastChar == '#')
133  {
134  _lastChar = getChar();
135  if (_lastChar == '!')
136  {
137  // found a doxygen comment marker
138  _identifier.clear();
139 
140  _lastChar = getChar();
141  while (_lastChar != EOF && _lastChar != '\n' && _lastChar != '\r')
142  {
143  _identifier += _lastChar;
144  _lastChar = getChar();
145  }
146  return TOK_DOXYGEN_COMMENT;
147  }
148 
149  // skip the comment
150  while (_lastChar != EOF && _lastChar != '\n' && _lastChar != '\r')
151  {
152  _lastChar = getChar();
153  }
154  }
155 
156  if (_lastChar == '"')
157  {
158  _lastChar = getChar();
159  _identifier.clear();
160  while (_lastChar != EOF && _lastChar != '"')
161  {
162  _identifier += _lastChar;
163  _lastChar = getChar();
164  }
165 
166  // eat the closing "
167  _lastChar = getChar();
168  return TOK_STRING_LITERAL;
169  }
170 
171  // don't eat the EOF
172  if (_lastChar == EOF)
173  return TOK_EOF;
174 
175  // don't eat the EOL
176  if (_lastChar == '\r' || _lastChar == '\n')
177  {
178  if (_lastChar == '\r')
179  _lastChar = getChar();
180  if (_lastChar == '\n')
181  _lastChar = getChar();
182  return TOK_EOL;
183  }
184 
185  // return the character as its ascii value
186  int thisChar = _lastChar;
187  _lastChar = getChar();
188  return thisChar;
189  }
190 
191  std::string getIdentifier() const { return std::string(_identifier.c_str()); }
192  int curLine() const { return _line; }
193  int curCol() const { return _col; }
194  int getChar()
195  {
196  int c = _is.get();
197  updateLoc(c);
198  return c;
199  }
200 
201 private:
202  void updateLoc(int c)
203  {
204  if (c == '\n' || c == '\r')
205  {
206  ++_line;
207  _col = 1;
208  }
209  else
210  {
211  ++_col;
212  }
213  }
214 
215  ci_string _identifier;
216  int _lastChar;
217  std::istream &_is;
218 
219  int _line;
220  int _col;
221 };
222 
223 //--------------------------------------
224 // Parser
225 //--------------------------------------
226 
227 class CMakeParser
228 {
229 public:
230  CMakeParser(std::istream &is, std::ostream &os)
231  : _is(is), _os(os), _lexer(is), _curToken(CMakeLexer::TOK_EOF), _lastToken(CMakeLexer::TOK_EOF)
232  {
233  }
234 
235  int curToken() { return _curToken; }
236  int nextToken()
237  {
238  _lastToken = _curToken;
239  _curToken = _lexer.getToken();
240  while (_curToken == CMakeLexer::TOK_EOL)
241  {
242  // Try to preserve lines in output to allow correct line number referencing by doxygen.
243  _os << std::endl;
244  _curToken = _lexer.getToken();
245  }
246  return _curToken;
247  }
248 
249  void handleMacro()
250  {
251  if (!parseMacro())
252  {
253  // skip token for error recovery
254  nextToken();
255  }
256  }
257 
258  void handleFunction()
259  {
260  if (!parseFunction())
261  {
262  // skip token for error recovery
263  nextToken();
264  }
265  }
266 
267  void handleSet()
268  {
269  // SET(var ...) following a documentation block is assumed to be a variable declaration.
270  if (_lastToken != CMakeLexer::TOK_DOXYGEN_COMMENT)
271  {
272  // No comment block before
273  nextToken();
274  }
275  else if (!parseSet())
276  {
277  // skip token for error recovery
278  nextToken();
279  }
280  }
281 
282  void handleDoxygenComment()
283  {
284  _os << "///" << _lexer.getIdentifier();
285  nextToken();
286  }
287 
288  void handleTopLevelExpression()
289  {
290  // skip token
291  nextToken();
292  }
293 
294 private:
295  void printError(const char *str)
296  {
297  std::cerr << "Error: " << str << " (at line " << _lexer.curLine() << ", col " << _lexer.curCol() << ")";
298  }
299 
300  bool parseMacro()
301  {
302  if (nextToken() != '(')
303  {
304  printError("Expected '(' after MACRO");
305  return false;
306  }
307 
308  nextToken();
309  std::string macroName = _lexer.getIdentifier();
310  if (curToken() != CMakeLexer::TOK_IDENTIFIER || macroName.empty())
311  {
312  printError("Expected macro name");
313  return false;
314  }
315 
316  _os << macroName << '(';
317  if (nextToken() == CMakeLexer::TOK_IDENTIFIER)
318  {
319  _os << _lexer.getIdentifier();
320  while (nextToken() == CMakeLexer::TOK_IDENTIFIER)
321  {
322  _os << ", " << _lexer.getIdentifier();
323  }
324  }
325 
326  if (curToken() != ')')
327  {
328  printError("Missing expected ')'");
329  }
330  else
331  {
332  _os << ");";
333  }
334 
335  // eat the ')'
336  nextToken();
337  return true;
338  }
339 
340  bool parseSet()
341  {
342  if (nextToken() != '(')
343  {
344  printError("Expected '(' after SET");
345  return false;
346  }
347 
348  nextToken();
349  std::string variableName = _lexer.getIdentifier();
350  if (curToken() != CMakeLexer::TOK_IDENTIFIER || variableName.empty())
351  {
352  printError("Expected variable name");
353  return false;
354  }
355 
356  _os << "CMAKE_VARIABLE " << variableName;
357 
358  nextToken();
359  while ((curToken() == CMakeLexer::TOK_IDENTIFIER) || (curToken() == CMakeLexer::TOK_STRING_LITERAL) ||
360  (curToken() == CMakeLexer::TOK_NUMBER_LITERAL))
361  {
362  nextToken();
363  }
364 
365  if (curToken() != ')')
366  {
367  printError("Missing expected ')'");
368  }
369  else
370  {
371  _os << ";";
372  }
373 
374  // eat the ')'
375  nextToken();
376  return true;
377  }
378 
379  bool parseFunction()
380  {
381  if (nextToken() != '(')
382  {
383  printError("Expected '(' after FUNCTION");
384  return false;
385  }
386 
387  nextToken();
388  std::string funcName = _lexer.getIdentifier();
389  if (curToken() != CMakeLexer::TOK_IDENTIFIER || funcName.empty())
390  {
391  printError("Expected function name");
392  return false;
393  }
394 
395  _os << funcName << '(';
396  if (nextToken() == CMakeLexer::TOK_IDENTIFIER)
397  {
398  _os << _lexer.getIdentifier();
399  while (nextToken() == CMakeLexer::TOK_IDENTIFIER)
400  {
401  _os << ", " << _lexer.getIdentifier();
402  }
403  }
404 
405  if (curToken() != ')')
406  {
407  printError("Missing expected ')'");
408  }
409  else
410  {
411  _os << ");";
412  }
413 
414  // eat the ')'
415  nextToken();
416 
417  return true;
418  }
419 
420  std::istream &_is;
421  std::ostream &_os;
422  CMakeLexer _lexer;
423  int _curToken;
424  int _lastToken;
425 };
426 
427 #define STRINGIFY(a) #a
428 #define DOUBLESTRINGIFY(a) STRINGIFY(a)
429 
430 int main(int argc, char **argv)
431 {
432  assert(argc > 1);
433 
434  for (int i = 1; i < argc; ++i)
435  {
436  std::ifstream ifs(argv[i]);
437  std::ostream &os = std::cout;
438 
439 #ifdef USE_NAMESPACE
440  os << "namespace " << DOUBLESTRINGIFY(USE_NAMESPACE) << " {\n";
441 #endif
442 
443  CMakeParser parser(ifs, os);
444  parser.nextToken();
445  while (ifs.good())
446  {
447  switch (parser.curToken())
448  {
449  case CMakeLexer::TOK_EOF:
450  return ifs.get(); // eat EOF
451  case CMakeLexer::TOK_MACRO:
452  parser.handleMacro();
453  break;
454  case CMakeLexer::TOK_FUNCTION:
455  parser.handleFunction();
456  break;
457  case CMakeLexer::TOK_SET:
458  parser.handleSet();
459  break;
460  case CMakeLexer::TOK_DOXYGEN_COMMENT:
461  parser.handleDoxygenComment();
462  break;
463  default:
464  parser.handleTopLevelExpression();
465  break;
466  }
467  }
468 
469 #ifdef USE_NAMESPACE
470  os << "}\n";
471 #endif
472  }
473 
474  return EXIT_SUCCESS;
475 }
int main(int argc, char **argv)
std::basic_string< char, ci_char_traits > ci_string
#define DOUBLESTRINGIFY(a)
bool compare(std::pair< double, int > i, std::pair< double, int > j)