dpp.translation.macro

1 module dpp.translation.macro_;
2 
3 
4 import dpp.from;
5 
6 
/**
   Translates a C macro definition into the lines to emit in the output.

   Params:
       cursor = the macro definition (must be of kind MacroDefinition)
       context = the translation context; the macro is recorded in it
                 via `rememberMacro`

   Returns: the lines to emit, or an empty array if `isBuiltinMacro`
   says the macro is a built-in one.
 */
string[] translateMacro(in from!"clang".Cursor cursor,
                        ref from!"dpp.runtime.context".Context context)
    @safe
    in(cursor.kind == from!"clang".Cursor.Kind.MacroDefinition)
{
    import dpp.translation.dlang: maybeRename;

    // we want non-built-in macro definitions to be defined and then preprocessed
    // again

    if(isBuiltinMacro(cursor)) return [];

    const tokens = cursor.tokens;

    // the only sane way for us to be able to see a macro definition
    // for a macro that has already been defined is if an #undef happened
    // in the meanwhile. Unfortunately, libclang has no way of passing
    // that information to us
    const maybeUndef = context.macroAlreadyDefined(cursor)
        ? "#undef " ~ cursor.spelling
        : "";

    context.rememberMacro(cursor);
    const spelling = maybeRename(cursor, context);
    const dbody = translateToD(cursor, context, tokens);

    // We try here to make it so that literal macros can be imported from
    // another D module. We also try and make non-function-like macros
    // that aren't a literal constant but an expression can be imported
    // as well. To that end we check that we can mixin a declaration of
    // an enum with the same name of the macro with the original C code.
    // If so, we mix it in.
    // Below that, we declare the macro so the #including .dpp file
    // uses the preprocessor.
    if(!cursor.isMacroFunction && tokens.length > 1) {
        const defineEnum = `enum ` ~ spelling ~ ` = ` ~ dbody ~ `;`;
        const enumVarName = `enumMixinStr_` ~ spelling;
        return [
            `#ifdef ` ~ spelling,
            `#    undef ` ~ spelling,
            `#endif`,
            `static if(!is(typeof(` ~ spelling ~ `))) {`,
            "    private enum " ~ enumVarName ~ " = `" ~ defineEnum ~ "`;",
            `    static if(is(typeof({ mixin(` ~ enumVarName ~ `); }))) {`,
            `        mixin(`  ~ enumVarName ~ `);`,
            `    }`,
            `}`,
            `#define ` ~ spelling ~ ` ` ~ dbody,
        ];
    }

    // Define a template function with the same name as the macro
    // in an attempt to make it importable from outside the .dpp file.
    enum prefix = "_dpp_impl_"; // can't use the macro name as-is
    const emitFunction = cursor.isMacroFunction && context.options.functionMacros;
    auto maybeFunction = emitFunction
        ? macroToTemplateFunction(cursor, prefix, spelling)
        : [];
    const maybeSpace = cursor.isMacroFunction ? "" : " ";
    const restOfLine = spelling ~ maybeSpace ~ dbody;
    const maybeDefineWithPrefix = emitFunction
        ? `#define ` ~ prefix ~ restOfLine
        : "";
    const define = `#define ` ~ restOfLine;

    // `maybeUndef ~ maybeDefineWithPrefix` is a string concatenation, not an
    // array append. When both directives are present they must be kept on
    // separate lines or the preprocessor sees `#undef X#define ...`.
    const separator = maybeUndef.length > 0 && maybeDefineWithPrefix.length > 0
        ? "\n"
        : "";

    return (maybeUndef ~ separator ~ maybeDefineWithPrefix) ~ maybeFunction ~ define;
}
78 
/**
   Generates the lines that declare a D template function forwarding to
   the function-like macro `prefix ~ spelling`.

   Params:
       cursor = the function-like macro definition
       prefix = prepended to `spelling` to form the name of the macro
                that the generated function calls
       spelling = the name of the generated function

   Returns: the lines declaring the function (guarded by a
   `__traits(compiles)` check), or an empty array if `spelling`
   starts with two underscores.
 */
private string[] macroToTemplateFunction(in from!"clang".Cursor cursor, in string prefix, in string spelling)
    @safe
    in(cursor.kind == from!"clang".Cursor.Kind.MacroDefinition)
    in(cursor.isMacroFunction)
{
    import clang : Token;
    import std.algorithm : countUntil, count, map, startsWith;
    import std.range: iota;
    import std.conv: text;
    import std.array : array, join;

    if(spelling.startsWith("__")) return [];

    const tokens = cursor.tokens;
    assert(tokens[0].kind == Token.Kind.Identifier);
    assert(tokens[1] == Token(Token.Kind.Punctuation, "("));

    const closeParenOffset = tokens[2 .. $].countUntil(Token(Token.Kind.Punctuation, ")"));
    assert(closeParenOffset >= 0, "No closing parenthesis in function-like macro");
    const closeParenIndex = closeParenOffset + 2;

    const numCommas = tokens[2 .. closeParenIndex].count(Token(Token.Kind.Punctuation, ","));
    const numElements = closeParenIndex == 2 ? 0 : numCommas + 1;
    const isVariadic = tokens[closeParenIndex - 1] == Token(Token.Kind.Punctuation, "...");
    const numArgs = isVariadic ? numElements - 1 : numElements;

    // Collect the pieces first and join them afterwards so that a purely
    // variadic macro (`#define F(...)`) yields `(REST...)` rather than the
    // syntactically invalid `(, REST...)`.
    string[] templateParamList = numArgs.iota.map!(i => text(`A`, i)).array;
    string[] runtimeParamList = numArgs.iota.map!(i => text(`A`, i, ` arg`, i)).array;
    string[] runtimeArgList = numArgs.iota.map!(i => text(`arg`, i)).array;

    if(isVariadic) {
        templateParamList ~= "REST...";
        runtimeParamList ~= "REST rest";
        runtimeArgList ~= "rest";
    }

    const templateParams = `(` ~ templateParamList.join(`, `) ~ `)`;
    const runtimeParams = `(` ~ runtimeParamList.join(`, `) ~ `)`;
    const runtimeArgs = runtimeArgList.join(`, `);

    auto lines = [
        `auto ` ~ spelling ~ templateParams ~ runtimeParams ~ ` {`,
        `    return ` ~ prefix ~ spelling ~ `(` ~ runtimeArgs ~ `);`,
        `}`,
    ];
    const functionMixinStr = lines.map!(l => "    " ~ l).join("\n");
    const enumName = prefix ~ spelling ~ `_mixin`;

    // The function is only mixed in if its declaration compiles
    return [
        `enum ` ~ enumName ~ " = `" ~ functionMixinStr ~ "`;",
        `static if(__traits(compiles, { mixin(` ~  enumName ~ `); })) {`,
        `    mixin(` ~ enumName ~ `);`,
        `}`
    ];
}
121 
122 
/**
   Whether the cursor is a macro definition that should be considered
   built-in: one with an empty or non-existent source path, one that is
   predefined, or one whose name starts with `__STDC_`.
   Cursors that aren't macro definitions are never built-in macros.
 */
bool isBuiltinMacro(in from!"clang".Cursor cursor)
    @safe
{
    import clang: Cursor;
    import std.file: exists;
    import std.algorithm: startsWith;

    if(cursor.kind != Cursor.Kind.MacroDefinition) return false;

    // no source file to speak of
    if(cursor.sourceRange.path == "") return true;
    if(!cursor.sourceRange.path.exists) return true;

    if(cursor.isPredefined) return true;

    return cursor.spelling.startsWith("__STDC_");
}
139 
140 
/**
   Whether the tokens are exactly an identifier followed by a literal,
   e.g. `#define FOO 42`.
 */
private bool isLiteralMacro(in from!"clang".Token[] tokens) @safe @nogc pure nothrow {
    import clang: Token;

    if(tokens.length != 2) return false;
    if(tokens[0].kind != Token.Kind.Identifier) return false;

    return tokens[1].kind == Token.Kind.Literal;
}
150 
151 
/**
   Translates the body of a macro to D code.

   Params:
       cursor = the macro definition
       context = the translation context
       tokens = all of the macro's tokens, the first being its name

   Returns: the translated body; the empty string for a macro with no
   body (e.g. `#define FOO`).
 */
private string translateToD(
    in from!"clang".Cursor cursor,
    ref from!"dpp.runtime.context".Context context,
    in from!"clang".Token[] tokens,
    )
    @safe
{
    import dpp.translation.type: translateElaborated;
    import clang: Token;
    import std.algorithm: map;

    // `#define FOO 42`: only the literal needs fixing
    if(isLiteralMacro(tokens)) return fixLiteral(tokens[1]);
    // `#define FOO`: nothing to translate
    if(tokens.length == 1) return "";

    // literals get fixed, everything else is left alone
    auto rewriteLiteral(in Token tok) {
        return tok.kind != Token.Kind.Literal
            ? tok
            : Token(Token.Kind.Literal, fixLiteral(tok));
    }

    // each step takes the output of the previous one
    auto withSizeof = fixSizeof(tokens, cursor);
    auto withCasts = fixCasts(withSizeof, cursor, context);
    auto withDots = fixArrow(withCasts);
    auto withNull = fixNull(withDots);
    auto withLiterals = withNull.map!rewriteLiteral;
    auto dCode = toString(withLiterals);

    return translateElaborated(dCode, context);
}
182 
183 
/**
   Joins the spellings of all tokens but the first with single spaces.
 */
private string toString(R)(R tokens) {
    import clang: Token;
    import std.algorithm: map;
    import std.array: join;

    // the leading identifier is left out because of DPP_ENUM_
    auto withoutName = tokens[1 .. $];
    auto spellings = withoutName.map!(token => token.spelling);

    return join(spellings, " ");
}
196 
/**
   Rewrites the spelling of a C literal token by running it through
   each of the literal fixes below, one after the other.
 */
private string fixLiteral(in from!"clang".Token token)
    @safe pure
    in(token.kind == from!"clang".Token.Kind.Literal)
    do
{
    // every step is fed the result of the one before it
    string literal = token.spelling;

    literal = fixLowercaseSuffix(literal);
    literal = fixMultiCharacterLiterals(literal);
    literal = fixWideCharStrings(literal);
    literal = fixOctal(literal);
    literal = fixMicrosoftSuffixes(literal);
    literal = fixLongLong(literal);

    return literal;
}
211 
212 
/**
   Lazily replaces every `->` punctuation token with a `.` one.
 */
private auto fixArrow(R)(R tokens) {
    import clang: Token;
    import std.algorithm: map;

    static const(Token) arrowToDot(in Token tok) {
        if(tok == Token(Token.Kind.Punctuation, "->"))
            return Token(Token.Kind.Punctuation, ".");

        return tok;
    }

    return map!arrowToDot(tokens);
}
227 
/**
   Lazily replaces every `NULL` identifier token with a `null` one.
 */
private auto fixNull(R)(R tokens)
{
    import clang: Token;
    import std.algorithm: map;
    import std.array: array;

    static const(Token) lowerCaseNull(in Token tok) {
        if(tok == Token(Token.Kind.Identifier, "NULL"))
            return Token(Token.Kind.Identifier, "null");

        return tok;
    }

    return map!lowerCaseNull(tokens);
}
244 
/**
   On Windows, translates the Microsoft-specific integer literal
   suffixes: `i64` becomes `L` and `i32`, `i16` and `i8` are removed.
   Anything else is returned as is.
 */
version(Windows)
private string fixMicrosoftSuffixes(in string str) @safe pure nothrow {
    import std.algorithm: endsWith;

    if(str.endsWith("i64"))
        return str[0 .. $-3] ~ "L";
    else if(str.endsWith("i32"))
        return str[0 .. $-3];
    else if(str.endsWith("i16"))
        return str[0 .. $-3];
    else if(str.endsWith("i8"))
        return str[0 .. $-2];  // the suffix is only 2 characters long
    return str;
}
else
/// Not on Windows: the literal is returned untouched.
private string fixMicrosoftSuffixes(in string str) @safe pure nothrow {
    return str;
}
263 
/**
   Translates a C wide string literal (`L"foo"`) by dropping the `L`
   prefix and appending a `w` suffix (`"foo"w`).
   Anything else is returned as is.
 */
private string fixWideCharStrings(in string str) @safe pure nothrow {
    const isWideString =
        str.length >= 3
        && str[0] == 'L'
        && str[1] == '"'
        && str[$-1] == '"'
        ;

    if(!isWideString) return str;

    return str[1 .. $] ~ "w";
}
271 
/**
   Translates a C multi-character literal such as `'ab'` to the decimal
   spelling of an integer. Anything else is returned as is.
 */
private string fixMultiCharacterLiterals(in string str) @safe pure nothrow {
    import std.conv;

    // Multi-character literals are implementation-defined but allowed in C.
    // They have to be told apart from a single non-ASCII character
    // (forwarded to D as is, assuming UTF-8 source) and from escape
    // sequences such as '\uxxx', which are forwarded as well.
    const looksMultiChar =
        str.length > 3
        && str[0] == '\''
        && str[$-1] == '\''
        && str[1] != '\\'
        ;

    if(!looksMultiChar) return str; // not one of these, don't touch

    // Pack the characters into an int the way common compilers do, see
    // https://gcc.gnu.org/onlinedocs/cpp/Implementation-defined-behavior.html
    const inner = str[1 .. $-1];
    int packed;

    for(size_t i = 0; i < inner.length; ++i) {
        const char ch = inner[i];
        // a byte outside of ASCII is assumed to belong to a single
        // UTF-8 character, so we punt on it
        if(ch > 127) return str;
        packed = (packed << 8) | cast(ubyte) ch;
    }

    return to!string(packed);
}
294 
/**
   Turns a trailing lowercase `l` or `ll` into uppercase.
   Anything else is returned as is.
 */
private string fixLowercaseSuffix(in string str) @safe pure nothrow {
    import std.algorithm: endsWith;

    // anything ending in "ll" also ends in "l"
    if(!str.endsWith("l")) return str;

    const isDouble = str.endsWith("ll");
    const suffixLength = isDouble ? 2 : 1;
    const upperCased = isDouble ? "LL" : "L";

    return str[0 .. $ - suffixLength] ~ upperCased;
}
304 
/**
   Translates the C `long long` literal suffixes, in any letter case:
   `LLU` and `ULL` become `LU`, and `LL` becomes `L`.
   Strings shorter than 3 characters and anything without such a suffix
   are returned as is.
 */
private string fixLongLong(in string str) @safe pure {
    import std.ascii : toUpper;

    if (str.length < 3) return str;

    // The last three bytes are compared one by one as ASCII instead of
    // slicing them off and upper-casing the slice as Unicode text: the slice
    // could start in the middle of a multi-byte UTF-8 character (e.g. in a
    // string literal), and would then not be valid UTF-8.
    const first  = toUpper(str[$-3]);
    const second = toUpper(str[$-2]);
    const third  = toUpper(str[$-1]);

    const isLLU = first == 'L' && second == 'L' && third == 'U';
    const isULL = first == 'U' && second == 'L' && third == 'L';

    if (isLLU || isULL)
        return str[0 .. $-3] ~ "LU";

    if (second == 'L' && third == 'L')
        return str[0 .. $-2] ~ "L";

    return str;
}
319 
320 
/**
   Translates a C octal integer literal (e.g. `0777`) to its decimal
   value, preceded by a comment with the original octal digits.

   An integer suffix (e.g. the `U` in `0644U`) is kept after the decimal
   value. Spellings that don't start with `0` followed by a digit are
   returned as is, as are spellings where the octal digits are followed
   by something that isn't a suffix (e.g. the floating point literal
   `01.5`).
 */
private string fixOctal(in string spelling) @safe pure {
    import std.ascii: isDigit, isOctalDigit;
    import std.conv : text;

    const isOctal =
        spelling.length > 1
        && spelling[0] == '0'
        && spelling[1].isDigit
        ;

    if(!isOctal) return spelling;

    // the octal digits come first, whatever follows them is the suffix
    size_t digitsEnd = 0;
    while(digitsEnd < spelling.length && spelling[digitsEnd].isOctalDigit)
        ++digitsEnd;

    const digits = spelling[0 .. digitsEnd];
    const suffix = spelling[digitsEnd .. $];

    // Integer suffixes start with `u` or `l`; `i` is let through for the
    // Microsoft ones (`i64` etc.) that fixMicrosoftSuffixes deals with.
    // Anything else (`.`, an exponent, the digits 8 and 9) means this isn't
    // an octal integer after all, so it's left alone.
    if(suffix.length > 0) {
        switch(suffix[0]) {
            case 'u', 'U', 'l', 'L', 'i', 'I':
                break;
            default:
                return spelling;
        }
    }

    size_t firstNonZero = 0;
    while(firstNonZero < digits.length && digits[firstNonZero] == '0')
        ++firstNonZero;

    // nothing but zeros
    if(firstNonZero == digits.length) return "0" ~ suffix;

    const base8_representation = digits[firstNonZero .. $];

    // ulong so that values that don't fit in an int aren't truncated
    ulong base10_number = 0;
    foreach(const c; base8_representation)
        base10_number = base10_number * 8 + (c - '0');

    return "/+converted from octal '" ~ base8_representation ~ "'+/ " ~ base10_number.text ~ suffix;
}
350 
351 
/**
   Rewrites every `sizeof(...)` in the body of a macro as `(...).sizeof`.

   Params:
       tokens = the macro's tokens
       cursor = the macro definition; if it's a function-like macro, the
                tokens up to and including the first `)` are left untouched

   Returns: an array with the rewritten tokens.

   Throws: Exception if the macro is function-like and there is no `)`
   token; UntranslatableException if the parentheses opened by a `sizeof`
   are never closed.
 */
private auto fixSizeof(R)(R tokens, in from !"clang".Cursor cursor)
{
    import dpp.translation.exception: UntranslatableException;
    import clang: Token;
    import std.conv: text;
    import std.algorithm: countUntil;

    // find the closing paren for the function-like macro's argument list
    size_t lastIndex = 0;
    if(cursor.isMacroFunction) {
        lastIndex = tokens
            .countUntil!(t => t == Token(Token.Kind.Punctuation, ")"))
            +1; // skip the right paren

        if(lastIndex == 0)  // given the +1 above, -1 becomes 0
            throw new Exception(text("Can't fix sizeof in function-like macro with tokens: ", tokens));
    }

    const beginning = tokens[0 .. lastIndex];
    const(Token)[] middle;

    // `i + 1 < length` instead of `i < length - 1`: the latter wraps
    // around if there are no tokens
    for(size_t i = lastIndex; i + 1 < tokens.length; ++i) {
        if(tokens[i] == Token(Token.Kind.Keyword, "sizeof")
           && tokens[i + 1] == Token(Token.Kind.Punctuation, "("))
        {
            // find closing paren
            long open = 1;
            size_t scanIndex = i + 2;  // skip i + 1 since that's the open paren

            while(open != 0) {
                // ran out of tokens before finding the matching paren
                if(scanIndex >= tokens.length)
                    throw new UntranslatableException("Unbalanced parentheses in macro `" ~ cursor.spelling ~ "`");

                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, "("))
                    ++open;
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, ")"))
                    --open;

                ++scanIndex;
            }
            // at this point scanIndex is 1 + the index of the closing paren

            // everything since the last rewrite, then `(...)`, then `.sizeof`
            middle ~= tokens[lastIndex .. i] ~ tokens[i + 1 .. scanIndex] ~ Token(Token.Kind.Keyword, ".sizeof");
            lastIndex = scanIndex;
            // advance i past the sizeof. -1 because of ++i in the for loop
            i = lastIndex - 1;
        }
    }

    // can't chain here due to fixCasts appending to const(Token)[]
    return beginning ~ middle ~ tokens[lastIndex .. $];
}
399 
400 
/**
   Rewrites every C-style cast `(type)` in the body of a macro as the D
   equivalent `cast(type)`, with the type translated by `translateString`.

   Params:
       tokens = the macro's tokens
       cursor = the macro definition; if it's a function-like macro, the
                tokens up to and including the first `)` are left untouched
       context = the translation context, used to look up user-defined
                 types and to translate the type being cast to

   Returns: a range with the rewritten tokens.

   Throws: Exception if the macro is function-like and there is no `)`
   token; UntranslatableException if the parentheses are unbalanced.
 */
private auto fixCasts(R)(
    R tokens,
    in from !"clang".Cursor cursor,
    in from!"dpp.runtime.context".Context context,
    )
{
    import dpp.translation.exception: UntranslatableException;
    import dpp.translation.type : translateString;
    import clang: Token;
    import std.conv: text;
    import std.algorithm: countUntil, count, canFind, all, map;
    import std.range: chain;
    import std.array: split, join;

    // If the cursor is a macro function return its parameters
    Token[] macroFunctionParams() {
        assert(cursor.tokens[0].kind == Token.Kind.Identifier);
        assert(cursor.tokens[1] == Token(Token.Kind.Punctuation, "("));
        enum fromParen = 2;
        const closeParenIndex = cursor.tokens[fromParen .. $].countUntil(Token(Token.Kind.Punctuation, ")")) + fromParen;
        return cursor.tokens[fromParen .. closeParenIndex].split(Token(Token.Kind.Punctuation, ",")).join;
    }

    const params = cursor.isMacroFunction ? macroFunctionParams : [];

    // if the token array is a built-in or user-defined type
    bool isType(in Token[] tokens) {

        if( // fundamental type
            tokens.length == 1
            && tokens[0].kind == Token.Kind.Keyword
            && tokens[0].spelling != "sizeof"
            && tokens[0].spelling != "alignof"
            )
            return true;

        // fundamental type like `unsigned char`
        if(tokens.length > 1 && tokens.all!(t => t.kind == Token.Kind.Keyword))
            return true;

        if( // user defined type
            tokens.length == 1
            && tokens[0].kind == Token.Kind.Identifier
            && context.isUserDefinedType(tokens[0].spelling)
            )
            return true;

        if(  // pointer to a type
            tokens.length >= 2
            && tokens[$-1] == Token(Token.Kind.Punctuation, "*")
            && (isType(tokens[0 .. $-1]) || params.canFind(tokens[$-2]) )
            )
            return true;

        if( // const type
            tokens.length >= 2
            && tokens[0] == Token(Token.Kind.Keyword, "const")
            && isType(tokens[1..$])
            )
            return true;

        if( // typeof
            tokens.length >= 2
            && tokens[0] == Token(Token.Kind.Keyword, "typeof")
            )
            return true;

        if ( // macro attribute (e.g. __force) + type
            tokens.length >= 2
            && tokens[0].kind == Token.Kind.Identifier
            && isType(tokens[1..$])
            )
            return true;

        return false;
    }

    size_t lastIndex = 0;
    // find the closing paren for the function-like macro's argument list
    if(cursor.isMacroFunction) {
        lastIndex = tokens
            .countUntil!(t => t == Token(Token.Kind.Punctuation, ")"))
            +1; // skip the right paren
        if(lastIndex == 0)  // given the +1 above, -1 becomes 0
            throw new Exception(text("Can't fix casts in function-like macro with tokens: ", tokens));
    }

    const beginning = tokens[0 .. lastIndex];
    const(Token)[] middle;

    // See #244 - macros can have unbalanced parentheses
    // Apparently libclang tokenises `\\n)` as including the backslash and the newline
    const numLeftParens  = tokens.count!(a => a == Token(Token.Kind.Punctuation, "(") ||
                                         a == Token(Token.Kind.Punctuation, "\\\n("));
    const numRightParens = tokens.count!(a => a == Token(Token.Kind.Punctuation, ")") ||
                                         a == Token(Token.Kind.Punctuation, "\\\n)"));

    if(numLeftParens != numRightParens)
        throw new UntranslatableException("Unbalanced parentheses in macro `" ~ cursor.spelling ~ "`");

    // `i + 1 < length` instead of `i < length - 1`: the latter wraps
    // around if there are no tokens
    for(size_t i = lastIndex; i + 1 < tokens.length; ++i) {
        if(tokens[i] == Token(Token.Kind.Punctuation, "(")) {
            // find closing paren
            long open = 1;
            size_t scanIndex = i + 1;  // start right after the open paren at i

            while(open != 0) {
                // Equal numbers of left and right parens don't guarantee
                // that this one is ever closed (e.g. `) ( x`), so make
                // sure not to read past the last token.
                if(scanIndex >= tokens.length)
                    throw new UntranslatableException("Unbalanced parentheses in macro `" ~ cursor.spelling ~ "`");

                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, "("))
                    ++open;
                // for the 2nd condition, see it.c.compile.preprocessor.multiline
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, ")") ||
                   tokens[scanIndex] == Token(Token.Kind.Punctuation, "\\\n)"))
                    --open;

                ++scanIndex;
            }
            // at this point scanIndex is the 1 + index of closing paren

            // we want to ignore e.g. `(int)(foo).sizeof` even if `foo` is a type
            const followedByDot =
                tokens.length > scanIndex
                && tokens[scanIndex].spelling.length > 0
                && tokens[scanIndex].spelling[0] == '.'
                ;

            if(isType(tokens[i + 1 .. scanIndex - 1]) && !followedByDot) {
                // -1 to not include the closing paren
                const cTypeString = tokens[i + 1 .. scanIndex - 1].map!(t => t.spelling).join(" ");
                const dTypeString = translateString(cTypeString, context);
                middle ~= tokens[lastIndex .. i] ~
                    Token(Token.Kind.Punctuation, "cast(") ~
                    Token(Token.Kind.Keyword, dTypeString) ~
                    Token(Token.Kind.Punctuation, ")");

                lastIndex = scanIndex;
                // advance i past the cast. -1 because of ++i in the for loop
                i = lastIndex - 1;
            }
        }
    }

    return chain(beginning, middle, tokens[lastIndex .. $]);
}