dpp.translation.macro

1 module dpp.translation.macro_;
2 
3 
4 import dpp.from;
5 
6 
/**
   Translates a C macro definition into the lines that should be emitted
   into the output.

   Built-in macros (see `isBuiltinMacro`) produce no output at all.
   Every other macro is recorded in `context` and re-emitted as a
   `#define` so that it gets preprocessed again.

   Params:
       cursor = the macro definition cursor (enforced by the `in` contract)
       context = translation context; queried for previously seen macros
                 and updated to remember this one

   Returns: the lines to emit for this macro; empty for built-in macros.
 */
string[] translateMacro(in from!"clang".Cursor cursor,
                        ref from!"dpp.runtime.context".Context context)
    @safe
    in(cursor.kind == from!"clang".Cursor.Kind.MacroDefinition)
{
    import dpp.translation.dlang: maybeRename;

    // Non-built-in macro definitions are defined and then preprocessed
    // again; built-in ones are skipped entirely.
    if(isBuiltinMacro(cursor)) return [];

    const tokens = cursor.tokens;

    // The only sane way to see a definition for a macro that was already
    // defined is if an #undef happened in between. libclang has no way of
    // telling us about that, so the #undef is recreated here. This must be
    // decided before the macro is remembered below.
    const maybeUndef = context.macroAlreadyDefined(cursor)
        ? "#undef " ~ cursor.spelling ~ "\n"
        : "";

    context.rememberMacro(cursor);
    const spelling = maybeRename(cursor, context);
    const dbody = translateToD(cursor, context, tokens);

    const isObjectLike = !cursor.isMacroFunction;

    // Object-like macros with a body are additionally exposed as a D enum
    // so that they can be imported from another D module. This works for
    // literal constants and, when the expression happens to be valid D,
    // for expression macros as well: the enum declaration is only mixed
    // in if it compiles. After that the macro itself is declared so that
    // the #including .dpp file still goes through the preprocessor.
    if(isObjectLike && tokens.length > 1) {
        const enumVarName = `enumMixinStr_` ~ spelling;
        const defineEnum = `enum ` ~ spelling ~ ` = ` ~ dbody ~ `;`;

        string[] lines;
        lines ~= `#ifdef ` ~ spelling;
        lines ~= `#    undef ` ~ spelling;
        lines ~= `#endif`;
        lines ~= `static if(!is(typeof(` ~ spelling ~ `))) {`;
        lines ~= "    private enum " ~ enumVarName ~ " = `" ~ defineEnum ~ "`;";
        lines ~= `    static if(is(typeof({ mixin(` ~ enumVarName ~ `); }))) {`;
        lines ~= `        mixin(`  ~ enumVarName ~ `);`;
        lines ~= `    }`;
        lines ~= `}`;
        lines ~= `#define ` ~ spelling ~ ` ` ~ dbody;
        return lines;
    }

    // Function-like macros get no separator between name and body;
    // object-like ones get a single space.
    const maybeSpace = isObjectLike ? " " : "";
    return [maybeUndef ~ "#define " ~ spelling ~ maybeSpace ~ dbody ~ "\n"];
}
65 
66 
/**
   Whether `cursor` is a macro definition that should be treated as
   built-in.

   A macro definition counts as built-in when its source range has no
   path, when that path does not exist on disk, when the cursor reports
   itself as predefined, or when its name starts with `__STDC_`.
   Cursors that are not macro definitions are never built-in.
 */
bool isBuiltinMacro(in from!"clang".Cursor cursor)
    @safe
{
    import clang: Cursor;
    import std.file: exists;
    import std.algorithm: startsWith;

    if(cursor.kind != Cursor.Kind.MacroDefinition) return false;

    const path = cursor.sourceRange.path;

    // no backing file at all, or one that isn't on disk
    if(path == "" || !path.exists) return true;

    if(cursor.isPredefined) return true;

    return cursor.spelling.startsWith("__STDC_");
}
83 
84 
/**
   Whether the macro's tokens are exactly an identifier followed by a
   single literal, e.g. `#define FOO 42`.
 */
private bool isLiteralMacro(in from!"clang".Token[] tokens) @safe @nogc pure nothrow {
    import clang: Token;

    // anything other than <name> <value> can't be a plain literal macro
    if(tokens.length != 2) return false;

    if(tokens[0].kind != Token.Kind.Identifier) return false;

    return tokens[1].kind == Token.Kind.Literal;
}
94 
95 
/**
   Produces the text of a macro's body.

   Literal macros only have their literal fixed up. Macros without a body
   (e.g. `#define FOO`) yield the empty string. Everything else is run
   through the token rewriting passes (sizeof, casts, `->`, `NULL`),
   joined into a string and finally passed to `translateElaborated`.

   Params:
       cursor = the macro definition cursor
       context = the translation context
       tokens = the macro's tokens, the first one being the macro's name
 */
private string translateToD(
    in from!"clang".Cursor cursor,
    ref from!"dpp.runtime.context".Context context,
    in from!"clang".Token[] tokens,
    )
    @safe
{
    import dpp.translation.type: translateElaborated;

    if(isLiteralMacro(tokens)) return fixLiteral(tokens[1]);

    // only the name, no body: e.g. `#define FOO`
    if(tokens.length == 1) return "";

    // token-level rewrites, in this exact order
    auto sizeofFixed = tokens.fixSizeof(cursor);
    auto castsFixed = sizeofFixed.fixCasts(cursor, context);
    auto arrowsFixed = castsFixed.fixArrow;
    auto nullsFixed = arrowsFixed.fixNull;

    // from here on we're dealing with text
    const joined = nullsFixed.toString;
    return joined.translateElaborated(context);
}
116 
117 
/**
   Joins the spellings of all tokens except the first one with single
   spaces. The first token is the macro's own identifier and is skipped.
 */
private string toString(R)(R tokens) {
    import clang: Token;
    import std.algorithm: map;
    import std.array: join;

    // skip the identifier because of DPP_ENUM_
    auto bodyTokens = tokens[1..$];
    auto spellings = bodyTokens.map!(t => t.spelling);

    return spellings.join(" ");
}
128 
/**
   Rewrites the spelling of a C literal token by applying, in order, the
   lowercase suffix, multi-character literal, wide string, octal,
   Microsoft suffix and `long long` fix-ups.
 */
private string fixLiteral(in from!"clang".Token token)
    @safe pure
    in(token.kind == from!"clang".Token.Kind.Literal)
    do
{
    // each pass takes the result of the previous one; the order matters
    string literal = token.spelling;
    literal = literal.fixLowercaseSuffix;
    literal = literal.fixMultiCharacterLiterals;
    literal = literal.fixWideCharStrings;
    literal = literal.fixOctal;
    literal = literal.fixMicrosoftSuffixes;
    literal = literal.fixLongLong;
    return literal;
}
143 
144 
/**
   Lazily replaces every `->` punctuation token with a `.` punctuation
   token, leaving all other tokens untouched.
 */
private auto fixArrow(R)(R tokens) {
    import clang: Token;
    import std.algorithm: map;

    static const(Token) arrowToDot(in Token token) {
        if(token == Token(Token.Kind.Punctuation, "->"))
            return Token(Token.Kind.Punctuation, ".");

        return token;
    }

    return tokens.map!arrowToDot;
}
159 
/**
   Lazily replaces every `NULL` identifier token with a `null` identifier
   token, leaving all other tokens untouched.
 */
private auto fixNull(R)(R tokens)
{
    import clang: Token;
    import std.algorithm: map;
    import std.array: array;

    static const(Token) upperToLowerNull(in Token token) {
        if(token == Token(Token.Kind.Identifier, "NULL"))
            return Token(Token.Kind.Identifier, "null");

        return token;
    }

    return tokens.map!upperToLowerNull;
}
176 
/**
   Rewrites Microsoft-specific integer literal suffixes.

   On Windows a trailing `i64` is replaced by `L`, and trailing `i32`,
   `i16` and `i8` suffixes are removed. On every other platform the
   string is returned unchanged.

   Params:
       str = the spelling of a literal
   Returns: the spelling with the Microsoft suffix handled as described.
 */
private string fixMicrosoftSuffixes(in string str) @safe pure nothrow {
    version(Windows) {
        import std.algorithm: endsWith;

        if(str.endsWith("i64"))
            return str[0 .. $-3] ~ "L";

        if(str.endsWith("i32") || str.endsWith("i16"))
            return str[0 .. $-3];

        // `i8` is only two characters long: strip exactly two, otherwise
        // the last digit of the number is lost (or the slice goes out of
        // bounds for a two-character string)
        if(str.endsWith("i8"))
            return str[0 .. $-2];
    }

    return str;
}
195 
/**
   Turns a C wide string literal (`L"..."`) into a D wide string literal
   (`"..."w`). Anything else is returned unchanged.
 */
private string fixWideCharStrings(in string str) @safe pure nothrow {
    const isWideLiteral =
        str.length >= 3
        && str[0] == 'L'
        && str[1] == '"'
        && str[$-1] == '"'
        ;

    if(!isWideLiteral) return str;

    // drop the `L` prefix and add D's `w` postfix instead
    return str[1 .. $] ~ "w";
}
203 
/**
   Translates a C multi-character literal such as `'ab'` into the decimal
   text of its integer value.

   Multi-character literals are implementation-defined in C but allowed.
   The value is built the way common compilers do it: each character is
   shifted in as one more byte, see
   https://gcc.gnu.org/onlinedocs/cpp/Implementation-defined-behavior.html

   Left untouched are: anything that isn't a character literal with more
   than one character between the quotes, literals starting with an
   escape sequence (e.g. `'\uxxx'`), and literals containing a non-ASCII
   byte, which are assumed to be a single UTF-8 encoded character.
 */
private string fixMultiCharacterLiterals(in string str) @safe pure nothrow {
    import std.conv: to;

    const looksMultiChar =
        str.length > 3
        && str[0] == '\''
        && str[$-1] == '\''
        && str[1] != '\\'
        ;

    // not one of these, don't touch
    if(!looksMultiChar) return str;

    int packed = 0;
    foreach(char unit; str[1 .. $-1]) {
        // any multi-byte character is assumed to be a single
        // UTF-8 character, so punt on it
        if(unit > 127) return str;

        packed = (packed << 8) | cast(ubyte) unit;
    }

    return packed.to!string;
}
226 
/**
   Upper-cases a trailing lowercase `l` or `ll` suffix: a trailing `ll`
   becomes `LL`, otherwise a trailing `l` becomes `L`. Anything else is
   returned unchanged.
 */
private string fixLowercaseSuffix(in string str) @safe pure nothrow {
    const len = str.length;

    // check the longer suffix first, otherwise only its last `l` would
    // be upper-cased
    if(len >= 2 && str[len - 2 .. len] == "ll")
        return str[0 .. len - 2] ~ "LL";

    if(len >= 1 && str[len - 1] == 'l')
        return str[0 .. len - 1] ~ "L";

    return str;
}
236 
/**
   Collapses C `long long` suffixes into their D equivalents.

   Looking at the last three characters case-insensitively: `LLU` and
   `ULL` are replaced by `LU`; otherwise, if the last two characters are
   `LL`, they are replaced by `L`. Strings shorter than three characters
   are returned unchanged.
 */
private string fixLongLong(in string str) @safe pure {
    import std.uni : toUpper;

    // too short to carry a digit plus a long long suffix
    if(str.length < 3) return str;

    const tail = str[$-3 .. $].toUpper;

    const isUnsignedLongLong = tail == "LLU" || tail == "ULL";
    if(isUnsignedLongLong)
        return str[0 .. $-3] ~ "LU";

    const isLongLong = tail[1 .. $] == "LL";
    if(isLongLong)
        return str[0 .. $-2] ~ "L";

    return str;
}
251 
252 
/**
   Translates a C octal integer literal (leading `0`) into decimal, since
   a leading zero does not denote octal in D.

   The result has the form `/+converted from octal '<digits>'+/ <decimal>`
   followed by the literal's original integer suffix, if any. A literal
   consisting only of zeroes becomes `0` (plus suffix).

   Returned unchanged are spellings that don't start with `0` followed by
   a digit, and spellings where the run of octal digits is followed by
   anything other than an integer suffix (e.g. `09.5` or `010e2`, which
   are not octal integers).

   Params:
       spelling = the spelling of a literal
   Returns: the decimal translation, or `spelling` itself if it is not an
            octal integer literal.
 */
private string fixOctal(in string spelling) @safe pure {
    import std.ascii: isDigit, isOctalDigit;
    import std.conv : text;

    // `u`/`l` combinations in either case, or one of the Microsoft
    // sized-integer suffixes; the empty suffix is accepted as well
    static bool isIntegerSuffix(in string suffix) @safe pure {
        import std.algorithm: among;

        if(suffix.among("i8", "i16", "i32", "i64", "ui8", "ui16", "ui32", "ui64"))
            return true;

        foreach(char c; suffix)
            if(!c.among('u', 'U', 'l', 'L')) return false;

        return true;
    }

    const isOctal =
        spelling.length > 1
        && spelling[0] == '0'
        && spelling[1].isDigit
        ;

    if(!isOctal) return spelling;

    // split the spelling into the leading run of octal digits and
    // whatever follows it
    size_t digitsEnd = 1;
    while(digitsEnd < spelling.length && spelling[digitsEnd].isOctalDigit)
        ++digitsEnd;

    const digits = spelling[0 .. digitsEnd];
    const suffix = spelling[digitsEnd .. $];

    // digits 8/9, a decimal point, an exponent...: not an octal integer
    if(!isIntegerSuffix(suffix)) return spelling;

    size_t firstNonZero = 0;
    while(firstNonZero < digits.length && digits[firstNonZero] == '0')
        ++firstNonZero;

    if(firstNonZero == digits.length) return "0" ~ suffix;

    const base8_representation = digits[firstNonZero .. $];

    // accumulate in 64 bits so that values above int.max aren't truncated
    ulong base10_number = 0;
    foreach(char c; base8_representation)
        base10_number = base10_number * 8 + (c - '0');

    return "/+converted from octal '" ~ base8_representation ~ "'+/ " ~ base10_number.text ~ suffix;
}
282 
283 
/**
   Rewrites C `sizeof(...)` expressions in a macro's tokens into D's
   `(...).sizeof` form.

   For function-like macros the tokens up to and including the first `)`
   (the end of the macro's parameter list) are passed through untouched.

   Params:
       tokens = the macro's tokens, the first one being the macro's name
       cursor = the macro definition cursor

   Returns: a new token array with every `sizeof(...)` rewritten.

   Throws: `Exception` if the macro is function-like but contains no `)`;
           `UntranslatableException` if a `sizeof(` is never closed.
 */
private auto fixSizeof(R)(R tokens, in from !"clang".Cursor cursor)
{
    import dpp.translation.exception: UntranslatableException;
    import clang: Token;
    import std.conv: text;
    import std.algorithm: countUntil;

    // find the closing paren for the function-like macro's argument list
    size_t lastIndex = 0;
    if(cursor.isMacroFunction) {
        lastIndex = tokens
            .countUntil!(t => t == Token(Token.Kind.Punctuation, ")"))
            +1; // skip the right paren

        if(lastIndex == 0)  // given the +1 above, -1 becomes 0
            throw new Exception(text("Can't fix sizeof in function-like macro with tokens: ", tokens));
    }

    const beginning = tokens[0 .. lastIndex];
    const(Token)[] middle;

    // `i + 1 < length` instead of `i < length - 1`: the latter underflows
    // for an empty token array
    for(size_t i = lastIndex; i + 1 < tokens.length; ++i) {
        if(tokens[i] == Token(Token.Kind.Keyword, "sizeof")
           && tokens[i + 1] == Token(Token.Kind.Punctuation, "("))
        {
            // find closing paren
            long open = 1;
            size_t scanIndex = i + 2;  // skip i + 1 since that's the open paren

            while(open != 0) {
                // Macros can have unbalanced parentheses. Running off the
                // end of the tokens means this sizeof is never closed, so
                // report it instead of indexing out of bounds.
                if(scanIndex >= tokens.length)
                    throw new UntranslatableException("Unbalanced parentheses in sizeof in macro `" ~ cursor.spelling ~ "`");

                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, "("))
                    ++open;
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, ")"))
                    --open;

                ++scanIndex;
            }
            // at this point scanIndex is 1 + the index of the closing paren

            // everything before the sizeof, then the parenthesised operand
            // (both parens included), then `.sizeof`
            middle ~= tokens[lastIndex .. i] ~ tokens[i + 1 .. scanIndex] ~ Token(Token.Kind.Keyword, ".sizeof");
            lastIndex = scanIndex;
            // advance i past the sizeof. -1 because of ++i in the for loop
            i = lastIndex - 1;
        }
    }

    // can't chain here due to fixCasts appending to const(Token)[]
    return beginning ~ middle ~ tokens[lastIndex .. $];
}
331 
332 
/**
   Rewrites C-style casts in a macro's tokens into D casts: `(T) expr`
   becomes `cast(T) expr` whenever the parenthesised tokens look like a
   type (see the nested `isType`).

   For function-like macros the tokens up to and including the first `)`
   (the end of the macro's parameter list) are passed through untouched.

   Params:
       tokens = the macro's tokens, the first one being the macro's name
       cursor = the macro definition cursor
       context = the translation context, used to recognise user-defined
                 types

   Returns: a range over the rewritten tokens.

   Throws: `Exception` if the macro is function-like but contains no `)`;
           `UntranslatableException` if the macro's parentheses are
           unbalanced.
 */
private auto fixCasts(R)(
    R tokens,
    in from !"clang".Cursor cursor,
    in from!"dpp.runtime.context".Context context,
    )
{
    import dpp.translation.exception: UntranslatableException;
    import clang: Token;
    import std.conv: text;
    import std.algorithm: countUntil, count;
    import std.range: chain;

    // if the token array is a built-in or user-defined type
    bool isType(in Token[] tokens) {

        if( // fundamental type
            tokens.length == 1
            && tokens[0].kind == Token.Kind.Keyword
            && tokens[0].spelling != "sizeof"
            && tokens[0].spelling != "alignof"
            )
            return true;

        if( // user defined type
            tokens.length == 1
            && tokens[0].kind == Token.Kind.Identifier
            && context.isUserDefinedType(tokens[0].spelling)
            )
            return true;

        if(  // pointer to a type
            tokens.length >= 2
            && tokens[$-1] == Token(Token.Kind.Punctuation, "*")
            && isType(tokens[0 .. $-1])
            )
            return true;

        if( // const type
            tokens.length >= 2
            && tokens[0] == Token(Token.Kind.Keyword, "const")
            && isType(tokens[1..$])
            )
            return true;

        if( // typeof
            tokens.length >= 2
            && tokens[0] == Token(Token.Kind.Keyword, "typeof")
            )
            return true;

        if ( // macro attribute (e.g. __force) + type
            tokens.length >= 2
            && tokens[0].kind == Token.Kind.Identifier
            && isType(tokens[1..$])
            )
            return true;

        return false;
    }

    size_t lastIndex = 0;
    // find the closing paren for the function-like macro's argument list
    if(cursor.isMacroFunction) {
        lastIndex = tokens
            .countUntil!(t => t == Token(Token.Kind.Punctuation, ")"))
            +1; // skip the right paren
        if(lastIndex == 0)  // given the +1 above, -1 becomes 0
            throw new Exception(text("Can't fix casts in function-like macro with tokens: ", tokens));
    }

    const beginning = tokens[0 .. lastIndex];
    const(Token)[] middle;

    // See #244 - macros can have unbalanced parentheses
    // Apparently libclang tokenises `\\n)` as including the backslash and the newline
    const numLeftParens  = tokens.count!(a => a == Token(Token.Kind.Punctuation, "(") ||
                                         a == Token(Token.Kind.Punctuation, "\\\n("));
    const numRightParens = tokens.count!(a => a == Token(Token.Kind.Punctuation, ")") ||
                                         a == Token(Token.Kind.Punctuation, "\\\n)"));

    if(numLeftParens != numRightParens)
        throw new UntranslatableException("Unbalanced parentheses in macro `" ~ cursor.spelling ~ "`");

    // `i + 1 < length` instead of `i < length - 1`: the latter underflows
    // for an empty token array
    for(size_t i = lastIndex; i + 1 < tokens.length; ++i) {
        if(tokens[i] == Token(Token.Kind.Punctuation, "(")) {
            // find closing paren
            long open = 1;
            size_t scanIndex = i + 1;  // first token after the open paren at i

            while(open != 0) {
                // Equal paren counts overall don't guarantee that this
                // particular paren gets closed (e.g. `) ( x`), so guard
                // against running off the end of the tokens.
                if(scanIndex >= tokens.length)
                    throw new UntranslatableException("Unbalanced parentheses in macro `" ~ cursor.spelling ~ "`");

                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, "("))
                    ++open;
                // for the 2nd condition, see it.c.compile.preprocessor.multiline
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, ")") ||
                   tokens[scanIndex] == Token(Token.Kind.Punctuation, "\\\n)"))
                    --open;

                ++scanIndex;
            }
            // at this point scanIndex is the 1 + index of closing paren

            // we want to ignore e.g. `(int)(foo).sizeof` even if `foo` is a type
            const followedByDot =
                tokens.length > scanIndex
                && tokens[scanIndex].spelling[0] == '.'
                ;

            if(isType(tokens[i + 1 .. scanIndex - 1]) && !followedByDot) {
                middle ~= tokens[lastIndex .. i] ~
                    Token(Token.Kind.Punctuation, "cast(") ~
                    tokens[i + 1 .. scanIndex]; // includes closing paren
                lastIndex = scanIndex;
                // advance i past the cast. -1 because of ++i in the for loop
                i = lastIndex - 1;
            }
        }
    }

    return chain(beginning, middle, tokens[lastIndex .. $]);
}