dpp.translation.macro

1 module dpp.translation.macro_;
2 
3 import dpp.from;
4 
/**
   Translates a C macro definition into the lines of text to emit for it.

   Params:
       cursor = the macro definition (enforced by the `in` contract)
       context = the translation context; it is queried for whether the macro
                 was already defined and then told to remember the macro

   Returns: the lines to emit. Empty for macros that `isBuiltinMacro`
   reports as built-in.
 */
string[] translateMacro(in from!"clang".Cursor cursor,
                        ref from!"dpp.runtime.context".Context context)
    @safe
    in(cursor.kind == from!"clang".Cursor.Kind.MacroDefinition)
{
    import dpp.translation.dlang: maybeRename;

    // Non-built-in macro definitions are defined here and then preprocessed
    // again; built-in ones produce no output at all.
    if(isBuiltinMacro(cursor))
        return [];

    const tokens = cursor.tokens;

    // The only sane way to see a definition for a macro that has already
    // been defined is if an #undef happened in the meanwhile. libclang has
    // no way of telling us about that, so the #undef is re-created here.
    // This has to be asked before the macro is remembered just below.
    const string undefLine = context.macroAlreadyDefined(cursor)
        ? "#undef " ~ cursor.spelling ~ "\n"
        : "";

    context.rememberMacro(cursor);
    const name = maybeRename(cursor, context);
    const translated = translateToD(cursor, context, tokens);

    const isObjectLikeWithBody = !cursor.isMacroFunction && tokens.length > 1;

    // Function-like macros, and macros with no body, are only #defined.
    if(!isObjectLikeWithBody) {
        const separator = cursor.isMacroFunction ? "" : " ";
        return [undefLine ~ "#define " ~ name ~ separator ~ translated ~ "\n"];
    }

    // For object-like macros with a body we try to make the value importable
    // from another D module: if an enum with the macro's name can be mixed
    // in using the translated body, it is. After that the macro itself is
    // declared so that the #including .dpp file still goes through the
    // preprocessor.
    const mixinName = `enumMixinStr_` ~ name;
    const enumDeclaration = `enum ` ~ name ~ ` = ` ~ translated ~ `;`;

    string[] lines;

    lines ~= `#ifdef ` ~ name;
    lines ~= `#    undef ` ~ name;
    lines ~= `#endif`;

    lines ~= `static if(!is(typeof(` ~ name ~ `))) {`;
    lines ~= "    private enum " ~ mixinName ~ " = `" ~ enumDeclaration ~ "`;";
    lines ~= `    static if(is(typeof({ mixin(` ~ mixinName ~ `); }))) {`;
    lines ~= `        mixin(`  ~ mixinName ~ `);`;
    lines ~= `    }`;
    lines ~= `}`;

    lines ~= `#define ` ~ name ~ ` ` ~ translated;

    return lines;
}
63 
64 
/**
   Whether the cursor is a macro definition that is treated as built-in.

   A macro definition counts as built-in when its source range has no path,
   when that path does not exist on disk, when the cursor reports itself as
   predefined, or when its spelling starts with `__STDC_`.

   Returns: false for any cursor that is not a macro definition.
 */
bool isBuiltinMacro(in from!"clang".Cursor cursor)
    @safe
{
    import clang: Cursor;
    import std.file: exists;
    import std.algorithm: startsWith;

    if(cursor.kind != Cursor.Kind.MacroDefinition)
        return false;

    const path = cursor.sourceRange.path;

    // no usable file behind the definition
    if(path == "" || !path.exists)
        return true;

    if(cursor.isPredefined)
        return true;

    return cursor.spelling.startsWith("__STDC_");
}
81 
82 
/**
   Whether the tokens are exactly an identifier followed by a literal,
   e.g. `#define FOO 42`.
 */
private bool isLiteralMacro(in from!"clang".Token[] tokens) @safe @nogc pure nothrow {
    import clang: Token;

    if(tokens.length != 2)
        return false;

    const startsWithIdentifier = tokens[0].kind == Token.Kind.Identifier;
    return startsWithIdentifier && tokens[1].kind == Token.Kind.Literal;
}
92 
/**
   Whether `str`, once surrounding whitespace is stripped, can be converted
   to a `T` with `std.conv.to`.

   Params:
       str = the text to check

   Returns: true if the conversion succeeds, false if it throws an Exception.
 */
private bool isStringRepr(T)(in string str) @safe pure {
    import std.conv: to;
    import std.exception: collectException;
    import std.string: strip;

    // collectException needs somewhere to store the converted value
    T dummy;
    return str.strip.to!T.collectException(dummy) is null;
}
101 
102 
/**
   Translates the body of a macro definition to D.

   Params:
       cursor = the macro definition the tokens belong to
       context = the translation context, forwarded to the cast fix-up and
                 to `translateElaborated`
       tokens = the macro's tokens; the first one is the macro's name

   Returns: the translated body, or the empty string for a macro with no
   body (e.g. `#define FOO`).
 */
private string translateToD(
    in from!"clang".Cursor cursor,
    ref from!"dpp.runtime.context".Context context,
    in from!"clang".Token[] tokens,
    )
    @safe
{
    import dpp.translation.type: translateElaborated;

    // a single literal only needs its spelling adjusted
    if(isLiteralMacro(tokens))
        return fixLiteral(tokens[1]);

    // e.g. `#define FOO`
    if(tokens.length == 1)
        return "";

    // each stage consumes the output of the previous one
    auto withSizeof = fixSizeof(tokens, cursor);
    auto withCasts = fixCasts(withSizeof, cursor, context);
    auto withDots = fixArrow(withCasts);
    auto withNull = fixNull(withDots);

    string joined = toString(withNull);
    return translateElaborated(joined, context);
}
123 
124 
/**
   Joins the spellings of all tokens but the first with single spaces.

   The first token is skipped because it is the identifier (because of
   DPP_ENUM_).
 */
private string toString(R)(R tokens) {
    import std.algorithm: map;
    import std.array: join;

    auto withoutIdentifier = tokens[1 .. $];
    auto spellings = withoutIdentifier.map!(token => token.spelling);

    return spellings.join(" ");
}
135 
/**
   Rewrites the spelling of a C literal token so that it is usable in D.

   The fix-ups are applied one after the other, each to the result of the
   previous one.
 */
private string fixLiteral(in from!"clang".Token token)
    @safe pure
    in(token.kind == from!"clang".Token.Kind.Literal)
    do
{
    string literal = token.spelling;

    literal = fixLowercaseSuffix(literal);
    literal = fixMultiCharacterLiterals(literal);
    literal = fixWideCharStrings(literal);
    literal = fixOctal(literal);
    literal = fixMicrosoftSuffixes(literal);
    literal = fixLongLong(literal);

    return literal;
}
150 
151 
/**
   Lazily replaces every `->` punctuation token with a `.` punctuation
   token. All other tokens pass through unchanged.
 */
private auto fixArrow(R)(R tokens) {
    import clang: Token;
    import std.algorithm: map;

    static const(Token) arrowToDot(in Token token) {
        if(token == Token(Token.Kind.Punctuation, "->"))
            return Token(Token.Kind.Punctuation, ".");

        return token;
    }

    return tokens.map!arrowToDot;
}
166 
/**
   Lazily replaces every `NULL` identifier token with a `null` identifier
   token. All other tokens pass through unchanged.
 */
private auto fixNull(R)(R tokens)
{
    import clang: Token;
    import std.algorithm: map;

    static const(Token) upperToLowerNull(in Token token) {
        if(token == Token(Token.Kind.Identifier, "NULL"))
            return Token(Token.Kind.Identifier, "null");

        return token;
    }

    return tokens.map!upperToLowerNull;
}
183 
/**
   Translates the Microsoft-specific integer suffixes `i64`, `i32`, `i16`
   and `i8`. Only does anything on Windows; elsewhere the string is returned
   as is.

   `i64` becomes `L`, the other suffixes are simply removed.
 */
version(Windows)
private string fixMicrosoftSuffixes(in string str) @safe pure nothrow {
    import std.algorithm: endsWith;

    if(str.endsWith("i64"))
        return str[0 .. $-3] ~ "L";
    else if(str.endsWith("i32"))
        return str[0 .. $-3];
    else if(str.endsWith("i16"))
        return str[0 .. $-3];
    else if(str.endsWith("i8"))
        return str[0 .. $-2];  // "i8" is only two characters long
    return str;
}
else
private string fixMicrosoftSuffixes(in string str) @safe pure nothrow {
    return str;
}
202 
/**
   Turns a C wide string literal (`L"..."`) into a D wide string literal
   (`"..."w`). Anything else is returned untouched.
 */
private string fixWideCharStrings(in string str) @safe pure nothrow {
    // the shortest possible wide string literal is L""
    if(str.length < 3)
        return str;

    const hasWidePrefix = str[0] == 'L' && str[1] == '"';
    if(!hasWidePrefix || str[$-1] != '"')
        return str;

    // drop the L prefix, add the w suffix
    return str[1 .. $] ~ "w";
}
210 
/**
   Translates a C multi-character literal such as `'ab'` to the decimal
   spelling of its integer value.

   Multi-character literals are implementation-defined but allowed in C.
   The value is computed the way it is typically done in common compilers,
   see e.g.
   https://gcc.gnu.org/onlinedocs/cpp/Implementation-defined-behavior.html

   Literals that start with an escape sequence and literals containing a
   non-ASCII byte (assumed to be a single UTF-8 encoded character) are
   forwarded to D unchanged, as is anything that isn't a character literal
   with more than one byte between the quotes.
 */
private string fixMultiCharacterLiterals(in string str) @safe pure nothrow {
    import std.conv: to;

    const looksMultiChar =
        str.length > 3
        && str[0] == '\''
        && str[$-1] == '\''
        && str[1] != '\\'
        ;

    // not one of these, don't touch
    if(!looksMultiChar)
        return str;

    int value;
    foreach(char unit; str[1 .. $-1]) {
        // any multi-byte character is assumed to be a single UTF-8
        // character, so punt on it
        if(unit > 127)
            return str;

        value = (value << 8) | cast(ubyte) unit;
    }

    return value.to!string;
}
233 
/**
   Upper-cases a lowercase `l` / `ll` suffix, since D doesn't accept a
   lowercase `l` as a literal suffix.

   The suffix is also recognised when it is followed by an unsigned marker
   (`lu`, `lU`, `llu`, `llU`); the marker itself is kept as is.

   Params:
       str = the spelling of a literal

   Returns: the spelling with the `l`s of its suffix upper-cased, or `str`
   itself if it has no such suffix.
 */
private string fixLowercaseSuffix(in string str) @safe pure nothrow {
    import std.algorithm: endsWith;

    // long (long) suffix followed by the unsigned marker, e.g. 10lu
    if(str.endsWith("llu") || str.endsWith("llU"))
        return str[0 .. $-3] ~ "LL" ~ str[$-1];
    if(str.endsWith("lu") || str.endsWith("lU"))
        return str[0 .. $-2] ~ "L" ~ str[$-1];

    // long (long) suffix at the very end, e.g. 10l or 10ul
    if(str.endsWith("ll"))
        return str[0 .. $-2] ~ "LL";
    if(str.endsWith("l"))
        return str[0 .. $-1] ~ "L";

    return str;
}
243 
/**
   Translates C's `long long` literal suffixes to D's: `LL` becomes `L`,
   and `ULL` / `LLU` become `LU`. The suffix is matched case-insensitively.

   Params:
       str = the spelling of a literal

   Returns: the spelling with its suffix translated, or `str` itself if the
   last three characters aren't a `long long` suffix.
 */
private string fixLongLong(in string str) @safe pure {
    import std.ascii: toUpper;

    if(str.length < 3)
        return str;

    // Upper-case the last three code units one by one, ASCII only. The slice
    // may start in the middle of a multi-byte UTF-8 sequence (e.g. a string
    // literal ending in a non-ASCII character), so it mustn't be decoded.
    char[3] tail;
    foreach(i, ref unit; tail)
        unit = toUpper(str[$ - 3 + i]);

    if(tail[] == "LLU" || tail[] == "ULL")
        return str[0 .. $-3] ~ "LU";

    if(tail[1 .. $] == "LL")
        return str[0 .. $-2] ~ "L";

    return str;
}
258 
259 
/**
   Translates a C octal literal (a leading `0` followed by another digit)
   to an instantiation of `std.conv.octal`, since D has no octal literals.

   Leading zeroes are dropped. If nothing but zeroes precede a suffix or a
   fractional part (e.g. `00L`, `00.5`), a single zero is kept in front of
   it rather than handing a non-number to `std.conv.octal`.

   Params:
       spelling = the spelling of a literal

   Returns: the translated spelling, or `spelling` itself if it isn't octal.
 */
private string fixOctal(in string spelling) @safe pure {
    import std.uni: isNumber;

    const isOctal =
        spelling.length > 1
        && spelling[0] == '0'
        && spelling[1].isNumber
        ;

    if(!isOctal) return spelling;

    // index of the first character that isn't a leading zero
    size_t firstNonZero = 0;
    while(firstNonZero < spelling.length && spelling[firstNonZero] == '0')
        ++firstNonZero;

    // nothing but zeroes
    if(firstNonZero == spelling.length) return "0";

    const rest = spelling[firstNonZero .. $];

    // zeroes followed by something that isn't a digit: the value is zero and
    // whatever follows (suffix, fractional part) is kept
    if(!rest[0].isNumber) return "0" ~ rest;

    return `std.conv.octal!` ~ rest;
}
279 
280 
/**
   Rewrites C's `sizeof(x)` as D's `(x).sizeof`.

   Params:
       tokens = the macro's tokens, starting with its name
       cursor = the macro definition; for a function-like macro everything
                up to and including the first `)` (the end of the parameter
                list) is left alone

   Returns: the tokens with every parenthesised `sizeof` rewritten. A
   `sizeof(` whose parenthesis is never closed is left as it is.

   Throws: Exception if the macro is function-like but has no `)` token.
 */
private auto fixSizeof(R)(R tokens, in from !"clang".Cursor cursor)
{
    import clang: Token;
    import std.conv: text;
    import std.algorithm: countUntil;

    // find the closing paren for the function-like macro's argument list
    size_t lastIndex = 0;
    if(cursor.isMacroFunction) {
        lastIndex = tokens
            .countUntil!(t => t == Token(Token.Kind.Punctuation, ")"))
            +1; // skip the right paren

        if(lastIndex == 0)  // given the +1 above, -1 becomes 0
            throw new Exception(text("Can't fix sizeof in function-like macro with tokens: ", tokens));
    }

    const beginning = tokens[0 .. lastIndex];
    const(Token)[] middle;

    // `i + 1 < length` rather than `i < length - 1`: no underflow when empty
    for(size_t i = lastIndex; i + 1 < tokens.length; ++i) {
        if(tokens[i] == Token(Token.Kind.Keyword, "sizeof")
           && tokens[i + 1] == Token(Token.Kind.Punctuation, "("))
        {
            // find closing paren
            long open = 1;
            size_t scanIndex = i + 2;  // skip i + 1 since that's the open paren

            while(open != 0 && scanIndex < tokens.length) {
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, "("))
                    ++open;
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, ")"))
                    --open;

                ++scanIndex;
            }

            // ran out of tokens before the paren was closed, which is legal
            // in a macro body: nothing sensible to rewrite here
            if(open != 0) continue;

            // everything since the last rewrite, then `(x)`, then `.sizeof`
            middle ~= tokens[lastIndex .. i] ~ tokens[i + 1 .. scanIndex] ~ Token(Token.Kind.Keyword, ".sizeof");
            lastIndex = scanIndex;
            // advance i past the sizeof. -1 because of ++i in the for loop
            i = lastIndex - 1;
        }
    }

    // can't chain here due to fixCasts appending to const(Token)[]
    return beginning ~ middle ~ tokens[lastIndex .. $];
}
328 
329 
/**
   Rewrites C casts, `(type)`, as D casts, `cast(type)`.

   A parenthesised group is considered a cast if what's inside the
   parentheses is a type (see `isType` below) and the closing paren isn't
   followed by a token that starts with a dot.

   Params:
       tokens = the macro's tokens, starting with its name
       cursor = the macro definition; for a function-like macro everything
                up to and including the first `)` (the end of the parameter
                list) is left alone
       context = the translation context, used to tell whether an
                 identifier names a user-defined type

   Returns: a range over the tokens with the casts rewritten. A `(` that is
   never closed is left as it is.

   Throws: Exception if the macro is function-like but has no `)` token.
 */
private auto fixCasts(R)(
    R tokens,
    in from !"clang".Cursor cursor,
    in from!"dpp.runtime.context".Context context,
    )
{
    import clang: Token;
    import std.conv: text;
    import std.algorithm: countUntil;
    import std.range: chain;

    // if the token array is a built-in or user-defined type
    bool isType(in Token[] tokens) {

        if( // fundamental type
            tokens.length == 1
            && tokens[0].kind == Token.Kind.Keyword
            && tokens[0].spelling != "sizeof"
            && tokens[0].spelling != "alignof"
            )
            return true;

        if( // user defined type
            tokens.length == 1
            && tokens[0].kind == Token.Kind.Identifier
            && context.isUserDefinedType(tokens[0].spelling)
            )
            return true;

        if(  // pointer to a type
            tokens.length >= 2
            && tokens[$-1] == Token(Token.Kind.Punctuation, "*")
            && isType(tokens[0 .. $-1])
            )
            return true;

        if( // const type
            tokens.length >= 2
            && tokens[0] == Token(Token.Kind.Keyword, "const")
            && isType(tokens[1..$])
            )
            return true;

        return false;
    }

    size_t lastIndex = 0;
    // find the closing paren for the function-like macro's argument list
    if(cursor.isMacroFunction) {
        lastIndex = tokens
            .countUntil!(t => t == Token(Token.Kind.Punctuation, ")"))
            +1; // skip the right paren
        if(lastIndex == 0)  // given the +1 above, -1 becomes 0
            throw new Exception(text("Can't fix casts in function-like macro with tokens: ", tokens));
    }

    const beginning = tokens[0 .. lastIndex];
    const(Token)[] middle;

    // `i + 1 < length` rather than `i < length - 1`: no underflow when empty
    for(size_t i = lastIndex; i + 1 < tokens.length; ++i) {
        if(tokens[i] == Token(Token.Kind.Punctuation, "(")) {
            // find closing paren
            long open = 1;
            size_t scanIndex = i + 1;  // start right after the open paren at i

            while(open != 0 && scanIndex < tokens.length) {
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, "("))
                    ++open;
                // for the 2nd condition, see it.c.compile.preprocessor.multiline
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, ")") ||
                   tokens[scanIndex] == Token(Token.Kind.Punctuation, "\\\n)"))
                    --open;

                ++scanIndex;
            }

            // ran out of tokens before the paren was closed, which is legal
            // in a macro body: this can't be a cast, carry on with the
            // tokens after the open paren
            if(open != 0) continue;

            // at this point scanIndex is the 1 + index of closing paren

            // we want to ignore e.g. `(int)(foo).sizeof` even if `foo` is a type
            const followedByDot =
                tokens.length > scanIndex
                && tokens[scanIndex].spelling.length > 0
                && tokens[scanIndex].spelling[0] == '.'
                ;

            if(isType(tokens[i + 1 .. scanIndex - 1]) && !followedByDot) {
                middle ~= tokens[lastIndex .. i] ~
                    Token(Token.Kind.Punctuation, "cast(") ~
                    tokens[i + 1 .. scanIndex]; // includes closing paren
                lastIndex = scanIndex;
                // advance i past the cast. -1 because of ++i in the for loop
                i = lastIndex - 1;
            }
        }
    }

    return chain(beginning, middle, tokens[lastIndex .. $]);
}