1 module dpp.translation.macro_; 2 3 4 import dpp.from; 5 6 7 string[] translateMacro(in from!"clang".Cursor cursor, 8 ref from!"dpp.runtime.context".Context context) 9 @safe 10 in(cursor.kind == from!"clang".Cursor.Kind.MacroDefinition) 11 { 12 import dpp.translation.dlang: maybeRename; 13 import clang: Cursor; 14 import std.file: exists; 15 import std.algorithm: startsWith, canFind; 16 import std.conv: text; 17 18 // we want non-built-in macro definitions to be defined and then preprocessed 19 // again 20 21 if(isBuiltinMacro(cursor)) return []; 22 23 const tokens = cursor.tokens; 24 25 // the only sane way for us to be able to see a macro definition 26 // for a macro that has already been defined is if an #undef happened 27 // in the meanwhile. Unfortunately, libclang has no way of passing 28 // that information to us 29 string maybeUndef; 30 if(context.macroAlreadyDefined(cursor)) 31 maybeUndef = "#undef " ~ cursor.spelling ~ "\n"; 32 33 context.rememberMacro(cursor); 34 const spelling = maybeRename(cursor, context); 35 const dbody = translateToD(cursor, context, tokens); 36 37 // We try here to make it so that literal macros can be imported from 38 // another D module. We also try and make non-function-like macros 39 // that aren't a literal constant but an expression can be imported 40 // as well. To that end we check that we can mixin a declaration of 41 // an enum with the same name of the macro with the original C code. 42 // If so, we mix it in. 43 // Below that, we declare the macro so the the #including .dpp file 44 // uses the preprocessor. 45 if(!cursor.isMacroFunction && tokens.length > 1) { 46 const defineEnum = `enum ` ~ spelling ~ ` = ` ~ dbody ~ `;`; 47 const enumVarName = `enumMixinStr_` ~ spelling; 48 return [ 49 `#ifdef ` ~ spelling, 50 `# undef ` ~ spelling, 51 `#endif`, 52 `static if(!is(typeof(` ~ spelling ~ `))) {`, 53 " private enum " ~ enumVarName ~ " = `" ~ defineEnum ~ "`;", 54 ` static if(is(typeof({ mixin(` ~ enumVarName ~ `); }))) {`, 55 ` mixin(` ~ enumVarName ~ `);`, 56 ` }`, 57 `}`, 58 `#define ` ~ spelling ~ ` ` ~ dbody, 59 ]; 60 } 61 62 const maybeSpace = cursor.isMacroFunction ? "" : " "; 63 return [maybeUndef ~ "#define " ~ spelling ~ maybeSpace ~ dbody ~ "\n"]; 64 } 65 66 67 bool isBuiltinMacro(in from!"clang".Cursor cursor) 68 @safe 69 { 70 import clang: Cursor; 71 import std.file: exists; 72 import std.algorithm: startsWith; 73 74 if(cursor.kind != Cursor.Kind.MacroDefinition) return false; 75 76 return 77 cursor.sourceRange.path == "" 78 || !cursor.sourceRange.path.exists 79 || cursor.isPredefined 80 || cursor.spelling.startsWith("__STDC_") 81 ; 82 } 83 84 85 private bool isLiteralMacro(in from!"clang".Token[] tokens) @safe @nogc pure nothrow { 86 import clang: Token; 87 88 return 89 tokens.length == 2 90 && tokens[0].kind == Token.Kind.Identifier 91 && tokens[1].kind == Token.Kind.Literal 92 ; 93 } 94 95 96 private string translateToD( 97 in from!"clang".Cursor cursor, 98 ref from!"dpp.runtime.context".Context context, 99 in from!"clang".Token[] tokens, 100 ) 101 @safe 102 { 103 import dpp.translation.type: translateElaborated; 104 if(isLiteralMacro(tokens)) return fixLiteral(tokens[1]); 105 if(tokens.length == 1) return ""; // e.g. `#define FOO` 106 107 return tokens 108 .fixSizeof(cursor) 109 .fixCasts(cursor, context) 110 .fixArrow 111 .fixNull 112 .toString 113 .translateElaborated(context) 114 ; 115 } 116 117 118 private string toString(R)(R tokens) { 119 import clang: Token; 120 import std.algorithm: map; 121 import std.array: join; 122 123 // skip the identifier because of DPP_ENUM_ 124 return tokens[1..$] 125 .map!(t => t.spelling) 126 .join(" "); 127 } 128 129 private string fixLiteral(in from!"clang".Token token) 130 @safe pure 131 in(token.kind == from!"clang".Token.Kind.Literal) 132 do 133 { 134 return token.spelling 135 .fixLowercaseSuffix 136 .fixMultiCharacterLiterals 137 .fixWideCharStrings 138 .fixOctal 139 .fixMicrosoftSuffixes 140 .fixLongLong 141 ; 142 } 143 144 145 private auto fixArrow(R)(R tokens) { 146 import clang: Token; 147 import std.algorithm: map; 148 149 static const(Token) replace(in Token token) { 150 return token == Token(Token.Kind.Punctuation, "->") 151 ? Token(Token.Kind.Punctuation, ".") 152 : token; 153 } 154 155 return tokens 156 .map!replace 157 ; 158 } 159 160 private auto fixNull(R)(R tokens) 161 { 162 import clang: Token; 163 import std.algorithm: map; 164 import std.array: array; 165 166 static const(Token) replace(in Token token) { 167 return token == Token(Token.Kind.Identifier, "NULL") 168 ? Token(Token.Kind.Identifier, "null") 169 : token; 170 } 171 172 return tokens 173 .map!replace 174 ; 175 } 176 177 version(Windows) 178 private string fixMicrosoftSuffixes(in string str) @safe pure nothrow { 179 import std.algorithm: endsWith; 180 181 if(str.endsWith("i64")) 182 return str[0 .. $-3] ~ "L"; 183 else if(str.endsWith("i32")) 184 return str[0 .. $-3]; 185 else if(str.endsWith("i16")) 186 return str[0 .. $-3]; 187 else if(str.endsWith("i8")) 188 return str[0 .. $-3]; 189 return str; 190 } 191 else 192 private string fixMicrosoftSuffixes(in string str) @safe pure nothrow { 193 return str; 194 } 195 196 private string fixWideCharStrings(in string str) @safe pure nothrow { 197 if(str.length >=3 && str[0] == 'L' && str[1] == '"' && str[$-1] == '"') { 198 return str[1 .. $] ~ "w"; 199 } 200 201 return str; 202 } 203 204 private string fixMultiCharacterLiterals(in string str) @safe pure nothrow { 205 // multi-character literals are implementation-defined, but allowed, 206 // in C I aim to identify them and then distinguish them from a 207 // non-ASCII character, which I'll just forward to D assuming utf-8 source 208 // moreover, the '\uxxx' or other escape sequences should be forwarded 209 if(str.length > 3 && str[0] == '\'' && str[$-1] == '\'' && str[1] != '\\') { 210 // apparently a multi-char literal, let's translate to int 211 // the way this is typically done in common compilers, e.g. 212 // https://gcc.gnu.org/onlinedocs/cpp/Implementation-defined-behavior.html 213 int result; 214 foreach(char ch; str[1 .. $-1]) { 215 // any multi-byte character I'm going to assume 216 // is just a single UTF-8 char and punt on it. 217 if(ch > 127) return str; 218 result <<= 8; 219 result |= cast(ubyte) ch; 220 } 221 import std.conv; 222 return to!string(result); 223 } 224 return str; // not one of these, don't touch 225 } 226 227 private string fixLowercaseSuffix(in string str) @safe pure nothrow { 228 import std.algorithm: endsWith; 229 230 if(str.endsWith("ll")) 231 return str[0 .. $-2] ~ "LL"; 232 if(str.endsWith("l")) 233 return str[0 .. $-1] ~ "L"; 234 return str; 235 } 236 237 private string fixLongLong(in string str) @safe pure { 238 import std.uni : toUpper; 239 const suffix = str.length < 3 ? "" : str[$-3 .. $].toUpper; 240 241 if (suffix.length > 0) { 242 if (suffix == "LLU" || suffix == "ULL") 243 return str[0 .. $-3] ~ "LU"; 244 245 if (suffix[1 .. $] == "LL") 246 return str[0 .. $-2] ~ "L"; 247 } 248 249 return str; 250 } 251 252 253 private string fixOctal(in string spelling) @safe pure { 254 import clang: Token; 255 import std.algorithm: countUntil; 256 import std.uni: isNumber; 257 import std.conv : text; 258 259 const isOctal = 260 spelling.length > 1 261 && spelling[0] == '0' 262 && spelling[1].isNumber 263 ; 264 265 if(!isOctal) return spelling; 266 267 const firstNonZero = spelling.countUntil!(a => a != '0'); 268 if(firstNonZero == -1) return "0"; 269 270 const base8_representation = spelling[firstNonZero .. $]; 271 const base8_length = base8_representation.length; 272 int base10_number = 0; 273 foreach(i, c; base8_representation) 274 { 275 const power = base8_length - i - 1; 276 const digit = c - '0'; 277 base10_number += digit * 8 ^^ power; 278 } 279 280 return "/+converted from octal '" ~ base8_representation ~ "'+/ " ~ base10_number.text; 281 } 282 283 284 private auto fixSizeof(R)(R tokens, in from !"clang".Cursor cursor) 285 { 286 import clang: Token; 287 import std.conv: text; 288 import std.algorithm: countUntil; 289 290 // find the closing paren for the function-like macro's argument list 291 size_t lastIndex = 0; 292 if(cursor.isMacroFunction) { 293 lastIndex = tokens 294 .countUntil!(t => t == Token(Token.Kind.Punctuation, ")")) 295 +1; // skip the right paren 296 297 if(lastIndex == 0) // given the +1 above, -1 becomes 0 298 throw new Exception(text("Can't fix sizeof in function-like macro with tokens: ", tokens)); 299 } 300 301 const beginning = tokens[0 .. lastIndex]; 302 const(Token)[] middle; 303 304 for(size_t i = lastIndex; i < tokens.length - 1; ++i) { 305 if(tokens[i] == Token(Token.Kind.Keyword, "sizeof") 306 && tokens[i + 1] == Token(Token.Kind.Punctuation, "(")) 307 { 308 // find closing paren 309 long open = 1; 310 size_t scanIndex = i + 2; // skip i + 1 since that's the open paren 311 312 while(open != 0) { 313 if(tokens[scanIndex] == Token(Token.Kind.Punctuation, "(")) 314 ++open; 315 if(tokens[scanIndex] == Token(Token.Kind.Punctuation, ")")) 316 --open; 317 318 ++scanIndex; 319 } 320 321 middle ~= tokens[lastIndex .. i] ~ tokens[i + 1 .. scanIndex] ~ Token(Token.Kind.Keyword, ".sizeof"); 322 lastIndex = scanIndex; 323 // advance i past the sizeof. -1 because of ++i in the for loop 324 i = lastIndex - 1; 325 } 326 } 327 328 // can't chain here due to fixCasts appending to const(Token)[] 329 return beginning ~ middle ~ tokens[lastIndex .. $]; 330 } 331 332 333 private auto fixCasts(R)( 334 R tokens, 335 in from !"clang".Cursor cursor, 336 in from!"dpp.runtime.context".Context context, 337 ) 338 { 339 import dpp.translation.exception: UntranslatableException; 340 import clang: Token; 341 import std.conv: text; 342 import std.algorithm: countUntil, count; 343 import std.range: chain; 344 345 // if the token array is a built-in or user-defined type 346 bool isType(in Token[] tokens) { 347 348 if( // fundamental type 349 tokens.length == 1 350 && tokens[0].kind == Token.Kind.Keyword 351 && tokens[0].spelling != "sizeof" 352 && tokens[0].spelling != "alignof" 353 ) 354 return true; 355 356 if( // user defined type 357 tokens.length == 1 358 && tokens[0].kind == Token.Kind.Identifier 359 && context.isUserDefinedType(tokens[0].spelling) 360 ) 361 return true; 362 363 if( // pointer to a type 364 tokens.length >= 2 365 && tokens[$-1] == Token(Token.Kind.Punctuation, "*") 366 && isType(tokens[0 .. $-1]) 367 ) 368 return true; 369 370 if( // const type 371 tokens.length >= 2 372 && tokens[0] == Token(Token.Kind.Keyword, "const") 373 && isType(tokens[1..$]) 374 ) 375 return true; 376 377 if( // typeof 378 tokens.length >= 2 379 && tokens[0] == Token(Token.Kind.Keyword, "typeof") 380 ) 381 return true; 382 383 if ( // macro attribute (e.g. __force) + type 384 tokens.length >= 2 385 && tokens[0].kind == Token.Kind.Identifier 386 && isType(tokens[1..$]) 387 ) 388 return true; 389 390 return false; 391 } 392 393 size_t lastIndex = 0; 394 // find the closing paren for the function-like macro's argument list 395 if(cursor.isMacroFunction) { 396 lastIndex = tokens 397 .countUntil!(t => t == Token(Token.Kind.Punctuation, ")")) 398 +1; // skip the right paren 399 if(lastIndex == 0) 400 throw new Exception(text("Can't fix casts in function-like macro with tokens: ", tokens)); 401 } 402 403 const beginning = tokens[0 .. lastIndex]; 404 const(Token)[] middle; 405 406 // See #244 - macros can have unbalanced parentheses 407 // Apparently libclang tokenises `\\n)` as including the backslash and the newline 408 const numLeftParens = tokens.count!(a => a == Token(Token.Kind.Punctuation, "(") || 409 a == Token(Token.Kind.Punctuation, "\\\n(")); 410 const numRightParens = tokens.count!(a => a == Token(Token.Kind.Punctuation, ")") || 411 a == Token(Token.Kind.Punctuation, "\\\n)")); 412 413 if(numLeftParens != numRightParens) 414 throw new UntranslatableException("Unbalanced parentheses in macro `" ~ cursor.spelling ~ "`"); 415 416 for(size_t i = lastIndex; i < tokens.length - 1; ++i) { 417 if(tokens[i] == Token(Token.Kind.Punctuation, "(")) { 418 // find closing paren 419 long open = 1; 420 size_t scanIndex = i + 1; // skip i + 1 since that's the open paren 421 422 while(open != 0) { 423 if(tokens[scanIndex] == Token(Token.Kind.Punctuation, "(")) 424 ++open; 425 // for the 2nd condition, esee it.c.compile.preprocessor.multiline 426 if(tokens[scanIndex] == Token(Token.Kind.Punctuation, ")") || 427 tokens[scanIndex] == Token(Token.Kind.Punctuation, "\\\n)")) 428 --open; 429 430 ++scanIndex; 431 } 432 // at this point scanIndex is the 1 + index of closing paren 433 434 // we want to ignore e.g. `(int)(foo).sizeof` even if `foo` is a type 435 const followedByDot = 436 tokens.length > scanIndex 437 && tokens[scanIndex].spelling[0] == '.' 438 ; 439 440 if(isType(tokens[i + 1 .. scanIndex - 1]) && !followedByDot) { 441 middle ~= tokens[lastIndex .. i] ~ 442 Token(Token.Kind.Punctuation, "cast(") ~ 443 tokens[i + 1 .. scanIndex]; // includes closing paren 444 lastIndex = scanIndex; 445 // advance i past the sizeof. -1 because of ++i in the for loop 446 i = lastIndex - 1; 447 } 448 } 449 } 450 451 return chain(beginning, middle, tokens[lastIndex .. $]); 452 }