/**
   Translation of C preprocessor macro definitions.

   Macros are emitted again as `#define`s so that the output can be run
   through the preprocessor, and, where possible, also as D enums so that
   they can be used from other D modules.
 */
module dpp.translation.macro_;

import dpp.from;

/**
   Translates a macro definition into lines of output.

   Params:
       cursor = the macro definition cursor (must be of kind MacroDefinition)
       context = translation context; the macro is recorded in it

   Returns: the lines to emit, or an empty array for built-in macros.
 */
string[] translateMacro(in from!"clang".Cursor cursor,
                        ref from!"dpp.runtime.context".Context context)
    @safe
    in(cursor.kind == from!"clang".Cursor.Kind.MacroDefinition)
{
    import dpp.translation.dlang: maybeRename;

    // we want non-built-in macro definitions to be defined and then preprocessed
    // again

    if(isBuiltinMacro(cursor)) return [];

    const tokens = cursor.tokens;

    // the only sane way for us to be able to see a macro definition
    // for a macro that has already been defined is if an #undef happened
    // in the meanwhile. Unfortunately, libclang has no way of passing
    // that information to us
    string maybeUndef;
    if(context.macroAlreadyDefined(cursor))
        maybeUndef = "#undef " ~ cursor.spelling ~ "\n";

    context.rememberMacro(cursor);
    const spelling = maybeRename(cursor, context);
    const dbody = translateToD(cursor, context, tokens);

    // We try here to make it so that literal macros can be imported from
    // another D module. We also try to make non-function-like macros
    // that aren't a literal constant but an expression importable as well.
    // To that end we check that we can mix in a declaration of an enum
    // with the same name as the macro and the translated body.
    // If so, we mix it in.
    // Below that, we declare the macro so that the #including .dpp file
    // uses the preprocessor.
    if(!cursor.isMacroFunction && tokens.length > 1) {
        const defineEnum = `enum ` ~ spelling ~ ` = ` ~ dbody ~ `;`;
        const enumVarName = `enumMixinStr_` ~ spelling;
        return [
            `#ifdef ` ~ spelling,
            `# undef ` ~ spelling,
            `#endif`,
            `static if(!is(typeof(` ~ spelling ~ `))) {`,
            " private enum " ~ enumVarName ~ " = `" ~ defineEnum ~ "`;",
            ` static if(is(typeof({ mixin(` ~ enumVarName ~ `); }))) {`,
            ` mixin(` ~ enumVarName ~ `);`,
            ` }`,
            `}`,
            `#define ` ~ spelling ~ ` ` ~ dbody,
        ];
    }

    // function-like macros: the body starts with the parameter list,
    // so no space may be inserted after the name
    const maybeSpace = cursor.isMacroFunction ? "" : " ";
    return [maybeUndef ~ "#define " ~ spelling ~ maybeSpace ~ dbody ~ "\n"];
}


/**
   Whether the cursor is a macro definition that should not be translated:
   one with no source path, a path that doesn't exist on disk, a predefined
   macro, or one whose name starts with `__STDC_`.
   Returns false for cursors that aren't macro definitions.
 */
bool isBuiltinMacro(in from!"clang".Cursor cursor)
    @safe
{
    import clang: Cursor;
    import std.file: exists;
    import std.algorithm: startsWith;

    if(cursor.kind != Cursor.Kind.MacroDefinition) return false;

    return
        cursor.sourceRange.path == ""
        || !cursor.sourceRange.path.exists
        || cursor.isPredefined
        || cursor.spelling.startsWith("__STDC_")
        ;
}


/// True if the tokens are exactly an identifier followed by a single literal.
private bool isLiteralMacro(in from!"clang".Token[] tokens) @safe @nogc pure nothrow {
    import clang: Token;

    return
        tokens.length == 2
        && tokens[0].kind == Token.Kind.Identifier
        && tokens[1].kind == Token.Kind.Literal
        ;
}

/// True if the stripped string can be converted to `T` with `std.conv.to`.
private bool isStringRepr(T)(in string str) @safe pure {
    import std.conv: to;
    import std.exception: collectException;
    import std.string: strip;

    T dummy;
    return str.strip.to!T.collectException(dummy) is null;
}


/**
   Translates the body of a macro (everything after its name) to text.
   A lone literal goes through `fixLiteral`; a macro without a body yields
   the empty string; anything else goes through the token fix-up pipeline.
 */
private string translateToD(
    in from!"clang".Cursor cursor,
    ref from!"dpp.runtime.context".Context context,
    in from!"clang".Token[] tokens,
    )
    @safe
{
    import dpp.translation.type: translateElaborated;

    if(isLiteralMacro(tokens)) return fixLiteral(tokens[1]);
    if(tokens.length == 1) return ""; // e.g. `#define FOO`

    return tokens
        .fixSizeof(cursor)
        .fixCasts(cursor, context)
        .fixArrow
        .fixNull
        .toString
        .translateElaborated(context)
        ;
}


/// Joins the spellings of all tokens but the first with single spaces.
private string toString(R)(R tokens) {
    import std.algorithm: map;
    import std.array: join;

    // skip the identifier (the macro's name) because of DPP_ENUM_
    return tokens[1..$]
        .map!(t => t.spelling)
        .join(" ");
}

/// Applies all of the literal fix-ups, in order, to a literal token's spelling.
private string fixLiteral(in from!"clang".Token token)
    @safe pure
    in(token.kind == from!"clang".Token.Kind.Literal)
    do
{
    return token.spelling
        .fixLowercaseSuffix
        .fixMultiCharacterLiterals
        .fixWideCharStrings
        .fixOctal
        .fixMicrosoftSuffixes
        .fixLongLong
        ;
}


/// Lazily replaces every `->` punctuation token with `.`.
private auto fixArrow(R)(R tokens) {
    import clang: Token;
    import std.algorithm: map;

    static const(Token) replace(in Token token) {
        return token == Token(Token.Kind.Punctuation, "->")
            ? Token(Token.Kind.Punctuation, ".")
            : token;
    }

    return tokens
        .map!replace
        ;
}

/// Lazily replaces every `NULL` identifier token with `null`.
private auto fixNull(R)(R tokens)
{
    import clang: Token;
    import std.algorithm: map;

    static const(Token) replace(in Token token) {
        return token == Token(Token.Kind.Identifier, "NULL")
            ? Token(Token.Kind.Identifier, "null")
            : token;
    }

    return tokens
        .map!replace
        ;
}

/**
   Handles the Microsoft integer literal suffixes `i64`, `i32`, `i16` and `i8`.
   `i64` becomes `L`; the others are dropped. Only done on Windows, elsewhere
   the string is returned as is.
 */
version(Windows)
    private string fixMicrosoftSuffixes(in string str) @safe pure nothrow {
        import std.algorithm: endsWith;

        if(str.endsWith("i64"))
            return str[0 .. $-3] ~ "L";
        else if(str.endsWith("i32"))
            return str[0 .. $-3];
        else if(str.endsWith("i16"))
            return str[0 .. $-3];
        else if(str.endsWith("i8"))
            return str[0 .. $-2]; // the suffix is only two characters long
        return str;
    }
else
    private string fixMicrosoftSuffixes(in string str) @safe pure nothrow {
        return str;
    }

/// Rewrites a wide string literal of the form `L"..."` as `"..."w`.
private string fixWideCharStrings(in string str) @safe pure nothrow {
    if(str.length >=3 && str[0] == 'L' && str[1] == '"' && str[$-1] == '"') {
        return str[1 .. $] ~ "w";
    }

    return str;
}

/// Rewrites an ASCII multi-character literal such as `'abcd'` as its integer value.
private string fixMultiCharacterLiterals(in string str) @safe pure nothrow {
    // Multi-character literals are implementation-defined, but allowed, in C.
    // The aim is to identify them and to distinguish them from a
    // non-ASCII character, which is just forwarded to D assuming utf-8 source.
    // Moreover, '\uxxx' and other escape sequences should be forwarded.
    if(str.length > 3 && str[0] == '\'' && str[$-1] == '\'' && str[1] != '\\') {
        // apparently a multi-char literal, let's translate to int
        // the way this is typically done in common compilers, e.g.
        // https://gcc.gnu.org/onlinedocs/cpp/Implementation-defined-behavior.html
        import std.conv: to;

        int result;
        foreach(char ch; str[1 .. $-1]) {
            // any multi-byte character I'm going to assume
            // is just a single UTF-8 char and punt on it.
            if(ch > 127) return str;
            result <<= 8;
            result |= cast(ubyte) ch;
        }
        return to!string(result);
    }
    return str; // not one of these, don't touch
}

/// Upper-cases a trailing `l` or `ll`.
private string fixLowercaseSuffix(in string str) @safe pure nothrow {
    import std.algorithm: endsWith;

    if(str.endsWith("ll"))
        return str[0 .. $-2] ~ "LL";
    if(str.endsWith("l"))
        return str[0 .. $-1] ~ "L";
    return str;
}

/// Rewrites the suffixes `LL` as `L`, and `LLU`/`ULL` as `LU` (case-insensitively).
private string fixLongLong(in string str) @safe pure {
    import std.uni : toUpper;
    const suffix = str.length < 3 ? "" : str[$-3 .. $].toUpper;

    if (suffix.length > 0) {
        if (suffix == "LLU" || suffix == "ULL")
            return str[0 .. $-3] ~ "LU";

        if (suffix[1 .. $] == "LL")
            return str[0 .. $-2] ~ "L";
    }

    return str;
}


/**
   Rewrites a literal that starts with `0` followed by another digit as an
   instantiation of `std.conv.octal`, with the leading zeroes removed.
   A literal made up of zeroes only becomes `0`.
 */
private string fixOctal(in string spelling) @safe pure {
    import std.algorithm: countUntil;
    import std.uni: isNumber;

    const isOctal =
        spelling.length > 1
        && spelling[0] == '0'
        && spelling[1].isNumber
        //&& token.spelling.isStringRepr!long
        ;

    if(!isOctal) return spelling;

    const firstNonZero = spelling.countUntil!(a => a != '0');
    if(firstNonZero == -1) return "0";

    return `std.conv.octal!` ~ spelling[firstNonZero .. $];
}


/**
   Rewrites `sizeof(...)` as `(...).sizeof`.
   For function-like macros everything up to and including the first
   right paren (the parameter list) is left alone.
   A `sizeof(` whose paren is never closed is left untranslated.

   Throws: Exception if the macro is function-like but has no right paren.
 */
private auto fixSizeof(R)(R tokens, in from!"clang".Cursor cursor)
{
    import clang: Token;
    import std.conv: text;
    import std.algorithm: countUntil;

    // find the closing paren for the function-like macro's argument list
    size_t lastIndex = 0;
    if(cursor.isMacroFunction) {
        lastIndex = tokens
            .countUntil!(t => t == Token(Token.Kind.Punctuation, ")"))
            +1; // skip the right paren

        if(lastIndex == 0)  // given the +1 above, -1 becomes 0
            throw new Exception(text("Can't fix sizeof in function-like macro with tokens: ", tokens));
    }

    const beginning = tokens[0 .. lastIndex];
    const(Token)[] middle;

    // `i + 1 < length` instead of `i < length - 1`: no underflow when empty
    for(size_t i = lastIndex; i + 1 < tokens.length; ++i) {
        if(tokens[i] == Token(Token.Kind.Keyword, "sizeof")
           && tokens[i + 1] == Token(Token.Kind.Punctuation, "("))
        {
            // find closing paren
            long open = 1;
            size_t scanIndex = i + 2;  // skip i + 1 since that's the open paren

            while(open != 0 && scanIndex < tokens.length) {
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, "("))
                    ++open;
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, ")"))
                    --open;

                ++scanIndex;
            }

            // unbalanced parens (legal in a macro body): nothing to rewrite
            if(open != 0) continue;

            middle ~= tokens[lastIndex .. i] ~ tokens[i + 1 .. scanIndex] ~ Token(Token.Kind.Keyword, ".sizeof");
            lastIndex = scanIndex;
            // advance i past the sizeof. -1 because of ++i in the for loop
            i = lastIndex - 1;
        }
    }

    // can't chain here due to fixCasts appending to const(Token)[]
    return beginning ~ middle ~ tokens[lastIndex .. $];
}


/**
   Rewrites C casts such as `(type)` as `cast(type)`.
   For function-like macros everything up to and including the first
   right paren (the parameter list) is left alone.
   A parenthesised group that is never closed is left untranslated.

   Throws: Exception if the macro is function-like but has no right paren.
 */
private auto fixCasts(R)(
    R tokens,
    in from!"clang".Cursor cursor,
    in from!"dpp.runtime.context".Context context,
    )
{
    import clang: Token;
    import std.conv: text;
    import std.algorithm: countUntil;
    import std.range: chain;

    // if the token array is a built-in or user-defined type
    bool isType(in Token[] tokens) {

        if( // fundamental type
            tokens.length == 1
            && tokens[0].kind == Token.Kind.Keyword
            && tokens[0].spelling != "sizeof"
            && tokens[0].spelling != "alignof"
            )
            return true;

        if( // user defined type
            tokens.length == 1
            && tokens[0].kind == Token.Kind.Identifier
            && context.isUserDefinedType(tokens[0].spelling)
            )
            return true;

        if(  // pointer to a type
            tokens.length >= 2
            && tokens[$-1] == Token(Token.Kind.Punctuation, "*")
            && isType(tokens[0 .. $-1])
            )
            return true;

        if( // const type
            tokens.length >= 2
            && tokens[0] == Token(Token.Kind.Keyword, "const")
            && isType(tokens[1..$])
            )
            return true;

        return false;
    }

    size_t lastIndex = 0;
    // find the closing paren for the function-like macro's argument list
    if(cursor.isMacroFunction) {
        lastIndex = tokens
            .countUntil!(t => t == Token(Token.Kind.Punctuation, ")"))
            +1; // skip the right paren
        if(lastIndex == 0)  // given the +1 above, -1 becomes 0
            throw new Exception(text("Can't fix casts in function-like macro with tokens: ", tokens));
    }

    const beginning = tokens[0 .. lastIndex];
    const(Token)[] middle;

    // `i + 1 < length` instead of `i < length - 1`: no underflow when empty
    for(size_t i = lastIndex; i + 1 < tokens.length; ++i) {
        if(tokens[i] == Token(Token.Kind.Punctuation, "(")) {
            // find closing paren
            long open = 1;
            size_t scanIndex = i + 1;  // first token after the open paren

            while(open != 0 && scanIndex < tokens.length) {
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, "("))
                    ++open;
                // for the 2nd condition, see it.c.compile.preprocessor.multiline
                if(tokens[scanIndex] == Token(Token.Kind.Punctuation, ")") ||
                   tokens[scanIndex] == Token(Token.Kind.Punctuation, "\\\n)"))
                    --open;

                ++scanIndex;
            }

            // unbalanced parens (legal in a macro body): can't be a cast
            if(open != 0) continue;

            // at this point scanIndex is the 1 + index of closing paren

            // we want to ignore e.g. `(int)(foo).sizeof` even if `foo` is a type
            const followedByDot =
                tokens.length > scanIndex
                && tokens[scanIndex].spelling.length > 0
                && tokens[scanIndex].spelling[0] == '.'
                ;

            if(isType(tokens[i + 1 .. scanIndex - 1]) && !followedByDot) {
                middle ~= tokens[lastIndex .. i] ~
                    Token(Token.Kind.Punctuation, "cast(") ~
                    tokens[i + 1 .. scanIndex]; // includes closing paren
                lastIndex = scanIndex;
                // advance i past the cast. -1 because of ++i in the for loop
                i = lastIndex - 1;
            }
        }
    }

    return chain(beginning, middle, tokens[lastIndex .. $]);
}