2 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 using System.Collections.Generic;
28 using System.Reflection;
32 * This is the main compiler class.
38 * Command-line entry point.
40 public static void Main(string[] args)
43 List<string> r = new List<string>();
44 string outBase = null;
45 List<string> entryPoints = new List<string>();
46 string coreRun = null;
50 for (int i = 0; i < args.Length; i ++) {
52 if (!a.StartsWith("-")) {
58 if (++ i >= args.Length) {
65 while (a.StartsWith("-")) {
68 int j = a.IndexOf('=');
72 pname = a.ToLowerInvariant();
74 pval2 = (i + 1) < args.Length
77 pname = a.Substring(0, j).Trim()
79 pval = a.Substring(j + 1);
92 if (outBase != null) {
117 foreach (string ep in pval.Split(',')) {
118 string epz = ep.Trim();
119 if (epz.Length > 0) {
120 entryPoints.Add(epz);
136 if (outBase == null) {
139 if (entryPoints.Count == 0) {
140 entryPoints.Add("main");
142 if (coreRun == null) {
145 T0Comp tc = new T0Comp();
146 tc.enableFlowAnalysis = flow;
149 using (TextReader tr = new StreamReader(
150 Assembly.GetExecutingAssembly()
151 .GetManifestResourceStream("t0-kernel")))
155 foreach (string a in r) {
156 Console.WriteLine("[{0}]", a);
157 using (TextReader tr = File.OpenText(a)) {
161 tc.Generate(outBase, coreRun, entryPoints.ToArray());
162 } catch (Exception e) {
163 Console.WriteLine(e.ToString());
171 "usage: T0Comp.exe [ options... ] file...");
175 " -o file use 'file' as base for output file name (default: 't0out')");
177 " -r name use 'name' as base for run function (default: same as output)");
179 " -m name[,name...]");
181 " define entry point(s)");
183 " -nf disable flow analysis");
188 * If 'delayedChar' is Int32.MinValue then there is no delayed
190 * If 'delayedChar' equals x >= 0 then there is one delayed
191 * character of value x.
192 * If 'delayedChar' equals y < 0 then there are two delayed
193 * characters, a newline (U+000A) followed by character of
196 TextReader currentInput;
200 * Common StringBuilder used to parse tokens; it is reused for
203 StringBuilder tokenBuilder;
206 * There may be a delayed token in some cases.
211 * Defined words are referenced by name in this map. Names are
212 * case-sensitive; for better reproducibility, the map is sorted
215 IDictionary<string, Word> words;
218 * Last defined word is also referenced in 'lastWord'. This is
219 * used by 'immediate'.
224 * When compiling, this builder is used. A stack saves other
225 * builders in case of nested definition.
227 WordBuilder wordBuilder;
228 Stack<WordBuilder> savedWordBuilders;
231 * C code defined for words is kept in this map, by word name.
233 IDictionary<string, string> allCCode;
236 * 'compiling' is true when compiling tokens to a word, false
237 * when interpreting them.
242 * 'quitRunLoop' is set to true to trigger exit of the
243 * interpretation loop when the end of the current input file
249 * 'extraCode' is for C code that is to be added as preamble to
252 List<string> extraCode;
255 * 'extraCodeDefer' is for C code that is to be added in the C
256 * output _after_ the data and code blocks.
258 List<string> extraCodeDefer;
261 * 'dataBlock' is the data block in which constant data bytes
267 * Counter for blocks of constant data.
272 * Flow analysis enable flag.
274 bool enableFlowAnalysis;
277 * Data stack size limit.
282 * Return stack size limit.
288 tokenBuilder = new StringBuilder();
289 words = new SortedDictionary<string, Word>(
290 StringComparer.Ordinal);
291 savedWordBuilders = new Stack<WordBuilder>();
292 allCCode = new SortedDictionary<string, string>(
293 StringComparer.Ordinal);
295 extraCode = new List<string>();
296 extraCodeDefer = new List<string>();
297 enableFlowAnalysis = true;
300 * Native words are predefined and implemented only with
301 * native code. Some may be part of the generated output,
302 * if C code is set for them.
307 * Parses next token as a word name, then a C code snippet.
308 * Sets the C code for that word.
310 AddNative("add-cc:", false, SType.BLANK, cpu => {
314 "EOF reached (missing name)");
316 if (allCCode.ContainsKey(tt)) {
318 "C code already set for: " + tt);
320 allCCode[tt] = ParseCCode();
325 * Parses next token as a word name, then a C code snippet.
326 * A new word is defined, that throws an exception when
327 * invoked during compilation. The C code is set for that
330 AddNative("cc:", false, SType.BLANK, cpu => {
334 "EOF reached (missing name)");
336 Word w = AddNative(tt, false, cpu2 => {
338 "C-only word: " + tt);
340 if (allCCode.ContainsKey(tt)) {
342 "C code already set for: " + tt);
345 allCCode[tt] = ParseCCode(out stackEffect);
346 w.StackEffect = stackEffect;
351 * Parses a C code snippet, then adds it to the generated
354 AddNative("preamble", false, SType.BLANK, cpu => {
355 extraCode.Add(ParseCCode());
360 * Parses a C code snippet, then adds it to the generated
361 * output after the data and code blocks.
363 AddNative("postamble", false, SType.BLANK, cpu => {
364 extraCodeDefer.Add(ParseCCode());
369 * Expects two integers and a string, and makes a
370 * constant that stands for the string as a C constant
371 * expression. The two integers are the expected range
372 * (min-max, inclusive).
374 AddNative("make-CX", false, new SType(3, 1), cpu => {
375 TValue c = cpu.Pop();
376 if (!(c.ptr is TPointerBlob)) {
377 throw new Exception(string.Format(
378 "'{0}' is not a string", c));
382 TValue tv = new TValue(0, new TPointerExpr(
383 c.ToString(), min, max));
389 * Parses two integer constants, then a C code snippet.
390 * It then pushes on the stack, or compiles to the
391 * current word, a value consisting of the given C
392 * expression; the two integers indicate the expected
393 * range (min-max, inclusive) of the C expression when
396 AddNative("CX", true, cpu => {
400 "EOF reached (missing min value)");
402 int min = ParseInteger(tt);
406 "EOF reached (missing max value)");
408 int max = ParseInteger(tt);
410 throw new Exception("min/max in wrong order");
412 TValue tv = new TValue(0, new TPointerExpr(
413 ParseCCode().Trim(), min, max));
415 wordBuilder.Literal(tv);
423 * Interrupt the current execution. This implements
424 * coroutines. It cannot be invoked during compilation.
426 AddNative("co", false, SType.BLANK, cpu => {
427 throw new Exception("No coroutine in compile mode");
432 * Parses next token as word name. It begins definition
433 * of that word, setting it as current target for
434 * word building. Any previously opened word is saved
435 * and will become available again as a target when that
436 * new word is finished building.
438 AddNative(":", false, cpu => {
442 "EOF reached (missing name)");
445 savedWordBuilders.Push(wordBuilder);
449 wordBuilder = new WordBuilder(this, tt);
453 "EOF reached (while compiling)");
456 SType stackEffect = ParseStackEffectNF();
457 if (!stackEffect.IsKnown) {
459 "Invalid stack effect syntax");
461 wordBuilder.StackEffect = stackEffect;
468 * Pops a string as word name, and two integers as stack
469 * effect. It begins definition of that word, setting it
470 * as current target for word building. Any previously
471 * opened word is saved and will become available again as
472 * a target when that new word is finished building.
474 * Stack effect is the pair 'din dout'. If din is negative,
475 * then the stack effect is "unknown". If din is nonnegative
476 * but dout is negative, then the word is reputed never to
479 AddNative("define-word", false, cpu => {
480 int dout = cpu.Pop();
482 TValue s = cpu.Pop();
483 if (!(s.ptr is TPointerBlob)) {
484 throw new Exception(string.Format(
485 "Not a string: '{0}'", s));
487 string tt = s.ToString();
489 savedWordBuilders.Push(wordBuilder);
493 wordBuilder = new WordBuilder(this, tt);
494 wordBuilder.StackEffect = new SType(din, dout);
499 * Ends current word. The current word is registered under
500 * its name, and the previously opened word (if any) becomes
501 * again the building target.
503 AddNative(";", true, cpu => {
505 throw new Exception("Not compiling");
507 Word w = wordBuilder.Build();
508 string name = w.Name;
509 if (words.ContainsKey(name)) {
511 "Word already defined: " + name);
515 if (savedWordBuilders.Count > 0) {
516 wordBuilder = savedWordBuilders.Pop();
525 * Sets the last defined word as immediate.
527 AddNative("immediate", false, cpu => {
528 if (lastWord == null) {
529 throw new Exception("No word defined yet");
531 lastWord.Immediate = true;
535 * literal (immediate)
536 * Pops the current TOS value, and adds to the current word
537 * the action of pushing that value. This cannot be used
538 * when no word is being built.
540 WordNative wliteral = AddNative("literal", true, cpu => {
542 wordBuilder.Literal(cpu.Pop());
547 * Pops the current TOS value, which must be an XT (pointer
548 * to a word); the action of calling that word is compiled
549 * in the current word.
551 WordNative wcompile = AddNative("compile", false, cpu => {
553 wordBuilder.Call(cpu.Pop().ToXT());
557 * postpone (immediate)
558 * Parses the next token as a word name, and adds to the
559 * current word the action of calling that word. This
560 * basically removes immediacy from the next word.
562 AddNative("postpone", true, cpu => {
567 "EOF reached (missing name)");
570 bool isVal = TryParseLiteral(tt, out v);
571 Word w = LookupNF(tt);
572 if (isVal && w != null) {
573 throw new Exception(String.Format(
574 "Ambiguous: both defined word and"
575 + " literal: {0}", tt));
578 wordBuilder.Literal(v);
579 wordBuilder.CallExt(wliteral);
580 } else if (w != null) {
582 wordBuilder.CallExt(w);
584 wordBuilder.Literal(new TValue(0,
586 wordBuilder.CallExt(wcompile);
589 wordBuilder.Literal(new TValue(0,
590 new TPointerXT(tt)));
591 wordBuilder.CallExt(wcompile);
596 * Interrupt compilation with an error.
598 AddNative("exitvm", false, cpu => {
599 throw new Exception();
603 * Open a new data block. Its symbolic address is pushed
606 AddNative("new-data-block", false, cpu => {
607 dataBlock = new ConstData(this);
608 cpu.Push(new TValue(0, new TPointerBlob(dataBlock)));
612 * Define a new data word. The data address and name are
613 * popped from the stack.
615 AddNative("define-data-word", false, cpu => {
616 string name = cpu.Pop().ToString();
617 TValue va = cpu.Pop();
618 TPointerBlob tb = va.ptr as TPointerBlob;
621 "Address is not a data area");
623 Word w = new WordData(this, name, tb.Blob, va.x);
624 if (words.ContainsKey(name)) {
626 "Word already defined: " + name);
633 * Get an address pointing at the end of the current
634 * data block. This is the address of the next byte that
637 AddNative("current-data", false, cpu => {
638 if (dataBlock == null) {
640 "No current data block");
642 cpu.Push(new TValue(dataBlock.Length,
643 new TPointerBlob(dataBlock)));
647 * Add a byte value to the data block.
649 AddNative("data-add8", false, cpu => {
650 if (dataBlock == null) {
652 "No current data block");
655 if (v < 0 || v > 0xFF) {
657 "Byte value out of range: " + v);
659 dataBlock.Add8((byte)v);
663 * Set a byte value in the data block.
665 AddNative("data-set8", false, cpu => {
666 TValue va = cpu.Pop();
667 TPointerBlob tb = va.ptr as TPointerBlob;
670 "Address is not a data area");
673 if (v < 0 || v > 0xFF) {
675 "Byte value out of range: " + v);
677 tb.Blob.Set8(va.x, (byte)v);
681 * Get a byte value from a data block.
683 AddNative("data-get8", false, new SType(1, 1), cpu => {
684 TValue va = cpu.Pop();
685 TPointerBlob tb = va.ptr as TPointerBlob;
688 "Address is not a data area");
690 int v = tb.Blob.Read8(va.x);
697 AddNative("compile-local-read", false, cpu => {
699 wordBuilder.GetLocal(cpu.Pop().ToString());
701 AddNative("compile-local-write", false, cpu => {
703 wordBuilder.PutLocal(cpu.Pop().ToString());
706 AddNative("ahead", true, cpu => {
710 AddNative("begin", true, cpu => {
714 AddNative("again", true, cpu => {
718 AddNative("until", true, cpu => {
720 wordBuilder.AgainIfNot();
722 AddNative("untilnot", true, cpu => {
724 wordBuilder.AgainIf();
726 AddNative("if", true, cpu => {
728 wordBuilder.AheadIfNot();
730 AddNative("ifnot", true, cpu => {
732 wordBuilder.AheadIf();
734 AddNative("then", true, cpu => {
738 AddNative("cs-pick", false, cpu => {
740 wordBuilder.CSPick(cpu.Pop());
742 AddNative("cs-roll", false, cpu => {
744 wordBuilder.CSRoll(cpu.Pop());
746 AddNative("next-word", false, cpu => {
749 throw new Exception("No next word (EOF)");
751 cpu.Push(StringToBlob(s));
753 AddNative("parse", false, cpu => {
755 string s = ReadTerm(d);
756 cpu.Push(StringToBlob(s));
758 AddNative("char", false, cpu => {
761 throw new Exception("No next character (EOF)");
765 AddNative("'", false, cpu => {
766 string name = Next();
767 cpu.Push(new TValue(0, new TPointerXT(name)));
771 * The "execute" word is valid in generated C code, but
772 * since it jumps to a runtime pointer, its actual stack
773 * effect cannot be computed in advance.
775 AddNative("execute", false, cpu => {
776 cpu.Pop().Execute(this, cpu);
779 AddNative("[", true, cpu => {
783 AddNative("]", false, cpu => {
786 AddNative("(local)", false, cpu => {
788 wordBuilder.DefLocal(cpu.Pop().ToString());
790 AddNative("ret", true, cpu => {
795 AddNative("drop", false, new SType(1, 0), cpu => {
798 AddNative("dup", false, new SType(1, 2), cpu => {
799 cpu.Push(cpu.Peek(0));
801 AddNative("swap", false, new SType(2, 2), cpu => {
804 AddNative("over", false, new SType(2, 3), cpu => {
805 cpu.Push(cpu.Peek(1));
807 AddNative("rot", false, new SType(3, 3), cpu => {
810 AddNative("-rot", false, new SType(3, 3), cpu => {
815 * "roll" and "pick" are special in that the stack slot
816 * they inspect might be known only at runtime, so an
817 * absolute stack effect cannot be attributed. Instead,
818 * we simply hope that the caller knows what it is doing,
819 * and we use a simple stack effect for just the count
820 * value and picked value.
822 AddNative("roll", false, new SType(1, 0), cpu => {
825 AddNative("pick", false, new SType(1, 1), cpu => {
826 cpu.Push(cpu.Peek(cpu.Pop()));
829 AddNative("+", false, new SType(2, 1), cpu => {
830 TValue b = cpu.Pop();
831 TValue a = cpu.Pop();
835 } else if (a.ptr is TPointerBlob
836 && b.ptr is TPointerBlob)
838 cpu.Push(StringToBlob(
839 a.ToString() + b.ToString()));
841 throw new Exception(string.Format(
842 "Cannot add '{0}' to '{1}'", b, a));
845 AddNative("-", false, new SType(2, 1), cpu => {
847 * We can subtract two pointers, provided that
848 * they point to the same blob. Otherwise,
849 * the subtraction's second operand must be an
852 TValue b = cpu.Pop();
853 TValue a = cpu.Pop();
854 TPointerBlob ap = a.ptr as TPointerBlob;
855 TPointerBlob bp = b.ptr as TPointerBlob;
856 if (ap != null && bp != null && ap.Blob == bp.Blob) {
857 cpu.Push(new TValue(a.x - b.x));
864 AddNative("neg", false, new SType(1, 1), cpu => {
868 AddNative("*", false, new SType(2, 1), cpu => {
873 AddNative("/", false, new SType(2, 1), cpu => {
878 AddNative("u/", false, new SType(2, 1), cpu => {
883 AddNative("%", false, new SType(2, 1), cpu => {
888 AddNative("u%", false, new SType(2, 1), cpu => {
893 AddNative("<", false, new SType(2, 1), cpu => {
898 AddNative("<=", false, new SType(2, 1), cpu => {
903 AddNative(">", false, new SType(2, 1), cpu => {
908 AddNative(">=", false, new SType(2, 1), cpu => {
913 AddNative("=", false, new SType(2, 1), cpu => {
914 TValue b = cpu.Pop();
915 TValue a = cpu.Pop();
916 cpu.Push(a.Equals(b));
918 AddNative("<>", false, new SType(2, 1), cpu => {
919 TValue b = cpu.Pop();
920 TValue a = cpu.Pop();
921 cpu.Push(!a.Equals(b));
923 AddNative("u<", false, new SType(2, 1), cpu => {
924 uint bx = cpu.Pop().UInt;
925 uint ax = cpu.Pop().UInt;
926 cpu.Push(new TValue(ax < bx));
928 AddNative("u<=", false, new SType(2, 1), cpu => {
929 uint bx = cpu.Pop().UInt;
930 uint ax = cpu.Pop().UInt;
931 cpu.Push(new TValue(ax <= bx));
933 AddNative("u>", false, new SType(2, 1), cpu => {
934 uint bx = cpu.Pop().UInt;
935 uint ax = cpu.Pop().UInt;
936 cpu.Push(new TValue(ax > bx));
938 AddNative("u>=", false, new SType(2, 1), cpu => {
943 AddNative("and", false, new SType(2, 1), cpu => {
948 AddNative("or", false, new SType(2, 1), cpu => {
953 AddNative("xor", false, new SType(2, 1), cpu => {
958 AddNative("not", false, new SType(1, 1), cpu => {
962 AddNative("<<", false, new SType(2, 1), cpu => {
963 int count = cpu.Pop();
964 if (count < 0 || count > 31) {
965 throw new Exception("Invalid shift count");
968 cpu.Push(ax << count);
970 AddNative(">>", false, new SType(2, 1), cpu => {
971 int count = cpu.Pop();
972 if (count < 0 || count > 31) {
973 throw new Exception("Invalid shift count");
976 cpu.Push(ax >> count);
978 AddNative("u>>", false, new SType(2, 1), cpu => {
979 int count = cpu.Pop();
980 if (count < 0 || count > 31) {
981 throw new Exception("Invalid shift count");
984 cpu.Push(ax >> count);
987 AddNative(".", false, new SType(1, 0), cpu => {
988 Console.Write(" {0}", cpu.Pop().ToString());
990 AddNative(".s", false, SType.BLANK, cpu => {
992 for (int i = n - 1; i >= 0; i --) {
993 Console.Write(" {0}", cpu.Peek(i).ToString());
996 AddNative("putc", false, new SType(1, 0), cpu => {
997 Console.Write((char)cpu.Pop());
999 AddNative("puts", false, new SType(1, 0), cpu => {
1000 Console.Write("{0}", cpu.Pop().ToString());
1002 AddNative("cr", false, SType.BLANK, cpu => {
1003 Console.WriteLine();
1005 AddNative("eqstr", false, new SType(2, 1), cpu => {
1006 string s2 = cpu.Pop().ToString();
1007 string s1 = cpu.Pop().ToString();
1012 WordNative AddNative(string name, bool immediate,
1013 WordNative.NativeRun code)
1015 return AddNative(name, immediate, SType.UNKNOWN, code);
1018 WordNative AddNative(string name, bool immediate, SType stackEffect,
1019 WordNative.NativeRun code)
1021 if (words.ContainsKey(name)) {
1022 throw new Exception(
1023 "Word already defined: " + name);
1025 WordNative w = new WordNative(this, name, code);
1026 w.Immediate = immediate;
1027 w.StackEffect = stackEffect;
1032 internal long NextBlobID()
1034 return currentBlobID ++;
1039 int c = delayedChar;
1041 delayedChar = Int32.MinValue;
1042 } else if (c > Int32.MinValue) {
1043 delayedChar = -(c + 1);
1046 c = currentInput.Read();
1049 if (delayedChar >= 0) {
1051 delayedChar = Int32.MinValue;
1053 c = currentInput.Read();
1064 * Un-read the character value 'c'. That value MUST be the one
1065 * that was obtained from NextChar().
1072 if (delayedChar < 0) {
1073 if (delayedChar != Int32.MinValue) {
1074 throw new Exception(
1075 "Already two delayed characters");
1078 } else if (c != '\n') {
1079 throw new Exception("Cannot delay two characters");
1081 delayedChar = -(delayedChar + 1);
1087 string r = delayedToken;
1089 delayedToken = null;
1092 tokenBuilder.Length = 0;
1104 return ParseString();
1107 tokenBuilder.Append((char)c);
1109 if (c < 0 || IsWS(c)) {
1111 return tokenBuilder.ToString();
1119 string r = ParseCCode(out stackEffect);
1120 if (stackEffect.IsKnown) {
1121 throw new Exception(
1122 "Stack effect forbidden in this declaration");
1127 string ParseCCode(out SType stackEffect)
1129 string s = ParseCCodeNF(out stackEffect);
1131 throw new Exception("Error while parsing C code");
1136 string ParseCCodeNF(out SType stackEffect)
1138 stackEffect = SType.UNKNOWN;
1146 if (stackEffect.IsKnown) {
1150 stackEffect = ParseStackEffectNF();
1151 if (!stackEffect.IsKnown) {
1155 } else if (c != '{') {
1162 StringBuilder sb = new StringBuilder();
1174 if (-- count == 0) {
1175 return sb.ToString();
1184 * Parse a stack effect declaration. This method assumes that the
1185 * opening parenthesis has just been read. If the parsing fails,
1186 * then this method returns SType.UNKNOWN.
1188 SType ParseStackEffectNF()
1190 bool seenSep = false;
1191 bool seenBang = false;
1192 int din = 0, dout = 0;
1196 return SType.UNKNOWN;
1200 return SType.UNKNOWN;
1203 } else if (t == ")") {
1205 if (seenBang && dout == 1) {
1208 return new SType(din, dout);
1210 return SType.UNKNOWN;
1214 if (dout == 0 && t == "!") {
1225 string ParseString()
1227 StringBuilder sb = new StringBuilder();
1235 throw new Exception(
1236 "Unfinished literal string");
1241 throw new Exception(String.Format(
1242 "not an hex digit: U+{0:X4}",
1245 acc = (acc << 4) + d;
1246 if (-- hexNum == 0) {
1247 sb.Append((char)acc);
1253 case '\n': SkipNL(); break;
1261 sb.Append(SingleCharEscape(c));
1267 return sb.ToString();
1279 static char SingleCharEscape(int c)
1282 case 'n': return '\n';
1283 case 'r': return '\r';
1284 case 't': return '\t';
1285 case 's': return ' ';
1292 * A backslash+newline sequence occurred in a literal string; we
1293 * check and consume the newline escape sequence (whitespace at
1294 * start of next line, then a double-quote character).
1301 throw new Exception("EOF in literal string");
1304 throw new Exception(
1305 "Unescaped newline in literal string");
1313 throw new Exception(
1314 "Invalid newline escape in literal string");
1318 static char DecodeCharConst(string t)
1320 if (t.Length == 1 && t[0] != '\\') {
1323 if (t.Length >= 2 && t[0] == '\\') {
1326 if (t.Length == 4) {
1327 int x = DecHex(t.Substring(2));
1334 if (t.Length == 6) {
1335 int x = DecHex(t.Substring(2));
1342 if (t.Length == 2) {
1343 return SingleCharEscape(t[1]);
1348 throw new Exception("Invalid literal char: `" + t);
1351 static int DecHex(string s)
1354 foreach (char c in s) {
1359 acc = (acc << 4) + d;
1364 static int HexVal(int c)
1366 if (c >= '0' && c <= '9') {
1368 } else if (c >= 'A' && c <= 'F') {
1369 return c - ('A' - 10);
1370 } else if (c >= 'a' && c <= 'f') {
1371 return c - ('a' - 10);
1377 string ReadTerm(int ct)
1379 StringBuilder sb = new StringBuilder();
1383 throw new Exception(String.Format(
1384 "EOF reached before U+{0:X4}", ct));
1387 return sb.ToString();
1393 static bool IsWS(int c)
1398 void ProcessInput(TextReader tr)
1400 this.currentInput = tr;
1402 Word w = new WordNative(this, "toplevel",
1403 xcpu => { CompileStep(xcpu); });
1404 CPU cpu = new CPU();
1405 Opcode[] code = new Opcode[] {
1407 new OpcodeJumpUncond(-2)
1409 quitRunLoop = false;
1415 Opcode op = cpu.ipBuf[cpu.ipOff ++];
1420 void CompileStep(CPU cpu)
1425 throw new Exception("EOF while compiling");
1431 bool isVal = TryParseLiteral(tt, out v);
1432 Word w = LookupNF(tt);
1433 if (isVal && w != null) {
1434 throw new Exception(String.Format(
1435 "Ambiguous: both defined word and literal: {0}",
1440 wordBuilder.Literal(v);
1441 } else if (w != null) {
1445 wordBuilder.CallExt(w);
1448 wordBuilder.Call(tt);
1453 } else if (w != null) {
1456 throw new Exception(String.Format(
1457 "Unknown word: '{0}'", tt));
1462 string GetCCode(string name)
1465 allCCode.TryGetValue(name, out ccode);
1469 void Generate(string outBase, string coreRun,
1470 params string[] entryPoints)
1473 * Gather all words that are part of the generated
1474 * code. This is done by exploring references
1475 * transitively. All such words are thus implicitly
1478 IDictionary<string, Word> wordSet =
1479 new SortedDictionary<string, Word>(
1480 StringComparer.Ordinal);
1481 Queue<Word> tx = new Queue<Word>();
1482 foreach (string ep in entryPoints) {
1483 if (wordSet.ContainsKey(ep)) {
1486 Word w = Lookup(ep);
1487 wordSet[w.Name] = w;
1490 while (tx.Count > 0) {
1491 Word w = tx.Dequeue();
1492 foreach (Word w2 in w.GetReferences()) {
1493 if (wordSet.ContainsKey(w2.Name)) {
1496 wordSet[w2.Name] = w2;
1504 if (enableFlowAnalysis) {
1505 foreach (string ep in entryPoints) {
1506 Word w = wordSet[ep];
1508 Console.WriteLine("{0}: ds={1} rs={2}",
1509 ep, w.MaxDataStack, w.MaxReturnStack);
1510 if (w.MaxDataStack > dsLimit) {
1511 throw new Exception("'" + ep
1512 + "' exceeds data stack limit");
1514 if (w.MaxReturnStack > rsLimit) {
1515 throw new Exception("'" + ep
1516 + "' exceeds return stack"
1523 * Gather referenced data areas and compute their
1524 * addresses in the generated data block. The address
1525 * 0 in the data block is unaffected so that no
1526 * valid runtime pointer is equal to null.
1528 IDictionary<long, ConstData> blocks =
1529 new SortedDictionary<long, ConstData>();
1530 foreach (Word w in wordSet.Values) {
1531 foreach (ConstData cd in w.GetDataBlocks()) {
1536 foreach (ConstData cd in blocks.Values) {
1537 cd.Address = dataLen;
1538 dataLen += cd.Length;
1542 * Generated code is a sequence of "slot numbers", each
1543 * referencing either a piece of explicit C code, or an
1544 * entry in the table of interpreted words.
1546 * Opcodes other than "call" get the slots 0 to 6:
1549 * 1 const signed value
1550 * 2 get local local number
1551 * 3 put local local number
1552 * 4 jump signed offset
1553 * 5 jump if signed offset
1554 * 6 jump if not signed offset
1556 * The argument, if any, is in "7E" format: the value is
1557 * encoded in 7-bit chunks, with big-endian signed
1558 * convention. Each 7-bit chunk is encoded over one byte;
1559 * the upper bit is 1 for all chunks except the last one.
1561 * Words with explicit C code get the slot numbers
1562 * immediately after 6. Interpreted words come afterwards.
1564 IDictionary<string, int> slots = new Dictionary<string, int>();
1568 * Get explicit C code for words which have such code.
1569 * We use string equality on C code so that words with
1570 * identical implementations get merged.
1572 * We also check that words with no explicit C code are
1575 IDictionary<string, int> ccodeUni =
1576 new Dictionary<string, int>();
1577 IDictionary<int, string> ccodeNames =
1578 new Dictionary<int, string>();
1579 foreach (Word w in wordSet.Values) {
1580 string ccode = GetCCode(w.Name);
1581 if (ccode == null) {
1582 if (w is WordNative) {
1583 throw new Exception(String.Format(
1584 "No C code for native '{0}'",
1590 if (ccodeUni.ContainsKey(ccode)) {
1591 sn = ccodeUni[ccode];
1592 ccodeNames[sn] += " " + EscapeCComment(w.Name);
1595 ccodeUni[ccode] = sn;
1596 ccodeNames[sn] = EscapeCComment(w.Name);
1603 * Assign slot values to all remaining words; we know they
1604 * are all interpreted.
1606 int slotInterpreted = curSlot;
1607 foreach (Word w in wordSet.Values) {
1608 if (GetCCode(w.Name) != null) {
1611 int sn = curSlot ++;
1615 int numInterpreted = curSlot - slotInterpreted;
1618 * Verify that all entry points are interpreted words.
1620 foreach (string ep in entryPoints) {
1621 if (GetCCode(ep) != null) {
1622 throw new Exception(
1623 "Non-interpreted entry point");
1628 * Compute the code block. Each word (without any C code)
1629 * yields some CodeElement instances.
1631 List<CodeElement> gcodeList = new List<CodeElement>();
1632 CodeElement[] interpretedEntry =
1633 new CodeElement[numInterpreted];
1634 foreach (Word w in wordSet.Values) {
1635 if (GetCCode(w.Name) != null) {
1638 int n = gcodeList.Count;
1639 w.GenerateCodeElements(gcodeList);
1640 interpretedEntry[w.Slot - slotInterpreted] =
1643 CodeElement[] gcode = gcodeList.ToArray();
1646 * If there are fewer than 256 words in total (C +
1647 * interpreted) then we can use "one-byte code" which is
1648 * more compact when the number of words is in the
1652 if (slotInterpreted + numInterpreted >= 256) {
1653 Console.WriteLine("WARNING: more than 255 words");
1654 oneByteCode = false;
1660 * Compute all addresses and offsets. This loops until
1661 * the addresses stabilize.
1664 int[] gcodeLen = new int[gcode.Length];
1666 for (int i = 0; i < gcode.Length; i ++) {
1667 gcodeLen[i] = gcode[i].GetLength(oneByteCode);
1670 for (int i = 0; i < gcode.Length; i ++) {
1671 gcode[i].Address = off;
1672 gcode[i].LastLength = gcodeLen[i];
1675 if (off == totalLen) {
1682 * Produce output file.
1684 using (TextWriter tw = File.CreateText(outBase + ".c")) {
1688 @"/* Automatically generated code; do not modify directly. */
1696 const unsigned char *ip;
1700 t0_parse7E_unsigned(const unsigned char **p)
1709 x = (x << 7) | (uint32_t)(y & 0x7F);
1717 t0_parse7E_signed(const unsigned char **p)
1722 neg = ((**p) >> 6) & 1;
1728 x = (x << 7) | (uint32_t)(y & 0x7F);
1731 return -(int32_t)~x - 1;
1739 #define T0_VBYTE(x, n) (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80)
1740 #define T0_FBYTE(x, n) (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F)
1741 #define T0_SBYTE(x) (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8)
1742 #define T0_INT1(x) T0_FBYTE(x, 0)
1743 #define T0_INT2(x) T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1744 #define T0_INT3(x) T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1745 #define T0_INT4(x) T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1746 #define T0_INT5(x) T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1748 /* static const unsigned char t0_datablock[]; */
1752 * Add declarations (not definitions) for the
1753 * entry point initialisation functions, and the
1757 foreach (string ep in entryPoints) {
1758 tw.WriteLine("void {0}_init_{1}(void *t0ctx);",
1762 tw.WriteLine("void {0}_run(void *t0ctx);", coreRun);
1765 * Add preamble elements here. They may be needed
1766 * for evaluating constant expressions in the
1769 foreach (string pp in extraCode) {
1771 tw.WriteLine("{0}", pp);
1776 tw.Write("static const unsigned char"
1777 + " t0_datablock[] = {");
1778 bw = new BlobWriter(tw, 78, 1);
1780 foreach (ConstData cd in blocks.Values) {
1787 tw.Write("static const unsigned char"
1788 + " t0_codeblock[] = {");
1789 bw = new BlobWriter(tw, 78, 1);
1790 foreach (CodeElement ce in gcode) {
1791 ce.Encode(bw, oneByteCode);
1797 tw.Write("static const uint16_t t0_caddr[] = {");
1798 for (int i = 0; i < interpretedEntry.Length; i ++) {
1803 tw.Write("\t{0}", interpretedEntry[i].Address);
1809 tw.WriteLine("#define T0_INTERPRETED {0}",
1813 @"#define T0_ENTER(ip, rp, slot) do { \
1814 const unsigned char *t0_newip; \
1816 t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \
1817 t0_lnum = t0_parse7E_unsigned(&t0_newip); \
1819 *((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \
1824 @"#define T0_DEFENTRY(name, slot) \
1828 t0_context *t0ctx = ctx; \
1829 t0ctx->ip = &t0_codeblock[0]; \
1830 T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \
1834 foreach (string ep in entryPoints) {
1835 tw.WriteLine("T0_DEFENTRY({0}, {1})",
1836 coreRun + "_init_" + ep,
1842 @"#define T0_NEXT(t0ipp) (*(*(t0ipp)) ++)");
1845 @"#define T0_NEXT(t0ipp) t0_parse7E_unsigned(t0ipp)");
1848 tw.WriteLine("void");
1849 tw.WriteLine("{0}_run(void *t0ctx)", coreRun);
1853 const unsigned char *ip;
1855 #define T0_LOCAL(x) (*(rp - 2 - (x)))
1856 #define T0_POP() (*-- dp)
1857 #define T0_POPi() (*(int32_t *)(-- dp))
1858 #define T0_PEEK(x) (*(dp - 1 - (x)))
1859 #define T0_PEEKi(x) (*(int32_t *)(dp - 1 - (x)))
1860 #define T0_PUSH(v) do { *dp = (v); dp ++; } while (0)
1861 #define T0_PUSHi(v) do { *(int32_t *)dp = (v); dp ++; } while (0)
1862 #define T0_RPOP() (*-- rp)
1863 #define T0_RPOPi() (*(int32_t *)(-- rp))
1864 #define T0_RPUSH(v) do { *rp = (v); rp ++; } while (0)
1865 #define T0_RPUSHi(v) do { *(int32_t *)rp = (v); rp ++; } while (0)
1866 #define T0_ROLL(x) do { \
1867 size_t t0len = (size_t)(x); \
1868 uint32_t t0tmp = *(dp - 1 - t0len); \
1869 memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \
1870 *(dp - 1) = t0tmp; \
1872 #define T0_SWAP() do { \
1873 uint32_t t0tmp = *(dp - 2); \
1874 *(dp - 2) = *(dp - 1); \
1875 *(dp - 1) = t0tmp; \
1877 #define T0_ROT() do { \
1878 uint32_t t0tmp = *(dp - 3); \
1879 *(dp - 3) = *(dp - 2); \
1880 *(dp - 2) = *(dp - 1); \
1881 *(dp - 1) = t0tmp; \
1883 #define T0_NROT() do { \
1884 uint32_t t0tmp = *(dp - 1); \
1885 *(dp - 1) = *(dp - 2); \
1886 *(dp - 2) = *(dp - 3); \
1887 *(dp - 3) = t0tmp; \
1889 #define T0_PICK(x) do { \
1890 uint32_t t0depth = (x); \
1891 T0_PUSH(T0_PEEK(t0depth)); \
1893 #define T0_CO() do { \
1896 #define T0_RET() goto t0_next
1898 dp = ((t0_context *)t0ctx)->dp;
1899 rp = ((t0_context *)t0ctx)->rp;
1900 ip = ((t0_context *)t0ctx)->ip;
1907 if (t0x < T0_INTERPRETED) {
1919 ip = &t0_codeblock[t0x];
1921 case 1: /* literal constant */
1922 T0_PUSHi(t0_parse7E_signed(&ip));
1924 case 2: /* read local */
1925 T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip)));
1927 case 3: /* write local */
1928 T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP();
1931 t0off = t0_parse7E_signed(&ip);
1934 case 5: /* jump if */
1935 t0off = t0_parse7E_signed(&ip);
1940 case 6: /* jump if not */
1941 t0off = t0_parse7E_signed(&ip);
1947 SortedDictionary<int, string> nccode =
1948 new SortedDictionary<int, string>();
1949 foreach (string k in ccodeUni.Keys) {
1950 nccode[ccodeUni[k]] = k;
1952 foreach (int sn in nccode.Keys) {
1958 break;", sn, ccodeNames[sn], nccode[sn]);
1965 T0_ENTER(ip, rp, t0x);
1969 ((t0_context *)t0ctx)->dp = dp;
1970 ((t0_context *)t0ctx)->rp = rp;
1971 ((t0_context *)t0ctx)->ip = ip;
1975 * Add the "postamble" elements here. These are
1976 * elements that may need access to the data
1977 * block or code block, so they must occur after
1980 foreach (string pp in extraCodeDefer) {
1982 tw.WriteLine("{0}", pp);
1987 foreach (CodeElement ce in gcode) {
1988 codeLen += ce.GetLength(oneByteCode);
1990 int dataBlockLen = 0;
1991 foreach (ConstData cd in blocks.Values) {
1992 dataBlockLen += cd.Length;
1996 * Write some statistics on produced code.
1998 Console.WriteLine("code length: {0,6} byte(s)", codeLen);
1999 Console.WriteLine("data length: {0,6} byte(s)", dataLen);
2000 Console.WriteLine("total words: {0} (interpreted: {1})",
2001 slotInterpreted + numInterpreted, numInterpreted);
/*
 * Find the word registered under the given name. Resolution is
 * delegated to LookupNF(); when that yields no word, an exception
 * whose message quotes the requested name is thrown.
 */
internal Word Lookup(string name)
{
    Word found = LookupNF(name);
    if (found == null) {
        string msg = String.Format("No such word: '{0}'", name);
        throw new Exception(msg);
    }
    return found;
}
/*
 * Non-failing lookup: query the 'words' map for the given name and
 * return the matching word, or null when the name is not present.
 */
internal Word LookupNF(string name)
{
    Word match;
    if (!words.TryGetValue(name, out match)) {
        return null;
    }
    return match;
}
/*
 * Wrap a string into a TValue: a TPointerBlob is created from this
 * compiler instance and the string, and the returned value pairs
 * that pointer with the integer 0.
 */
internal TValue StringToBlob(string s)
{
    TPointerBlob blob = new TPointerBlob(this, s);
    return new TValue(0, blob);
}
/*
 * Try to interpret the token 'tt' as a literal value, written into
 * 'tv'. Three forms are visible here: a token starting with a double
 * quote (string literal), a token starting with a backquote
 * (character constant), and an integer with an optional sign and an
 * optional "0x"/"0b" prefix. An integer that does not fit triggers
 * an exception ("invalid literal integer (overflow)").
 *
 * NOTE(review): the StartsWith() calls use the culture-sensitive
 * overload; an ordinal comparison is probably what is meant -- confirm.
 */
2025 internal bool TryParseLiteral(string tt, out TValue tv)
/* String literal: everything after the opening quote becomes a blob. */
2028 if (tt.StartsWith("\"")) {
2029 tv = StringToBlob(tt.Substring(1));
/* Character constant: the text after the backquote is decoded. */
2032 if (tt.StartsWith("`")) {
2033 tv = DecodeCharConst(tt.Substring(1));
/* Optional sign: a leading '-' or '+' is removed from the token. */
2037 if (tt.StartsWith("-")) {
2039 tt = tt.Substring(1);
2040 } else if (tt.StartsWith("+")) {
2041 tt = tt.Substring(1);
/*
 * Optional base prefix, removed from the token (presumably selects
 * radix 16 for "0x" and radix 2 for "0b" -- TODO confirm).
 */
2044 if (tt.StartsWith("0x") || tt.StartsWith("0X")) {
2046 tt = tt.Substring(2);
2047 } else if (tt.StartsWith("0b") || tt.StartsWith("0B")) {
2049 tt = tt.Substring(2);
/* Nothing is left after the sign and prefix. */
2051 if (tt.Length == 0) {
2055 bool overflow = false;
/* Quotient of the largest uint by the radix, kept as an overflow bound. */
2056 uint maxV = uint.MaxValue / radix;
2057 foreach (char c in tt) {
/* The digit value must lie in the 0..radix-1 range. */
2059 if (d < 0 || d >= radix) {
/* Adding the digit to the accumulator would exceed the uint range. */
2066 if ((uint)d > uint.MaxValue - acc) {
/*
 * Magnitude above 2^31 (presumably checked only for negative
 * literals, where it cannot be negated into an int -- TODO confirm).
 */
2073 if (acc > (uint)0x80000000) {
2079 throw new Exception(
2080 "invalid literal integer (overflow)");
/*
 * Parse the token 'tt' as a literal and return it as an integer.
 *
 * The token is handed to TryParseLiteral(); if it is not recognised
 * as a literal, an exception is thrown whose message contains the
 * offending token itself (the message previously embedded the
 * compiler object's ToString() instead, which said nothing about the
 * token that failed to parse).
 *
 * NOTE(review): the final step relies on TValue's conversion to int;
 * confirm how that conversion treats non-integer literals.
 */
int ParseInteger(string tt)
{
    TValue tv;
    if (!TryParseLiteral(tt, out tv)) {
        throw new Exception("not an integer: " + tt);
    }
    return (int)tv;
}
/*
 * Guard for operations that are only valid while compiling: raises
 * an exception with the message "Not in compilation mode".
 * (The condition that triggers the throw is not visible in this
 * excerpt; presumably it tests the compiler's current mode --
 * TODO confirm.)
 */
2095 void CheckCompiling()
2098 throw new Exception("Not in compilation mode");
/*
 * Escape a string so that it can be embedded in a C comment.
 *
 * Printable ASCII characters (codes 33 to 126) other than '%' are
 * copied unchanged. Every other UTF-16 code unit is replaced with
 * the percent-encoded bytes ("%XX", uppercase hexadecimal) of its
 * UTF-8 encoding: one byte below 0x80, two bytes below 0x800, three
 * bytes otherwise. Code units are handled one at a time, so each
 * half of a surrogate pair is encoded on its own.
 *
 * As a last step, any comment terminator (a star followed by a
 * slash) still present in the output has its star replaced with
 * "%2A", so the result can never close the enclosing comment.
 */
static string EscapeCComment(string s)
{
    StringBuilder sb = new StringBuilder();
    foreach (char c in s) {
        int cp = (int)c;
        if (cp >= 33 && cp <= 126 && c != '%') {
            sb.Append(c);
        } else if (cp < 0x80) {
            /* Plain ASCII (space, controls, DEL, '%'): one byte. */
            sb.AppendFormat("%{0:X2}", cp);
        } else if (cp < 0x800) {
            /* Each byte needs its own placeholder index. */
            sb.AppendFormat("%{0:X2}%{1:X2}",
                (cp >> 6) | 0xC0,
                (cp & 0x3F) | 0x80);
        } else {
            sb.AppendFormat("%{0:X2}%{1:X2}%{2:X2}",
                (cp >> 12) | 0xE0,
                ((cp >> 6) & 0x3F) | 0x80,
                (cp & 0x3F) | 0x80);
        }
    }
    return sb.ToString().Replace("*/", "%2A/");
}