2 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 using System.Collections.Generic;
28 using System.Reflection;
32 * This is the main compiler class.
38 * Command-line entry point.
40 public static void Main(string[] args)
43 List<string> r = new List<string>();
44 string outBase = null;
45 List<string> entryPoints = new List<string>();
46 string coreRun = null;
50 for (int i = 0; i < args.Length; i ++) {
52 if (!a.StartsWith("-")) {
58 if (++ i >= args.Length) {
65 while (a.StartsWith("-")) {
68 int j = a.IndexOf('=');
72 pname = a.ToLowerInvariant();
74 pval2 = (i + 1) < args.Length
77 pname = a.Substring(0, j).Trim()
79 pval = a.Substring(j + 1);
92 if (outBase != null) {
117 foreach (string ep in pval.Split(',')) {
118 string epz = ep.Trim();
119 if (epz.Length > 0) {
120 entryPoints.Add(epz);
136 if (outBase == null) {
139 if (entryPoints.Count == 0) {
140 entryPoints.Add("main");
142 if (coreRun == null) {
145 T0Comp tc = new T0Comp();
146 tc.enableFlowAnalysis = flow;
149 using (TextReader tr = new StreamReader(
150 Assembly.GetExecutingAssembly()
151 .GetManifestResourceStream("t0-kernel")))
155 foreach (string a in r) {
156 Console.WriteLine("[{0}]", a);
157 using (TextReader tr = File.OpenText(a)) {
161 tc.Generate(outBase, coreRun, entryPoints.ToArray());
162 } catch (Exception e) {
163 Console.WriteLine(e.ToString());
171 "usage: T0Comp.exe [ options... ] file...");
175 " -o file use 'file' as base for output file name (default: 't0out')");
177 " -r name use 'name' as base for run function (default: same as output)");
179 " -m name[,name...]");
181 " define entry point(s)");
183 " -nf disable flow analysis");
188 * If 'delayedChar' is Int32.MinValue then there is no delayed
190 * If 'delayedChar' equals x >= 0 then there is one delayed
191 * character of value x.
192 * If 'delayedChar' equals y < 0 then there are two delayed
193 * characters, a newline (U+000A) followed by character of
196 TextReader currentInput;
200 * Common StringBuilder used to parse tokens; it is reused for
203 StringBuilder tokenBuilder;
206 * There may be a delayed token in some cases.
211 * Defined words are referenced by name in this map. Names are
212 * string-sensitive; for better reproducibility, the map is sorted
215 IDictionary<string, Word> words;
218 * Last defined word is also referenced in 'lastWord'. This is
219 * used by 'immediate'.
224 * When compiling, this builder is used. A stack saves other
225 * builders in case of nested definition.
227 WordBuilder wordBuilder;
228 Stack<WordBuilder> savedWordBuilders;
231 * C code defined for words is kept in this map, by word name.
233 IDictionary<string, string> allCCode;
236 * 'compiling' is true when compiling tokens to a word, false
237 * when interpreting them.
242 * 'quitRunLoop' is set to true to trigger exit of the
243 * interpretation loop when the end of the current input file
249 * 'extraCode' is for C code that is to be added as preamble to
252 List<string> extraCode;
255 * 'dataBlock' is the data block in which constant data bytes
261 * Counter for blocks of constant data.
266 * Flow analysis enable flag.
268 bool enableFlowAnalysis;
271 * Data stack size limit.
276 * Return stack size limit.
282 tokenBuilder = new StringBuilder();
283 words = new SortedDictionary<string, Word>(
284 StringComparer.Ordinal);
285 savedWordBuilders = new Stack<WordBuilder>();
286 allCCode = new SortedDictionary<string, string>(
287 StringComparer.Ordinal);
289 extraCode = new List<string>();
290 enableFlowAnalysis = true;
293 * Native words are predefined and implemented only with
294 * native code. Some may be part of the generated output,
295 * if C code is set for them.
300 * Parses next token as a word name, then a C code snippet.
301 * Sets the C code for that word.
303 AddNative("add-cc:", false, SType.BLANK, cpu => {
307 "EOF reached (missing name)");
309 if (allCCode.ContainsKey(tt)) {
311 "C code already set for: " + tt);
313 allCCode[tt] = ParseCCode();
318 * Parses next token as a word name, then a C code snippet.
319 * A new word is defined, that throws an exception when
320 * invoked during compilation. The C code is set for that
323 AddNative("cc:", false, SType.BLANK, cpu => {
327 "EOF reached (missing name)");
329 Word w = AddNative(tt, false, cpu2 => {
331 "C-only word: " + tt);
333 if (allCCode.ContainsKey(tt)) {
335 "C code already set for: " + tt);
338 allCCode[tt] = ParseCCode(out stackEffect);
339 w.StackEffect = stackEffect;
344 * Parses a C code snippet, then adds it to the generated
347 AddNative("preamble", false, SType.BLANK, cpu => {
348 extraCode.Add(ParseCCode());
353 * Expects two integers and a string, and makes a
354 * constant that stands for the string as a C constant
355 * expression. The two integers are the expected range
356 * (min-max, inclusive).
358 AddNative("make-CX", false, new SType(3, 1), cpu => {
359 TValue c = cpu.Pop();
360 if (!(c.ptr is TPointerBlob)) {
361 throw new Exception(string.Format(
362 "'{0}' is not a string", c));
366 TValue tv = new TValue(0, new TPointerExpr(
367 c.ToString(), min, max));
373 * Parses two integer constants, then a C code snippet.
374 * It then pushes on the stack, or compiles to the
375 * current word, a value consisting of the given C
376 * expression; the two integers indicate the expected
377 * range (min-max, inclusive) of the C expression when
380 AddNative("CX", true, cpu => {
384 "EOF reached (missing min value)");
386 int min = ParseInteger(tt);
390 "EOF reached (missing max value)");
392 int max = ParseInteger(tt);
394 throw new Exception("min/max in wrong order");
396 TValue tv = new TValue(0, new TPointerExpr(
397 ParseCCode().Trim(), min, max));
399 wordBuilder.Literal(tv);
407 * Interrupt the current execution. This implements
408 * coroutines. It cannot be invoked during compilation.
410 AddNative("co", false, SType.BLANK, cpu => {
411 throw new Exception("No coroutine in compile mode");
416 * Parses next token as word name. It begins definition
417 * of that word, setting it as current target for
418 * word building. Any previously opened word is saved
419 * and will become available again as a target when that
420 * new word is finished building.
422 AddNative(":", false, cpu => {
426 "EOF reached (missing name)");
429 savedWordBuilders.Push(wordBuilder);
433 wordBuilder = new WordBuilder(this, tt);
437 "EOF reached (while compiling)");
440 SType stackEffect = ParseStackEffectNF();
441 if (!stackEffect.IsKnown) {
443 "Invalid stack effect syntax");
445 wordBuilder.StackEffect = stackEffect;
452 * Pops a string as word name, and two integers as stack
453 * effect. It begins definition of that word, setting it
454 * as current target for word building. Any previously
455 * opened word is saved and will become available again as
456 * a target when that new word is finished building.
458 * Stack effect is the pair 'din dout'. If din is negative,
459 * then the stack effect is "unknown". If din is nonnegative
460 * but dout is negative, then the word is reputed never to
463 AddNative("define-word", false, cpu => {
464 int dout = cpu.Pop();
466 TValue s = cpu.Pop();
467 if (!(s.ptr is TPointerBlob)) {
468 throw new Exception(string.Format(
469 "Not a string: '{0}'", s));
471 string tt = s.ToString();
473 savedWordBuilders.Push(wordBuilder);
477 wordBuilder = new WordBuilder(this, tt);
478 wordBuilder.StackEffect = new SType(din, dout);
483 * Ends current word. The current word is registered under
484 * its name, and the previously opened word (if any) becomes
485 * again the building target.
487 AddNative(";", true, cpu => {
489 throw new Exception("Not compiling");
491 Word w = wordBuilder.Build();
492 string name = w.Name;
493 if (words.ContainsKey(name)) {
495 "Word already defined: " + name);
499 if (savedWordBuilders.Count > 0) {
500 wordBuilder = savedWordBuilders.Pop();
509 * Sets the last defined word as immediate.
511 AddNative("immediate", false, cpu => {
512 if (lastWord == null) {
513 throw new Exception("No word defined yet");
515 lastWord.Immediate = true;
519 * literal (immediate)
520 * Pops the current TOS value, and add in the current word
521 * the action of pushing that value. This cannot be used
522 * when no word is being built.
524 WordNative wliteral = AddNative("literal", true, cpu => {
526 wordBuilder.Literal(cpu.Pop());
531 * Pops the current TOS value, which must be an XT (pointer
532 * to a word); the action of calling that word is compiled
533 * in the current word.
535 WordNative wcompile = AddNative("compile", false, cpu => {
537 wordBuilder.Call(cpu.Pop().ToXT());
541 * postpone (immediate)
542 * Parses the next token as a word name, and add to the
543 * current word the action of calling that word. This
544 * basically removes immediatety from the next word.
546 AddNative("postpone", true, cpu => {
551 "EOF reached (missing name)");
554 bool isVal = TryParseLiteral(tt, out v);
555 Word w = LookupNF(tt);
556 if (isVal && w != null) {
557 throw new Exception(String.Format(
558 "Ambiguous: both defined word and"
559 + " literal: {0}", tt));
562 wordBuilder.Literal(v);
563 wordBuilder.CallExt(wliteral);
564 } else if (w != null) {
566 wordBuilder.CallExt(w);
568 wordBuilder.Literal(new TValue(0,
570 wordBuilder.CallExt(wcompile);
573 wordBuilder.Literal(new TValue(0,
574 new TPointerXT(tt)));
575 wordBuilder.CallExt(wcompile);
580 * Interrupt compilation with an error.
582 AddNative("exitvm", false, cpu => {
583 throw new Exception();
587 * Open a new data block. Its symbolic address is pushed
590 AddNative("new-data-block", false, cpu => {
591 dataBlock = new ConstData(this);
592 cpu.Push(new TValue(0, new TPointerBlob(dataBlock)));
596 * Define a new data word. The data address and name are
597 * popped from the stack.
599 AddNative("define-data-word", false, cpu => {
600 string name = cpu.Pop().ToString();
601 TValue va = cpu.Pop();
602 TPointerBlob tb = va.ptr as TPointerBlob;
605 "Address is not a data area");
607 Word w = new WordData(this, name, tb.Blob, va.x);
608 if (words.ContainsKey(name)) {
610 "Word already defined: " + name);
617 * Get an address pointing at the end of the current
618 * data block. This is the address of the next byte that
621 AddNative("current-data", false, cpu => {
622 if (dataBlock == null) {
624 "No current data block");
626 cpu.Push(new TValue(dataBlock.Length,
627 new TPointerBlob(dataBlock)));
631 * Add a byte value to the data block.
633 AddNative("data-add8", false, cpu => {
634 if (dataBlock == null) {
636 "No current data block");
639 if (v < 0 || v > 0xFF) {
641 "Byte value out of range: " + v);
643 dataBlock.Add8((byte)v);
647 * Set a byte value in the data block.
649 AddNative("data-set8", false, cpu => {
650 TValue va = cpu.Pop();
651 TPointerBlob tb = va.ptr as TPointerBlob;
654 "Address is not a data area");
657 if (v < 0 || v > 0xFF) {
659 "Byte value out of range: " + v);
661 tb.Blob.Set8(va.x, (byte)v);
665 * Get a byte value from a data block.
667 AddNative("data-get8", false, new SType(1, 1), cpu => {
668 TValue va = cpu.Pop();
669 TPointerBlob tb = va.ptr as TPointerBlob;
672 "Address is not a data area");
674 int v = tb.Blob.Read8(va.x);
681 AddNative("compile-local-read", false, cpu => {
683 wordBuilder.GetLocal(cpu.Pop().ToString());
685 AddNative("compile-local-write", false, cpu => {
687 wordBuilder.PutLocal(cpu.Pop().ToString());
690 AddNative("ahead", true, cpu => {
694 AddNative("begin", true, cpu => {
698 AddNative("again", true, cpu => {
702 AddNative("until", true, cpu => {
704 wordBuilder.AgainIfNot();
706 AddNative("untilnot", true, cpu => {
708 wordBuilder.AgainIf();
710 AddNative("if", true, cpu => {
712 wordBuilder.AheadIfNot();
714 AddNative("ifnot", true, cpu => {
716 wordBuilder.AheadIf();
718 AddNative("then", true, cpu => {
722 AddNative("cs-pick", false, cpu => {
724 wordBuilder.CSPick(cpu.Pop());
726 AddNative("cs-roll", false, cpu => {
728 wordBuilder.CSRoll(cpu.Pop());
730 AddNative("next-word", false, cpu => {
733 throw new Exception("No next word (EOF)");
735 cpu.Push(StringToBlob(s));
737 AddNative("parse", false, cpu => {
739 string s = ReadTerm(d);
740 cpu.Push(StringToBlob(s));
742 AddNative("char", false, cpu => {
745 throw new Exception("No next character (EOF)");
749 AddNative("'", false, cpu => {
750 string name = Next();
751 cpu.Push(new TValue(0, new TPointerXT(name)));
755 * The "execute" word is valid in generated C code, but
756 * since it jumps to a runtime pointer, its actual stack
757 * effect cannot be computed in advance.
759 AddNative("execute", false, cpu => {
760 cpu.Pop().Execute(this, cpu);
763 AddNative("[", true, cpu => {
767 AddNative("]", false, cpu => {
770 AddNative("(local)", false, cpu => {
772 wordBuilder.DefLocal(cpu.Pop().ToString());
774 AddNative("ret", true, cpu => {
779 AddNative("drop", false, new SType(1, 0), cpu => {
782 AddNative("dup", false, new SType(1, 2), cpu => {
783 cpu.Push(cpu.Peek(0));
785 AddNative("swap", false, new SType(2, 2), cpu => {
788 AddNative("over", false, new SType(2, 3), cpu => {
789 cpu.Push(cpu.Peek(1));
791 AddNative("rot", false, new SType(3, 3), cpu => {
794 AddNative("-rot", false, new SType(3, 3), cpu => {
799 * "roll" and "pick" are special in that the stack slot
800 * they inspect might be known only at runtime, so an
801 * absolute stack effect cannot be attributed. Instead,
802 * we simply hope that the caller knows what it is doing,
803 * and we use a simple stack effect for just the count
804 * value and picked value.
806 AddNative("roll", false, new SType(1, 0), cpu => {
809 AddNative("pick", false, new SType(1, 1), cpu => {
810 cpu.Push(cpu.Peek(cpu.Pop()));
813 AddNative("+", false, new SType(2, 1), cpu => {
814 TValue b = cpu.Pop();
815 TValue a = cpu.Pop();
819 } else if (a.ptr is TPointerBlob
820 && b.ptr is TPointerBlob)
822 cpu.Push(StringToBlob(
823 a.ToString() + b.ToString()));
825 throw new Exception(string.Format(
826 "Cannot add '{0}' to '{1}'", b, a));
829 AddNative("-", false, new SType(2, 1), cpu => {
831 * We can subtract two pointers, provided that
832 * they point to the same blob. Otherwise,
833 * the subtraction second operand must be an
836 TValue b = cpu.Pop();
837 TValue a = cpu.Pop();
838 TPointerBlob ap = a.ptr as TPointerBlob;
839 TPointerBlob bp = b.ptr as TPointerBlob;
840 if (ap != null && bp != null && ap.Blob == bp.Blob) {
841 cpu.Push(new TValue(a.x - b.x));
848 AddNative("neg", false, new SType(1, 1), cpu => {
852 AddNative("*", false, new SType(2, 1), cpu => {
857 AddNative("/", false, new SType(2, 1), cpu => {
862 AddNative("u/", false, new SType(2, 1), cpu => {
867 AddNative("%", false, new SType(2, 1), cpu => {
872 AddNative("u%", false, new SType(2, 1), cpu => {
877 AddNative("<", false, new SType(2, 1), cpu => {
882 AddNative("<=", false, new SType(2, 1), cpu => {
887 AddNative(">", false, new SType(2, 1), cpu => {
892 AddNative(">=", false, new SType(2, 1), cpu => {
897 AddNative("=", false, new SType(2, 1), cpu => {
898 TValue b = cpu.Pop();
899 TValue a = cpu.Pop();
900 cpu.Push(a.Equals(b));
902 AddNative("<>", false, new SType(2, 1), cpu => {
903 TValue b = cpu.Pop();
904 TValue a = cpu.Pop();
905 cpu.Push(!a.Equals(b));
907 AddNative("u<", false, new SType(2, 1), cpu => {
908 uint bx = cpu.Pop().UInt;
909 uint ax = cpu.Pop().UInt;
910 cpu.Push(new TValue(ax < bx));
912 AddNative("u<=", false, new SType(2, 1), cpu => {
913 uint bx = cpu.Pop().UInt;
914 uint ax = cpu.Pop().UInt;
915 cpu.Push(new TValue(ax <= bx));
917 AddNative("u>", false, new SType(2, 1), cpu => {
918 uint bx = cpu.Pop().UInt;
919 uint ax = cpu.Pop().UInt;
920 cpu.Push(new TValue(ax > bx));
922 AddNative("u>=", false, new SType(2, 1), cpu => {
927 AddNative("and", false, new SType(2, 1), cpu => {
932 AddNative("or", false, new SType(2, 1), cpu => {
937 AddNative("xor", false, new SType(2, 1), cpu => {
942 AddNative("not", false, new SType(1, 1), cpu => {
946 AddNative("<<", false, new SType(2, 1), cpu => {
947 int count = cpu.Pop();
948 if (count < 0 || count > 31) {
949 throw new Exception("Invalid shift count");
952 cpu.Push(ax << count);
954 AddNative(">>", false, new SType(2, 1), cpu => {
955 int count = cpu.Pop();
956 if (count < 0 || count > 31) {
957 throw new Exception("Invalid shift count");
960 cpu.Push(ax >> count);
962 AddNative("u>>", false, new SType(2, 1), cpu => {
963 int count = cpu.Pop();
964 if (count < 0 || count > 31) {
965 throw new Exception("Invalid shift count");
968 cpu.Push(ax >> count);
971 AddNative(".", false, new SType(1, 0), cpu => {
972 Console.Write(" {0}", cpu.Pop().ToString());
974 AddNative(".s", false, SType.BLANK, cpu => {
976 for (int i = n - 1; i >= 0; i --) {
977 Console.Write(" {0}", cpu.Peek(i).ToString());
980 AddNative("putc", false, new SType(1, 0), cpu => {
981 Console.Write((char)cpu.Pop());
983 AddNative("puts", false, new SType(1, 0), cpu => {
984 Console.Write("{0}", cpu.Pop().ToString());
986 AddNative("cr", false, SType.BLANK, cpu => {
989 AddNative("eqstr", false, new SType(2, 1), cpu => {
990 string s2 = cpu.Pop().ToString();
991 string s1 = cpu.Pop().ToString();
996 WordNative AddNative(string name, bool immediate,
997 WordNative.NativeRun code)
999 return AddNative(name, immediate, SType.UNKNOWN, code);
1002 WordNative AddNative(string name, bool immediate, SType stackEffect,
1003 WordNative.NativeRun code)
1005 if (words.ContainsKey(name)) {
1006 throw new Exception(
1007 "Word already defined: " + name);
1009 WordNative w = new WordNative(this, name, code);
1010 w.Immediate = immediate;
1011 w.StackEffect = stackEffect;
1016 internal long NextBlobID()
1018 return currentBlobID ++;
1023 int c = delayedChar;
1025 delayedChar = Int32.MinValue;
1026 } else if (c > Int32.MinValue) {
1027 delayedChar = -(c + 1);
1030 c = currentInput.Read();
1033 if (delayedChar >= 0) {
1035 delayedChar = Int32.MinValue;
1037 c = currentInput.Read();
1048 * Un-read the character value 'c'. That value MUST be the one
1049 * that was obtained from NextChar().
1056 if (delayedChar < 0) {
1057 if (delayedChar != Int32.MinValue) {
1058 throw new Exception(
1059 "Already two delayed characters");
1062 } else if (c != '\n') {
1063 throw new Exception("Cannot delay two characters");
1065 delayedChar = -(delayedChar + 1);
1071 string r = delayedToken;
1073 delayedToken = null;
1076 tokenBuilder.Length = 0;
1088 return ParseString();
1091 tokenBuilder.Append((char)c);
1093 if (c < 0 || IsWS(c)) {
1095 return tokenBuilder.ToString();
1103 string r = ParseCCode(out stackEffect);
1104 if (stackEffect.IsKnown) {
1105 throw new Exception(
1106 "Stack effect forbidden in this declaration");
1111 string ParseCCode(out SType stackEffect)
1113 string s = ParseCCodeNF(out stackEffect);
1115 throw new Exception("Error while parsing C code");
1120 string ParseCCodeNF(out SType stackEffect)
1122 stackEffect = SType.UNKNOWN;
1130 if (stackEffect.IsKnown) {
1134 stackEffect = ParseStackEffectNF();
1135 if (!stackEffect.IsKnown) {
1139 } else if (c != '{') {
1146 StringBuilder sb = new StringBuilder();
1158 if (-- count == 0) {
1159 return sb.ToString();
1168 * Parse a stack effect declaration. This method assumes that the
1169 * opening parenthesis has just been read. If the parsing fails,
1170 * then this method returns SType.UNKNOWN.
1172 SType ParseStackEffectNF()
1174 bool seenSep = false;
1175 bool seenBang = false;
1176 int din = 0, dout = 0;
1180 return SType.UNKNOWN;
1184 return SType.UNKNOWN;
1187 } else if (t == ")") {
1189 if (seenBang && dout == 1) {
1192 return new SType(din, dout);
1194 return SType.UNKNOWN;
1198 if (dout == 0 && t == "!") {
1209 string ParseString()
1211 StringBuilder sb = new StringBuilder();
1219 throw new Exception(
1220 "Unfinished literal string");
1225 throw new Exception(String.Format(
1226 "not an hex digit: U+{0:X4}",
1229 acc = (acc << 4) + d;
1230 if (-- hexNum == 0) {
1231 sb.Append((char)acc);
1237 case '\n': SkipNL(); break;
1245 sb.Append(SingleCharEscape(c));
1251 return sb.ToString();
1263 static char SingleCharEscape(int c)
1266 case 'n': return '\n';
1267 case 'r': return '\r';
1268 case 't': return '\t';
1269 case 's': return ' ';
1276 * A backslash+newline sequence occurred in a literal string; we
1277 * check and consume the newline escape sequence (whitespace at
1278 * start of next line, then a double-quote character).
1285 throw new Exception("EOF in literal string");
1288 throw new Exception(
1289 "Unescaped newline in literal string");
1297 throw new Exception(
1298 "Invalid newline escape in literal string");
1302 static char DecodeCharConst(string t)
1304 if (t.Length == 1 && t[0] != '\\') {
1307 if (t.Length >= 2 && t[0] == '\\') {
1310 if (t.Length == 4) {
1311 int x = DecHex(t.Substring(2));
1318 if (t.Length == 6) {
1319 int x = DecHex(t.Substring(2));
1326 if (t.Length == 2) {
1327 return SingleCharEscape(t[1]);
1332 throw new Exception("Invalid literal char: `" + t);
1335 static int DecHex(string s)
1338 foreach (char c in s) {
1343 acc = (acc << 4) + d;
1348 static int HexVal(int c)
1350 if (c >= '0' && c <= '9') {
1352 } else if (c >= 'A' && c <= 'F') {
1353 return c - ('A' - 10);
1354 } else if (c >= 'a' && c <= 'f') {
1355 return c - ('a' - 10);
1361 string ReadTerm(int ct)
1363 StringBuilder sb = new StringBuilder();
1367 throw new Exception(String.Format(
1368 "EOF reached before U+{0:X4}", ct));
1371 return sb.ToString();
1377 static bool IsWS(int c)
1382 void ProcessInput(TextReader tr)
1384 this.currentInput = tr;
1386 Word w = new WordNative(this, "toplevel",
1387 xcpu => { CompileStep(xcpu); });
1388 CPU cpu = new CPU();
1389 Opcode[] code = new Opcode[] {
1391 new OpcodeJumpUncond(-2)
1393 quitRunLoop = false;
1399 Opcode op = cpu.ipBuf[cpu.ipOff ++];
1404 void CompileStep(CPU cpu)
1409 throw new Exception("EOF while compiling");
1415 bool isVal = TryParseLiteral(tt, out v);
1416 Word w = LookupNF(tt);
1417 if (isVal && w != null) {
1418 throw new Exception(String.Format(
1419 "Ambiguous: both defined word and literal: {0}",
1424 wordBuilder.Literal(v);
1425 } else if (w != null) {
1429 wordBuilder.CallExt(w);
1432 wordBuilder.Call(tt);
1437 } else if (w != null) {
1440 throw new Exception(String.Format(
1441 "Unknown word: '{0}'", tt));
1446 string GetCCode(string name)
1449 allCCode.TryGetValue(name, out ccode);
1453 void Generate(string outBase, string coreRun,
1454 params string[] entryPoints)
1457 * Gather all words that are part of the generated
1458 * code. This is done by exploring references
1459 * transitively. All such words are thus implicitly
1462 IDictionary<string, Word> wordSet =
1463 new SortedDictionary<string, Word>(
1464 StringComparer.Ordinal);
1465 Queue<Word> tx = new Queue<Word>();
1466 foreach (string ep in entryPoints) {
1467 if (wordSet.ContainsKey(ep)) {
1470 Word w = Lookup(ep);
1471 wordSet[w.Name] = w;
1474 while (tx.Count > 0) {
1475 Word w = tx.Dequeue();
1476 foreach (Word w2 in w.GetReferences()) {
1477 if (wordSet.ContainsKey(w2.Name)) {
1480 wordSet[w2.Name] = w2;
1488 if (enableFlowAnalysis) {
1489 foreach (string ep in entryPoints) {
1490 Word w = wordSet[ep];
1492 Console.WriteLine("{0}: ds={1} rs={2}",
1493 ep, w.MaxDataStack, w.MaxReturnStack);
1494 if (w.MaxDataStack > dsLimit) {
1495 throw new Exception("'" + ep
1496 + "' exceeds data stack limit");
1498 if (w.MaxReturnStack > rsLimit) {
1499 throw new Exception("'" + ep
1500 + "' exceeds return stack"
1507 * Gather referenced data areas and compute their
1508 * addresses in the generated data block. The address
1509 * 0 in the data block is unaffected so that no
1510 * valid runtime pointer is equal to null.
1512 IDictionary<long, ConstData> blocks =
1513 new SortedDictionary<long, ConstData>();
1514 foreach (Word w in wordSet.Values) {
1515 foreach (ConstData cd in w.GetDataBlocks()) {
1520 foreach (ConstData cd in blocks.Values) {
1521 cd.Address = dataLen;
1522 dataLen += cd.Length;
1526 * Generated code is a sequence of "slot numbers", each
1527 * referencing either a piece of explicit C code, or an
1528 * entry in the table of interpreted words.
1530 * Opcodes other than "call" get the slots 0 to 6:
1533 * 1 const signed value
1534 * 2 get local local number
1535 * 3 put local local number
1536 * 4 jump signed offset
1537 * 5 jump if signed offset
1538 * 6 jump if not signed offset
1540 * The argument, if any, is in "7E" format: the value is
1541 * encoded in 7-bit chunk, with big-endian signed
1542 * convention. Each 7-bit chunk is encoded over one byte;
1543 * the upper bit is 1 for all chunks except the last one.
1545 * Words with explicit C code get the slot numbers
1546 * immediately after 6. Interpreted words come afterwards.
1548 IDictionary<string, int> slots = new Dictionary<string, int>();
1552 * Get explicit C code for words which have such code.
1553 * We use string equality on C code so that words with
1554 * identical implementations get merged.
1556 * We also check that words with no explicit C code are
1559 IDictionary<string, int> ccodeUni =
1560 new Dictionary<string, int>();
1561 IDictionary<int, string> ccodeNames =
1562 new Dictionary<int, string>();
1563 foreach (Word w in wordSet.Values) {
1564 string ccode = GetCCode(w.Name);
1565 if (ccode == null) {
1566 if (w is WordNative) {
1567 throw new Exception(String.Format(
1568 "No C code for native '{0}'",
1574 if (ccodeUni.ContainsKey(ccode)) {
1575 sn = ccodeUni[ccode];
1576 ccodeNames[sn] += " " + EscapeCComment(w.Name);
1579 ccodeUni[ccode] = sn;
1580 ccodeNames[sn] = EscapeCComment(w.Name);
1587 * Assign slot values to all remaining words; we know they
1588 * are all interpreted.
1590 int slotInterpreted = curSlot;
1591 foreach (Word w in wordSet.Values) {
1592 if (GetCCode(w.Name) != null) {
1595 int sn = curSlot ++;
1599 int numInterpreted = curSlot - slotInterpreted;
1602 * Verify that all entry points are interpreted words.
1604 foreach (string ep in entryPoints) {
1605 if (GetCCode(ep) != null) {
1606 throw new Exception(
1607 "Non-interpreted entry point");
1612 * Compute the code block. Each word (without any C code)
1613 * yields some CodeElement instances.
1615 List<CodeElement> gcodeList = new List<CodeElement>();
1616 CodeElement[] interpretedEntry =
1617 new CodeElement[numInterpreted];
1618 foreach (Word w in wordSet.Values) {
1619 if (GetCCode(w.Name) != null) {
1622 int n = gcodeList.Count;
1623 w.GenerateCodeElements(gcodeList);
1624 interpretedEntry[w.Slot - slotInterpreted] =
1627 CodeElement[] gcode = gcodeList.ToArray();
1630 * If there are less than 256 words in total (C +
1631 * interpreted) then we can use "one-byte code" which is
1632 * more compact when the number of words is in the
1636 if (slotInterpreted + numInterpreted >= 256) {
1637 Console.WriteLine("WARNING: more than 255 words");
1638 oneByteCode = false;
1644 * Compute all addresses and offsets. This loops until
1645 * the addresses stabilize.
1648 int[] gcodeLen = new int[gcode.Length];
1650 for (int i = 0; i < gcode.Length; i ++) {
1651 gcodeLen[i] = gcode[i].GetLength(oneByteCode);
1654 for (int i = 0; i < gcode.Length; i ++) {
1655 gcode[i].Address = off;
1656 gcode[i].LastLength = gcodeLen[i];
1659 if (off == totalLen) {
1666 * Produce output file.
1668 using (TextWriter tw = File.CreateText(outBase + ".c")) {
1672 @"/* Automatically generated code; do not modify directly. */
1680 const unsigned char *ip;
1684 t0_parse7E_unsigned(const unsigned char **p)
1693 x = (x << 7) | (uint32_t)(y & 0x7F);
1701 t0_parse7E_signed(const unsigned char **p)
1706 neg = ((**p) >> 6) & 1;
1712 x = (x << 7) | (uint32_t)(y & 0x7F);
1715 return -(int32_t)~x - 1;
1723 #define T0_VBYTE(x, n) (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80)
1724 #define T0_FBYTE(x, n) (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F)
1725 #define T0_SBYTE(x) (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8)
1726 #define T0_INT1(x) T0_FBYTE(x, 0)
1727 #define T0_INT2(x) T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1728 #define T0_INT3(x) T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1729 #define T0_INT4(x) T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1730 #define T0_INT5(x) T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
1732 static const uint8_t t0_datablock[];
1736 * Add declarations (not definitions) for the
1737 * entry point initialisation functions, and the
1741 foreach (string ep in entryPoints) {
1742 tw.WriteLine("void {0}_init_{1}(void *t0ctx);",
1746 tw.WriteLine("void {0}_run(void *t0ctx);", coreRun);
1749 * Add preamble elements here. They may be needed
1750 * for evaluating constant expressions in the
1753 foreach (string pp in extraCode) {
1755 tw.WriteLine("{0}", pp);
1760 tw.Write("static const uint8_t t0_datablock[] = {");
1761 bw = new BlobWriter(tw, 78, 1);
1763 foreach (ConstData cd in blocks.Values) {
1770 tw.Write("static const uint8_t t0_codeblock[] = {");
1771 bw = new BlobWriter(tw, 78, 1);
1772 foreach (CodeElement ce in gcode) {
1773 ce.Encode(bw, oneByteCode);
1779 tw.Write("static const uint16_t t0_caddr[] = {");
1780 for (int i = 0; i < interpretedEntry.Length; i ++) {
1785 tw.Write("\t{0}", interpretedEntry[i].Address);
1791 tw.WriteLine("#define T0_INTERPRETED {0}",
1795 @"#define T0_ENTER(ip, rp, slot) do { \
1796 const unsigned char *t0_newip; \
1798 t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \
1799 t0_lnum = t0_parse7E_unsigned(&t0_newip); \
1801 *((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \
1806 @"#define T0_DEFENTRY(name, slot) \
1810 t0_context *t0ctx = ctx; \
1811 t0ctx->ip = &t0_codeblock[0]; \
1812 T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \
1816 foreach (string ep in entryPoints) {
1817 tw.WriteLine("T0_DEFENTRY({0}, {1})",
1818 coreRun + "_init_" + ep,
1824 @"#define T0_NEXT(t0ipp) (*(*(t0ipp)) ++)");
1827 @"#define T0_NEXT(t0ipp) t0_parse7E_unsigned(t0ipp)");
1830 tw.WriteLine("void");
1831 tw.WriteLine("{0}_run(void *t0ctx)", coreRun);
1835 const unsigned char *ip;
1837 #define T0_LOCAL(x) (*(rp - 2 - (x)))
1838 #define T0_POP() (*-- dp)
1839 #define T0_POPi() (*(int32_t *)(-- dp))
1840 #define T0_PEEK(x) (*(dp - 1 - (x)))
1841 #define T0_PEEKi(x) (*(int32_t *)(dp - 1 - (x)))
1842 #define T0_PUSH(v) do { *dp = (v); dp ++; } while (0)
1843 #define T0_PUSHi(v) do { *(int32_t *)dp = (v); dp ++; } while (0)
1844 #define T0_RPOP() (*-- rp)
1845 #define T0_RPOPi() (*(int32_t *)(-- rp))
1846 #define T0_RPUSH(v) do { *rp = (v); rp ++; } while (0)
1847 #define T0_RPUSHi(v) do { *(int32_t *)rp = (v); rp ++; } while (0)
1848 #define T0_ROLL(x) do { \
1849 size_t t0len = (size_t)(x); \
1850 uint32_t t0tmp = *(dp - 1 - t0len); \
1851 memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \
1852 *(dp - 1) = t0tmp; \
1854 #define T0_SWAP() do { \
1855 uint32_t t0tmp = *(dp - 2); \
1856 *(dp - 2) = *(dp - 1); \
1857 *(dp - 1) = t0tmp; \
1859 #define T0_ROT() do { \
1860 uint32_t t0tmp = *(dp - 3); \
1861 *(dp - 3) = *(dp - 2); \
1862 *(dp - 2) = *(dp - 1); \
1863 *(dp - 1) = t0tmp; \
1865 #define T0_NROT() do { \
1866 uint32_t t0tmp = *(dp - 1); \
1867 *(dp - 1) = *(dp - 2); \
1868 *(dp - 2) = *(dp - 3); \
1869 *(dp - 3) = t0tmp; \
1871 #define T0_PICK(x) do { \
1872 uint32_t t0depth = (x); \
1873 T0_PUSH(T0_PEEK(t0depth)); \
1875 #define T0_CO() do { \
1878 #define T0_RET() goto t0_next
1880 dp = ((t0_context *)t0ctx)->dp;
1881 rp = ((t0_context *)t0ctx)->rp;
1882 ip = ((t0_context *)t0ctx)->ip;
1889 if (t0x < T0_INTERPRETED) {
1901 ip = &t0_codeblock[t0x];
1903 case 1: /* literal constant */
1904 T0_PUSHi(t0_parse7E_signed(&ip));
1906 case 2: /* read local */
1907 T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip)));
1909 case 3: /* write local */
1910 T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP();
1913 t0off = t0_parse7E_signed(&ip);
1916 case 5: /* jump if */
1917 t0off = t0_parse7E_signed(&ip);
1922 case 6: /* jump if not */
1923 t0off = t0_parse7E_signed(&ip);
1929 SortedDictionary<int, string> nccode =
1930 new SortedDictionary<int, string>();
1931 foreach (string k in ccodeUni.Keys) {
1932 nccode[ccodeUni[k]] = k;
1934 foreach (int sn in nccode.Keys) {
1940 break;", sn, ccodeNames[sn], nccode[sn]);
1947 T0_ENTER(ip, rp, t0x);
1951 ((t0_context *)t0ctx)->dp = dp;
1952 ((t0_context *)t0ctx)->rp = rp;
1953 ((t0_context *)t0ctx)->ip = ip;
1958 foreach (CodeElement ce in gcode) {
1959 codeLen += ce.GetLength(oneByteCode);
1961 int dataBlockLen = 0;
1962 foreach (ConstData cd in blocks.Values) {
1963 dataBlockLen += cd.Length;
1967 * Write some statistics on produced code.
1969 Console.WriteLine("code length: {0,6} byte(s)", codeLen);
1970 Console.WriteLine("data length: {0,6} byte(s)", dataLen);
1971 Console.WriteLine("total words: {0} (interpreted: {1})",
1972 slotInterpreted + numInterpreted, numInterpreted);
1975 internal Word Lookup(string name)
1977 Word w = LookupNF(name);
1981 throw new Exception(String.Format("No such word: '{0}'", name));
1984 internal Word LookupNF(string name)
1987 words.TryGetValue(name, out w);
1991 internal TValue StringToBlob(string s)
1993 return new TValue(0, new TPointerBlob(this, s));
1996 internal bool TryParseLiteral(string tt, out TValue tv)
1999 if (tt.StartsWith("\"")) {
2000 tv = StringToBlob(tt.Substring(1));
2003 if (tt.StartsWith("`")) {
2004 tv = DecodeCharConst(tt.Substring(1));
2008 if (tt.StartsWith("-")) {
2010 tt = tt.Substring(1);
2011 } else if (tt.StartsWith("+")) {
2012 tt = tt.Substring(1);
2015 if (tt.StartsWith("0x") || tt.StartsWith("0X")) {
2017 tt = tt.Substring(2);
2018 } else if (tt.StartsWith("0b") || tt.StartsWith("0B")) {
2020 tt = tt.Substring(2);
2022 if (tt.Length == 0) {
2026 bool overflow = false;
2027 uint maxV = uint.MaxValue / radix;
2028 foreach (char c in tt) {
2030 if (d < 0 || d >= radix) {
2037 if ((uint)d > uint.MaxValue - acc) {
2044 if (acc > (uint)0x80000000) {
2050 throw new Exception(
2051 "invalid literal integer (overflow)");
2057 int ParseInteger(string tt)
2060 if (!TryParseLiteral(tt, out tv)) {
2061 throw new Exception("not an integer: " + ToString());
2066 void CheckCompiling()
2069 throw new Exception("Not in compilation mode");
2073 static string EscapeCComment(string s)
2075 StringBuilder sb = new StringBuilder();
2076 foreach (char c in s) {
2077 if (c >= 33 && c <= 126 && c != '%') {
2079 } else if (c < 0x100) {
2080 sb.AppendFormat("%{0:X2}", (int)c);
2081 } else if (c < 0x800) {
2082 sb.AppendFormat("%{0:X2}%{0:X2}",
2083 ((int)c >> 6) | 0xC0,
2084 ((int)c & 0x3F) | 0x80);
2086 sb.AppendFormat("%{0:X2}%{0:X2}%{0:X2}",
2087 ((int)c >> 12) | 0xE0,
2088 (((int)c >> 6) & 0x3F) | 0x80,
2089 ((int)c & 0x3F) | 0x80);
2092 return sb.ToString().Replace("*/", "%2A/");