From e3b997cc7d75b8dae9b8b512545fa7881ac3cc0b Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Tue, 11 Jul 2017 18:52:07 -0600 Subject: [PATCH] should probably just commit this shit --- NOTES | 423 +++++++++++++++++++++++++++++++++++++++++++ README.md | 287 +++++++++++------------------ sandbox/compiler.md | 40 ++++ sandbox/list/list.go | 39 ++++ 4 files changed, 608 insertions(+), 181 deletions(-) create mode 100644 NOTES create mode 100644 sandbox/compiler.md create mode 100644 sandbox/list/list.go diff --git a/NOTES b/NOTES new file mode 100644 index 0000000..65c328a --- /dev/null +++ b/NOTES @@ -0,0 +1,423 @@ +I need to prioritize the future of this project a bit more. I've been thinking +I'm going to figure this thing out at this level, but I shouldn't even be +working here without a higher level view. + +I can't finish this project without financial help. I don't think I can get a v0 +up without financial help. What this means at minimum, no matter what, I'm going +to have to: + +- Develop a full concept of the language that can get it to where I want to go + - Figure out where I want it to go +- Write the concept into a manifesto of the language +- Write the concept into a proposal for course of action to take in developing + the language further + +I'm unsure about what this language actually is, or is actually going to look +like, but I'm sure of those things. So those are the lowest hanging fruit, and I +should start working on them pronto. It's likely I'll need to experiment with +some ideas which will require coding, and maybe even some big ideas, but those +should all be done under the auspices of developing the concepts of the +language, and not the compiler of the language itself. + +######### + +Elemental types: + +* Tuples +* Arrays +* Integers + +######### + +Been doing thinking and research on ginger's elemental types and what their +properties should be. 
Ran into roadblock where I was asking myself these +questions: + +* Can I do this without atoms? +* What are different ways atoms can be encoded? +* Can I define language types (elementals) without defining an encoding for + them? + +I also came up with two new possible types: + +* Stream, effectively an interface which produces discrete packets (each has a + length), where the production of one packet indicates the size of the next one + at the same time. +* Tagged, sort of like a stream, effectively a type which says "We don't know + what this will be at compile-time, but we know it will be prefixed with some + kind of tag indicating its type and size." + * Maybe only the size is important + * Maybe precludes user defined types that aren't composites of the + elementals? Maybe that's ok? + +Ran into this: +https://www.ps.uni-saarland.de/~duchier/python/continuations.html +https://en.wikipedia.org/wiki/Continuation#First-class_continuations + +which is interesting. A lot of my problems now are derived from stack-based +systems and their need for knowing the size of input and output data, continuations +seem to be an alternative system? + +I found this: + +http://lambda-the-ultimate.org/node/4512 + +I don't understand any of it, I should definitely learn feather + +I should finish reading this: +http://www.blackhat.com/presentations/bh-usa-07/Ferguson/Whitepaper/bh-usa-07-ferguson-WP.pdf + +######### + +Ok, so I'm back at this for the first time in a while, and I've got a good thing +going. The vm package is working out well. Using tuples and atoms as the basis +of a language is pretty effective (thanks erlang!). I've got basic variable +assignment working as well. No functions yet. Here's the things I still need to +figure out or implement: + +* lang + * constant size arrays + * using them for a "do" macro + * figure out constant, string, int, etc... 
look at what erlang's actual + primitive types are for a hint + * figure out all needed macros for creating and working with lang types +* vm + * figure out the differentiation between compiler macros and runtime calls + * probably separate the two into two separate call systems + * the current use of varCtx is still pretty ugly, the do macro might help + clean it up +* functions + * are they a primitive? I guess so.... + * declaration and type + * variable deconstruction + * scoping/closures +* compiler macros, need vm's Run to output a lang.Term +* need to learn about linking + * figure out how to include llvm library in compiled binary and make it + callable. runtime macros will come from this + * linking in of other ginger code? or how to import in general +* compiler, a general purpose binary for taking ginger code and turning it + into machine code using the vm package + * swappable syntax, including syntax-dependent macros +* close the loop? + +############ + +I really want contexts to work. They _feel_ right, as far as abstractions go. +And they're clean, if I can work out the details. + +Just had a stupid idea, might as well write it down though. + +Similar to how the DNA and RNA in our cells work, each Context is created with +some starting set of data on it. This will be the initial protein block. Based +on the data there, some set of Statements (the RNA) will "latch" on and do +whatever work they're programmed to do. That work could include making new +Contexts and "releasing" them into the ether, where they would get latched onto +(or not). + +There's so many problems with this idea, it's not even a little viable. But here +goes: + +* Order of execution becomes super duper fuzzy. It would be really difficult to + think about how your program is actually going to work. + +* Having Statement sets just latch onto Contexts is super janky. 
They would get + registered I guess, and it would be pretty straightforward to differentiate + one Context from another, but what about conflicts? If two Statements want to + latch onto the same Context then what? If we wanted to keep the metaphor one + would just get randomly chosen over the other, but obviously that's insane. + +############ + +I explained some of this to ibrahim already, but I might as well get it all +down, cause I've expanded on it a bit since. + +Basically, ops (functions) are fucking everything up. The biggest reason for +this is that they are really really hard to implement without a type annotation +system. The previous big braindump is about that, but basically I can't figure +out a way that feels clean and good enough to be called a "solution" to type +inference. I really don't want to have to add type annotations just to support +functions, at least not until I explore all of my options. + +The only other option I've come up with so far is the context thing. It's nice +because it covers a lot of ground without adding a lot of complexity. Really the +biggest problem with it is it doesn't allow for creating new things which look +like operations. Instead, everything is done with the %do operator, which feels +janky. + +One solution I just thought of is to get rid of the %do operator and simply make +it so that a list of Statements can be used as the operator in another +Statement. This would _probably_ allow for everything that I want to do. One +outstanding problem I'm facing is figuring out if all Statements should take a +Context or not. + +* If they did it would be a lot more explicit what's going on. There wouldn't be + an ethereal "this context" that would need to be managed and thought about. It + would also make things like using a set of Statements as an operator a lot + more straightforward, since without Contexts in the Statement it'll be weird + to "do" a set of Statements in another Context. 
+ +* On the other hand, it's quite a bit more boilerplate. For the most part most + Statements are going to want to be run in "this" context. Also this wouldn't + really decrease the number of necessary macros, since one would still be + needed in order to retrieve the "root" Context. + +* One option would be for a Statement's Context to be optional. I don't really + like this option, it makes a very fundamental datatype (a Statement) a bit + fuzzier. + +* Another thing to think about is that I might just rethink how %bind works so + that it doesn't operate on an ethereal "this" Context. %ctxbind is one attempt + at this, but there's probably other ways. + +* One issue I just thought of with having a set of Statements be used as an + operator is that the argument to that Statement becomes.... weird. What even + is it? Something the set of Statements can access somehow? Then we still need + something like the %in operator. + +Let me backtrack a bit. What's the actual problem? The actual thing I'm +struggling with is allowing for code re-use, specifically pure functions. I +don't think there's any way anyone could argue that pure functions are not an +effective building block in all of programming, so I think I can make that my +statement of faith: pure functions are good and worthwhile, impure functions +are.... fine. + +Implementing them, however, is quite difficult. Moreso than I thought it would +be. The big inhibitor is the method by which I actually pass input data into the +function's body. From an implementation standpoint it's difficult because I +*need* to know how many bytes on the stack the arguments take up. From a syntax +standpoint this is difficult without a type annotation system. And from a +usability standpoint this is difficult because it's a task the programmer has to +do which doesn't really have to do with the actual purpose or content of the +function, it's just a book-keeping exercise. + +So the stack is what's screwing us over here. 
It's a nice idea, but ultimately +makes what we're trying to do difficult. I'm not sure if there's ever going to +be a method of implementing pure functions that doesn't involve argument/return +value copying though, and therefore which doesn't involve knowing the byte size +of your arguments ahead of time. + +It's probably not worth backtracking this much either. For starters, cpus are +heavily optimized for stack based operations, and much of the way we currently +think about programming is also based on the stack. It would take a lot of +backtracking if we ever moved to something else, if there even is anything else +worth moving to. + +If that's the case, how is the stack actually used then? + +* There's a stack pointer which points at an address on the stack, the stack + being a contiguous range of memory addresses. The place the stack points to is + the "top" of the stack, all higher addresses are considered unused (no matter + what's in them). All the values in the stack are available to the currently + executing code, it simply needs to know either their absolute address or their + relative position to the stack pointer. + +* When a function is "called" the arguments to it are copied onto the top of the + stack, the stack pointer is increased to reflect the new stack height, and the + function's body is jumped to. Inside the body the function need only pop + values off the stack as it expects them, as long as it was called properly it + doesn't matter how or when the function was called. Once it's done operating + the function ensures all the input values have been popped off the stack, and + subsequently pushes the return values onto the stack, and jumps back to the + caller (the return address was also stored on the stack). + +That's not quite right, but it's close enough for most cases. The more I'm +reading about this the more I think it's not going to be worth it to backtrack +passed the stack. 
There's a lot of compiler and machine specific crap that gets +involved at that low of a level, and I don't think it's worth getting into it. +LLVM did all of that for me, I should learn how to make use of that to make what +I want happen. + +But what do I actually want? That's the hard part. I guess I've come full +circle. I pretty much *need* to use llvm functions. But I can't do it without +declaring the types ahead of time. Ugghh. + +################################ + +So here's the current problem: + +I have the concept of a list of statements representing a code block. It's +possible/probable that more than this will be needed to represent a code block, +but we'll see. + +There's two different ways I think it's logical to use a block: + +* As a way of running statements within a new context which inherits all of its + bindings from the parent. This would be used for things like if statements and + loops, and behaves the way a code block behaves in most other languages. + +* To define a operator body. An operator's body is effectively the same as the + first use-case, except that it has input/output as well. An operator can be + bound to an identifier and used in any statement. + +So the hard part, really, is that second point. I have the first done already. +The second one isn't too hard to "fake" using our current context system, but it +can't be made to be used as an operator in a statement. Here's how to fake it +though: + +* Define the list of statements +* Make a new context +* Bind the "input" bindings into the new context +* Run %do with that new context and list of statements +* Pull the "output" bindings out of that new context + +And that's it. It's a bit complicated but it ultimately works and effectively +inlines a function call. + +It's important that this looks like a normal operator call though, because I +believe in guy steele. Here's the current problems I'm having: + +* Defining the input/output values is the big one. 
In the inline method those + were defined implicitly based on what the statements actually use, and the + compiler would fail if any were missing or the wrong type. But here we ideally + want to define an actual llvm function and not inline everytime. So we need to + somehow "know" what the input/output is, and their types. + + * The output value isn't actually *that* difficult. We just look at the + output type of the last statement in the list and use that. + + * The input is where it gets tricky. One idea would be to use a statement + with no input as the first statement in the list, and that would define + the input type. The way macros work this could potentially "just work", + but it's tricky. + + * It would also be kind of difficult to make work with operators that take + in multiple parameters too. For example, `bind A, 1` would be the normal + syntax for binding, but if we want to bind an input value it gets weirder. + + * We could use a "future" kind of syntax, like `bind A, _` or something + like that, but that would requre a new expression type and also just + be kind of weird. + + * We could have a single macro which always returns the input, like + `%in` or something. So the bind would become `bind A, %in` or + `bind (A, B), %in` if we ever get destructuring. This isn't a terrible + solution, though a bit unfortunate in that it could get confusing with + different operators all using the same input variable effectively. It + also might be a bit difficult to implement, since it kind of forces us + to only have a single argument to the LLVM function? Hard to say how + that would work. Possibly all llvm functions could be made to take in + a struct, but that would be ghetto af. Not doing a struct would take a + special interaction though.... It might not be possible to do this + without a struct =/ + +* Somehow allowing to define the context which gets used on each call to the + operator, instead of always using a blank one, would be nice. 
+ + * The big part of this problem is actually the syntax for calling the + operator. It's pretty easy to have this handled within the operator by the + %thisctx macro. But we want the operator to be callable by the same syntax + as all other operator calls, and currently that doesn't have any way of + passing in a new context. + + * Additionally, if we're implementing the operator as an LLVM function then + there's not really any way to pass in that context to it without making + those variables global or something, which is shitty. + +* So writing all this out it really feels like I'm dealing with two separate + types that just happen to look similar: + + * Block: a list of statements which run with a variable context. + + * Operator: a list of statements which run with a fixed (empty?) context, + and have input/output. + +* There's so very nearly a symmetry there. Things that are inconsistent: + + * A block doesn't have input/output + + * It sort of does, in the form of the context it's being run with and + %ctxget, but not an explicit input/output like the operator has. + + * If this could be reconciled I think this whole shitshow could be made + to have some consistency. + + * Using %in this pretty much "just works". But it's still weird. Really + we'd want to turn the block into a one-off operator everytime we use + it. This is possible. + + * An operator's context must be empty + + * It doesn't *have* to be, defining the ctx which goes with the operator + could be part of however an operator is created. + +* So after all of that, I think operators and blocks are kind of the same. + + * They both use %in to take in input, and both output using the last statement + in their list of statements. + + * They both have a context bound to them, operators are fixed but a block + changes. + + * An operator is a block with a bound context. + +##############@@@@@@@@@#$%^&^%$#@#$%^&* + +* New problem: type inference. 
LLVM requires that a function's definition have + the type specified up-front. This kind of blows. Well actually, it blows a lot + more than kind of. There's two things that need to be infered from a List of + Statements then: the input type and the output type. There's two approaches + I've thought of in the current setup. + + * There's two approaches to determining the type of an operator: analyze the + code as ginger expressions, or build the actual llvm structures and + analyze those. + + * Looking at the ginger expressions is definitely somewhat fuzzy. We can + look at all the statements and sub-statements until we find an + instance of %in, then look at what that's in input into. But if it's + simply binding into an Identifier then we have to find the identifier. + If it's destructuring then that gets even *more* complicated. + + * Destructuring is what really makes this approach difficult. + Presumably there's going to be a function that takes in an + Identifier (or %in I guess?) and a set of Statements and returns + the type for that Identifier. If we find that %in is destructured + into a tuple then we would run that function for each constituent + Identifier and put it all together. But then this inference + function is really coupled to %bind, which kind of blows. Also we + may one day want to support destructuring into non-tuples as well, + which would make this even harder. + + * We could make it the job of the macro definition to know its input + and output types, as well as the types of any bindings it makes. + That places some burden on user macros in the future, but then + maybe it can be inferred for user macros? That's a lot of hope. It + would also mean the macro would need the full set of statements + that will ever run in the same Context as it, so it can determine + the types of any bindings it makes. + + * The second method is to build the statements into LLVM structures and + then look at those structures. 
This has the benefit of being + non-ambiguous once we actually find the answer. LLVM is super strongly + typed, and re-iterates the types involved for every operation. So if + the llvm builder builds it then we need only look for the first usage + of every argument/return and we'll know the types involved. + + * This requires us to use structs for tuples, and not actually use + multiple arguments. Otherwise it won't be possible to know the + difference between a 3 argument function and a 4 argument one + which doesn't use its 4th argument (which shouldn't really happen, + but could). + + * The main hinderence is that the llvm builder is really not + designed for this sort of thing. We could conceivably create a + "dummy" function with bogus types and write the body, analyze the + body, erase the function, and start over with a non-dummy + function. But it's the "analyze the body" step that's difficult. + It's difficult to find the types of things without the llvm.Value + objects in hand, but since building is set up as a recursive + process that becomes non-trivial. This really feels like the way + to go though, I think it's actually doable. + + * This could be something we tack onto llvmVal, and then make + Build return extra data about what types the Statements it + handled input and output. + +* For other setups that would enable this a bit better, the one that keeps + coming to mind is a more pipeline style system. Things like %bind would need + to be refactored from something that takes a Tuple to something that only + takes an Identifier and returns a macro which will bind to that Identifier. + This doesn't *really* solve the type problem I guess, since whatever is input + into the Identifier's bind doesn't necessarily have a type attached to it. + Sooo yeah nvm. diff --git a/README.md b/README.md index 8bf3660..97144ef 100644 --- a/README.md +++ b/README.md @@ -1,193 +1,118 @@ -# Ginger - I'll get it right this time +# Ginger - holy fuck again? 
-## A note on compile-time vs runtime +## The final result. A language which can do X -Ginger is a language whose primary purpose is to be able to describe and compile -itself. A consequence of this is that it's difficult to describe the actual -process by which compiling is done without first describing the built-in types, -but it's also hard to describe the built-in types without first describing the -process by which compiling is done. So I'm going to do one, then the other, and -I ask you to please bear with me. +- Support my OS + - Compile on many architectures + - Be low level and fast (effectively c-level) + - Be well defined, using a simple syntax + - Extensible based on which section of the OS I'm working on + - Good error messages -## The primitive types +- Support other programmers and other programming areas + - Effectively means able to be used in most purposes + - Able to be quickly learned + - Able to be shared + - Free + - New or improved components shared between computers/platforms/people -Ginger is a language which encompasses itself. That means amongst the "normal" -primitives a language is expected to have it also has a couple which are used -for macros (which other languages would not normally expose outside of the -compiler's implementation). +- Support itself + - Garner a team to work on the compiler + - Team must not require my help for day-to-day + - Team must stick to the manifesto, either through the design or through + trust -``` -// These are numbers -0 -1 -2 -3 +## The language: A manifesto, defines the concept of the language -// These are strings -"hello" -"world" -"how are you?" +- Quips + - Easier is not better -// These are identifiers. 
Values at runtime are bound to -// identifiers, such that whenever an identifier is used in a non-macro -// statement that value will be replaced with it -foo -barBaz -biz_buz +- Data as the language + - Differentiation between "syntax" and "language", parser vs compiler + - Syntax defines the form which is parsed + - The parser reads the syntax forms into data structures + - Language defines how the syntax is read into data structures and + "understood" (i.e. and what is done with those structures). + - A language maybe have multiple syntaxes, if they all parse into + the same underlying data structures they can be understood in the + same way. + - A compiler turns the parsed language into machine code. An + interpreter performs actions directly based off of the parsed + language. -// These are macro identifiers. They are like identifiers, except they start -// with percent signs, and they represent operations or values which only exist -// at compile-time There are a number of builtin macros, but they can also be -// user-defined. We'll see more of them later -%foo -%barBaz -%biz_buz -``` +- Types, instances, and operations + - A language has a set of elemental types, and composite types + - "The type defines the [fundamental] operations that can be done on the + data, the meaning of the data, and the way values of that type can be + stored" + - Elemental types are all forms of numbers, since numbers are all a + computer really knows + - Composite types take two forms: + - Homogeneous: all composed values are the same type (arrays) + - Heterogeneous: all composed values are different + - If known size and known types per-index, tuples + - A 0-tuple is kind of special, and simply indicates absence of + any value. + - A third type, Any, indicates that the type is unknown at compile-time. + Type information must be passed around with it at runtime. + - An operation has an input and output. It does some action on the input + to produce the output (presumably). 
An operation may be performed as + many times as needed, given any value of the input type. The types of + both the input and output are constant, and together they form the + operation's type. + - A value is an instance of a type, where the type is known at compile-time + (though the type may be Any). Multiple values may be instances of the same + type. E.g.: 1 and 2 are both instances of int + - A value is immutable + - TODO value is a weird word, since an instance of a type has both a + type and value. I need to think about this more. Instance might be a + better name -## The data structures +- Stack and scope + - A function call operates within a scope. The scope had arguments passed + into it. + - When a function calls another, that other's scope is said to be "inside" + the caller's scope. + - A pure function only operates on the arguments passed into it. + - A pointer allows for modification outside of the current scope, but only a + pointer into an outer scope. A function which does this is "impure" -Like the primitives, ginger has few built-in data structures, and the ones it -does have are primarily used to implement itself. +- Built-in + - Elementals + - ints (n-bit) + - tuples + - stack arrays + - indexable + - head/tail + - reversible (?) + - appendable + - functions (?) + - pointers (?) + - Any (?) + - Elementals must be enough to define the type of a variable + - Ability to create and modify elemental types + - immutable, pure functions + - Other builtin functionality: + - Load/call linked libraries + - Compile-time macros + - Red/Blue -``` -// These are tuples. Each is a unique and different type, based on its number of -// elements, the type of each element, and the order those types are in. The -// type of a tuple must be known at compile-time -1, 2 -4, "foo", 5 - -// These are arrays. Their elements must be of the same type, but their length -// can be dynamically determined at runtime. 
The type of an array is only -// determined by the type of its elements, which must be known at compile-time -[1, 2, 3] -["a", "b", "c"] - -// These are statements. A statement is a pair of things, the first being a -// macro identifier, the second being an argument (usually a tuple). -%add 1,2 -%incr 1 -``` - -There is a final data structure, called a block, which I haven't come up with a -special sytax for yet, and will be discussed later. - -## Parenthesis - -A pair of parenthesis can be used to enclose any type for clarity. For example: - -``` -// No parenthesis -%add 1, 2 - -// Parenthesis around the argument (the tuple) -%add (1, 2) - -// Parenthsis around the statement -(%add 1, 2) - -// Parenthesis around everything -(%add (1, 2)) -``` - -## Compilation - -Ginger programs are taken as a list of statements (as in, the primitive types -we've defined already). - -During compilation each statement is looked at, first its arguments then its -operator. The arguments are "resolved" first, so that they have only primitive -types that aren't macros, statements or blocks. Then that is passed into the -macro operator which may output a further statement, or may change something in -the context of compilation (e.g. the set of identifier bindings), or both. This -is done until the statement contains no more macros to run, at which point the -process repeats at the next statement. - -### Example - -It's difficult to see this without examples, imo. So here's some example code, -with explanatory comments: - -``` -// %bind is a macro, it takes in a tuple of an identifier and some value, and -// assigns that value to the identifier at runtime -%bind a, 1 - -// %add takes in a tuple of numbers or identifiers and will return the sum of -// them. Here we're then binding that sum (3) to the identifier b. -%bind b, (%add a, 2) - -// The previous two example are fairly simple, but do something subtle. 
A ginger -// program starts as a list of statements, and must continue to be a list of -// statements after all macros are run. Each of the above is a macro statement -// which returns a "runtime statement", i.e. a construct representing something -// which will happen at runtime. But they are of type `statement` nonetheless, -// so running these macros does not change the overall type of the program (a -// list of statements) - -// Creates an identifier c and returns it. This can't be included at this point, -// because it doesn't return a statement of any sort. -// %ident "c" - -// This first creates an identifier a, which is then part of a tuple (a, 2). -// This tuple is used in a further tuple, now (%add, (a, 2)). Remember, %add is -// simply a macro identifier at this point, it's not actually "run" because it's -// part of a tuple, not a statement, and as such can be passed around like any -// other primitive type. -// -// Finally, the tuple (%add, (a, 2)) is passed into %stmt, which creates a new -// statement from a tuple of an operation and an argument. So the statement -// (%add a, 2) is returned. Since this statement still has a macro, %add, that -// is then called, and it finally returns a runtime statement which adds the -// value a is bound to to 2> -%stmt %add, (%ident "a", 2) - -// This is exactly equivalent to the above statement, except that it skips some -// redundant macro processing. They can be used interchangeably in all cases and -// situations. -%add a, 2 -``` - -## Blocks - -Thus far we've only been able to create code linearly, without much way to do -code-reuse or scoping or anything like that. - -Blocks fix this. A block is composed of three lists: - -- A list of identifiers which will be "imported" from the parent block (the top - level list of list of statements is itself a block, psych!). 
- -- A list of statements - -- A list of identifiers which will be "exported" from the block into the parent - -There is not yet a special syntax for blocks, but there is a macro operator to -make them, much like the ones for statements and identifiers: - -``` -%bind a, 2 - -%do (%block [a], [ - %bind b, (%add a, 3) -], [b]) - -%println b // prints 5 -``` - -In the above we create a block which imports the `a` identifier, and exports the -`b` identifier that it creates internally. Note that we have to use `%do` -in order to actually "run" the block, since `%block` merely returns the block -structure, which is not a statement. - -This seems kind of like a pain, and not much like a function. But combined with -other macros blocks can be used to implement your own function dispatch, so you -can add in variadic, defaults, named parameters, as well as implement closures, -type methods, and so forth, as needed and in the style desired. - -## Final note - -Keep in mind: blocks, statements, etc... are themselves data structures, and -given appropriate built-in macros they can be manipulated like any other data -structure. These are merely the building blocks for all other language features -(hopefully). +- Questions + - Strings need to be defined in terms of the built-in types, which would be + an array of lists. But this means I'm married to that definition of a + string, it'd be difficult for anyone to define their own and have it + interop. Unless "int" was some kind of macro type that did some fancy + shit, but that's kind of gross. + - An idea of the "equality" of two variables being tied not just to their + value but to the context in which they were created. Would aid in things + like compiler tagging. 
+ - There's a "requirement loop" of things which need figuring out: + - function structure + - types + - seq type + - stack/scope + - Most likely I'm going to need some kind of elemental type to indicate + something should happen at compile-time and not runtime, or the other way + around. +## The roadmap: A plan of action for tackling the language diff --git a/sandbox/compiler.md b/sandbox/compiler.md new file mode 100644 index 0000000..7d09266 --- /dev/null +++ b/sandbox/compiler.md @@ -0,0 +1,40 @@ +I need to figure out how the compiler-time vs run-time execution is going to +work, and how I'm going to differentiate between the two in the language. + +main := MainFunc() +foo := main.Int(1) + +incrFunc := main.NewFunction(inType, outType) +in := incrFunc.In() +add := incrFunc.Var("add") // should be macro? +out := incrFunc.Call(add, incrFunc.Int(1), in) +incrFunc.Return(out) // ugly + +main.Return(main.Call(incrFunc, foo)) + +compiler := NewCompiler() +compiler.Enter(main) + +//////////////////////////////////////////////////////////////////////////////// + +type val { type, llvmVal } + +type func { type, llvmVal } + +//////////////////////////////////////////////////////////////////////////////// + +MACRO DISPATCHER as the thing which has a set of exposed methods. defmacro like +thing can be built on top of it. + +TYPED HEAP. Kind of like a typed map mixed with a set. Maybe looks like + +``` +h := make(heap[float64], 10) +id := h.add(8.5) +eightPointFive := h.get(id) +h.del(id) +``` + +Since the heap is a known size and each element in it is as well it can be +statically allocated at one spot in the stack and the pointer to it passed +farther into the stack as needed. 
diff --git a/sandbox/list/list.go b/sandbox/list/list.go
new file mode 100644
index 0000000..10b3cfe
--- /dev/null
+++ b/sandbox/list/list.go
@@ -0,0 +1,48 @@
+// Package list is a sandbox sketch of a list of ints backed by a shared
+// slice.
+package list
+
+/*
+	+ size isn't really _necessary_ unless O(1) Len is wanted
+	+ append doesn't work well on stack
+*/
+
+// List is a view onto a backing slice of ints. The view starts at index head
+// of underlying and spans size elements.
+type List struct {
+	// in practice this would be a constant size, with the compiler knowing the
+	// size
+	underlying []int
+	head, size int
+}
+
+// New returns a List containing the given ints, in order. The values are
+// copied into a freshly allocated backing slice, so later changes to the
+// caller's slice are not visible through the List.
+func New(ii ...int) List {
+	l := List{
+		underlying: make([]int, len(ii)),
+		size:       len(ii),
+	}
+	copy(l.underlying, ii)
+	return l
+}
+
+// Len returns the number of elements in the List.
+func (l List) Len() int {
+	return l.size
+}
+
+// HeadTail returns the first element of the List along with a List of the
+// remaining elements. The tail shares the receiver's backing slice; nothing is
+// copied. It panics if the List is empty.
+func (l List) HeadTail() (int, List) {
+	if l.size == 0 {
+		panic("can't take HeadTail of empty list")
+	}
+	return l.underlying[l.head], List{
+		underlying: l.underlying,
+		head:       l.head + 1,
+		size:       l.size - 1,
+	}
+}