ast.h
No OneTemporary

File Metadata

Created
Sun, Feb 15, 12:01 PM
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Author: pgess <v.melnychenko@xreate.org>
* File: ast.h
*/
/**
* \file ast.h
* \brief A syntax tree representation and related code
*
* \sa xreate::AST
*/
#ifndef AST_H
#define AST_H
#include "attachments.h"
#include "utils.h"
#include <string>
#include <list>
#include <vector>
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <algorithm>
#include <climits>
namespace xreate {
struct ScopedSymbol;
struct Symbol;
}
namespace std {
template<>
struct hash<xreate::ScopedSymbol> {
std::size_t operator()(xreate::ScopedSymbol const& s) const;
};
template<>
struct equal_to<xreate::ScopedSymbol> {
bool operator()(const xreate::ScopedSymbol& __x, const xreate::ScopedSymbol& __y) const;
};
template<>
struct hash<xreate::Symbol> {
size_t operator()(xreate::Symbol const& s) const;
};
template<>
struct equal_to<xreate::Symbol> {
bool operator()(const xreate::Symbol& __x, const xreate::Symbol& __y) const;
};
}
namespace xreate {
struct String_t {
};
struct Identifier_t {
};
struct Number_t {
};
struct Type_t {
};
template<typename A>
class Atom {
};
//DEBT store line:col for all atoms/identifiers
template<> class
Atom<Identifier_t> {
public:
Atom(const std::wstring& value);
Atom(std::string && name);
const std::string& get() const;
private:
std::string __value;
};
template<>
class Atom<Number_t> {
public:
Atom(wchar_t* value);
Atom(int value);
double get()const;
private:
double __value;
};
template<>
class Atom<String_t> {
public:
Atom(const std::wstring& value);
Atom(std::string && name);
const std::string& get() const;
private:
std::string __value;
};
enum class TypePrimitive {
Invalid, Bool, I8, I32, I64, Int, Float, String
};
enum class TypeOperator {
NONE, VOID, REF, ALIAS, VARIANT, ARRAY, RECORD, ACCESS, SLAVE, GUARD
};
struct struct_tag {
};
const struct_tag tag_struct = struct_tag();
/**
* \brief A type representation to support type system
*
* The class represents type in a denormalized form, i.e. with no arguments and aliases substitution
* \sa AST::expandType()
*/
class TypeAnnotation {
public:
TypeAnnotation();
TypeAnnotation(TypePrimitive typ);
TypeAnnotation(TypeOperator op, std::initializer_list<TypeAnnotation> operands);
TypeAnnotation(TypeOperator op, std::vector<TypeAnnotation>&& operands);
static TypeAnnotation alias(const std::string& alias);
void addBindings(std::vector<Atom<Identifier_t>>&& params);
void addFields(std::vector<Atom<Identifier_t>>&& listFields);
bool operator<(const TypeAnnotation& t) const;
// TypeAnnotation (struct_tag, std::initializer_list<TypePrimitive>);
bool isValid() const;
TypeOperator __operator = TypeOperator::NONE;
std::vector<TypeAnnotation> __operands;
TypePrimitive __value;
std::string __valueCustom;
std::vector<std::string> fields;
std::vector<std::string> bindings;
private:
};
enum class Operator {
INVALID, UNDEF, AND, OR, ADD, SUB, MUL, DIV, MOD,
EQU, NE, NEG, LSS,
LSE, GTR, GTE, LIST,
LIST_INDEX, LIST_RANGE,
CALL, CALL_INTRINSIC, QUERY, QUERY_LATE,
IMPL/* implication */, MAP,
FOLD, FOLD_INF, INDEX,
IF, SWITCH, SWITCH_VARIANT, SWITCH_LATE,
CASE, CASE_DEFAULT, LOGIC_AND,
CONTEXT_RULE, VARIANT, SEQUENCE, UPDATE
};
class Function;
class AST;
class CodeScope;
class MetaRuleAbstract;
typedef ManagedPtr<Function> ManagedFnPtr;
typedef ManagedPtr<CodeScope> ManagedScpPtr;
typedef ManagedPtr<MetaRuleAbstract> ManagedRulePtr;
const ManagedScpPtr NO_SCOPE = ManagedScpPtr(UINT_MAX, 0);
/**
* \brief AST node to represent a single instruction or an annotation
* \attention In case of any changes update \ref xreate::ExpressionHints auxiliary helper as well
*
* %Expression is a generic building block of syntax tree which is able to hold node data
* along with child nodes as operands.
*
* \note For types the %expression-like data structure \ref TypeAnnotation is used rather than Expression itself.
* \sa xreate::AST, xreate::TypeAnnotation
*/
struct Expression {
friend class CodeScope;
friend class TranscendLayer;
friend class CFAPass;
friend class ExpressionHints;
Expression(const Operator &oprt, std::initializer_list<Expression> params);
Expression(const Atom<Identifier_t>& ident);
Expression(const Atom<Number_t>& number);
Expression(const Atom<String_t>& a);
Expression();
void setOp(Operator oprt);
void addArg(Expression&& arg);
void addBindings(std::initializer_list<Atom<Identifier_t>> params);
void bindType(TypeAnnotation t);
template<class InputIt>
void addBindings(InputIt paramsBegin, InputIt paramsEnd);
void addTags(const std::list<Expression> tags) const;
void addBlock(ManagedScpPtr scope);
const std::vector<Expression>& getOperands() const;
double getValueDouble() const;
void setValueDouble(double value);
const std::string& getValueString() const;
void setValue(const Atom<Identifier_t>&& v);
bool isValid() const;
bool isDefined() const;
bool operator==(const Expression& other) const;
/**
* \brief is it string, number, compound operation and so on
*/
enum {
INVALID, COMPOUND, IDENT, NUMBER, STRING, BINDING
} __state = INVALID;
/**
* \brief Valid for compound State. Holds type of compound operator
*/
Operator op;
/**
* \brief Unique id to identify expression within syntax tree
*/
unsigned int id;
/**
* \brief Exact meaning depends on particular instruction
* \details As an example, named lists/structs hold field names in bindings
*/
std::vector<std::string> bindings;
/**
* \brief Holds child instructions as arguments
*/
std::vector<Expression> operands;
/**
* \brief Holds type of instruction's result
*/
TypeAnnotation type;
/**
* \brief Holds additional annotations
*/
mutable std::map<std::string, Expression> tags;
/**
* \brief Child code blocks
* \details For example, If statement holds TRUE-branch as first and FALSE-branch as second block here
*/
std::list<CodeScope*> blocks;
private:
std::string __valueS;
double __valueD;
static unsigned int nextVacantId;
};
bool operator<(const Expression&, const Expression&);
template<class InputIt>
void Expression::addBindings(InputIt paramsBegin, InputIt paramsEnd) {
size_t index = bindings.size();
std::transform(paramsBegin, paramsEnd, std::inserter(bindings, bindings.end()),
[&index, this] (const Atom<Identifier_t> atom) {
std::string key = atom.get();
return key;
});
}
typedef std::list<Expression> ExpressionList;
enum class TagModifier {
NONE, ASSERT, REQUIRE
};
enum class DomainAnnotation {
FUNCTION, VARIABLE
};
class RuleArguments : public std::vector<std::pair<std::string, DomainAnnotation>>
{
public:
void add(const Atom<Identifier_t>& name, DomainAnnotation typ);
};
class RuleGuards : public std::vector<Expression> {
public:
void add(Expression&& e);
};
class TranscendLayer;
class LLVMLayer;
class MetaRuleAbstract {
public:
MetaRuleAbstract(RuleArguments&& args, RuleGuards&& guards);
virtual ~MetaRuleAbstract();
virtual void compile(TranscendLayer& layer) = 0;
protected:
RuleArguments __args;
RuleGuards __guards;
};
class RuleWarning : public MetaRuleAbstract {
friend class TranscendLayer;
public:
RuleWarning(RuleArguments&& args, RuleGuards&& guards, Expression&& condition, Atom<String_t>&& message);
virtual void compile(TranscendLayer& layer);
~RuleWarning();
private:
std::string __message;
Expression __condition;
};
typedef unsigned int VNameId;
namespace versions {
typedef int VariableVersion;
const VariableVersion VERSION_NONE = -2;
const VariableVersion VERSION_INIT = 0;
}
template<>
struct AttachmentsDict<versions::VariableVersion> {
typedef versions::VariableVersion Data;
static const unsigned int key = 6;
};
struct ScopedSymbol {
VNameId id;
versions::VariableVersion version;
static const ScopedSymbol RetSymbol;
};
struct Symbol {
ScopedSymbol identifier;
const CodeScope * scope;
};
struct ASTSite {
unsigned int id;
Expression getDefinition() const;
//static Ast registerSite(const Expression& e);
};
struct IdentifierSymbol{};
struct ExprAlias_A{};
struct ExprId_A{};
template<>
struct AttachmentsDict<IdentifierSymbol> {
typedef Symbol Data;
static const unsigned int key = 7;
};
template<>
struct AttachmentsDict<ExprAlias_A> {
typedef Symbol Data;
static const unsigned int key = 9;
};
template<>
struct AttachmentsDict<ExprId_A>{
typedef Expression Data;
static const unsigned int key = 12;
};
typedef std::pair<Expression, TagModifier> Tag;
bool operator<(const ScopedSymbol& s1, const ScopedSymbol& s2);
bool operator==(const ScopedSymbol& s1, const ScopedSymbol& s2);
bool operator<(const Symbol& s1, const Symbol& s2);
bool operator==(const Symbol& s1, const Symbol& s2);
bool operator< (const ASTSite& s1, const ASTSite& s2);
/**
* \brief AST node to represent a single code block/a scope of visibility
*
* Holds a single expression as a `body` along with set of variable assignments(declarations) used in body's expression.
* \sa xreate::AST
*/
class CodeScope {
friend class Function;
friend class PassManager;
public:
CodeScope(CodeScope* parent = 0);
~CodeScope();
/** \brief Set expression as a body */
void setBody(const Expression& body);
/** \brief Returns current code scope body */
const Expression& getBody() const;
/** \brief Adds variable definition to be used in body as well as in other declarations */
Symbol addDefinition(Expression&& var, Expression&& body);
/** \brief Returns symbols' definition */
static const Expression& getDefinition(const Symbol& symbol, bool flagAllowUndefined = false);
const Expression& getDefinition(const ScopedSymbol& symbol, bool flagAllowUndefined = false) const;
/** \brief Adds variable defined elsewhere */
void addBinding(Expression&& var, Expression&& argument, const VNameId hintBindingId = 0);
std::vector<std::string> __bindings;
std::map<std::string, VNameId> __identifiers;
CodeScope* __parent;
//TODO move __definitions to SymbolsAttachments data
//NOTE: definition of return type has index 0
std::unordered_map<ScopedSymbol, Expression> __declarations;
std::vector<Expression> tags;
std::vector<Expression> contextRules;
bool bindExternalArgs = false;
std::set<Symbol> boundArgs;
private:
ScopedSymbol registerIdentifier(const Expression& identifier, const VNameId hintBindingId = 0);
public:
bool recognizeIdentifier(const Expression& identE);
ScopedSymbol findSymbolByAlias(const std::string& alias);
};
/**
* \brief AST node to represent a single function
*
* Holds an `__entry` entry code scope along with `guard` to denote the different specializations.
* \sa xreate::AST
*/
class Function {
friend class Expression;
friend class CodeScope;
friend class AST;
public:
Function(const Atom<Identifier_t>& name);
/**
* \brief Adds function arguments
*/
void addBinding(Atom <Identifier_t>&& name, Expression&& argument, const VNameId hintBindingId=0);
/**
* \brief Adds additional function annotations
*/
void addTag(Expression&& tag, const TagModifier mod);
const std::string& getName() const;
const std::map<std::string, Expression>& getTags() const;
CodeScope* getEntryScope() const;
CodeScope* __entry;
std::string __name;
Expression guard;
private:
std::map<std::string, Expression> __tags;
};
class ExternData;
typedef Expanded<TypeAnnotation> ExpandedType;
struct TypeInferred{};
template<>
struct AttachmentsDict<TypeInferred> {
typedef ExpandedType Data;
static const unsigned int key = 11;
};
enum ASTInterface {
CFA, DFA, Extern, Adhoc
};
struct FunctionSpecialization {
std::string guard;
size_t id;
};
struct FunctionSpecializationQuery {
std::unordered_set<std::string> context;
};
template<>
struct AttachmentsId<Expression>{
static unsigned int getId(const Expression& expression){
return expression.id;
}
};
template<>
struct AttachmentsId<Symbol>{
static unsigned int getId(const Symbol& s){
return s.scope->__declarations.at(s.identifier).id;
}
};
template<>
struct AttachmentsId<ManagedFnPtr>{
static unsigned int getId(const ManagedFnPtr& f){
const Symbol symbolFunction{ScopedSymbol::RetSymbol, f->getEntryScope()};
return AttachmentsId<Symbol>::getId(symbolFunction);
}
};
template<>
struct AttachmentsId<CodeScope*>{
static unsigned int getId(const CodeScope* scope){
const Symbol symbolScope{ScopedSymbol::RetSymbol, scope};
return AttachmentsId<Symbol>::getId(symbolScope);
}
};
template<>
struct AttachmentsId<unsigned int>{
static unsigned int getId(const unsigned int id){
return id;
}
};
class TypeResolver;
enum class IntrinsicFn {
ARR_INIT,
REC_FIELDS,
CON_KEYS
};
namespace details { namespace inconsistent {
/**
* \brief AST in an inconsistent form during construction
*
* Represents AST under construction(**inconsistent state**).
* \attention Clients should use rather xreate::AST unless client's code explicitly works with Syntax Tree during construction.
*
* Typically an instance is created by xreate::XreateManager only and filled out by the parser
* \sa xreate::XreateManager::prepare(std::string&&)
*/
class AST {
friend class xreate::TypeResolver;
public:
AST();
/**
* \brief Adds new function to AST
* \param f Function to register
*/
void add(Function* f);
/**
* \brief Adds new declarative rule to AST
* \param r Declarative Rule
*/
void add(MetaRuleAbstract* r);
/** \brief Registers new code block */
ManagedScpPtr add(CodeScope* scope);
/**
* \brief Add new type to AST
* @param t Type definition
* @param alias Typer name
*/
void add(TypeAnnotation t, Atom<Identifier_t> alias);
/** \brief Current module's name */
std::string getModuleName();
/**
* \brief Looks for function with given name
* \param name Function name to find
* \note Requires that only one function exists under given name
* \return Found function
*/
ManagedPtr<Function> findFunction(const std::string& name);
/** \brief Returns all function in AST */
std::list<ManagedFnPtr> getAllFunctions() const;
/**
* \brief Returns all specializations of a function with a given name
* \param fnName function to find
* \return list of found function specializations
*/
std::list<ManagedFnPtr> getFnSpecializations(const std::string& fnName) const;
/**
* \return First element in Functions/Scopes/Rules list depending on template parameter
* \tparam Target either Function or CodeScope or MetaRuleAbstract
*/
template<class Target>
ManagedPtr<Target> begin();
/**
* \brief Performs all necessary steps after AST is built
*
* Performs all finalization steps and moves AST into consistent state represented by xreate::AST
* \sa xreate::AST
* \return AST in consistent state
*/
xreate::AST* finalize();
typedef std::multimap<std::string, unsigned int> FUNCTIONS_REGISTRY;
//std::list<ExternData> __externdata;
std::list<Expression> __dfadata; //TODO move to more appropriate place
std::list<std::string> __rawImports; //TODO move to more appropriate place
std::multimap<ASTInterface, Expression> __interfacesData; //TODO CFA data here.
private:
std::vector<MetaRuleAbstract*> __rules;
std::vector<Function*> __functions;
std::vector<CodeScope*> __scopes;
FUNCTIONS_REGISTRY __dictFunctions;
protected:
std::map<std::string, TypeAnnotation> __registryTypes;
public:
/**
* \brief Stores DFA scheme for later use by DFA Pass
*
* Treats expression as a DFA scheme and feeds to the DFA Pass later
* \param data DFA Scheme
* \sa xreate::DFAPass
*/
void addDFAData(Expression&& data);
/** \brief Stores data for later use by xreate::ExternLayer */
void addExternData(ExternData&& entry);
/**
* \brief Generalized function to store particular data for later use by particular pass
* \param interface Particular Interface
* \param data Particular data
*/
void addInterfaceData(const ASTInterface& interface, Expression&& data);
/**\name Symbols Recognition */
///@{
public:
//TODO revisit enums/variants, move to codescope
/**
* \brief Tries to find out whether expression is Variant constructor
*/
bool recognizeVariantConstructor(Expression& function);
Atom<Number_t> recognizeVariantConstructor(Atom<Identifier_t> ident);
/**
* \brief Postpones unrecognized identifier for future second round of recognition
* \param scope Code block identifier is encountered
* \param id Identifier
*/
void postponeIdentifier(CodeScope* scope, const Expression& id);
/** \brief Second round of identifiers recognition done right after AST is fully constructed */
void recognizePostponedIdentifiers();
void recognizeIntrinsic(Expression& fn) const;
private:
std::map<std::string, std::pair<TypeAnnotation, int>> __registryVariants;
static std::map<std::string, IntrinsicFn> __registryIntrinsics;
std::set<std::pair<CodeScope*, Expression>> __bucketUnrecognizedIdentifiers;
static void initIntrinsics();
public:
///@}
};
template<>
ManagedPtr<Function>
AST::begin<Function>();
template<>
ManagedPtr<CodeScope>
AST::begin<CodeScope>();
template<>
ManagedPtr<MetaRuleAbstract>
AST::begin<MetaRuleAbstract>();
} } // namespace details::incomplete
/**
* \brief AST in a consistent state
*
* AST has two mutually exclusive possible states:
* - an inconsistent state while AST is under construction. Represented by xreate::details::inconsistent::AST
* - a consistent state when AST is built and finalize() is invoked.
*
* This class represents a consistent state and should be used by clients unless client's code explicitly works with AST under construction.
* Consistent AST enables access to additional functions(such as type management).
* \sa xreate::details::inconsistent::AST
*/
class AST : public details::inconsistent::AST {
public:
AST() : details::inconsistent::AST() {}
/**
* \brief Computes fully expanded form of type by substituting all arguments and aliases
* \param t Type to expand
* \return Expdanded or normal form of type
* \sa TypeAnnotation
*/
ExpandedType expandType(const TypeAnnotation &t) const;
/**
* Searches type by given name
* \param name Typename to search
* \return Expanded or normal form of desired type
* \note if type name is not found returns new undefined type with this name
*/
ExpandedType findType(const std::string& name);
/**
* Invokes Type Inference Analysis to find out expanded(normal) form expressions's type
* \sa typeinference.h
* \param e
* \param expectedT expected type
* \return Type of expression
*/
ExpandedType getType(const Expression& e, const TypeAnnotation& expectedT = TypeAnnotation());
};
}
#endif // AST_H

Event Timeline