package r

import (
	"regexp"
	"strings"
	"unicode/utf8"

	. "github.com/alecthomas/chroma" // nolint
	"github.com/alecthomas/chroma/lexers/internal"
	"github.com/dlclark/regexp2"
)

// Raku lexer.
var Raku Lexer = internal.Register(MustNewLazyLexer(
	&Config{
		Name:    "Raku",
		Aliases: []string{"perl6", "pl6", "raku"},
		Filenames: []string{
			"*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm",
			"*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc",
		},
		MimeTypes: []string{
			"text/x-perl6", "application/x-perl6", "text/x-raku", "application/x-raku",
		},
		DotAll: true,
	},
	rakuRules,
))

func rakuRules() Rules {
	type RakuToken int

	const (
		rakuQuote RakuToken = iota
		rakuName
		rakuNameAttribute
		rakuPod
		rakuPodFormatter
		rakuPodDeclaration
		rakuMultilineComment
		rakuSlashRegex
		rakuMatchRegex
		rakuSubstitutionRegex
		rakuSubstitutionSingleRegex
		rakuRegexInsideToken
	)

	const (
		colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)`
		colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})`
		colonPairPattern         = `(?<colon>:)(?<key>\w[\w'-]*)(?<opening_delimiters>` + colonPairOpeningBrackets + `)`
		namePattern              = `((?:(?!` + colonPairPattern + `)[\w':-])+)`
		variablePattern          = `[$@%&]+[.^:?=!~]?` + namePattern
		globalVariablePattern    = `[$@%&]+\*` + namePattern
	)

	keywords := []string{
		`BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`,
		`KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`,
		`class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`,
		`grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`,
		`module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`,
		`where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`,
		`submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`, `use`,
		`when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`,
		`symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, `hidden-from-backtrace`, `nodal`,
		`pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`,
		`dynamic-scope`, `built`, `temp`,
	}

	keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)

	wordOperators := []string{
		`(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
	}

	wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)

	operators := []string{
		`++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
		`+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`,
		`<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`,
		`::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`,
		`??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`,
		`,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`,
		`⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`,
		`⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`,
	}

	operatorsPattern := Words(``, ``, operators...)
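	// The *Pattern variables above are what the rule tables below consume: Words
	// folds each literal list into a single alternation regex (prefix + joined
	// words + suffix), so e.g. the "operator" state matches the whole operators
	// list with one rule via operatorsPattern rather than one rule per symbol.
	// The lookaround prefixes/suffixes such as `(?<=^|\b|\s)` and `(?=$|\b|\s)`
	// keep word operators from matching inside identifiers. (Descriptive note
	// only; see chroma's Words helper for the exact pattern it generates.)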
	builtinTypes := []string{
		`False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
		`atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`,
		`Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`,
		`CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`,
		`CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`,
		`CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`,
		`CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`,
		`CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`,
		`DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`,
		`Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`,
		`Encoding::Registry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`,
		`Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`,
		`IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`,
		`IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`,
		`IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`,
		`IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`,
		`IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`,
		`Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`,
		`Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`,
		`Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`,
		`Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`,
		`Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`,
		`Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`,
		`Metamodel::MultipleInheritance`, `Metamodel::Naming`, `Metamodel::Primitives`,
		`Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`,
		`Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`,
		`MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`,
		`Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`,
		`Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`,
		`Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`,
		`Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`,
		`PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`,
		`Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`,
		`Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`,
		`Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`,
		`Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`,
		`Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`,
		`Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`,
		`uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`,
		`WhateverCode`, `WrapHandle`, `NativeCall`,
		// Pragmas
		`precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`,
		`MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`, `strict`, `trace`,
		`variables`,
	}
builtinTypesPattern := Words(`(? 0 { nextOpenPos := indexAt(text, openingChars, searchPos+nChars) nextClosePos = indexAt(text, closingChars, searchPos+nChars) switch { case nextClosePos == -1: nextClosePos = len(text) nestingLevel = 0 case nextOpenPos != -1 && nextOpenPos < nextClosePos: nestingLevel++ nChars = len(openingChars) searchPos = nextOpenPos default: // next_close_pos < next_open_pos nestingLevel-- nChars = len(closingChars) searchPos = nextClosePos } } endPos = nextClosePos } if endPos < 0 { // if we didn't find a closer, just highlight the // rest of the text in this class endPos = len(text) } adverbre := regexp.MustCompile(`:to\b|:heredoc\b`) var heredocTerminator []rune if adverbre.MatchString(string(adverbs)) { heredocTerminator = text[state.Pos:endPos] if len(heredocTerminator) > 0 { endHeredocPos := indexAt(text[endPos:], heredocTerminator, 0) nChars = len(heredocTerminator) endPos += endHeredocPos } else { endPos = len(text) } } textBetweenBrackets := string(text[state.Pos:endPos]) switch tokenClass { case rakuPod, rakuPodDeclaration, rakuNameAttribute: state.NamedGroups[`value`] = textBetweenBrackets state.NamedGroups[`closing_delimiters`] = string(closingChars) case rakuQuote: if len(heredocTerminator) > 0 { // Length of heredoc terminator + closing chars + `;` heredocFristPunctuationLen := len(heredocTerminator) + len(openingChars) + 1 state.NamedGroups[`opening_delimiters`] = string(openingChars) + string(text[state.Pos:state.Pos+heredocFristPunctuationLen]) state.NamedGroups[`value`] = string(text[state.Pos+heredocFristPunctuationLen : endPos]) state.NamedGroups[`closing_delimiters`] = string(heredocTerminator) } else { state.NamedGroups[`value`] = textBetweenBrackets state.NamedGroups[`closing_delimiters`] = string(closingChars) } default: state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])} } state.Pos = endPos + nChars return nil } } // Raku rules // Empty capture groups are placeholders and will be replaced by bracketsFinder. // DO NOT REMOVE THEM! return Rules{ "root": { Include("common"), {`[{}();]`, Punctuation, nil}, {`\[|\]`, Operator, nil}, {`.+?`, Text, nil}, }, "common": { {`^#![^\n]*$`, CommentHashbang, nil}, Include("pod"), // Multi-line, Embedded comment { "#`(?(?" + bracketsPattern + `)\k*)`, CommentMultiline, bracketsFinder(rakuMultilineComment), }, {`#[^\n]*$`, CommentSingle, nil}, // /regex/ { `(?<=(?:^|\(|=|:|~~|\[|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, {`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, // Hyperoperator | «*« {`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, {`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, // Hyperoperator | »*» {`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, {`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, // <> {`(?>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")}, // «quoted words» {`(? operators | something < onething > something { `(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? 
*)(>=?)(?= *[$@%&]?\w[\w':-]*)`, ByGroups(Operator, UsingSelf("root"), Operator), nil, }, // { `(?])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\w[\w':-]*[^(]|\s+\[))`, ByGroups(Punctuation, String, Punctuation), nil, }, {`C?X::['\w:-]+`, NameException, nil}, Include("metaoperator"), // Pair | (key) => value { `(\([^)]+\))(\s*)(=>)(\s*)([^,\n]+)(,?\n*)`, ByGroups(UsingSelf("root"), Text, Operator, Text, UsingSelf("root"), Text), nil, }, // Pair | key => value { `(\w[\w'-]*)(\s*)(=>)(\s*)([^,\n]+)(,?\n*)`, ByGroups(String, Text, Operator, Text, UsingSelf("root"), Text), nil, }, Include("colon-pair"), // Token { // Token with adverbs `(?<=(?:^|\s)(?:regex|token|rule)(\s+))(['\w:-]+)(?=:['\w-]+` + colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `[({])`, NameFunction, Push("token", "name-adverb"), }, { // Token without adverbs `(?<=(?:^|\s)(?:regex|token|rule)(?:\s+))(['\w:-]+)`, NameFunction, Push("token"), }, // Substitution {`(?<=^|\b|\s)(?(?:qq|q|Q))(?(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?\s*)(?(?[^0-9a-zA-Z:\s])\k*)`, EmitterFunc(quote), bracketsFinder(rakuQuote), }, // Function { `\b(?:\w['\w:-]*)(?=:['\w-]+` + colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `\()`, NameFunction, Push("name-adverb"), }, {`\b(?:\w['\w:-]*)(?=\()`, NameFunction, nil}, // Method // Method with adverb { `(?(?[^\w:\s])\k*)`, ByGroupNames(map[string]Emitter{ `opening_delimiters`: Punctuation, `delimiter`: nil, }), Mutators(Pop(1), bracketsFinder(rakuMatchRegex)), }, }, "substitution": { Include("colon-pair-attribute"), // Substitution | s{regex} = value { `(?(?` + bracketsPattern + `)\k*)`, ByGroupNames(map[string]Emitter{ `opening_delimiters`: Punctuation, `delimiter`: nil, }), Mutators(Pop(1), bracketsFinder(rakuSubstitutionSingleRegex)), }, // Substitution | s/regex/string/ { `([^\w:\s])((?:\\\\|\\/|.)*?)(\1)((?:\\\\|\\/|.)*?)(\1)`, ByGroups( Punctuation, UsingSelf("regex"), Punctuation, UsingSelf("qq"), Punctuation, ), Mutators(Pop(1), MutatorFunc(makeRegexPoppingRule)), }, }, "number": { {`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil}, {`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil}, {`0b[01]+(_[01]+)*`, LiteralNumberBin, nil}, { `(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`, LiteralNumberFloat, nil, }, {`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil}, {`(?<=\d+)i`, NameConstant, nil}, {`\d+(_\d+)*`, LiteralNumberInteger, nil}, }, "name-adverb": { Include("colon-pair-attribute-keyvalue"), Default(Pop(1)), }, "colon-pair": { // :key(value) {colonPairPattern, colonPair(String), bracketsFinder(rakuNameAttribute)}, // :123abc { `(:)(\d+)(\w[\w'-]*)(\s*[,;)]?\s*$)`, ByGroups(Punctuation, UsingSelf("number"), String, Text), nil, }, // :key {`(:!?)(\w[\w'-]*)`, ByGroups(Punctuation, String), nil}, {`\s+`, Text, nil}, }, "colon-pair-attribute": { // :key(value) {colonPairPattern, colonPair(NameAttribute), bracketsFinder(rakuNameAttribute)}, // :123abc { `(:)(\d+)(\w+)(\s*[,;)]?\s*$)`, ByGroups(Punctuation, UsingSelf("number"), NameAttribute, Text), nil, }, // :key {`(:!?)(\w[\w'-]*)`, ByGroups(Punctuation, NameAttribute), nil}, {`\s+`, Text, nil}, }, "colon-pair-attribute-keyvalue": { // :key(value) {colonPairPattern, colonPair(NameAttribute), bracketsFinder(rakuNameAttribute)}, }, "escape-qq": { { `(? 
{ `(?)`, ByGroups(Punctuation, Operator, Punctuation, UsingSelf("root"), Punctuation), nil, }, // {code} {`(?>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil}, // Anchors {`\^\^|\^|\$\$|\$`, NameEntity, nil}, {`\.`, NameEntity, nil}, {`#[^\n]*\n`, CommentSingle, nil}, // Lookaround { `(?)`, ByGroups(Punctuation, Operator, OperatorWord, Punctuation), nil, }, // <$variable> { `(?)`, ByGroups(Punctuation, NameVariable, Punctuation), nil, }, // Capture markers {`(?`, Operator, nil}, {`(?)`, Punctuation, Pop(1)}, {`(?`, Punctuation, Pop(1)}, Include("regex-class-builtin"), Include("variable"), // | | { `(?:(\w[\w-:]*)(=\.?))?(&?\w[\w'-:]+?)(\(.+?\))?(?=>)`, ByGroups( NameVariable, Operator, NameFunction, UsingSelf("root"), ), nil, }, // { `(&?\w[\w':-]*?)(:)((?:.*?(?:\$<\w[\w':-]*>)?.*?)*?)(?=>)`, ByGroups( NameFunction, Punctuation, UsingSelf("root"), ), nil, }, Include("colon-pair-attribute"), {`(?] { `\b([RZX]+)\b(\[)([^\s\]]+?)(\])`, ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation), nil, }, // Z=> {`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil}, }, "operator": { // Word Operator {wordOperatorsPattern, OperatorWord, nil}, // Operator {operatorsPattern, Operator, nil}, }, "pod": { // Single-line pod declaration {`(#[|=])\s`, Keyword, Push("pod-single")}, // Multi-line pod declaration { "(?#[|=])(?(?" + bracketsPattern + `)\k*)(?)(?)`, ByGroupNames( map[string]Emitter{ `keyword`: Keyword, `opening_delimiters`: Punctuation, `delimiter`: nil, `value`: UsingSelf("pod-begin"), `closing_delimiters`: Punctuation, }), bracketsFinder(rakuPodDeclaration), }, Include("pod-blocks"), }, "pod-blocks": { // =begin code { `(?<=^ *)(? *)(?=begin)(? +)(?code)(?[^\n]*)(?.*?)(?^\k)(?=end)(? +)\k`, EmitterFunc(podCode), nil, }, // =begin { `(?<=^ *)(? *)(?=begin)(? +)(?!code)(?\w[\w'-]*)(?[^\n]*)(?)(?)`, ByGroupNames( map[string]Emitter{ `ws`: Comment, `keyword`: Keyword, `ws2`: StringDoc, `name`: Keyword, `config`: EmitterFunc(podConfig), `value`: UsingSelf("pod-begin"), `closing_delimiters`: Keyword, }), bracketsFinder(rakuPod), }, // =for ... { `(?<=^ *)(? *)(?=(?:for|defn))(? +)(?\w[\w'-]*)(?[^\n]*\n)`, ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)), Push("pod-paragraph"), }, // =config { `(?<=^ *)(? *)(?=config)(? +)(?\w[\w'-]*)(?[^\n]*\n)`, ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)), nil, }, // =alias { `(?<=^ *)(? *)(?=alias)(? +)(?\w[\w'-]*)(?[^\n]*\n)`, ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc), nil, }, // =encoding { `(?<=^ *)(? *)(?=encoding)(? +)(?[^\n]+)`, ByGroups(Comment, Keyword, StringDoc, Name), nil, }, // =para ... { `(?<=^ *)(? *)(?=(?:para|table|pod))(?(? *)(?=head\d+)(? *)(?#?)`, ByGroups(Comment, Keyword, GenericHeading, Keyword), Push("pod-single-heading"), }, // =item ... { `(?<=^ *)(? *)(?=(?:item\d*|comment|data|[A-Z]+))(? *)(?#?)`, ByGroups(Comment, Keyword, StringDoc, Keyword), Push("pod-single"), }, { `(?<=^ *)(? *)(?=finish)(?[^\n]*)`, ByGroups(Comment, Keyword, EmitterFunc(podConfig)), Push("pod-finish"), }, // ={custom} ... { `(?<=^ *)(? *)(?=\w[\w'-]*)(? *)(?#?)`, ByGroups(Comment, Name, StringDoc, Keyword), Push("pod-single"), }, // = podconfig { `(?<=^ *)(? *=)(? 
*)(?(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `) *)*\n)`, ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)), nil, }, }, "pod-begin": { Include("pod-blocks"), Include("pre-pod-formatter"), {`.+?`, StringDoc, nil}, }, "pod-paragraph": { {`\n\s*?\n`, StringDoc, Pop(1)}, Include("pre-pod-formatter"), {`.+?`, StringDoc, nil}, }, "pod-single": { {`\n`, StringDoc, Pop(1)}, Include("pre-pod-formatter"), {`.+?`, StringDoc, nil}, }, "pod-single-heading": { {`\n`, GenericHeading, Pop(1)}, Include("pre-pod-formatter"), {`.+?`, GenericHeading, nil}, }, "pod-finish": { {`\z`, nil, Pop(1)}, Include("pre-pod-formatter"), {`.+?`, StringDoc, nil}, }, "pre-pod-formatter": { // C, B, ... { `(?[CBIUDTKRPAELZVMSXN])(?<+|«)`, ByGroups(Keyword, Punctuation), Mutators( bracketsFinder(rakuPodFormatter), Push("pod-formatter"), MutatorFunc(podFormatter), ), }, }, "pod-formatter": { // Placeholder rule, will be replaced by podFormatter. DO NOT REMOVE! {`>`, Punctuation, Pop(1)}, Include("pre-pod-formatter"), // Placeholder rule, will be replaced by podFormatter. DO NOT REMOVE! {`.+?`, StringOther, nil}, }, "variable": { {variablePattern, NameVariable, Push("name-adverb")}, {globalVariablePattern, NameVariableGlobal, Push("name-adverb")}, {`[$@](?:<.*?>)+`, NameVariable, nil}, {`\$/`, NameVariable, nil}, {`\$!`, NameVariable, nil}, {`[$@%]`, NameVariable, nil}, }, "single-quote": { {`(?>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%][\w':-]+|\s+\[))`, Punctuation, Pop(1)}, Include("ww"), }, "«": { {`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%][\w':-]+|\s+\[))`, Punctuation, Pop(1)}, Include("ww"), }, "ww": { Include("single-quote"), Include("qq"), }, "qq": { Include("qq-variable"), // Function with adverb { `\w[\w:'-]+(?=:['\w-]+` + colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `\()`, NameFunction, Push("qq-function", "name-adverb"), }, // Function without adverb {`\w[\w:'-]+(?=\((?!"))`, NameFunction, Push("qq-function")}, Include("closure"), Include("escape-hexadecimal"), Include("escape-c-name"), Include("escape-qq"), {`.+?`, StringDouble, nil}, }, "qq-function": { {`(\([^"]*?\))`, UsingSelf("root"), nil}, Default(Pop(1)), }, "qq-variable": { { `(?>|<.*?>|«.*?»)+`, UsingSelf("root"), nil}, // Method { `(\.)([^(\s]+)(\([^"]*?\))`, ByGroups(Operator, NameFunction, UsingSelf("root")), nil, }, Default(Pop(1)), }, "Q": { Include("escape-qq"), {`.+?`, String, nil}, }, "Q-closure": { Include("escape-qq"), Include("closure"), {`.+?`, String, nil}, }, "Q-variable": { Include("escape-qq"), Include("qq-variable"), {`.+?`, String, nil}, }, "closure": { {`(? 
// Finds the index of substring in the string, starting the search at position pos
func indexAt(str []rune, substr []rune, pos int) int {
	text := string(str[pos:])

	idx := strings.Index(text, string(substr))
	if idx > -1 {
		idx = utf8.RuneCountInString(text[:idx])
		idx += pos
	}

	return idx
}

// Tells if a slice of strings contains a string
func contains(s []string, e string) bool {
	for _, value := range s {
		if value == e {
			return true
		}
	}

	return false
}

type RakuFormatterRules struct {
	pop, formatter *CompiledRule
}

// Pop from the pod_formatter_stack and reformat the pod code
func podFormatterPopper(state *LexerState) error {
	stack, ok := state.Get("pod_formatter_stack").([]RakuFormatterRules)

	if ok && len(stack) > 0 {
		// Pop from stack
		stack = stack[:len(stack)-1]
		state.Set("pod_formatter_stack", stack)
		// Call podFormatter to use the last formatter rules
		err := podFormatter(state)
		if err != nil {
			panic(err)
		}
	}

	return nil
}

// Use the rules from pod_formatter_stack to format the pod code
func podFormatter(state *LexerState) error {
	stack, ok := state.Get("pod_formatter_stack").([]RakuFormatterRules)
	if ok && len(stack) > 0 {
		rules := stack[len(stack)-1]
		state.Rules["pod-formatter"][0] = rules.pop
		state.Rules["pod-formatter"][len(state.Rules["pod-formatter"])-1] = rules.formatter
	}

	return nil
}

type RulePosition int

const (
	topRule RulePosition = iota + 1000
	bottomRule
)

type RuleMakingConfig struct {
	delimiter              []rune
	pattern                string
	tokenType              TokenType
	mutator                Mutator
	rulePosition           RulePosition
	state                  *LexerState
	stateName              string
	pushToStack            bool
	numberOfDelimiterChars int
}

// Makes a compiled rule and returns it; if a rule position is given, the rule is
// also added to that state. If pushToStack is true, the state name is pushed onto
// the state stack.
func makeRuleAndPushMaybe(config RuleMakingConfig) *CompiledRule {
	var rePattern string
	if len(config.delimiter) > 0 {
		delimiter := strings.Repeat(string(config.delimiter), config.numberOfDelimiterChars)
		rePattern = regexp2.Escape(delimiter)
	} else {
		rePattern = config.pattern
	}
	regex := regexp2.MustCompile(rePattern, regexp2.None)

	cRule := &CompiledRule{
		Rule:   Rule{rePattern, config.tokenType, config.mutator},
		Regexp: regex,
	}
	state := config.state
	stateName := config.stateName
	switch config.rulePosition {
	case topRule:
		state.Rules[stateName] = append([]*CompiledRule{cRule}, state.Rules[stateName][1:]...)
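	// bottomRule swaps cRule in for the last rule of the state, mirroring the
	// topRule branch above, which replaces the first rule. Both rely on the
	// placeholder rules in the state tables (the entries marked "DO NOT REMOVE").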
	case bottomRule:
		state.Rules[stateName] = append(state.Rules[stateName][:len(state.Rules[stateName])-1], cRule)
	}

	// Push state name to stack if asked
	if config.pushToStack {
		state.Stack = append(state.Stack, config.stateName)
	}

	return cRule
}

// Used when the regex knows its own delimiter and uses `UsingSelf("regex")`,
// it only puts a placeholder rule at the top of the "regex" state
func makeRegexPoppingRule(state *LexerState) error {
	makeRuleAndPushMaybe(RuleMakingConfig{
		pattern:      `^$`,
		rulePosition: topRule,
		state:        state,
		stateName:    "regex",
	})

	return nil
}

// Emitter for colon pairs, changes token state based on key and brackets
func colonPair(tokenClass TokenType) Emitter {
	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
		iterators := []Iterator{}
		tokens := []Token{
			{Punctuation, state.NamedGroups[`colon`]},
			{Punctuation, state.NamedGroups[`opening_delimiters`]},
			{Punctuation, state.NamedGroups[`closing_delimiters`]},
		}

		// Append colon
		iterators = append(iterators, Literator(tokens[0]))

		if tokenClass == NameAttribute {
			iterators = append(iterators, Literator(Token{NameAttribute, state.NamedGroups[`key`]}))
		} else {
			var keyTokenState string
			keyre := regexp.MustCompile(`^\d+$`)

			if keyre.MatchString(state.NamedGroups[`key`]) {
				keyTokenState = "common"
			} else {
				keyTokenState = "Q"
			}

			// Use token state to Tokenise key
			if keyTokenState != "" {
				iterator, err := state.Lexer.Tokenise(
					&TokeniseOptions{
						State:  keyTokenState,
						Nested: true,
					}, state.NamedGroups[`key`])

				if err != nil {
					panic(err)
				} else {
					// Append key
					iterators = append(iterators, iterator)
				}
			}
		}

		// Append punctuation
		iterators = append(iterators, Literator(tokens[1]))

		var valueTokenState string

		switch state.NamedGroups[`opening_delimiters`] {
		case "(", "{", "[":
			valueTokenState = "root"
		case "<<", "«":
			valueTokenState = "ww"
		case "<":
			valueTokenState = "Q"
		}

		// Use token state to Tokenise value
		if valueTokenState != "" {
			iterator, err := state.Lexer.Tokenise(
				&TokeniseOptions{
					State:  valueTokenState,
					Nested: true,
				}, state.NamedGroups[`value`])

			if err != nil {
				panic(err)
			} else {
				// Append value
				iterators = append(iterators, iterator)
			}
		}

		// Append last punctuation
		iterators = append(iterators, Literator(tokens[2]))

		return Concaterator(iterators...)
	})
}

// Emitter for quoting constructs, changes token state based on quote name and adverbs
func quote(groups []string, state *LexerState) Iterator {
	keyword := state.NamedGroups[`keyword`]
	adverbsStr := state.NamedGroups[`adverbs`]
	iterators := []Iterator{}
	tokens := []Token{
		{Keyword, keyword},
		{StringAffix, adverbsStr},
		{Text, state.NamedGroups[`ws`]},
		{Punctuation, state.NamedGroups[`opening_delimiters`]},
		{Punctuation, state.NamedGroups[`closing_delimiters`]},
	}

	// Append all tokens before dealing with the main string
	iterators = append(iterators, Literator(tokens[:4]...))

	var tokenStates []string

	// Set tokenStates based on adverbs
	adverbs := strings.Split(adverbsStr, ":")
	for _, adverb := range adverbs {
		switch adverb {
		case "c", "closure":
			tokenStates = append(tokenStates, "Q-closure")
		case "qq":
			tokenStates = append(tokenStates, "qq")
		case "ww":
			tokenStates = append(tokenStates, "ww")
		case "s", "scalar", "a", "array", "h", "hash", "f", "function":
			tokenStates = append(tokenStates, "Q-variable")
		}
	}

	var tokenState string

	switch {
	case keyword == "qq" || contains(tokenStates, "qq"):
		tokenState = "qq"
	case adverbsStr == "ww" || contains(tokenStates, "ww"):
		tokenState = "ww"
	case contains(tokenStates, "Q-closure") && contains(tokenStates, "Q-variable"):
		tokenState = "qq"
	case contains(tokenStates, "Q-closure"):
		tokenState = "Q-closure"
	case contains(tokenStates, "Q-variable"):
		tokenState = "Q-variable"
	default:
		tokenState = "Q"
	}

	iterator, err := state.Lexer.Tokenise(
		&TokeniseOptions{
			State:  tokenState,
			Nested: true,
		}, state.NamedGroups[`value`])

	if err != nil {
		panic(err)
	} else {
		iterators = append(iterators, iterator)
	}

	// Append the last punctuation
	iterators = append(iterators, Literator(tokens[4]))

	return Concaterator(iterators...)
}

// Emitter for pod config, tokenises the properties with the "colon-pair-attribute" state
func podConfig(groups []string, state *LexerState) Iterator {
	// Tokenise pod config
	iterator, err := state.Lexer.Tokenise(
		&TokeniseOptions{
			State:  "colon-pair-attribute",
			Nested: true,
		}, groups[0])

	if err != nil {
		panic(err)
	} else {
		return iterator
	}
}

// Emitter for pod code, tokenises the code based on the lang specified
func podCode(groups []string, state *LexerState) Iterator {
	iterators := []Iterator{}
	tokens := []Token{
		{Comment, state.NamedGroups[`ws`]},
		{Keyword, state.NamedGroups[`keyword`]},
		{Keyword, state.NamedGroups[`ws2`]},
		{Keyword, state.NamedGroups[`name`]},
		{StringDoc, state.NamedGroups[`value`]},
		{Comment, state.NamedGroups[`ws3`]},
		{Keyword, state.NamedGroups[`end_keyword`]},
		{Keyword, state.NamedGroups[`ws4`]},
		{Keyword, state.NamedGroups[`name`]},
	}

	// Append all tokens before dealing with the pod config
	iterators = append(iterators, Literator(tokens[:4]...))

	// Tokenise pod config
	iterators = append(iterators, podConfig([]string{state.NamedGroups[`config`]}, state))

	langMatch := regexp.MustCompile(`:lang\W+(\w+)`).FindStringSubmatch(state.NamedGroups[`config`])
	var lang string
	if len(langMatch) > 1 {
		lang = langMatch[1]
	}

	// Tokenise code based on lang property
	sublexer := internal.Get(lang)
	if sublexer != nil {
		iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`])
		if err != nil {
			panic(err)
		} else {
			iterators = append(iterators, iterator)
		}
	} else {
		iterators = append(iterators, Literator(tokens[4]))
	}

	// Append the rest of the tokens
	iterators = append(iterators, Literator(tokens[5:]...))

	return Concaterator(iterators...)
}
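// Example usage (a minimal sketch, not part of the lexer): because the lexer is
// registered above under the name "Raku" and the aliases "perl6", "pl6" and
// "raku", callers normally obtain it through the lexer registry and feed the
// resulting iterator to a formatter. The formatter and style picked below are
// arbitrary choices for illustration.
//
//	lexer := lexers.Get("raku") // or lexers.Match("app.rakumod")
//	iterator, err := lexer.Tokenise(nil, `say "Hello, Raku!";`)
//	if err == nil {
//		_ = formatters.TTY16m.Format(os.Stdout, styles.Monokai, iterator)
//	}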