From f983c95f4252b316549357a4ac703d382e0b98cc Mon Sep 17 00:00:00 2001 From: Xavier Mignot Date: Sat, 25 Mar 2023 23:11:57 -0400 Subject: [PATCH 1/4] feat: Add Bicep Lexer --- lib/rouge/demos/bicep | 10 ++++ lib/rouge/lexers/bicep.rb | 111 ++++++++++++++++++++++++++++++++++++++ spec/lexers/bicep_spec.rb | 16 ++++++ spec/visual/samples/bicep | 25 +++++++++ 4 files changed, 162 insertions(+) create mode 100644 lib/rouge/demos/bicep create mode 100644 lib/rouge/lexers/bicep.rb create mode 100644 spec/lexers/bicep_spec.rb create mode 100644 spec/visual/samples/bicep diff --git a/lib/rouge/demos/bicep b/lib/rouge/demos/bicep new file mode 100644 index 0000000000..63e9b2386d --- /dev/null +++ b/lib/rouge/demos/bicep @@ -0,0 +1,10 @@ +targetScope = 'subscription' // To create a resource group + +@description('The Azure region to create the resources in.') +param location string + +// Create a resource group +resource rg 'Microsoft.Resources/resourceGroups@2021-04-01' = { + name: 'rg-sample' + location: location +} diff --git a/lib/rouge/lexers/bicep.rb b/lib/rouge/lexers/bicep.rb new file mode 100644 index 0000000000..d590da3b50 --- /dev/null +++ b/lib/rouge/lexers/bicep.rb @@ -0,0 +1,111 @@ +module Rouge + module Lexers + class Bicep < Rouge::RegexLexer + tag 'bicep' + filenames '*.bicep' + + title "Bicep" + desc 'Bicep is a domain-specific language (DSL) that uses declarative syntax to deploy Azure resources.' + + keywords = %w( + resource module param var output targetScope dependsOn + existing for in if else true false null + ) + + datatypes = %w(array bool int object string) + + functions = %w( + any array concat contains empty first intersection items last length min max range skip + take union dateTimeAdd utcNow deployment environment loadFileAsBase64 loadTextContent int + json extensionResourceId getSecret list listKeys listKeyValue listAccountSas listSecrets + pickZones reference resourceId subscriptionResourceId tenantResourceId managementGroup + resourceGroup subscription tenant base64 base64ToJson base64ToString dataUri dataUriToString + endsWith format guid indexOf lastIndexOf length newGuid padLeft replace split startsWith + string substring toLower toUpper trim uniqueString uri uriComponent uriComponentToString + ) + + operators = %w(+ - * / % < <= > >= == != && || !) + + punctuation = %w(( ) { } [ ] , : ; =) + + state :root do + mixin :comments + + # Match strings + rule %r/'/, Str::Single, :string + + # Match numbers + rule %r/\b\d+\b/, Num + + # Match keywords + rule %r/\b(#{keywords.join('|')})\b/, Keyword + + # Match data types + rule %r/\b(#{datatypes.join('|')})\b/, Keyword::Type + + # Match functions + rule %r/\b(#{functions.join('|')})\b/, Name::Function + + # Match operators + rule %r/#{operators.map { |o| Regexp.escape(o) }.join('|')}/, Operator + + # Enter a state when encountering an opening curly bracket + rule %r/{/, Punctuation::Indicator, :block + + # Match punctuation + rule %r/#{punctuation.map { |p| Regexp.escape(p) }.join('|')}/, Punctuation + + # Match identifiers + rule %r/[a-zA-Z_]\w*/, Name + + # Match decorators + rule %r/@[a-zA-Z_]\w*/, Name::Decorator + + # Ignore whitespace + rule %r/\s+/, Text + end + + state :comments do + rule %r(//[^\n\r]+), Comment::Single + rule %r(/\*.*?\*/)m, Comment::Multiline + end + + state :string do + rule %r/[^'$}]+/, Str::Single + rule %r/\$(?!\{)/, Str::Single + rule %r/\$[\{]/, Str::Interpol, :interp + rule %r/\'/, Str::Single, :pop! + rule %r/\$+/, Str::Single + end + + state :interp do + rule %r/\}/, Str::Interpol, :pop! + mixin :root + end + + # State for matching code blocks between curly brackets + state :block do + # Match property names + rule %r/\b([a-zA-Z_]\w*)\b(?=\s*:)/, Name::Property + + # Match property values that are strings + rule %r/(?<=[:]\s)('[^']*')/, Str, :string + + # Match property values that are numbers + rule %r/(?<=[:]\s)\b\d+\b/, Num + + # Match property values that are keywords + rule %r/\b(#{keywords.join('|')})\b(?=[,}])/, Keyword::Constant + + # Match closing curly brackets + rule %r/}/, Punctuation::Indicator, :pop! + + # Match nested curly brackets + rule %r/{/, Punctuation::Indicator, :block + + # Include the root state for nested tokens + mixin :root + end + end + end +end diff --git a/spec/lexers/bicep_spec.rb b/spec/lexers/bicep_spec.rb new file mode 100644 index 0000000000..805a327077 --- /dev/null +++ b/spec/lexers/bicep_spec.rb @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +describe Rouge::Lexers::Bicep do + let(:subject) { Rouge::Lexers::Bicep.new } + + describe 'guessing' do + include Support::Guessing + + it 'guesses by filename' do + assert_guess :filename => 'foo.bicep' + deny_guess :filename => 'foo' + end + end + end + \ No newline at end of file diff --git a/spec/visual/samples/bicep b/spec/visual/samples/bicep new file mode 100644 index 0000000000..57a498cb20 --- /dev/null +++ b/spec/visual/samples/bicep @@ -0,0 +1,25 @@ +/* + Target scope can be set using the following values: + - resourceGroup (default) + - subscription + - managementGroup + - tenant +*/ +targetScope = 'subscription' // To create a resource group + +@description('The Azure region to create the resources in.') +param location string + +var suffix = padLeft('my-suffix', 10) + +var array = [ + { + fldmfl: 'fldmlfd' + } +] + +// Create a resource group +resource rg 'Microsoft.Resources/resourceGroups@2021-04-01' = { + name: 'rg-${suffix}' + location: location +} From 012c29d35021287d7dc6f32fbfee456abfacc2b1 Mon Sep 17 00:00:00 2001 From: Xavier Mignot Date: Sun, 26 Mar 2023 09:47:33 -0400 Subject: [PATCH 2/4] feat: Improve visual sample and update Lexer accordingly --- lib/rouge/lexers/bicep.rb | 3 ++- spec/visual/samples/bicep | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/rouge/lexers/bicep.rb b/lib/rouge/lexers/bicep.rb index d590da3b50..2ee1f1df91 100644 --- a/lib/rouge/lexers/bicep.rb +++ b/lib/rouge/lexers/bicep.rb @@ -22,11 +22,12 @@ class Bicep < Rouge::RegexLexer resourceGroup subscription tenant base64 base64ToJson base64ToString dataUri dataUriToString endsWith format guid indexOf lastIndexOf length newGuid padLeft replace split startsWith string substring toLower toUpper trim uniqueString uri uriComponent uriComponentToString + toObject ) operators = %w(+ - * / % < <= > >= == != && || !) - punctuation = %w(( ) { } [ ] , : ; =) + punctuation = %w(( ) { } [ ] , : ; = .) state :root do mixin :comments diff --git a/spec/visual/samples/bicep b/spec/visual/samples/bicep index 57a498cb20..ae1f455b44 100644 --- a/spec/visual/samples/bicep +++ b/spec/visual/samples/bicep @@ -10,11 +10,12 @@ targetScope = 'subscription' // To create a resource group @description('The Azure region to create the resources in.') param location string -var suffix = padLeft('my-suffix', 10) +var suffix = uniqueString(subscription().subscriptionId, 'my-project') -var array = [ +var someTags = [ { - fldmfl: 'fldmlfd' + key: 'location' + value: location } ] @@ -22,4 +23,6 @@ var array = [ resource rg 'Microsoft.Resources/resourceGroups@2021-04-01' = { name: 'rg-${suffix}' location: location + + tags: toObject(someTags, tag => tag.key, tag => tag.value) } From 1c0ed6549010a537eabcc3501aa7cfe9db168fc4 Mon Sep 17 00:00:00 2001 From: Xavier Mignot Date: Sun, 26 Mar 2023 14:24:10 -0400 Subject: [PATCH 3/4] refactor: Use sets for lists of keywords and remove redundant rules in block state --- lib/rouge/lexers/bicep.rb | 48 +++++++++++++++++++-------------------- spec/visual/samples/bicep | 4 ++++ 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/lib/rouge/lexers/bicep.rb b/lib/rouge/lexers/bicep.rb index 2ee1f1df91..5b37fceac8 100644 --- a/lib/rouge/lexers/bicep.rb +++ b/lib/rouge/lexers/bicep.rb @@ -7,14 +7,19 @@ class Bicep < Rouge::RegexLexer title "Bicep" desc 'Bicep is a domain-specific language (DSL) that uses declarative syntax to deploy Azure resources.' - keywords = %w( + def self.keywords + @keywords ||= Set.new %w( resource module param var output targetScope dependsOn existing for in if else true false null - ) + ) + end - datatypes = %w(array bool int object string) + def self.datatypes + @datatypes ||= Set.new %w(array bool int object string) + end - functions = %w( + def self.functions + @functions ||= Set.new %w( any array concat contains empty first intersection items last length min max range skip take union dateTimeAdd utcNow deployment environment loadFileAsBase64 loadTextContent int json extensionResourceId getSecret list listKeys listKeyValue listAccountSas listSecrets @@ -23,7 +28,8 @@ class Bicep < Rouge::RegexLexer endsWith format guid indexOf lastIndexOf length newGuid padLeft replace split startsWith string substring toLower toUpper trim uniqueString uri uriComponent uriComponentToString toObject - ) + ) + end operators = %w(+ - * / % < <= > >= == != && || !) @@ -38,14 +44,18 @@ class Bicep < Rouge::RegexLexer # Match numbers rule %r/\b\d+\b/, Num - # Match keywords - rule %r/\b(#{keywords.join('|')})\b/, Keyword - - # Match data types - rule %r/\b(#{datatypes.join('|')})\b/, Keyword::Type - - # Match functions - rule %r/\b(#{functions.join('|')})\b/, Name::Function + # Rules for sets of reserved keywords + rule %r/\b\w+\b/ do |m| + if self.class.keywords.include? m[0] + token Keyword + elsif self.class.datatypes.include? m[0] + token Keyword::Type + elsif self.class.functions.include? m[0] + token Name::Function + else + token Name + end + end # Match operators rule %r/#{operators.map { |o| Regexp.escape(o) }.join('|')}/, Operator @@ -89,21 +99,9 @@ class Bicep < Rouge::RegexLexer # Match property names rule %r/\b([a-zA-Z_]\w*)\b(?=\s*:)/, Name::Property - # Match property values that are strings - rule %r/(?<=[:]\s)('[^']*')/, Str, :string - - # Match property values that are numbers - rule %r/(?<=[:]\s)\b\d+\b/, Num - - # Match property values that are keywords - rule %r/\b(#{keywords.join('|')})\b(?=[,}])/, Keyword::Constant - # Match closing curly brackets rule %r/}/, Punctuation::Indicator, :pop! - # Match nested curly brackets - rule %r/{/, Punctuation::Indicator, :block - # Include the root state for nested tokens mixin :root end diff --git a/spec/visual/samples/bicep b/spec/visual/samples/bicep index ae1f455b44..e406acf15c 100644 --- a/spec/visual/samples/bicep +++ b/spec/visual/samples/bicep @@ -17,6 +17,10 @@ var someTags = [ key: 'location' value: location } + { + key: 'isTest' + value: true + } ] // Create a resource group From 2a55643fb1b23ba9eee1bf94c37754e7d88397c2 Mon Sep 17 00:00:00 2001 From: Xavier Mignot Date: Sun, 27 Oct 2024 11:22:02 +0100 Subject: [PATCH 4/4] feat: Update Bicep lexer with latest Bicep features Add new functions, keywords and operators --- lib/rouge/lexers/bicep.rb | 24 +++++++++++++----------- spec/visual/samples/bicep | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/lib/rouge/lexers/bicep.rb b/lib/rouge/lexers/bicep.rb index 5b37fceac8..4bad2b68b8 100644 --- a/lib/rouge/lexers/bicep.rb +++ b/lib/rouge/lexers/bicep.rb @@ -9,8 +9,8 @@ class Bicep < Rouge::RegexLexer def self.keywords @keywords ||= Set.new %w( - resource module param var output targetScope dependsOn - existing for in if else true false null + as assert existing extends extension false for from func if import in metadata module + none null output param provider resource targetScope test true type using var void with ) end @@ -20,18 +20,20 @@ def self.datatypes def self.functions @functions ||= Set.new %w( - any array concat contains empty first intersection items last length min max range skip - take union dateTimeAdd utcNow deployment environment loadFileAsBase64 loadTextContent int - json extensionResourceId getSecret list listKeys listKeyValue listAccountSas listSecrets - pickZones reference resourceId subscriptionResourceId tenantResourceId managementGroup - resourceGroup subscription tenant base64 base64ToJson base64ToString dataUri dataUriToString - endsWith format guid indexOf lastIndexOf length newGuid padLeft replace split startsWith - string substring toLower toUpper trim uniqueString uri uriComponent uriComponentToString - toObject + array base64 base64ToJson base64ToString bool cidrHost cidrSubnet concat contains dataUri + dataUriToString dateTimeAdd dateTimeFromEpoch dateTimeToEpoch deployment empty endsWith + environment extensionResourceId filter first flatten format getSecret groupBy guid indexOf + int intersection items join json last lastIndexOf length listAccountSas listKeys listSecrets + loadFileAsBase64 loadJsonContent loadTextContent loadYamlContent managementGroup + managementGroupResourceId map mapValue max min newGuid objectKeys padLeft parseCidr pickZones + providers range readEnvironmentVariable reduce reference replace resourceGroup resourceId + shallowMerge skip sort split startsWith string subscription subscriptionResourceId substring take + tenant tenantResourceId toLower toObject toUpper trim union uniqueString uri uriComponent + uriComponentToString utcNow ) end - operators = %w(+ - * / % < <= > >= == != && || !) + operators = %w(+ - * / % < <= > >= == != =~ !~ && || ! ?? ... .?) punctuation = %w(( ) { } [ ] , : ; = .) diff --git a/spec/visual/samples/bicep b/spec/visual/samples/bicep index e406acf15c..6e716a3002 100644 --- a/spec/visual/samples/bicep +++ b/spec/visual/samples/bicep @@ -28,5 +28,5 @@ resource rg 'Microsoft.Resources/resourceGroups@2021-04-01' = { name: 'rg-${suffix}' location: location - tags: toObject(someTags, tag => tag.key, tag => tag.value) + tags: toObject(someTags, tag => tag.key, tag => tag.?value) }