diff --git a/.bazelrc b/.bazelrc new file mode 100644 index 000000000..7a7c74b6b --- /dev/null +++ b/.bazelrc @@ -0,0 +1,4 @@ +# https://errorprone.info/ +# TODO(#222) Increase to error when we build from jflex-1.7.1 with a lexer annotated with +# @SuppressWarnings +build --javacopt "-Xep:FallThrough:WARN" diff --git a/.travis.bazelrc b/.ci.bazelrc similarity index 90% rename from .travis.bazelrc rename to .ci.bazelrc index a34c9f070..2f60348fe 100644 --- a/.travis.bazelrc +++ b/.ci.bazelrc @@ -1,3 +1,5 @@ +import %workspace%/.bazelrc + # This is from Bazel's former travis setup, to avoid blowing up the RAM usage. startup --host_jvm_args=-Xms2000m startup --host_jvm_args=-Xmx3000m diff --git a/.cirrus.yml b/.cirrus.yml new file mode 100644 index 000000000..310dea576 --- /dev/null +++ b/.cirrus.yml @@ -0,0 +1,12 @@ +# Copyright 2018 Google LLC. +# SPDX-License-Identifier: Apache-2.0 + +container: + image: cirrusci/bazel:latest +task: + name: Bazel build and test + build_script: + - bazel --bazelrc=.ci.bazelrc info --remote_http_cache=http://$CIRRUS_HTTP_CACHE_HOST release + - bazel --bazelrc=.ci.bazelrc build --remote_http_cache=http://$CIRRUS_HTTP_CACHE_HOST //... + test_script: + - bazel --bazelrc=.ci.bazelrc test --remote_http_cache=http://$CIRRUS_HTTP_CACHE_HOST //... 
diff --git a/.travis.yml b/.travis.yml index b18cbb064..7095ef71d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,7 @@ matrix: - scripts/test-unit.sh - scripts/mvn-site.sh - scripts/mvn-aggregate-srcs.sh + - scripts/preparare-deploy-source-code.sh after_success: scripts/send-code-coverage.sh env: - PUBLISH_SOURCES=true @@ -133,4 +134,5 @@ deploy: - travis condition: - $PUBLISH_SOURCES - script: ./scripts/deploy-source-code.sh + script: scripts/deploy-aggregated-sources.sh + diff --git a/README.md b/README.md index fef8dc2cf..2d69e6685 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ Build status + + Bazel build status + Maven central diff --git a/docs/md/example.md b/docs/md/example.md index 183febd4d..fc98e0064 100644 --- a/docs/md/example.md +++ b/docs/md/example.md @@ -2,23 +2,31 @@ A simple Example: How to work with JFlex {#Example} ======================================== To demonstrate how a lexical specification with JFlex looks like, this -section presents a part of the specification for the Java language. The -example does not describe the whole lexical structure of Java programs, -but only a small and simplified part of it (some keywords, some -operators, comments and only two kinds of literals). It also shows how -to interface with the LALR parser generator CUP [@CUP] and therefore uses -a class `sym` (generated by CUP), where integer constants for the -terminal tokens of the CUP grammar are declared. JFlex comes with a -directory `examples`, where you can find a small standalone scanner that -doesn’t need other tools like CUP to give you working example code without -dependencies. - -The `examples` directory also contains a *complete* JFlex specification of the +section presents a part of the specification for the Java language. + +The example does not describe the whole lexical structure of Java programs, +but only a small and simplified part of it: +- some keywords, +- some operators, +- comments +- and only two kinds of literals. 
+ +It also shows how to interface with the LALR parser generator CUP [@CUP] +and therefore uses a class `sym` (generated by CUP), where integer constants +for the terminal tokens of the CUP grammar are declared. + +You can find this example in `examples/cup-java-simplified`. + +The `examples/cup-java` directory also contains a *complete* JFlex specification of the lexical structure of Java programs together with the CUP parser specification for Java by C. Scott Ananian, obtained from the CUP [@CUP] web site (modified to interface with the JFlex scanner). Both specifications adhere to the Java Language Specification [@LangSpec]. +In `examples/standalone`, you can find a small standalone scanner that +doesn’t need other dependencies or tools like CUP to give you working code. + + ``` /* JFlex example: partial Java language lexer specification */ import java_cup.runtime.*; diff --git a/jflex/examples/cup-java-minijava/BUILD b/jflex/examples/cup-java-minijava/BUILD new file mode 100644 index 000000000..0812fc081 --- /dev/null +++ b/jflex/examples/cup-java-minijava/BUILD @@ -0,0 +1,51 @@ +package(default_visibility = ["//visibility:public"]) + +load("@jflex_rules//jflex:jflex.bzl", "jflex") +load("//cup:cup.bzl", "cup") + +# The best practice is to define the rules in their respective directory in +# - src/main/java/org/example/foo/BUILD +# - src/main/jflex/BUILD +# - src/test/java/org/example/foo/BUILD +# - etc. +# However, this example is simple enough and we can define all rules here. 
+ +java_binary( + name = "minijava_bin", + main_class = "Yylex", + runtime_deps = [":minijava"], +) + +java_library( + name = "minijava", + # glob is not a best practice, but it's good enough for this example + srcs = glob(["src/main/java/**/*.java"]) + [ + ":gen_lexer", + ":gen_parser", + ], + deps = ["//cup:cup_runtime"], +) + +jflex( + name = "gen_lexer", + srcs = ["src/main/jflex/minijava.flex"], + jflex_bin = "//jflex:jflex_bin", + outputs = ["Lexer.java"], +) + +cup( + name = "gen_parser", + src = "src/main/cup/minijava.cup", + symbols = "sym", +) + +# Tests +java_test( + name = "LexerTest", + srcs = ["src/test/java/jflex/examples/minijava/LexerTest.java"], + deps = [ + ":minijava", + "//cup/cup_runtime", + "//third_party/com/google/truth", + ], +) diff --git a/jflex/examples/cup-java-minijava/README.md b/jflex/examples/cup-java-minijava/README.md new file mode 100644 index 000000000..8f651495e --- /dev/null +++ b/jflex/examples/cup-java-minijava/README.md @@ -0,0 +1,32 @@ +# Example from the user manual + +This is the [example from the user manual](http://jflex.de/manual.html#Example). + +This example does not describe the whole lexical structure of Java programs, but only a small and +simplified part of it (some keywords, some operators, comments and only two kinds of literals). +It also shows how to interface with the LALR parser generator CUP and therefore uses a class `sym` +(generated by CUP), where integer constants for the terminal tokens of the CUP grammar are declared. + +For a full implementation of **Java 1.2**, see [cup-java](../cup-java). + +## Build, run, test + +### Using Maven + + ../../mvnw package + +To run the parser: + + java -cp target/cup-java-simplified-1.0.jar:../../../cup/cup/java-cup-11b.jar JavaParser + +or more simply the uberjar version: + + java -jar target/cup-java-simplified-full-1.0.jar + + +## Files + +* `src/main/jflex/minijava.flex` + Partial (simplified) specification of Java. 
+* `src/test/java/jflex/examples/minijava/LexerTest.java` + Test of the generated lexer. \ No newline at end of file diff --git a/jflex/examples/cup-java-minijava/src/main/cup/minijava.cup b/jflex/examples/cup-java-minijava/src/main/cup/minijava.cup new file mode 100644 index 000000000..6278183aa --- /dev/null +++ b/jflex/examples/cup-java-minijava/src/main/cup/minijava.cup @@ -0,0 +1,67 @@ +/* + * Copyright (C) 1998 C. Scott Ananian + * Copyright (C) 1999 Gerwin Klein + * Copyright (C) 2018 Google LLC + * + * This program is released under the terms of the GPL; see the file + * COPYING for more details. There is NO WARRANTY on this code. + */ +package jflex.examples.minijava; + +import java_cup.runtime.*; + + +/** Parser for a language inspired by Java. */ + +// Keywords +terminal ABSTRACT; // "abstract" +terminal BOOLEAN; // "boolean" primitive_type +terminal BREAK; // "break" break_statement + +// Identifier matches each string that starts with a character of class jletter followed by zero or +// more characters of class jletterdigit +terminal java.lang.String IDENTIFIER; // name + +// Literals +terminal java.lang.Number INTEGER_LITERAL; +terminal java.lang.String STRING_LITERAL; + +// Operators +terminal EQ; +terminal EQEQ; // equality_expression +terminal PLUS; + + +// 19.3) Lexical Structure +non terminal literal; +// 19.4) Types, Values, and Variables +non terminal primitive_type, type; +// 19.5) Names +non terminal name; +// 19.12) Expressions +non terminal equality_expression; + +// Our simplified grammar +non terminal goal; + +// TODO +start with goal; + +goal ::= name + ; + +// 19.3) Lexical Structure. 
+literal ::= INTEGER_LITERAL + | STRING_LITERAL + ; + +// 19.4) Types, Values, and Variables +type ::= primitive_type + ; +primitive_type ::= + | BOOLEAN + ; + +// 19.5) Names +name ::= IDENTIFIER + ; diff --git a/testsuite/testcases/src/test/cases/manual-ex/manual.flex b/jflex/examples/cup-java-minijava/src/main/jflex/minijava.flex similarity index 92% rename from testsuite/testcases/src/test/cases/manual-ex/manual.flex rename to jflex/examples/cup-java-minijava/src/main/jflex/minijava.flex index d1c7707d3..ee5657207 100644 --- a/testsuite/testcases/src/test/cases/manual-ex/manual.flex +++ b/jflex/examples/cup-java-minijava/src/main/jflex/minijava.flex @@ -1,5 +1,10 @@ -/* JFlex example: part of Java language lexer specification */ -import java_cup.runtime.*; +// JFlex example from the user Manual + +package jflex.examples.minijava; + +import java_cup.runtime.Symbol; + +/** Lexer of a very minimal version of the Java programming language. */ %% diff --git a/jflex/examples/cup-java-minijava/src/test/java/jflex/examples/minijava/LexerTest.java b/jflex/examples/cup-java-minijava/src/test/java/jflex/examples/minijava/LexerTest.java new file mode 100644 index 000000000..f32a557ae --- /dev/null +++ b/jflex/examples/cup-java-minijava/src/test/java/jflex/examples/minijava/LexerTest.java @@ -0,0 +1,70 @@ +package jflex.examples.minijava; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import org.junit.After; +import org.junit.Test; + +/** + * Test for the generated {@link Lexer}. + * + *

The lexer is probably already correct thanks to the regression tests. This test class is + * mostly here to show how the lexer behaves. + */ +public class LexerTest { + + private Lexer lexer; + + @After + public void resetLexer() { + lexer = null; + } + + @Test + public void scan_tokenIdentifier() throws IOException { + scan("helloWorld"); + assertThat(nextToken()).isEqualTo(sym.IDENTIFIER); + } + + @Test + public void scan_assignment() throws IOException { + scan("boolean debug = 2 == 1 + 1"); + assertThat(nextToken()).isEqualTo(sym.BOOLEAN); + assertThat(nextToken()).isEqualTo(sym.IDENTIFIER); + assertThat(nextToken()).isEqualTo(sym.EQ); + assertThat(nextToken()).isEqualTo(sym.INTEGER_LITERAL); + assertThat(nextToken()).isEqualTo(sym.EQEQ); + assertThat(nextToken()).isEqualTo(sym.INTEGER_LITERAL); + assertThat(nextToken()).isEqualTo(sym.PLUS); + assertThat(nextToken()).isEqualTo(sym.INTEGER_LITERAL); + assertThat(nextToken()).isEqualTo(sym.EOF); + } + + @SuppressWarnings("TryFailThrowable") + @Test + public void scan_illegalChar() throws IOException { + scan("boolean debug;"); + assertThat(nextToken()).isEqualTo(sym.BOOLEAN); + assertThat(nextToken()).isEqualTo(sym.IDENTIFIER); + try { + nextToken(); + fail("Character `;` is not declared in the minijava.flex"); + } catch (Error expected) { + // This is bad, but the JFlex API doesn't allow better + // https://errorprone.info/bugpattern/TryFailThrowable + } + } + + private void scan(String input) { + Reader in = new StringReader(input); + lexer = new Lexer(in); + } + + private int nextToken() throws IOException { + return lexer.next_token().sym; + } +} diff --git a/jflex/src/main/java/jflex/Emitter.java b/jflex/src/main/java/jflex/Emitter.java index de45e63d9..15ac17648 100644 --- a/jflex/src/main/java/jflex/Emitter.java +++ b/jflex/src/main/java/jflex/Emitter.java @@ -434,6 +434,9 @@ private void emitUserCode() { } private void emitClassName() { + // TODO(#222) Actually fix the fall-through violations + 
println("// See https://github.com/jflex-de/jflex/issues/222"); + println("@SuppressWarnings(\"FallThrough\")"); if (scanner.isPublic) print("public "); if (scanner.isAbstract) print("abstract "); diff --git a/scripts/bazel.sh b/scripts/bazel.sh index eeade1798..cdcdeecc9 100755 --- a/scripts/bazel.sh +++ b/scripts/bazel.sh @@ -9,7 +9,7 @@ source "$BASEDIR"/scripts/logger.sh set -e if [[ $TRAVIS ]]; then - BAZEL="bazel --bazelrc=$TRAVIS_BUILD_DIR/.travis.bazelrc --output_user_root=${HOME}/__bazel_travis_root__ --output_base=${HOME}/__bazel_output_base__" + BAZEL="bazel --bazelrc=$TRAVIS_BUILD_DIR/.ci.bazelrc --output_user_root=${HOME}/__bazel_travis_root__ --output_base=${HOME}/__bazel_output_base__" else BAZEL='bazel' fi diff --git a/scripts/deploy-aggregated-sources.sh b/scripts/deploy-aggregated-sources.sh new file mode 100755 index 000000000..48429d8d0 --- /dev/null +++ b/scripts/deploy-aggregated-sources.sh @@ -0,0 +1,5 @@ +echo "Push to https://github.com/jflex-de/jflex/tree/aggregated-java-sources" +cd repo || exit 1 +# SECURITY NOTICE: Be sure to send stdout & stderr to /dev/null so that the ${GITHUB_TOKEN} is never revealed +git remote set-url --push origin "https://${GITHUB_TOKEN}@github.com/jflex-de/jflex.git" > /dev/null 2>&1 +git push diff --git a/scripts/deploy-source-code.sh b/scripts/preparare-deploy-source-code.sh similarity index 68% rename from scripts/deploy-source-code.sh rename to scripts/preparare-deploy-source-code.sh index 57db2a357..3ba63f050 100755 --- a/scripts/deploy-source-code.sh +++ b/scripts/preparare-deploy-source-code.sh @@ -1,5 +1,7 @@ #!/bin/bash -# Push aggregated source code back to git +# Prepare the aggregated source code in the 'repo' directory that is cloned from +# branch [aggregated-java-sources]. 
+ # This is inspired by https://martinrotter.github.io/it-programming/2016/08/26/pushing-git-travis/ CWD="$PWD" @@ -10,13 +12,17 @@ source "$BASEDIR"/scripts/logger.sh set -e git_clone() { + if [[ -d repo ]]; then + backup=$(mktemp -d) + logi "Move existing repo to $backup" + mv repo "$backup" + fi if [[ -z "$CI" ]]; then logi "Cloning ssh://git@github.com:jflex-de/jflex.git (aggregated-java-sources)" - git clone --depth 1 --branch aggregated-java-sources "git@github.com:jflex-de/jflex.git" repo > /dev/null 2>&1 + git clone --depth 1 --branch aggregated-java-sources "git@github.com:jflex-de/jflex.git" repo else - logi "Cloning https://[GITHUB_TOKEN]@github.com/jflex-de/jflex/tree/aggregated-java-sources" - # SECURITY NOTICE: Be sure to send stdout & stderr to /dev/null so that the the ${GITHUB_TOKEN} is never revealed - git clone --depth 1 --branch aggregated-java-sources "https://${GITHUB_TOKEN}@github.com/jflex-de/jflex.git" repo > /dev/null 2>&1 + logi "Cloning https://github.com/jflex-de/jflex/tree/aggregated-java-sources" + git clone --depth 1 --branch aggregated-java-sources "https://github.com/jflex-de/jflex.git" repo fi } @@ -29,16 +35,22 @@ update_source() { cd repo git config user.name "Travis CI" git config user.email "deploy@travis-ci.org" - git rm -r META-INF jflex java_cup UnicodeProperties.java.skeleton - jar -xf ../target/jflex-*-sources.jar + git rm -r java + mkdir -p java + cd java + jar -xf ../../target/jflex-*-sources.jar logi "Remove unrelated sources" - logi "Download deps and Compile" - ./compile.sh + rm -rf jflex/maven logi "Checking licenses" - [[ -f LICENSE_CUP ]] || loge "Missing LICENSE_CUP for CUP" - [[ -f LICENSE_JFLEX ]] || loge "Missing LICENSE_JFLEX for JFlex" - [[ $(head -1 LICENSE_JFLEX | cut -f 1 -d " ") == "JFlex" ]] || loge "JFlex license has bad content" + [[ $(head -1 LICENSE_JFLEX | cut -f 1 -d " ") == "JFlex" ]] || \ + { loge "JFlex license has bad content" && cat LICENSE_JFLEX; } + mv LICENSE_JFLEX .. + mv LICENSE_CUP .. + cd .. 
+ + logi "Download deps and Compile" + ./compile.sh logi "Update git sources" git add --all @@ -54,14 +66,6 @@ update_source() { cd .. } -git_push() { - cd repo - logi "Push to https://github.com/jflex-de/jflex/tree/aggregated-java-sources" - git log -1 - git push - cd .. -} - # N.B. TRAVIS_BRANCH is the name of the branch targeted by the pull request (if PR) logi "On branch ${TRAVIS_PULL_REQUEST_SLUG}:${TRAVIS_PULL_REQUEST_BRANCH} → ${TRAVIS_BRANCH}" @@ -74,8 +78,6 @@ if [[ -z "$CI" ]]; then logi "git log -1" logi "git diff HEAD^1" logi "# git push" -else - git_push fi cd "$CWD" diff --git a/testsuite/testcases/src/test/cases/manual-ex/.gitignore b/testsuite/testcases/src/test/cases/manual-ex/.gitignore deleted file mode 100644 index c9c7aa513..000000000 --- a/testsuite/testcases/src/test/cases/manual-ex/.gitignore +++ /dev/null @@ -1 +0,0 @@ -Lexer.java diff --git a/testsuite/testcases/src/test/cases/manual-ex/manual-flex.output b/testsuite/testcases/src/test/cases/manual-ex/manual-flex.output deleted file mode 100644 index 2df8305d9..000000000 --- a/testsuite/testcases/src/test/cases/manual-ex/manual-flex.output +++ /dev/null @@ -1,11 +0,0 @@ -Reading "src/test/cases/manual-ex/manual.flex" - -Warning in file "src/test/cases/manual-ex/manual.flex" (line 90): -".|\n" does not match all characters, because "." excludes all Unicode newline chars - use "[^]" instead -.|\n { throw new Error("Illegal character <"+ -^ -Constructing NFA : 160 states in NFA -Converting NFA to DFA : -.......................................................... 
-62 states before minimization, 43 states in minimized DFA -Writing code to "src/test/cases/manual-ex/Lexer.java" diff --git a/testsuite/testcases/src/test/cases/manual-ex/manual.test b/testsuite/testcases/src/test/cases/manual-ex/manual.test deleted file mode 100644 index f8e5bbe22..000000000 --- a/testsuite/testcases/src/test/cases/manual-ex/manual.test +++ /dev/null @@ -1,7 +0,0 @@ -name: manual -description: -the introductory example from the manual - -jflex: --nobak - -javac-files: Lexer.java sym.java diff --git a/testsuite/testcases/src/test/cases/manual-ex/sym.java b/testsuite/testcases/src/test/cases/manual-ex/sym.java deleted file mode 100644 index 5aa831c28..000000000 --- a/testsuite/testcases/src/test/cases/manual-ex/sym.java +++ /dev/null @@ -1,114 +0,0 @@ - -//---------------------------------------------------- -// The following code was generated by CUP v0.10k -// Sat May 05 23:45:07 CEST 2001 -//---------------------------------------------------- - -/** CUP generated interface containing symbol constants. 
*/ -public interface sym { - /* terminals */ - public static final int SHORT = 4; - public static final int IDENTIFIER = 98; - public static final int ANDEQ = 90; - public static final int GT = 70; - public static final int IMPLEMENTS = 36; - public static final int CONST = 101; - public static final int STRICTFP = 100; - public static final int NOTEQ = 75; - public static final int PLUSEQ = 85; - public static final int RBRACK = 11; - public static final int CATCH = 55; - public static final int COMMA = 15; - public static final int RBRACE = 17; - public static final int THROW = 53; - public static final int RPAREN = 20; - public static final int LBRACK = 10; - public static final int LT = 69; - public static final int ANDAND = 79; - public static final int OROR = 80; - public static final int DOUBLE = 9; - public static final int LBRACE = 16; - public static final int TRANSIENT = 32; - public static final int LPAREN = 19; - public static final int XOREQ = 91; - public static final int PROTECTED = 25; - public static final int INTEGER_LITERAL = 93; - public static final int NOT = 63; - public static final int FINAL = 29; - public static final int FLOAT = 8; - public static final int GOTO = 102; - public static final int URSHIFTEQ = 89; - public static final int PACKAGE = 22; - public static final int COMP = 62; - public static final int EQ = 18; - public static final int BOOLEAN_LITERAL = 95; - public static final int MOD = 65; - public static final int CLASS = 34; - public static final int SUPER = 40; - public static final int ABSTRACT = 28; - public static final int NATIVE = 30; - public static final int LONG = 6; - public static final int PLUS = 60; - public static final int QUESTION = 81; - public static final int WHILE = 48; - public static final int EXTENDS = 35; - public static final int INTERFACE = 41; - public static final int CHAR = 7; - public static final int BOOLEAN = 2; - public static final int SWITCH = 44; - public static final int DO = 47; - 
public static final int FOR = 49; - public static final int RSHIFTEQ = 88; - public static final int VOID = 37; - public static final int DIV = 64; - public static final int PUBLIC = 24; - public static final int RETURN = 52; - public static final int MULT = 14; - public static final int ELSE = 43; - public static final int TRY = 54; - public static final int GTEQ = 72; - public static final int BREAK = 50; - public static final int DOT = 12; - public static final int INT = 5; - public static final int NULL_LITERAL = 99; - public static final int THROWS = 38; - public static final int STRING_LITERAL = 97; - public static final int EQEQ = 74; - public static final int EOF = 0; - public static final int SEMICOLON = 13; - public static final int THIS = 39; - public static final int DEFAULT = 46; - public static final int MULTEQ = 82; - public static final int IMPORT = 23; - public static final int MINUS = 61; - public static final int LTEQ = 71; - public static final int OR = 78; - public static final int error = 1; - public static final int URSHIFT = 68; - public static final int SYNCHRONIZED = 31; - public static final int DIVEQ = 83; - public static final int LSHIFTEQ = 87; - public static final int FINALLY = 56; - public static final int CONTINUE = 51; - public static final int INSTANCEOF = 73; - public static final int IF = 42; - public static final int MODEQ = 84; - public static final int MINUSMINUS = 59; - public static final int COLON = 21; - public static final int CHARACTER_LITERAL = 96; - public static final int OREQ = 92; - public static final int VOLATILE = 33; - public static final int CASE = 45; - public static final int PLUSPLUS = 58; - public static final int NEW = 57; - public static final int RSHIFT = 67; - public static final int BYTE = 3; - public static final int AND = 76; - public static final int PRIVATE = 26; - public static final int STATIC = 27; - public static final int LSHIFT = 66; - public static final int XOR = 77; - public static final 
int FLOATING_POINT_LITERAL = 94; - public static final int MINUSEQ = 86; -} - diff --git a/third_party/README.md b/third_party/README.md index d4e3f63b3..ba168a917 100644 --- a/third_party/README.md +++ b/third_party/README.md @@ -1,9 +1,18 @@ # Bazel third-party packages -This is *not* an example. -It contains BUILD aliases for the [Bazel build system][bazel]. +Contains BUILD aliases for the [Bazel build system][bazel]. To read how to use JFlex on your Bazel project, please read -[de/jflex/README.md](de/jflex/README.md) +[jflex/README.md](/README.md) + +## Add a new dependency + +Contributors who want to add a new dependency need to + +1. Add the `maven_jar` rule in `deps.bzl` +2. Create a directory in `third_party` that matches the artifact groupId. +3. Add a BUILD file in that directory + - The build must have a `license()` declaration. + - The build target must declare all its dependencies. [bazel]: https://bazel.build/