From d2425cec1c830a890d581e1b4d7a31a709b60a99 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Tue, 21 Dec 2021 09:39:07 +0100 Subject: [PATCH] ParseMode: Convert from enum to configurable struct (#50) * ParseMode: Convert from enum to configurable struct This allows users to only opt-in to some of the laxness, if necessary * Cleanup/expand docs * Cleanup docs in minimize Co-authored-by: Jake Shadle --- src/expression/minimize.rs | 22 ++++++----- src/expression/parser.rs | 6 +-- src/lexer.rs | 77 ++++++++++++++++++++++++++++---------- tests/check.rs | 6 +-- tests/lexer.rs | 4 +- 5 files changed, 77 insertions(+), 38 deletions(-) diff --git a/src/expression/minimize.rs b/src/expression/minimize.rs index a9726a3..6aa80ab 100644 --- a/src/expression/minimize.rs +++ b/src/expression/minimize.rs @@ -1,3 +1,4 @@ +use super::Expression; use crate::{LicenseReq, Licensee}; use std::fmt; @@ -36,21 +37,22 @@ impl std::error::Error for MinimizeError { } } -impl super::Expression { +impl Expression { /// Given a set of [`Licensee`]s, attempts to find the minimum number that - /// satisfy this [`Expression`]. The list of licensees should be given in - /// priority order, eg, if you wish to accept the `Apache-2.0` license if - /// it is available and the `MIT` if not, putting `Apache-2.0` before `MIT` - /// will cause the ubiquitous `Apache-2.0 OR MIT` expression to minimize to - /// just `Apache-2.0` as only only 1 of the licenses is required, and the - /// `Apache-2.0` has priority. + /// satisfy this [`Expression`]. + /// + /// The list of licensees should be given in priority order, eg, if you wish + /// to accept the `Apache-2.0` license if it is available, and the `MIT` if + /// not, putting `Apache-2.0` before `MIT` will cause the ubiquitous + /// `Apache-2.0 OR MIT` expression to minimize to just `Apache-2.0` as only + /// 1 of the licenses is required, and `Apache-2.0` has priority. /// /// # Errors /// /// This method will fail if more than 64 unique licensees are satisfied by - /// this expression, but such a case is unlikely to say the least in a real - /// world scenario. The list of licensees must also actually satisfy this - /// expression, otherwise it can't be minimized. + /// this expression, but such a case is unlikely in a real world scenario. + /// The list of licensees must also actually satisfy this expression, + /// otherwise it can't be minimized. /// /// # Example /// diff --git a/src/expression/parser.rs b/src/expression/parser.rs index 18d0c84..d12e087 100644 --- a/src/expression/parser.rs +++ b/src/expression/parser.rs @@ -23,7 +23,7 @@ impl Expression { /// spdx::Expression::parse("MIT OR Apache-2.0 WITH LLVM-exception").unwrap(); /// ``` pub fn parse(original: &str) -> Result { - Self::parse_mode(original, ParseMode::Strict) + Self::parse_mode(original, ParseMode::STRICT) } /// Parses an expression with the specified `ParseMode`. With @@ -33,7 +33,7 @@ impl Expression { /// ``` /// spdx::Expression::parse_mode( /// "mit/Apache-2.0 WITH LLVM-exception", - /// spdx::ParseMode::Lax + /// spdx::ParseMode::LAX /// ).unwrap(); /// ``` pub fn parse_mode(original: &str, mode: ParseMode) -> Result { @@ -130,7 +130,7 @@ impl Expression { .. }) => { // Handle GNU licenses differently, as they should *NOT* be used with the `+` - if mode == ParseMode::Strict && id.is_gnu() { + if !mode.allow_postfix_plus_on_gpl && id.is_gnu() { return Err(ParseError { original: original.to_owned(), span: lt.span, diff --git a/src/lexer.rs b/src/lexer.rs index 9ab4904..afd38c5 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -3,24 +3,59 @@ use crate::{ ExceptionId, LicenseId, }; -/// Available modes when parsing SPDX expressions -#[derive(Copy, Clone, PartialEq)] -pub enum ParseMode { - /// Strict SPDX parsing. +/// Parsing configuration for SPDX expression +#[derive(Default, Copy, Clone)] +pub struct ParseMode { + /// The `AND`, `OR`, and `WITH` operators are required to be uppercase in + /// the SPDX spec, but enabling this option allows them to be lowercased + pub allow_lower_case_operators: bool, + /// Allows the use of `/` as a synonym for the `OR` operator. This also + /// allows for not having whitespace between the `/` and the terms on either + /// side + pub allow_slash_as_or_operator: bool, + /// Allows some invalid/imprecise identifiers as synonyms for an actual + /// license identifier. See [`IMPRECISE_NAMES`](crate::identifiers::IMPRECISE_NAMES) + /// for a list of the current synonyms. Note that this list is not + /// comprehensive but can be expanded upon when invalid identifiers are + /// found in the wild. + pub allow_imprecise_license_names: bool, + /// The various GPL licenses diverge from every other license in the SPDX + /// license list by having an `-or-later` variant that used as a suffix on a + /// base license (eg. `GPL-3.0-or-later`) rather than the canonical `GPL-3.0+`. + /// This option just allows GPL licenses to be treated similarly to all of + /// the other SPDX licenses. + pub allow_postfix_plus_on_gpl: bool, +} + +impl ParseMode { + /// Strict, specification compliant SPDX parsing. + /// /// 1. Only license identifiers in the SPDX license list, or /// Document/LicenseRef, are allowed. The license identifiers are also /// case-sensitive. /// 1. `WITH`, `AND`, and `OR` are the only valid operators - Strict, + pub const STRICT: Self = Self { + allow_lower_case_operators: false, + allow_slash_as_or_operator: false, + allow_imprecise_license_names: false, + allow_postfix_plus_on_gpl: false, + }; + /// Allow non-conforming syntax for crates-io compatibility + /// /// 1. Additional, invalid, identifiers are accepted and mapped to a correct - /// SPDX license identifier. See - /// [identifiers::IMPRECISE_NAMES](../identifiers/constant.IMPRECISE_NAMES.html) - /// for the list of additionally accepted identifiers and the license they + /// SPDX license identifier. + /// See [`IMPRECISE_NAMES`](crate::identifiers::IMPRECISE_NAMES) for the + /// list of additionally accepted identifiers and the license they /// correspond to. /// 1. `/` can by used as a synonym for `OR`, and doesn't need to be /// separated by whitespace from the terms it combines - Lax, + pub const LAX: Self = Self { + allow_lower_case_operators: true, + allow_slash_as_or_operator: true, + allow_imprecise_license_names: true, + allow_postfix_plus_on_gpl: true, + }; } /// A single token in an SPDX license expression @@ -85,7 +120,7 @@ pub struct Lexer<'a> { inner: &'a str, original: &'a str, offset: usize, - lax: bool, + mode: ParseMode, } impl<'a> Lexer<'a> { @@ -96,7 +131,7 @@ impl<'a> Lexer<'a> { inner: text, original: text, offset: 0, - lax: false, + mode: ParseMode::STRICT, } } @@ -110,7 +145,7 @@ impl<'a> Lexer<'a> { inner: text, original: text, offset: 0, - lax: mode == ParseMode::Lax, + mode, } } @@ -197,7 +232,7 @@ impl<'a> Iterator for Lexer<'a> { } Some('(') => ok_token(Token::OpenParen), Some(')') => ok_token(Token::CloseParen), - Some('/') if self.lax => Some(Ok((Token::Or, 1))), + Some('/') if self.mode.allow_slash_as_or_operator => Some(Ok((Token::Or, 1))), Some(_) => match Lexer::find_text_token(self.inner) { None => Some(Err(ParseError { original: self.original.to_owned(), @@ -211,9 +246,9 @@ impl<'a> Iterator for Lexer<'a> { ok_token(Token::And) } else if m == "OR" { ok_token(Token::Or) - } else if self.lax && m == "and" { + } else if self.mode.allow_lower_case_operators && m == "and" { ok_token(Token::And) - } else if self.lax && m == "or" { + } else if self.mode.allow_lower_case_operators && m == "or" { ok_token(Token::Or) } else if let Some(lic_id) = crate::license_id(m) { ok_token(Token::Spdx(lic_id)) @@ -230,11 +265,13 @@ impl<'a> Iterator for Lexer<'a> { doc_ref: None, lic_ref, }) - } else if let Some((lic_id, token_len)) = if self.lax { - crate::imprecise_license_id(self.inner) - } else { - None - } { + } else if let Some((lic_id, token_len)) = + if self.mode.allow_imprecise_license_names { + crate::imprecise_license_id(self.inner) + } else { + None + } + { Some(Ok((Token::Spdx(lic_id), token_len))) } else { Some(Err(ParseError { diff --git a/tests/check.rs b/tests/check.rs index cfbe590..c0eccd7 100644 --- a/tests/check.rs +++ b/tests/check.rs @@ -10,13 +10,13 @@ macro_rules! exact { macro_rules! check { ($le:expr => [$($logical_expr:expr => $is_allowed:expr),+$(,)?]) => { - check_mode!(spdx::ParseMode::Strict, $le => [$($logical_expr => $is_allowed),+]) + check_mode!(spdx::ParseMode::STRICT, $le => [$($logical_expr => $is_allowed),+]) }; } macro_rules! check_lax { ($le:expr => [$($logical_expr:expr => $is_allowed:expr),+$(,)?]) => { - check_mode!(spdx::ParseMode::Lax, $le => [$($logical_expr => $is_allowed),+]) + check_mode!(spdx::ParseMode::LAX, $le => [$($logical_expr => $is_allowed),+]) }; } @@ -191,7 +191,7 @@ fn gpl_or_later_plus_strict() { #[test] fn gpl_or_later_plus_lax() { - spdx::Expression::parse_mode("GPL-2.0+", spdx::ParseMode::Lax).unwrap(); + spdx::Expression::parse_mode("GPL-2.0+", spdx::ParseMode::LAX).unwrap(); } #[test] diff --git a/tests/lexer.rs b/tests/lexer.rs index ffafdc9..eb61542 100644 --- a/tests/lexer.rs +++ b/tests/lexer.rs @@ -120,7 +120,7 @@ fn fails_with_slash() { #[test] fn lax_takes_slash() { - let lexed: Vec<_> = Lexer::new_mode("MIT/Apache", spdx::ParseMode::Lax) + let lexed: Vec<_> = Lexer::new_mode("MIT/Apache", spdx::ParseMode::LAX) .map(|r| r.map(|lt| lt.token).unwrap()) .collect(); assert_eq!( @@ -131,7 +131,7 @@ fn lax_takes_slash() { #[test] fn fixes_license_names() { - let lexed: Vec<_> = Lexer::new_mode("gpl v2 / bsd 2-clause", spdx::ParseMode::Lax) + let lexed: Vec<_> = Lexer::new_mode("gpl v2 / bsd 2-clause", spdx::ParseMode::LAX) .map(|r| r.map(|lt| lt.token).unwrap()) .collect(); assert_eq!(