diff --git a/CHANGELOG.md b/CHANGELOG.md index d3be890..1e205a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - ReleaseDate ### Added - [PR#84](https://github.com/EmbarkStudios/spdx/pull/84) resolved [#67](https://github.com/EmbarkStudios/spdx/issues/67) by inlining the `askalono` crate to allow detection of license texts or headers from arbitrary text data. There are multiple feature flags associated with this new feature. +- [PR#85](https://github.com/EmbarkStudios/spdx/pull/85) resolved [#82](https://github.com/EmbarkStudios/spdx/issues/82) by optionally allowing the parsing of unknown identifiers via `ParseMode::allow_unknown`. Unknown identifiers are treated as either `LicenseRef-` or `AdditionRef-` depending on their position. Unknown identifiers in positions that are invalid for either licenses or exceptions are still considered parse errors. ## [0.12.0] - 2025-08-19 ### Added diff --git a/src/expression/parser.rs b/src/expression/parser.rs index 6eab6b7..b4385f3 100644 --- a/src/expression/parser.rs +++ b/src/expression/parser.rs @@ -124,6 +124,7 @@ impl Expression { can.push_str("AdditionRef-"); can.push_str(add_ref); } + Token::Unknown(_u) => unreachable!(), } } @@ -188,7 +189,7 @@ impl Expression { None | Some(Token::And | Token::Or | Token::OpenParen) => &["", "("], Some(Token::CloseParen) => &["AND", "OR"], Some(Token::Exception(_) | Token::AdditionRef { .. }) => &["AND", "OR", ")"], - Some(Token::Spdx(_)) => &["AND", "OR", "WITH", ")", "+"], + Some(Token::Spdx(_) | Token::Unknown(_)) => &["AND", "OR", "WITH", ")", "+"], Some(Token::LicenseRef { .. } | Token::Plus) => &["AND", "OR", "WITH", ")"], Some(Token::With) => &[""], }; @@ -282,7 +283,9 @@ impl Expression { _ => return make_err_for_token(last_token, lt.span), }, Token::With => match last_token { - Some(Token::Spdx(_) | Token::LicenseRef { .. 
} | Token::Plus) => {} + Some( + Token::Spdx(_) | Token::LicenseRef { .. } | Token::Plus | Token::Unknown(_), + ) => {} _ => return make_err_for_token(last_token, lt.span), }, Token::Or | Token::And => match last_token { @@ -292,7 +295,8 @@ impl Expression { | Token::CloseParen | Token::Exception(_) | Token::AdditionRef { .. } - | Token::Plus, + | Token::Plus + | Token::Unknown(_), ) => { let new_op = match lt.token { Token::Or => Op::Or, @@ -342,7 +346,8 @@ impl Expression { | Token::Plus | Token::Exception(_) | Token::AdditionRef { .. } - | Token::CloseParen, + | Token::CloseParen + | Token::Unknown(_), ) => { while let Some(top) = op_stack.pop() { match top.op { @@ -387,6 +392,35 @@ impl Expression { }, _ => return make_err_for_token(last_token, lt.span), }, + Token::Unknown(unknown) => { + match last_token { + None | Some(Token::And | Token::Or | Token::OpenParen) => { + // This is the same position as a valid SPDX license id, + // so assume that is what the user was attempting + expr_queue.push(ExprNode::Req(ExpressionReq { + req: LicenseReq { + license: LicenseItem::Other(Box::new(LicenseRef { + doc_ref: None, + lic_ref: (*unknown).to_owned(), + })), + addition: None, + }, + span: lt.span.start as u32..lt.span.end as u32, + })); + } + Some(Token::With) => { + let Some(ExprNode::Req(lic)) = expr_queue.last_mut() else { + return make_err_for_token(last_token, lt.span); + }; + + lic.req.addition = Some(AdditionItem::Other(Box::new(AdditionRef { + doc_ref: None, + add_ref: (*unknown).to_owned(), + }))); + } + _ => return make_err_for_token(last_token, lt.span), + } + } } last_token = Some(lt.token); @@ -400,7 +434,8 @@ impl Expression { | Token::Exception(_) | Token::AdditionRef { .. 
} | Token::CloseParen - | Token::Plus, + | Token::Plus + | Token::Unknown(_), ) => {} // We have to have at least one valid license requirement None => { diff --git a/src/lexer.rs b/src/lexer.rs index f1a2b3a..fd45cde 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -26,8 +26,10 @@ pub struct ParseMode { /// This option just allows GPL licenses to be treated similarly to all of /// the other SPDX licenses. pub allow_postfix_plus_on_gpl: bool, - /// How deprecated license identifiers are treated + /// Whether deprecated license or exception identifiers are allowed pub allow_deprecated: bool, + /// Whether unknown license or exception identifiers are allowed + pub allow_unknown: bool, } impl ParseMode { @@ -38,11 +40,13 @@ impl ParseMode { /// case-sensitive. /// 1. `WITH`, `AND`, and `OR`, case-insensitive, are the only valid operators /// 1. Deprecated licenses are not allowed + /// 1. Unknown licenses or exceptions are not allowed pub const STRICT: Self = Self { allow_slash_as_or_operator: false, allow_imprecise_license_names: false, allow_postfix_plus_on_gpl: false, allow_deprecated: false, + allow_unknown: false, }; /// Allow non-conforming syntax for crates-io compatibility @@ -55,11 +59,13 @@ impl ParseMode { /// 1. `/` can be used as a synonym for `OR`, and doesn't need to be /// separated by whitespace from the terms it combines /// 1. Deprecated license identifiers are allowed + /// 1. 
Unknown licenses or exceptions are not allowed pub const LAX: Self = Self { allow_slash_as_or_operator: true, allow_imprecise_license_names: true, allow_postfix_plus_on_gpl: true, allow_deprecated: true, + allow_unknown: false, }; } @@ -84,6 +90,8 @@ pub enum Token<'a> { /// The name of the addition reference add_ref: &'a str, }, + /// An unknown license term was encountered + Unknown(&'a str), /// A postfix `+` indicating "or later" for a particular SPDX license id Plus, /// A `(` for starting a group @@ -127,6 +135,7 @@ impl Token<'_> { }) + "AdditionRef-".len() + add_ref.len() } + Token::Unknown(u) => u.len(), } } } @@ -322,6 +331,8 @@ impl<'a> Iterator for Lexer<'a> { } { Some(Ok((Token::Spdx(lic_id), token_len))) + } else if self.mode.allow_unknown { + ok_token(Token::Unknown(m)) } else { Some(Err(ParseError { original: self.original.to_owned(), diff --git a/tests/check.rs b/tests/check.rs index 6f3b654..d367c13 100644 --- a/tests/check.rs +++ b/tests/check.rs @@ -1,6 +1,6 @@ #![allow(clippy::nonminimal_bool, clippy::eq_op, clippy::cognitive_complexity)] -use spdx::LicenseItem; +use spdx::{Expression, LicenseItem}; macro_rules! 
exact {
     ($req:expr, $e:expr) => {
@@ -559,3 +559,89 @@ fn too_many_to_minimize() {
         spdx::expression::MinimizeError::TooManyRequirements(65)
     );
 }
+
+/// Tests that `ParseMode::allow_unknown` lets unrecognized identifiers parse.
+///
+/// An unknown identifier in a license position is expected to come back as a
+/// `LicenseItem::Other` without a `doc_ref`, and one following `with` as an
+/// `AdditionItem::Other` attached to the license requirement before it.
+#[test]
+fn handles_unknown() {
+    // Every other leniency is off so that only `allow_unknown` is exercised
+    const UNKNOWN: spdx::ParseMode = spdx::ParseMode {
+        allow_deprecated: false,
+        allow_imprecise_license_names: false,
+        allow_postfix_plus_on_gpl: false,
+        allow_slash_as_or_operator: false,
+        allow_unknown: true,
+    };
+
+    // A lone unknown identifier must parse as a complete expression
+    let single = spdx::Expression::parse_mode("sigh", UNKNOWN).unwrap();
+
+    // Collects clones of the expression's requirements, in iteration order
+    fn get_reqs(e: &Expression) -> Vec<spdx::LicenseReq> {
+        e.requirements().map(|er| er.req.clone()).collect()
+    }
+
+    // The requirement an unknown license identifier `s` is expected to yield
+    fn bad(s: &str) -> spdx::LicenseReq {
+        spdx::LicenseReq {
+            license: LicenseItem::Other(Box::new(spdx::LicenseRef {
+                lic_ref: s.into(),
+                doc_ref: None,
+            })),
+            addition: None,
+        }
+    }
+
+    assert_eq!(get_reqs(&single), vec![bad("sigh")]);
+
+    // An unknown identifier combined with a known SPDX license
+    let compound = spdx::Expression::parse_mode("bad or MIT", UNKNOWN).unwrap();
+
+    assert_eq!(
+        get_reqs(&compound),
+        vec![
+            bad("bad"),
+            spdx::LicenseReq::from(spdx::license_id("MIT").unwrap())
+        ]
+    );
+
+    // Unknown identifiers both inside and outside a parenthesized group
+    let parens = spdx::Expression::parse_mode("(bad and Apache-2.0) or superbad", UNKNOWN).unwrap();
+
+    assert_eq!(
+        get_reqs(&parens),
+        vec![
+            bad("bad"),
+            spdx::LicenseReq::from(spdx::license_id("Apache-2.0").unwrap()),
+            bad("superbad")
+        ]
+    );
+
+    // An unknown identifier after `with` becomes the addition of that license
+    let exc = spdx::Expression::parse_mode(
+        "terrible and (Apache-2.0 with even-worse or superbad)",
+        UNKNOWN,
+    )
+    .unwrap();
+
+    assert_eq!(
+        get_reqs(&exc),
+        vec![
+            bad("terrible"),
+            spdx::LicenseReq {
+                license: spdx::LicenseItem::Spdx {
+                    id: spdx::license_id("Apache-2.0").unwrap(),
+                    or_later: false
+                },
+                addition: Some(spdx::AdditionItem::Other(Box::new(spdx::AdditionRef {
+                    add_ref: "even-worse".into(),
+                    doc_ref: None,
+                }))),
+            },
+            bad("superbad")
+        ]
+    );
+}