Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] - ReleaseDate
### Added
- [PR#84](https://github.com/EmbarkStudios/spdx/pull/84) resolved [#67](https://github.com/EmbarkStudios/spdx/issues/67) by inlining the `askalono` crate to allow detection of license texts or headers from arbitrary text data. There are multiple feature flags associated with this new feature.
- [PR#85](https://github.com/EmbarkStudios/spdx/pull/85) resolved [#82](https://github.com/EmbarkStudios/spdx/issues/82) by optionally allowing the parsing of unknown identifiers via `ParseMode::allow_unknown`. Unknown identifiers are either treated as `LicenseRef-<unknown identifier>` or `AdditionRef-<unknown identifier>` depending on their position. Unknown identifiers in positions that are invalid for either licenses or exceptions are still considered parse errors.

## [0.12.0] - 2025-08-19
### Added
Expand Down
45 changes: 40 additions & 5 deletions src/expression/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ impl Expression {
can.push_str("AdditionRef-");
can.push_str(add_ref);
}
Token::Unknown(_u) => unreachable!(),
}
}

Expand Down Expand Up @@ -188,7 +189,7 @@ impl Expression {
None | Some(Token::And | Token::Or | Token::OpenParen) => &["<license>", "("],
Some(Token::CloseParen) => &["AND", "OR"],
Some(Token::Exception(_) | Token::AdditionRef { .. }) => &["AND", "OR", ")"],
Some(Token::Spdx(_)) => &["AND", "OR", "WITH", ")", "+"],
Some(Token::Spdx(_) | Token::Unknown(_)) => &["AND", "OR", "WITH", ")", "+"],
Some(Token::LicenseRef { .. } | Token::Plus) => &["AND", "OR", "WITH", ")"],
Some(Token::With) => &["<addition>"],
};
Expand Down Expand Up @@ -282,7 +283,9 @@ impl Expression {
_ => return make_err_for_token(last_token, lt.span),
},
Token::With => match last_token {
Some(Token::Spdx(_) | Token::LicenseRef { .. } | Token::Plus) => {}
Some(
Token::Spdx(_) | Token::LicenseRef { .. } | Token::Plus | Token::Unknown(_),
) => {}
_ => return make_err_for_token(last_token, lt.span),
},
Token::Or | Token::And => match last_token {
Expand All @@ -292,7 +295,8 @@ impl Expression {
| Token::CloseParen
| Token::Exception(_)
| Token::AdditionRef { .. }
| Token::Plus,
| Token::Plus
| Token::Unknown(_),
) => {
let new_op = match lt.token {
Token::Or => Op::Or,
Expand Down Expand Up @@ -342,7 +346,8 @@ impl Expression {
| Token::Plus
| Token::Exception(_)
| Token::AdditionRef { .. }
| Token::CloseParen,
| Token::CloseParen
| Token::Unknown(_),
) => {
while let Some(top) = op_stack.pop() {
match top.op {
Expand Down Expand Up @@ -387,6 +392,35 @@ impl Expression {
},
_ => return make_err_for_token(last_token, lt.span),
},
Token::Unknown(unknown) => {
match last_token {
None | Some(Token::And | Token::Or | Token::OpenParen) => {
// This is the same position as a valid SPDX license id,
// so assume that is what the user was attempting
expr_queue.push(ExprNode::Req(ExpressionReq {
req: LicenseReq {
license: LicenseItem::Other(Box::new(LicenseRef {
doc_ref: None,
lic_ref: (*unknown).to_owned(),
})),
addition: None,
},
span: lt.span.start as u32..lt.span.end as u32,
}));
}
Some(Token::With) => {
let Some(ExprNode::Req(lic)) = expr_queue.last_mut() else {
return make_err_for_token(last_token, lt.span);
};

lic.req.addition = Some(AdditionItem::Other(Box::new(AdditionRef {
doc_ref: None,
add_ref: (*unknown).to_owned(),
})));
}
_ => return make_err_for_token(last_token, lt.span),
}
}
}

last_token = Some(lt.token);
Expand All @@ -400,7 +434,8 @@ impl Expression {
| Token::Exception(_)
| Token::AdditionRef { .. }
| Token::CloseParen
| Token::Plus,
| Token::Plus
| Token::Unknown(_),
) => {}
// We have to have at least one valid license requirement
None => {
Expand Down
13 changes: 12 additions & 1 deletion src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ pub struct ParseMode {
/// This option just allows GPL licenses to be treated similarly to all of
/// the other SPDX licenses.
pub allow_postfix_plus_on_gpl: bool,
/// How deprecated license identifiers are treated
/// Whether deprecated license or exception identifiers are allowed
pub allow_deprecated: bool,
/// Whether unknown license or exception identifiers are allowed
pub allow_unknown: bool,
}

impl ParseMode {
Expand All @@ -38,11 +40,13 @@ impl ParseMode {
/// case-sensitive.
/// 1. `WITH`, `AND`, and `OR`, case-insensitive, are the only valid operators
/// 1. Deprecated licenses are not allowed
/// 1. Unknown licenses or exceptions are not allowed
pub const STRICT: Self = Self {
allow_slash_as_or_operator: false,
allow_imprecise_license_names: false,
allow_postfix_plus_on_gpl: false,
allow_deprecated: false,
allow_unknown: false,
};

/// Allow non-conforming syntax for crates-io compatibility
Expand All @@ -55,11 +59,13 @@ impl ParseMode {
/// 1. `/` can be used as a synonym for `OR`, and doesn't need to be
/// separated by whitespace from the terms it combines
/// 1. Deprecated license identifiers are allowed
/// 1. Unknown licenses or exceptions are not allowed
pub const LAX: Self = Self {
allow_slash_as_or_operator: true,
allow_imprecise_license_names: true,
allow_postfix_plus_on_gpl: true,
allow_deprecated: true,
allow_unknown: false,
};
}

Expand All @@ -84,6 +90,8 @@ pub enum Token<'a> {
/// The name of the addition reference
add_ref: &'a str,
},
/// An unknown license term was encountered
Unknown(&'a str),
/// A postfix `+` indicating "or later" for a particular SPDX license id
Plus,
/// A `(` for starting a group
Expand Down Expand Up @@ -127,6 +135,7 @@ impl Token<'_> {
}) + "AdditionRef-".len()
+ add_ref.len()
}
Token::Unknown(u) => u.len(),
}
}
}
Expand Down Expand Up @@ -322,6 +331,8 @@ impl<'a> Iterator for Lexer<'a> {
}
{
Some(Ok((Token::Spdx(lic_id), token_len)))
} else if self.mode.allow_unknown {
ok_token(Token::Unknown(m))
} else {
Some(Err(ParseError {
original: self.original.to_owned(),
Expand Down
77 changes: 76 additions & 1 deletion tests/check.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#![allow(clippy::nonminimal_bool, clippy::eq_op, clippy::cognitive_complexity)]

use spdx::LicenseItem;
use spdx::{Expression, LicenseItem};

macro_rules! exact {
($req:expr, $e:expr) => {
Expand Down Expand Up @@ -559,3 +559,78 @@ fn too_many_to_minimize() {
spdx::expression::MinimizeError::TooManyRequirements(65)
);
}

/// Unknown license and exception identifiers parse successfully when
/// `allow_unknown` is enabled: an unknown term in license position is reported
/// as a `LicenseRef`, and an unknown term following `WITH` as an `AdditionRef`.
#[test]
fn handles_unknown() {
    // Every other relaxation is switched off; only unknown identifiers are accepted
    const UNKNOWN: spdx::ParseMode = spdx::ParseMode {
        allow_slash_as_or_operator: false,
        allow_imprecise_license_names: false,
        allow_postfix_plus_on_gpl: false,
        allow_deprecated: false,
        allow_unknown: true,
    };

    /// Parses `text` with `UNKNOWN` and collects the requirements of the expression
    fn reqs_of(text: &str) -> Vec<spdx::LicenseReq> {
        let parsed: Expression = spdx::Expression::parse_mode(text, UNKNOWN).unwrap();
        parsed.requirements().map(|er| er.req.clone()).collect()
    }

    /// The requirement expected for an unknown identifier in license position
    fn unknown(name: &str) -> spdx::LicenseReq {
        let license = LicenseItem::Other(Box::new(spdx::LicenseRef {
            doc_ref: None,
            lic_ref: name.to_owned(),
        }));

        spdx::LicenseReq {
            license,
            addition: None,
        }
    }

    /// The requirement expected for a well-known SPDX license identifier
    fn known(id: &str) -> spdx::LicenseReq {
        spdx::LicenseReq::from(spdx::license_id(id).unwrap())
    }

    // A lone unknown identifier
    assert_eq!(reqs_of("sigh"), vec![unknown("sigh")]);

    // An unknown identifier combined with a known one
    assert_eq!(reqs_of("bad or MIT"), vec![unknown("bad"), known("MIT")]);

    // Unknown identifiers both inside and outside of a parenthesized group
    assert_eq!(
        reqs_of("(bad and Apache-2.0) or superbad"),
        vec![unknown("bad"), known("Apache-2.0"), unknown("superbad")]
    );

    // An unknown identifier after `with` is attached to the preceding license
    // as its addition rather than becoming a requirement of its own
    let apache_with_unknown_addition = spdx::LicenseReq {
        license: spdx::LicenseItem::Spdx {
            id: spdx::license_id("Apache-2.0").unwrap(),
            or_later: false,
        },
        addition: Some(spdx::AdditionItem::Other(Box::new(spdx::AdditionRef {
            doc_ref: None,
            add_ref: "even-worse".to_owned(),
        }))),
    };

    assert_eq!(
        reqs_of("terrible and (Apache-2.0 with even-worse or superbad)"),
        vec![
            unknown("terrible"),
            apache_with_unknown_addition,
            unknown("superbad")
        ]
    );
}