Skip to content

Commit

Permalink
Merge pull request #2 from SteveLauC/guess
Browse files Browse the repository at this point in the history
refactor: ex-export the Guess type
  • Loading branch information
bzz authored Sep 22, 2023
2 parents 45a244d + a69bb31 commit 3fd51cf
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 1 deletion.
4 changes: 4 additions & 0 deletions src/go/guess.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,13 @@ pub struct GoGuess {
}


/// The result of a language guess.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so callers can
/// copy a guess and compare guesses directly (for example in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Guess {
    /// Name of the detected language.
    pub language: String,
    /// Set to `false` when more than one language was possible; in that case
    /// `language` holds the first candidate in alphabetical order.
    pub safe: bool,
}

Expand Down
61 changes: 60 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ use std::os::raw::c_uchar;

use crate::go::slice::{GoSlice, ToGoSlice};
use crate::go::string::{GoString, ToGoString};
use crate::go::guess::{GoGuess, Guess};
use crate::go::guess::GoGuess;

pub use go::guess::Guess;

mod go;

Expand Down Expand Up @@ -40,6 +42,18 @@ extern "C" {
// fn GetLanguagesByVimModeline(filename: GoString, content: GoSlice, candidates: GoSlice, result: &mut GoSlice);
}

/// `get_languages()` applies a sequence of strategies based on the given filename and
/// content to find out the most probable languages to return.
///
/// If it finds a strategy that produces a single result, that result will be returned;
/// otherwise the results of the last strategy that returned multiple results will be returned.
/// If the content is binary, no results will be returned. This matches the
/// behavior of [Linguist.detect].
///
/// At least one of the arguments should be set. If content is missing, language
/// detection will be based on the filename alone; the function will not read the
/// file itself when given empty content.
///
/// [Linguist.detect]: https://github.com/github/linguist/blob/aad49acc0624c70d654a8dce447887dbbc713c7a/lib/linguist.rb#L14-L49
pub fn get_languages(filename: &str, content: &[u8]) -> Result<Vec<String>, NulError> {
let c_filename = CString::new(filename).expect("Can't construct string");
let c_content = CString::new(content).expect("Can't construct content string");
Expand All @@ -50,6 +64,12 @@ pub fn get_languages(filename: &str, content: &[u8]) -> Result<Vec<String>, NulE
}
}

/// `get_language_by_content()` returns the language guessed for the given
/// content. It is a strategy that uses content-based regexp heuristics and the
/// filename extension.
///
/// If more than one language is possible, it returns the first language in
/// alphabetical order and the `safe` field of `Guess` will be set to false.
pub fn get_language_by_content(filename: &str, content: &[u8]) -> Result<Guess, NulError> {
let c_filename = CString::new(filename)?;
let c_content = CString::new(content)?;
Expand All @@ -65,6 +85,7 @@ pub fn get_language_by_content(filename: &str, content: &[u8]) -> Result<Guess,
}
}

/// `get_language_extensions()` returns all extensions associated with the given language.
pub fn get_language_extensions(language: &str) -> Result<Vec<String>, NulError> {
let c_language = CString::new(language)?;
let mut go_result = GoSlice::default();
Expand All @@ -74,6 +95,8 @@ pub fn get_language_extensions(language: &str) -> Result<Vec<String>, NulError>
}
}

/// `get_language()` applies a sequence of strategies based on the given filename
/// and content to find out the most probable language to return.
pub fn get_language(filename: &str, content: &[u8]) -> Result<String, NulError> {
let c_filename = CString::new(filename)?;
let c_content = CString::new(content)?;
Expand All @@ -88,6 +111,7 @@ pub fn get_language(filename: &str, content: &[u8]) -> Result<String, NulError>
}
}

/// `get_mime_type()` returns the MIME type of a given file based on its language.
pub fn get_mime_type(path: &str, language: &str) -> Result<String, NulError> {
let c_path = CString::new(path)?;
let c_language = CString::new(language)?;
Expand All @@ -96,72 +120,107 @@ pub fn get_mime_type(path: &str, language: &str) -> Result<String, NulError> {
}
}


/// Returns the language reported by the native `GetLanguageByExtension`
/// strategy for `filename`.
///
/// If more than one language is possible, the first one in alphabetical order
/// is returned and the `safe` field of the resulting [`Guess`] is `false`.
///
/// # Errors
///
/// Returns [`NulError`] if `filename` contains an interior NUL byte and so
/// cannot be converted into a [`CString`].
pub fn get_language_by_extension(filename: &str) -> Result<Guess, NulError> {
    let name = CString::new(filename)?;
    // SAFETY: `name` stays alive until after the call returns; assumes the Go
    // side does not retain the pointer beyond the call — TODO confirm.
    let raw_guess = unsafe { GetLanguageByExtension(name.as_go_string()) };
    Ok(Guess::from(raw_guess))
}

/// Returns the language reported by the native `GetLanguageByFilename`
/// strategy for `filename`.
///
/// If more than one language is possible, the first one in alphabetical order
/// is returned and the `safe` field of the resulting [`Guess`] is `false`.
///
/// # Errors
///
/// Returns [`NulError`] if `filename` contains an interior NUL byte and so
/// cannot be converted into a [`CString`].
pub fn get_language_by_filename(filename: &str) -> Result<Guess, NulError> {
    let name = CString::new(filename)?;
    // SAFETY: `name` stays alive until after the call returns; assumes the Go
    // side does not retain the pointer beyond the call — TODO confirm.
    let raw_guess = unsafe { GetLanguageByFilename(name.as_go_string()) };
    Ok(Guess::from(raw_guess))
}

/// Returns the language reported by the native `GetLanguageByModeline`
/// strategy for `content`.
///
/// If more than one language is possible, the first one in alphabetical order
/// is returned and the `safe` field of the resulting [`Guess`] is `false`.
///
/// # Errors
///
/// Returns [`NulError`] if `content` contains a NUL byte, because the bytes
/// are first wrapped in a [`CString`].
pub fn get_language_by_modeline(content: &[u8]) -> Result<Guess, NulError> {
    let bytes = CString::new(content)?;
    // SAFETY: `bytes` stays alive until after the call returns; assumes the Go
    // side does not retain the slice beyond the call — TODO confirm.
    let raw_guess = unsafe { GetLanguageByModeline(bytes.as_go_slice()) };
    Ok(Guess::from(raw_guess))
}

/// Returns the language reported by the native `GetLanguageByShebang`
/// strategy for `content`.
///
/// If more than one language is possible, the first one in alphabetical order
/// is returned and the `safe` field of the resulting [`Guess`] is `false`.
///
/// # Errors
///
/// Returns [`NulError`] if `content` contains a NUL byte, because the bytes
/// are first wrapped in a [`CString`].
pub fn get_language_by_shebang(content: &[u8]) -> Result<Guess, NulError> {
    let bytes = CString::new(content)?;
    // SAFETY: `bytes` stays alive until after the call returns; assumes the Go
    // side does not retain the slice beyond the call — TODO confirm.
    let raw_guess = unsafe { GetLanguageByShebang(bytes.as_go_slice()) };
    Ok(Guess::from(raw_guess))
}

/// Returns the language reported by the native `GetLanguageByVimModeline`
/// strategy for `content`.
///
/// If more than one language is possible, the first one in alphabetical order
/// is returned and the `safe` field of the resulting [`Guess`] is `false`.
///
/// # Errors
///
/// Returns [`NulError`] if `content` contains a NUL byte, because the bytes
/// are first wrapped in a [`CString`].
pub fn get_language_by_vim_modeline(content: &[u8]) -> Result<Guess, NulError> {
    let bytes = CString::new(content)?;
    // SAFETY: `bytes` stays alive until after the call returns; assumes the Go
    // side does not retain the slice beyond the call — TODO confirm.
    let raw_guess = unsafe { GetLanguageByVimModeline(bytes.as_go_slice()) };
    Ok(Guess::from(raw_guess))
}

/// Returns the language reported by the native `GetLanguageByEmacsModeline`
/// strategy for `content`.
///
/// If more than one language is possible, the first one in alphabetical order
/// is returned and the `safe` field of the resulting [`Guess`] is `false`.
///
/// # Errors
///
/// Returns [`NulError`] if `content` contains a NUL byte, because the bytes
/// are first wrapped in a [`CString`].
pub fn get_language_by_emacs_modeline(content: &[u8]) -> Result<Guess, NulError> {
    let bytes = CString::new(content)?;
    // SAFETY: `bytes` stays alive until after the call returns; assumes the Go
    // side does not retain the slice beyond the call — TODO confirm.
    let raw_guess = unsafe { GetLanguageByEmacsModeline(bytes.as_go_slice()) };
    Ok(Guess::from(raw_guess))
}

/// Reports whether `data` is binary, following this
/// [code snippet](http://git.kernel.org/cgit/git/git.git/tree/xdiff-interface.c?id=HEAD#n198).
///
/// The answer is `true` exactly when the native `IsBinary` call returns `1`.
///
/// # Errors
///
/// Returns [`NulError`] if `data` contains a NUL byte, because the bytes are
/// first wrapped in a [`CString`].
///
/// NOTE(review): binary input commonly contains NUL bytes, so such input is
/// reported as `Err` here rather than `Ok(true)` — confirm this is intended.
pub fn is_binary(data: &[u8]) -> Result<bool, NulError> {
    let bytes = CString::new(data)?;
    // SAFETY: `bytes` stays alive until after the call returns; assumes the Go
    // side does not retain the slice beyond the call — TODO confirm.
    let verdict = unsafe { IsBinary(bytes.as_go_slice()) };
    Ok(verdict == 1)
}

/// Tells whether `path` is in one of the configuration languages.
///
/// The answer is `true` exactly when the native `IsConfiguration` call
/// returns `1`.
///
/// # Errors
///
/// Returns [`NulError`] if `path` contains an interior NUL byte and so cannot
/// be converted into a [`CString`].
pub fn is_configuration(path: &str) -> Result<bool, NulError> {
    let target = CString::new(path)?;
    // SAFETY: `target` stays alive until after the call returns; assumes the Go
    // side does not retain the pointer beyond the call — TODO confirm.
    let verdict = unsafe { IsConfiguration(target.as_go_string()) };
    Ok(verdict == 1)
}

/// Tells whether `path` is a documentation path.
///
/// The answer is `true` exactly when the native `IsDocumentation` call
/// returns `1`.
///
/// # Errors
///
/// Returns [`NulError`] if `path` contains an interior NUL byte and so cannot
/// be converted into a [`CString`].
pub fn is_documentation(path: &str) -> Result<bool, NulError> {
    let target = CString::new(path)?;
    // SAFETY: `target` stays alive until after the call returns; assumes the Go
    // side does not retain the pointer beyond the call — TODO confirm.
    let verdict = unsafe { IsDocumentation(target.as_go_string()) };
    Ok(verdict == 1)
}

/// Tells whether `path` has a dot as its prefix.
///
/// The answer is `true` exactly when the native `IsDotFile` call returns `1`.
///
/// # Errors
///
/// Returns [`NulError`] if `path` contains an interior NUL byte and so cannot
/// be converted into a [`CString`].
pub fn is_dot_file(path: &str) -> Result<bool, NulError> {
    let target = CString::new(path)?;
    // SAFETY: `target` stays alive until after the call returns; assumes the Go
    // side does not retain the pointer beyond the call — TODO confirm.
    let verdict = unsafe { IsDotFile(target.as_go_string()) };
    Ok(verdict == 1)
}

/// Tells whether the file at `path` is an image (PNG, JPEG or GIF format).
///
/// The answer is `true` exactly when the native `IsImage` call returns `1`.
///
/// # Errors
///
/// Returns [`NulError`] if `path` contains an interior NUL byte and so cannot
/// be converted into a [`CString`].
pub fn is_image(path: &str) -> Result<bool, NulError> {
    let target = CString::new(path)?;
    // SAFETY: `target` stays alive until after the call returns; assumes the Go
    // side does not retain the pointer beyond the call — TODO confirm.
    let verdict = unsafe { IsImage(target.as_go_string()) };
    Ok(verdict == 1)
}

/// Tells whether `path` is a vendor path.
///
/// The answer is `true` exactly when the native `IsVendor` call returns `1`.
///
/// # Errors
///
/// Returns [`NulError`] if `path` contains an interior NUL byte and so cannot
/// be converted into a [`CString`].
pub fn is_vendor(path: &str) -> Result<bool, NulError> {
    let target = CString::new(path)?;
    // SAFETY: `target` stays alive until after the call returns; assumes the Go
    // side does not retain the pointer beyond the call — TODO confirm.
    let verdict = unsafe { IsVendor(target.as_go_string()) };
    Ok(verdict == 1)
}

/// Tells whether the file with the given `path` and `content` is a generated
/// file.
///
/// The answer is `true` exactly when the native `IsGenerated` call returns `1`.
///
/// # Errors
///
/// Returns [`NulError`] if `path` contains an interior NUL byte or `content`
/// contains a NUL byte; both are converted into [`CString`]s (path first)
/// before the native call is made.
pub fn is_generated(path: &str, content: &[u8]) -> Result<bool, NulError> {
    let target = CString::new(path)?;
    let bytes = CString::new(content)?;
    // SAFETY: `target` and `bytes` stay alive until after the call returns;
    // assumes the Go side does not retain either pointer — TODO confirm.
    let verdict = unsafe { IsGenerated(target.as_go_string(), bytes.as_go_slice()) };
    Ok(verdict == 1)
}

/// `get_color()` returns the HTML color code of a given language.
pub fn get_color(language: &str) -> Result<String, NulError> {
let c_language = CString::new(language)?;
unsafe { Ok(GetColor(c_language.as_go_string()).to_string()) }
Expand Down

0 comments on commit 3fd51cf

Please sign in to comment.