diff --git a/Cargo.toml b/Cargo.toml index e5aa359..e6a70c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,23 +1,22 @@ [package] -name = "lodestone_parser" -version = "0.1.1" +name = "lodestone-parser" +version = "1.0.0" authors = ["Anna Clemens "] -edition = "2018" +edition = "2021" [features] default = ["logic", "with_serde"] logic = [ "cssparser", - "failure", "lazy_static", "scraper", + "thiserror", "with_serde", ] with_serde = [ "serde", - "serde_derive", "ffxiv_types/with_serde", "chrono/serde", "url/serde" @@ -26,11 +25,10 @@ with_serde = [ [dependencies] chrono = "0.4" cssparser = { version = "0.27", optional = true } -failure = { version = "0.1", optional = true } lazy_static = { version = "1", optional = true } -scraper = { version = "0.12", optional = true } -serde = { version = "1", optional = true } -serde_derive = { version = "1", optional = true } +scraper = { version = "0.13", optional = true } +serde = { version = "1", features = ["derive"], optional = true } +thiserror = { version = "1", optional = true } url = "2" [dependencies.ffxiv_types] diff --git a/README.md b/README.md index 8492244..6fc33cc 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,3 @@ -# lodestone_parser +# lodestone-parser It parses Lodestone HTML. It's also nowhere near done. diff --git a/src/error.rs b/src/error.rs index 7207e6a..767f509 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,19 +1,17 @@ -use failure::Fail; - pub type Result = std::result::Result; -#[derive(Debug, Fail)] +#[derive(Debug, thiserror::Error)] pub enum Error { - #[fail(display = "couldn't find expected element on the lodestone: {}", _0)] + #[error("couldn't find expected element on the lodestone: {0}")] MissingElement(String), - #[fail(display = "the content scraped from the lodestone was invalid: {}", _0)] + #[error("the content scraped from the lodestone was invalid: {0}")] InvalidContent(String), - #[fail(display = "invalid page (1 through {} available)", _0)] + #[error("invalid page (1 through {0} available)")] InvalidPage(u64), - #[fail(display = "invalid number: {}", _0)] + #[error("invalid number: {0}")] InvalidNumber(std::num::ParseIntError), - #[fail(display = "invalid url: {}", _0)] + #[error("invalid url: {0}")] InvalidUrl(url::ParseError), } diff --git a/src/lib.rs b/src/lib.rs index 1a1f037..ec3e8e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,3 @@ -#![feature(crate_visibility_modifier)] #![allow(clippy::unreadable_literal)] pub extern crate ffxiv_types; @@ -9,7 +8,7 @@ pub mod error; pub mod logic; pub mod models; -crate mod util; +pub(crate) mod util; #[cfg(feature = "logic")] pub use crate::logic::*; diff --git a/src/logic.rs b/src/logic.rs index 6fd2fd0..953264d 100644 --- a/src/logic.rs +++ b/src/logic.rs @@ -36,7 +36,7 @@ pub use self::{ search::*, }; -crate fn plain_parse(html: &Html, select: &scraper::Selector) -> Result { +pub(crate) fn plain_parse(html: &Html, select: &scraper::Selector) -> Result { let string = html .select(select) .next() @@ -46,7 +46,7 @@ crate fn plain_parse(html: &Html, select: &scraper::Selector) -> Result Ok(string) } -crate fn plain_parse_elem<'a>(html: ElementRef<'a>, select: &scraper::Selector) -> Result { +pub(crate) fn plain_parse_elem<'a>(html: ElementRef<'a>, select: &scraper::Selector) -> Result { let string = html .select(select) .next() @@ -56,7 +56,7 @@ crate fn plain_parse_elem<'a>(html: ElementRef<'a>, select: &scraper::Selector) Ok(string) } -crate fn parse_id(a: &Element) -> Result { +pub(crate) fn parse_id(a: &Element) -> Result { let href = a.attr("href").ok_or_else(|| Error::invalid_content("href on link", None))?; let last = href .split('/') @@ -66,7 +66,7 @@ crate fn parse_id(a: &Element) -> Result { last.parse().map_err(Error::InvalidNumber) } -crate fn parse_grand_company(text: &str) -> Result { +pub(crate) fn parse_grand_company(text: &str) -> Result { let mut x = text.split(" / "); let gc_str = x .next() diff --git a/src/logic/character.rs b/src/logic/character.rs index d3f43b2..274ec9d 100644 --- a/src/logic/character.rs +++ b/src/logic/character.rs @@ -27,23 +27,20 @@ use std::{ selectors!( PROFILE_FACE => ".frame__chara__face > img"; PROFILE_PORTRAIT => ".character__detail__image > a > img"; - PROFILE_NAME => ".frame__chara__name"; - PROFILE_WORLD => ".frame__chara__world"; - PROFILE_TITLE => ".frame__chara__title"; - PROFILE_NAME_DAY => ".character-block__birth"; + PROFILE_NAME => ".frame__chara__box > .frame__chara__name"; + PROFILE_WORLD => ".frame__chara__box > .frame__chara__world"; + PROFILE_TITLE => ".frame__chara__box > .frame__chara__title"; + PROFILE_NAME_DAY => ".character-block .character-block__birth"; PROFILE_RACE_CLAN_GENDER => "div.character-block:nth-of-type(1) > .character-block__box > .character-block__name"; PROFILE_GUARDIAN => "div.character-block:nth-of-type(2) > .character-block__box > .character-block__name"; PROFILE_CITY_STATE => "div.character-block:nth-of-type(3) > .character-block__box > .character-block__name"; PROFILE_GRAND_COMPANY => "div.character-block:nth-of-type(4) > .character-block__box > .character-block__name"; PROFILE_FREE_COMPANY => ".character__freecompany__name > h4 > a"; - PROFILE_TEXT => ".character__selfintroduction"; - PROFILE_MOUNT => ".character__mounts > .character__icon__list .character__item_icon.js__tooltip"; - PROFILE_MINION => ".character__minion > .character__icon__list .character__item_icon.js__tooltip"; + PROFILE_TEXT => ".character__content > .character__selfintroduction"; + // PROFILE_MOUNT => ".character__mounts > .character__icon__list .character__item_icon.js__tooltip"; + // PROFILE_MINION => ".character__minion > .character__icon__list .character__item_icon.js__tooltip"; - PROFILE_CLASS => "ul.character__job > li"; - CLASS_NAME => ".character__job__name"; - CLASS_LEVEL => ".character__job__level"; - CLASS_EXP => ".character__job__exp"; + PROFILE_CLASS => "div.character__level__list > ul > li"; ); pub fn parse(id: u64, html: &str) -> Result { @@ -67,8 +64,8 @@ pub fn parse(id: u64, html: &str) -> Result { let jobs = parse_jobs(&html)?; - let mounts = parse_mounts(&html)?; - let minions = parse_minions(&html)?; + // let mounts = parse_mounts(&html)?; + // let minions = parse_minions(&html)?; let face = parse_face(&html)?; let portrait = parse_portrait(&html)?; @@ -88,8 +85,8 @@ pub fn parse(id: u64, html: &str) -> Result { free_company_id, profile_text, jobs, - mounts, - minions, + // mounts, + // minions, face, portrait, }) @@ -97,7 +94,7 @@ pub fn parse(id: u64, html: &str) -> Result { fn parse_world(html: &Html) -> Result { let parts_str = plain_parse(html, &*PROFILE_WORLD)?; - let mut parts = parts_str.split("\u{00a0}("); + let mut parts = parts_str.split(" ["); let world_str = parts.next() .ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?; World::from_str(world_str) @@ -219,54 +216,42 @@ fn parse_jobs(html: &Html) -> Result> { } fn parse_job(elem: ElementRef) -> Result<(Job, JobInfo)> { - let job = crate::logic::plain_parse_elem(elem, &*CLASS_NAME) - .and_then(|x| Job::parse(&x).ok_or_else(|| Error::invalid_content("valid job", Some(&x))))?; + let img = elem.first_child() + .ok_or_else(|| Error::invalid_content("missing job icon", None))?; + let img = img.value().as_element() + .ok_or_else(|| Error::invalid_content("job icon is not an element", None))?; + let tooltip = img.attr("data-tooltip") + .ok_or_else(|| Error::invalid_content("missing data-tooltip attribute", None))?; + let parts: Vec<&str> = tooltip.split(" / ").collect(); + let parts: Vec<&str> = parts[0].split(" (").collect(); + let job = Job::parse(&parts[0]).ok_or_else(|| Error::invalid_content("valid job", Some(&parts[0])))?; - let level_str = crate::logic::plain_parse_elem(elem, &*CLASS_LEVEL)?; - let level: Option = match level_str.as_str() { + let level_str: String = elem.text().collect(); + let level: Option = match level_str.trim() { "-" => None, x => Some(x.parse().map_err(Error::InvalidNumber)?), }; - let exp_str = crate::logic::plain_parse_elem(elem, &*CLASS_EXP)?; - let mut exp_split = exp_str.split(" / "); - - let first_exp = exp_split.next().unwrap(); // must have first element - let experience: Option = match first_exp { - "-" | "--" => None, - x => Some(x.replace(",", "").parse().map_err(Error::InvalidNumber)?), - }; - - let second_exp = exp_split - .next() - .ok_or_else(|| Error::invalid_content("experience split by ` / `", Some(&exp_str)))?; - let next_level_experience: Option = match second_exp { - "-" | "--" => None, - x => Some(x.replace(",", "").parse().map_err(Error::InvalidNumber)?), - }; - let info = JobInfo { level, - experience, - next_level_experience, }; Ok((job, info)) } -fn parse_minions(html: &Html) -> Result> { - html.select(&*PROFILE_MINION) - .map(parse_icon) - .map(|res| res.map(|(name, icon)| Minion { name, icon })) - .collect() -} - -fn parse_mounts(html: &Html) -> Result> { - html.select(&*PROFILE_MOUNT) - .map(parse_icon) - .map(|res| res.map(|(name, icon)| Mount { name, icon })) - .collect() -} +// fn parse_minions(html: &Html) -> Result> { +// html.select(&*PROFILE_MINION) +// .map(parse_icon) +// .map(|res| res.map(|(name, icon)| Minion { name, icon })) +// .collect() +// } +// +// fn parse_mounts(html: &Html) -> Result> { +// html.select(&*PROFILE_MOUNT) +// .map(parse_icon) +// .map(|res| res.map(|(name, icon)| Mount { name, icon })) +// .collect() +// } fn parse_icon(elem: ElementRef) -> Result<(String, Url)> { let name = elem diff --git a/src/logic/free_company.rs b/src/logic/free_company.rs index 768208e..62b5b07 100644 --- a/src/logic/free_company.rs +++ b/src/logic/free_company.rs @@ -78,7 +78,7 @@ pub fn parse(id: u64, html: &str) -> Result { fn parse_world(html: &Html) -> Result { let parts_str = plain_parse(html, &*FC_WORLD)?; - let mut parts = parts_str.split("\u{00a0}("); + let mut parts = parts_str.split(" ["); let world_str = parts.next() .ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?; World::from_str(world_str.trim()) diff --git a/src/logic/linkshell.rs b/src/logic/linkshell.rs index 3ab999b..86921e4 100644 --- a/src/logic/linkshell.rs +++ b/src/logic/linkshell.rs @@ -41,7 +41,7 @@ pub fn parse(id: u64, html_str: &str) -> Result { fn parse_world(html: &Html) -> Result { let parts_str = plain_parse(html, &*LS_WORLD)?; - let mut parts = parts_str.split("\u{00a0}("); + let mut parts = parts_str.split(" ["); let world_str = parts.next() .ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?; World::from_str(world_str) diff --git a/src/logic/search.rs b/src/logic/search.rs index b3818c4..e2110ef 100644 --- a/src/logic/search.rs +++ b/src/logic/search.rs @@ -22,7 +22,7 @@ selectors!( NO_RESULTS => "p.parts__zero"; ); -crate fn parse_pagination(html: &Html) -> Result { +pub(crate) fn parse_pagination(html: &Html) -> Result { const LODESTONE_PER_PAGE: f32 = 50.0; let total_str = crate::logic::plain_parse(&html, &*PAGINATION_TOTAL)?; @@ -66,7 +66,7 @@ crate fn parse_pagination(html: &Html) -> Result { }) } -crate fn parse_no_results(html: &Html) -> bool { +pub(crate) fn parse_no_results(html: &Html) -> bool { html.select(&*NO_RESULTS) .next() .map(|x| x.text().collect::() == "Your search yielded no results.") diff --git a/src/logic/search/character.rs b/src/logic/search/character.rs index 7be1252..56f63d0 100644 --- a/src/logic/search/character.rs +++ b/src/logic/search/character.rs @@ -50,7 +50,7 @@ pub fn parse(html: &str) -> Result> { }) } -crate fn parse_single(html: ElementRef) -> Result { +pub(crate) fn parse_single(html: ElementRef) -> Result { let id = parse_id(html)?; let name = plain_parse(html, &*ITEM_NAME)?; @@ -82,7 +82,7 @@ fn parse_id(html: ElementRef) -> Result { fn parse_world(html: ElementRef) -> Result { let parts_str = plain_parse(html, &*ITEM_WORLD)?; - let mut parts = parts_str.split("\u{00a0}("); + let mut parts = parts_str.split(" ["); let world_str = parts.next() .ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?; World::from_str(world_str) diff --git a/src/logic/search/free_company.rs b/src/logic/search/free_company.rs index 733e8b5..b2c35ab 100644 --- a/src/logic/search/free_company.rs +++ b/src/logic/search/free_company.rs @@ -98,7 +98,7 @@ fn parse_grand_company(html: ElementRef) -> Result { fn parse_world(html: ElementRef) -> Result { let parts_str = plain_parse(html, &*ITEM_WORLD)?; - let mut parts = parts_str.split("\u{00a0}("); + let mut parts = parts_str.split(" ["); let world_str = parts.next() .ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?; World::from_str(world_str) diff --git a/src/logic/search/linkshell.rs b/src/logic/search/linkshell.rs index afcc81c..22e5f2e 100644 --- a/src/logic/search/linkshell.rs +++ b/src/logic/search/linkshell.rs @@ -69,7 +69,7 @@ fn parse_id(html: ElementRef) -> Result { fn parse_world(html: ElementRef) -> Result { let parts_str = plain_parse(html, &*ITEM_WORLD)?; - let mut parts = parts_str.split("\u{00a0}("); + let mut parts = parts_str.split(" ["); let world_str = parts.next() .ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?; World::from_str(world_str) diff --git a/src/models.rs b/src/models.rs index 2faafb4..cc45e03 100644 --- a/src/models.rs +++ b/src/models.rs @@ -2,7 +2,7 @@ macro_rules! ffxiv_enum { ($(#[$meta:meta])* $name:ident { $($variant:ident => $str_repr:expr),+$(,)? }) => { $(#[$meta])* #[derive(Debug, PartialEq, Clone, Copy)] - #[cfg_attr(feature = "with_serde", derive(serde_derive::Serialize, serde_derive::Deserialize))] + #[cfg_attr(feature = "with_serde", derive(serde::Serialize, serde::Deserialize))] pub enum $name { $($variant,)+ } diff --git a/src/models/character.rs b/src/models/character.rs index dfbcba7..50ac65f 100644 --- a/src/models/character.rs +++ b/src/models/character.rs @@ -1,6 +1,6 @@ use super::GrandCompany; -#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize}; +#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize}; use ffxiv_types::{World, Race, Clan, Guardian}; use url::Url; @@ -30,10 +30,10 @@ pub struct Character { pub jobs: BTreeMap, - #[cfg_attr(feature = "with_serde", serde(default))] - pub mounts: Vec, - #[cfg_attr(feature = "with_serde", serde(default))] - pub minions: Vec, + // #[cfg_attr(feature = "with_serde", serde(default))] + // pub mounts: Vec, + // #[cfg_attr(feature = "with_serde", serde(default))] + // pub minions: Vec, pub face: Url, pub portrait: Url, @@ -50,8 +50,6 @@ pub struct GrandCompanyInfo { #[cfg_attr(feature = "with_serde", derive(Deserialize, Serialize))] pub struct JobInfo { pub level: Option, - pub experience: Option, - pub next_level_experience: Option, } #[derive(Debug)] @@ -92,6 +90,7 @@ ffxiv_enum!( WhiteMage => "white mage", Scholar => "scholar", Astrologian => "astrologian", + Sage => "sage", Pugilist => "pugilist", Monk => "monk", Lancer => "lancer", @@ -109,6 +108,7 @@ ffxiv_enum!( RedMage => "red mage", BlueMage => "blue mage", Dancer => "dancer", + Reaper => "reaper", Carpenter => "carpenter", Blacksmith => "blacksmith", diff --git a/src/models/free_company.rs b/src/models/free_company.rs index 028a6e1..554038a 100644 --- a/src/models/free_company.rs +++ b/src/models/free_company.rs @@ -2,7 +2,7 @@ use super::GrandCompany; use chrono::{DateTime, Utc}; use ffxiv_types::World; -#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize}; +#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize}; use url::Url; use std::collections::BTreeMap; diff --git a/src/models/linkshell.rs b/src/models/linkshell.rs index 1e5986e..8d09d52 100644 --- a/src/models/linkshell.rs +++ b/src/models/linkshell.rs @@ -1,7 +1,7 @@ use super::search::{Paginated, character::CharacterSearchItem}; use ffxiv_types::World; -#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize}; +#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize}; #[derive(Debug)] #[cfg_attr(feature = "with_serde", derive(Deserialize, Serialize))] diff --git a/src/models/search.rs b/src/models/search.rs index 7727e8c..0b660b0 100644 --- a/src/models/search.rs +++ b/src/models/search.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize}; +#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize}; pub mod character; pub mod free_company; diff --git a/src/models/search/character.rs b/src/models/search/character.rs index 281c193..61ef4a1 100644 --- a/src/models/search/character.rs +++ b/src/models/search/character.rs @@ -1,7 +1,7 @@ use crate::models::character::GrandCompanyInfo; use ffxiv_types::World; -#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize}; +#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize}; use url::Url; diff --git a/src/models/search/free_company.rs b/src/models/search/free_company.rs index 3769504..8789e80 100644 --- a/src/models/search/free_company.rs +++ b/src/models/search/free_company.rs @@ -2,7 +2,7 @@ use crate::models::GrandCompany; use chrono::{DateTime, Utc}; use ffxiv_types::World; -#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize}; +#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize}; use url::Url; #[derive(Debug)] diff --git a/src/models/search/linkshell.rs b/src/models/search/linkshell.rs index be9bc0f..033d5d0 100644 --- a/src/models/search/linkshell.rs +++ b/src/models/search/linkshell.rs @@ -1,5 +1,5 @@ use ffxiv_types::World; -#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize}; +#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize}; #[derive(Debug)] #[cfg_attr(feature = "with_serde", derive(Deserialize, Serialize))] diff --git a/src/util/serde/opt_u64_str.rs b/src/util/serde/opt_u64_str.rs index f2dc046..c241946 100644 --- a/src/util/serde/opt_u64_str.rs +++ b/src/util/serde/opt_u64_str.rs @@ -1,6 +1,6 @@ use serde::{Deserializer, Deserialize, Serializer, de::Unexpected}; -crate fn serialize(u: &Option, serializer: S) -> Result +pub(crate) fn serialize(u: &Option, serializer: S) -> Result where S: Serializer, { match *u { @@ -9,7 +9,7 @@ crate fn serialize(u: &Option, serializer: S) -> Result } } -crate fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> +pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> where D: Deserializer<'de> { let s: Option = Deserialize::deserialize(deserializer)?; diff --git a/src/util/serde/u64_str.rs b/src/util/serde/u64_str.rs index 0e5c684..bcd6fad 100644 --- a/src/util/serde/u64_str.rs +++ b/src/util/serde/u64_str.rs @@ -1,13 +1,13 @@ use serde::{Deserializer, Deserialize, Serializer, de::Unexpected}; #[allow(clippy::trivially_copy_pass_by_ref)] -crate fn serialize(u: &u64, serializer: S) -> Result +pub(crate) fn serialize(u: &u64, serializer: S) -> Result where S: Serializer, { serializer.serialize_str(&u.to_string()) } -crate fn deserialize<'de, D>(deserializer: D) -> Result +pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result where D: Deserializer<'de> { let s: String = Deserialize::deserialize(deserializer)?;