feat: update a bit

This commit is contained in:
Anna 2022-06-16 10:10:03 -04:00
parent af0541f664
commit 7e88d7d1c3
Signed by: anna
GPG Key ID: 0B391D8F06FCD9E0
22 changed files with 82 additions and 102 deletions

View File

@ -1,23 +1,22 @@
[package]
name = "lodestone_parser"
version = "0.1.1"
name = "lodestone-parser"
version = "1.0.0"
authors = ["Anna Clemens <git@annaclemens.io>"]
edition = "2018"
edition = "2021"
[features]
default = ["logic", "with_serde"]
logic = [
"cssparser",
"failure",
"lazy_static",
"scraper",
"thiserror",
"with_serde",
]
with_serde = [
"serde",
"serde_derive",
"ffxiv_types/with_serde",
"chrono/serde",
"url/serde"
@ -26,11 +25,10 @@ with_serde = [
[dependencies]
chrono = "0.4"
cssparser = { version = "0.27", optional = true }
failure = { version = "0.1", optional = true }
lazy_static = { version = "1", optional = true }
scraper = { version = "0.12", optional = true }
serde = { version = "1", optional = true }
serde_derive = { version = "1", optional = true }
scraper = { version = "0.13", optional = true }
serde = { version = "1", features = ["derive"], optional = true }
thiserror = { version = "1", optional = true }
url = "2"
[dependencies.ffxiv_types]

View File

@ -1,3 +1,3 @@
# lodestone_parser
# lodestone-parser
It parses Lodestone HTML. It's also nowhere near done.

View File

@ -1,19 +1,17 @@
use failure::Fail;
pub type Result<T> = std::result::Result<T, Error>;
/// Error type behind this module's `Result<T>` alias.
///
/// The human-readable message of each variant is the format string in the
/// attribute directly above it; the variant's single field is interpolated
/// into that message.
#[derive(Debug, Fail)]
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// An expected element could not be found; the `String` is appended to the message.
#[fail(display = "couldn't find expected element on the lodestone: {}", _0)]
#[error("couldn't find expected element on the lodestone: {0}")]
MissingElement(String),
/// Scraped content was not valid; the `String` is appended to the message.
#[fail(display = "the content scraped from the lodestone was invalid: {}", _0)]
#[error("the content scraped from the lodestone was invalid: {0}")]
InvalidContent(String),
/// An invalid page was requested; the `u64` is shown as the upper bound of the
/// available range ("1 through N available").
#[fail(display = "invalid page (1 through {} available)", _0)]
#[error("invalid page (1 through {0} available)")]
InvalidPage(u64),
/// An integer failed to parse; carries the underlying `ParseIntError`.
#[fail(display = "invalid number: {}", _0)]
#[error("invalid number: {0}")]
InvalidNumber(std::num::ParseIntError),
/// A URL failed to parse; carries the underlying `url::ParseError`.
#[fail(display = "invalid url: {}", _0)]
#[error("invalid url: {0}")]
InvalidUrl(url::ParseError),
}

View File

@ -1,4 +1,3 @@
#![feature(crate_visibility_modifier)]
#![allow(clippy::unreadable_literal)]
pub extern crate ffxiv_types;
@ -9,7 +8,7 @@ pub mod error;
pub mod logic;
pub mod models;
crate mod util;
pub(crate) mod util;
#[cfg(feature = "logic")]
pub use crate::logic::*;

View File

@ -36,7 +36,7 @@ pub use self::{
search::*,
};
crate fn plain_parse(html: &Html, select: &scraper::Selector) -> Result<String> {
pub(crate) fn plain_parse(html: &Html, select: &scraper::Selector) -> Result<String> {
let string = html
.select(select)
.next()
@ -46,7 +46,7 @@ crate fn plain_parse(html: &Html, select: &scraper::Selector) -> Result<String>
Ok(string)
}
crate fn plain_parse_elem<'a>(html: ElementRef<'a>, select: &scraper::Selector) -> Result<String> {
pub(crate) fn plain_parse_elem<'a>(html: ElementRef<'a>, select: &scraper::Selector) -> Result<String> {
let string = html
.select(select)
.next()
@ -56,7 +56,7 @@ crate fn plain_parse_elem<'a>(html: ElementRef<'a>, select: &scraper::Selector)
Ok(string)
}
crate fn parse_id(a: &Element) -> Result<u64> {
pub(crate) fn parse_id(a: &Element) -> Result<u64> {
let href = a.attr("href").ok_or_else(|| Error::invalid_content("href on link", None))?;
let last = href
.split('/')
@ -66,7 +66,7 @@ crate fn parse_id(a: &Element) -> Result<u64> {
last.parse().map_err(Error::InvalidNumber)
}
crate fn parse_grand_company(text: &str) -> Result<GrandCompanyInfo> {
pub(crate) fn parse_grand_company(text: &str) -> Result<GrandCompanyInfo> {
let mut x = text.split(" / ");
let gc_str = x
.next()

View File

@ -27,23 +27,20 @@ use std::{
// Named CSS selector strings consumed by the parsers in this file, e.g.
// `PROFILE_WORLD` in `parse_world`, `PROFILE_MINION` / `PROFILE_MOUNT` in
// `parse_minions` / `parse_mounts`, and the `CLASS_*` entries in `parse_job`.
// NOTE(review): how `selectors!` turns each `NAME => "css"` pair into a value is
// not visible here; call sites dereference the names with `&*NAME`.
selectors!(
PROFILE_FACE => ".frame__chara__face > img";
PROFILE_PORTRAIT => ".character__detail__image > a > img";
PROFILE_NAME => ".frame__chara__name";
PROFILE_WORLD => ".frame__chara__world";
PROFILE_TITLE => ".frame__chara__title";
PROFILE_NAME_DAY => ".character-block__birth";
PROFILE_NAME => ".frame__chara__box > .frame__chara__name";
PROFILE_WORLD => ".frame__chara__box > .frame__chara__world";
PROFILE_TITLE => ".frame__chara__box > .frame__chara__title";
PROFILE_NAME_DAY => ".character-block .character-block__birth";
// The next four differ only in the `nth-of-type` index of the `character-block` div.
PROFILE_RACE_CLAN_GENDER => "div.character-block:nth-of-type(1) > .character-block__box > .character-block__name";
PROFILE_GUARDIAN => "div.character-block:nth-of-type(2) > .character-block__box > .character-block__name";
PROFILE_CITY_STATE => "div.character-block:nth-of-type(3) > .character-block__box > .character-block__name";
PROFILE_GRAND_COMPANY => "div.character-block:nth-of-type(4) > .character-block__box > .character-block__name";
PROFILE_FREE_COMPANY => ".character__freecompany__name > h4 > a";
PROFILE_TEXT => ".character__selfintroduction";
PROFILE_MOUNT => ".character__mounts > .character__icon__list .character__item_icon.js__tooltip";
PROFILE_MINION => ".character__minion > .character__icon__list .character__item_icon.js__tooltip";
PROFILE_TEXT => ".character__content > .character__selfintroduction";
// PROFILE_MOUNT => ".character__mounts > .character__icon__list .character__item_icon.js__tooltip";
// PROFILE_MINION => ".character__minion > .character__icon__list .character__item_icon.js__tooltip";
PROFILE_CLASS => "ul.character__job > li";
CLASS_NAME => ".character__job__name";
CLASS_LEVEL => ".character__job__level";
CLASS_EXP => ".character__job__exp";
PROFILE_CLASS => "div.character__level__list > ul > li";
);
pub fn parse(id: u64, html: &str) -> Result<Character> {
@ -67,8 +64,8 @@ pub fn parse(id: u64, html: &str) -> Result<Character> {
let jobs = parse_jobs(&html)?;
let mounts = parse_mounts(&html)?;
let minions = parse_minions(&html)?;
// let mounts = parse_mounts(&html)?;
// let minions = parse_minions(&html)?;
let face = parse_face(&html)?;
let portrait = parse_portrait(&html)?;
@ -88,8 +85,8 @@ pub fn parse(id: u64, html: &str) -> Result<Character> {
free_company_id,
profile_text,
jobs,
mounts,
minions,
// mounts,
// minions,
face,
portrait,
})
@ -97,7 +94,7 @@ pub fn parse(id: u64, html: &str) -> Result<Character> {
fn parse_world(html: &Html) -> Result<World> {
let parts_str = plain_parse(html, &*PROFILE_WORLD)?;
let mut parts = parts_str.split("\u{00a0}(");
let mut parts = parts_str.split(" [");
let world_str = parts.next()
.ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?;
World::from_str(world_str)
@ -219,54 +216,42 @@ fn parse_jobs(html: &Html) -> Result<BTreeMap<Job, JobInfo>> {
}
/// Parses one job entry element into its `Job` and the accompanying `JobInfo`.
///
/// # Errors
/// Returns an error built with `Error::invalid_content` when a required piece
/// (job name, icon, `data-tooltip` attribute, experience separator) is missing
/// or unrecognised, and `Error::InvalidNumber` when a numeric field fails to parse.
fn parse_job(elem: ElementRef) -> Result<(Job, JobInfo)> {
let job = crate::logic::plain_parse_elem(elem, &*CLASS_NAME)
.and_then(|x| Job::parse(&x).ok_or_else(|| Error::invalid_content("valid job", Some(&x))))?;
// The job icon is taken to be the first child node of `elem`; it must be an
// element carrying a `data-tooltip` attribute.
let img = elem.first_child()
.ok_or_else(|| Error::invalid_content("missing job icon", None))?;
let img = img.value().as_element()
.ok_or_else(|| Error::invalid_content("job icon is not an element", None))?;
let tooltip = img.attr("data-tooltip")
.ok_or_else(|| Error::invalid_content("missing data-tooltip attribute", None))?;
// Keep only the tooltip text before the first " / ", then before the first " (".
// Indexing `[0]` cannot panic: `str::split` always yields at least one item.
let parts: Vec<&str> = tooltip.split(" / ").collect();
let parts: Vec<&str> = parts[0].split(" (").collect();
let job = Job::parse(&parts[0]).ok_or_else(|| Error::invalid_content("valid job", Some(&parts[0])))?;
let level_str = crate::logic::plain_parse_elem(elem, &*CLASS_LEVEL)?;
let level: Option<u8> = match level_str.as_str() {
let level_str: String = elem.text().collect();
let level: Option<u8> = match level_str.trim() {
// A lone "-" means no level; anything else must parse as a `u8`.
"-" => None,
x => Some(x.parse().map_err(Error::InvalidNumber)?),
};
// Experience text is split on " / ": the first part is the current value, the second the next-level value.
let exp_str = crate::logic::plain_parse_elem(elem, &*CLASS_EXP)?;
let mut exp_split = exp_str.split(" / ");
let first_exp = exp_split.next().unwrap(); // must have first element
let experience: Option<u64> = match first_exp {
// "-" and "--" both mean no value; thousands separators (",") are removed before parsing.
"-" | "--" => None,
x => Some(x.replace(",", "").parse().map_err(Error::InvalidNumber)?),
};
let second_exp = exp_split
.next()
.ok_or_else(|| Error::invalid_content("experience split by ` / `", Some(&exp_str)))?;
let next_level_experience: Option<u64> = match second_exp {
"-" | "--" => None,
x => Some(x.replace(",", "").parse().map_err(Error::InvalidNumber)?),
};
let info = JobInfo {
level,
experience,
next_level_experience,
};
Ok((job, info))
}
/// Builds a `Minion` from every element matched by `PROFILE_MINION`, using
/// `parse_icon` to obtain each name and icon.
///
/// # Errors
/// Collecting into `Result` stops at the first element for which `parse_icon`
/// fails and returns that error.
fn parse_minions(html: &Html) -> Result<Vec<Minion>> {
html.select(&*PROFILE_MINION)
.map(parse_icon)
.map(|res| res.map(|(name, icon)| Minion { name, icon }))
.collect()
}
/// Builds a `Mount` from every element matched by `PROFILE_MOUNT`, using
/// `parse_icon` to obtain each name and icon.
///
/// # Errors
/// Collecting into `Result` stops at the first element for which `parse_icon`
/// fails and returns that error.
fn parse_mounts(html: &Html) -> Result<Vec<Mount>> {
html.select(&*PROFILE_MOUNT)
.map(parse_icon)
.map(|res| res.map(|(name, icon)| Mount { name, icon }))
.collect()
}
// fn parse_minions(html: &Html) -> Result<Vec<Minion>> {
// html.select(&*PROFILE_MINION)
// .map(parse_icon)
// .map(|res| res.map(|(name, icon)| Minion { name, icon }))
// .collect()
// }
//
// fn parse_mounts(html: &Html) -> Result<Vec<Mount>> {
// html.select(&*PROFILE_MOUNT)
// .map(parse_icon)
// .map(|res| res.map(|(name, icon)| Mount { name, icon }))
// .collect()
// }
fn parse_icon(elem: ElementRef) -> Result<(String, Url)> {
let name = elem

View File

@ -78,7 +78,7 @@ pub fn parse(id: u64, html: &str) -> Result<FreeCompany> {
fn parse_world(html: &Html) -> Result<World> {
let parts_str = plain_parse(html, &*FC_WORLD)?;
let mut parts = parts_str.split("\u{00a0}(");
let mut parts = parts_str.split(" [");
let world_str = parts.next()
.ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?;
World::from_str(world_str.trim())

View File

@ -41,7 +41,7 @@ pub fn parse(id: u64, html_str: &str) -> Result<Linkshell> {
fn parse_world(html: &Html) -> Result<World> {
let parts_str = plain_parse(html, &*LS_WORLD)?;
let mut parts = parts_str.split("\u{00a0}(");
let mut parts = parts_str.split(" [");
let world_str = parts.next()
.ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?;
World::from_str(world_str)

View File

@ -22,7 +22,7 @@ selectors!(
NO_RESULTS => "p.parts__zero";
);
crate fn parse_pagination(html: &Html) -> Result<Pagination> {
pub(crate) fn parse_pagination(html: &Html) -> Result<Pagination> {
const LODESTONE_PER_PAGE: f32 = 50.0;
let total_str = crate::logic::plain_parse(&html, &*PAGINATION_TOTAL)?;
@ -66,7 +66,7 @@ crate fn parse_pagination(html: &Html) -> Result<Pagination> {
})
}
crate fn parse_no_results(html: &Html) -> bool {
pub(crate) fn parse_no_results(html: &Html) -> bool {
html.select(&*NO_RESULTS)
.next()
.map(|x| x.text().collect::<String>() == "Your search yielded no results.")

View File

@ -50,7 +50,7 @@ pub fn parse(html: &str) -> Result<Paginated<CharacterSearchItem>> {
})
}
crate fn parse_single(html: ElementRef) -> Result<CharacterSearchItem> {
pub(crate) fn parse_single(html: ElementRef) -> Result<CharacterSearchItem> {
let id = parse_id(html)?;
let name = plain_parse(html, &*ITEM_NAME)?;
@ -82,7 +82,7 @@ fn parse_id(html: ElementRef) -> Result<u64> {
fn parse_world(html: ElementRef) -> Result<World> {
let parts_str = plain_parse(html, &*ITEM_WORLD)?;
let mut parts = parts_str.split("\u{00a0}(");
let mut parts = parts_str.split(" [");
let world_str = parts.next()
.ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?;
World::from_str(world_str)

View File

@ -98,7 +98,7 @@ fn parse_grand_company(html: ElementRef) -> Result<GrandCompany> {
fn parse_world(html: ElementRef) -> Result<World> {
let parts_str = plain_parse(html, &*ITEM_WORLD)?;
let mut parts = parts_str.split("\u{00a0}(");
let mut parts = parts_str.split(" [");
let world_str = parts.next()
.ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?;
World::from_str(world_str)

View File

@ -69,7 +69,7 @@ fn parse_id(html: ElementRef) -> Result<u64> {
fn parse_world(html: ElementRef) -> Result<World> {
let parts_str = plain_parse(html, &*ITEM_WORLD)?;
let mut parts = parts_str.split("\u{00a0}(");
let mut parts = parts_str.split(" [");
let world_str = parts.next()
.ok_or_else(|| Error::invalid_content("world with data centre in parens", Some(&parts_str)))?;
World::from_str(world_str)

View File

@ -2,7 +2,7 @@ macro_rules! ffxiv_enum {
($(#[$meta:meta])* $name:ident { $($variant:ident => $str_repr:expr),+$(,)? }) => {
$(#[$meta])*
#[derive(Debug, PartialEq, Clone, Copy)]
#[cfg_attr(feature = "with_serde", derive(serde_derive::Serialize, serde_derive::Deserialize))]
#[cfg_attr(feature = "with_serde", derive(serde::Serialize, serde::Deserialize))]
pub enum $name {
$($variant,)+
}

View File

@ -1,6 +1,6 @@
use super::GrandCompany;
#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize};
#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize};
use ffxiv_types::{World, Race, Clan, Guardian};
use url::Url;
@ -30,10 +30,10 @@ pub struct Character {
pub jobs: BTreeMap<Job, JobInfo>,
#[cfg_attr(feature = "with_serde", serde(default))]
pub mounts: Vec<Mount>,
#[cfg_attr(feature = "with_serde", serde(default))]
pub minions: Vec<Minion>,
// #[cfg_attr(feature = "with_serde", serde(default))]
// pub mounts: Vec<Mount>,
// #[cfg_attr(feature = "with_serde", serde(default))]
// pub minions: Vec<Minion>,
pub face: Url,
pub portrait: Url,
@ -50,8 +50,6 @@ pub struct GrandCompanyInfo {
/// Per-job data attached to a character (the value type of `Character::jobs`).
#[cfg_attr(feature = "with_serde", derive(Deserialize, Serialize))]
pub struct JobInfo {
/// Job level; `parse_job` stores `None` when the parsed text is `-`.
pub level: Option<u8>,
/// Current experience; `parse_job` stores `None` when the parsed text is `-` or `--`.
pub experience: Option<u64>,
/// Value after the ` / ` separator in the experience text; `None` for `-` or `--`.
pub next_level_experience: Option<u64>,
}
#[derive(Debug)]
@ -92,6 +90,7 @@ ffxiv_enum!(
WhiteMage => "white mage",
Scholar => "scholar",
Astrologian => "astrologian",
Sage => "sage",
Pugilist => "pugilist",
Monk => "monk",
Lancer => "lancer",
@ -109,6 +108,7 @@ ffxiv_enum!(
RedMage => "red mage",
BlueMage => "blue mage",
Dancer => "dancer",
Reaper => "reaper",
Carpenter => "carpenter",
Blacksmith => "blacksmith",

View File

@ -2,7 +2,7 @@ use super::GrandCompany;
use chrono::{DateTime, Utc};
use ffxiv_types::World;
#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize};
#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize};
use url::Url;
use std::collections::BTreeMap;

View File

@ -1,7 +1,7 @@
use super::search::{Paginated, character::CharacterSearchItem};
use ffxiv_types::World;
#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize};
#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize};
#[derive(Debug)]
#[cfg_attr(feature = "with_serde", derive(Deserialize, Serialize))]

View File

@ -1,4 +1,4 @@
#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize};
#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize};
pub mod character;
pub mod free_company;

View File

@ -1,7 +1,7 @@
use crate::models::character::GrandCompanyInfo;
use ffxiv_types::World;
#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize};
#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize};
use url::Url;

View File

@ -2,7 +2,7 @@ use crate::models::GrandCompany;
use chrono::{DateTime, Utc};
use ffxiv_types::World;
#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize};
#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize};
use url::Url;
#[derive(Debug)]

View File

@ -1,5 +1,5 @@
use ffxiv_types::World;
#[cfg(feature = "with_serde")] use serde_derive::{Deserialize, Serialize};
#[cfg(feature = "with_serde")] use serde::{Deserialize, Serialize};
#[derive(Debug)]
#[cfg_attr(feature = "with_serde", derive(Deserialize, Serialize))]

View File

@ -1,6 +1,6 @@
use serde::{Deserializer, Deserialize, Serializer, de::Unexpected};
crate fn serialize<S>(u: &Option<u64>, serializer: S) -> Result<S::Ok, S::Error>
pub(crate) fn serialize<S>(u: &Option<u64>, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,
{
match *u {
@ -9,7 +9,7 @@ crate fn serialize<S>(u: &Option<u64>, serializer: S) -> Result<S::Ok, S::Error>
}
}
crate fn deserialize<'de, D>(deserializer: D) -> Result<Option<u64>, D::Error>
pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result<Option<u64>, D::Error>
where D: Deserializer<'de>
{
let s: Option<String> = Deserialize::deserialize(deserializer)?;

View File

@ -1,13 +1,13 @@
use serde::{Deserializer, Deserialize, Serializer, de::Unexpected};
/// Serializes a `u64` as its decimal string representation via `serialize_str`.
// NOTE(review): the `&u64` parameter is presumably required because serde's
// `serialize_with` hands the field over by reference, hence the lint allowance — confirm.
#[allow(clippy::trivially_copy_pass_by_ref)]
crate fn serialize<S>(u: &u64, serializer: S) -> Result<S::Ok, S::Error>
pub(crate) fn serialize<S>(u: &u64, serializer: S) -> Result<S::Ok, S::Error>
where S: Serializer,
{
serializer.serialize_str(&u.to_string())
}
crate fn deserialize<'de, D>(deserializer: D) -> Result<u64, D::Error>
pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result<u64, D::Error>
where D: Deserializer<'de>
{
let s: String = Deserialize::deserialize(deserializer)?;