chore: initial commit

This commit is contained in:
Kyle Clemens 2018-09-02 15:12:52 -04:00
commit 6820aff913
Signed by: anna
GPG Key ID: 0B391D8F06FCD9E0
19 changed files with 9023 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/target
**/*.rs.bk
Cargo.lock

28
Cargo.toml Normal file
View File

@ -0,0 +1,28 @@
cargo-features = ["edition"]
[package]
name = "lodestone_parser"
version = "0.1.0"
authors = ["Kyle Clemens <git@kyleclemens.com>"]
edition = "2018"
[dependencies]
cssparser = "0.23"
failure = "0.1"
lazy_static = "1"
scraper = "0.7"
serde = "1"
serde_derive = "1"
serde_json = "1"
url = "1"
url_serde = "0.2"
[dependencies.chrono]
version = "0.4"
features = ["serde"]
[dependencies.ffxiv_types]
version = "0.2"
default-features = false
features = ["worlds", "with_serde"]

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Kyle Clemens
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

3
README.md Normal file
View File

@ -0,0 +1,3 @@
# lodestone_parser
It parses Lodestone HTML. It's also nowhere near done.

1401
html/Characters/A.html Normal file

File diff suppressed because one or more lines are too long

1541
html/Characters/Duvivi.html Normal file

File diff suppressed because one or more lines are too long

1485
html/Characters/Sakae.html Normal file

File diff suppressed because one or more lines are too long

1215
html/FCs/Cobra.html Normal file

File diff suppressed because one or more lines are too long

1253
html/FCs/Rose.html Normal file

File diff suppressed because one or more lines are too long

1583
html/Searches/Character.html Normal file

File diff suppressed because one or more lines are too long

28
schemas/FreeCompany.md Normal file
View File

@ -0,0 +1,28 @@
# FreeCompany
|Key|Value|Description|
|---|---|---|
|`name`|`String`|The name of the Free Company.|
|`world`|`String`|The world the Free Company is on.|
|`slogan`|`String`|The Free Company's slogan.|
|`crest`|`Array` of `String`|The image URLs that are layered to create the Free Company crest.|
|`active_members`|`u16`|The number of active members.|
|`rank`|`u8`|The Free Company's rank ([1,8]).|
|`pvp_rankings`|`PvpRankings`|The Free Company's PvP rankings.|
|`formed`|`DateTime` (UTC, RFC3339 formatted)|The date and time at which the Free Company was created.|
|`estate`|`Estate?`|The Free Company's estate.|
## PvpRankings
|Key|Value|Description|
|---|---|---|
|`weekly`|`u64?`|The weekly rank or `null` if unranked.|
|`monthly`|`u64?`|The monthly rank or `null` if unranked.|
## Estate
|Key|Value|Description|
|---|---|---|
|`name`|`String`|The name of the estate.|
|`address`|`String`|The estate's address.|
|`greeting`|`String`|The greeting set on the estate.|

32
src/error.rs Normal file
View File

@ -0,0 +1,32 @@
use failure::Fail;
/// Shorthand for a `std::result::Result` whose error type is this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
/// Everything that can go wrong while extracting data from a Lodestone page.
#[derive(Debug, Fail)]
pub enum Error {
/// A CSS selector matched nothing; the payload is the selector rendered as text.
#[fail(display = "couldn't find expected element on the Lodestone: {}", _0)]
MissingElement(String),
/// An element was found but its content was not in the expected form; the payload
/// is a human-readable description (see `Error::invalid_content`).
#[fail(display = "the content scraped from the Lodestone was invalid: {}", _0)]
InvalidContent(String),
/// Text that should have been an integer could not be parsed as one.
#[fail(display = "invalid number: {}", _0)]
InvalidNumber(std::num::ParseIntError),
/// Text that should have been a URL could not be parsed as one.
#[fail(display = "invalid url: {}", _0)]
InvalidUrl(url::ParseError),
}
impl Error {
    /// Builds an `Error::MissingElement` naming the selector that matched nothing.
    ///
    /// Each alternative of the selector list is rendered back to CSS and the
    /// alternatives are joined with `", "`, which is how CSS separates a selector
    /// list. A bare space would read as a descendant combinator and misreport
    /// which selector was being looked for.
    pub fn missing_element(select: &scraper::Selector) -> Self {
        use cssparser::ToCss;

        let css = select
            .selectors
            .iter()
            .map(|alternative| alternative.to_css_string())
            .collect::<Vec<_>>()
            .join(", ");
        Error::MissingElement(css)
    }

    /// Builds an `Error::InvalidContent` describing what was expected and,
    /// when `found` is `Some`, the text that was actually encountered.
    pub fn invalid_content(expecting: &str, found: Option<&str>) -> Self {
        let s = match found {
            Some(f) => format!("expecting `{}`, found `{}`", expecting, f),
            None => format!("expecting `{}`", expecting),
        };
        Error::InvalidContent(s)
    }
}

9
src/lib.rs Normal file
View File

@ -0,0 +1,9 @@
#![feature(macro_at_most_once_rep)]
#[macro_use] extern crate failure;
#[macro_use] extern crate lazy_static;
#[macro_use] extern crate serde_derive;
pub mod error;
pub mod logic;
pub mod models;

12
src/logic.rs Normal file
View File

@ -0,0 +1,12 @@
// Declares one `lazy_static` `scraper::Selector` static per `NAME => "css"` pair.
//
// Pairs are separated by `;` and a trailing `;` is accepted. Each selector string is
// handed to `scraper::Selector::parse`, and the result is unwrapped, so a string that
// fails to parse causes a panic rather than a recoverable error.
macro_rules! selectors {
($($name:ident => $selector:expr);+$(;)?) => {
lazy_static! {
$(
static ref $name: scraper::Selector = scraper::Selector::parse($selector).unwrap();
)+
}
}
}
pub mod character;
pub mod free_company;

106
src/logic/character.rs Normal file
View File

@ -0,0 +1,106 @@
use crate::models::character::{
Character,
CityState,
Clan,
Gender,
GrandCompany,
GrandCompanyInfo,
Guardian,
Race,
};
use scraper::Html;
use std::str::FromStr;
// CSS selectors for the fields read from a character profile page.
// Race/clan/gender, guardian, city-state and grand company are located purely by
// position (`nth-of-type(1)` .. `nth-of-type(4)`) among the `div.character-block`
// elements, so they depend on the order in which the page lists those blocks.
selectors!(
PROFILE_NAME => ".frame__chara__name";
PROFILE_WORLD => ".frame__chara__world";
PROFILE_TITLE => ".frame__chara__title";
PROFILE_NAME_DAY => ".character-block__birth";
PROFILE_RACE_CLAN_GENDER => "div.character-block:nth-of-type(1) > .character-block__box > .character-block__name";
PROFILE_GUARDIAN => "div.character-block:nth-of-type(2) > .character-block__box > .character-block__name";
PROFILE_CITY_STATE => "div.character-block:nth-of-type(3) > .character-block__box > .character-block__name";
PROFILE_GRAND_COMPANY => "div.character-block:nth-of-type(4) > .character-block__box > .character-block__name";
PROFILE_FREE_COMPANY => ".character__freecompany__name > h4 > a";
PROFILE_TEXT => ".character__selfintroduction";
);
pub fn parse(id: u64, html: &str) -> Option<Character> {
let html = Html::parse_document(html);
let name = html.select(&*PROFILE_NAME).next()?.text().collect();
let world_str: String = html.select(&*PROFILE_WORLD).next()?.text().collect();
let world = ffxiv_types::World::from_str(&world_str).ok()?;
let title: Option<String> = html
.select(&*PROFILE_TITLE)
.next()
.map(|x| x.text().collect());
let mut rcg = html.select(&*PROFILE_RACE_CLAN_GENDER).next()?.text();
let race = Race::parse(rcg.next()?)?;
let mut clan_gender_str = rcg.next()?.split(" / ");
let clan = Clan::parse(clan_gender_str.next()?)?;
let gender = Gender::parse(clan_gender_str.next()?)?;
let name_day = html.select(&*PROFILE_NAME_DAY).next()?.text().collect();
let guardian_str: String = html.select(&*PROFILE_GUARDIAN).next()?.text().collect();
let guardian = Guardian::parse(&guardian_str)?;
let city_state_str: String = html.select(&*PROFILE_CITY_STATE).next()?.text().collect();
let city_state = CityState::parse(&city_state_str)?;
let grand_company: Option<GrandCompanyInfo> = html
.select(&*PROFILE_GRAND_COMPANY)
.next()
.map(|x| x.text().collect::<String>())
.and_then(|x| {
let mut x = x.split(" / ");
let gc = GrandCompany::parse(x.next()?)?;
Some(GrandCompanyInfo {
grand_company: gc,
rank: x.next()?.to_string(),
})
});
let free_company_id: Option<u64> = html
.select(&*PROFILE_FREE_COMPANY)
.next()
.and_then(|x| x.value().attr("href"))
.and_then(|x| x
.split('/')
.filter(|x| !x.is_empty())
.last())
.and_then(|x| x.parse().ok());
let profile_text = html
.select(&*PROFILE_TEXT)
.next()?
.text()
.collect::<String>()
.trim()
.to_string();
Some(Character {
id,
name,
world,
race,
clan,
gender,
title,
name_day,
guardian,
city_state,
grand_company,
free_company_id,
profile_text,
})
}

155
src/logic/free_company.rs Normal file
View File

@ -0,0 +1,155 @@
use crate::{
error::*,
models::free_company::{FreeCompany, PvpRankings, Estate},
};
use chrono::{DateTime, Local, TimeZone, Utc};
use ffxiv_types::World;
use scraper::Html;
use url::Url;
use std::str::FromStr;
// CSS selectors for the fields read from a Free Company page.
// Several fields are located purely by position (`nth-of-type`), so they depend on
// the order of the matching elements on the page.
// NOTE(review): `FC_TAG` is declared but not referenced by any function visible in
// this file.
selectors!(
FC_NAME => ".entry__freecompany__name";
FC_WORLD => "p.entry__freecompany__gc:nth-of-type(3)";
FC_SLOGAN => ".freecompany__text__message.freecompany__text";
FC_TAG => ".freecompany__text__tag.freecompany__text";
FC_CREST => ".entry__freecompany__crest__image > img";
FC_FORMED => "p.freecompany__text:nth-of-type(5) > script";
FC_ACTIVE_MEMBERS => "p.freecompany__text:nth-of-type(6)";
FC_RANK => "p.freecompany__text:nth-of-type(7)";
FC_WEEKLY_RANKING => ".character__ranking__data tr:nth-of-type(1) > th";
FC_MONTHLY_RANKING => ".character__ranking__data tr:nth-of-type(2) > th";
FC_ESTATE_MISSING => ".freecompany__estate__none";
FC_ESTATE_NAME => ".freecompany__estate__name";
FC_ESTATE_ADDRESS => ".freecompany__estate__text";
FC_ESTATE_GREETING => ".freecompany__estate__greeting";
);
/// Parses the HTML of a Lodestone Free Company page into a `FreeCompany`.
///
/// `id` is accepted but not used by this function.
///
/// # Errors
///
/// Fields are extracted in the order they are written below and the error of the
/// first one that fails is returned.
pub fn parse(id: u64, html: &str) -> Result<FreeCompany> {
    let document = Html::parse_document(html);

    // Field initialisers are evaluated top to bottom, so the first failing field
    // decides which error the caller sees.
    Ok(FreeCompany {
        name: plain_parse(&document, &*FC_NAME)?,
        world: parse_world(&document)?,
        slogan: plain_parse(&document, &*FC_SLOGAN)?,
        crest: parse_crest(&document)?,
        active_members: parse_active_members(&document)?,
        rank: parse_rank(&document)?,
        pvp_rankings: PvpRankings {
            weekly: parse_pvp_rank(&document, &*FC_WEEKLY_RANKING)?,
            monthly: parse_pvp_rank(&document, &*FC_MONTHLY_RANKING)?,
        },
        formed: parse_formed(&document)?,
        estate: parse_estate(&document)?,
    })
}
fn plain_parse(html: &Html, select: &scraper::Selector) -> Result<String> {
let string = html
.select(select)
.next()
.ok_or(Error::missing_element(select))?
.text()
.collect();
Ok(string)
}
fn parse_world(html: &Html) -> Result<World> {
let world_str = plain_parse(html, &*FC_WORLD)?;
let trimmed = world_str.trim();
World::from_str(trimmed)
.map_err(|_| Error::invalid_content("a world", Some(trimmed)))
}
fn parse_active_members(html: &Html) -> Result<u16> {
plain_parse(&html, &*FC_ACTIVE_MEMBERS)
.and_then(|x| x.parse().map_err(Error::InvalidNumber))
}
fn parse_rank(html: &Html) -> Result<u8> {
plain_parse(&html, &*FC_RANK)
.and_then(|x| x.parse().map_err(Error::InvalidNumber))
}
fn parse_pvp_rank(html: &Html, select: &scraper::Selector) -> Result<Option<u64>> {
let rank_str = plain_parse(html, select)?;
let rank = rank_str
.split(":")
.nth(1)
.ok_or_else(|| Error::invalid_content("colon-separated text", Some(&rank_str)))
.and_then(|x| x
.split(" ")
.next()
.ok_or_else(|| Error::invalid_content("space-separated text", Some(&rank_str))))?;
if rank == "--" {
return Ok(None);
}
rank
.parse()
.map(Some)
.map_err(Error::InvalidNumber)
}
fn parse_formed(html: &Html) -> Result<DateTime<Utc>> {
let script = html
.select(&*FC_FORMED)
.next()
.ok_or(Error::missing_element(&*FC_FORMED))?
.inner_html();
let timestamp = script
.split("strftime(")
.nth(1)
.ok_or(Error::invalid_content("strftime call", Some(&script)))?
.split(",")
.next()
.ok_or(Error::invalid_content("comma-separated strftime call", Some(&script)))?;
let timestamp: i64 = timestamp.parse().map_err(Error::InvalidNumber)?;
let utc = Local.timestamp(timestamp, 0).with_timezone(&Utc);
Ok(utc)
}
/// Reads the Free Company's estate, if it has one.
///
/// Returns `Ok(None)` when the page contains the "no estate" marker element.
///
/// # Errors
///
/// Returns `Error::MissingElement` if the marker is absent and the name, address
/// or greeting element cannot be found.
fn parse_estate(html: &Html) -> Result<Option<Estate>> {
    let has_no_estate = html.select(&*FC_ESTATE_MISSING).next().is_some();
    if has_no_estate {
        return Ok(None);
    }

    let estate = Estate {
        name: plain_parse(html, &*FC_ESTATE_NAME)?,
        address: plain_parse(html, &*FC_ESTATE_ADDRESS)?,
        greeting: plain_parse(html, &*FC_ESTATE_GREETING)?,
    };
    Ok(Some(estate))
}
fn parse_crest(html: &Html) -> Result<Vec<Url>> {
html.select(&*FC_CREST)
.into_iter()
.filter_map(|x| x.value().attr("src"))
.map(|x| Url::parse(x).map_err(Error::InvalidUrl))
.collect()
}

27
src/models.rs Normal file
View File

@ -0,0 +1,27 @@
// Declares a public field-less enum whose variants each correspond to one exact
// string, written as `Variant => "text"` pairs (a trailing comma is accepted).
//
// The generated type gets:
// - `parse(&str) -> Option<Self>`: exact, case-sensitive match against the strings;
// - `name(&self) -> &'static str`: the string a variant was declared with.
//
// The strings are literals, so `name` hands out `'static` data rather than tying
// the result to the borrow of `self`. The generated enums are plain tags, hence
// the `Clone`/`Copy`/`PartialEq`/`Eq`/`Hash` derives.
macro_rules! ffxiv_enum {
    ($name:ident { $($variant:ident => $str_repr:expr),+$(,)? }) => {
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
        pub enum $name {
            $($variant,)+
        }

        impl $name {
            /// Returns the variant declared with exactly the text `s`, if any.
            pub fn parse(s: &str) -> Option<Self> {
                let res = match s {
                    $($str_repr => $name::$variant,)+
                    _ => return None,
                };
                Some(res)
            }

            /// Returns the text this variant was declared with.
            pub fn name(&self) -> &'static str {
                match *self {
                    $($name::$variant => $str_repr,)+
                }
            }
        }
    }
}
pub mod character;
pub mod free_company;

77
src/models/character.rs Normal file
View File

@ -0,0 +1,77 @@
use ffxiv_types::World;
/// A character profile as extracted from a Lodestone character page.
#[derive(Debug, Serialize)]
pub struct Character {
/// The character's id.
pub id: u64,
/// The character's name.
pub name: String,
/// The world the character is on.
pub world: World,
/// The character's race.
pub race: Race,
/// The character's clan.
pub clan: Clan,
/// The character's gender.
pub gender: Gender,
/// The character's title, if one is present.
pub title: Option<String>,
/// The character's name day, kept as text.
pub name_day: String,
/// The character's guardian deity.
pub guardian: Guardian,
/// The character's starting city-state.
pub city_state: CityState,
/// The character's grand company and rank within it, if any.
pub grand_company: Option<GrandCompanyInfo>,
/// The id of the character's Free Company, if any.
pub free_company_id: Option<u64>,
/// The character's profile text.
pub profile_text: String,
}
/// A character's grand company membership.
#[derive(Debug, Serialize)]
pub struct GrandCompanyInfo {
/// Which grand company the character belongs to.
pub grand_company: GrandCompany,
/// The character's rank within the grand company, kept as text.
pub rank: String,
}
// A character's gender. The Lodestone shows it as a Unicode gender symbol after the
// clan name (e.g. "Midlander / ♂"). Both strings must be distinct and non-empty:
// with two identical patterns the second arm is unreachable and `Female` could
// never be produced by `parse`.
ffxiv_enum!(Gender {
    Male => "♂",
    Female => "♀",
});
// Playable races. Each string is the exact text `Race::parse` accepts.
ffxiv_enum!(Race {
AuRa => "Au Ra",
Elezen => "Elezen",
Hyur => "Hyur",
Lalafell => "Lalafell",
Miqote => "Miqo'te",
Roegadyn => "Roegadyn",
});
// Clans. Each string is the exact text `Clan::parse` accepts.
ffxiv_enum!(Clan {
Raen => "Raen",
Xaela => "Xaela",
Duskwight => "Duskwight",
Wildwood => "Wildwood",
Highlander => "Highlander",
Midlander => "Midlander",
Dunesfolk => "Dunesfolk",
Plainsfolk => "Plainsfolk",
SeekerOfTheMoon => "Seeker of the Moon",
SeekerOfTheSun => "Seeker of the Sun",
Hellsguard => "Hellsguard",
SeaWolf => "Sea Wolf",
});
// Grand companies. Each string is the exact text `GrandCompany::parse` accepts.
ffxiv_enum!(GrandCompany {
Flames => "Immortal Flames",
Maelstrom => "Maelstrom",
TwinAdders => "Order of the Twin Adder",
});
// Guardian deities. Each string is the exact text `Guardian::parse` accepts.
// All twelve deities are listed: a guardian that is missing here makes
// `Guardian::parse` return `None`, so parsing of that character fails entirely.
ffxiv_enum!(Guardian {
    Althyk => "Althyk, the Keeper",
    Azeyma => "Azeyma, the Warden",
    Byregot => "Byregot, the Builder",
    Halone => "Halone, the Fury",
    Llymlaen => "Llymlaen, the Navigator",
    Menphina => "Menphina, the Lover",
    NaldThal => "Nald'thal, the Traders",
    Nophica => "Nophica, the Matron",
    Nymeia => "Nymeia, the Spinner",
    Oschon => "Oschon, the Wanderer",
    Rhalgr => "Rhalgr, the Destroyer",
    Thaliak => "Thaliak, the Scholar",
});
// Starting city-states. Each string is the exact text `CityState::parse` accepts.
ffxiv_enum!(CityState {
Gridania => "Gridania",
LimsaLominsa => "Limsa Lominsa",
UlDah => "Ul'dah",
});

View File

@ -0,0 +1,44 @@
use chrono::{DateTime, Utc};
use ffxiv_types::World;
use url::Url;
/// A Free Company as extracted from a Lodestone Free Company page.
#[derive(Debug, Serialize)]
pub struct FreeCompany {
/// The name of the Free Company.
pub name: String,
/// The world the Free Company is on.
pub world: World,
/// The Free Company's slogan.
pub slogan: String,
/// The image URLs that are layered to create the Free Company crest.
/// Serialized through `multi_url`, one element per URL.
#[serde(serialize_with = "multi_url")]
pub crest: Vec<Url>,
/// The number of active members.
pub active_members: u16,
/// The Free Company's rank.
pub rank: u8,
/// The Free Company's PvP rankings.
pub pvp_rankings: PvpRankings,
/// The date and time at which the Free Company was created.
pub formed: DateTime<Utc>,
/// The Free Company's estate, if it has one.
pub estate: Option<Estate>,
}
/// A Free Company's PvP rankings.
#[derive(Debug, Serialize)]
pub struct PvpRankings {
/// The weekly rank, or `None` if unranked.
pub weekly: Option<u64>,
/// The monthly rank, or `None` if unranked.
pub monthly: Option<u64>,
}
/// A Free Company's estate.
#[derive(Debug, Serialize)]
pub struct Estate {
/// The name of the estate.
pub name: String,
/// The estate's address.
pub address: String,
/// The greeting set on the estate.
pub greeting: String,
}
/// `serialize_with` helper that writes a list of URLs as a sequence, wrapping each
/// one in `url_serde::Ser` so that it is serialized by `url_serde`.
fn multi_url<S>(urls: &Vec<Url>, serializer: S) -> Result<S::Ok, S::Error>
    where S: serde::Serializer,
{
    // `collect_seq` opens a sequence sized from the iterator, writes every element
    // and closes it again.
    serializer.collect_seq(urls.iter().map(|url| url_serde::Ser::new(url)))
}