100-exercises-to-learn-rust/helpers/mdbook-link-shortener/src/lib.rs

225 lines
7.5 KiB
Rust
Raw Normal View History

2024-08-06 23:09:04 +08:00
use anyhow::{Context, Error};
use bimap::BiHashMap;
use itertools::Itertools;
use mdbook::book::{Book, Chapter};
use mdbook::preprocess::{Preprocessor, PreprocessorContext};
use mdbook::BookItem;
use std::collections::{BTreeMap, BTreeSet};
use std::fs::File;
use std::path::PathBuf;
use std::str::FromStr;
pub struct LinkShortener;
struct AliasGenerator {
cursors: [usize; 3],
}
impl AliasGenerator {
const ALPHABET: &'static [u8] = b"f2z4x6v8bnm3q5w7e9rtyuplkshgjdca";
fn new() -> AliasGenerator {
AliasGenerator { cursors: [0, 0, 0] }
}
/// Generate a 4 alphanumeric long alias, starting from "aaaa" and incrementing by one each time
/// until "9999", using only lowercase letters and numbers.
/// We skip ambiguous characters like "0", "o", "1", "l".
fn next(&mut self) -> String {
let mut alias = String::with_capacity(4);
for cursor in &mut self.cursors {
alias.push(Self::ALPHABET[*cursor] as char);
}
for cursor in self.cursors.iter_mut().rev() {
if *cursor == Self::ALPHABET.len() - 1 {
*cursor = 0;
} else {
*cursor += 1;
break;
}
}
alias
}
/// Generate a unique alias that is not already used by the `link2alias` map.
fn next_until_unique(&mut self, link2alias: &BiHashMap<String, String>) -> String {
let mut alias = self.next();
while link2alias.contains_right(&alias) {
alias = self.next();
}
alias
}
}
impl LinkShortener {
pub fn new() -> LinkShortener {
LinkShortener
}
}
impl Preprocessor for LinkShortener {
fn name(&self) -> &str {
"link-shortener"
}
fn run(&self, ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
let config = ctx
.config
.get_preprocessor(self.name())
.context("Failed to get preprocessor configuration")?;
let root_url = {
let root_url = config.get("base_url").context("Failed to get `base_url`")?;
root_url
.as_str()
.context("`base_url` is not a string")?
.to_owned()
};
let mapping = {
let mapping = config.get("mapping").context("Failed to get `mapping`")?;
let mapping = mapping
.as_str()
.context("`mapping` is not a string")?
.to_owned();
PathBuf::from_str(&mapping).context("Failed to parse `mapping` as a path")?
};
let mut link2alias = {
match File::open(&mapping) {
Ok(file) => {
serde_json::from_reader(file).context("Failed to parse existing mapping")?
}
Err(e) => {
if e.kind() == std::io::ErrorKind::NotFound {
BiHashMap::new()
} else {
return Err(e).context("Failed to open existing mapping");
}
}
}
};
let verify = config
.get("verify")
.context("Failed to get `verify`")?
.as_bool()
.context("`verify` is not a boolean")?;
// Env var overrides config
let verify = std::env::var("LINK_SHORTENER_VERIFY")
.map(|v| v == "true")
.unwrap_or(verify);
let mut alias_gen = AliasGenerator::new();
book.sections.iter_mut().for_each(|i| {
if let BookItem::Chapter(c) = i {
c.content = replace_anchors(c, &root_url, &mut alias_gen, &mut link2alias, verify)
.expect("Error converting links for chapter");
for i in c.sub_items.iter_mut() {
if let BookItem::Chapter(sub_chapter) = i {
sub_chapter.content = replace_anchors(
sub_chapter,
&root_url,
&mut alias_gen,
&mut link2alias,
verify,
)
.expect("Error converting links for subchapter");
}
}
}
});
if !verify {
std::fs::create_dir_all(mapping.parent().expect("Mapping file path has no parent"))?;
let mut file = File::create(&mapping).context("Failed to upsert mapping file")?;
let ordered = link2alias.iter().collect::<BTreeMap<_, _>>();
serde_json::to_writer_pretty(&mut file, &ordered)?;
}
Ok(book)
}
fn supports_renderer(&self, _renderer: &str) -> bool {
true
}
}
fn replace_anchors(
chapter: &mut Chapter,
root_url: &str,
alias_gen: &mut AliasGenerator,
link2alias: &mut BiHashMap<String, String>,
verify: bool,
) -> Result<String, anyhow::Error> {
use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag};
use pulldown_cmark_to_cmark::cmark;
let mut buf = String::with_capacity(chapter.content.len());
let mut unshortened_links = BTreeSet::new();
let events = Parser::new_ext(&chapter.content, Options::all())
.map(|e| {
let Event::Start(Tag::Link {
link_type,
dest_url,
title,
id,
}) = &e
else {
return e;
};
match link_type {
LinkType::Autolink
| LinkType::Shortcut
| LinkType::Inline
| LinkType::Reference
| LinkType::Collapsed => {
if dest_url.starts_with("http") {
let alias = if let Some(alias) = link2alias.get_by_left(dest_url.as_ref()) {
alias.to_owned()
} else {
if verify {
unshortened_links.insert(dest_url.to_string());
return e;
}
let alias = alias_gen.next_until_unique(&link2alias);
alias
};
link2alias.insert(dest_url.to_string(), alias.clone());
Event::Start(Tag::Link {
link_type: link_type.to_owned(),
dest_url: CowStr::from(format!(
"{root_url}/{alias}",
root_url = root_url,
alias = alias
)),
title: title.clone(),
id: id.clone(),
})
} else {
e
}
}
LinkType::Email
| LinkType::ReferenceUnknown
| LinkType::CollapsedUnknown
| LinkType::ShortcutUnknown => e,
}
})
.collect_vec();
if verify && !unshortened_links.is_empty() {
let unshortened_links = unshortened_links.iter().join(", ");
return Err(anyhow::anyhow!(
"The following links are not shortened: {unshortened_links}\nRun again with `LINK_SHORTENER_VERIFY=false` to update the mapping \
with the shortened links."
));
}
cmark(events.into_iter(), &mut buf)
.map(|_| buf)
.map_err(|err| anyhow::anyhow!("Markdown serialization failed: {err}"))
}