add images fetching

This commit is contained in:
jasongao97 2022-07-26 22:16:43 -04:00
parent 57c7bb2c74
commit c1df70cb43
5 changed files with 146 additions and 8 deletions

View file

@ -8,6 +8,12 @@
"files": [
"content/*.html"
],
"images": {
"files": [
"content/**/*.jpg",
"content/**/*.png"
]
},
"builds": [
{
"format": "pdf",

88
package-lock.json generated
View file

@ -2639,6 +2639,16 @@
"requires": {
"@types/node-fetch": "^2.5.10",
"node-fetch": "^2.6.1"
},
"dependencies": {
"node-fetch": {
"version": "2.6.7",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz",
"integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==",
"requires": {
"whatwg-url": "^5.0.0"
}
}
}
},
"@parcel/bundler-default": {
@ -5554,6 +5564,16 @@
"integrity": "sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw==",
"requires": {
"node-fetch": "2.6.7"
},
"dependencies": {
"node-fetch": {
"version": "2.6.7",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz",
"integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==",
"requires": {
"whatwg-url": "^5.0.0"
}
}
}
},
"cross-spawn": {
@ -5794,6 +5814,11 @@
"assert-plus": "^1.0.0"
}
},
"data-uri-to-buffer": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.0.tgz",
"integrity": "sha512-Vr3mLBA8qWmcuschSLAOogKgQ/Jwxulv3RNE4FXnYWRGujzrRWQI4m12fQqRkwX06C0KanhLr4hK+GydchZsaA=="
},
"dataloader": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/dataloader/-/dataloader-2.0.0.tgz",
@ -7425,6 +7450,15 @@
"resolved": "https://registry.npmjs.org/fd/-/fd-0.0.3.tgz",
"integrity": "sha512-iAHrIslQb3U68OcMSP0kkNWabp7sSN6d2TBSb2JO3gcLJVDd4owr/hKM4SFJovFOUeeXeItjYgouEDTMWiVAnA=="
},
"fetch-blob": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
"integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
"requires": {
"node-domexception": "^1.0.0",
"web-streams-polyfill": "^3.0.3"
}
},
"figures": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/figures/-/figures-3.2.0.tgz",
@ -7740,6 +7774,14 @@
"mime-types": "^2.1.12"
}
},
"formdata-polyfill": {
"version": "4.0.10",
"resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
"integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
"requires": {
"fetch-blob": "^3.1.2"
}
},
"forwarded": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
@ -8200,6 +8242,14 @@
"yallist": "^4.0.0"
}
},
"node-fetch": {
"version": "2.6.7",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz",
"integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==",
"requires": {
"whatwg-url": "^5.0.0"
}
},
"p-locate": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz",
@ -8877,6 +8927,14 @@
"universalify": "^2.0.0"
}
},
"node-fetch": {
"version": "2.6.7",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz",
"integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==",
"requires": {
"whatwg-url": "^5.0.0"
}
},
"string-width": {
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
@ -11719,12 +11777,19 @@
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-3.2.1.tgz",
"integrity": "sha512-mmcei9JghVNDYydghQmeDX8KoAm0FAiYyIcUt/N4nhyAipB17pllZQDOJD2fotxABnt4Mdz+dKTO7eftLg4d0A=="
},
"node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ=="
},
"node-fetch": {
"version": "2.6.7",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz",
"integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==",
"version": "3.2.9",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.2.9.tgz",
"integrity": "sha512-/2lI+DBecVvVm9tDhjziTVjo2wmTsSxSk58saUYP0P/fRJ3xxtfMDY24+CKTkfm0Dlhyn3CSXNL0SoRiCZ8Rzg==",
"requires": {
"whatwg-url": "^5.0.0"
"data-uri-to-buffer": "^4.0.0",
"fetch-blob": "^3.1.4",
"formdata-polyfill": "^4.0.10"
}
},
"node-gyp": {
@ -15011,6 +15076,16 @@
"requires": {
"buffer": "^5.7.0",
"node-fetch": "^2.6.1"
},
"dependencies": {
"node-fetch": {
"version": "2.6.7",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz",
"integrity": "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==",
"requires": {
"whatwg-url": "^5.0.0"
}
}
}
},
"table": {
@ -16081,6 +16156,11 @@
"resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz",
"integrity": "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ=="
},
"web-streams-polyfill": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz",
"integrity": "sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q=="
},
"webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",

View file

@ -37,6 +37,7 @@
"magicbook": "^0.1.20",
"magicbook-codesplit": "^0.1.6",
"magicbook-katex": "0.0.7",
"node-fetch": "^3.2.9",
"postcss": "^8.4.14",
"prismjs": "^1.28.0",
"react": "^18.2.0",

View file

@ -1,6 +1,10 @@
import { promises as fs } from 'fs';
import { promises as fs, createWriteStream } from 'node:fs';
import { pipeline } from 'node:stream';
import { promisify } from 'node:util';
import { toHtml } from 'hast-util-to-html';
import { visit } from 'unist-util-visit';
import rehypeFormat from 'rehype-format';
import fetch from 'node-fetch';
import { fetchPages, fetchBlockChildren } from './lib/notion-api.mjs';
import { fromNotion } from './lib/hast-from-notion.mjs';
@ -40,7 +44,7 @@ async function importDatabase(pages) {
return {
title: page.properties['Title'],
src: `./${page.properties['File Name']}.html`,
slug: page.properties['Slug'],
slug: page.properties['Slug'] || page.properties['File Name'],
};
});
@ -50,6 +54,27 @@ async function importDatabase(pages) {
);
}
/**
 * Download one image and save it below DESTINATION_FOLDER.
 *
 * The file extension is derived from the response's Content-Type header.
 * Only JPEG and PNG are accepted; anything else is rejected instead of
 * being written to disk with a bogus `.undefined` extension.
 *
 * @param {object} options
 * @param {string} options.url - Address the image is fetched from.
 * @param {string} options.name - File name to use, without extension.
 * @param {string} options.dir - Directory (relative to DESTINATION_FOLDER,
 *   with trailing slash) the file is written into. It is not created here,
 *   so it has to exist before this function is called.
 * @returns {Promise<string>} Path of the saved file, relative to
 *   DESTINATION_FOLDER (`${dir}${name}.${ext}`).
 * @throws {Error} If the HTTP response is not OK, or if the Content-Type
 *   is missing or is not a supported image type.
 */
async function downloadImage({ url, name, dir }) {
  const streamPipeline = promisify(pipeline);

  const response = await fetch(url);
  if (!response.ok) {
    // statusText can be empty, so always include the numeric status too.
    const status = `${response.status} ${response.statusText}`.trim();
    throw new Error(
      `unexpected response ${status} while downloading image "${name}"`,
    );
  }

  // The header may carry parameters ("image/jpeg; charset=binary") or use
  // different casing, so compare only the normalized media type.
  const contentType = response.headers.get('Content-Type') ?? '';
  const mediaType = contentType.split(';')[0].trim().toLowerCase();

  const extensions = new Map([
    ['image/jpeg', 'jpg'],
    ['image/png', 'png'],
  ]);
  const ext = extensions.get(mediaType);
  if (ext === undefined) {
    throw new Error(
      `unsupported image content type "${contentType || '(none)'}" for image "${name}"`,
    );
  }

  const relativePath = `${dir}${name}.${ext}`;
  await streamPipeline(
    response.body,
    createWriteStream(`${DESTINATION_FOLDER}${relativePath}`),
  );

  return relativePath;
}
async function importPage({ id, properties }) {
// Get all page content recursively
const pageContent = await fetchBlockChildren({
@ -60,6 +85,29 @@ async function importPage({ id, properties }) {
// Transform Notion content to hast
const hast = fromNotion(pageContent, properties['Title']);
// Collect all image nodes so they can be numbered in order
let images = [];
visit(hast, { tagName: 'img' }, async (node, _, parent) => {
images.push([node, parent]);
});
// Create sub directory & Download all images
const dir = `images/${properties['File Name']}/`;
await fs.mkdir(`${DESTINATION_FOLDER}${dir}`, { recursive: true });
await Promise.all(
images.map(async ([node], index) => {
const name = `${properties['File Name']}_${index + 1}`;
const relativePath = await downloadImage({
url: node.properties.src,
dir,
name,
});
node.properties.src = relativePath;
}),
);
// Format using plugin
formatHast(hast);

View file

@ -50,7 +50,10 @@ function transform(block) {
case 'paragraph':
return h('p', block.paragraph.rich_text.map(transformText));
case 'image':
if (block.image.type !== 'external') return null;
/**
* Inline code spans in the caption are transformed into class names,
* e.g. `half-width-right`
*/
const className = block.image.caption
.filter(({ annotations }) => annotations.code)
.map(({ text }) => text.content)
@ -60,7 +63,7 @@ function transform(block) {
.map(transformText);
return h('figure', { class: className || null }, [
h('img', { src: block.image.external.url, alt: caption }),
h('img', { src: block.image[block.image.type].url, alt: caption }),
h('figcaption', caption),
]);
case 'quote':