From da9e6e442e98120739259bb4c64d1cfd02182718 Mon Sep 17 00:00:00 2001
From: mrq
Date: Mon, 10 Oct 2022 16:05:36 +0000
Subject: [PATCH] added quickly cobbled together fetch script (node only, will port to python later)

---
 utils/renamer/README.md |   0
 utils/renamer/fetch.js  | 126 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100755 utils/renamer/README.md
 create mode 100755 utils/renamer/fetch.js

diff --git a/utils/renamer/README.md b/utils/renamer/README.md
new file mode 100755
index 0000000..e69de29
diff --git a/utils/renamer/fetch.js b/utils/renamer/fetch.js
new file mode 100755
index 0000000..430dc05
--- /dev/null
+++ b/utils/renamer/fetch.js
@@ -0,0 +1,126 @@
+const FS = require("fs");
+const Fetch = require("node-fetch");
+
+// Script settings. `query` is overridden by the command-line arguments when any are given.
+const config = {
+	query: `leib_(tas)`, // example query if no argument is passed
+
+	output: `./in/`, // directory to save your files
+	cache: `./cache.json`, // JSON file of cached tags, will speed up processing when used for the renamer script
+
+	limit: 10, // how many posts to pull in one go
+	rateLimit: 500, // time to wait between requests, in milliseconds, e621 imposes a rate limit of 2 requests per second
+};
+
+// NOTE(review): this impersonates a browser; e621's API guidelines reportedly ask for a descriptive, project-specific User-Agent -- confirm and replace.
+const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36';
+
+// Post metadata keyed by file MD5, loaded from config.cache and written back when parse() finishes.
+let cache = {};
+try {
+	const loaded = JSON.parse(FS.readFileSync(config.cache, "utf8"));
+	// Only a plain object can be used as the MD5 -> post map; anything else is discarded.
+	if (loaded && typeof loaded === "object" && !Array.isArray(loaded)) cache = loaded;
+} catch (e) {
+	// A missing cache file is normal on the first run; other failures are reported but do not stop the run.
+	if (e.code !== "ENOENT") console.warn(`Ignoring unreadable cache ${config.cache}: ${e.message}`);
+}
+
+// Everything after the script path is treated as the search query.
+const args = process.argv.slice(2);
+if (args.length) config.query = args.join(" ");
+
+// Resolves after `ms` milliseconds.
+const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
+
+/**
+ * Streams the body of `url` into the file at `path`.
+ * Rejects on a non-2xx response or a stream error; a partially written file is
+ * removed so a later run does not mistake it for a finished download.
+ */
+const download = async (url, path) => {
+	const res = await Fetch(url);
+	if (!res.ok) throw new Error(`HTTP ${res.status} while downloading ${url}`);
+
+	const dest = FS.createWriteStream(path);
+	try {
+		await new Promise((resolve, reject) => {
+			// pipe() does not forward source errors, so both streams need a handler.
+			res.body.on('error', reject);
+			dest.on('error', reject);
+			dest.on('close', resolve);
+			res.body.pipe(dest);
+		});
+	} catch (err) {
+		dest.destroy();
+		try {
+			FS.unlinkSync(path);
+		} catch (cleanupErr) {
+			if (cleanupErr.code !== "ENOENT") console.warn(`Could not remove partial file ${path}: ${cleanupErr.message}`);
+		}
+		throw err;
+	}
+};
+
+/**
+ * Pages through the e621 post listing for config.query, records every post in
+ * `cache`, and downloads each file that is not already present in config.output.
+ * The cache file is written even when the run fails part-way.
+ */
+const parse = async () => {
+	console.log(`Fetching: ${config.query}`);
+
+	// createWriteStream fails if the target directory is missing.
+	FS.mkdirSync(config.output, { recursive: true });
+
+	let posts = [];
+	let last = '';
+
+	try {
+		do {
+			// Encode the tag string as a single value so characters such as & or # cannot break the query string.
+			const params = [`tags=${encodeURIComponent(config.query)}`];
+			if (config.limit) params.push(`limit=${config.limit}`);
+			// page=b<id> requests the posts before the given id, i.e. the next page.
+			if (last) params.push(`page=b${last}`);
+
+			const query = params.join("&");
+			console.log(`Querying: ${query}`);
+			const r = await Fetch(`https://e621.net/posts.json?${query}`, {
+				headers: { 'user-agent': userAgent },
+			});
+			if (!r.ok) throw new Error(`e621 responded with HTTP ${r.status} for ${query}`);
+
+			const j = await r.json();
+			// A response without a posts array ends the loop instead of throwing on posts.length.
+			posts = Array.isArray(j.posts) ? j.posts : [];
+
+			for (const post of posts) {
+				last = `${post.id}`;
+				cache[post.file.md5] = post;
+
+				const path = `${config.output}${post.file.md5}.${post.file.ext}`;
+				if (FS.existsSync(path)) continue;
+
+				// The API may return a post without a file URL; there is nothing to download for it.
+				if (!post.file.url) {
+					console.log(`Skipped https://e621.net/posts/${post.id} (no file URL)`);
+					continue;
+				}
+
+				await download(post.file.url, path);
+				console.log(`Downloaded https://e621.net/posts/${post.id}`);
+			}
+
+			// Only wait when another request will follow.
+			if (posts.length && config.rateLimit) await sleep(config.rateLimit);
+		} while (posts.length);
+	} finally {
+		FS.writeFileSync(config.cache, JSON.stringify(cache, null, "\t"));
+	}
+};
+
+parse().catch((err) => {
+	console.error(err);
+	process.exitCode = 1;
+});
\ No newline at end of file