pixiv-scrapper/pixiv/downloader.go

141 lines
3.1 KiB
Go
Raw Normal View History

2021-02-20 17:40:56 +00:00
package pixiv
import (
"fmt"
"io"
"log"
"net/http"
"os"
"strings"
"syscall"
2021-02-20 17:40:56 +00:00
)
// artworkFile describes one file to fetch for an artwork: where it comes
// from and where it is stored on disk.
type artworkFile struct {
	// directory is the destination directory; filename is appended to it
	// directly, so directory is expected to end with a path separator.
	// url is the remote address the file is downloaded from.
	directory, filename, url string
}
// artwork collects everything needed to store one illustration locally:
// the tags attached to it and the list of files that make it up.
type artwork struct {
	// tags are the labels joined by tagsToString.
	tags []string
	// files lists the individual files to download.
	files []artworkFile
}
// tagsToString returns the artwork's tags joined with commas, in order,
// with no leading or trailing separator. It returns "" when there are no
// tags.
func (a *artwork) tagsToString() (r string) {
	// strings.Join avoids the empty first element (",a,b") that repeated
	// Sprintf("%s,%s", r, tag) produced, and builds the result in one pass.
	return strings.Join(a.tags, ",")
}
// getExtension returns the text following the last "." in fullpath.
// When fullpath contains no "." at all, the whole string is returned
// unchanged.
func getExtension(fullpath string) (ext string) {
	dot := strings.LastIndex(fullpath, ".")
	if dot < 0 {
		return fullpath
	}
	return fullpath[dot+1:]
}
2021-02-20 17:40:56 +00:00
//DownloadIllust .
func (p *Pixiv) downloadIllust(i Illust) (err error) {
if !i.Complited() {
err = p.ComplateIllust(&i)
if err != nil {
return
}
}
2021-02-20 17:40:56 +00:00
var art artwork
for _, tag := range i.Tags.Tags {
if len(tag.Translation.En) > 0 {
art.tags = append(art.tags, tag.Translation.En)
} else if len(tag.Romaji) > 0 {
art.tags = append(art.tags, tag.Romaji)
} else {
art.tags = append(art.tags, tag.Tag)
}
}
2021-02-22 19:04:11 +00:00
art.tags = append(art.tags, i.UserName)
art.tags = append(art.tags, fmt.Sprintf("pixiv_id_%s", i.UserID))
2021-02-20 17:40:56 +00:00
for pageNumber, page := range i.Pages {
directory := fmt.Sprintf("%s/%s_%s/", p.WorkDirectory, i.UserID, i.UserAccount)
filename := fmt.Sprintf("%s_p%d.%s", i.ID, pageNumber, getExtension(page.URLs.Original))
art.files = append(art.files,
artworkFile{directory: directory,
filename: filename,
url: page.URLs.Original})
}
for _, file := range art.files {
err := os.MkdirAll(file.directory, 0755)
if err != nil {
return err
}
outfile, err := os.Create(file.directory + file.filename)
if err != nil {
return err
}
defer outfile.Close()
//log.Printf("Downloading %s to %s", file.url, outfile.Name())
err = p.downloadTo(file.url, outfile)
if err != nil {
return err
}
if p.setxattr {
err = syscall.Setxattr(outfile.Name(), "user.xdg.tags", []byte(art.tagsToString()), 0)
if err != nil {
return err
}
}
2021-02-20 17:40:56 +00:00
}
return
}
// downloadTo fetches addr with an HTTP GET and writes the response body to
// file, replacing any previous content.
//
// The request carries the p.phpsessid cookie, p.Ua as User-Agent and a
// pixiv Referer header. Up to p.RetryCount attempts are made; transport
// errors, non-200 responses and body read errors are logged and retried.
// Errors from rewinding or truncating file are returned immediately.
//
// A non-nil error is returned when no attempt succeeded, including the case
// where p.RetryCount is not positive and therefore no attempt was made.
func (p *Pixiv) downloadTo(addr string, file *os.File) (err error) {
	req, err := http.NewRequest("GET", addr, nil)
	if err != nil {
		return fmt.Errorf("NewRequest failed (%s): %w", addr, err)
	}
	req.AddCookie(&p.phpsessid)
	req.Header.Set("User-Agent", p.Ua)
	req.Header.Set("Referer", "https://www.pixiv.net")

	for i := 0; i < p.RetryCount; i++ {
		var retry bool
		retry, err = p.downloadAttempt(req, addr, file)
		if err == nil {
			return nil
		}
		if !retry {
			return err
		}
		log.Printf("%s", err.Error())
		log.Printf("Retry %d of %d...", i, p.RetryCount)
	}

	if err == nil {
		// The loop body never ran; do not report an empty file as a success.
		return fmt.Errorf("GET %s: no attempt made (RetryCount is %d)", addr, p.RetryCount)
	}
	return fmt.Errorf("giving up after %d attempts: %w", p.RetryCount, err)
}

// downloadAttempt performs a single request and copies the body into file.
// retry reports whether a non-nil err is worth another attempt.
//
// The response body is closed before returning, so failed attempts do not
// keep their connections open while later attempts run.
func (p *Pixiv) downloadAttempt(req *http.Request, addr string, file *os.File) (retry bool, err error) {
	resp, err := p.client.Do(req)
	if err != nil {
		return true, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return true, fmt.Errorf("GET %s failed: bad status code: %d", addr, resp.StatusCode)
	}

	// Truncate does not move the file offset. Rewind first so that a retry
	// after a partial copy starts writing at byte 0 instead of leaving a
	// zero-filled hole at the start of the file.
	if _, err = file.Seek(0, io.SeekStart); err != nil {
		return false, err
	}
	if err = file.Truncate(0); err != nil {
		return false, err
	}
	if _, err = io.Copy(file, resp.Body); err != nil {
		return true, fmt.Errorf("GET %s: reading body: %w", addr, err)
	}
	return false, nil
}
// downloadWorker receives illustrations from p.DownloadChannel until the
// channel is closed, downloads each one with downloadIllust and sends a
// one-line "<ID> illust <status>" record to p.logChannel, where status is
// "downloaded" on success and "failed" otherwise.
func (p *Pixiv) downloadWorker() {
	for illust := range p.DownloadChannel {
		status := "downloaded"
		if err := p.downloadIllust(illust); err != nil {
			status = "failed"
		}
		p.logChannel <- fmt.Sprintf("%s %s %s\n", illust.ID, "illust", status)
	}
}