pixiv-scrapper/pixiv/new.go

97 lines
2.3 KiB
Go
Raw Normal View History

2021-02-20 17:40:56 +00:00
package pixiv
import (
"fmt"
"log"
"net/http"
"os"
2021-02-25 22:05:07 +00:00
"regexp"
2021-02-20 17:40:56 +00:00
"h12.io/socks"
)
//Pixiv is API
type Pixiv struct {
phpsessid http.Cookie
Ua string
client *http.Client
RetryCount int
ItemsPerRequest int
WorkDirectory string
logChannel chan string
DownloadChannel chan Illust
setxattr bool
2021-02-25 22:05:07 +00:00
nameMatcher *regexp.Regexp
idSubexpNumbers []int
2021-02-20 17:40:56 +00:00
}
//New returns object with methods to access API functions
func New(cookies string, logFilePath string, threads int, xattrs bool) (p Pixiv) {
2021-02-20 17:40:56 +00:00
p.phpsessid = http.Cookie{Name: "PHPSESSID", Value: cookies}
p.Ua = "Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0"
p.client = &http.Client{}
p.RetryCount = 5
p.ItemsPerRequest = 100
p.WorkDirectory = fmt.Sprintf("%s/Pictures/pixiv", os.Getenv("HOME"))
p.setxattr = xattrs
2021-02-25 22:05:07 +00:00
p.nameMatcher = regexp.MustCompile(typicalFilenamesRegex)
2021-02-20 17:40:56 +00:00
if len(logFilePath) > 0 {
logfile, err := os.OpenFile(logFilePath, os.O_APPEND, 664)
if err != nil {
log.Fatal(err.Error())
}
p.logChannel = make(chan string)
go p.logger(logfile)
} else {
p.logChannel = make(chan string)
go p.logger(os.Stdout)
}
p.DownloadChannel = make(chan Illust)
for i := 0; i < threads; i++ {
go p.downloadWorker()
}
return
}
// Close closes logChannel first and DownloadChannel second.
// Closing logChannel ends the range loop of the logger goroutine.
// NOTE(review): a send on either channel after Close panics, and Close itself
// panics when called twice; confirm that every producer (including download
// workers that may still be logging) has finished before calling it.
func (p *Pixiv) Close() {
close(p.logChannel)
close(p.DownloadChannel)
}
// logger writes every entry received on logChannel to logfile, verbatim and in
// order, until the channel is closed. A failed write is reported through the
// standard log package and does not stop the loop.
//
// Once the channel is closed the file is closed as well, unless it is one of
// the process-wide standard streams: closing os.Stdout here would break every
// later write to standard output by the rest of the program.
func (p *Pixiv) logger(logfile *os.File) {
	for entry := range p.logChannel {
		if _, err := logfile.WriteString(entry); err != nil {
			log.Printf("writing log entry: %v", err)
		}
	}
	if logfile == os.Stdout || logfile == os.Stderr {
		return
	}
	log.Println("Closing log file")
	if err := logfile.Close(); err != nil {
		log.Printf("closing log file: %v", err)
	}
}
// SetProxy makes the HTTP client send all of its requests through a SOCKS
// proxy. The proxy argument is handed to socks.Dial unchanged, and the client's
// Transport is replaced by a fresh one that uses the resulting dial function.
// The returned error is always nil.
func (p *Pixiv) SetProxy(proxy string) (err error) {
	p.client.Transport = &http.Transport{
		Dial: socks.Dial(proxy),
	}
	return nil
}
2021-02-25 22:05:07 +00:00
// TypicalFilenamesMatcher reports whether name matches the pattern held in
// nameMatcher and, if it does, extracts the ID from it.
//
// ok is true when the pattern matches. id is the text captured by the first
// named subexpression that captured something; it is empty when ok is false or
// when no named subexpression took part in the match. Unnamed groups are never
// used as the ID.
func (p *Pixiv) TypicalFilenamesMatcher(name string) (ok bool, id string) {
	// A single submatch search both answers "does it match" and yields the
	// captured text, so only the ID itself is returned, never the text
	// surrounding the match.
	groups := p.nameMatcher.FindStringSubmatch(name)
	if groups == nil {
		return false, ""
	}
	for i, groupName := range p.nameMatcher.SubexpNames() {
		// Index 0 is the whole match; unnamed groups are structural only.
		if i == 0 || groupName == "" {
			continue
		}
		if groups[i] != "" {
			return true, groups[i]
		}
	}
	return true, ""
}