Documentation
¶
Index ¶
- Constants
- Variables
- type BaseSpiderConf
- type Context
- type MiddlewareHandler
- type MiddlewareHandlerErr
- type Option
- type Resource
- type Schedule
- type Spider
- func (s *Spider) SetConcurrent(num int) *Spider
- func (s *Spider) SetGlobalPreRun(f MiddlewareHandlerErr) *Spider
- func (s *Spider) SetRangeTime(sleepTime int) *Spider
- func (s *Spider) SetRules(key string, h ...MiddlewareHandler) *Spider
- func (s *Spider) SetTimeTicker(num int) *Spider
- func (s *Spider) Start()
- func (s *Spider) Stop()
Constants ¶
View Source
const (
	DefaultType = "default"
	RestyType   = "resty"
	ImocType    = "imroc"
)
Variables ¶
View Source
var Sc = NewSchedule()
Functions ¶
This section is empty.
Types ¶
type BaseSpiderConf ¶
type BaseSpiderConf struct {
EnableCookie bool
EnableProxy bool
ProxyUrl string
DownloaderType string
Cookie *cookiejar.Jar
}
BaseSpiderConf is the basic spider (crawler) configuration.
type Context ¶
type MiddlewareHandler ¶
type MiddlewareHandler func(ctx *Context)
type MiddlewareHandlerErr ¶
type Option ¶
type Option func(b *BaseSpiderConf)
func NewCookieJar ¶
func NewDownloader ¶
func NewProxyUrl ¶
type Resource ¶
type Resource struct {
SpiderUniqueKey string
*downloader.Request
// contains filtered or unexported fields
}
type Schedule ¶
type Schedule struct {
ResourcePoolList chan Resource
ConcurrentNum int // 并发数量
// contains filtered or unexported fields
}
func NewSchedule ¶
func NewSchedule() *Schedule
func (*Schedule) AddResource ¶
type Spider ¶
type Spider struct {
UniqueKey string // 唯一标识符
STATUS uint // 状态
Downloader downloader.Downloader // 下载器
RuleHandlers map[string][]MiddlewareHandler // 规则中间件
CloseCallback func(s *Spider) // 回调关闭
// contains filtered or unexported fields
}
func (*Spider) SetConcurrent ¶
func (*Spider) SetGlobalPreRun ¶
func (s *Spider) SetGlobalPreRun(f MiddlewareHandlerErr) *Spider
func (*Spider) SetRangeTime ¶
func (*Spider) SetRules ¶
func (s *Spider) SetRules(key string, h ...MiddlewareHandler) *Spider
SetRules sets the spider's rules; key is the rule name.
func (*Spider) SetTimeTicker ¶
SetTimeTicker sets the liveness-probe (keep-alive check) interval. The default is ten seconds.
Click to show internal directories.
Click to hide internal directories.