first commit

This commit is contained in:
Lincong 2025-04-02 17:27:24 +08:00
commit 788302b850
22 changed files with 2140 additions and 0 deletions

113
pom.xml Normal file
View File

@ -0,0 +1,113 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the bidding-notice crawler (Spring Boot application). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>3.2.5</version>
        <relativePath /> <!-- lookup parent from repository -->
    </parent>
    <groupId>tech.riemann</groupId>
    <artifactId>bidding</artifactId>
    <version>1.0.0</version>
    <name>bidding</name>
    <description>招标信息爬虫</description>
    <properties>
        <java.version>17</java.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <!-- Version intentionally omitted: the Spring Boot parent manages the Spring
                 Framework version, so a hand-pinned number could drift out of sync with the
                 other Spring modules when the parent is upgraded. -->
            <groupId>org.springframework</groupId>
            <artifactId>spring-context</artifactId>
        </dependency>
        <dependency>
            <groupId>org.nutz</groupId>
            <artifactId>nutz-spring-boot-starter</artifactId>
            <version>3.3.1</version>
        </dependency>
        <dependency>
            <groupId>com.ibeetl</groupId>
            <artifactId>beetl</artifactId>
            <version>3.16.1.RELEASE</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid-spring-boot-3-starter</artifactId>
            <version>1.2.22</version>
        </dependency>
        <dependency>
            <groupId>com.oceanbase</groupId>
            <artifactId>oceanbase-client</artifactId>
            <version>2.4.9</version>
        </dependency>
        <dependency>
            <groupId>club.zhcs</groupId>
            <artifactId>open-api-spring-boot-starter</artifactId>
            <version>3.2.5</version>
        </dependency>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.17.2</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>com.mysql</groupId>
            <artifactId>mysql-connector-j</artifactId>
            <scope>runtime</scope>
        </dependency>
        <!-- added on 20250214 -->
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-java</artifactId>
            <version>4.11.0</version>
        </dependency>
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-chrome-driver</artifactId>
            <version>4.11.0</version>
        </dependency>
    </dependencies>
    <build>
        <finalName>${project.artifactId}</finalName>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <!-- Lombok is only needed at compile time; keep it out of the packaged jar. -->
                    <excludes>
                        <exclude>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                        </exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

View File

@ -0,0 +1,26 @@
package tech.riemann.bidding;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Bean;
import org.springframework.scheduling.TaskScheduler;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;
/**
 * Spring Boot entry point of the bidding crawler. Scheduling and asynchronous execution
 * are both switched on here.
 */
@SpringBootApplication
@EnableScheduling
@EnableAsync
public class BiddingApplication {

    /** Number of threads made available to the scheduler. */
    private static final int SCHEDULER_POOL_SIZE = 50;

    /**
     * Boots the Spring application context.
     *
     * @param args command-line arguments handed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(BiddingApplication.class, args);
    }

    /**
     * Provides the scheduler bean backed by a thread pool of {@link #SCHEDULER_POOL_SIZE} threads.
     *
     * @return the configured scheduler
     */
    @Bean
    TaskScheduler taskScheduler() {
        final ThreadPoolTaskScheduler scheduler = new ThreadPoolTaskScheduler();
        scheduler.setPoolSize(SCHEDULER_POOL_SIZE);
        return scheduler;
    }
}

View File

@ -0,0 +1,273 @@
package tech.riemann.bidding.component;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.nutz.dao.Cnd;
import org.nutz.http.Header;
import org.nutz.http.Http;
import org.nutz.json.Json;
import org.nutz.lang.Lang;
import org.nutz.lang.util.NutMap;
import org.nutz.log.Logs;
import org.nutz.spring.boot.service.ExtService;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.Scheduled;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.entity.ScheduledLog;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/*
 * Shared base class of every channel collector: scheduling, de-duplication, persistence and
 * robot notification all live here. Please do not change it without a good reason.
 */
public abstract class NoticeCollector implements InitializingBean {
    /**
     * Robot webhook address.
     * NOTE(review): the webhook key is committed with the source; consider moving it to configuration.
     */
    public static final String ROBOT_URL = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=7251bcc2-7158-4175-a5ef-bc529e432ee6";
    /**
     * Robot webhook address (three-person group).
     */
    public static final String ROBOT_URL_GX = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=cc1e8fe7-3244-4ffe-80ac-b1528b202a01";
    /**
     * Keywords a notice must contain to be pushed, separated by "、".
     */
    public static final String KEY_WORDS = "人力外包、驻场、资源池、软件开发、技术服务、开发服务、人员外包、技术开发、现场、外协";
    /**
     * Industry restriction keywords, separated by "、".
     */
    public static final String INDUSTRY_KEY_WORDS = "证券、基金、保险、信托、资管、银行、养老金、中国、国家、烟草、电力、电信、移动";
    /**
     * High-priority keywords that additionally trigger an @-mention message, separated by "、".
     */
    public static final String ATTENTION_KEY_WORDS = "资源池、外包";
    /**
     * Blacklist keywords, separated by "、"; a notice containing any of them is never pushed.
     */
    public static final String BLOCKED_KEY_WORDS = "单一、竞争、中标、结果、询价、公示、终止、成交、延期、磋商、失败";
    public static final Pattern DATE_PATTERN = Pattern.compile("\\d{4}[-]\\d{2}[-]\\d{2}");
    public static final Pattern NUMBER_PATTERN = Pattern.compile("\\d+");
    /**
     * Queue of notices waiting to be sent to the robots.
     */
    public static final LinkedBlockingQueue<Notice> NOTICES_QUEUE = new LinkedBlockingQueue<>();

    /** Per-channel counter of duplicates seen in the current run; guarded by its own monitor. */
    protected static final NutMap DUPLICATE_STATUS = NutMap.NEW();

    /** Separator used inside the keyword constants above. */
    private static final String KEY_WORD_SEPARATOR = "、";
    /** Number of duplicates tolerated in one run before collection stops. */
    private static final int MAX_DUPLICATES = 20;
    /** Pause after each delivered notice, in milliseconds. */
    private static final long SEND_INTERVAL_MILLIS = 5000L;
    /** Formatter matching {@link #DATE_PATTERN}; cached because it is immutable and thread-safe. */
    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd");
    /** Makes sure only one watchdog thread is started no matter how many collector beans exist. */
    private static final java.util.concurrent.atomic.AtomicBoolean WATCHDOG_STARTED =
            new java.util.concurrent.atomic.AtomicBoolean();
    /** Markdown body of the notification: title, publish date, channel description, url. */
    private static final String MESSAGE_TEMPLATE = """
            <font>%s</font>
            >发布时间:<font color='comment'>%s</font>
            >来源:<font color='comment'>%s</font>
            >详情:<font color='comment'>[点击查看详情](%s)</font>
            """;

    /**
     * Extracts the first {@code yyyy-MM-dd} date found in the text.
     *
     * @param info text that may contain a date; may be {@code null}
     * @return the parsed date, or today's date when the text is {@code null}, contains no date,
     *         or the matched digits do not form a valid date
     */
    public LocalDate date(String info) {
        if (info == null) {
            return LocalDate.now();
        }
        Matcher matcher = DATE_PATTERN.matcher(info);
        if (!matcher.find()) {
            return LocalDate.now();
        }
        try {
            return LocalDate.parse(matcher.group(), DATE_FORMATTER);
        } catch (java.time.format.DateTimeParseException e) {
            // The regex only checks the digit layout, so e.g. month 13 can slip through.
            return LocalDate.now();
        }
    }

    /**
     * Extracts the first run of digits found in the text.
     *
     * @param info text that may contain digits
     * @return the first digit run as a number, or 0 when there is none
     */
    public static long number(String info) {
        Matcher matcher = NUMBER_PATTERN.matcher(info);
        if (matcher.find()) {
            return Long.parseLong(matcher.group());
        }
        return 0;
    }

    /**
     * The channel this collector serves.
     *
     * @return the channel
     */
    public abstract Channel channel();

    /**
     * Performs the actual collection; implemented per channel.
     */
    public abstract void collect();

    /**
     * Scheduled entry point: resets the duplicate counter of this channel, runs
     * {@link #collect()} and records the run in the scheduled log.
     */
    @Async
    @Scheduled(cron = "0 */2 * * * ?")
    public void start() {
        // Several channels run concurrently and share this map, hence the lock.
        synchronized (DUPLICATE_STATUS) {
            DUPLICATE_STATUS.setv(channel().name(), 0); // new run, reset this channel's duplicate count
        }
        Logs.get().debugf("渠道:%s 开始进行数据采集!", channel().getDescription());
        ScheduledLog log = startLog();
        try {
            collect();
        } finally {
            // Record the run even when the collector failed.
            stopLog(log);
        }
    }

    /**
     * Pushes notices one by one and stops at the first one that is rejected.
     *
     * @param notices notices of one page
     * @return {@code true} when every notice was accepted, {@code false} when collection should stop
     */
    public boolean pushNotices(List<Notice> notices) {
        return notices.stream().allMatch(this::pushNotice);
    }

    /**
     * Stores a notice that is not known yet and hands it to {@link #sendMessage(Notice)}.
     *
     * @param notice the notice to push
     * @return {@code false} when the notice is older than two days or the duplicate limit of the
     *         current run has been reached, {@code true} otherwise
     */
    public boolean pushNotice(Notice notice) {
        if (notice.getPublishDate().isBefore(LocalDate.now().minusDays(2))) { // older than two days: drop
            return false;
        }
        boolean unknown = noticeRepository().count(Cnd.where(Notice::getChannel, ExtService.EQ, notice.getChannel())
                .and(Notice::getKey, ExtService.EQ, notice.getKey())) == 0;
        if (unknown) {
            // Not stored yet: persist it and notify.
            noticeRepository().insert(notice);
            sendMessage(notice);
            return true;
        }
        synchronized (DUPLICATE_STATUS) {
            int currentDuplicate = DUPLICATE_STATUS.getInt(channel().name());
            if (currentDuplicate >= MAX_DUPLICATES) { // duplicate limit reached: stop collecting
                DUPLICATE_STATUS.setv(channel().name(), 0);
                return false;
            }
            DUPLICATE_STATUS.setv(channel().name(), currentDuplicate + 1);
            return true;
        }
    }

    /**
     * Starts the watchdog thread that drains {@link #NOTICES_QUEUE}. The method runs once per
     * collector bean, but only the first call starts a thread so that the pause between two
     * deliveries is honoured.
     */
    @Override
    public void afterPropertiesSet() {
        if (!WATCHDOG_STARTED.compareAndSet(false, true)) {
            return;
        }
        Thread watchdog = new Thread(this::drainQueue, "notice-watchdog");
        watchdog.setDaemon(true);
        watchdog.start();
    }

    /**
     * Watchdog loop: waits for a queued notice, delivers it, then pauses before the next one.
     * Ends when the thread is interrupted.
     */
    private void drainQueue() {
        while (!Thread.currentThread().isInterrupted()) {
            try {
                Notice notice = NOTICES_QUEUE.take();
                try {
                    deliver(notice);
                } catch (RuntimeException e) {
                    // One broken notice must not stop the watchdog.
                    Logs.get().warn("failed to deliver notice", e);
                }
                Thread.sleep(SEND_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Sends the markdown notification to both robots and, for high-priority notices, an extra
     * text message that mentions the configured mobile numbers.
     */
    private void deliver(Notice notice) {
        String markdown = Json.toJson(NutMap.NEW().addv("msgtype", "markdown").addv("markdown",
                NutMap.NEW().addv("content",
                        String.format(MESSAGE_TEMPLATE, notice.getTitle(), notice.getPublishDate(),
                                notice.getChannel().getDescription(), notice.getUrl()))));
        post(ROBOT_URL, markdown);
        post(ROBOT_URL_GX, markdown);
        if (attention(notice)) {
            post(ROBOT_URL,
                    Json.toJson(NutMap.NEW().addv("msgtype", "text").addv("text",
                            NutMap.NEW().addv("mentioned_mobile_list", mentionedMobiles())
                                    .addv("content", "检测到【资源池、外包】项目,请立即确认!" + notice.getTitle() + ": " + notice.getUrl()))));
        }
    }

    /** Posts a JSON body to a robot webhook; a failure is logged so the other posts still happen. */
    private static void post(String url, String body) {
        try {
            Http.post3(url, body, Header.create().asJsonContentType(), 500);
        } catch (RuntimeException e) {
            Logs.get().warn("failed to post robot message", e);
        }
    }

    /** Mobile numbers mentioned in high-priority messages. */
    private static List<String> mentionedMobiles() {
        return Lang.list("13811608471", "13018059968", "18343641000", "18996359755", "13673518683");
    }

    /** Splits one of the "、"-separated keyword constants into its keywords. */
    private static List<String> keyWordsOf(String joined) {
        return List.of(joined.split(KEY_WORD_SEPARATOR));
    }

    /** Tells whether the title or the content of the notice contains any of the keywords. */
    private static boolean containsAny(Notice notice, List<String> keyWords) {
        String title = notice.getTitle() == null ? "" : notice.getTitle();
        String content = notice.getContent() == null ? "" : notice.getContent();
        return keyWords.stream().anyMatch(key -> title.contains(key) || content.contains(key));
    }

    /**
     * Tells whether a notice is high priority, i.e. its title or content contains one of
     * {@link #ATTENTION_KEY_WORDS}.
     *
     * @param notice the notice
     * @return {@code true} for a high-priority notice
     */
    public boolean attention(Notice notice) {
        return containsAny(notice, keyWordsOf(ATTENTION_KEY_WORDS));
    }

    /**
     * Queues the notice for delivery when it passes {@link #match(Notice)}; the queue is
     * consumed by the watchdog thread.
     *
     * @param notice the notice
     */
    public void sendMessage(Notice notice) {
        if (match(notice)) {
            NOTICES_QUEUE.add(notice);
        }
    }

    /**
     * Matching rule for notifications: no blacklist keyword in title or content, and at least
     * one of {@link #KEY_WORDS} in title or content.
     *
     * @param notice the notice
     * @return whether the notice should be sent
     */
    public boolean match(Notice notice) {
        if (containsAny(notice, keyWordsOf(BLOCKED_KEY_WORDS))) {
            return false;
        }
        return containsAny(notice, keyWordsOf(KEY_WORDS));
    }

    public abstract ScheduledLogRepository scheduledLogRepository();

    public abstract NoticeRepository noticeRepository();

    /**
     * Creates the log entry of a run, carrying the channel and the current thread id.
     *
     * @return the new, not yet persisted log entry
     */
    public ScheduledLog startLog() {
        return ScheduledLog.builder().channel(channel()).threadId(Thread.currentThread().getId()).build();
    }

    /**
     * Sets the end time of the log entry and persists it.
     *
     * @param log the entry created by {@link #startLog()}
     */
    public void stopLog(ScheduledLog log) {
        log.setEnd(LocalDateTime.now());
        scheduledLogRepository().insert(log);
    }

    /**
     * Manual smoke test: posts a test message with mentions to {@link #ROBOT_URL}.
     * Running it sends a real message.
     */
    public static void main(String[] args) {
        Http.post3(ROBOT_URL,
                Json.toJson(NutMap.NEW().addv("msgtype", "text").addv("text",
                        NutMap.NEW().addv("mentioned_mobile_list", mentionedMobiles())
                                .addv("content", "本条消息是测试,检测到【资源池、人力外包】项目,请立即确认!"))),
                Header.create().asJsonContentType(), 500);
    }
}

View File

@ -0,0 +1,95 @@
package tech.riemann.bidding.component.impl;
import java.io.IOException;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.nutz.lang.Lang;
import org.nutz.log.Log;
import org.nutz.log.Logs;
import org.springframework.stereotype.Component;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the BOC channel; notice links are resolved against www.bankofchina.com.
 */
@Component
@RequiredArgsConstructor
public class BOCNoticeCollector extends NoticeCollector {
    private static final Log logger = Logs.get();
    /** Upper bound of listing pages visited in one run. */
    private static final int MAX_PAGES = 20;
    /** Separator of the keyword constants inherited from {@link NoticeCollector}. */
    private static final String KEY_WORD_SEPARATOR = "、";

    private final NoticeRepository noticeRepository;
    private final ScheduledLogRepository scheduledLogRepository;

    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return scheduledLogRepository;
    }

    @Override
    public NoticeRepository noticeRepository() {
        return noticeRepository;
    }

    @Override
    public Channel channel() {
        return Channel.BOC;
    }

    /**
     * Walks the listing pages (at most {@link #MAX_PAGES}) and pushes the notices of each page.
     * Stops on an empty page, when {@code pushNotices} reports that collection should end, or
     * on an I/O error.
     */
    @Override
    public void collect() {
        for (int page = 1; page <= MAX_PAGES; page++) {
            String indexPageName = page == 1 ? "" : "_" + page;
            String url = String.format(channel().getUrl(), indexPageName);
            logger.debugf("获取页面内容Url%s", url);
            try {
                Elements elements = Jsoup.connect(url).get().select(".news ul li");
                if (elements.isEmpty()) {
                    logger.debugf("没有内容,退出:%s", url);
                    break;
                }
                List<Notice> notices = Lang.list();
                for (Element element : elements) {
                    String link = element.select("a").attr("href");
                    if (link.isEmpty()) {
                        // No hyperlink in this row: nothing to key or link the notice by.
                        continue;
                    }
                    notices.add(Notice.builder()
                            .channel(channel())
                            .key(link.replace("/", "").replace("_", "").replace(".html", "").replace(".", ""))
                            .title(element.select("a").text())
                            .content("")
                            // The first character of the href is dropped before appending it to the base path.
                            .url("https://www.bankofchina.com/aboutboc/bi6" + link.substring(1))
                            // date() locates the yyyy-MM-dd part itself, so no fixed-offset substring is needed.
                            .publishDate(date(element.select("span").text()))
                            .build());
                }
                if (!pushNotices(notices)) {
                    break;
                }
            } catch (IOException e) {
                logger.debug(e);
                break;
            }
        }
    }

    /**
     * Matching rule of this channel: no blacklist keyword in title or content and at least one
     * keyword in the title (the content is not consulted for keywords).
     *
     * @param notice the notice
     * @return whether the notice should be sent
     */
    @Override
    public boolean match(Notice notice) {
        List<String> blacks = Lang.list(BLOCKED_KEY_WORDS.split(KEY_WORD_SEPARATOR));
        List<String> keyWords = Lang.list(KEY_WORDS.split(KEY_WORD_SEPARATOR));
        boolean blocked = blacks.stream()
                .anyMatch(key -> notice.getTitle().contains(key) || notice.getContent().contains(key));
        if (blocked) {
            return false;
        }
        return keyWords.stream().anyMatch(key -> notice.getTitle().contains(key));
    }
}

View File

@ -0,0 +1,133 @@
package tech.riemann.bidding.component.impl;
import java.io.IOException;
import java.time.LocalDate;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.nutz.lang.Lang;
import org.nutz.log.Log;
import org.nutz.log.Logs;
import org.springframework.stereotype.Component;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the BOCQ channel; notice links are resolved against www.cqcbank.com.cn.
 */
@Component
@RequiredArgsConstructor
public class BOCQNoticeCollector extends NoticeCollector {
    private static final Log logger = Logs.get();
    /** Upper bound of listing pages visited in one run. */
    private static final int MAX_PAGES = 20;
    /** Separator of the keyword constants inherited from {@link NoticeCollector}. */
    private static final String KEY_WORD_SEPARATOR = "、";

    private final NoticeRepository noticeRepository;
    private final ScheduledLogRepository scheduledLogRepository;

    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return scheduledLogRepository;
    }

    @Override
    public NoticeRepository noticeRepository() {
        return noticeRepository;
    }

    @Override
    public Channel channel() {
        return Channel.BOCQ;
    }

    /**
     * Walks the listing pages (at most {@link #MAX_PAGES}) and pushes the notices of each page.
     * Stops on an empty page, when {@code pushNotices} reports that collection should end, or
     * on an I/O error.
     */
    @Override
    public void collect() {
        for (int page = 1; page <= MAX_PAGES; page++) {
            String indexPageName = page == 1 ? "" : "_" + page;
            String url = String.format(channel().getUrl(), indexPageName);
            logger.debugf("获取页面内容Url%s", url);
            try {
                Elements elements = Jsoup.connect(url).get().select(".dhy_b li");
                if (elements.isEmpty()) {
                    logger.debugf("没有内容,退出:%s", url);
                    break;
                }
                List<Notice> notices = Lang.list();
                for (Element element : elements) {
                    notices.add(toNotice(channel(), element, date(element.select("span").text())));
                }
                if (!pushNotices(notices)) {
                    break;
                }
            } catch (IOException e) {
                logger.debug(e);
                break;
            }
        }
    }

    /** Builds a notice from one list row; the key is the href stripped of "/", "_", ".html" and ".". */
    private static Notice toNotice(Channel channel, Element element, LocalDate publishDate) {
        String link = element.select("a").attr("href");
        return Notice.builder()
                .channel(channel)
                .key(link.replace("/", "").replace("_", "").replace(".html", "").replace(".", ""))
                .title(element.select("a").text())
                .content("")
                .url("http://www.cqcbank.com.cn" + link)
                .publishDate(publishDate)
                .build();
    }

    /**
     * Matching rule of this channel: no blacklist keyword in title or content and at least one
     * keyword in the title (the content is not consulted for keywords).
     *
     * @param notice the notice
     * @return whether the notice should be sent
     */
    @Override
    public boolean match(Notice notice) {
        List<String> blacks = Lang.list(BLOCKED_KEY_WORDS.split(KEY_WORD_SEPARATOR));
        List<String> keyWords = Lang.list(KEY_WORDS.split(KEY_WORD_SEPARATOR));
        boolean blocked = blacks.stream()
                .anyMatch(key -> notice.getTitle().contains(key) || notice.getContent().contains(key));
        if (blocked) {
            return false;
        }
        return keyWords.stream().anyMatch(key -> notice.getTitle().contains(key));
    }

    /**
     * Manual debugging aid: fetches the listing pages and prints every row and the notice built
     * from it. Nothing is stored or sent.
     */
    public static void main(String[] args) {
        for (int page = 1; page <= MAX_PAGES; page++) {
            String indexPageName = page == 1 ? "" : "_" + page;
            String url = String.format(Channel.BOCQ.getUrl(), indexPageName);
            System.out.println("获取页面内容Url" + url);
            try {
                Elements elements = Jsoup.connect(url).get().select(".dhy_b li");
                if (elements.isEmpty()) {
                    System.out.println("没有内容,退出:" + url);
                    break;
                }
                for (Element element : elements) {
                    System.out.println("element:" + element);
                    LocalDate pDate = LocalDate.parse(element.select("span").text());
                    Notice notice = toNotice(Channel.BOCQ, element, pDate);
                    System.out.println("notice:\t" + notice.toString());
                }
            } catch (IOException e) {
                System.out.println("IOException:" + e.getMessage());
                break;
            }
        }
    }
}

View File

@ -0,0 +1,92 @@
package tech.riemann.bidding.component.impl;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.nutz.dao.Cnd;
import org.nutz.http.Header;
import org.nutz.http.Http;
import org.nutz.http.Response;
import org.nutz.lang.Strings;
import org.nutz.log.Logs;
import org.nutz.spring.boot.service.ExtService;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the Shanghai Baohua International channel. Currently not registered as a
 * Spring component.
 */
// @Component
@Deprecated
@RequiredArgsConstructor
public class BaoSteelNoticeCollector extends NoticeCollector {
    private final NoticeRepository noticeRepository;
    private final ScheduledLogRepository scheduledLogRepository;

    @Override
    public NoticeRepository noticeRepository() {
        return noticeRepository;
    }

    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return scheduledLogRepository;
    }

    @Override
    public Channel channel() {
        return Channel.BAO_STEEL;
    }

    /**
     * Fetches the listing and stores every notice that is not known yet. The listing alternates
     * between a title row (holding the link) and a row with class {@code kws} that holds the
     * date; a notice is completed and stored when its date row is reached.
     */
    @Override
    public void collect() {
        // Without a cookie the site answers 521. The cookie is hard-coded for now; whether it is
        // issued automatically when requesting the home page still needs to be investigated.
        Response response = Http.get(channel().getUrl(),
                Header.create()
                        .set("Host", "baosteelbidding.zbytb.com")
                        .set("Referer", "https://baosteelbidding.zbytb.com/")
                        .set("Cookie",
                                "__jsluid_s=ac68c37cdc617959a40cef00227811e1; Du4_city=132%7Chttps%3A%2F%2Fbaosteelbidding.zbytb.com%2F; __jsl_clearance_s=1712113445.458|0|%2F%2BzxEoTkUCTMNxnyj9Nu2c1jodI%3D; Hm_lvt_47b9a4b804f6b4f81affae66cb8a57e9=1712023024,1712113449; Hm_lpvt_47b9a4b804f6b4f81affae66cb8a57e9=1712113475; Du4_vistor_st=3")
                        .set("User-Agent",
                                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0"),
                5000);
        Logs.get().debugf("上海宝华国际渠道采集,网页访问状态码: %d", response.getStatus());
        if (!response.isOK()) {
            return;
        }
        Document document = Jsoup.parse(response.getContent());
        Elements elements = document.select("ul.li_dot li");
        // Notice started by the most recent title row; null while no usable title row was seen.
        Notice pending = null;
        for (Element element : elements) {
            if (Strings.equals(element.attr("class"), "kws")) { // second row: date
                if (pending == null) {
                    // A date row without a preceding title row would create a blank notice.
                    continue;
                }
                pending.setPublishDate(date(element.text()));
                if (noticeRepository.count(Cnd.where(Notice::getChannel, ExtService.EQ, pending.getChannel())
                        .and(Notice::getKey, ExtService.EQ, pending.getKey())) == 0) {
                    // Not stored yet: persist it and notify.
                    noticeRepository.insert(pending);
                    sendMessage(pending);
                }
            } else { // first row: title and link
                Element anchor = element.select("a").first();
                if (anchor == null) {
                    // A row without a link cannot be turned into a notice.
                    pending = null;
                    continue;
                }
                String url = anchor.attr("href");
                pending = Notice.builder()
                        .channel(channel())
                        .key(number(url) + "")
                        .title(anchor.text())
                        .content("")
                        .url(url)
                        .build();
            }
        }
    }
}

View File

@ -0,0 +1,135 @@
package tech.riemann.bidding.component.impl;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.temporal.ChronoUnit;
import java.util.List;
import java.util.stream.Collectors;
import org.nutz.http.Http;
import org.nutz.http.Response;
import org.nutz.lang.Lang;
import org.nutz.lang.util.NutMap;
import org.nutz.log.Logs;
import org.springframework.stereotype.Component;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the CFCPN channel; the listing is fetched page by page through a form POST
 * whose response carries the records under the {@code rows} key.
 */
@Component
@RequiredArgsConstructor
public class CFCPNNoticeCollector extends NoticeCollector {
    /** Pause between two page requests, in milliseconds. */
    private static final long PAGE_INTERVAL_MILLIS = 5000L;

    private final NoticeRepository noticeRepository;
    private final ScheduledLogRepository scheduledLogRepository;

    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return scheduledLogRepository;
    }

    @Override
    public NoticeRepository noticeRepository() {
        return noticeRepository;
    }

    @Override
    public Channel channel() {
        return Channel.CFCPN;
    }

    /**
     * Requests listing pages until a page is empty, the request fails, {@code pushNotices}
     * reports that collection should end, the thread is interrupted, or a page contains a
     * notice older than two days.
     */
    @Override
    public void collect() {
        for (int page = 1;; page++) {
            Response response = requestPage(channel(), page);
            if (!response.isOK()) {
                Logs.get().debugf("请求发生错误,状态码为:%d", response.getStatus());
                break;
            }
            List<NutMap> records = rowsOf(response);
            if (records.isEmpty()) {
                break;
            }
            List<Notice> notices = toNotices(channel(), records);
            if (!pushNotices(notices)) {
                break;
            }
            try {
                Thread.sleep(PAGE_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller instead of swallowing it.
                Thread.currentThread().interrupt();
                Logs.get().debug(e);
                break;
            }
            if (containsOlderThan(notices, 2)) { // data older than two days: stop paging
                break;
            }
        }
    }

    /** Posts the listing query for one page. */
    private static Response requestPage(Channel channel, int page) {
        return Http.post2(channel.getUrl(), NutMap.NEW().addv("noticeType", 1).addv("noticeState", 1)
                .addv("pageNo", page).addv("isValid", 1).addv("orderBy", "publish_time desc"), 5000, 5000);
    }

    /** Reads the {@code rows} list of a response; a missing list is treated as empty. */
    private static List<NutMap> rowsOf(Response response) {
        List<NutMap> rows = Lang.map(response.getContent()).getList("rows", NutMap.class);
        return rows == null ? List.of() : rows;
    }

    /** Converts the records of one page into notices. */
    private static List<Notice> toNotices(Channel channel, List<NutMap> records) {
        return records.stream().map(item -> Notice.builder().key(item.getString("id")).channel(channel)
                .title(item.getString("noticeTitle")).content("")
                .url(String.format(
                        "http://www.cfcpn.com/jcw/sys/index/goUrl?url=modules/sys/login/detail&column=undefined&searchVal=%s",
                        item.getString("id")))
                .publishDate(item.getAs("publishTime", LocalDateTime.class).toLocalDate()).build())
                .collect(Collectors.toList());
    }

    /** Tells whether any notice was published more than {@code days} days before today. */
    private static boolean containsOlderThan(List<Notice> notices, int days) {
        LocalDate limit = LocalDate.now().minus(days, ChronoUnit.DAYS);
        return notices.stream().anyMatch(item -> item.getPublishDate().isBefore(limit));
    }

    /**
     * Manual debugging aid: pages through the listing and prints the notices, stopping once a
     * page contains a notice older than five days. Nothing is stored or sent.
     */
    public static void main(String[] args) {
        for (int page = 1;; page++) {
            Response response = requestPage(Channel.CFCPN, page);
            if (!response.isOK()) {
                Logs.get().infof("请求发生错误,状态码为:%d", response.getStatus());
                Logs.get().debugf("请求发生错误,状态码为:%d", response.getStatus());
                break;
            }
            List<NutMap> records = rowsOf(response);
            if (records.isEmpty()) {
                Logs.get().info("records is empty!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
                break;
            }
            List<Notice> notices = toNotices(Channel.CFCPN, records);
            try {
                Thread.sleep(PAGE_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                Logs.get().info("Exception happened:" + e.toString());
                Logs.get().debug(e);
                break;
            }
            notices.forEach(System.out::println);
            System.out.println("page=" + page + "" + "now():" + LocalDate.now() + "now()-5days:"
                    + LocalDate.now().minus(5, ChronoUnit.DAYS));
            if (containsOlderThan(notices, 5)) { // data older than five days: stop paging
                break;
            }
        }
    }
}

View File

@ -0,0 +1,150 @@
package tech.riemann.bidding.component.impl;
import java.io.IOException;
import java.time.LocalDate;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.nutz.lang.Lang;
import org.nutz.log.Log;
import org.nutz.log.Logs;
import org.springframework.stereotype.Component;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the CQRCB channel; notice links are resolved against www.cqrcb.com.
 */
@Component
@RequiredArgsConstructor
public class CQRCBNoticeCollector extends NoticeCollector {
    private static final Log logger = Logs.get();
    /** Upper bound of listing pages visited in one run. */
    private static final int MAX_PAGES = 20;
    /** Separator of the keyword constants inherited from {@link NoticeCollector}. */
    private static final String KEY_WORD_SEPARATOR = "、";

    private final NoticeRepository noticeRepository;
    private final ScheduledLogRepository scheduledLogRepository;

    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return scheduledLogRepository;
    }

    @Override
    public NoticeRepository noticeRepository() {
        return noticeRepository;
    }

    @Override
    public Channel channel() {
        return Channel.CQRCB;
    }

    /**
     * Walks the listing pages, limited to the page count reported by the site and to
     * {@link #MAX_PAGES}, and pushes the notices of each page. Stops on an empty page, when
     * {@code pushNotices} reports that collection should end, or on an I/O error.
     */
    @Override
    public void collect() {
        int totalPages = Math.min(getTotalPages(), MAX_PAGES);
        for (int page = 1; page <= totalPages; page++) {
            String url = String.format(channel().getUrl(), pageSuffix(page));
            logger.debugf("获取页面内容Url%s", url);
            try {
                Elements elements = Jsoup.connect(url).get().select(".sideCont ul li");
                if (elements.isEmpty()) {
                    logger.debugf("没有内容,退出:%s", url);
                    break;
                }
                List<Notice> notices = Lang.list();
                for (Element element : elements) {
                    // date() locates the yyyy-MM-dd part itself, so no fixed-offset substring is needed.
                    notices.add(toNotice(channel(), element, date(element.select("span").text())));
                }
                if (!pushNotices(notices)) {
                    break;
                }
            } catch (IOException e) {
                logger.debug(e);
                break;
            }
        }
    }

    /** Value substituted into the channel url for a page: "index" for page 1, "index_N.html" after that. */
    private static String pageSuffix(int page) {
        return page == 1 ? "index" : "index_" + page + ".html";
    }

    /** Builds a notice from one list row; the key is the href stripped of "/" and ".html". */
    private static Notice toNotice(Channel channel, Element element, LocalDate publishDate) {
        String link = element.select("a").attr("href");
        return Notice.builder()
                .channel(channel)
                .key(link.replace("/", "").replace(".html", ""))
                .title(element.select("a").text().replace("·", ""))
                .content("")
                .url("https://www.cqrcb.com" + link)
                .publishDate(publishDate)
                .build();
    }

    /**
     * Reads the total page count from the pager of the index page: the number after the last
     * "/" of the first {@code .pages em} element's span text.
     *
     * @return the page count, or 0 when it cannot be determined
     */
    private int getTotalPages() {
        // NOTE(review): collect() treats channel().getUrl() as a format string, yet here it is
        // used verbatim with "/index.html" appended - confirm that the resulting URL is valid.
        String tempUrlStr = channel().getUrl() + "/index.html";
        try {
            Element pager = Jsoup.connect(tempUrlStr).get().select(".pages em").first();
            if (pager == null) {
                return 0;
            }
            String pageRelativeStr = pager.select("span").text();
            int lastSlashPos = pageRelativeStr.lastIndexOf("/");
            return Integer.parseInt(pageRelativeStr.substring(lastSlashPos + 1));
        } catch (IOException | RuntimeException e) {
            logger.debug(e);
            return 0;
        }
    }

    /**
     * Matching rule of this channel: no blacklist keyword in title or content and at least one
     * keyword in the title (the content is not consulted for keywords).
     *
     * @param notice the notice
     * @return whether the notice should be sent
     */
    @Override
    public boolean match(Notice notice) {
        List<String> blacks = Lang.list(BLOCKED_KEY_WORDS.split(KEY_WORD_SEPARATOR));
        List<String> keyWords = Lang.list(KEY_WORDS.split(KEY_WORD_SEPARATOR));
        boolean blocked = blacks.stream()
                .anyMatch(key -> notice.getTitle().contains(key) || notice.getContent().contains(key));
        if (blocked) {
            return false;
        }
        return keyWords.stream().anyMatch(key -> notice.getTitle().contains(key));
    }

    /**
     * Manual debugging aid: fetches up to {@link #MAX_PAGES} listing pages and prints the
     * notices built from them. Nothing is stored or sent.
     */
    public static void main(String[] args) {
        for (int page = 1; page <= MAX_PAGES; page++) {
            String url = String.format(Channel.CQRCB.getUrl(), pageSuffix(page));
            System.out.println("获取页面内容Url" + url);
            try {
                Elements elements = Jsoup.connect(url).get().select(".sideCont ul li");
                if (elements.isEmpty()) {
                    System.out.println("没有内容,退出:" + url);
                    break;
                }
                for (Element element : elements) {
                    LocalDate pDate = LocalDate.parse(element.select("span").text().substring(1, 11));
                    Notice notice = toNotice(Channel.CQRCB, element, pDate);
                    System.out.println("notice:\t" + notice.toString());
                }
            } catch (IOException e) {
                logger.debug(e);
                break;
            }
        }
    }
}

View File

@ -0,0 +1,82 @@
package tech.riemann.bidding.component.impl;
import java.io.IOException;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.nutz.lang.Lang;
import org.nutz.log.Logs;
import org.springframework.stereotype.Component;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the CHINA_CCSSCM channel; relative notice links are resolved against
 * zb.chinaccsscm.cn.
 */
@Component
@RequiredArgsConstructor
public class ChinaCcsscmNoticeCollector extends NoticeCollector {
    private final NoticeRepository noticeRepository;
    private final ScheduledLogRepository scheduledLogRepository;

    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return this.scheduledLogRepository;
    }

    @Override
    public NoticeRepository noticeRepository() {
        return this.noticeRepository;
    }

    @Override
    public Channel channel() {
        return Channel.CHINA_CCSSCM;
    }

    /**
     * Requests listing pages one after another, starting at page 1, and pushes the notices of
     * each page. Paging ends on an empty page, when {@code pushNotices} returns {@code false},
     * or on an I/O error.
     */
    @Override
    public void collect() {
        int page = 0;
        boolean morePages = true;
        while (morePages) {
            page++;
            final String pageUrl = String.format(channel().getUrl(), page);
            try {
                final Elements rows = Jsoup.connect(pageUrl).get().select(".Top5 ul li");
                if (rows.isEmpty()) {
                    morePages = false;
                } else {
                    final List<Notice> pageNotices = rows.stream().map(this::toNotice).toList();
                    morePages = pushNotices(pageNotices);
                }
            } catch (IOException e) {
                Logs.get().debug(e);
                morePages = false;
            }
        }
    }

    /** Turns one list row into a notice; the key is the first number found in the link. */
    private Notice toNotice(Element row) {
        final String href = row.select("a").attr("href");
        final String absoluteUrl;
        if (href.startsWith("http")) {
            absoluteUrl = href;
        } else {
            absoluteUrl = "https://zb.chinaccsscm.cn" + href;
        }
        return Notice.builder()
                .channel(channel())
                .key(number(href) + "")
                .title(row.select("a").text())
                .content("")
                .url(absoluteUrl)
                .publishDate(date(row.select(".Right").text()))
                .build();
    }
}

View File

@ -0,0 +1,88 @@
package tech.riemann.bidding.component.impl;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.nutz.lang.Lang;
import org.nutz.log.Log;
import org.nutz.log.Logs;
import org.springframework.stereotype.Component;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the E_BIDDING channel; the site is searched once per keyword.
 */
@Component
@RequiredArgsConstructor
public class EBIDDINGNoticeCollector extends NoticeCollector {
    private static final Log logger = Logs.get();
    /** Upper bound of result pages visited per keyword. */
    private static final int MAX_PAGES = 20;
    /** Separator of the keyword constants inherited from {@link NoticeCollector}. */
    private static final String KEY_WORD_SEPARATOR = "、";

    private final NoticeRepository noticeRepository;
    private final ScheduledLogRepository scheduledLogRepository;

    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return scheduledLogRepository;
    }

    @Override
    public NoticeRepository noticeRepository() {
        return noticeRepository;
    }

    @Override
    public Channel channel() {
        return Channel.E_BIDDING;
    }

    /**
     * Runs one search per keyword of {@code KEY_WORDS}. The list is split on "、" so that whole
     * keywords, not single characters, are searched.
     */
    @Override
    public void collect() {
        Arrays.stream(KEY_WORDS.split(KEY_WORD_SEPARATOR)).forEach(this::collectKeyword);
    }

    /**
     * Walks the result pages of one keyword (at most {@link #MAX_PAGES}) and pushes the notices
     * of each page. Stops on an empty page, when {@code pushNotices} reports that collection
     * should end, or on an I/O error.
     */
    private void collectKeyword(String keyword) {
        logger.debugf("爬取关键字Url%s", keyword);
        for (int page = 1; page <= MAX_PAGES; page++) {
            String url = String.format(channel().getUrl(), keyword, page);
            logger.debugf("获取页面内容Url%s", url);
            try {
                Elements elements = Jsoup.connect(url).get().select(".newslist>li");
                if (elements.isEmpty()) {
                    logger.debugf("没有内容,退出:%s", url);
                    break;
                }
                List<Notice> notices = Lang.list();
                for (Element element : elements) {
                    String link = element.select("a").attr("href");
                    // The key is the first number after the last "/"; a link without any "/"
                    // is scanned as a whole instead of failing on a negative index.
                    int lastSlash = Math.max(0, link.lastIndexOf("/"));
                    notices.add(Notice.builder()
                            .channel(channel())
                            .key(number(link.substring(lastSlash)) + "")
                            .title(element.select("a").attr("title"))
                            .content("")
                            .url(link)
                            .publishDate(date(element.select(".newsDate div").text()))
                            .build());
                }
                if (!pushNotices(notices)) {
                    break;
                }
            } catch (IOException e) {
                logger.debug(e);
                break;
            }
        }
    }
}

View File

@ -0,0 +1,99 @@
package tech.riemann.bidding.component.impl;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.nutz.lang.Lang;
import org.nutz.log.Log;
import org.nutz.log.Logs;
import org.springframework.stereotype.Component;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the {@link Channel#GCZB} channel: searches the site per keyword,
 * page by page, and forwards each page of results to {@code pushNotices}.
 */
@Component
@RequiredArgsConstructor
public class GCZBNoticeCollector extends NoticeCollector {

    private final NoticeRepository noticeRepository;
    private static final Log logger = Logs.get();
    private final ScheduledLogRepository scheduledLogRepository;

    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return this.scheduledLogRepository;
    }

    @Override
    public NoticeRepository noticeRepository() {
        return this.noticeRepository;
    }

    @Override
    public Channel channel() {
        return Channel.GCZB;
    }

    /**
     * Runs the search for every entry of {@code KEY_WORDS.split("")}, requesting at
     * most 20 pages per entry.
     */
    @Override
    public void collect() {
        // NOTE(review): split("") yields one entry per character — confirm the delimiter.
        for (String word : KEY_WORDS.split("")) {
            logger.debugf("爬取关键字Url%s", word);
            int pageNo = 1;
            while (pageNo <= 20 && crawlPage(word, pageNo)) {
                pageNo++;
            }
        }
    }

    /**
     * Fetches one result page and pushes its notices.
     *
     * @return {@code true} when the next page should be requested; {@code false} on an
     *         empty page, an I/O error, or when {@code pushNotices} returns {@code false}
     */
    private boolean crawlPage(String word, int pageNo) {
        String pageUrl = String.format(channel().getUrl(), pageNo, word);
        logger.debugf("获取页面内容Url%s", pageUrl);
        try {
            Elements items = Jsoup.connect(pageUrl).get().select(".lists_center ul li");
            if (items.isEmpty()) {
                logger.debugf("没有内容,退出:%s", pageUrl);
                return false;
            }
            List<Notice> batch = Lang.list();
            for (Element item : items) {
                String href = item.select("a").attr("href");
                batch.add(Notice.builder()
                        .channel(channel())
                        .key(number(href) + "")
                        .title(item.select("a").text())
                        .content("")
                        .url(href)
                        .publishDate(date(item.select("b").text()))
                        .build());
            }
            return pushNotices(batch);
        } catch (IOException ex) {
            logger.debug(ex);
            return false;
        }
    }

    /**
     * A notice matches when no blocked word occurs in its title or content, and its
     * title contains at least one keyword and at least one industry keyword.
     */
    @Override
    public boolean match(Notice notice) {
        String[] blocked = BLOCKED_KEY_WORDS.split("");
        String[] wanted = KEY_WORDS.split("");
        String[] industries = INDUSTRY_KEY_WORDS.split("");

        boolean hitsBlocked = Arrays.stream(blocked).anyMatch(w -> notice.getTitle().contains(w))
                || Arrays.stream(blocked).anyMatch(w -> notice.getContent().contains(w));
        if (hitsBlocked) {
            return false;
        }
        return Arrays.stream(wanted).anyMatch(w -> notice.getTitle().contains(w))
                && Arrays.stream(industries).anyMatch(w -> notice.getTitle().contains(w));
    }
}

View File

@ -0,0 +1,87 @@
package tech.riemann.bidding.component.impl;
import java.io.IOException;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.nutz.lang.Lang;
import org.nutz.log.Log;
import org.nutz.log.Logs;
import org.springframework.stereotype.Component;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the {@link Channel#GXZB} channel.
 *
 * <p>Requests the paginated bulletin list, converts each list item into a
 * {@link Notice} and hands every page to {@code pushNotices}.
 */
@Component
@RequiredArgsConstructor
public class GXZBNoticeCollector extends NoticeCollector {

    /** Maximum number of list pages requested per run. */
    private static final int MAX_PAGES = 20;

    private final NoticeRepository noticeRepository;
    private final ScheduledLogRepository scheduledLogRepository;
    private static final Log logger = Logs.get();

    /** @return the repository injected for scheduled-run logs */
    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return scheduledLogRepository;
    }

    /** @return the repository injected for notices */
    @Override
    public NoticeRepository noticeRepository() {
        return noticeRepository;
    }

    /** @return {@link Channel#GXZB} */
    @Override
    public Channel channel() {
        return Channel.GXZB;
    }

    /**
     * Walks the bulletin list pages. Stops at the first empty page, the first page for
     * which {@code pushNotices} returns {@code false}, or the first I/O error.
     */
    @Override
    public void collect() {
        for (int page = 1; page <= MAX_PAGES; page++) {
            String url = String.format(channel().getUrl(), page);
            logger.debugf("获取页面内容Url%s", url);
            try {
                Document document = Jsoup.connect(url).get();
                Elements elements = document.select(".newslist>li");
                if (elements.isEmpty()) {
                    logger.debugf("没有内容,退出:%s", url);
                    break;
                }
                List<Notice> notices = Lang.list();
                for (Element element : elements) {
                    Elements anchor = element.select("a");
                    String link = anchor.attr("href");
                    int slash = link.lastIndexOf('/');
                    if (slash < 0) {
                        // attr() yields "" when the item has no anchor/href; substring(-1)
                        // would throw and abort the whole crawl, so skip the item instead.
                        logger.debugf("skip list item without a usable link: %s", link);
                        continue;
                    }
                    Notice notice = Notice.builder()
                            .channel(channel())
                            // the key is derived from the last path segment of the link
                            .key(number(link.substring(slash)) + "")
                            .title(anchor.attr("title"))
                            .content("")
                            .url(link)
                            .publishDate(date(element.select(".newsDate div").text()))
                            .build();
                    notices.add(notice);
                }
                if (!pushNotices(notices)) {
                    break;
                }
            } catch (IOException e) {
                logger.debug(e);
                break;
            }
        }
    }

    /**
     * Ad-hoc manual check: prints the key derived from a sample bulletin link.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String link = "https://ebid.gxzb.com.cn/biddingBulletin/2024-04-25/46236.html";
        String number = NoticeCollector.number(link.substring(link.lastIndexOf("/"))) + "";
        System.out.println(number);
    }
}

View File

@ -0,0 +1,111 @@
package tech.riemann.bidding.component.impl;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;
import org.nutz.http.Header;
import org.nutz.http.Http;
import org.nutz.http.Response;
import org.nutz.json.Json;
import org.nutz.lang.Lang;
import org.nutz.lang.util.NutMap;
import org.nutz.log.Log;
import org.nutz.log.Logs;
import org.springframework.stereotype.Component;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the {@link Channel#PICCEC} channel.
 *
 * <p>Posts a JSON query to the channel URL page by page, maps every returned row to a
 * {@link Notice} and hands each page to {@code pushNotices}.
 */
@Component
@RequiredArgsConstructor
public class PICCECNoticeCollector extends NoticeCollector {

    /** Prefix prepended to the {@code url} field of a row to build the notice link. */
    private static final String DETAIL_URL_PREFIX = "https://ec.picc.com/cms/default/webfile";

    /** Pattern of the {@code publishDate} field; {@code +0800} is matched as literal text. */
    private static final DateTimeFormatter PUBLISH_DATE_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'+0800'", Locale.CHINESE);

    /** Timeout argument passed to {@code Http.post3}. */
    private static final int REQUEST_TIMEOUT = 5000;

    /** Pause between two page requests, in milliseconds. */
    private static final long PAGE_INTERVAL_MILLIS = 5000L;

    private final NoticeRepository noticeRepository;
    private final ScheduledLogRepository scheduledLogRepository;
    private static final Log logger = Logs.get();

    /** @return the repository injected for scheduled-run logs */
    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return scheduledLogRepository;
    }

    /** @return the repository injected for notices */
    @Override
    public NoticeRepository noticeRepository() {
        return noticeRepository;
    }

    /** @return {@link Channel#PICCEC} */
    @Override
    public Channel channel() {
        return Channel.PICCEC;
    }

    /**
     * Requests pages until one of the following happens: the request fails, the page has
     * no rows, {@code pushNotices} returns {@code false}, a row on the page was published
     * more than two days ago, or the thread is interrupted while pausing.
     */
    @Override
    public void collect() {
        // Supplier solicitation alone would be:
        // "siteId": "725", "categoryId": "211", "city": "", "county": "", "purchaseMode": ""
        for (int page = 1;; page++) {
            Response response = Http.post3(channel().getUrl(),
                    requestBody(page),
                    Header.create().asJsonContentType(),
                    REQUEST_TIMEOUT);
            if (!response.isOK()) {
                logger.debugf("请求发生错误,状态码为:%d", response.getStatus());
                return;
            }
            List<NutMap> records = rows(response.getContent());
            if (records.isEmpty()) {
                return;
            }
            List<Notice> notices = records.stream()
                    .map(this::toNotice)
                    .collect(Collectors.toList());
            if (!pushNotices(notices)) {
                return;
            }
            // Data older than two days: stop paging. Checked before the pause so the
            // final iteration does not wait for nothing.
            LocalDate cutoff = LocalDate.now().minus(2, ChronoUnit.DAYS);
            if (notices.stream().anyMatch(item -> item.getPublishDate().isBefore(cutoff))) {
                return;
            }
            try {
                Thread.sleep(PAGE_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                // restore the flag so the code that scheduled this run can see the interruption
                Thread.currentThread().interrupt();
                logger.debug(e);
                return;
            }
        }
    }

    /** Builds the JSON request body for the given page number. */
    private static String requestBody(int page) {
        return Json.toJson(NutMap.NEW()
                .addv("dto", NutMap.NEW().addv("siteId", "725")
                        .addv("categoryId", "211,213,214,215,216,217")
                        .addv("city", "")
                        .addv("county", "")
                        .addv("purchaseMode", ""))
                .addv("pageNo", page)
                .addv("pageSize", 10));
    }

    /** Extracts {@code res.rows} from the response body; empty when either level is absent. */
    private static List<NutMap> rows(String content) {
        NutMap res = Lang.map(content).getAs("res", NutMap.class);
        List<NutMap> rows = res == null ? null : res.getList("rows", NutMap.class);
        return rows == null ? List.of() : rows;
    }

    /** Maps one row of the response to a {@link Notice}. */
    private Notice toNotice(NutMap item) {
        String url = item.getString("url");
        logger.debugf("获取详情页面Url%s", url);
        // key: the non-zero results of number() over the path segments, joined by '-'
        String key = Arrays.stream(url.split("/"))
                .map(NoticeCollector::number)
                .filter(urlitem -> urlitem != 0)
                .map(urlitem -> urlitem + "")
                .collect(Collectors.joining("-"));
        return Notice.builder()
                .key(key)
                .channel(channel())
                .title(item.getString("title"))
                .content("")
                .url(DETAIL_URL_PREFIX + url)
                .publishDate(LocalDate.parse(item.getString("publishDate"), PUBLISH_DATE_FORMAT))
                .build();
    }
}

View File

@ -0,0 +1,136 @@
package tech.riemann.bidding.component.impl;
import java.io.IOException;
import java.time.LocalDate;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.nutz.lang.Lang;
import org.nutz.log.Log;
import org.nutz.log.Logs;
import org.springframework.stereotype.Component;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the {@link Channel#SWSC} channel.
 *
 * <p>Walks the paginated notice list, converts each list item into a {@link Notice}
 * and hands every page to {@code pushNotices}.
 */
@Component
@RequiredArgsConstructor
public class SWSCNoticeCollector extends NoticeCollector {

    /** Maximum number of list pages requested per run. */
    private static final int MAX_PAGES = 20;

    /** Prefix prepended to the relative links found in the list. */
    private static final String SITE_ROOT = "https://www.swsc.com.cn";

    private final NoticeRepository noticeRepository;
    private static final Log logger = Logs.get();
    private final ScheduledLogRepository scheduledLogRepository;

    /** @return the repository injected for scheduled-run logs */
    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return scheduledLogRepository;
    }

    /** @return the repository injected for notices */
    @Override
    public NoticeRepository noticeRepository() {
        return noticeRepository;
    }

    /** @return {@link Channel#SWSC} */
    @Override
    public Channel channel() {
        return Channel.SWSC;
    }

    /**
     * Builds the URL of a list page: page 1 is the bare list URL, later pages append
     * {@code index_<page>.html}.
     *
     * <p>{@code String.format} silently ignores surplus arguments, so a base URL without
     * a {@code %s} placeholder would produce the same URL for every page. The suffix is
     * therefore appended directly unless the template really contains a placeholder.
     *
     * @param base list URL or URL template of the channel
     * @param page 1-based page number
     * @return the URL to request for that page
     */
    private static String pageUrl(String base, int page) {
        String urlSuffixStr = page == 1 ? "" : "index_" + page + ".html";
        return base.contains("%s") ? String.format(base, urlSuffixStr) : base + urlSuffixStr;
    }

    /**
     * Walks the list pages. Stops at the first empty page, the first page for which
     * {@code pushNotices} returns {@code false}, or the first I/O error.
     */
    @Override
    public void collect() {
        for (int page = 1; page <= MAX_PAGES; page++) {
            String url = pageUrl(channel().getUrl(), page);
            logger.debugf("获取页面内容Url%s", url);
            try {
                Document document = Jsoup.connect(url).get();
                Elements elements = document.select(".m-list ul li");
                if (elements.isEmpty()) {
                    logger.debugf("没有内容,退出:%s", url);
                    break;
                }
                List<Notice> notices = Lang.list();
                for (Element element : elements) {
                    String link = element.select("a").attr("href");
                    Notice notice = Notice.builder()
                            .channel(channel())
                            // key: the link with every "/" and the ".html" suffix removed
                            .key(link.replace("/", "").replace(".html", ""))
                            .title(element.select("a").attr("title"))
                            .content("")
                            .url(SITE_ROOT + link)
                            .publishDate(date(element.select(".li-right").text()))
                            .build();
                    notices.add(notice);
                }
                if (!pushNotices(notices)) {
                    break;
                }
            } catch (IOException e) {
                logger.debug(e);
                break;
            }
        }
    }

    /**
     * A notice matches when no blocked word occurs in its title or content and its
     * title contains at least one keyword.
     */
    @Override
    public boolean match(Notice notice) {
        // NOTE(review): split("") yields one entry per character — confirm the delimiter.
        List<String> blacks = Lang.list(BLOCKED_KEY_WORDS.split(""));
        List<String> keyWords = Lang.list(KEY_WORDS.split(""));
        if (blacks.stream().noneMatch(key -> notice.getTitle().contains(key))
                && blacks.stream().noneMatch(key -> notice.getContent().contains(key))) {
            return keyWords.stream().anyMatch(key -> notice.getTitle().contains(key));
        }
        return false;
    }

    /**
     * Ad-hoc manual check: crawls the list pages and prints every parsed notice to
     * standard output without pushing anything.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        for (int page = 1; page <= MAX_PAGES; page++) {
            String url = pageUrl(Channel.SWSC.getUrl(), page);
            System.out.println("获取页面内容Url" + url);
            try {
                Document document = Jsoup.connect(url).get();
                Elements elements = document.select(".m-list ul li");
                if (elements.isEmpty()) {
                    System.out.println("没有内容,退出:" + url);
                    break;
                }
                List<Notice> notices = Lang.list();
                for (Element element : elements) {
                    System.out.println("element:" + element);
                    String link = element.select("a").attr("href");
                    LocalDate pDate = LocalDate.parse(element.select(".li-right").text());
                    Notice notice = Notice.builder()
                            .channel(Channel.SWSC)
                            .key(link.replace("/", "").replace(".html", ""))
                            .title(element.select("a").attr("title"))
                            .content(element.select("a").text())
                            .url(SITE_ROOT + link)
                            .publishDate(pDate)
                            .build();
                    System.out.println("notice:\t" + notice.toString());
                    notices.add(notice);
                }
            } catch (IOException e) {
                System.out.println("IOException:" + e.getMessage());
                break;
            }
        }
    }
}

View File

@ -0,0 +1,114 @@
package tech.riemann.bidding.component.impl;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.List;
import java.util.stream.Collectors;
import org.nutz.http.Http;
import org.nutz.http.Response;
import org.nutz.lang.Lang;
import org.nutz.lang.util.NutMap;
import org.springframework.stereotype.Component;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.component.NoticeCollector;
import tech.riemann.bidding.entity.Notice;
import tech.riemann.bidding.entity.Notice.Channel;
import tech.riemann.bidding.repository.NoticeRepository;
import tech.riemann.bidding.repository.ScheduledLogRepository;
/**
 * Collector for the Sinochem commerce channel ({@link Channel#SINOCHEMITC}).
 *
 * <p>Queries the bulletin list API page by page, starting from yesterday's date, and
 * hands each page of records to {@code pushNotices}.
 */
@Component
@RequiredArgsConstructor
public class SinoChemitcNoticeCollector extends NoticeCollector {

    /** Format of the {@code startTime} / {@code endTime} query parameters. */
    private static final DateTimeFormatter QUERY_DATE_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd");

    /** Link template of a notice detail page; the placeholder takes the record id. */
    private static final String DETAIL_URL_TEMPLATE = "https://d.sinochemitc.com/#/zcnotice/detale/xq?id=%s";

    private final NoticeRepository noticeRepository;
    private final ScheduledLogRepository scheduledLogRepository;

    /** @return the repository injected for scheduled-run logs */
    @Override
    public ScheduledLogRepository scheduledLogRepository() {
        return scheduledLogRepository;
    }

    /** @return the repository injected for notices */
    @Override
    public NoticeRepository noticeRepository() {
        return noticeRepository;
    }

    /** Bulletin types accepted by the {@code bulletinType} query parameter. */
    @Getter
    @AllArgsConstructor
    public enum Type {
        /** Tender / pre-qualification / change. */
        ZB_YS_BG(1, "招标/预审/变更"),
        /** Non-tender. */
        FZB(3, "非招标"),
        /** Bid evaluation / award result. */
        PB_ZB_JG(2, "评标/中标结果");

        int value;
        String description;
    }

    /**
     * Builds the list query URL for {@link Type#ZB_YS_BG} with a page size of 50.
     *
     * @param page  1-based page number
     * @param start value of the {@code startTime} parameter
     * @param end   value of the {@code endTime} parameter
     * @return the formatted request URL
     */
    protected String url(int page, String start, String end) {
        return String.format(channel().getUrl(), page, 50, Type.ZB_YS_BG.getValue(), start, end);
    }

    /** @return {@link Channel#SINOCHEMITC} */
    @Override
    public Channel channel() {
        return Channel.SINOCHEMITC;
    }

    /**
     * Requests pages until a request fails, a page carries no records, or
     * {@code pushNotices} returns {@code false}.
     */
    @Override
    public void collect() {
        String start = LocalDate.now().minus(1, ChronoUnit.DAYS).format(QUERY_DATE_FORMAT);
        for (int page = 1;; page++) {
            Response response = Http.get(url(page, start, ""));
            if (!response.isOK()) {
                break;
            }
            // A body without "data" or "records" ends the crawl instead of raising an NPE.
            NutMap data = Lang.map(response.getContent()).getAs("data", NutMap.class);
            List<NutMap> records = data == null ? null : data.getList("records", NutMap.class);
            if (records == null || records.isEmpty()) {
                break;
            }
            List<Notice> notices = records.stream()
                    .map(d -> Notice.builder()
                            .channel(channel())
                            .key(d.getString("id"))
                            .title(d.getString("title"))
                            .content(d.getString("content"))
                            .url(String.format(DETAIL_URL_TEMPLATE, d.getString("id")))
                            .publishDate(d.getAs("publishDate", LocalDate.class))
                            .build())
                    .collect(Collectors.toList());
            if (!pushNotices(notices)) {
                break;
            }
        }
    }

    /**
     * A notice matches when no blocked word occurs in its title or content and its
     * title contains at least one keyword. A missing title or content is treated as
     * empty text, because the content is copied from the API response unchecked.
     */
    @Override
    public boolean match(Notice notice) {
        // NOTE(review): split("") yields one entry per character — confirm the delimiter.
        List<String> blacks = Lang.list(BLOCKED_KEY_WORDS.split(""));
        List<String> keyWords = Lang.list(KEY_WORDS.split(""));
        String title = notice.getTitle() == null ? "" : notice.getTitle();
        String content = notice.getContent() == null ? "" : notice.getContent();
        boolean blocked = blacks.stream().anyMatch(key -> title.contains(key) || content.contains(key));
        return !blocked && keyWords.stream().anyMatch(key -> title.contains(key));
    }
}

View File

@ -0,0 +1,49 @@
package tech.riemann.bidding.entity;
import java.time.LocalDateTime;
import org.nutz.dao.entity.annotation.Column;
import org.nutz.dao.entity.annotation.Comment;
import org.nutz.spring.boot.service.entity.IdEntity;
import com.fasterxml.jackson.annotation.JsonFormat;
import com.fasterxml.jackson.annotation.JsonFormat.Shape;
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.media.Schema.RequiredMode;
import lombok.AllArgsConstructor;
import lombok.Builder.Default;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import lombok.experimental.SuperBuilder;
/**
* Base entity that adds creation and last-update timestamps on top of {@link IdEntity}.
* Both timestamps are initialised with {@code LocalDateTime.now()}.
*/
@Data
@SuperBuilder
@NoArgsConstructor
@AllArgsConstructor
@Accessors(chain = true)
@EqualsAndHashCode(callSuper = true)
public class IdBaseEntity extends IdEntity {
/**
* Serialization version identifier.
*/
private static final long serialVersionUID = 1L;
// Creation time, stored in column "created_time" and rendered as "yyyy-MM-dd HH:mm:ss" in JSON.
// NOTE(review): "GMT+8" is passed as the JsonFormat locale; it looks like a time zone — confirm intent.
@Schema(description = "创建时间", requiredMode = RequiredMode.NOT_REQUIRED)
@Column("created_time")
@Comment("创建时间")
@Default
@JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss", locale = "GMT+8", shape = Shape.STRING)
protected LocalDateTime createdTime = LocalDateTime.now();
// Last update time, stored in column "updated_time"; same JSON format as createdTime.
@Schema(description = "最后更新时间", requiredMode = RequiredMode.NOT_REQUIRED)
@Column("updated_time")
@Comment("最后更新时间")
@Default
@JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss", locale = "GMT+8", shape = Shape.STRING)
protected LocalDateTime updatedTime = LocalDateTime.now();
}

View File

@ -0,0 +1,190 @@
package tech.riemann.bidding.entity;
import java.time.LocalDate;
import org.nutz.dao.entity.annotation.ColDefine;
import org.nutz.dao.entity.annotation.ColType;
import org.nutz.dao.entity.annotation.Column;
import org.nutz.dao.entity.annotation.Comment;
import org.nutz.dao.entity.annotation.Table;
import org.nutz.json.JsonField;
import org.nutz.lang.util.NutMap;
import com.fasterxml.jackson.annotation.JsonGetter;
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.media.Schema.RequiredMode;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import lombok.experimental.FieldNameConstants;
import lombok.experimental.SuperBuilder;
/**
* Tender notice entity mapped to table {@code t_notice}.
* The nested {@link Channel} enum lists the sources a notice can come from.
*/
@Data
@SuperBuilder
@NoArgsConstructor
@AllArgsConstructor
@FieldNameConstants
@EqualsAndHashCode(callSuper = true)
@Accessors(chain = true)
@Table("t_notice")
@Comment("招标公告")
@Schema(name = "Notice", description = "招标公告")
public class Notice extends IdBaseEntity {
private static final long serialVersionUID = 1L;
// Notice identifier, unique within a channel; column "n_key", not null, width 100.
@Schema(description = "公告唯一标识(渠道内唯一)", requiredMode = RequiredMode.REQUIRED)
@Column("n_key")
@Comment("公告唯一标识(渠道内唯一)")
@ColDefine(notNull = true, width = 100)
String key;
// Notice title; column "n_title", not null, width 200.
@Schema(description = "公告标题", requiredMode = RequiredMode.REQUIRED)
@Column("n_title")
@Comment("公告标题")
@ColDefine(notNull = true, width = 200)
String title;
// Notice link; column "n_url", not null, width 500.
@Schema(description = "公告链接", requiredMode = RequiredMode.REQUIRED)
@Column("n_url")
@Comment("公告链接")
@ColDefine(notNull = true, width = 500)
String url;
// Source channel of the notice; column "n_channel", not null, width 50.
@Schema(description = "公告渠道", requiredMode = RequiredMode.REQUIRED)
@Column("n_channel")
@Comment("公告渠道")
@ColDefine(notNull = true, width = 50)
Channel channel;
// Notice content; column "n_content", nullable TEXT.
@Schema(description = "公告内容", requiredMode = RequiredMode.AUTO)
@Column("n_content")
@Comment("公告内容")
@ColDefine(notNull = false, type = ColType.TEXT)
String content;
// Tender amount; column "n_amount", nullable.
@Schema(description = "招标金额", requiredMode = RequiredMode.AUTO)
@Column("n_amount")
@Comment("招标金额")
@ColDefine(notNull = false)
double amount;
// Publication date; column "n_publish_date", nullable.
@Schema(description = "发布时间", requiredMode = RequiredMode.AUTO)
@Column("n_publish_date")
@Comment("发布时间")
@ColDefine(notNull = false)
LocalDate publishDate;
/**
* Notice sources. Each constant carries a short code, a description and a URL;
* some URLs contain {@code String.format} placeholders that are filled in by the code using them.
*/
@Getter
@AllArgsConstructor
public enum Channel {
/**
* Sinochem commerce; placeholders: current page, page size, bulletin type, start time, end time.
*/
SINOCHEMITC("sinochemitc", "中化商务",
"https://d.sinochemitc.com/api/management/bidding/hy-bulletin-list?current=%d&size=%d&bulletinType=%d&bidType=&keyword=&startTime=%s&endTime=%s"),
/**
* China Comservice headquarters; one numeric placeholder in the index page name.
*/
CHINA_CCSSCM(
"chinaccsscm", "中通服总部",
"https://zb.chinaccsscm.cn/zbgg/index_%d.jhtml"),
/**
* Jincai network (cfcpn); URL without placeholders.
*/
CFCPN(
"cfcpn", "金采网",
"http://www.cfcpn.com/jcw/noticeinfo/noticeInfo/dataNoticeList"),
/**
* Tendering and procurement network; placeholders: page, keyword.
* Original note: within 1 day; Beijing, Shanghai, Chongqing; title match only
* (presumably the date, h_province and search_field parameters — confirm).
*/
GCZB(
"gczb", "招标与采购网",
"https://www.gc-zb.com/search/index.html?page=%d&keyword=%s&h_lx=&h_province=19,43,47&vague=0&date=1&search_field=1"),
/**
* Bank of China procurement notices; one string placeholder in the index page name.
*/
BOC(
"boc", "中国银行采购公告",
"https://www.bankofchina.com/aboutboc/bi6/index%s.html"),
/**
* PICC e-procurement; URL without placeholders.
*/
PICCEC(
"piccec", "人保E采",
"https://ec.picc.com/cms/api/dynamicData/queryContentPage"),
/**
* Guoxin tendering; placeholder: page.
*/
GXZB(
"gxzb", "国信招标",
"https://ebid.gxzb.com.cn/cms/category/bulletinList.html?searchDate=1999-04-25&dates=300&word=&categoryId=88&exactSearch=&industryName=&status=&tabName=招标投标&page=%d"),
/**
* Shanghai Baohua International; URL without placeholders.
*/
BAO_STEEL(
"baosteel", "上海宝华国际",
"https://baosteelbidding.zbytb.com/fuwu/"),
/**
* Bank of Chongqing procurement supplier solicitation notices; one string placeholder in the index page name.
*/
BOCQ(
"bocq", "重庆银行采购供应商征集公告",
"http://www.cqcbank.com.cn/cn/jrch/cgxx/hjjkh/ddfa/index%s.html"),
/**
* Southwest Securities; the URL has no placeholder, so passing it through
* String.format with extra arguments returns it unchanged.
*/
SWSC(
"swsc", "西南证券",
"https://www.swsc.com.cn/html/goSwsc/cgxxgg/cgxmgs/"),
/**
* Chongqing Rural Commercial Bank; URL without placeholders.
*/
CQRCB(
"cqrcb", "重庆农村商业银行",
"https://www.cqrcb.com/cqrcb/aboutus/cgxx"),
/**
* China tendering and bidding public service platform; URL without placeholders.
*/
CEBPUBSERVICE(
"cebpubservice", "中国招标投标公共服务平台",
"http://www.cebpubservice.com/"),
/**
* Guoxin e-procurement; placeholders: keyword, page.
*/
E_BIDDING(
"ebidding", "国信e采",
"https://www.e-bidding.org/cms/category/bulletinList.html?searchDate=1999-06-21&dates=300&word=%s&categoryId=88&exactSearch=&industryName=&status=&tabName=招标投标&page=%d"),
/**
* Guotai Junan; placeholder: keyword.
*/
GTJA(
"gtja", "国泰君安",
"https://www.gtja.com/content/info-open/supplier/purchase-info.html?year=&keyword=%s");
// short code of the channel
String code;
// description of the channel
String description;
// URL or String.format template of the channel
String url;
}
/**
* Exposes the channel as a map with the keys "name", "code" and "description".
*
* @return the channel information, or {@code null} when no channel is set
*/
@JsonGetter
@JsonField
public NutMap getChannelInfo() {
return channel == null ? null
: NutMap.NEW()
.addv("name", channel.name())
.addv("code", channel.getCode())
.addv("description",
channel.getDescription());
}
/**
* Intentionally a no-op; the channel information is derived from {@code channel}.
* Presumably present so that a "channelInfo" property in incoming data has a setter — confirm.
*
* @param typeInfo ignored
*/
public void setChannelInfo(NutMap typeInfo) {
// do nothing
}
}

View File

@ -0,0 +1,62 @@
package tech.riemann.bidding.entity;
import java.time.LocalDateTime;
import org.nutz.dao.entity.annotation.ColDefine;
import org.nutz.dao.entity.annotation.Column;
import org.nutz.dao.entity.annotation.Comment;
import org.nutz.dao.entity.annotation.Table;
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.media.Schema.RequiredMode;
import lombok.AllArgsConstructor;
import lombok.Builder.Default;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import lombok.experimental.FieldNameConstants;
import lombok.experimental.SuperBuilder;
import tech.riemann.bidding.entity.Notice.Channel;
/**
* Scheduling log entity mapped to table {@code t_scheduled_log}: records the channel,
* the start and end time and the thread id of a run.
*/
@Data
@SuperBuilder
@NoArgsConstructor
@AllArgsConstructor
@FieldNameConstants
@EqualsAndHashCode(callSuper = true)
@Accessors(chain = true)
@Table("t_scheduled_log")
@Comment("调度日志")
@Schema(name = "ScheduledLog", description = "调度日志")
public class ScheduledLog extends IdBaseEntity {
private static final long serialVersionUID = 1L;
// Notice channel the run belongs to; column "l_channel", not null, width 50.
@Schema(description = "公告渠道", requiredMode = RequiredMode.REQUIRED)
@Column("l_channel")
@Comment("公告渠道")
@ColDefine(notNull = true, width = 50)
Channel channel;
// Start time; column "l_start"; initialised with LocalDateTime.now().
@Schema(description = "开始时间", requiredMode = RequiredMode.REQUIRED)
@Column("l_start")
@Comment("开始时间")
@Default
LocalDateTime start = LocalDateTime.now();
// End time; column "l_end"; initialised with LocalDateTime.now().
@Schema(description = "结束时间", requiredMode = RequiredMode.REQUIRED)
@Column("l_end")
@Comment("结束时间")
@Default
LocalDateTime end = LocalDateTime.now();
// Thread id; column "l_thread_id".
@Schema(description = "线程id", requiredMode = RequiredMode.REQUIRED)
@Column("l_thread_id")
@Comment("线程id")
long threadId;
}

View File

@ -0,0 +1,21 @@
package tech.riemann.bidding.repository;
import org.nutz.dao.Dao;
import org.nutz.spring.boot.service.interfaces.IdEntityService;
import org.springframework.stereotype.Repository;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.entity.Notice;
/**
 * Repository for {@link Notice} entities; exposes the injected {@link Dao} to the
 * {@link IdEntityService} default operations.
 */
@Repository
public class NoticeRepository implements IdEntityService<Notice> {

    private final Dao dao;

    /**
     * @param dao data access object used for all notice operations
     */
    public NoticeRepository(Dao dao) {
        this.dao = dao;
    }

    /** @return the data access object supplied at construction */
    @Override
    public Dao dao() {
        return this.dao;
    }
}

View File

@ -0,0 +1,21 @@
package tech.riemann.bidding.repository;
import org.nutz.dao.Dao;
import org.nutz.spring.boot.service.interfaces.IdEntityService;
import org.springframework.stereotype.Repository;
import lombok.RequiredArgsConstructor;
import tech.riemann.bidding.entity.ScheduledLog;
/**
 * Repository for {@link ScheduledLog} entities; exposes the injected {@link Dao} to the
 * {@link IdEntityService} default operations.
 */
@Repository
public class ScheduledLogRepository implements IdEntityService<ScheduledLog> {

    private final Dao dao;

    /**
     * @param dao data access object used for all scheduled-log operations
     */
    public ScheduledLogRepository(Dao dao) {
        this.dao = dao;
    }

    /** @return the data access object supplied at construction */
    @Override
    public Dao dao() {
        return this.dao;
    }
}

View File

@ -0,0 +1 @@
# Application name. NOTE(review): the YAML configuration added in the same commit sets the same property — confirm both are needed.
spring.application.name=bidding

View File

@ -0,0 +1,62 @@
# Application configuration: Spring application name and Jackson date format,
# Druid datasource, nutz DAO settings and logging.
spring:
application:
name: bidding
jackson:
date-format: yyyy-MM-dd HH:mm:ss
# Datasource: Druid pool against a local MySQL database.
datasource:
type: com.alibaba.druid.pool.DruidDataSource
# NOTE(review): confirm the MySQL driver is on the classpath.
driver-class-name: com.mysql.cj.jdbc.Driver
url: jdbc:mysql://localhost:3306/bidding
# NOTE(review): plaintext credentials are committed here; consider supplying them from the environment.
username: root
password: 123456
# Druid pool sizing, validation and monitoring settings.
druid:
db-type: mysql
filters: stat,wall,log4j2
initial-size: 10
min-idle: 1
max-active: 50
max-wait: 60000
time-between-eviction-runs-millis: 60000
min-evictable-idle-time-millis: 300000
validation-query: SELECT 'ezalor'
test-while-idle: true
test-on-borrow: true
test-on-return: false
pool-prepared-statements: true
max-pool-prepared-statement-per-connection-size: 20
web-stat-filter:
enabled: true
url-pattern: /*
exclusions: /druid/*,*.js,*.gif,*.jpg,*.png,*.css,*.ico
# NOTE(review): the Druid stat view is enabled with reset allowed and no login shown here — confirm it is not exposed publicly.
stat-view-servlet:
enabled: true
url-pattern: /druid/*
reset-enable: true
# nutz DAO: entity packages to scan, table creation/migration switches and SQL template locations.
nutz:
dao:
runtime:
basepackage:
- tech.riemann.bidding.entity
- BOOT-INF.classes.tech.riemann.bidding.entity
# - tech.riemann.bidding.component
check-index: true
create: true
delete-column: false
migration: true
sql-template:
enable: true
type: beetl
sql-manager:
paths:
- sqls/mysql
- BOOT-INF/classes/sqls/mysql
# Logging: file location and per-logger levels.
# NOTE(review): both file name and file path are set; presumably only one takes effect — confirm against the Spring Boot logging docs.
logging:
file:
name: ${user.home}/logs/${spring.application.name}.log
path: ${user.home}/logs
level:
"[org.nutz]": debug
"[tech.riemann]": debug
"[org.apache.logging]": off
springfox: off
"[io.swagger]": off