1
1
package main
2
2
3
3
import (
4
+ "crypto/hmac"
5
+ "crypto/sha256"
6
+ "encoding/base64"
4
7
"encoding/json"
5
8
"flag"
6
9
"fmt"
7
10
"io/ioutil"
8
11
"math/rand"
9
12
"net/http"
13
+ "net/url"
10
14
"os"
11
15
"regexp"
12
16
"strings"
@@ -21,7 +25,7 @@ import (
21
25
22
26
const (
23
27
ListPostURL = "https://blog.csdn.net/%s/article/list/%d?"
24
- PostDetailURL = "https://mp .csdn.net/mdeditor/ getArticle?id=%s"
28
+ PostDetailURL = "https://bizapi .csdn.net/blog-console-api/v3/editor/ getArticle?id=%s&model_type= "
25
29
HexoHeader = `
26
30
---
27
31
title: %s
@@ -40,7 +44,6 @@ categories: %s
40
44
</html>`
41
45
)
42
46
43
- var postTime = time .Now ()
44
47
45
48
type DetailData struct {
46
49
Data PostDetail `json:"data"`
@@ -63,39 +66,45 @@ var (
63
66
count int
64
67
wg sync.WaitGroup
65
68
bar * pgbar.Bar
69
+ postTime = time .Now ()
70
+ )
71
+
72
// Request-signing parameters used when calling the article-detail API.
//
// NOTE(review): appSecret is a credential committed to source control;
// consider loading it from configuration instead.

// appSecret is the HMAC-SHA256 key used by createSignature.
const appSecret = "9znpamsyl2c7cdrr9sas0le9vbc3r6ba"

// appCaKey is sent as the x-ca-key header and is part of the signed message.
const appCaKey = "203803574"

// signHeaders is sent as the x-ca-signature-headers header; it names the
// headers that take part in the signature.
const signHeaders = "x-ca-key,x-ca-nonce"
67
77
68
78
func init () {
69
79
flag .StringVar (& username , "username" , "junmoxi" , "your csdn username" )
70
- flag .StringVar (& cookie , "cookie" , "UserName=junmoxi; UserToken=c3c29cca48be43c4884fe36d052d5851 ;" , "your csdn cookie" )
80
+ flag .StringVar (& cookie , "cookie" , "UserName=junmoxi;UserToken=b9023fb39b534543a5e65f7cae7cb3c4 ;" , "your csdn cookie" )
71
81
flag .IntVar (& page , "page" , - 1 , "download pages" )
72
82
flag .Parse ()
83
+ rand .Seed (time .Now ().Unix ())
73
84
}
74
85
75
86
func main () {
76
87
urls , err := crawlPosts (username )
77
88
if err != nil {
78
89
panic (err )
79
90
}
80
-
81
91
bar = pgbar .NewBar (0 , "下载进度" , len (urls ))
82
- for _ , url := range urls {
92
+
93
+ for _ , ul := range urls {
83
94
wg .Add (1 )
84
- go crawlPostMarkdown (url )
95
+ go crawlPostMarkdown (ul )
85
96
}
86
-
87
97
wg .Wait ()
88
98
}
89
99
90
100
// Crawl posts by username
91
101
func crawlPosts (username string ) ([]string , error ) {
92
- client := http.Client {}
93
- var (
94
- urls []string
95
- )
102
+ defer fmt .Println ("地址抓取完成,开始下载..." )
96
103
104
+ var urls []string
97
105
for {
98
- resp , err := client .Get (fmt .Sprintf (ListPostURL , username , currentPage ))
106
+ fmt .Printf ("正在抓取第%d页文章地址... \n " , currentPage )
107
+ resp , err := http .DefaultClient .Get (fmt .Sprintf (ListPostURL , username , currentPage ))
99
108
if err != nil {
100
109
return nil , err
101
110
}
@@ -124,34 +133,50 @@ func crawlPosts(username string) ([]string, error) {
124
133
}
125
134
126
135
func crawlPostMarkdown (url string ) {
136
+ defer wg .Done ()
137
+ defer bar .Add ()
138
+ defer func () {
139
+ if err := recover (); err != nil {
140
+ fmt .Println (err )
141
+ }
142
+ }()
143
+
127
144
index := strings .LastIndex (url , "/" )
128
145
id := url [index + 1 :]
146
+ apiUrl := fmt .Sprintf (PostDetailURL , id )
129
147
130
- client := http.Client {}
148
+ uuid := createUUID ()
149
+ sign := createSignature (uuid , apiUrl )
131
150
132
- req , _ := http .NewRequest ("GET" , fmt . Sprintf ( PostDetailURL , id ) , nil )
151
+ req , _ := http .NewRequest ("GET" ,apiUrl , nil )
133
152
req .Header .Set ("cookie" , cookie )
153
+ req .Header .Set ("x-ca-key" , appCaKey )
154
+ req .Header .Set ("x-ca-nonce" , uuid )
155
+ req .Header .Set ("x-ca-signature" , sign )
156
+ req .Header .Set ("x-ca-signature-headers" , signHeaders )
157
+ req .Header .Set ("Accept" , "*/*" )
134
158
135
- resp , err := client .Do (req )
159
+ resp , err := http . DefaultClient .Do (req )
136
160
if err != nil {
137
161
return
138
162
}
139
-
163
+ if resp .StatusCode != http .StatusOK {
164
+ return
165
+ }
140
166
data , err := ioutil .ReadAll (resp .Body )
141
167
if err != nil {
142
168
return
143
169
}
144
-
145
- post := new (DetailData )
146
- err = json .Unmarshal (data , post )
170
+ var post DetailData
171
+ err = json .Unmarshal (data , & post )
147
172
if err != nil {
148
173
return
149
174
}
150
175
151
176
if post .Data .Markdowncontent != "" {
152
- go buildMarkdownPost (post .Data )
177
+ buildMarkdownPost (post .Data )
153
178
} else if post .Data .Content != "" {
154
- go buildHtmlPost (post .Data )
179
+ buildHtmlPost (post .Data )
155
180
}
156
181
}
157
182
@@ -171,15 +196,10 @@ func buildMarkdownPost(post PostDetail) {
171
196
rand .Seed (time .Now ().UnixNano ())
172
197
d := rand .Intn (3 ) + 1
173
198
postTime = postTime .AddDate (0 , 0 , - d ).Add (time .Hour )
174
-
175
199
count ++
176
-
177
- defer wg .Done ()
178
- defer bar .Add ()
179
200
}
180
201
181
202
func buildHtmlPost (post PostDetail ) {
182
-
183
203
html := fmt .Sprintf (HtmlBody , post .Title , post .Content )
184
204
err := ioutil .WriteFile (
185
205
fmt .Sprintf ("%s.html" , post .Title ),
@@ -188,7 +208,42 @@ func buildHtmlPost(post PostDetail) {
188
208
if err != nil {
189
209
return
190
210
}
211
+ }
191
212
192
- defer wg .Done ()
193
- defer bar .Add ()
213
+ func createSignature (uuid , apiUrl string ) string {
214
+ u , err := url .Parse (apiUrl )
215
+ if err != nil {
216
+ panic (err )
217
+ }
218
+ query := u .Query ().Encode ()
219
+ query = query [:len (query )- 1 ]
220
+ message := fmt .Sprintf ("GET\n */*\n \n \n \n x-ca-key:%s\n x-ca-nonce:%s\n %s?%s" , appCaKey , uuid , u .Path , query )
221
+ hc := hmac .New (sha256 .New , []byte (appSecret ))
222
+ hc .Write ([]byte (message ))
223
+ res := hc .Sum (nil )
224
+
225
+ result := base64 .StdEncoding .EncodeToString (res )
226
+ return result
194
227
}
228
+
229
// createUUID returns a random identifier in the textual layout of a version-4
// UUID: lowercase hexadecimal, 8-4-4-4-12 groups, with the version digit fixed
// to 4 and the variant digit drawn from 8, 9, a or b.
//
// Randomness comes from the shared math/rand source, so the result is suitable
// as a request nonce but not as a cryptographic secret.
func createUUID() string {
	const (
		layout    = "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
		hexDigits = "0123456789abcdef"
		variants  = "89ab"
	)

	var b strings.Builder
	b.Grow(len(layout))
	for i := 0; i < len(layout); i++ {
		switch c := layout[i]; c {
		case 'x':
			b.WriteByte(hexDigits[rand.Intn(len(hexDigits))])
		case 'y':
			b.WriteByte(variants[rand.Intn(len(variants))])
		default:
			// Hyphens and the fixed version digit are copied verbatim.
			b.WriteByte(c)
		}
	}
	return b.String()
}
0 commit comments