#!/usr/bin/env php
<?php

// API Credentials
// You can either provide them as environment variables
// or hard-code them in the empty strings below.
$baseUrl = getenv('BS_URL') ?: '';
$clientId = getenv('BS_TOKEN_ID') ?: '';
$clientSecret = getenv('BS_TOKEN_SECRET') ?: '';

// Output File
// Can be provided as an argument when calling the script
// or be hard-coded as a string below.
$outputFile = $argv[1] ?? './sitemap.xml';

// Script logic
////////////////

// Check we have required options
if (empty($outputFile)) {
    errorOut("An output file needs to be provided");
}
if (empty($baseUrl) || empty($clientId) || empty($clientSecret)) {
    errorOut("A base URL, token ID and token secret need to be provided (BS_URL, BS_TOKEN_ID, BS_TOKEN_SECRET)");
}

// Create the output folder if it does not exist.
// The trailing is_dir() re-check tolerates another process creating it first.
$outDir = dirname($outputFile);
if (!is_dir($outDir) && !mkdir($outDir, 0777, true) && !is_dir($outDir)) {
    errorOut("Could not create output directory: {$outDir}");
}

// Clean up the base path
$baseUrl = rtrim($baseUrl, '/');

// Additional endpoints not fetched via API entities
$nowDate = date('Y-m-d');
$additionalEndpoints = [
    ['endpoint' => '/', 'updated' => $nowDate],
    ['endpoint' => '/books', 'updated' => $nowDate],
    ['endpoint' => '/search', 'updated' => $nowDate],
    ['endpoint' => '/login', 'updated' => $nowDate],
];

// Get all shelf URLs
$shelves = getAllOfAtListEndpoint("api/shelves", []);
$shelfEndpoints = array_map(function ($shelf) {
    return ['endpoint' => '/shelves/' . $shelf['slug'], 'updated' => $shelf['updated_at']];
}, $shelves);

// Get all book URLs and build an id => slug map for chapters & pages
$books = getAllOfAtListEndpoint("api/books", []);
$bookSlugsById = array_column($books, 'slug', 'id');
$bookEndpoints = array_map(function ($book) {
    return ['endpoint' => '/books/' . $book['slug'], 'updated' => $book['updated_at']];
}, $books);

// Get all chapter URLs
$chapters = getAllOfAtListEndpoint("api/chapters", []);
$chapterEndpoints = array_map(function ($chapter) use ($bookSlugsById) {
    $bookSlug = $bookSlugsById[$chapter['book_id']];
    return ['endpoint' => '/books/' . $bookSlug . '/chapter/' . $chapter['slug'], 'updated' => $chapter['updated_at']];
}, $chapters);

// Get all page URLs
$pages = getAllOfAtListEndpoint("api/pages", []);
$pageEndpoints = array_map(function ($page) use ($bookSlugsById) {
    $bookSlug = $bookSlugsById[$page['book_id']];
    return ['endpoint' => '/books/' . $bookSlug . '/page/' . $page['slug'], 'updated' => $page['updated_at']];
}, $pages);

// Gather all our endpoints.
// array_merge() is required here: the "+" union operator keeps the first
// value for each numeric key, which would silently drop most list entries.
$allEndpoints = array_merge(
    $additionalEndpoints,
    $pageEndpoints,
    $chapterEndpoints,
    $bookEndpoints,
    $shelfEndpoints
);

// Fetch our sitemap XML
$xmlSitemap = generateSitemapXml($allEndpoints);
// Write to the output file
if (file_put_contents($outputFile, $xmlSitemap) === false) {
    errorOut("Could not write sitemap to: {$outputFile}");
}
81+
/**
 * Build the sitemap XML document for the given endpoints.
 *
 * Each entry supplies an 'endpoint' path, which is appended to the global
 * $baseUrl to form the <loc> value, and an 'updated' date string, which is
 * parsed by DateTime and written as a Y-m-d <lastmod>. Every entry is given
 * a 'monthly' change frequency and a priority of '0.8'.
 *
 * @param array $endpoints List of ['endpoint' => string, 'updated' => string] entries.
 * @return string The serialised XML document.
 */
function generateSitemapXml(array $endpoints): string
{
    global $baseUrl;

    $document = new DOMDocument("1.0", "UTF-8");
    $root = $document->createElement('urlset');
    $root->setAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
    $document->appendChild($root);

    foreach ($endpoints as $entry) {
        $lastModified = new DateTime($entry['updated']);

        $urlNode = $document->createElement('url');

        // The location is added as a text node so special characters are escaped.
        $locNode = $document->createElement('loc');
        $urlNode->appendChild($locNode);
        $locNode->appendChild($document->createTextNode($baseUrl . $entry['endpoint']));

        $urlNode->appendChild($document->createElement('lastmod', $lastModified->format('Y-m-d')));
        $urlNode->appendChild($document->createElement('changefreq', 'monthly'));
        $urlNode->appendChild($document->createElement('priority', '0.8'));

        $root->appendChild($urlNode);
    }

    return $document->saveXML();
}
108+
/**
 * Consume all items from the given API listing endpoint.
 *
 * Requests the listing in pages of 100 items, advancing the 'offset' query
 * parameter until it reaches the 'total' reported by the response, and
 * collects every item from each response's 'data' list.
 *
 * @param string $endpoint Listing endpoint path without a query string, e.g. "api/books".
 * @param array  $params   Extra query parameters sent with every page request.
 * @return array All items gathered across every page.
 */
function getAllOfAtListEndpoint(string $endpoint, array $params): array
{
    $count = 100;
    $offset = 0;
    $all = [];

    do {
        // Build the page URL in its own variable; reassigning $endpoint here
        // would stack a new query string onto the previous one every loop.
        $query = http_build_query(array_merge($params, ['count' => $count, 'offset' => $offset]));
        $resp = apiGetJson($endpoint . '?' . $query);

        $total = $resp['total'] ?? 0;
        // Append item by item so that an empty page is a harmless no-op.
        foreach ($resp['data'] ?? [] as $item) {
            $all[] = $item;
        }
        $offset += $count;
    } while ($offset < $total);

    return $all;
}
130+
/**
 * Make a simple GET HTTP request to the API.
 *
 * The URL is formed from the global $baseUrl and the given endpoint, and the
 * request carries an "Authorization: Token <id>:<secret>" header built from
 * the global $clientId and $clientSecret.
 *
 * @param string $endpoint Endpoint path (with any query string) relative to the base URL.
 * @return string The raw response body.
 * @throws RuntimeException When the request fails or the resource cannot be read.
 */
function apiGet(string $endpoint): string
{
    global $baseUrl, $clientId, $clientSecret;

    $url = rtrim($baseUrl, '/') . '/' . ltrim($endpoint, '/');
    $opts = ['http' => ['header' => "Authorization: Token {$clientId}:{$clientSecret}"]];
    $context = stream_context_create($opts);

    // Capture the warning raised on failure instead of suppressing it with
    // "@", so the cause can be reported to the user.
    $warning = null;
    set_error_handler(function (int $severity, string $message) use (&$warning): bool {
        $warning = $message;
        return true;
    });
    try {
        $body = file_get_contents($url, false, $context);
    } finally {
        restore_error_handler();
    }

    if ($body === false) {
        $detail = $warning !== null ? ": {$warning}" : '';
        throw new RuntimeException("API request to [{$url}] failed{$detail}");
    }

    return $body;
}
142+
/**
 * Make a simple GET HTTP request to the API &
 * decode the JSON response to an array.
 *
 * @param string $endpoint Endpoint path (with any query string) passed on to apiGet().
 * @return array The decoded response as an associative array.
 * @throws RuntimeException When the response body does not decode to an array.
 */
function apiGetJson(string $endpoint): array
{
    $decoded = json_decode(apiGet($endpoint), true);

    // json_decode() yields null for invalid JSON and scalars for scalar JSON;
    // report either case clearly instead of failing the array return type.
    if (!is_array($decoded)) {
        $reason = json_last_error() !== JSON_ERROR_NONE
            ? json_last_error_msg()
            : 'response was not a JSON object or array';
        throw new RuntimeException("Invalid JSON response from API endpoint [{$endpoint}]: {$reason}");
    }

    return $decoded;
}
152+
/**
 * DEBUG: var_dump() each of the given values in order, then
 * terminate the script with exit status 1.
 *
 * @param mixed ...$args Values to dump.
 */
function dd(...$args)
{
    array_map(function ($value) {
        var_dump($value);
    }, $args);

    exit(1);
}
163+
/**
 * Alert of an error then exit the script.
 *
 * The message is prefixed with "ERROR: " and written to standard error with
 * a trailing line break, so it stays out of any piped standard output and
 * does not run into the next shell prompt. The script then exits with
 * status 1.
 *
 * @param string $text Description of the error.
 */
function errorOut(string $text)
{
    fwrite(STDERR, "ERROR: " . $text . PHP_EOL);
    exit(1);
}
0 commit comments