@@ -266,6 +266,10 @@ def __init__(self, tag, attrs = dict()):
266
266
self .tag = tag
267
267
self .attrs = attrs
268
268
269
+ # String representation
270
def __repr__(self):
    """Return the element's tag as its string representation."""
    tag = self.tag
    return tag
272
+
269
273
# Support comparison (compare by tag only)
270
274
def __eq__ (self , other ):
271
275
if other is Element :
@@ -291,12 +295,22 @@ class Section:
291
295
"""
292
296
293
297
# Initialize HTML section
294
def __init__(self, el, depth = 0):
    """Set up a section for the given element.

    Args:
        el: The element the section belongs to; kept as ``self.el``.
        depth: Depth recorded for the section, kept as ``self.depth``.
            Defaults to 0. Callers visible in this file pass the length
            of the parser context at the time the heading is opened.
    """
    # Remember the originating element and the depth it was opened at
    self.el, self.depth = el, depth

    # Section data: text and title start out as empty lists, and the
    # section has no identifier until one is assigned
    self.text, self.title, self.id = [], [], None
299
306
307
+ # String representation
308
def __repr__(self):
    """Return ``tag#id`` if the section has an identifier, else ``tag``."""
    tag = self.el.tag

    # Sections without a (truthy) identifier are represented by tag only
    if not self.id:
        return tag
    return "#".join((tag, self.id))
313
+
300
314
# Check whether the section should be excluded
301
315
def is_excluded(self):
    """Return whatever the section's element reports from ``is_excluded()``."""
    element = self.el
    return element.is_excluded()
@@ -350,15 +364,16 @@ def handle_starttag(self, tag, attrs):
350
364
351
365
# Handle headings
352
366
if tag in ([f"h{ x } " for x in range (1 , 7 )]):
367
+ depth = len (self .context )
353
368
if "id" in attrs :
354
369
355
370
# Ensure top-level section
356
371
if tag != "h1" and not self .data :
357
- self .section = Section (Element ("hx" ))
372
+ self .section = Section (Element ("hx" ), depth )
358
373
self .data .append (self .section )
359
374
360
375
# Set identifier, if not first section
361
- self .section = Section (el )
376
+ self .section = Section (el , depth )
362
377
if self .data :
363
378
self .section .id = attrs ["id" ]
364
379
@@ -398,6 +413,20 @@ def handle_endtag(self, tag):
398
413
if not self .context or self .context [- 1 ] != tag :
399
414
return
400
415
416
+ # Check whether we're exiting the current context, which happens when
417
+ # a headline is nested in another element. In that case, we close the
418
+ # current section, continuing to append data to the previous section,
419
+ # which could also be a nested section – see https://bit.ly/3IxxIJZ
420
+ if self .section .depth > len (self .context ):
421
+ for section in reversed (self .data ):
422
+ if section .depth and section .depth <= len (self .context ):
423
+
424
+ # Set depth to 0 in order to denote that the current section
425
+ # is exited and must not be considered again.
426
+ self .section .depth = 0
427
+ self .section = section
428
+ break
429
+
401
430
# Remove element from skip list
402
431
el = self .context .pop ()
403
432
if el in self .skip :
@@ -407,19 +436,14 @@ def handle_endtag(self, tag):
407
436
# Render closing tag if kept
408
437
if not self .skip .intersection (self .context ):
409
438
if tag in self .keep :
439
+
440
+ # Check whether we're inside the section title
410
441
data = self .section .text
411
- if self .section .el in reversed ( self .context ) :
442
+ if self .section .el in self .context :
412
443
data = self .section .title
413
444
414
- # Remove element if empty (or only whitespace)
415
- if data [- 1 ] == f"<{ tag } >" :
416
- del data [- 1 :]
417
- elif data [- 1 ].isspace () and data [- 2 ] == f"<{ tag } >" :
418
- del data [- 2 :]
419
-
420
445
# Append to section title or text
421
- else :
422
- data .append (f"</{ tag } >" )
446
+ data .append (f"</{ tag } >" )
423
447
424
448
# Called for the text contents of each tag
425
449
def handle_data (self , data ):
@@ -439,7 +463,7 @@ def handle_data(self, data):
439
463
self .data .append (self .section )
440
464
441
465
# Handle section headline
442
- if self .section .el in reversed ( self .context ) :
466
+ if self .section .el in self .context :
443
467
permalink = False
444
468
for el in self .context :
445
469
if el .tag == "a" and el .attrs .get ("class" ) == "headerlink" :
0 commit comments