提交 5112269c authored 作者: 朱政's avatar 朱政

fix: 修复智库报告页面搜索功能的 bug

上级 5d3fed98
流水线 #409 已通过 于阶段
in 1 分 41 秒
...@@ -47,6 +47,7 @@ export default { ...@@ -47,6 +47,7 @@ export default {
const searchKey = ref(''); const searchKey = ref('');
const matchList = ref([]); const matchList = ref([]);
const matchIdx = ref(0); const matchIdx = ref(0);
const pageIndexCache = {};
// pdfjs 3.x 的 renderTextLayer 在不同入口下导出不一致,这里做一次缓存 + 兜底加载 // pdfjs 3.x 的 renderTextLayer 在不同入口下导出不一致,这里做一次缓存 + 兜底加载
const pdfjsApiRef = shallowRef(pdfjsLib); const pdfjsApiRef = shallowRef(pdfjsLib);
...@@ -75,6 +76,12 @@ export default { ...@@ -75,6 +76,12 @@ export default {
if (!layer) return; if (!layer) return;
const rects = layer.querySelectorAll('.highlight-rect'); const rects = layer.querySelectorAll('.highlight-rect');
rects.forEach(n => n.remove()); rects.forEach(n => n.remove());
const pageWrap = layer.closest('.page-wrap');
if (pageWrap) {
const wrapMarks = pageWrap.querySelectorAll('.highlight-rect');
wrapMarks.forEach(n => n.remove());
}
}); });
}; };
...@@ -137,6 +144,14 @@ export default { ...@@ -137,6 +144,14 @@ export default {
textDivs: [], textDivs: [],
enhanceTextSelection: false enhanceTextSelection: false
}).promise; }).promise;
} else if (typeof api?.TextLayer === 'function') {
// pdfjs-dist v4/v5:renderTextLayer 已移除,改用 TextLayer.render()
const tl = new api.TextLayer({
textContentSource: textContent,
container: textLayer,
viewport
});
await tl.render();
} }
} catch (e) { } catch (e) {
console.warn('textLayer 渲染失败', e); console.warn('textLayer 渲染失败', e);
...@@ -164,6 +179,7 @@ export default { ...@@ -164,6 +179,7 @@ export default {
searchKey.value = ''; searchKey.value = '';
renderedPageCount.value = 0; renderedPageCount.value = 0;
resolveRenderAll = null; resolveRenderAll = null;
Object.keys(pageIndexCache).forEach(k => delete pageIndexCache[k]);
try { try {
const pdf = await pdfjsLib.getDocument(url).promise; const pdf = await pdfjsLib.getDocument(url).promise;
...@@ -192,20 +208,120 @@ export default { ...@@ -192,20 +208,120 @@ export default {
if (!doc || !key) return; if (!doc || !key) return;
// 首次搜索时确保所有页的 textLayer 已渲染完成,避免“越搜越多” // 首次搜索时确保所有页的 textLayer 已渲染完成,避免“越搜越多”
await waitAllPagesRendered(); await waitAllPagesRendered();
// textLayer 内 span/布局在渲染 promise resolve 后仍可能有一次 DOM/布局收敛
await nextTick();
await new Promise((r) => requestAnimationFrame(() => r()));
// 重新搜索时 textLayer 可能仍有增量调整;每次搜索都重建页索引,避免数量漂移
Object.keys(pageIndexCache).forEach(k => delete pageIndexCache[k]);
// Drop every blank-like character from a value: regular whitespace, NBSP,
// zero-width space/joiners and the BOM. null/undefined are treated as ''.
const stripSpaces = (text) => {
  const source = String(text ?? '');
  return source.replace(/[\s\u00A0\u200B\u200C\u200D\uFEFF]+/g, '');
};
/**
 * Build a search index over the <span> elements of one page's text layer.
 *
 * @param {{ querySelectorAll: Function }} layer - text layer element of a page.
 * @returns {{
 *   rawText: string,
 *   strippedText: string,
 *   strippedToRawIndex: number[],
 *   rawToSegments: (rawStart: number, rawEnd: number) => Array<{ el: any, startIdx: number, endIdx: number }>
 * }}
 *   rawText            - concatenation of every span's textContent, in DOM order.
 *   strippedText       - rawText with all blank-like characters removed.
 *   strippedToRawIndex - for each character of strippedText, its offset in rawText.
 *   rawToSegments      - maps a half-open [rawStart, rawEnd) range of rawText to
 *                        per-span slices (offsets relative to that span's text).
 */
const buildPageIndex = (layer) => {
  // Single-character blank test; no `g` flag so `.test()` stays stateless.
  // Created once per call instead of once per character.
  const blankChar = /[\s\u00A0\u200B\u200C\u200D\uFEFF]/;

  const spans = Array.from(layer.querySelectorAll('span'));
  const ranges = [];
  let rawText = '';
  for (const el of spans) {
    const t = String(el.textContent ?? '');
    const start = rawText.length;
    rawText += t;
    // Remember which slice of rawText belongs to this span.
    ranges.push({ el, start, end: rawText.length });
  }

  // Fallback index: text with blanks removed, plus a position map
  // (index in strippedText -> index in rawText).
  const strippedToRawIndex = [];
  let strippedText = '';
  for (let i = 0; i < rawText.length; i++) {
    const ch = rawText[i];
    if (blankChar.test(ch)) continue;
    strippedToRawIndex.push(i);
    strippedText += ch;
  }

  // Intersect [rawStart, rawEnd) with every span's range; spans that do not
  // overlap the requested range contribute nothing.
  const rawToSegments = (rawStart, rawEnd) => {
    const segs = [];
    for (const r of ranges) {
      const segStart = Math.max(rawStart, r.start);
      const segEnd = Math.min(rawEnd, r.end);
      if (segStart < segEnd) {
        segs.push({
          el: r.el,
          startIdx: segStart - r.start,
          endIdx: segEnd - r.start,
        });
      }
    }
    return segs;
  };

  return { rawText, strippedText, strippedToRawIndex, rawToSegments };
};
/**
 * Return the text index for a page, building and caching it on first use.
 *
 * Reads the page's text layer from `overlayMap` and stores the result in
 * `pageIndexCache`. When the page has no text layer, `null` is cached and
 * returned so the caller can fall back to another search strategy.
 *
 * @param {number} pageNum - page number used as key in overlayMap / pageIndexCache.
 * @returns {object|null} the index produced by buildPageIndex, or null.
 */
const getOrBuildPageIndex = (pageNum) => {
  // Only truthy entries short-circuit; a cached null is simply re-resolved.
  if (pageIndexCache[pageNum]) return pageIndexCache[pageNum];
  const layer = overlayMap[pageNum];
  if (!layer) {
    pageIndexCache[pageNum] = null;
    return null;
  }
  const index = buildPageIndex(layer);
  pageIndexCache[pageNum] = index;
  return index;
};
/**
 * Collect the start offsets of every non-overlapping occurrence of `needle`
 * in `text`, scanning left to right.
 *
 * @param {string} text - text to scan.
 * @param {string} needle - substring to look for; an empty needle yields [].
 * @returns {number[]} ascending list of match offsets.
 */
const findAllIndexes = (text, needle) => {
  const list = [];
  if (!needle) return list;
  let from = 0;
  while (from <= text.length) {
    const idx = text.indexOf(needle, from);
    if (idx === -1) break;
    list.push(idx);
    // Always advance by at least one character so the scan cannot stall.
    from = idx + Math.max(1, needle.length);
  }
  return list;
};
for (let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
const index = getOrBuildPageIndex(pageNum);
const { rawText, strippedText, strippedToRawIndex, rawToSegments } = index
? index
: { rawText: '', strippedText: '', strippedToRawIndex: [], rawToSegments: () => [] };
// 第一优先:原始文本直接匹配(可跨 span)
const rawHits = rawText ? findAllIndexes(rawText, key) : [];
for (const hit of rawHits) {
const segments = rawToSegments(hit, hit + key.length);
if (segments.length) matchList.value.push({ pageNum, segments });
}
// 兜底:去空白匹配(常见于 PDF 将单词/中文切成多段并插入空白)
if (!rawHits.length && strippedText) {
const strippedKey = stripSpaces(key);
const strippedHits = findAllIndexes(strippedText, strippedKey);
for (const hit of strippedHits) {
const rawStart = strippedToRawIndex[hit] ?? 0;
const rawEndExclusiveIdx = strippedToRawIndex[Math.min(strippedToRawIndex.length - 1, hit + strippedKey.length - 1)] ?? rawStart;
const rawEnd = rawEndExclusiveIdx + 1;
const segments = rawToSegments(rawStart, rawEnd);
if (segments.length) matchList.value.push({ pageNum, segments });
}
}
// 最终兜底:如果该页 textLayer 不可用/为空,直接用 pdf.js 的 textContent 做“页级”检索
// 说明:这类命中只能做到定位页码,无法保证精确高亮(因为缺少 span/Range 映射)。
if (!rawHits.length && !strippedText) {
try {
const page = await doc.getPage(pageNum);
const tc = await page.getTextContent();
const pageText = (tc?.items || []).map(i => String(i?.str ?? '')).join('');
if (!pageText) continue;
const hasRaw = pageText.includes(key);
const hasStripped = stripSpaces(pageText).includes(stripSpaces(key));
if (hasRaw || hasStripped) {
matchList.value.push({ pageNum, segments: [], fallback: true });
}
} catch (_) {
// ignore
} }
} }
} }
...@@ -218,30 +334,57 @@ export default { ...@@ -218,30 +334,57 @@ export default {
if (idx < 0 || idx >= matchList.value.length) return; if (idx < 0 || idx >= matchList.value.length) return;
matchIdx.value = idx; matchIdx.value = idx;
const m = matchList.value[idx]; const m = matchList.value[idx];
const el = m?.el; if (m?.fallback) {
// 兜底命中:只定位页码,不做高亮
goToPage(m.pageNum);
return;
}
const firstSeg = m?.segments?.[0];
const el = firstSeg?.el;
if (!el) return; if (!el) return;
clearHighlights(); clearHighlights();
// 用 Range 精确计算“子串”在页面上的矩形位置,再画黄色块,避免把整段 span 都标黄 const layer = overlayMap[m.pageNum];
const textNode = el.firstChild; if (!layer) return;
if (textNode && textNode.nodeType === Node.TEXT_NODE) { const pageWrap = layer.closest('.page-wrap');
// 用 Range 精确计算“子串”在页面上的矩形位置,再画黄色块(支持跨 span)
const containerRect = (pageWrap || layer).getBoundingClientRect();
const segs = Array.isArray(m?.segments) ? m.segments : [];
for (const seg of segs) {
const segEl = seg?.el;
if (!segEl) continue;
const textNode = segEl.firstChild;
if (!textNode || textNode.nodeType !== Node.TEXT_NODE) continue;
try { try {
const range = document.createRange(); const range = document.createRange();
range.setStart(textNode, Math.max(0, m.startIdx ?? 0)); range.setStart(textNode, Math.max(0, seg.startIdx ?? 0));
range.setEnd(textNode, Math.max(0, m.endIdx ?? 0)); range.setEnd(textNode, Math.max(0, seg.endIdx ?? 0));
const rectList = Array.from(range.getClientRects()); const rectList = Array.from(range.getClientRects());
const pageWrap = el.closest('.page-wrap'); if (rectList.length) {
const layer = overlayMap[m.pageNum];
if (pageWrap && layer && rectList.length) {
const pageRect = pageWrap.getBoundingClientRect();
rectList.forEach(r => { rectList.forEach(r => {
const mark = document.createElement('div'); const mark = document.createElement('div');
mark.className = 'highlight-rect'; mark.className = 'highlight-rect';
mark.style.left = (r.left - pageRect.left) + 'px'; mark.style.zIndex = '5';
mark.style.top = (r.top - pageRect.top) + 'px'; mark.style.left = (r.left - containerRect.left) + 'px';
mark.style.top = (r.top - containerRect.top) + 'px';
mark.style.width = r.width + 'px'; mark.style.width = r.width + 'px';
mark.style.height = r.height + 'px'; mark.style.height = r.height + 'px';
layer.appendChild(mark); (pageWrap || layer).appendChild(mark);
}); });
} else {
// Range 兜底为空时:用 span 自身的矩形画块(精度低,但尽量可见)
const r = segEl.getBoundingClientRect();
if (r.width > 0 && r.height > 0) {
const mark = document.createElement('div');
mark.className = 'highlight-rect';
mark.style.zIndex = '5';
mark.style.left = (r.left - containerRect.left) + 'px';
mark.style.top = (r.top - containerRect.top) + 'px';
mark.style.width = r.width + 'px';
mark.style.height = r.height + 'px';
(pageWrap || layer).appendChild(mark);
}
} }
range.detach?.(); range.detach?.();
} catch (e) { } catch (e) {
...@@ -358,19 +501,22 @@ canvas { ...@@ -358,19 +501,22 @@ canvas {
color: transparent; color: transparent;
} }
.textLayer :deep(.highlight-text) { .textLayer :deep(.highlight-rect) {
position: absolute;
background: #ff0; background: #ff0;
opacity: 0.6; opacity: 0.6;
padding: 0 1px;
border-radius: 2px; border-radius: 2px;
pointer-events: none;
z-index: 5;
} }
.textLayer :deep(.highlight-rect) { .page-wrap :deep(.highlight-rect) {
position: absolute; position: absolute;
background: #ff0; background: #ff0;
opacity: 0.6; opacity: 0.6;
border-radius: 2px; border-radius: 2px;
pointer-events: none; pointer-events: none;
z-index: 3;
} }
.loading { .loading {
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论