如何解决:MuPDF——仿照现有的搜索函数,用 C 语言实现一个从页面文本生成句子数组的函数
我正在开发一个用 C 语言编写的 NDK 库,想为它添加新功能。为此,我需要一个与下面这个函数(库中已经存在)类似的简单函数。下面的函数用于在给定的页面文本中搜索关键词,如果找到匹配项,就以数组形式返回结果。类似地,我希望从给定的页面文本中得到一个由句子组成的数组。
/**
 * JNI entry point: searches the text of one page for a string and collects
 * the bounding rectangle of every hit.
 *
 * @param env        JNI environment of the calling thread.
 * @param thiz       Java object the native method was invoked on (unused).
 * @param dochandle  native handle that is cast back to a renderdocument_t*.
 * @param pagehandle native handle that is cast back to a renderpage_t*.
 * @param text       the string to search for.
 *
 * @return the list object produced by ArrayListHelper_create(), holding one
 *         object from PageTextBoxHelper_create() per non-empty hit rectangle;
 *         NULL when a handle is null, the search string cannot be obtained,
 *         helper/list creation fails, or MuPDF raises an error (in the last
 *         case a java.lang.OutOfMemoryError is thrown as well).
 *
 * NOTE(review): the declared return type is jobjectArray, yet the value
 * returned is whatever ArrayListHelper_create() yields - confirm against the
 * Java-side declaration of this native method.
 */
JNIEXPORT jobjectArray JNICALL
JNI_FN(Mupdfpage_search)(JNIEnv * env,jobject thiz,jlong dochandle,jlong pagehandle,jstring text)
{
    renderdocument_t *doc = (renderdocument_t*) (long) dochandle;
    renderpage_t *page = (renderpage_t*) (long) pagehandle;

    if (!doc || !page)
    {
        return NULL;
    }

    const char *str = (*env)->GetStringUTFChars(env, text, NULL);
    if (str == NULL)
    {
        return NULL;
    }

    ArrayListHelper alh;
    PageTextBoxHelper ptbh;
    CharacterHelper ch;
    if (!ArrayListHelper_init(&alh, env) || !PageTextBoxHelper_init(&ptbh, env) || !CharacterHelper_init(&ch, env))
    {
        DEBUG("search(): JNI helper initialization Failed");
        /* The UTF chars were already acquired; release them on every exit path. */
        (*env)->ReleaseStringUTFChars(env, text, str);
        return NULL;
    }

    jobject arrayList = ArrayListHelper_create(&alh);
    if (!arrayList)
    {
        (*env)->ReleaseStringUTFChars(env, text, str);
        return NULL;
    }

    fz_stext_sheet *sheet = NULL;
    fz_stext_page *pagetext = NULL;
    fz_device *dev = NULL;

    /*
     * fz_try is built on setjmp/longjmp: locals that are assigned inside the
     * try block and read in fz_always/fz_catch must be protected with fz_var,
     * otherwise their values are indeterminate after an error.
     */
    fz_var(sheet);
    fz_var(pagetext);
    fz_var(dev);

    fz_try(doc->ctx)
    {
        fz_rect rect;
        int pos;
        int len;

        /* Extract the structured text of the page through a text device. */
        fz_bound_page(doc->ctx, page->page, &rect);
        sheet = fz_new_stext_sheet(doc->ctx);
        pagetext = fz_new_stext_page(doc->ctx, &rect);
        dev = fz_new_stext_device(doc->ctx, sheet, pagetext, NULL);
        fz_run_page(doc->ctx, page->page, dev, &fz_identity, NULL);
        fz_close_device(doc->ctx, dev);
        fz_drop_device(doc->ctx, dev);
        dev = NULL; /* prevents a second drop in fz_always */

        len = textlen(pagetext);
        for (pos = 0; pos < len; pos++)
        {
            fz_rect rr = fz_empty_rect;
            int i;
            /*
             * NOTE(review): match() receives no reference to pagetext here;
             * its definition is outside this block - confirm the argument
             * list against the helper's actual signature.
             */
            int n = match(doc->ctx, &ch, str, pos);

            if (n <= 0)
            {
                continue;
            }

            /* Union of the boxes of all characters taking part in the hit. */
            for (i = 0; i < n; i++)
            {
                /* NOTE(review): same concern as match() - no pagetext argument. */
                fz_rect tmp_rr = bBoxcharat(doc->ctx, pos + i);
                fz_union_rect(&rr, &tmp_rr);
            }

            if (!fz_is_empty_rect(&rr))
            {
                int coords[4];
                jobject ptb;

                /* Float coordinates are converted to int exactly as before. */
                coords[0] = (rr.x0);
                coords[1] = (rr.y0);
                coords[2] = (rr.x1);
                coords[3] = (rr.y1);

                ptb = PageTextBoxHelper_create(&ptbh);
                if (ptb)
                {
                    PageTextBoxHelper_setRect(&ptbh, ptb, coords);
                    ArrayListHelper_add(&alh, arrayList, ptb);
                    /*
                     * Drop the local reference once the box has been handed to
                     * the list so a page with many hits cannot exhaust the JNI
                     * local reference table. Assumes ArrayListHelper_add()
                     * stores the object in the Java list - confirm.
                     */
                    (*env)->DeleteLocalRef(env, ptb);
                }
            }
        }
    }
    fz_always(doc->ctx)
    {
        if (pagetext)
        {
            fz_drop_stext_page(doc->ctx, pagetext);
        }
        if (sheet)
        {
            fz_drop_stext_sheet(doc->ctx, sheet);
        }
        if (dev)
        {
            fz_drop_device(doc->ctx, dev);
        }
    }
    fz_catch(doc->ctx)
    {
        jclass cls;

        (*env)->ReleaseStringUTFChars(env, text, str);
        cls = (*env)->FindClass(env, "java/lang/OutOfMemoryError");
        if (cls != NULL)
        {
            (*env)->ThrowNew(env, cls, "Out of memory in MuPDFCore_searchPage");
            (*env)->DeleteLocalRef(env, cls);
        }
        return NULL;
    }

    (*env)->ReleaseStringUTFChars(env, text, str);
    return arrayList;
}
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。