[转]Clucene的PHP接口开发
作者: 火车头 日期: 2008-02-28 05:13
关于PHP与Clucene的接口开发
见前言:
以前在空闲时,研究过PHP的扩展模块开发,但苦于没有找到一个比较好的调试方法,因此只做了少量的基础工作。感觉zend公司设计的PHP扩展模块机制有点过于复杂,调试起来真是让人头痛。若哪位有比跟踪调试更好的方法,请留言。
关于zend PHP扩展模块开发的帮助资料,用google等搜索引擎就能找到,这里把曾经做过的接口部分贴出来给大家看看。
clucene可能有不稳定的部分,在写接口前最好先测试一下。至于具体哪里有问题,当时没有做笔记,现在已经记不清了。
下载: php_clucene.h
- /************************************
- ** php_clucene.h
- ************************************/
- #ifndef PHP_CLUCENE_H
- #define PHP_CLUCENE_H
- /*
-  * Header prologue: pulls in the PHP/Zend headers with C linkage and then
-  * the CLucene C++ header.
-  *
-  * Every extern "C" { opened here is closed inside the same preprocessor
-  * conditional level, so the braces stay balanced whether or not
-  * HAVE_CLUCENE is defined. The matching #endif for HAVE_CLUCENE is at the
-  * bottom of this header.
-  */
- #ifdef __cplusplus
- extern "C" {
- #endif
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #include <php.h>
- #ifdef __cplusplus
- } /* extern "C" */
- #endif
- #ifdef HAVE_CLUCENE
- #ifdef __cplusplus
- extern "C" {
- #endif
- #include <php_ini.h>
- #include <SAPI.h>
- #include <ext/standard/info.h>
- #include <Zend/zend_extensions.h>
- #include <Zend/zend_exceptions.h>
- #ifdef __cplusplus
- } /* extern "C" */
- #endif
- #include <CLucene.h>
- /* Class structures */
- /* Per-instance storage behind a PHP IndexSearcher object. */
- typedef struct _index_searcher_object {
- zend_object std; /* Zend object header (class entry and property table) */
- lucene::search::IndexSearcher *searcher; /* NULL until __construct succeeds, and again after close() */
- const wchar_t **field; /* NULL-terminated array of wide-char field names built by __construct */
- } index_searcher_object;
- /* Per-instance storage behind a PHP Hits object. */
- typedef struct _hits_object {
- zend_object std; /* Zend object header (class entry and property table) */
- lucene::search::Hits *hits; /* result set; deleted by the object's destructor, NULL if never attached */
- } hits_object;
- //没实现
- /******************************
- typedef struct _index_writer_object {
- zend_object std;
- lucene::index::IndexWriter *writer;
- wchar_t *field;
- } index_writer_object;
- /******************************/
- /* Standard analyzer */
- // ICTCLAS word-segmentation analyzer, used instead of the stock StandardAnalyzer below
- //static lucene::analysis::standard::StandardAnalyzer analyzer;
- // NOTE(review): this is a `static` object defined in a header, so every
- // translation unit that includes the header gets its own copy.
- static lucene::analysis::standard::StandardIctclas analyzer;
- #ifdef __cplusplus
- extern "C" {
- #endif
- /* Module descriptor (defined in clucene.cpp) and the pointer alias to it. */
- extern zend_module_entry clucene_module_entry;
- #define phpext_clucene_ptr &clucene_module_entry
- /* Export decoration: dllexport on PHP_WIN32 builds, empty elsewhere. */
- #ifdef PHP_WIN32
- #define PHP_CLUCENE_API __declspec(dllexport)
- #else
- #define PHP_CLUCENE_API
- #endif
- /* Module lifecycle hooks implemented in clucene.cpp. */
- PHP_MINIT_FUNCTION(clucene);
- PHP_MSHUTDOWN_FUNCTION(clucene);
- PHP_RINIT_FUNCTION(clucene);
- PHP_RSHUTDOWN_FUNCTION(clucene);
- PHP_MINFO_FUNCTION(clucene);
- #ifdef ZTS
- #include "TSRM.h"
- #endif
- /* Deletes the resource-list entry whose id is stored in the given zval. */
- #define FREE_RESOURCE(resource) zend_list_delete(Z_LVAL_P(resource))
- /*
-  * Property accessors.
-  *
-  * `name` is the bare property identifier; it is stringified with #name.
-  * All of these expand to code that uses `_this_ce` and `_this_zval`, which
-  * must therefore be in scope at the point of use.
-  * The PROP_GET_* forms read the property silently (the `1` argument) and
-  * reinterpret the resulting zval as the requested type without converting.
-  */
- #define PROP_GET_LONG(name) Z_LVAL_P(zend_read_property(_this_ce, _this_zval, #name, strlen(#name), 1 TSRMLS_CC))
- #define PROP_SET_LONG(name, l) zend_update_property_long(_this_ce, _this_zval, #name, strlen(#name), l TSRMLS_CC)
- #define PROP_GET_DOUBLE(name) Z_DVAL_P(zend_read_property(_this_ce, _this_zval, #name, strlen(#name), 1 TSRMLS_CC))
- #define PROP_SET_DOUBLE(name, d) zend_update_property_double(_this_ce, _this_zval, #name, strlen(#name), d TSRMLS_CC)
- #define PROP_GET_STRING(name) Z_STRVAL_P(zend_read_property(_this_ce, _this_zval, #name, strlen(#name), 1 TSRMLS_CC))
- #define PROP_GET_STRLEN(name) Z_STRLEN_P(zend_read_property(_this_ce, _this_zval, #name, strlen(#name), 1 TSRMLS_CC))
- #define PROP_SET_STRING(name, s) zend_update_property_string(_this_ce, _this_zval, #name, strlen(#name), s TSRMLS_CC)
- /* Length-aware setter: must call the ...stringl() variant, which accepts the explicit length `l`. */
- #define PROP_SET_STRINGL(name, s, l) zend_update_property_stringl(_this_ce, _this_zval, #name, strlen(#name), s, l TSRMLS_CC)
- #ifdef __cplusplus
- } /* extern "C" */
- #endif
- #endif /* HAVE_CLUCENE */
- #endif /* PHP_CLUCENE_H */
下载: clucene.cpp
- /************************************
- ** clucene.cpp
- ************************************/
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #include <locale>
- #include <syslog.h>
- #include "CLucene/util/WideCharToMultiByte.h"
- #include "CLucene/util/Ictinf.h"
- #include "php_clucene.h"
- #define PHP_CLUCENE_MODULE_VERSION "0.1.5"
- using namespace lucene::analysis;
- using namespace lucene::document;
- using namespace lucene::queryParser;
- using namespace lucene::search;
- using namespace lucene::index;
- using namespace lucene::util;
- #if HAVE_CLUCENE
- /* Other analyzers should be added in the future */
- //StandardIctclas
- /* Analyzer selector; exported to PHP as the STANDARD_ANALYZER constant in MINIT. */
- enum {STANDARD_ANALYZER};
- /* Object handler tables; filled with a copy of the standard handlers in MINIT. */
- static zend_object_handlers index_searcher_object_handlers;
- static zend_object_handlers hits_object_handlers;
- /* Registered class entries for the IndexSearcher and Hits classes; assigned in MINIT. */
- zend_class_entry *index_searcher_class_entry;
- zend_class_entry *hits_class_entry;
- /* {{{ clucene_module_entry */
- /* Module descriptor. MSHUTDOWN and RSHUTDOWN are defined further down in
-  * this file but are deliberately left unregistered (NULL) here. */
- zend_module_entry clucene_module_entry = {
- STANDARD_MODULE_HEADER,
- "clucene",
- NULL, /* NULL in the slot after the module name — presumably the function table; confirm against zend_module_entry */
- PHP_MINIT(clucene), /* Replace with NULL if there is nothing to do at php startup */
- NULL, /* PHP_MSHUTDOWN(clucene), */
- PHP_RINIT(clucene), /* PHP_RINIT(clucene), */
- NULL, /* PHP_RSHUTDOWN(clucene), */
- PHP_MINFO(clucene),
- PHP_CLUCENE_MODULE_VERSION,
- STANDARD_MODULE_PROPERTIES
- };
- /* }}} */
- /* {{{ index_searcher_object_dtor
-    Storage destructor for IndexSearcher objects: destroys the property
-    table, closes and deletes the CLucene searcher, frees the field-name
-    array together with every string in it, then frees the object itself. */
- static void index_searcher_object_dtor(void *object, zend_object_handle handle TSRMLS_DC)
- {
-     index_searcher_object *intern = (index_searcher_object*) object;
-
-     zend_hash_destroy(intern->std.properties);
-     FREE_HASHTABLE(intern->std.properties);
-
-     /* Free other members */
-     if (intern->searcher != NULL) {
-         intern->searcher->close();
-         delete intern->searcher;
-         intern->searcher = NULL;
-     }
-     if (intern->field != NULL) {
-         /* The array is NULL-terminated and each entry was allocated by
-          * Misc::_charToWide() in __construct; release the strings before
-          * the array itself. delete [] mirrors how Hits::get releases a
-          * _charToWide() result. */
-         for (const wchar_t **name = intern->field; *name != NULL; ++name) {
-             delete [] *name;
-         }
-         delete [] intern->field;
-         intern->field = NULL;
-     }
-     efree(object);
- } /* }}} */
- /* {{{ index_searcher_object_new
-    create_object handler for IndexSearcher: allocates zeroed storage,
-    copies the class's default properties and registers the object in the
-    object store with index_searcher_object_dtor as its destructor. */
- static zend_object_value index_searcher_object_new(zend_class_entry *class_type TSRMLS_DC)
- {
-     zend_object_value result;
-     zval *scratch;
-     index_searcher_object *obj = (index_searcher_object*) emalloc(sizeof(index_searcher_object));
-
-     memset(obj, 0, sizeof(index_searcher_object));
-     obj->std.ce = class_type;
-     obj->searcher = NULL;
-     obj->field = NULL;
-
-     /* Property table starts as a ref-counted copy of the class defaults. */
-     ALLOC_HASHTABLE(obj->std.properties);
-     zend_hash_init(obj->std.properties, 0, NULL, ZVAL_PTR_DTOR, 0);
-     zend_hash_copy(obj->std.properties,
-                    &class_type->default_properties,
-                    (copy_ctor_func_t) zval_add_ref,
-                    (void *) &scratch, sizeof(zval *));
-
-     result.handle = zend_objects_store_put(obj,
-                                            index_searcher_object_dtor,
-                                            NULL,
-                                            NULL TSRMLS_CC);
-     result.handlers = &index_searcher_object_handlers;
-     return result;
- } /* }}} */
- /* {{{ hits_object_dtor
-    Storage destructor for Hits objects: destroys the property table,
-    deletes the wrapped CLucene Hits (if any) and frees the object. */
- static void hits_object_dtor(void *object, zend_object_handle handle TSRMLS_DC)
- {
-     hits_object *self = (hits_object*) object;
-     HashTable *props = self->std.properties;
-
-     zend_hash_destroy(props);
-     FREE_HASHTABLE(props);
-
-     /* Deleting a null pointer is a no-op, so no explicit NULL test is needed. */
-     delete self->hits;
-
-     efree(object);
- } /* }}} */
- /* {{{ hits_object_new
-    create_object handler for Hits: allocates zeroed storage with no result
-    set attached, copies the class's default properties and registers the
-    object in the object store with hits_object_dtor as its destructor. */
- static zend_object_value hits_object_new(zend_class_entry *class_type TSRMLS_DC)
- {
-     zend_object_value result;
-     zval *scratch;
-     hits_object *obj = (hits_object*) emalloc(sizeof(hits_object));
-
-     memset(obj, 0, sizeof(hits_object));
-     obj->std.ce = class_type;
-     obj->hits = NULL;
-
-     /* Property table starts as a ref-counted copy of the class defaults. */
-     ALLOC_HASHTABLE(obj->std.properties);
-     zend_hash_init(obj->std.properties, 0, NULL, ZVAL_PTR_DTOR, 0);
-     zend_hash_copy(obj->std.properties,
-                    &class_type->default_properties,
-                    (copy_ctor_func_t) zval_add_ref,
-                    (void *) &scratch, sizeof(zval *));
-
-     result.handle = zend_objects_store_put(obj,
-                                            hits_object_dtor,
-                                            NULL,
-                                            NULL TSRMLS_CC);
-     result.handlers = &hits_object_handlers;
-     return result;
- } /* }}} */
- /* {{{ hits_object_create
-    Builds a Hits object around an existing CLucene result set. The object
-    stores the `hits` pointer and its destructor (hits_object_dtor) deletes
-    it. */
- static zend_object_value hits_object_create(zend_class_entry *class_type, Hits *hits TSRMLS_DC)
- {
-     zend_object_value result;
-     zval *scratch;
-     hits_object *obj = (hits_object*) emalloc(sizeof(hits_object));
-
-     memset(obj, 0, sizeof(hits_object));
-     obj->std.ce = class_type;
-     obj->hits = hits;
-
-     /* Property table starts as a ref-counted copy of the class defaults. */
-     ALLOC_HASHTABLE(obj->std.properties);
-     zend_hash_init(obj->std.properties, 0, NULL, ZVAL_PTR_DTOR, 0);
-     zend_hash_copy(obj->std.properties,
-                    &class_type->default_properties,
-                    (copy_ctor_func_t) zval_add_ref,
-                    (void *) &scratch, sizeof(zval *));
-
-     result.handle = zend_objects_store_put(obj,
-                                            hits_object_dtor,
-                                            NULL,
-                                            NULL TSRMLS_CC);
-     result.handlers = &hits_object_handlers;
-     return result;
- } /* }}} */
- /* Only when COMPILE_DL_CLUCENE is defined: emit the ZEND_GET_MODULE entry point with C linkage. */
- #ifdef COMPILE_DL_CLUCENE
- extern "C" {
- ZEND_GET_MODULE(clucene)
- } /* extern "C" */
- #endif
- /* Forward declarations for IndexSearcher methods */
- PHP_METHOD(IndexSearcher, __construct);
- PHP_METHOD(IndexSearcher, search);
- PHP_METHOD(IndexSearcher, close);
- /* Method table for the IndexSearcher class: all methods public, no arginfo; ends with an all-NULL entry. */
- static zend_function_entry index_searcher_functions[] = {
- PHP_ME(IndexSearcher, __construct, NULL, ZEND_ACC_PUBLIC)
- PHP_ME(IndexSearcher, search, NULL, ZEND_ACC_PUBLIC)
- PHP_ME(IndexSearcher, close, NULL, ZEND_ACC_PUBLIC)
- {NULL, NULL, NULL}
- };
- /* Forward declarations for Hits methods */
- PHP_METHOD(Hits, __construct);
- PHP_METHOD(Hits, length);
- PHP_METHOD(Hits, get);
- PHP_METHOD(Hits, id);
- PHP_METHOD(Hits, score);
- /* The creation of a Hits object is not allowed */
- /* Method table for the Hits class: the constructor is registered as
-  * private, the accessors as public; ends with an all-NULL entry. */
- static zend_function_entry hits_functions[] = {
- PHP_ME(Hits, __construct, NULL, ZEND_ACC_PRIVATE)
- PHP_ME(Hits, length, NULL, ZEND_ACC_PUBLIC)
- PHP_ME(Hits, get, NULL, ZEND_ACC_PUBLIC)
- PHP_ME(Hits, id, NULL, ZEND_ACC_PUBLIC)
- PHP_ME(Hits, score, NULL, ZEND_ACC_PUBLIC)
- {NULL, NULL, NULL}
- };
- /* {{{ proto void IndexSearcher::__construct(string path, array fields)
-    Constructs a new IndexSearcher instance.
-    path   - location handed to the CLucene IndexSearcher constructor.
-    fields - array of field names; each element is converted to a string
-             and stored as the default field list used by search().
-    Throws the default exception class on bad arguments or when CLucene
-    reports an error while opening the index. */
- PHP_METHOD(IndexSearcher, __construct)
- {
-     index_searcher_object *intern;
-     char *path;
-     int path_len; /* the "s" specifier stores the length as int in the PHP 5 API */
-     zval *fields, **z_item;
-     const wchar_t **fields_ary;
-     int i, count;
-     int filled = 0; /* number of entries actually stored in fields_ary */
-
-     php_set_error_handling(EH_THROW, zend_exception_get_default() TSRMLS_CC);
-     /* TODO: Add the analyzer as a 3rd parameter when several
-      * analyzers will be available.
-      */
-     if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa", &path,
-                               &path_len, &fields) == FAILURE) {
-         php_set_error_handling(EH_NORMAL, NULL TSRMLS_CC);
-         return;
-     }
-
-     /* Build a NULL-terminated array of wide-char copies of the field names. */
-     count = zend_hash_num_elements(Z_ARRVAL_P(fields));
-     fields_ary = _CL_NEWARRAY(const wchar_t*, count + 1);
-     zend_hash_internal_pointer_reset(Z_ARRVAL_P(fields));
-     for (i = 0; i < count; i++) {
-         /* Fetch the current element; skip it if the lookup fails rather
-          * than dereferencing an unset pointer. */
-         if (zend_hash_get_current_data(Z_ARRVAL_P(fields), (void**) &z_item) == SUCCESS) {
-             convert_to_string_ex(z_item);
-             fields_ary[filled++] = Misc::_charToWide(Z_STRVAL_PP(z_item));
-         }
-         /* Advance the array's internal pointer by one element. */
-         zend_hash_move_forward(Z_ARRVAL_P(fields));
-     }
-     fields_ary[filled] = NULL;
-
-     intern = (index_searcher_object*) zend_object_store_get_object(getThis() TSRMLS_CC);
-     php_set_error_handling(EH_NORMAL, NULL TSRMLS_CC);
-
-     try {
-         intern->searcher = new IndexSearcher(path);
-     } catch (CLuceneError& error) {
-         /* The object never takes ownership of the field list on this
-          * path, so release it here instead of leaking it. */
-         for (i = 0; i < filled; i++) {
-             delete [] fields_ary[i];
-         }
-         delete [] fields_ary;
-         zend_throw_exception(zend_exception_get_default(), error.what(), 0 TSRMLS_CC);
-         return;
-     }
-     intern->field = fields_ary;
- } /* }}} */
- /* {{{ proto object IndexSearcher::search(string query, string sort)
-    Return documents matching query.
-    query - query text, parsed against the field list given to __construct.
-    sort  - name of the field to sort by; pass "" for the default ordering.
-    Returns a Hits object. Throws the default exception class when the
-    searcher has been closed or when CLucene reports an error. */
- PHP_METHOD(IndexSearcher, search)
- {
-     index_searcher_object *intern;
-     char *query_string, *sort_string;
-     WCHAR *wline = NULL, *vfield = NULL;
-     int len, sort_len; /* the "s" specifier stores lengths as int in the PHP 5 API */
-     Query *query = NULL;
-     Hits *hits = NULL;
-     int total, n, i;
-
-     if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss", &query_string, &len, &sort_string, &sort_len) == FAILURE) {
-         return;
-     }
-     intern = (index_searcher_object*) zend_object_store_get_object(getThis() TSRMLS_CC);
-     if (intern->searcher == NULL) {
-         zend_throw_exception(zend_exception_get_default(), "The IndexSearcher has been closed", 0 TSRMLS_CC);
-         /* Must stop here: everything below dereferences intern->searcher. */
-         return;
-     }
-
-     /* Convert both inputs to wide strings via the ICTCLAS helper. */
-     str_to_UnicodeChar((const char*) sort_string, vfield);
-     str_to_UnicodeChar((const char*) query_string, wline);
-
-     try {
-         query = MultiFieldQueryParser::parse((wchar_t*) wline, intern->field, &analyzer);
-         if (strlen(sort_string) > 0) {
-             /* NOTE(review): sort_Field, sort and vfield are never released.
-              * They are left alone here because it is not visible from this
-              * file whether the returned Hits keeps referring to them —
-              * confirm against the CLucene sources before freeing. */
-             SortField *sort_Field = new SortField((wchar_t*) vfield);
-             Sort *sort = new Sort(sort_Field);
-             hits = intern->searcher->search(query, sort);
-         } else {
-             hits = intern->searcher->search(query);
-         }
-
-         /* Touch every 100th hit before the query is deleted below
-          * (presumably to force CLucene to load the result set while the
-          * query is still alive — TODO confirm). The position is checked
-          * against the hit count because 100*(i+1) equals the count when
-          * the count is an exact multiple of 100, and that index is one
-          * past the last valid hit. */
-         total = hits->length();
-         n = total / 100;
-         for (i = 0; i < n; i++) {
-             int pos = 100 * (i + 1);
-             if (pos < total) {
-                 hits->doc(pos);
-             }
-         }
-     } catch (CLuceneError& error) {
-         /* Same error-reporting style as the Hits accessors. */
-         delete hits;
-         delete query;
-         delete wline;
-         zend_throw_exception(zend_exception_get_default(), error.what(), 0 TSRMLS_CC);
-         return;
-     }
-
-     delete query;
-     /* NOTE(review): scalar delete kept from the original; it must match
-      * however str_to_UnicodeChar() allocates — verify. */
-     delete wline;
-     wline = NULL;
-
-     return_value->type = IS_OBJECT;
-     return_value->value.obj = hits_object_create(hits_class_entry, hits);
- }
- /* }}} */
- /* {{{ proto void IndexSearcher::close()
-    Frees resources associated with the searcher: closes and deletes the
-    CLucene searcher and releases the stored field list. Calling it again
-    on an already closed object does nothing. */
- PHP_METHOD(IndexSearcher, close)
- {
-     index_searcher_object *intern;
-
-     if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
-         return;
-     }
-     intern = (index_searcher_object*)
-         zend_object_store_get_object(getThis() TSRMLS_CC);
-
-     if (intern->searcher != NULL) {
-         intern->searcher->close();
-         delete intern->searcher;
-         intern->searcher = NULL;
-     }
-     if (intern->field != NULL) {
-         /* The array is NULL-terminated and each entry was allocated by
-          * Misc::_charToWide() in __construct; free the strings as well as
-          * the array so they do not leak. */
-         for (const wchar_t **name = intern->field; *name != NULL; ++name) {
-             delete [] *name;
-         }
-         delete [] intern->field;
-         intern->field = NULL;
-     }
- }
- /* }}} */
- /* {{{ proto void Hits::__construct()
-    Constructs a new Hits instance. Takes no arguments; argument errors are
-    raised as exceptions of the default exception class. */
- PHP_METHOD(Hits, __construct)
- {
-     hits_object *self;
-
-     php_set_error_handling(EH_THROW, zend_exception_get_default() TSRMLS_CC);
-     if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") != FAILURE) {
-         /* Nothing to initialise; the storage is only looked up. */
-         self = (hits_object*) zend_object_store_get_object(getThis() TSRMLS_CC);
-     }
-     /* Both the success and failure paths restore normal error handling. */
-     php_set_error_handling(EH_NORMAL, NULL TSRMLS_CC);
- } /* }}} */
- /* {{{ proto int Hits::length()
-    Returns the total number of hits available in this set. Emits an
-    E_NOTICE and returns nothing when no result set is attached. */
- PHP_METHOD(Hits, length)
- {
-     hits_object *self;
-
-     if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
-         php_set_error_handling(EH_NORMAL, NULL TSRMLS_CC);
-         return;
-     }
-
-     self = (hits_object*) zend_object_store_get_object(getThis() TSRMLS_CC);
-     if (self->hits == NULL) {
-         /* Should not occur */
-         php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Object was not fully initialized");
-         return;
-     }
-
-     RETURN_LONG((long) self->hits->length());
- } /* }}} */
- /* {{{ proto string|NULL Hits::get(int n, string name)
-    Returns the string value of the name field for the nth document in
-    this set or NULL.
-    Throws the default exception class when n is out of range or CLucene
-    reports an error; emits an E_NOTICE when no result set is attached. */
- PHP_METHOD(Hits, get)
- {
-     hits_object *intern = NULL;
-     long n;
-     long max;
-     char *name = NULL;
-     char *tmp = NULL;
-     char *value = NULL;
-     wchar_t *wname = NULL;
-     int len; /* the "s" specifier stores the length as int in the PHP 5 API */
-
-     if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ls", &n, &name, &len) == FAILURE) {
-         php_set_error_handling(EH_NORMAL, NULL TSRMLS_CC);
-         return;
-     }
-     intern = (hits_object*) zend_object_store_get_object(getThis() TSRMLS_CC);
-     if (intern->hits == NULL) {
-         php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Object was not fully initialized");
-         return;
-     }
-
-     max = (long) intern->hits->length();
-     if ((n < 0) || (n >= max)) {
-         zend_throw_exception_ex(zend_exception_get_default(), 0 TSRMLS_CC, "Seek position %ld is out of range", n);
-         return;
-     }
-
-     /* Converted outside the try block so the catch handler can free it. */
-     wname = Misc::_charToWide(name);
-     try {
-         Document& doc = intern->hits->doc(n);
-         /* Look the field up once and reuse the result. */
-         const wchar_t *stored = doc.get(wname);
-         if (stored != NULL) {
-             UnicodeChar_to_str((WCHAR*) stored, tmp);
-         }
-     } catch (CLuceneError& error) {
-         delete [] wname;
-         zend_throw_exception(zend_exception_get_default(), error.what(), 0 TSRMLS_CC);
-         return;
-     }
-     delete [] wname;
-
-     if (tmp == NULL) {
-         RETURN_NULL();
-     }
-     /* Hand PHP an emalloc'ed copy (RETURN_STRING with duplicate = 0 takes
-      * it over) and release the helper's buffer.
-      * NOTE(review): scalar delete kept from the original; it must match
-      * however UnicodeChar_to_str() allocates — verify. */
-     value = estrdup(tmp);
-     delete tmp;
-     tmp = NULL;
-     RETURN_STRING(value, 0);
- } /* }}} */
- /* {{{ proto int Hits::id(int n)
-    Returns the id for the nth document in this set. Throws the default
-    exception class when n is out of range or CLucene reports an error;
-    emits an E_NOTICE when no result set is attached. */
- PHP_METHOD(Hits, id)
- {
-     hits_object *self;
-     long position;
-     long limit;
-     long doc_id;
-
-     if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
-         php_set_error_handling(EH_NORMAL, NULL TSRMLS_CC);
-         return;
-     }
-
-     self = (hits_object*) zend_object_store_get_object(getThis() TSRMLS_CC);
-     if (self->hits == NULL) {
-         php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Object was not fully initialized");
-         return;
-     }
-
-     /* Valid positions are 0 .. length-1. */
-     limit = (long) self->hits->length();
-     if (!(position >= 0 && position < limit)) {
-         zend_throw_exception_ex(zend_exception_get_default(), 0 TSRMLS_CC, "Seek position %ld is out of range", position);
-         return;
-     }
-
-     try {
-         doc_id = (long) (self->hits->id(position));
-     } catch (CLuceneError& error) {
-         zend_throw_exception(zend_exception_get_default(), error.what(), 0 TSRMLS_CC);
-         return;
-     }
-     RETURN_LONG(doc_id);
- } /* }}} */
- /* {{{ proto float Hits::score(int n)
-    Returns the score for the nth document in this set. Throws the default
-    exception class when n is out of range or CLucene reports an error;
-    emits an E_NOTICE when no result set is attached. */
- PHP_METHOD(Hits, score)
- {
-     hits_object *self;
-     long position;
-     long limit;
-     double relevance;
-
-     if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
-         php_set_error_handling(EH_NORMAL, NULL TSRMLS_CC);
-         return;
-     }
-
-     self = (hits_object*) zend_object_store_get_object(getThis() TSRMLS_CC);
-     if (self->hits == NULL) {
-         php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Object was not fully initialized");
-         return;
-     }
-
-     /* Valid positions are 0 .. length-1. */
-     limit = (long) self->hits->length();
-     if (!(position >= 0 && position < limit)) {
-         zend_throw_exception_ex(zend_exception_get_default(), 0 TSRMLS_CC, "Seek position %ld is out of range", position);
-         return;
-     }
-
-     try {
-         relevance = (double) (self->hits->score(position));
-     } catch (CLuceneError& error) {
-         zend_throw_exception(zend_exception_get_default(), error.what(), 0 TSRMLS_CC);
-         return;
-     }
-     RETURN_DOUBLE(relevance);
- } /* }}} */
- /* {{{ PHP_MINIT_FUNCTION
-    Module startup: registers the final classes IndexSearcher and Hits,
-    seeds their handler tables from the standard object handlers and
-    defines the STANDARD_ANALYZER constant. */
- PHP_MINIT_FUNCTION(clucene)
- {
-     zend_class_entry searcher_tmpl;
-     zend_class_entry hits_tmpl;
-
-     /* IndexSearcher class */
-     INIT_CLASS_ENTRY(searcher_tmpl, "IndexSearcher", index_searcher_functions);
-     index_searcher_class_entry = zend_register_internal_class(&searcher_tmpl TSRMLS_CC);
-     index_searcher_class_entry->create_object = index_searcher_object_new;
-     index_searcher_class_entry->ce_flags |= ZEND_ACC_FINAL_CLASS;
-     index_searcher_object_handlers = *zend_get_std_object_handlers();
-
-     /* Hits class */
-     INIT_CLASS_ENTRY(hits_tmpl, "Hits", hits_functions);
-     hits_class_entry = zend_register_internal_class(&hits_tmpl TSRMLS_CC);
-     hits_class_entry->create_object = hits_object_new;
-     hits_class_entry->ce_flags |= ZEND_ACC_FINAL_CLASS;
-     hits_object_handlers = *zend_get_std_object_handlers();
-
-     REGISTER_LONG_CONSTANT("STANDARD_ANALYZER", STANDARD_ANALYZER, CONST_CS | CONST_PERSISTENT);
-     return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_MSHUTDOWN_FUNCTION */
- /* Module shutdown hook; does nothing and reports SUCCESS. The module entry
-  * in this file passes NULL for this slot, so it is not registered. */
- PHP_MSHUTDOWN_FUNCTION(clucene)
- {
- /* add your stuff here */
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_RINIT_FUNCTION */
- /* Request startup hook; does nothing and reports SUCCESS. */
- PHP_RINIT_FUNCTION(clucene)
- {
- /* add your stuff here */
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_RSHUTDOWN_FUNCTION */
- /* Request shutdown hook; does nothing and reports SUCCESS. The module entry
-  * in this file passes NULL for this slot, so it is not registered. */
- PHP_RSHUTDOWN_FUNCTION(clucene)
- {
- /* add your stuff here */
- return SUCCESS;
- }
- /* }}} */
- /* {{{ PHP_MINFO_FUNCTION
-    Prints the extension's info table: a "CLucene support" row and the
-    extension version. */
- PHP_MINFO_FUNCTION(clucene)
- {
-     php_info_print_table_start();
-     php_info_print_table_row(2, "CLucene support", "enabled");
-     php_info_print_table_row(2, "Extension version", PHP_CLUCENE_MODULE_VERSION);
-     php_info_print_table_end();
- }
- /* }}} */
- #endif /* HAVE_CLUCENE */
评论: 0 |
引用: 0 |
阅读: 947
发表评论
订阅
上一篇
返回
下一篇