ubuntu下的中文搜索sphinx的安装配置
一.安装依赖包
(文章源自运维生存时间:https://www.ttlsa.com/sphinx/install-sphinx-on-ubuntu-2/)
$ sudo apt-get install make gcc g++ automake libtool mysql-client libmysqlclient15-dev libxml2-dev libexpat1-dev
二.安装中文分词
$ sudo wget -c http://www.coreseek.cn/uploads/csft/3.1/Source/mmseg-3.1.tar.gz
$ sudo tar zxvf mmseg-3.1.tar.gz -C ../software/
$ cd ../software/mmseg-3.1
$ sudo ./configure --prefix=/usr/local/mmseg
$ sudo make
$ sudo make install
$ cd /usr/local/mmseg
$ sudo mkdir dict
$ cd dict
$ sudo cp /usr/local/src/tarbag/words.txt.uni ./uni.lib
$ sudo vim mmseg.ini
[mmseg]
merge_number_and_asci=1; //字母和数字连续出现是否切分
number_and_asci_joint=-.; //连接数字和字母可用的符号
compress_space=0;
seperate_number_asci=1; //是否拆分数字
三.安装sphinx
$ sudo wget http://www.coreseek.cn/uploads/csft/3.1/Source/csft-3.1.tar.gz
$ sudo tar zxvf csft-3.1.tar.gz -C ../software/
$ cd ../software/csft-3.1
$ sudo ./configure --prefix=/usr/local/csft --with-mysql=/usr/local/mysql --with-mysql-includes=/usr/local/mysql/include --with-mysql-libs=/usr/local/mysql/lib --with-mmseg=/usr/local/mmseg --with-mmseg-includes=/usr/local/mmseg/include/mmseg --with-mmseg-libs=/usr/local/mmseg/lib
$ sudo make
$ sudo make install
四.新建sph_counter表
CREATE TABLE `sph_counter` (
`counter_id` int(11) NOT NULL,
`max_doc_id` int(11) NOT NULL,
PRIMARY KEY (`counter_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
五.配置
$ cd /usr/local/csft/etc/
$ sudo cp sphinx.conf.dist sphinx.conf
$ sudo vim sphinx.conf
source bbs
{
type = mysql
sql_host = localhost
sql_user = root
sql_pass =
sql_db = test
sql_sock = /tmp/mysqld.sock
sql_query_pre = SET NAMES utf8
sql_query_pre = SET SESSION query_cache_type=OFF
sql_query_pre = REPLACE INTO sph_counter SELECT 1,MAX(pid) FROM pre_forum_post
sql_query = \
SELECT pid, fid, tid, first, invisible, authorid, dateline, subject, message \
FROM pre_forum_post \
WHERE pid<=(SELECT max_doc_id FROM sph_counter WHERE counter_id=1)
sql_attr_uint = fid
sql_attr_uint = tid
sql_attr_uint = first
sql_attr_uint = invisible
sql_attr_uint = authorid
sql_attr_timestamp = dateline
sql_query_info = SELECT * FROM pre_forum_post WHERE pid=$id
}
source bbs_delta : bbs
{
sql_query_pre = SET NAMES utf8
sql_query_pre = SET SESSION query_cache_type=OFF
sql_query = \
SELECT pid, fid, tid, first, invisible, authorid, dateline, subject, message \
FROM pre_forum_post \
WHERE pid>(SELECT max_doc_id FROM sph_counter WHERE counter_id=1)
}
source bbs_merge : bbs
{
sql_query_pre = SET NAMES utf8
sql_query_pre = SET SESSION query_cache_type=OFF
sql_query = \
SELECT pid, fid, tid, first, invisible, authorid, dateline, subject, message \
FROM pre_forum_post \
WHERE pid>(SELECT max_doc_id FROM sph_counter WHERE counter_id=1)
sql_query_post = REPLACE INTO sph_counter SELECT 1, MAX(pid) FROM pre_forum_post
}
index bbs
{
source = bbs
path = /usr/local/csft/var/data/bbs
docinfo = extern
mlock = 0
morphology = none
min_word_len = 1
charset_type = zh_cn.utf-8
charset_dictpath = /usr/local/mmseg/dict
html_strip = 0
}
index bbs_delta : bbs
{
source = bbs_delta
path = /usr/local/csft/var/data/bbs_delta
}
index bbs_merge : bbs
{
source = bbs_merge
path = /usr/local/csft/var/data/bbs_merge
}
indexer
{
mem_limit = 256M
}
searchd
{
log = /usr/local/csft/var/log/searchd.log
query_log = /usr/local/csft/var/log/query.log
read_timeout = 5
client_timeout = 300
max_children = 30
pid_file = /usr/local/csft/var/log/searchd.pid
max_matches = 1000
seamless_rotate = 1
preopen_indexes = 0
unlink_old = 1
mva_updates_pool = 1M
max_packet_size = 8M
max_filters = 256
max_filter_values = 4096
}
六.生成索引
$ sudo /usr/local/csft/bin/indexer --config /usr/local/csft/etc/sphinx.conf --all
Coreseek Full Text Server 3.1
Copyright (c) 2006-2008 coreseek.com
using config file '/usr/local/csft/etc/sphinx.conf'...
indexing index 'bbs'...
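iniparser: cannot open /usr/local/mmseg/dict/mmseg.ini
(注:出现上面这条提示,说明在 charset_dictpath 指向的目录 /usr/local/mmseg/dict 下没有找到 mmseg.ini,请确认 mmseg.ini 和 uni.lib 都放在该目录下。)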
collected 3 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 3 docs, 39578 bytes
total 0.050 sec, 799410.19 bytes/sec, 60.60 docs/sec
indexing index 'bbs_delta'...
collected 3 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 3 docs, 39578 bytes
total 0.044 sec, 902329.94 bytes/sec, 68.40 docs/sec
indexing index 'bbs_merge'...
collected 3 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 3 docs, 39578 bytes
total 0.022 sec, 1767980.00 bytes/sec, 134.01 docs/sec
total 9 reads, 0.0 sec, 21.3 kb/read avg, 0.0 msec/read avg
total 21 writes, 0.0 sec, 10.9 kb/write avg, 0.0 msec/write avg
七.测试
$ sudo /usr/local/csft/bin/search --config /usr/local/csft/etc/sphinx.conf "盛大"
Coreseek Full Text Server 3.1
Copyright (c) 2006-2008 coreseek.com
using config file '/usr/local/csft/etc/sphinx.conf'...
index 'bbs': query '盛大 ': returned 1 matches of 1 total in 0.004 sec
displaying matches:
1. document=29, weight=2, fid=33, tid=20, first=1, invisible=0, authorid=2, dateline=Thu Dec 23 06:14:00 2004
words:
1. '盛': 1 documents, 1 hits
2. '大': 2 documents, 54 hits
index 'bbs_delta': query '盛大 ': returned 1 matches of 1 total in 0.000 sec
displaying matches:
1. document=29, weight=2, fid=33, tid=20, first=1, invisible=0, authorid=2, dateline=Thu Dec 23 06:14:00 2004
words:
1. '盛': 1 documents, 1 hits
2. '大': 2 documents, 54 hits
index 'bbs_merge': query '盛大 ': returned 1 matches of 1 total in 0.000 sec
displaying matches:
1. document=29, weight=2, fid=33, tid=20, first=1, invisible=0, authorid=2, dateline=Thu Dec 23 06:14:00 2004
words:
1. '盛': 1 documents, 1 hits
2. '大': 2 documents, 54 hits
八.启动searchd
$ sudo /usr/local/csft/bin/searchd --config /usr/local/csft/etc/sphinx.conf
Coreseek Full Text Server 3.1
Copyright (c) 2006-2008 coreseek.com
using config file '/usr/local/csft/etc/sphinx.conf'...
listening on all interfaces, port=3312
九.计划任务更新合并索引
$ sudo crontab -e
# m h dom mon dow command
*/5 * * * * /usr/local/csft/bin/indexer --config /usr/local/csft/etc/sphinx.conf bbs_delta --rotate
00 04 * * * /usr/local/csft/bin/indexer --config /usr/local/csft/etc/sphinx.conf bbs_merge --rotate && /usr/local/csft/bin/indexer --config /usr/local/csft/etc/sphinx.conf --merge bbs bbs_merge --rotate
注:sphinx 1.x 版本已支持实时索引(RT 索引),可以不再依赖上述定时增量更新与合并的方式。
评论