Skip to content

Commit 422ff8b

Browse files
committed
update
1 parent 20ae85c commit 422ff8b

6 files changed

Lines changed: 220 additions & 101 deletions

File tree

README.md

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,19 @@ class Index
2121
{
2222
public function index()
2323
{
24-
$source = '李彦宏是马云最大威胁嘛?';
24+
$source = '352净水器K10家用直饮大通量1000G 厨下RO反渗透纯水机';
2525
$pullWord = new PullWord($source);
26-
$result = $pullWord->get();
27-
// 结果 李彦 李彦宏 彦宏 马云 最大 大威 威胁
26+
$result = $pullWord->pull()->toJson()->get();
27+
// $result = $pullWord->service('pull', $source)->get();
28+
// 结果 => string(169) "[{"t":"352净水器"},{"t":"净水器"},{"t":"家用"},{"t":"大通量"},{"t":"1000g"},{"t":"反渗透"},{"t":"反渗透纯水机"},{"t":"渗透"},{"t":"纯水机"}]"
29+
30+
31+
// 商品分类
32+
// $source = '352净水器K10家用直饮大通量1000G 厨下RO反渗透纯水机';
33+
// $pullWord = new PullWord($source);
34+
// $result = $pullWord->classify()->get();
35+
// // $result = $pullWord->service('classify', $source)->get();
36+
// 结果 => string(27) "{"class":"电器","idx":11}"
2837
}
2938
}
3039
```
@@ -36,9 +45,13 @@ class Index
3645
{
3746
public function index(PullWord $pullWord)
3847
{
39-
$source = '李彦宏是马云最大威胁嘛?';
40-
$result = $pullWord->source($source)->get();
48+
$source = '352净水器K10家用直饮大通量1000G 厨下RO反渗透纯水机';
49+
$result = $pullWord->pull($source)->get();
4150
var_dump($result);
51+
52+
// $source = '352净水器K10家用直饮大通量1000G 厨下RO反渗透纯水机';
53+
// $result = $pullWord->classify($source)->get();
54+
// var_dump($result);
4255
}
4356
}
4457
```
@@ -51,8 +64,8 @@ class Index
5164
{
5265
public function index()
5366
{
54-
$source = '李彦宏是马云最大威胁嘛?';
55-
$result = PullWord::source($source)->get();
67+
$source = '352净水器K10家用直饮大通量1000G 厨下RO反渗透纯水机';
68+
$result = PullWord::pull($source)->get();
5669
var_dump($result);
5770
}
5871
}
@@ -64,8 +77,8 @@ class Index
6477
{
6578
public function index()
6679
{
67-
$source = '李彦宏是马云最大威胁嘛?';
68-
$result = app('pullword')->source($source)->get();
80+
$source = '352净水器K10家用直饮大通量1000G 厨下RO反渗透纯水机';
81+
$result = app('pullword')->pull($source)->get();
6982
var_dump($result);
7083
}
7184
}
@@ -75,22 +88,22 @@ class Index
7588
### 其它链式方法
7689
#### json返回
7790
```php
78-
$source = '李彦宏是马云最大威胁嘛?';
79-
PullWord::source($source)->toJson()->get();
80-
// 结果 [{"t":"李彦"},{"t":"李彦宏"},{"t":"彦宏"},{"t":"马云"},{"t":"最大"},{"t":"大威"},{"t":"威胁"}]
91+
$source = '352净水器K10家用直饮大通量1000G 厨下RO反渗透纯水机';
92+
PullWord::pull($source)->toJson()->get();
93+
// 结果 [{"t":"352净水器"},{"t":"净水器"},{"t":"家用"},{"t":"大通量"},{"t":"1000g"},{"t":"反渗透"},{"t":"反渗透纯水机"},{"t":"渗透"},{"t":"纯水机"}]
8194
```
8295
#### 调试模式
8396
结果含有出词概率
8497
```php
85-
$source = '李彦宏是马云最大威胁嘛?';
86-
PullWord::source($source)->debug()->get();
87-
// 结果 李彦:0.23007 李彦宏:0.900302 彦宏:0.0703263 马云:1 最大:0.892363 大威:0.289136 威胁:0.9367
98+
$source = '352净水器K10家用直饮大通量1000G 厨下RO反渗透纯水机';
99+
PullWord::pull($source)->debug()->toJson()->get();
100+
// 结果 [{"t":"352净水器","p":"1"},{"t":"净水器","p":"1"},{"t":"家用","p":"1"},{"t":"大通量","p":"0.923331"},{"t":"1000g","p":"0.959741"},{"t":"反渗透","p":"0.944082"},{"t":"反渗透纯水机","p":"0.964588"},{"t":"渗透","p":"0.838643"},{"t":"纯水机","p":"0.928798"}]
88101
```
89102

90103
#### 设置阈值
91104
出词概率阈值(0-1之间的小数),1表示只有100%有把握的词才出
92105
```php
93-
$source = '李彦宏是马云最大威胁嘛?';
94-
PullWord::source($source)->threshold(0.4)->get();
95-
// 结果 李彦宏 马云 最大 威胁
106+
$source = '352净水器K10家用直饮大通量1000G 厨下RO反渗透纯水机';
107+
PullWord::pull($source)->threshold(0.4)->toJson()->get();
108+
// 结果 [{"t":"352"},{"t":"352净水器"},{"t":"净水"},{"t":"净水器"},{"t":"家用"},{"t":"大通量"},{"t":"通量"},{"t":"1000g"},{"t":"反渗透"},{"t":"反渗透纯水机"},{"t":"渗透"},{"t":"纯水"},{"t":"纯水机"},{"t":"水机"}]
96109
```

src/PullWord.php

Lines changed: 20 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -2,109 +2,46 @@
22

33
namespace PullWord;
44

5-
use GuzzleHttp\Client;
6-
use PullWord\Exception\HttpException;
7-
use PullWord\Exception\InvalidArgumentException;
5+
use PullWord\Service\Classify;
6+
use PullWord\Service\Pull;
7+
88

99
class PullWord
1010
{
11-
// 抽词内容
12-
private $source = '';
13-
14-
// 调试模式 默认关闭
15-
private $debug = 0;
16-
17-
// 出词概率阈值
18-
private $threshold = 0;
11+
protected $namespace = "\\PullWord\\Service\\";
1912

20-
// 是否json格式返回 默认返回文本
21-
private $json = 0;
22-
23-
private $client;
24-
25-
const URI = 'http://api.pullword.com';
13+
protected $source;
2614

2715
public function __construct($source = '')
2816
{
2917
if ($source) {
3018
$this->source = $source;
3119
}
32-
33-
$this->client = new Client([
34-
'base_uri' => self::URI,
35-
'timeout' => 10
36-
]);
3720
}
3821

39-
/**
40-
* 设置抽词内容
41-
*
42-
* @param [type] $source
43-
* @return $this
44-
*/
45-
public function source($source)
22+
public function service($service, $source = '')
4623
{
47-
$this->source = $source;
48-
49-
return $this;
50-
}
24+
if ($source) {
25+
$this->source = $source;
26+
}
5127

52-
/**
53-
* 结果返回json格式
54-
*
55-
* @return $this
56-
*/
57-
public function toJson()
58-
{
59-
$this->json = 1;
60-
return $this;
61-
}
28+
if (class_exists($service)) {
29+
$service = new $service($this->source);
30+
} else {
31+
$class = $this->namespace . ucfirst($service);
32+
$service = new $class($this->source);
33+
}
6234

63-
/**
64-
* 开启阈值
65-
* 出词概率阈值(0-1之间的小数),1表示只有100%有把握的词才出
66-
* @param $value
67-
* @return $this
68-
*/
69-
public function threshold($value)
70-
{
71-
$this->threshold = floatval($value);
72-
return $this;
35+
return $service;
7336
}
7437

75-
76-
/**
77-
* 开启调试模式
78-
* @return $this
79-
*/
80-
public function debug()
38+
public function pull($source = '')
8139
{
82-
$this->debug = 1;
83-
return $this;
40+
return $this->service(Pull::class, $source);
8441
}
8542

86-
/**
87-
* 获取结果
88-
* @return mixed
89-
* @throws HttpException
90-
* @throws InvalidArgumentException
91-
*/
92-
public function get()
43+
public function classify($source = '')
9344
{
94-
if (!$this->source) {
95-
throw new InvalidArgumentException("Source Empty!");
96-
}
97-
98-
$query = ['source' => $this->source, 'param1' => $this->threshold, 'param2' => $this->debug, 'json' => $this->json];
99-
100-
try {
101-
$response = $this->client->get('/get.php', [
102-
'query' => http_build_query($query)
103-
]);
104-
} catch(\Exception $e) {
105-
throw new HttpException($e->getMessage(), $e->getCode(), $e);
106-
}
107-
108-
return $response->getBody()->getContents();
45+
return $this->service(Classify::class, $source);
10946
}
11047
}

src/Service.php

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
<?php
2+
3+
namespace PullWord;
4+
5+
use GuzzleHttp\Client;
6+
use PullWord\Exception\HttpException;
7+
use PullWord\Exception\InvalidArgumentException;
8+
9+
abstract class Service
10+
{
11+
// 服务接口
12+
protected $uri;
13+
14+
// 内容
15+
protected $source = null;
16+
17+
// 调试模式 默认关闭
18+
protected $debug = false;
19+
20+
// 出词概率阈值
21+
protected $threshold = 0.5;
22+
23+
// 是否 返回 json 格式 默认返回文本
24+
protected $json = false;
25+
26+
private $client;
27+
28+
public function __construct($source = '')
29+
{
30+
$this->source = $source;
31+
32+
$this->client = new Client([
33+
'timeout' => 5
34+
]);
35+
}
36+
37+
/**
38+
* 设置抽词内容
39+
*
40+
* @param string $source
41+
* @return $this
42+
*/
43+
public function source($source)
44+
{
45+
$this->source = $source;
46+
47+
return $this;
48+
}
49+
50+
/**
51+
* 结果返回json格式
52+
*
53+
* @return $this
54+
*/
55+
public function toJson()
56+
{
57+
$this->json = true;
58+
59+
return $this;
60+
}
61+
62+
/**
63+
* 开启阈值
64+
* 出词概率阈值(0-1之间的小数),1表示只有100%有把握的词才出
65+
*
66+
* @param $value
67+
* @return $this
68+
*/
69+
public function threshold($value)
70+
{
71+
$this->threshold = floatval($value);
72+
73+
return $this;
74+
}
75+
76+
/**
77+
* 开启调试模式
78+
*
79+
* @return $this
80+
*/
81+
public function debug($value = true)
82+
{
83+
$this->debug = $value;
84+
85+
return $this;
86+
}
87+
88+
/**
89+
* 获取查询内容
90+
*
91+
* @return string
92+
*/
93+
public function getSource()
94+
{
95+
return $this->source;
96+
}
97+
98+
/**
99+
* 获取 api查询参数
100+
*
101+
* @return array
102+
*/
103+
public function getQuery()
104+
{
105+
return [
106+
'source' => $this->source,
107+
'param1' => $this->threshold,
108+
'param2' => $this->debug,
109+
'json' => $this->json
110+
];
111+
}
112+
113+
public function getApi()
114+
{
115+
return $this->uri;
116+
}
117+
118+
/**
119+
* 获取结果
120+
* @return mixed
121+
* @throws HttpException
122+
* @throws InvalidArgumentException
123+
*/
124+
public function get()
125+
{
126+
if (!$this->getSource()) {
127+
throw new InvalidArgumentException("缺少分词内容 source !");
128+
}
129+
130+
try {
131+
$response = $this->client->get($this->getApi(), [
132+
'query' => http_build_query($this->getQuery()),
133+
]);
134+
} catch (\Exception $e) {
135+
throw new HttpException($e->getMessage(), $e->getCode(), $e);
136+
}
137+
138+
return $response->getBody()->getContents();
139+
}
140+
}

src/Service/Classify.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?php
2+
3+
namespace PullWord\Service;
4+
5+
use PullWord\Service;
6+
7+
/**
8+
* 分类服务
9+
*
10+
*/
11+
class Classify extends Service
12+
{
13+
protected $uri = 'http://www.pullwave.com:50001/get.php';
14+
}

src/Service/Pull.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?php
2+
3+
namespace PullWord\Service;
4+
5+
use PullWord\Service;
6+
7+
/**
8+
* 分词服务
9+
*
10+
*/
11+
class Pull extends Service
12+
{
13+
protected $uri = 'http://api.pullword.com/get.php';
14+
}

0 commit comments

Comments
 (0)