mirror of
https://github.com/owncloud/ocis.git
synced 2025-04-18 23:44:07 +03:00
Merge pull request #11163 from owncloud/search_prevent_file_indexing
feat: include special directories that won't be indexed
This commit is contained in:
commit
b29dc771f4
@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
@ -42,6 +43,15 @@ const (
|
||||
_slowQueryDuration = 500 * time.Millisecond
|
||||
)
|
||||
|
||||
var (
|
||||
// _skipPathNames is a list of paths that should be skipped when walking the tree.
|
||||
// In case of directories, just the directory itself is skipped, not its content.
|
||||
_skipPathNames = []string{"."}
|
||||
// _skipPathDirs is a list of directories that should be skipped when walking the tree.
|
||||
// Both the directory itself and its content are skipped.
|
||||
_skipPathDirs = []string{"./.space"}
|
||||
)
|
||||
|
||||
// Searcher is the interface to the SearchService
|
||||
type Searcher interface {
|
||||
Search(ctx context.Context, req *searchsvc.SearchRequest) (*searchsvc.SearchResponse, error)
|
||||
@ -436,8 +446,14 @@ func (s *Service) IndexSpace(spaceID *provider.StorageSpaceId) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
relPath := utils.MakeRelativePath(filepath.Join(wd, info.Path))
|
||||
if slices.Contains(_skipPathDirs, relPath) {
|
||||
s.logger.Info().Str("path", relPath).Msg("skipping directory from being indexed")
|
||||
return filepath.SkipDir
|
||||
}
|
||||
|
||||
ref := &provider.Reference{
|
||||
Path: utils.MakeRelativePath(filepath.Join(wd, info.Path)),
|
||||
Path: relPath,
|
||||
ResourceId: &rootID,
|
||||
}
|
||||
s.logger.Debug().Str("path", ref.Path).Msg("Walking tree")
|
||||
@ -484,6 +500,18 @@ func (s *Service) UpsertItem(ref *provider.Reference) {
|
||||
return
|
||||
}
|
||||
|
||||
if slices.Contains(_skipPathNames, path) || slices.Contains(_skipPathDirs, path) {
|
||||
s.logger.Info().Str("path", path).Msg("file won't be indexed")
|
||||
return
|
||||
}
|
||||
|
||||
for _, skipPath := range _skipPathDirs {
|
||||
if strings.HasPrefix(path, skipPath+"/") {
|
||||
s.logger.Info().Str("path", path).Msg("file is in a directory that won't be indexed")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
doc, err := s.extractor.Extract(ctx, stat.Info)
|
||||
if err != nil {
|
||||
s.logger.Error().Err(err).Msg("failed to extract resource content")
|
||||
|
@ -16,6 +16,7 @@ import (
|
||||
"github.com/owncloud/ocis/v2/services/search/pkg/config"
|
||||
"github.com/owncloud/ocis/v2/services/search/pkg/content"
|
||||
contentMocks "github.com/owncloud/ocis/v2/services/search/pkg/content/mocks"
|
||||
"github.com/owncloud/ocis/v2/services/search/pkg/engine"
|
||||
engineMocks "github.com/owncloud/ocis/v2/services/search/pkg/engine/mocks"
|
||||
"github.com/owncloud/ocis/v2/services/search/pkg/search"
|
||||
revactx "github.com/owncloud/reva/v2/pkg/ctx"
|
||||
@ -63,13 +64,49 @@ var _ = Describe("Searchprovider", func() {
|
||||
ri = &sprovider.ResourceInfo{
|
||||
Id: &sprovider.ResourceId{
|
||||
StorageId: "storageid",
|
||||
SpaceId: "spaceid",
|
||||
OpaqueId: "opaqueid",
|
||||
},
|
||||
ParentId: &sprovider.ResourceId{
|
||||
StorageId: "storageid",
|
||||
SpaceId: "spaceid",
|
||||
OpaqueId: "parentopaqueid",
|
||||
},
|
||||
Path: "foo.pdf",
|
||||
Path: "./foo.pdf",
|
||||
Size: 12345,
|
||||
Mtime: &typesv1beta1.Timestamp{Seconds: 4000},
|
||||
}
|
||||
|
||||
ri2 = &sprovider.ResourceInfo{
|
||||
Id: &sprovider.ResourceId{
|
||||
StorageId: "storageid",
|
||||
SpaceId: "spaceid",
|
||||
OpaqueId: "opaqueid",
|
||||
},
|
||||
ParentId: &sprovider.ResourceId{
|
||||
StorageId: "storageid",
|
||||
SpaceId: "spaceid",
|
||||
OpaqueId: "parentopaqueid",
|
||||
},
|
||||
Type: sprovider.ResourceType_RESOURCE_TYPE_CONTAINER,
|
||||
Path: "./.space",
|
||||
Size: 12345,
|
||||
Mtime: &typesv1beta1.Timestamp{Seconds: 4000},
|
||||
}
|
||||
|
||||
ri2_1 = &sprovider.ResourceInfo{
|
||||
Id: &sprovider.ResourceId{
|
||||
StorageId: "storageid",
|
||||
SpaceId: "spaceid",
|
||||
OpaqueId: "opaqueid_1",
|
||||
},
|
||||
ParentId: &sprovider.ResourceId{
|
||||
StorageId: "storageid",
|
||||
SpaceId: "spaceid",
|
||||
OpaqueId: "opaqueid",
|
||||
},
|
||||
Type: sprovider.ResourceType_RESOURCE_TYPE_FILE,
|
||||
Path: "./.space/file.pdf",
|
||||
Size: 12345,
|
||||
Mtime: &typesv1beta1.Timestamp{Seconds: 4000},
|
||||
}
|
||||
@ -122,15 +159,46 @@ var _ = Describe("Searchprovider", func() {
|
||||
User: user,
|
||||
}, nil)
|
||||
extractor.On("Extract", mock.Anything, mock.Anything, mock.Anything).Return(content.Document{}, nil)
|
||||
indexClient.On("Upsert", mock.Anything, mock.Anything).Return(nil)
|
||||
indexClient.On("Upsert", mock.Anything, mock.MatchedBy(func(r engine.Resource) bool {
|
||||
return r.ID == "storageid$spaceid!opaqueid" && r.Path == "./foo.pdf"
|
||||
})).Return(nil)
|
||||
indexClient.On("Search", mock.Anything, mock.Anything).Return(&searchsvc.SearchIndexResponse{}, nil)
|
||||
gatewayClient.On("Stat", mock.Anything, mock.Anything).Return(&sprovider.StatResponse{
|
||||
gatewayClient.On("Stat", mock.Anything, mock.MatchedBy(func(sreq *sprovider.StatRequest) bool {
|
||||
return sreq.Ref.ResourceId.StorageId == "storageid" &&
|
||||
sreq.Ref.ResourceId.OpaqueId == "spaceid" &&
|
||||
sreq.Ref.ResourceId.SpaceId == "spaceid"
|
||||
})).Return(&sprovider.StatResponse{
|
||||
Status: status.NewOK(context.Background()),
|
||||
Info: ri,
|
||||
}, nil)
|
||||
err := s.IndexSpace(&sprovider.StorageSpaceId{OpaqueId: "storageid$spaceid!spaceid"})
|
||||
Expect(err).ShouldNot(HaveOccurred())
|
||||
})
|
||||
|
||||
// Checks that IndexSpace does not push anything to the index when the
// stat'ed resource is the ".space" directory (ri2) containing a single
// file (ri2_1).
It("walks the space skipping .space directory", func() {
	gatewayClient.On("GetUserByClaim", mock.Anything, mock.Anything).Return(&userv1beta1.GetUserByClaimResponse{
		Status: status.NewOK(context.Background()),
		User:   user,
	}, nil)
	extractor.On("Extract", mock.Anything, mock.Anything, mock.Anything).Return(content.Document{}, nil)
	gatewayClient.On("ListContainer", mock.Anything, mock.Anything).Return(&sprovider.ListContainerResponse{
		Status: status.NewOK(context.Background()),
		Infos:  []*sprovider.ResourceInfo{ri2_1},
	}, nil)
	gatewayClient.On("Stat", mock.Anything, mock.MatchedBy(func(sreq *sprovider.StatRequest) bool {
		return sreq.Ref.ResourceId.StorageId == "storageid" &&
			sreq.Ref.ResourceId.OpaqueId == "spaceid" &&
			sreq.Ref.ResourceId.SpaceId == "spaceid"
	})).Return(&sprovider.StatResponse{
		Status: status.NewOK(context.Background()),
		Info:   ri2,
	}, nil)

	err := s.IndexSpace(&sprovider.StorageSpaceId{OpaqueId: "storageid$spaceid!spaceid"})
	Expect(err).ShouldNot(HaveOccurred())

	// AssertNotCalled only inspects the calls recorded so far, so it has to
	// run after IndexSpace; checking beforehand would always succeed.
	indexClient.AssertNotCalled(GinkgoT(), "Upsert", mock.Anything, mock.Anything)
	indexClient.AssertNotCalled(GinkgoT(), "Search", mock.Anything, mock.Anything)
})
|
||||
})
|
||||
|
||||
Describe("Search", func() {
|
||||
|
@ -262,22 +262,20 @@ The expected failures in this file are from features in the owncloud/ocis repo.
|
||||
- [apiSearch1/search.feature:42](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L42)
|
||||
- [apiSearch1/search.feature:69](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L69)
|
||||
- [apiSearch1/search.feature:70](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L70)
|
||||
- [apiSearch1/search.feature:111](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L111)
|
||||
- [apiSearch1/search.feature:112](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L112)
|
||||
- [apiSearch1/search.feature:196](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L196)
|
||||
- [apiSearch1/search.feature:197](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L197)
|
||||
- [apiSearch1/search.feature:198](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L198)
|
||||
- [apiSearch1/search.feature:220](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L220)
|
||||
- [apiSearch1/search.feature:221](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L221)
|
||||
- [apiSearch1/search.feature:222](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L222)
|
||||
- [apiSearch1/search.feature:241](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L241)
|
||||
- [apiSearch1/search.feature:242](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L242)
|
||||
- [apiSearch1/search.feature:243](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L243)
|
||||
- [apiSearch1/search.feature:260](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L260)
|
||||
- [apiSearch1/search.feature:261](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L261)
|
||||
- [apiSearch1/search.feature:259](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L259)
|
||||
- [apiSearch1/search.feature:277](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L277)
|
||||
- [apiSearch1/search.feature:278](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L278)
|
||||
- [apiSearch1/search.feature:279](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L279)
|
||||
- [apiSearch1/search.feature:301](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L301)
|
||||
- [apiSearch1/search.feature:302](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L302)
|
||||
- [apiSearch1/search.feature:303](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L303)
|
||||
- [apiSearch1/search.feature:353](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L353)
|
||||
- [apiSearch1/search.feature:354](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L354)
|
||||
- [apiSearch1/search.feature:355](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch1/search.feature#L355)
|
||||
- [apiSearch2/tagSearch.feature:34](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch2/tagSearch.feature#L34)
|
||||
- [apiSearch2/tagSearch.feature:35](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch2/tagSearch.feature#L35)
|
||||
- [apiSearch2/tagSearch.feature:62](https://github.com/owncloud/ocis/blob/master/tests/acceptance/features/apiSearch2/tagSearch.feature#L62)
|
||||
|
@ -97,14 +97,14 @@ Feature: Search
|
||||
| new |
|
||||
| spaces |
|
||||
|
||||
@issue-10329
|
||||
Scenario Outline: user can search hidden files
|
||||
@issue-10329 @issue-11028
|
||||
Scenario Outline: user can't search hidden files
|
||||
Given using <dav-path-version> DAV path
|
||||
And user "Alice" has created a folder ".space" in space "project101"
|
||||
When user "Alice" searches for "*.sp*" using the WebDAV API
|
||||
Then the HTTP status code should be "207"
|
||||
And the search result should contain "1" entries
|
||||
And the search result of user "Alice" should contain these entries:
|
||||
And the search result should contain "0" entries
|
||||
And the search result of user "Alice" should not contain these entries:
|
||||
| /.space |
|
||||
Examples:
|
||||
| dav-path-version |
|
||||
@ -172,13 +172,12 @@ Feature: Search
|
||||
| new |
|
||||
| spaces |
|
||||
|
||||
@issue-10329
|
||||
Scenario: user can search project space by name
|
||||
@issue-10329 @issue-11028
|
||||
Scenario: user can't search project space by name
|
||||
Given using spaces DAV path
|
||||
When user "Alice" searches for '*project101*' using the WebDAV API
|
||||
Then the HTTP status code should be "207"
|
||||
And the search result should contain "1" entries
|
||||
And for user "Alice" the search result should contain space "project101"
|
||||
And the search result should contain "0" entries
|
||||
|
||||
@issue-10329
|
||||
Scenario Outline: user can search inside folder in space
|
||||
|
@ -168,7 +168,7 @@ Feature: media type search
|
||||
| *rar* | /data.rar |
|
||||
| *bzip2* | /data.tar.bz2 |
|
||||
|
||||
@issue-10329
|
||||
@issue-10329 @issue-11028
|
||||
Scenario: search files with different mediatype filter
|
||||
Given user "Alice" has created folder "testFolder"
|
||||
And user "Alice" has uploaded file "filesForUpload/lorem.txt" to "lorem.txt"
|
||||
@ -184,9 +184,8 @@ Feature: media type search
|
||||
And user "Alice" has uploaded file "filesForUpload/data.tar.bz2" to "data.tar.bz2"
|
||||
When user "Alice" searches for "mediatype:folder" using the WebDAV API
|
||||
Then the HTTP status code should be "207"
|
||||
And the search result should contain "2" entries
|
||||
And the search result should contain "1" entries
|
||||
And the search result of user "Alice" should contain these entries:
|
||||
| %spaceid% |
|
||||
| testFolder |
|
||||
When user "Alice" searches for "mediatype:document" using the WebDAV API
|
||||
Then the HTTP status code should be "207"
|
||||
|
@ -242,7 +242,7 @@ Feature: tag search
|
||||
| new |
|
||||
| spaces |
|
||||
|
||||
@issue-10329
|
||||
@issue-10329 @issue-11163
|
||||
Scenario Outline: search resources using different search patterns (KQL feature)
|
||||
Given using spaces DAV path
|
||||
And user "Alice" has created the following folders
|
||||
@ -278,5 +278,4 @@ Feature: tag search
|
||||
| (tag:mathe OR tag:klass10) NOT tag:physik | 2 | /answers | /verification work |
|
||||
| tag:mathe NOT name:exercises | 1 | /answers | |
|
||||
| tag:mathe AND NOT name:exercises | 1 | /answers | |
|
||||
# The third finding is the personal space itself
|
||||
| NOT tag:mathe | 3 | /verification work | /withoutTagFolder |
|
||||
| NOT tag:mathe | 2 | /verification work | /withoutTagFolder |
|
||||
|
Loading…
x
Reference in New Issue
Block a user